Implemented multi-EIN searches. Confirmed EINData.compare() works. test.py contains an example of usage.

master
Griffiths Lott 3 years ago
parent f9748d9662
commit d698197d52
  1. 2
      .gitignore
  2. 59
      EINService.py
  3. 31
      test.py

2
.gitignore vendored

@ -1 +1,3 @@
/venv /venv
*.xlsx
/__pycache__

@ -5,6 +5,7 @@ from abc import ABC, abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime as dt from datetime import datetime as dt
from pprint import pprint as prt from pprint import pprint as prt
import pandas as pd
SAMPLE_EIN = "59-1571026" SAMPLE_EIN = "59-1571026"
@ -14,32 +15,34 @@ class EINData:
def __init__(self, ein: str, buinessName: str, address1: str, city:str, state:str, zip: str, phone: str) -> None: def __init__(self, ein: str, buinessName: str, address1: str, city:str, state:str, zip: str, phone: str) -> None:
if re.search("\d{2}(-|)\d{7}", str(ein)) == None: if re.search("\d{2}(-|)\d{7}", str(ein)) == None:
try: raise Exception(f"Invalid EIN: {ein}") raise Exception(f"Invalid EIN: {ein}")
except Exception as e:
print(e)
return None
self.ein = ein.strip().replace('-','') self.ein = ein.strip().replace('-','')
self.buinessName = buinessName.lower().strip() self.buinessName = buinessName.lower().strip()
self.address1 = address1.lower().strip() self.address1 = address1.lower().strip()
self.city = city.lower().strip() self.city = city.lower().strip()
self.state = state.lower().strip() self.state = state.lower().strip()
self.phone = phone.strip().replace('-','').replace('(',"").replace(')','').replace('+','') self.phone = phone.strip().replace('-','').replace('(',"").replace(')','').replace('+','')
try: self.zip = int(zip.replace('-','').strip())
self.zip = int(zip.replace('-','').strip())
except:
print(f"Invalid ZIP code: {zip}") def __str__(self) -> str:
return f"""EIN: {self.ein}\t | Name: {self.buinessName}\t\t| Address: {self.address1}\t\t| City: {self.city}\t| State: {self.state}\t| Phone: {self.phone}"""
def get_ein(self) -> str: def get_ein(self) -> str:
return f"{self.ein[0:2]}-{self.ein[2:]}" return f"{self.ein[0:2]}-{self.ein[2:]}"
def compare(self, otherEIN: 'EINData') -> dict: def compare(self, otherEIN: 'EINData') -> dict:
compareDict = { try:
"buinessName" : True if self.buinessName == otherEIN.businessName else False, compareDict = {
"address" : True if self.address1 == otherEIN.address1 else False, "buinessName" : True if self.buinessName == otherEIN.buinessName else False,
"city": True if self.city == otherEIN.city else False, "address" : True if self.address1 == otherEIN.address1 else False,
"state": True if self.state == otherEIN.state else False, "city": True if self.city == otherEIN.city else False,
"zip" : True if self.zip == otherEIN.zip else False "state": True if self.state == otherEIN.state else False,
} "zip" : True if self.zip == otherEIN.zip else False
}
except Exception as e:
print(f"""Exception:\n{e}\nSelf:{self}\nOther: {otherEIN}\n""")
return None
score = 0 score = 0
for v in compareDict.values(): for v in compareDict.values():
score += 1 if v else 0 score += 1 if v else 0
@ -47,7 +50,6 @@ class EINData:
return compareDict return compareDict
class EINService(ABC): class EINService(ABC):
@classmethod @classmethod
@abstractmethod @abstractmethod
@ -64,15 +66,14 @@ class EINService(ABC):
""" """
@classmethod @classmethod
def _isEIN(self, ein: int) -> bool: def _isEIN(self, ein: str) -> bool:
return re.search("\d{2}(-|)\d{7}", str(ein)) != None return re.search("\d{2}(-|)\d{7}", ein) != None
class EINTaxIDService(EINService): class EINTaxIDService(EINService):
_url = "https://eintaxid.com" _url = "https://eintaxid.com"
_htmlSearchInputID = "searchterm"
def search_ein(self, ein: str) -> EINResult: def search_ein(self, ein: str) -> EINData:
try: try:
if not self._isEIN(ein): if not self._isEIN(ein):
print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX") print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
@ -84,7 +85,11 @@ class EINTaxIDService(EINService):
req = rq.request("POST",self._url + "/search-ajax.php", data={"query": ein}, \ req = rq.request("POST",self._url + "/search-ajax.php", data={"query": ein}, \
headers={'X-Requested-With': 'XMLHttpRequest'}) headers={'X-Requested-With': 'XMLHttpRequest'})
soup = bsp(req.content, "html.parser") soup = bsp(req.content, "html.parser")
text = soup.text.splitlines()[4] try:
text = soup.text.splitlines()[4]
except:
print(f"Failed: {ein} | {soup}")
return None
data = self._parse_return(text) data = self._parse_return(text)
return EINData( return EINData(
@ -97,10 +102,11 @@ class EINTaxIDService(EINService):
data["phone"] data["phone"]
) )
def search_eins(eins: list[EINData]): def search_eins(self, eins: str) -> list[EINData]:
pass return [self.search_ein(ein) for ein in eins]
def _parse_return(self, content: str) -> EINData: def _parse_return(self, content: str) -> dict:
m = re.search("EIN Number:", content) m = re.search("EIN Number:", content)
company = content[0:m.start()].strip() company = content[0:m.start()].strip()
@ -131,8 +137,3 @@ class EINTaxIDService(EINService):
"zip": zip, "zip": zip,
"phone": phone "phone": phone
} }
testService = EINTaxIDService()
print(dt.now())
prt(testService.search_ein(SAMPLE_EIN))

@ -0,0 +1,31 @@
import EINService as es
import pandas as pd
# Example usage: for every row of the spreadsheet, build an EINData from the
# row's own columns, look the same EIN up through EINTaxIDService, and collect
# the result of comparing the two records.
data = pd.read_excel("ExampleCSP.xlsx")
einService = es.EINTaxIDService()
scores = []
for _, deal in data.iterrows():
    # Local ("leaf") record built from the spreadsheet columns. Tax ID, ZIP
    # and phone are coerced to str because the EINData constructor calls
    # string methods on them.
    try:
        leafEIN = es.EINData(
            str(deal["Lessee Tax-ID"]),
            deal["NAME"],
            deal["ADDRESS"],
            deal["CITY"],
            deal["STATE"],
            str(deal["ZIP"]),
            str(deal["PHONE"]),
        )
    except Exception:
        # `except Exception` rather than a bare `except` so that Ctrl-C and
        # SystemExit still stop the script instead of being reported as a
        # bad row.
        print(f"Failed to create EINData (LEAF): {deal['Lessee Tax-ID']}")
        continue

    # External record fetched from the lookup service for the same EIN.
    try:
        external = einService.search_ein(leafEIN.get_ein())
    except Exception:
        print(f"Failed to create EINData (EXTERNAL): {deal['Lessee Tax-ID']}")
        continue

    # search_ein may hand back None instead of a record; skip those rows.
    if external is None:
        continue

    scores.append(leafEIN.compare(external))

print(f"Scores:\n{scores}")
Loading…
Cancel
Save