Implemented multi-EIN searches. Confirmed that EINData.compare() works. test.py contains a usage example.

master
Griffiths Lott 3 years ago
parent f9748d9662
commit d698197d52
  1. 4
      .gitignore
  2. 61
      EINService.py
  3. 31
      test.py

4
.gitignore vendored

@ -1 +1,3 @@
/venv
/venv
*.xlsx
/__pycache__

@ -5,6 +5,7 @@ from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime as dt
from pprint import pprint as prt
import pandas as pd
SAMPLE_EIN = "59-1571026"
@ -14,32 +15,34 @@ class EINData:
def __init__(self, ein: str, buinessName: str, address1: str, city:str, state:str, zip: str, phone: str) -> None:
if re.search("\d{2}(-|)\d{7}", str(ein)) == None:
try: raise Exception(f"Invalid EIN: {ein}")
except Exception as e:
print(e)
return None
raise Exception(f"Invalid EIN: {ein}")
self.ein = ein.strip().replace('-','')
self.buinessName = buinessName.lower().strip()
self.address1 = address1.lower().strip()
self.city = city.lower().strip()
self.state = state.lower().strip()
self.phone = phone.strip().replace('-','').replace('(',"").replace(')','').replace('+','')
try:
self.zip = int(zip.replace('-','').strip())
except:
print(f"Invalid ZIP code: {zip}")
self.zip = int(zip.replace('-','').strip())
def __str__(self) -> str:
return f"""EIN: {self.ein}\t | Name: {self.buinessName}\t\t| Address: {self.address1}\t\t| City: {self.city}\t| State: {self.state}\t| Phone: {self.phone}"""
def get_ein(self) -> str:
return f"{self.ein[0:2]}-{self.ein[2:]}"
def compare(self, otherEIN: 'EINData') -> dict:
compareDict = {
"buinessName" : True if self.buinessName == otherEIN.businessName else False,
"address" : True if self.address1 == otherEIN.address1 else False,
"city": True if self.city == otherEIN.city else False,
"state": True if self.state == otherEIN.state else False,
"zip" : True if self.zip == otherEIN.zip else False
}
try:
compareDict = {
"buinessName" : True if self.buinessName == otherEIN.buinessName else False,
"address" : True if self.address1 == otherEIN.address1 else False,
"city": True if self.city == otherEIN.city else False,
"state": True if self.state == otherEIN.state else False,
"zip" : True if self.zip == otherEIN.zip else False
}
except Exception as e:
print(f"""Exception:\n{e}\nSelf:{self}\nOther: {otherEIN}\n""")
return None
score = 0
for v in compareDict.values():
score += 1 if v else 0
@ -47,7 +50,6 @@ class EINData:
return compareDict
class EINService(ABC):
@classmethod
@abstractmethod
@ -64,15 +66,14 @@ class EINService(ABC):
"""
@classmethod
def _isEIN(self, ein: int) -> bool:
return re.search("\d{2}(-|)\d{7}", str(ein)) != None
def _isEIN(self, ein: str) -> bool:
return re.search("\d{2}(-|)\d{7}", ein) != None
class EINTaxIDService(EINService):
_url = "https://eintaxid.com"
_htmlSearchInputID = "searchterm"
def search_ein(self, ein: str) -> EINResult:
def search_ein(self, ein: str) -> EINData:
try:
if not self._isEIN(ein):
print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
@ -84,7 +85,11 @@ class EINTaxIDService(EINService):
req = rq.request("POST",self._url + "/search-ajax.php", data={"query": ein}, \
headers={'X-Requested-With': 'XMLHttpRequest'})
soup = bsp(req.content, "html.parser")
text = soup.text.splitlines()[4]
try:
text = soup.text.splitlines()[4]
except:
print(f"Failed: {ein} | {soup}")
return None
data = self._parse_return(text)
return EINData(
@ -97,10 +102,11 @@ class EINTaxIDService(EINService):
data["phone"]
)
def search_eins(eins: list[EINData]):
pass
def search_eins(self, eins: str) -> list[EINData]:
return [self.search_ein(ein) for ein in eins]
def _parse_return(self, content: str) -> EINData:
def _parse_return(self, content: str) -> dict:
m = re.search("EIN Number:", content)
company = content[0:m.start()].strip()
@ -130,9 +136,4 @@ class EINTaxIDService(EINService):
"state": state,
"zip": zip,
"phone": phone
}
testService = EINTaxIDService()
print(dt.now())
prt(testService.search_ein(SAMPLE_EIN))
}

@ -0,0 +1,31 @@
import EINService as es
import pandas as pd
# Usage example: for every row of the spreadsheet, build an EINData from our
# own columns, look the same EIN up through EINTaxIDService, and collect the
# result of comparing the two records.
data = pd.read_excel("ExampleCSP.xlsx")
einService = es.EINTaxIDService()
scores = []
for _, deal in data.iterrows():
    # Record built from the spreadsheet row; rows that fail validation are skipped.
    try:
        leafEIN = es.EINData(
            str(deal["Lessee Tax-ID"]),
            deal["NAME"],
            deal["ADDRESS"],
            deal["CITY"],
            deal["STATE"],
            str(deal["ZIP"]),
            str(deal["PHONE"]),
        )
    except Exception as exc:
        # Report the reason as well: a bare ``except`` hid it and also
        # swallowed KeyboardInterrupt.
        print(f"Failed to create EINData (LEAF): {deal['Lessee Tax-ID']} ({exc!r})")
        continue
    # Record fetched from the external service for the same EIN.
    try:
        external = einService.search_ein(leafEIN.get_ein())
    except Exception as exc:
        print(f"Failed to create EINData (EXTERNAL): {deal['Lessee Tax-ID']} ({exc!r})")
        continue
    if external is None:
        # The lookup produced no record; nothing to compare against.
        continue
    scores.append(leafEIN.compare(external))
print(f"Scores:\n{scores}")
Loading…
Cancel
Save