diff --git a/.gitignore b/.gitignore index a979ee7..1e4f1b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -/venv \ No newline at end of file +/venv +*.xlsx +/__pycache__ \ No newline at end of file diff --git a/EINService.py b/EINService.py index 2168775..6c8a8c7 100644 --- a/EINService.py +++ b/EINService.py @@ -5,6 +5,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from datetime import datetime as dt from pprint import pprint as prt +import pandas as pd SAMPLE_EIN = "59-1571026" @@ -14,32 +15,34 @@ class EINData: def __init__(self, ein: str, buinessName: str, address1: str, city:str, state:str, zip: str, phone: str) -> None: if re.search("\d{2}(-|)\d{7}", str(ein)) == None: - try: raise Exception(f"Invalid EIN: {ein}") - except Exception as e: - print(e) - return None + raise Exception(f"Invalid EIN: {ein}") self.ein = ein.strip().replace('-','') self.buinessName = buinessName.lower().strip() self.address1 = address1.lower().strip() self.city = city.lower().strip() self.state = state.lower().strip() self.phone = phone.strip().replace('-','').replace('(',"").replace(')','').replace('+','') - try: - self.zip = int(zip.replace('-','').strip()) - except: - print(f"Invalid ZIP code: {zip}") + self.zip = int(zip.replace('-','').strip()) + + def __str__(self) -> str: + return f"""EIN: {self.ein}\t | Name: {self.buinessName}\t\t| Address: {self.address1}\t\t| City: {self.city}\t| State: {self.state}\t| Phone: {self.phone}""" + def get_ein(self) -> str: return f"{self.ein[0:2]}-{self.ein[2:]}" def compare(self, otherEIN: 'EINData') -> dict: - compareDict = { - "buinessName" : True if self.buinessName == otherEIN.businessName else False, - "address" : True if self.address1 == otherEIN.address1 else False, - "city": True if self.city == otherEIN.city else False, - "state": True if self.state == otherEIN.state else False, - "zip" : True if self.zip == otherEIN.zip else False - } + try: + compareDict = { + "buinessName" : True if self.buinessName == otherEIN.buinessName else False, + "address" : True if self.address1 == otherEIN.address1 else False, + "city": True if self.city == otherEIN.city else False, + "state": True if self.state == otherEIN.state else False, + "zip" : True if self.zip == otherEIN.zip else False + } + except Exception as e: + print(f"""Exception:\n{e}\nSelf:{self}\nOther: {otherEIN}\n""") + return None score = 0 for v in compareDict.values(): score += 1 if v else 0 @@ -47,7 +50,6 @@ class EINData: return compareDict - class EINService(ABC): @classmethod @abstractmethod @@ -64,15 +66,14 @@ class EINService(ABC): """ @classmethod - def _isEIN(self, ein: int) -> bool: - return re.search("\d{2}(-|)\d{7}", str(ein)) != None + def _isEIN(self, ein: str) -> bool: + return re.search("\d{2}(-|)\d{7}", ein) != None class EINTaxIDService(EINService): _url = "https://eintaxid.com" - _htmlSearchInputID = "searchterm" - def search_ein(self, ein: str) -> EINResult: + def search_ein(self, ein: str) -> EINData: try: if not self._isEIN(ein): print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX") @@ -84,7 +85,11 @@ class EINTaxIDService(EINService): req = rq.request("POST",self._url + "/search-ajax.php", data={"query": ein}, \ headers={'X-Requested-With': 'XMLHttpRequest'}) soup = bsp(req.content, "html.parser") - text = soup.text.splitlines()[4] + try: + text = soup.text.splitlines()[4] + except: + print(f"Failed: {ein} | {soup}") + return None data = self._parse_return(text) return EINData( @@ -97,10 +102,11 @@ class EINTaxIDService(EINService): data["phone"] ) - def search_eins(eins: list[EINData]): - pass + def search_eins(self, eins: str) -> list[EINData]: + return [self.search_ein(ein) for ein in eins] - def _parse_return(self, content: str) -> EINData: + + def _parse_return(self, content: str) -> dict: m = re.search("EIN Number:", content) company = content[0:m.start()].strip() @@ -130,9 +136,4 @@ class EINTaxIDService(EINService): "state": state, "zip": zip, "phone": phone - } - - -testService = EINTaxIDService() -print(dt.now()) -prt(testService.search_ein(SAMPLE_EIN)) \ No newline at end of file + } \ No newline at end of file diff --git a/test.py b/test.py new file mode 100644 index 0000000..41139b0 --- /dev/null +++ b/test.py @@ -0,0 +1,31 @@ +import EINService as es +import pandas as pd + +data = pd.read_excel("ExampleCSP.xlsx") +einService = es.EINTaxIDService() + +scores = [] +for _, deal in data.iterrows(): + try: + leafEIN = es.EINData( + str(deal["Lessee Tax-ID"]), + deal["NAME"], + deal["ADDRESS"], + deal["CITY"], + deal["STATE"], + str(deal["ZIP"]), + str(deal["PHONE"]), + ) + except: + print(f"Failed to create EINData (LEAF): {deal['Lessee Tax-ID']}") + continue + + try: + external = einService.search_ein(leafEIN.get_ein()) + if external == None: continue + except: + print(f"Failed to create EINData (EXTERNAL): {deal['Lessee Tax-ID']}") + continue + scores.append(leafEIN.compare(external)) + +print(f"Scores:\n{scores}") \ No newline at end of file