"""Look up business details for an EIN (Employer Identification Number)."""

import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime as dt
from pprint import pprint as prt
from typing import Optional, Union

import requests as rq
from bs4 import BeautifulSoup as bsp

SAMPLE_EIN = "59-1571026"

# Accepted EIN formats: XX-XXXXXXX and XXXXXXXXX. Used with ``fullmatch`` so
# that extra characters around the number are rejected.
_EIN_PATTERN = re.compile(r"\d{2}-?\d{7}")

# Deletes the punctuation removed from phone numbers: "-", "(", ")" and "+".
_PHONE_PUNCTUATION = str.maketrans("", "", "-()+")


def _clean_text(value: Optional[str]) -> str:
    """Return *value* lower-cased and stripped; ``None`` becomes ``""``."""
    return (value or "").lower().strip()


@dataclass(init=False)
class EINData:
    """Normalised business record keyed by EIN.

    The fields are declared so that the generated ``__repr__`` and ``__eq__``
    reflect the stored values; the hand-written ``__init__`` normalises the
    raw input before storing it.

    Attributes are stored normalised: ``ein`` as nine digits without the
    hyphen, text fields lower-cased and stripped, ``phone`` without
    ``-``, ``(``, ``)`` and ``+``, and ``zip`` as an ``int`` (or ``None`` when
    it cannot be parsed).
    """

    ein: str
    buinessName: str
    address1: str
    city: str
    state: str
    zip: Optional[int]
    phone: str

    def __init__(self, ein: str, buinessName: str, address1: str, city: str,
                 state: str, zip: str, phone: str) -> None:
        """Validate and normalise the raw values.

        Args:
            ein: EIN as ``XX-XXXXXXX`` or ``XXXXXXXXX``.
            buinessName: Business name (parameter spelling kept for callers).
            address1: First address line.
            city: City name.
            state: State name or abbreviation.
            zip: ZIP code; hyphens are removed before conversion to ``int``.
            phone: Phone number.

        Raises:
            ValueError: If ``ein`` is not in one of the accepted formats.
        """
        ein_text = str(ein).strip()
        if _EIN_PATTERN.fullmatch(ein_text) is None:
            # Fail loudly instead of returning a half-initialised object.
            raise ValueError(f"Invalid EIN: {ein}")

        self.ein = ein_text.replace("-", "")
        self.buinessName = _clean_text(buinessName)
        self.address1 = _clean_text(address1)
        self.city = _clean_text(city)
        self.state = _clean_text(state)
        self.phone = (phone or "").strip().translate(_PHONE_PUNCTUATION)
        try:
            # NOTE: storing the ZIP as an int drops leading zeros; kept for
            # compatibility with existing callers.
            self.zip = int(str(zip).replace("-", "").strip())
        except ValueError:
            print(f"Invalid ZIP code: {zip}")
            # Always define the attribute so later comparisons cannot fail.
            self.zip = None

    def get_ein(self) -> str:
        """Return the EIN formatted as ``XX-XXXXXXX``."""
        return f"{self.ein[0:2]}-{self.ein[2:]}"

    def compare(self, otherEIN: 'EINData') -> dict:
        """Compare this record with another, field by field.

        Args:
            otherEIN: The record to compare against.

        Returns:
            A dict mapping ``"buinessName"``, ``"address"``, ``"city"``,
            ``"state"`` and ``"zip"`` to booleans, plus ``"score"`` holding
            the number of matching fields.
        """
        compare_dict = {
            # The key keeps its historical spelling for existing consumers.
            "buinessName": self.buinessName == otherEIN.buinessName,
            "address": self.address1 == otherEIN.address1,
            "city": self.city == otherEIN.city,
            "state": self.state == otherEIN.state,
            "zip": self.zip == otherEIN.zip,
        }
        # Booleans sum as 0/1, giving the count of matching fields.
        compare_dict["score"] = sum(compare_dict.values())
        return compare_dict


class EINService(ABC):
    """Interface for services that look up business data by EIN."""

    @abstractmethod
    def search_ein(self, ein: str) -> Optional[EINData]:
        """Take an EIN and return information from a search using the service."""

    @abstractmethod
    def search_eins(self, eins: list[EINData]):
        """Search for several EINs at once."""

    @classmethod
    def _isEIN(cls, ein: Union[str, int]) -> bool:
        """Return True if *ein* looks like ``XX-XXXXXXX`` or ``XXXXXXXXX``."""
        return _EIN_PATTERN.fullmatch(str(ein).strip()) is not None


class EINTaxIDService(EINService):
    """EIN lookup backed by the eintaxid.com AJAX search endpoint."""

    _url = "https://eintaxid.com"
    _htmlSearchInputID = "searchterm"
    # Seconds to wait for the HTTP response before giving up.
    _timeout = 10
    # Index of the line in the response text that holds the result record.
    _resultLineIndex = 4

    def search_ein(self, ein: str) -> Optional[EINData]:
        """Search the service for *ein*.

        Args:
            ein: EIN as ``XX-XXXXXXX`` or ``XXXXXXXXX``.

        Returns:
            The parsed record, or ``None`` when the EIN is malformed or the
            response does not contain a recognisable record (a message is
            printed in both cases).
        """
        if not self._isEIN(ein):
            print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
            print("Invalid EIN")
            return None

        response = rq.post(
            self._url + "/search-ajax.php",
            data={"query": ein},
            headers={'X-Requested-With': 'XMLHttpRequest'},
            timeout=self._timeout,
        )
        soup = bsp(response.content, "html.parser")
        lines = soup.text.splitlines()
        if len(lines) <= self._resultLineIndex:
            print(f"No result found for EIN: {ein}")
            return None

        data = self._parse_return(lines[self._resultLineIndex])
        if data["company"] is None:
            print(f"No result found for EIN: {ein}")
            return None

        return EINData(
            ein,
            data["company"],
            data["address1"],
            data["city"],
            data["state"],
            data["zip"],
            data["phone"],
        )

    def search_eins(self, eins: list[EINData]):
        """Batch search; not implemented for this service yet."""
        raise NotImplementedError("search_eins is not implemented yet")

    def _parse_return(self, content: str) -> dict[str, Optional[str]]:
        """Split one result line into its labelled parts.

        The line is expected to look like
        ``<company> EIN Number: ... Doing Business As: <dba> Address:
        <street>, <city>, <state> <zip> Phone: <phone>``.

        Args:
            content: The result line taken from the response text.

        Returns:
            A dict with the keys ``company``, ``dba``, ``address``,
            ``address1``, ``city``, ``state``, ``zip`` and ``phone``. A value
            is ``None`` when its part cannot be found in *content*.
        """
        ein_label = re.search(r"EIN Number:", content)
        company = content[:ein_label.start()].strip() if ein_label else None

        dba_match = re.search(r"Doing Business As:(.*)Address:", content)
        dba = dba_match.group(1).strip() if dba_match else None

        address_match = re.search(r"Address:(.*)Phone:", content)
        if address_match:
            address = address_match.group(1).strip()
            # Everything after the "Phone:" label is the phone number.
            phone = content[address_match.end():].strip()
        else:
            address = None
            phone = None

        address1 = city = state = zip_code = None
        if address is not None:
            pieces = [piece.strip() for piece in address.split(",")]
            if len(pieces) >= 3:
                address1, city = pieces[0], pieces[1]
                # The third piece is "<state> <zip>"; either may be missing.
                state_zip = pieces[2].split()
                state = state_zip[0] if state_zip else None
                zip_code = state_zip[1] if len(state_zip) > 1 else None

        return {
            "company": company,
            "dba": dba,
            "address": address,
            "address1": address1,
            "city": city,
            "state": state,
            "zip": zip_code,
            "phone": phone,
        }


if __name__ == "__main__":
    # Manual smoke test; guarded so importing the module does no network I/O.
    testService = EINTaxIDService()
    print(dt.now())
    prt(testService.search_ein(SAMPLE_EIN))