You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
EINService/EINService.py

138 lines
4.3 KiB

import requests as rq
from bs4 import BeautifulSoup as bsp
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime as dt
from pprint import pprint as prt
# Example EIN in dashed form; passed to search_ein() by the test run at the
# bottom of this file.
SAMPLE_EIN = "59-1571026"
@dataclass
class EINData:
    """Normalised business record identified by an EIN.

    Text fields are lower-cased and stripped, the EIN is stored without its
    dash, and the phone number has ``-``, ``(``, ``)`` and ``+`` removed, so
    two records can be compared with :meth:`compare` regardless of how the
    source formatted them.

    Construction never raises on bad data. An invalid EIN is reported with
    ``print`` and leaves every field as ``None``; an invalid ZIP code is
    reported with ``print`` and leaves only ``zip`` as ``None``.

    NOTE: the ``buinessName`` spelling is kept as-is because it is part of the
    constructor signature and of the dict returned by :meth:`compare`.
    """

    # Declaring the fields is what makes the @dataclass-generated __repr__ and
    # __eq__ meaningful; with no fields every instance printed as "EINData()"
    # and compared equal to every other instance. The hand-written __init__
    # below is kept (dataclass does not replace an __init__ the class defines).
    ein: "str | None"
    buinessName: "str | None"
    address1: "str | None"
    city: "str | None"
    state: "str | None"
    # NOTE: stored as int, so leading zeros are dropped ("02134" -> 2134).
    zip: "int | None"
    phone: "str | None"

    # Deletion table for the punctuation removed from phone numbers.
    _PHONE_PUNCTUATION = str.maketrans("", "", "-()+")

    def __init__(self, ein: str, buinessName: str, address1: str, city: str,
                 state: str, zip: str, phone: str) -> None:
        """Validate and normalise the raw field values.

        Args:
            ein: EIN as ``XX-XXXXXXX`` or ``XXXXXXXXX`` (surrounding
                whitespace is ignored).
            buinessName: Business name.
            address1: Street address.
            city: City name.
            state: State.
            zip: ZIP code; dashes are removed before conversion to ``int``.
            phone: Phone number.
        """
        # Give every field a defined value up front so that repr(), == and
        # compare() work even when validation below bails out early.
        self.ein = self.buinessName = self.address1 = None
        self.city = self.state = self.zip = self.phone = None

        ein_text = str(ein).strip()
        # fullmatch: the whole value must be an EIN, not merely contain one.
        if re.fullmatch(r"\d{2}-?\d{7}", ein_text) is None:
            print(f"Invalid EIN: {ein}")
            return

        self.ein = ein_text.replace("-", "")
        self.buinessName = self._clean_text(buinessName)
        self.address1 = self._clean_text(address1)
        self.city = self._clean_text(city)
        self.state = self._clean_text(state)
        if phone is not None:
            self.phone = str(phone).strip().translate(self._PHONE_PUNCTUATION)
        try:
            self.zip = int(str(zip).replace("-", "").strip())
        except ValueError:
            print(f"Invalid ZIP code: {zip}")

    @staticmethod
    def _clean_text(value):
        """Return *value* lower-cased and stripped; ``None`` stays ``None``."""
        return None if value is None else str(value).lower().strip()

    def get_ein(self) -> str:
        """Return the EIN in dashed ``XX-XXXXXXX`` form.

        Only meaningful for a record that was built from a valid EIN.
        """
        return f"{self.ein[:2]}-{self.ein[2:]}"

    def compare(self, otherEIN: "EINData") -> dict:
        """Compare this record with *otherEIN* field by field.

        Returns:
            A dict with boolean entries ``buinessName``, ``address``, ``city``,
            ``state`` and ``zip`` plus ``score``, the number of matching
            fields. A field that is missing (``None`` or empty) on this record
            never counts as a match.
        """
        pairs = {
            "buinessName": (self.buinessName, otherEIN.buinessName),
            "address": (self.address1, otherEIN.address1),
            "city": (self.city, otherEIN.city),
            "state": (self.state, otherEIN.state),
            "zip": (self.zip, otherEIN.zip),
        }
        result = {
            key: mine not in (None, "") and mine == theirs
            for key, (mine, theirs) in pairs.items()
        }
        result["score"] = sum(result.values())
        return result
class EINService(ABC):
    """Abstract interface for services that look up business data by EIN."""

    # Two digits, an optional dash, seven digits - and nothing else.
    _EIN_PATTERN = re.compile(r"\d{2}-?\d{7}")

    @classmethod
    @abstractmethod
    def search_ein(cls, ein: str) -> "EINData":
        """Take an EIN and return the information found by searching the service."""

    @classmethod
    @abstractmethod
    def search_eins(cls, eins: "list[EINData]"):
        """Bulk counterpart of :meth:`search_ein`.

        The expected behaviour and return value are not specified yet.
        """

    @classmethod
    def _isEIN(cls, ein: "str | int") -> bool:
        """Return True when *ein* is formatted as ``XX-XXXXXXX`` or ``XXXXXXXXX``.

        The value is converted with ``str()`` and surrounding whitespace is
        ignored. The whole value must match, so a longer digit string that
        merely contains an EIN is rejected.
        """
        return cls._EIN_PATTERN.fullmatch(str(ein).strip()) is not None
class EINTaxIDService(EINService):
    """EIN lookup that posts to the eintaxid.com AJAX search endpoint."""

    _url = "https://eintaxid.com"
    _htmlSearchInputID = "searchterm"
    # Seconds to wait for the HTTP response; without a timeout a stalled
    # connection would block the caller indefinitely.
    _timeout = 10

    def search_ein(self, ein: str) -> "EINData | None":
        """Look up *ein* and return the parsed record.

        Args:
            ein: EIN as ``XX-XXXXXXX`` or ``XXXXXXXXX``.

        Returns:
            An ``EINData`` built from the response, or ``None`` (after printing
            a message) when *ein* is malformed or the response contains no
            parsable record.
        """
        if not self._isEIN(ein):
            print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
            print("Invalid EIN")
            return None

        response = rq.request(
            "POST",
            self._url + "/search-ajax.php",
            data={"query": ein},
            headers={"X-Requested-With": "XMLHttpRequest"},
            timeout=self._timeout,
        )
        soup = bsp(response.content, "html.parser")
        lines = soup.text.splitlines()
        # The record is read from the fifth text line of the response;
        # presumably that is where the endpoint puts the first result row -
        # TODO confirm against the live markup.
        data = self._parse_return(lines[4]) if len(lines) > 4 else None
        if data is None or data["company"] is None:
            print(f"No record found for EIN: {ein}")
            return None

        # EINData expects strings, so pieces that could not be parsed are
        # passed as empty strings rather than None.
        return EINData(
            ein,
            data["company"],
            data["address1"] or "",
            data["city"] or "",
            data["state"] or "",
            data["zip"] or "",
            data["phone"] or "",
        )

    def search_eins(self, eins: "list[EINData]"):
        """Bulk lookup; not implemented yet, so it currently returns ``None``."""
        return None

    def _parse_return(self, content: str) -> dict:
        """Split one result line into its labelled parts.

        The line is expected to look like
        ``<company> EIN Number: ... [Doing Business As: <dba>] Address: <address> Phone: <phone>``.

        Returns:
            A dict with keys ``company``, ``dba``, ``address``, ``address1``,
            ``city``, ``state``, ``zip`` and ``phone``. Any part that is not
            present in *content* is ``None``.
        """
        ein_marker = re.search(r"EIN Number:", content)
        company = content[:ein_marker.start()].strip() if ein_marker else None

        dba_match = re.search(r"Doing Business As:(.*)Address:", content)
        dba = dba_match.group(1).strip() if dba_match else None

        address = phone = None
        address_match = re.search(r"Address:(.*)Phone:", content)
        if address_match:
            address = address_match.group(1).strip()
            # Everything after the "Phone:" label is the phone number.
            phone = content[address_match.end():].strip()

        address1 = city = state = zip_code = None
        if address is not None:
            pieces = [piece.strip() for piece in address.split(",")]
            if len(pieces) >= 3:
                # Read from the right so that extra commas in the street part
                # (e.g. a suite number) do not shift city and state.
                # Assumes the address ends with "<city>, <state> <zip>".
                address1 = ", ".join(pieces[:-2])
                city = pieces[-2]
                tokens = pieces[-1].split()
                if len(tokens) >= 2:
                    state = " ".join(tokens[:-1])
                    zip_code = tokens[-1]
                elif tokens:
                    state = tokens[0]
            else:
                # Not enough parts to tell city/state/zip apart.
                address1 = address

        return {
            "company": company,
            "dba": dba,
            "address": address,
            "address1": address1,
            "city": city,
            "state": state,
            "zip": zip_code,
            "phone": phone,
        }
if __name__ == "__main__":
    # Manual smoke test: look up the sample EIN and pretty-print the result.
    # Guarded so that importing this module does not trigger a network
    # request or print anything.
    testService = EINTaxIDService()
    print(dt.now())
    prt(testService.search_ein(SAMPLE_EIN))