You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
138 lines
4.3 KiB
138 lines
4.3 KiB
import requests as rq
|
|
from bs4 import BeautifulSoup as bsp
|
|
import re
|
|
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass
|
|
from datetime import datetime as dt
|
|
from pprint import pprint as prt
|
|
|
|
|
|
# Example EIN in "XX-XXXXXXX" form; used as the lookup input for the
# module-level search_ein call at the bottom of this file.
SAMPLE_EIN = "59-1571026"
|
|
|
|
@dataclass
class EINData:
    """Normalised record for the business registered under one EIN.

    Text fields are stored lower-cased and stripped so that two records can
    be compared field by field with ``compare``.  Invalid input is reported
    with ``print`` rather than raised: an invalid EIN leaves every field set
    to ``None``, and an unparseable ZIP code leaves only ``zip`` as ``None``.
    """

    # Fields are declared so that @dataclass builds __repr__ and __eq__ from
    # the stored data; without them every instance printed as "EINData()" and
    # compared equal to every other instance.  The hand-written __init__
    # below is kept by @dataclass because the class defines it explicitly.
    ein: "str | None"
    buinessName: "str | None"
    address1: "str | None"
    city: "str | None"
    state: "str | None"
    zip: "int | None"
    phone: "str | None"

    # Accepted EIN spellings: "XX-XXXXXXX" or "XXXXXXXXX".
    _EIN_PATTERN = re.compile(r"\d{2}-?\d{7}")
    # Punctuation removed from phone numbers (same set the chained
    # str.replace calls used to remove).
    _PHONE_STRIP = str.maketrans("", "", "-()+")

    def __init__(
        self,
        ein: str,
        buinessName: str,
        address1: str,
        city: str,
        state: str,
        zip: str,
        phone: str,
    ) -> None:
        """Validate and normalise the raw field values.

        Args:
            ein: EIN as "XX-XXXXXXX" or "XXXXXXXXX" (a 9-digit int is also
                accepted); surrounding whitespace is ignored.
            buinessName: Business name (parameter spelling kept for
                backward compatibility with keyword callers).
            address1: Street address line.
            city: City name.
            state: State code or name.
            zip: ZIP code; dashes are dropped and the rest is stored as an
                int, so leading zeros are not preserved.
            phone: Phone number; "-", "(", ")" and "+" are removed.

        Any of the text fields may be ``None``, in which case ``None`` is
        stored unchanged.
        """
        candidate = str(ein).strip()
        # fullmatch: the whole value must be an EIN, not merely contain one.
        if self._EIN_PATTERN.fullmatch(candidate) is None:
            print(f"Invalid EIN: {ein}")
            # Leave the object in a defined state so repr/compare still work.
            self.ein = None
            self.buinessName = None
            self.address1 = None
            self.city = None
            self.state = None
            self.zip = None
            self.phone = None
            return

        self.ein = candidate.replace("-", "")
        self.buinessName = self._clean(buinessName)
        self.address1 = self._clean(address1)
        self.city = self._clean(city)
        self.state = self._clean(state)
        if phone is None:
            self.phone = None
        else:
            self.phone = phone.strip().translate(self._PHONE_STRIP)

        try:
            self.zip = int(str(zip).replace("-", "").strip())
        except ValueError:
            print(f"Invalid ZIP code: {zip}")
            # Always define the attribute so later comparisons cannot fail
            # with AttributeError.
            self.zip = None

    @staticmethod
    def _clean(value):
        """Lower-case and strip a text field, passing ``None`` through."""
        return None if value is None else value.lower().strip()

    def get_ein(self) -> str:
        """Return the stored EIN formatted as "XX-XXXXXXX".

        Assumes the record was built from a valid EIN (``self.ein`` is a
        9-digit string).
        """
        return f"{self.ein[0:2]}-{self.ein[2:]}"

    def compare(self, otherEIN: "EINData") -> dict:
        """Compare this record with another one field by field.

        Returns:
            A dict with a boolean per compared field (keys "buinessName",
            "address", "city", "state", "zip") plus "score", the number of
            fields that matched.
        """
        compareDict = {
            # The attribute really is spelled "buinessName"; the previous
            # "businessName" lookup raised AttributeError on every call.
            "buinessName": self.buinessName == otherEIN.buinessName,
            "address": self.address1 == otherEIN.address1,
            "city": self.city == otherEIN.city,
            "state": self.state == otherEIN.state,
            "zip": self.zip == otherEIN.zip,
        }
        # True counts as 1, so the sum is the number of matching fields.
        compareDict["score"] = sum(compareDict.values())
        return compareDict
|
|
|
|
|
|
|
|
class EINService(ABC):
    """Abstract interface for services that look up business data by EIN.

    Concrete services implement ``search_ein`` and ``search_eins``;
    ``_isEIN`` is a shared format check they can use before querying.
    """

    # Accepted EIN spellings: "XX-XXXXXXX" or "XXXXXXXXX".
    _EIN_PATTERN = re.compile(r"\d{2}-?\d{7}")

    # Annotations naming EINData are quoted (forward references) so this
    # class does not depend on where EINData is defined relative to it.
    @classmethod
    @abstractmethod
    def search_ein(cls, ein: str) -> "EINData":
        """
        Takes an EIN and returns information from a search using the service
        """

    @classmethod
    @abstractmethod
    def search_eins(cls, eins: "list[EINData]"):
        """
        Batch counterpart of ``search_ein``.

        NOTE(review): the original docstring was empty, so the expected
        return value is not specified here; confirm against implementers.
        """

    @classmethod
    def _isEIN(cls, ein: "int | str") -> bool:
        """Return True when ``ein`` is exactly "XX-XXXXXXX" or "XXXXXXXXX".

        Surrounding whitespace is ignored.  The whole value must match:
        the previous unanchored search accepted any text that merely
        contained nine digits (e.g. "abc59-1571026xyz").
        """
        return cls._EIN_PATTERN.fullmatch(str(ein).strip()) is not None
|
|
|
|
|
|
class EINTaxIDService(EINService):
    """EIN lookup backed by the eintaxid.com AJAX search endpoint."""

    _url = "https://eintaxid.com"
    _htmlSearchInputID = "searchterm"
    # Seconds to wait for the remote site; without a timeout a stalled
    # connection would block the caller indefinitely.
    _timeout = 30

    def search_ein(self, ein: str) -> "EINData | None":
        """Look up one EIN on the remote site.

        Args:
            ein: EIN as "XX-XXXXXXX" or "XXXXXXXXX".

        Returns:
            An ``EINData`` built from the parsed response, or ``None`` (after
            printing a message) when the EIN is malformed or the response
            does not contain a parseable result.

        Raises:
            requests.exceptions.RequestException: on network failure or
                timeout (propagated from ``requests``).
        """
        if not self._isEIN(ein):
            print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
            print("Invalid EIN")
            return None

        response = rq.request(
            "POST",
            self._url + "/search-ajax.php",
            data={"query": ein},
            headers={"X-Requested-With": "XMLHttpRequest"},
            timeout=self._timeout,
        )
        soup = bsp(response.content, "html.parser")
        lines = soup.text.splitlines()
        # The result summary is read from the fifth text line of the
        # response; a shorter response has nothing to parse.
        if len(lines) < 5:
            print(f"No result found for EIN: {ein}")
            return None

        data = self._parse_return(lines[4])
        if data is None:
            print(f"Could not parse the result for EIN: {ein}")
            return None

        # Missing pieces are passed as empty strings so EINData's string
        # normalisation never receives None.
        return EINData(
            ein,
            data["company"],
            data["address1"] or "",
            data["city"] or "",
            data["state"] or "",
            data["zip"] or "",
            data["phone"] or "",
        )

    # Declared as a classmethod to match the abstract declaration; the
    # previous definition had no receiver parameter, so calling it on an
    # instance raised TypeError.  It stays callable on the class as before.
    @classmethod
    def search_eins(cls, eins: "list[EINData]"):
        """Batch lookup placeholder: not implemented, always returns None."""
        return None

    def _parse_return(self, content: str) -> "dict | None":
        """Split one result line into its labelled parts.

        The line is expected to look like
        ``<company> EIN Number: ... [Doing Business As: <dba>] Address:
        <street>, <city>, <state> <zip> Phone: <phone>``.

        Returns:
            A dict with keys "company", "dba", "address", "address1",
            "city", "state", "zip" and "phone" (values are ``None`` when the
            corresponding part is absent), or ``None`` when the
            "EIN Number:" label is missing altogether.
        """
        marker = re.search(r"EIN Number:", content)
        if marker is None:
            return None
        company = content[:marker.start()].strip()

        dba_match = re.search(r"Doing Business As:(.*)Address:", content)
        dba = dba_match.group(1).strip() if dba_match is not None else None

        address1 = city = state = zip_code = None
        address_match = re.search(r"Address:(.*)Phone:", content)
        if address_match is None:
            address = None
            phone = None
        else:
            address = address_match.group(1).strip()
            pieces = [piece.strip() for piece in address.split(",")]
            address1 = pieces[0]
            if len(pieces) > 1:
                city = pieces[1]
            if len(pieces) > 2:
                # Third piece is "<state> <zip>"; either token may be absent.
                tokens = pieces[2].split()
                if tokens:
                    state = tokens[0]
                if len(tokens) > 1:
                    zip_code = tokens[1]
            # Everything after the "Phone:" label.
            phone = content[address_match.end():].strip()

        return {
            "company": company,
            "dba": dba,
            "address": address,
            "address1": address1,
            "city": city,
            "state": state,
            "zip": zip_code,
            "phone": phone,
        }
|
|
|
|
|
|
# Module-level smoke run: print the current time, then look up SAMPLE_EIN
# through the eintaxid.com service and pretty-print whatever comes back.
testService = EINTaxIDService()
_started_at = dt.now()
print(_started_at)
_sample_result = testService.search_ein(SAMPLE_EIN)
prt(_sample_result)