You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
EINService/EINService.py

139 lines
4.6 KiB

import requests as rq
from bs4 import BeautifulSoup as bsp
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime as dt
from pprint import pprint as prt
import pandas as pd
# Example EIN written in the dashed XX-XXXXXXX form.
# NOTE(review): not referenced by any code visible in this part of the file.
SAMPLE_EIN = "59-1571026"
@dataclass
class EINData:
    """Normalized business record identified by an EIN.

    The constructor validates and normalizes its arguments:

    * ``ein`` is stored without its dash (nine digits).
    * ``buinessName``, ``address1``, ``city`` and ``state`` are stripped and
      lower-cased.
    * ``phone`` is stripped and has ``-``, ``(``, ``)`` and ``+`` removed.
    * ``zip`` has dashes removed and is stored as an ``int``.

    The fields are declared below so that the ``@dataclass``-generated
    ``__repr__`` and ``__eq__`` actually look at the stored values; with no
    declared fields every pair of instances compared equal.
    """

    ein: str
    buinessName: str
    address1: str
    city: str
    state: str
    zip: int
    phone: str

    def __init__(self, ein: str, buinessName: str, address1: str, city: str, state: str, zip: str, phone: str) -> None:
        """Validate ``ein`` and store normalized copies of all values.

        Raises:
            ValueError: if ``ein`` is not exactly ``XX-XXXXXXX`` or
                ``XXXXXXXXX`` (surrounding whitespace is ignored).
        """
        # Validate and normalize the same text; fullmatch rejects EINs that
        # merely *contain* a valid-looking run of digits.
        ein_text = str(ein).strip()
        if re.fullmatch(r"\d{2}-?\d{7}", ein_text) is None:
            raise ValueError(f"Invalid EIN: {ein}")
        self.ein = ein_text.replace('-', '')
        self.buinessName = buinessName.lower().strip()
        self.address1 = address1.lower().strip()
        self.city = city.lower().strip()
        self.state = state.lower().strip()
        self.phone = phone.strip().translate(str.maketrans('', '', '-()+'))
        # NOTE(review): int() drops leading zeros (e.g. "02134" -> 2134);
        # kept as int because existing callers may rely on the type.
        self.zip = int(zip.replace('-', '').strip())

    def __str__(self) -> str:
        """Return a tab-separated, single-line summary (ZIP is not included)."""
        return f"""EIN: {self.ein}\t | Name: {self.buinessName}\t\t| Address: {self.address1}\t\t| City: {self.city}\t| State: {self.state}\t| Phone: {self.phone}"""

    def get_ein(self) -> str:
        """Return the EIN in dashed ``XX-XXXXXXX`` form."""
        return f"{self.ein[0:2]}-{self.ein[2:]}"

    def compare(self, otherEIN: 'EINData') -> dict:
        """Compare this record with ``otherEIN`` field by field.

        Returns:
            A dict with boolean entries ``buinessName``, ``address``, ``city``,
            ``state`` and ``zip`` plus ``score``, the number of matching
            fields. Returns ``None`` (after printing a diagnostic) when
            ``otherEIN`` lacks the expected attributes, e.g. when it is
            ``None``.
        """
        try:
            compareDict = {
                "buinessName": self.buinessName == otherEIN.buinessName,
                "address": self.address1 == otherEIN.address1,
                "city": self.city == otherEIN.city,
                "state": self.state == otherEIN.state,
                "zip": self.zip == otherEIN.zip,
            }
        except AttributeError as e:
            print(f"""Exception:\n{e}\nSelf:{self}\nOther: {otherEIN}\n""")
            return None
        # Computed before "score" is inserted, so only the booleans are counted.
        compareDict["score"] = sum(compareDict.values())
        return compareDict
class EINService(ABC):
    """Abstract interface for services that look up business data by EIN.

    NOTE(review): the two abstract methods are declared as classmethods, while
    EINTaxIDService in this file overrides them as instance methods. The
    decorators are kept unchanged so existing subclasses are unaffected.
    """

    @classmethod
    @abstractmethod
    def search_ein(cls, ein: str) -> 'EINData | None':
        """
        Takes an EIN and returns information from a search using the service.

        Implementations in this file return None when the lookup fails.
        """

    @classmethod
    @abstractmethod
    def search_eins(cls, eins: 'list[str]') -> 'list[EINData | None]':
        """
        Takes several EINs and returns one search result per EIN, in order.
        """

    @classmethod
    def _isEIN(cls, ein: str) -> bool:
        """Return True when ``ein`` is exactly XX-XXXXXXX or XXXXXXXXX.

        Surrounding whitespace is ignored. Non-string input yields False
        instead of raising, and strings that only contain an EIN-like run of
        digits inside other text are rejected.
        """
        if not isinstance(ein, str):
            return False
        return re.fullmatch(r"\d{2}-?\d{7}", ein.strip()) is not None
class EINTaxIDService(EINService):
    """EIN lookup that posts a query to the eintaxid.com search endpoint."""

    _url = "https://eintaxid.com"
    # Seconds to wait for the HTTP response; without a timeout the request
    # can block indefinitely on an unresponsive server.
    _timeout = 10

    def search_ein(self, ein: str) -> 'EINData | None':
        """Look up a single EIN and return the parsed record.

        Returns:
            An EINData built from the response, or None (after printing a
            diagnostic) when ``ein`` is not a valid EIN or the response does
            not contain the expected fields.

        Network errors raised by ``requests`` are not caught here.
        """
        if not isinstance(ein, str) or not self._isEIN(ein):
            print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
            print("Invalid EIN")
            return None

        req = rq.request(
            "POST",
            self._url + "/search-ajax.php",
            data={"query": ein},
            headers={'X-Requested-With': 'XMLHttpRequest'},
            timeout=self._timeout,
        )
        soup = bsp(req.content, "html.parser")
        try:
            # The record is read from the fifth line of the response text.
            # NOTE(review): presumably tied to the endpoint's HTML layout — verify.
            text = soup.text.splitlines()[4]
        except IndexError:
            print(f"Failed: {ein} | {soup}")
            return None

        data = self._parse_return(text)
        # EINData cannot normalize missing values, so an incomplete parse is
        # reported the same way as a missing record line.
        required = ("company", "address1", "city", "state", "zip", "phone")
        if any(data[key] is None for key in required):
            print(f"Failed: {ein} | {soup}")
            return None
        return EINData(
            ein,
            data["company"],
            data["address1"],
            data["city"],
            data["state"],
            data["zip"],
            data["phone"],
        )

    def search_eins(self, eins: 'list[str]') -> 'list[EINData | None]':
        """Look up each EIN in ``eins``; failed lookups appear as None.

        A single string is treated as a one-element list rather than being
        iterated character by character.
        """
        if isinstance(eins, str):
            eins = [eins]
        return [self.search_ein(ein) for ein in eins]

    def _parse_return(self, content: str) -> dict:
        """Split one line of result text into its labelled parts.

        The line is expected to look like
        ``<company>EIN Number:...Doing Business As:<dba>Address:<street>,
        <city>, <state> <zip>Phone:<phone>``. Any part whose label (or
        address piece) is missing is returned as None.

        Returns:
            dict with keys ``company``, ``dba``, ``address``, ``address1``,
            ``city``, ``state``, ``zip`` and ``phone``.
        """
        ein_label = re.search("EIN Number:", content)
        company = content[:ein_label.start()].strip() if ein_label is not None else None

        dba = None
        dba_match = re.search("Doing Business As:.*Address:", content)
        if dba_match is not None:
            dba_start = dba_match.start() + len("Doing Business As:")
            dba_end = dba_match.end() - len("Address:")
            dba = content[dba_start:dba_end].strip()

        address = address1 = city = state = zip = phone = None
        address_match = re.search("Address:.*Phone:", content)
        if address_match is not None:
            addr_start = address_match.start() + len("Address:")
            addr_end = address_match.end() - len("Phone:")
            address = content[addr_start:addr_end].strip()
            # Everything after the "Phone:" label is the phone number.
            phone = content[address_match.end():].strip()

            pieces = [piece.strip() for piece in address.split(',')]
            if len(pieces) >= 1 and pieces[0]:
                address1 = pieces[0]
            if len(pieces) >= 2:
                city = pieces[1]
            if len(pieces) >= 3:
                # Third piece is "<state> <zip>"; split on any whitespace run.
                state_zip = pieces[2].split()
                if len(state_zip) >= 1:
                    state = state_zip[0]
                if len(state_zip) >= 2:
                    zip = state_zip[1]

        return {
            "company": company,
            "dba": dba,
            "address": address,
            "address1": address1,
            "city": city,
            "state": state,
            "zip": zip,
            "phone": phone,
        }