You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
2.8 KiB
101 lines
2.8 KiB
import requests as rq
|
|
from bs4 import BeautifulSoup as bsp
|
|
import re
|
|
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass
|
|
from datetime import datetime as dt
|
|
from pprint import pprint as prt
|
|
|
|
|
|
# Example EIN in the dashed XX-XXXXXXX form; it is the value passed to the
# module-level demo lookup at the bottom of this file.
SAMPLE_EIN = "59-1571026"
|
|
|
|
@dataclass
class EINResult:
    """Record describing the outcome of an EIN lookup.

    Nothing in this file constructs the class yet; it only appears in the
    service type annotations. The per-field notes are therefore read off
    the field names and should be confirmed against the intended use.
    """

    # presumably: whether the service returned any record — TODO confirm
    found: bool
    # presumably: whether the record matched the queried EIN — TODO confirm
    match: bool
    # Registered business name.
    businessName: str
    # Street address.
    address: str
    city: str
    # The state is text. It used to be annotated with the builtin ``zip``
    # function, which is not a type for this value.
    state: str
|
|
|
|
|
|
class EINService(ABC):
    """Abstract interface for services that look up EINs.

    Subclasses must implement ``search_ein`` and ``search_eins``.
    ``_isEIN`` is a shared helper that checks the textual format of an EIN.
    """

    # The lookups are instance methods, not classmethods: implementations
    # receive ``self`` and may read per-service configuration from it.
    @abstractmethod
    def search_ein(self, ein: str) -> "EINResult":
        """Take an EIN and return information from a search using the service.

        Args:
            ein: The EIN to look up.
        """

    @abstractmethod
    def search_eins(self, eins: list[str]):
        """Look up several EINs with the service.

        Args:
            eins: The EINs to look up.

        NOTE(review): the return shape is not specified by this interface;
        presumably one result per EIN — confirm with the implementations.
        """

    @classmethod
    def _isEIN(cls, ein: str) -> bool:
        """Return True when *ein* is formatted as XX-XXXXXXX or XXXXXXXXX.

        The value is converted with ``str()`` first, so an integer works as
        long as it still has nine digits (a leading zero would be lost).

        Args:
            ein: Candidate EIN.

        Returns:
            True only if the whole string is two digits, an optional dash,
            then seven digits.
        """
        # fullmatch anchors both ends: a longer digit run or surrounding text
        # that merely contains an EIN-shaped substring is rejected.
        return re.fullmatch(r"[0-9]{2}-?[0-9]{7}", str(ein)) is not None
|
|
|
|
|
|
class EINTaxIDService(EINService):
    """EIN lookup that posts the query to eintaxid.com's AJAX search endpoint
    and scrapes the company record out of the returned HTML.
    """

    _url = "https://eintaxid.com"
    # Not referenced by the code in this class; presumably the id of the
    # site's search input element — TODO confirm or remove.
    _htmlSearchInputID = "searchterm"
    # Seconds to wait for the site before giving up, so a stalled connection
    # cannot hang the caller indefinitely.
    _timeout = 10

    def search_ein(self, ein: str) -> "dict | None":
        """Look up one EIN and return the parsed company record.

        Args:
            ein: EIN formatted as XX-XXXXXXX or XXXXXXXXX.

        Returns:
            The dict built by ``_parse_return``, or None when *ein* is not a
            valid EIN or the response holds no parsable record.
        """
        if not self._isEIN(ein):
            print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
            return None

        response = rq.request(
            "POST",
            self._url + "/search-ajax.php",
            data={"query": ein},
            headers={"X-Requested-With": "XMLHttpRequest"},
            timeout=self._timeout,
        )
        soup = bsp(response.content, "html.parser")
        lines = soup.text.splitlines()
        # The record is read from the fifth line of the response text.
        # NOTE(review): that position depends on the site's markup — confirm.
        if len(lines) < 5:
            return None
        return self._parse_return(lines[4])

    def search_eins(self, eins: list[str]) -> list:
        """Look up several EINs, one request per EIN.

        Args:
            eins: EINs to look up.

        Returns:
            The ``search_ein`` result for each EIN, in input order.
        """
        return [self.search_ein(ein) for ein in eins]

    def _parse_return(self, content: str) -> "dict | None":
        """Split one line of result text into labelled fields.

        The text is expected to be the company name followed by the labels
        "EIN Number:", optionally "Doing Business As:", then "Address:" and
        "Phone:".

        Args:
            content: Single line of text taken from the search response.

        Returns:
            A dict with keys company, dba, address, address1, city, state,
            zip and phone, where fields that cannot be located are None.
            Returns None when the "EIN Number:" label is absent, i.e. the
            text is not a company record.
        """
        ein_marker = re.search("EIN Number:", content)
        if ein_marker is None:
            return None
        company = content[:ein_marker.start()].strip()

        dba_match = re.search("Doing Business As:(.*)Address:", content)
        dba = dba_match.group(1).strip() if dba_match is not None else None

        address = address1 = city = state = zip_code = phone = None
        address_match = re.search("Address:(.*)Phone:", content)
        if address_match is not None:
            address = address_match.group(1).strip()
            # The phone number is whatever follows the "Phone:" label.
            phone = content[address_match.end():].strip()

            pieces = [piece.strip() for piece in address.split(",")]
            if len(pieces) >= 3:
                # Read from the right: the last piece is "<state> <zip>" and
                # the one before it is the city, so commas inside the street
                # part (suite numbers etc.) cannot shift the other fields.
                *street, city, state_zip = pieces
                address1 = ", ".join(street)
                tokens = state_zip.split()
                state = tokens[0] if tokens else None
                zip_code = tokens[1] if len(tokens) > 1 else None

        return {
            "company": company,
            "dba": dba,
            "address": address,
            "address1": address1,
            "city": city,
            "state": state,
            "zip": zip_code,
            "phone": phone,
        }
|
|
|
|
|
|
# Ad-hoc demo run executed at module level: build the service, print the
# current time, then pretty-print the lookup result for SAMPLE_EIN.
testService = EINTaxIDService()
print(dt.now())
prt(testService.search_ein(SAMPLE_EIN))