From 369bc2fd1445e8bde8fad37f90015124d61802d3 Mon Sep 17 00:00:00 2001
From: Griffiths Lott
Date: Tue, 13 Dec 2022 23:00:17 -0500
Subject: [PATCH] Inital commit | Basic working service for eintaxid.com. Only single search. Returns dict

---
Review note: the EINService.py hunk below was revised during review (EIN
validation, address parsing, request timeout, import-time side effects).
The blob id on its index line predates that revision.

 .gitignore    |   1 +
 EINService.py | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 155 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 EINService.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a979ee7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/venv
\ No newline at end of file
diff --git a/EINService.py b/EINService.py
new file mode 100644
index 0000000..65c2817
--- /dev/null
+++ b/EINService.py
@@ -0,0 +1,154 @@
+"""Look up business details for an EIN by querying eintaxid.com."""
+
+import re
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from datetime import datetime as dt
+from pprint import pprint as prt
+from typing import Optional, Union
+
+import requests as rq
+from bs4 import BeautifulSoup as bsp
+
+
+SAMPLE_EIN = "59-1571026"
+
+# Accepted EIN spellings: XX-XXXXXXX and XXXXXXXXX.
+_EIN_PATTERN = re.compile(r"\d{2}-?\d{7}")
+# Both patterns are greedy: each capture runs up to the LAST occurrence of
+# its closing label, and "." never crosses a line break.
+_DBA_PATTERN = re.compile(r"Doing Business As:(.*)Address:")
+_ADDRESS_PATTERN = re.compile(r"Address:(.*)Phone:")
+
+
+@dataclass
+class EINResult:
+    """Structured record for one EIN lookup.
+
+    The service in this module still returns plain dicts, so nothing here
+    constructs this class yet.
+    """
+
+    found: bool
+    match: bool
+    businessName: str
+    address: str
+    city: str
+    state: str
+    # Defaulted so six-argument construction keeps working.
+    zipCode: Optional[str] = None
+
+
+class EINService(ABC):
+    """Interface that every EIN lookup backend implements."""
+
+    @abstractmethod
+    def search_ein(self, ein: str) -> Optional[dict]:
+        """Look up one EIN and return the service's data for it.
+
+        Implementations return ``None`` when no lookup could be made.
+        """
+
+    @abstractmethod
+    def search_eins(self, eins: list[str]):
+        """Look up several EINs in one call."""
+
+    @classmethod
+    def _isEIN(cls, ein: Union[str, int]) -> bool:
+        """Return True when *ein* is exactly XX-XXXXXXX or XXXXXXXXX.
+
+        The whole value has to match, so text that merely contains an EIN,
+        or that carries extra digits, is rejected.
+        """
+        return _EIN_PATTERN.fullmatch(str(ein)) is not None
+
+
+class EINTaxIDService(EINService):
+    """EIN lookup backed by the eintaxid.com AJAX search endpoint."""
+
+    _url = "https://eintaxid.com"
+    _htmlSearchInputID = "searchterm"
+    # requests waits forever unless given a timeout (seconds).
+    _timeout = 10
+    # Zero-based line of the response text that carries the result.
+    # NOTE(review): tied to the site's current markup - confirm if it changes.
+    _resultLine = 4
+
+    def search_ein(self, ein: str) -> Optional[dict]:
+        """Search eintaxid.com for *ein*.
+
+        Returns the dict built by ``_parse_return``, or ``None`` when *ein*
+        is malformed or the response has no result line.
+        """
+        if not self._isEIN(ein):
+            print(
+                f"{ein} is not a valid EIN!\n"
+                "Valid formats are: XX-XXXXXXX and XXXXXXXXX"
+            )
+            print("Invalid EIN")
+            return None
+
+        req = rq.post(
+            self._url + "/search-ajax.php",
+            data={"query": ein},
+            headers={"X-Requested-With": "XMLHttpRequest"},
+            timeout=self._timeout,
+        )
+        soup = bsp(req.content, "html.parser")
+        lines = soup.text.splitlines()
+        if len(lines) <= self._resultLine:
+            return None
+        return self._parse_return(lines[self._resultLine])
+
+    def search_eins(self, eins: list[str]):
+        """Batch lookup is not available yet; only single search works."""
+        raise NotImplementedError("search_eins is not implemented yet")
+
+    def _parse_return(self, content: str) -> dict:
+        """Split one line of result text into its labelled fields.
+
+        The parser looks for the labels ``EIN Number:``, ``Doing Business
+        As:``, ``Address:`` and ``Phone:``. A field whose label is missing
+        comes back as ``None`` instead of raising.
+        """
+        ein_at = content.find("EIN Number:")
+        company = content[:ein_at].strip() if ein_at != -1 else None
+
+        dba_match = _DBA_PATTERN.search(content)
+        dba = dba_match.group(1).strip() if dba_match else None
+
+        address = address1 = city = state = zip_code = phone = None
+        address_match = _ADDRESS_PATTERN.search(content)
+        if address_match:
+            address = address_match.group(1).strip()
+            # Everything after the "Phone:" label is the phone number.
+            phone = content[address_match.end():].strip()
+
+            pieces = [piece.strip() for piece in address.split(",")]
+            if len(pieces) >= 3:
+                # Read from the right so a comma inside the street part
+                # (a suite number, say) cannot shift city and state.
+                *street, city, state_zip = pieces
+                address1 = ", ".join(street)
+                # split() tolerates repeated spaces and a missing ZIP.
+                state, zip_code = (state_zip.split() + [None, None])[:2]
+            else:
+                # Too few commas to split reliably; keep the text whole.
+                address1 = address
+
+        return {
+            "company": company,
+            "dba": dba,
+            "address": address,
+            "address1": address1,
+            "city": city,
+            "state": state,
+            "zip": zip_code,
+            "phone": phone,
+        }
+
+
+if __name__ == "__main__":
+    testService = EINTaxIDService()
+    print(dt.now())
+    prt(testService.search_ein(SAMPLE_EIN))