Added example files and logging (replaces prints)

master
Griffiths Lott 3 years ago
parent 2b6c71598b
commit 1488442612
  1. 29
      EINService.py
  2. 14
      README.md
  3. 15
      basic_example.py
  4. 36
      example_from_excel.py
  5. 31
      test.py

@ -3,13 +3,13 @@ from bs4 import BeautifulSoup as bsp
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime as dt
from pprint import pprint as prt
import pandas as pd
from logging import warning, debug, error
from pandas import DataFrame
SAMPLE_EIN = "59-1571026"
@dataclass
class EINData:
"""
@ -64,9 +64,10 @@ class EINData:
"zip" : True if self.zip == otherEIN.zip else False
}
except Exception as e:
print(f"""Exception:\n{e}\nSelf:{self}\nOther: {otherEIN}\n""")
error(f"""Exception:\n{e}\nSelf:{self}\nOther: {otherEIN}\n""")
# If we cannot successfully compare the data, return None
return None
debug(compareDict)
score = 0
for v in compareDict.values():
# increase score by one for every value that is true
@ -75,7 +76,7 @@ class EINData:
return compareDict
def dataframe_to_eins(df: pd.DataFrame,
def dataframe_to_eins(df: DataFrame,
einLabel: str = "Lessee Tax-ID",
nameLabel: str = "NAME",
addressLabel: str = "ADDRESS",
@ -90,6 +91,7 @@ def dataframe_to_eins(df: pd.DataFrame,
Requires that the dataframe contains certain data labels.
"""
# Confirm all correct columns exist
debug(df)
try:
columns = df.columns
assert (einLabel in columns), f"EIN label not present: {einLabel} | {columns}"
@ -100,13 +102,14 @@ def dataframe_to_eins(df: pd.DataFrame,
assert (zipLabel in columns), f"EIN label not present: {zipLabel} | {columns}"
assert (phoneLabel in columns), f"EIN label not present: {phoneLabel} | {columns}"
except Exception as e:
print(f"NOT ALL REQUIRED COLUMNS PRESENT!\n{e}\n{df}")
error(f"NOT ALL REQUIRED COLUMNS PRESENT!\n{e}\n{df}")
eins = []
for _, data in df.iterrows():
# _ is the row index which is not used
# The rest is a pandas Series
# The column labels are used to pull the data from the series
debug(data)
try:
eins.append(EINData(
str(data[einLabel]),
@ -119,7 +122,7 @@ def dataframe_to_eins(df: pd.DataFrame,
))
except Exception as e:
# If we fail, we report the error and move on to the next item in the dataframe
print(f"Could not add {einLabel}!")
warning(f"Could not add {einLabel}!")
continue
class __EINService(ABC):
@ -161,26 +164,29 @@ class EINTaxIDService(__EINService):
_url = "https://eintaxid.com"
def search_ein(self, ein: str) -> EINData:
debug(ein)
try:
# We don't even want to attempt this unless we're using a valid EIN
if not self._isEIN(ein):
print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
warning(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
raise Exception("Invalid EIN")
except Exception as e:
print(e)
warning(f"{e} | {ein}")
return None
# Send a POST HTTP request to the site using the search-ajax script
# query just needs to include EIN, XML header required for parsing
req = rq.request("POST",self._url + "/search-ajax.php", data={"query": ein}, \
headers={'X-Requested-With': 'XMLHttpRequest'})
debug(req)
# Use BeautifulSoup to parse the HTML content
soup = bsp(req.content, "html.parser")
debug(soup)
try:
# The actual data return is always on line 4. The rest are DIV set up
text = soup.text.splitlines()[4]
except:
print(f"Failed: {ein} | {soup}")
return None
warning(f"Failed: {ein} | {soup}")
return EINData(ein, None,None,None,None,None,None)
data = self._parse_return(text)
return EINData(
@ -207,6 +213,7 @@ class EINTaxIDService(__EINService):
Data that cannot be found will be returned as None
"""
debug(f"EIN Service returned content:\n{content}")
m = re.search("EIN Number:", content)
company = content[0:m.start()].strip()

@ -0,0 +1,14 @@
# Usage Guide Available
For information on how to set up and utilize this library please view: *https://git.glott.me/LEAF/EINService/wiki/Usage-Guide*
# Information:
- Currently only EINTaxIDService is available. This uses https://eintaxid.com to fetch information about a company.
- The EINData object utilized by this library consists of the following data: EIN, Company Name, Address, City, State, Zip Code, Phone
# Upcoming functionality
Future versions will hopefully provide the following capabilities:
- Option to use other EIN search services
- Automatically format EINData lists into dataframes
- Compare 'EINData dataframes' adding the compare dict as an extra column
A GUI application is also planned for release, but will be located in a separate repo

@ -0,0 +1,15 @@
from EINService import EINTaxIDService

# EIN of Advanced Micro Devices Inc; this is the identifier looked up below.
AMD_EIN = "94-1692300"

# A single service instance is used for every search in this example.
service = EINTaxIDService()

# The search hands back an EINData object.
# If the search was unsuccessful, data members other than the EIN will be None.
result = service.search_ein(AMD_EIN)
print(result)
# EIN: 941692300 | Name: advanced micro devices inc | Address: 2485 augustine drive | City: santa clara | State: ca | Phone: 408 7494000

@ -0,0 +1,36 @@
import pandas as pd

from EINService import EINTaxIDService, dataframe_to_eins

# Instantiate an EINService object.
# This is what will be used to do all of our searches.
einService = EINTaxIDService()

# Here we pull in the data from Excel.
einData = pd.read_excel("SampleData.xlsx")

# Extract the EIN column as a list.
einList = einData["Lessee Tax-ID"].to_list()

# The service will return a list of EINData objects.
# If no match was found, all data members besides the EIN will be None.
searchResults = einService.search_eins(einList)
print(searchResults)

# We can also convert a dataframe into a list of EINData.
# This requires that our dataframe has all of the necessary columns.
# The defaults for these columns are:
#   "Lessee Tax-ID",
#   "NAME",
#   "ADDRESS"
#   "CITY"
#   "STATE"
#   "ZIP"
#   "PHONE"
#
# You can also specify your own column names as parameters.
einDataList = dataframe_to_eins(einData)
# Print the converted list rather than the raw dataframe it was built from.
print(einDataList)

# This allows us to compare our search results to our 'local data'.
# NOTE(review): the pairing assumes both lists follow the dataframe's row
# order and have the same length; confirm dataframe_to_eins does not drop rows.
for localData, searchResult in zip(einDataList, searchResults):
    comparisonDict = localData.compare(searchResult)
    print(comparisonDict)

@ -1,31 +0,0 @@
import EINService as es
import pandas as pd

# Load the local deal data that will be checked against the external service.
data = pd.read_excel("ExampleCSP.xlsx")
einService = es.EINTaxIDService()

# One comparison result per deal that could be built and looked up.
scores = []
for _, deal in data.iterrows():
    # Build the local record from the spreadsheet row; skip the row on failure.
    try:
        leafEIN = es.EINData(
            str(deal["Lessee Tax-ID"]),
            deal["NAME"],
            deal["ADDRESS"],
            deal["CITY"],
            deal["STATE"],
            str(deal["ZIP"]),
            str(deal["PHONE"]),
        )
    except Exception:
        # `except Exception` (not a bare `except`) so Ctrl-C still stops the run.
        print(f"Failed to create EINData (LEAF): {deal['Lessee Tax-ID']}")
        continue

    # Look the same EIN up externally; only the service call is guarded.
    try:
        external = einService.search_ein(leafEIN.get_ein())
    except Exception:
        print(f"Failed to create EINData (EXTERNAL): {deal['Lessee Tax-ID']}")
        continue

    # No external record came back, so there is nothing to compare against.
    if external is None:
        continue

    scores.append(leafEIN.compare(external))

print(f"Scores:\n{scores}")
Loading…
Cancel
Save