From 14884426121e3e54b5bf7d378429455d3ecdd00e Mon Sep 17 00:00:00 2001
From: Griffiths Lott <g@glott.me>
Date: Sun, 18 Dec 2022 12:58:31 -0500
Subject: [PATCH] Added example files and logging (replaces prints)

---
 EINService.py         | 29 ++++++++++++++++++-----------
 README.md             | 14 ++++++++++++++
 basic_example.py      | 15 +++++++++++++++
 example_from_excel.py | 36 ++++++++++++++++++++++++++++++++++++
 test.py               | 31 -------------------------------
 5 files changed, 83 insertions(+), 42 deletions(-)
 create mode 100644 README.md
 create mode 100644 basic_example.py
 create mode 100644 example_from_excel.py
 delete mode 100644 test.py

diff --git a/EINService.py b/EINService.py
index 951ca43..25f2a4b 100644
--- a/EINService.py
+++ b/EINService.py
@@ -3,13 +3,13 @@ from bs4 import BeautifulSoup as bsp
 import re
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from datetime import datetime as dt
-from pprint import pprint as prt
-import pandas as pd
+from logging import warning, debug, error
+from pandas import DataFrame
 
 
 SAMPLE_EIN = "59-1571026"
 
+
 @dataclass
 class EINData:
     """
@@ -64,9 +64,10 @@ class EINData:
                 "zip" : True if self.zip == otherEIN.zip else False
             }
         except Exception as e:
-            print(f"""Exception:\n{e}\nSelf:{self}\nOther: {otherEIN}\n""")
+            error(f"""Exception:\n{e}\nSelf:{self}\nOther: {otherEIN}\n""")
             # If we cannot succesfully compare the data return None
             return None
+        debug(compareDict)
         score = 0
         for v in compareDict.values():
             # increase score by one for every value that is true
@@ -75,7 +76,7 @@ class EINData:
         return compareDict
 
 
-def dataframe_to_eins(df: pd.DataFrame, 
+def dataframe_to_eins(df: DataFrame, 
     einLabel: str = "Lessee Tax-ID",
     nameLabel: str = "NAME",
     addressLabel: str = "ADDRESS",
@@ -90,6 +91,7 @@ def dataframe_to_eins(df: pd.DataFrame,
     Requires the dataframe contains ceratain data labels.
     """
     # Confirm all correct columns exist
+    debug(df)
     try:
         columns = df.columns
         assert (einLabel in columns), f"EIN label not present: {einLabel} | {columns}" 
@@ -100,13 +102,14 @@ def dataframe_to_eins(df: pd.DataFrame,
         assert (zipLabel in columns), f"EIN label not present: {zipLabel} | {columns}" 
         assert (phoneLabel in columns), f"EIN label not present: {phoneLabel} | {columns}"         
     except Exception as e:
-        print(f"NOT ALL REQUIRED COLUMNS PRESENT!\n{e}\n{df}")
+        error(f"NOT ALL REQUIRED COLUMNS PRESENT!\n{e}\n{df}")
     
     eins = []
     for _, data in df.iterrows():
         # _ is the row index which is not used
         # The rest is a pandas serices
         # The column labels are used to pull the data from the series
+        debug(data)
         try:
             eins.append(EINData(
                 str(data[einLabel]),
@@ -119,7 +122,7 @@ def dataframe_to_eins(df: pd.DataFrame,
             ))
         except Exception as e:
             # If we fail, we port the error and move on to the next item in dataframe
-            print(f"Could not add {einLabel}!")
+            warning(f"Could not add {einLabel}!")
             continue
     
 class __EINService(ABC):
@@ -161,26 +164,29 @@ class EINTaxIDService(__EINService):
     _url = "https://eintaxid.com"
 
     def search_ein(self, ein: str) -> EINData:
+        debug(ein)
         try:
             # We don't even want to attempt this unless we're using a valid EIN
             if not self._isEIN(ein):
-                print(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
+                warning(f"{ein} is not a valid EIN!\nValid formats are: XX-XXXXXXX and XXXXXXXXX")
                 raise Exception("Invalid EIN")
         except Exception as e:
-            print(e)
+            warning(f"{e} | {ein}")
             return None
         # Send a POST HTTP request to the site using the search-ajax script
         # query just needs to include EIN, XML header required for parsing
         req = rq.request("POST",self._url + "/search-ajax.php", data={"query": ein}, \
             headers={'X-Requested-With': 'XMLHttpRequest'})
+        debug(req)
         # Use BeautifulSoup to parse the HTML content
         soup = bsp(req.content, "html.parser")
+        debug(soup)
         try:
             # The actual data return is always on line 4. The rest are DIV set up
             text = soup.text.splitlines()[4]
         except:
-            print(f"Failed: {ein} | {soup}")
-            return None
+            warning(f"Failed: {ein} | {soup}")
+            return EINData(ein, None,None,None,None,None,None)
         data = self._parse_return(text)
 
         return EINData(
@@ -207,6 +213,7 @@ class EINTaxIDService(__EINService):
 
         Data that cannot be found will be returned as None
         """
+        debug(f"EIN Service returned content:\n{content}")
         m = re.search("EIN Number:", content)
         company = content[0:m.start()].strip()
 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f475f3d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+# Usage Guide Available
+For information on how to set up and utilize this library please view: *https://git.glott.me/LEAF/EINService/wiki/Usage-Guide*
+
+# Information:
+- Currently only EINTaxIDService is available. This uses https://eintaxid.com to fetch information about a company.
+- The EINData object utilized by this library consists of the following data: EIN, Company Name, Address, City, State, Zip Code, Phone
+
+# Upcoming functionality
+Future versions will hopefully provide the following capabilities:
+- Option to use other EIN search services
+- Automatically format EINData lists into dataframes
+- Compare 'EINData dataframes' adding the compare dict as an extra column
+
+A GUI application is also planned for release, but will be located in a separate repo.
\ No newline at end of file
diff --git a/basic_example.py b/basic_example.py
new file mode 100644
index 0000000..2210afc
--- /dev/null
+++ b/basic_example.py
@@ -0,0 +1,15 @@
+from EINService import EINTaxIDService
+
+# Instantiate an EINService object
+# This is what will be used to do all of our searches
+einService = EINTaxIDService()
+
+# Advanced Micro Devices Inc's EIN identifier:
+# This is the ein we will be searching.
+AMD_EIN = "94-1692300"
+# The return will be an EINData object
+# If the search was unsuccessful, data members other than EIN will be None
+searchResult = einService.search_ein(AMD_EIN)
+
+print(searchResult)
+# EIN: 941692300   | Name: advanced micro devices inc             | Address: 2485 augustine drive         | City: santa clara     | State: ca     | Phone: 408 7494000
\ No newline at end of file
diff --git a/example_from_excel.py b/example_from_excel.py
new file mode 100644
index 0000000..71e0602
--- /dev/null
+++ b/example_from_excel.py
@@ -0,0 +1,36 @@
+from EINService import EINTaxIDService, dataframe_to_eins
+import pandas as pd
+
+# Instantiate an EINService object
+# This is what will be used to do all of our searches
+einService = EINTaxIDService()
+
+# Here we pull in the data from excel
+einData = pd.read_excel("SampleData.xlsx")
+# Extract the eins column as a list of strings
+einList = einData["Lessee Tax-ID"].to_list()
+# The service will return a list of EINData objects
+# If no match was found, all data members besides ein will be None
+searchResults = einService.search_eins(einList)
+
+print(searchResults)
+
+# Can also convert a dataframe into a list of EINData
+# This requires that our dataframe has all of the necessary columns.
+# The defaults for these columns are:
+#   "Lessee Tax-ID",
+#   "NAME",
+#   "ADDRESS"
+#   "CITY"
+#   "STATE"
+#   "ZIP"
+#   "PHONE"
+# 
+# You can also specify your own column names as parameters.
+einDataList = dataframe_to_eins(einData)
+print(einData)
+
+# This allows us to compare our search results to our 'local data'
+for i, localData in enumerate(einDataList):
+    comparisonDict = localData.compare(searchResults[i])
+    print(comparisonDict)
\ No newline at end of file
diff --git a/test.py b/test.py
deleted file mode 100644
index 41139b0..0000000
--- a/test.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import EINService as es
-import pandas as pd
-
-data = pd.read_excel("ExampleCSP.xlsx")
-einService = es.EINTaxIDService()
-
-scores = []
-for _, deal in data.iterrows():
-    try:
-        leafEIN = es.EINData(
-            str(deal["Lessee Tax-ID"]),
-            deal["NAME"],
-            deal["ADDRESS"],
-            deal["CITY"],
-            deal["STATE"],
-            str(deal["ZIP"]),
-            str(deal["PHONE"]),
-            )
-    except:
-        print(f"Failed to create EINData (LEAF): {deal['Lessee Tax-ID']}")
-        continue
-
-    try:
-        external = einService.search_ein(leafEIN.get_ein())
-        if external == None: continue
-    except:
-        print(f"Failed to create EINData (EXTERNAL): {deal['Lessee Tax-ID']}")
-        continue
-    scores.append(leafEIN.compare(external))
-
-print(f"Scores:\n{scores}")
\ No newline at end of file