Reworked how consolidated reports are created. Uses the new relative path
derived from the IL file.
Uses row hashing combined with the extract date to avoid duplicate rows.
Switched to TOML for settings instead of JSON.
dev
= 3 years ago
parent 5caaf3d7ac
commit 1bb12c722a
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 2
      .gitignore
  2. 2
      settings.json
  3. 14
      settings.toml
  4. 19
      src/il_extract.py
  5. 133
      src/il_reports.py

2
.gitignore vendored

@ -3,7 +3,9 @@ venv/
dist/
InputFiles/
__pycache__/
2023/
*.lnk
*.spec
*.log
*.xlsx

@ -1 +1 @@
{"debug": true, "consolidatedBasePath": ".", "defaultLocations": {"ach": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "disp": "", "gl": "", "lb": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "minv": "", "niv": "", "ren": "", "pymt": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "uap": "", "pastdue": ""}}
{"debug": true, "consolidatedBasePath": ".", "defaultLocations": {"ach": "Z:/shared/Business Solutions/Griff/Code/InfoLeaseExtract/2023/2023.05/2023.05.24/ACH", "disp": "", "gl": "", "lb": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "minv": "", "niv": "", "ren": "", "pymt": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "uap": "", "pastdue": ""}}

@ -0,0 +1,14 @@
debug = true
consolidatedBasePath = '\\leafnow.com\shared\Accounting\CASH APPS\2023'
[defaultLocations]
ach = ''
disp = ''
gl = ''
lb = ''
minv = ''
niv = ''
ren = ''
pymt = ''
uap = ''
pastdue = ''

@ -7,14 +7,15 @@ from PyQt5 import QtWidgets
from datetime import datetime as dt
import il_reports as ilx #TODO redo aliasing
from logging import debug, DEBUG, basicConfig
from tomllib import load
from tomli_w import dump
with open("settings.json") as s:
settings = json.loads(s.read())
with open("settings.toml", mode='rb') as s:
settings = load(s)
#if settings["debug"]:
basicConfig(filename='debug.log', mode='w', encoding='utf-8', level=DEBUG)
debug("\n\n\n########################### VERSION = 3.2 ###########################\n\n\n")
debug("\n\n\n########################### VERSION = 3.3 ###########################\n\n\n")
debug("Running main.py...")
class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
"""
@ -33,9 +34,9 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
self.inputFile = ""
# The location that the outputfile will be saved at
self.outputFile = ""
# Load the settings.json
with open("settings.json") as s:
self.settings = json.loads(s.read())
# Load the settings.toml
with open("settings.toml") as s:
self.settings = load(s)
# Set the current report type to ACH as default
self.curReportType = "ach"
@ -220,9 +221,9 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
if self.settings["defaultLocations"][self.curReportType] == '':
self.settings["defaultLocations"][self.curReportType] = ('/').join(self.inputFile.split('/')[:-1])
debug(f"checked_for_saved: saved new deafult location | {self.curReportType} | {self.settings['defaultLocations'][self.curReportType]}")
with open('settings.json', 'w') as s:
with open('settings.toml', 'w') as s:
# Save changes to the setting
json.dump(self.settings, s)
dump(self.settings, s)
def report_type_change(self):
debug(f"Changing report type | Was: {self.curReportType} -> {self.reportTypeCB.currentText()}")

@ -7,18 +7,40 @@ from pathlib import Path
import numpy as np
from glob import glob
from logging import debug, DEBUG, basicConfig, warn, error
from hashlib import md5
import openpyxl as pxl
from tomllib import load
# V3.2 | 04/21/23
# V3.3 | 05/24/23
with open("settings.json") as s:
settings = json.loads(s.read())
with open("settings.toml", mode='rb') as s:
settings = load(s)
if settings["debug"]:
basicConfig(filename='debug.log', encoding='utf-8', level=DEBUG)
basicConfig(filename='debug.log', filemode='w',encoding='utf-8',
format="%(asctime)s %(message)s",
level=DEBUG)
# Contract numbers are a common feature in many reports, so it's
# useful to have the regex for them globally available.
# Raw string prevents the invalid "\d" escape-sequence warning.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
def extract_date_path(path: Path) -> Path|None:
"""
Used to get the month folder for a report
"""
date_pattern = re.compile(r'^\d{4}\.\d{2}$')
for parent in path.parents:
if date_pattern.match(parent.name):
return parent
return None
def hash_cols(row: pd.Series, cols_to_hash: list[str]) -> str:
    """
    Return the MD5 hex digest identifying *row* by the given columns.

    Each selected value is stringified and the pieces are concatenated
    with no separator before hashing, so the digest acts as a
    deduplication key for the chosen columns.
    """
    concatenated = ''.join(str(row[column]) for column in cols_to_hash)
    return md5(concatenated.encode('utf-8')).hexdigest()
class ILReport:
"""
InfoLease Report class will be used to work with the files.
@ -57,7 +79,8 @@ class ILReport:
self._append_to_consolidated_report(dataframe, settings["consolidatedBasePath"])
return dataframe
def _append_to_consolidated_report(self, dataframe_to_append: DataFrame, base_path: str):
def _append_to_consolidated_report(self, dataframe_to_append: DataFrame,
reports_base_path: Path = None):
"""
"""
# Decide the sheet name based on the save_location_name
@ -79,54 +102,88 @@ class ILReport:
elif re.search("(?i)RETURNS_BER", self.location) != None:
sheet_name = "RETURNS Portal"
else:
debug(f"No consolidated report for {self.location}!")
return None
il_report_path: Path = Path(self.location)
debug(f"{il_report_path=}")
month_dir: Path|None = extract_date_path(il_report_path)
if month_dir is None and reports_base_path is None:
warn(f"Consolidated report not created! No valid base path: {il_report_path} | {reports_base_path}")
return None
report_date: str = dt.now().date() \
if re.search(r"^\d{4}\.\d{2}\.\d{2}$",il_report_path.parent.parent.name) is None\
else il_report_path.parent.parent.name.replace('.','/')
debug(f"{report_date=}")
report_month:str = il_report_path.parents[2].name.replace('.','-')
if month_dir is None:
if reports_base_path is None:
warn(f"Consolidated report not created! Could not find month folder: {il_report_path} | {reports_base_path}")
return None
else:
month_dir = reports_base_path
current_date: list(str) = dt.now().strftime("%Y.%m.%d").split('.')
report_name = f"{dt.now().strftime('%B')}_ConsolidatedReport.xlsx"
debug(f"Consolidated Reports {report_name} | {self.output_location} | {self.x_method} | {current_date}")
year = current_date[0]
month = current_date[1]
year_month = f"{year}.{month}"
save_path = f"{base_path}/{year}/{year_month}/{report_name}"
# Check if the current month has a consolidated report
month_summary_file: list(str) = glob(save_path)
if len(month_summary_file) == 0:
report_name: Path = Path(f"{report_month} ConsolidatedReport.xlsx")
debug(f"{month_dir=}")
debug(f"{report_name=}")
save_path = Path(month_dir, report_name)
debug(f"Consolidated Report {save_path=}")
consolidated_df = dataframe_to_append.copy(deep=True)
consolidated_df["ExtractDate"] = report_date
consolidated_df.fillna('--', inplace=True)
consolidated_df["Hash"] = consolidated_df.apply(
lambda r: hash_cols(r,r.keys())
, axis=1
)
consolidated_df.replace("--", None, inplace=True)
debug(consolidated_df)
if not save_path.exists():
debug(f"Consolidated Report | No monthly summary file!\n\tCreating: {save_path}")
# No file exists yet
# Create it and add the current month
try:
with pd.ExcelWriter(save_path) as writer:
debug(f"Consolidated Report | {sheet_name}: Saving data as: {report_name}")
dataframe_to_append.to_excel(writer, index=False, sheet_name=sheet_name)
consolidated_df.to_excel(writer, index=False, sheet_name=sheet_name)
except Exception as e:
error(f"[E] Failed to create consolidated report! {sheet_name}:\n{e}")
error(f"Failed to create to consolidated report! {report_name} | {sheet_name} | {il_report_path} :\n{e}")
else:
debug(f"{save_path} already exisits.")
# Get the current worksheets
wb: pxl.Workbook = pxl.open(save_path, read_only=True)
current_sheets = wb.worksheets
wb.close()
with pd.ExcelWriter(save_path, engine='openpyxl', mode='a',if_sheet_exists="overlay") as writer:
if sheet_name in [w.title for w in current_sheets]:
debug(f"{sheet_name} sheet already exisits.")
# We need to read the dataframe in the current monthly report
# Check that we are not adding matching data
# Save the new report
#FIXME: This is so hacky it's embaressing
add_headers = False
# Check that we are not adding duplicate data
try:
current_data: DataFrame = pd.read_excel(month_summary_file[0], sheet_name=sheet_name)
new_data_len = len(dataframe_to_append)
cur_first_col = current_data.iloc[len(current_data)-new_data_len:,0].to_list()
new_first_col = dataframe_to_append.iloc[:,0].to_list()
if cur_first_col == new_first_col:
debug(f"Consolidated Report | Data is same as previous! Skipping!")
return None
except ValueError as ve:
ve == ValueError(f"Worksheet named '{sheet_name} not found")
current_data = []
add_headers = True
# We need to find the start cols (where the new data should go)
try:
with pd.ExcelWriter(save_path, engine='openpyxl', mode='a',if_sheet_exists="overlay") as writer:
# Merge the current data and drop duplicates
prev_data: DataFrame = pd.read_excel(save_path, sheet_name=sheet_name)
debug(f"Prev:\n{prev_data}")
consolidated_df = consolidated_df[~(consolidated_df["Hash"].isin(prev_data["Hash"]) )]
debug(f"New data:\n{consolidated_df}")
debug(f"Consolidated Report | {sheet_name}: Saving data as: {report_name}")
dataframe_to_append.to_excel(writer, index=False, sheet_name=sheet_name,startrow=len(current_data),header=add_headers)
consolidated_df.to_excel(writer, index=False, sheet_name=sheet_name, header=False,
startrow=prev_data.shape[0]+1)
except Exception as e:
error(f"[E] Failed to append to consolidated report! {sheet_name}:\n{e}")
error(f"Failed to append to consolidated report! {report_name} | {sheet_name} | {il_report_path} :\n{e}")
else:
consolidated_df.to_excel(writer, index=False, sheet_name=sheet_name)
def create_line_divider(breakage_list: list):

Loading…
Cancel
Save