Reworked how consolidated reports are created.

Uses the new relative path from the IL file.
Uses row hashing with the extract date to avoid duplicates.
Switched to TOML for settings instead of JSON.
dev · 3 years ago
parent 5caaf3d7ac
commit 1bb12c722a
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
5 changed files:

  1. .gitignore           (2)
  2. settings.json        (2)
  3. settings.toml       (14)
  4. src/il_extract.py   (19)
  5. src/il_reports.py  (133)

.gitignore (vendored, 2 changes)

@@ -3,7 +3,9 @@ venv/
 dist/
 InputFiles/
 __pycache__/
+2023/
+*.lnk
 *.spec
 *.log
 *.xlsx

settings.json (2 changes)

@@ -1 +1 @@
-{"debug": true, "consolidatedBasePath": ".", "defaultLocations": {"ach": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "disp": "", "gl": "", "lb": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "minv": "", "niv": "", "ren": "", "pymt": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "uap": "", "pastdue": ""}}
+{"debug": true, "consolidatedBasePath": ".", "defaultLocations": {"ach": "Z:/shared/Business Solutions/Griff/Code/InfoLeaseExtract/2023/2023.05/2023.05.24/ACH", "disp": "", "gl": "", "lb": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "minv": "", "niv": "", "ren": "", "pymt": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "uap": "", "pastdue": ""}}

settings.toml (new file, 14 additions)

@@ -0,0 +1,14 @@
+debug = true
+consolidatedBasePath = '\\leafnow.com\shared\Accounting\CASH APPS\2023'
+
+[defaultLocations]
+ach = ''
+disp = ''
+gl = ''
+lb = ''
+minv = ''
+niv = ''
+ren = ''
+pymt = ''
+uap = ''
+pastdue = ''
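
A minimal sketch of the new settings round-trip, assuming the third-party tomli_w package is installed alongside the stdlib tomllib (Python 3.11+). tomllib only reads and accepts only binary file handles, which is why the diffs below open with 'rb'; tomli_w handles writing and likewise wants a binary handle. The edited path value is hypothetical.

    from tomllib import load   # stdlib (3.11+), read-only
    from tomli_w import dump   # third-party writer; TOML writing is not in the stdlib

    # tomllib.load() accepts only binary file objects
    with open("settings.toml", "rb") as s:
        settings = load(s)

    settings["defaultLocations"]["ach"] = "Z:/some/new/path"  # hypothetical edit

    # tomli_w.dump() likewise writes to a binary handle
    with open("settings.toml", "wb") as s:
        dump(settings, s)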

src/il_extract.py (19 changes)

@@ -7,14 +7,15 @@ from PyQt5 import QtWidgets
 from datetime import datetime as dt
 import il_reports as ilx #TODO redo aliasing
 from logging import debug, DEBUG, basicConfig
+from tomllib import load
+from tomli_w import dump
 
-with open("settings.json") as s:
-    settings = json.loads(s.read())
+with open("settings.toml", mode='rb') as s:
+    settings = load(s)
 
 #if settings["debug"]:
 basicConfig(filename='debug.log', filemode='w', encoding='utf-8', level=DEBUG)
-debug("\n\n\n########################### VERSION = 3.2 ###########################\n\n\n")
+debug("\n\n\n########################### VERSION = 3.3 ###########################\n\n\n")
 debug("Running main.py...")
 
 class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
     """
@ -33,9 +34,9 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
self.inputFile = "" self.inputFile = ""
# The location that the outputfile will be saved at # The location that the outputfile will be saved at
self.outputFile = "" self.outputFile = ""
# Load the settings.json # Load the settings.toml
with open("settings.json") as s: with open("settings.toml") as s:
self.settings = json.loads(s.read()) self.settings = load(s)
# Set the current report type to ACH as default # Set the current report type to ACH as default
self.curReportType = "ach" self.curReportType = "ach"
@@ -220,9 +221,9 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
         if self.settings["defaultLocations"][self.curReportType] == '':
             self.settings["defaultLocations"][self.curReportType] = ('/').join(self.inputFile.split('/')[:-1])
             debug(f"checked_for_saved: saved new default location | {self.curReportType} | {self.settings['defaultLocations'][self.curReportType]}")
-            with open('settings.json', 'w') as s:
+            with open('settings.toml', 'wb') as s:
                 # Save changes to the setting (tomli_w writes to a binary handle)
-                json.dump(self.settings, s)
+                dump(self.settings, s)
 
     def report_type_change(self):
         debug(f"Changing report type | Was: {self.curReportType} -> {self.reportTypeCB.currentText()}")
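
The default-location line above rebuilds the folder by splitting on '/', which works because Qt file dialogs return '/'-separated paths. A separator-agnostic alternative with pathlib, for comparison only (the input path below is hypothetical, and this is not what the commit does):

    from pathlib import Path

    # Hypothetical input file chosen in the Qt file dialog
    input_file = "Z:/shared/Business Solutions/Griff/Code/InfoLeaseExtract/2023/2023.05/2023.05.24/ACH/report.txt"

    # .parent drops the file name; as_posix() keeps the '/' separators the app expects
    default_location = Path(input_file).parent.as_posix()
    # -> "Z:/shared/Business Solutions/Griff/Code/InfoLeaseExtract/2023/2023.05/2023.05.24/ACH"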

src/il_reports.py (133 changes)

@@ -7,18 +7,40 @@ from pathlib import Path
 import numpy as np
 from glob import glob
 from logging import debug, DEBUG, basicConfig, warn, error
+from hashlib import md5
+import openpyxl as pxl
+from tomllib import load
 
-# V3.2 | 04/21/23
-with open("settings.json") as s:
-    settings = json.loads(s.read())
+# V3.3 | 05/24/23
+with open("settings.toml", mode='rb') as s:
+    settings = load(s)
 
 if settings["debug"]:
-    basicConfig(filename='debug.log', encoding='utf-8', level=DEBUG)
+    basicConfig(filename='debug.log', filemode='w', encoding='utf-8',
+                format="%(asctime)s %(message)s",
+                level=DEBUG)
 
 # contract numbers are a common feature in many reports so it's
 # useful to have the regex for them globally available
 contract_number_regex = "\d{3}-\d{7}-\d{3}"
 
+def extract_date_path(path: Path) -> Path|None:
+    """
+    Used to get the YYYY.MM month folder for a report;
+    returns None when no parent folder matches.
+    """
+    date_pattern = re.compile(r'^\d{4}\.\d{2}$')
+    for parent in path.parents:
+        if date_pattern.match(parent.name):
+            return parent
+    return None
+
+def hash_cols(row: pd.Series, cols_to_hash: list[str]) -> str:
+    """MD5 digest of the concatenated string values of the given columns."""
+    md5_hash = md5()
+    md5_hash.update((''.join(str(row[col]) for col in cols_to_hash)).encode('utf-8'))
+    return md5_hash.hexdigest()
+
 class ILReport:
     """
     InfoLease Report class will be used to work with the files.
@@ -57,7 +79,8 @@ class ILReport:
         self._append_to_consolidated_report(dataframe, settings["consolidatedBasePath"])
         return dataframe
 
-    def _append_to_consolidated_report(self, dataframe_to_append: DataFrame, base_path: str):
+    def _append_to_consolidated_report(self, dataframe_to_append: DataFrame,
+                                       reports_base_path: Path = None):
         """
         """
         # Decide the sheet name based on the save_location_name
@@ -79,54 +102,88 @@ class ILReport:
         elif re.search("(?i)RETURNS_BER", self.location) != None:
             sheet_name = "RETURNS Portal"
         else:
+            debug(f"No consolidated report for {self.location}!")
             return None
 
+        il_report_path: Path = Path(self.location)
+        debug(f"{il_report_path=}")
+        month_dir: Path|None = extract_date_path(il_report_path)
+        if month_dir is None and reports_base_path is None:
+            warn(f"Consolidated report not created! No valid base path: {il_report_path} | {reports_base_path}")
+            return None
+
+        report_date: str = dt.now().date() \
+            if re.search(r"^\d{4}\.\d{2}\.\d{2}$", il_report_path.parent.parent.name) is None \
+            else il_report_path.parent.parent.name.replace('.', '/')
+        debug(f"{report_date=}")
+        report_month: str = il_report_path.parents[2].name.replace('.', '-')
+
+        if month_dir is None:
+            if reports_base_path is None:
+                warn(f"Consolidated report not created! Could not find month folder: {il_report_path} | {reports_base_path}")
+                return None
+            else:
+                month_dir = reports_base_path
+
-        current_date: list(str) = dt.now().strftime("%Y.%m.%d").split('.')
-        report_name = f"{dt.now().strftime('%B')}_ConsolidatedReport.xlsx"
-        debug(f"Consolidated Reports {report_name} | {self.output_location} | {self.x_method} | {current_date}")
-        year = current_date[0]
-        month = current_date[1]
-        year_month = f"{year}.{month}"
-        save_path = f"{base_path}/{year}/{year_month}/{report_name}"
-        # Check if the current month has a consolidated report
-        month_summary_file: list(str) = glob(save_path)
-        if len(month_summary_file) == 0:
+        report_name: Path = Path(f"{report_month} ConsolidatedReport.xlsx")
+        debug(f"{month_dir=}")
+        debug(f"{report_name=}")
+        save_path = Path(month_dir, report_name)
+        debug(f"Consolidated Report {save_path=}")
+
+        # Stamp each row with the extract date and an MD5 row hash so
+        # duplicate rows can be detected on later appends
+        consolidated_df = dataframe_to_append.copy(deep=True)
+        consolidated_df["ExtractDate"] = report_date
+        consolidated_df.fillna('--', inplace=True)
+        consolidated_df["Hash"] = consolidated_df.apply(
+            lambda r: hash_cols(r, r.keys()),
+            axis=1
+        )
+        consolidated_df.replace("--", None, inplace=True)
+        debug(consolidated_df)
+
+        if not save_path.exists():
             debug(f"Consolidated Report | No monthly summary file!\n\tCreating: {save_path}")
             # No file exists yet
             # Create it and add the current month
             try:
                 with pd.ExcelWriter(save_path) as writer:
                     debug(f"Consolidated Report | {sheet_name}: Saving data as: {report_name}")
-                    dataframe_to_append.to_excel(writer, index=False, sheet_name=sheet_name)
+                    consolidated_df.to_excel(writer, index=False, sheet_name=sheet_name)
             except Exception as e:
-                error(f"[E] Failed to create consolidated report! {sheet_name}:\n{e}")
+                error(f"Failed to create consolidated report! {report_name} | {sheet_name} | {il_report_path} :\n{e}")
         else:
-            # We need to read the dataframe in the current monthly report
-            # Check that we are not adding matching data
-            # Save the new report
-            #FIXME: This is so hacky it's embaressing
-            add_headers = False
-            try:
-                current_data: DataFrame = pd.read_excel(month_summary_file[0], sheet_name=sheet_name)
-                new_data_len = len(dataframe_to_append)
-                cur_first_col = current_data.iloc[len(current_data)-new_data_len:,0].to_list()
-                new_first_col = dataframe_to_append.iloc[:,0].to_list()
-                if cur_first_col == new_first_col:
-                    debug(f"Consolidated Report | Data is same as previous! Skipping!")
-                    return None
-            except ValueError as ve:
-                ve == ValueError(f"Worksheet named '{sheet_name} not found")
-                current_data = []
-                add_headers = True
-            # We need to find the start cols (where the new data should go)
-            try:
-                with pd.ExcelWriter(save_path, engine='openpyxl', mode='a',if_sheet_exists="overlay") as writer:
-                    debug(f"Consolidated Report | {sheet_name}: Saving data as: {report_name}")
-                    dataframe_to_append.to_excel(writer, index=False, sheet_name=sheet_name,startrow=len(current_data),header=add_headers)
-            except Exception as e:
-                error(f"[E] Failed to append to consolidated report! {sheet_name}:\n{e}")
+            debug(f"{save_path} already exists.")
+            # Get the current worksheets
+            wb: pxl.Workbook = pxl.open(save_path, read_only=True)
+            current_sheets = wb.worksheets
+            wb.close()
+            with pd.ExcelWriter(save_path, engine='openpyxl', mode='a', if_sheet_exists="overlay") as writer:
+                if sheet_name in [w.title for w in current_sheets]:
+                    debug(f"{sheet_name} sheet already exists.")
+                    # We need to read the dataframe in the current monthly report
+                    # Check that we are not adding duplicate data
+                    try:
+                        # Merge the current data and drop duplicates
+                        prev_data: DataFrame = pd.read_excel(save_path, sheet_name=sheet_name)
+                        debug(f"Prev:\n{prev_data}")
+                        consolidated_df = consolidated_df[~(consolidated_df["Hash"].isin(prev_data["Hash"]))]
+                        debug(f"New data:\n{consolidated_df}")
+                        debug(f"Consolidated Report | {sheet_name}: Saving data as: {report_name}")
+                        consolidated_df.to_excel(writer, index=False, sheet_name=sheet_name, header=False,
+                                                 startrow=prev_data.shape[0]+1)
+                    except Exception as e:
+                        error(f"Failed to append to consolidated report! {report_name} | {sheet_name} | {il_report_path} :\n{e}")
+                else:
+                    consolidated_df.to_excel(writer, index=False, sheet_name=sheet_name)
 
 def create_line_divider(breakage_list: list):
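
A short usage sketch of the two new helpers in src/il_reports.py; the report path and file name below are hypothetical, modeled on the ach default location saved in settings.json above.

    import pandas as pd
    from pathlib import Path
    from il_reports import extract_date_path, hash_cols

    # extract_date_path walks the parents: "ACH" and "2023.05.24" fail the
    # ^\d{4}\.\d{2}$ pattern, "2023.05" matches
    p = Path("Z:/shared/Business Solutions/Griff/Code/InfoLeaseExtract/2023/2023.05/2023.05.24/ACH/report.xlsx")
    print(extract_date_path(p))  # -> .../2023/2023.05

    # hash_cols digests the stringified values of the given columns,
    # so identical rows always produce the same hash
    row = pd.Series({"Contract": "123-4567890-001", "Amount": 100.0, "ExtractDate": "2023/05/24"})
    print(hash_cols(row, list(row.keys())))

Note that ExtractDate is hashed together with the data columns, so identical rows from a different extract date hash differently and are still appended; only rows re-processed for the same date are dropped as duplicates.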
