Reworked how consolidated reports are created. Uses the new relative path
derived from the IL file.
Uses row hashing combined with the extract date to avoid duplicate rows.
Switched to TOML for settings instead of JSON.
dev
= 3 years ago
parent 5caaf3d7ac
commit 1bb12c722a
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 2
      .gitignore
  2. 2
      settings.json
  3. 14
      settings.toml
  4. 19
      src/il_extract.py
  5. 133
      src/il_reports.py

2
.gitignore vendored

@ -3,7 +3,9 @@ venv/
dist/
InputFiles/
__pycache__/
2023/
*.lnk
*.spec
*.log
*.xlsx

@ -1 +1 @@
{"debug": true, "consolidatedBasePath": ".", "defaultLocations": {"ach": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "disp": "", "gl": "", "lb": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "minv": "", "niv": "", "ren": "", "pymt": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "uap": "", "pastdue": ""}}
{"debug": true, "consolidatedBasePath": ".", "defaultLocations": {"ach": "Z:/shared/Business Solutions/Griff/Code/InfoLeaseExtract/2023/2023.05/2023.05.24/ACH", "disp": "", "gl": "", "lb": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "minv": "", "niv": "", "ren": "", "pymt": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "uap": "", "pastdue": ""}}

@ -0,0 +1,14 @@
debug = true
consolidatedBasePath = '\\leafnow.com\shared\Accounting\CASH APPS\2023'
[defaultLocations]
ach = ''
disp = ''
gl = ''
lb = ''
minv = ''
niv = ''
ren = ''
pymt = ''
uap = ''
pastdue = ''

@ -7,14 +7,15 @@ from PyQt5 import QtWidgets
from datetime import datetime as dt
import il_reports as ilx #TODO redo aliasing
from logging import debug, DEBUG, basicConfig
from tomllib import load
from tomli_w import dump
with open("settings.json") as s:
settings = json.loads(s.read())
with open("settings.toml", mode='rb') as s:
settings = load(s)
#if settings["debug"]:
basicConfig(filename='debug.log', mode='w', encoding='utf-8', level=DEBUG)
debug("\n\n\n########################### VERSION = 3.2 ###########################\n\n\n")
debug("\n\n\n########################### VERSION = 3.3 ###########################\n\n\n")
debug("Running main.py...")
class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
"""
@ -33,9 +34,9 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
self.inputFile = ""
# The location that the outputfile will be saved at
self.outputFile = ""
# Load the settings.json
with open("settings.json") as s:
self.settings = json.loads(s.read())
# Load the settings.toml
with open("settings.toml") as s:
self.settings = load(s)
# Set the current report type to ACH as default
self.curReportType = "ach"
@ -220,9 +221,9 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
if self.settings["defaultLocations"][self.curReportType] == '':
self.settings["defaultLocations"][self.curReportType] = ('/').join(self.inputFile.split('/')[:-1])
debug(f"checked_for_saved: saved new deafult location | {self.curReportType} | {self.settings['defaultLocations'][self.curReportType]}")
with open('settings.json', 'w') as s:
with open('settings.toml', 'w') as s:
# Save changes to the setting
json.dump(self.settings, s)
dump(self.settings, s)
def report_type_change(self):
debug(f"Changing report type | Was: {self.curReportType} -> {self.reportTypeCB.currentText()}")

@ -7,18 +7,40 @@ from pathlib import Path
import numpy as np
from glob import glob
from logging import debug, DEBUG, basicConfig, warn, error
from hashlib import md5
import openpyxl as pxl
from tomllib import load
# V3.2 | 04/21/23
# V3.3 | 05/24/23
with open("settings.json") as s:
settings = json.loads(s.read())
with open("settings.toml", mode='rb') as s:
settings = load(s)
if settings["debug"]:
basicConfig(filename='debug.log', encoding='utf-8', level=DEBUG)
basicConfig(filename='debug.log', filemode='w',encoding='utf-8',
format="%(asctime)s %(message)s",
level=DEBUG)
# Contract numbers are a common feature in many reports, so it's
# useful to have the regex for them globally available.
# Raw string prevents the invalid "\d" escape-sequence warning.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
def extract_date_path(path: Path) -> Path|None:
"""
Used to get the month folder for a report
"""
date_pattern = re.compile(r'^\d{4}\.\d{2}$')
for parent in path.parents:
if date_pattern.match(parent.name):
return parent
return None
def hash_cols(row: pd.Series, cols_to_hash: list[str]) -> str:
    """
    Return the MD5 hex digest identifying *row* by the given columns.

    Each selected value is stringified and the pieces are concatenated
    with no separator before hashing, so the digest acts as a
    deduplication key for the chosen columns.
    """
    concatenated = ''.join(str(row[column]) for column in cols_to_hash)
    return md5(concatenated.encode('utf-8')).hexdigest()
class ILReport:
"""
InfoLease Report class will be used to work with the files.
@ -57,7 +79,8 @@ class ILReport:
self._append_to_consolidated_report(dataframe, settings["consolidatedBasePath"])
return dataframe
def _append_to_consolidated_report(self, dataframe_to_append: DataFrame, base_path: str):
def _append_to_consolidated_report(self, dataframe_to_append: DataFrame,
reports_base_path: Path = None):
"""
"""
# Decide the sheet name based on the save_location_name
@ -79,54 +102,88 @@ class ILReport:
elif re.search("(?i)RETURNS_BER", self.location) != None:
sheet_name = "RETURNS Portal"
else:
debug(f"No consolidated report for {self.location}!")
return None
il_report_path: Path = Path(self.location)
debug(f"{il_report_path=}")
month_dir: Path|None = extract_date_path(il_report_path)
if month_dir is None and reports_base_path is None:
warn(f"Consolidated report not created! No valid base path: {il_report_path} | {reports_base_path}")
return None
report_date: str = dt.now().date() \
if re.search(r"^\d{4}\.\d{2}\.\d{2}$",il_report_path.parent.parent.name) is None\
else il_report_path.parent.parent.name.replace('.','/')
debug(f"{report_date=}")
report_month:str = il_report_path.parents[2].name.replace('.','-')
if month_dir is None:
if reports_base_path is None:
warn(f"Consolidated report not created! Could not find month folder: {il_report_path} | {reports_base_path}")
return None
else:
month_dir = reports_base_path
current_date: list(str) = dt.now().strftime("%Y.%m.%d").split('.')
report_name = f"{dt.now().strftime('%B')}_ConsolidatedReport.xlsx"
debug(f"Consolidated Reports {report_name} | {self.output_location} | {self.x_method} | {current_date}")
year = current_date[0]
month = current_date[1]
year_month = f"{year}.{month}"
save_path = f"{base_path}/{year}/{year_month}/{report_name}"
# Check if the current month has a consolidated report
month_summary_file: list(str) = glob(save_path)
if len(month_summary_file) == 0:
report_name: Path = Path(f"{report_month} ConsolidatedReport.xlsx")
debug(f"{month_dir=}")
debug(f"{report_name=}")
save_path = Path(month_dir, report_name)
debug(f"Consolidated Report {save_path=}")
consolidated_df = dataframe_to_append.copy(deep=True)
consolidated_df["ExtractDate"] = report_date
consolidated_df.fillna('--', inplace=True)
consolidated_df["Hash"] = consolidated_df.apply(
lambda r: hash_cols(r,r.keys())
, axis=1
)
consolidated_df.replace("--", None, inplace=True)
debug(consolidated_df)
if not save_path.exists():
debug(f"Consolidated Report | No monthly summary file!\n\tCreating: {save_path}")
# No file exists yet
# Create it and add the current month
try:
with pd.ExcelWriter(save_path) as writer:
debug(f"Consolidated Report | {sheet_name}: Saving data as: {report_name}")
dataframe_to_append.to_excel(writer, index=False, sheet_name=sheet_name)
consolidated_df.to_excel(writer, index=False, sheet_name=sheet_name)
except Exception as e:
error(f"[E] Failed to create consolidated report! {sheet_name}:\n{e}")
error(f"Failed to create to consolidated report! {report_name} | {sheet_name} | {il_report_path} :\n{e}")
else:
debug(f"{save_path} already exisits.")
# Get the current worksheets
wb: pxl.Workbook = pxl.open(save_path, read_only=True)
current_sheets = wb.worksheets
wb.close()
with pd.ExcelWriter(save_path, engine='openpyxl', mode='a',if_sheet_exists="overlay") as writer:
if sheet_name in [w.title for w in current_sheets]:
debug(f"{sheet_name} sheet already exisits.")
# We need to read the dataframe in the current monthly report
# Check that we are not adding matching data
# Save the new report
#FIXME: This is so hacky it's embaressing
add_headers = False
# Check that we are not adding duplicate data
try:
current_data: DataFrame = pd.read_excel(month_summary_file[0], sheet_name=sheet_name)
new_data_len = len(dataframe_to_append)
cur_first_col = current_data.iloc[len(current_data)-new_data_len:,0].to_list()
new_first_col = dataframe_to_append.iloc[:,0].to_list()
if cur_first_col == new_first_col:
debug(f"Consolidated Report | Data is same as previous! Skipping!")
return None
except ValueError as ve:
ve == ValueError(f"Worksheet named '{sheet_name} not found")
current_data = []
add_headers = True
# We need to find the start cols (where the new data should go)
try:
with pd.ExcelWriter(save_path, engine='openpyxl', mode='a',if_sheet_exists="overlay") as writer:
# Merge the current data and drop duplicates
prev_data: DataFrame = pd.read_excel(save_path, sheet_name=sheet_name)
debug(f"Prev:\n{prev_data}")
consolidated_df = consolidated_df[~(consolidated_df["Hash"].isin(prev_data["Hash"]) )]
debug(f"New data:\n{consolidated_df}")
debug(f"Consolidated Report | {sheet_name}: Saving data as: {report_name}")
dataframe_to_append.to_excel(writer, index=False, sheet_name=sheet_name,startrow=len(current_data),header=add_headers)
consolidated_df.to_excel(writer, index=False, sheet_name=sheet_name, header=False,
startrow=prev_data.shape[0]+1)
except Exception as e:
error(f"[E] Failed to append to consolidated report! {sheet_name}:\n{e}")
error(f"Failed to append to consolidated report! {report_name} | {sheet_name} | {il_report_path} :\n{e}")
else:
consolidated_df.to_excel(writer, index=False, sheet_name=sheet_name)
def create_line_divider(breakage_list: list):

Loading…
Cancel
Save