You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
136 lines
5.1 KiB
136 lines
5.1 KiB
"""
|
|
This is the main entry point for this application. It finds the newest reports (GP & OB),
then utilizes the reconcile module to find the differences between them. The output is
saved as an Excel file with today's date.
|
|
"""
|
|
# Custom module for reconciliation
|
|
from helpers import setup_logging, find_most_recent_file
|
|
from reports import OnBaseReport, GreatPlainsReport
|
|
|
|
import pandas as pd
|
|
from pandas import DataFrame
|
|
import re
|
|
from re import Pattern
|
|
import logging
|
|
from tomllib import load
|
|
import logging.config
|
|
from datetime import datetime as dt
|
|
from openpyxl import load_workbook, Workbook
|
|
import pathlib
|
|
from pathlib import Path
|
|
|
|
"""
|
|
[ ] Pull in past reconciliations to check against
[ ] Record reconciled transaction (connect with VBA)
[ ] Check GP against the database
[ ] Check OB against the database
[X] Add resolution column to error sheets
[ ] Add sheet for problem contracts already seen and 'resolved'
|
|
"""
|
|
|
|
# Logging is configured at import time, before the module-level logger is
# created, so that the first message below is already handled.
# NOTE(review): setup_logging comes from the project's helpers module; what it
# configures (handlers, levels) is not visible from this file.
setup_logging()

# Module-level logger shared by every function in this file.
logger: logging.Logger = logging.getLogger(__name__)

# Records this logger's own `level` attribute as a numeric value.
logger.info(f"Logger started with level: {logger.level}")
|
|
|
|
|
|
def _load_matching_sheet(file_path, required_cols: list[str], label: str) -> pd.DataFrame | None:
    """
    Return the first sheet of an Excel workbook that contains every required column.

    Args:
        file_path: Location of the workbook to open (passed straight to pd.ExcelFile).
        required_cols (list[str]): Column names that must all appear in a sheet's header.
        label (str): Short report name used only to tag the debug log messages.

    Returns:
        pd.DataFrame|None: The first sheet (in workbook order) holding all required columns,
        or None when no sheet qualifies.
    """
    # The context manager releases the workbook handle as soon as the sheets are loaded.
    with pd.ExcelFile(file_path) as workbook:
        sheets: dict[str, pd.DataFrame] = pd.read_excel(workbook, sheet_name=workbook.sheet_names)

    for sheet_name, sheet_df in sheets.items():
        sheet_columns: list[str] = list(sheet_df.columns)
        matches = [required in sheet_columns for required in required_cols]
        logger.debug(f"{label} ({sheet_name}) : {sheet_columns}")
        logger.debug(f"Matches {matches}")
        if all(matches):
            logger.debug("FOUND")
            return sheet_df

    # Falling through means no sheet qualified; report it instead of leaving a name unbound.
    logger.warning(f"{label}: no sheet in {file_path} contains all required columns {required_cols}")
    return None


def get_reports(work_dir: str, report_config: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
    """
    Find the most recent GP and OB Excel workbooks in `work_dir` and load the usable sheet of each.

    A workbook is selected by file name: the GP pattern matches any name containing "gp" and
    ending in ".xlsx", the OB pattern any name containing "ob" and ending in ".xlsx" (both
    case-insensitive). The actual search is delegated to find_most_recent_file.
    Within each workbook, the first sheet that has every required column is used.

    Args:
        work_dir (str): Directory that is searched for the report workbooks.
        report_config (dict): Report configuration. Its "shared_columns" table must map each
            entry to a dict with a "GP" and an "OB" key naming the column in the respective report.

    Returns:
        tuple[pd.DataFrame|None, pd.DataFrame|None]: The OB and GP dataframes, in that order.
        An element is None when the corresponding workbook has no sheet with all required columns.
    """
    # Raw strings keep `\.` a regex escape rather than an (invalid) string escape.
    gp_regex: Pattern = re.compile(r".*gp.*\.xlsx$", re.IGNORECASE)
    ob_regex: Pattern = re.compile(r".*ob.*\.xlsx$", re.IGNORECASE)

    # Find the paths of the most recently modified GP and OB Excel files
    gp_file_path = find_most_recent_file(work_dir, gp_regex)
    logger.debug(f"gp_file_path: {gp_file_path}")
    ob_file_path = find_most_recent_file(work_dir, ob_regex)
    logger.debug(f"ob_file_path: {ob_file_path}")

    # Each shared column is known under a different name in each report.
    shared_columns = report_config["shared_columns"].values()
    gp_req_cols = [col["GP"] for col in shared_columns]
    ob_req_cols = [col["OB"] for col in shared_columns]
    logger.debug(f"GP_Req_cols: {gp_req_cols}")

    gp_df = _load_matching_sheet(gp_file_path, gp_req_cols, "gp")
    ob_df = _load_matching_sheet(ob_file_path, ob_req_cols, "ob")

    return ob_df, gp_df
|
|
|
|
|
|
def main() -> int:
    """
    Run the reconciliation end to end.

    Reads the report configuration from "config_reports.toml", loads the newest GP and OB
    reports from the "Work" folder, builds the report objects, reconciles OB against GP and
    writes the results to "Work/Reconciled Holds [MM-DD-YYYY].xlsx" with three sheets:
    "No Match", "Amount Mismatch" and "Overdue".

    Returns:
        int: 0 if the script executes successfully.

    Raises:
        ValueError: If either report could not be loaded or contains no rows.
    """
    # Single source of truth for the folder that holds both the inputs and the output.
    work_dir = "Work"

    # Read the configuration options from a TOML file
    with open("config_reports.toml", "rb") as f:
        reports_config: dict = load(f)
    logger.debug(f"Reports Config: {reports_config}")

    # Get the GP and OB dataframes from the Excel files
    ob_df, gp_df = get_reports(work_dir, reports_config)

    # Explicit checks instead of `assert`: asserts are removed under `python -O`, and
    # get_reports is annotated as possibly returning None for either frame.
    if ob_df is None or ob_df.empty:
        raise ValueError("OB Data empty!")
    if gp_df is None or gp_df.empty:
        raise ValueError("GP Data empty!")

    obr: OnBaseReport = OnBaseReport(ob_df, reports_config)
    gpr: GreatPlainsReport = GreatPlainsReport(gp_df, reports_config)

    overdue: DataFrame = obr.get_overdue()
    no_match, amt_mismatch = obr.reconcile(gpr)

    # Write the results to a new Excel file named after today's date
    output_name = f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx"
    output_path: Path = Path(work_dir) / output_name
    with pd.ExcelWriter(output_path, mode='w') as writer:
        # freeze_panes=(1, 3) keeps the header row and the first three columns in view.
        no_match.to_excel(writer, sheet_name="No Match", index=False, freeze_panes=(1, 3))
        amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch", index=False, freeze_panes=(1, 3))
        overdue.to_excel(writer, sheet_name="Overdue", index=False)

    # Re-open the finished file with openpyxl to hide the first two columns (A and B)
    # on both reconciliation sheets; the "Overdue" sheet is left as written.
    wb: Workbook = load_workbook(output_path)
    for sheet in ("No Match", "Amount Mismatch"):
        ws = wb[sheet]
        ws.column_dimensions['A'].hidden = True
        ws.column_dimensions['B'].hidden = True
    wb.save(output_path)

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the reconciliation and hand main()'s integer
    # status to the interpreter as the process exit code instead of dropping it.
    print("Starting")
    exit_code = main()
    print("Completed")
    raise SystemExit(exit_code)