"""Entry point for the holds reconciliation.

Finds the newest Great Plains (GP) and OnBase (OB) Excel reports in the work
folder, uses the report classes to find the differences between them, and
saves the output as an Excel file named with today's date.
"""

import logging
import logging.config
import pathlib
import re
from datetime import datetime as dt
from pathlib import Path
from re import Pattern
from tomllib import load

import pandas as pd
from openpyxl import Workbook, load_workbook
from pandas import DataFrame

# Custom modules for reconciliation
from helpers import find_most_recent_file, setup_logging
from reports import GreatPlainsReport, OnBaseReport

# TODO:
# [ ] Pull in past reconciliations to check against
# [ ] Record reconciled transaction (connect with VBA)
# [ ] Check GP against the database
# [ ] Check OB against the database
# [X] Add resolution column to error sheets
# [ ] Add sheet for problem contracts already seen and 'resolved'

# Folder that holds the input reports and receives the output workbook.
WORK_DIR = "Work"
# TOML file with the report configuration (including "shared_columns").
CONFIG_FILE = "config_reports.toml"

setup_logging()
logger = logging.getLogger(__name__)
logger.info(f"Logger started with level: {logger.level}")


def _find_sheet_with_columns(
    file_path, required_columns: list[str], label: str
) -> pd.DataFrame | None:
    """Return the first sheet of a workbook that has every required column.

    Args:
        file_path: Location of the Excel workbook to inspect.
        required_columns: Column names that must all be present in the sheet.
        label: Short tag ("gp" / "ob") used only in log messages.

    Returns:
        The dataframe of the first matching sheet, or None when no sheet in
        the workbook contains all of the required columns.
    """
    # The context manager guarantees the workbook handle is released even if
    # parsing one of the sheets fails.
    with pd.ExcelFile(file_path) as workbook:
        sheets = pd.read_excel(workbook, sheet_name=workbook.sheet_names)

    for sheet_name, sheet_df in sheets.items():
        sheet_columns: list[str] = list(sheet_df.columns)
        logger.debug(f"{label} ({sheet_name}) : {sheet_columns}")
        matches = [col in sheet_columns for col in required_columns]
        logger.debug(f"Matches {matches}")
        if all(matches):
            logger.debug("FOUND")
            return sheet_df

    logger.warning(
        f"No sheet in {file_path} contains the required columns: "
        f"{required_columns}"
    )
    return None


def get_reports(
    work_dir: str, report_config: dict
) -> tuple[pd.DataFrame | None, pd.DataFrame | None]:
    """Load the newest OB and GP report sheets from the work folder.

    The GP and OB workbooks are the files that ``find_most_recent_file``
    selects in ``work_dir`` for the ``gp`` / ``ob`` ``.xlsx`` filename
    patterns. From each workbook, the first sheet that contains all of the
    columns listed under ``report_config["shared_columns"]`` is used.

    Args:
        work_dir: Folder to search for the report workbooks.
        report_config: Report configuration. Must contain a
            "shared_columns" mapping whose values each have a "GP" and an
            "OB" entry naming the column in the respective report.

    Returns:
        A tuple ``(ob_df, gp_df)``. Either element is None when the
        corresponding workbook has no sheet with the required columns.
    """
    # Raw strings so that "\." is a regex escape, not an (invalid) string
    # escape sequence.
    gp_regex: Pattern = re.compile(r".*gp.*\.xlsx$", re.IGNORECASE)
    ob_regex: Pattern = re.compile(r".*ob.*\.xlsx$", re.IGNORECASE)

    gp_file_path = find_most_recent_file(work_dir, gp_regex)
    logger.debug(f"gp_file_path: {gp_file_path}")
    ob_file_path = find_most_recent_file(work_dir, ob_regex)
    logger.debug(f"ob_file_path: {ob_file_path}")

    shared_columns = list(report_config["shared_columns"].values())
    gp_req_cols = [col["GP"] for col in shared_columns]
    logger.debug(f"GP_Req_cols: {gp_req_cols}")
    ob_req_cols = [col["OB"] for col in shared_columns]

    gp_df = _find_sheet_with_columns(gp_file_path, gp_req_cols, "gp")
    ob_df = _find_sheet_with_columns(ob_file_path, ob_req_cols, "ob")

    return ob_df, gp_df


def main() -> int:
    """Run the reconciliation and write the result workbook.

    Reads the configuration from the TOML file, loads the GP and OB report
    dataframes, builds the report objects, and writes the "No Match",
    "Amount Mismatch" and "Overdue" sheets to a dated workbook in the work
    folder. Columns A and B of the first two sheets are hidden afterwards.

    Returns:
        0 if the script executes successfully.

    Raises:
        ValueError: If either report could not be found or contains no rows.
    """
    with open(CONFIG_FILE, "rb") as f:
        reports_config: dict = load(f)
    logger.debug(f"Reports Config: {reports_config}")

    ob_df, gp_df = get_reports(WORK_DIR, reports_config)
    # Explicit checks instead of assert: asserts are stripped under -O, and
    # get_reports may return None for a report it could not locate.
    if ob_df is None or ob_df.empty:
        raise ValueError("OB Data empty!")
    if gp_df is None or gp_df.empty:
        raise ValueError("GP Data empty!")

    obr: OnBaseReport = OnBaseReport(ob_df, reports_config)
    gpr: GreatPlainsReport = GreatPlainsReport(gp_df, reports_config)

    overdue: DataFrame = obr.get_overdue()
    no_match, amt_mismatch = obr.reconcile(gpr)

    # Write the results to a new Excel file
    output_name: Path = Path(
        f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx"
    )
    output_path: Path = Path(WORK_DIR, output_name)
    hidden_column_sheets = ["No Match", "Amount Mismatch"]
    with pd.ExcelWriter(output_path, mode="w") as writer:
        no_match.to_excel(
            writer,
            sheet_name="No Match",
            index=False,
            freeze_panes=(1, 3),
        )
        amt_mismatch.to_excel(
            writer,
            sheet_name="Amount Mismatch",
            index=False,
            freeze_panes=(1, 3),
        )
        overdue.to_excel(writer, sheet_name="Overdue", index=False)

    # Re-open with openpyxl to hide the first two columns; this works
    # regardless of which engine pandas picked for writing.
    wb: Workbook = load_workbook(output_path)
    for sheet in hidden_column_sheets:
        ws = wb[sheet]
        ws.column_dimensions["A"].hidden = True
        ws.column_dimensions["B"].hidden = True
    wb.save(output_path)

    return 0


if __name__ == "__main__":
    print("Starting")
    exit_code = main()
    print("Completed")
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(exit_code)