# OnHoldReconciler/hold_reconciler.py

"""
This is the main entry point for this application. It finds the newest reports (GP & OB),
then uses the reports module to find the differences between them. The output is
saved as an Excel file with today's date in its name.
"""
# Custom module for reconciliation
from helpers import setup_logging, find_most_recent_file
from reports import OnBaseReport, GreatPlainsReport

import pandas as pd
from pandas import DataFrame
import re
from re import Pattern
import logging
from tomllib import load
import logging.config
from datetime import datetime as dt
from openpyxl import load_workbook, Workbook
import pathlib
from pathlib import Path

"""
[ ] Pull in past reconciliations to check against
[ ] Record reconciled transaction (connect with VBA)
[ ] Check GP against the database
[ ] Check OB against the database
[X] Add resolution column to error sheets
[ ] Add sheet for problem contracts already seen and 'resolved'
"""

# Configure logging once at import time (setup_logging comes from the local helpers module),
# then create the module-level logger used by every function in this file.
setup_logging()
logger = logging.getLogger(__name__)
# NOTE(review): logger.level is this logger's own level and reads 0 (NOTSET) when the level is
# inherited from a parent logger; logger.getEffectiveLevel() may be what is intended - confirm.
logger.info(f"Logger started with level: {logger.level}")


def _load_matching_sheet(file_path, required_cols: list[str], label: str) -> pd.DataFrame | None:
    """
    Return the first sheet of an Excel workbook that contains every required column.

    Args:
        file_path: Location of the workbook; passed straight to ``pd.ExcelFile``.
        required_cols (list[str]): Column headers that must all be present in a sheet for it to match.
        label (str): Short report name (e.g. "GP" or "OB") used only in log messages.

    Returns:
        pd.DataFrame|None: The first matching sheet, or None when no sheet has all of the required columns.
    """
    # The context manager closes the workbook handle even if parsing a sheet fails
    with pd.ExcelFile(file_path) as workbook:
        # Sheets are parsed one at a time, in workbook order, so the search stops at the first match
        for sheet_name in workbook.sheet_names:
            sheet_df = pd.read_excel(workbook, sheet_name=sheet_name)
            sheet_columns = list(sheet_df.columns)
            logger.debug(f"{label} ({sheet_name}) : {sheet_columns}")
            logger.debug(f"Matches {[col in sheet_columns for col in required_cols]}")
            if all(col in sheet_columns for col in required_cols):
                logger.debug(f"{label} sheet found: {sheet_name}")
                return sheet_df

    logger.error(f"No sheet in {file_path} contains the required {label} columns: {required_cols}")
    return None


def get_reports(work_dir: str, report_config: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
    """
    Locate the most recently modified GP and OB Excel files in the work folder and return, for each
    file, the first sheet that contains all of the configured shared columns.

    Args:
        work_dir (str): Folder that is searched for the GP and OB Excel files.
        report_config (dict): Report configuration. Each value of its "shared_columns" table must
            provide a "GP" and an "OB" entry naming the column as it appears in that report.

    Returns:
        tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively.
            An element is None when no sheet in the corresponding file has all of the required columns.
    """

    # Raw strings keep the "\." escape intact for the regex engine without an invalid-escape warning
    gp_regex: Pattern = re.compile(r".*gp.*\.xlsx$", re.IGNORECASE)
    ob_regex: Pattern = re.compile(r".*ob.*\.xlsx$", re.IGNORECASE)

    # Find the paths of the most recently modified GP and OB Excel files
    gp_file_path = find_most_recent_file(work_dir, gp_regex)
    logger.debug(f"gp_file_path: {gp_file_path}")
    ob_file_path = find_most_recent_file(work_dir, ob_regex)
    logger.debug(f"ob_file_path: {ob_file_path}")

    # Each shared column carries the header used by GP and the header used by OB
    shared_columns = report_config["shared_columns"].values()
    gp_req_cols = [col["GP"] for col in shared_columns]
    ob_req_cols = [col["OB"] for col in shared_columns]
    logger.debug(f"GP_Req_cols: {gp_req_cols}")

    gp_df = _load_matching_sheet(gp_file_path, gp_req_cols, "GP")
    ob_df = _load_matching_sheet(ob_file_path, ob_req_cols, "OB")

    return ob_df, gp_df


def main() -> int:
    """
    This is the main function for the script. It reads configuration options from a TOML file, reads in the GP and OB
    Excel files, reconciles the OB report against the GP report, and writes the results to a new Excel file in the
    work folder.

    Returns:
        int: 0 if the script executes successfully.

    Raises:
        ValueError: If the OB or GP report could not be loaded or contains no rows.
    """
    work_dir = "Work"

    # Read the configuration options from a TOML file
    with open("config_reports.toml", "rb") as f:
        reports_config: dict = load(f)
    logger.debug(f"Reports Config: {reports_config}")

    # Get the GP and OB dataframes from the Excel files
    ob_df, gp_df = get_reports(work_dir, reports_config)
    # Explicit checks instead of assert: assertions are stripped when Python runs with -O,
    # and get_reports is annotated as possibly returning None for either report
    if ob_df is None or ob_df.empty:
        raise ValueError("OB Data empty!")
    if gp_df is None or gp_df.empty:
        raise ValueError("GP Data empty!")

    obr: OnBaseReport = OnBaseReport(ob_df, reports_config)
    gpr: GreatPlainsReport = GreatPlainsReport(gp_df, reports_config)

    overdue: DataFrame = obr.get_overdue()

    no_match, amt_mismatch = obr.reconcile(gpr)

    # Write the results to a new Excel file named after today's date
    output_name: Path = Path(f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx")
    output_path: Path = Path(work_dir, output_name)
    # The two error sheets get the header row and the first three columns frozen
    error_sheets = {"No Match": no_match, "Amount Mismatch": amt_mismatch}
    with pd.ExcelWriter(output_path, mode='w') as writer:
        for sheet_name, sheet_df in error_sheets.items():
            sheet_df.to_excel(writer, sheet_name=sheet_name, index=False, freeze_panes=(1, 3))
        overdue.to_excel(writer, sheet_name="Overdue", index=False)

    # Re-open the finished workbook with openpyxl to hide columns A and B on the error sheets
    wb: Workbook = load_workbook(output_path)
    for sheet_name in error_sheets:
        ws = wb[sheet_name]
        for column_letter in ("A", "B"):
            ws.column_dimensions[column_letter].hidden = True
    wb.save(output_path)

    return 0


if __name__ == "__main__":
    print("Starting")
    # main() returns the process exit status; pass it to the interpreter instead of discarding it
    exit_code = main()
    print("Completed")
    raise SystemExit(exit_code)