"""
This is the main entry point for this application.

It finds the newest reports (GP & OB), then uses the reconcile logic from the
``reports`` module to find the differences between them. The output is saved
as an Excel file named with today's date.
"""

import logging
import logging.config
import re
from datetime import datetime as dt
from pathlib import Path
from re import Pattern
from tomllib import load

import pandas as pd
from pandas import DataFrame

# Custom modules for reconciliation
from helpers import setup_logging, find_most_recent_file
from reports import OnBaseReport, GreatPlainsReport, ReconciledReports

setup_logging()
logger = logging.getLogger(__name__)
logger.info(f"Logger started with level: {logger.level}")


def _find_sheet_with_columns(
    file_path, required_columns: list[str], label: str
) -> pd.DataFrame | None:
    """Return the first sheet of a workbook that has every required column.

    Args:
        file_path: Location of the Excel workbook, as returned by
            ``find_most_recent_file`` (exact type not visible from this file).
        required_columns: Column names that must all be present in the sheet.
        label: Short tag (e.g. ``"gp"``) used only in debug/warning log lines.

    Returns:
        The dataframe of the first matching sheet, or ``None`` when no sheet
        in the workbook contains all of the required columns.
    """
    # The context manager guarantees the workbook handle is released even if
    # parsing fails; sheet_name=None loads every sheet into a dict.
    with pd.ExcelFile(file_path) as workbook:
        sheets: dict = pd.read_excel(workbook, sheet_name=None)

    for sheet_name, frame in sheets.items():
        sheet_columns: list[str] = list(frame.columns)
        matches = [col in sheet_columns for col in required_columns]
        logger.debug(f"{label} ({sheet_name}) : {sheet_columns}")
        logger.debug(f"Matches {matches}")
        if all(matches):
            logger.debug("FOUND")
            return frame

    logger.warning(
        f"No sheet in {file_path} contains the required {label} columns: "
        f"{required_columns}"
    )
    return None


def get_reports(
    work_dir: str, report_config: dict
) -> tuple[pd.DataFrame | None, pd.DataFrame | None]:
    """Load the most recent OB and GP reports found in ``work_dir``.

    The newest file matching ``*gp*.xlsx`` and the newest file matching
    ``*ob*.xlsx`` (case-insensitive) are located via
    ``find_most_recent_file``. Within each workbook, the first sheet that
    contains every required column is selected.

    Args:
        work_dir: Directory that is searched for the report workbooks.
        report_config: Configuration dictionary. Its ``"shared_columns"``
            entry must be a mapping whose values each provide a ``"GP"`` and
            an ``"OB"`` column name.

    Returns:
        A tuple ``(ob_df, gp_df)``. Either element is ``None`` when no sheet
        of the corresponding workbook has all of the required columns.
    """
    # Raw strings so that "\." is a regex escape, not a string escape.
    gp_regex: Pattern = re.compile(r".*gp.*\.xlsx$", re.IGNORECASE)
    ob_regex: Pattern = re.compile(r".*ob.*\.xlsx$", re.IGNORECASE)

    # Find the paths of the most recently modified GP and OB Excel files
    gp_file_path = find_most_recent_file(work_dir, gp_regex)
    logger.debug(f"gp_file_path: {gp_file_path}")
    ob_file_path = find_most_recent_file(work_dir, ob_regex)
    logger.debug(f"ob_file_path: {ob_file_path}")

    shared_columns = report_config["shared_columns"].values()
    gp_req_cols = [col["GP"] for col in shared_columns]
    ob_req_cols = [col["OB"] for col in shared_columns]
    logger.debug(f"GP_Req_cols: {gp_req_cols}")
    logger.debug(f"OB_Req_cols: {ob_req_cols}")

    gp_df = _find_sheet_with_columns(gp_file_path, gp_req_cols, "gp")
    ob_df = _find_sheet_with_columns(ob_file_path, ob_req_cols, "ob")

    return ob_df, gp_df


def main() -> int:
    """Run the reconciliation end to end.

    Reads configuration options from ``config_reports.toml``, loads the GP and
    OB Excel reports from the ``Work`` folder, reconciles them, and writes the
    result to a dated Excel file under the configured ``output_path``.

    Returns:
        0 if the script executes successfully.

    Raises:
        ValueError: If either report could not be loaded or contains no rows.
    """
    # Read the configuration options from a TOML file
    with open("config_reports.toml", "rb") as f:
        reports_config: dict = load(f)
    logger.debug(f"Reports Config: {reports_config}")

    # Get the GP and OB dataframes from the Excel files
    ob_df, gp_df = get_reports("Work", reports_config)

    # Explicit checks instead of assert: asserts are stripped under -O and
    # would fail with an AttributeError when a dataframe is None.
    if ob_df is None or ob_df.empty:
        raise ValueError("OB Data empty!")
    if gp_df is None or gp_df.empty:
        raise ValueError("GP Data empty!")

    obr: OnBaseReport = OnBaseReport(ob_df, reports_config)
    gpr: GreatPlainsReport = GreatPlainsReport(gp_df, reports_config)

    rec_output: ReconciledReports = obr.reconcile(gpr)

    output_name: Path = Path(
        f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx"
    )
    output_base: Path = Path(reports_config["output_path"])
    output_path: Path = output_base / output_name

    rec_output.save_reports(output_path)

    return 0


if __name__ == "__main__":
    print("Starting")
    main()
    print("Completed")