You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
110 lines
4.1 KiB
110 lines
4.1 KiB
"""
This is the main entry point for this application. It finds the newest reports (GP & OB),
then uses the reconcile module to find the differences between them. The output is
saved as an Excel file named with today's date.
"""
|
|
# Custom module for reconciliation
|
|
from helpers import setup_logging, find_most_recent_file
|
|
from reports import OnBaseReport, GreatPlainsReport, ReconciledReports
|
|
|
|
import pandas as pd
|
|
from pandas import DataFrame
|
|
import re
|
|
from re import Pattern
|
|
import logging
|
|
from tomllib import load
|
|
import logging.config
|
|
from datetime import datetime as dt
|
|
from pathlib import Path
|
|
|
|
# Run the project's logging setup before the module logger emits anything.
# NOTE(review): setup_logging() is defined in helpers; presumably it applies the
# application's logging configuration -- confirm there.
setup_logging()
logger = logging.getLogger(__name__)
# A logger obtained via getLogger() starts at NOTSET (0) and inherits its level
# from its ancestors, so `logger.level` would usually print 0 here. Report the
# level that is actually in effect instead.
logger.info("Logger started with level: %s", logger.getEffectiveLevel())
|
|
|
|
|
|
def _find_sheet_with_columns(file_path, required_cols: list[str], label: str) -> pd.DataFrame | None:
    """
    Return the first worksheet of an Excel workbook that contains every required column.

    Args:
        file_path: Location of the workbook; passed straight to ``pd.ExcelFile``.
        required_cols (list[str]): Column names that must all be present in the sheet.
        label (str): Short report tag (e.g. "gp") used only in log messages.

    Returns:
        pd.DataFrame|None: The first matching sheet, or None when no sheet qualifies.
    """
    # The context manager guarantees the workbook handle is released once all
    # sheets have been loaded, even if reading one of them fails.
    with pd.ExcelFile(file_path) as workbook:
        sheets = pd.read_excel(workbook, sheet_name=workbook.sheet_names)

    for sheet_name, sheet_df in sheets.items():
        sheet_columns = list(sheet_df.columns)
        matches = [col in sheet_columns for col in required_cols]
        logger.debug(f"{label} ({sheet_name}) : {sheet_columns}")
        logger.debug(f"Matches {matches}")
        if all(matches):
            logger.debug("FOUND")
            return sheet_df

    logger.warning(f"No sheet in {file_path} contains the required columns: {required_cols}")
    return None


def get_reports(work_dir: str, report_config: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
    """
    Locate the most recent GP and OB Excel files in a directory and load the relevant sheet of each.

    A file is treated as a GP (or OB) report when its name contains "gp" (or "ob"),
    case-insensitively, and ends in ".xlsx". Within each workbook, the first sheet
    that contains every required column is selected.

    Args:
        work_dir (str): Directory handed to ``find_most_recent_file`` to search for the reports.
        report_config (dict): Report configuration. ``report_config["shared_columns"]`` must map
            each shared column to a dict with "GP" and "OB" keys giving that column's name in
            the respective report.

    Returns:
        tuple[pd.DataFrame|None, pd.DataFrame|None]: The OB and GP dataframes, in that order.
            An element is None when no sheet in that workbook has all of the required columns.

    Raises:
        KeyError: If "shared_columns", or a "GP"/"OB" entry within it, is missing from the config.
    """
    # Raw strings keep `\.` a regex escape rather than an invalid string escape.
    gp_regex: Pattern = re.compile(r".*gp.*\.xlsx$", re.IGNORECASE)
    ob_regex: Pattern = re.compile(r".*ob.*\.xlsx$", re.IGNORECASE)

    # Find the paths of the most recently modified GP and OB Excel files
    gp_file_path = find_most_recent_file(work_dir, gp_regex)
    logger.debug(f"gp_file_path: {gp_file_path}")
    ob_file_path = find_most_recent_file(work_dir, ob_regex)
    logger.debug(f"ob_file_path: {ob_file_path}")

    # Each shared column carries its own name in each of the two reports.
    shared_columns = report_config["shared_columns"].values()
    gp_req_cols = [col["GP"] for col in shared_columns]
    ob_req_cols = [col["OB"] for col in shared_columns]
    logger.debug(f"GP_Req_cols: {gp_req_cols}")

    gp_df = _find_sheet_with_columns(gp_file_path, gp_req_cols, "gp")
    ob_df = _find_sheet_with_columns(ob_file_path, ob_req_cols, "ob")

    return ob_df, gp_df
|
|
|
|
|
|
def main() -> int:
    """
    Run the reconciliation end to end.

    Reads the report configuration from "config_reports.toml" (resolved against the current
    working directory), loads the newest OB and GP reports from the "Work" folder, reconciles
    them, and saves the result to an Excel file named with today's date inside the configured
    ``output_path``.

    Returns:
        int: 0 if the script executes successfully.

    Raises:
        ValueError: If either report could not be loaded or contains no rows.
        KeyError: If "output_path" is missing from the configuration.
    """
    # Read the configuration options from a TOML file (tomllib requires binary mode)
    with open("config_reports.toml", "rb") as f:
        reports_config: dict = load(f)
    logger.debug(f"Reports Config: {reports_config}")

    # Get the GP and OB dataframes from the Excel files
    ob_df, gp_df = get_reports("Work", reports_config)

    # Explicit checks instead of `assert`: assertions are stripped under `python -O`,
    # and a missing report (None) must be rejected as well as an empty one.
    if ob_df is None or ob_df.empty:
        raise ValueError("OB Data empty!")
    if gp_df is None or gp_df.empty:
        raise ValueError("GP Data empty!")

    obr: OnBaseReport = OnBaseReport(ob_df, reports_config)
    gpr: GreatPlainsReport = GreatPlainsReport(gp_df, reports_config)

    rec_output: ReconciledReports = obr.reconcile(gpr)

    # The output file name embeds the run date as MM-DD-YYYY.
    output_name: Path = Path(f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx")
    output_base: Path = Path(reports_config["output_path"])
    output_path: Path = output_base / output_name

    rec_output.save_reports(output_path)

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    print("Starting")
    exit_code = main()
    print("Completed")
    # Pass main()'s status on as the process exit code instead of discarding it.
    raise SystemExit(exit_code)