You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
190 lines
7.4 KiB
190 lines
7.4 KiB
import pandas as pd
|
|
from pandas import DataFrame, Series
|
|
import re
|
|
from re import Pattern
|
|
import os
|
|
from os.path import basename
|
|
import glob
|
|
import logging
|
|
from pathlib import Path
|
|
from tomllib import load
|
|
import logging.config
|
|
from datetime import datetime as dt
|
|
|
|
"""
|
|
[ ] Pull in past reconciliations to check against
|
|
[ ] Record reconciled transaction (connect with VBA)
|
|
[ ] Check GP against the database
|
|
[ ] Check OB against the database
|
|
"""
|
|
|
|
# Custom module for reconciliation
|
|
from rec_lib import get_contract_match, get_no_match, \
|
|
get_not_full_match, get_overdue, filter_gp, create_transaction_df
|
|
|
|
def setup_logging():
|
|
"""
|
|
Sets up logging configuration from the TOML file. If the logging configuration fails to be loaded from the file,
|
|
a default logging configuration is used instead.
|
|
|
|
Returns:
|
|
logging.Logger: The logger instance.
|
|
"""
|
|
with open("config.toml", "rb") as f:
|
|
config_dict: dict = load(f)
|
|
try:
|
|
# Try to load logging configuration from the TOML file
|
|
logging.config.dictConfig(config_dict["logger"])
|
|
except Exception as e:
|
|
# If the logging configuration fails, use a default configuration and log the error
|
|
logger = logging.getLogger()
|
|
logger.setLevel(logging.DEBUG)
|
|
logger.warning("Failed setting up logger!")
|
|
logger.exception(e)
|
|
logger.warning(f"Config:\n{config_dict}")
|
|
return logger
|
|
|
|
|
|
# Configure logging once at import time, then grab the module-level logger
# used throughout this script.
setup_logging()
logger = logging.getLogger(__name__)
logger.info(f"Logger started with level: {logger.level}")
|
|
|
|
def find_most_recent_file(folder_path: Path, file_pattern: Pattern) -> str:
    """
    Return the path of the most recently modified file in ``folder_path``
    whose basename matches ``file_pattern``.

    Args:
        folder_path (Path): The folder to search (a plain str path also works,
            as the callers in this file pass one).
        file_pattern (Pattern): Compiled regex applied to each file's basename.

    Returns:
        str: The path of the most recently modified matching file.

    Raises:
        FileNotFoundError: If no file in the folder matches the pattern.
    """
    # Same logger object as the module-level one (getLogger is idempotent);
    # fetched locally so this function is self-contained.
    log = logging.getLogger(__name__)

    # Find all files in the folder; pattern filtering happens below
    files = glob.glob(f"{folder_path}/*")
    log.debug(f"files: {files}")

    # Pair each matching file with its modification time
    file_times = [
        (os.path.getmtime(path), path)
        for path in files
        if re.match(file_pattern, basename(path))
    ]
    log.debug(f"file times: {file_times}")

    if not file_times:
        # Bug fix: the original fell through to file_times[0] and raised a
        # bare IndexError; fail with a clear, catchable error instead.
        raise FileNotFoundError(
            f"No file in {folder_path} matches {file_pattern.pattern}"
        )

    # max() is O(n); no need to fully sort just to take the newest entry
    return max(file_times)[1]
|
|
|
|
|
|
def check_sheet(df_cols: list[str], excel_col_config: dict) -> bool:
    """
    Check whether every required column is present in a sheet's column names.

    Args:
        df_cols (list[str]): The column names of a sheet (a pandas Index also
            works, since only ``in`` membership is used).
        excel_col_config (dict): Column configuration; its *values* are the
            required column names.

    Returns:
        bool: True if all required columns are present, False otherwise.
    """
    # The required columns are the values of the configuration mapping
    required_cols: list[str] = list(excel_col_config.values())
    # Generator instead of the original list literal: avoids building an
    # intermediate list and lets all() short-circuit on the first miss.
    return all(col in df_cols for col in required_cols)
|
|
|
|
|
|
def _load_matching_sheet(xl_path: str, col_config: dict, label: str) -> pd.DataFrame:
    """Return the first sheet of the workbook at ``xl_path`` whose columns
    satisfy ``col_config`` (see check_sheet). ``label`` is used in errors."""
    xl = pd.ExcelFile(xl_path)
    dfs = pd.read_excel(xl, sheet_name=xl.sheet_names)
    for sheet_name, df in dfs.items():
        if check_sheet(df.columns, col_config):
            return df
    # Bug fix: the original left gp_df/ob_df unbound when no sheet matched,
    # crashing later with a confusing NameError.
    raise ValueError(f"No sheet in {xl_path} has the required {label} columns")


def get_dataframes(work_dir: str, excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
    """
    Locate the most recently modified GP and OB Excel files in ``work_dir``
    and return, for each, the first sheet containing the configured columns.

    Args:
        work_dir (str): Folder to search for the GP and OB Excel files.
        excelConfig (dict): Column configuration with "GP" and "OB" sub-dicts.

    Returns:
        tuple[pd.DataFrame|None, pd.DataFrame|None]: (OB dataframe, GP dataframe).

    Raises:
        FileNotFoundError: If no matching GP or OB file exists in the folder.
        ValueError: If a workbook has no sheet with the required columns.
    """
    # Raw strings: "\." in a plain literal is an invalid escape sequence
    # (SyntaxWarning on modern Python).
    gp_regex: Pattern = re.compile(r".*gp.*\.xlsx$", re.IGNORECASE)
    ob_regex: Pattern = re.compile(r".*ob.*\.xlsx$", re.IGNORECASE)

    # Find the paths of the most recently modified GP and OB Excel files
    gp_file_path = find_most_recent_file(work_dir, gp_regex)
    logger.debug(f"gp_file_path: {gp_file_path}")
    ob_file_path = find_most_recent_file(work_dir, ob_regex)
    # Bug fix: this line previously logged the OB path under a "gp" label
    logger.debug(f"ob_file_path: {ob_file_path}")

    # Read each workbook and pick the sheet that has the required columns
    gp_df = _load_matching_sheet(gp_file_path, excelConfig["GP"], "GP")
    ob_df = _load_matching_sheet(ob_file_path, excelConfig["OB"], "OB")

    return ob_df, gp_df
|
|
|
|
|
|
def main() -> int:
    """
    Entry point: load configuration from ``config.toml``, read the GP and OB
    workbooks, reconcile the transactions, and write the results to a dated
    Excel workbook in the configured write directory.

    Returns:
        int: 0 if the script executes successfully.

    Raises:
        ValueError: If either the OB or GP dataframe is empty.
    """
    # Read the configuration options from a TOML file
    with open("config.toml", "rb") as f:
        config_dict: dict = load(f)
    logger.debug(f"Config: {config_dict}")

    excelConfig: dict = config_dict["ExcelColumns"]

    # Get the GP and OB dataframes from the Excel files
    ob_df, gp_df = get_dataframes(config_dict["write_dir"], excelConfig)
    # Bug fix: `assert` is stripped under `python -O`, so empty inputs could
    # slip through silently; validate explicitly instead.
    if ob_df.empty:
        raise ValueError("OB Data empty!")
    if gp_df.empty:
        raise ValueError("GP Data empty!")

    # Filter the GP dataframe to include only relevant transactions
    fgp_df: DataFrame = filter_gp(gp_df, config_dict)
    # Get the overdue transactions from the OB dataframe
    overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"])

    # Normalize both sides into comparable transaction dataframes
    ob_transactions: DataFrame = create_transaction_df(ob_df, 'OB', excelConfig)
    gp_transactions: DataFrame = create_transaction_df(fgp_df, 'GP', excelConfig)

    # Transactions present on only one side
    no_match: DataFrame = get_no_match(ob_transactions, gp_transactions)

    # Transactions matched on both sides, split by whether the amounts agree
    full_match, not_full_match = get_not_full_match(ob_transactions, gp_transactions)
    only_contracts_match: DataFrame = get_contract_match(not_full_match)

    # Write the results to a dated Excel workbook, one category per sheet
    out_path = f"{config_dict['write_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx"
    with pd.ExcelWriter(out_path, mode='w') as writer:
        full_match.to_excel(writer, sheet_name="FULL", index=False)
        no_match.to_excel(writer, sheet_name="No Match", index=False)
        only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False)
        overdue.to_excel(writer, sheet_name="Overdue", index=False)

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point; main()'s return code is intentionally ignored here.
    print("Starting")
    main()
    print("Completed")