"""
Classes and functions to parse completed reconciliation reports and remember
the resolutions of contracts.

Also provides a way for the reconciler to check holds against previously
resolved holds.

*Last Updated: version 1.3
"""

from dataclasses import dataclass
from hashlib import md5
from logging import getLogger
from typing import TypeAlias

# `numpy.NaN` was removed in NumPy 2.0; importing the canonical lowercase
# `nan` under the old alias keeps the name `NaN` available on 1.x and 2.x.
from numpy import nan as NaN
from pandas import DataFrame, Series, read_sql_query, read_excel, concat

from helpers import drop_unnamed, setup_logging
from ghlib.database.database_manager import SQLiteManager, select_fields_statement

setup_logging()
logger = getLogger(__name__)

# Hex digest (32 characters) produced by `hash_cols`.
col_hash: TypeAlias = str

# Columns whose values are combined into the row identifier.
_ID_COLS: list[str] = ["ID_OB", "ID_GP"]

# Placeholder hashed in place of a missing ID value.
_MISSING_ID: str = "x"


def hash_cols(row: Series, cols_to_hash: list[str]) -> col_hash:
    """
    Return the MD5 hex digest of the selected values of `row`.

    The values are converted with `str` and concatenated in the order given
    by `cols_to_hash`, then UTF-8 encoded and hashed.

    Args:
        row: Any mapping-like row supporting `row[col]` (normally a Series).
        cols_to_hash: Column labels to include, in hashing order.

    Returns:
        The 32 character hexadecimal MD5 digest.
    """
    # NOTE: values are joined without a separator. Do not change this: the
    # digests are persisted and later compared against stored ones.
    joined = "".join(str(row[col]) for col in cols_to_hash)
    return md5(joined.encode("utf-8")).hexdigest()


def create_identifier(df: DataFrame) -> DataFrame:
    """
    Add an "Indentifier" column holding a hash of "ID_OB" and "ID_GP".

    Missing ID values are hashed as the placeholder "x". The hash is computed
    on a filled copy, so the ID columns of `df` themselves are left untouched
    (no chained in-place `fillna`, which does nothing under pandas
    Copy-on-Write, and no risk of turning a genuine "x" ID into NaN).

    Args:
        df: Frame containing the "ID_OB" and "ID_GP" columns.

    Returns:
        The same `df` object, with the "Indentifier" column added in place.
    """
    filled = df.fillna({col: _MISSING_ID for col in _ID_COLS})
    df["Indentifier"] = filled.apply(
        lambda row: hash_cols(row, _ID_COLS),
        axis=1,
    )
    return df


def save_rec(resolved_dataframes: list[DataFrame]):
    """
    Append the resolutions found in the given frames to the "Resolutions"
    table of the "OnHold.db" SQLite database.

    For every frame an "Indentifier" column is created (see
    `create_identifier`), the frame is reduced to the identifier, both IDs,
    "HideNextMonth" and "Resolution", duplicates are dropped, and rows where
    both "HideNextMonth" and "Resolution" are missing are discarded before
    the remainder is appended to the table.

    Args:
        resolved_dataframes: Frames to record. Each must contain "ID_GP",
            "ID_OB", "HideNextMonth" and "Resolution"; frames that also have
            "onhold_amount" must have a "Source" column. The frames are
            modified in place (columns are added).

    Raises:
        KeyError: If a frame lacks one of the required columns.
    """
    rec_cols: list[str] = [
        "Indentifier",
        "ID_GP",
        "ID_OB",
        "HideNextMonth",
        "Resolution",
    ]

    sqlManager: SQLiteManager = SQLiteManager("OnHold.db")
    with sqlManager.get_session() as session:
        for rdf in resolved_dataframes:
            cols: list[str] = rdf.columns.to_list()
            logger.debug(f"{cols=}")

            if "onhold_amount" in cols:
                logger.debug("Found 'onhold_amount' in rdf: no_match dataframe")
                # Split the on_hold col to normalize with amount mismatch
                # NOTE(review): these two columns are not part of `rec_cols`
                # and so are not persisted; kept because they are added to
                # the caller's frame.
                rdf["onhold_amount_GP"] = rdf.apply(
                    lambda row: row["onhold_amount"] if row["Source"] == "GP" else None,
                    axis=1,
                )
                rdf["onhold_amount_OB"] = rdf.apply(
                    lambda row: row["onhold_amount"] if row["Source"] == "OB" else None,
                    axis=1,
                )
            else:
                logger.debug("No 'onhold_amount' col found in rdf: amount_mismatch dataframe")

            # Create a unified column for index
            rdf = create_identifier(rdf)

            # Non-inplace chain: avoids mutating a slice of `rdf`.
            resolutions: DataFrame = (
                rdf[rec_cols]
                .set_index("Indentifier", drop=True)
                .drop_duplicates()
                .dropna(axis=0, how="all", subset=["HideNextMonth", "Resolution"])
            )

            logger.debug(f"Saving resolutions to db:\n{resolutions}")
            resolutions.to_sql(
                "Resolutions",
                con=session.connection(),
                if_exists="append",
            )


def get_prev_reconciled(identfiers: list[col_hash]) -> DataFrame | None:
    """
    Get a DataFrame of previously reconciled contracts from an SQLite database.

    The identifiers are loaded into a temporary table which is joined against
    the "Resolutions" table of "OnHold.db".

    Args:
        identfiers (list[col_hash]): Row identifiers (as produced by
            `create_identifier`) to look up. May be empty, in which case
            nothing is inserted and the join matches no rows.

    Returns:
        DataFrame | None: Whatever `SQLiteManager.execute(..., as_dataframe=True)`
            returns for the join; presumably a frame with the matching
            "Resolutions" rows (confirm the no-match behaviour against
            SQLiteManager).
    """
    # Create a DB manager
    sqlManager: SQLiteManager = SQLiteManager("OnHold.db")

    # Create a temp table to hold this batch's identifiers
    # this table will be cleared when sqlManager goes out of scope
    temp_table_statement = """
    CREATE TEMPORARY TABLE CUR_IDENT (Indentifier VARCHAR(32));
    """
    sqlManager.execute(temp_table_statement)

    idents = list(identfiers)
    if idents:
        # Double any single quote so a value can never terminate the SQL
        # string literal it is embedded in.
        values = ", ".join(
            "('{}')".format(str(cn).replace("'", "''")) for cn in idents
        )
        # Insert the current identifiers into the temp table
        insert_idents = f"""
        INSERT INTO CUR_IDENT (Indentifier) VALUES
        {values};
        """
        logger.debug(f"{insert_idents=}")
        sqlManager.execute(insert_idents)
    else:
        # "INSERT ... VALUES ;" is a syntax error, so skip the insert.
        logger.debug("No identifiers supplied; CUR_IDENT left empty")

    # Select previously resolved contracts
    res_query = """
    SELECT r.*
    FROM Resolutions r
    JOIN CUR_IDENT i
    ON r.Indentifier = i.Indentifier;
    """
    resolved: DataFrame = sqlManager.execute(res_query, as_dataframe=True)
    return resolved


if __name__ == "__main__":
    import argparse
    from logging import DEBUG

    logger.setLevel(DEBUG)

    parser = argparse.ArgumentParser(
        prog="HoldReconcilerRecord",
    )
    # Required: without a path read_excel would fail with an obscure error.
    parser.add_argument(
        "-i",
        "--input",
        required=True,
        help="Path to the completed reconciliation report workbook",
    )
    args = parser.parse_args()

    # No Match
    no_match: DataFrame = read_excel(args.input, sheet_name="No Match")
    # Amount Mismatch
    amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch")

    save_rec(resolved_dataframes=[no_match, amt_mm])