OnHoldReconciler/src/memory.py

"""
Classes and functions to parse completed reconciliation reports and remember
the resolutions of contracts.

Also provides a way for the reconciler to check holds against previously
resolved holds.

*Last Updated: version 1.3
"""
from helpers import drop_unnamed, setup_logging
from ghlib.database.database_manager import SQLiteManager, select_fields_statement

from pandas import DataFrame, Series, read_sql_query, read_excel, concat
from numpy import NaN
from logging import getLogger
from dataclasses import dataclass
from hashlib import md5
from typing import TypeAlias

# Module-level side effect: the project logging helper runs as soon as this
# module is imported (see helpers.setup_logging for what it configures).
setup_logging()
# Logger named after this module; used by the functions in this file.
logger = getLogger(__name__)

col_hash: TypeAlias = str

def hash_cols(row: Series, cols_to_hash: list[str]) -> col_hash:
    md5_hash = md5()
    md5_hash.update((''.join(str(row[col]) for col in cols_to_hash)).encode('utf-8'))
    return md5_hash.hexdigest()

def create_identifier(df: DataFrame) -> DataFrame:
    """
    Add an "Indentifier" column hashing each row's ID_OB / ID_GP pair.

    Missing IDs are hashed as the placeholder string "x", which keeps the
    digests compatible with identifiers computed previously. The placeholder
    is applied to a temporary frame only, so the ID columns of ``df`` are left
    exactly as they were passed in: a genuine ID equal to "x" is not turned
    into NaN, and no chained in-place call is needed (those do not write back
    to the parent frame under pandas Copy-on-Write).

    Args:
        df: Frame containing "ID_OB" and "ID_GP" columns. It is modified in
            place: the "Indentifier" column is added (or overwritten).

    Returns:
        The same ``df`` object, now carrying the "Indentifier" column.
    """
    id_cols = ["ID_OB", "ID_GP"]

    # Temporary frame with missing IDs filled; all other columns are kept so
    # each row is materialised the same way as when hashing the full frame.
    keyed = df.assign(**{col: df[col].fillna("x") for col in id_cols})

    if len(keyed) == 0:
        # Row-wise apply on a zero-row frame returns a DataFrame, which cannot
        # be assigned to a single column; add an empty column directly.
        df["Indentifier"] = Series([], index=df.index, dtype=object)
        return df

    df["Indentifier"] = keyed.apply(
        lambda row: hash_cols(row, id_cols), axis=1
    )
    return df

def save_rec(resolved_dataframes: list[DataFrame]):
    """
    Append the resolutions found in reconciliation frames to the database.

    For every non-empty frame an "Indentifier" column is created, the
    resolution columns are selected, duplicates and rows with neither a
    "HideNextMonth" nor a "Resolution" value are dropped, and the remainder is
    appended to the "Resolutions" table of "OnHold.db".

    Side effects on the passed frames: each non-empty frame gains an
    "Indentifier" column, and frames with an "onhold_amount" column also gain
    "onhold_amount_GP" / "onhold_amount_OB".

    Args:
        resolved_dataframes: Frames containing at least the columns "ID_GP",
            "ID_OB", "HideNextMonth" and "Resolution". Frames that have an
            "onhold_amount" column must also have a "Source" column.
    """
    # Columns persisted to the Resolutions table ("Indentifier" becomes the index).
    rec_cols: list[str] = [
        "Indentifier",
        "ID_GP",
        "ID_OB",
        "HideNextMonth",
        "Resolution"
    ]

    sqlManager: SQLiteManager = SQLiteManager("OnHold.db")
    # NOTE(review): assumes the session context manager commits on exit —
    # confirm against ghlib's SQLiteManager.
    with sqlManager.get_session() as session:

        rdf: DataFrame
        for rdf in resolved_dataframes:
            cols: list[str] = rdf.columns.to_list()
            logger.debug(f"{cols=}")

            if len(rdf.index) == 0:
                # Row-wise apply on a zero-row frame yields a DataFrame rather
                # than a Series, so the column assignments below would fail.
                # An empty sheet simply has nothing to record.
                logger.debug("Skipping empty rdf: nothing to save")
                continue

            if "onhold_amount" in cols:
                logger.debug("Found 'onhold_amount' in rdf: no_match dataframe")
                # Split the on_hold col to normalize with amount mismatch.
                # These columns are not in rec_cols, so they are not written
                # to the database; they only remain on the caller's frame.
                rdf["onhold_amount_GP"] = rdf.apply(lambda row:
                    row["onhold_amount"] if row["Source"] == "GP" else None
                , axis=1)
                rdf["onhold_amount_OB"] = rdf.apply(lambda row:
                    row["onhold_amount"] if row["Source"] == "OB" else None
                , axis=1 )
            else:
                logger.debug("No 'onhold_amount' col found in rdf: amount_mismatch dataframe")

            # Create a unified column for index
            rdf = create_identifier(rdf)

            # Build the rows to persist without in-place operations on a slice.
            to_save: DataFrame = (
                rdf[rec_cols]
                .set_index("Indentifier", drop=True)
                .drop_duplicates()
                .dropna(axis=0, how="all", subset=["HideNextMonth", "Resolution"])
            )
            logger.debug(f"Saving resolutions to db:\n{to_save}")

            # if_exists="append" adds rows to the existing table.
            to_save.to_sql('Resolutions',
                con=session.connection(),
                if_exists="append"
            )


def get_prev_reconciled(identfiers: list[col_hash]) -> DataFrame|None:
    """
    Look up previously saved resolutions for a batch of identifiers.

    The identifiers are loaded into a temporary table which is then joined
    against the "Resolutions" table of "OnHold.db".

    Args:
        identfiers: Identifier hashes to look up (values of the "Indentifier"
            column). May be empty, in which case nothing is inserted and the
            join is run against an empty temporary table.

    Returns:
        Whatever ``SQLiteManager.execute(..., as_dataframe=True)`` returns for
        the join: the rows of "Resolutions" whose "Indentifier" is in the
        batch.
    """
    # Create a DB manager
    sqlManager: SQLiteManager = SQLiteManager("OnHold.db")

    # Create a temp table to hold this batch's identifiers.
    # NOTE(review): presumably the table disappears when the manager's
    # connection is closed — confirm against SQLiteManager.
    temp_table_statement = """
    CREATE TEMPORARY TABLE CUR_IDENT (Indentifier VARCHAR(32));
    """
    sqlManager.execute(temp_table_statement)

    # Materialise once so any iterable works and emptiness can be tested.
    idents: list[str] = [str(ident) for ident in identfiers]

    if idents:
        # Double embedded single quotes so a value can never terminate the
        # SQL string literal early; ordinary hashes are unaffected.
        values = ', '.join(
            "('{}')".format(ident.replace("'", "''")) for ident in idents
        )
        insert_idents = f"""
    INSERT INTO CUR_IDENT (Indentifier) VALUES
    {values};
    """

        logger.debug(f"{insert_idents=}")

        sqlManager.execute(insert_idents)
    else:
        # "INSERT ... VALUES ;" with no rows is a syntax error, so skip it.
        logger.debug("No identifiers supplied; skipping insert into CUR_IDENT")

    # Select previously resolved contracts
    res_query = """
    SELECT r.*
    FROM Resolutions r
    JOIN CUR_IDENT i
    ON r.Indentifier = i.Indentifier;
    """
    resolved: DataFrame = sqlManager.execute(res_query, as_dataframe=True)
    return resolved


if __name__ == "__main__":
    # Command-line entry point: read a completed reconciliation workbook and
    # record its resolutions via save_rec.
    import argparse
    from logging import DEBUG
    logger.setLevel(DEBUG)

    parser = argparse.ArgumentParser(
        prog="HoldReconcilerRecord",
    )
    # Required: without a path there is nothing to read, and argparse reports
    # the missing option instead of read_excel failing on None.
    parser.add_argument(
        "-i", "--input",
        required=True,
        help="Path to the workbook containing the 'No Match' and "
             "'Amount Mismatch' sheets",
    )
    args = parser.parse_args()

    # No Match
    no_match: DataFrame = read_excel(args.input, sheet_name="No Match")
    # Amount Mismatch
    amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch")

    save_rec(resolved_dataframes=[no_match, amt_mm])