You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
OnHoldReconciler/src/memory.py

143 lines
4.6 KiB

"""
Classes and functions to parse completed reconciliation reports and remember
the resolutions of contracts.
Also provides a way for the reconciler to check holds against previously
resolved holds.
*Last Updated: version 1.3
"""
from helpers import drop_unnamed, setup_logging
from ghlib.database.database_manager import SQLiteManager, select_fields_statement
from pandas import DataFrame, Series, read_sql_query, read_excel, concat
from numpy import NaN
from logging import getLogger
from dataclasses import dataclass
from hashlib import md5
from typing import TypeAlias
# Runs at import time; setup_logging comes from the local `helpers` module
# (presumably installs the project's logging configuration — confirm there).
setup_logging()
# Module-level logger, named after this module.
logger = getLogger(__name__)
# Alias for the hex digest string returned by hash_cols.
col_hash: TypeAlias = str
def hash_cols(row: Series, cols_to_hash: list[str]) -> col_hash:
    """
    Build an MD5 fingerprint from selected fields of a row.

    The values named by `cols_to_hash` are read from `row` in the given
    order, converted with `str()` and concatenated with no separator; the
    UTF-8 bytes of that string are then hashed.

    Args:
        row: Row indexable by column name (e.g. one row of a DataFrame).
        cols_to_hash: Column names whose values make up the fingerprint.
    Returns:
        The hexadecimal MD5 digest of the concatenated values.
    """
    # NOTE: because no separator is used, ("ab", "c") and ("a", "bc") hash
    # to the same value.
    pieces = [str(row[name]) for name in cols_to_hash]
    payload = ''.join(pieces).encode('utf-8')
    return md5(payload).hexdigest()
def create_identifier(df: DataFrame) -> DataFrame:
    """
    Add an "Indentifier" column holding a hash of each row's ID_OB/ID_GP pair.

    A missing ID is hashed as the placeholder "x", so a row that only has
    one of the two IDs still gets an identifier. The placeholder is applied
    to a temporary copy only: the ID columns of `df` are left exactly as
    they were passed in.

    Args:
        df: Frame containing the columns "ID_OB" and "ID_GP".
    Returns:
        The same `df` object, modified in place by adding (or overwriting)
        the "Indentifier" column. The column name spelling is kept as-is
        because other code in this module uses it as the database key.
    """
    id_cols = ["ID_OB", "ID_GP"]
    # Fill the placeholder on a copy instead of using the chained
    # `df[col].fillna(..., inplace=True)` form: that form is deprecated and
    # does not update `df` under pandas copy-on-write, which would silently
    # change the hashes. Working on a copy also removes the need to turn
    # "x" back into NaN afterwards, so a genuine "x" ID is no longer erased.
    filled = df.assign(**{col: df[col].fillna("x") for col in id_cols})
    # Iterating rows (rather than `apply(axis=1)`) yields a plain list, so an
    # empty frame simply gets an empty column.
    df["Indentifier"] = [
        hash_cols(row, id_cols) for _, row in filled.iterrows()
    ]
    return df
def save_rec(resolved_dataframes: list[DataFrame]) -> None:
    """
    Append the resolutions found in completed reconciliation sheets to the
    "Resolutions" table of the "OnHold.db" SQLite database.

    For every non-empty frame:
      1. If it has an "onhold_amount" column (the "no match" layout), that
         column is split into "onhold_amount_GP" / "onhold_amount_OB"
         according to the row's "Source" value.
      2. An "Indentifier" column is added via create_identifier.
      3. Only the columns Indentifier, ID_GP, ID_OB, HideNextMonth and
         Resolution are kept, indexed by Indentifier; duplicate rows and
         rows where both HideNextMonth and Resolution are missing are
         dropped.
      4. The remaining rows are appended to the "Resolutions" table.

    The frames passed in are modified in place (the columns from steps 1
    and 2 are added to them). Empty frames are skipped.

    Args:
        resolved_dataframes: Frames read from a completed reconciliation
            report. Each must contain ID_GP, ID_OB, HideNextMonth and
            Resolution, plus Source when "onhold_amount" is present.
    """
    rec_cols: list[str] = [
        "Indentifier",
        "ID_GP",
        "ID_OB",
        "HideNextMonth",
        "Resolution",
    ]
    sqlManager: SQLiteManager = SQLiteManager("OnHold.db")
    # NOTE(review): no explicit commit here — presumably the session context
    # manager commits on exit; confirm against SQLiteManager.get_session.
    with sqlManager.get_session() as session:
        for rdf in resolved_dataframes:
            if rdf.empty:
                # A sheet with no rows has nothing to record, and row-wise
                # apply on an empty frame does not return a Series that can
                # be assigned to a single column.
                logger.debug("Skipping empty dataframe: nothing to save")
                continue

            cols: list[str] = rdf.columns.to_list()
            logger.debug("cols=%s", cols)
            if "onhold_amount" in cols:
                logger.debug("Found 'onhold_amount' in rdf: no_match dataframe")
                # Split the on_hold col to normalize with amount mismatch.
                # NOTE(review): these two columns are not part of rec_cols,
                # so they are not written to the database.
                rdf["onhold_amount_GP"] = rdf.apply(
                    lambda row: row["onhold_amount"] if row["Source"] == "GP" else None,
                    axis=1,
                )
                rdf["onhold_amount_OB"] = rdf.apply(
                    lambda row: row["onhold_amount"] if row["Source"] == "OB" else None,
                    axis=1,
                )
            else:
                logger.debug("No 'onhold_amount' col found in rdf: amount_mismatch dataframe")

            # Create a unified column for index
            rdf = create_identifier(rdf)

            # Build the rows to store as a fresh frame rather than mutating
            # a column slice in place.
            resolutions = (
                rdf[rec_cols]
                .set_index("Indentifier")
                .drop_duplicates()
                .dropna(axis=0, how="all", subset=["HideNextMonth", "Resolution"])
            )
            logger.debug("Saving resolutions to db:\n%s", resolutions)
            resolutions.to_sql(
                'Resolutions',
                con=session.connection(),
                if_exists="append",
            )
def get_prev_reconciled(identfiers: list[col_hash]) -> DataFrame|None:
    """
    Look up previously stored resolutions for a batch of row identifiers.

    The identifiers are loaded into a temporary table (CUR_IDENT) which is
    then joined against the "Resolutions" table of "OnHold.db".

    Args:
        identfiers: Identifier hashes to look up (the values stored in the
            "Indentifier" column). May be empty, in which case nothing is
            inserted and the join matches no rows.
    Returns:
        The result of the join as returned by
        `SQLiteManager.execute(..., as_dataframe=True)`: every Resolutions
        row whose Indentifier is in `identfiers`.
    """
    # Create a DB manager
    sqlManager: SQLiteManager = SQLiteManager("OnHold.db")

    # Create a temp table to hold this batch's identifiers.
    # NOTE(review): this relies on SQLiteManager using one connection for
    # all execute() calls below and dropping the temp table when that
    # connection goes away — confirm against SQLiteManager.
    temp_table_statement = """
    CREATE TEMPORARY TABLE CUR_IDENT (Indentifier VARCHAR(32));
    """
    sqlManager.execute(temp_table_statement)

    # Materialise once so any iterable works and emptiness can be tested.
    idents = list(identfiers)
    if idents:
        # Values are inlined as SQL string literals; double any single quote
        # so an unexpected value cannot break out of its literal.
        values = ', '.join(
            "('{}')".format(str(ident).replace("'", "''")) for ident in idents
        )
        insert_idents = f"""
    INSERT INTO CUR_IDENT (Indentifier) VALUES
    {values};
    """
        logger.debug(f"{insert_idents=}")
        sqlManager.execute(insert_idents)
    # With no identifiers the INSERT is skipped: "VALUES ;" is not valid SQL.

    # Select previously resolved contracts
    res_query = """
    SELECT r.*
    FROM Resolutions r
    JOIN CUR_IDENT i
    ON r.Indentifier = i.Indentifier;
    """
    resolved: DataFrame = sqlManager.execute(res_query, as_dataframe=True)
    return resolved
if __name__ == "__main__":
    # Command-line entry point: read a completed reconciliation workbook and
    # record its resolutions via save_rec.
    import argparse
    from logging import DEBUG

    logger.setLevel(DEBUG)

    parser = argparse.ArgumentParser(
        prog="HoldReconcilerRecord",
        description="Record the resolutions from a completed reconciliation report.",
    )
    # Required: without a path there is nothing to read, and argparse then
    # reports the missing option instead of read_excel failing on None.
    parser.add_argument(
        "-i",
        "--input",
        required=True,
        help="path to the completed reconciliation Excel workbook",
    )
    args = parser.parse_args()

    # Read both sheets in one pass; a list of sheet names returns a dict of
    # DataFrames keyed by sheet name.
    sheets = read_excel(args.input, sheet_name=["No Match", "Amount Mismatch"])
    # No Match
    no_match: DataFrame = sheets["No Match"]
    # Amount Mismatch
    amt_mm: DataFrame = sheets["Amount Mismatch"]

    save_rec(resolved_dataframes=[no_match, amt_mm])