diff --git a/.gitignore b/.gitignore index 9ed4880..fccecf6 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ venv/ work/ build/ dist/ +ghlib/ *.log *.xlsx diff --git a/config_reports.toml b/config_reports.toml index 794217b..5ec63c8 100644 --- a/config_reports.toml +++ b/config_reports.toml @@ -1,13 +1,14 @@ output_columns = [ "contract_number", "vendor_name", - "AppNum", # OB only - "DateBooked", # OB only - "Document Number"# GP Only + "AppNum", # OB only + "DateBooked", # OB only + "Document Number",# GP Only + "Resolution", + "Notes" # 'Source' added for 'no match' ] - [gp_filters] # These regex will be combined and with ORs and used to filer # the document number column of the GP report @@ -23,12 +24,8 @@ output_columns = [ "cma" ] po_filter = "^(?!.*cma(\\s|\\d)).*$" - - - + [shared_columns] contract_number = { GP = "Transaction Description", OB = "Contract"} onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" } -vendor_name = { GP = "Vendor Name", OB = "DealerName"} - - +vendor_name = { GP = "Vendor Name", OB = "DealerName"} \ No newline at end of file diff --git a/hold_reconciler.py b/hold_reconciler.py index b20d204..5718b57 100644 --- a/hold_reconciler.py +++ b/hold_reconciler.py @@ -4,8 +4,8 @@ then utilizes the reconcile module to find the differences between them. The out saved as an excel file with todays date. """ # Custom module for reconciliation -from helpers import setup_logging, find_most_recent_file, check_sheet -from models import OnBaseReport, GreatPlainsReport +from helpers import setup_logging, find_most_recent_file +from reports import OnBaseReport, GreatPlainsReport import pandas as pd from pandas import DataFrame @@ -15,6 +15,9 @@ import logging from tomllib import load import logging.config from datetime import datetime as dt +from openpyxl import load_workbook, Workbook +import pathlib +from pathlib import Path """ [ ] Pull in past reconciliations to check against @@ -106,11 +109,24 @@ def main() -> int: no_match, amt_mismatch = obr.reconcile(gpr) # Write the results to a new Excel file - with pd.ExcelWriter(f"Work/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer: - no_match.to_excel(writer, sheet_name="No Match", index=False) - amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch", index=False) + output_name: Path = Path(f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx") + output_path: Path = Path("./Work", output_name) + with pd.ExcelWriter(output_path, mode='w') as writer: + no_match.to_excel(writer, sheet_name="No Match", + index=False, freeze_panes=(1,3) + ) + amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch", + index=False, freeze_panes=(1,3) + ) overdue.to_excel(writer, sheet_name="Overdue", index=False) - + + wb: Workbook = load_workbook(output_path) + for sheet in ["No Match", "Amount Mismatch"]: + ws = wb[sheet] + ws.column_dimensions['A'].hidden = True + ws.column_dimensions['B'].hidden = True + wb.save(output_path) + return 0 diff --git a/memory.py b/memory.py index c7cd8b3..cdf4935 100644 --- a/memory.py +++ b/memory.py @@ -7,99 +7,64 @@ resolved holds. *Last Updated: version 1.3 """ -from . import drop_unnamed +from helpers import drop_unnamed, setup_logging from ghlib.database.database_manager import SQLiteManager from pandas import DataFrame, Series, read_sql_query, read_excel, concat from logging import getLogger +from dataclasses import dataclass +from hashlib import md5 - +setup_logging() logger = getLogger(__name__) -def normalize_cols(df: DataFrame) -> DataFrame: - """ - - """ - - - +def hash_cols(row: Series, cols_to_hash: list[str]) -> str: + md5_hash = md5() + md5_hash.update((''.join(row[col] for col in cols_to_hash)).encode('utf-8')) + return md5_hash.hexdigest() -def process_resolutions(df: DataFrame) -> DataFrame: - """ - - """ - # Drop unnamed columns: - drop_unnamed(df) # Works 'inplace' - - # Drop anything where resolution is blanks - df: DataFrame = df[~df["Resolution"].isnull()] - - # Standardize the resolution - df["Resolution"] = df["Resolution"].astype(str) - df["Resolution"] = df["Resolution"].apply(lambda res: res.strip().lower()) - - # Check for multiple 'onhold_amount' columns - cols: list[str] = list(df.keys()) - mult_amounts: bool = True if "onhold_amount_ob" in cols else False - - if mult_amounts: - # Create duplicates with the other amounts - gp_amts: DataFrame = df[ - ["contract_number", - "onhold_amount_gp", - "Resolution", - "Notes" - ]] - df = df[ - ["contract_number", - "onhold_amount_ob", - "Resolution", - "Notes" - ]] - - # Rename the amount columns and add the source - gp_amts.rename(columns={"onhold_amount_gp":"onhold_amount"}, inplace=True) - gp_amts["Source"] = "GP" - df.rename(columns={"onhold_amount_ob":"onhold_amount"}, inplace=True) - df["Source"] = "OB" - - # Combine them back together - df: DataFrame = concat([df, gp_amts]) - df["Type"] = "AmountMismatch" - - else: - # Filter columns - df = df[ - ["Source", - "contract_number", - "onhold_amount", - "Resolution", - "Notes" - ]] - df["Type"] = "NoMatch" - - return df - - -def save_recs(resolved_dataframes: list[DataFrame]): + +def save_rec(resolved_dataframes: list[DataFrame]): """ + #TODO Actually handle this... """ + #raise NotImplementedError("You were too lazy to fix this after the rewrite. FIX PLZ!") sqlManager: SQLiteManager = SQLiteManager("OnHold.db") with sqlManager.get_session() as session: conn = session.connection() - - df: DataFrame - for df in resolved_dataframes: - try: - # Drop uneeded columns and filter only to resolved data - df = process_resolutions(df) - # Save to the database - df.to_sql("Resolutions", conn, if_exists="append") - except Exception as e: - logger.exception(f"Could not save resolution dataframe: {e}") - continue + rdf: DataFrame + for rdf in resolved_dataframes: + cols: list[str] = rdf.columns.to_list() + if "onhold_amount" in cols: + logger.debug(f"Found 'onhold_amount' in rdf: no_match dataframe") + # Split the on_hold col to normalize with amount mismatch + rdf["onhold_amount_GP"] = rdf.apply(lambda row: + row.onhold_amount if row.Source == "GP" else None + ) + rdf["onhold_amount_OB"] = rdf.apply(lambda row: + row.onhold_amount if row.Source == "OB" else None + ) + else: + logger.debug(f"No 'onhold_amount' col found in rdf: amount_mismatch dataframe") + # Create a unified column for index + rdf["Indentifier"] = rdf.apply(lambda row: + hash_cols(row, ["ID_OB","ID_GP"]), axis=1 + ) + + + rec_cols: list[str] = [ + "Indentifier", + "ID_GP", + "ID_OB", + "Hide Next Month", + "Resolution" + ] + + + + def get_prev_reconciled(contracts: list[str]) -> DataFrame: """ @@ -117,13 +82,13 @@ def get_prev_reconciled(contracts: list[str]) -> DataFrame: # Create a temp table to hold this batches contract numbers # this table will be cleared when sqlManager goes out of scope temp_table_statement = """ - CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_numbers VARCHAR(11)); + CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_number VARCHAR(11)); """ sqlManager.execute(temp_table_statement) # Insert the current contracts into the temp table insert_contracts = f""" - INSERT INTO CUR_CONTRACTS (contract_numbers) VALUES + INSERT INTO CUR_CONTRACTS (contract_number) VALUES {', '.join([f"('{cn}')" for cn in contracts])}; """ sqlManager.execute(insert_contracts) @@ -141,6 +106,8 @@ def get_prev_reconciled(contracts: list[str]) -> DataFrame: if __name__ == "__main__": import argparse + from logging import DEBUG + logger.setLevel(DEBUG) parser = argparse.ArgumentParser( prog="HoldReconcilerRecord", @@ -153,4 +120,4 @@ if __name__ == "__main__": # Amount Mismatch amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch") - save_recs(resolved_dataframes=[no_match, amt_mm]) \ No newline at end of file + save_rec(resolved_dataframes=[no_match, amt_mm]) \ No newline at end of file diff --git a/reports.py b/reports.py index 533b93c..3949325 100644 --- a/reports.py +++ b/reports.py @@ -1,12 +1,14 @@ -from pandas import DataFrame, merge, to_datetime, NaT +from pandas import DataFrame, merge, to_datetime, NaT, concat, Series from numpy import concatenate from abc import ABC, abstractmethod from logging import getLogger import re from typing import Literal import datetime +from copy import deepcopy -from helpers import CN_REGEX +from helpers import CN_REGEX, drop_unnamed +from memory import get_prev_reconciled logger = getLogger(__name__) @@ -17,8 +19,11 @@ class HoldReport(ABC): def __init__(self, dataframe: DataFrame, reports_config: dict) -> None: self.config = reports_config + drop_unnamed(dataframe) self.df = dataframe + self.prev_rec = None self._normalize() + self._previsouly_resolved() def _normalize(self): @@ -45,37 +50,88 @@ class HoldReport(ABC): self.df["Source"] = self.source - def _get_no_match(self, other: 'HoldReport'): - # Merge the two DataFrames using the contract number as the join key - outer_merge = merge( - self.df, other.df, - how="outer", - on=["contract_number"], - suffixes=('_'+self.source, '_'+other.source) + def _previsouly_resolved(self): + """ + """ + current_contracts: list[str] = self.df["contract_number"] + + prev_recd: DataFrame = get_prev_reconciled(contracts=current_contracts) + if not prev_recd: + logger.info("No previously reconciled!") + self.df = self._add_work_columns(self.df) + return + self.prev_rec = prev_recd + + start_size = self.df.shape[0] + logger.debug(f"Report DF: \n{self.df}") + logger.debug(f"prev_rec: \n{prev_recd}") + + source_id = f"ID_{self.source}" + self.df[source_id] = self.df["ID"] + self.df = merge( + self.df, + prev_recd, + how="left", + on= source_id, + suffixes=("_cur", "_prev") ) + #self.df.to_excel(f"merged_df_{self.source}.xlsx") + + # Drop anything that should be ignored + self.df = self.df[self.df["Hide Next Month"] != True] + logger.info(f"Prev res added:\n{self.df}") + + col_to_drop = [] + for c in self.df.keys().to_list(): + logger.debug(f"{c=}") + if "_prev" in c or "ID_" in c: + logger.debug(f"Found '_prev' in {c}") + col_to_drop.append(c) + else: + logger.debug(f"{c} is a good col!") + #col_to_drop.extend([c for c in self.df.keys().to_list() if '_prev' in c]) + logger.debug(f"{col_to_drop=}") + self.df.drop( + columns= col_to_drop, + inplace=True + ) + # Restandardize + self.df.rename(columns={"contract_number_cur": "contract_number"}, inplace=True) + end_size = self.df.shape[0] + logger.info(f"Reduced df by {start_size-end_size}") - # Filter the merged DataFrame to include only the transactions that do not have a match in both OBT and GPT - no_match = outer_merge.loc[ - (outer_merge[f"Source_{self.source}"].isna()) | - (outer_merge[f"Source_{other.source}"].isna()) - ] + def _remove_full_matches(self, other: 'HoldReport'): + """ + Removes any contracts that match both contract number and hold amount. + These do not need to be reconciled. - # Fill in missing values and drop unnecessary columns - no_match["Source"] = no_match[f"Source_{self.source}"].fillna("GP") - no_match["onhold_amount"] = no_match[f"onhold_amount_{self.source}"].fillna( - no_match[f"onhold_amount_{other.source}"] - ) - no_match["vendor_name"] = no_match[f"vendor_name_{self.source}"].fillna( - no_match[f"vendor_name_{other.source}"] + This id done 'in place' to both dataframes + """ + filter_id_match: DataFrame = self.df[~(self.df["ID"].isin(other.df["ID"]))] + other.df: DataFrame = other.df[~(other.df["ID"].isin(self.df["ID"]))] + self.df = filter_id_match + self.combined_missing: DataFrame = concat([self.df, other.df], ignore_index=True) + self.combined_missing.to_excel("ALL MISSING.xlsx") + logger.debug(f"Combined Missing:\n{self.combined_missing}") + logger.info(f"Payments with errors: {self.combined_missing.shape[0]}") + + @staticmethod + def _created_combined_col(column: str, target_df: DataFrame, sources: tuple[str, str]) -> DataFrame : + """ + Creates a new column by filling empty columns of this source, with the matching column from another source + """ + this, that = sources + target_df[column] = target_df[f"{column}_{this}"].fillna( + target_df[f"{column}_{that}"] ) - - return no_match - + return target_df + - def _get_contract_matches(self, other: 'HoldReport') -> DataFrame: + def _requires_rec(self, other: 'HoldReport') -> DataFrame: """ - + To be run after full matches have been re """ + # Merge the two filtered DataFrames on the contract number contract_match = merge( self.df, other.df, @@ -84,47 +140,76 @@ class HoldReport(ABC): suffixes=('_'+self.source, '_'+other.source) ) - contract_match["vendor_name"] = contract_match[f"vendor_name_{self.source}"].fillna( - contract_match[f"vendor_name_{other.source}"] - ) - - - return contract_match + #contract_match.to_excel("CONTRACT_MATCH.xlsx") + + for col in ["vendor_name", "Resolution", "Notes"]: + self._created_combined_col(col, contract_match, (self.source, other.source)) + + logger.debug(f"_requires_rec | contract_match:\n{contract_match.columns} ({contract_match.shape})") + + no_match: DataFrame = self.combined_missing[~( + self.combined_missing["contract_number"].isin( + contract_match["contract_number"] + )) + ] + no_match[f"ID_{self.source}"] = no_match.apply(lambda row: + row["ID"] if row["Source"] == self.source else None + , axis=1) + no_match[f"ID_{other.source}"] = no_match.apply(lambda row: + row["ID"] if row["Source"] == other.source else None + , axis=1) + + logger.debug(f"_requires_rec | no_match:\n{no_match.columns} ({no_match.shape})") + + return contract_match, no_match @staticmethod def _add_work_columns(df: DataFrame) -> DataFrame: """ Add empty columns to the dataframe to faciliate working through the report. """ - WORK_COLS = ["Resolution", "Notes"] + logger.debug("Adding work columns!") + df_cols: list[str] = df.columns.to_list() + WORK_COLS = ["Hide Next Month","Resolution"] for col in WORK_COLS: - df[col] = '' + if col not in df_cols: + df[col] = '' return df def reconcile(self, other: 'HoldReport') -> tuple[DataFrame]: """ """ - no_match: DataFrame = self._get_no_match(other) - no_match.to_excel("NOMATCH.xlsx") - logger.debug(f"No_match: {no_match}") + self._remove_full_matches(other) + all_prev_reced = concat([self.prev_rec, other.prev_rec],ignore_index=True) + logger.debug(f"Removed matches:\n{self.df}") + - amount_mismatch: DataFrame = self._get_contract_matches(other) - amount_mismatch.to_excel("AMTMM.xlsx") - logger.debug(f"amt_mismatche: {no_match}") + amount_mismatch, no_match = self._requires_rec(other) + + logger.debug(f"reconcile | no_match unaltered\n{no_match.columns} ({no_match.shape})") + logger.debug(f"reconcile | am_mm unaltered:\n{amount_mismatch.columns} ({amount_mismatch.shape})") + + columns: list[str] = ["ID_GP", "ID_OB"] + columns.extend(self.config["output_columns"]) + + nm_cols:list[str] = deepcopy(columns) + nm_cols.insert(3,"onhold_amount") + nm_cols.insert(4,"Source") + + columns.insert(3,"onhold_amount_GP") + columns.insert(4, "onhold_amount_OB") # Select and reorder columns no_match = no_match[ - ["Source"].extend(self.config["output_columns"]) + nm_cols ] - no_match = self._add_work_columns(no_match) - + amount_mismatch = amount_mismatch[ - self.config["output_columns"] + columns ] - amount_mismatch = self._add_work_columns(amount_mismatch) - + logger.info(f"no_match: {no_match.shape[0]}") + logger.info(f"am_mm: {amount_mismatch.shape[0]}") return no_match, amount_mismatch - class OnBaseReport(HoldReport): @@ -134,16 +219,14 @@ class OnBaseReport(HoldReport): def get_overdue(self) -> DataFrame: """ """ - self.df["install_date"] = to_datetime(self.df["install_date"]) - self.df["install_date"].fillna(NaT, inplace=True) - return self.df[self.df["install_date"].dt.date < datetime.date.today()] - + self.df["InstallDate"] = to_datetime(self.df["InstallDate"]) + self.df["InstallDate"].fillna(NaT, inplace=True) + return self.df[self.df["InstallDate"].dt.date < datetime.date.today()] class GreatPlainsReport(HoldReport): source = "GP" - filted_df: bool = False def __init__(self, dataframe: DataFrame, report_config: dict) -> None: @@ -185,4 +268,4 @@ class GreatPlainsReport(HoldReport): rows_to_drop = gp_report_df[remove_mask].index gp_report_df.drop(rows_to_drop, inplace=True) - return gp_report_df + return gp_report_df \ No newline at end of file