Reworked how the reconciliation is done: filter gp -> remove prev ->

remove full match -> get contract match -> remaining = no match
Changed how the memory cols work. Not finished
dev
= 3 years ago
parent 7ad4f76943
commit 9ad5e9180c
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 1
      .gitignore
  2. 17
      config_reports.toml
  3. 28
      hold_reconciler.py
  4. 129
      memory.py
  5. 189
      reports.py

1
.gitignore vendored

@ -3,6 +3,7 @@ venv/
work/
build/
dist/
ghlib/
*.log
*.xlsx

@ -1,13 +1,14 @@
output_columns = [
"contract_number",
"vendor_name",
"AppNum", # OB only
"DateBooked", # OB only
"Document Number"# GP Only
"AppNum", # OB only
"DateBooked", # OB only
"Document Number",# GP Only
"Resolution",
"Notes"
# 'Source' added for 'no match'
]
[gp_filters]
# These regexes will be combined with ORs and used to filter
# the document number column of the GP report
@ -23,12 +24,8 @@ output_columns = [
"cma"
]
po_filter = "^(?!.*cma(\\s|\\d)).*$"
[shared_columns]
contract_number = { GP = "Transaction Description", OB = "Contract"}
onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" }
vendor_name = { GP = "Vendor Name", OB = "DealerName"}
vendor_name = { GP = "Vendor Name", OB = "DealerName"}

@ -4,8 +4,8 @@ then utilizes the reconcile module to find the differences between them. The out
saved as an Excel file with today's date.
"""
# Custom module for reconciliation
from helpers import setup_logging, find_most_recent_file, check_sheet
from models import OnBaseReport, GreatPlainsReport
from helpers import setup_logging, find_most_recent_file
from reports import OnBaseReport, GreatPlainsReport
import pandas as pd
from pandas import DataFrame
@ -15,6 +15,9 @@ import logging
from tomllib import load
import logging.config
from datetime import datetime as dt
from openpyxl import load_workbook, Workbook
import pathlib
from pathlib import Path
"""
[ ] Pull in past reconciliations to check against
@ -106,11 +109,24 @@ def main() -> int:
no_match, amt_mismatch = obr.reconcile(gpr)
# Write the results to a new Excel file
with pd.ExcelWriter(f"Work/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
no_match.to_excel(writer, sheet_name="No Match", index=False)
amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch", index=False)
output_name: Path = Path(f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx")
output_path: Path = Path("./Work", output_name)
with pd.ExcelWriter(output_path, mode='w') as writer:
no_match.to_excel(writer, sheet_name="No Match",
index=False, freeze_panes=(1,3)
)
amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch",
index=False, freeze_panes=(1,3)
)
overdue.to_excel(writer, sheet_name="Overdue", index=False)
wb: Workbook = load_workbook(output_path)
for sheet in ["No Match", "Amount Mismatch"]:
ws = wb[sheet]
ws.column_dimensions['A'].hidden = True
ws.column_dimensions['B'].hidden = True
wb.save(output_path)
return 0

@ -7,99 +7,64 @@ resolved holds.
*Last Updated: version 1.3
"""
from . import drop_unnamed
from helpers import drop_unnamed, setup_logging
from ghlib.database.database_manager import SQLiteManager
from pandas import DataFrame, Series, read_sql_query, read_excel, concat
from logging import getLogger
from dataclasses import dataclass
from hashlib import md5
setup_logging()
logger = getLogger(__name__)
def normalize_cols(df: DataFrame) -> DataFrame:
    """
    Normalize column names of *df*.

    TODO: not yet implemented — this is a stub that currently returns
    ``None`` despite the ``DataFrame`` return annotation (the commit
    notes the memory-column rework is "Not finished"). Implement or
    remove before relying on it.
    """
def hash_cols(row: Series, cols_to_hash: list[str]) -> str:
    """
    Build a stable MD5 hex digest from the given columns of *row*.

    Values are coerced to ``str`` before joining so that non-string
    entries (e.g. ``None``/``NaN`` left behind when only one of
    ``ID_OB``/``ID_GP`` is populated) no longer raise ``TypeError``
    inside ``str.join``; string inputs hash exactly as before.

    :param row: DataFrame row (a Series) to derive the identifier from.
    :param cols_to_hash: column names whose values form the hash input.
    :return: hex digest string identifying this combination of values.
    """
    md5_hash = md5()
    # Concatenate column values in the given order, then hash once.
    md5_hash.update(''.join(str(row[col]) for col in cols_to_hash).encode('utf-8'))
    return md5_hash.hexdigest()
def process_resolutions(df: DataFrame) -> DataFrame:
    """
    Filter a reviewed reconciliation sheet down to resolved rows and
    normalize it for storage.

    Drops unnamed (index-artifact) columns, removes rows with a blank
    'Resolution', lower-cases/strips the resolution text, and — when the
    sheet carries separate GP and OB amount columns (amount-mismatch
    sheets) — splits each row into one GP and one OB record sharing a
    single 'onhold_amount' column tagged with a 'Source'.

    :param df: raw sheet as read from the reconciled workbook.
    :return: normalized DataFrame with a 'Type' column of either
             'AmountMismatch' or 'NoMatch'.
    """
    # Drop unnamed columns:
    drop_unnamed(df)  # Works 'inplace'
    # Drop anything where resolution is blank; copy() so the later
    # column assignments hit a real frame, not a view (avoids
    # SettingWithCopyWarning / silently-lost writes).
    df = df[~df["Resolution"].isnull()].copy()
    # Standardize the resolution text
    df["Resolution"] = df["Resolution"].astype(str).str.strip().str.lower()
    # Check for multiple 'onhold_amount' columns (amount-mismatch sheet)
    if "onhold_amount_ob" in df.columns:
        # Create duplicates with the other amounts
        gp_amts: DataFrame = df[
            ["contract_number",
             "onhold_amount_gp",
             "Resolution",
             "Notes"
             ]].copy()
        df = df[
            ["contract_number",
             "onhold_amount_ob",
             "Resolution",
             "Notes"
             ]].copy()
        # Rename the amount columns and add the source
        gp_amts = gp_amts.rename(columns={"onhold_amount_gp": "onhold_amount"})
        gp_amts["Source"] = "GP"
        df = df.rename(columns={"onhold_amount_ob": "onhold_amount"})
        df["Source"] = "OB"
        # Combine them back together
        df = concat([df, gp_amts])
        df["Type"] = "AmountMismatch"
    else:
        # No-match sheets already carry a single amount plus a Source
        df = df[
            ["Source",
             "contract_number",
             "onhold_amount",
             "Resolution",
             "Notes"
             ]].copy()
        df["Type"] = "NoMatch"
    return df
def save_recs(resolved_dataframes: list[DataFrame]):
def save_rec(resolved_dataframes: list[DataFrame]):
"""
#TODO Actually handle this...
"""
#raise NotImplementedError("You were too lazy to fix this after the rewrite. FIX PLZ!")
sqlManager: SQLiteManager = SQLiteManager("OnHold.db")
with sqlManager.get_session() as session:
conn = session.connection()
df: DataFrame
for df in resolved_dataframes:
try:
# Drop unneeded columns and filter only to resolved data
df = process_resolutions(df)
# Save to the database
df.to_sql("Resolutions", conn, if_exists="append")
except Exception as e:
logger.exception(f"Could not save resolution dataframe: {e}")
continue
rdf: DataFrame
for rdf in resolved_dataframes:
cols: list[str] = rdf.columns.to_list()
if "onhold_amount" in cols:
logger.debug(f"Found 'onhold_amount' in rdf: no_match dataframe")
# Split the on_hold col to normalize with amount mismatch
rdf["onhold_amount_GP"] = rdf.apply(lambda row:
row.onhold_amount if row.Source == "GP" else None
)
rdf["onhold_amount_OB"] = rdf.apply(lambda row:
row.onhold_amount if row.Source == "OB" else None
)
else:
logger.debug(f"No 'onhold_amount' col found in rdf: amount_mismatch dataframe")
# Create a unified column for index
rdf["Indentifier"] = rdf.apply(lambda row:
hash_cols(row, ["ID_OB","ID_GP"]), axis=1
)
rec_cols: list[str] = [
"Indentifier",
"ID_GP",
"ID_OB",
"Hide Next Month",
"Resolution"
]
def get_prev_reconciled(contracts: list[str]) -> DataFrame:
"""
@ -117,13 +82,13 @@ def get_prev_reconciled(contracts: list[str]) -> DataFrame:
# Create a temp table to hold this batch's contract numbers
# this table will be cleared when sqlManager goes out of scope
temp_table_statement = """
CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_numbers VARCHAR(11));
CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_number VARCHAR(11));
"""
sqlManager.execute(temp_table_statement)
# Insert the current contracts into the temp table
insert_contracts = f"""
INSERT INTO CUR_CONTRACTS (contract_numbers) VALUES
INSERT INTO CUR_CONTRACTS (contract_number) VALUES
{', '.join([f"('{cn}')" for cn in contracts])};
"""
sqlManager.execute(insert_contracts)
@ -141,6 +106,8 @@ def get_prev_reconciled(contracts: list[str]) -> DataFrame:
if __name__ == "__main__":
import argparse
from logging import DEBUG
logger.setLevel(DEBUG)
parser = argparse.ArgumentParser(
prog="HoldReconcilerRecord",
@ -153,4 +120,4 @@ if __name__ == "__main__":
# Amount Mismatch
amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch")
save_recs(resolved_dataframes=[no_match, amt_mm])
save_rec(resolved_dataframes=[no_match, amt_mm])

@ -1,12 +1,14 @@
from pandas import DataFrame, merge, to_datetime, NaT
from pandas import DataFrame, merge, to_datetime, NaT, concat, Series
from numpy import concatenate
from abc import ABC, abstractmethod
from logging import getLogger
import re
from typing import Literal
import datetime
from copy import deepcopy
from helpers import CN_REGEX
from helpers import CN_REGEX, drop_unnamed
from memory import get_prev_reconciled
logger = getLogger(__name__)
@ -17,8 +19,11 @@ class HoldReport(ABC):
def __init__(self, dataframe: DataFrame, reports_config: dict) -> None:
self.config = reports_config
drop_unnamed(dataframe)
self.df = dataframe
self.prev_rec = None
self._normalize()
self._previsouly_resolved()
def _normalize(self):
@ -45,37 +50,88 @@ class HoldReport(ABC):
self.df["Source"] = self.source
def _get_no_match(self, other: 'HoldReport'):
# Merge the two DataFrames using the contract number as the join key
outer_merge = merge(
self.df, other.df,
how="outer",
on=["contract_number"],
suffixes=('_'+self.source, '_'+other.source)
def _previsouly_resolved(self):
"""
"""
current_contracts: list[str] = self.df["contract_number"]
prev_recd: DataFrame = get_prev_reconciled(contracts=current_contracts)
if not prev_recd:
logger.info("No previously reconciled!")
self.df = self._add_work_columns(self.df)
return
self.prev_rec = prev_recd
start_size = self.df.shape[0]
logger.debug(f"Report DF: \n{self.df}")
logger.debug(f"prev_rec: \n{prev_recd}")
source_id = f"ID_{self.source}"
self.df[source_id] = self.df["ID"]
self.df = merge(
self.df,
prev_recd,
how="left",
on= source_id,
suffixes=("_cur", "_prev")
)
#self.df.to_excel(f"merged_df_{self.source}.xlsx")
# Drop anything that should be ignored
self.df = self.df[self.df["Hide Next Month"] != True]
logger.info(f"Prev res added:\n{self.df}")
col_to_drop = []
for c in self.df.keys().to_list():
logger.debug(f"{c=}")
if "_prev" in c or "ID_" in c:
logger.debug(f"Found '_prev' in {c}")
col_to_drop.append(c)
else:
logger.debug(f"{c} is a good col!")
#col_to_drop.extend([c for c in self.df.keys().to_list() if '_prev' in c])
logger.debug(f"{col_to_drop=}")
self.df.drop(
columns= col_to_drop,
inplace=True
)
# Restandardize
self.df.rename(columns={"contract_number_cur": "contract_number"}, inplace=True)
end_size = self.df.shape[0]
logger.info(f"Reduced df by {start_size-end_size}")
# Filter the merged DataFrame to include only the transactions that do not have a match in both OBT and GPT
no_match = outer_merge.loc[
(outer_merge[f"Source_{self.source}"].isna()) |
(outer_merge[f"Source_{other.source}"].isna())
]
def _remove_full_matches(self, other: 'HoldReport'):
"""
Removes any contracts that match both contract number and hold amount.
These do not need to be reconciled.
# Fill in missing values and drop unnecessary columns
no_match["Source"] = no_match[f"Source_{self.source}"].fillna("GP")
no_match["onhold_amount"] = no_match[f"onhold_amount_{self.source}"].fillna(
no_match[f"onhold_amount_{other.source}"]
)
no_match["vendor_name"] = no_match[f"vendor_name_{self.source}"].fillna(
no_match[f"vendor_name_{other.source}"]
This is done 'in place' to both dataframes
"""
filter_id_match: DataFrame = self.df[~(self.df["ID"].isin(other.df["ID"]))]
other.df: DataFrame = other.df[~(other.df["ID"].isin(self.df["ID"]))]
self.df = filter_id_match
self.combined_missing: DataFrame = concat([self.df, other.df], ignore_index=True)
self.combined_missing.to_excel("ALL MISSING.xlsx")
logger.debug(f"Combined Missing:\n{self.combined_missing}")
logger.info(f"Payments with errors: {self.combined_missing.shape[0]}")
@staticmethod
def _created_combined_col(column: str, target_df: DataFrame, sources: tuple[str, str]) -> DataFrame :
"""
Creates a new column by filling empty columns of this source, with the matching column from another source
"""
this, that = sources
target_df[column] = target_df[f"{column}_{this}"].fillna(
target_df[f"{column}_{that}"]
)
return no_match
return target_df
def _get_contract_matches(self, other: 'HoldReport') -> DataFrame:
def _requires_rec(self, other: 'HoldReport') -> DataFrame:
"""
To be run after full matches have been removed.
"""
# Merge the two filtered DataFrames on the contract number
contract_match = merge(
self.df, other.df,
@ -84,47 +140,76 @@ class HoldReport(ABC):
suffixes=('_'+self.source, '_'+other.source)
)
contract_match["vendor_name"] = contract_match[f"vendor_name_{self.source}"].fillna(
contract_match[f"vendor_name_{other.source}"]
)
return contract_match
#contract_match.to_excel("CONTRACT_MATCH.xlsx")
for col in ["vendor_name", "Resolution", "Notes"]:
self._created_combined_col(col, contract_match, (self.source, other.source))
logger.debug(f"_requires_rec | contract_match:\n{contract_match.columns} ({contract_match.shape})")
no_match: DataFrame = self.combined_missing[~(
self.combined_missing["contract_number"].isin(
contract_match["contract_number"]
))
]
no_match[f"ID_{self.source}"] = no_match.apply(lambda row:
row["ID"] if row["Source"] == self.source else None
, axis=1)
no_match[f"ID_{other.source}"] = no_match.apply(lambda row:
row["ID"] if row["Source"] == other.source else None
, axis=1)
logger.debug(f"_requires_rec | no_match:\n{no_match.columns} ({no_match.shape})")
return contract_match, no_match
@staticmethod
def _add_work_columns(df: DataFrame) -> DataFrame:
"""
Add empty columns to the dataframe to facilitate working through the report.
"""
WORK_COLS = ["Resolution", "Notes"]
logger.debug("Adding work columns!")
df_cols: list[str] = df.columns.to_list()
WORK_COLS = ["Hide Next Month","Resolution"]
for col in WORK_COLS:
df[col] = ''
if col not in df_cols:
df[col] = ''
return df
def reconcile(self, other: 'HoldReport') -> tuple[DataFrame]:
"""
"""
no_match: DataFrame = self._get_no_match(other)
no_match.to_excel("NOMATCH.xlsx")
logger.debug(f"No_match: {no_match}")
self._remove_full_matches(other)
all_prev_reced = concat([self.prev_rec, other.prev_rec],ignore_index=True)
logger.debug(f"Removed matches:\n{self.df}")
amount_mismatch: DataFrame = self._get_contract_matches(other)
amount_mismatch.to_excel("AMTMM.xlsx")
logger.debug(f"amt_mismatche: {no_match}")
amount_mismatch, no_match = self._requires_rec(other)
logger.debug(f"reconcile | no_match unaltered\n{no_match.columns} ({no_match.shape})")
logger.debug(f"reconcile | am_mm unaltered:\n{amount_mismatch.columns} ({amount_mismatch.shape})")
columns: list[str] = ["ID_GP", "ID_OB"]
columns.extend(self.config["output_columns"])
nm_cols:list[str] = deepcopy(columns)
nm_cols.insert(3,"onhold_amount")
nm_cols.insert(4,"Source")
columns.insert(3,"onhold_amount_GP")
columns.insert(4, "onhold_amount_OB")
# Select and reorder columns
no_match = no_match[
["Source"].extend(self.config["output_columns"])
nm_cols
]
no_match = self._add_work_columns(no_match)
amount_mismatch = amount_mismatch[
self.config["output_columns"]
columns
]
amount_mismatch = self._add_work_columns(amount_mismatch)
logger.info(f"no_match: {no_match.shape[0]}")
logger.info(f"am_mm: {amount_mismatch.shape[0]}")
return no_match, amount_mismatch
class OnBaseReport(HoldReport):
@ -134,16 +219,14 @@ class OnBaseReport(HoldReport):
def get_overdue(self) -> DataFrame:
"""
"""
self.df["install_date"] = to_datetime(self.df["install_date"])
self.df["install_date"].fillna(NaT, inplace=True)
return self.df[self.df["install_date"].dt.date < datetime.date.today()]
self.df["InstallDate"] = to_datetime(self.df["InstallDate"])
self.df["InstallDate"].fillna(NaT, inplace=True)
return self.df[self.df["InstallDate"].dt.date < datetime.date.today()]
class GreatPlainsReport(HoldReport):
source = "GP"
filted_df: bool = False
def __init__(self, dataframe: DataFrame, report_config: dict) -> None:
@ -185,4 +268,4 @@ class GreatPlainsReport(HoldReport):
rows_to_drop = gp_report_df[remove_mask].index
gp_report_df.drop(rows_to_drop, inplace=True)
return gp_report_df
return gp_report_df
Loading…
Cancel
Save