Reworked how the reconciliation is done: filter gp -> remove prev ->

remove full match -> get contract match -> remaining = no match
Changed how the memory columns work. Not finished.
dev
= 3 years ago
parent 7ad4f76943
commit 9ad5e9180c
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 1
      .gitignore
  2. 17
      config_reports.toml
  3. 28
      hold_reconciler.py
  4. 129
      memory.py
  5. 189
      reports.py

1
.gitignore vendored

@ -3,6 +3,7 @@ venv/
work/ work/
build/ build/
dist/ dist/
ghlib/
*.log *.log
*.xlsx *.xlsx

@ -1,13 +1,14 @@
output_columns = [ output_columns = [
"contract_number", "contract_number",
"vendor_name", "vendor_name",
"AppNum", # OB only "AppNum", # OB only
"DateBooked", # OB only "DateBooked", # OB only
"Document Number"# GP Only "Document Number",# GP Only
"Resolution",
"Notes"
# 'Source' added for 'no match' # 'Source' added for 'no match'
] ]
[gp_filters] [gp_filters]
# These regex will be combined and with ORs and used to filer # These regex will be combined and with ORs and used to filer
# the document number column of the GP report # the document number column of the GP report
@ -23,12 +24,8 @@ output_columns = [
"cma" "cma"
] ]
po_filter = "^(?!.*cma(\\s|\\d)).*$" po_filter = "^(?!.*cma(\\s|\\d)).*$"
[shared_columns] [shared_columns]
contract_number = { GP = "Transaction Description", OB = "Contract"} contract_number = { GP = "Transaction Description", OB = "Contract"}
onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" } onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" }
vendor_name = { GP = "Vendor Name", OB = "DealerName"} vendor_name = { GP = "Vendor Name", OB = "DealerName"}

@ -4,8 +4,8 @@ then utilizes the reconcile module to find the differences between them. The out
saved as an excel file with todays date. saved as an excel file with todays date.
""" """
# Custom module for reconciliation # Custom module for reconciliation
from helpers import setup_logging, find_most_recent_file, check_sheet from helpers import setup_logging, find_most_recent_file
from models import OnBaseReport, GreatPlainsReport from reports import OnBaseReport, GreatPlainsReport
import pandas as pd import pandas as pd
from pandas import DataFrame from pandas import DataFrame
@ -15,6 +15,9 @@ import logging
from tomllib import load from tomllib import load
import logging.config import logging.config
from datetime import datetime as dt from datetime import datetime as dt
from openpyxl import load_workbook, Workbook
import pathlib
from pathlib import Path
""" """
[ ] Pull in past reconciliations to check against [ ] Pull in past reconciliations to check against
@ -106,11 +109,24 @@ def main() -> int:
no_match, amt_mismatch = obr.reconcile(gpr) no_match, amt_mismatch = obr.reconcile(gpr)
# Write the results to a new Excel file # Write the results to a new Excel file
with pd.ExcelWriter(f"Work/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer: output_name: Path = Path(f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx")
no_match.to_excel(writer, sheet_name="No Match", index=False) output_path: Path = Path("./Work", output_name)
amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch", index=False) with pd.ExcelWriter(output_path, mode='w') as writer:
no_match.to_excel(writer, sheet_name="No Match",
index=False, freeze_panes=(1,3)
)
amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch",
index=False, freeze_panes=(1,3)
)
overdue.to_excel(writer, sheet_name="Overdue", index=False) overdue.to_excel(writer, sheet_name="Overdue", index=False)
wb: Workbook = load_workbook(output_path)
for sheet in ["No Match", "Amount Mismatch"]:
ws = wb[sheet]
ws.column_dimensions['A'].hidden = True
ws.column_dimensions['B'].hidden = True
wb.save(output_path)
return 0 return 0

@ -7,99 +7,64 @@ resolved holds.
*Last Updated: version 1.3 *Last Updated: version 1.3
""" """
from . import drop_unnamed from helpers import drop_unnamed, setup_logging
from ghlib.database.database_manager import SQLiteManager from ghlib.database.database_manager import SQLiteManager
from pandas import DataFrame, Series, read_sql_query, read_excel, concat from pandas import DataFrame, Series, read_sql_query, read_excel, concat
from logging import getLogger from logging import getLogger
from dataclasses import dataclass
from hashlib import md5
setup_logging()
logger = getLogger(__name__) logger = getLogger(__name__)
def normalize_cols(df: DataFrame) -> DataFrame: def hash_cols(row: Series, cols_to_hash: list[str]) -> str:
""" md5_hash = md5()
md5_hash.update((''.join(row[col] for col in cols_to_hash)).encode('utf-8'))
""" return md5_hash.hexdigest()
def process_resolutions(df: DataFrame) -> DataFrame:
""" def save_rec(resolved_dataframes: list[DataFrame]):
"""
# Drop unnamed columns:
drop_unnamed(df) # Works 'inplace'
# Drop anything where resolution is blanks
df: DataFrame = df[~df["Resolution"].isnull()]
# Standardize the resolution
df["Resolution"] = df["Resolution"].astype(str)
df["Resolution"] = df["Resolution"].apply(lambda res: res.strip().lower())
# Check for multiple 'onhold_amount' columns
cols: list[str] = list(df.keys())
mult_amounts: bool = True if "onhold_amount_ob" in cols else False
if mult_amounts:
# Create duplicates with the other amounts
gp_amts: DataFrame = df[
["contract_number",
"onhold_amount_gp",
"Resolution",
"Notes"
]]
df = df[
["contract_number",
"onhold_amount_ob",
"Resolution",
"Notes"
]]
# Rename the amount columns and add the source
gp_amts.rename(columns={"onhold_amount_gp":"onhold_amount"}, inplace=True)
gp_amts["Source"] = "GP"
df.rename(columns={"onhold_amount_ob":"onhold_amount"}, inplace=True)
df["Source"] = "OB"
# Combine them back together
df: DataFrame = concat([df, gp_amts])
df["Type"] = "AmountMismatch"
else:
# Filter columns
df = df[
["Source",
"contract_number",
"onhold_amount",
"Resolution",
"Notes"
]]
df["Type"] = "NoMatch"
return df
def save_recs(resolved_dataframes: list[DataFrame]):
""" """
#TODO Actually handle this...
""" """
#raise NotImplementedError("You were too lazy to fix this after the rewrite. FIX PLZ!")
sqlManager: SQLiteManager = SQLiteManager("OnHold.db") sqlManager: SQLiteManager = SQLiteManager("OnHold.db")
with sqlManager.get_session() as session: with sqlManager.get_session() as session:
conn = session.connection() conn = session.connection()
df: DataFrame
for df in resolved_dataframes:
try:
# Drop uneeded columns and filter only to resolved data
df = process_resolutions(df)
# Save to the database
df.to_sql("Resolutions", conn, if_exists="append")
except Exception as e:
logger.exception(f"Could not save resolution dataframe: {e}")
continue
rdf: DataFrame
for rdf in resolved_dataframes:
cols: list[str] = rdf.columns.to_list()
if "onhold_amount" in cols:
logger.debug(f"Found 'onhold_amount' in rdf: no_match dataframe")
# Split the on_hold col to normalize with amount mismatch
rdf["onhold_amount_GP"] = rdf.apply(lambda row:
row.onhold_amount if row.Source == "GP" else None
)
rdf["onhold_amount_OB"] = rdf.apply(lambda row:
row.onhold_amount if row.Source == "OB" else None
)
else:
logger.debug(f"No 'onhold_amount' col found in rdf: amount_mismatch dataframe")
# Create a unified column for index
rdf["Indentifier"] = rdf.apply(lambda row:
hash_cols(row, ["ID_OB","ID_GP"]), axis=1
)
rec_cols: list[str] = [
"Indentifier",
"ID_GP",
"ID_OB",
"Hide Next Month",
"Resolution"
]
def get_prev_reconciled(contracts: list[str]) -> DataFrame: def get_prev_reconciled(contracts: list[str]) -> DataFrame:
""" """
@ -117,13 +82,13 @@ def get_prev_reconciled(contracts: list[str]) -> DataFrame:
# Create a temp table to hold this batches contract numbers # Create a temp table to hold this batches contract numbers
# this table will be cleared when sqlManager goes out of scope # this table will be cleared when sqlManager goes out of scope
temp_table_statement = """ temp_table_statement = """
CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_numbers VARCHAR(11)); CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_number VARCHAR(11));
""" """
sqlManager.execute(temp_table_statement) sqlManager.execute(temp_table_statement)
# Insert the current contracts into the temp table # Insert the current contracts into the temp table
insert_contracts = f""" insert_contracts = f"""
INSERT INTO CUR_CONTRACTS (contract_numbers) VALUES INSERT INTO CUR_CONTRACTS (contract_number) VALUES
{', '.join([f"('{cn}')" for cn in contracts])}; {', '.join([f"('{cn}')" for cn in contracts])};
""" """
sqlManager.execute(insert_contracts) sqlManager.execute(insert_contracts)
@ -141,6 +106,8 @@ def get_prev_reconciled(contracts: list[str]) -> DataFrame:
if __name__ == "__main__": if __name__ == "__main__":
import argparse import argparse
from logging import DEBUG
logger.setLevel(DEBUG)
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog="HoldReconcilerRecord", prog="HoldReconcilerRecord",
@ -153,4 +120,4 @@ if __name__ == "__main__":
# Amount Mismatch # Amount Mismatch
amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch") amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch")
save_recs(resolved_dataframes=[no_match, amt_mm]) save_rec(resolved_dataframes=[no_match, amt_mm])

@ -1,12 +1,14 @@
from pandas import DataFrame, merge, to_datetime, NaT from pandas import DataFrame, merge, to_datetime, NaT, concat, Series
from numpy import concatenate from numpy import concatenate
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from logging import getLogger from logging import getLogger
import re import re
from typing import Literal from typing import Literal
import datetime import datetime
from copy import deepcopy
from helpers import CN_REGEX from helpers import CN_REGEX, drop_unnamed
from memory import get_prev_reconciled
logger = getLogger(__name__) logger = getLogger(__name__)
@ -17,8 +19,11 @@ class HoldReport(ABC):
def __init__(self, dataframe: DataFrame, reports_config: dict) -> None: def __init__(self, dataframe: DataFrame, reports_config: dict) -> None:
self.config = reports_config self.config = reports_config
drop_unnamed(dataframe)
self.df = dataframe self.df = dataframe
self.prev_rec = None
self._normalize() self._normalize()
self._previsouly_resolved()
def _normalize(self): def _normalize(self):
@ -45,37 +50,88 @@ class HoldReport(ABC):
self.df["Source"] = self.source self.df["Source"] = self.source
def _get_no_match(self, other: 'HoldReport'): def _previsouly_resolved(self):
# Merge the two DataFrames using the contract number as the join key """
outer_merge = merge( """
self.df, other.df, current_contracts: list[str] = self.df["contract_number"]
how="outer",
on=["contract_number"], prev_recd: DataFrame = get_prev_reconciled(contracts=current_contracts)
suffixes=('_'+self.source, '_'+other.source) if not prev_recd:
logger.info("No previously reconciled!")
self.df = self._add_work_columns(self.df)
return
self.prev_rec = prev_recd
start_size = self.df.shape[0]
logger.debug(f"Report DF: \n{self.df}")
logger.debug(f"prev_rec: \n{prev_recd}")
source_id = f"ID_{self.source}"
self.df[source_id] = self.df["ID"]
self.df = merge(
self.df,
prev_recd,
how="left",
on= source_id,
suffixes=("_cur", "_prev")
) )
#self.df.to_excel(f"merged_df_{self.source}.xlsx")
# Drop anything that should be ignored
self.df = self.df[self.df["Hide Next Month"] != True]
logger.info(f"Prev res added:\n{self.df}")
col_to_drop = []
for c in self.df.keys().to_list():
logger.debug(f"{c=}")
if "_prev" in c or "ID_" in c:
logger.debug(f"Found '_prev' in {c}")
col_to_drop.append(c)
else:
logger.debug(f"{c} is a good col!")
#col_to_drop.extend([c for c in self.df.keys().to_list() if '_prev' in c])
logger.debug(f"{col_to_drop=}")
self.df.drop(
columns= col_to_drop,
inplace=True
)
# Restandardize
self.df.rename(columns={"contract_number_cur": "contract_number"}, inplace=True)
end_size = self.df.shape[0]
logger.info(f"Reduced df by {start_size-end_size}")
# Filter the merged DataFrame to include only the transactions that do not have a match in both OBT and GPT def _remove_full_matches(self, other: 'HoldReport'):
no_match = outer_merge.loc[ """
(outer_merge[f"Source_{self.source}"].isna()) | Removes any contracts that match both contract number and hold amount.
(outer_merge[f"Source_{other.source}"].isna()) These do not need to be reconciled.
]
# Fill in missing values and drop unnecessary columns This id done 'in place' to both dataframes
no_match["Source"] = no_match[f"Source_{self.source}"].fillna("GP") """
no_match["onhold_amount"] = no_match[f"onhold_amount_{self.source}"].fillna( filter_id_match: DataFrame = self.df[~(self.df["ID"].isin(other.df["ID"]))]
no_match[f"onhold_amount_{other.source}"] other.df: DataFrame = other.df[~(other.df["ID"].isin(self.df["ID"]))]
) self.df = filter_id_match
no_match["vendor_name"] = no_match[f"vendor_name_{self.source}"].fillna( self.combined_missing: DataFrame = concat([self.df, other.df], ignore_index=True)
no_match[f"vendor_name_{other.source}"] self.combined_missing.to_excel("ALL MISSING.xlsx")
logger.debug(f"Combined Missing:\n{self.combined_missing}")
logger.info(f"Payments with errors: {self.combined_missing.shape[0]}")
@staticmethod
def _created_combined_col(column: str, target_df: DataFrame, sources: tuple[str, str]) -> DataFrame :
"""
Creates a new column by filling empty columns of this source, with the matching column from another source
"""
this, that = sources
target_df[column] = target_df[f"{column}_{this}"].fillna(
target_df[f"{column}_{that}"]
) )
return target_df
return no_match
def _get_contract_matches(self, other: 'HoldReport') -> DataFrame: def _requires_rec(self, other: 'HoldReport') -> DataFrame:
""" """
To be run after full matches have been re
""" """
# Merge the two filtered DataFrames on the contract number # Merge the two filtered DataFrames on the contract number
contract_match = merge( contract_match = merge(
self.df, other.df, self.df, other.df,
@ -84,47 +140,76 @@ class HoldReport(ABC):
suffixes=('_'+self.source, '_'+other.source) suffixes=('_'+self.source, '_'+other.source)
) )
contract_match["vendor_name"] = contract_match[f"vendor_name_{self.source}"].fillna( #contract_match.to_excel("CONTRACT_MATCH.xlsx")
contract_match[f"vendor_name_{other.source}"]
) for col in ["vendor_name", "Resolution", "Notes"]:
self._created_combined_col(col, contract_match, (self.source, other.source))
return contract_match logger.debug(f"_requires_rec | contract_match:\n{contract_match.columns} ({contract_match.shape})")
no_match: DataFrame = self.combined_missing[~(
self.combined_missing["contract_number"].isin(
contract_match["contract_number"]
))
]
no_match[f"ID_{self.source}"] = no_match.apply(lambda row:
row["ID"] if row["Source"] == self.source else None
, axis=1)
no_match[f"ID_{other.source}"] = no_match.apply(lambda row:
row["ID"] if row["Source"] == other.source else None
, axis=1)
logger.debug(f"_requires_rec | no_match:\n{no_match.columns} ({no_match.shape})")
return contract_match, no_match
@staticmethod @staticmethod
def _add_work_columns(df: DataFrame) -> DataFrame: def _add_work_columns(df: DataFrame) -> DataFrame:
""" """
Add empty columns to the dataframe to faciliate working through the report. Add empty columns to the dataframe to faciliate working through the report.
""" """
WORK_COLS = ["Resolution", "Notes"] logger.debug("Adding work columns!")
df_cols: list[str] = df.columns.to_list()
WORK_COLS = ["Hide Next Month","Resolution"]
for col in WORK_COLS: for col in WORK_COLS:
df[col] = '' if col not in df_cols:
df[col] = ''
return df return df
def reconcile(self, other: 'HoldReport') -> tuple[DataFrame]: def reconcile(self, other: 'HoldReport') -> tuple[DataFrame]:
""" """
""" """
no_match: DataFrame = self._get_no_match(other) self._remove_full_matches(other)
no_match.to_excel("NOMATCH.xlsx") all_prev_reced = concat([self.prev_rec, other.prev_rec],ignore_index=True)
logger.debug(f"No_match: {no_match}") logger.debug(f"Removed matches:\n{self.df}")
amount_mismatch: DataFrame = self._get_contract_matches(other) amount_mismatch, no_match = self._requires_rec(other)
amount_mismatch.to_excel("AMTMM.xlsx")
logger.debug(f"amt_mismatche: {no_match}") logger.debug(f"reconcile | no_match unaltered\n{no_match.columns} ({no_match.shape})")
logger.debug(f"reconcile | am_mm unaltered:\n{amount_mismatch.columns} ({amount_mismatch.shape})")
columns: list[str] = ["ID_GP", "ID_OB"]
columns.extend(self.config["output_columns"])
nm_cols:list[str] = deepcopy(columns)
nm_cols.insert(3,"onhold_amount")
nm_cols.insert(4,"Source")
columns.insert(3,"onhold_amount_GP")
columns.insert(4, "onhold_amount_OB")
# Select and reorder columns # Select and reorder columns
no_match = no_match[ no_match = no_match[
["Source"].extend(self.config["output_columns"]) nm_cols
] ]
no_match = self._add_work_columns(no_match)
amount_mismatch = amount_mismatch[ amount_mismatch = amount_mismatch[
self.config["output_columns"] columns
] ]
amount_mismatch = self._add_work_columns(amount_mismatch) logger.info(f"no_match: {no_match.shape[0]}")
logger.info(f"am_mm: {amount_mismatch.shape[0]}")
return no_match, amount_mismatch return no_match, amount_mismatch
class OnBaseReport(HoldReport): class OnBaseReport(HoldReport):
@ -134,16 +219,14 @@ class OnBaseReport(HoldReport):
def get_overdue(self) -> DataFrame: def get_overdue(self) -> DataFrame:
""" """
""" """
self.df["install_date"] = to_datetime(self.df["install_date"]) self.df["InstallDate"] = to_datetime(self.df["InstallDate"])
self.df["install_date"].fillna(NaT, inplace=True) self.df["InstallDate"].fillna(NaT, inplace=True)
return self.df[self.df["install_date"].dt.date < datetime.date.today()] return self.df[self.df["InstallDate"].dt.date < datetime.date.today()]
class GreatPlainsReport(HoldReport): class GreatPlainsReport(HoldReport):
source = "GP" source = "GP"
filted_df: bool = False
def __init__(self, dataframe: DataFrame, report_config: dict) -> None: def __init__(self, dataframe: DataFrame, report_config: dict) -> None:
@ -185,4 +268,4 @@ class GreatPlainsReport(HoldReport):
rows_to_drop = gp_report_df[remove_mask].index rows_to_drop = gp_report_df[remove_mask].index
gp_report_df.drop(rows_to_drop, inplace=True) gp_report_df.drop(rows_to_drop, inplace=True)
return gp_report_df return gp_report_df
Loading…
Cancel
Save