Reworked how the reconciliation is done: filter gp -> remove prev ->

remove full match -> get contract match -> remaining = no match
Changed how the memory cols work. Not finished
dev
= 3 years ago
parent 7ad4f76943
commit 9ad5e9180c
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 1
      .gitignore
  2. 17
      config_reports.toml
  3. 28
      hold_reconciler.py
  4. 129
      memory.py
  5. 189
      reports.py

1
.gitignore vendored

@ -3,6 +3,7 @@ venv/
work/
build/
dist/
ghlib/
*.log
*.xlsx

@ -1,13 +1,14 @@
output_columns = [
"contract_number",
"vendor_name",
"AppNum", # OB only
"DateBooked", # OB only
"Document Number"# GP Only
"AppNum", # OB only
"DateBooked", # OB only
"Document Number",# GP Only
"Resolution",
"Notes"
# 'Source' added for 'no match'
]
[gp_filters]
# These regexes will be combined with ORs and used to filter
# the document number column of the GP report
@ -23,12 +24,8 @@ output_columns = [
"cma"
]
po_filter = "^(?!.*cma(\\s|\\d)).*$"
[shared_columns]
contract_number = { GP = "Transaction Description", OB = "Contract"}
onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" }
vendor_name = { GP = "Vendor Name", OB = "DealerName"}
vendor_name = { GP = "Vendor Name", OB = "DealerName"}

@ -4,8 +4,8 @@ then utilizes the reconcile module to find the differences between them. The out
saved as an Excel file with today's date.
"""
# Custom module for reconciliation
from helpers import setup_logging, find_most_recent_file, check_sheet
from models import OnBaseReport, GreatPlainsReport
from helpers import setup_logging, find_most_recent_file
from reports import OnBaseReport, GreatPlainsReport
import pandas as pd
from pandas import DataFrame
@ -15,6 +15,9 @@ import logging
from tomllib import load
import logging.config
from datetime import datetime as dt
from openpyxl import load_workbook, Workbook
import pathlib
from pathlib import Path
"""
[ ] Pull in past reconciliations to check against
@ -106,11 +109,24 @@ def main() -> int:
no_match, amt_mismatch = obr.reconcile(gpr)
# Write the results to a new Excel file
with pd.ExcelWriter(f"Work/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
no_match.to_excel(writer, sheet_name="No Match", index=False)
amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch", index=False)
output_name: Path = Path(f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx")
output_path: Path = Path("./Work", output_name)
with pd.ExcelWriter(output_path, mode='w') as writer:
no_match.to_excel(writer, sheet_name="No Match",
index=False, freeze_panes=(1,3)
)
amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch",
index=False, freeze_panes=(1,3)
)
overdue.to_excel(writer, sheet_name="Overdue", index=False)
wb: Workbook = load_workbook(output_path)
for sheet in ["No Match", "Amount Mismatch"]:
ws = wb[sheet]
ws.column_dimensions['A'].hidden = True
ws.column_dimensions['B'].hidden = True
wb.save(output_path)
return 0

@ -7,99 +7,64 @@ resolved holds.
*Last Updated: version 1.3
"""
from . import drop_unnamed
from helpers import drop_unnamed, setup_logging
from ghlib.database.database_manager import SQLiteManager
from pandas import DataFrame, Series, read_sql_query, read_excel, concat
from logging import getLogger
from dataclasses import dataclass
from hashlib import md5
setup_logging()
logger = getLogger(__name__)
def normalize_cols(df: DataFrame) -> DataFrame:
    """
    Normalize column names of *df*.

    TODO: not yet implemented — this is a stub that currently returns
    ``None`` despite the ``DataFrame`` return annotation (the commit
    notes the memory-column rework is "Not finished"). Implement or
    remove before relying on it.
    """
def hash_cols(row: Series, cols_to_hash: list[str]) -> str:
    """
    Build a stable MD5 hex digest from the given columns of *row*.

    Values are coerced to ``str`` before joining so that non-string
    entries (e.g. ``None``/``NaN`` left behind when only one of
    ``ID_OB``/``ID_GP`` is populated) no longer raise ``TypeError``
    inside ``str.join``; string inputs hash exactly as before.

    :param row: DataFrame row (a Series) to derive the identifier from.
    :param cols_to_hash: column names whose values form the hash input.
    :return: hex digest string identifying this combination of values.
    """
    md5_hash = md5()
    # Concatenate column values in the given order, then hash once.
    md5_hash.update(''.join(str(row[col]) for col in cols_to_hash).encode('utf-8'))
    return md5_hash.hexdigest()
def process_resolutions(df: DataFrame) -> DataFrame:
    """
    Filter a reviewed reconciliation sheet down to resolved rows and
    normalize it for storage.

    Drops unnamed (index-artifact) columns, removes rows with a blank
    'Resolution', lower-cases/strips the resolution text, and — when the
    sheet carries separate GP and OB amount columns (amount-mismatch
    sheets) — splits each row into one GP and one OB record sharing a
    single 'onhold_amount' column tagged with a 'Source'.

    :param df: raw sheet as read from the reconciled workbook.
    :return: normalized DataFrame with a 'Type' column of either
             'AmountMismatch' or 'NoMatch'.
    """
    # Drop unnamed columns:
    drop_unnamed(df)  # Works 'inplace'
    # Drop anything where resolution is blank; copy() so the later
    # column assignments hit a real frame, not a view (avoids
    # SettingWithCopyWarning / silently-lost writes).
    df = df[~df["Resolution"].isnull()].copy()
    # Standardize the resolution text
    df["Resolution"] = df["Resolution"].astype(str).str.strip().str.lower()
    # Check for multiple 'onhold_amount' columns (amount-mismatch sheet)
    if "onhold_amount_ob" in df.columns:
        # Create duplicates with the other amounts
        gp_amts: DataFrame = df[
            ["contract_number",
             "onhold_amount_gp",
             "Resolution",
             "Notes"
             ]].copy()
        df = df[
            ["contract_number",
             "onhold_amount_ob",
             "Resolution",
             "Notes"
             ]].copy()
        # Rename the amount columns and add the source
        gp_amts = gp_amts.rename(columns={"onhold_amount_gp": "onhold_amount"})
        gp_amts["Source"] = "GP"
        df = df.rename(columns={"onhold_amount_ob": "onhold_amount"})
        df["Source"] = "OB"
        # Combine them back together
        df = concat([df, gp_amts])
        df["Type"] = "AmountMismatch"
    else:
        # No-match sheets already carry a single amount plus a Source
        df = df[
            ["Source",
             "contract_number",
             "onhold_amount",
             "Resolution",
             "Notes"
             ]].copy()
        df["Type"] = "NoMatch"
    return df
def save_recs(resolved_dataframes: list[DataFrame]):
def save_rec(resolved_dataframes: list[DataFrame]):
"""
#TODO Actually handle this...
"""
#raise NotImplementedError("You were too lazy to fix this after the rewrite. FIX PLZ!")
sqlManager: SQLiteManager = SQLiteManager("OnHold.db")
with sqlManager.get_session() as session:
conn = session.connection()
df: DataFrame
for df in resolved_dataframes:
try:
# Drop unneeded columns and filter only to resolved data
df = process_resolutions(df)
# Save to the database
df.to_sql("Resolutions", conn, if_exists="append")
except Exception as e:
logger.exception(f"Could not save resolution dataframe: {e}")
continue
rdf: DataFrame
for rdf in resolved_dataframes:
cols: list[str] = rdf.columns.to_list()
if "onhold_amount" in cols:
logger.debug(f"Found 'onhold_amount' in rdf: no_match dataframe")
# Split the on_hold col to normalize with amount mismatch
rdf["onhold_amount_GP"] = rdf.apply(lambda row:
row.onhold_amount if row.Source == "GP" else None
)
rdf["onhold_amount_OB"] = rdf.apply(lambda row:
row.onhold_amount if row.Source == "OB" else None
)
else:
logger.debug(f"No 'onhold_amount' col found in rdf: amount_mismatch dataframe")
# Create a unified column for index
rdf["Indentifier"] = rdf.apply(lambda row:
hash_cols(row, ["ID_OB","ID_GP"]), axis=1
)
rec_cols: list[str] = [
"Indentifier",
"ID_GP",
"ID_OB",
"Hide Next Month",
"Resolution"
]
def get_prev_reconciled(contracts: list[str]) -> DataFrame:
"""
@ -117,13 +82,13 @@ def get_prev_reconciled(contracts: list[str]) -> DataFrame:
# Create a temp table to hold this batch's contract numbers
# this table will be cleared when sqlManager goes out of scope
temp_table_statement = """
CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_numbers VARCHAR(11));
CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_number VARCHAR(11));
"""
sqlManager.execute(temp_table_statement)
# Insert the current contracts into the temp table
insert_contracts = f"""
INSERT INTO CUR_CONTRACTS (contract_numbers) VALUES
INSERT INTO CUR_CONTRACTS (contract_number) VALUES
{', '.join([f"('{cn}')" for cn in contracts])};
"""
sqlManager.execute(insert_contracts)
@ -141,6 +106,8 @@ def get_prev_reconciled(contracts: list[str]) -> DataFrame:
if __name__ == "__main__":
import argparse
from logging import DEBUG
logger.setLevel(DEBUG)
parser = argparse.ArgumentParser(
prog="HoldReconcilerRecord",
@ -153,4 +120,4 @@ if __name__ == "__main__":
# Amount Mismatch
amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch")
save_recs(resolved_dataframes=[no_match, amt_mm])
save_rec(resolved_dataframes=[no_match, amt_mm])

@ -1,12 +1,14 @@
from pandas import DataFrame, merge, to_datetime, NaT
from pandas import DataFrame, merge, to_datetime, NaT, concat, Series
from numpy import concatenate
from abc import ABC, abstractmethod
from logging import getLogger
import re
from typing import Literal
import datetime
from copy import deepcopy
from helpers import CN_REGEX
from helpers import CN_REGEX, drop_unnamed
from memory import get_prev_reconciled
logger = getLogger(__name__)
@ -17,8 +19,11 @@ class HoldReport(ABC):
def __init__(self, dataframe: DataFrame, reports_config: dict) -> None:
self.config = reports_config
drop_unnamed(dataframe)
self.df = dataframe
self.prev_rec = None
self._normalize()
self._previsouly_resolved()
def _normalize(self):
@ -45,37 +50,88 @@ class HoldReport(ABC):
self.df["Source"] = self.source
def _get_no_match(self, other: 'HoldReport'):
# Merge the two DataFrames using the contract number as the join key
outer_merge = merge(
self.df, other.df,
how="outer",
on=["contract_number"],
suffixes=('_'+self.source, '_'+other.source)
def _previsouly_resolved(self):
"""
"""
current_contracts: list[str] = self.df["contract_number"]
prev_recd: DataFrame = get_prev_reconciled(contracts=current_contracts)
if not prev_recd:
logger.info("No previously reconciled!")
self.df = self._add_work_columns(self.df)
return
self.prev_rec = prev_recd
start_size = self.df.shape[0]
logger.debug(f"Report DF: \n{self.df}")
logger.debug(f"prev_rec: \n{prev_recd}")
source_id = f"ID_{self.source}"
self.df[source_id] = self.df["ID"]
self.df = merge(
self.df,
prev_recd,
how="left",
on= source_id,
suffixes=("_cur", "_prev")
)
#self.df.to_excel(f"merged_df_{self.source}.xlsx")
# Drop anything that should be ignored
self.df = self.df[self.df["Hide Next Month"] != True]
logger.info(f"Prev res added:\n{self.df}")
col_to_drop = []
for c in self.df.keys().to_list():
logger.debug(f"{c=}")
if "_prev" in c or "ID_" in c:
logger.debug(f"Found '_prev' in {c}")
col_to_drop.append(c)
else:
logger.debug(f"{c} is a good col!")
#col_to_drop.extend([c for c in self.df.keys().to_list() if '_prev' in c])
logger.debug(f"{col_to_drop=}")
self.df.drop(
columns= col_to_drop,
inplace=True
)
# Restandardize
self.df.rename(columns={"contract_number_cur": "contract_number"}, inplace=True)
end_size = self.df.shape[0]
logger.info(f"Reduced df by {start_size-end_size}")
# Filter the merged DataFrame to include only the transactions that do not have a match in both OBT and GPT
no_match = outer_merge.loc[
(outer_merge[f"Source_{self.source}"].isna()) |
(outer_merge[f"Source_{other.source}"].isna())
]
def _remove_full_matches(self, other: 'HoldReport'):
"""
Removes any contracts that match both contract number and hold amount.
These do not need to be reconciled.
# Fill in missing values and drop unnecessary columns
no_match["Source"] = no_match[f"Source_{self.source}"].fillna("GP")
no_match["onhold_amount"] = no_match[f"onhold_amount_{self.source}"].fillna(
no_match[f"onhold_amount_{other.source}"]
)
no_match["vendor_name"] = no_match[f"vendor_name_{self.source}"].fillna(
no_match[f"vendor_name_{other.source}"]
This is done 'in place' to both dataframes
"""
filter_id_match: DataFrame = self.df[~(self.df["ID"].isin(other.df["ID"]))]
other.df: DataFrame = other.df[~(other.df["ID"].isin(self.df["ID"]))]
self.df = filter_id_match
self.combined_missing: DataFrame = concat([self.df, other.df], ignore_index=True)
self.combined_missing.to_excel("ALL MISSING.xlsx")
logger.debug(f"Combined Missing:\n{self.combined_missing}")
logger.info(f"Payments with errors: {self.combined_missing.shape[0]}")
@staticmethod
def _created_combined_col(column: str, target_df: DataFrame, sources: tuple[str, str]) -> DataFrame :
"""
Creates a new column by filling empty columns of this source, with the matching column from another source
"""
this, that = sources
target_df[column] = target_df[f"{column}_{this}"].fillna(
target_df[f"{column}_{that}"]
)
return no_match
return target_df
def _get_contract_matches(self, other: 'HoldReport') -> DataFrame:
def _requires_rec(self, other: 'HoldReport') -> DataFrame:
"""
To be run after full matches have been removed.
"""
# Merge the two filtered DataFrames on the contract number
contract_match = merge(
self.df, other.df,
@ -84,47 +140,76 @@ class HoldReport(ABC):
suffixes=('_'+self.source, '_'+other.source)
)
contract_match["vendor_name"] = contract_match[f"vendor_name_{self.source}"].fillna(
contract_match[f"vendor_name_{other.source}"]
)
return contract_match
#contract_match.to_excel("CONTRACT_MATCH.xlsx")
for col in ["vendor_name", "Resolution", "Notes"]:
self._created_combined_col(col, contract_match, (self.source, other.source))
logger.debug(f"_requires_rec | contract_match:\n{contract_match.columns} ({contract_match.shape})")
no_match: DataFrame = self.combined_missing[~(
self.combined_missing["contract_number"].isin(
contract_match["contract_number"]
))
]
no_match[f"ID_{self.source}"] = no_match.apply(lambda row:
row["ID"] if row["Source"] == self.source else None
, axis=1)
no_match[f"ID_{other.source}"] = no_match.apply(lambda row:
row["ID"] if row["Source"] == other.source else None
, axis=1)
logger.debug(f"_requires_rec | no_match:\n{no_match.columns} ({no_match.shape})")
return contract_match, no_match
@staticmethod
def _add_work_columns(df: DataFrame) -> DataFrame:
"""
Add empty columns to the dataframe to facilitate working through the report.
"""
WORK_COLS = ["Resolution", "Notes"]
logger.debug("Adding work columns!")
df_cols: list[str] = df.columns.to_list()
WORK_COLS = ["Hide Next Month","Resolution"]
for col in WORK_COLS:
df[col] = ''
if col not in df_cols:
df[col] = ''
return df
def reconcile(self, other: 'HoldReport') -> tuple[DataFrame]:
"""
"""
no_match: DataFrame = self._get_no_match(other)
no_match.to_excel("NOMATCH.xlsx")
logger.debug(f"No_match: {no_match}")
self._remove_full_matches(other)
all_prev_reced = concat([self.prev_rec, other.prev_rec],ignore_index=True)
logger.debug(f"Removed matches:\n{self.df}")
amount_mismatch: DataFrame = self._get_contract_matches(other)
amount_mismatch.to_excel("AMTMM.xlsx")
logger.debug(f"amt_mismatche: {no_match}")
amount_mismatch, no_match = self._requires_rec(other)
logger.debug(f"reconcile | no_match unaltered\n{no_match.columns} ({no_match.shape})")
logger.debug(f"reconcile | am_mm unaltered:\n{amount_mismatch.columns} ({amount_mismatch.shape})")
columns: list[str] = ["ID_GP", "ID_OB"]
columns.extend(self.config["output_columns"])
nm_cols:list[str] = deepcopy(columns)
nm_cols.insert(3,"onhold_amount")
nm_cols.insert(4,"Source")
columns.insert(3,"onhold_amount_GP")
columns.insert(4, "onhold_amount_OB")
# Select and reorder columns
no_match = no_match[
["Source"].extend(self.config["output_columns"])
nm_cols
]
no_match = self._add_work_columns(no_match)
amount_mismatch = amount_mismatch[
self.config["output_columns"]
columns
]
amount_mismatch = self._add_work_columns(amount_mismatch)
logger.info(f"no_match: {no_match.shape[0]}")
logger.info(f"am_mm: {amount_mismatch.shape[0]}")
return no_match, amount_mismatch
class OnBaseReport(HoldReport):
@ -134,16 +219,14 @@ class OnBaseReport(HoldReport):
def get_overdue(self) -> DataFrame:
"""
"""
self.df["install_date"] = to_datetime(self.df["install_date"])
self.df["install_date"].fillna(NaT, inplace=True)
return self.df[self.df["install_date"].dt.date < datetime.date.today()]
self.df["InstallDate"] = to_datetime(self.df["InstallDate"])
self.df["InstallDate"].fillna(NaT, inplace=True)
return self.df[self.df["InstallDate"].dt.date < datetime.date.today()]
class GreatPlainsReport(HoldReport):
source = "GP"
filted_df: bool = False
def __init__(self, dataframe: DataFrame, report_config: dict) -> None:
@ -185,4 +268,4 @@ class GreatPlainsReport(HoldReport):
rows_to_drop = gp_report_df[remove_mask].index
gp_report_df.drop(rows_to_drop, inplace=True)
return gp_report_df
return gp_report_df
Loading…
Cancel
Save