Compare commits

...

2 Commits

Author  SHA1        Message                                                                Date
        9ad5e9180c  Reworked how the reconciliation is done: filter gp -> remove prev ->  3 years ago
        7ad4f76943  Reworked the report system to use classes for each report type.       3 years ago
13 changed files:
  1. .gitignore (9 lines changed)
  2. __init__.py (0 lines changed)
  3. config.toml (53 lines changed)
  4. config_logger.toml (22 lines changed)
  5. config_reports.toml (31 lines changed)
  6. helpers.py (90 lines changed)
  7. hold_reconciler.py (136 lines changed)
  8. memory.py (123 lines changed)
  9. rec_lib.py (251 lines changed)
  10. rec_records.py (21 lines changed)
  11. reconcile_holds.py (191 lines changed)
  12. reports.py (271 lines changed)
  13. version.txt (1 line changed)

.gitignore (vendored, 9 lines changed)

@@ -3,5 +3,12 @@ venv/
work/
build/
dist/
ghlib/
*.log
*.log
*.xlsx
*.csv
*.db
*.txt
!version.txt

config.toml (deleted)
@@ -1,53 +0,0 @@
write_dir = "Work"
DocNumFilter = [
"p(oin)?ts",
"pool",
"promo",
"o(ver)?f(und)?",
"m(ar)?ke?t",
"title",
"adj",
"reg free",
"cma"
]
[ExcelColumns]
[ExcelColumns.OB]
contract_number = "Contract" # 3070508-007
onhold_amount = "CurrentOnHold"
install_date = "InstallDate"
[ExcelColumns.GP]
contract_number = "Transaction Description" # 1234-56789
onhold_amount = "Current Trx Amount"
doc_num = "Document Number" # 1-316141 HOLD
pur_order = "Purchase Order Number" # ABC123
doc_type = "Document Type" # Invoice or Credit Memo
[logger]
version = 1
disable_existing_loggers = false
[logger.formatters.custom]
format = "'%(asctime)s - %(module)s - %(levelname)s - %(message)s'"
[logger.handlers.console]
class = "logging.StreamHandler"
level = "DEBUG"
formatter = "custom"
stream = "ext://sys.stdout"
[logger.handlers.file]
class = "logging.FileHandler"
level = "DEBUG"
formatter = "custom"
filename = "on_hold.log"
[logger.root]
level = "DEBUG"
handlers = ["console", "file"]

config_logger.toml (new file)
@@ -0,0 +1,22 @@
version = 1
disable_existing_loggers = false
[formatters.custom]
format = "'%(asctime)s - %(module)s - %(levelname)s - %(message)s'"
[handlers.console]
class = "logging.StreamHandler"
level = "DEBUG"
formatter = "custom"
stream = "ext://sys.stdout"
[handlers.file]
class = "logging.FileHandler"
level = "DEBUG"
formatter = "custom"
filename = "on_hold.log"
[root]
level = "DEBUG"
handlers = ["console", "file"]

config_reports.toml (new file)
@@ -0,0 +1,31 @@
output_columns = [
"contract_number",
"vendor_name",
"AppNum", # OB only
"DateBooked", # OB only
"Document Number",# GP Only
"Resolution",
"Notes"
# 'Source' added for 'no match'
]
[gp_filters]
# These regexes will be combined with ORs and used to filter
# the document number column of the GP report
doc_num_filters = [
"p(oin)?ts",
"pool",
"promo",
"o(ver)?f(und)?",
"m(ar)?ke?t",
"title",
"adj",
"reg free",
"cma"
]
po_filter = "^(?!.*cma(\\s|\\d)).*$"
[shared_columns]
contract_number = { GP = "Transaction Description", OB = "Contract"}
onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" }
vendor_name = { GP = "Vendor Name", OB = "DealerName"}
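For reference, a minimal sketch of how these filters get applied downstream (mirroring the combination logic in reports.py; the sample document numbers here are hypothetical):
import re
doc_num_filters = ["p(oin)?ts", "pool", "promo", "cma"]
# OR the patterns together, case-insensitively, as GreatPlainsReport._filter does
bad_doc_num = re.compile("|".join(f"({rx})" for rx in doc_num_filters), re.IGNORECASE)
po_filter = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE)
assert not bad_doc_num.search("1-316141 HOLD")   # kept
assert bad_doc_num.search("1-316141 POINTS")     # filtered out
assert po_filter.match("ABC123")                 # acceptable PO number
assert not po_filter.match("CMA 123")            # excluded: 'cma' followed by space/digit
assert po_filter.match("John Locman")            # 'cma' not followed by space/digit, so kept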

helpers.py (new file)
@@ -0,0 +1,90 @@
"""
Hold Reconciler is an application meant to help reconcile the differences in payments
that are marked as on hold in Great Plains and OnBase.
It takes a report from OnBase and a report from Great Plains and checks them
against each other. It attempts to match them based on contract number and payment
amount, or failing that, on the contract number alone.
It also does a lot of filtering on the Great Plains report to remove irrelevant data.
*Last Updated: version 1.3*
*Originally developed in Spring of 2023 by Griffiths Lott (g@glott.me)*
"""
import re
from re import Pattern
import os
from os.path import basename
import glob
import logging
from pathlib import Path
from tomllib import load
from pandas import DataFrame, Series
from typing import TypeVar, Literal
import logging.config
from logging import getLogger
logger = getLogger(__name__)
CN_REGEX = re.compile(r"\d{7}(-\d{3})?")
def setup_logging():
"""
Sets up logging configuration from the TOML file. If the logging configuration fails to be loaded from the file,
a default logging configuration is used instead.
Returns:
logging.Logger: The logger instance.
"""
with open("config_logger.toml", "rb") as f:
config_dict: dict = load(f)
try:
# Try to load logging configuration from the TOML file
logging.config.dictConfig(config_dict)
except Exception as e:
# If the logging configuration fails, use a default configuration and log the error
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.warning("Failed setting up logger!")
logger.exception(e)
logger.warning(f"Config:\n{config_dict}")
return logger
def drop_unnamed(df: DataFrame, inplace: bool = True) -> DataFrame|None:
"""
Drops all Unnamed columns from a dataframe.
### CAUTION : This function acts *inplace* by default
(on the original dataframe, not a copy!)
"""
cols = [c for c in df.columns if "Unnamed" in c]
return df.drop(cols, axis=1, inplace=inplace)
def find_most_recent_file(folder_path: Path, file_pattern: Pattern) -> str:
"""
Given a folder path and a regular expression pattern, this function returns the path of the most recently modified
file in the folder that matches the pattern.
Args:
folder_path (Path): A pathlib.Path object representing the folder to search.
file_pattern (Pattern): A regular expression pattern used to filter the files in the folder.
Returns:
str: The path of the most recently modified file in the folder that matches the pattern.
"""
# Find all files in the folder that match the pattern
files = glob.glob(f"{folder_path}/*")
logger.debug(f"files: {files}")
# Get the modification time of each file and filter to only those that match the pattern
file_times = [(os.path.getmtime(path), path) for path in files if re.match(file_pattern, basename(path))]
# Sort the files by modification time (most recent first)
file_times.sort(reverse=True)
logger.debug(f"file times: {file_times}")
# Return the path of the most recent file (raises IndexError if nothing matched)
return file_times[0][1]
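A quick usage sketch (the folder and file names here are hypothetical):
from pathlib import Path
import re
from helpers import find_most_recent_file, CN_REGEX
# Pick the newest GP export out of ./Work, e.g. "gp holds 03-2023.xlsx"
newest = find_most_recent_file(Path("Work"), re.compile(r".*gp.*\.xlsx$", re.IGNORECASE))
# Extract a standardized contract number the same way the reports do
match = CN_REGEX.search("Contract 3070508-007 (March)")
print(newest, match.group(0) if match else None)  # -> <newest path> 3070508-007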

hold_reconciler.py (new file)
@@ -0,0 +1,136 @@
"""
This is the main entry point for this application. It finds the newest reports (GP & OB),
then uses the report classes from reports.py to find the differences between them. The output
is saved as an Excel file with today's date.
"""
# Custom module for reconciliation
from helpers import setup_logging, find_most_recent_file
from reports import OnBaseReport, GreatPlainsReport
import pandas as pd
from pandas import DataFrame
import re
from re import Pattern
import logging
from tomllib import load
import logging.config
from datetime import datetime as dt
from openpyxl import load_workbook, Workbook
import pathlib
from pathlib import Path
"""
[ ] Pull in past reconciliations to check against
[ ] Record reconciled transaction (connect with VBA)
[ ] Check GP against the database
[ ] Check OB against the database
[X] Add resolution column to error sheets
[ ] Add sheet for problem contracts already seen and 'resolved'
"""
setup_logging()
logger = logging.getLogger(__name__)
logger.info(f"Logger started with level: {logger.level}")
def get_reports(work_dir: str, report_config: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
"""
Searches `work_dir` for the most recently modified GP and OB Excel files and returns their
corresponding dataframes, selecting the first sheet that contains the required shared columns.
Args:
work_dir (str): The folder to search for the report files.
report_config (dict): The reports configuration, including the "shared_columns" mapping.
Returns:
tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively.
"""
# Define regular expression patterns to match the GP and OB Excel files
gp_regex: Pattern = re.compile(r".*gp.*\.xlsx$", re.IGNORECASE)
ob_regex: Pattern = re.compile(r".*ob.*\.xlsx$", re.IGNORECASE)
# Find the paths of the most recently modified GP and OB Excel files
gp_file_path = find_most_recent_file(work_dir, gp_regex)
logger.debug(f"gp_file_path: {gp_file_path}")
ob_file_path = find_most_recent_file(work_dir, ob_regex)
logger.debug(f"gp_file_path: {ob_file_path}")
# Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns
gp_xl = pd.ExcelFile(gp_file_path)
gp_req_cols = [col["GP"] for _, col in report_config["shared_columns"].items()]
logger.debug(f"GP_Req_cols: {gp_req_cols}")
gp_sheets = gp_xl.sheet_names
gp_dfs = pd.read_excel(gp_xl, sheet_name=gp_sheets)
for sheet in gp_dfs:
sheet_columns: list[str] = list(gp_dfs[sheet].columns)
logger.debug(f"gp ({sheet}) : {sheet_columns}")
logger.debug(f"Matches {[r in sheet_columns for r in gp_req_cols]}")
if all([r in sheet_columns for r in gp_req_cols]):
logger.debug("FOUND")
gp_df = gp_dfs[sheet]
break
ob_xl = pd.ExcelFile(ob_file_path)
ob_req_cols = [col["OB"] for _, col in report_config["shared_columns"].items()]
ob_sheets = ob_xl.sheet_names
ob_dfs = pd.read_excel(ob_xl, sheet_name=ob_sheets)
for sheet in ob_dfs:
sheet_columns: list[str] = list(ob_dfs[sheet].columns)
if all([r in sheet_columns for r in ob_req_cols]):
ob_df = ob_dfs[sheet]
break
return ob_df, gp_df
def main() -> int:
"""
This is the main function for the script. It reads configuration options from a TOML file, reads in the GP and OB
Excel files, performs data reconciliation and analysis, and writes the results to a new Excel file.
Returns:
int: 0 if the script executes successfully.
"""
# Read the configuration options from a TOML file
with open("config_reports.toml", "rb") as f:
reports_config: dict = load(f)
logger.debug(f"Reports Config: {reports_config}")
# Get the GP and OB dataframes from the Excel files
ob_df, gp_df = get_reports("Work", reports_config)
assert not ob_df.empty, "OB Data empty!"
assert not gp_df.empty, "GP Data empty!"
obr: OnBaseReport = OnBaseReport(ob_df, reports_config)
gpr: GreatPlainsReport = GreatPlainsReport(gp_df, reports_config)
overdue: DataFrame = obr.get_overdue()
no_match, amt_mismatch = obr.reconcile(gpr)
# Write the results to a new Excel file
output_name: Path = Path(f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx")
output_path: Path = Path("./Work", output_name)
with pd.ExcelWriter(output_path, mode='w') as writer:
no_match.to_excel(writer, sheet_name="No Match",
index=False, freeze_panes=(1,3)
)
amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch",
index=False, freeze_panes=(1,3)
)
overdue.to_excel(writer, sheet_name="Overdue", index=False)
wb: Workbook = load_workbook(output_path)
for sheet in ["No Match", "Amount Mismatch"]:
ws = wb[sheet]
ws.column_dimensions['A'].hidden = True
ws.column_dimensions['B'].hidden = True
wb.save(output_path)
return 0
if __name__ == "__main__":
print("Starting")
main()
print("Completed")

memory.py (new file)
@@ -0,0 +1,123 @@
"""
Classes and functions to parse completed reconciliation reports and remember
the resolutions of contracts.
Also provides a way for the reconciler to check holds against previously
resolved holds.
*Last Updated: version 1.3*
"""
from helpers import drop_unnamed, setup_logging
from ghlib.database.database_manager import SQLiteManager
from pandas import DataFrame, Series, read_sql_query, read_excel, concat
from logging import getLogger
from dataclasses import dataclass
from hashlib import md5
setup_logging()
logger = getLogger(__name__)
def hash_cols(row: Series, cols_to_hash: list[str]) -> str:
md5_hash = md5()
md5_hash.update((''.join(row[col] for col in cols_to_hash)).encode('utf-8'))
return md5_hash.hexdigest()
def save_rec(resolved_dataframes: list[DataFrame]):
"""
#TODO Actually handle this...
"""
#raise NotImplementedError("You were too lazy to fix this after the rewrite. FIX PLZ!")
sqlManager: SQLiteManager = SQLiteManager("OnHold.db")
with sqlManager.get_session() as session:
conn = session.connection()
rdf: DataFrame
for rdf in resolved_dataframes:
cols: list[str] = rdf.columns.to_list()
if "onhold_amount" in cols:
logger.debug(f"Found 'onhold_amount' in rdf: no_match dataframe")
# Split the on_hold col to normalize with amount mismatch
rdf["onhold_amount_GP"] = rdf.apply(lambda row:
row.onhold_amount if row.Source == "GP" else None
)
rdf["onhold_amount_OB"] = rdf.apply(lambda row:
row.onhold_amount if row.Source == "OB" else None
)
else:
logger.debug(f"No 'onhold_amount' col found in rdf: amount_mismatch dataframe")
# Create a unified column for index
rdf["Indentifier"] = rdf.apply(lambda row:
hash_cols(row, ["ID_OB","ID_GP"]), axis=1
)
rec_cols: list[str] = [
"Indentifier",
"ID_GP",
"ID_OB",
"Hide Next Month",
"Resolution"
]
def get_prev_reconciled(contracts: list[str]) -> DataFrame:
"""
Get a DataFrame of previously reconciled contracts from an SQLite database.
Args:
contracts (list[str]): A list of contract numbers to check for previously reconciled contracts.
Returns:
DataFrame: A DataFrame of previously reconciled contracts, or an empty DataFrame if none are found.
"""
# Create a DB manager
sqlManager: SQLiteManager = SQLiteManager("OnHold.db")
# Create a temp table to hold this batch's contract numbers
# this table will be cleared when sqlManager goes out of scope
temp_table_statement = """
CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_number VARCHAR(11));
"""
sqlManager.execute(temp_table_statement)
# Insert the current contracts into the temp table
insert_contracts = f"""
INSERT INTO CUR_CONTRACTS (contract_number) VALUES
{', '.join([f"('{cn}')" for cn in contracts])};
"""
sqlManager.execute(insert_contracts)
# Select previously resolved contracts
res_query = """
SELECT r.*
FROM Resolutions r
JOIN CUR_CONTRACTS t
ON r.contract_number = t.contract_number;
"""
resolved: DataFrame = sqlManager.execute(res_query, as_dataframe=True)
return resolved
if __name__ == "__main__":
import argparse
from logging import DEBUG
logger.setLevel(DEBUG)
parser = argparse.ArgumentParser(
prog="HoldReconcilerRecord",
)
parser.add_argument("-i", "--input")
args = parser.parse_args()
# No Match
no_match: DataFrame = read_excel(args.input, sheet_name="No Match")
# Amount Mismatch
amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch")
save_rec(resolved_dataframes=[no_match, amt_mm])
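A small illustration of the identifier hashing used above (toy values, assuming both ID columns hold strings):
from pandas import Series
from memory import hash_cols
row = Series({"ID_OB": "3070508-007_125.0", "ID_GP": "3070508-007_120.0"})
digest = hash_cols(row, ["ID_OB", "ID_GP"])  # md5 over the concatenated IDs
print(digest)  # same inputs always produce the same identifier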

rec_lib.py (deleted)
@@ -1,251 +0,0 @@
import pandas as pd
from pandas import DataFrame
from datetime import datetime as dt
import datetime
import re
from typing import Literal
import logging
logger = logging.getLogger(__name__)
def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame:
"""
Given a DataFrame containing OnBase installation data and a dictionary containing the OnBase Excel configuration,
this function returns a DataFrame containing the rows from `onbase_df` that have an installation date that is before
the current date.
Args:
onbase_df (pd.DataFrame): A pandas DataFrame containing OnBase installation data.
onbase_excel_config (dict): A dictionary containing the OnBase Excel configuration.
Returns:
pd.DataFrame: A pandas DataFrame containing the rows from `onbase_df` that have an installation date that is before
the current date.
"""
id_col = onbase_excel_config["install_date"]
onbase_df[id_col] = pd.to_datetime(onbase_df[id_col])
onbase_df[id_col].fillna(pd.NaT, inplace=True)
return onbase_df[onbase_df[id_col].dt.date < datetime.date.today()]
def filter_gp(gp_dataframe: pd.DataFrame, full_config: dict) -> pd.DataFrame:
"""
Given a pandas DataFrame containing GP data and a dictionary containing the GP configuration, this function
filters out rows from the DataFrame that are not needed for further analysis based on certain criteria.
Args:
gp_dataframe (pd.DataFrame): A pandas DataFrame containing GP data.
gp_config (dict): A dictionary containing the GP configuration.
Returns:
pd.DataFrame: A pandas DataFrame containing the filtered GP data.
"""
# Excludes anything that contains cma with a space or digit following it
# CMA23532 would be excluded but 'John Locman' would be allowed
GOOD_PO_NUM = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE)
gp_config: dict = full_config["ExcelColumns"]["GP"]
doc_num_regexes: list[str] = full_config["DocNumFilter"]
bad_doc_num = ''
rx : str
for rx in doc_num_regexes:
bad_doc_num += f"({rx})|"
bad_doc_num = re.compile(bad_doc_num[:-1], re.IGNORECASE)
logger.debug(f"Doc # filter: {bad_doc_num}")
# Create a filter/mask to use on the data
mask = (
(gp_dataframe[gp_config['doc_type']] == "Invoice") &
(gp_dataframe[gp_config['pur_order']].str.contains(GOOD_PO_NUM))
)
# Get the rows to drop based on the filter/mask
rows_to_drop = gp_dataframe[~mask].index
# Drop the rows and return the filtered DataFrame
filtered_df = gp_dataframe.drop(rows_to_drop, inplace=False)
mask = filtered_df[gp_config['doc_num']].str.contains(bad_doc_num)
rows_to_drop = filtered_df[mask].index
return filtered_df.drop(rows_to_drop, inplace=False)
def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"], excelConfig: dict):
"""
Given a pandas DataFrame containing transaction data, the source of the data ("GP" or "OB"), and a dictionary
containing the Excel configuration, this function creates a new DataFrame with columns for the contract number,
the amount on hold, a unique transaction ID, and the source of the data.
Args:
dataframe (pd.DataFrame): A pandas DataFrame containing transaction data.
source (Literal["GP", "OB"]): The source of the data ("GP" or "OB").
excelConfig (dict): A dictionary containing the Excel configuration.
Returns:
pd.DataFrame: A pandas DataFrame containing the contract number, amount on hold, transaction ID, and data source
for each transaction in the original DataFrame.
"""
column_config: dict = excelConfig[source]
logger.debug(f"column_config: {column_config}")
# Create a new DataFrame with the contract number and on-hold amount columns
transactions = dataframe[[column_config["contract_number"], column_config["onhold_amount"]]].copy()
# Rename the columns to standardize the column names
transactions.rename(columns={
column_config["contract_number"]: "contract_number",
column_config["onhold_amount"]: "onhold_amount",
}, inplace=True)
# Convert the on-hold amount column to float format and round to two decimal places
transactions["onhold_amount"] = transactions["onhold_amount"].astype(float).round(2)
# Use regex to extract the contract number from the column values and create a new column with the standardized format
CN_REGEX = re.compile(r"\d{7}(-\d{3})?")
transactions["contract_number"] = transactions["contract_number"].apply(
lambda cn: str(cn) if not re.search(CN_REGEX, str(cn))
else re.search(CN_REGEX, str(cn)).group(0)
)
# Create a new column with a unique transaction ID
transactions["ID"] = transactions["contract_number"] +'_'+\
transactions["onhold_amount"].astype(str)
# Create a new column with the data source
transactions["Source"] = source
# Return the new DataFrame with the contract number, on-hold amount, transaction ID, and data source columns
return transactions
def get_no_match(obt_df: pd.DataFrame, gpt_df: pd.DataFrame):
"""
Given two pandas DataFrames containing transaction data from OBT and GPT, respectively, this function returns a new
DataFrame containing only the transactions that do not have a match in both the OBT and GPT DataFrames.
Args:
obt_df (pd.DataFrame): A pandas DataFrame containing transaction data from OBT.
gpt_df (pd.DataFrame): A pandas DataFrame containing transaction data from GPT.
Returns:
pd.DataFrame: A pandas DataFrame containing the transactions that do not have a match in both the OBT and GPT
DataFrames.
"""
# Merge the two DataFrames using the contract number as the join key
merged_df = pd.merge(
obt_df, gpt_df,
how="outer",
on=["contract_number"],
suffixes=("_ob", "_gp")
)
# Filter the merged DataFrame to include only the transactions that do not have a match in both OBT and GPT
no_match = merged_df.loc[
(merged_df["Source_ob"].isna()) |
(merged_df["Source_gp"].isna())
]
# Fill in missing values and drop unnecessary columns
no_match["Source"] = no_match["Source_ob"].fillna("GP")
no_match["onhold_amount"] = no_match["onhold_amount_ob"].fillna(no_match["onhold_amount_gp"])
no_match.drop(columns=[
"ID_ob", "ID_gp",
"onhold_amount_ob", "onhold_amount_gp",
"Source_ob", "Source_gp"
],
inplace=True)
# Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
no_match = no_match[
[ "Source", "contract_number", "onhold_amount"]
]
return no_match
def get_not_full_match(obt_df: pd.DataFrame, gpt_df: pd.DataFrame):
"""
Given two pandas DataFrames containing transaction data from OBT and GPT, respectively, this function returns two new
DataFrames. The first DataFrame contains the transactions that have a full match on both the OBT and GPT DataFrames,
and the second DataFrame contains the transactions that do not have a full match.
Args:
obt_df (pd.DataFrame): A pandas DataFrame containing transaction data from OBT.
gpt_df (pd.DataFrame): A pandas DataFrame containing transaction data from GPT.
Returns:
tuple(pd.DataFrame, pd.DataFrame): A tuple of two DataFrames. The first DataFrame contains the transactions that
have a full match on both the OBT and GPT DataFrames, and the second DataFrame contains the transactions that do
not have a full match.
"""
# Combine the two DataFrames using an outer join on the contract number and on-hold amount
merged_df = pd.merge(
obt_df, gpt_df,
how="outer",
on=["ID", "contract_number", "onhold_amount"],
suffixes=("_ob", "_gp")
)
# Filter the merged DataFrame to include only the transactions that have a full match in both OBT and GPT
full_matched = merged_df.dropna(subset=["Source_ob", "Source_gp"])
full_matched.drop(columns=["Source_ob", "Source_gp"], inplace=True)
# Create a boolean mask for the rows to drop in full_matched
mask = merged_df["ID"].isin(full_matched["ID"])
# Use the mask to remove the selected rows and create a new DataFrame for not full match
not_full_match = merged_df[~mask]
# This includes items that DO match contracts, but not amounts
# It can have multiple items from one source with the same contract number
# Create a new column with the data source, using OBT as the default and GPT as backup if missing
not_full_match["Source"] = not_full_match["Source_ob"].fillna(not_full_match["Source_gp"])
# Drop the redundant Source columns
not_full_match.drop(columns=["Source_ob", "Source_gp"], inplace=True)
# Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
not_full_match = not_full_match[
[ "Source", "contract_number", "onhold_amount"]
]
# Return the two DataFrames
return full_matched, not_full_match
def get_contract_match(not_full_match: pd.DataFrame) -> pd.DataFrame:
"""
Given a pandas DataFrame containing transactions that do not have a full match between OBT and GPT, this function
returns a new DataFrame containing only the transactions that have a matching contract number in both OBT and GPT.
Args:
not_full_match (pd.DataFrame): A pandas DataFrame containing transactions that do not have a full match between
OBT and GPT.
Returns:
pd.DataFrame: A pandas DataFrame containing only the transactions that have a matching contract number in both
OBT and GPT.
"""
# Filter the not_full_match DataFrame by source
ob_df = not_full_match[not_full_match["Source"] == "OB"]
gp_df = not_full_match[not_full_match["Source"] == "GP"]
# Merge the two filtered DataFrames on the contract number
contract_match = pd.merge(
ob_df, gp_df,
how="inner",
on=["contract_number"],
suffixes=("_ob", "_gp")
)
# Fill in missing values in the Source column and drop the redundant columns
contract_match.drop(columns=["Source_ob", "Source_gp"], inplace=True)
# Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
contract_match = contract_match[
[ "contract_number", "onhold_amount_ob", "onhold_amount_gp"]
]
return contract_match

rec_records.py (deleted)
@@ -1,21 +0,0 @@
from pandas import DataFrame, Series, read_sql_query, read_excel
import sqlite3 as sqll
import sqlalchemy as sqa
import argparse
def drop_unnamed(df: DataFrame):
cols = [c for c in df.columns if "Unnamed" in c]
df.drop(cols, axis=1, inplace=True)
parser = argparse.ArgumentParser(
prog="HoldReconcilerRecord",
)
parser.add_argument("-i", "--input")
args = parser.parse_args()
# Resolution col
no_match: DataFrame = read_excel(args.input, sheet_name="No Match")
amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch")
drop_unnamed(no_match)
drop_unnamed(amt_mm)
print(no_match)

reconcile_holds.py (deleted)
@@ -1,191 +0,0 @@
import pandas as pd
from pandas import DataFrame, Series
import re
from re import Pattern
import os
from os.path import basename
import glob
import logging
from pathlib import Path
from tomllib import load
import logging.config
from datetime import datetime as dt
"""
[ ] Pull in past reconciliations to check against
[ ] Record reconciled transaction (connect with VBA)
[ ] Check GP against the database
[ ] Check OB against the database
[ ] Add resolution column to error sheets
"""
# Custom module for reconciliation
from rec_lib import get_contract_match, get_no_match, \
get_not_full_match, get_overdue, filter_gp, create_transaction_df
def setup_logging():
"""
Sets up logging configuration from the TOML file. If the logging configuration fails to be loaded from the file,
a default logging configuration is used instead.
Returns:
logging.Logger: The logger instance.
"""
with open("config.toml", "rb") as f:
config_dict: dict = load(f)
try:
# Try to load logging configuration from the TOML file
logging.config.dictConfig(config_dict["logger"])
except Exception as e:
# If the logging configuration fails, use a default configuration and log the error
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.warning("Failed setting up logger!")
logger.exception(e)
logger.warning(f"Config:\n{config_dict}")
return logger
setup_logging()
logger = logging.getLogger(__name__)
logger.info(f"Logger started with level: {logger.level}")
def find_most_recent_file(folder_path: Path, file_pattern: Pattern) -> str:
"""
Given a folder path and a regular expression pattern, this function returns the path of the most recently modified
file in the folder that matches the pattern.
Args:
folder_path (Path): A pathlib.Path object representing the folder to search.
file_pattern (Pattern): A regular expression pattern used to filter the files in the folder.
Returns:
str: The path of the most recently modified file in the folder that matches the pattern.
"""
# Find all files in the folder that match the pattern
files = glob.glob(f"{folder_path}/*")
logger.debug(f"files: {files}")
# Get the modification time of each file and filter to only those that match the pattern
file_times = [(os.path.getmtime(path), path) for path in files if re.match(file_pattern, basename(path))]
# Sort the files by modification time (most recent first)
file_times.sort(reverse=True)
logger.debug(f"file times: {file_times}")
# Return the path of the most recent file
return file_times[0][1]
def check_sheet(df_cols: list[str], excel_col_config: dict) -> bool:
"""
Given a list of column names and a dictionary of column name configurations, this function checks if the required
columns are present in the list of column names.
Args:
df_cols (list[str]): A list of column names.
excel_col_config (dict): A dictionary of column name configurations.
Returns:
bool: True if all of the required columns are present in the list of column names, False otherwise.
"""
# Get the list of required columns from the column configuration dictionary
required_cols: list[str] = list(excel_col_config.values())
# Check if all of the required columns are present in the list of column names
return all([col in df_cols for col in required_cols])
def get_dataframes(work_dir: str, excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
"""
Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB
Excel files in a "Work" folder and returns their corresponding dataframes.
Args:
excelConfig (dict): A dictionary containing configuration options for the GP and OB Excel files.
Returns:
tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively.
"""
# Define regular expression patterns to match the GP and OB Excel files
gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE)
ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE)
# Find the paths of the most recently modified GP and OB Excel files
gp_file_path = find_most_recent_file(work_dir, gp_regex)
logger.debug(f"gp_file_path: {gp_file_path}")
ob_file_path = find_most_recent_file(work_dir, ob_regex)
logger.debug(f"gp_file_path: {ob_file_path}")
# Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns
gp_xl = pd.ExcelFile(gp_file_path)
gp_config = excelConfig["GP"]
gp_sheets = gp_xl.sheet_names
gp_dfs = pd.read_excel(gp_xl, sheet_name=gp_sheets)
for sheet in gp_dfs:
if check_sheet(gp_dfs[sheet].columns, gp_config):
gp_df = gp_dfs[sheet]
break
ob_xl = pd.ExcelFile(ob_file_path)
ob_config = excelConfig["OB"]
ob_sheets = ob_xl.sheet_names
ob_dfs = pd.read_excel(ob_xl, sheet_name=ob_sheets)
for sheet in ob_dfs:
if check_sheet(ob_dfs[sheet].columns, ob_config):
ob_df = ob_dfs[sheet]
break
return ob_df, gp_df
def main() -> int:
"""
This is the main function for the script. It reads configuration options from a TOML file, reads in the GP and OB
Excel files, performs data reconciliation and analysis, and writes the results to a new Excel file.
Returns:
int: 0 if the script executes successfully.
"""
# Read the configuration options from a TOML file
with open("config.toml", "rb") as f:
config_dict: dict = load(f)
logger.debug(f"Config: {config_dict}")
excelConfig: dict = config_dict["ExcelColumns"]
# Get the GP and OB dataframes from the Excel files
ob_df, gp_df = get_dataframes(config_dict["write_dir"] ,excelConfig)
assert not ob_df.empty, "OB Data empty!"
assert not gp_df.empty, "GP Data empty!"
# Filter the GP dataframe to include only relevant transactions
fgp_df: DataFrame = filter_gp(gp_df, config_dict)
# Get the overdue transactions from the OB dataframe
overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"])
# Create transaction dataframes for the GP and OB dataframes
ob_transactions: DataFrame = create_transaction_df(ob_df, 'OB', excelConfig)
gp_transactions: DataFrame = create_transaction_df(fgp_df, 'GP', excelConfig)
# Get the transactions that do not have matches in both the GP and OB dataframes
no_match: DataFrame = get_no_match(ob_transactions, gp_transactions)
# Get the transactions that have matches in both the GP and OB dataframes but have amount mismatches
full_match, not_full_match = get_not_full_match(ob_transactions, gp_transactions)
only_contracts_match: DataFrame = get_contract_match(not_full_match)
# Write the results to a new Excel file
with pd.ExcelWriter(f"{config_dict['write_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
full_match.to_excel(writer,sheet_name="FULL", index=False)
no_match.to_excel(writer, sheet_name="No Match", index=False)
only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False)
overdue.to_excel(writer, sheet_name="Overdue", index=False)
return 0
if __name__ == "__main__":
print("Starting")
main()
print("Completed")

reports.py (new file)
@@ -0,0 +1,271 @@
from pandas import DataFrame, merge, to_datetime, NaT, concat, Series
from numpy import concatenate
from abc import ABC, abstractmethod
from logging import getLogger
import re
from typing import Literal
import datetime
from copy import deepcopy
from helpers import CN_REGEX, drop_unnamed
from memory import get_prev_reconciled
logger = getLogger(__name__)
class HoldReport(ABC):
source = ""
def __init__(self, dataframe: DataFrame, reports_config: dict) -> None:
self.config = reports_config
drop_unnamed(dataframe)
self.df = dataframe
self.prev_rec = None
self._normalize()
self._previously_resolved()
def _normalize(self):
# Rename the columns to standardize the column names
self.df.rename( columns= { unique_cols[self.source] : common_col
for common_col, unique_cols in self.config["shared_columns"].items()
}, inplace=True)
# Convert the on-hold amount column to float format and round to two decimal places
self.df["onhold_amount"] = self.df["onhold_amount"].astype(float).round(2)
# Use regex to extract the contract number from the column values and create a new column with the standardized format
self.df["contract_number"] = self.df["contract_number"].apply(
lambda cn: str(cn) if not re.search(CN_REGEX, str(cn))
else re.search(CN_REGEX, str(cn)).group(0)
)
# Create a new column with a unique transaction ID
self.df["ID"] = self.df["contract_number"] +'_'+\
self.df["onhold_amount"].astype(str)
# Create a new column with the data source
self.df["Source"] = self.source
def _previously_resolved(self):
"""
Merges previously recorded resolutions into this report and drops any
holds flagged 'Hide Next Month'.
"""
current_contracts: list[str] = self.df["contract_number"]
prev_recd: DataFrame = get_prev_reconciled(contracts=current_contracts)
# DataFrames have no truth value; check for emptiness explicitly
if prev_recd is None or prev_recd.empty:
logger.info("No previously reconciled!")
self.df = self._add_work_columns(self.df)
return
self.prev_rec = prev_recd
start_size = self.df.shape[0]
logger.debug(f"Report DF: \n{self.df}")
logger.debug(f"prev_rec: \n{prev_recd}")
source_id = f"ID_{self.source}"
self.df[source_id] = self.df["ID"]
self.df = merge(
self.df,
prev_recd,
how="left",
on= source_id,
suffixes=("_cur", "_prev")
)
#self.df.to_excel(f"merged_df_{self.source}.xlsx")
# Drop anything that should be ignored
self.df = self.df[self.df["Hide Next Month"] != True]
logger.info(f"Prev res added:\n{self.df}")
col_to_drop = []
for c in self.df.keys().to_list():
logger.debug(f"{c=}")
if "_prev" in c or "ID_" in c:
logger.debug(f"Found '_prev' in {c}")
col_to_drop.append(c)
else:
logger.debug(f"{c} is a good col!")
#col_to_drop.extend([c for c in self.df.keys().to_list() if '_prev' in c])
logger.debug(f"{col_to_drop=}")
self.df.drop(
columns= col_to_drop,
inplace=True
)
# Restandardize
self.df.rename(columns={"contract_number_cur": "contract_number"}, inplace=True)
end_size = self.df.shape[0]
logger.info(f"Reduced df by {start_size-end_size}")
def _remove_full_matches(self, other: 'HoldReport'):
"""
Removes any contracts that match both contract number and hold amount.
These do not need to be reconciled.
This is done 'in place' on both dataframes.
"""
filter_id_match: DataFrame = self.df[~(self.df["ID"].isin(other.df["ID"]))]
other.df: DataFrame = other.df[~(other.df["ID"].isin(self.df["ID"]))]
self.df = filter_id_match
self.combined_missing: DataFrame = concat([self.df, other.df], ignore_index=True)
self.combined_missing.to_excel("ALL MISSING.xlsx")
logger.debug(f"Combined Missing:\n{self.combined_missing}")
logger.info(f"Payments with errors: {self.combined_missing.shape[0]}")
@staticmethod
def _create_combined_col(column: str, target_df: DataFrame, sources: tuple[str, str]) -> DataFrame:
"""
Creates a new column by filling empty columns of this source, with the matching column from another source
"""
this, that = sources
target_df[column] = target_df[f"{column}_{this}"].fillna(
target_df[f"{column}_{that}"]
)
return target_df
def _requires_rec(self, other: 'HoldReport') -> tuple[DataFrame, DataFrame]:
"""
To be run after full matches have been removed. Returns the amount
mismatches (contract-only matches) and the no-match rows.
"""
# Merge the two filtered DataFrames on the contract number
contract_match = merge(
self.df, other.df,
how="inner",
on=["contract_number"],
suffixes=('_'+self.source, '_'+other.source)
)
#contract_match.to_excel("CONTRACT_MATCH.xlsx")
for col in ["vendor_name", "Resolution", "Notes"]:
self._create_combined_col(col, contract_match, (self.source, other.source))
logger.debug(f"_requires_rec | contract_match:\n{contract_match.columns} ({contract_match.shape})")
no_match: DataFrame = self.combined_missing[~(
self.combined_missing["contract_number"].isin(
contract_match["contract_number"]
))
]
no_match[f"ID_{self.source}"] = no_match.apply(lambda row:
row["ID"] if row["Source"] == self.source else None
, axis=1)
no_match[f"ID_{other.source}"] = no_match.apply(lambda row:
row["ID"] if row["Source"] == other.source else None
, axis=1)
logger.debug(f"_requires_rec | no_match:\n{no_match.columns} ({no_match.shape})")
return contract_match, no_match
@staticmethod
def _add_work_columns(df: DataFrame) -> DataFrame:
"""
Add empty columns to the dataframe to facilitate working through the report.
"""
logger.debug("Adding work columns!")
df_cols: list[str] = df.columns.to_list()
WORK_COLS = ["Hide Next Month","Resolution"]
for col in WORK_COLS:
if col not in df_cols:
df[col] = ''
return df
def reconcile(self, other: 'HoldReport') -> tuple[DataFrame, DataFrame]:
"""
Reconciles this report against `other` and returns the no-match and
amount-mismatch dataframes.
"""
self._remove_full_matches(other)
all_prev_reced = concat([self.prev_rec, other.prev_rec],ignore_index=True)
logger.debug(f"Removed matches:\n{self.df}")
amount_mismatch, no_match = self._requires_rec(other)
logger.debug(f"reconcile | no_match unaltered\n{no_match.columns} ({no_match.shape})")
logger.debug(f"reconcile | am_mm unaltered:\n{amount_mismatch.columns} ({amount_mismatch.shape})")
columns: list[str] = ["ID_GP", "ID_OB"]
columns.extend(self.config["output_columns"])
nm_cols:list[str] = deepcopy(columns)
nm_cols.insert(3,"onhold_amount")
nm_cols.insert(4,"Source")
columns.insert(3,"onhold_amount_GP")
columns.insert(4, "onhold_amount_OB")
# Select and reorder columns
no_match = no_match[
nm_cols
]
amount_mismatch = amount_mismatch[
columns
]
logger.info(f"no_match: {no_match.shape[0]}")
logger.info(f"am_mm: {amount_mismatch.shape[0]}")
return no_match, amount_mismatch
class OnBaseReport(HoldReport):
source = "OB"
def get_overdue(self) -> DataFrame:
"""
"""
self.df["InstallDate"] = to_datetime(self.df["InstallDate"])
self.df["InstallDate"].fillna(NaT, inplace=True)
return self.df[self.df["InstallDate"].dt.date < datetime.date.today()]
class GreatPlainsReport(HoldReport):
source = "GP"
def __init__(self, dataframe: DataFrame, report_config: dict) -> None:
self._filter(
gp_report_df= dataframe,
doc_num_filters= report_config["gp_filters"]["doc_num_filters"],
good_po_num_regex= report_config["gp_filters"]["po_filter"]
)
super().__init__(dataframe, report_config)
@staticmethod
def _filter(gp_report_df: DataFrame,
doc_num_filters: list[str], good_po_num_regex: str) -> DataFrame:
GOOD_PO_NUM = re.compile(good_po_num_regex, re.IGNORECASE)
bad_doc_num = ''
rx : str
for rx in doc_num_filters:
bad_doc_num += f"({rx})|"
bad_doc_num = re.compile(bad_doc_num[:-1], re.IGNORECASE)
# Create a mask/filter that will keep rows that match these
# requirements
keep_mask = (
(gp_report_df["Document Type"] == "Invoice") &
(gp_report_df["Purchase Order Number"].str.contains(GOOD_PO_NUM))
)
# Get the rows that DO NOT fit the keep_mask
rows_to_drop = gp_report_df[~keep_mask].index
# Drop the rows to filter
gp_report_df.drop(rows_to_drop, inplace=True)
# Create a filter to remove rows that meet this requirement
# Making this a negative in the keep mask is more trouble than
# it's worth
remove_mask = gp_report_df["Document Number"].str.contains(bad_doc_num)
rows_to_drop = gp_report_df[remove_mask].index
gp_report_df.drop(rows_to_drop, inplace=True)
return gp_report_df
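As a rough illustration of the filter semantics, here is a sketch on toy rows (the values are hypothetical, modeled on the examples in the config comments):
import pandas as pd
from reports import GreatPlainsReport
toy = pd.DataFrame({
    "Document Type": ["Invoice", "Credit Memo", "Invoice"],
    "Purchase Order Number": ["ABC123", "ABC124", "CMA 55"],
    "Document Number": ["1-316141 HOLD", "1-316142 HOLD", "1-316143 HOLD"],
})
filtered = GreatPlainsReport._filter(
    toy,
    doc_num_filters=["p(oin)?ts", "pool", "promo", "cma"],
    good_po_num_regex=r"^(?!.*cma(\s|\d)).*$",
)
# Only the first row survives: row 2 is a Credit Memo, row 3 has a CMA purchase order
print(filtered)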