Added doc number filtering to config, made it an exclusive rather than
inclusive filter

Document numbers no longer have to be just "HOLD" or a bare contract
number; instead, numbers matching any pattern in the new DocNumFilter
list are excluded.
master
3 years ago
parent 075a84133b
commit 8ffd24840e
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. .gitignore (3 lines changed)
  2. Hold Reconciler.spec (50 lines changed)
  3. config.toml (19 lines changed)
  4. rec_lib.py (34 lines changed)
  5. reconcile_holds.py (20 lines changed)

.gitignore (vendored): 3 lines changed

@ -1,4 +1,7 @@
__pycache__/
venv/
work/
build/
dist/
*.log

Hold Reconciler.spec (new file): 50 lines changed

@ -0,0 +1,50 @@
# -*- mode: python ; coding: utf-8 -*-
block_cipher = None
a = Analysis(
['reconcile_holds.py'],
pathex=[],
binaries=[],
datas=[('config.toml', '.'), ('requirements.txt', '.')],
hiddenimports=['openpyxl'],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
exe = EXE(
pyz,
a.scripts,
[],
exclude_binaries=True,
name='Hold Reconciler',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
console=True,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None,
)
coll = COLLECT(
exe,
a.binaries,
a.zipfiles,
a.datas,
strip=False,
upx=True,
upx_exclude=[],
name='Hold Reconciler',
)
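The spec bundles config.toml and requirements.txt via datas, so a frozen build looks for them wherever PyInstaller unpacks the app rather than in the source tree. A minimal sketch of how the bundled config could be located at runtime; resolve_bundled is illustrative and not part of this repo:

    import sys
    from pathlib import Path

    def resolve_bundled(name: str) -> Path:
        # PyInstaller sets sys.frozen (and usually sys._MEIPASS) on frozen
        # builds; in a plain source checkout, fall back to the script's folder.
        if getattr(sys, "frozen", False):
            base = Path(getattr(sys, "_MEIPASS", Path(sys.executable).parent))
        else:
            base = Path(__file__).parent
        return base / name

    config_path = resolve_bundled("config.toml")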

config.toml: 19 lines changed

@ -1,5 +1,15 @@
write_dir = "../Work"
write_dir = "Work"
DocNumFilter = [
"p(oin)?ts",
"pool",
"promo",
"o(ver)?f(und)?",
"m(ar)?ke?t",
"title",
"adj",
"reg free",
"cma"
]
[ExcelColumns]
[ExcelColumns.OB]
@ -14,6 +24,9 @@ write_dir = "../Work"
pur_order = "Purchase Order Number" # ABC123
doc_type = "Document Type" # Invoice or Credit Memo
[logger]
version = 1
@ -24,7 +37,7 @@ write_dir = "../Work"
[logger.handlers.console]
class = "logging.StreamHandler"
level = "INFO"
level = "DEBUG"
formatter = "custom"
stream = "ext://sys.stdout"
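write_dir, DocNumFilter, and the [logger] table all live in config.toml. main() below confirms the file is loaded with a tomllib-style load(); applying the logger table through logging.config.dictConfig is an assumption based on its shape (version = 1, a console StreamHandler, a "custom" formatter). A hedged sketch of that wiring:

    import logging.config
    import tomllib  # use the tomli backport on Python < 3.11

    with open("config.toml", "rb") as f:
        config = tomllib.load(f)

    # Presumed use of the [logger] table; not shown in this diff.
    logging.config.dictConfig(config["logger"])

    write_dir = config["write_dir"]       # "Work"
    doc_filters = config["DocNumFilter"]  # exclusion regexes for doc numbers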

rec_lib.py: 34 lines changed

@ -4,6 +4,11 @@ from datetime import datetime as dt
import datetime
import re
from typing import Literal
import logging
logger = logging.getLogger(__name__)
def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame:
"""
@ -25,7 +30,7 @@ def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame:
return onbase_df[onbase_df[id_col].dt.date < datetime.date.today()]
def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame:
def filter_gp(gp_dataframe: pd.DataFrame, full_config: dict) -> pd.DataFrame:
"""
Given a pandas DataFrame containing GP data and a dictionary containing the GP configuration, this function
filters out rows from the DataFrame that are not needed for further analysis based on certain criteria.
@ -37,18 +42,23 @@ def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame:
Returns:
pd.DataFrame: A pandas DataFrame containing the filtered GP data.
"""
# Regex used to filter unneeded transactions
# filters anything that does not contain a ONLY contract number OR
# The work hold or just hld
GOOD_DOC_NUM = re.compile(r"(^(\d+-?)+$)|(ho?ld)", re.IGNORECASE)
# Excludes anything that contains cma with a space or digit following it
# CMA23532 would be excluded but 'John Locman' would be allowed
GOOD_PO_NUM = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE)
gp_config: dict = full_config["ExcelColumns"]["GP"]
doc_num_regexes: list[str] = full_config["DocNumFilter"]
bad_doc_num = ''
rx : str
for rx in doc_num_regexes:
bad_doc_num += f"({rx})|"
bad_doc_num = re.compile(bad_doc_num[:-1], re.IGNORECASE)
logger.debug(f"Doc # filter: {bad_doc_num}")
# Create a filter/mask to use on the data
mask = (
(gp_dataframe[gp_config['doc_type']] == "Invoice") &
(gp_dataframe[gp_config['doc_num']].str.contains(GOOD_DOC_NUM)) &
(gp_dataframe[gp_config['pur_order']].str.contains(GOOD_PO_NUM))
)
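The loop above concatenates each configured pattern into one alternation and trims the trailing pipe. An equivalent, slightly more idiomatic construction using the same patterns (purely illustrative, not the committed code):

    import re

    doc_num_regexes = ["p(oin)?ts", "pool", "promo", "o(ver)?f(und)?",
                       "m(ar)?ke?t", "title", "adj", "reg free", "cma"]

    # Join the patterns into a single alternation instead of building the
    # string with a trailing "|" and slicing it off.
    bad_doc_num = re.compile("|".join(f"({rx})" for rx in doc_num_regexes),
                             re.IGNORECASE)

    assert bad_doc_num.search("CMA23532")        # excluded
    assert bad_doc_num.search("Points adj 55")   # excluded
    assert not bad_doc_num.search("12345-01")    # kept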
@ -56,7 +66,12 @@ def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame:
rows_to_drop = gp_dataframe[~mask].index
# Drop the rows and return the filtered DataFrame
return gp_dataframe.drop(rows_to_drop, inplace=False)
filtered_df = gp_dataframe.drop(rows_to_drop, inplace=False)
mask = filtered_df[gp_config['doc_num']].str.contains(bad_doc_num)
rows_to_drop = filtered_df[mask].index
return filtered_df.drop(rows_to_drop, inplace=False)
def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"], excelConfig: dict):
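With this change, filtering happens in two passes: the first mask keeps Invoice rows whose purchase order does not look like a CMA, and the second drops any remaining row whose document number hits one of the configured exclusion patterns. A tiny illustration with made-up data; the "Document Number" column name for doc_num is assumed, since that config value is not shown in this diff:

    import re
    import pandas as pd

    gp_config = {"doc_type": "Document Type", "doc_num": "Document Number",
                 "pur_order": "Purchase Order Number"}
    df = pd.DataFrame({
        "Document Type": ["Invoice", "Invoice", "Credit Memo"],
        "Document Number": ["12345-01", "PROMO 7", "67890"],
        "Purchase Order Number": ["ABC123", "ABC124", "ABC125"],
    })
    bad_doc_num = re.compile("(promo)|(cma)", re.IGNORECASE)
    good_po = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE)

    keep = (
        (df[gp_config["doc_type"]] == "Invoice")
        & df[gp_config["pur_order"]].str.contains(good_po)
    )
    filtered = df[keep]
    # Second pass: drop doc numbers that match the exclusion list.
    filtered = filtered[~filtered[gp_config["doc_num"]].str.contains(bad_doc_num)]
    # Only the 12345-01 invoice survives.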
@ -75,7 +90,7 @@ def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"],
for each transaction in the original DataFrame.
"""
column_config: dict = excelConfig[source]
logger.debug(f"column_config: {column_config}")
# Create a new DataFrame with the contract number and on-hold amount columns
transactions = dataframe[[column_config["contract_number"], column_config["onhold_amount"]]].copy()
@ -226,12 +241,11 @@ def get_contract_match(not_full_match: pd.DataFrame) -> pd.DataFrame:
)
# Fill in missing values in the Source column and drop the redundant columns
contract_match["Source"] = contract_match["Source_ob"].fillna("GP")
contract_match.drop(columns=["Source_ob", "Source_gp"], inplace=True)
# Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
contract_match = contract_match[
[ "Source", "contract_number", "onhold_amount_ob", "onhold_amount_gp"]
[ "contract_number", "onhold_amount_ob", "onhold_amount_gp"]
]
return contract_match
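The matching step above relies on an earlier merge that suffixes the OB and GP columns with _ob/_gp and then backfills Source from the GP side. How not_full_match is built is not shown in this diff; a condensed sketch assuming a suffixed outer merge, with invented values:

    import pandas as pd

    ob = pd.DataFrame({"contract_number": ["C1", "C2"],
                       "onhold_amount": [100.0, 250.0], "Source": "OB"})
    gp = pd.DataFrame({"contract_number": ["C1", "C3"],
                       "onhold_amount": [100.0, 75.0], "Source": "GP"})

    # Outer merge on contract number, keeping both amounts side by side.
    m = ob.merge(gp, on="contract_number", how="outer", suffixes=("_ob", "_gp"))
    m["Source"] = m["Source_ob"].fillna("GP")
    m = m.drop(columns=["Source_ob", "Source_gp"])
    m = m[["contract_number", "onhold_amount_ob", "onhold_amount_gp"]]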

reconcile_holds.py: 20 lines changed

@ -94,7 +94,7 @@ def check_sheet(df_cols: list[str], excel_col_config: dict) -> bool:
return all([col in df_cols for col in required_cols])
def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
def get_dataframes(work_dir: str, excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
"""
Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB
Excel files in a "Work" folder and returns their corresponding dataframes.
@ -105,22 +105,15 @@ def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|N
Returns:
tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively.
"""
# Get the current working directory and the path to the "Work" folder
current_dir: Path = Path(os.getcwd())
work_folder: Path = current_dir / 'Work'
logger.debug(f"Workpath: {work_folder}")
# Check that the "Work" folder exists
assert work_folder.exists, "No work folder found!"
# Define regular expression patterns to match the GP and OB Excel files
gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE)
ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE)
# Find the paths of the most recently modified GP and OB Excel files
gp_file_path = find_most_recent_file(work_folder, gp_regex)
gp_file_path = find_most_recent_file(work_dir, gp_regex)
logger.debug(f"gp_file_path: {gp_file_path}")
ob_file_path = find_most_recent_file(work_folder, ob_regex)
ob_file_path = find_most_recent_file(work_dir, ob_regex)
logger.debug(f"gp_file_path: {ob_file_path}")
# Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns
@ -156,16 +149,17 @@ def main() -> int:
# Read the configuration options from a TOML file
with open("config.toml", "rb") as f:
config_dict: dict = load(f)
logger.debug(f"Config: {config_dict}")
excelConfig: dict = config_dict["ExcelColumns"]
# Get the GP and OB dataframes from the Excel files
ob_df, gp_df = get_dataframes(excelConfig)
ob_df, gp_df = get_dataframes(config_dict["write_dir"], excelConfig)
assert not ob_df.empty, "OB Data empty!"
assert not gp_df.empty, "GP Data empty!"
# Filter the GP dataframe to include only relevant transactions
fgp_df: DataFrame = filter_gp(gp_df, excelConfig["GP"])
fgp_df: DataFrame = filter_gp(gp_df, config_dict)
# Get the overdue transactions from the OB dataframe
overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"])
@ -181,7 +175,7 @@ def main() -> int:
only_contracts_match: DataFrame = get_contract_match(not_full_match)
# Write the results to a new Excel file
with pd.ExcelWriter(f"{config_dict['work_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
with pd.ExcelWriter(f"{config_dict['write_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
full_match.to_excel(writer,sheet_name="FULL", index=False)
no_match.to_excel(writer, sheet_name="No Match", index=False)
only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False)
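Because write_dir now doubles as the folder searched by get_dataframes and the folder the report is written to, the output path is built with a plain f-string. A pathlib-based sketch of the same path, reusing names from main() above (illustrative only), also makes it easy to create the folder if it is missing:

    from pathlib import Path
    from datetime import datetime as dt

    out_path = Path(config_dict["write_dir"]) / (
        f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx"
    )
    out_path.parent.mkdir(parents=True, exist_ok=True)  # ensure Work/ exists
    with pd.ExcelWriter(out_path, mode="w") as writer:
        full_match.to_excel(writer, sheet_name="FULL", index=False)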
