Added doc number filtering to config, made it an exclusive rather than an inclusive filter
The doc number filter no longer keeps only entries that contain HOLD or consist solely of a number
master
= 3 years ago
parent 075a84133b
commit 8ffd24840e
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 3
      .gitignore
  2. 50
      Hold Reconciler.spec
  3. 19
      config.toml
  4. 34
      rec_lib.py
  5. 20
      reconcile_holds.py

3
.gitignore vendored

@ -1,4 +1,7 @@
__pycache__/ __pycache__/
venv/ venv/
work/ work/
build/
dist/
*.log *.log

@ -0,0 +1,50 @@
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build specification for the Hold Reconciler tool.
# Analysis, PYZ, EXE and COLLECT are supplied by PyInstaller when it
# executes this file; they are intentionally not imported here.

block_cipher = None

# Name shared by the generated executable and the collected output folder.
app_name = 'Hold Reconciler'

# Non-code files shipped next to the executable as (source, destination) pairs.
bundled_files = [('config.toml', '.'), ('requirements.txt', '.')]

# Dependency analysis starting from the entry-point script.
# openpyxl is listed explicitly as a hidden import
# (presumably because it is only loaded indirectly at runtime -- TODO confirm).
a = Analysis(
    ['reconcile_holds.py'],
    pathex=[],
    binaries=[],
    datas=bundled_files,
    hiddenimports=['openpyxl'],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

# Archive of the pure-Python modules found by the analysis.
pyz = PYZ(
    a.pure,
    a.zipped_data,
    cipher=block_cipher,
)

# Console executable; binaries are excluded here and gathered by COLLECT below.
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name=app_name,
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)

# Gather the executable, its binaries and the data files under one name.
coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name=app_name,
)

@ -1,5 +1,15 @@
write_dir = "../Work" write_dir = "Work"
DocNumFilter = [
"p(oin)?ts",
"pool",
"promo",
"o(ver)?f(und)?",
"m(ar)?ke?t",
"title",
"adj",
"reg free",
"cma"
]
[ExcelColumns] [ExcelColumns]
[ExcelColumns.OB] [ExcelColumns.OB]
@ -14,6 +24,9 @@ write_dir = "../Work"
pur_order = "Purchase Order Number" # ABC123 pur_order = "Purchase Order Number" # ABC123
doc_type = "Document Type" # Invoice or Credit Memo doc_type = "Document Type" # Invoice or Credit Memo
[logger] [logger]
version = 1 version = 1
@ -24,7 +37,7 @@ write_dir = "../Work"
[logger.handlers.console] [logger.handlers.console]
class = "logging.StreamHandler" class = "logging.StreamHandler"
level = "INFO" level = "DEBUG"
formatter = "custom" formatter = "custom"
stream = "ext://sys.stdout" stream = "ext://sys.stdout"

@ -4,6 +4,11 @@ from datetime import datetime as dt
import datetime import datetime
import re import re
from typing import Literal from typing import Literal
import logging
logger = logging.getLogger(__name__)
def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame: def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame:
""" """
@ -25,7 +30,7 @@ def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame:
return onbase_df[onbase_df[id_col].dt.date < datetime.date.today()] return onbase_df[onbase_df[id_col].dt.date < datetime.date.today()]
def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame: def filter_gp(gp_dataframe: pd.DataFrame, full_config: dict) -> pd.DataFrame:
""" """
Given a pandas DataFrame containing GP data and a dictionary containing the GP configuration, this function Given a pandas DataFrame containing GP data and a dictionary containing the GP configuration, this function
filters out rows from the DataFrame that are not needed for further analysis based on certain criteria. filters out rows from the DataFrame that are not needed for further analysis based on certain criteria.
@ -37,18 +42,23 @@ def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame:
Returns: Returns:
pd.DataFrame: A pandas DataFrame containing the filtered GP data. pd.DataFrame: A pandas DataFrame containing the filtered GP data.
""" """
# Regex used to filter unneeded transactions
# filters anything that does not contain a ONLY contract number OR
# The work hold or just hld
GOOD_DOC_NUM = re.compile(r"(^(\d+-?)+$)|(ho?ld)", re.IGNORECASE)
# Excludes anything that contains cma with a space or digit following it # Excludes anything that contains cma with a space or digit following it
# CMA23532 would be excluded but 'John Locman' would be allowed # CMA23532 would be excluded but 'John Locman' would be allowed
GOOD_PO_NUM = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE) GOOD_PO_NUM = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE)
gp_config: dict = full_config["ExcelColumns"]["GP"]
doc_num_regexes: list[str] = full_config["DocNumFilter"]
bad_doc_num = ''
rx : str
for rx in doc_num_regexes:
bad_doc_num += f"({rx})|"
bad_doc_num = re.compile(bad_doc_num[:-1], re.IGNORECASE)
logger.debug(f"Doc # filter: {bad_doc_num}")
# Create a filter/mask to use on the data # Create a filter/mask to use on the data
mask = ( mask = (
(gp_dataframe[gp_config['doc_type']] == "Invoice") & (gp_dataframe[gp_config['doc_type']] == "Invoice") &
(gp_dataframe[gp_config['doc_num']].str.contains(GOOD_DOC_NUM)) &
(gp_dataframe[gp_config['pur_order']].str.contains(GOOD_PO_NUM)) (gp_dataframe[gp_config['pur_order']].str.contains(GOOD_PO_NUM))
) )
@ -56,7 +66,12 @@ def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame:
rows_to_drop = gp_dataframe[~mask].index rows_to_drop = gp_dataframe[~mask].index
# Drop the rows and return the filtered DataFrame # Drop the rows and return the filtered DataFrame
return gp_dataframe.drop(rows_to_drop, inplace=False) filtered_df = gp_dataframe.drop(rows_to_drop, inplace=False)
mask = filtered_df[gp_config['doc_num']].str.contains(bad_doc_num)
rows_to_drop = filtered_df[mask].index
return filtered_df.drop(rows_to_drop, inplace=False)
def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"], excelConfig: dict): def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"], excelConfig: dict):
@ -75,7 +90,7 @@ def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"],
for each transaction in the original DataFrame. for each transaction in the original DataFrame.
""" """
column_config: dict = excelConfig[source] column_config: dict = excelConfig[source]
logger.debug(f"column_config: {column_config}")
# Create a new DataFrame with the contract number and on-hold amount columns # Create a new DataFrame with the contract number and on-hold amount columns
transactions = dataframe[[column_config["contract_number"], column_config["onhold_amount"]]].copy() transactions = dataframe[[column_config["contract_number"], column_config["onhold_amount"]]].copy()
@ -226,12 +241,11 @@ def get_contract_match(not_full_match: pd.DataFrame) -> pd.DataFrame:
) )
# Fill in missing values in the Source column and drop the redundant columns # Fill in missing values in the Source column and drop the redundant columns
contract_match["Source"] = contract_match["Source_ob"].fillna("GP")
contract_match.drop(columns=["Source_ob", "Source_gp"], inplace=True) contract_match.drop(columns=["Source_ob", "Source_gp"], inplace=True)
# Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns # Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
contract_match = contract_match[ contract_match = contract_match[
[ "Source", "contract_number", "onhold_amount_ob", "onhold_amount_gp"] [ "contract_number", "onhold_amount_ob", "onhold_amount_gp"]
] ]
return contract_match return contract_match

@ -94,7 +94,7 @@ def check_sheet(df_cols: list[str], excel_col_config: dict) -> bool:
return all([col in df_cols for col in required_cols]) return all([col in df_cols for col in required_cols])
def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]: def get_dataframes(work_dir: str, excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
""" """
Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB
Excel files in a "Work" folder and returns their corresponding dataframes. Excel files in a "Work" folder and returns their corresponding dataframes.
@ -105,22 +105,15 @@ def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|N
Returns: Returns:
tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively. tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively.
""" """
# Get the current working directory and the path to the "Work" folder
current_dir: Path = Path(os.getcwd())
work_folder: Path = current_dir / 'Work'
logger.debug(f"Workpath: {work_folder}")
# Check that the "Work" folder exists
assert work_folder.exists, "No work folder found!"
# Define regular expression patterns to match the GP and OB Excel files # Define regular expression patterns to match the GP and OB Excel files
gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE) gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE)
ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE) ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE)
# Find the paths of the most recently modified GP and OB Excel files # Find the paths of the most recently modified GP and OB Excel files
gp_file_path = find_most_recent_file(work_folder, gp_regex) gp_file_path = find_most_recent_file(work_dir, gp_regex)
logger.debug(f"gp_file_path: {gp_file_path}") logger.debug(f"gp_file_path: {gp_file_path}")
ob_file_path = find_most_recent_file(work_folder, ob_regex) ob_file_path = find_most_recent_file(work_dir, ob_regex)
logger.debug(f"gp_file_path: {ob_file_path}") logger.debug(f"gp_file_path: {ob_file_path}")
# Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns # Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns
@ -156,16 +149,17 @@ def main() -> int:
# Read the configuration options from a TOML file # Read the configuration options from a TOML file
with open("config.toml", "rb") as f: with open("config.toml", "rb") as f:
config_dict: dict = load(f) config_dict: dict = load(f)
logger.debug(f"Config: {config_dict}")
excelConfig: dict = config_dict["ExcelColumns"] excelConfig: dict = config_dict["ExcelColumns"]
# Get the GP and OB dataframes from the Excel files # Get the GP and OB dataframes from the Excel files
ob_df, gp_df = get_dataframes(excelConfig) ob_df, gp_df = get_dataframes(config_dict["write_dir"] ,excelConfig)
assert not ob_df.empty, "OB Data empty!" assert not ob_df.empty, "OB Data empty!"
assert not gp_df.empty, "GP Data empty!" assert not gp_df.empty, "GP Data empty!"
# Filter the GP dataframe to include only relevant transactions # Filter the GP dataframe to include only relevant transactions
fgp_df: DataFrame = filter_gp(gp_df, excelConfig["GP"]) fgp_df: DataFrame = filter_gp(gp_df, config_dict)
# Get the overdue transactions from the OB dataframe # Get the overdue transactions from the OB dataframe
overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"]) overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"])
@ -181,7 +175,7 @@ def main() -> int:
only_contracts_match: DataFrame = get_contract_match(not_full_match) only_contracts_match: DataFrame = get_contract_match(not_full_match)
# Write the results to a new Excel file # Write the results to a new Excel file
with pd.ExcelWriter(f"{config_dict['work_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer: with pd.ExcelWriter(f"{config_dict['write_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
full_match.to_excel(writer,sheet_name="FULL", index=False) full_match.to_excel(writer,sheet_name="FULL", index=False)
no_match.to_excel(writer, sheet_name="No Match", index=False) no_match.to_excel(writer, sheet_name="No Match", index=False)
only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False) only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False)

Loading…
Cancel
Save