From 8ffd24840e32eb3028114541f8cbd6f39f8d64c1 Mon Sep 17 00:00:00 2001
From: = <=>
Date: Wed, 5 Apr 2023 13:37:35 -0400
Subject: [PATCH] Add doc number filtering to config and make it an exclusive
 rather than inclusive filter

The filter is no longer limited to "HOLD" or number-only document numbers.
---
 .gitignore           |  3 +++
 Hold Reconciler.spec | 50 ++++++++++++++++++++++++++++++++++++++++++++
 config.toml          | 19 ++++++++++++++---
 rec_lib.py           | 34 +++++++++++++++++++++---------
 reconcile_holds.py   | 20 +++++++-----------
 5 files changed, 100 insertions(+), 26 deletions(-)
 create mode 100644 Hold Reconciler.spec

diff --git a/.gitignore b/.gitignore
index 0093a18..4cee17f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,7 @@
 __pycache__/
 venv/
 work/
+build/
+dist/
+
 *.log
\ No newline at end of file
diff --git a/Hold Reconciler.spec b/Hold Reconciler.spec
new file mode 100644
index 0000000..34520af
--- /dev/null
+++ b/Hold Reconciler.spec
@@ -0,0 +1,50 @@
+# -*- mode: python ; coding: utf-8 -*-
+
+
+block_cipher = None
+
+
+a = Analysis(
+    ['reconcile_holds.py'],
+    pathex=[],
+    binaries=[],
+    datas=[('config.toml', '.'), ('requirements.txt', '.')],
+    hiddenimports=['openpyxl'],
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+exe = EXE(
+    pyz,
+    a.scripts,
+    [],
+    exclude_binaries=True,
+    name='Hold Reconciler',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)
+coll = COLLECT(
+    exe,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    name='Hold Reconciler',
+)
diff --git a/config.toml b/config.toml
index f823325..fdb455e 100644
--- a/config.toml
+++ b/config.toml
@@ -1,5 +1,15 @@
-write_dir = "../Work"
-
+write_dir = "Work"
+DocNumFilter = [
+    "p(oin)?ts",
+    "pool",
+    "promo",
+    "o(ver)?f(und)?",
+    "m(ar)?ke?t",
+    "title",
+    "adj",
+    "reg free",
+    "cma"
+]
 [ExcelColumns]
     [ExcelColumns.OB]
 
@@ -14,6 +24,9 @@
         pur_order = "Purchase Order Number" # ABC123
         doc_type = "Document Type" # Invoice or Credit Memo
 
+
+
+
 [logger]
     version = 1
 
@@ -24,7 +37,7 @@ write_dir = "../Work"
 
     [logger.handlers.console]
        class = "logging.StreamHandler"
-        level = "INFO"
+        level = "DEBUG"
        formatter = "custom"
        stream = "ext://sys.stdout"
 
diff --git a/rec_lib.py b/rec_lib.py
index c3dd180..98d9a3b 100644
--- a/rec_lib.py
+++ b/rec_lib.py
@@ -4,6 +4,11 @@ from datetime import datetime as dt
 import datetime
 import re
 from typing import Literal
+import logging
+
+
+logger = logging.getLogger(__name__)
+
 
 def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame:
     """
@@ -25,7 +30,7 @@
     return onbase_df[onbase_df[id_col].dt.date < datetime.date.today()]
 
 
-def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame:
+def filter_gp(gp_dataframe: pd.DataFrame, full_config: dict) -> pd.DataFrame:
     """
     Given a pandas DataFrame containing GP data and a dictionary containing the GP configuration, this function filters out rows from the DataFrame that are not needed for further analysis based on certain criteria.
@@ -37,18 +42,23 @@ def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame:
     Returns:
         pd.DataFrame: A pandas DataFrame containing the filtered GP data.
     """
-    # Regex used to filter unneeded transactions
-    # filters anything that does not contain a ONLY contract number OR
-    # The work hold or just hld
-    GOOD_DOC_NUM = re.compile(r"(^(\d+-?)+$)|(ho?ld)", re.IGNORECASE)
+
     # Excludes anything that contains cma with a space or digit following it
     # CMA23532 would be excluded but 'John Locman' would be allowed
     GOOD_PO_NUM = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE)
+    gp_config: dict = full_config["ExcelColumns"]["GP"]
+    doc_num_regexes: list[str] = full_config["DocNumFilter"]
+
+    bad_doc_num = ''
+    rx: str
+    for rx in doc_num_regexes:
+        bad_doc_num += f"({rx})|"
+    bad_doc_num = re.compile(bad_doc_num[:-1], re.IGNORECASE)
+    logger.debug(f"Doc # filter: {bad_doc_num}")
 
     # Create a filter/mask to use on the data
     mask = (
         (gp_dataframe[gp_config['doc_type']] == "Invoice") &
-        (gp_dataframe[gp_config['doc_num']].str.contains(GOOD_DOC_NUM)) &
         (gp_dataframe[gp_config['pur_order']].str.contains(GOOD_PO_NUM))
     )
 
@@ -56,7 +66,12 @@
     rows_to_drop = gp_dataframe[~mask].index
 
     # Drop the rows and return the filtered DataFrame
-    return gp_dataframe.drop(rows_to_drop, inplace=False)
+    filtered_df = gp_dataframe.drop(rows_to_drop, inplace=False)
+
+    mask = filtered_df[gp_config['doc_num']].str.contains(bad_doc_num)
+    rows_to_drop = filtered_df[mask].index
+
+    return filtered_df.drop(rows_to_drop, inplace=False)
 
 
 def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"], excelConfig: dict):
@@ -75,7 +90,7 @@ def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"],
         for each transaction in the original DataFrame.
     """
     column_config: dict = excelConfig[source]
-
+    logger.debug(f"column_config: {column_config}")
     # Create a new DataFrame with the contract number and on-hold amount columns
     transactions = dataframe[[column_config["contract_number"], column_config["onhold_amount"]]].copy()
 
@@ -226,12 +241,11 @@ def get_contract_match(not_full_match: pd.DataFrame) -> pd.DataFrame:
     )
 
     # Fill in missing values in the Source column and drop the redundant columns
-    contract_match["Source"] = contract_match["Source_ob"].fillna("GP")
 
     contract_match.drop(columns=["Source_ob", "Source_gp"], inplace=True)
 
     # Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
     contract_match = contract_match[
-        [ "Source", "contract_number", "onhold_amount_ob", "onhold_amount_gp"]
+        [ "contract_number", "onhold_amount_ob", "onhold_amount_gp"]
     ]
     return contract_match
\ No newline at end of file
diff --git a/reconcile_holds.py b/reconcile_holds.py
index 7259e86..23b9230 100644
--- a/reconcile_holds.py
+++ b/reconcile_holds.py
@@ -94,7 +94,7 @@ def check_sheet(df_cols: list[str], excel_col_config: dict) -> bool:
     return all([col in df_cols for col in required_cols])
 
 
-def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
+def get_dataframes(work_dir: str, excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
     """
     Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB Excel files in a "Work" folder and returns their corresponding dataframes.
@@ -105,22 +105,15 @@ def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|N
     Returns:
         tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively.
     """
-    # Get the current working directory and the path to the "Work" folder
-    current_dir: Path = Path(os.getcwd())
-    work_folder: Path = current_dir / 'Work'
-    logger.debug(f"Workpath: {work_folder}")
-    # Check that the "Work" folder exists
-    assert work_folder.exists, "No work folder found!"
-
     # Define regular expression patterns to match the GP and OB Excel files
     gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE)
     ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE)
 
     # Find the paths of the most recently modified GP and OB Excel files
-    gp_file_path = find_most_recent_file(work_folder, gp_regex)
+    gp_file_path = find_most_recent_file(work_dir, gp_regex)
     logger.debug(f"gp_file_path: {gp_file_path}")
 
-    ob_file_path = find_most_recent_file(work_folder, ob_regex)
+    ob_file_path = find_most_recent_file(work_dir, ob_regex)
     logger.debug(f"gp_file_path: {ob_file_path}")
 
     # Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns
@@ -156,16 +149,17 @@ def main() -> int:
     # Read the configuration options from a TOML file
     with open("config.toml", "rb") as f:
         config_dict: dict = load(f)
+    logger.debug(f"Config: {config_dict}")
 
     excelConfig: dict = config_dict["ExcelColumns"]
 
     # Get the GP and OB dataframes from the Excel files
-    ob_df, gp_df = get_dataframes(excelConfig)
+    ob_df, gp_df = get_dataframes(config_dict["write_dir"], excelConfig)
     assert not ob_df.empty, "OB Data empty!"
     assert not gp_df.empty, "GP Data empty!"
 
     # Filter the GP dataframe to include only relevant transactions
-    fgp_df: DataFrame = filter_gp(gp_df, excelConfig["GP"])
+    fgp_df: DataFrame = filter_gp(gp_df, config_dict)
 
     # Get the overdue transactions from the OB dataframe
     overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"])
@@ -181,7 +175,7 @@ def main() -> int:
     only_contracts_match: DataFrame = get_contract_match(not_full_match)
 
     # Write the results to a new Excel file
-    with pd.ExcelWriter(f"{config_dict['work_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
+    with pd.ExcelWriter(f"{config_dict['write_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
         full_match.to_excel(writer,sheet_name="FULL", index=False)
         no_match.to_excel(writer, sheet_name="No Match", index=False)
         only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False)
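
Editor's note (not part of the patch): the rec_lib.py hunks above replace the old inclusive GOOD_DOC_NUM regex with an exclusion list read from config.toml's new DocNumFilter key. The standalone sketch below illustrates that behaviour with hypothetical data, using the DocNumFilter patterns and the GP "Document Number" column name shown above; the regex is assembled with str.join, which is a tidier equivalent of the string-building loop in filter_gp.

    import re
    import pandas as pd

    # Patterns copied from the DocNumFilter list added to config.toml above.
    doc_num_filter = ["p(oin)?ts", "pool", "promo", "o(ver)?f(und)?",
                      "m(ar)?ke?t", "title", "adj", "reg free", "cma"]

    # One case-insensitive alternation built from the configured patterns.
    bad_doc_num = re.compile("|".join(f"({rx})" for rx in doc_num_filter), re.IGNORECASE)

    # Hypothetical GP rows; column names follow ExcelColumns.GP above.
    gp = pd.DataFrame({
        "Document Number": ["123-4567-891011", "HOLD 42", "Warranty claim 77",
                            "CMA23532", "Points adj"],
        "Document Type": ["Invoice"] * 5,
    })

    # Exclusive filtering: keep rows whose document number matches none of the
    # configured patterns. "Warranty claim 77" now passes through, whereas the old
    # inclusive regex only kept number-only or HOLD-style document numbers.
    keep = ~gp["Document Number"].str.contains(bad_doc_num)
    print(gp[keep])  # keeps the first three rows; drops the CMA and Points rows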
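A second small sketch (again not part of the patch) of the relocated write_dir setting: after this change the same configured folder both feeds get_dataframes(work_dir, ...) and names the report written by main(). Python 3.11's tomllib is assumed here in place of the load imported by reconcile_holds.py, and the inline TOML string stands in for config.toml.

    import tomllib
    from datetime import datetime as dt
    from pathlib import Path

    # Inline stand-in for the relevant part of config.toml shown above.
    config_dict = tomllib.loads('write_dir = "Work"')

    work_dir = Path(config_dict["write_dir"])
    out_path = work_dir / f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx"
    print(out_path)  # e.g. Work/Reconciled Holds [04-05-2023].xlsx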