From 8ffd24840e32eb3028114541f8cbd6f39f8d64c1 Mon Sep 17 00:00:00 2001
From: = <=>
Date: Wed, 5 Apr 2023 13:37:35 -0400
Subject: [PATCH] Add doc number filtering to config and make it an exclusive
 rather than inclusive filter

The filter is no longer limited to "HOLD" or number-only document numbers.
---
 .gitignore           |  3 +++
 Hold Reconciler.spec | 50 ++++++++++++++++++++++++++++++++++++++++++++
 config.toml          | 19 ++++++++++++++---
 rec_lib.py           | 34 +++++++++++++++++++++---------
 reconcile_holds.py   | 20 +++++++-----------
 5 files changed, 100 insertions(+), 26 deletions(-)
 create mode 100644 Hold Reconciler.spec

diff --git a/.gitignore b/.gitignore
index 0093a18..4cee17f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,7 @@
 __pycache__/
 venv/
 work/
+build/
+dist/
+
 *.log
\ No newline at end of file
diff --git a/Hold Reconciler.spec b/Hold Reconciler.spec
new file mode 100644
index 0000000..34520af
--- /dev/null
+++ b/Hold Reconciler.spec
@@ -0,0 +1,50 @@
+# -*- mode: python ; coding: utf-8 -*-
+
+
+block_cipher = None
+
+
+a = Analysis(
+    ['reconcile_holds.py'],
+    pathex=[],
+    binaries=[],
+    datas=[('config.toml', '.'), ('requirements.txt', '.')],
+    hiddenimports=['openpyxl'],
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+exe = EXE(
+    pyz,
+    a.scripts,
+    [],
+    exclude_binaries=True,
+    name='Hold Reconciler',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)
+coll = COLLECT(
+    exe,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    name='Hold Reconciler',
+)
diff --git a/config.toml b/config.toml
index f823325..fdb455e 100644
--- a/config.toml
+++ b/config.toml
@@ -1,5 +1,15 @@
-write_dir = "../Work"
-
+write_dir = "Work"
+DocNumFilter = [
+    "p(oin)?ts",
+    "pool",
+    "promo",
+    "o(ver)?f(und)?",
+    "m(ar)?ke?t",
+    "title",
+    "adj",
+    "reg free",
+    "cma"
+]
 [ExcelColumns]
     [ExcelColumns.OB]
 
@@ -14,6 +24,9 @@
         pur_order = "Purchase Order Number" # ABC123
         doc_type = "Document Type" # Invoice or Credit Memo
 
+
+
+
 [logger]
     version = 1
 
@@ -24,7 +37,7 @@ write_dir = "../Work"
 
     [logger.handlers.console]
        class = "logging.StreamHandler"
-        level = "INFO"
+        level = "DEBUG"
        formatter = "custom"
        stream = "ext://sys.stdout"
 
diff --git a/rec_lib.py b/rec_lib.py
index c3dd180..98d9a3b 100644
--- a/rec_lib.py
+++ b/rec_lib.py
@@ -4,6 +4,11 @@ from datetime import datetime as dt
 import datetime
 import re
 from typing import Literal
+import logging
+
+
+logger = logging.getLogger(__name__)
+
 
 def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame:
     """
@@ -25,7 +30,7 @@
     return onbase_df[onbase_df[id_col].dt.date < datetime.date.today()]
 
 
-def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame:
+def filter_gp(gp_dataframe: pd.DataFrame, full_config: dict) -> pd.DataFrame:
     """
     Given a pandas DataFrame containing GP data and a dictionary containing the GP configuration, this function filters out rows from the DataFrame that are not needed for further analysis based on certain criteria.
@@ -37,18 +42,23 @@ def filter_gp(gp_dataframe: pd.DataFrame, gp_config: dict) -> pd.DataFrame:
     Returns:
         pd.DataFrame: A pandas DataFrame containing the filtered GP data.
     """
-    # Regex used to filter unneeded transactions
-    # filters anything that does not contain a ONLY contract number OR
-    # The work hold or just hld
-    GOOD_DOC_NUM = re.compile(r"(^(\d+-?)+$)|(ho?ld)", re.IGNORECASE)
+
     # Excludes anything that contains cma with a space or digit following it
     # CMA23532 would be excluded but 'John Locman' would be allowed
     GOOD_PO_NUM = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE)
+    gp_config: dict = full_config["ExcelColumns"]["GP"]
+    doc_num_regexes: list[str] = full_config["DocNumFilter"]
+
+    bad_doc_num = ''
+    rx: str
+    for rx in doc_num_regexes:
+        bad_doc_num += f"({rx})|"
+    bad_doc_num = re.compile(bad_doc_num[:-1], re.IGNORECASE)
+    logger.debug(f"Doc # filter: {bad_doc_num}")
 
     # Create a filter/mask to use on the data
     mask = (
         (gp_dataframe[gp_config['doc_type']] == "Invoice") &
-        (gp_dataframe[gp_config['doc_num']].str.contains(GOOD_DOC_NUM)) &
         (gp_dataframe[gp_config['pur_order']].str.contains(GOOD_PO_NUM))
     )
 
@@ -56,7 +66,12 @@
     rows_to_drop = gp_dataframe[~mask].index
 
     # Drop the rows and return the filtered DataFrame
-    return gp_dataframe.drop(rows_to_drop, inplace=False)
+    filtered_df = gp_dataframe.drop(rows_to_drop, inplace=False)
+
+    mask = filtered_df[gp_config['doc_num']].str.contains(bad_doc_num)
+    rows_to_drop = filtered_df[mask].index
+
+    return filtered_df.drop(rows_to_drop, inplace=False)
 
 
 def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"], excelConfig: dict):
@@ -75,7 +90,7 @@ def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"],
         for each transaction in the original DataFrame.
     """
     column_config: dict = excelConfig[source]
-
+    logger.debug(f"column_config: {column_config}")
     # Create a new DataFrame with the contract number and on-hold amount columns
     transactions = dataframe[[column_config["contract_number"], column_config["onhold_amount"]]].copy()
 
@@ -226,12 +241,11 @@ def get_contract_match(not_full_match: pd.DataFrame) -> pd.DataFrame:
     )
 
     # Fill in missing values in the Source column and drop the redundant columns
-    contract_match["Source"] = contract_match["Source_ob"].fillna("GP")
 
     contract_match.drop(columns=["Source_ob", "Source_gp"], inplace=True)
 
     # Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
     contract_match = contract_match[
-        [ "Source", "contract_number", "onhold_amount_ob", "onhold_amount_gp"]
+        [ "contract_number", "onhold_amount_ob", "onhold_amount_gp"]
     ]
     return contract_match
\ No newline at end of file
diff --git a/reconcile_holds.py b/reconcile_holds.py
index 7259e86..23b9230 100644
--- a/reconcile_holds.py
+++ b/reconcile_holds.py
@@ -94,7 +94,7 @@ def check_sheet(df_cols: list[str], excel_col_config: dict) -> bool:
     return all([col in df_cols for col in required_cols])
 
 
-def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
+def get_dataframes(work_dir: str, excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
     """
     Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB Excel files in a "Work" folder and returns their corresponding dataframes.
@@ -105,22 +105,15 @@ def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|N
     Returns:
         tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively.
     """
-    # Get the current working directory and the path to the "Work" folder
-    current_dir: Path = Path(os.getcwd())
-    work_folder: Path = current_dir / 'Work'
-    logger.debug(f"Workpath: {work_folder}")
-    # Check that the "Work" folder exists
-    assert work_folder.exists, "No work folder found!"
-
     # Define regular expression patterns to match the GP and OB Excel files
     gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE)
     ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE)
 
     # Find the paths of the most recently modified GP and OB Excel files
-    gp_file_path = find_most_recent_file(work_folder, gp_regex)
+    gp_file_path = find_most_recent_file(work_dir, gp_regex)
     logger.debug(f"gp_file_path: {gp_file_path}")
 
-    ob_file_path = find_most_recent_file(work_folder, ob_regex)
+    ob_file_path = find_most_recent_file(work_dir, ob_regex)
     logger.debug(f"gp_file_path: {ob_file_path}")
 
     # Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns
@@ -156,16 +149,17 @@ def main() -> int:
     # Read the configuration options from a TOML file
     with open("config.toml", "rb") as f:
         config_dict: dict = load(f)
+    logger.debug(f"Config: {config_dict}")
 
     excelConfig: dict = config_dict["ExcelColumns"]
 
     # Get the GP and OB dataframes from the Excel files
-    ob_df, gp_df = get_dataframes(excelConfig)
+    ob_df, gp_df = get_dataframes(config_dict["write_dir"], excelConfig)
     assert not ob_df.empty, "OB Data empty!"
     assert not gp_df.empty, "GP Data empty!"
 
     # Filter the GP dataframe to include only relevant transactions
-    fgp_df: DataFrame = filter_gp(gp_df, excelConfig["GP"])
+    fgp_df: DataFrame = filter_gp(gp_df, config_dict)
 
     # Get the overdue transactions from the OB dataframe
     overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"])
@@ -181,7 +175,7 @@ def main() -> int:
     only_contracts_match: DataFrame = get_contract_match(not_full_match)
 
     # Write the results to a new Excel file
-    with pd.ExcelWriter(f"{config_dict['work_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
+    with pd.ExcelWriter(f"{config_dict['write_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
         full_match.to_excel(writer,sheet_name="FULL", index=False)
         no_match.to_excel(writer, sheet_name="No Match", index=False)
         only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False)
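
Editor's note (not part of the patch): the rec_lib.py hunks above replace the old inclusive GOOD_DOC_NUM regex with an exclusion list read from config.toml's new DocNumFilter key. The standalone sketch below illustrates that behaviour with hypothetical data, using the DocNumFilter patterns and the GP "Document Number" column name shown above; the regex is assembled with str.join, which is a tidier equivalent of the string-building loop in filter_gp.

    import re
    import pandas as pd

    # Patterns copied from the DocNumFilter list added to config.toml above.
    doc_num_filter = ["p(oin)?ts", "pool", "promo", "o(ver)?f(und)?",
                      "m(ar)?ke?t", "title", "adj", "reg free", "cma"]

    # One case-insensitive alternation built from the configured patterns.
    bad_doc_num = re.compile("|".join(f"({rx})" for rx in doc_num_filter), re.IGNORECASE)

    # Hypothetical GP rows; column names follow ExcelColumns.GP above.
    gp = pd.DataFrame({
        "Document Number": ["123-4567-891011", "HOLD 42", "Warranty claim 77",
                            "CMA23532", "Points adj"],
        "Document Type": ["Invoice"] * 5,
    })

    # Exclusive filtering: keep rows whose document number matches none of the
    # configured patterns. "Warranty claim 77" now passes through, whereas the old
    # inclusive regex only kept number-only or HOLD-style document numbers.
    keep = ~gp["Document Number"].str.contains(bad_doc_num)
    print(gp[keep])  # keeps the first three rows; drops the CMA and Points rows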
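A second small sketch (again not part of the patch) of the relocated write_dir setting: after this change the same configured folder both feeds get_dataframes(work_dir, ...) and names the report written by main(). Python 3.11's tomllib is assumed here in place of the load imported by reconcile_holds.py, and the inline TOML string stands in for config.toml.

    import tomllib
    from datetime import datetime as dt
    from pathlib import Path

    # Inline stand-in for the relevant part of config.toml shown above.
    config_dict = tomllib.loads('write_dir = "Work"')

    work_dir = Path(config_dict["write_dir"])
    out_path = work_dir / f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx"
    print(out_path)  # e.g. Work/Reconciled Holds [04-05-2023].xlsx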