From 7ad4f76943dbf8f4e05cc6bfa7a775098b913096 Mon Sep 17 00:00:00 2001 From: = <=> Date: Thu, 20 Apr 2023 16:02:22 -0400 Subject: [PATCH] Reworked the report system to use classes for each report type. Helps unify everything. Not yet prepared for memory or db search... --- .gitignore | 8 +- __init__.py | 0 config.toml | 53 ---------- config_logger.toml | 22 ++++ config_reports.toml | 34 ++++++ helpers.py | 90 ++++++++++++++++ hold_reconciler.py | 120 +++++++++++++++++++++ memory.py | 156 +++++++++++++++++++++++++++ rec_lib.py | 251 -------------------------------------------- rec_records.py | 21 ---- reconcile_holds.py | 191 --------------------------------- reports.py | 188 +++++++++++++++++++++++++++++++++ version.txt | 1 + 13 files changed, 618 insertions(+), 517 deletions(-) create mode 100644 __init__.py delete mode 100644 config.toml create mode 100644 config_logger.toml create mode 100644 config_reports.toml create mode 100644 helpers.py create mode 100644 hold_reconciler.py create mode 100644 memory.py delete mode 100644 rec_lib.py delete mode 100644 rec_records.py delete mode 100644 reconcile_holds.py create mode 100644 reports.py create mode 100644 version.txt diff --git a/.gitignore b/.gitignore index 4cee17f..9ed4880 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,10 @@ work/ build/ dist/ -*.log \ No newline at end of file +*.log +*.xlsx +*.csv +*.db +*.txt + +!version.txt \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/config.toml b/config.toml deleted file mode 100644 index 86beb1f..0000000 --- a/config.toml +++ /dev/null @@ -1,53 +0,0 @@ -write_dir = "Work" -DocNumFilter = [ - "p(oin)?ts", - "pool", - "promo", - "o(ver)?f(und)?", - "m(ar)?ke?t", - "title", - "adj", - "reg free", - "cma" -] -[ExcelColumns] - - [ExcelColumns.OB] - contract_number = "Contract" # 3070508-007 - onhold_amount = "CurrentOnHold" - install_date = "InstallDate" - - [ExcelColumns.GP] - contract_number = "Transaction Description" # 1234-56789 - onhold_amount = "Current Trx Amount" - doc_num = "Document Number" # 1-316141 HOLD - pur_order = "Purchase Order Number" # ABC123 - doc_type = "Document Type" # Invoice or Credit Memo - - - - -[logger] - version = 1 - - disable_existing_loggers = false - - [logger.formatters.custom] - format = "'%(asctime)s - %(module)s - %(levelname)s - %(message)s'" - - [logger.handlers.console] - class = "logging.StreamHandler" - level = "DEBUG" - formatter = "custom" - stream = "ext://sys.stdout" - - [logger.handlers.file] - class = "logging.FileHandler" - level = "DEBUG" - formatter = "custom" - filename = "on_hold.log" - - [logger.root] - level = "DEBUG" - handlers = ["console", "file"] - \ No newline at end of file diff --git a/config_logger.toml b/config_logger.toml new file mode 100644 index 0000000..c29dad5 --- /dev/null +++ b/config_logger.toml @@ -0,0 +1,22 @@ +version = 1 + +disable_existing_loggers = false + +[formatters.custom] +format = "'%(asctime)s - %(module)s - %(levelname)s - %(message)s'" + +[handlers.console] +class = "logging.StreamHandler" +level = "DEBUG" +formatter = "custom" +stream = "ext://sys.stdout" + +[handlers.file] +class = "logging.FileHandler" +level = "DEBUG" +formatter = "custom" +filename = "on_hold.log" + +[root] +level = "DEBUG" +handlers = ["console", "file"] \ No newline at end of file diff --git a/config_reports.toml b/config_reports.toml new file mode 100644 index 0000000..794217b --- /dev/null +++ b/config_reports.toml @@ -0,0 +1,34 @@ +output_columns = [ 
+    "contract_number",
+    "vendor_name",
+    "AppNum",          # OB only
+    "DateBooked",      # OB only
+    "Document Number"  # GP only
+    # 'Source' added for 'no match'
+    ]
+
+
+[gp_filters]
+    # These regexes will be combined with ORs and used to filter
+    # the document number column of the GP report
+    doc_num_filters = [
+        "p(oin)?ts",
+        "pool",
+        "promo",
+        "o(ver)?f(und)?",
+        "m(ar)?ke?t",
+        "title",
+        "adj",
+        "reg free",
+        "cma"
+    ]
+    po_filter = "^(?!.*cma(\\s|\\d)).*$"
+
+
+
+[shared_columns]
+contract_number = { GP = "Transaction Description", OB = "Contract" }
+onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" }
+vendor_name = { GP = "Vendor Name", OB = "DealerName" }
+
+
diff --git a/helpers.py b/helpers.py
new file mode 100644
index 0000000..5e4261d
--- /dev/null
+++ b/helpers.py
@@ -0,0 +1,90 @@
+"""
+Hold Reconciler is an application meant to help reconcile the differences in payments
+that are marked as on hold in Great Plains and OnBase.
+
+It takes a report CSV from OnBase and a report from Great Plains and checks them
+against each other. It attempts to match them based on contract number and payment
+amount, or on the contract number alone.
+
+It also does a lot of filtering on the Great Plains report to remove irrelevant data.
+
+*Last Updated: version 1.3*
+*Originally developed in Spring of 2023 by Griffiths Lott (g@glott.me)*
+"""
+import re
+from re import Pattern
+import os
+from os.path import basename
+import glob
+import logging
+from pathlib import Path
+from tomllib import load
+from pandas import DataFrame, Series
+from typing import TypeVar, Literal
+
+
+import logging.config
+from logging import getLogger
+
+logger = getLogger(__name__)
+
+CN_REGEX = re.compile(r"\d{7}(-\d{3})?")
+
+def setup_logging():
+    """
+    Sets up logging configuration from the TOML file. If the logging configuration fails to be loaded from the file,
+    a default logging configuration is used instead.
+
+    Returns:
+        logging.Logger: The logger instance.
+    """
+    with open("config_logger.toml", "rb") as f:
+        config_dict: dict = load(f)
+    try:
+        # Try to load logging configuration from the TOML file
+        logging.config.dictConfig(config_dict)
+    except Exception as e:
+        # If the logging configuration fails, use a default configuration and log the error
+        logger = logging.getLogger()
+        logger.setLevel(logging.DEBUG)
+        logger.warning("Failed setting up logger!")
+        logger.exception(e)
+        logger.warning(f"Config:\n{config_dict}")
+    return logger
+
+
+def drop_unnamed(df: DataFrame, inplace: bool = True) -> DataFrame|None:
+    """
+    Drops all Unnamed columns from a dataframe.
+    ### CAUTION : This function acts *inplace* by default
+    (on the original dataframe, not a copy!)
+    """
+    cols = [c for c in df.columns if "Unnamed" in c]
+    return df.drop(cols, axis=1, inplace=inplace)
+
+
+def find_most_recent_file(folder_path: Path, file_pattern: Pattern) -> str:
+    """
+    Given a folder path and a regular expression pattern, this function returns the path of the most recently modified
+    file in the folder that matches the pattern.
+
+    Args:
+        folder_path (Path): A pathlib.Path object representing the folder to search.
+        file_pattern (Pattern): A regular expression pattern used to filter the files in the folder.
+
+    Returns:
+        str: The path of the most recently modified file in the folder that matches the pattern.
+ """ + # Find all files in the folder that match the pattern + files = glob.glob(f"{folder_path}/*") + logger.debug(f"files: {files}") + + # Get the modification time of each file and filter to only those that match the pattern + file_times = [(os.path.getmtime(path), path) for path in files if re.match(file_pattern, basename(path))] + + # Sort the files by modification time (most recent first) + file_times.sort(reverse=True) + logger.debug(f"file times: {file_times}") + + # Return the path of the most recent file + return file_times[0][1] diff --git a/hold_reconciler.py b/hold_reconciler.py new file mode 100644 index 0000000..b20d204 --- /dev/null +++ b/hold_reconciler.py @@ -0,0 +1,120 @@ +""" +This is the main entry point for this application. It find the newest reports (GP & OB) +then utilizes the reconcile module to find the differences between them. The output is +saved as an excel file with todays date. +""" +# Custom module for reconciliation +from helpers import setup_logging, find_most_recent_file, check_sheet +from models import OnBaseReport, GreatPlainsReport + +import pandas as pd +from pandas import DataFrame +import re +from re import Pattern +import logging +from tomllib import load +import logging.config +from datetime import datetime as dt + +""" +[ ] Pull in past reconciliations to check against +[ ] Record reconciled transaction (connect with VBA) +[ ] Check GP against the database +[ ] Check OB against the database +[X] Add resolution column to error sheets +[ ] Add sheet for problem contractas already seen and 'resolved' +""" + +setup_logging() +logger = logging.getLogger(__name__) +logger.info(f"Logger started with level: {logger.level}") + + +def get_reports(work_dir: str, report_config: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]: + """ + Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB + Excel files in a "Work" folder and returns their corresponding dataframes. + + Args: + excelConfig (dict): A dictionary containing configuration options for the GP and OB Excel files. + + Returns: + tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively. 
+ """ + + # Define regular expression patterns to match the GP and OB Excel files + gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE) + ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE) + + # Find the paths of the most recently modified GP and OB Excel files + gp_file_path = find_most_recent_file(work_dir, gp_regex) + logger.debug(f"gp_file_path: {gp_file_path}") + ob_file_path = find_most_recent_file(work_dir, ob_regex) + logger.debug(f"gp_file_path: {ob_file_path}") + + # Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns + gp_xl = pd.ExcelFile(gp_file_path) + gp_req_cols = [col["GP"] for _, col in report_config["shared_columns"].items()] + logger.debug(f"GP_Req_cols: {gp_req_cols}") + gp_sheets = gp_xl.sheet_names + gp_dfs = pd.read_excel(gp_xl, sheet_name=gp_sheets) + for sheet in gp_dfs: + sheet_columns: list[str] = list(gp_dfs[sheet].columns) + logger.debug(f"gp ({sheet}) : {sheet_columns}") + logger.debug(f"Matches {[r in sheet_columns for r in gp_req_cols]}") + if all([r in sheet_columns for r in gp_req_cols]): + logger.debug("FOUND") + gp_df = gp_dfs[sheet] + break + + ob_xl = pd.ExcelFile(ob_file_path) + ob_req_cols = [col["OB"] for _, col in report_config["shared_columns"].items()] + ob_sheets = ob_xl.sheet_names + ob_dfs = pd.read_excel(ob_xl, sheet_name=ob_sheets) + for sheet in ob_dfs: + sheet_columns: list[str] = list(ob_dfs[sheet].columns) + if all([r in sheet_columns for r in ob_req_cols]): + ob_df = ob_dfs[sheet] + break + + return ob_df, gp_df + + +def main() -> int: + """ + This is the main function for the script. It reads configuration options from a TOML file, reads in the GP and OB + Excel files, performs data reconciliation and analysis, and writes the results to a new Excel file. + + Returns: + int: 0 if the script executes successfully. + """ + # Read the configuration options from a TOML file + with open("config_reports.toml", "rb") as f: + reports_config: dict = load(f) + logger.debug(f"Reports Config: {reports_config}") + + # Get the GP and OB dataframes from the Excel files + ob_df, gp_df = get_reports("Work", reports_config) + assert not ob_df.empty, "OB Data empty!" + assert not gp_df.empty, "GP Data empty!" + + obr: OnBaseReport = OnBaseReport(ob_df, reports_config) + gpr: GreatPlainsReport = GreatPlainsReport(gp_df, reports_config) + + overdue: DataFrame = obr.get_overdue() + + no_match, amt_mismatch = obr.reconcile(gpr) + + # Write the results to a new Excel file + with pd.ExcelWriter(f"Work/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer: + no_match.to_excel(writer, sheet_name="No Match", index=False) + amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch", index=False) + overdue.to_excel(writer, sheet_name="Overdue", index=False) + + return 0 + + +if __name__ == "__main__": + print("Starting") + main() + print("Completed") \ No newline at end of file diff --git a/memory.py b/memory.py new file mode 100644 index 0000000..c7cd8b3 --- /dev/null +++ b/memory.py @@ -0,0 +1,156 @@ +""" +Classes and functions to parse completed reconciliation reports and remember +the resolutions of contracts. + +Also provides a way for the reconciler to check hold against previously +resolved holds. + +*Last Updated: version 1.3 +""" +from . 
import drop_unnamed +from ghlib.database.database_manager import SQLiteManager + +from pandas import DataFrame, Series, read_sql_query, read_excel, concat +from logging import getLogger + + +logger = getLogger(__name__) + + +def normalize_cols(df: DataFrame) -> DataFrame: + """ + + """ + + + + +def process_resolutions(df: DataFrame) -> DataFrame: + """ + + """ + # Drop unnamed columns: + drop_unnamed(df) # Works 'inplace' + + # Drop anything where resolution is blanks + df: DataFrame = df[~df["Resolution"].isnull()] + + # Standardize the resolution + df["Resolution"] = df["Resolution"].astype(str) + df["Resolution"] = df["Resolution"].apply(lambda res: res.strip().lower()) + + # Check for multiple 'onhold_amount' columns + cols: list[str] = list(df.keys()) + mult_amounts: bool = True if "onhold_amount_ob" in cols else False + + if mult_amounts: + # Create duplicates with the other amounts + gp_amts: DataFrame = df[ + ["contract_number", + "onhold_amount_gp", + "Resolution", + "Notes" + ]] + df = df[ + ["contract_number", + "onhold_amount_ob", + "Resolution", + "Notes" + ]] + + # Rename the amount columns and add the source + gp_amts.rename(columns={"onhold_amount_gp":"onhold_amount"}, inplace=True) + gp_amts["Source"] = "GP" + df.rename(columns={"onhold_amount_ob":"onhold_amount"}, inplace=True) + df["Source"] = "OB" + + # Combine them back together + df: DataFrame = concat([df, gp_amts]) + df["Type"] = "AmountMismatch" + + else: + # Filter columns + df = df[ + ["Source", + "contract_number", + "onhold_amount", + "Resolution", + "Notes" + ]] + df["Type"] = "NoMatch" + + return df + + +def save_recs(resolved_dataframes: list[DataFrame]): + """ + """ + sqlManager: SQLiteManager = SQLiteManager("OnHold.db") + with sqlManager.get_session() as session: + conn = session.connection() + + df: DataFrame + for df in resolved_dataframes: + try: + # Drop uneeded columns and filter only to resolved data + df = process_resolutions(df) + # Save to the database + df.to_sql("Resolutions", conn, if_exists="append") + except Exception as e: + logger.exception(f"Could not save resolution dataframe: {e}") + continue + + +def get_prev_reconciled(contracts: list[str]) -> DataFrame: + """ + Get a DataFrame of previously reconciled contracts from an SQLite database. + + Args: + contracts (list[str]): A list of contract numbers to check for previously reconciled contracts. + + Returns: + DataFrame: A DataFrame of previously reconciled contracts, or an empty DataFrame if none are found. 
+ """ + # Create a DB manager + sqlManager: SQLiteManager = SQLiteManager("OnHold.db") + + # Create a temp table to hold this batches contract numbers + # this table will be cleared when sqlManager goes out of scope + temp_table_statement = """ + CREATE TEMPORARY TABLE CUR_CONTRACTS (contract_numbers VARCHAR(11)); + """ + sqlManager.execute(temp_table_statement) + + # Insert the current contracts into the temp table + insert_contracts = f""" + INSERT INTO CUR_CONTRACTS (contract_numbers) VALUES + {', '.join([f"('{cn}')" for cn in contracts])}; + """ + sqlManager.execute(insert_contracts) + + # Select previously resolved contracts + res_query = """ + SELECT r.* + FROM Resolutions r + JOIN CUR_CONTRACTS t + ON r.contract_number = t.contract_number; + """ + resolved: DataFrame = sqlManager.execute(res_query, as_dataframe=True) + return resolved + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + prog="HoldReconcilerRecord", + ) + parser.add_argument("-i", "--input") + args = parser.parse_args() + + # No Match + no_match: DataFrame = read_excel(args.input, sheet_name="No Match") + # Amount Mismatch + amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch") + + save_recs(resolved_dataframes=[no_match, amt_mm]) \ No newline at end of file diff --git a/rec_lib.py b/rec_lib.py deleted file mode 100644 index 98d9a3b..0000000 --- a/rec_lib.py +++ /dev/null @@ -1,251 +0,0 @@ -import pandas as pd -from pandas import DataFrame -from datetime import datetime as dt -import datetime -import re -from typing import Literal -import logging - - -logger = logging.getLogger(__name__) - - -def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame: - """ - Given a DataFrame containing OnBase installation data and a dictionary containing the OnBase Excel configuration, - this function returns a DataFrame containing the rows from `onbase_df` that have an installation date that is before - the current date. - - Args: - onbase_df (pd.DataFrame): A pandas DataFrame containing OnBase installation data. - onbase_excel_config (dict): A dictionary containing the OnBase Excel configuration. - - Returns: - pd.DataFrame: A pandas DataFrame containing the rows from `onbase_df` that have an installation date that is before - the current date. - """ - id_col = onbase_excel_config["install_date"] - onbase_df[id_col] = pd.to_datetime(onbase_df[id_col]) - onbase_df[id_col].fillna(pd.NaT, inplace=True) - return onbase_df[onbase_df[id_col].dt.date < datetime.date.today()] - - -def filter_gp(gp_dataframe: pd.DataFrame, full_config: dict) -> pd.DataFrame: - """ - Given a pandas DataFrame containing GP data and a dictionary containing the GP configuration, this function - filters out rows from the DataFrame that are not needed for further analysis based on certain criteria. - - Args: - gp_dataframe (pd.DataFrame): A pandas DataFrame containing GP data. - gp_config (dict): A dictionary containing the GP configuration. - - Returns: - pd.DataFrame: A pandas DataFrame containing the filtered GP data. 
- """ - - # Excludes anything that contains cma with a space or digit following it - # CMA23532 would be excluded but 'John Locman' would be allowed - GOOD_PO_NUM = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE) - - gp_config: dict = full_config["ExcelColumns"]["GP"] - doc_num_regexes: list[str] = full_config["DocNumFilter"] - - bad_doc_num = '' - rx : str - for rx in doc_num_regexes: - bad_doc_num += f"({rx})|" - bad_doc_num = re.compile(bad_doc_num[:-1], re.IGNORECASE) - logger.debug(f"Doc # filter: {bad_doc_num}") - # Create a filter/mask to use on the data - mask = ( - (gp_dataframe[gp_config['doc_type']] == "Invoice") & - (gp_dataframe[gp_config['pur_order']].str.contains(GOOD_PO_NUM)) - ) - - # Get the rows to drop based on the filter/mask - rows_to_drop = gp_dataframe[~mask].index - - # Drop the rows and return the filtered DataFrame - filtered_df = gp_dataframe.drop(rows_to_drop, inplace=False) - - mask = filtered_df[gp_config['doc_num']].str.contains(bad_doc_num) - rows_to_drop = filtered_df[mask].index - - return filtered_df.drop(rows_to_drop, inplace=False) - - -def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"], excelConfig: dict): - """ - Given a pandas DataFrame containing transaction data, the source of the data ("GP" or "OB"), and a dictionary - containing the Excel configuration, this function creates a new DataFrame with columns for the contract number, - the amount on hold, a unique transaction ID, and the source of the data. - - Args: - dataframe (pd.DataFrame): A pandas DataFrame containing transaction data. - source (Literal["GP", "OB"]): The source of the data ("GP" or "OB"). - excelConfig (dict): A dictionary containing the Excel configuration. - - Returns: - pd.DataFrame: A pandas DataFrame containing the contract number, amount on hold, transaction ID, and data source - for each transaction in the original DataFrame. 
- """ - column_config: dict = excelConfig[source] - logger.debug(f"column_config: {column_config}") - # Create a new DataFrame with the contract number and on-hold amount columns - transactions = dataframe[[column_config["contract_number"], column_config["onhold_amount"]]].copy() - - # Rename the columns to standardize the column names - transactions.rename(columns={ - column_config["contract_number"]: "contract_number", - column_config["onhold_amount"]: "onhold_amount", - }, inplace=True) - - # Convert the on-hold amount column to float format and round to two decimal places - transactions["onhold_amount"] = transactions["onhold_amount"].astype(float).round(2) - - # Use regex to extract the contract number from the column values and create a new column with the standardized format - CN_REGEX = re.compile(r"\d{7}(-\d{3})?") - transactions["contract_number"] = transactions["contract_number"].apply( - lambda cn: str(cn) if not re.search(CN_REGEX, str(cn)) - else re.search(CN_REGEX, str(cn)).group(0) - ) - - # Create a new column with a unique transaction ID - transactions["ID"] = transactions["contract_number"] +'_'+\ - transactions["onhold_amount"].astype(str) - - # Create a new column with the data source - transactions["Source"] = source - - # Return the new DataFrame with the contract number, on-hold amount, transaction ID, and data source columns - return transactions - - -def get_no_match(obt_df: pd.DataFrame, gpt_df: pd.DataFrame): - """ - Given two pandas DataFrames containing transaction data from OBT and GPT, respectively, this function returns a new - DataFrame containing only the transactions that do not have a match in both the OBT and GPT DataFrames. - - Args: - obt_df (pd.DataFrame): A pandas DataFrame containing transaction data from OBT. - gpt_df (pd.DataFrame): A pandas DataFrame containing transaction data from GPT. - - Returns: - pd.DataFrame: A pandas DataFrame containing the transactions that do not have a match in both the OBT and GPT - DataFrames. - """ - # Merge the two DataFrames using the contract number as the join key - merged_df = pd.merge( - obt_df, gpt_df, - how="outer", - on=["contract_number"], - suffixes=("_ob", "_gp") - ) - - # Filter the merged DataFrame to include only the transactions that do not have a match in both OBT and GPT - no_match = merged_df.loc[ - (merged_df["Source_ob"].isna()) | - (merged_df["Source_gp"].isna()) - ] - - # Fill in missing values and drop unnecessary columns - no_match["Source"] = no_match["Source_ob"].fillna("GP") - no_match["onhold_amount"] = no_match["onhold_amount_ob"].fillna(no_match["onhold_amount_gp"]) - no_match.drop(columns=[ - "ID_ob", "ID_gp", - "onhold_amount_ob", "onhold_amount_gp", - "Source_ob", "Source_gp" - ], - inplace=True) - - # Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns - no_match = no_match[ - [ "Source", "contract_number", "onhold_amount"] - ] - - return no_match - - -def get_not_full_match(obt_df: pd.DataFrame, gpt_df: pd.DataFrame): - """ - Given two pandas DataFrames containing transaction data from OBT and GPT, respectively, this function returns two new - DataFrames. The first DataFrame contains the transactions that have a full match on both the OBT and GPT DataFrames, - and the second DataFrame contains the transactions that do not have a full match. - - Args: - obt_df (pd.DataFrame): A pandas DataFrame containing transaction data from OBT. - gpt_df (pd.DataFrame): A pandas DataFrame containing transaction data from GPT. 
- - Returns: - tuple(pd.DataFrame, pd.DataFrame): A tuple of two DataFrames. The first DataFrame contains the transactions that - have a full match on both the OBT and GPT DataFrames, and the second DataFrame contains the transactions that do - not have a full match. - """ - # Combine the two DataFrames using an outer join on the contract number and on-hold amount - merged_df = pd.merge( - obt_df, gpt_df, - how="outer", - on=["ID", "contract_number", "onhold_amount"], - suffixes=("_ob", "_gp") - ) - - # Filter the merged DataFrame to include only the transactions that have a full match in both OBT and GPT - full_matched = merged_df.dropna(subset=["Source_ob", "Source_gp"]) - full_matched.drop(columns=["Source_ob", "Source_gp"], inplace=True) - - # Create a boolean mask for the rows to drop in full_matched - mask = merged_df["ID"].isin(full_matched["ID"]) - # Use the mask to remove the selected rows and create a new DataFrame for not full match - not_full_match = merged_df[~mask] - # This includes items that DO match contracts, but not amounts - # It can have multiple items from one source with the same contract number - - # Create a new column with the data source, using OBT as the default and GPT as backup if missing - not_full_match["Source"] = not_full_match["Source_ob"].fillna(not_full_match["Source_gp"]) - - # Drop the redundant Source columns - not_full_match.drop(columns=["Source_ob", "Source_gp"], inplace=True) - - # Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns - not_full_match = not_full_match[ - [ "Source", "contract_number", "onhold_amount"] - ] - - # Return the two DataFrames - return full_matched, not_full_match - - -def get_contract_match(not_full_match: pd.DataFrame) -> pd.DataFrame: - """ - Given a pandas DataFrame containing transactions that do not have a full match between OBT and GPT, this function - returns a new DataFrame containing only the transactions that have a matching contract number in both OBT and GPT. - - Args: - not_full_match (pd.DataFrame): A pandas DataFrame containing transactions that do not have a full match between - OBT and GPT. - - Returns: - pd.DataFrame: A pandas DataFrame containing only the transactions that have a matching contract number in both - OBT and GPT. 
- """ - # Filter the not_full_match DataFrame by source - ob_df = not_full_match[not_full_match["Source"] == "OB"] - gp_df = not_full_match[not_full_match["Source"] == "GP"] - - # Merge the two filtered DataFrames on the contract number - contract_match = pd.merge( - ob_df, gp_df, - how="inner", - on=["contract_number"], - suffixes=("_ob", "_gp") - ) - - # Fill in missing values in the Source column and drop the redundant columns - contract_match.drop(columns=["Source_ob", "Source_gp"], inplace=True) - - # Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns - contract_match = contract_match[ - [ "contract_number", "onhold_amount_ob", "onhold_amount_gp"] - ] - - return contract_match \ No newline at end of file diff --git a/rec_records.py b/rec_records.py deleted file mode 100644 index 2072e2e..0000000 --- a/rec_records.py +++ /dev/null @@ -1,21 +0,0 @@ -from pandas import DataFrame, Series, read_sql_query, read_excel -import sqlite3 as sqll -import sqlalchemy as sqa -import argparse - -def drop_unnamed(df: DataFrame): - cols = [c for c in df.columns if "Unnamed" in c] - df.drop(cols, axis=1, inplace=True) - -parser = argparse.ArgumentParser( - prog="HoldReconcilerRecord", -) -parser.add_argument("-i", "--input") -args = parser.parse_args() -# Resolution col - -no_match: DataFrame = read_excel(args.input, sheet_name="No Match") -amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch") -drop_unnamed(no_match) -drop_unnamed(amt_mm) -print(no_match) \ No newline at end of file diff --git a/reconcile_holds.py b/reconcile_holds.py deleted file mode 100644 index 63bbe6b..0000000 --- a/reconcile_holds.py +++ /dev/null @@ -1,191 +0,0 @@ -import pandas as pd -from pandas import DataFrame, Series -import re -from re import Pattern -import os -from os.path import basename -import glob -import logging -from pathlib import Path -from tomllib import load -import logging.config -from datetime import datetime as dt - -""" -[ ] Pull in past reconciliations to check against -[ ] Record reconciled transaction (connect with VBA) -[ ] Check GP against the database -[ ] Check OB against the database -[ ] Add resolution column to error sheets -""" - -# Custom module for reconciliation -from rec_lib import get_contract_match, get_no_match, \ - get_not_full_match, get_overdue, filter_gp, create_transaction_df - -def setup_logging(): - """ - Sets up logging configuration from the TOML file. If the logging configuration fails to be loaded from the file, - a default logging configuration is used instead. - - Returns: - logging.Logger: The logger instance. - """ - with open("config.toml", "rb") as f: - config_dict: dict = load(f) - try: - # Try to load logging configuration from the TOML file - logging.config.dictConfig(config_dict["logger"]) - except Exception as e: - # If the logging configuration fails, use a default configuration and log the error - logger = logging.getLogger() - logger.setLevel(logging.DEBUG) - logger.warning("Failed setting up logger!") - logger.exception(e) - logger.warning(f"Config:\n{config_dict}") - return logger - - -setup_logging() -logger = logging.getLogger(__name__) -logger.info(f"Logger started with level: {logger.level}") - -def find_most_recent_file(folder_path: Path, file_pattern: Pattern) -> str: - """ - Given a folder path and a regular expression pattern, this function returns the path of the most recently modified - file in the folder that matches the pattern. 
- - Args: - folder_path (Path): A pathlib.Path object representing the folder to search. - file_pattern (Pattern): A regular expression pattern used to filter the files in the folder. - - Returns: - str: The path of the most recently modified file in the folder that matches the pattern. - """ - # Find all files in the folder that match the pattern - files = glob.glob(f"{folder_path}/*") - logger.debug(f"files: {files}") - - # Get the modification time of each file and filter to only those that match the pattern - file_times = [(os.path.getmtime(path), path) for path in files if re.match(file_pattern, basename(path))] - - # Sort the files by modification time (most recent first) - file_times.sort(reverse=True) - logger.debug(f"file times: {file_times}") - - # Return the path of the most recent file - return file_times[0][1] - - -def check_sheet(df_cols: list[str], excel_col_config: dict) -> bool: - """ - Given a list of column names and a dictionary of column name configurations, this function checks if the required - columns are present in the list of column names. - - Args: - df_cols (list[str]): A list of column names. - excel_col_config (dict): A dictionary of column name configurations. - - Returns: - bool: True if all of the required columns are present in the list of column names, False otherwise. - """ - # Get the list of required columns from the column configuration dictionary - required_cols: list[str] = list(excel_col_config.values()) - # Check if all of the required columns are present in the list of column names - return all([col in df_cols for col in required_cols]) - - -def get_dataframes(work_dir: str, excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]: - """ - Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB - Excel files in a "Work" folder and returns their corresponding dataframes. - - Args: - excelConfig (dict): A dictionary containing configuration options for the GP and OB Excel files. - - Returns: - tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively. - """ - - # Define regular expression patterns to match the GP and OB Excel files - gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE) - ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE) - - # Find the paths of the most recently modified GP and OB Excel files - gp_file_path = find_most_recent_file(work_dir, gp_regex) - logger.debug(f"gp_file_path: {gp_file_path}") - ob_file_path = find_most_recent_file(work_dir, ob_regex) - logger.debug(f"gp_file_path: {ob_file_path}") - - # Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns - gp_xl = pd.ExcelFile(gp_file_path) - gp_config = excelConfig["GP"] - gp_sheets = gp_xl.sheet_names - gp_dfs = pd.read_excel(gp_xl, sheet_name=gp_sheets) - for sheet in gp_dfs: - if check_sheet(gp_dfs[sheet].columns, gp_config): - gp_df = gp_dfs[sheet] - break - - ob_xl = pd.ExcelFile(ob_file_path) - ob_config = excelConfig["OB"] - ob_sheets = ob_xl.sheet_names - ob_dfs = pd.read_excel(ob_xl, sheet_name=ob_sheets) - for sheet in ob_dfs: - if check_sheet(ob_dfs[sheet].columns, ob_config): - ob_df = ob_dfs[sheet] - break - - return ob_df, gp_df - - -def main() -> int: - """ - This is the main function for the script. It reads configuration options from a TOML file, reads in the GP and OB - Excel files, performs data reconciliation and analysis, and writes the results to a new Excel file. 
-
-    Returns:
-        int: 0 if the script executes successfully.
-    """
-    # Read the configuration options from a TOML file
-    with open("config.toml", "rb") as f:
-        config_dict: dict = load(f)
-    logger.debug(f"Config: {config_dict}")
-
-    excelConfig: dict = config_dict["ExcelColumns"]
-
-    # Get the GP and OB dataframes from the Excel files
-    ob_df, gp_df = get_dataframes(config_dict["write_dir"] ,excelConfig)
-    assert not ob_df.empty, "OB Data empty!"
-    assert not gp_df.empty, "GP Data empty!"
-
-    # Filter the GP dataframe to include only relevant transactions
-    fgp_df: DataFrame = filter_gp(gp_df, config_dict)
-    # Get the overdue transactions from the OB dataframe
-    overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"])
-
-    # Create transaction dataframes for the GP and OB dataframes
-    ob_transactions: DataFrame = create_transaction_df(ob_df, 'OB', excelConfig)
-    gp_transactions: DataFrame = create_transaction_df(fgp_df, 'GP', excelConfig)
-
-    # Get the transactions that do not have matches in both the GP and OB dataframes
-    no_match: DataFrame = get_no_match(ob_transactions, gp_transactions)
-
-    # Get the transactions that have matches in both the GP and OB dataframes but have amount mismatches
-    full_match, not_full_match = get_not_full_match(ob_transactions, gp_transactions)
-    only_contracts_match: DataFrame = get_contract_match(not_full_match)
-
-    # Write the results to a new Excel file
-    with pd.ExcelWriter(f"{config_dict['write_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
-        full_match.to_excel(writer,sheet_name="FULL", index=False)
-        no_match.to_excel(writer, sheet_name="No Match", index=False)
-        only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False)
-        overdue.to_excel(writer, sheet_name="Overdue", index=False)
-
-    return 0
-
-
-if __name__ == "__main__":
-    print("Starting")
-    main()
-    print("Completed")
\ No newline at end of file
diff --git a/reports.py b/reports.py
new file mode 100644
index 0000000..533b93c
--- /dev/null
+++ b/reports.py
@@ -0,0 +1,188 @@
+from pandas import DataFrame, merge, to_datetime, NaT
+from numpy import concatenate
+from abc import ABC, abstractmethod
+from logging import getLogger
+import re
+from typing import Literal
+import datetime
+
+from helpers import CN_REGEX
+
+logger = getLogger(__name__)
+
+
+class HoldReport(ABC):
+    """
+    Base class for an on-hold report pulled from one source system (GP or OB).
+    """
+
+    source = ""
+
+    def __init__(self, dataframe: DataFrame, reports_config: dict) -> None:
+        self.config = reports_config
+        self.df = dataframe
+        self._normalize()
+
+    def _normalize(self):
+        # Rename the columns to standardize the column names
+        self.df.rename(
+            columns={unique_cols[self.source]: common_col
+                     for common_col, unique_cols in self.config["shared_columns"].items()},
+            inplace=True)
+
+        # Convert the on-hold amount column to float format and round to two decimal places
+        self.df["onhold_amount"] = self.df["onhold_amount"].astype(float).round(2)
+
+        # Use regex to extract the contract number from the column values and standardize the format
+        self.df["contract_number"] = self.df["contract_number"].apply(
+            lambda cn: str(cn) if not re.search(CN_REGEX, str(cn))
+            else re.search(CN_REGEX, str(cn)).group(0)
+        )
+
+        # Create a new column with a unique transaction ID
+        self.df["ID"] = self.df["contract_number"] +'_'+\
+                        self.df["onhold_amount"].astype(str)
+
+        # Create a new column with the data source
+        self.df["Source"] = self.source
+
+    def _get_no_match(self, other: 'HoldReport'):
+        # Merge the two DataFrames using the contract number as the join key
+        outer_merge = merge(
+            self.df, other.df,
+            how="outer",
+            on=["contract_number"],
+            suffixes=('_'+self.source, '_'+other.source)
+        )
+
+        # Filter the merged DataFrame to the transactions that do not appear in both reports
+        no_match = outer_merge.loc[
+            (outer_merge[f"Source_{self.source}"].isna()) |
+            (outer_merge[f"Source_{other.source}"].isna())
+        ]
+
+        # Fill in missing values from whichever side is present
+        no_match["Source"] = no_match[f"Source_{self.source}"].fillna(other.source)
+        no_match["onhold_amount"] = no_match[f"onhold_amount_{self.source}"].fillna(
+            no_match[f"onhold_amount_{other.source}"]
+        )
+        no_match["vendor_name"] = no_match[f"vendor_name_{self.source}"].fillna(
+            no_match[f"vendor_name_{other.source}"]
+        )
+
+        return no_match
+
+    def _get_contract_matches(self, other: 'HoldReport') -> DataFrame:
+        """
+        Return the rows whose contract numbers appear in both reports but whose
+        on-hold amounts disagree.
+        """
+        # Merge the two filtered DataFrames on the contract number
+        contract_match = merge(
+            self.df, other.df,
+            how="inner",
+            on=["contract_number"],
+            suffixes=('_'+self.source, '_'+other.source)
+        )
+
+        # Keep only the contract matches where the amounts disagree
+        contract_match = contract_match[
+            contract_match[f"onhold_amount_{self.source}"] !=
+            contract_match[f"onhold_amount_{other.source}"]
+        ]
+
+        contract_match["vendor_name"] = contract_match[f"vendor_name_{self.source}"].fillna(
+            contract_match[f"vendor_name_{other.source}"]
+        )
+
+        return contract_match
+
+    @staticmethod
+    def _add_work_columns(df: DataFrame) -> DataFrame:
+        """
+        Add empty columns to the dataframe to facilitate working through the report.
+        """
+        WORK_COLS = ["Resolution", "Notes"]
+        for col in WORK_COLS:
+            df[col] = ''
+        return df
+
+    def reconcile(self, other: 'HoldReport') -> tuple[DataFrame, DataFrame]:
+        """
+        Reconcile this report against another and return the 'no match' and
+        'amount mismatch' DataFrames.
+        """
+        no_match: DataFrame = self._get_no_match(other)
+        no_match.to_excel("NOMATCH.xlsx")  # debug snapshot
+        logger.debug(f"no_match: {no_match}")
+
+        amount_mismatch: DataFrame = self._get_contract_matches(other)
+        amount_mismatch.to_excel("AMTMM.xlsx")  # debug snapshot
+        logger.debug(f"amount_mismatch: {amount_mismatch}")
+
+        # Select and reorder columns
+        no_match = no_match[
+            ["Source"] + self.config["output_columns"]
+        ]
+        no_match = self._add_work_columns(no_match)
+
+        amount_mismatch = amount_mismatch[
+            self.config["output_columns"]
+        ]
+        amount_mismatch = self._add_work_columns(amount_mismatch)
+
+        return no_match, amount_mismatch
+
+
+class OnBaseReport(HoldReport):
+
+    source = "OB"
+
+    def get_overdue(self) -> DataFrame:
+        """
+        Return the rows whose install date is before today.
+        """
+        # "InstallDate" is OB-only, so it keeps its original name through _normalize
+        self.df["InstallDate"] = to_datetime(self.df["InstallDate"])
+        self.df["InstallDate"].fillna(NaT, inplace=True)
+        return self.df[self.df["InstallDate"].dt.date < datetime.date.today()]
+
+
+class GreatPlainsReport(HoldReport):
+
+    source = "GP"
+    filtered_df: bool = False
+
+    def __init__(self, dataframe: DataFrame, report_config: dict) -> None:
+        self._filter(
+            gp_report_df=dataframe,
+            doc_num_filters=report_config["gp_filters"]["doc_num_filters"],
+            good_po_num_regex=report_config["gp_filters"]["po_filter"]
+        )
+        super().__init__(dataframe, report_config)
+
+    @staticmethod
+    def _filter(gp_report_df: DataFrame,
+                doc_num_filters: list[str], good_po_num_regex: str) -> DataFrame:
+
+        GOOD_PO_NUM = re.compile(good_po_num_regex, re.IGNORECASE)
+
+        # OR the individual document-number filters together into one regex
+        bad_doc_num = ''
+        rx: str
+        for rx in doc_num_filters:
+            bad_doc_num += f"({rx})|"
+        bad_doc_num = re.compile(bad_doc_num[:-1], re.IGNORECASE)
+
+        # Create a mask/filter that will keep rows that match these
+        # requirements
+        keep_mask = (
+            (gp_report_df["Document Type"] == "Invoice") &
+            (gp_report_df["Purchase Order Number"].str.contains(GOOD_PO_NUM))
+        )
+
+        # Get the rows that DO NOT fit the keep_mask
+        rows_to_drop = gp_report_df[~keep_mask].index
+        # Drop the rows to filter
+        gp_report_df.drop(rows_to_drop, inplace=True)
+
+        # Create a filter to remove rows that meet this requirement
+        # Making this a negative in the keep mask is more trouble than
+        # it's worth
+        remove_mask = gp_report_df["Document Number"].str.contains(bad_doc_num)
+        rows_to_drop = gp_report_df[remove_mask].index
+        gp_report_df.drop(rows_to_drop, inplace=True)
+
+        return gp_report_df
diff --git a/version.txt b/version.txt
new file mode 100644
index 0000000..415b19f
--- /dev/null
+++ b/version.txt
@@ -0,0 +1 @@
+2.0
\ No newline at end of file
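
Usage sketch: a minimal example of how the new class-based API is meant to be
driven, assuming config_reports.toml is in the working directory. The two
DataFrames are toy inputs; the column names come from the raw GP and OB report
formats referenced in the config, but the values are invented for illustration
only.

    from tomllib import load
    from pandas import DataFrame
    from reports import OnBaseReport, GreatPlainsReport

    with open("config_reports.toml", "rb") as f:
        config = load(f)

    # Raw OnBase report columns (before normalization)
    ob = DataFrame({
        "Contract": ["1234567-001", "7654321-002"],
        "CurrentOnHold": [100.00, 250.50],
        "DealerName": ["Acme Homes", "Best Builders"],
        "AppNum": ["A1", "A2"],
        "DateBooked": ["2023-01-05", "2023-02-10"],
        "InstallDate": ["2023-03-01", "2023-04-01"],
    })

    # Raw Great Plains report columns (before filtering and normalization)
    gp = DataFrame({
        "Transaction Description": ["HOLD 1234567-001", "HOLD 1111111-003"],
        "Current Trx Amount": [98.50, 75.25],
        "Vendor Name": ["Acme Homes", "Other Vendor"],
        "Document Number": ["1-316141 HOLD", "1-316142 HOLD"],
        "Purchase Order Number": ["ABC123", "XYZ789"],
        "Document Type": ["Invoice", "Invoice"],
    })

    obr = OnBaseReport(ob, config)
    gpr = GreatPlainsReport(gp, config)   # filters the GP rows, then normalizes

    # 7654321-002 / 1111111-003 land in no_match; 1234567-001 (100.00 vs 98.50)
    # lands in amount_mismatch
    no_match, amount_mismatch = obr.reconcile(gpr)
    overdue = obr.get_overdue()

Once a run's workbook has been worked through (Resolution column filled in),
the memory module's CLI is the intended way to record it, e.g.:

    python memory.py -i "Work/Reconciled Holds [04-20-2023].xlsx"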