- Integrated new ReportConfig into program

- Added full test to check everything works as expected after small changes - A bit of project restructuring, with switch to absolute imports
Finished config implementation and added testing for config classes.
22 changed files with 1257 additions and 498 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,5 +3,13 @@ venv/
 work/
 build/
 dist/
+ghlib/

-*.log
+*.log
+*.xlsx
+*.csv
+*.db
+*.txt
+
+!version.txt
+!tests/test_inputs/TestSearch/*
--- a/Reconciler.spec
+++ b/Reconciler.spec
@ -5,11 +5,11 @@ block_cipher = None


 a = Analysis(
-    ['reconcile_holds.py'],
-    pathex=[],
+    ['hold_reconciler.py'],
+    pathex=['\\leafnow.com\shared\Business Solutions\Griff\Code\HoldReconciler'],
    binaries=[],
-    datas=[('config.toml', '.'), ('requirements.txt', '.')],
-    hiddenimports=['openpyxl'],
+    datas=[('.\\config_logger.toml', '.'), ('.\\config_reports.toml', '.')],
+    hiddenimports=['reports.*','memory.*','helpers.*'],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
--- a/config.toml
+++ b/config.toml
@ -1,52 +0,0 @@
-write_dir = "Work"
-DocNumFilter = [
-    "p(oin)?ts",
-    "pool",
-    "promo",
-    "o(ver)?f(und)?",
-    "m(ar)?ke?t",
-    "title",
-    "adj",
-    "reg free",
-    "cma"
-]
-[ExcelColumns]
-
-    [ExcelColumns.OB]
-    contract_number = "Contract" # 3070508-007
-    onhold_amount = "CurrentOnHold"
-    install_date = "InstallDate"
-
-    [ExcelColumns.GP]
-    contract_number = "Transaction Description" # 1234-56789
-    onhold_amount = "Current Trx Amount"
-    doc_num = "Document Number" # 1-316141 HOLD
-    pur_order = "Purchase Order Number" # ABC123
-    doc_type = "Document Type" # Invoice or Credit Memo
-
-
-
-
-[logger]
-    version = 1
-
-    disable_existing_loggers = false
-
-    [logger.formatters.custom]
-    format = "'%(asctime)s - %(module)s - %(levelname)s - %(message)s'"
-
-    [logger.handlers.console]
-    class = "logging.StreamHandler"
-    level = "DEBUG"
-    formatter = "custom"
-    stream = "ext://sys.stdout"
-
-    [logger.handlers.file]
-    class = "logging.FileHandler"
-    level = "DEBUG"
-    formatter = "custom"
-    filename = "on_hold.log"
-
-    [logger.root]
-    level = "DEBUG"
-    handlers = ["console", "file"]
--- a/rec_lib.py
+++ b/rec_lib.py
@ -1,251 +0,0 @@
-import pandas as pd
-from pandas import DataFrame
-from datetime import datetime as dt
-import datetime
-import re
-from typing import Literal
-import logging
-
-
-logger = logging.getLogger(__name__)
-
-
-def get_overdue(onbase_df: DataFrame, onbase_excel_config) -> DataFrame:
-    """
-    Given a DataFrame containing OnBase installation data and a dictionary containing the OnBase Excel configuration,
-    this function returns a DataFrame containing the rows from `onbase_df` that have an installation date that is before
-    the current date.
-
-    Args:
-        onbase_df (pd.DataFrame): A pandas DataFrame containing OnBase installation data.
-        onbase_excel_config (dict): A dictionary containing the OnBase Excel configuration.
-
-    Returns:
-        pd.DataFrame: A pandas DataFrame containing the rows from `onbase_df` that have an installation date that is before
-        the current date.
-    """
-    id_col = onbase_excel_config["install_date"]
-    onbase_df[id_col] = pd.to_datetime(onbase_df[id_col])
-    onbase_df[id_col].fillna(pd.NaT, inplace=True)
-    return  onbase_df[onbase_df[id_col].dt.date < datetime.date.today()]
-
-
-def filter_gp(gp_dataframe: pd.DataFrame, full_config: dict) -> pd.DataFrame:
-    """
-    Given a pandas DataFrame containing GP data and a dictionary containing the GP configuration, this function
-    filters out rows from the DataFrame that are not needed for further analysis based on certain criteria.
-
-    Args:
-        gp_dataframe (pd.DataFrame): A pandas DataFrame containing GP data.
-        gp_config (dict): A dictionary containing the GP configuration.
-
-    Returns:
-        pd.DataFrame: A pandas DataFrame containing the filtered GP data.
-    """
-
-    # Excludes anything that contains cma with a space or digit following it
-    # CMA23532 would be excluded but 'John Locman' would be allowed
-    GOOD_PO_NUM = re.compile(r"^(?!.*cma(\s|\d)).*$", re.IGNORECASE)
-
-    gp_config: dict = full_config["ExcelColumns"]["GP"]
-    doc_num_regexes: list[str] = full_config["DocNumFilter"]
-
-    bad_doc_num = ''
-    rx : str
-    for rx in doc_num_regexes:
-        bad_doc_num += f"({rx})|"
-    bad_doc_num = re.compile(bad_doc_num[:-1], re.IGNORECASE)
-    logger.debug(f"Doc # filter: {bad_doc_num}")
-    # Create a filter/mask to use on the data
-    mask = (
-        (gp_dataframe[gp_config['doc_type']] == "Invoice") &
-        (gp_dataframe[gp_config['pur_order']].str.contains(GOOD_PO_NUM))
-    )
-
-    # Get the rows to drop based on the filter/mask
-    rows_to_drop = gp_dataframe[~mask].index
-
-    # Drop the rows and return the filtered DataFrame
-    filtered_df = gp_dataframe.drop(rows_to_drop, inplace=False)
-
-    mask = filtered_df[gp_config['doc_num']].str.contains(bad_doc_num)
-    rows_to_drop = filtered_df[mask].index
-
-    return filtered_df.drop(rows_to_drop, inplace=False)
-
-
-def create_transaction_df(dataframe: pd.DataFrame, source: Literal["GP", "OB"], excelConfig: dict):
-    """
-    Given a pandas DataFrame containing transaction data, the source of the data ("GP" or "OB"), and a dictionary
-    containing the Excel configuration, this function creates a new DataFrame with columns for the contract number,
-    the amount on hold, a unique transaction ID, and the source of the data.
-
-    Args:
-        dataframe (pd.DataFrame): A pandas DataFrame containing transaction data.
-        source (Literal["GP", "OB"]): The source of the data ("GP" or "OB").
-        excelConfig (dict): A dictionary containing the Excel configuration.
-
-    Returns:
-        pd.DataFrame: A pandas DataFrame containing the contract number, amount on hold, transaction ID, and data source
-        for each transaction in the original DataFrame.
-    """
-    column_config: dict = excelConfig[source]
-    logger.debug(f"column_config: {column_config}")
-    # Create a new DataFrame with the contract number and on-hold amount columns
-    transactions = dataframe[[column_config["contract_number"], column_config["onhold_amount"]]].copy()
-
-    # Rename the columns to standardize the column names
-    transactions.rename(columns={
-        column_config["contract_number"]: "contract_number",
-        column_config["onhold_amount"]: "onhold_amount",
-    }, inplace=True)
-
-    # Convert the on-hold amount column to float format and round to two decimal places
-    transactions["onhold_amount"] = transactions["onhold_amount"].astype(float).round(2)
-
-    # Use regex to extract the contract number from the column values and create a new column with the standardized format
-    CN_REGEX = re.compile(r"\d{7}(-\d{3})?")
-    transactions["contract_number"] = transactions["contract_number"].apply(
-        lambda cn: str(cn) if not re.search(CN_REGEX, str(cn))
-        else re.search(CN_REGEX, str(cn)).group(0)
-    )
-
-    # Create a new column with a unique transaction ID
-    transactions["ID"] = transactions["contract_number"] +'_'+\
-        transactions["onhold_amount"].astype(str)
-
-    # Create a new column with the data source
-    transactions["Source"] = source
-
-    # Return the new DataFrame with the contract number, on-hold amount, transaction ID, and data source columns
-    return transactions
-
-
-def get_no_match(obt_df: pd.DataFrame, gpt_df: pd.DataFrame):
-    """
-    Given two pandas DataFrames containing transaction data from OBT and GPT, respectively, this function returns a new
-    DataFrame containing only the transactions that do not have a match in both the OBT and GPT DataFrames.
-
-    Args:
-        obt_df (pd.DataFrame): A pandas DataFrame containing transaction data from OBT.
-        gpt_df (pd.DataFrame): A pandas DataFrame containing transaction data from GPT.
-
-    Returns:
-        pd.DataFrame: A pandas DataFrame containing the transactions that do not have a match in both the OBT and GPT
-        DataFrames.
-    """
-    # Merge the two DataFrames using the contract number as the join key
-    merged_df = pd.merge(
-        obt_df, gpt_df,
-        how="outer",
-        on=["contract_number"],
-        suffixes=("_ob", "_gp")
-    )
-
-    # Filter the merged DataFrame to include only the transactions that do not have a match in both OBT and GPT
-    no_match = merged_df.loc[
-        (merged_df["Source_ob"].isna()) |
-        (merged_df["Source_gp"].isna())
-    ]
-
-    # Fill in missing values and drop unnecessary columns
-    no_match["Source"] = no_match["Source_ob"].fillna("GP")
-    no_match["onhold_amount"] = no_match["onhold_amount_ob"].fillna(no_match["onhold_amount_gp"])
-    no_match.drop(columns=[
-        "ID_ob", "ID_gp",
-        "onhold_amount_ob", "onhold_amount_gp",
-        "Source_ob", "Source_gp"
-        ],
-    inplace=True)
-
-    # Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
-    no_match = no_match[
-        [ "Source", "contract_number", "onhold_amount"]
-    ]
-    
-    return no_match
-
-
-def get_not_full_match(obt_df: pd.DataFrame, gpt_df: pd.DataFrame):
-    """
-    Given two pandas DataFrames containing transaction data from OBT and GPT, respectively, this function returns two new
-    DataFrames. The first DataFrame contains the transactions that have a full match on both the OBT and GPT DataFrames,
-    and the second DataFrame contains the transactions that do not have a full match.
-
-    Args:
-        obt_df (pd.DataFrame): A pandas DataFrame containing transaction data from OBT.
-        gpt_df (pd.DataFrame): A pandas DataFrame containing transaction data from GPT.
-
-    Returns:
-        tuple(pd.DataFrame, pd.DataFrame): A tuple of two DataFrames. The first DataFrame contains the transactions that
-        have a full match on both the OBT and GPT DataFrames, and the second DataFrame contains the transactions that do
-        not have a full match.
-    """
-    # Combine the two DataFrames using an outer join on the contract number and on-hold amount
-    merged_df = pd.merge(
-        obt_df, gpt_df,
-        how="outer",
-        on=["ID", "contract_number", "onhold_amount"],
-        suffixes=("_ob", "_gp")
-    )
-
-    # Filter the merged DataFrame to include only the transactions that have a full match in both OBT and GPT
-    full_matched = merged_df.dropna(subset=["Source_ob", "Source_gp"])
-    full_matched.drop(columns=["Source_ob", "Source_gp"], inplace=True)
-
-    # Create a boolean mask for the rows to drop in full_matched
-    mask = merged_df["ID"].isin(full_matched["ID"])
-    # Use the mask to remove the selected rows and create a new DataFrame for not full match
-    not_full_match = merged_df[~mask]
-    # This includes items that DO match contracts, but not amounts
-    # It can have multiple items from one source with the same contract number
-
-    # Create a new column with the data source, using OBT as the default and GPT as backup if missing
-    not_full_match["Source"] = not_full_match["Source_ob"].fillna(not_full_match["Source_gp"])
-
-    # Drop the redundant Source columns
-    not_full_match.drop(columns=["Source_ob", "Source_gp"], inplace=True)
-
-    # Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
-    not_full_match = not_full_match[
-        [ "Source", "contract_number", "onhold_amount"]
-    ]
-    
-    # Return the two DataFrames
-    return full_matched, not_full_match
-
-
-def get_contract_match(not_full_match: pd.DataFrame) -> pd.DataFrame:
-    """
-    Given a pandas DataFrame containing transactions that do not have a full match between OBT and GPT, this function
-    returns a new DataFrame containing only the transactions that have a matching contract number in both OBT and GPT.
-
-    Args:
-        not_full_match (pd.DataFrame): A pandas DataFrame containing transactions that do not have a full match between
-        OBT and GPT.
-
-    Returns:
-        pd.DataFrame: A pandas DataFrame containing only the transactions that have a matching contract number in both
-        OBT and GPT.
-    """
-    # Filter the not_full_match DataFrame by source
-    ob_df = not_full_match[not_full_match["Source"] == "OB"]
-    gp_df = not_full_match[not_full_match["Source"] == "GP"]
-
-    # Merge the two filtered DataFrames on the contract number
-    contract_match = pd.merge(
-        ob_df, gp_df,
-        how="inner",
-        on=["contract_number"],
-        suffixes=("_ob", "_gp")
-    )
-
-    # Fill in missing values in the Source column and drop the redundant columns
-    contract_match.drop(columns=["Source_ob", "Source_gp"], inplace=True)
-
-    # Reorder and return the new DataFrame with the source, contract number, and on-hold amount columns
-    contract_match = contract_match[
-        [ "contract_number", "onhold_amount_ob", "onhold_amount_gp"]
-    ]
-    
-    return contract_match
--- a/reconcile_holds.py
+++ b/reconcile_holds.py
@ -1,190 +0,0 @@
-import pandas as pd
-from pandas import DataFrame, Series
-import re
-from re import Pattern
-import os
-from os.path import basename
-import glob
-import logging
-from pathlib import Path
-from tomllib import load
-import logging.config
-from datetime import datetime as dt
-
-"""
-[ ] Pull in past reconciliations to check against
-[ ] Record reconciled transaction (connect with VBA)
-[ ] Check GP against the database
-[ ] Check OB against the database
-"""
-
-# Custom module for reconciliation
-from rec_lib import get_contract_match, get_no_match, \
-    get_not_full_match, get_overdue, filter_gp, create_transaction_df
-
-def setup_logging():
-    """
-    Sets up logging configuration from the TOML file. If the logging configuration fails to be loaded from the file,
-    a default logging configuration is used instead.
-
-    Returns:
-        logging.Logger: The logger instance.
-    """
-    with open("config.toml", "rb") as f:
-        config_dict: dict = load(f)
-        try:
-            # Try to load logging configuration from the TOML file
-            logging.config.dictConfig(config_dict["logger"])
-        except Exception as e:
-            # If the logging configuration fails, use a default configuration and log the error
-            logger = logging.getLogger()
-            logger.setLevel(logging.DEBUG)
-            logger.warning("Failed setting up logger!")
-            logger.exception(e)
-            logger.warning(f"Config:\n{config_dict}")
-            return logger
-
-
-setup_logging()
-logger = logging.getLogger(__name__)
-logger.info(f"Logger started with level: {logger.level}")
-
-def find_most_recent_file(folder_path: Path, file_pattern: Pattern) -> str:
-    """
-    Given a folder path and a regular expression pattern, this function returns the path of the most recently modified
-    file in the folder that matches the pattern.
-
-    Args:
-        folder_path (Path): A pathlib.Path object representing the folder to search.
-        file_pattern (Pattern): A regular expression pattern used to filter the files in the folder.
-
-    Returns:
-        str: The path of the most recently modified file in the folder that matches the pattern.
-    """
-    # Find all files in the folder that match the pattern
-    files = glob.glob(f"{folder_path}/*")
-    logger.debug(f"files: {files}")
-    
-    # Get the modification time of each file and filter to only those that match the pattern
-    file_times = [(os.path.getmtime(path), path) for path in files if re.match(file_pattern, basename(path))]
-    
-    # Sort the files by modification time (most recent first)
-    file_times.sort(reverse=True)
-    logger.debug(f"file times: {file_times}")
-
-    # Return the path of the most recent file
-    return file_times[0][1]
-
-
-def check_sheet(df_cols: list[str], excel_col_config: dict) -> bool:
-    """
-    Given a list of column names and a dictionary of column name configurations, this function checks if the required
-    columns are present in the list of column names.
-
-    Args:
-        df_cols (list[str]): A list of column names.
-        excel_col_config (dict): A dictionary of column name configurations.
-
-    Returns:
-        bool: True if all of the required columns are present in the list of column names, False otherwise.
-    """
-    # Get the list of required columns from the column configuration dictionary
-    required_cols: list[str] = list(excel_col_config.values())
-    # Check if all of the required columns are present in the list of column names
-    return all([col in df_cols for col in required_cols])
-
-
-def get_dataframes(work_dir: str, excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]:
-    """
-    Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB
-    Excel files in a "Work" folder and returns their corresponding dataframes.
-
-    Args:
-        excelConfig (dict): A dictionary containing configuration options for the GP and OB Excel files.
-
-    Returns:
-        tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively.
-    """
-    
-    # Define regular expression patterns to match the GP and OB Excel files
-    gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE)
-    ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE)
-
-    # Find the paths of the most recently modified GP and OB Excel files
-    gp_file_path = find_most_recent_file(work_dir, gp_regex)
-    logger.debug(f"gp_file_path: {gp_file_path}")
-    ob_file_path = find_most_recent_file(work_dir, ob_regex)
-    logger.debug(f"gp_file_path: {ob_file_path}")
-
-    # Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns
-    gp_xl = pd.ExcelFile(gp_file_path)
-    gp_config = excelConfig["GP"]
-    gp_sheets = gp_xl.sheet_names
-    gp_dfs = pd.read_excel(gp_xl, sheet_name=gp_sheets)
-    for sheet in gp_dfs:
-        if check_sheet(gp_dfs[sheet].columns, gp_config):
-            gp_df = gp_dfs[sheet]
-            break
-    
-    ob_xl = pd.ExcelFile(ob_file_path)
-    ob_config = excelConfig["OB"]
-    ob_sheets = ob_xl.sheet_names
-    ob_dfs = pd.read_excel(ob_xl, sheet_name=ob_sheets)
-    for sheet in ob_dfs:
-        if check_sheet(ob_dfs[sheet].columns, ob_config):
-            ob_df = ob_dfs[sheet]
-            break
-    
-    return ob_df, gp_df
-
-
-def main() -> int:
-    """
-    This is the main function for the script. It reads configuration options from a TOML file, reads in the GP and OB
-    Excel files, performs data reconciliation and analysis, and writes the results to a new Excel file.
-
-    Returns:
-        int: 0 if the script executes successfully.
-    """
-    # Read the configuration options from a TOML file
-    with open("config.toml", "rb") as f:
-        config_dict: dict = load(f)
-    logger.debug(f"Config: {config_dict}")
-
-    excelConfig: dict = config_dict["ExcelColumns"]
-
-    # Get the GP and OB dataframes from the Excel files
-    ob_df, gp_df = get_dataframes(config_dict["write_dir"] ,excelConfig)
-    assert not ob_df.empty, "OB Data empty!"
-    assert not gp_df.empty, "GP Data empty!"
-
-    # Filter the GP dataframe to include only relevant transactions
-    fgp_df: DataFrame = filter_gp(gp_df, config_dict)
-    # Get the overdue transactions from the OB dataframe
-    overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"])
-
-    # Create transaction dataframes for the GP and OB dataframes
-    ob_transactions: DataFrame = create_transaction_df(ob_df, 'OB', excelConfig)
-    gp_transactions: DataFrame = create_transaction_df(fgp_df, 'GP', excelConfig)
-
-    # Get the transactions that do not have matches in both the GP and OB dataframes
-    no_match: DataFrame = get_no_match(ob_transactions, gp_transactions)
-
-    # Get the transactions that have matches in both the GP and OB dataframes but have amount mismatches
-    full_match, not_full_match = get_not_full_match(ob_transactions, gp_transactions)
-    only_contracts_match: DataFrame = get_contract_match(not_full_match)
-
-    # Write the results to a new Excel file
-    with pd.ExcelWriter(f"{config_dict['write_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer:
-        full_match.to_excel(writer,sheet_name="FULL", index=False)
-        no_match.to_excel(writer, sheet_name="No Match", index=False)
-        only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False)
-        overdue.to_excel(writer, sheet_name="Overdue", index=False)
-        
-    return 0
-
-
-if __name__ == "__main__":
-    print("Starting")
-    main()
-    print("Completed")
--- a/src/init.py
+++ b/src/init.py
@ -0,0 +1,6 @@
+from typing import TypeVar, Literal
+from enum import Enum
+
+class ReportSource(Enum):
+    """Identifies which of the two reports a piece of data comes from."""
+    # OnBase report
+    OB = "OB"
+    # Great Plains report
+    GP = "GP"
--- a/src/config.py
+++ b/src/config.py
@ -0,0 +1,198 @@
+from tomllib import load as t_load
+from json import load as j_load
+from pathlib import Path
+from dataclasses import dataclass
+from typing import TypedDict
+from re import Pattern, compile
+
+from src import ReportSource
+
+
+# A regular expression given either as pattern text or as an already-compiled pattern
+Regex = str | Pattern
+
+
+class ReportConfigError(Exception):
+    """Raised when a report configuration is incomplete or holds invalid values."""
+
+
+class SharedColumn(TypedDict, total=True):
+    """
+    Excel/Dataframe column that is shared between both GP & OB.
+
+    The keys mirror one `shared_columns` entry of the report config, which is
+    stored on ReportConfig exactly as it was loaded.
+    """
+    # Unified name used for the column once both reports are standardized
+    standardized_name: str
+    # Name of the column in the GP report
+    GP: str
+    # Name of the column in the OB report
+    OB: str
+
+
+class PathsConfig:
+    """
+    Configuration holding the paths to:
+     - input_directory: Where to search for new report files
+     - output_directory: Where the reconciliation output is saved
+     - gp_glob/ob_glob: glob patterns used to find new GP & OB files in the input directory
+     - db_path: path to an SQLite database, or None if no path was given
+    """
+
+    def __init__(self, in_dir: str, out_dir: str,
+        input_regex_dict: dict[str, str], db_path: str | None = None) -> None:
+        """
+        Args:
+            in_dir: Directory that is searched for report files.
+            out_dir: Directory for output files.
+            input_regex_dict: Glob patterns keyed by "GP" and "OB". A missing
+                key (or an empty/None mapping) falls back to "*.xlsx".
+            db_path: Optional path to an SQLite database.
+        """
+        self.input_directory: Path = Path(in_dir)
+        self.output_directory: Path = Path(out_dir)
+
+        # Always define db_path so has_database() never hits a missing attribute
+        self.db_path: Path | None = Path(db_path) if db_path is not None else None
+
+        # Each source falls back on its own, so a missing "GP" entry does not
+        # discard a configured "OB" pattern.
+        # The fallback picks up the newest of any xlsx file!
+        # TODO investigate warning
+        patterns = input_regex_dict or {}
+        self.gp_glob: str = patterns.get("GP", "*.xlsx")
+        self.ob_glob: str = patterns.get("OB", "*.xlsx")
+
+    def get_most_recent(self, report_type: ReportSource | None = None) -> Path | None | list[Path | None]:
+        """
+        Gets the most recent hold reports for OnBase and Great Plains.
+
+        Args:
+            report_type: The report to look for. If omitted, both reports are
+                looked up.
+
+        Returns:
+            The newest matching file for the requested report type, or None if
+            nothing matched. When no report type is specified, a two item list
+            ordered [OnBase, Great Plains] is returned, each item being a path
+            or None.
+
+        Raises:
+            NotImplementedError: If the report type has no glob pattern.
+        """
+        sources = [ReportSource.OB, ReportSource.GP] if report_type is None else [report_type]
+
+        report_files: list[Path | None] = []
+        for source in sources:
+            # Match against the enum class: reaching members through another
+            # member (rt.OB) is not supported on every Python version.
+            match source:
+                case ReportSource.OB:
+                    file_glob = self.ob_glob
+                case ReportSource.GP:
+                    file_glob = self.gp_glob
+                case _:
+                    raise NotImplementedError(
+                        f"No regex pattern for report type: {source}"
+                    )
+
+            # Newest file by st_ctime (creation time on Windows, last metadata
+            # change on Unix); default=None covers a pattern with no matches.
+            report_files.append(max(
+                self.input_directory.glob(file_glob),
+                key=lambda file: file.stat().st_ctime,
+                default=None,
+            ))
+
+        if len(report_files) > 1:
+            return report_files
+
+        return report_files[0]
+
+    def has_database(self) -> tuple[bool, bool]:
+        """
+        Returns whether the config has a SQLite database path and
+        whether that path exists
+        """
+        has_db: bool = isinstance(self.db_path, Path)
+        exists: bool = self.db_path.exists() if has_db else False
+        return has_db, exists
+
+
+@dataclass
+class ReportConfig:
+    """
+    Allows easy interaction with program configuration.
+    - Paths to files, db
+    - Report/Excel column naming
+    - Regexes
+    """
+
+    # Paths to work with
+    # - input/output
+    # - input discovery glob patterns
+    # - SQLite database path
+    paths: PathsConfig
+
+    use_mssql: bool
+
+    # Work columns are included in finished columns
+    work_columns: list[str]
+    finished_columns: list[str]
+
+    # Filter label -> compiled pattern, or list of compiled patterns
+    filters: dict[str, list[Pattern] | Pattern]
+
+    # Columns featured in both reports
+    # unified col name -> origin report -> origin col name
+    # e.g. contract_number -> GP -> Transaction Description
+    shared_columns: list[SharedColumn]
+
+    @staticmethod
+    def from_file(config_path: str|Path) -> 'ReportConfig':
+        """
+        Builds a ReportConfig from a toml or json config file.
+
+        Args:
+            config_path: Path to the config file; the suffix selects the parser.
+
+        Returns:
+            The populated ReportConfig, with every filter compiled to a Pattern.
+
+        Raises:
+            NotImplementedError: If the file suffix is neither .toml nor .json.
+            ReportConfigError: If a required key is missing, or a filter is
+                neither a regex nor a list.
+        """
+        config_path = Path(config_path)
+
+        with open(config_path, "rb") as config_file:
+            match config_path.suffix:
+                case ".toml":
+                    c_dict: dict = t_load(config_file)
+                case ".json":
+                    c_dict: dict = j_load(config_file)
+                case _:
+                    raise NotImplementedError(f"Only json and toml configs are supported not: {config_path.suffix}")
+
+        try:
+
+            path_config: PathsConfig = PathsConfig(
+                in_dir=c_dict["input_directory"],
+                out_dir=c_dict["output_directory"],
+                input_regex_dict=c_dict["input_glob_pattern"],
+                db_path=c_dict["database_path"],
+            )
+
+            # TODO not yet implemented, so the value in the config file is ignored
+            use_mssql = False
+
+            work_columns = c_dict["work_columns"]
+            finished_column = c_dict["finished_column"]
+
+            # Create filter dict with compiled regex
+            filters: dict[str, list[Pattern] | Pattern] = {}
+            for label, raw in c_dict["filters"].items():
+
+                if not isinstance(raw, Regex) and not isinstance(raw, list):
+                    v = raw  # name used by the error message below
+                    raise ReportConfigError(f"Filter items must be a valid regex pattern or a list of valid patterns!\
+                        {v} ({type(v)}) is not valid!")
+
+                # Compile pattern text; anything already compiled is kept as is
+                if isinstance(raw, list):
+                    filters[label] = [
+                        item if isinstance(item, Pattern) else compile(item)
+                        for item in raw
+                    ]
+                else:
+                    filters[label] = raw if isinstance(raw, Pattern) else compile(raw)
+
+            shared_columns: list[SharedColumn] = c_dict["shared_columns"]
+
+        except KeyError as ke:
+            # Chain the KeyError so the traceback still shows the missing key
+            raise ReportConfigError(f"Invalid report config!\n{ke}") from ke
+
+        return ReportConfig(
+            paths=path_config,
+            use_mssql=use_mssql,
+            work_columns=work_columns,
+            finished_columns=finished_column,
+            filters=filters,
+            shared_columns=shared_columns,
+        )
--- a/src/configs/config_logger.toml
+++ b/src/configs/config_logger.toml
@ -0,0 +1,22 @@
+version = 1
+
+disable_existing_loggers = false
+
+[formatters.custom]
+format = "'%(asctime)s - %(module)s - %(levelname)s - %(message)s'"
+
+[handlers.console]
+class = "logging.StreamHandler"
+level = "DEBUG"
+formatter = "custom"
+stream = "ext://sys.stdout"
+
+[handlers.file]
+class = "logging.FileHandler"
+level = "DEBUG"
+formatter = "custom"
+filename = "on_hold.log"
+
+[root]
+level = "ERROR"
+handlers = ["console", "file"]
--- a/src/configs/report_config_template.json
+++ b/src/configs/report_config_template.json
@ -0,0 +1,33 @@
+{
+  "input_directory": "/path/to/input/folder",
+  "input_glob_pattern": {
+    "GP": "*GP*.xlsx",
+    "OB": "*OB*.xlsx"
+  },
+  "output_directory": "/path/to/output",
+  "interactive_inputs": false,
+  "use_mssql": false,
+  "database_path": "./onhold.db",
+  "work_columns": [
+    "Col_A",
+    "Col_B"
+  ],
+  "finished_column": [
+    "Notes",
+    "Contract Number"
+  ],
+  "filters": {
+    "filter_name": [
+      "\\d{7}",
+      "\\w+"
+    ],
+    "other_filter": "(OB|GP)$"
+  },
+  "shared_columns": [
+    {
+      "standardized_name": "contract_number",
+      "GP": "Transaction Description",
+      "OB": "ContractNumber"
+    }
+  ]
+}
--- a/src/configs/reports_config.toml
+++ b/src/configs/reports_config.toml
@ -0,0 +1,72 @@
+####  Paths: using '' makes the string 'raw' to avoid escape characters
+
+# Path to the directory to search for input report files
+input_directory = 'Work/Reports'
+# Glob patterns used to discover the newest report files
+input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'}
+# Path to the directory to save the reconciliation work report
+output_directory = 'Work/Output'
+# Fallback to interactive?
+interactive_inputs = false # NOT YET IMPLEMENTED
+
+
+#### DB
+
+# Whether to try using a mssql database
+# NOT YET IMPLEMENTED!
+use_mssql = false
+# Path to the SQLite database used to view/save reconciliations
+database_path = 'src/onhold_reconciliation.db'
+
+
+### Finished rec details
+
+# Columns to add to all 'work' sheets
+# also saved 'Reconcilations' database
+work_columns = [
+    "HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month
+    "Resolution" # Text field describing the discrepancy and how it may be resolved
+]
+# Columns to keep on reconciliation 'work' sheets
+finished_column = [
+        "contract_number",
+        "vendor_name",
+        "AppNum",           # OB only
+        "Document Number",  # GP Only
+        "DateBooked",       # OB only
+        "Document Date",    # GP Only
+        # 'Source' added for 'no match'
+    ]
+
+# Any regex filters that might be needed 
+[filters]
+# Use label to distinguish a regex set
+doc_num_filters = [
+        "p(oin)?ts",
+        "pool",
+        "promo",
+        "o(ver)?f(und)?",
+        "m(ar)?ke?t",
+        "title",
+        "adj",
+        "reg fee",
+        "rent",
+        "cma"
+    ]
+po_filter = ['(?i)^(?!.*cma(\s|\d)).*$']
+
+# Columns that are featured & expected on both OB & GP
+[[shared_columns]]
+standardized_name = "contract_number" # The name you'd like to use to standardize them
+GP = "Transaction Description" # Column name used in GP
+OB = "Contract" # Column name used in OB
+
+[[shared_columns]]
+standardized_name = "onhold_amount"
+GP = "Current Trx Amount"
+OB = "CurrentOnHold"
+
+[[shared_columns]]
+standardized_name = "vendor_name" 
+GP = "Vendor Name" 
+OB = "DealerName"
--- a/src/configs/reports_config_template.toml
+++ b/src/configs/reports_config_template.toml
@ -0,0 +1,40 @@
+####  Paths: using '' makes the string 'raw' to avoid escape characters
+
+# Path to the directory to search for input report files
+input_directory = '/path/to/input/folder'
+# Glob patterns used to discover newest files
+input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'}
+# Path to the directory to save the reconciliation work report
+output_directory = '/path/to/output'
+# Fallback to interactive?
+interactive_inputs = false # NOT YET IMPLEMENTED
+
+
+#### DB
+
+# Whether to try using a mssql database
+# NOT YET IMPLEMENTED!
+use_mssql = false
+# Path to the SQLite database used to view/save reconciliations
+database_path = './onhold.db'
+
+
+### Finished rec details
+
+# Columns to add to all 'work' sheets
+# also saved 'Reconcilations' database
+work_columns = ["Col_A", "Col_B" ]
+# Columns to keep on reconciliation 'work' sheets
+finished_column = [ "Notes", "Contract Number" ]
+
+# Any regex filters that might be needed 
+[filters]
+# Use label to distinguish a regex set
+filter_name = [ '\d{7}', '\w+']
+other_filter = '(OB|GP)$'
+
+# Columns that are featured & expected on both OB & GP
+[[shared_columns]]
+standardized_name = "contract_number" # The name you'd like to use to standardize them
+GP = "Transaction Description" # Column name used in GP
+OB = "ContractNumber" # Column name used in OB
--- a/src/helpers.py
+++ b/src/helpers.py
@ -0,0 +1,63 @@
+"""
+Hold Reconciler is an application meant to help reconcile the differences in payments
+that are marked as on hold in Great Plains and OnBase.
+
+It takes a report csv from OnBase and a report from GreatPlains and checks them
+against each other. It attempts to match them based on contract number and payment
+amount, or just the contract number.
+
+It also does a lot of filtering for the Great Plains report to remove irrelevant data.
+
+*Last Updated: version 1.3*
+*Originally developed in Spring of 2023 by Griffiths Lott (g@glott.me)*
+"""
+import re
+from re import Pattern
+import os
+from os.path import basename
+import glob
+import logging
+from pathlib import Path
+from tomllib import load
+from pandas import DataFrame, Series
+from typing import TypeVar, Literal
+
+
+import logging.config
+from logging import getLogger
+
+# Module-level logger named after this module.
+logger = getLogger(__name__)
+
+# Matches seven digits optionally followed by a dash and three more digits
+# (e.g. "1234567" or "1234567-001").
+CN_REGEX = re.compile(r"\d{7}(-\d{3})?")
+
+def setup_logging():
+    """
+    Sets up logging configuration from the TOML file.
+
+    The configuration is read from "src/configs/config_logger.toml" (relative
+    to the current working directory) and handed to `logging.config.dictConfig`.
+    If the file cannot be read or parsed, or the configuration is rejected,
+    the root logger is set to DEBUG instead and the failure is logged on it.
+
+    Returns:
+        logging.Logger: The root logger (configured from the file on success,
+        or set to DEBUG on failure).
+    """
+    # Kept outside the try so the fallback branch can always report what was loaded
+    config_dict: dict | None = None
+    try:
+        with open("src/configs/config_logger.toml", "rb") as f:
+            config_dict = load(f)
+        # Try to load logging configuration from the TOML file
+        logging.config.dictConfig(config_dict)
+    except Exception as e:
+        # Top-level boundary: any failure here must not stop the program,
+        # so fall back to a default configuration and log the error
+        fallback = logging.getLogger()
+        fallback.setLevel(logging.DEBUG)
+        fallback.warning("Failed setting up logger!")
+        fallback.exception(e)
+        fallback.warning(f"Config:\n{config_dict}")
+        return fallback
+    return logging.getLogger()
+
+
+def drop_unnamed(df: DataFrame, inplace: bool = True) -> DataFrame|None:
+    """
+    Drops all Unnamed columns from a dataframe.
+    ### CAUTION : This function acts *inplace* by default
+    (on the original dataframe, not a copy!)
+
+    A column is dropped when the text of its label contains "Unnamed".
+
+    Args:
+        df: The dataframe to remove the columns from.
+        inplace: When True (default) `df` itself is modified and None is
+            returned; when False a new dataframe is returned.
+
+    Returns:
+        None when `inplace` is True, otherwise the dataframe without the
+        Unnamed columns.
+    """
+    # Column labels are not guaranteed to be strings (e.g. numeric headers),
+    # so compare against their text form to avoid a TypeError
+    cols = [c for c in df.columns if "Unnamed" in str(c)]
+    return df.drop(columns=cols, inplace=inplace)
--- a/src/hold_reconciler.py
+++ b/src/hold_reconciler.py
@ -0,0 +1,86 @@
+"""
+This is the main entry point for this application. It finds the newest reports (GP & OB)
+then utilizes the reconcile module to find the differences between them. The output is
+saved as an excel file with today's date.
+"""
+# Custom module for reconciliation
+from src.helpers import setup_logging
+from src.reports import OnBaseReport, GreatPlainsReport, ReconciledReports
+from src.config import ReportConfig
+from src import ReportSource
+
+import pandas as pd
+from pandas import DataFrame, read_excel, ExcelFile
+import re
+from re import Pattern
+import logging
+from tomllib import load
+import logging.config
+from datetime import datetime as dt
+from pathlib import Path
+
+# Configure logging as an import-time side effect, then grab this module's logger.
+setup_logging()
+logger = logging.getLogger(__name__)
+# NOTE(review): logger.level is this logger's own level, which can differ from
+# the effective (inherited) level -- confirm this is the value meant to be shown.
+logger.info(f"Logger started with level: {logger.level}")
+
+
+def pull_report_sheet(report_path: Path, report_source: ReportSource, report_config: ReportConfig) -> DataFrame|None:
+    """
+    Finds the sheet of an Excel report that holds the data for a report source.
+
+    Every sheet of the workbook is read, and the first sheet whose columns
+    include all of the shared columns configured for `report_source` is returned.
+
+    Args:
+        report_path: Path to the Excel workbook.
+        report_source: The source whose column names are looked up in
+            `report_config.shared_columns`.
+        report_config: Configuration providing `shared_columns`.
+
+    Returns:
+        The dataframe of the first matching sheet, or None if no sheet
+        contains every required column.
+    """
+    # Get the columns required to be a valid report for the given report type
+    req_cols = [col[report_source.value] for col in report_config.shared_columns]
+    logger.debug(f"{report_source.value}_Req_cols: {req_cols}")
+
+    # The context manager closes the workbook handle even if reading fails
+    with ExcelFile(report_path) as xl_file:
+        # Dictionary of dataframes keyed by their sheet name
+        sheet_dataframes: dict[str, DataFrame] = read_excel(
+            xl_file, sheet_name=xl_file.sheet_names
+        )
+
+    # Check each dataframe for the required columns
+    for sheet, sheet_df in sheet_dataframes.items():
+        sheet_columns: list[str] = list(sheet_df.columns)
+        matches = [r in sheet_columns for r in req_cols]
+        logger.debug(f"{report_source.value} ({sheet}) : {sheet_columns}")
+        logger.debug(f"Matches {matches}")
+        if all(matches):
+            logger.debug(f"FOUND: {sheet}")
+            return sheet_df
+    return None
+
+
+def main() -> int:
+    """
+    This is the main function for the script. It reads configuration options from a TOML file, reads in the GP and OB
+    Excel files, performs data reconciliation and analysis, and writes the results to a new Excel file.
+
+    Returns:
+        int: 0 if the script executes successfully.
+
+    Raises:
+        ValueError: If no usable sheet is found in the OB or GP report, or
+            the sheet that was found is empty.
+    """
+    # Read the configuration options
+    report_config: ReportConfig = ReportConfig.from_file(Path("src/configs/reports_config.toml"))
+
+    # Get the GP and OB dataframes from the Excel files
+    ob_report, gp_report = report_config.paths.get_most_recent()
+    print(ob_report)
+    print(gp_report)
+    ob_df: DataFrame|None = pull_report_sheet(ob_report, ReportSource.OB, report_config)
+    gp_df: DataFrame|None = pull_report_sheet(gp_report, ReportSource.GP, report_config)
+    # pull_report_sheet returns None when no sheet has the required columns,
+    # so check for that before looking at `.empty`
+    if ob_df is None or ob_df.empty:
+        raise ValueError("OB Data empty!")
+    if gp_df is None or gp_df.empty:
+        raise ValueError("GP Data empty!")
+
+    obr: OnBaseReport = OnBaseReport(ob_df, report_config)
+    gpr: GreatPlainsReport = GreatPlainsReport(gp_df, report_config)
+
+    rec_output: ReconciledReports = obr.reconcile(gpr)
+
+    # The output file is named after today's date and placed in the configured directory
+    output_name: Path = Path(f"Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx")
+    output_base: Path = report_config.paths.output_directory
+    output_path: Path = Path(output_base, output_name)
+
+    rec_output.save_reports(output_path)
+
+    return 0
+
+
+# Script entry point: run the reconciliation and print simple progress markers.
+# The integer returned by main() is not used as the process exit code.
+if __name__ == "__main__":
+    print("Starting")
+    main()
+    print("Completed")
--- a/src/memory.py
+++ b/src/memory.py
@ -0,0 +1,155 @@
+"""
+Classes and functions to parse completed reconciliation reports and remember
+the resolutions of contracts. 
+
+Also provides a way for the reconciler to check holds against previously
+resolved holds.
+
+*Last Updated: version 1.3*
+"""
+from src.helpers import drop_unnamed, setup_logging
+from src.config import ReportConfig, ReportSource
+from src.ghlib.database.database_manager import SQLiteManager, select_fields_statement
+
+from pathlib import Path
+from pandas import DataFrame, Series, read_sql_query, read_excel, concat
+from numpy import NaN
+from logging import getLogger
+from dataclasses import dataclass
+from hashlib import md5
+from typing import TypeAlias
+
+# Configure logging as an import-time side effect, then grab this module's logger.
+setup_logging()
+logger = getLogger(__name__)
+
+# Alias for the hex digest strings produced by hash_cols.
+col_hash: TypeAlias = str
+
+def hash_cols(row: Series, cols_to_hash: list[str]) -> col_hash:
+    """
+    Builds an md5 hex digest from the selected values of a row.
+
+    The values of `cols_to_hash` are converted to text, concatenated in the
+    given order without a separator, encoded as UTF-8 and hashed.
+    """
+    joined: str = ''.join([str(row[name]) for name in cols_to_hash])
+    return md5(joined.encode('utf-8')).hexdigest()
+
+def create_identifier(df: DataFrame) -> DataFrame:
+    """
+    We want to create a unique and replicable ID to identify each payment pair.
+    Some transactions may have 1 blank ID which can cause an undeterminable hash.
+    For this reason we must replace empty IDs with x so that it will have a replicable
+    value.
+
+    Then the two IDs are hashed together using md5. Resulting in a unique 32 character
+    identifier that can be reproduced.
+
+    The dataframe is modified in place: an "Indentifier" column is added and
+    the "ID_OB"/"ID_GP" columns are rewritten (the temporary x placeholder is
+    turned back into NaN once the hash has been computed).
+
+    Returns:
+        The same dataframe that was passed in.
+    """
+    id_cols: list[str] = ["ID_OB", "ID_GP"]
+    # Assign the filled column back rather than calling fillna(inplace=True)
+    # on a column selection, which does not reliably update the parent frame
+    for col in id_cols:
+        df[col] = df[col].fillna("x")
+    # Built as a list so an empty dataframe simply gets an empty column
+    df["Indentifier"] = [
+        hash_cols(row, id_cols) for _, row in df[id_cols].iterrows()
+    ]
+    for col in id_cols:
+        df[col] = df[col].replace('x', NaN)
+    return df
+
+def save_rec(resolved_dataframes: list[DataFrame], report_config: ReportConfig):
+    """
+    Saves the resolutions found in completed reconciliation sheets to the database.
+
+    For each dataframe an identifier is created from its ID_OB/ID_GP columns,
+    the identifier, ID and work columns are kept, duplicate rows and rows with
+    no work column filled in are dropped, and the rest is appended to the
+    'Resolutions' table.
+
+    Args:
+        resolved_dataframes: Dataframes read from a worked reconciliation report.
+            Frames with an 'onhold_amount' column also need a 'Source' column.
+            NOTE: helper columns are added to these frames in place.
+        report_config: Provides the database path (`paths.db_path`) and the
+            work columns to save (`work_columns`).
+    """
+    sqlManager: SQLiteManager = SQLiteManager(report_config.paths.db_path)
+    # Columns that are written to the database for every resolution
+    rec_cols: list[str] = [
+        "Indentifier",
+        "ID_GP",
+        "ID_OB",
+        *report_config.work_columns,
+    ]
+    with sqlManager.get_session() as session:
+
+        rdf: DataFrame
+        for rdf in resolved_dataframes:
+            cols: list[str] = rdf.columns.to_list()
+            logger.debug(f"{cols=}")
+            if "onhold_amount" in cols:
+                logger.debug("Found 'onhold_amount' in rdf: no_match dataframe")
+                # Split the on_hold col to normalize with amount mismatch
+                rdf["onhold_amount_GP"] = rdf.apply(lambda row:
+                    row["onhold_amount"] if row["Source"] == "GP" else None
+                , axis=1)
+                rdf["onhold_amount_OB"] = rdf.apply(lambda row:
+                    row["onhold_amount"] if row["Source"] == "OB" else None
+                , axis=1 )
+            else:
+                logger.debug("No 'onhold_amount' col found in rdf: amount_mismatch dataframe")
+
+            # Create a unified column for index
+            rdf = create_identifier(rdf)
+
+            # Chained, non-inplace calls: each step returns a new frame, so
+            # nothing is modified in place on a column-sliced selection
+            rdf = (
+                rdf[rec_cols]
+                .set_index("Indentifier", drop=True)
+                .drop_duplicates()
+                .dropna(axis=0, how="all", subset=report_config.work_columns)
+            )
+            logger.debug(f"Saving resolutions to db:\n{rdf}")
+
+            rdf.to_sql('Resolutions',
+                con=session.connection(),
+                if_exists="append"
+            )
+           
+             
+
+def get_prev_reconciled(identfiers: list[col_hash], db_location: Path) -> DataFrame|None:
+    """
+    Get a DataFrame of previously reconciled contracts from an SQLite database.
+
+    Args:
+        identfiers (list[col_hash]): The identifier hashes to look up in the
+            'Resolutions' table.
+        db_location (Path): Location of the SQLite database.
+
+    Returns:
+        DataFrame|None: The rows of 'Resolutions' whose Indentifier is in
+        `identfiers`, or None when `identfiers` is empty (nothing to look up).
+    """
+    # An empty list would produce an invalid "INSERT ... VALUES ;" statement
+    if not identfiers:
+        return None
+
+    # Create a DB manager
+    sqlManager: SQLiteManager = SQLiteManager(db_location)
+
+    # Create a temp table to hold this batch's identifiers
+    # NOTE(review): presumably this table is cleared when sqlManager goes out
+    # of scope -- confirm against SQLiteManager
+    temp_table_statement = """
+    CREATE TEMPORARY TABLE CUR_IDENT (Indentifier VARCHAR(32));
+    """
+    sqlManager.execute(temp_table_statement)
+
+    # Insert the current identifiers into the temp table
+    # NOTE(review): the values are interpolated straight into the SQL text;
+    # this assumes they are hashes produced by this program, never user input
+    insert_idents = f"""
+    INSERT INTO CUR_IDENT (Indentifier) VALUES
+    {', '.join([f"('{cn}')" for cn in identfiers])};
+    """
+
+    logger.debug(f"{insert_idents=}")
+
+    sqlManager.execute(insert_idents)
+
+    # Select previously resolved contracts
+    res_query = """
+    SELECT r.*
+    FROM Resolutions r
+    JOIN CUR_IDENT i
+    ON r.Indentifier = i.Indentifier;
+    """
+    resolved: DataFrame = sqlManager.execute(res_query, as_dataframe=True)
+    return resolved
+
+
+# Command line entry point: record the resolutions of a worked report in the database.
+if __name__ == "__main__":
+    import argparse
+    from logging import DEBUG
+    logger.setLevel(DEBUG)
+
+    parser = argparse.ArgumentParser(
+        prog="HoldReconcilerRecord",
+    )
+    # Without an input workbook there is nothing to read, so make it mandatory
+    parser.add_argument("-i", "--input", required=True)
+    args = parser.parse_args()
+
+    # No Match
+    no_match: DataFrame = read_excel(args.input, sheet_name="No Match")
+    # Amount Mismatch
+    amt_mm: DataFrame = read_excel(args.input, sheet_name="Amount Mismatch")
+
+    # Load the config the same way the rest of the program does
+    report_config = ReportConfig.from_file(Path(r"configs\reports_config.toml"))
+
+    # Keyword arguments keep the call in line with save_rec's parameter order
+    save_rec(resolved_dataframes=[no_match, amt_mm], report_config=report_config)
--- a/src/reports.py
+++ b/src/reports.py
@ -0,0 +1,346 @@
+from pandas import DataFrame, merge, to_datetime, NaT, concat, ExcelWriter
+from openpyxl import Workbook, load_workbook
+from abc import ABC
+from logging import getLogger
+import re
+from re import Pattern
+import datetime
+from copy import deepcopy
+from dataclasses import dataclass
+from pathlib import Path
+
+from src.helpers import CN_REGEX, drop_unnamed
+from src.memory import get_prev_reconciled, hash_cols, col_hash, create_identifier
+from src.config import ReportConfig, ReportSource
+
+# Module-level logger named after this module.
+logger = getLogger(__name__)
+
+@dataclass
+class ReconciledReports:
+    """
+    The dataframes produced by a reconciliation, one per sheet of the output workbook.
+    """
+    # Written to the "No Match" sheet
+    no_match: DataFrame
+    # Written to the "Amount Mismatch" sheet
+    amt_mismatch: DataFrame
+    # Written to the (hidden) "Previously Reconciled" sheet
+    prev_rec: DataFrame
+    # Written to the (hidden) "Filtered from GP" sheet
+    gp_filtered: DataFrame
+    # Written to the "Overdue" sheet
+    ob_overdue: DataFrame
+
+    def save_reports(self, output_path: Path):
+        """
+        Writes all reports to a single Excel workbook at `output_path`.
+
+        Duplicate rows are removed from `no_match` and `amt_mismatch` first
+        (the attributes are updated with the de-duplicated frames). After
+        writing, the first two columns of the "No Match" and "Amount Mismatch"
+        sheets are hidden, as are the "Filtered from GP" and
+        "Previously Reconciled" sheets.
+        """
+        # Reassign instead of de-duplicating in place: the frames may be
+        # slices of other frames, which in-place operations handle poorly
+        self.no_match = self.no_match.drop_duplicates()
+        self.amt_mismatch = self.amt_mismatch.drop_duplicates()
+
+        with ExcelWriter(output_path, mode='w') as writer:
+            self.no_match.to_excel(writer, sheet_name="No Match",
+                                index=False, freeze_panes=(1,3)
+                                )
+            self.amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch",
+                                index=False, freeze_panes=(1,3)
+                                )
+            self.ob_overdue.to_excel(writer, sheet_name="Overdue",
+                                index=False
+                                )
+            self.prev_rec.to_excel(writer, sheet_name="Previously Reconciled",
+                                index=False, freeze_panes=(1,3)
+                                )
+            self.gp_filtered.to_excel(writer, sheet_name="Filtered from GP",
+                                index=False, freeze_panes=(1,0)
+                                )
+
+        # Re-open the finished workbook to apply the hidden columns/sheets
+        wb: Workbook = load_workbook(output_path)
+        try:
+            for sheet in ["No Match", "Amount Mismatch"]:
+                ws = wb[sheet]
+                ws.column_dimensions['A'].hidden = True
+                ws.column_dimensions['B'].hidden = True
+            for sheet in ["Filtered from GP", "Previously Reconciled"]:
+                wb[sheet].sheet_state = "hidden"
+            wb.save(output_path)
+        finally:
+            # Release the workbook even if formatting or saving fails
+            wb.close()
+
+class HoldReport(ABC):
+
+    source = ""
+
+    def __init__(self, dataframe: DataFrame, reports_config: ReportConfig) -> None:
+        """
+        Prepares a raw report dataframe for reconciliation.
+
+        Unnamed columns are dropped from `dataframe` (in place), the configured
+        work columns are added, and the data is normalized.
+        """
+        self.config = reports_config
+        drop_unnamed(dataframe)
+        self.df = self._add_work_columns(dataframe, reports_config.work_columns)
+        self._normalize()
+
+
+    def _normalize(self):
+        """
+        Brings the report into the shared layout used for reconciliation.
+
+        Renames the source specific columns to their standardized names,
+        rounds the on-hold amount, extracts the contract number, and adds the
+        "ID" (contract number + '_' + amount) and "Source" columns.
+        """
+        # Map each source specific column name to its standardized name
+        rename_map = {}
+        for sc_dict in self.config.shared_columns:
+            rename_map[sc_dict[self.source]] = sc_dict["standardized_name"]
+        self.df.rename(columns=rename_map, inplace=True)
+
+        # On-hold amounts are floats rounded to two decimal places
+        self.df["onhold_amount"] = self.df["onhold_amount"].astype(float).round(2)
+
+        def _extract_contract_number(raw) -> str:
+            # Keep the matched contract number, or the raw text if nothing matches
+            text = str(raw)
+            found = re.search(CN_REGEX, text)
+            return found.group(0) if found else text
+
+        self.df["contract_number"] = self.df["contract_number"].apply(
+            _extract_contract_number
+        )
+
+        # Transaction ID built from the contract number and the amount
+        amount_text = self.df["onhold_amount"].astype(str)
+        self.df["ID"] = self.df["contract_number"] + '_' + amount_text
+
+        # Remember which report each row came from
+        self.df["Source"] = self.source
+
+
+
+    @staticmethod
+    def _remove_prev_recs(contract_match, no_match, db_location: Path) -> \
+        tuple[DataFrame, DataFrame, DataFrame]:
+        """
+        Applies previously saved resolutions to the current results.
+
+        Identifiers are created for both dataframes and looked up in the
+        database. Saved "HideNextMonth"/"Resolution" values take priority over
+        the current ones, and rows whose HideNextMonth is True are removed.
+
+        Returns:
+            A tuple of (previous resolutions, contract_match, no_match). When
+            the lookup returns None an empty dataframe and the two inputs are
+            returned.
+        """
+
+        idents: list[col_hash] = create_identifier(contract_match)["Indentifier"].to_list()
+        idents.extend(create_identifier(no_match)["Indentifier"].to_list())
+        logger.debug(f"{idents=}")
+        # Get previously reconciled
+        prev_recs: DataFrame|None = get_prev_reconciled(idents, db_location)
+
+        if prev_recs is None:
+            logger.info("No previously reconciled!")
+            return DataFrame(), contract_match, no_match
+
+        dfs = []
+        for df in [contract_match, no_match]:
+            start_size = df.shape[0]
+            logger.debug(f"Report DF: \n{df}")
+            logger.debug(f"prev_rec: \n{prev_recs}")
+
+            df = merge(
+                df,
+                prev_recs,
+                how="left",
+                on= "Indentifier",
+                suffixes=("_cur", "_prev")
+            )
+
+            # Prefer the saved value, fall back to the current one
+            df = HoldReport._created_combined_col("HideNextMonth", df, ["prev", "cur"])
+            df = HoldReport._created_combined_col("Resolution", df, ["prev", "cur"])
+            df["ID_OB"] = df["ID_OB_cur"]
+            df["ID_GP"] = df["ID_GP_cur"]
+
+            # Drop anything that should be ignored
+            df = df[df["HideNextMonth"] != True]
+            logger.info(f"Prev res added:\n{df}")
+
+            # The suffixed helper columns are no longer needed
+            col_to_drop = [
+                c for c in df.columns if "_prev" in c or "_cur" in c
+            ]
+            logger.debug(f"{col_to_drop=}")
+            # Non-inplace drop: `df` is a filtered selection at this point
+            df = df.drop(columns=col_to_drop)
+
+            end_size = df.shape[0]
+            logger.info(f"Reduced df by {start_size-end_size}")
+            dfs.append(df)
+        return prev_recs, dfs[0], dfs[1]
+
+    def _remove_full_matches(self, other: 'HoldReport'):
+        """
+        Drops every payment whose "ID" appears in both reports; those match on
+        contract number and hold amount, so nothing needs to be reconciled.
+
+        Both reports are updated (`self.df` and `other.df`), and the remaining
+        rows of both are stored together in `self.combined_missing`.
+        """
+        # Both masks are computed before either dataframe is replaced
+        in_other = self.df["ID"].isin(other.df["ID"])
+        in_self = other.df["ID"].isin(self.df["ID"])
+        self.df = self.df[~in_other]
+        other.df = other.df[~in_self]
+
+        self.combined_missing = concat([self.df, other.df], ignore_index=True)
+        logger.debug(f"Combined Missing:\n{self.combined_missing}")
+        logger.info(f"Payments with errors: {self.combined_missing.shape[0]}")
+
+    @staticmethod
+    def _created_combined_col(column: str, target_df: DataFrame, sources: tuple[str, str]) -> DataFrame :
+        """
+        Adds `column` to `target_df` by combining its two suffixed versions.
+
+        Values come from "<column>_<first source>"; where those are missing
+        the value of "<column>_<second source>" is used instead. The dataframe
+        is modified in place and also returned.
+        """
+        preferred, fallback = sources
+        primary = target_df[f"{column}_{preferred}"]
+        backup = target_df[f"{column}_{fallback}"]
+        target_df[column] = primary.fillna(backup)
+        return target_df
+           
+
+    def _requires_rec(self,  other: 'HoldReport') -> tuple[DataFrame, DataFrame]:
+        """
+        Splits the remaining payments into amount mismatches and non-matches.
+
+        To be run after full matches have been removed (it relies on
+        `self.combined_missing`).
+
+        Returns:
+            A tuple of (contract_match, no_match): payments whose contract
+            number appears in both reports, and payments whose contract number
+            appears in only one. Previously saved resolutions have been
+            applied to both, and are stored in `self.prev_recs`.
+        """
+
+        # Merge the two filtered DataFrames on the contract number
+        contract_match = merge(
+            self.df, other.df,
+            how="inner",
+            on=["contract_number"],
+            suffixes=('_'+self.source, '_'+other.source)
+        )
+
+        contract_match = create_identifier(contract_match)
+
+        for col in ["vendor_name", "HideNextMonth", "Resolution"]:
+            self._created_combined_col(col, contract_match, (self.source, other.source))
+
+        logger.debug(f"_requires_rec | contract_match:\n{contract_match.columns} ({contract_match.shape})")
+
+        unmatched = ~self.combined_missing["contract_number"].isin(
+            contract_match["contract_number"]
+        )
+        # Copy so the new columns are added to an independent frame rather
+        # than to a selection of combined_missing
+        no_match: DataFrame = self.combined_missing[unmatched].copy()
+        # Each row only has an ID for the source it came from; `where` leaves
+        # the other one missing and also works when no rows are left
+        for src in (self.source, other.source):
+            no_match[f"ID_{src}"] = no_match["ID"].where(no_match["Source"] == src)
+
+        no_match = create_identifier(no_match)
+
+        logger.debug(f"_requires_rec | no_match:\n{no_match.columns} ({no_match.shape})")
+        self.prev_recs, contract_match, no_match = self._remove_prev_recs(contract_match,
+            no_match, self.config.paths.db_path
+        )
+
+        return contract_match, no_match
+
+    @staticmethod
+    def _add_work_columns(df: DataFrame, work_cols: list) -> DataFrame:
+        """
+        Adds an empty (blank string) column for every work column the
+        dataframe does not already have. The dataframe is modified in place
+        and also returned.
+        """
+        logger.debug("Adding work columns!")
+        existing: list[str] = df.columns.to_list()
+        missing = [name for name in work_cols if name not in existing]
+        for name in missing:
+            df[name] = ''
+        return df
+
+    def reconcile(self, other: 'HoldReport') -> ReconciledReports:
+        """
+        Reconciles this report against a report from the other source.
+
+        Payments that fully match are removed from both reports, the rest is
+        split into amount mismatches and non-matches, and both are trimmed to
+        the configured finished columns.
+
+        Args:
+            other: The report to reconcile against; one of the two reports
+                must have source "OB" and the other "GP".
+
+        Returns:
+            ReconciledReports holding the no match, amount mismatch,
+            previously reconciled, filtered GP and overdue OB dataframes.
+
+        Raises:
+            ValueError: If both reports have the same source, or the pair is
+                not made up of one "OB" and one "GP" report.
+        """
+        if self.source == other.source:
+            raise ValueError(
+                "Reports to reconcile must be from different sources."
+                f" ({self.source} , {other.source})."
+            )
+        # Look the reports up by source so neither variable can be left unset
+        by_source: dict[str, 'HoldReport'] = {self.source: self, other.source: other}
+        if set(by_source) != {"OB", "GP"}:
+            raise ValueError(
+                "Reports to reconcile must be one OB and one GP report."
+                f" ({self.source} , {other.source})."
+            )
+        over_due: DataFrame = by_source["OB"].overdue
+        filtered_gp: DataFrame = by_source["GP"].filtered
+
+        self._remove_full_matches(other)
+        logger.debug(f"Removed matches:\n{self.df}")
+
+        amount_mismatch, no_match = self._requires_rec(other)
+
+        logger.debug(f"reconcile | no_match unaltered\n{no_match.columns} ({no_match.shape})")
+        logger.debug(f"reconcile | am_mm unaltered:\n{amount_mismatch.columns} ({amount_mismatch.shape})")
+
+        # Formatting
+        columns: list[str] = ["ID_GP", "ID_OB", *self.config.finished_columns]
+
+        # 'No match' rows have a single amount plus the source it came from
+        nm_cols: list[str] = list(columns)
+        nm_cols.insert(3,"onhold_amount")
+        nm_cols.insert(4,"Source")
+
+        # 'Amount mismatch' rows have one amount per source
+        columns.insert(3,"onhold_amount_GP")
+        columns.insert(4, "onhold_amount_OB")
+
+        # Select and reorder columns; copies keep the results independent of
+        # the frames they were selected from
+        no_match = no_match[nm_cols].copy()
+        amount_mismatch = amount_mismatch[columns].copy()
+
+        logger.info(f"no_match: {no_match.shape[0]}")
+        logger.info(f"am_mm: {amount_mismatch.shape[0]}")
+
+        reconciled: ReconciledReports = ReconciledReports(
+            no_match=no_match,
+            amt_mismatch=amount_mismatch,
+            prev_rec=self.prev_recs,
+            gp_filtered=filtered_gp,
+            ob_overdue = over_due
+        )
+        return reconciled
+    
+
+class OnBaseReport(HoldReport):
+    """
+    Hold report from OnBase (source "OB").
+
+    Besides the normalized data it keeps `overdue`: the rows of the raw
+    report whose InstallDate is before today.
+    """
+
+    source = "OB"
+
+    def __init__(self, dataframe: DataFrame, reports_config: ReportConfig) -> None:
+        # Overdue rows are taken from the raw report, before it is normalized
+        self.overdue = self._get_overdue(dataframe)
+        super().__init__(dataframe, reports_config)
+
+    @staticmethod
+    def _get_overdue(dataframe: DataFrame) -> DataFrame:
+        """
+        Returns the rows whose "InstallDate" is earlier than today.
+
+        The "InstallDate" column of `dataframe` is converted to datetimes in
+        place.
+        """
+        # to_datetime already represents missing dates as NaT, so no separate
+        # fill step is needed
+        dataframe["InstallDate"] = to_datetime(dataframe["InstallDate"])
+        install_dates = dataframe["InstallDate"].dt.date
+        overdue: DataFrame = dataframe[install_dates < datetime.date.today()]
+        return overdue
+
+
+class GreatPlainsReport(HoldReport):
+    """
+    Hold report from Great Plains (source "GP").
+
+    Irrelevant rows are filtered out of the raw report before it is
+    normalized; the removed rows are kept in `filtered`.
+    """
+
+    source = "GP"
+
+    def __init__(self, dataframe: DataFrame, report_config: ReportConfig) -> None:
+
+        # _filter removes the rows from `dataframe` in place and returns them
+        self.filtered: DataFrame = self._filter(
+            gp_report_df= dataframe,
+            doc_num_filters= report_config.filters["doc_num_filters"],
+            good_po_num_regex=  report_config.filters["po_filter"][0]
+        )
+        super().__init__(dataframe, report_config)
+
+    @staticmethod
+    def _filter(gp_report_df: DataFrame,
+                doc_num_filters: list[Pattern], good_po_num_regex: Pattern
+        ) -> DataFrame:
+        """
+        Removes rows that should not be reconciled from `gp_report_df` (in place).
+
+        A row is kept only if its "Document Type" is "Invoice", its
+        "Purchase Order Number" matches `good_po_num_regex`, and its
+        "Document Number" matches none of `doc_num_filters` (case-insensitive).
+
+        Args:
+            gp_report_df: The raw Great Plains report; modified in place.
+            doc_num_filters: Patterns (compiled or plain strings) marking
+                document numbers to remove.
+            good_po_num_regex: Pattern a purchase order number must match to
+                be kept.
+
+        Returns:
+            The rows that were removed, with their original index.
+        """
+        # Create a mask/filter that will keep rows that match these
+        # requirements
+        keep_mask = (
+            (gp_report_df["Document Type"] == "Invoice") &
+            (gp_report_df["Purchase Order Number"].str.contains(good_po_num_regex))
+        )
+
+        # Get the rows that DO NOT fit the keep_mask
+        dropped_positives: DataFrame = gp_report_df[~keep_mask]
+        # Drop the rows to filter
+        gp_report_df.drop(dropped_positives.index, inplace=True)
+
+        # Compiled patterns must contribute their source text; formatting the
+        # object itself would put "re.compile('...')" into the expression
+        sources: list[str] = [
+            rx.pattern if isinstance(rx, Pattern) else str(rx)
+            for rx in doc_num_filters
+        ]
+
+        if sources:
+            bad_doc_num = re.compile(
+                "|".join(f"({src})" for src in sources), re.IGNORECASE
+            )
+            # Create a filter to remove rows that meet this requirement
+            # Making this a negative in the keep mask is more trouble than
+            # it's worth
+            remove_mask = gp_report_df["Document Number"].str.contains(bad_doc_num)
+            dropped_negatives: DataFrame = gp_report_df[remove_mask]
+            gp_report_df.drop(dropped_negatives.index, inplace=True)
+        else:
+            # No document number filters configured: nothing more to remove
+            dropped_negatives = gp_report_df.iloc[0:0]
+
+        return concat([dropped_positives, dropped_negatives], ignore_index=False)
--- a/tests/init.py
+++ b/tests/init.py
--- a/tests/test_config.py
+++ b/tests/test_config.py
@ -0,0 +1,72 @@
+import unittest
+from pathlib import Path
+from re import Pattern, compile
+from src import config
+from src import ReportSource
+
+class TestReportConfig(unittest.TestCase):
+    """Tests loading a ReportConfig from the test TOML file."""
+
+    # Forward slashes so the config file is found on every platform
+    # (the same form tests/test_report.py uses)
+    CONFIG_FILE = Path("tests/test_inputs/TEST_reports_config.toml")
+
+    def test_from_file(self):
+        """Every value of the test config file ends up on the ReportConfig."""
+        # Call the static method from_file to create an instance of ReportConfig
+        report_config = config.ReportConfig.from_file(self.CONFIG_FILE)
+
+        # Assert the values of the attributes in the created instance
+        self.assertEqual(report_config.paths.input_directory, Path(r"tests\test_inputs\TestSearch"))
+        self.assertEqual(report_config.paths.gp_glob, r'*GP*.xlsx')
+        self.assertEqual(report_config.paths.ob_glob, r"*OB*.xlsx")
+        self.assertEqual(report_config.paths.output_directory, Path(r"tests\test_outputs"))
+        self.assertEqual(report_config.use_mssql, False)
+        self.assertEqual(report_config.paths.db_path, Path(r"tests\test_inputs\Static\test_static_OnHold.db"))
+        self.assertEqual(report_config.work_columns, ["HideNextMonth", "Resolution"])
+        self.assertEqual(report_config.finished_columns, [
+            "contract_number",
+            "vendor_name",
+            "AppNum",
+            "Document Number",
+            "DateBooked",
+            "Document Date",
+        ])
+        # The filters are expected to be loaded as compiled patterns
+        self.assertEqual(report_config.filters["doc_num_filters"], [
+            compile(r"p(oin)?ts"),
+            compile(r"pool"),
+            compile(r"promo"),
+            compile(r"o(ver)?f(und)?"),
+            compile(r"m(ar)?ke?t"),
+            compile(r"title"),
+            compile(r"adj"),
+            compile(r"reg fee"),
+            compile(r"rent"),
+            compile(r"cma"),
+        ])
+        self.assertEqual(report_config.filters["po_filter"], [compile(r"(?i)^(?!.*cma(\s|\d)).*$")])
+        self.assertEqual(report_config.shared_columns[0]["standardized_name"], "contract_number")
+        self.assertEqual(report_config.shared_columns[0]["GP"], "Transaction Description")
+        self.assertEqual(report_config.shared_columns[0]["OB"], "Contract")
+        self.assertEqual(report_config.shared_columns[1]["standardized_name"], "onhold_amount")
+        self.assertEqual(report_config.shared_columns[1]["GP"], "Current Trx Amount")
+        self.assertEqual(report_config.shared_columns[1]["OB"], "CurrentOnHold")
+        self.assertEqual(report_config.shared_columns[2]["standardized_name"], "vendor_name")
+        self.assertEqual(report_config.shared_columns[2]["GP"], "Vendor Name")
+        self.assertEqual(report_config.shared_columns[2]["OB"], "DealerName")
+
+    def test_get_newest(self):
+        """The newest OB and GP reports are found, one at a time and as a pair."""
+        # Call the static method from_file to create an instance of ReportConfig
+        report_config = config.ReportConfig.from_file(self.CONFIG_FILE)
+
+        newest_ob: Path = report_config.paths.get_most_recent(report_type=ReportSource.OB)
+        self.assertEqual(newest_ob.name, "April 2023 OB.xlsx")
+        newest_gp: Path = report_config.paths.get_most_recent(report_type=ReportSource.GP)
+        self.assertEqual(newest_gp.name, "April GP.xlsx")
+
+        # Without a report type both paths are returned, OB first
+        nob, ngp = report_config.paths.get_most_recent()
+        self.assertEqual(nob.name, "April 2023 OB.xlsx")
+        self.assertEqual(ngp.name, "April GP.xlsx")
+
+# Allow running this test module directly as a script.
+if __name__ == '__main__':
+    unittest.main()
--- a/tests/test_inputs/TEST_reports_config.toml
+++ b/tests/test_inputs/TEST_reports_config.toml
@ -0,0 +1,72 @@
+####  Paths: using '' makes the string 'raw' to avoid escape characters
+
+# Path to the directory to search for input report files
+input_directory = 'tests\test_inputs\TestSearch'
+# Glob patterns used to discover newest files
+input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'}
+# Path to the directory to save the reconciliation work report
+output_directory = 'tests\test_outputs'
+# Fallback to interactive?
+interactive_inputs = false # NOT YET IMPLEMENTED
+
+
+#### DB
+
+# Whether to try using a mssql database
+# NOT YET IMPLEMENTED!
+use_mssql = false
+# Path to the SQLite database used to view/save reconciliations
+database_path = 'tests\test_inputs\Static\test_static_OnHold.db'
+
+
+### Finished rec details
+
+# Columns to add to all 'work' sheets
+# also saved 'Reconcilations' database
+work_columns = [
+    "HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month
+    "Resolution" # Text field describing the discrepancy and how it may be resolved
+]
+# Columns to keep on reconciliation 'work' sheets
+finished_column = [
+        "contract_number",
+        "vendor_name",
+        "AppNum",           # OB only
+        "Document Number",  # GP Only
+        "DateBooked",       # OB only
+        "Document Date",    # GP Only
+        # 'Source' added for 'no match'
+    ]
+
+# Any regex filters that might be needed 
+[filters]
+# Use label to distinguish a regex set
+doc_num_filters = [
+        "p(oin)?ts",
+        "pool",
+        "promo",
+        "o(ver)?f(und)?",
+        "m(ar)?ke?t",
+        "title",
+        "adj",
+        "reg fee",
+        "rent",
+        "cma"
+    ]
+po_filter = ['(?i)^(?!.*cma(\s|\d)).*$']
+
+# Columns that are featured & expected on both OB & GP
+[[shared_columns]]
+standardized_name = "contract_number" # The name you'd like to use to standardize them
+GP = "Transaction Description" # Column name used in GP
+OB = "Contract" # Column name used in OB
+
+[[shared_columns]]
+standardized_name = "onhold_amount"
+GP = "Current Trx Amount"
+OB = "CurrentOnHold"
+
+[[shared_columns]]
+standardized_name = "vendor_name" 
+GP = "Vendor Name" 
+OB = "DealerName"
--- a/tests/test_inputs/TestSearch/April
+++ b/tests/test_inputs/TestSearch/April
--- a/tests/test_inputs/TestSearch/April
+++ b/tests/test_inputs/TestSearch/April
--- a/tests/test_report.py
+++ b/tests/test_report.py
@ -0,0 +1,78 @@
+from pandas import DataFrame, merge, to_datetime, NaT, concat, read_excel
+from pathlib import Path
+from re import Pattern
+import pytest as pt
+
+from src.config import ReportConfig, ReportSource
+from src.reports import GreatPlainsReport, OnBaseReport, ReconciledReports
+from src.hold_reconciler import pull_report_sheet
+
+class TestReport:
+
+    @pt.fixture(autouse=True)
+    def setup(self):
+        self.report_config = ReportConfig.from_file(
+            Path(r"./tests/test_inputs/TEST_reports_config.toml")
+        )
+
+
+    def test_full(self):
+        """
+        Full process test.
+
+        This tests inputs will need to be adjust anytime a change is made to the 
+        input/output report layouts, filtering, trimming, normalization.
+
+        Basically, this is just to make sure everything still works after making
+        TINY changes, that are not meant to effect the structure/logic of the program
+        """
+        
+        ob_df = pull_report_sheet(
+            Path(r"./tests/test_inputs\Static\April 2023 OB.xlsx"),
+            ReportSource.OB,
+            self.report_config    
+        )
+        gp_df = pull_report_sheet(
+            Path(r"./tests/test_inputs\Static\April GP.xlsx"),
+            ReportSource.GP,
+            self.report_config    
+        )
+
+        assert not ob_df.empty, "OB Data empty!"
+        assert not gp_df.empty, "GP Data empty!"
+
+        obr: OnBaseReport = OnBaseReport(ob_df, self.report_config)
+        gpr: GreatPlainsReport = GreatPlainsReport(gp_df, self.report_config)
+    
+        rec_output: ReconciledReports = obr.reconcile(gpr)
+
+        output_path: Path = Path(
+            self.report_config.paths.output_directory,
+            "TEST_REPORT.xlsx"
+        )
+        rec_output.save_reports(output_path)
+
+        SHEET_NAMES = [
+            "No Match",
+            "Amount Mismatch",
+            "Overdue",
+            "Previously Reconciled",
+            "Filtered from GP",
+        ]
+
+        CONTROL: dict[str:DataFrame] = read_excel(
+            Path(r"./tests/test_inputs/Static/Reconciled Holds [TEST_FIN].xlsx"),
+            sheet_name=SHEET_NAMES
+        )
+
+        new: dict[str:DataFrame] = read_excel(
+            output_path,
+            sheet_name=SHEET_NAMES
+        )
+
+        for sheet in SHEET_NAMES:
+            print(sheet)
+            print(new[sheet])
+            print("Control: ")
+            print(CONTROL[sheet])
+            assert new[sheet].equals(CONTROL[sheet])
--- a/version.txt
+++ b/version.txt
@ -0,0 +1 @@
+2.1
Author	SHA1	Message	Date
=	231f5ed4ce	- Integrated new ReportConfig into program - Added full test to check everything works as expected after small changes - A bit of project restructuring, with switch to absolute imports	3 years ago
=	fa7f1516c8	Finished config implementation and added testing for config classes.	3 years ago
=	f6245a3413	Reworked the config file to be more flexable and added a config.py file with a ReportConfig class to faciliate easier interaction with the report. Actual program still expects the old config. Must implement	3 years ago
=	9ad5e9180c	Reworked how the reconcilation is done: filter gp -> remove prev -> remove full match -> get contract match -> remaining = no match Changed how the memory cols work. Not finished	3 years ago
=	7ad4f76943	Reworked the report system to use classes for each report type. Helps unify everything. Not yet prepared for memory or db search...	3 years ago
=	6eb57d7978	Prep for adding ability to remember previously reconciled reporting. No inplace, just set up	3 years ago