Finished config implementation and added testing for config classes.

3 years ago · fa7f1516c8
parent f6245a3413
commit fa7f1516c8
13 changed files with 312 additions and 114 deletions
--- a/.gitignore
+++ b/.gitignore
@ -12,3 +12,4 @@ ghlib/
 *.txt

 !version.txt
+!tests/test_inputs/*
--- a/src/config.py
+++ b/src/config.py
@ -31,59 +31,64 @@ class PathsConfig:
    """
    Configuration holding the paths to:
     - input_directory: Where to search for new report files
-     - gp/ob_regex: regex used to find new OB & GP files in the report location
+     - gp/ob_glob: glob pattern used to find new OB & GP files in the report location
     - db_path: path to an SQLite database if any
    """
        
    def __init__(self, in_dir: str, out_dir: str, 
-        input_regex_dict: dict[str:Regex] , db_path: str = None) -> None:
+        input_regex_dict: dict[str:str] , db_path: str = None) -> None:

        self.input_directory: Path  = Path(in_dir)
        self.output_directory: Path = Path(out_dir)

-        self.gp_regex: Pattern = compile("*.xlsx")
-        self.ob_regex: Pattern = compile("*.xlsx")
+        self.gp_glob: str = r"*.xlsx"
+        self.ob_glob: str = r"*.xlsx"

        if db_path is not None:
            self.db_path: Path = Path(db_path)
        
        try: 
-            self.gp_regex: Pattern = compile(input_regex_dict["GP"])
-            self.ob_regex: Pattern = compile(input_regex_dict["OB"])
+            self.gp_glob: str = input_regex_dict["GP"]
+            self.ob_glob: str = input_regex_dict["OB"]
        except KeyError:
            # Defaulting to newest of any xlsx file!
            # TODO investigate warning
            pass # will remain as *.xlsx

-    def get_most_recent(self, report_type: ReportSource) -> Path|None:
-
-        match report_type:
-            case report_type.OB:
-                file_regex: Pattern = self.ob_regex
-            case report_type.GP:
-                file_regex: Pattern = self.gp_regex
-            case _:
-                raise NotImplementedError(\
-                    f"No regex pattern for report type: {report_type}"
-                )
-            
-        files = self.input_directory.glob(file_regex)
-        
-        # Find the most recently created file
-        most_recent_file = None
-        most_recent_creation_time = None
-        
-        file: Path
-        for file in files:
-            creation_time = file.stat().st_ctime
-            if most_recent_creation_time is None or creation_time > most_recent_creation_time:
-                most_recent_file = file
-                most_recent_creation_time = creation_time
-        
-        return most_recent_file
-            
-            
-
+    def get_most_recent(self, report_type: ReportSource = None) -> Path|None| tuple[Path|None, Path|None]:
+        
+        report_files = []
+        report_types = [ReportSource.OB, ReportSource.GP] if report_type is None else [report_type]
+        rt: ReportSource
+        for rt in report_types:
+            match rt:
+                case rt.OB:
+                    file_glob: str = self.ob_glob
+                case rt.GP:
+                    file_glob: str = self.gp_glob
+                case _:
+                    raise NotImplementedError(\
+                        f"No regex pattern for report type: {rt}"
+                    )
+
+            files = self.input_directory.glob(file_glob)
+            
+            # Find the most recently created file
+            most_recent_file = None
+            most_recent_creation_time = None
+
+            file: Path
+            for file in files:
+                creation_time = file.stat().st_ctime
+                if most_recent_creation_time is None or creation_time > most_recent_creation_time:
+                    most_recent_file = file
+                    most_recent_creation_time = creation_time
+            report_files.append(most_recent_file)
+
+        if len(report_files) > 1:
+            return report_files
+        
+        return report_files[0]           
    
    def has_database(self) -> tuple[bool, bool]:
        """
@ -105,7 +110,6 @@ class ReportConfig:
    paths: PathsConfig 

    use_mssql: bool
-    db_path: Path

    # Work columns are included in finished columns
    work_columns: list[str]
@ -123,36 +127,37 @@ class ReportConfig:

        config_path = Path(config_path) if isinstance(config_path, str) else config_path
       
-        match config_path.suffix:
-            case ".toml":
-                c_dict: dict = t_load(config_path)
-            case ".json":
-                c_dict: dict= j_load(config_path)
-            case _:
-                raise NotImplementedError(f"Only json and toml configs are supported not: {config_path.suffix}")
+        with open(config_path, "rb") as config_file:
+            match config_path.suffix:
+                case ".toml":
+                    c_dict: dict = t_load(config_file)
+                case ".json":
+                    c_dict: dict= j_load(config_file)
+                case _:
+                    raise NotImplementedError(f"Only json and toml configs are supported not: {config_path.suffix}")
        
        try:

            path_config: PathsConfig = PathsConfig(
-                in_dir = c_dict["input_path"],
-                out_dir= c_dict["output_path"],
-                input_regex_dict= c_dict["input_regex"],
-                db_path= c_dict["db_path"]
+                in_dir = c_dict["input_directory"],
+                out_dir= c_dict["output_directory"],
+                input_regex_dict= c_dict["input_glob_pattern"],
+                db_path= c_dict["database_path"]
            )
            
            use_mssql = False #TODO not yet implemented

            work_columns = c_dict["work_columns"]
-            output_columns = c_dict["output_columns"]
+            finished_column = c_dict["finished_column"]
            
-            # Add create out filter dict
+            # Create filter dict with compiled regex
            filters_dict : dict = c_dict["filters"]
            filters: dict[str:list[Pattern]|Pattern] =  {}
            k: str
            v: Regex|list[Regex]
            for k, v in filters_dict.items():

-                if not isinstance(v, Regex|list[Regex]):
+                if not isinstance(v, Regex) and not isinstance(v, list):
                    raise ReportConfigError(f"Filter items must be a valid regex pattern or a list of valid patterns!\
                        {v} ({type(v)}) is not valid!")
                
@ -175,7 +180,7 @@ class ReportConfig:
            paths= path_config,
            use_mssql= use_mssql,
            work_columns= work_columns,
-            finished_columns= output_columns,
+            finished_columns= finished_column,
            filters= filters,
            shared_columns= shared_columns,
        )
--- a/src/config_reports.toml
+++ b/src/config_reports.toml
@ -1,44 +0,0 @@
-output_path = '../Work'
-db_path = "OnHold.db"
-
-# Columns added each 'working' sheet in the new report dataframe
-work_columns = [
-    "HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month
-    "Resolution" # Text field describing the disprecany and how it may be resolved
-]
-
-# List of Columns to show on the 'working' sheets of the rec report
-output_columns = [
-        "contract_number",
-        "vendor_name",
-        "AppNum",         # OB only
-        "Document Number",# GP Only
-        "DateBooked",# OB only
-        "Document Date", #GP Only
-        "HideNextMonth",
-        "Resolution",
-        # 'Source' added for 'no match'
-    ]
-
-[filters]
-    # These regex will be combined and with ORs and used to filer
-    # the document number column of the GP report 
-    doc_num_filters = [
-        "p(oin)?ts",
-        "pool",
-        "promo",
-        "o(ver)?f(und)?",
-        "m(ar)?ke?t",
-        "title",
-        "adj",
-        "reg fee",
-        "rent",
-        "cma"
-    ]
-    po_filter = ["^(?!.*cma(\\s|\\d)).*$"]
-
-# Columns that are common to both GP and OB
-[shared_columns]
-contract_number = { GP = "Transaction Description", OB = "Contract"}
-onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" }
-vendor_name = { GP = "Vendor Name", OB = "DealerName"} 
--- a/src/configs/config_logger.toml
+++ b/src/configs/config_logger.toml
--- a/src/configs/report_config_template.json
+++ b/src/configs/report_config_template.json
@ -1,19 +1,33 @@
 {
-    "input_directory": "",
-    "output_directory": "",
-    "use_mssql": false,
-    "database_path": "",
-    "work_columns": [],
-    "finished_column": [],
-    "filters": {
-      "filter_name": [],
-      "other_filter": ""
-    },
-    "shared_columns": [
-      {
-        "standardized_name": "",
-        "GP": "",
-        "OB": ""
-      }
-    ]
-  }
+  "input_directory": "/path/to/input/folder",
+  "input_glob_pattern": {
+    "GP": "*GP*.xlsx",
+    "OB": "*OB*.xlsx"
+  },
+  "output_directory": "/path/to/output",
+  "interactive_inputs": false,
+  "use_mssql": false,
+  "database_path": "./onhold.db",
+  "work_columns": [
+    "Col_A",
+    "Col_B"
+  ],
+  "finished_column": [
+    "Notes",
+    "Contract Number"
+  ],
+  "filters": {
+    "filter_name": [
+      "\\d{7}",
+      "\\w+"
+    ],
+    "other_filter": "(OB|GP)$"
+  },
+  "shared_columns": [
+    {
+      "standardized_name": "contract_number",
+      "GP": "Transaction Description",
+      "OB": "ContractNumber"
+    }
+  ]
+}
--- a/src/configs/reports_config.toml
+++ b/src/configs/reports_config.toml
@ -0,0 +1,72 @@
+####  Paths: using '' makes the string 'raw' to avoid escape characters
+
+# Path to the directory to search for input report files
+input_directory = '../Reports'
+# Glob patterns used to discover the newest report files
+input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'}
+# Path to the directory to save the reconciliation work report
+output_directory = '../Output'
+# Fallback to interactive?
+interactive_inputs = false # NOT YET IMPLEMENTED
+
+
+#### DB
+
+# Whether to try using a mssql database
+# NOT YET IMPLEMENTED!
+use_mssql = false
+# Path to the SQLite database used to view/save reconciliations
+database_path = './onhold_reconciliation.db'
+
+
+### Finished rec details
+
+# Columns to add to all 'work' sheets
+# also saved 'Reconcilations' database
+work_columns = [
+    "HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month
+    "Resolution" # Text field describing the discrepancy and how it may be resolved
+]
+# Columns to keep on reconciliation 'work' sheets
+finished_column = [
+        "contract_number",
+        "vendor_name",
+        "AppNum",           # OB only
+        "Document Number",  # GP Only
+        "DateBooked",       # OB only
+        "Document Date",    # GP Only
+        # 'Source' added for 'no match'
+    ]
+
+# Any regex filters that might be needed 
+[filters]
+# Use label to distinguish a regex set
+doc_num_filters = [
+        "p(oin)?ts",
+        "pool",
+        "promo",
+        "o(ver)?f(und)?",
+        "m(ar)?ke?t",
+        "title",
+        "adj",
+        "reg fee",
+        "rent",
+        "cma"
+    ]
+po_filter = ["^(?!.*cma(\\s|\\d)).*$"]
+
+# Columns that are featured & expected on both OB & GP
+[[shared_columns]]
+standardized_name = "contract_number" # The name you'd like to use to standardize them
+GP = "Transaction Description" # Column name used in GP
+OB = "Contract" # Column name used in OB
+
+[[shared_columns]]
+standardized_name = "onhold_amount"
+GP = "Current Trx Amount"
+OB = "CurrentOnHold"
+
+[[shared_columns]]
+standardized_name = "vendor_name" 
+GP = "Vendor Name" 
+OB = "DealerName"
--- a/src/configs/reports_config_template.toml
+++ b/src/configs/reports_config_template.toml
@ -3,7 +3,7 @@
 # Path to the directory to search for input report files
 input_directory = '/path/to/input/folder'
 # Glob patterns used to discover the newest report files
-input_regex = { GP = '*likeThis*.xlsx', OB = '*.csv'}
+input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'}
 # Path to the directory to save the reconcilation work report
 output_directory = '/path/to/output'
 # Fallback to interactive?
--- a/tests/context.py
+++ b/tests/context.py
@ -0,0 +1,5 @@
+import os
+import sys
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+import src
--- a/tests/test_config.py
+++ b/tests/test_config.py
@ -0,0 +1,73 @@
+import unittest
+from pathlib import Path
+from re import Pattern, compile
+from .context import src
+from src import config
+from src import ReportSource
+
+class TestReportConfig(unittest.TestCase):
+
+    def test_from_file(self):
+        # Provide the path to your config file
+        config_file = Path(r"tests\test_inputs\TEST_reports_config.toml")
+
+        # Call the static method from_file to create an instance of ReportConfig
+        report_config = config.ReportConfig.from_file(config_file)
+
+        # Assert the values of the attributes in the created instance
+        self.assertEqual(report_config.paths.input_directory, Path(r"tests\test_inputs"))
+        self.assertEqual(report_config.paths.gp_glob, r'*GP*.xlsx')
+        self.assertEqual(report_config.paths.ob_glob, r"*OB*.xlsx")
+        self.assertEqual(report_config.paths.output_directory, Path(r"tests\test_outputs"))
+        self.assertEqual(report_config.use_mssql, False)
+        self.assertEqual(report_config.paths.db_path, Path("./onhold_reconciliation.db"))
+        self.assertEqual(report_config.work_columns, ["HideNextMonth", "Resolution"])
+        self.assertEqual(report_config.finished_columns, [
+            "contract_number",
+            "vendor_name",
+            "AppNum",
+            "Document Number",
+            "DateBooked",
+            "Document Date",
+        ])
+        self.assertEqual(report_config.filters["doc_num_filters"], [
+            compile(r"p(oin)?ts",),
+            compile(r"pool",),
+            compile(r"promo",),
+            compile(r"o(ver)?f(und)?",),
+            compile(r"m(ar)?ke?t",),
+            compile(r"title",),
+            compile(r"adj",),
+            compile(r"reg fee",),
+            compile(r"rent",),
+            compile(r"cma",),
+        ])
+        self.assertEqual(report_config.filters["po_filter"], [compile(r"^(?!.*cma(\s|\d)).*$")])
+        self.assertEqual(report_config.shared_columns[0]["standardized_name"], "contract_number")
+        self.assertEqual(report_config.shared_columns[0]["GP"], "Transaction Description")
+        self.assertEqual(report_config.shared_columns[0]["OB"], "Contract")
+        self.assertEqual(report_config.shared_columns[1]["standardized_name"], "onhold_amount")
+        self.assertEqual(report_config.shared_columns[1]["GP"], "Current Trx Amount")
+        self.assertEqual(report_config.shared_columns[1]["OB"], "CurrentOnHold")
+        self.assertEqual(report_config.shared_columns[2]["standardized_name"], "vendor_name")
+        self.assertEqual(report_config.shared_columns[2]["GP"], "Vendor Name")
+        self.assertEqual(report_config.shared_columns[2]["OB"], "DealerName")
+
+    def test_get_newest(self):
+        # Provide the path to your config file
+        config_file = Path(r"tests\test_inputs\TEST_reports_config.toml")
+
+        # Call the static method from_file to create an instance of ReportConfig
+        report_config = config.ReportConfig.from_file(config_file)
+
+        newest_ob: Path = report_config.paths.get_most_recent(report_type=ReportSource.OB)
+        self.assertEqual(newest_ob.name, "April 2023 OB.xlsx")
+        newest_gp: Path = report_config.paths.get_most_recent(report_type=ReportSource.GP)
+        self.assertEqual(newest_gp.name, "April GP.xlsx")
+
+        nob, ngp = report_config.paths.get_most_recent()
+        self.assertEqual(nob.name, "April 2023 OB.xlsx")
+        self.assertEqual(ngp.name, "April GP.xlsx")
+
+if __name__ == '__main__':
+    unittest.main()
--- a/tests/test_inputs/April
+++ b/tests/test_inputs/April
--- a/tests/test_inputs/April
+++ b/tests/test_inputs/April
--- a/tests/test_inputs/April
+++ b/tests/test_inputs/April
--- a/tests/test_inputs/TEST_reports_config.toml
+++ b/tests/test_inputs/TEST_reports_config.toml
@ -0,0 +1,72 @@
+####  Paths: using '' makes the string 'raw' to avoid escape characters
+
+# Path to the directory to search for input report files
+input_directory = 'tests\test_inputs'
+# Glob patterns used to discover the newest report files
+input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'}
+# Path to the directory to save the reconciliation work report
+output_directory = 'tests\test_outputs'
+# Fallback to interactive?
+interactive_inputs = false # NOT YET IMPLEMENTED
+
+
+#### DB
+
+# Whether to try using a mssql database
+# NOT YET IMPLEMENTED!
+use_mssql = false
+# Path to the SQLite database used to view/save reconciliations
+database_path = './onhold_reconciliation.db'
+
+
+### Finished rec details
+
+# Columns to add to all 'work' sheets
+# also saved 'Reconcilations' database
+work_columns = [
+    "HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month
+    "Resolution" # Text field describing the discrepancy and how it may be resolved
+]
+# Columns to keep on reconciliation 'work' sheets
+finished_column = [
+        "contract_number",
+        "vendor_name",
+        "AppNum",           # OB only
+        "Document Number",  # GP Only
+        "DateBooked",       # OB only
+        "Document Date",    # GP Only
+        # 'Source' added for 'no match'
+    ]
+
+# Any regex filters that might be needed 
+[filters]
+# Use label to distinguish a regex set
+doc_num_filters = [
+        "p(oin)?ts",
+        "pool",
+        "promo",
+        "o(ver)?f(und)?",
+        "m(ar)?ke?t",
+        "title",
+        "adj",
+        "reg fee",
+        "rent",
+        "cma"
+    ]
+po_filter = ['^(?!.*cma(\s|\d)).*$']
+
+# Columns that are featured & expected on both OB & GP
+[[shared_columns]]
+standardized_name = "contract_number" # The name you'd like to use to standardize them
+GP = "Transaction Description" # Column name used in GP
+OB = "Contract" # Column name used in OB
+
+[[shared_columns]]
+standardized_name = "onhold_amount"
+GP = "Current Trx Amount"
+OB = "CurrentOnHold"
+
+[[shared_columns]]
+standardized_name = "vendor_name" 
+GP = "Vendor Name" 
+OB = "DealerName"