diff --git a/.gitignore b/.gitignore index fccecf6..974a291 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ ghlib/ *.db *.txt -!version.txt \ No newline at end of file +!version.txt +!tests/test_inputs/* \ No newline at end of file diff --git a/src/config.py b/src/config.py index 1efe126..a05346d 100644 --- a/src/config.py +++ b/src/config.py @@ -31,59 +31,64 @@ class PathsConfig: """ Configuration holding the paths to: - input_directory: Where to search for new report files - - gp/ob_regex: regex used to find new OB & GP files in the report location + - gp/ob_glob: regex used to find new OB & GP files in the report location - db_path: path to an SQLite database if any """ def __init__(self, in_dir: str, out_dir: str, - input_regex_dict: dict[str:Regex] , db_path: str = None) -> None: + input_regex_dict: dict[str:str] , db_path: str = None) -> None: self.input_directory: Path = Path(in_dir) self.output_directory: Path = Path(out_dir) - self.gp_regex: Pattern = compile("*.xlsx") - self.ob_regex: Pattern = compile("*.xlsx") + self.gp_glob: str = r"*.xlsx" + self.ob_glob: str = r"*.xlsx" if db_path is not None: self.db_path: Path = Path(db_path) try: - self.gp_regex: Pattern = compile(input_regex_dict["GP"]) - self.ob_regex: Pattern = compile(input_regex_dict["OB"]) + self.gp_glob: str = input_regex_dict["GP"] + self.ob_glob: str = input_regex_dict["OB"] except KeyError: # Defaulting to newest of any xlsx file! 
# TODO investigate warning pass # will remain as *.xlsx - def get_most_recent(self, report_type: ReportSource) -> Path|None: - - match report_type: - case report_type.OB: - file_regex: Pattern = self.ob_regex - case report_type.GP: - file_regex: Pattern = self.gp_regex - case _: - raise NotImplementedError(\ - f"No regex pattern for report type: {report_type}" - ) - - files = self.input_directory.glob(file_regex) - - # Find the most recently created file - most_recent_file = None - most_recent_creation_time = None + def get_most_recent(self, report_type: ReportSource = None) -> Path|None| tuple[Path|None, Path|None]: - file: Path - for file in files: - creation_time = file.stat().st_ctime - if most_recent_creation_time is None or creation_time > most_recent_creation_time: - most_recent_file = file - most_recent_creation_time = creation_time - - return most_recent_file - + report_files = [] + report_types = [ReportSource.OB, ReportSource.GP] if report_type is None else [report_type] + rt: ReportSource + for rt in report_types: + match rt: + case rt.OB: + file_glob: str = self.ob_glob + case rt.GP: + file_glob: str = self.gp_glob + case _: + raise NotImplementedError(\ + f"No regex pattern for report type: {rt}" + ) + + files = self.input_directory.glob(file_glob) - + # Find the most recently created file + most_recent_file = None + most_recent_creation_time = None + + file: Path + for file in files: + creation_time = file.stat().st_ctime + if most_recent_creation_time is None or creation_time > most_recent_creation_time: + most_recent_file = file + most_recent_creation_time = creation_time + report_files.append(most_recent_file) + + if len(report_files) > 1: + return report_files + + return report_files[0] def has_database(self) -> tuple[bool, bool]: """ @@ -105,7 +110,6 @@ class ReportConfig: paths: PathsConfig use_mssql: bool - db_path: Path # Work columns are included in finsished columns work_columns: list[str] @@ -122,37 +126,38 @@ class ReportConfig: def 
from_file(config_path: str|Path) -> 'ReportConfig': config_path = Path(config_path) if isinstance(config_path, str) else config_path - - match config_path.suffix: - case ".toml": - c_dict: dict = t_load(config_path) - case ".json": - c_dict: dict= j_load(config_path) - case _: - raise NotImplementedError(f"Only json and toml configs are supported not: {config_path.suffix}") + + with open(config_path, "rb") as config_file: + match config_path.suffix: + case ".toml": + c_dict: dict = t_load(config_file) + case ".json": + c_dict: dict= j_load(config_file) + case _: + raise NotImplementedError(f"Only json and toml configs are supported not: {config_path.suffix}") try: path_config: PathsConfig = PathsConfig( - in_dir = c_dict["input_path"], - out_dir= c_dict["output_path"], - input_regex_dict= c_dict["input_regex"], - db_path= c_dict["db_path"] + in_dir = c_dict["input_directory"], + out_dir= c_dict["output_directory"], + input_regex_dict= c_dict["input_glob_pattern"], + db_path= c_dict["database_path"] ) use_mssql = False #TODO no yet implemented work_columns = c_dict["work_columns"] - output_columns = c_dict["output_columns"] + finished_column = c_dict["finished_column"] - # Add create out filter dict + # Create filter dict with compiled regex filters_dict : dict = c_dict["filters"] filters: dict[str:list[Pattern]|Pattern] = {} k: str v: Regex|list[Regex] for k, v in filters_dict.items(): - if not isinstance(v, Regex|list[Regex]): + if not isinstance(v, Regex) and not isinstance(v, list): raise ReportConfigError(f"Filter items must be a valid regex pattern or a list of valid patterns!\ {v} ({type(v)}) is not valid!") @@ -175,7 +180,7 @@ class ReportConfig: paths= path_config, use_mssql= use_mssql, work_columns= work_columns, - finished_columns= output_columns, + finished_columns= finished_column, filters= filters, shared_columns= shared_columns, ) \ No newline at end of file diff --git a/src/config_reports.toml b/src/config_reports.toml deleted file mode 100644 index 
7a324bf..0000000 --- a/src/config_reports.toml +++ /dev/null @@ -1,44 +0,0 @@ -output_path = '../Work' -db_path = "OnHold.db" - -# Columns added each 'working' sheet in the new report dataframe -work_columns = [ - "HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month - "Resolution" # Text field describing the disprecany and how it may be resolved -] - -# List of Columns to show on the 'working' sheets of the rec report -output_columns = [ - "contract_number", - "vendor_name", - "AppNum", # OB only - "Document Number",# GP Only - "DateBooked",# OB only - "Document Date", #GP Only - "HideNextMonth", - "Resolution", - # 'Source' added for 'no match' - ] - -[filters] - # These regex will be combined and with ORs and used to filer - # the document number column of the GP report - doc_num_filters = [ - "p(oin)?ts", - "pool", - "promo", - "o(ver)?f(und)?", - "m(ar)?ke?t", - "title", - "adj", - "reg fee", - "rent", - "cma" - ] - po_filter = ["^(?!.*cma(\\s|\\d)).*$"] - -# Columns that are common to both GP and OB -[shared_columns] -contract_number = { GP = "Transaction Description", OB = "Contract"} -onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" } -vendor_name = { GP = "Vendor Name", OB = "DealerName"} \ No newline at end of file diff --git a/src/config_logger.toml b/src/configs/config_logger.toml similarity index 100% rename from src/config_logger.toml rename to src/configs/config_logger.toml diff --git a/src/configs/report_config_template.json b/src/configs/report_config_template.json index 8035975..720c97e 100644 --- a/src/configs/report_config_template.json +++ b/src/configs/report_config_template.json @@ -1,19 +1,33 @@ { - "input_directory": "", - "output_directory": "", - "use_mssql": false, - "database_path": "", - "work_columns": [], - "finished_column": [], - "filters": { - "filter_name": [], - "other_filter": "" - }, - "shared_columns": [ - { - "standardized_name": "", - "GP": "", - "OB": "" - } - 
] - } \ No newline at end of file + "input_directory": "/path/to/input/folder", + "input_glob_pattern": { + "GP": "*GP*.xlsx", + "OB": "*OB*.xlsx" + }, + "output_directory": "/path/to/output", + "interactive_inputs": false, + "use_mssql": false, + "database_path": "./onhold.db", + "work_columns": [ + "Col_A", + "Col_B" + ], + "finished_column": [ + "Notes", + "Contract Number" + ], + "filters": { + "filter_name": [ + "\\d{7}", + "\\w+" + ], + "other_filter": "(OB|GP)$" + }, + "shared_columns": [ + { + "standardized_name": "contract_number", + "GP": "Transaction Description", + "OB": "ContractNumber" + } + ] +} \ No newline at end of file diff --git a/src/configs/reports_config.toml b/src/configs/reports_config.toml new file mode 100644 index 0000000..4a85a94 --- /dev/null +++ b/src/configs/reports_config.toml @@ -0,0 +1,72 @@ +#### Paths: using '' makes the string 'raw' to avoid escape characters + +# Path to the directory to search for input report files +input_directory = '../Reports' +# Glob patterns used to discover the newest files +input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'} +# Path to the directory to save the reconciliation work report +output_directory = '../Output' +# Fallback to interactive? +interactive_inputs = false # NOT YET IMPLEMENTED + + +#### DB + +# Whether to try using a mssql database +# NOT YET IMPLEMENTED! 
+use_mssql = false +# Path to the SQLite database used to view/save reconciliations +database_path = './onhold_reconciliation.db' + + +### Finished rec details + +# Columns to add to all 'work' sheets +# also saved 'Reconcilations' database +work_columns = [ + "HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month + "Resolution" # Text field describing the discrepancy and how it may be resolved +] +# Columns to keep on reconciliation 'work' sheets +finished_column = [ + "contract_number", + "vendor_name", + "AppNum", # OB only + "Document Number", # GP Only + "DateBooked", # OB only + "Document Date", # GP Only + # 'Source' added for 'no match' + ] + +# Any regex filters that might be needed +[filters] +# Use label to distinguish a regex set +doc_num_filters = [ + "p(oin)?ts", + "pool", + "promo", + "o(ver)?f(und)?", + "m(ar)?ke?t", + "title", + "adj", + "reg fee", + "rent", + "cma" + ] +po_filter = ["^(?!.*cma(\\s|\\d)).*$"] + +# Columns that are featured & expected on both OB & GP +[[shared_columns]] +standardized_name = "contract_number" # The name you'd like to use to standardize them +GP = "Transaction Description" # Column name used in GP +OB = "Contract" # Column name used in OB + +[[shared_columns]] +standardized_name = "onhold_amount" +GP = "Current Trx Amount" +OB = "CurrentOnHold" + +[[shared_columns]] +standardized_name = "vendor_name" +GP = "Vendor Name" +OB = "DealerName" \ No newline at end of file diff --git a/src/configs/reports_config_template.toml b/src/configs/reports_config_template.toml index c4472fa..06753a2 100644 --- a/src/configs/reports_config_template.toml +++ b/src/configs/reports_config_template.toml @@ -3,7 +3,7 @@ # Path to the directory to search for input report files input_directory = '/path/to/input/folder' # Regex used to discover newest files -input_regex = { GP = '*likeThis*.xlsx', OB = '*.csv'} +input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'} # Path to the directory to 
save the reconcilation work report output_directory = '/path/to/output' # Fallback to interactive? diff --git a/tests/context.py b/tests/context.py new file mode 100644 index 0000000..1bea8fb --- /dev/null +++ b/tests/context.py @@ -0,0 +1,5 @@ +import os +import sys +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +import src \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..495c171 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,73 @@ +import unittest +from pathlib import Path +from re import Pattern, compile +from .context import src +from src import config +from src import ReportSource + +class TestReportConfig(unittest.TestCase): + + def test_from_file(self): + # Provide the path to your config file + config_file = Path(r"tests\test_inputs\TEST_reports_config.toml") + + # Call the static method from_file to create an instance of ReportConfig + report_config = config.ReportConfig.from_file(config_file) + + # Assert the values of the attributes in the created instance + self.assertEqual(report_config.paths.input_directory, Path(r"tests\test_inputs")) + self.assertEqual(report_config.paths.gp_glob, r'*GP*.xlsx') + self.assertEqual(report_config.paths.ob_glob, r"*OB*.xlsx") + self.assertEqual(report_config.paths.output_directory, Path(r"tests\test_outputs")) + self.assertEqual(report_config.use_mssql, False) + self.assertEqual(report_config.paths.db_path, Path("./onhold_reconciliation.db")) + self.assertEqual(report_config.work_columns, ["HideNextMonth", "Resolution"]) + self.assertEqual(report_config.finished_columns, [ + "contract_number", + "vendor_name", + "AppNum", + "Document Number", + "DateBooked", + "Document Date", + ]) + self.assertEqual(report_config.filters["doc_num_filters"], [ + compile(r"p(oin)?ts",), + compile(r"pool",), + compile(r"promo",), + compile(r"o(ver)?f(und)?",), + compile(r"m(ar)?ke?t",), + compile(r"title",), + 
compile(r"adj",), + compile(r"reg fee",), + compile(r"rent",), + compile(r"cma",), + ]) + self.assertEqual(report_config.filters["po_filter"], [compile(r"^(?!.*cma(\s|\d)).*$")]) + self.assertEqual(report_config.shared_columns[0]["standardized_name"], "contract_number") + self.assertEqual(report_config.shared_columns[0]["GP"], "Transaction Description") + self.assertEqual(report_config.shared_columns[0]["OB"], "Contract") + self.assertEqual(report_config.shared_columns[1]["standardized_name"], "onhold_amount") + self.assertEqual(report_config.shared_columns[1]["GP"], "Current Trx Amount") + self.assertEqual(report_config.shared_columns[1]["OB"], "CurrentOnHold") + self.assertEqual(report_config.shared_columns[2]["standardized_name"], "vendor_name") + self.assertEqual(report_config.shared_columns[2]["GP"], "Vendor Name") + self.assertEqual(report_config.shared_columns[2]["OB"], "DealerName") + + def test_get_newest(self): + # Provide the path to your config file + config_file = Path(r"tests\test_inputs\TEST_reports_config.toml") + + # Call the static method from_file to create an instance of ReportConfig + report_config = config.ReportConfig.from_file(config_file) + + newest_ob: Path = report_config.paths.get_most_recent(report_type=ReportSource.OB) + self.assertEqual(newest_ob.name, "April 2023 OB.xlsx") + newest_gp: Path = report_config.paths.get_most_recent(report_type=ReportSource.GP) + self.assertEqual(newest_gp.name, "April GP.xlsx") + + nob, ngp = report_config.paths.get_most_recent() + self.assertEqual(nob.name, "April 2023 OB.xlsx") + self.assertEqual(ngp.name, "April GP.xlsx") + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/test_inputs/April 2023 OB.xlsx b/tests/test_inputs/April 2023 OB.xlsx new file mode 100644 index 0000000..c7f10b3 Binary files /dev/null and b/tests/test_inputs/April 2023 OB.xlsx differ diff --git a/tests/test_inputs/April GP.xlsx b/tests/test_inputs/April GP.xlsx new file mode 100644 
index 0000000..1ee11a7 Binary files /dev/null and b/tests/test_inputs/April GP.xlsx differ diff --git a/tests/test_inputs/April Reconciled Holds.xlsx b/tests/test_inputs/April Reconciled Holds.xlsx new file mode 100644 index 0000000..2fb1e9a Binary files /dev/null and b/tests/test_inputs/April Reconciled Holds.xlsx differ diff --git a/tests/test_inputs/TEST_reports_config.toml b/tests/test_inputs/TEST_reports_config.toml new file mode 100644 index 0000000..80a2754 --- /dev/null +++ b/tests/test_inputs/TEST_reports_config.toml @@ -0,0 +1,72 @@ +#### Paths: using '' makes the string 'raw' to avoid escape characters + +# Path to the directory to search for input report files +input_directory = 'tests\test_inputs' +# Glob patterns used to discover the newest files +input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'} +# Path to the directory to save the reconciliation work report +output_directory = 'tests\test_outputs' +# Fallback to interactive? +interactive_inputs = false # NOT YET IMPLEMENTED + + +#### DB + +# Whether to try using a mssql database +# NOT YET IMPLEMENTED! 
+use_mssql = false +# Path to the SQLite database used to view/save reconciliations +database_path = './onhold_reconciliation.db' + + +### Finished rec details + +# Columns to add to all 'work' sheets +# also saved 'Reconcilations' database +work_columns = [ + "HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month + "Resolution" # Text field describing the discrepancy and how it may be resolved +] +# Columns to keep on reconciliation 'work' sheets +finished_column = [ + "contract_number", + "vendor_name", + "AppNum", # OB only + "Document Number", # GP Only + "DateBooked", # OB only + "Document Date", # GP Only + # 'Source' added for 'no match' + ] + +# Any regex filters that might be needed +[filters] +# Use label to distinguish a regex set +doc_num_filters = [ + "p(oin)?ts", + "pool", + "promo", + "o(ver)?f(und)?", + "m(ar)?ke?t", + "title", + "adj", + "reg fee", + "rent", + "cma" + ] +po_filter = ['^(?!.*cma(\s|\d)).*$'] + +# Columns that are featured & expected on both OB & GP +[[shared_columns]] +standardized_name = "contract_number" # The name you'd like to use to standardize them +GP = "Transaction Description" # Column name used in GP +OB = "Contract" # Column name used in OB + +[[shared_columns]] +standardized_name = "onhold_amount" +GP = "Current Trx Amount" +OB = "CurrentOnHold" + +[[shared_columns]] +standardized_name = "vendor_name" +GP = "Vendor Name" +OB = "DealerName" \ No newline at end of file