|
|
|
@ -94,7 +94,7 @@ def check_sheet(df_cols: list[str], excel_col_config: dict) -> bool: |
|
|
|
return all([col in df_cols for col in required_cols]) |
|
|
|
return all([col in df_cols for col in required_cols]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]: |
|
|
|
def get_dataframes(work_dir: str, excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|None]: |
|
|
|
""" |
|
|
|
""" |
|
|
|
Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB |
|
|
|
Given a dictionary of Excel configuration options, this function searches for the most recently modified GP and OB |
|
|
|
Excel files in a "Work" folder and returns their corresponding dataframes. |
|
|
|
Excel files in a "Work" folder and returns their corresponding dataframes. |
|
|
|
@ -105,22 +105,15 @@ def get_dataframes(excelConfig: dict) -> tuple[pd.DataFrame|None, pd.DataFrame|N |
|
|
|
Returns: |
|
|
|
Returns: |
|
|
|
tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively. |
|
|
|
tuple[pd.DataFrame|None, pd.DataFrame|None]: A tuple containing the OB and GP dataframes, respectively. |
|
|
|
""" |
|
|
|
""" |
|
|
|
# Get the current working directory and the path to the "Work" folder |
|
|
|
|
|
|
|
current_dir: Path = Path(os.getcwd()) |
|
|
|
|
|
|
|
work_folder: Path = current_dir / 'Work' |
|
|
|
|
|
|
|
logger.debug(f"Workpath: {work_folder}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Check that the "Work" folder exists |
|
|
|
|
|
|
|
assert work_folder.exists, "No work folder found!" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Define regular expression patterns to match the GP and OB Excel files |
|
|
|
# Define regular expression patterns to match the GP and OB Excel files |
|
|
|
gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE) |
|
|
|
gp_regex: Pattern = re.compile(".*gp.*\.xlsx$", re.IGNORECASE) |
|
|
|
ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE) |
|
|
|
ob_regex: Pattern = re.compile(".*ob.*\.xlsx$", re.IGNORECASE) |
|
|
|
|
|
|
|
|
|
|
|
# Find the paths of the most recently modified GP and OB Excel files |
|
|
|
# Find the paths of the most recently modified GP and OB Excel files |
|
|
|
gp_file_path = find_most_recent_file(work_folder, gp_regex) |
|
|
|
gp_file_path = find_most_recent_file(work_dir, gp_regex) |
|
|
|
logger.debug(f"gp_file_path: {gp_file_path}") |
|
|
|
logger.debug(f"gp_file_path: {gp_file_path}") |
|
|
|
ob_file_path = find_most_recent_file(work_folder, ob_regex) |
|
|
|
ob_file_path = find_most_recent_file(work_dir, ob_regex) |
|
|
|
logger.debug(f"gp_file_path: {ob_file_path}") |
|
|
|
logger.debug(f"gp_file_path: {ob_file_path}") |
|
|
|
|
|
|
|
|
|
|
|
# Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns |
|
|
|
# Read the GP and OB Excel files into dataframes and check that each dataframe has the required columns |
|
|
|
@ -156,16 +149,17 @@ def main() -> int: |
|
|
|
# Read the configuration options from a TOML file |
|
|
|
# Read the configuration options from a TOML file |
|
|
|
with open("config.toml", "rb") as f: |
|
|
|
with open("config.toml", "rb") as f: |
|
|
|
config_dict: dict = load(f) |
|
|
|
config_dict: dict = load(f) |
|
|
|
|
|
|
|
logger.debug(f"Config: {config_dict}") |
|
|
|
|
|
|
|
|
|
|
|
excelConfig: dict = config_dict["ExcelColumns"] |
|
|
|
excelConfig: dict = config_dict["ExcelColumns"] |
|
|
|
|
|
|
|
|
|
|
|
# Get the GP and OB dataframes from the Excel files |
|
|
|
# Get the GP and OB dataframes from the Excel files |
|
|
|
ob_df, gp_df = get_dataframes(excelConfig) |
|
|
|
ob_df, gp_df = get_dataframes(config_dict["write_dir"] ,excelConfig) |
|
|
|
assert not ob_df.empty, "OB Data empty!" |
|
|
|
assert not ob_df.empty, "OB Data empty!" |
|
|
|
assert not gp_df.empty, "GP Data empty!" |
|
|
|
assert not gp_df.empty, "GP Data empty!" |
|
|
|
|
|
|
|
|
|
|
|
# Filter the GP dataframe to include only relevant transactions |
|
|
|
# Filter the GP dataframe to include only relevant transactions |
|
|
|
fgp_df: DataFrame = filter_gp(gp_df, excelConfig["GP"]) |
|
|
|
fgp_df: DataFrame = filter_gp(gp_df, config_dict) |
|
|
|
# Get the overdue transactions from the OB dataframe |
|
|
|
# Get the overdue transactions from the OB dataframe |
|
|
|
overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"]) |
|
|
|
overdue: DataFrame = get_overdue(ob_df, excelConfig["OB"]) |
|
|
|
|
|
|
|
|
|
|
|
@ -181,7 +175,7 @@ def main() -> int: |
|
|
|
only_contracts_match: DataFrame = get_contract_match(not_full_match) |
|
|
|
only_contracts_match: DataFrame = get_contract_match(not_full_match) |
|
|
|
|
|
|
|
|
|
|
|
# Write the results to a new Excel file |
|
|
|
# Write the results to a new Excel file |
|
|
|
with pd.ExcelWriter(f"{config_dict['work_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer: |
|
|
|
with pd.ExcelWriter(f"{config_dict['write_dir']}/Reconciled Holds [{dt.now().strftime('%m-%d-%Y')}].xlsx", mode='w') as writer: |
|
|
|
full_match.to_excel(writer,sheet_name="FULL", index=False) |
|
|
|
full_match.to_excel(writer,sheet_name="FULL", index=False) |
|
|
|
no_match.to_excel(writer, sheet_name="No Match", index=False) |
|
|
|
no_match.to_excel(writer, sheet_name="No Match", index=False) |
|
|
|
only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False) |
|
|
|
only_contracts_match.to_excel(writer, sheet_name="Amount Mismatch", index=False) |
|
|
|
|