You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
188 lines
6.1 KiB
188 lines
6.1 KiB
from pandas import DataFrame, merge, to_datetime, NaT
|
|
from numpy import concatenate
|
|
from abc import ABC, abstractmethod
|
|
from logging import getLogger
|
|
import re
|
|
from typing import Literal
|
|
import datetime
|
|
|
|
from helpers import CN_REGEX
|
|
|
|
# Module-level logger, named after this module.
logger = getLogger(__name__)
|
|
|
|
|
|
class HoldReport(ABC):
    """Base class for an on-hold report coming from one source system.

    The wrapped DataFrame is normalized on construction so that reports from
    different sources share column names and can be merged on
    ``contract_number``.

    The configuration dict is read with these keys:

    * ``"shared_columns"``: maps each common column name to a mapping that is
      indexed by ``source`` to get this report's own name for that column.
    * ``"output_columns"``: list of columns kept in the reconcile output.
    """

    # Key of the source system; subclasses override it. It selects this
    # report's column names from config["shared_columns"] and is used as the
    # column suffix when two reports are merged.
    source = ""

    def __init__(self, dataframe: DataFrame, reports_config: dict) -> None:
        """Store the config and DataFrame, then normalize the DataFrame.

        Args:
            dataframe: Raw report data. It is modified in place.
            reports_config: Configuration dict (see the class docstring).
        """
        self.config = reports_config
        self.df = dataframe
        self._normalize()

    @staticmethod
    def _extract_contract_number(raw) -> str:
        """Return the CN_REGEX match inside *raw*, or ``str(raw)`` if none."""
        text = str(raw)
        found = re.search(CN_REGEX, text)
        return found.group(0) if found else text

    def _normalize(self) -> None:
        """Standardize column names and derived columns of ``self.df`` in place."""
        # Rename this source's column names to the shared (common) names.
        column_map = {
            unique_cols[self.source]: common_col
            for common_col, unique_cols in self.config["shared_columns"].items()
        }
        self.df.rename(columns=column_map, inplace=True)

        # On-hold amount as a float rounded to two decimal places.
        self.df["onhold_amount"] = self.df["onhold_amount"].astype(float).round(2)

        # Reduce each contract number to the part matched by CN_REGEX; values
        # without a match are kept as their plain string form.
        self.df["contract_number"] = self.df["contract_number"].apply(
            self._extract_contract_number
        )

        # Transaction ID built from contract number and amount.
        self.df["ID"] = (
            self.df["contract_number"] + "_" + self.df["onhold_amount"].astype(str)
        )

        # Tag every row with the source it came from.
        self.df["Source"] = self.source

    def _get_no_match(self, other: "HoldReport") -> DataFrame:
        """Return the rows whose contract number appears in only one report.

        Args:
            other: The report to compare against.

        Returns:
            A new DataFrame (a copy, not a view of the merge result) with the
            suffixed merge columns plus combined ``Source``,
            ``onhold_amount`` and ``vendor_name`` columns.
        """
        self_source_col = f"Source_{self.source}"
        other_source_col = f"Source_{other.source}"

        # Outer merge on contract number; columns present in both frames get
        # a "_<source>" suffix.
        outer_merge = merge(
            self.df,
            other.df,
            how="outer",
            on=["contract_number"],
            suffixes=("_" + self.source, "_" + other.source),
        )

        # A missing Source on either side means that side had no such contract.
        unmatched = (
            outer_merge[self_source_col].isna()
            | outer_merge[other_source_col].isna()
        )
        # Copy so the column assignments below do not write to a slice.
        no_match = outer_merge.loc[unmatched].copy()

        # Rows missing from this report can only have come from the other
        # one, so label them with the other report's source key.
        no_match["Source"] = no_match[self_source_col].fillna(other.source)
        no_match["onhold_amount"] = no_match[f"onhold_amount_{self.source}"].fillna(
            no_match[f"onhold_amount_{other.source}"]
        )
        no_match["vendor_name"] = no_match[f"vendor_name_{self.source}"].fillna(
            no_match[f"vendor_name_{other.source}"]
        )

        return no_match

    def _get_contract_matches(self, other: "HoldReport") -> DataFrame:
        """Return the rows whose contract number appears in both reports.

        Args:
            other: The report to compare against.

        Returns:
            The inner merge on ``contract_number`` with an added combined
            ``vendor_name`` column.
        """
        # NOTE(review): reconcile() treats this result as "amount mismatches",
        # but no comparison of the two amounts happens here - confirm intent.
        contract_match = merge(
            self.df,
            other.df,
            how="inner",
            on=["contract_number"],
            suffixes=("_" + self.source, "_" + other.source),
        )

        contract_match["vendor_name"] = contract_match[
            f"vendor_name_{self.source}"
        ].fillna(contract_match[f"vendor_name_{other.source}"])

        return contract_match

    @staticmethod
    def _add_work_columns(df: DataFrame) -> DataFrame:
        """Add empty columns to the dataframe to facilitate working the report.

        The columns are added to *df* in place and *df* is returned.
        """
        for col in ("Resolution", "Notes"):
            df[col] = ""
        return df

    def reconcile(self, other: "HoldReport") -> tuple[DataFrame, DataFrame]:
        """Compare this report with *other*.

        Args:
            other: The report to reconcile against.

        Returns:
            A ``(no_match, amount_mismatch)`` pair. ``no_match`` holds the
            contracts found in only one report, restricted to ``"Source"``
            plus the configured output columns. ``amount_mismatch`` holds the
            contracts found in both, restricted to the configured output
            columns. Both get the empty work columns appended.
        """
        output_columns = list(self.config["output_columns"])

        no_match: DataFrame = self._get_no_match(other)
        # NOTE(review): these Excel dumps are written to the working directory
        # on every call; kept as-is in case something consumes them.
        no_match.to_excel("NOMATCH.xlsx")
        logger.debug("No_match: %s", no_match)

        amount_mismatch: DataFrame = self._get_contract_matches(other)
        amount_mismatch.to_excel("AMTMM.xlsx")
        logger.debug("amt_mismatche: %s", amount_mismatch)

        # Select and reorder columns. list.extend() returns None, so the
        # column list is built with unpacking instead. Copies are taken so
        # that adding the work columns does not write to a slice.
        no_match = no_match[["Source", *output_columns]].copy()
        no_match = self._add_work_columns(no_match)

        amount_mismatch = amount_mismatch[output_columns].copy()
        amount_mismatch = self._add_work_columns(amount_mismatch)

        return no_match, amount_mismatch
|
|
|
|
|
|
|
|
class OnBaseReport(HoldReport):
    """Hold report whose source key is ``"OB"``."""

    source = "OB"

    def get_overdue(self) -> DataFrame:
        """Return the rows whose install date is before today.

        The ``install_date`` column of ``self.df`` is converted to datetimes
        in place as a side effect.

        Returns:
            The subset of ``self.df`` whose ``install_date`` calendar date is
            strictly earlier than ``datetime.date.today()``.
        """
        # to_datetime already represents missing values as NaT, so no
        # separate fill step is needed (the former chained
        # ``fillna(NaT, inplace=True)`` changed nothing).
        self.df["install_date"] = to_datetime(self.df["install_date"])

        # NOTE(review): rows with a missing install date are expected to
        # compare as "not before today" and so be left out - confirm.
        install_dates = self.df["install_date"].dt.date
        return self.df[install_dates < datetime.date.today()]
|
|
|
|
|
|
|
|
class GreatPlainsReport(HoldReport):
    """Hold report whose source key is ``"GP"``.

    The raw data is filtered before the base-class normalization runs.
    """

    source = "GP"
    # NOTE(review): not read anywhere in the visible code (and the name looks
    # like a typo of "filtered_df"); kept so existing references keep working.
    filted_df: bool = False

    def __init__(self, dataframe: DataFrame, report_config: dict) -> None:
        """Filter the raw rows, then normalize via the base class.

        Args:
            dataframe: Raw report data. It is filtered and normalized in place.
            report_config: Configuration dict. In addition to what the base
                class reads, ``report_config["gp_filters"]`` must provide
                ``"doc_num_filters"`` (list of regex strings) and
                ``"po_filter"`` (regex string).
        """
        gp_filters = report_config["gp_filters"]
        # Filtering happens first because it relies on the raw column names,
        # before the base class renames them.
        dataframe = self._filter(
            gp_report_df=dataframe,
            doc_num_filters=gp_filters["doc_num_filters"],
            good_po_num_regex=gp_filters["po_filter"],
        )
        super().__init__(dataframe, report_config)

    @staticmethod
    def _filter(gp_report_df: DataFrame,
                doc_num_filters: list[str], good_po_num_regex: str) -> DataFrame:
        """Drop, in place, the rows that should not be part of the report.

        A row is kept only if its "Document Type" is ``"Invoice"``, its
        "Purchase Order Number" matches *good_po_num_regex*, and its
        "Document Number" matches none of *doc_num_filters*. All regex
        matching is case-insensitive; missing values count as "no match".

        Args:
            gp_report_df: Raw report data; modified in place.
            doc_num_filters: Regexes of document numbers to remove. An empty
                list removes nothing.
            good_po_num_regex: Regex a purchase order number must match.

        Returns:
            The same DataFrame object, after filtering.
        """
        keep_mask = (
            (gp_report_df["Document Type"] == "Invoice")
            & gp_report_df["Purchase Order Number"].str.contains(
                good_po_num_regex, flags=re.IGNORECASE, regex=True, na=False
            )
        )

        # Only build the removal pattern when there is something to remove:
        # an empty alternation would match (and therefore drop) every row.
        if doc_num_filters:
            # Non-capturing groups keep each filter's alternatives isolated
            # without triggering pandas' "match groups" warning.
            bad_doc_num = "|".join(f"(?:{rx})" for rx in doc_num_filters)
            remove_mask = gp_report_df["Document Number"].str.contains(
                bad_doc_num, flags=re.IGNORECASE, regex=True, na=False
            )
            keep_mask = keep_mask & ~remove_mask

        # Drop everything that did not pass the combined mask.
        rows_to_drop = gp_report_df[~keep_mask].index
        gp_report_df.drop(rows_to_drop, inplace=True)

        return gp_report_df
|
|
|