You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
142 lines
4.6 KiB
142 lines
4.6 KiB
import os
|
|
import pandas as pd
|
|
from datetime import datetime as dt, timedelta
|
|
import re
|
|
from pathlib import Path
|
|
import time
|
|
import numpy as np
|
|
from pprint import pprint as prt
|
|
|
|
|
|
def pfd(df: pd.DataFrame):
    """Print *df* in full, with the pandas row and column display limits lifted."""
    # The overrides only apply inside the context manager; the previous
    # display settings are restored as soon as the print is done.
    display_overrides = ('display.max_rows', None, 'display.max_columns', None)
    with pd.option_context(*display_overrides):
        print(df)
|
|
|
|
|
|
def create_line_divider(breakage_list: list):
    """
    Build a slot extractor for fixed-width lines.

    breakage_list holds the character offsets at which a line is split.
    N offsets define N + 1 slots: slot 0 runs from the start of the line to
    the first offset, and the last slot runs from the final offset to the
    end of the line.

    Example
        Given breakage_list [10, 20, 30]
        slot 0 yields characters 0 - 10 of the string
        slot 1 yields characters 10 - 20
        slot 3 yields characters 30 - end of the string

    Returns the extract_line_slot function bound to breakage_list.
    """
    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
        """
        Pull one slot out of line_string using the break points of the
        enclosing create_line_divider call.

        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'

        The slot text is stripped and has its commas removed; if the result
        parses as a float the float is returned, otherwise the cleaned
        string is returned.

        Raises AssertionError when slot_num is not a valid slot index.
        """
        last_slot = len(breakage_list)
        # Explicit check instead of a bare `assert`, so it still runs under
        # `python -O`. AssertionError is kept as the exception type so that
        # existing callers see the same failure. Negative slot numbers are
        # rejected too: they would otherwise index breakage_list from the end
        # and silently return the wrong span.
        if not 0 <= slot_num <= last_slot:
            raise AssertionError(
                f"slot_num must be between 0 and {last_slot}, got {slot_num}"
            )

        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == last_slot else breakage_list[slot_num]

        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not numeric: keep the cleaned text as-is.
            pass

        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data

    return extract_line_slot
|
|
|
|
|
|
def minv(report: str, save_name: str, booking_date: str = '04/26/2022'):
    """
    Extract contract rows from a fixed-width report, filter them and save
    the result to an Excel file.

    Every line of report that contains a contract number (as matched by the
    module-level contract_number_regex) is split into nine fixed-width slots,
    one per output column.

    A row is kept when RentalDue is 0 and either
        - its BookingDate differs from booking_date, or
        - its OutstandBalance is greater than 100.

    report       : full text of the report
    save_name    : path the filtered rows are written to (via DataFrame.to_excel)
    booking_date : date string compared against the BookingDate column.
                   Defaults to the previously hard-coded '04/26/2022'. To
                   filter on the current day, pass something like
                   dt.today().strftime("%m/%d/%Y") -- assuming the report
                   uses MM/DD/YYYY as the default suggests; confirm this
                   against a real report.

    Returns the filtered DataFrame.
    """
    data_extractor = create_line_divider([15, 32, 52, 71, 83, 107, 116, 128])
    # Column order matters: column i is filled from slot i of the line.
    columns = [
        "ContractNumber",
        "UTAB_OIC_DUE",
        "RentalDue",
        "UTAB_OIC_PYMT",
        "ChargeType",
        "OutstandBalance",
        "BizSegment",
        "BookingDate",
        "Branch",
    ]
    extracted_data_dict = {column: [] for column in columns}

    for line in report.splitlines():
        if re.search(contract_number_regex, line) is None:
            continue
        # Each matching line adds exactly one value to every column, so all
        # the lists stay the same length and the DataFrame can be built.
        for slot_num, column in enumerate(columns):
            extracted_data_dict[column].append(data_extractor(slot_num, line, debug=False))

    dataframe = pd.DataFrame(extracted_data_dict)

    # ( bookdate != booking_date & rent = 0 ) OR ( outstanding > 100 & rent = 0 )
    # NOTE(review): the `> 100` comparison presumably relies on every
    # OutstandBalance slot having parsed as a number -- verify with real data.
    not_booked_on_date = dataframe["BookingDate"] != booking_date
    no_rent_due = dataframe["RentalDue"] == 0
    large_balance = dataframe["OutstandBalance"] > 100
    filtered = dataframe[(not_booked_on_date & no_rent_due) | (no_rent_due & large_balance)]

    filtered.to_excel(save_name, index=False)
    return filtered
|
|
|
|
# Reference list of contract numbers; the script below compares the
# contracts returned by minv() against it.
current_output = [
    '100-1011756-004', '100-1354567-002', '100-1637209-005',
    '100-1665517-003', '100-1670517-003', '100-2081987-008',
    '100-2139037-002', '100-2446458-002', '100-2453558-003',
    '100-2611389-007', '100-3492758-003', '100-3500858-001',
    '100-3694757-001', '100-3725849-003', '100-3876959-007',
    '100-3910629-001', '100-3964329-001', '100-4462739-001',
    '100-4850431-001', '100-4945021-001', '100-5382471-001',
    '100-6738611-001', '100-6849836-001', '100-7037791-001',
    '100-7045691-001', '100-7052571-001', '100-7059671-001',
    '100-7087121-001', '100-7107941-001', '100-7146771-001',
    '100-7156851-001', '100-7178461-001', '100-7203371-001',
    '100-7219911-001', '100-7232561-001', '100-7237601-001',
    '100-7242461-001', '100-9660710-001', '100-9723689-001',
]
|
|
|
|
# Contract numbers look like NNN-NNNNNNN-NNN (3, 7 and 3 digits).
# Raw string: "\d" in a normal string literal is an invalid escape sequence
# and triggers a warning on modern Python; the pattern itself is unchanged.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
|
|
|
|
# Load the saved report text; undecodable bytes are replaced, not fatal.
with open("2022.05.04_MINV_C", errors="replace") as ifile:
    report = ifile.read()

# Run the extraction/filter, then dump the full result and its contracts.
fin_df = minv(report, "man_inv_test.xlsx")
pfd(fin_df)
il_contracts = fin_df["ContractNumber"].to_list()
prt(il_contracts)

# Compare against the reference list in both directions.
extra_contracts = [c for c in il_contracts if c not in current_output]
not_included = [c for c in current_output if c not in il_contracts]

print("\nExtra Contracts:")
prt(extra_contracts)
print("Not Included Contracts:")
prt(not_included)

# Summary: exact (ordered) match plus the size of each difference.
print(f"MATCHING CONTRACTS: {il_contracts == current_output}")
print(f"Current # contract {len(current_output)} | ILE Processed Contracts: {len(il_contracts)}")
print(f"# Extra contracts included: {len(extra_contracts)} | # Contracts not included: {len(not_included)}")