A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/ManualInvoice.py

142 lines
4.6 KiB

import os
import pandas as pd
from datetime import datetime as dt, timedelta
import re
from pathlib import Path
import time
import numpy as np
from pprint import pprint as prt
def pfd(df: pd.DataFrame):
    """Print *df* in full, with no limit on the rows or columns displayed."""
    # option_context takes alternating option-name / value arguments and
    # restores the previous settings when the block exits.
    unlimited_display = ('display.max_rows', None, 'display.max_columns', None)
    with pd.option_context(*unlimited_display):
        print(df)
def create_line_divider(breakage_list: list):
    """
    Build a custom data extractor for fixed-width lines.

    breakage_list defines the character offsets at which a line is split.
    N break points define N + 1 slots:
      - slot 0 covers characters 0 up to the first break point,
      - slot k covers breakage_list[k-1] up to breakage_list[k],
      - slot N covers the last break point up to the end of the line.

    Example
        Given breakage_list [10, 20, 30]
        slot 0 of the returned extractor yields characters 0 - 10,
        slot 1 yields characters 10 - 20,
        slot 3 yields everything from character 30 onwards.

    Returns the extract_line_slot function bound to breakage_list.
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pull one slot out of line_string using the break points defined by
        the parent function.

        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'

        The slot text is stripped and commas are removed. If the result
        parses as a number it is returned as a float, otherwise the cleaned
        string is returned unchanged (an empty slot gives "").

        Raises AssertionError when slot_num is higher than the number of
        slots. When debug is True the slot bounds and value are printed.
        """
        # We can't have a slot number higher than the number of slots.
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if not slot_num < len(breakage_list) + 1:
            raise AssertionError(
                f"slot_num {slot_num} is out of range for {len(breakage_list) + 1} slots"
            )

        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        if slot_num == len(breakage_list):
            # The final slot runs to the end of the line
            high_range = len(line_string)
        else:
            high_range = breakage_list[slot_num]

        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not numeric: keep the cleaned text as-is
            pass

        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data

    return extract_line_slot
def minv(report: str, save_name: str, booking_date: str = '04/26/2022'):
    """
    Parse a fixed-width report, filter its contract rows and save them to Excel.

    Every line of report that matches the module-level contract_number_regex
    is split into nine fields (ContractNumber, UTAB_OIC_DUE, RentalDue,
    UTAB_OIC_PYMT, ChargeType, OutstandBalance, BizSegment, BookingDate,
    Branch) using the break points [15,32,52,71,83,107,116,128].
    Lines without a contract number are ignored.

    A row is kept when RentalDue is 0 and either
      - its BookingDate differs from booking_date, or
      - its OutstandBalance is greater than 100.

    Parameters
        report       : full text of the report
        save_name    : path the filtered rows are written to with to_excel
                       (no index column)
        booking_date : date string compared against the BookingDate column.
                       Defaults to the previously hard-coded '04/26/2022'.
                       The original inline note describes this value as
                       "today", e.g. dt.today().strftime("%m/%d/%Y")
                       NOTE(review): confirm the report's date format.

    Returns the filtered DataFrame.
    """
    data_extractor = create_line_divider([15,32,52,71,83,107,116,128])
    extracted_data_dict = {
        "ContractNumber" : [],
        "UTAB_OIC_DUE" : [],
        "RentalDue" : [],
        "UTAB_OIC_PYMT" : [],
        "ChargeType" : [],
        "OutstandBalance" : [],
        "BizSegment" : [],
        "BookingDate" : [],
        "Branch" : [],
    }
    columns = list(extracted_data_dict.keys())

    for line in report.splitlines():
        # Only lines carrying a contract number are data rows
        if re.search(contract_number_regex, line) is None:
            continue
        # Slot i of the line feeds column i, so every list grows in lockstep
        for slot_num, column in enumerate(columns):
            extracted_data_dict[column].append(data_extractor(slot_num, line, debug=False))

    # All the list lengths need to be the same so if anything was missed it will fail to build
    dataframe = pd.DataFrame(extracted_data_dict)

    # ( bookdate != booking_date & rent = 0 ) OR ( outstanding > 100 & rent = 0 )
    no_rent_due = dataframe["RentalDue"] == 0
    booked_other_day = dataframe["BookingDate"] != booking_date
    large_balance = dataframe["OutstandBalance"] > 100
    filtered = dataframe[(booked_other_day & no_rent_due) | (no_rent_due & large_balance)]

    filtered.to_excel(save_name, index=False)
    return filtered
# Contract numbers that the minv() result is compared against further down:
# contracts produced by minv() but absent here are reported as "extra", and
# entries here that minv() did not produce are reported as "not included".
# NOTE(review): presumably the contract list from the existing process for the
# same report file; confirm where these values came from.
current_output = [
'100-1011756-004',
'100-1354567-002',
'100-1637209-005',
'100-1665517-003',
'100-1670517-003',
'100-2081987-008',
'100-2139037-002',
'100-2446458-002',
'100-2453558-003',
'100-2611389-007',
'100-3492758-003',
'100-3500858-001',
'100-3694757-001',
'100-3725849-003',
'100-3876959-007',
'100-3910629-001',
'100-3964329-001',
'100-4462739-001',
'100-4850431-001',
'100-4945021-001',
'100-5382471-001',
'100-6738611-001',
'100-6849836-001',
'100-7037791-001',
'100-7045691-001',
'100-7052571-001',
'100-7059671-001',
'100-7087121-001',
'100-7107941-001',
'100-7146771-001',
'100-7156851-001',
'100-7178461-001',
'100-7203371-001',
'100-7219911-001',
'100-7232561-001',
'100-7237601-001',
'100-7242461-001',
'100-9660710-001',
'100-9723689-001',
]
# Pattern for a contract number: 3 digits - 7 digits - 3 digits.
# Written as a raw string so the regex escape \d is not parsed as an
# (invalid) string escape sequence; the pattern text itself is unchanged.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"

# Read the sample report; undecodable bytes are replaced instead of raising
with open("2022.05.04_MINV_C", errors="replace") as ifile:
    report = ifile.read()

# Run the extraction and show the complete filtered result
fin_df = minv(report, "man_inv_test.xlsx")
pfd(fin_df)
il_contracts = fin_df.ContractNumber.to_list()
prt(il_contracts)

# Contracts produced by minv() that are not in current_output
extra_contracts = [c for c in il_contracts if c not in current_output]
# Contracts in current_output that minv() did not produce
not_included = [c for c in current_output if c not in il_contracts]

print("\nExtra Contracts:")
prt(extra_contracts)
print("Not Included Contracts:")
prt(not_included)
# List equality: True only when both lists hold the same contracts in the same order
print(f"MATCHING CONTRACTS: {il_contracts == current_output}")
print(f"Current # contract {len(current_output)} | ILE Processed Contracts: {len(il_contracts)}")
print(f"# Extra contracts included: {len(extra_contracts)} | # Contracts not included: {len(not_included)}")