parent
df96574a98
commit
d01fdde480
@ -0,0 +1,112 @@ |
|||||||
|
|
||||||
|
|
||||||
|
DAILY.MANUAL.INVOICE |
||||||
|
CONTRACTS THAT WERE NOT INVOICED |
||||||
|
PAGE 04-26-22 1 |
||||||
|
|
||||||
|
CHRG BUSINESS |
||||||
|
CONTRACT.NO........ UATB.OIC.DUE RENTAL DUE......... UATB.IDS.OIC.PAYME TYPEM..... OUTSTANDING BALANCE.... SEGMENT. BOOKING.DATE BRANCH |
||||||
|
|
||||||
|
100-2453558-003 05/09/2022 183.71 0.00 MISC 201.16 001.000 03/09/2018 9 |
||||||
|
100-2453558-003 04/09/2022 183.71 180.67 MISC 20.49 001.000 03/09/2018 9 |
||||||
|
100-2453558-003 03/09/2022 183.71 183.71 MISC 17.45 001.000 03/09/2018 9 |
||||||
|
100-1665517-003 05/15/2022 412.97 0.00 MISC 438.78 001.000 10/31/2014 9 |
||||||
|
100-4850431-001 05/10/2022 411.80 0.00 MISC 441.21 001.000 12/10/2018 3 |
||||||
|
100-4462739-001 04/18/2022 157.08 0.00 RENT 157.08 001.000 06/18/2018 9 |
||||||
|
100-4850431-001 04/10/2022 411.80 0.00 MISC 441.21 001.000 12/10/2018 3 |
||||||
|
100-3500858-001 05/12/2022 262.37 0.00 MISC 279.42 001.000 04/13/2016 9 |
||||||
|
100-3725849-003 05/13/2022 559.32 0.00 MISC 612.45 001.000 10/19/2017 9 |
||||||
|
100-3500858-001 04/12/2022 262.37 0.00 MISC 279.42 001.000 04/13/2016 9 |
||||||
|
047-2580598-001 04/05/2022 0.00 0.00 MISC 72.53 001.000 03/06/2014 9 |
||||||
|
100-3725849-003 03/13/2022 559.32 559.32 MISC 53.13 001.000 10/19/2017 9 |
||||||
|
100-4566489-001 04/14/2022 0.00 354.04 MISC 2.25 001.000 06/14/2018 9 |
||||||
|
100-4566489-001 05/14/2022 0.00 328.00 MISC 28.29 001.000 06/14/2018 9 |
||||||
|
100-5382471-001 04/15/2022 1,128.00 0.00 MISC 1,228.11 001.000 10/09/2019 9 |
||||||
|
100-5382471-001 05/15/2022 1,128.00 0.00 MISC 1,228.11 001.000 10/09/2019 9 |
||||||
|
100-9723689-001 04/20/2022 0.00 0.00 RENT 571.58 001.000 04/20/2022 10 |
||||||
|
100-9723689-001 04/20/2022 0.00 0.00 RENT 571.58 001.000 04/20/2022 10 |
||||||
|
100-7219911-001 04/20/2022 0.00 0.00 RENT 813.08 001.000 04/20/2022 9 |
||||||
|
100-1354567-002 05/25/2022 170.00 0.00 RENT 170.00 001.000 05/31/2016 9 |
||||||
|
100-6651721-001 07/14/2021 0.00 761.00 MISC 53.27 001.000 07/14/2021 9 |
||||||
|
100-2081987-008 05/25/2022 407.15 0.00 MISC 439.73 001.000 10/02/2017 9 |
||||||
|
100-2139037-002 05/20/2022 105.00 0.00 MISC 111.67 001.000 03/20/2017 9 |
||||||
|
100-3725849-003 04/13/2022 559.32 440.68 MISC 171.77 001.000 10/19/2017 9 |
||||||
|
100-3344078-002 05/01/2022 -28.53 18.47 RENT 63.00 001.000 10/01/2020 9 |
||||||
|
100-2081987-008 03/25/2022 407.15 0.00 MISC 439.73 001.000 10/02/2017 9 |
||||||
|
100-1354567-002 04/25/2022 170.00 0.00 RENT 170.00 001.000 05/31/2016 9 |
||||||
|
100-2081987-008 04/25/2022 407.15 0.00 MISC 439.73 001.000 10/02/2017 9 |
||||||
|
100-2081987-008 02/25/2022 407.15 407.15 MISC 32.58 001.000 10/02/2017 9 |
||||||
|
100-3876959-007 04/21/2022 61.07 0.00 RENT 61.07 001.000 06/21/2018 9 |
||||||
|
100-1637209-005 05/20/2022 2,023.20 0.00 RENT 2,023.20 001.000 04/25/2022 9 |
||||||
|
100-7146771-001 04/20/2022 183.28 167.00 RENT 16.28 001.000 04/25/2022 3 |
||||||
|
100-7146771-001 05/20/2022 183.28 0.00 RENT 183.28 001.000 04/25/2022 3 |
||||||
|
100-7045691-001 05/20/2022 244.57 0.00 RENT 244.57 001.000 04/25/2022 3 |
||||||
|
100-7059671-001 05/20/2022 60.00 0.00 MISC 64.20 001.000 04/25/2022 3 |
||||||
|
100-7237601-001 04/20/2022 0.00 0.00 RENT 34,192.91 001.000 04/25/2022 3 |
||||||
|
100-7242461-001 05/20/2022 57.00 0.00 MISC 60.99 001.000 04/25/2022 9 |
||||||
|
100-7178461-001 05/20/2022 197.45 0.00 MISC 209.30 001.000 04/25/2022 3 |
||||||
|
100-2611389-007 05/20/2022 171.76 0.00 RENT 171.76 001.000 04/25/2022 3 |
||||||
|
100-7037791-001 05/01/2022 444.00 0.00 MISC 478.41 001.000 04/25/2022 9 |
||||||
|
100-7203371-001 05/20/2022 1,566.40 0.00 RENT 1,566.40 001.000 04/25/2022 3 |
||||||
|
100-6630017-005 05/01/2022 0.00 178.55 MISC 0.01 001.000 04/25/2022 3 |
||||||
|
100-6738611-001 04/20/2022 0.00 0.00 RENT 4,545.94 001.000 04/25/2022 3 |
||||||
|
100-6738611-001 04/25/2022 0.00 0.00 RENT 4,545.94 001.000 04/25/2022 3 |
||||||
|
100-7052571-001 05/14/2022 255.87 0.00 MISC 278.90 001.000 04/25/2022 9 |
||||||
|
100-1011756-004 05/20/2022 1,001.64 0.00 MISC 1,081.77 001.000 04/25/2022 9 |
||||||
|
100-6849836-001 05/20/2022 1,077.47 0.00 RENT 1,077.47 001.000 04/25/2022 3 |
||||||
|
100-3492758-003 05/15/2022 312.41 0.00 RENT 312.41 001.000 04/25/2022 9 |
||||||
|
100-7156851-001 05/20/2022 150.00 0.00 MISC 159.00 001.000 04/25/2022 3 |
||||||
|
100-7232561-001 05/20/2022 113.60 0.00 MISC 122.12 001.000 04/25/2022 9 |
||||||
|
100-3876959-007 05/21/2022 61.07 0.00 RENT 61.07 001.000 06/21/2018 9 |
||||||
|
100-5382931-003 05/20/2022 146.69 0.00 RENT 146.69 001.000 04/26/2022 3 |
||||||
|
100-5722341-003 05/20/2022 170.00 0.00 MISC 181.90 001.000 04/26/2022 3 |
||||||
|
100-7150721-001 04/20/2022 174.96 0.00 RENT 174.96 001.000 04/26/2022 3 |
||||||
|
100-7150721-001 05/20/2022 174.96 0.00 RENT 174.96 001.000 04/26/2022 3 |
||||||
|
100-7165521-001 05/20/2022 1,417.88 0.00 RENT 1,417.88 001.000 04/26/2022 3 |
||||||
|
100-7227921-001 05/20/2022 64.00 0.00 MISC 69.28 001.000 04/26/2022 3 |
||||||
|
100-4858739-002 05/15/2022 208.00 0.00 MISC 225.16 001.000 04/26/2022 3 |
||||||
|
100-7100621-001 05/13/2022 880.10 0.00 MISC 954.90 001.000 04/26/2022 9 |
||||||
|
100-9725556-001 04/25/2022 0.00 0.00 RENT 600.77 001.000 04/26/2022 10 |
||||||
|
100-9725556-001 04/26/2022 0.00 0.00 RENT 600.77 001.000 04/26/2022 10 |
||||||
|
100-7209051-001 05/20/2022 1,652.01 0.00 RENT 1,652.01 001.000 04/26/2022 3 |
||||||
|
100-9660710-001 05/09/2022 174.75 0.00 RENT 174.75 001.000 06/09/2021 10 |
||||||
|
100-5329301-002 04/20/2022 0.00 0.00 RENT 263.44 001.000 04/26/2022 3 |
||||||
|
100-7087121-001 05/16/2022 3,294.46 1,125.58 RENT 3,294.46 001.000 02/16/2022 12 |
||||||
|
100-6602681-003 04/25/2022 0.00 0.00 RENT 478.00 001.000 04/26/2022 3 |
||||||
|
100-6602681-003 04/25/2022 0.00 0.00 RENT 478.00 001.000 04/26/2022 3 |
||||||
|
100-6754131-001 05/20/2022 747.75 0.00 RENT 747.75 001.000 04/26/2022 3 |
||||||
|
100-7214111-001 05/21/2022 542.97 0.00 RENT 542.97 001.000 04/26/2022 9 |
||||||
|
101-6898811-001 04/20/2022 0.00 0.00 RENT 15,035.55 001.000 04/26/2022 3 |
||||||
|
100-2406418-003 05/20/2022 200.00 0.00 MISC 219.00 001.000 04/26/2022 9 |
||||||
|
100-6943901-002 05/16/2022 236.40 0.00 MISC 257.67 001.000 04/26/2022 9 |
||||||
|
100-1623380-901 05/15/2022 1,769.11 0.00 RENT 1,769.11 001.000 04/26/2022 10 |
||||||
|
100-7107941-001 05/20/2022 1,038.95 0.00 RENT 1,038.95 001.000 02/23/2022 3 |
||||||
|
100-7031531-001 05/20/2022 120.00 0.00 MISC 130.92 001.000 04/26/2022 3 |
||||||
|
100-3630389-005 05/20/2022 168.00 0.00 MISC 181.86 001.000 04/26/2022 3 |
||||||
|
100-7174941-002 05/20/2022 1,667.38 0.00 MISC 1,804.93 001.000 04/26/2022 9 |
||||||
|
100-5204521-002 05/25/2022 3,222.20 0.00 RENT 3,222.20 001.000 04/26/2022 12 |
||||||
|
100-7241571-001 05/20/2022 55.00 0.00 MISC 59.54 001.000 04/26/2022 3 |
||||||
|
100-7182731-001 04/20/2022 0.00 0.00 RENT 1,025.37 001.000 04/26/2022 3 |
||||||
|
100-7182731-001 04/26/2022 0.00 0.00 RENT 1,025.37 001.000 04/26/2022 3 |
||||||
|
100-9726258-001 04/25/2022 0.00 0.00 RENT 255.97 001.000 04/26/2022 10 |
||||||
|
100-9726258-001 04/26/2022 0.00 0.00 RENT 255.97 001.000 04/26/2022 10 |
||||||
|
100-7220301-001 04/20/2022 0.00 0.00 RENT 1,238.00 001.000 04/26/2022 3 |
||||||
|
100-7151521-001 05/15/2022 94.00 0.00 MISC 102.46 001.000 04/26/2022 9 |
||||||
|
100-7237751-001 05/25/2022 2.00 101.65 MISC 2.14 001.000 04/26/2022 9 |
||||||
|
100-3876959-005 03/25/2022 0.00 0.00 RENT 60.74 001.000 09/27/2017 9 |
||||||
|
100-3910629-001 03/25/2022 0.00 0.00 RENT 245.81 001.000 03/30/2017 9 |
||||||
|
100-3876959-005 04/25/2022 0.00 0.00 RENT 60.74 001.000 09/27/2017 9 |
||||||
|
100-3910629-001 04/25/2022 0.00 0.00 RENT 245.81 001.000 03/30/2017 9 |
||||||
|
104-4687809-001 04/25/2022 -2,161.94 140.00 MISC 9.80 001.000 08/29/2018 9 |
||||||
|
100-3964329-001 04/28/2022 318.13 0.00 MISC 340.40 001.000 03/31/2017 9 |
||||||
|
100-3964329-001 03/28/2022 318.13 0.00 MISC 340.40 001.000 03/31/2017 9 |
||||||
|
100-1670517-003 04/16/2022 0.00 0.00 RENT 165.00 001.000 09/16/2021 3 |
||||||
|
100-4945021-001 05/15/2022 0.00 0.00 RENT 1,357.77 001.000 02/15/2019 3 |
||||||
|
100-3694757-001 05/01/2022 298.00 0.00 MISC 324.45 001.000 09/02/2016 9 |
||||||
|
100-3694757-001 04/01/2022 298.00 0.00 MISC 324.45 001.000 09/02/2016 9 |
||||||
|
100-6651721-002 07/25/2021 0.00 761.00 MISC 53.27 001.000 07/27/2021 9 |
||||||
|
100-6814061-001 04/06/2022 0.00 169.00 RENT 15.63 001.000 04/06/2022 3 |
||||||
|
100-7170651-001 04/07/2022 0.00 99.00 RENT 9.50 001.000 04/07/2022 3 |
||||||
|
100-2446458-002 04/06/2022 865.00 859.26 MISC 66.29 001.000 12/06/2016 9 |
||||||
|
100-2446458-002 05/06/2022 865.00 0.00 MISC 925.55 001.000 12/06/2016 9 |
||||||
|
102 records listed |
||||||
@ -0,0 +1,142 @@ |
|||||||
|
import os |
||||||
|
import pandas as pd |
||||||
|
from datetime import datetime as dt, timedelta |
||||||
|
import re |
||||||
|
from pathlib import Path |
||||||
|
import time |
||||||
|
import numpy as np |
||||||
|
from pprint import pprint as prt |
||||||
|
|
||||||
|
|
||||||
|
def pfd(df: pd.DataFrame):
    """Print *df* with the pandas row and column display limits lifted."""
    unlimited_display = ('display.max_rows', None, 'display.max_columns', None)
    # The options only apply inside the context manager
    with pd.option_context(*unlimited_display):
        print(df)
||||||
|
|
||||||
|
|
||||||
|
def create_line_divider(breakage_list: list):
    """
    This allows for the creation of a custom data extractor
    Breakage list defines the split points that will be used for the line
    N split points produce N + 1 slots; the last slot runs to the end of
    the line.
    Example
    Given breakage_list [10, 20, 30]
    using slot_num 0 in the resulting extract_line_slot will yield
    characters 0 - 10 from the string.
    Slot 1 would give characters 10 - 20
    Slot 3 would give everything from character 30 onwards
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
        The slot is stripped of surrounding whitespace and has its ','
        removed. If what is left parses as a number it is returned as a
        float, otherwise the cleaned string is returned.
        Raises IndexError when slot_num is higher than the last slot.
        Set debug to print the slot boundaries and the extracted value.
        """
        last_slot = len(breakage_list)
        # We can't have a slot number higher than the number of slots.
        # An explicit raise (unlike assert) is not stripped by `python -O`.
        if slot_num > last_slot:
            raise IndexError(
                f"slot_num {slot_num} is out of range; the last slot is {last_slot}"
            )
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == last_slot else breakage_list[slot_num]
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not a number (text, date, contract number, empty slot): keep the string.
            # NOTE: float() also accepts words such as "nan" and "inf".
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
||||||
|
|
||||||
|
|
||||||
|
def minv(report: str, save_name: str, booking_date: str = '04/26/2022'):
    """
    Extract the contract lines of a manual invoice report, filter them and
    save the filtered rows to an Excel workbook.

    report: the full text of the report.
    save_name: path of the workbook written with DataFrame.to_excel.
    booking_date: booking date (MM/DD/YYYY, as printed in the report) used
        by the filter. Defaults to the date that used to be hard coded, so
        existing two-argument calls behave as before. Pass
        dt.today().strftime("%m/%d/%Y") to filter against today's date.

    Returns the filtered DataFrame.
    """
    data_extractor = create_line_divider([15, 32, 52, 71, 83, 107, 116, 128])
    # Key order matters: column k is read from slot k of the line
    extracted_data_dict = {
        "ContractNumber": [],
        "UTAB_OIC_DUE": [],
        "RentalDue": [],
        "UTAB_OIC_PYMT": [],
        "ChargeType": [],
        "OutstandBalance": [],
        "BizSegment": [],
        "BookingDate": [],
        "Branch": [],
    }
    columns = list(extracted_data_dict.keys())
    for line in report.splitlines():
        # Only lines containing a contract number hold data
        if re.search(contract_number_regex, line) is None:
            continue
        for slot_num, column in enumerate(columns):
            extracted_data_dict[column].append(data_extractor(slot_num, line, debug=False))
    # All the list lengths need to be the same so if anything was missed it will fail to build
    dataframe = pd.DataFrame(extracted_data_dict)
    # ( bookdate != booking_date & rent = 0 ) OR (outstanding > 100 & rent = 0)
    no_rent_due = dataframe["RentalDue"] == 0
    filtered = dataframe[
        (no_rent_due & (dataframe["BookingDate"] != booking_date))
        | (no_rent_due & (dataframe["OutstandBalance"] > 100))
    ]
    filtered.to_excel(save_name, index=False)
    return filtered
||||||
|
|
||||||
|
# Contract numbers the extract is compared against below.
# NOTE(review): presumably the output of the current process - confirm.
current_output = [
    '100-1011756-004',
    '100-1354567-002',
    '100-1637209-005',
    '100-1665517-003',
    '100-1670517-003',
    '100-2081987-008',
    '100-2139037-002',
    '100-2446458-002',
    '100-2453558-003',
    '100-2611389-007',
    '100-3492758-003',
    '100-3500858-001',
    '100-3694757-001',
    '100-3725849-003',
    '100-3876959-007',
    '100-3910629-001',
    '100-3964329-001',
    '100-4462739-001',
    '100-4850431-001',
    '100-4945021-001',
    '100-5382471-001',
    '100-6738611-001',
    '100-6849836-001',
    '100-7037791-001',
    '100-7045691-001',
    '100-7052571-001',
    '100-7059671-001',
    '100-7087121-001',
    '100-7107941-001',
    '100-7146771-001',
    '100-7156851-001',
    '100-7178461-001',
    '100-7203371-001',
    '100-7219911-001',
    '100-7232561-001',
    '100-7237601-001',
    '100-7242461-001',
    '100-9660710-001',
    '100-9723689-001',
]

# Raw string: "\d" in a normal literal is an invalid escape sequence.
# Must be defined before minv() is called, since minv reads it as a global.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"

with open("2022.05.04_MINV_C", errors="replace") as ifile:
    report = ifile.read()

fin_df = minv(report, "man_inv_test.xlsx")
pfd(fin_df)
il_contracts = fin_df.ContractNumber.to_list()
prt(il_contracts)

# Contracts in the extract but not in current_output, and the other way round
extra_contracts = [c for c in il_contracts if c not in current_output]
not_included = [c for c in current_output if c not in il_contracts]

print("\nExtra Contracts:")
prt(extra_contracts)
print("Not Included Contracts:")
prt(not_included)
# List equality: this is only True when order and duplicates match as well
print(f"MATCHING CONTRACTS: {il_contracts == current_output}")
print(f"Current # contract {len(current_output)} | ILE Processed Contracts: {len(il_contracts)}")
print(f"# Extra contracts included: {len(extra_contracts)} | # Contracts not included: {len(not_included)}")
||||||
@ -0,0 +1,110 @@ |
|||||||
|
import os |
||||||
|
import pandas as pd |
||||||
|
from datetime import datetime as dt, timedelta |
||||||
|
import sys, getopt |
||||||
|
import re |
||||||
|
from pathlib import Path |
||||||
|
import time |
||||||
|
import numpy as np |
||||||
|
|
||||||
|
|
||||||
|
# Contract numbers look like 100-2453558-003 (3 digits, 7 digits, 3 digits).
# Raw string: "\d" in a normal literal is an invalid escape sequence.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
||||||
|
|
||||||
|
|
||||||
|
def create_line_divider(breakage_list: list):
    """
    This allows for the creation of a custom data extractor
    Breakage list defines the split points that will be used for the line
    N split points produce N + 1 slots; the last slot runs to the end of
    the line.
    Example
    Given breakage_list [10, 20, 30]
    using slot_num 0 in the resulting extract_line_slot will yield
    characters 0 - 10 from the string.
    Slot 1 would give characters 10 - 20
    Slot 3 would give everything from character 30 onwards
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
        The slot is stripped of surrounding whitespace and has its ','
        removed. If what is left parses as a number it is returned as a
        float, otherwise the cleaned string is returned.
        Raises IndexError when slot_num is higher than the last slot.
        Set debug to print the slot boundaries and the extracted value.
        """
        last_slot = len(breakage_list)
        # We can't have a slot number higher than the number of slots.
        # An explicit raise (unlike assert) is not stripped by `python -O`.
        if slot_num > last_slot:
            raise IndexError(
                f"slot_num {slot_num} is out of range; the last slot is {last_slot}"
            )
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == last_slot else breakage_list[slot_num]
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not a number (text, date, contract number, empty slot): keep the string.
            # NOTE: float() also accepts words such as "nan" and "inf".
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
||||||
|
|
||||||
|
def ach(report: str, save_name: str):
    """
    Extract the contract payments and batch totals of an ACH report and
    save them to an Excel workbook.

    report: the full text of the report.
    save_name: path of the workbook; it gets a "data" sheet (one row per
        contract line) and a "Summary" sheet (one row per batch total).

    Returns the "data" DataFrame.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "ContractNumber": [],
        "CustomerName": [],
        "BankCode": [],
        "BankNumber": [],
        "AccountNumber": [],
        "Payment": [],
        "Batch": [],
        "Lessor": [],
        "PaymentDate": [],
    }
    # The last three columns (Batch, Lessor, PaymentDate) are not on the
    # contract line itself; they are back filled when a batch total is found.
    # Key order matters for the rest: column k is read from slot k of the line.
    line_columns = list(extracted_data_dict.keys())[:-3]
    batches = {
        "batch_num": [],
        "payment_date": [],
        "lessor": [],
        "total": [],
    }
    # Which batches list feeds which back filled column
    backfilled_columns = (
        ("Batch", "batch_num"),
        ("Lessor", "lessor"),
        ("PaymentDate", "payment_date"),
    )

    data_extractor = create_line_divider([19, 57, 67, 82, 104])
    contract_number_pattern = re.compile(contract_number_regex)
    bank_number_pattern = re.compile(r"\d{9}")
    batch_num_pattern = re.compile(r"BATCH \d{4} TOTAL")

    for index, line in enumerate(lines):
        # Check for a contract number and a bank number in the line
        if contract_number_pattern.search(line) and bank_number_pattern.search(line):
            for slot_num, column in enumerate(line_columns):
                extracted_data_dict[column].append(data_extractor(slot_num, line))
        # This searches for a statement that looks like a batch number
        # This sums the contracts by their lessor code. A feature requested by cash apps
        if batch_num_pattern.search(line):
            # Batch number is always in characters 96 to 101
            batches["batch_num"].append(line[96:101])
            # Payment date will be 2 lines below that between characters 114 and 125
            batches["payment_date"].append(lines[index + 2][114:125])
            # Lessor is just the first three numbers of the contract number
            batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3])
            # Total is a number given by the report for that batch. ',' is removed so that it can be transformed into a float
            batches["total"].append(float(line[107:125].strip().replace(",", "")))
            # Any time there's a new batch, every contract read since the
            # previous batch total still lacks these values, so the newest
            # value is repeated once for each of them.
            contract_count = len(extracted_data_dict["BankCode"])
            for column, batch_key in backfilled_columns:
                missing = contract_count - len(extracted_data_dict[column])
                extracted_data_dict[column].extend([batches[batch_key][-1]] * missing)
    # Now the dictionary lists should all be equal lengths and we can create a dataframe
    dataframe = pd.DataFrame(extracted_data_dict)
    # We're creating two sheets: data & summary so we need to open an excel writer
    # This also helps with a bug caused by larger dataframes
    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        # The batches dictionary is converted to a dataframe and added as its own sheet
        pd.DataFrame(batches).to_excel(writer, index=False, sheet_name="Summary")
    return dataframe
||||||
|
|
||||||
|
# Sample ACH reports; only r2 is read below
r1 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.05.27_ACH_C"
r2 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.06.03_ACH_C"

# Undecodable bytes are replaced rather than raising
with open(r2, errors="replace") as ifile:
    report = ifile.read()

ach(report, save_name="test_ach_0613.xlsx")
||||||
@ -0,0 +1,168 @@ |
|||||||
|
import os |
||||||
|
import pandas as pd |
||||||
|
from datetime import datetime as dt, timedelta |
||||||
|
import sys, getopt |
||||||
|
import re |
||||||
|
from pathlib import Path |
||||||
|
import time |
||||||
|
import numpy as np |
||||||
|
from pprint import pprint as prt |
||||||
|
|
||||||
|
|
||||||
|
# Contract numbers look like 100-2453558-003 (3 digits, 7 digits, 3 digits).
# Raw string: "\d" in a normal literal is an invalid escape sequence.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
||||||
|
|
||||||
|
def dict_lens(dictionary):
    """Print every key of *dictionary* next to the length of its value."""
    for key, values in dictionary.items():
        print(f"{key} : {len(values)}")
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def create_line_divider(breakage_list: list):
    """
    This allows for the creation of a custom data extractor
    Breakage list defines the split points that will be used for the line
    N split points produce N + 1 slots; the last slot runs to the end of
    the line.
    Example
    Given breakage_list [10, 20, 30]
    using slot_num 0 in the resulting extract_line_slot will yield
    characters 0 - 10 from the string.
    Slot 1 would give characters 10 - 20
    Slot 3 would give everything from character 30 onwards
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
        The slot is stripped of surrounding whitespace and has its ','
        removed. If what is left parses as a number it is returned as a
        float, otherwise the cleaned string is returned.
        Raises IndexError when slot_num is higher than the last slot.
        Set debug to print the slot boundaries and the extracted value.
        """
        last_slot = len(breakage_list)
        # We can't have a slot number higher than the number of slots.
        # An explicit raise (unlike assert) is not stripped by `python -O`.
        if slot_num > last_slot:
            raise IndexError(
                f"slot_num {slot_num} is out of range; the last slot is {last_slot}"
            )
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == last_slot else breakage_list[slot_num]
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not a number (text, date, contract number, empty slot): keep the string.
            # NOTE: float() also accepts words such as "nan" and "inf".
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
||||||
|
|
||||||
|
def lockbox(report: str, save_name: str):
    """
    Extract the payment records of a lockbox report and save them to an
    Excel workbook.

    report: the full text of the report.
    save_name: path of the workbook written with DataFrame.to_excel.

    Returns the DataFrame that was written.
    Raises IndexError when a record is not followed by a customer name line,
    or when a contract only line appears before any full line.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "CustomerName": [],
        "PaymentDate": [],
        "InvoiceNumber": [],
        "CheckNumber": [],
        "InvoicePayment": [],
        "ContractNumber": [],
        "ContractPayment": [],
    }
    # Each pair is a dictionary column and the data slot in which that
    # data can be found in the report
    bank_payment_records = (
        ("PaymentDate", 1),
        ("InvoiceNumber", 2),
        ("CheckNumber", 3),
        ("InvoicePayment", 4),
    )
    infolease_payment_records = (
        ("ContractNumber", 7),
        ("ContractPayment", 8),
    )

    # Below are the Regular Expressions used to find relevant data lines
    full_line = re.compile(r"\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1")
    contract_only_line = re.compile(r"\s{90}\d.{7}1\d{2}-")
    cust_name_line = re.compile(r"\s{98}.{28}\D*")
    # The data extractor allows us to extract data from the report using slots
    # Slots are ranges of characters denoted by the list fed into the creation function
    data_extractor = create_line_divider([9, 19, 39, 56, 69, 90, 98, 118])

    for index, line in enumerate(lines):
        # We can skip empty lines
        if not line:
            continue
        if full_line.search(line):
            # A full line carries its own bank payment data
            for column, slot_num in bank_payment_records:
                extracted_data_dict[column].append(data_extractor(slot_num, line))
        elif contract_only_line.search(line):
            # A contract only line reuses the bank payment data of the
            # previous entry since it should apply to this contract
            for column, _ in bank_payment_records:
                extracted_data_dict[column].append(extracted_data_dict[column][-1])
        else:
            # If it doesn't hit either of these criteria it's irrelevant data
            continue
        # Both kinds of record carry the contract data on the line itself
        for column, slot_num in infolease_payment_records:
            extracted_data_dict[column].append(data_extractor(slot_num, line))
        # Keep moving down a line and checking for a customer name
        # Customer name typically happens 1 line under data but can be 13 lines if cut off by page end
        for name_index in range(index + 1, len(lines)):
            if cust_name_line.search(lines[name_index]) is not None:
                # Once it hits, add the name to the dict
                extracted_data_dict["CustomerName"].append(data_extractor(7, lines[name_index]))
                break
        else:
            raise IndexError(f"no customer name line found after report line {index + 1}")
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
||||||
|
|
||||||
|
|
||||||
|
def lb2(report:str, save_name:str):
    """
    Extract the payment records of a lockbox report and save them to an
    Excel workbook.

    report: the full text of the report.
    save_name: path of the workbook written with DataFrame.to_excel.

    Returns the DataFrame that was written.
    Raises IndexError when a record is the last line of the report, when a
    customer name pushed to the next page cannot be found, or when an
    infolease only line appears before any complete line.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "SEQ": [],
        "PYMT DATE": [],
        "INV NUM": [],
        "CHECK NUMBER": [],
        "PAYMENT AMOUNT": [],
        "NOTE": [],
        "IL SEQ": [],
        "CONTRACT NUM": [],
        "IL PAYMENT AMOUNT": [],
        "CUST NAME": [],
    }
    # Every column except CUST NAME (the last key) is read from the record
    # line; key order matters since column k is read from slot k
    slot_columns = list(extracted_data_dict.keys())[:-1]
    # The first six columns are the bank record, the rest the infolease payment
    bank_columns = slot_columns[:6]
    data_extractor = create_line_divider([9, 19, 39, 56, 69, 89, 98, 118])

    # The first SEQ # & a contract payment date e.g. ' 197     05/10/2022'
    complete_line = re.compile(r"(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}")
    contract_number_pattern = re.compile(contract_number_regex)
    # A bunch of whitespace then some writing
    next_page_name_line = re.compile(r"\s{98}.{34}")

    for index, line in enumerate(lines):
        if complete_line.match(line):
            # Add all of the data points except customer name
            for slot_num, column in enumerate(slot_columns):
                extracted_data_dict[column].append(data_extractor(slot_num, line, debug=False))
        elif contract_number_pattern.search(line) is not None:
            # This line contains only an infolease payment
            # Some times there are multiple infolease payments for a single bank record
            # so the bank data is the same as the previous complete line
            for column in bank_columns:
                extracted_data_dict[column].append(extracted_data_dict[column][-1])
            # Then add the new data for the infolease contract
            for slot_num in range(len(bank_columns), len(slot_columns)):
                extracted_data_dict[slot_columns[slot_num]].append(data_extractor(slot_num, line, debug=False))
        else:
            continue
        # We had a match so we need a customer name to associate with it
        # We can tell the cust name will be on the next page if the word "PAGE" appears three lines under the current line
        # And the next line is blank
        # The length guard and short circuit keep a record close to the end
        # of the report from failing on the three line look ahead alone
        name_on_next_page = (
            index + 3 < len(lines)
            and lines[index + 1].strip() == ""
            and "PAGE" in lines[index + 3]
        )
        if name_on_next_page:
            # The search starts on the current line, then moves down
            for name_index in range(index, len(lines)):
                if next_page_name_line.match(lines[name_index]):
                    # Once we find it add the cust name to the dict (it's the only thing on the line)
                    extracted_data_dict["CUST NAME"].append(lines[name_index].strip())
                    break
            else:
                raise IndexError(f"no customer name line found after report line {index + 1}")
        else:
            # Otherwise the cust name is on the next line (even if that line is blank)
            extracted_data_dict["CUST NAME"].append(lines[index + 1].strip())
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
||||||
|
|
||||||
|
|
||||||
|
# Sample lockbox reports used to exercise lb2
r1 = "/config/workspace/LEAF/IL Extract SRC/lb_errors/2022.05.10_LOCKBOX_094_C"
r2 = "/config/workspace/LEAF/IL Extract SRC/lb_errors/2022.05.11_LOCKBOX_094_C"

# Each report is read (undecodable bytes replaced) and extracted to its own workbook
for report_path, workbook_name in ((r1, "test_lb_0510.xlsx"), (r2, "test_lb_0511.xlsx")):
    with open(report_path, errors="replace") as ifile:
        report = ifile.read()
    lb2(report, workbook_name)
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue