Fixed incorrect columns, handled unexpected line gaps

4 years ago · 3baea9331e
parent d690c75399
commit 3baea9331e
2 changed files with 244 additions and 0 deletions
--- a/NI_sum.py
+++ b/NI_sum.py
@ -0,0 +1,152 @@
+import os
+import pandas as pd
+from datetime import datetime as dt, timedelta
+import sys, getopt
+import re
+from pathlib import Path
+import time
+from pprint import pprint as prt
+import numpy as np
+
+# Contract number shape: 3 digits, 7 digits, 3 digits, separated by dashes.
+# Raw string so that "\d" reaches the regex engine untouched instead of being
+# treated as an (invalid) string escape sequence.
+contract_number_regex = r"\d{3}-\d{7}-\d{3}"
+
+def create_line_divider(breakage_list: list):
+    """
+    This allows for the creation of a custom data extractor
+    Breakage list defines the split points (character offsets) that will be
+    used for the line. A list of N offsets defines N + 1 slots:
+        slot 0 -> characters 0 up to breakage_list[0]
+        slot k -> characters breakage_list[k-1] up to breakage_list[k]
+        slot N -> characters breakage_list[N-1] up to the end of the line
+    Example
+    Given breakage_list [10, 20, 30]
+    using slot_num 0 in the resulting extract_line_slot will yield
+    characters 0 - 10 from the string.
+    Slot 1 would give characters 10 - 20
+    Slot 3 would give everything from character 30 onwards
+
+    Returns the extract_line_slot function bound to breakage_list.
+    """
+    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
+        """
+        Pulls data from a line/string using break points defined by the
+        parent function.
+        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
+
+        The slice is stripped of surrounding whitespace and commas are removed.
+        If the remaining text parses as a float the float is returned,
+        otherwise the cleaned string is returned.
+        When debug is True the slot boundaries and the result are printed.
+
+        Raises IndexError when slot_num is outside 0 .. len(breakage_list).
+        """
+        slot_count = len(breakage_list) + 1
+        # Explicit check rather than assert: asserts are stripped under
+        # `python -O`, and a negative slot_num would otherwise silently
+        # index breakage_list from its end and return the wrong columns.
+        if not 0 <= slot_num < slot_count:
+            raise IndexError(
+                f"slot_num {slot_num} is out of range, valid slots are 0 - {slot_count - 1}"
+            )
+        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
+        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
+        data = line_string[low_range:high_range].strip().replace(",", "")
+        # NOTE: float() also accepts text such as "nan", "inf" or "1_000",
+        # so a slot holding exactly that text comes back as a float.
+        try:
+            data = float(data)
+        except ValueError:
+            # Not numeric: keep the cleaned text as-is
+            pass
+        if debug:
+            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
+        return data
+    return extract_line_slot
+
+
+def net_invest_trial_balance(report: str, save_name: str):
+    """
+    Extract contract records from the text of a fixed-width net investment
+    report and write them, plus a per-lessor summary, to an Excel workbook.
+
+    A record is located by finding a line whose first slot contains a
+    contract number (contract_number_regex). The record is read from four
+    lines: the line before the contract line, the contract line itself and
+    the two lines after it. Lines without a "." are treated as unexpected
+    gaps and are skipped over (see the comments in the loop).
+
+    report: full text of the report
+    save_name: path of the workbook to write; it gets a "data" sheet (one
+        row per record) and a "Summary" sheet (column sums and contract
+        count per lessor, the lessor being the first three characters of
+        the lease number)
+
+    Returns the DataFrame written to the "data" sheet.
+    Raises IndexError if a record sits so close to the start or end of the
+    report that one of the lines it needs does not exist.
+    """
+    lines = report.splitlines()
+    extracted_data_dict = {
+        'CUSTOMER NAME' :  [],
+        'CURR INT RCVB' :  [],
+        'UNEARNED BLENDED' :  [],
+        'BLEND NET INV' :  [],
+        'LEASE NUMBER' :  [],
+        'GROSS CONTRACT' :  [],
+        'CURR RENT RCVB' :  [],
+        'UNEARN FIN' :  [],
+        'END DEPOSIT' :  [],
+        'SEC DEPOSIT' :  [],
+        'LEASE PYMTS' :  [],
+        'TOTAL' :  [],
+        'CONTRACT STAT' :  [],
+        'PAYMENTS RCVD' :  [],
+        'REM RENT RCVB' :  [],
+        'UNEARN RESID' :  [],
+        'PROV LOSS' :  [],
+        'NET RESERVE' :  [],
+        'UNEARN INC' :  [],
+        'BAL REMAINING' :  [],
+        'RESIDUAL' :  [],
+        'UNPAID INT' :  [],
+        'NET INV' :  [],
+        'UNEARNED IDC' :  [],
+        "LESSOR": []
+    }
+    lessors = []
+    columns = list(extracted_data_dict.keys())
+    # (column name, slot number) pairs for each of the four lines of a record.
+    # "LESSOR" (the last column) is derived, not read from a slot.
+    line0 = list(zip(columns[0:4], [0,3,4,5]))
+    line1 = list(zip(columns[4:12], range(0,8)))
+    line2 = list(zip(columns[12:19], range(0,7)))
+    line3 = list(zip(columns[19:-1], range(1,6)))
+    line_layouts = [line0, line1, line2, line3]
+
+    for layout in line_layouts:
+        print(f"\n{layout}")
+
+    data_extractor = create_line_divider([18,32,50,66,84,100,117])
+    for idx, line in enumerate(lines):
+        slot1 = data_extractor(0, line, False)
+        # The extractor returns a float for numeric slots; a contract number never is one
+        if not isinstance(slot1, str):
+            continue
+        if re.search(contract_number_regex, slot1) is None:
+            continue
+
+        data_section = lines[idx-1:idx+3]
+
+        # Gap between the leading line and the contract line: reach one line further back
+        if "." not in data_section[0]:
+            data_section[0] = lines[idx-2]
+        # Gap inside the record: drop the gap line by shifting the following
+        # lines up one place, then pull in the next line of the report.
+        # data_section is deliberately mutated while being iterated so that
+        # later iterations see the shifted lines.
+        for pos, section_line in enumerate(data_section):
+            if "." in section_line:
+                continue
+            for i in range(pos, len(data_section)-1):
+                data_section[i] = data_section[i+1]
+            data_section[3] = lines[idx+3]
+
+        # Indexing (rather than zip) keeps a truncated record a hard error
+        for section_idx, layout in enumerate(line_layouts):
+            section_line = data_section[section_idx]
+            for column, slot in layout:
+                extracted_data_dict[column].append(data_extractor(slot, section_line, False))
+
+        lessor = extracted_data_dict["LEASE NUMBER"][-1][0:3]
+        extracted_data_dict["LESSOR"].append(lessor)
+        if lessor not in lessors:
+            print(lessor)
+            lessors.append(lessor)
+            print(lessors)
+    for c in columns:
+        print(f"C: {c} | {len(extracted_data_dict[c])}")
+    print(lessors)
+    dataframe = pd.DataFrame(extracted_data_dict)
+
+    # Text-only columns are left out of the per-lessor sums
+    text_columns = ("CUSTOMER NAME", "LEASE NUMBER", "CONTRACT STAT")
+    summed_columns = [c for c in columns if c not in text_columns]
+    # Non-numeric filler found in the value slots; treated as missing
+    placeholders = ("", "REVOLV", "ING ACCOUNT")
+
+    summary_series = []
+    for lessor in lessors:
+        # Selecting rows and columns in one .loc call yields a new frame, so
+        # nothing is deleted from (a view of) the full dataframe.
+        reduced_df = dataframe.loc[dataframe["LESSOR"] == lessor, summed_columns]
+        for placeholder in placeholders:
+            # np.nan: the np.NaN alias no longer exists in NumPy 2.0
+            reduced_df = reduced_df.replace(placeholder, np.nan)
+        summation = reduced_df.sum(skipna=True, axis=0)
+        summation["LESSOR"] = lessor
+        summation["CONTRACT COUNT"] = len(reduced_df.index)
+        summary_series.append(summation)
+    if summary_series:
+        summary_df = pd.concat(summary_series, axis=1).transpose().set_index("LESSOR")
+    else:
+        # No contract found: pd.concat refuses an empty list, so write an
+        # empty summary with the same columns instead of crashing.
+        summary_df = pd.DataFrame(columns=summed_columns + ["CONTRACT COUNT"]).set_index("LESSOR")
+    prt(summary_df)
+    with pd.ExcelWriter(save_name) as writer:
+        dataframe.to_excel(writer, index=False, sheet_name="data")
+        summary_df.to_excel(writer, index=True, sheet_name="Summary")
+    return dataframe
+
+
+
+
+
+
+
+
+
+
+
+
+# Script entry: read the whole report as text (undecodable bytes are replaced
+# rather than raising), run the extraction and pretty-print the result.
+report = Path("/config/workspace/LEAF/IL Extract SRC/2022.05.20 Net Investment").read_text(errors="replace")
+
+prt(net_invest_trial_balance(report, "520_NI_TEST.xlsx"))
--- a/RenewalTest.py
+++ b/RenewalTest.py
@ -0,0 +1,92 @@
+import os
+import pandas as pd
+from datetime import datetime as dt, timedelta
+import sys, getopt
+import re
+from pathlib import Path
+import time
+from pprint import pprint as prt
+import numpy as np
+
+# Contract number shape: 3 digits, 7 digits, 3 digits, separated by dashes.
+# Raw string so that "\d" reaches the regex engine untouched instead of being
+# treated as an (invalid) string escape sequence.
+contract_number_regex = r"\d{3}-\d{7}-\d{3}"
+
+def create_line_divider(breakage_list: list):
+    """
+    This allows for the creation of a custom data extractor
+    Breakage list defines the split points (character offsets) that will be
+    used for the line. A list of N offsets defines N + 1 slots:
+        slot 0 -> characters 0 up to breakage_list[0]
+        slot k -> characters breakage_list[k-1] up to breakage_list[k]
+        slot N -> characters breakage_list[N-1] up to the end of the line
+    Example
+    Given breakage_list [10, 20, 30]
+    using slot_num 0 in the resulting extract_line_slot will yield
+    characters 0 - 10 from the string.
+    Slot 1 would give characters 10 - 20
+    Slot 3 would give everything from character 30 onwards
+
+    Returns the extract_line_slot function bound to breakage_list.
+    """
+    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
+        """
+        Pulls data from a line/string using break points defined by the
+        parent function.
+        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
+
+        The slice is stripped of surrounding whitespace and commas are removed.
+        If the remaining text parses as a float the float is returned,
+        otherwise the cleaned string is returned.
+        When debug is True the slot boundaries and the result are printed.
+
+        Raises IndexError when slot_num is outside 0 .. len(breakage_list).
+        """
+        slot_count = len(breakage_list) + 1
+        # Explicit check rather than assert: asserts are stripped under
+        # `python -O`, and a negative slot_num would otherwise silently
+        # index breakage_list from its end and return the wrong columns.
+        if not 0 <= slot_num < slot_count:
+            raise IndexError(
+                f"slot_num {slot_num} is out of range, valid slots are 0 - {slot_count - 1}"
+            )
+        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
+        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
+        data = line_string[low_range:high_range].strip().replace(",", "")
+        # NOTE: float() also accepts text such as "nan", "inf" or "1_000",
+        # so a slot holding exactly that text comes back as a float.
+        try:
+            data = float(data)
+        except ValueError:
+            # Not numeric: keep the cleaned text as-is
+            pass
+        if debug:
+            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
+        return data
+    return extract_line_slot
+
+def renewal_net_invest_trial_balance(report: str, save_name: str):
+    """
+    Extract contract records from the text of a fixed-width renewal net
+    investment report and save them to an Excel file.
+
+    A record is found through a line whose first slot contains a contract
+    number (contract_number_regex). It is read from three lines: the line
+    before the contract line, the contract line and the line after it.
+    Every line of a record is printed; if one of them has no "." the whole
+    record is printed again with line indexes so it can be inspected.
+
+    report: full text of the report
+    save_name: path the resulting sheet is written to (no index column)
+
+    Returns the DataFrame that was written.
+    """
+    column_names = (
+        'CUSTOMER NAME', 'TYPE', 'GROSS RENEWAL', 'REMAINING BAL',
+        'FINANCED RES', 'REMAINING RES', 'LEASE PYMTS',
+        'CONTRACT NUMBER', 'RENEWAL', 'PAYMENTS RCVD', 'CUR RENT RCVB',
+        'UNEARNED RIN', 'SECURITY DEP', 'NET INVEST', 'UNEARN INCOME',
+        'TOTAL',
+        'REM RENT RCVB', 'UNPAID RES',
+    )
+    extracted_data_dict = {name: [] for name in column_names}
+    columns = list(extracted_data_dict.keys())
+
+    # One layout per line of a record: (column name, slot number) pairs
+    layouts = (
+        list(zip(columns[0:7], [0,1,2,3,4,5,7])),
+        list(zip(columns[7:16], range(0,9))),
+        list(zip(columns[16:], [3,4])),
+    )
+
+    data_extractor = create_line_divider([21,29,43,58,71,88,99,113])
+    lines = report.splitlines()
+
+    for line_no, text in enumerate(lines):
+        slot1 = data_extractor(0, text, False)
+        # Numeric slots come back as floats and cannot hold a contract number
+        if type(slot1) is not str:
+            continue
+        if re.search(contract_number_regex, slot1) is None:
+            continue
+
+        data_section = lines[line_no-1:line_no+2]
+
+        for section_text in data_section:
+            print(section_text)
+            if section_text.find(".") != -1:
+                continue
+            # Suspicious line: dump the whole record with its line indexes
+            for dump_idx, dump_text in enumerate(data_section):
+                print(f"\n{dump_idx}: {dump_text}")
+            print('\n')
+
+        for section_idx, layout in enumerate(layouts):
+            section_text = data_section[section_idx]
+            for column, slot in layout:
+                extracted_data_dict[column].append(data_extractor(slot, section_text))
+
+    dataframe = pd.DataFrame(extracted_data_dict)
+    dataframe.to_excel(save_name, index=False)
+    return dataframe
+
+
+# Script entry: read the whole report as text (undecodable bytes are replaced
+# rather than raising), run the extraction and pretty-print the result.
+report = Path("/config/workspace/LEAF/IL Extract SRC/2022.05.20 Renewal Net Investment").read_text(errors="replace")
+
+prt(renewal_net_invest_trial_balance(report, "rn_TESTING.xlsx"))