From 6a9811fbc0970fd0264a4c542e4cae99e36c0a65 Mon Sep 17 00:00:00 2001 From: Griffiths Lott Date: Mon, 23 May 2022 19:02:24 -0400 Subject: [PATCH] Fixed failure of large df | save moved into process functions --- ILExtract.py | 73 +++++++++++++++++++++++++++++++--------------------- main.py | 6 ++--- 2 files changed, 47 insertions(+), 32 deletions(-) diff --git a/ILExtract.py b/ILExtract.py index ce86472..f6d0186 100644 --- a/ILExtract.py +++ b/ILExtract.py @@ -54,7 +54,7 @@ class ILReport: return 1 try: # Run the associated method to extract the data and get the dataframe - dataframe = self.x_method(report) + dataframe = self.x_method(report, self.output_location) try: assert(len(dataframe) > 1) except Exception as e: @@ -65,14 +65,14 @@ class ILReport: print(f"{self.output_name} failed to process:\n{e}") self.successful = False return 1 - try: - # Save the dataframe as an excel document - dataframe.to_excel(f"{self.output_location}/{self.output_name}_{dt.now().strftime('%Y%m%d-%H%M')}.xlsx", index = False) - except Exception as e: - self.successful = False - print(f"{self.output_name} failed to save to excel!\n{dataframe}\n{e}") - return 1 - self.successful = True + # try: + # # Save the dataframe as an excel document + # dataframe.to_excel(f"{self.output_location}/{self.output_name}_{dt.now().strftime('%Y%m%d-%H%M')}.xlsx", index = False) + # except Exception as e: + # self.successful = False + # print(f"{self.output_name} failed to save to excel!\n{dataframe}\n{e}") + # return 1 + # self.successful = True return 0 def process(self): try: @@ -85,7 +85,7 @@ class ILReport: return 1 try: # Run the associated method to extract the data and get the dataframe - dataframe = self.x_method(report) + dataframe = self.x_method(report, self.output_name) try: assert(len(dataframe) > 1) except Exception as e: @@ -161,7 +161,7 @@ COMMON REGEX COMPONENTS """ -def ach(report: str): +def ach(report: str, save_name: str): lines = report.splitlines() extracted_data_dict = { "ContractNumber" : [], @@ -177,10 +177,11 @@ def ach(report: str): for line in enumerate(lines): if (re.search(contract_number_regex, line[1]) != None) & (re.search(bank_number_regex, line[1]) != None): [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0, len(columns))] - return pd.DataFrame(extracted_data_dict) - + dataframe = pd.DataFrame(extracted_data_dict) + dataframe.to_excel(save_name, index=False, engine="xlsxwrtier") + return dataframe -def disposition(report: str): +def disposition(report: str, save_name: str): lines = report.splitlines() extracted_data_dict = { "ContractNumber" : [], @@ -201,10 +202,12 @@ def disposition(report: str): if re.search(contract_number_regex, data_extractor(0,line[1])): [extracted_data_dict[columns[c]].append(data_extractor(c,line[1])) for c in range(0, len(columns)-1)] extracted_data_dict["Customer Name"].append(lines[line[0]+1].strip()) - return pd.DataFrame(extracted_data_dict) + dataframe = pd.DataFrame(extracted_data_dict) + dataframe.to_excel(save_name, index=False, engine="xlsxwrtier") + return dataframe -def gainloss(report: str): +def gainloss(report: str, save_name: str): lines = report.splitlines() extracted_data_dict = { 'REM RENT RCVB' : [], @@ -286,10 +289,11 @@ def gainloss(report: str): disp_descriptoin.append(" ".join(disp_split[1:])) df["DISPOSITION CODE"] = disp_code df["DISPOSITION DESC"] = disp_descriptoin + df.to_excel(save_name, index=False, engine="xlsxwrtier") return df # Works for Net-inv-loans & NIV-after -def net_invest_trial_balance(report: str): +def net_invest_trial_balance(report: str, save_name: str): lines = report.splitlines() extracted_data_dict = { 'CUSTOMER NAME' : [], @@ -333,10 +337,12 @@ def net_invest_trial_balance(report: str): [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1] [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2] [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[3])) for c in line3] - return pd.DataFrame(extracted_data_dict) + dataframe = pd.DataFrame(extracted_data_dict) + dataframe.to_excel(save_name, index=False, engine="xlsxwrtier") + return dataframe -def lockbox(report: str): +def lockbox(report: str, save_name: str): lines = report.splitlines() extracted_data_dict = { "CustomerName" : [], @@ -387,10 +393,12 @@ def lockbox(report: str): i += 1 # Once it hits, add the name to the dict extracted_data_dict["CustomerName"].append(data_extractor(7,lines[line[0]+i])) - return pd.DataFrame(extracted_data_dict) + dataframe = pd.DataFrame(extracted_data_dict) + dataframe.to_excel(save_name, index=False, engine="xlsxwrtier") + return dataframe -def minv(report: str): +def minv(report: str, save_name: str): lines = report.splitlines() data_extractor = create_line_divider([15,32,52,71,83,107,116,128]) extracted_data_dict = { @@ -409,10 +417,12 @@ def minv(report: str): if re.search(contract_number_regex, line[1]) != None: [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(0,len(columns))] #All the list lengths need to be the same so if anything was missed it will fail to build - return pd.DataFrame(extracted_data_dict) + dataframe = pd.DataFrame(extracted_data_dict) + dataframe.to_excel(save_name, index=False, engine="xlsxwrtier") + return dataframe # Good for PUB_WIRES, VMCC, PBP_EPAY, returned check -def payment_transactions(report: str): +def payment_transactions(report: str, save_name: str): lines = report.splitlines() data_extractor = create_line_divider([6,33,52,62,80,89,110,121]) extracted_data_dict = { @@ -446,10 +456,12 @@ def payment_transactions(report: str): extracted_data_dict['CUSTOMER NAME'].append(cname) inv_no = lines[line[0]+1][79:90].strip() extracted_data_dict['INV NO'].append(inv_no) - return pd.DataFrame(extracted_data_dict) + dataframe = pd.DataFrame(extracted_data_dict) + dataframe.to_excel(save_name, index=False, engine="xlsxwrtier") + return dataframe -def renewal_net_invest_trial_balance(report: str): +def renewal_net_invest_trial_balance(report: str, save_name: str): lines = report.splitlines() data_extractor = create_line_divider([21,29,43,58,71,88,99,113]) extracted_data_dict = { @@ -485,10 +497,12 @@ def renewal_net_invest_trial_balance(report: str): [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0])) for c in line0] [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1] [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2] - return pd.DataFrame(extracted_data_dict) + dataframe = pd.DataFrame(extracted_data_dict) + dataframe.to_excel(save_name, index=False, engine="xlsxwrtier") + return dataframe -def unapplied(report: str): +def unapplied(report: str, save_name: str): lines = report.splitlines() extracted_data_dict = { "Trans Num" : [], @@ -521,5 +535,6 @@ def unapplied(report: str): (re.search("\d{2}/\d{2}/\d{4}", str(data_extractor(3,line[1],debug=False))) != None): [extracted_data_dict[columns[c]].append(data_extractor(c,line[1])) for c in range(0,9)] [extracted_data_dict[columns[8+c]].append(data_extractor(c,lines[line[0]+1])) for c in range(1,len(columns)-8)] - return pd.DataFrame(extracted_data_dict) - + dataframe = pd.DataFrame(extracted_data_dict) + dataframe.to_excel(save_name, index=False, engine="xlsxwrtier") + return dataframe \ No newline at end of file diff --git a/main.py b/main.py index de3d5f7..8cfd2b5 100644 --- a/main.py +++ b/main.py @@ -36,7 +36,7 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow): self.inputFile = inFile[0] inFileEnd = inFile[0].split('/')[-1] outputRoot = self.inputFile.removesuffix(inFileEnd) - self.outputFile = f"{outputRoot}{self.reportTypeCB.currentText()}_{dt.now().strftime('%Y%m%d_%H%M')}.csv" + self.outputFile = f"{outputRoot}{self.reportTypeCB.currentText()}_{dt.now().strftime('%Y%m%d_%H%M')}.xlsx" self.outputFileLE.setText(self.outputFile) if self.reportTypeCB.currentText().split(" ")[-1].lower() not in self.inputFile.lower(): print("Possibly wrong file type") @@ -96,7 +96,7 @@ Make sure you select the correct report type before processing!") extraction_function=extract_function, output_location=self.outputFile, ).process() - dataframe.to_csv(self.outputFile, index=False) + dataframe.to_excel(self.outputFile, index=False, engine="xlsxwriter") smallDF = dataframe.iloc[0:500,:] self.inputFilePreview.setText(smallDF.to_html(index=False)) self.openReportButton.setEnabled(True) @@ -110,7 +110,7 @@ Make sure you select the correct report type before processing!") self.inputFilePreview.setText(df.to_html()) def to_clipboard(self): - df = pd.read_csv(self.outputFile) + df = pd.read_excel(self.outputFile) df.to_clipboard(excel=True)