"""Extract column header tokens and their character offsets from report files.

For each report name in ``REPORT_NAMES`` the script reads ``Inputs/<name>``,
locates the first line whose stripped length exceeds ``MIN_HEADER_LENGTH``
characters, records every header-like token on that line together with its
start/end offsets, and writes the result to ``Outputs/<name>.csv``.
"""
import re

import pandas as pd

# Report files (under ``Inputs/``) processed when run as a script.
REPORT_NAMES = ["ASSET", "CUST", "DOB"]

# A line must be longer than this (after stripping) to be treated as the
# header line.
MIN_HEADER_LENGTH = 50

# A run of word characters, ".", "/" or "#", followed by one whitespace
# character or by the end of the line.  The end-of-line alternative keeps the
# last token from being dropped when the line has no trailing whitespace
# (``str.splitlines`` already removed the newline).
HEADER_TOKEN = re.compile(r"[\w./#]+(?:\s|$)")


def find_header_columns(report: str) -> dict:
    """Return the tokens and offsets found on the first long line of *report*.

    Lines are scanned in order and echoed (stripped) to stdout.  Scanning
    stops at the first line whose stripped length exceeds
    ``MIN_HEADER_LENGTH``; that line is searched with ``HEADER_TOKEN``.

    Args:
        report: Full text of a report.

    Returns:
        A dict with three parallel lists: ``"ColName"`` (matched text,
        including the single trailing whitespace character when one was
        matched), ``"start"`` and ``"end"`` (offsets into the unstripped
        line, ``end`` exclusive).  All lists are empty when no line is long
        enough.
    """
    columns = {"ColName": [], "start": [], "end": []}
    for line in report.splitlines():
        stripped = line.strip()
        print(stripped)
        if len(stripped) <= MIN_HEADER_LENGTH:
            continue
        # Match against the unstripped line so the offsets refer to the
        # original character positions.
        for match in HEADER_TOKEN.finditer(line):
            print(match)
            columns["ColName"].append(match.group())
            columns["start"].append(match.start())
            columns["end"].append(match.end())
        # Only the first sufficiently long line is examined.
        break
    return columns


def main() -> None:
    """Process every report in ``REPORT_NAMES`` and write one CSV per report."""
    for report_name in REPORT_NAMES:
        with open(f"Inputs/{report_name}", errors="replace") as report_file:
            report = report_file.read()
        # "^" characters caused errors (per the original author's note).
        # Swapping each for a single space keeps every offset unchanged.
        report = report.replace("^", " ")
        pd.DataFrame(find_header_columns(report)).to_csv(
            f"Outputs/{report_name}.csv"
        )


if __name__ == "__main__":
    main()