"""Convert the ``old_query_bt.txt`` text report into ``OLD_QUERY_BT.csv``.

The report is read line by line and split on spaces.  A line consisting of
a single token is remembered as the run date for the data rows that follow
it; header, separator, "Completion" and "(...)" lines are skipped; every
other non-empty line is treated as one data row with one field per entry in
``columns``.  NOTE(review): the input looks like the text output of a SQL
client (header row, dashed rule, "(N rows affected)") -- confirm with the
producer of the file.
"""

import pandas as pd

file = "/config/workspace/LEAF/Collections/old_query_bt.txt"

columns = [
    'AppKey', 'CustomerKey', 'DealerKey', 'AssetVendorDealerKey',
    'ProgramKey', 'PromotionKey', 'ProductLineKey', 'QueueKey',
    'CurrentUserKey', 'PurchaseOptionID', 'DealStatusKey',
    'DealStatusCommentKey', 'DecisionCodeKey', 'DealerSalesRepId',
    'AppSubmitById', 'ExtendedDecisionCodeKey', 'DecisionDate',
    'DecisionUserInfoKey', 'DealStatusNote', 'OriginalDecisionCodeKey',
    'OriginalDecisionDate', 'CreateDate', 'ActualBookingDate',
    'ActualBookingUserInfoKey', 'OfficialBookingDate', 'ProjectedBooking',
    'ActiveDate', 'FollowUpDate', 'CreditAmount', 'EquipmentCost',
    'TotalListPrice', 'ContractPayment', 'ContractTermMonths',
    'IsProgressPayment', 'IsImported', 'MarketingRepID',
    'MarketingRepSecondaryID', 'MarketingRepPercent',
    'MarketingRepSecondaryPercent', 'IsSyndication', 'DeclineReasonKey1',
    'DeclineReasonKey2', 'DeclineReasonKey3', 'DeclineReasonKey4',
    'RegionID', 'ContractID', 'ScheduleID', 'LessorID', 'RecordCreateDate',
    'RecordCreateSystemId', 'RecordUpdateDate', 'RecordUpdateSystemId',
    'RateFactor', 'CommissionPremiumEligible', 'BranchKey',
    'MarketingCampaignId', 'SecondMarketingCampaignId',
    'PurchaseOrderNumber', 'SyndicationID', 'CombinedIRR', 'IRROverrideRSN',
    'Deferred', 'RateCardID', 'InterimRentLEAF', 'InterimRentDealer',
    'ProductCategory', 'AppTransactionTypeId', 'OriginalDocFee',
    'DocFeeWaivedTypeId', 'InterimRentWaivedTypeId', 'ContractTypeKey',
    'CreditExpirationDate', 'CreditTier', 'CreditTierOverride',
    'CreditTierOriginal', 'SalesRepPoints', 'SaleLeaseback', 'OFAC',
    'CreditRelationship', 'DealerAdvancedFundingOption', 'CustomerApp',
    'DealerPoints', 'DealerOwnerPoints', 'DealerSalesRepPoints',
    'ProductType', 'CustomerReferenceNumber', 'LCBeginDate', 'TierPricing',
    'CreditExceptionTypeID', 'UNLPool', 'AppKey2', 'AppSyndicationStatusId',
    'AppKey3', 'HistoryDate', 'CurrentDecisionCode', 'CustomerKey2',
    'BusinessTypeID',
]

# First tokens that mark a non-data line (header row, dashed rule, footer).
_SKIPPED_FIRST_TOKENS = ("AppKey", "-----------", "Completion")

# Column positions whose value always spans two space-separated tokens:
# DecisionDate, OriginalDecisionDate, CreateDate, ActiveDate,
# RecordCreateDate, RecordUpdateDate and HistoryDate.
_TWO_TOKEN_INDEXES = (16, 20, 21, 26, 48, 50, 93)

# TierPricing spans two tokens only when its first token is "Tier".
_TIER_PRICING_INDEX = 87

# Merges must run in ascending column order so that each index refers to the
# row as it looks once every earlier column is already a single field.
_MERGE_ORDER = tuple(sorted(_TWO_TOKEN_INDEXES + (_TIER_PRICING_INDEX,)))


def _merge_split_fields(tokens):
    """Return a copy of *tokens* with the two-token values re-joined."""
    fields = list(tokens)
    for index in _MERGE_ORDER:
        if index + 1 >= len(fields):
            # Row is too short to contain this pair; let the caller report it.
            break
        if index == _TIER_PRICING_INDEX and fields[index] != "Tier":
            continue
        # The TierPricing merge happens here, before HistoryDate (93).  Doing
        # it after would glue AppKey3 to the date half of HistoryDate on every
        # "Tier" row, because those rows are shifted by one token until the
        # TierPricing pair has been joined.
        fields[index:index + 2] = [" ".join(fields[index:index + 2])]
    return fields


def parse_report(lines, column_names=columns):
    """Parse report lines into a column-oriented table.

    Args:
        lines: Iterable of report lines without trailing newlines.
        column_names: Sequence of column names, one per field of a data row.

    Returns:
        A dict mapping each column name to the list of its values (strings),
        plus a final ``"RunDate"`` key holding, for each row, the most recent
        single-token line seen before it.

    Raises:
        ValueError: If a data row appears before any run-date line, or if a
            row does not yield exactly ``len(column_names)`` fields.
    """
    table = {name: [] for name in column_names}
    run_dates = []
    run_date = None

    for line_number, line in enumerate(lines, start=1):
        # Split on single spaces and drop the empties produced by padding.
        tokens = [token for token in line.split(" ") if token != ""]
        if not tokens:
            continue

        first = tokens[0]
        if first in _SKIPPED_FIRST_TOKENS or "(" in first:
            continue

        if len(tokens) == 1:
            run_date = first
            continue

        if run_date is None:
            raise ValueError(
                f"line {line_number}: data row found before any run-date line"
            )

        fields = _merge_split_fields(tokens)
        if len(fields) != len(column_names):
            raise ValueError(
                f"line {line_number}: expected {len(column_names)} fields, "
                f"got {len(fields)}"
            )

        run_dates.append(run_date)
        for name, value in zip(column_names, fields):
            table[name].append(value)

    table["RunDate"] = run_dates
    return table


def main():
    """Read the report at ``file`` and write it out as ``OLD_QUERY_BT.csv``."""
    with open(file) as output:
        report = output.read().splitlines()

    df = pd.DataFrame(parse_report(report, columns))
    df.to_csv("OLD_QUERY_BT.csv")


if __name__ == "__main__":
    main()