"""Parse XML data into a pandas DataFrame and export it to Excel.

The XML text comes either from a file (``-i``) or from the Windows clipboard.
The resulting table is copied to the clipboard and written to an ``.xlsx``
file.
"""
import re
from pandas import DataFrame
from typing import Callable, Iterable, Optional, Tuple, Union
from logging import (
    DEBUG,
    ERROR,
    debug as dbg,
    getLogger,
    exception as exc,
    FileHandler,
    StreamHandler,
)
import argparse as ap
from pathlib import Path

try:
    import win32clipboard
except ImportError:
    # pywin32 is only needed for clipboard input; reading from a file with
    # ``-i`` must keep working when the package is not installed.
    win32clipboard = None


def _parse_scenarios(xmlStr: str) -> Iterable[Tuple[object, dict]]:
    """Yield ``(seq, senarioDict)`` pairs extracted from *xmlStr*.

    NOTE(review): this step could not be restored. In the reviewed copy of
    this file the extraction code is corrupted: the regular-expression
    literals are truncated and the loop that defines ``seq``, ``senarioDict``,
    ``update``, ``id`` and ``valueMatch`` is missing. Guessing the patterns
    would silently produce wrong tables, so the function fails loudly instead.

    Raises:
        NotImplementedError: always, until the extraction logic is restored.
    """
    # Surviving fragments, kept verbatim for whoever restores the parser:
    #
    #   MATCH_SCENARIO = "((?!((?!"
    #   UPDATE_REGEX = r"\n?((?!(.)*<", group)
    #   if valueGroup == None:
    #       valueGroup = re.search("e\"(((?!(\w+|\.)+", update).span()
    #   dbg(f"value: {valueMatch}")
    #   value = update[valueMatch[0]+1:valueMatch[1]-8]
    #   dbg(f"{seq} UPDATE | {id} : {value}")
    #   senarioDict[id] = value
    raise NotImplementedError(
        "The XML scenario extraction logic is missing from this file and "
        "must be restored before any XML can be parsed."
    )


def _merge_scenario(dataDict: dict, senarioDict: dict, seq) -> None:
    """Append one scenario's values to the column lists in *dataDict*.

    Every existing column receives the scenario's value for that key, or ``''``
    when the scenario has none. Keys seen for the first time become new
    columns, left-padded with ``''`` for the ``int(seq) - 1`` earlier rows.
    """
    # Snapshot the columns first so keys added below are recognised as new.
    known = set(dataDict)
    for key, column in dataDict.items():
        dbg(column)
        column.append(senarioDict.get(key, ''))

    for key, value in senarioDict.items():
        if key in known:
            continue
        # presumably seq is the 1-based row number of this scenario, so
        # seq - 1 blanks line the new column up with the existing rows
        # -- TODO confirm once the parser is restored.
        dataDict[key] = [''] * (int(seq) - 1) + [value]
        dbg(f"New key: {key} | {dataDict[key]}")


def _build_table(dataDict: dict) -> Union[DataFrame, Exception]:
    """Turn the column dict into a DataFrame indexed by ``SEQ``.

    Returns the exception instead of raising it, which is the contract
    ``main`` relies on.
    """
    try:
        table = DataFrame(dataDict)
        table.set_index('SEQ', inplace=True)
        dbg(table)
        if table.empty:
            # Exception() accepts no keyword arguments; the former
            # color=/effect= kwargs turned this into a TypeError and hid the
            # real message.
            raise Exception("No data found...")
    except Exception as e:
        return e
    return table


def create_table(xmlStr: str) -> Union[DataFrame, Exception]:
    """Build a DataFrame with one row per scenario found in *xmlStr*.

    Args:
        xmlStr: the XML text to parse.

    Returns:
        The table indexed by ``SEQ``, or the exception raised while building
        it (for example when no data was found or columns are ragged).

    Raises:
        NotImplementedError: from the extraction step, see
            ``_parse_scenarios``.
    """
    dataDict = {
        "SEQ": [],
    }

    for seq, senarioDict in _parse_scenarios(xmlStr):
        # Now merge the values from that scenario into the main dict
        _merge_scenario(dataDict, senarioDict, seq)
        dbg(f"{seq} | {dataDict}\n")

    dbg(dataDict)
    if getLogger().level == DEBUG:
        # Column lengths make ragged columns easy to spot in the debug log.
        for key, column in dataDict.items():
            dbg(f"{key} : {len(column)}")

    return _build_table(dataDict)


def process_clipboard() -> str:
    """Ask the user to confirm the clipboard content and return it.

    Loops until the user accepts what is currently on the clipboard. Typing
    ``debug`` at the prompt switches the root logger to debug level.

    Returns:
        The clipboard data, or the string ``"None"`` when it could not be read.

    Raises:
        RuntimeError: if the ``win32clipboard`` module is not available.
    """
    if win32clipboard is None:
        raise RuntimeError(
            "Reading the clipboard requires the pywin32 package "
            "(win32clipboard); use -i to parse a file instead."
        )

    while True:
        win32clipboard.OpenClipboard()
        try:
            xml = win32clipboard.GetClipboardData()
        except Exception:
            # Empty or non-text clipboard: show a placeholder and let the
            # user copy the XML again.
            xml = "None"
        finally:
            # Always release the clipboard, otherwise other applications
            # cannot use it.
            win32clipboard.CloseClipboard()

        print(f"\n\nYour current clipboard is as follows:")
        print(xml)
        yn = input("\nIs this the XML you'd like to parse? \n(y/n)\n >")
        answer = yn.strip().lower()

        if answer == "debug":
            getLogger().setLevel(DEBUG)
            print("\nYou have now entered debug mode...")

        # Anchor the check to the start of the answer: the previous
        # unanchored search treated anything containing a "y" as a yes.
        if answer.startswith(("y", "1")):
            return xml

        input("Please copy the xml then press enter...")


def alter_suffix(p: Path, desired: str) -> Path:
    """Return *p* with its suffix replaced by *desired*.

    The parent directory is preserved, and a path without a suffix simply
    gets *desired* appended.
    """
    return p.with_suffix(desired)


def main(xml: str, refresh: Optional[Callable[[], str]] = None) -> DataFrame:
    """Parse *xml* into a table, show it and copy it to the clipboard.

    Args:
        xml: the XML text to parse.
        refresh: optional callable returning new XML text. When given, a
            failed parse prompts the user and retries with its result.

    Returns:
        The parsed table.

    Raises:
        Exception: whatever ``create_table`` reported, when parsing fails and
            no *refresh* callable was supplied. Parsing the same text again
            can never succeed, so retrying would loop forever.
    """
    while True:
        table: Union[DataFrame, Exception] = create_table(xml)
        if isinstance(table, DataFrame):
            break

        print(f"\n\nENCOUNTERED ERROR!:\n{table}\n")
        if refresh is None:
            raise table
        input("Please try again...")
        xml = refresh()

    print(f"Table sample:")
    print(table)
    table.to_clipboard()
    print("This table is now in your clipboard to paste into excel.")
    return table


def _run_cli() -> None:
    """Parse the command line, build the table and write the Excel file."""
    parser = ap.ArgumentParser(
        prog="XML Parser",
        description=(
            "This program parses XML data into a pandas DataFrame. "
            "The XML data can come from an input file or the clipboard. "
            "If an output file is specified, the DataFrame will be written "
            "to this file in Excel format. "
            "If debug mode is enabled, detailed logging information will be "
            'written to "xml_parse.log".'
        ),
    )
    parser.add_argument(
        "-i", "--input",
        help="Path to the XML file to parse. If not specified, the program will ask for XML data from the clipboard."
    )
    parser.add_argument(
        "-o", "--output",
        help="Path to the output Excel file. If not specified, the DataFrame will be written to 'Parsed XML.xlsx' in the current directory."
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable debug mode. Detailed logging information will be written to 'xml_parse.log'."
    )
    args = parser.parse_args()

    if args.debug:
        # Everything goes to the log file; only errors reach the console.
        logger = getLogger()
        logger.setLevel(DEBUG)
        f_handler = FileHandler(Path("xml_parse.log"))
        f_handler.setLevel(DEBUG)
        s_handler = StreamHandler()
        s_handler.setLevel(ERROR)
        logger.addHandler(f_handler)
        logger.addHandler(s_handler)

    if args.input is not None:
        i_file: Path = Path(args.input)
        if not i_file.exists():
            raise ValueError(f"{i_file} could not be found. Make sure the path is correct.")
        if i_file.suffix != ".xml":
            raise NotImplementedError(f"This program can only parse .xml not {i_file}!")
        with open(i_file) as xml_file:
            xml_str: str = xml_file.read()
        output_path = Path(i_file.parent, i_file.name)
        # A file's content does not change between attempts, so no retry.
        xml_df: DataFrame = main(xml_str)
    else:
        xml_str = process_clipboard()
        output_path = Path("Parsed XML.xlsx")
        xml_df = main(xml_str, refresh=process_clipboard)

    if args.output is not None:
        # Path.suffix is read-only; the suffix is normalised just below.
        output_path = Path(args.output)
    output_path = alter_suffix(output_path, ".xlsx")

    # NOTE(review): index=False leaves the SEQ index out of the sheet --
    # confirm that this is intended.
    xml_df.to_excel(output_path, freeze_panes=(0, 1), index=False)
    print(f"Processing Complete!\nOutput data available here: {output_path}")
    input("\n\nPress any key to exit.")


if __name__ == "__main__":
    # Only errors are reported unless --debug is passed.
    getLogger().setLevel(ERROR)
    try:
        _run_cli()
    except Exception as e:
        print(f"The program failed to start do the the following exception:\n{e}")
        input(f"Please make note of the error before closing so that you can report it.")