From 5b3c56e51f2f7e1bd4c7d49214b37351b64aec67 Mon Sep 17 00:00:00 2001 From: = <=> Date: Wed, 17 May 2023 16:18:21 -0400 Subject: [PATCH] Added CLI and redid the pyinstaller spec --- parse_xml.py | 310 +++++++++++++++++++++++++++++------------------ requirements.txt | Bin 0 -> 526 bytes 2 files changed, 195 insertions(+), 115 deletions(-) create mode 100644 requirements.txt diff --git a/parse_xml.py b/parse_xml.py index be46b71..083420b 100644 --- a/parse_xml.py +++ b/parse_xml.py @@ -1,119 +1,125 @@ -try: - import re - from pandas import DataFrame - from typing import Union - from logging import debug as dbg, getLogger - import win32clipboard - - - def create_table(xmlStr: str) -> Union[DataFrame, Exception]: - dataDict = { - "SEQ": [], - } - MATCH_SCENARIO = "((?!((?!" - UPDATE_REGEX = r"\n?((?!(.)*<", group) - if valueGroup == None: - valueGroup = re.search("e\"(((?!(\w+|\.)+", update).span() - dbg(f"value: {valueMatch}") - value = update[valueMatch[0]+1:valueMatch[1]-8] - dbg(f"{seq} UPDATE | {id} : {value}") - senarioDict[id] = value - # Now merge the values from that senario into the main dict - seen = [] - for key in dataDict.keys(): - dbg(dataDict[key]) - try: - senarioValue = senarioDict[key] - except KeyError: - senarioValue = '' - dataDict[key].append(senarioValue) - seen.append(key) - for key in [k for k in senarioDict.keys() if k not in seen]: - dataFill = ['' for _ in range(1,int(seq))] - dataFill.append(senarioDict[key]) - dataDict[key] = dataFill - dbg(f"New key: {key} | {dataDict[key]}") - - dbg(f"{seq} | {dataDict}\n") - - dbg(dataDict) - if getLogger().level == 10: - for key in dataDict.keys(): - dbg(f"{key} : {len(dataDict[key])}") - try: - table = DataFrame(dataDict) - table.set_index('SEQ', inplace=True) - dbg(table) - if table.empty: - raise Exception("No data found...", color='RED', effect='BOLD') - return table - except Exception as e: - return e - - - def process_clipboard() -> str: - correct = False - while not correct: - win32clipboard.OpenClipboard() +import re +from pandas import DataFrame +from typing import Union +from logging import debug as dbg, getLogger, exception as exc, FileHandler, StreamHandler +import win32clipboard +import argparse as ap +from pathlib import Path + + +def create_table(xmlStr: str) -> Union[DataFrame, Exception]: + dataDict = { + "SEQ": [], + } + MATCH_SCENARIO = "((?!((?!" + UPDATE_REGEX = r"\n?((?!(.)*<", group) + if valueGroup == None: + valueGroup = re.search("e\"(((?!(\w+|\.)+", update).span() + dbg(f"value: {valueMatch}") + value = update[valueMatch[0]+1:valueMatch[1]-8] + dbg(f"{seq} UPDATE | {id} : {value}") + senarioDict[id] = value + # Now merge the values from that senario into the main dict + seen = [] + for key in dataDict.keys(): + dbg(dataDict[key]) try: - xml = win32clipboard.GetClipboardData() - except: - xml = "None" - win32clipboard.CloseClipboard() - print(f"\n\nYour current clipboard is as follows:") - print(xml) - yn = input("\nIs this the XML you'd like to parse? (y/n)\n >") - if yn.lower() == "debug": - getLogger().setLevel(10) - print("\nYou have now entered debug mode...") - correct = True if re.search("(?i)y|1", yn) != None else False - if not correct: - input("Please copy the xml then press enter...") - return xml + senarioValue = senarioDict[key] + except KeyError: + senarioValue = '' + dataDict[key].append(senarioValue) + seen.append(key) + for key in [k for k in senarioDict.keys() if k not in seen]: + dataFill = ['' for _ in range(1,int(seq))] + dataFill.append(senarioDict[key]) + dataDict[key] = dataFill + dbg(f"New key: {key} | {dataDict[key]}") + + dbg(f"{seq} | {dataDict}\n") + dbg(dataDict) + if getLogger().level == 10: + for key in dataDict.keys(): + dbg(f"{key} : {len(dataDict[key])}") + try: + table = DataFrame(dataDict) + table.set_index('SEQ', inplace=True) + dbg(table) + if table.empty: + raise Exception("No data found...", color='RED', effect='BOLD') + return table + except Exception as e: + return e + +def process_clipboard() -> str: + correct = False + while not correct: + win32clipboard.OpenClipboard() + try: + xml = win32clipboard.GetClipboardData() + except: + xml = "None" + win32clipboard.CloseClipboard() + print(f"\n\nYour current clipboard is as follows:") + print(xml) + yn = input("\nIs this the XML you'd like to parse? (y/n)\n >") + if yn.lower() == "debug": + getLogger().setLevel(10) + print("\nYou have now entered debug mode...") + correct = True if re.search("(?i)y|1", yn) != None else False + if not correct: + input("Please copy the xml then press enter...") + return xml + + +def alter_suffix(p: Path, desired: str) -> Path: + if p.suffix != desired: + p = Path(p.name.replace(p.suffix, desired)) + return p + +def main(xml: str) -> DataFrame: table = None while type(table) != DataFrame: - xml = process_clipboard() + table: Union[DataFrame, Exception] = create_table(xml) if type(table) != DataFrame: print(f"\n\nENCOUNTERED ERROR!:\n{table}\n") @@ -122,7 +128,81 @@ try: print(f"Table sample:") print(table) table.to_clipboard() - input("This table is now in your clipboard to paste into excel.") -except Exception as e: - print(f"The program failed to start do the the following exception:\n{e}") - input(f"Please make note of the error before closing so that you can report it.") \ No newline at end of file + print("This table is now in your clipboard to paste into excel.") + return table + +if __name__ == "__main__": + + logger = getLogger().setLevel(40) + + try: + parser = ap.ArgumentParser( + prog="XML Parser", + description='''This program parses XML data into a pandas DataFrame. + The XML data can come from an input file or the clipboard. + If an output file is specified, the DataFrame will be written to this file in Excel format. + If debug mode is enabled, detailed logging information will be written to "xml_parse.log".''' + ) + + parser.add_argument( + "-i", "--input", + help="Path to the XML file to parse. If not specified, the program will ask for XML data from the clipboard." + ) + + parser.add_argument( + "-o", "--output", + help="Path to the output Excel file. If not specified, the DataFrame will be written to 'Parsed XML.xlsx' in the current directory." + ) + + parser.add_argument( + "--debug", action="store_true", + help="Enable debug mode. Detailed logging information will be written to 'xml_parse.log'." + ) + + args = parser.parse_args() + + if args.debug: + logger = getLogger() + logger.setLevel(10) + f_handler = FileHandler( + Path("xml_parse.log") + ) + f_handler.setLevel(10) + s_handler = StreamHandler() + s_handler.setLevel(40) + logger.addHandler(f_handler) + logger.addHandler(s_handler) + + + if args.input is not None: + i_file : Path = Path(args.input) + + if not i_file.exists(): + raise ValueError(f"{i_file} could not be found. Make sure the path is correct.") + elif i_file.suffix != ".xml": + raise NotImplementedError(f"This program can only parse .xml not {i_file}!") + with open(i_file) as xml_file: + xml_str: str = xml_file.read() + output_path = Path(i_file.parent, i_file.name) + + else: + xml_str = process_clipboard() + output_path = Path("Parsed XML.xlsx") + + xml_df: DataFrame = main(xml_str) + if args.output is not None: + try: + output_path = Path(args.output) + if output_path.suffix != ".xlsx": + output_path.suffix = ".xlsx" + except Exception as e: + exc(f"Failed to use passed output file: {args.output}.\ + Using {output_path}.\n{e}") + output_path = alter_suffix(output_path, ".xlsx") + xml_df.to_excel(output_path, freeze_panes=(0,1), index=False) + print(f"Processing Complete!\nOutput data available here: {output_path}") + input("\n\nPress any key to exit.") + + except Exception as e: + print(f"The program failed to start do the the following exception:\n{e}") + input(f"Please make note of the error before closing so that you can report it.") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bad3f846ac35877e44a3125f7dc327469a342c7f GIT binary patch literal 526 zcmZ{h+YW+23`FPI#81KK;_bzcLB&hdmEan|k5^~9fF^1P4N0fd)8YBZa74fsB{CG) zVZmpJ0Y;dyrs$)GoQeb$PSkF&XU$DFGTnfBsXVS!6sWtN!?^P}vy)S!enkiENt=0R z^92b!)~pxnI(8$8+%^5V+p*`&LAqJvP?OGcKG5Z8oTYh9Y?F8+t0=ye3GYAiInW`& zieH{iC6&vDsNETJkxQ@{Prb+kDX*-`NEeE2&bPf#`ukt6n=z$S$8?66c6YsdA`9HC cL}F4(DW;{QE_A8PA^x9fceQp$%Iqh@3mn}|cmMzZ literal 0 HcmV?d00001