# XMLRuleParser/parse_xml.py

try:
    import re
    from pandas import DataFrame
    from typing import Union
    from logging import debug as dbg, getLogger
    import win32clipboard


    def create_table(xmlStr: str) -> Union[DataFrame, Exception]:
        """Build a table with one row per ``<Scenario>`` element in *xmlStr*.

        Every ``<Condition>`` and ``<UpdateField>`` inside a scenario becomes a
        column named after the element's ``Id`` attribute. A scenario that
        lacks a given Id gets an empty string in that column. When a condition
        and an update field share an Id, the update field's value wins.

        Args:
            xmlStr: Raw XML text; it is scanned with regular expressions
                rather than an XML parser.

        Returns:
            A ``DataFrame`` indexed by the integer ``Seq`` of each scenario, or
            an ``Exception`` instance (returned, not raised) describing why no
            table could be built.
        """
        # Compiled once per call so the per-scenario loops do not re-parse them.
        # "[\s\S]" is "any character including newline"; the negative lookahead
        # stops each body at the start of the next element of the same kind.
        scenario_re = re.compile(
            r'<Scenario Seq="(\d{1,3})">(?:(?!<Sc)[\s\S])*</S'
        )
        condition_re = re.compile(
            r'<Condition Id="(\w+)" Group="\w+" CompareTo="(?:Value|Range)">'
            r'((?:(?!</C)[\s\S])*)</Condition>'
        )
        update_re = re.compile(
            r'<UpdateField Id="(\w+)" Group="\w+" UIRequired="\d+"'
            r' UIDisabled="\d+" ForceUpdate="\d+">\n?(?:(?!</U)[\s\S])*</Up'
        )
        # A flat character class instead of "(\w+|\.)+": it matches the same
        # text without the nested quantifier that can backtrack exponentially.
        update_value_re = re.compile(r">([\w.]+)</Value>")

        data = {"SEQ": []}

        for scenario in scenario_re.finditer(xmlStr):
            scenario_text = scenario.group()
            seq = scenario.group(1)
            dbg(f"\nSeq: {seq}")
            dbg(f"\n\nSenario Group: {scenario_text}")

            row = {"SEQ": int(seq)}

            for condition in condition_re.finditer(scenario_text):
                field_id, body = condition.groups()
                # The value is the text between the Condition's own opening and
                # closing tags. A single-line body is kept verbatim; a
                # multi-line body is trimmed of the surrounding indentation.
                value = body.strip() if "\n" in body else body
                dbg(f"SEQ: {seq} | {field_id}")
                dbg(f"SEQ: {seq} | {value}")
                row[field_id] = value

            for update in update_re.finditer(scenario_text):
                update_text = update.group()
                field_id = update.group(1)
                dbg(f"{seq} | Update: {update_text}")
                value_match = update_value_re.search(update_text)
                if value_match is None:
                    # Report the problem through the return value, as the
                    # signature promises, instead of crashing on ``None``.
                    return ValueError(
                        f"Scenario {seq}: UpdateField '{field_id}' has no"
                        " <Value> made of word characters or dots."
                    )
                value = value_match.group(1)
                dbg(f"{seq} UPDATE | {field_id} : {value}")
                row[field_id] = value

            # Merge this scenario's row into the column-oriented table.
            rows_before = len(data["SEQ"])
            for key, column in data.items():
                column.append(row.get(key, ""))
            for key, value in row.items():
                if key not in data:
                    # Back-fill a column first seen now with one blank per
                    # scenario already processed. Counting rows (not trusting
                    # the Seq number) keeps all columns the same length even
                    # when Seq values do not run 1, 2, 3, ...
                    data[key] = [""] * rows_before + [value]
                    dbg(f"New key: {key} | {data[key]}")

            dbg(f"{seq} | {data}\n")

        dbg(data)
        if getLogger().level == 10:  # 10 == logging.DEBUG
            for key, column in data.items():
                dbg(f"{key} : {len(column)}")

        try:
            table = DataFrame(data)
            table.set_index("SEQ", inplace=True)
        except Exception as e:
            return e
        dbg(table)
        if table.empty:
            # Exception classes accept no keyword arguments, so only the
            # message is passed.
            return ValueError("No data found...")
        return table


    def process_clipboard() -> str:
        """Prompt until the user confirms the clipboard content, then return it.

        Each round reads the clipboard, prints it, and asks for confirmation.
        Any answer containing "y" (either case) or "1" accepts the content.
        The exact answer "debug" (any case) switches the root logger to DEBUG
        and then re-prompts like any other non-accepting answer.

        Returns:
            The confirmed clipboard content, or the literal string ``"None"``
            when the clipboard could not be read in the round that was accepted.
        """
        while True:
            win32clipboard.OpenClipboard()
            try:
                xml = win32clipboard.GetClipboardData()
            except Exception:
                # Reading failed; show a placeholder and let the user retry.
                # ``Exception`` (not a bare ``except``) lets Ctrl-C through.
                xml = "None"
            finally:
                # Always release the clipboard so other programs can use it.
                win32clipboard.CloseClipboard()

            print(f"\n\nYour current clipboard is as follows:")
            print(xml)
            yn = input("\nIs this the XML you'd like to parse? (y/n)\n >")
            if yn.lower() == "debug":
                getLogger().setLevel(10)  # 10 == logging.DEBUG
                print("\nYou have now entered debug mode...")

            if re.search("(?i)y|1", yn) is not None:
                return xml
            input("Please copy the xml then press enter...")


    # Main loop: keep asking for clipboard XML until it parses into a table,
    # then show the table and copy it back to the clipboard.
    table = None
    while True:
        xml = process_clipboard()
        table: Union[DataFrame, Exception] = create_table(xml)
        if type(table) == DataFrame:
            print(f"Table sample:")
            print(table)
            table.to_clipboard()
            input("This table is now in your clipboard to paste into excel.")
            break
        # create_table handed back an error object instead of a table.
        print(f"\n\nENCOUNTERED ERROR!:\n{table}\n")
        input("Please try again...")
except Exception as e:
    # Last-resort handler for the file-wide try block: it covers the imports,
    # the function definitions and the main loop, so the error reported here
    # may come from start-up or from any later step.
    # NOTE(review): "do the the" in the message below looks like a typo for
    # "due to the" - confirm before changing the user-facing text.
    print(f"The program failed to start do the the following exception:\n{e}")
    # Wait for Enter so the message can be read; the prompt asks the user to
    # note the error before closing.
    input(f"Please make note of the error before closing so that you can report it.")