A PyQT GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be store and compiles monthly and yearly summaries.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pip/_vendor/pygments/regexopt.py

91 lines
3.0 KiB

"""
pygments.regexopt
~~~~~~~~~~~~~~~~~
An algorithm that generates optimized regexes for matching long lists of
literal strings.
:copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
from re import escape
from os.path import commonprefix
from itertools import groupby
from operator import itemgetter
CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')
FIRST_ELEMENT = itemgetter(0)
def make_charset(letters):
return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']'
def regex_opt_inner(strings, open_paren):
"""Return a regex that matches any string in the sorted list of strings."""
close_paren = open_paren and ')' or ''
# print strings, repr(open_paren)
if not strings:
# print '-> nothing left'
return ''
first = strings[0]
if len(strings) == 1:
# print '-> only 1 string'
return open_paren + escape(first) + close_paren
if not first:
# print '-> first string empty'
return open_paren + regex_opt_inner(strings[1:], '(?:') \
+ '?' + close_paren
if len(first) == 1:
# multiple one-char strings? make a charset
oneletter = []
rest = []
for s in strings:
if len(s) == 1:
oneletter.append(s)
else:
rest.append(s)
if len(oneletter) > 1: # do we have more than one oneletter string?
if rest:
# print '-> 1-character + rest'
return open_paren + regex_opt_inner(rest, '') + '|' \
+ make_charset(oneletter) + close_paren
# print '-> only 1-character'
return open_paren + make_charset(oneletter) + close_paren
prefix = commonprefix(strings)
if prefix:
plen = len(prefix)
# we have a prefix for all strings
# print '-> prefix:', prefix
return open_paren + escape(prefix) \
+ regex_opt_inner([s[plen:] for s in strings], '(?:') \
+ close_paren
# is there a suffix?
strings_rev = [s[::-1] for s in strings]
suffix = commonprefix(strings_rev)
if suffix:
slen = len(suffix)
# print '-> suffix:', suffix[::-1]
return open_paren \
+ regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \
+ escape(suffix[::-1]) + close_paren
# recurse on common 1-string prefixes
# print '-> last resort'
return open_paren + \
'|'.join(regex_opt_inner(list(group[1]), '')
for group in groupby(strings, lambda s: s[0] == first[0])) \
+ close_paren
def regex_opt(strings, prefix='', suffix=''):
"""Return a compiled regex that matches any string in the given list.
The strings to match must be literal strings, not regexes. They will be
regex-escaped.
*prefix* and *suffix* are pre- and appended to the final regex.
"""
strings = sorted(strings)
return prefix + regex_opt_inner(strings, '(') + suffix