A PyQT GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be store and compiles monthly and yearly summaries.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pip/_vendor/pyparsing/common.py

424 lines
13 KiB

# common.py
from .core import *
from .helpers import delimited_list, any_open_tag, any_close_tag
from datetime import datetime
# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
"""Here are some common low-level expressions that may be useful in
jump-starting parser development:
- numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
:class:`scientific notation<sci_real>`)
- common :class:`programming identifiers<identifier>`
- network addresses (:class:`MAC<mac_address>`,
:class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
- ISO8601 :class:`dates<iso8601_date>` and
:class:`datetime<iso8601_datetime>`
- :class:`UUID<uuid>`
- :class:`comma-separated list<comma_separated_list>`
- :class:`url`
Parse actions:
- :class:`convertToInteger`
- :class:`convertToFloat`
- :class:`convertToDate`
- :class:`convertToDatetime`
- :class:`stripHTMLTags`
- :class:`upcaseTokens`
- :class:`downcaseTokens`
Example::
pyparsing_common.number.runTests('''
# any int or real number, returned as the appropriate type
100
-100
+100
3.14159
6.02e23
1e-12
''')
pyparsing_common.fnumber.runTests('''
# any int or real number, returned as float
100
-100
+100
3.14159
6.02e23
1e-12
''')
pyparsing_common.hex_integer.runTests('''
# hex numbers
100
FF
''')
pyparsing_common.fraction.runTests('''
# fractions
1/2
-3/4
''')
pyparsing_common.mixed_integer.runTests('''
# mixed fractions
1
1/2
-3/4
1-3/4
''')
import uuid
pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
pyparsing_common.uuid.runTests('''
# uuid
12345678-1234-5678-1234-567812345678
''')
prints::
# any int or real number, returned as the appropriate type
100
[100]
-100
[-100]
+100
[100]
3.14159
[3.14159]
6.02e23
[6.02e+23]
1e-12
[1e-12]
# any int or real number, returned as float
100
[100.0]
-100
[-100.0]
+100
[100.0]
3.14159
[3.14159]
6.02e23
[6.02e+23]
1e-12
[1e-12]
# hex numbers
100
[256]
FF
[255]
# fractions
1/2
[0.5]
-3/4
[-0.75]
# mixed fractions
1
[1]
1/2
[0.5]
-3/4
[-0.75]
1-3/4
[1.75]
# uuid
12345678-1234-5678-1234-567812345678
[UUID('12345678-1234-5678-1234-567812345678')]
"""
convert_to_integer = token_map(int)
"""
Parse action for converting parsed integers to Python int
"""
convert_to_float = token_map(float)
"""
Parse action for converting parsed numbers to Python float
"""
integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer)
"""expression that parses an unsigned integer, returns an int"""
hex_integer = (
Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
)
"""expression that parses a hexadecimal integer, returns an int"""
signed_integer = (
Regex(r"[+-]?\d+")
.set_name("signed integer")
.set_parse_action(convert_to_integer)
)
"""expression that parses an integer with optional leading sign, returns an int"""
fraction = (
signed_integer().set_parse_action(convert_to_float)
+ "/"
+ signed_integer().set_parse_action(convert_to_float)
).set_name("fraction")
"""fractional expression of an integer divided by an integer, returns a float"""
fraction.add_parse_action(lambda tt: tt[0] / tt[-1])
mixed_integer = (
fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
).set_name("fraction or mixed integer-fraction")
"""mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
mixed_integer.add_parse_action(sum)
real = (
Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
.set_name("real number")
.set_parse_action(convert_to_float)
)
"""expression that parses a floating point number and returns a float"""
sci_real = (
Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
.set_name("real number with scientific notation")
.set_parse_action(convert_to_float)
)
"""expression that parses a floating point number with optional
scientific notation and returns a float"""
# streamlining this expression makes the docs nicer-looking
number = (sci_real | real | signed_integer).setName("number").streamline()
"""any numeric expression, returns the corresponding Python type"""
fnumber = (
Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?")
.set_name("fnumber")
.set_parse_action(convert_to_float)
)
"""any int or real number, returned as float"""
identifier = Word(identchars, identbodychars).set_name("identifier")
"""typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
ipv4_address = Regex(
r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
).set_name("IPv4 address")
"IPv4 address (``0.0.0.0 - 255.255.255.255``)"
_ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
_full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
"full IPv6 address"
)
_short_ipv6_address = (
Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
+ "::"
+ Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
).set_name("short IPv6 address")
_short_ipv6_address.add_condition(
lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
)
_mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
ipv6_address = Combine(
(_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
"IPv6 address"
)
).set_name("IPv6 address")
"IPv6 address (long, short, or mixed form)"
mac_address = Regex(
r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
).set_name("MAC address")
"MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
@staticmethod
def convert_to_date(fmt: str = "%Y-%m-%d"):
"""
Helper to create a parse action for converting parsed date string to Python datetime.date
Params -
- fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)
Example::
date_expr = pyparsing_common.iso8601_date.copy()
date_expr.setParseAction(pyparsing_common.convertToDate())
print(date_expr.parseString("1999-12-31"))
prints::
[datetime.date(1999, 12, 31)]
"""
def cvt_fn(ss, ll, tt):
try:
return datetime.strptime(tt[0], fmt).date()
except ValueError as ve:
raise ParseException(ss, ll, str(ve))
return cvt_fn
@staticmethod
def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
"""Helper to create a parse action for converting parsed
datetime string to Python datetime.datetime
Params -
- fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)
Example::
dt_expr = pyparsing_common.iso8601_datetime.copy()
dt_expr.setParseAction(pyparsing_common.convertToDatetime())
print(dt_expr.parseString("1999-12-31T23:59:59.999"))
prints::
[datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
"""
def cvt_fn(s, l, t):
try:
return datetime.strptime(t[0], fmt)
except ValueError as ve:
raise ParseException(s, l, str(ve))
return cvt_fn
iso8601_date = Regex(
r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
).set_name("ISO8601 date")
"ISO8601 date (``yyyy-mm-dd``)"
iso8601_datetime = Regex(
r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
).set_name("ISO8601 datetime")
"ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
"UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
_html_stripper = any_open_tag.suppress() | any_close_tag.suppress()
@staticmethod
def strip_html_tags(s: str, l: int, tokens: ParseResults):
"""Parse action to remove HTML tags from web page HTML source
Example::
# strip HTML links from normal text
text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
td, td_end = makeHTMLTags("TD")
table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
print(table_text.parseString(text).body)
Prints::
More info at the pyparsing wiki page
"""
return pyparsing_common._html_stripper.transform_string(tokens[0])
_commasepitem = (
Combine(
OneOrMore(
~Literal(",")
+ ~LineEnd()
+ Word(printables, exclude_chars=",")
+ Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
)
)
.streamline()
.set_name("commaItem")
)
comma_separated_list = delimited_list(
Opt(quoted_string.copy() | _commasepitem, default="")
).set_name("comma separated list")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
upcase_tokens = staticmethod(token_map(lambda t: t.upper()))
"""Parse action to convert tokens to upper case."""
downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
"""Parse action to convert tokens to lower case."""
# fmt: off
url = Regex(
# https://mathiasbynens.be/demo/url-regex
# https://gist.github.com/dperini/729294
r"^" +
# protocol identifier (optional)
# short syntax // still required
r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
# user:pass BasicAuth (optional)
r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
r"(?P<host>" +
# IP address exclusion
# private & local networks
r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
# IP address dotted notation octets
# excludes loopback network 0.0.0.0
# excludes reserved space >= 224.0.0.0
# excludes network & broadcast addresses
# (first & last IP address of each class)
r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
r"|" +
# host & domain names, may end with dot
# can be replaced by a shortest alternative
# (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
r"(?:" +
r"(?:" +
r"[a-z0-9\u00a1-\uffff]" +
r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
r")?" +
r"[a-z0-9\u00a1-\uffff]\." +
r")+" +
# TLD identifier name, may end with dot
r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
r")" +
# port number (optional)
r"(:(?P<port>\d{2,5}))?" +
# resource path (optional)
r"(?P<path>\/[^?# ]*)?" +
# query string (optional)
r"(\?(?P<query>[^#]*))?" +
# fragment (optional)
r"(#(?P<fragment>\S*))?" +
r"$"
).set_name("url")
# fmt: on
# pre-PEP8 compatibility names
convertToInteger = convert_to_integer
convertToFloat = convert_to_float
convertToDate = convert_to_date
convertToDatetime = convert_to_datetime
stripHTMLTags = strip_html_tags
upcaseTokens = upcase_tokens
downcaseTokens = downcase_tokens
_builtin_exprs = [
v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement)
]