A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pip/_internal/utils/encoding.py

36 lines
1.1 KiB

import codecs
import locale
import re
import sys
from typing import List, Tuple
# Byte-order marks, each paired with the codec used to decode the bytes that
# follow the mark.
#
# Order matters: auto_decode() scans this list front to back and uses the
# first entry whose mark is a prefix of the data. BOM_UTF32_LE (FF FE 00 00)
# begins with BOM_UTF16_LE (FF FE), so the UTF-32 marks must come before the
# UTF-16 ones; otherwise UTF-32-LE input would be misdetected as UTF-16.
BOMS: List[Tuple[bytes, str]] = [
    (codecs.BOM_UTF8, "utf-8"),
    (codecs.BOM_UTF32, "utf-32"),
    (codecs.BOM_UTF32_BE, "utf-32-be"),
    (codecs.BOM_UTF32_LE, "utf-32-le"),
    (codecs.BOM_UTF16, "utf-16"),
    (codecs.BOM_UTF16_BE, "utf-16-be"),
    (codecs.BOM_UTF16_LE, "utf-16-le"),
]

# Matches an encoding declaration of the form "coding: NAME" or "coding=NAME"
# (the PEP 263 cookie) in a bytes line and captures NAME.
ENCODING_RE = re.compile(br"coding[:=]\s*([-\w.]+)")
def auto_decode(data: bytes) -> str:
    """Decode *data* to text, working out which encoding to use.

    The encoding is chosen by the first rule that applies:

    1. *data* starts with one of the byte-order marks in ``BOMS``: the mark
       is stripped and the rest is decoded with the paired codec.
    2. One of the first two lines starts with ``#`` and contains a
       ``coding:`` / ``coding=`` declaration (as in PEP 263): the whole of
       *data* is decoded with the declared encoding.
    3. Otherwise ``locale.getpreferredencoding(False)`` is used, falling
       back to ``sys.getdefaultencoding()``, like ``open()`` on Python 3.

    Args:
        data: The raw bytes to decode.

    Returns:
        The decoded text.

    Raises:
        UnicodeDecodeError: If *data* is not valid in the chosen encoding.
        LookupError: If a declared encoding name is not known to Python.
    """
    for bom, encoding in BOMS:
        if data.startswith(bom):
            return data[len(bom) :].decode(encoding)

    # Let's check the first two lines, as in PEP 263.
    for line in data.split(b"\n")[:2]:
        # Only a comment line (first byte "#") may carry the declaration.
        if not line.startswith(b"#"):
            continue
        declared = ENCODING_RE.search(line)
        if declared is None:
            continue
        # The pattern is a bytes regex, so the captured name is ASCII-only.
        return data.decode(declared.group(1).decode("ascii"))

    return data.decode(
        locale.getpreferredencoding(False) or sys.getdefaultencoding(),
    )