A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/_libs/tslib.pyx

779 lines
25 KiB

import warnings
import cython
from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
PyDateTime_IMPORT,
datetime,
tzinfo,
)
# import datetime C API
PyDateTime_IMPORT
cimport numpy as cnp
from numpy cimport (
float64_t,
int64_t,
ndarray,
)
import numpy as np
cnp.import_array()
import pytz
from pandas._libs.tslibs.np_datetime cimport (
_string_to_dts,
check_dts_bounds,
dt64_to_dtstruct,
dtstruct_to_dt64,
get_datetime64_value,
npy_datetimestruct,
pydate_to_dt64,
pydatetime_to_dt64,
)
from pandas._libs.util cimport (
is_datetime64_object,
is_float_object,
is_integer_object,
)
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.parsing import parse_datetime_string
from pandas._libs.tslibs.conversion cimport (
_TSObject,
cast_from_unit,
convert_datetime_to_tsobject,
get_datetime64_nanos,
precision_from_unit,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timestamps import Timestamp
# Note: this is the only non-tslibs intra-pandas dependency here
from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single
def _test_parse_iso8601(ts: str):
    """
    TESTING ONLY: build a Timestamp from a string via the iso8601 parser.

    Real construction goes through `convert_str_to_tsobject`; this entry
    point exists so tests can exercise the iso8601 path directly.

    Parameters
    ----------
    ts : str
        Either the keyword 'now' / 'today', or a string handed to
        `_string_to_dts`.

    Returns
    -------
    Timestamp
        Timezone-aware (fixed offset) when the parser reports a local
        offset, otherwise naive.
    """
    cdef:
        _TSObject parsed
        int has_offset = 0, offset_minutes = 0

    # The two keywords never reach the string parser.
    if ts == 'now':
        return Timestamp.utcnow()
    if ts == 'today':
        return Timestamp.now().normalize()

    parsed = _TSObject()

    _string_to_dts(ts, &parsed.dts, &has_offset, &offset_minutes, True)
    parsed.value = dtstruct_to_dt64(&parsed.dts)
    check_dts_bounds(&parsed.dts)

    if has_offset != 1:
        # No offset reported by the parser: return a naive Timestamp.
        return Timestamp(parsed.value)

    # An offset was parsed: attach it and shift the wall-clock value to UTC.
    parsed.tzinfo = pytz.FixedOffset(offset_minutes)
    parsed.value = tz_localize_to_utc_single(parsed.value, parsed.tzinfo)
    return Timestamp(parsed.value, tz=parsed.tzinfo)
@cython.wraparound(False)
@cython.boundscheck(False)
def format_array_from_datetime(
    ndarray[int64_t] values,
    tzinfo tz=None,
    str format=None,
    object na_rep=None
) -> np.ndarray:
    """
    Return a numpy object array holding the string form of each value.

    Parameters
    ----------
    values : a 1-d i8 array
    tz : tzinfo or None, default None
    format : str or None, default None
        a strftime capable string
    na_rep : optional, default is None
        a nat format; 'NaT' is used when None is passed

    Returns
    -------
    np.ndarray[object]
    """
    cdef:
        # Typed loop index so the buffer indexing below stays at C level.
        Py_ssize_t i
        int64_t val, ns, N = len(values)
        ndarray[int64_t] consider_values
        bint show_ms = False, show_us = False, show_ns = False
        bint basic_format = False
        ndarray[object] result = np.empty(N, dtype=object)
        object ts, res
        npy_datetimestruct dts

    if na_rep is None:
        na_rep = 'NaT'

    # if we don't have a format nor tz, then choose
    # a format based on precision
    basic_format = format is None and tz is None
    if basic_format:
        # Boolean-mask indexing yields a new array, so the in-place floor
        # divisions below do not modify `values`.
        consider_values = values[values != NPY_NAT]
        show_ns = (consider_values % 1000).any()

        if not show_ns:
            consider_values //= 1000
            show_us = (consider_values % 1000).any()

            # Only probe for milliseconds when microseconds are not needed;
            # the formatting branch below checks show_us before show_ms.
            if not show_us:
                consider_values //= 1000
                show_ms = (consider_values % 1000).any()

    for i in range(N):
        val = values[i]

        if val == NPY_NAT:
            result[i] = na_rep
        elif basic_format:
            dt64_to_dtstruct(val, &dts)
            res = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} '
                   f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}')

            # Append the finest fractional part required by any value.
            if show_ns:
                ns = dts.ps // 1000
                res += f'.{ns + dts.us * 1000:09d}'
            elif show_us:
                res += f'.{dts.us:06d}'
            elif show_ms:
                res += f'.{dts.us // 1000:03d}'

            result[i] = res
        else:
            ts = Timestamp(val, tz=tz)
            if format is None:
                result[i] = str(ts)
            else:
                # invalid format string
                # requires dates > 1900
                try:
                    result[i] = ts.strftime(format)
                except ValueError:
                    # Fall back to the default string form.
                    result[i] = str(ts)

    return result
def array_with_unit_to_datetime(
    ndarray values,
    str unit,
    str errors="coerce"
):
    """
    Convert the ndarray to datetime according to the time unit.

    This function converts an array of objects into a numpy array of
    datetime64[ns]. It returns the converted array
    and also returns the timezone offset

    if errors:
      - raise: return converted values or raise OutOfBoundsDatetime
          if out of range on the conversion or
          ValueError for other conversions (e.g. a string)
      - ignore: return non-convertible values as the same unit
      - coerce: NaT for non-convertibles

    Parameters
    ----------
    values : ndarray
         Date-like objects to convert. The array is not modified.
    unit : str
         Time unit to use during conversion.
    errors : str, default 'coerce'
         Error behavior when parsing.

    Returns
    -------
    result : ndarray of m8 values
        An object ndarray instead when errors='ignore' and at least one
        value could not be converted.
    tz : parsed timezone offset or None
    """
    cdef:
        Py_ssize_t i, n=len(values)
        int64_t m
        int prec = 0
        ndarray[float64_t] fvalues
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'
        bint is_raise = errors=='raise'
        bint need_to_iterate = True
        ndarray[int64_t] iresult
        ndarray[object] oresult
        ndarray mask
        object tz = None

    assert is_ignore or is_coerce or is_raise

    if unit == "ns":
        # np.float64 is what the removed alias np.float_ referred to.
        if issubclass(values.dtype.type, (np.integer, np.float64)):
            result = values.astype("M8[ns]", copy=False)
        else:
            result, tz = array_to_datetime(
                values.astype(object, copy=False),
                errors=errors,
            )
        return result, tz

    # NOTE(review): the precision is unpacked into `p`, while the rounding
    # step further down tests `prec`, which is never reassigned from 0.
    # That step is therefore inactive; kept as-is pending confirmation of
    # the intended behavior.
    m, p = precision_from_unit(unit)

    if is_raise:
        # try a quick conversion to i8/f8
        # if we have nulls that are not type-compat
        # then need to iterate

        if values.dtype.kind in ["i", "f", "u"]:
            iresult = values.astype("i8", copy=False)
            # fill missing values by comparing to NPY_NAT
            mask = iresult == NPY_NAT
            if mask.any():
                # astype(copy=False) may hand back the caller's own array;
                # copy before zero-filling so the input is left untouched.
                iresult = iresult.copy()
                iresult[mask] = 0
            fvalues = iresult.astype("f8") * m
            need_to_iterate = False

        if not need_to_iterate:
            # check the bounds
            if (fvalues < Timestamp.min.value).any() or (
                (fvalues > Timestamp.max.value).any()
            ):
                raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")

            if values.dtype.kind in ["i", "u"]:
                result = (iresult * m).astype("M8[ns]")

            elif values.dtype.kind == "f":
                fresult = (values * m).astype("f8")
                fresult[mask] = 0
                if prec:
                    fresult = round(fresult, prec)
                result = fresult.astype("M8[ns]", copy=False)

            # Restore NaT at the positions that were zero-filled above.
            iresult = result.view("i8")
            iresult[mask] = NPY_NAT

            return result, tz

    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')

    # Element-wise pass. In 'ignore' mode any failure raises AssertionError,
    # which abandons this pass in favour of the object-dtype pass below.
    try:
        for i in range(n):
            val = values[i]

            if checknull_with_nat_and_na(val):
                iresult[i] = NPY_NAT

            elif is_integer_object(val) or is_float_object(val):

                if val != val or val == NPY_NAT:
                    iresult[i] = NPY_NAT
                else:
                    try:
                        iresult[i] = cast_from_unit(val, unit)
                    except OverflowError:
                        if is_raise:
                            raise OutOfBoundsDatetime(
                                f"cannot convert input {val} with the unit '{unit}'"
                            )
                        elif is_ignore:
                            raise AssertionError
                        iresult[i] = NPY_NAT

            elif isinstance(val, str):
                if len(val) == 0 or val in nat_strings:
                    iresult[i] = NPY_NAT

                else:
                    try:
                        iresult[i] = cast_from_unit(float(val), unit)
                    except ValueError:
                        if is_raise:
                            raise ValueError(
                                f"non convertible value {val} with the unit '{unit}'"
                            )
                        elif is_ignore:
                            raise AssertionError
                        iresult[i] = NPY_NAT
                    except OverflowError:
                        if is_raise:
                            raise OutOfBoundsDatetime(
                                f"cannot convert input {val} with the unit '{unit}'"
                            )
                        elif is_ignore:
                            raise AssertionError
                        iresult[i] = NPY_NAT

            else:

                if is_raise:
                    raise ValueError(
                        f"unit='{unit}' not valid with non-numerical val='{val}'"
                    )
                if is_ignore:
                    raise AssertionError

                iresult[i] = NPY_NAT

        return result, tz

    except AssertionError:
        pass

    # we have hit an exception
    # and are in ignore mode
    # redo as object

    oresult = np.empty(n, dtype=object)
    for i in range(n):
        val = values[i]

        if checknull_with_nat_and_na(val):
            oresult[i] = <object>NaT
        elif is_integer_object(val) or is_float_object(val):

            if val != val or val == NPY_NAT:
                oresult[i] = <object>NaT
            else:
                try:
                    oresult[i] = Timestamp(cast_from_unit(val, unit))
                except OverflowError:
                    oresult[i] = val

        elif isinstance(val, str):
            if len(val) == 0 or val in nat_strings:
                oresult[i] = <object>NaT

            else:
                oresult[i] = val

        else:
            # 'ignore' hands back what it cannot convert; without this
            # branch the slot would keep np.empty's placeholder (None).
            oresult[i] = val

    return oresult, tz
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef array_to_datetime(
    ndarray[object] values,
    str errors='raise',
    bint dayfirst=False,
    bint yearfirst=False,
    bint utc=False,
    bint require_iso8601=False,
    bint allow_mixed=False,
):
    """
    Converts a 1D array of date-like values to a numpy array of either:
        1) datetime64[ns] data
        2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
           is encountered

    Also returns a pytz.FixedOffset if an array of strings with the same
    timezone offset is passed and utc=True is not passed. Otherwise, None
    is returned

    Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric,
    strings

    Parameters
    ----------
    values : ndarray of object
         date-like objects to convert
    errors : str, default 'raise'
         error behavior when parsing; must be one of 'raise', 'ignore'
         or 'coerce'
    dayfirst : bool, default False
         dayfirst parsing behavior when encountering datetime strings
    yearfirst : bool, default False
         yearfirst parsing behavior when encountering datetime strings
    utc : bool, default False
         indicator whether the dates should be UTC
    require_iso8601 : bool, default False
         indicator whether the datetime string should be iso8601
    allow_mixed : bool, default False
        Whether to allow mixed datetimes and integers.

    Returns
    -------
    np.ndarray
        May be datetime64[ns] or object dtype
    tzinfo or None
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val, py_dt, tz, tz_out = None
        ndarray[int64_t] iresult
        ndarray[object] oresult
        npy_datetimestruct dts
        bint utc_convert = bool(utc)
        bint seen_integer = False
        bint seen_string = False
        bint seen_datetime = False
        bint seen_datetime_offset = False
        bint is_raise = errors=='raise'
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'
        bint is_same_offsets
        _TSObject _ts
        int64_t value
        int out_local = 0, out_tzoffset = 0
        # NOTE(review): offset_seconds is declared but not used below.
        float offset_seconds, tz_offset
        # Distinct UTC offsets (in seconds) seen across string inputs, plus
        # the marker 'naive' for strings that carried no offset.
        set out_tzoffset_vals = set()
        bint string_to_dts_failed

    # specify error conditions
    assert is_raise or is_ignore or is_coerce

    # `iresult` is an int64 view onto `result`, so writes through it fill
    # the datetime64[ns] array.
    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')

    try:
        for i in range(n):
            val = values[i]

            try:
                if checknull_with_nat_and_na(val):
                    iresult[i] = NPY_NAT

                elif PyDateTime_Check(val):
                    seen_datetime = True
                    if val.tzinfo is not None:
                        if utc_convert:
                            _ts = convert_datetime_to_tsobject(val, None)
                            iresult[i] = _ts.value
                        else:
                            raise ValueError('Tz-aware datetime.datetime '
                                             'cannot be converted to '
                                             'datetime64 unless utc=True')
                    elif isinstance(val, _Timestamp):
                        iresult[i] = val.value
                    else:
                        iresult[i] = pydatetime_to_dt64(val, &dts)
                        check_dts_bounds(&dts)

                elif PyDate_Check(val):
                    seen_datetime = True
                    iresult[i] = pydate_to_dt64(val, &dts)
                    check_dts_bounds(&dts)

                elif is_datetime64_object(val):
                    seen_datetime = True
                    iresult[i] = get_datetime64_nanos(val)

                elif is_integer_object(val) or is_float_object(val):
                    # these must be ns unit by-definition
                    seen_integer = True

                    # `val != val` is the NaN check
                    if val != val or val == NPY_NAT:
                        iresult[i] = NPY_NAT
                    elif is_raise or is_ignore:
                        iresult[i] = val
                    else:
                        # coerce
                        # we now need to parse this as if unit='ns'
                        # we can ONLY accept integers at this point
                        # if we have previously (or in future accept
                        # datetimes/strings, then we must coerce)
                        try:
                            iresult[i] = cast_from_unit(val, 'ns')
                        except OverflowError:
                            iresult[i] = NPY_NAT

                elif isinstance(val, str):
                    # string
                    seen_string = True

                    if len(val) == 0 or val in nat_strings:
                        iresult[i] = NPY_NAT
                        continue

                    string_to_dts_failed = _string_to_dts(
                        val, &dts, &out_local,
                        &out_tzoffset, False
                    )
                    if string_to_dts_failed:
                        # An error at this point is a _parsing_ error
                        # specifically _not_ OutOfBoundsDatetime
                        if _parse_today_now(val, &iresult[i], utc):
                            continue
                        elif require_iso8601:
                            # if requiring iso8601 strings, skip trying
                            # other formats
                            if is_coerce:
                                iresult[i] = NPY_NAT
                                continue
                            elif is_raise:
                                raise ValueError(
                                    f"time data {val} doesn't match format specified"
                                )
                            # errors='ignore': hand the input back unchanged
                            return values, tz_out

                        try:
                            py_dt = parse_datetime_string(val,
                                                          dayfirst=dayfirst,
                                                          yearfirst=yearfirst)
                            # If the dateutil parser returned tzinfo, capture it
                            # to check if all arguments have the same tzinfo
                            tz = py_dt.utcoffset()

                        except (ValueError, OverflowError):
                            if is_coerce:
                                iresult[i] = NPY_NAT
                                continue
                            # Caught by the outer `except TypeError`, which
                            # reroutes to _array_to_datetime_object.
                            raise TypeError("invalid string coercion to datetime")

                        if tz is not None:
                            seen_datetime_offset = True
                            # dateutil timezone objects cannot be hashed, so
                            # store the UTC offsets in seconds instead
                            out_tzoffset_vals.add(tz.total_seconds())
                        else:
                            # Add a marker for naive string, to track if we are
                            # parsing mixed naive and aware strings
                            out_tzoffset_vals.add('naive')

                        _ts = convert_datetime_to_tsobject(py_dt, None)
                        iresult[i] = _ts.value
                    if not string_to_dts_failed:
                        # No error reported by string_to_dts, pick back up
                        # where we left off
                        value = dtstruct_to_dt64(&dts)
                        if out_local == 1:
                            seen_datetime_offset = True
                            # Store the out_tzoffset in seconds
                            # since we store the total_seconds of
                            # dateutil.tz.tzoffset objects
                            out_tzoffset_vals.add(out_tzoffset * 60.)
                            tz = pytz.FixedOffset(out_tzoffset)
                            value = tz_localize_to_utc_single(value, tz)
                            # reset the out-parameters before the next string
                            out_local = 0
                            out_tzoffset = 0
                        else:
                            # Add a marker for naive string, to track if we are
                            # parsing mixed naive and aware strings
                            out_tzoffset_vals.add('naive')
                        iresult[i] = value
                        check_dts_bounds(&dts)

                else:
                    if is_coerce:
                        iresult[i] = NPY_NAT
                    else:
                        # Caught by the outer `except TypeError` below.
                        raise TypeError(f"{type(val)} is not convertible to datetime")

            except OutOfBoundsDatetime:
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                elif require_iso8601 and isinstance(val, str):
                    # GH#19382 for just-barely-OutOfBounds falling back to
                    # dateutil parser will return incorrect result because
                    # it will ignore nanoseconds
                    if is_raise:

                        # Still raise OutOfBoundsDatetime,
                        # as error message is informative.
                        raise

                    assert is_ignore
                    return values, tz_out
                # propagate to the outer OutOfBoundsDatetime handler
                raise

    except OutOfBoundsDatetime:
        if is_raise:
            raise

        return ignore_errors_out_of_bounds_fallback(values), tz_out

    except TypeError:
        # Any TypeError from the loop reroutes to the object-dtype parser.
        return _array_to_datetime_object(values, errors, dayfirst, yearfirst)

    if seen_datetime and seen_integer:
        # we have mixed datetimes & integers

        if is_coerce:
            # coerce all of the integers/floats to NaT, preserve
            # the datetimes and other convertibles
            for i in range(n):
                val = values[i]
                if is_integer_object(val) or is_float_object(val):
                    result[i] = NPY_NAT
        elif allow_mixed:
            pass
        elif is_raise:
            raise ValueError("mixed datetimes and integers in passed array")
        else:
            return _array_to_datetime_object(values, errors, dayfirst, yearfirst)

    if seen_datetime_offset and not utc_convert:
        # GH#17697
        # 1) If all the offsets are equal, return one offset for
        #    the parsed dates to (maybe) pass to DatetimeIndex
        # 2) If the offsets are different, then force the parsing down the
        #    object path where an array of datetimes
        #    (with individual dateutil.tzoffsets) are returned
        is_same_offsets = len(out_tzoffset_vals) == 1
        if not is_same_offsets:
            return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
        else:
            # offsets were stored in seconds; FixedOffset is given minutes
            tz_offset = out_tzoffset_vals.pop()
            tz_out = pytz.FixedOffset(tz_offset / 60.)
    return result, tz_out
cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values):
    """
    Build the object-dtype result that array_to_datetime returns when an
    OutOfBoundsDatetime was raised and errors == "ignore".

    Parameters
    ----------
    values : ndarray[object]

    Returns
    -------
    ndarray[object]
        Null-like entries become np.nan (floats) or NaT (everything else),
        datetime64 scalars become NaT or their `.item()` value, and all
        remaining entries are passed through unchanged.
    """
    cdef:
        Py_ssize_t idx, count = len(values)
        object item

    oresult = np.empty(count, dtype=object)

    for idx in range(count):
        item = values[idx]

        if checknull_with_nat_and_na(item):
            # set as nan except if its a NaT
            if isinstance(item, float):
                oresult[idx] = np.nan
            else:
                oresult[idx] = NaT
            continue

        if not is_datetime64_object(item):
            # Anything that is neither null nor datetime64 is kept as-is.
            oresult[idx] = item
            continue

        if get_datetime64_value(item) == NPY_NAT:
            oresult[idx] = NaT
        else:
            oresult[idx] = item.item()

    return oresult
@cython.wraparound(False)
@cython.boundscheck(False)
cdef _array_to_datetime_object(
    ndarray[object] values,
    str errors,
    bint dayfirst=False,
    bint yearfirst=False,
):
    """
    Fall back function for array_to_datetime

    Attempts to parse datetime strings with dateutil to return an array
    of datetime objects

    Parameters
    ----------
    values : ndarray[object]
         date-like objects to convert
    errors : str
         error behavior when parsing; must be one of 'raise', 'ignore'
         or 'coerce'
    dayfirst : bool, default False
         dayfirst parsing behavior when encountering datetime strings
    yearfirst : bool, default False
         yearfirst parsing behavior when encountering datetime strings

    Returns
    -------
    np.ndarray[object]
        The parsed values, or the untouched input when an element cannot
        be handled and errors is not 'raise'.
    Literal[None]

    Raises
    ------
    ValueError, OverflowError
        Propagated from string parsing / bounds checking when
        errors == 'raise'.
    TypeError
        When errors == 'raise' and an element is neither null-like, a
        datetime, nor a string.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val
        bint is_ignore = errors == 'ignore'
        bint is_coerce = errors == 'coerce'
        bint is_raise = errors == 'raise'
        ndarray[object] oresult
        npy_datetimestruct dts

    assert is_raise or is_ignore or is_coerce

    oresult = np.empty(n, dtype=object)

    # We return an object array and only attempt to parse:
    # 1) NaT or NaT-like values
    # 2) datetime strings, which we return as datetime.datetime
    for i in range(n):
        val = values[i]
        if checknull_with_nat_and_na(val) or PyDateTime_Check(val):
            # GH 25978. No need to parse NaT-like or datetime-like vals
            oresult[i] = val
        elif isinstance(val, str):
            if len(val) == 0 or val in nat_strings:
                # NOTE(review): this stores the string 'NaT', whereas the
                # coerce branch below stores the NaT object -- confirm
                # which one callers expect before changing.
                oresult[i] = 'NaT'
                continue
            try:
                oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
                                                   yearfirst=yearfirst)
                # Bounds-check the parsed datetime.
                pydatetime_to_dt64(oresult[i], &dts)
                check_dts_bounds(&dts)
            except (ValueError, OverflowError):
                if is_coerce:
                    oresult[i] = <object>NaT
                    continue
                if is_raise:
                    raise
                return values, None
        else:
            if is_raise:
                # Raise explicitly: a bare `raise` here has no exception of
                # its own to re-raise and only works when the caller happens
                # to be inside an `except` block.
                raise TypeError(f"{type(val)} is not convertible to datetime")
            return values, None
    return oresult, None
cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc):
    # Resolve the keyword strings "now" and "today".
    #
    # On a match the timestamp's integer value is written through `iresult`
    # and True is returned; any other string returns False and leaves
    # `iresult` untouched.
    #
    # We delay this check for as long as possible
    # because it catches relatively rare cases
    if val == "today":
        iresult[0] = Timestamp.today().value
        return True

    if val != "now":
        return False

    iresult[0] = Timestamp.utcnow().value
    if utc:
        return True

    # GH#18705 make sure to_datetime("now") matches Timestamp("now")
    warnings.warn(
        "The parsing of 'now' in pd.to_datetime without `utc=True` is "
        "deprecated. In a future version, this will match Timestamp('now') "
        "and Timestamp.now()",
        FutureWarning,
        stacklevel=1,
    )
    return True