A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/_libs/tslibs/timedeltas.pyx

1642 lines
47 KiB

import collections
import warnings
import cython
from cpython.object cimport (
Py_EQ,
Py_NE,
PyObject_RichCompare,
)
import numpy as np
cimport numpy as cnp
from numpy cimport (
int64_t,
ndarray,
)
cnp.import_array()
from cpython.datetime cimport (
PyDateTime_Check,
PyDateTime_IMPORT,
PyDelta_Check,
timedelta,
)
PyDateTime_IMPORT
cimport pandas._libs.tslibs.util as util
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.conversion cimport (
cast_from_unit,
precision_from_unit,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
c_nat_strings as nat_strings,
checknull_with_nat,
)
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
cmp_scalar,
get_datetime64_unit,
get_timedelta64_value,
pandas_timedeltastruct,
td64_to_tdstruct,
)
from pandas._libs.tslibs.offsets cimport is_tick_object
from pandas._libs.tslibs.util cimport (
is_array,
is_datetime64_object,
is_float_object,
is_integer_object,
is_timedelta64_object,
)
from pandas._libs.tslibs.fields import (
RoundTo,
round_nsint64,
)
# ----------------------------------------------------------------------
# Constants
# components named tuple
# Field order runs from the coarsest unit (days) to the finest (nanoseconds);
# positional construction elsewhere in this module depends on that order.
Components = collections.namedtuple(
    "Components",
    "days hours minutes seconds milliseconds microseconds nanoseconds",
)
# Lookup table from every accepted unit alias to its canonical abbreviation.
# NOTE(review): the only lookup visible in this file (parse_timedelta_unit)
# lower-cases its key first and short-circuits "M", so the upper-case keys
# below look unreachable from that path -- confirm before pruning them.
cdef dict timedelta_abbrevs = {
# years, months, weeks and days
"Y": "Y",
"y": "Y",
"M": "M",
"W": "W",
"w": "W",
"D": "D",
"d": "D",
"days": "D",
"day": "D",
# hours
"hours": "h",
"hour": "h",
"hr": "h",
"h": "h",
# minutes (note: lower-case "m" means minutes here)
"m": "m",
"minute": "m",
"min": "m",
"minutes": "m",
"t": "m",
# seconds
"s": "s",
"seconds": "s",
"sec": "s",
"second": "s",
# milliseconds
"ms": "ms",
"milliseconds": "ms",
"millisecond": "ms",
"milli": "ms",
"millis": "ms",
"l": "ms",
# microseconds
"us": "us",
"microseconds": "us",
"microsecond": "us",
"µs": "us",
"micro": "us",
"micros": "us",
"u": "us",
# nanoseconds
"ns": "ns",
"nanoseconds": "ns",
"nano": "ns",
"nanos": "ns",
"nanosecond": "ns",
"n": "ns",
}
# Sentinel used as the default of Timedelta.__new__'s ``value`` argument so
# that "no value passed" can be detected with an identity check.
_no_input = object()
# ----------------------------------------------------------------------
# API
@cython.boundscheck(False)
@cython.wraparound(False)
def ints_to_pytimedelta(const int64_t[:] arr, box=False):
    """
    Box an array of i8 nanosecond values into timedelta-like objects.

    Parameters
    ----------
    arr : ndarray[int64_t]
        Values to convert; entries equal to NPY_NAT become NaT.
    box : bool, default False
        If True build Timedelta objects, otherwise datetime.timedelta.

    Returns
    -------
    result : ndarray[object]
        array of Timedelta or timedeltas objects
    """
    cdef:
        Py_ssize_t idx, count = len(arr)
        int64_t nanos
        object[:] out = np.empty(count, dtype=object)

    for idx in range(count):
        nanos = arr[idx]
        if nanos == NPY_NAT:
            out[idx] = <object>NaT
        elif box:
            out[idx] = Timedelta(nanos)
        else:
            out[idx] = timedelta(microseconds=int(nanos) / 1000)

    # .base exposes the np.ndarray that backs the memoryview
    return out.base
# ----------------------------------------------------------------------
cpdef int64_t delta_to_nanoseconds(delta) except? -1:
    """
    Return the nanosecond count represented by a timedelta-like scalar.

    Tick offsets, Timedelta, np.timedelta64, integers (returned unchanged)
    and datetime.timedelta are accepted.

    Raises
    ------
    OutOfBoundsTimedelta
        If a datetime.timedelta overflows while being converted.
    TypeError
        If ``delta`` is none of the supported types.
    """
    if is_tick_object(delta):
        return delta.nanos

    if isinstance(delta, _Timedelta):
        # unwrap to the raw integer; it is returned by the integer branch
        delta = delta.value

    if is_timedelta64_object(delta):
        return get_timedelta64_value(ensure_td64ns(delta))
    elif is_integer_object(delta):
        return delta
    elif not PyDelta_Check(delta):
        raise TypeError(type(delta))

    try:
        # the return stays inside the try so an overflow raised while
        # producing the int64_t result is translated as well
        whole_micros = (
            (delta.days * 86_400 + delta.seconds) * 1_000_000
            + delta.microseconds
        )
        return whole_micros * 1000
    except OverflowError as err:
        from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta
        raise OutOfBoundsTimedelta(*err.args) from err
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
    """
    Translate a NPY_DATETIMEUNIT enum member into its unit abbreviation.

    Raises NotImplementedError for members without a mapping here.
    """
    if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
        # generic -> default to nanoseconds
        return "ns"
    if unit == NPY_DATETIMEUNIT.NPY_FR_us:
        return "us"
    if unit == NPY_DATETIMEUNIT.NPY_FR_ms:
        return "ms"
    if unit == NPY_DATETIMEUNIT.NPY_FR_s:
        return "s"
    if unit == NPY_DATETIMEUNIT.NPY_FR_m:
        return "m"
    if unit == NPY_DATETIMEUNIT.NPY_FR_h:
        return "h"
    if unit == NPY_DATETIMEUNIT.NPY_FR_D:
        return "D"
    if unit == NPY_DATETIMEUNIT.NPY_FR_W:
        return "W"
    if unit == NPY_DATETIMEUNIT.NPY_FR_M:
        return "M"
    if unit == NPY_DATETIMEUNIT.NPY_FR_Y:
        return "Y"
    raise NotImplementedError(unit)
@cython.overflowcheck(True)
cdef object ensure_td64ns(object ts):
    """
    Overflow-safe implementation of td64.astype("m8[ns]")

    Parameters
    ----------
    ts : np.timedelta64

    Returns
    -------
    np.timedelta64[ns]
        ``ts`` itself when its unit is already ns or generic.

    Raises
    ------
    OutOfBoundsTimedelta
        If scaling the value to nanoseconds overflows.
    """
    cdef:
        NPY_DATETIMEUNIT src_unit
        int64_t raw, factor, scaled
        str abbrev

    src_unit = get_datetime64_unit(ts)
    if (
        src_unit == NPY_DATETIMEUNIT.NPY_FR_ns
        or src_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
    ):
        # nothing to rescale
        return ts

    abbrev = npy_unit_to_abbrev(src_unit)
    raw = get_timedelta64_value(ts)
    factor = precision_from_unit(abbrev)[0]
    try:
        # NB: cython#1381 this cannot be an in-place multiplication
        scaled = raw * factor
    except OverflowError as err:
        from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta
        raise OutOfBoundsTimedelta(ts) from err

    return np.timedelta64(scaled, "ns")
cdef convert_to_timedelta64(object ts, str unit):
"""
Convert an incoming object to a timedelta64 if possible.
Before calling, unit must be standardized to avoid repeated unit conversion
Handle these types of objects:
- timedelta/Timedelta
- timedelta64
- an offset
- np.int64 (with unit providing a possible modifier)
- float (with unit providing a possible modifier)
- str (ISO 8601 duration or regular timedelta string)
- None/NaT
Return a np.timedelta64 with "ns" resolution.
Raise ValueError for any other input type.
"""
if checknull_with_nat(ts):
return np.timedelta64(NPY_NAT, "ns")
elif isinstance(ts, _Timedelta):
# already in the proper format
ts = np.timedelta64(ts.value, "ns")
elif is_timedelta64_object(ts):
ts = ensure_td64ns(ts)
elif is_integer_object(ts):
# an integer equal to the NaT sentinel is treated as NaT
if ts == NPY_NAT:
return np.timedelta64(NPY_NAT, "ns")
else:
# "Y", "M" and "W" are handed to numpy with their own unit; the final
# astype below brings them to nanoseconds
if unit in ["Y", "M", "W"]:
ts = np.timedelta64(ts, unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts, "ns")
elif is_float_object(ts):
# floats are truncated with int() for the "Y"/"M"/"W" units
if unit in ["Y", "M", "W"]:
ts = np.timedelta64(int(ts), unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts, "ns")
elif isinstance(ts, str):
# strings starting with "P" or "-P" go to the ISO 8601 duration parser
if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"):
ts = parse_iso_format_string(ts)
else:
ts = parse_timedelta_string(ts)
ts = np.timedelta64(ts, "ns")
elif is_tick_object(ts):
ts = np.timedelta64(ts.nanos, "ns")
# anything not converted above must be a datetime.timedelta at this point,
# otherwise the input type is unsupported
if PyDelta_Check(ts):
ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
elif not is_timedelta64_object(ts):
raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}")
return ts.astype("timedelta64[ns]")
@cython.boundscheck(False)
@cython.wraparound(False)
def array_to_timedelta64(
ndarray[object] values, str unit=None, str errors="raise"
) -> ndarray:
"""
Convert an ndarray to an array of timedeltas. If errors == 'coerce',
coerce non-convertible objects to NaT. Otherwise, raise.
Parameters
----------
values : ndarray[object]
unit : str, optional
Unit applied to the entries in the fallback path; 'ns' is used when None.
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
Only 'coerce' changes the behaviour of this function; 'ignore' and
'raise' both let conversion errors propagate.
Returns
-------
np.ndarray[timedelta64ns]
Raises
------
ValueError
If ``errors`` is not one of the accepted values, if ``unit`` is given
while the input contains a str (unless errors == 'coerce'), or if an
entry cannot be converted and errors != 'coerce'.
"""
cdef:
Py_ssize_t i, n
int64_t[:] iresult
if errors not in {'ignore', 'raise', 'coerce'}:
raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}")
n = values.shape[0]
result = np.empty(n, dtype='m8[ns]')
# i8 view over the same buffer, used for writing the NaT sentinel directly
iresult = result.view('i8')
if unit is not None:
for i in range(n):
if isinstance(values[i], str) and errors != "coerce":
raise ValueError(
"unit must not be specified if the input contains a str"
)
# Usually, we have all strings. If so, we hit the fast path.
# If this path fails, we try conversion a different way, and
# this is where all of the error handling will take place.
try:
for i in range(n):
if values[i] is NaT:
# we allow this check in the fast-path because NaT is a C-object
# so this is an inexpensive check
iresult[i] = NPY_NAT
else:
result[i] = parse_timedelta_string(values[i])
except (TypeError, ValueError):
# fallback: restart from the first element and convert each entry
# individually with the standardized unit
parsed_unit = parse_timedelta_unit(unit or 'ns')
for i in range(n):
try:
result[i] = convert_to_timedelta64(values[i], parsed_unit)
except ValueError as err:
if errors == 'coerce':
result[i] = NPY_NAT
elif "unit abbreviation w/o a number" in str(err):
# re-raise with more pertinent message
msg = f"Could not convert '{values[i]}' to NumPy timedelta"
raise ValueError(msg) from err
else:
raise
return iresult.base # .base to access underlying np.ndarray
cdef inline int64_t parse_timedelta_string(str ts) except? -1:
"""
Parse a regular format timedelta string. Return an int64_t (in ns)
or raise a ValueError on an invalid parse.
An empty string or a member of ``nat_strings`` yields NPY_NAT.
The string is scanned one character at a time; digits, unit letters,
':' and '.' drive the small state machine described by the flags below.
"""
cdef:
unicode c
bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0
object current_unit = None
int64_t result = 0, m = 0, r
list number = [], frac = [], unit = []
# neg : tracks if we have a leading negative for the value
# have_dot : tracks if we are processing a dot (either post hhmmss or
# inside an expression)
# have_value : track if we have at least 1 leading unit
# have_hhmmss : tracks if we have a regular format hh:mm:ss
# number / frac / unit : characters collected for the token being parsed
# current_unit : last field consumed in an hh:mm:ss pattern (None, 'h', 'm', 's')
if len(ts) == 0 or ts in nat_strings:
return NPY_NAT
for c in ts:
# skip whitespace / commas
if c == ' ' or c == ',':
pass
# positive signs are ignored
elif c == '+':
pass
# neg
elif c == '-':
if neg or have_value or have_hhmmss:
raise ValueError("only leading negative signs are allowed")
neg = 1
# number (ascii codes)
elif ord(c) >= 48 and ord(c) <= 57:
if have_dot:
# we found a dot, but now its just a fraction
if len(unit):
number.append(c)
have_dot = 0
else:
frac.append(c)
elif not len(unit):
number.append(c)
else:
# a digit after a unit starts a new token: flush the previous one
r = timedelta_from_spec(number, frac, unit)
unit, number, frac = [], [c], []
result += timedelta_as_neg(r, neg)
# hh:mm:ss.
elif c == ':':
# we flip this off if we have a leading value
if have_value:
neg = 0
# we are in the pattern hh:mm:ss pattern
if len(number):
# m is the nanosecond multiplier for the field that just ended
if current_unit is None:
current_unit = 'h'
m = 1000000000 * 3600
elif current_unit == 'h':
current_unit = 'm'
m = 1000000000 * 60
elif current_unit == 'm':
current_unit = 's'
m = 1000000000
r = <int64_t>int(''.join(number)) * m
result += timedelta_as_neg(r, neg)
have_hhmmss = 1
else:
raise ValueError(f"expecting hh:mm:ss format, received: {ts}")
unit, number = [], []
# after the decimal point
elif c == '.':
if len(number) and current_unit is not None:
# by definition we had something like hh:mm:ss.fff
# so we need to evaluate the final (seconds) field from a
# hh:mm:ss (so current_unit is 'm')
if current_unit != 'm':
raise ValueError("expected hh:mm:ss format before .")
m = 1000000000
r = <int64_t>int(''.join(number)) * m
result += timedelta_as_neg(r, neg)
have_value = 1
unit, number, frac = [], [], []
have_dot = 1
# unit
else:
unit.append(c)
have_value = 1
have_dot = 0
# end of input: flush whatever token is still pending
# we had a dot, but we have a fractional
# value since we have an unit
if have_dot and len(unit):
r = timedelta_from_spec(number, frac, unit)
result += timedelta_as_neg(r, neg)
# we have a dot as part of a regular format
# e.g. hh:mm:ss.fffffff
elif have_dot:
if ((len(number) or len(frac)) and not len(unit)
and current_unit is None):
raise ValueError("no units specified")
# scale the fractional digits so that the integer below is in ns;
# more than 9 digits are cut off at nanosecond precision
if len(frac) > 0 and len(frac) <= 3:
m = 10**(3 -len(frac)) * 1000 * 1000
elif len(frac) > 3 and len(frac) <= 6:
m = 10**(6 -len(frac)) * 1000
elif len(frac) > 6 and len(frac) <= 9:
m = 10**(9 -len(frac))
else:
m = 1
frac = frac[:9]
r = <int64_t>int(''.join(frac)) * m
result += timedelta_as_neg(r, neg)
# we have a regular format
# we must have seconds at this point (hence the unit is still 'm')
elif current_unit is not None:
if current_unit != 'm':
raise ValueError("expected hh:mm:ss format")
m = 1000000000
r = <int64_t>int(''.join(number)) * m
result += timedelta_as_neg(r, neg)
# we have a last abbreviation
elif len(unit):
if len(number):
r = timedelta_from_spec(number, frac, unit)
result += timedelta_as_neg(r, neg)
else:
raise ValueError("unit abbreviation w/o a number")
# we only have symbols and no numbers
elif len(number) == 0:
raise ValueError("symbols w/o a number")
# treat as nanoseconds
# but only if we don't have anything else
else:
if have_value:
raise ValueError("have leftover units")
if len(number):
r = timedelta_from_spec(number, frac, 'ns')
result += timedelta_as_neg(r, neg)
return result
cdef inline int64_t timedelta_as_neg(int64_t value, bint neg):
    """
    Apply an optional sign flip to a timedelta value.

    Parameters
    ----------
    value : int64_t of the timedelta value
    neg : bool, negate ``value`` when true
    """
    return -value if neg else value
cdef inline timedelta_from_spec(object number, object frac, object unit):
"""
Combine the digits and unit collected by the string parsers and hand
them to ``cast_from_unit``.
Parameters
----------
number : a list of number digits
frac : a list of frac digits
unit : a list of unit characters
Returns
-------
The result of ``cast_from_unit(float("<number>.<frac>"), unit)``.
Raises
------
ValueError : if the joined unit is rejected by ``parse_timedelta_unit``
"""
cdef:
str n
try:
# the unit arrives as a sequence of characters; collapse it to one string
unit = ''.join(unit)
if unit in ["M", "Y", "y"]:
warnings.warn(
"Units 'M', 'Y' and 'y' do not represent unambiguous "
"timedelta values and will be removed in a future version.",
FutureWarning,
stacklevel=2,
)
if unit == 'M':
# To parse ISO 8601 string, 'M' should be treated as minute,
# not month
unit = 'm'
unit = parse_timedelta_unit(unit)
except KeyError:
# NOTE(review): parse_timedelta_unit already converts KeyError into
# ValueError, so this handler looks unreachable -- confirm before relying
# on this message
raise ValueError(f"invalid abbreviation: {unit}")
# rebuild "<integer>.<fraction>" and parse it as a float
n = ''.join(number) + '.' + ''.join(frac)
return cast_from_unit(float(n), unit)
cpdef inline str parse_timedelta_unit(str unit):
    """
    Normalize a unit alias to its canonical abbreviation.

    Parameters
    ----------
    unit : str or None
        None is treated as nanoseconds.

    Returns
    -------
    str
        Canonical unit string.

    Raises
    ------
    ValueError : on non-parseable input
    """
    if unit is None:
        return "ns"
    if unit == "M":
        # upper-case "M" is passed through rather than lower-cased to "m"
        return unit

    try:
        canonical = timedelta_abbrevs[unit.lower()]
    except (KeyError, AttributeError):
        raise ValueError(f"invalid unit abbreviation: {unit}")
    return canonical
# ----------------------------------------------------------------------
# Timedelta ops utilities
cdef bint _validate_ops_compat(other):
    """
    Return True if ``other`` is null/NaT, a timedelta-like scalar or a str,
    i.e. something the timedelta binary ops will try to operate with.
    """
    return (
        checknull_with_nat(other)
        or is_any_td_scalar(other)
        or isinstance(other, str)
    )
def _op_unary_method(func, name):
    """
    Build a unary dunder method named ``name`` that applies ``func`` to the
    nanosecond value and wraps the result in a new Timedelta.
    """
    def f(self):
        transformed = func(self.value)
        return Timedelta(transformed, unit='ns')

    f.__name__ = name
    return f
def _binary_op_method_timedeltalike(op, name):
    """
    Build a binary dunder method named ``name`` around ``op``.

    The generated method only works if the other argument is
    timedelta like or an array of timedeltalike; otherwise it returns
    NotImplemented.
    """
    def f(self, other):
        if other is NaT:
            return NaT

        if is_datetime64_object(other) or (
            PyDateTime_Check(other) and not isinstance(other, ABCTimestamp)
        ):
            # a datetime that is specifically *not* a Timestamp: box it and
            # let the Timestamp methods define the canonical behavior.
            # Actual Timestamps reach the `_validate_ops_compat` check below,
            # which rejects them with NotImplemented.
            from pandas._libs.tslibs.timestamps import Timestamp
            return op(self, Timestamp(other))

        if is_array(other):
            # nd-array like
            kind = other.dtype.kind
            if kind in ['m', 'M']:
                return op(self.to_timedelta64(), other)
            if kind == 'O':
                return np.array([op(self, x) for x in other])
            return NotImplemented

        if not _validate_ops_compat(other):
            # Includes any of our non-cython classes
            return NotImplemented

        try:
            other = Timedelta(other)
        except ValueError:
            # failed to parse as timedelta
            return NotImplemented

        if other is NaT:
            # e.g. if original other was timedelta64('NaT')
            return NaT

        return Timedelta(op(self.value, other.value), unit='ns')

    f.__name__ = name
    return f
# ----------------------------------------------------------------------
# Timedelta Construction
cdef inline int64_t parse_iso_format_string(str ts) except? -1:
"""
Parse an ISO 8601 duration string character by character and return the
duration it describes.
Parameters
----------
ts: str
ISO 8601 Duration formatted string
Returns
-------
ns: int64_t
Precision in nanoseconds of matched ISO 8601 duration
Raises
------
ValueError
If ``ts`` cannot be parsed
"""
cdef:
unicode c
int64_t result = 0, r
int p = 0, sign = 1
object dec_unit = 'ms', err_msg
bint have_dot = 0, have_value = 0, neg = 0
list number = [], unit = []
# p counts fractional digits collected for the current sub-second unit;
# dec_unit walks 'ms' -> 'us' -> 'ns' as every group of three digits fills up
err_msg = f"Invalid ISO 8601 Duration format - {ts}"
# a leading '-' negates the whole duration and is stripped before scanning
if ts[0] == "-":
sign = -1
ts = ts[1:]
for c in ts:
# number (ascii codes)
if 48 <= ord(c) <= 57:
have_value = 1
if have_dot:
if p == 3 and dec_unit != 'ns':
unit.append(dec_unit)
if dec_unit == 'ms':
dec_unit = 'us'
elif dec_unit == 'us':
dec_unit = 'ns'
p = 0
p += 1
if not len(unit):
number.append(c)
else:
# a completed three-digit group is flushed with its sub-second unit
r = timedelta_from_spec(number, '0', unit)
result += timedelta_as_neg(r, neg)
neg = 0
unit, number = [], [c]
else:
if c == 'P' or c == 'T':
pass # ignore marking characters P and T
elif c == '-':
if neg or have_value:
raise ValueError(err_msg)
else:
neg = 1
elif c == "+":
pass
elif c in ['W', 'D', 'H', 'M']:
if c in ['H', 'M'] and len(number) > 2:
raise ValueError(err_msg)
# 'M' is rewritten to 'min' before being used as the unit
if c == 'M':
c = 'min'
unit.append(c)
r = timedelta_from_spec(number, '0', unit)
result += timedelta_as_neg(r, neg)
neg = 0
unit, number = [], []
elif c == '.':
# append any seconds
if len(number):
r = timedelta_from_spec(number, '0', 'S')
result += timedelta_as_neg(r, neg)
unit, number = [], []
have_dot = 1
elif c == 'S':
if have_dot: # ms, us, or ns
if not len(number) or p > 3:
raise ValueError(err_msg)
# pad to 3 digits as required
pad = 3 - p
while pad > 0:
number.append('0')
pad -= 1
r = timedelta_from_spec(number, '0', dec_unit)
result += timedelta_as_neg(r, neg)
else: # seconds
r = timedelta_from_spec(number, '0', 'S')
result += timedelta_as_neg(r, neg)
else:
raise ValueError(err_msg)
if not have_value:
# Received string only - never parsed any values
raise ValueError(err_msg)
return sign*result
cdef _to_py_int_float(v):
    """
    Coerce an integer-like or float-like scalar to a builtin int or float.

    Raises TypeError for any other input.
    """
    # Note: This used to be defined inside Timedelta.__new__
    # but cython will not allow `cdef` functions to be defined dynamically.
    if is_integer_object(v):
        return int(v)
    if not is_float_object(v):
        raise TypeError(f"Invalid type {type(v)}. Must be int or float.")
    return float(v)
# Similar to Timestamp/datetime, this is a construction requirement for
# timedeltas that we need to do object instantiation in python. This will
# serve as a C extension type that shadows the Python class, where we do any
# heavy lifting.
cdef class _Timedelta(timedelta):
# cdef readonly:
# int64_t value # nanoseconds
# object freq # frequency reference
# bint is_populated # are my components populated
# int64_t _d, _h, _m, _s, _ms, _us, _ns
# higher than np.ndarray and np.matrix
__array_priority__ = 100
def __hash__(_Timedelta self):
    # Without a sub-microsecond remainder, reuse datetime.timedelta's hash;
    # otherwise hash the raw nanosecond value.
    if not self._has_ns():
        return timedelta.__hash__(self)
    return hash(self.value)
def __richcmp__(_Timedelta self, object other, int op):
# Rich comparison against timedelta-like scalars, NaT and ndarrays.
# Scalars are compared through their nanosecond ``value``; unsupported
# operands yield NotImplemented.
cdef:
_Timedelta ots
int ndim
if isinstance(other, _Timedelta):
ots = other
elif is_any_td_scalar(other):
ots = Timedelta(other)
# TODO: watch out for overflows
elif other is NaT:
# every comparison with NaT is False except !=
return op == Py_NE
elif util.is_array(other):
# TODO: watch out for zero-dim
if other.dtype.kind == "m":
return PyObject_RichCompare(self.asm8, other, op)
elif other.dtype.kind == "O":
# operate element-wise
return np.array(
[PyObject_RichCompare(self, x, op) for x in other],
dtype=bool,
)
# arrays of any other dtype: never equal, always unequal, and not
# orderable
if op == Py_EQ:
return np.zeros(other.shape, dtype=bool)
elif op == Py_NE:
return np.ones(other.shape, dtype=bool)
return NotImplemented # let other raise TypeError
else:
return NotImplemented
return cmp_scalar(self.value, ots.value, op)
cpdef bint _has_ns(self):
# True when the nanosecond value is not a whole number of microseconds
return self.value % 1000 != 0
def _ensure_components(_Timedelta self):
"""
Compute the component fields from ``self.value`` and cache them on the
instance; does nothing when ``is_populated`` is already set.
"""
if self.is_populated:
return
cdef:
pandas_timedeltastruct tds
# fill the struct from the nanosecond value, then copy each field over
td64_to_tdstruct(self.value, &tds)
self._d = tds.days
self._h = tds.hrs
self._m = tds.min
self._s = tds.sec
self._ms = tds.ms
self._us = tds.us
self._ns = tds.ns
self._seconds = tds.seconds
self._microseconds = tds.microseconds
# mark the cache as valid so later calls return early
self.is_populated = 1
cpdef timedelta to_pytimedelta(_Timedelta self):
"""
Convert a pandas Timedelta object into a python ``datetime.timedelta`` object.
The Timedelta's nanosecond ``value`` is divided by 1000 and passed to
``datetime.timedelta`` as microseconds.
Returns
-------
datetime.timedelta
See Also
--------
to_timedelta : Convert argument to Timedelta type.
Notes
-----
Any nanosecond resolution will be lost.
"""
# true division: the microsecond count is handed over as a float
return timedelta(microseconds=int(self.value) / 1000)
def to_timedelta64(self) -> np.timedelta64:
    """
    Return a numpy.timedelta64 object with 'ns' precision.

    Returns
    -------
    numpy.timedelta64
        Built from the nanosecond ``value`` of this Timedelta.
    """
    nanos = self.value
    return np.timedelta64(nanos, 'ns')
def to_numpy(self, dtype=None, copy=False) -> np.timedelta64:
"""
Convert the Timedelta to a NumPy timedelta64.
.. versionadded:: 0.25.0
This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
copy parameters are available here only for compatibility; they must be
left at their defaults.
Returns
-------
numpy.timedelta64
Raises
------
ValueError
If ``dtype`` is not None or ``copy`` is not False.
See Also
--------
Series.to_numpy : Similar method for Series.
"""
# reject non-default arguments explicitly instead of silently ignoring them
if dtype is not None or copy is not False:
raise ValueError(
"Timedelta.to_numpy dtype and copy arguments are ignored"
)
return self.to_timedelta64()
def view(self, dtype):
    """
    Array view compatibility.

    Wraps the value in a unit-less numpy.timedelta64 and reinterprets it
    as ``dtype``.
    """
    as_td64 = np.timedelta64(self.value)
    return as_td64.view(dtype)
@property
def components(self):
"""
Return a components namedtuple-like.
The fields are, in order: days, hours, minutes, seconds, milliseconds,
microseconds and nanoseconds.
"""
# make sure the cached component fields are filled in before reading them
self._ensure_components()
# return the named tuple
return Components(self._d, self._h, self._m, self._s,
self._ms, self._us, self._ns)
@property
def delta(self):
    """
    Return the timedelta in nanoseconds (ns), for internal compatibility.

    Returns
    -------
    int
        Timedelta in nanoseconds.

    Examples
    --------
    >>> td = pd.Timedelta('1 days 42 ns')
    >>> td.delta
    86400000000042

    >>> td = pd.Timedelta('3 s')
    >>> td.delta
    3000000000

    >>> td = pd.Timedelta('3 ms 5 us')
    >>> td.delta
    3005000

    >>> td = pd.Timedelta(42, unit='ns')
    >>> td.delta
    42
    """
    nanos = self.value
    return nanos
@property
def asm8(self) -> np.timedelta64:
    """
    Return a numpy timedelta64 array scalar view.

    Provides access to the array scalar view (i.e. a combination of the
    value and the units) associated with the numpy.timedelta64().view(),
    including a 64-bit integer representation of the timedelta in
    nanoseconds (Python int compatible).

    Returns
    -------
    numpy timedelta64 array scalar view
        Array scalar view of the timedelta in nanoseconds.

    Examples
    --------
    >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
    >>> td.asm8
    numpy.timedelta64(86520000003042,'ns')

    >>> td = pd.Timedelta('2 min 3 s')
    >>> td.asm8
    numpy.timedelta64(123000000000,'ns')

    >>> td = pd.Timedelta('3 ms 5 us')
    >>> td.asm8
    numpy.timedelta64(3005000,'ns')

    >>> td = pd.Timedelta(42, unit='ns')
    >>> td.asm8
    numpy.timedelta64(42,'ns')
    """
    # reinterpret the 64-bit integer as timedelta64[ns]
    as_i8 = np.int64(self.value)
    return as_i8.view('m8[ns]')
@property
def resolution_string(self) -> str:
    """
    Return a string representing the lowest timedelta resolution.

    Each timedelta has a defined resolution that represents the lowest OR
    most granular level of precision. Each level of resolution is
    represented by a short string as defined below:

    Resolution:     Return value

    * Days:         'D'
    * Hours:        'H'
    * Minutes:      'T'
    * Seconds:      'S'
    * Milliseconds: 'L'
    * Microseconds: 'U'
    * Nanoseconds:  'N'

    Returns
    -------
    str
        Timedelta resolution.

    Examples
    --------
    >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
    >>> td.resolution_string
    'N'

    >>> td = pd.Timedelta('1 days 2 min 3 us')
    >>> td.resolution_string
    'U'

    >>> td = pd.Timedelta('2 min 3 s')
    >>> td.resolution_string
    'S'

    >>> td = pd.Timedelta(36, unit='us')
    >>> td.resolution_string
    'U'
    """
    self._ensure_components()

    # check from the finest component upwards; the first non-zero one wins
    if self._ns:
        return "N"
    if self._us:
        return "U"
    if self._ms:
        return "L"
    if self._s:
        return "S"
    if self._m:
        return "T"
    if self._h:
        return "H"
    return "D"
@property
def nanoseconds(self):
    """
    Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.

    Returns
    -------
    int
        Number of nanoseconds.

    See Also
    --------
    Timedelta.components : Return all attributes with assigned values
        (i.e. days, hours, minutes, seconds, milliseconds, microseconds,
        nanoseconds).

    Examples
    --------
    **Using string input**

    >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')

    >>> td.nanoseconds
    42

    **Using integer input**

    >>> td = pd.Timedelta(42, unit='ns')
    >>> td.nanoseconds
    42
    """
    # the component cache has to be filled before ``_ns`` is meaningful
    self._ensure_components()
    remainder_ns = self._ns
    return remainder_ns
def _repr_base(self, format=None) -> str:
    """
    Render the Timedelta as text.

    Parameters
    ----------
    format : None|all|sub_day|long

    Returns
    -------
    converted : string of a Timedelta
    """
    cdef object sign, seconds_pretty, subs, fmt, comp_dict

    self._ensure_components()

    # negative day counts get an explicit '+' in front of the time part
    sign = " +" if self._d < 0 else " "

    if format == 'all':
        fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}."
               "{milliseconds:03}{microseconds:03}{nanoseconds:03}")
    else:
        # if we have a partial day
        subs = (self._h or self._m or self._s or
                self._ms or self._us or self._ns)

        # sub-second digits are only rendered when at least one is non-zero
        seconds_fmt = "{seconds:02}"
        if self._ms or self._us or self._ns:
            seconds_fmt += ".{milliseconds:03}{microseconds:03}"
            if self._ns:
                # GH#9309
                seconds_fmt += "{nanoseconds:03}"

        if format == 'sub_day' and not self._d:
            fmt = "{hours:02}:{minutes:02}:" + seconds_fmt
        elif subs or format == 'long':
            fmt = "{days} days{sign}{hours:02}:{minutes:02}:" + seconds_fmt
        else:
            fmt = "{days} days"

    comp_dict = self.components._asdict()
    comp_dict['sign'] = sign
    return fmt.format(**comp_dict)
def __repr__(self) -> str:
    # wrap the 'long' rendering in a constructor-like form
    body = self._repr_base(format='long')
    return f"Timedelta('{body}')"
def __str__(self) -> str:
    # str() uses the same 'long' rendering as repr(), without the wrapper
    text = self._repr_base(format='long')
    return text
def __bool__(self) -> bool:
    # only a zero-length duration is falsy
    is_nonzero = self.value != 0
    return is_nonzero
def isoformat(self) -> str:
    """
    Format Timedelta as ISO 8601 Duration like
    ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the
    values. See https://en.wikipedia.org/wiki/ISO_8601#Durations.

    Returns
    -------
    str

    See Also
    --------
    Timestamp.isoformat : Function is used to convert the given
        Timestamp object into the ISO format.

    Notes
    -----
    The longest component is days, whose value may be larger than
    365.
    Every component is always included, even if its value is 0.
    Pandas uses nanosecond precision, so up to 9 decimal places may
    be included in the seconds component.
    Trailing 0's are removed from the seconds component after the decimal.
    We do not 0 pad components, so it's `...T5H...`, not `...T05H...`

    Examples
    --------
    >>> td = pd.Timedelta(days=6, minutes=50, seconds=3,
    ...                   milliseconds=10, microseconds=10, nanoseconds=12)

    >>> td.isoformat()
    'P6DT0H50M3.010010012S'
    >>> pd.Timedelta(hours=1, seconds=10).isoformat()
    'P0DT1H0M10S'
    >>> pd.Timedelta(days=500.5).isoformat()
    'P500DT12H0M0S'
    """
    parts = self.components

    # nine fractional digits: three each for ms, us and ns
    fraction = "{:0>3}{:0>3}{:0>3}".format(
        parts.milliseconds, parts.microseconds, parts.nanoseconds
    )
    # Trim unnecessary 0s, 1.000000000 -> 1
    secs = f"{parts.seconds}.{fraction}".rstrip('0').rstrip('.')

    return f"P{parts.days}DT{parts.hours}H{parts.minutes}M{secs}S"
# Python front end to C extension type _Timedelta
# This serves as the box for timedelta64
class Timedelta(_Timedelta):
"""
Represents a duration, the difference between two dates or times.
Timedelta is the pandas equivalent of python's ``datetime.timedelta``
and is interchangeable with it in most cases.
Parameters
----------
value : Timedelta, timedelta, np.timedelta64, str, or int
unit : str, default 'ns'
Denote the unit of the input, if input is an integer.
Possible values:
* 'W', 'D', 'T', 'S', 'L', 'U', or 'N'
* 'days' or 'day'
* 'hours', 'hour', 'hr', or 'h'
* 'minutes', 'minute', 'min', or 'm'
* 'seconds', 'second', or 'sec'
* 'milliseconds', 'millisecond', 'millis', or 'milli'
* 'microseconds', 'microsecond', 'micros', or 'micro'
* 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'.
**kwargs
Available kwargs: {days, seconds, microseconds,
milliseconds, minutes, hours, weeks}.
Values for construction in compat with datetime.timedelta.
Numpy ints and floats will be coerced to python ints and floats.
Notes
-----
The constructor may take in either both values of value and unit or
kwargs as above. Either one of them must be used during initialization
The ``.value`` attribute is always in ns.
If the precision is higher than nanoseconds, the precision of the duration is
truncated to nanoseconds.
Examples
--------
Here we initialize Timedelta object with both value and unit
>>> td = pd.Timedelta(1, "d")
>>> td
Timedelta('1 days 00:00:00')
Here we initialize the Timedelta object with kwargs
>>> td2 = pd.Timedelta(days=1)
>>> td2
Timedelta('1 days 00:00:00')
We see that either way we get the same result
"""
_req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds",
"milliseconds", "microseconds", "nanoseconds"}
def __new__(cls, object value=_no_input, unit=None, **kwargs):
# Construct a Timedelta either from ``value`` (optionally with ``unit``) or
# from component keywords. Returns NaT for null-like input.
cdef _Timedelta td_base
if value is _no_input:
# keyword-only construction
if not len(kwargs):
raise ValueError("cannot construct a Timedelta without a "
"value/unit or descriptive keywords "
"(days,seconds....)")
# numpy ints/floats are coerced to builtin ints/floats first
kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs}
unsupported_kwargs = set(kwargs)
unsupported_kwargs.difference_update(cls._req_any_kwargs_new)
if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs):
raise ValueError(
"cannot construct a Timedelta from the passed arguments, "
"allowed keywords are "
"[weeks, days, hours, minutes, seconds, "
"milliseconds, microseconds, nanoseconds]"
)
# GH43764, convert any input to nanoseconds first and then
# create the timestamp. This ensures that any potential
# nanosecond contributions from kwargs parsed as floats
# are taken into consideration.
# NOTE: despite its name, ``seconds`` holds nanoseconds after the final
# multiplication by 1_000_000_000
seconds = int((
(
(kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24
+ kwargs.get('hours', 0)
) * 3600
+ kwargs.get('minutes', 0) * 60
+ kwargs.get('seconds', 0)
) * 1_000_000_000
)
value = np.timedelta64(
int(kwargs.get('nanoseconds', 0))
+ int(kwargs.get('microseconds', 0) * 1_000)
+ int(kwargs.get('milliseconds', 0) * 1_000_000)
+ seconds
)
if unit in {'Y', 'y', 'M'}:
raise ValueError(
"Units 'M', 'Y', and 'y' are no longer supported, as they do not "
"represent unambiguous timedelta values durations."
)
# GH 30543 if pd.Timedelta already passed, return it
# check that only value is passed
if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0:
return value
elif isinstance(value, _Timedelta):
value = value.value
elif isinstance(value, str):
if unit is not None:
raise ValueError("unit must not be specified if the value is a str")
# strings starting with "P" or "-P" go to the ISO 8601 duration parser
if (len(value) > 0 and value[0] == 'P') or (
len(value) > 1 and value[:2] == '-P'
):
value = parse_iso_format_string(value)
else:
value = parse_timedelta_string(value)
value = np.timedelta64(value)
elif PyDelta_Check(value):
value = convert_to_timedelta64(value, 'ns')
elif is_timedelta64_object(value):
if unit is not None:
value = value.astype(f'timedelta64[{unit}]')
value = ensure_td64ns(value)
elif is_tick_object(value):
value = np.timedelta64(value.nanos, 'ns')
elif is_integer_object(value) or is_float_object(value):
# unit=None is de-facto 'ns'
unit = parse_timedelta_unit(unit)
value = convert_to_timedelta64(value, unit)
elif checknull_with_nat(value):
return NaT
else:
raise ValueError(
"Value must be Timedelta, string, integer, "
f"float, timedelta or convertible, not {type(value).__name__}"
)
# from here on ``value`` is reduced to its 64-bit integer representation
if is_timedelta64_object(value):
value = value.view('i8')
# nat
if value == NPY_NAT:
return NaT
# make timedelta happy
# the datetime.timedelta base is created with whole microseconds (floor
# division); the exact nanosecond count is stored in ``value``
td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000)
td_base.value = value
# components are computed lazily by _ensure_components
td_base.is_populated = 0
return td_base
def __setstate__(self, state):
    """
    Restore the nanosecond ``value`` from pickled state.

    Parameters
    ----------
    state : int or tuple
        Either the bare value or a 1-tuple ``(value,)``, which is the
        shape ``__reduce__`` builds for its constructor arguments.
    """
    # The previous ``(value) = state`` was a plain assignment, not tuple
    # unpacking, so a 1-tuple state was assigned to ``self.value`` whole.
    # Bare (non-tuple) states keep working exactly as before.
    if isinstance(state, tuple):
        (value,) = state
    else:
        value = state
    self.value = value
def __reduce__(self):
    # pickle as ``Timedelta(value)``: a callable plus a 1-tuple of arguments
    return (Timedelta, (self.value,))
@cython.cdivision(True)
def _round(self, freq, mode):
    """
    Round the nanosecond value to a multiple of ``freq``.

    Parameters
    ----------
    freq : str
        Frequency string; converted with ``to_offset`` and its ``nanos``
        used as the rounding unit.
    mode : RoundTo
        Rounding mode forwarded to ``round_nsint64``.

    Returns
    -------
    Timedelta
    """
    # the unused ``remainder`` C local of the previous version is dropped
    cdef:
        int64_t result, unit
        ndarray[int64_t] arr

    from pandas._libs.tslibs.offsets import to_offset
    unit = to_offset(freq).nanos

    # round_nsint64 works on arrays, so wrap the scalar in a 1-element array
    arr = np.array([self.value], dtype="i8")
    result = round_nsint64(arr, mode, unit)[0]
    return Timedelta(result, unit="ns")
def round(self, freq):
    """
    Round the Timedelta to the specified resolution.

    Parameters
    ----------
    freq : str
        Frequency string indicating the rounding resolution.

    Returns
    -------
    a new Timedelta rounded to the given resolution of `freq`

    Raises
    ------
    ValueError if the freq cannot be converted
    """
    rounding_mode = RoundTo.NEAREST_HALF_EVEN
    return self._round(freq, rounding_mode)
def floor(self, freq):
    """
    Return a new Timedelta floored to this resolution.

    Parameters
    ----------
    freq : str
        Frequency string indicating the flooring resolution.
    """
    rounding_mode = RoundTo.MINUS_INFTY
    return self._round(freq, rounding_mode)
def ceil(self, freq):
    """
    Return a new Timedelta ceiled to this resolution.

    Delegates to ``_round`` with the ``RoundTo.PLUS_INFTY`` mode.

    Parameters
    ----------
    freq : str
        Frequency string indicating the ceiling resolution.

    Returns
    -------
    Timedelta
    """
    mode = RoundTo.PLUS_INFTY
    return self._round(freq, mode)
# ----------------------------------------------------------------
# Arithmetic Methods
# TODO: Can some of these be defined in the cython class?

# Unary operators: each is built from a one-argument callable plus the
# name of the dunder method being defined.
__neg__ = _op_unary_method(lambda val: -val, '__neg__')
__pos__ = _op_unary_method(lambda val: val, '__pos__')
__abs__ = _op_unary_method(lambda val: abs(val), '__abs__')

# Binary operators against timedelta-like operands.  Addition uses the same
# expression for the forward and reflected method; the reflected
# subtraction swaps its two arguments inside the lambda.
__add__ = _binary_op_method_timedeltalike(
    lambda lhs, rhs: lhs + rhs, '__add__'
)
__radd__ = _binary_op_method_timedeltalike(
    lambda lhs, rhs: lhs + rhs, '__radd__'
)
__sub__ = _binary_op_method_timedeltalike(
    lambda lhs, rhs: lhs - rhs, '__sub__'
)
__rsub__ = _binary_op_method_timedeltalike(
    lambda lhs, rhs: rhs - lhs, '__rsub__'
)
def __mul__(self, other):
    """
    Multiply this Timedelta by a number or an array.

    Parameters
    ----------
    other : int, float or array
        An integer or float scales ``self.value`` and the product is wrapped
        in a new Timedelta with ``unit='ns'``.  An array is multiplied by
        ``self.to_timedelta64()``.

    Returns
    -------
    Timedelta, array result, or NotImplemented
        ``NotImplemented`` for any other operand type.
    """
    is_number = is_integer_object(other) or is_float_object(other)
    if is_number:
        return Timedelta(other * self.value, unit='ns')

    if is_array(other):
        # ndarray-like
        return other * self.to_timedelta64()

    return NotImplemented

# The same function serves as the reflected multiplication.
__rmul__ = __mul__
def __truediv__(self, other):
    """
    Compute ``self / other``.

    Parameters
    ----------
    other : timedelta-like, int, float or array
        * Anything accepted by ``_should_cast_to_timedelta`` is converted
          with ``Timedelta(other)``; the result is the ratio of the two
          ``value`` attributes as a float, or ``np.nan`` when the converted
          operand is ``NaT``.
        * An integer or float divides ``self.value`` and the quotient is
          wrapped in a new Timedelta with ``unit='ns'``.
        * An array divides ``self.to_timedelta64()``.

    Returns
    -------
    float, Timedelta, array result, or NotImplemented
        ``NotImplemented`` for any other operand type.
    """
    if _should_cast_to_timedelta(other):
        # We interpret NaT as timedelta64("NaT")
        divisor = Timedelta(other)
        if divisor is NaT:
            return np.nan
        return self.value / float(divisor.value)

    if is_integer_object(other) or is_float_object(other):
        # integers or floats
        return Timedelta(self.value / other, unit='ns')

    if is_array(other):
        return self.to_timedelta64() / other

    return NotImplemented
def __rtruediv__(self, other):
    """
    Compute ``other / self`` (reflected true division).

    Parameters
    ----------
    other : timedelta-like or array
        * Anything accepted by ``_should_cast_to_timedelta`` is converted
          with ``Timedelta(other)``; the result is the ratio of the two
          ``value`` attributes as a float, or ``np.nan`` when the converted
          operand is ``NaT``.
        * An object-dtype array is divided element by element.
        * Any other array is divided by ``self.to_timedelta64()``.

    Returns
    -------
    float, ndarray, or NotImplemented
        ``NotImplemented`` for any other operand type.
    """
    if _should_cast_to_timedelta(other):
        # We interpret NaT as timedelta64("NaT")
        numerator = Timedelta(other)
        if numerator is NaT:
            return np.nan
        return float(numerator.value) / self.value

    if not is_array(other):
        return NotImplemented

    if other.dtype.kind == "O":
        # GH#31869
        return np.array([elem / self for elem in other])
    return other / self.to_timedelta64()
def __floordiv__(self, other):
    """
    Compute ``self // other``.

    Parameters
    ----------
    other : timedelta-like, int, float or array
        * Anything accepted by ``_should_cast_to_timedelta`` is converted
          with ``Timedelta(other)``; the result is the floor quotient of
          the two ``value`` attributes, or ``np.nan`` when the converted
          operand is ``NaT``.
        * An integer or float floor-divides ``self.value`` and the result
          is wrapped in a new Timedelta with ``unit='ns'``.
        * An array with dtype kind ``'m'`` goes through
          ``_broadcast_floordiv_td64``; kinds ``'i'``, ``'u'`` and ``'f'``
          give a Timedelta for zero-dimensional input and otherwise
          floor-divide ``self.to_timedelta64()``.

    Returns
    -------
    int, float, Timedelta, ndarray, or NotImplemented
        ``NotImplemented`` for any non-array operand not listed above.

    Raises
    ------
    TypeError
        If `other` is an array with any other dtype kind.
    """
    # numpy does not implement floordiv for timedelta64 dtype, so we cannot
    # just defer
    if _should_cast_to_timedelta(other):
        # We interpret NaT as timedelta64("NaT")
        divisor = Timedelta(other)
        if divisor is NaT:
            return np.nan
        return self.value // divisor.value

    if is_integer_object(other) or is_float_object(other):
        return Timedelta(self.value // other, unit='ns')

    if not is_array(other):
        return NotImplemented

    if other.dtype.kind == 'm':
        # also timedelta-like
        return _broadcast_floordiv_td64(self.value, other, _floordiv)

    if other.dtype.kind in ['i', 'u', 'f']:
        if other.ndim == 0:
            return Timedelta(self.value // other)
        return self.to_timedelta64() // other

    raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')
def __rfloordiv__(self, other):
    """
    Compute ``other // self`` (reflected floor division).

    Parameters
    ----------
    other : timedelta-like or array
        * Anything accepted by ``_should_cast_to_timedelta`` is converted
          with ``Timedelta(other)``; the result is the floor quotient of
          the two ``value`` attributes, or ``np.nan`` when the converted
          operand is ``NaT``.
        * An array with dtype kind ``'m'`` goes through
          ``_broadcast_floordiv_td64``.

    Returns
    -------
    int, float, ndarray, or NotImplemented
        ``NotImplemented`` for any non-array operand not listed above.

    Raises
    ------
    TypeError
        If `other` is an array whose dtype kind is not ``'m'``.
    """
    # numpy does not implement floordiv for timedelta64 dtype, so we cannot
    # just defer
    if _should_cast_to_timedelta(other):
        # We interpret NaT as timedelta64("NaT")
        numerator = Timedelta(other)
        if numerator is NaT:
            return np.nan
        return numerator.value // self.value

    if not is_array(other):
        return NotImplemented

    if other.dtype.kind == 'm':
        # also timedelta-like
        return _broadcast_floordiv_td64(self.value, other, _rfloordiv)

    # Includes integer array // Timedelta, disallowed in GH#19761
    raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')
def __mod__(self, other):
    """
    Compute ``self % other`` as the second item of ``self.__divmod__(other)``.
    """
    # Naive implementation, room for optimization
    quotient_and_remainder = self.__divmod__(other)
    return quotient_and_remainder[1]
def __rmod__(self, other):
    """
    Compute ``other % self`` as the second item of
    ``self.__rdivmod__(other)``.
    """
    # Naive implementation, room for optimization
    quotient_and_remainder = self.__rdivmod__(other)
    return quotient_and_remainder[1]
def __divmod__(self, other):
    """
    Compute ``divmod(self, other)``.

    Returns
    -------
    tuple
        ``(self // other, self - (self // other) * other)``
    """
    # Naive implementation, room for optimization
    quotient = self // other
    remainder = self - quotient * other
    return quotient, remainder
def __rdivmod__(self, other):
    """
    Compute ``divmod(other, self)``.

    Returns
    -------
    tuple
        ``(other // self, other - (other // self) * self)``
    """
    # Naive implementation, room for optimization
    quotient = other // self
    remainder = other - quotient * self
    return quotient, remainder
cdef bint is_any_td_scalar(object obj):
    """
    Cython equivalent for `isinstance(obj, (timedelta, np.timedelta64, Tick))`

    Parameters
    ----------
    obj : object

    Returns
    -------
    bool
    """
    # Checks run in the same order as a chained ``or`` and stop at the
    # first one that succeeds.
    if PyDelta_Check(obj):
        return True
    if is_timedelta64_object(obj):
        return True
    return is_tick_object(obj)
cdef bint _should_cast_to_timedelta(object obj):
    """
    Should we treat this object as a Timedelta for the purpose of a binary op

    True for any timedelta scalar (per ``is_any_td_scalar``), for ``None``,
    for ``NaT`` and for ``str`` instances.
    """
    if is_any_td_scalar(obj):
        return True
    if obj is None or obj is NaT:
        return True
    return isinstance(obj, str)
cdef _floordiv(int64_t value, right):
    """
    Return ``value // right``.
    """
    quotient = value // right
    return quotient
cdef _rfloordiv(int64_t value, right):
    """
    Return ``right // value``, i.e. floor division with the operands swapped.
    """
    # analogous to referencing operator.div, but there is no operator.rfloordiv
    quotient = right // value
    return quotient
cdef _broadcast_floordiv_td64(
    int64_t value,
    ndarray other,
    object (*operation)(int64_t value, object right)
):
    """
    Boilerplate code shared by Timedelta.__floordiv__ and
    Timedelta.__rfloordiv__ because np.timedelta64 does not implement these.

    Parameters
    ----------
    value : int64_t; `self.value` from a Timedelta object
    other : object
    operation : function, either _floordiv or _rfloordiv

    Returns
    -------
    result : varies based on `other`
        ``np.nan`` for a zero-dimensional NaT; otherwise the result of
        `operation`, with NaT positions replaced by ``np.nan`` (after a cast
        to ``'f8'``) when `other` has one or more dimensions.
    """
    # assumes other.dtype.kind == 'm', i.e. other is timedelta-like

    # We need to watch out for np.timedelta64('NaT').
    nat_mask = other.view('i8') == NPY_NAT

    if other.ndim == 0:
        # zero-dimensional input: the mask is a single truth value
        if nat_mask:
            return np.nan
        return operation(value, other.astype('m8[ns]').astype('i8'))

    result = operation(value, other.astype('m8[ns]').astype('i8'))
    if nat_mask.any():
        # the result must be float before NaN can be stored in it
        result = result.astype('f8')
        result[nat_mask] = np.nan
    return result
# resolution in ns
# Timedelta.min is built from one above the int64 minimum.
# NOTE(review): presumably the int64 minimum itself is reserved as the NaT
# sentinel (NPY_NAT) -- confirm.
Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1)
# Timedelta.max is built from the int64 maximum.
Timedelta.max = Timedelta(np.iinfo(np.int64).max)
# Timedelta.resolution is a Timedelta of one nanosecond.
Timedelta.resolution = Timedelta(nanoseconds=1)