A PyQT GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be store and compiles monthly and yearly summaries.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/series/indexing/test_datetime.py

484 lines
14 KiB

"""
Also test support for datetime64[ns] in Series / DataFrame
"""
from datetime import (
datetime,
timedelta,
)
import re
from dateutil.tz import (
gettz,
tzutc,
)
import numpy as np
import pytest
import pytz
from pandas._libs import index as libindex
import pandas as pd
from pandas import (
DataFrame,
Series,
Timestamp,
date_range,
period_range,
)
import pandas._testing as tm
def test_fancy_getitem():
dti = date_range(
freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
)
s = Series(np.arange(len(dti)), index=dti)
assert s[48] == 48
assert s["1/2/2009"] == 48
assert s["2009-1-2"] == 48
assert s[datetime(2009, 1, 2)] == 48
assert s[Timestamp(datetime(2009, 1, 2))] == 48
with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
s["2009-1-3"]
tm.assert_series_equal(
s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
)
def test_fancy_setitem():
dti = date_range(
freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
)
s = Series(np.arange(len(dti)), index=dti)
s[48] = -1
assert s[48] == -1
s["1/2/2009"] = -2
assert s[48] == -2
s["1/2/2009":"2009-06-05"] = -3
assert (s[48:54] == -3).all()
@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
def test_getitem_setitem_datetime_tz(tz_source):
if tz_source == "pytz":
tzget = pytz.timezone
else:
# handle special case for utc in dateutil
tzget = lambda x: tzutc() if x == "UTC" else gettz(x)
N = 50
# testing with timezone, GH #2785
rng = date_range("1/1/1990", periods=N, freq="H", tz=tzget("US/Eastern"))
ts = Series(np.random.randn(N), index=rng)
# also test Timestamp tz handling, GH #2789
result = ts.copy()
result["1990-01-01 09:00:00+00:00"] = 0
result["1990-01-01 09:00:00+00:00"] = ts[4]
tm.assert_series_equal(result, ts)
result = ts.copy()
result["1990-01-01 03:00:00-06:00"] = 0
result["1990-01-01 03:00:00-06:00"] = ts[4]
tm.assert_series_equal(result, ts)
# repeat with datetimes
result = ts.copy()
result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0
result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts[4]
tm.assert_series_equal(result, ts)
result = ts.copy()
dt = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
dt = dt.to_pydatetime()
result[dt] = 0
result[dt] = ts[4]
tm.assert_series_equal(result, ts)
def test_getitem_setitem_datetimeindex():
N = 50
# testing with timezone, GH #2785
rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
ts = Series(np.random.randn(N), index=rng)
result = ts["1990-01-01 04:00:00"]
expected = ts[4]
assert result == expected
result = ts.copy()
result["1990-01-01 04:00:00"] = 0
result["1990-01-01 04:00:00"] = ts[4]
tm.assert_series_equal(result, ts)
result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
expected = ts[4:8]
tm.assert_series_equal(result, expected)
result = ts.copy()
result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
tm.assert_series_equal(result, ts)
lb = "1990-01-01 04:00:00"
rb = "1990-01-01 07:00:00"
# GH#18435 strings get a pass from tzawareness compat
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
tm.assert_series_equal(result, expected)
lb = "1990-01-01 04:00:00-0500"
rb = "1990-01-01 07:00:00-0500"
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
tm.assert_series_equal(result, expected)
# But we do not give datetimes a pass on tzawareness compat
msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
naive = datetime(1990, 1, 1, 4)
for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
with tm.assert_produces_warning(FutureWarning):
# GH#36148 will require tzawareness compat
result = ts[key]
expected = ts[4]
assert result == expected
result = ts.copy()
with tm.assert_produces_warning(FutureWarning):
# GH#36148 will require tzawareness compat
result[datetime(1990, 1, 1, 4)] = 0
with tm.assert_produces_warning(FutureWarning):
# GH#36148 will require tzawareness compat
result[datetime(1990, 1, 1, 4)] = ts[4]
tm.assert_series_equal(result, ts)
with tm.assert_produces_warning(FutureWarning):
# GH#36148 will require tzawareness compat
result = ts[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)]
expected = ts[4:8]
tm.assert_series_equal(result, expected)
result = ts.copy()
with tm.assert_produces_warning(FutureWarning):
# GH#36148 will require tzawareness compat
result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = 0
with tm.assert_produces_warning(FutureWarning):
# GH#36148 will require tzawareness compat
result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = ts[4:8]
tm.assert_series_equal(result, ts)
lb = datetime(1990, 1, 1, 4)
rb = datetime(1990, 1, 1, 7)
msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
with pytest.raises(TypeError, match=msg):
# tznaive vs tzaware comparison is invalid
# see GH#18376, GH#18162
ts[(ts.index >= lb) & (ts.index <= rb)]
lb = Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo)
rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
tm.assert_series_equal(result, expected)
result = ts[ts.index[4]]
expected = ts[4]
assert result == expected
result = ts[ts.index[4:8]]
expected = ts[4:8]
tm.assert_series_equal(result, expected)
result = ts.copy()
result[ts.index[4:8]] = 0
result.iloc[4:8] = ts.iloc[4:8]
tm.assert_series_equal(result, ts)
# also test partial date slicing
result = ts["1990-01-02"]
expected = ts[24:48]
tm.assert_series_equal(result, expected)
result = ts.copy()
result["1990-01-02"] = 0
result["1990-01-02"] = ts[24:48]
tm.assert_series_equal(result, ts)
def test_getitem_setitem_periodindex():
N = 50
rng = period_range("1/1/1990", periods=N, freq="H")
ts = Series(np.random.randn(N), index=rng)
result = ts["1990-01-01 04"]
expected = ts[4]
assert result == expected
result = ts.copy()
result["1990-01-01 04"] = 0
result["1990-01-01 04"] = ts[4]
tm.assert_series_equal(result, ts)
result = ts["1990-01-01 04":"1990-01-01 07"]
expected = ts[4:8]
tm.assert_series_equal(result, expected)
result = ts.copy()
result["1990-01-01 04":"1990-01-01 07"] = 0
result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
tm.assert_series_equal(result, ts)
lb = "1990-01-01 04"
rb = "1990-01-01 07"
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
tm.assert_series_equal(result, expected)
# GH 2782
result = ts[ts.index[4]]
expected = ts[4]
assert result == expected
result = ts[ts.index[4:8]]
expected = ts[4:8]
tm.assert_series_equal(result, expected)
result = ts.copy()
result[ts.index[4:8]] = 0
result.iloc[4:8] = ts.iloc[4:8]
tm.assert_series_equal(result, ts)
def test_datetime_indexing():
index = date_range("1/1/2000", "1/7/2000")
index = index.repeat(3)
s = Series(len(index), index=index)
stamp = Timestamp("1/8/2000")
with pytest.raises(KeyError, match=re.escape(repr(stamp))):
s[stamp]
s[stamp] = 0
assert s[stamp] == 0
# not monotonic
s = Series(len(index), index=index)
s = s[::-1]
with pytest.raises(KeyError, match=re.escape(repr(stamp))):
s[stamp]
s[stamp] = 0
assert s[stamp] == 0
"""
test duplicates in time series
"""
def test_indexing_with_duplicate_datetimeindex(
rand_series_with_duplicate_datetimeindex,
):
ts = rand_series_with_duplicate_datetimeindex
uniques = ts.index.unique()
for date in uniques:
result = ts[date]
mask = ts.index == date
total = (ts.index == date).sum()
expected = ts[mask]
if total > 1:
tm.assert_series_equal(result, expected)
else:
tm.assert_almost_equal(result, expected[0])
cp = ts.copy()
cp[date] = 0
expected = Series(np.where(mask, 0, ts), index=ts.index)
tm.assert_series_equal(cp, expected)
key = datetime(2000, 1, 6)
with pytest.raises(KeyError, match=re.escape(repr(key))):
ts[key]
# new index
ts[datetime(2000, 1, 6)] = 0
assert ts[datetime(2000, 1, 6)] == 0
def test_loc_getitem_over_size_cutoff(monkeypatch):
# #1821
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)
# create large list of non periodic datetime
dates = []
sec = timedelta(seconds=1)
half_sec = timedelta(microseconds=500000)
d = datetime(2011, 12, 5, 20, 30)
n = 1100
for i in range(n):
dates.append(d)
dates.append(d + sec)
dates.append(d + sec + half_sec)
dates.append(d + sec + sec + half_sec)
d += 3 * sec
# duplicate some values in the list
duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
for p in duplicate_positions:
dates[p + 1] = dates[p]
df = DataFrame(np.random.randn(len(dates), 4), index=dates, columns=list("ABCD"))
pos = n * 3
timestamp = df.index[pos]
assert timestamp in df.index
# it works!
df.loc[timestamp]
assert len(df.loc[[timestamp]]) > 0
def test_indexing_over_size_cutoff_period_index(monkeypatch):
# GH 27136
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)
n = 1100
idx = period_range("1/1/2000", freq="T", periods=n)
assert idx._engine.over_size_threshold
s = Series(np.random.randn(len(idx)), index=idx)
pos = n - 1
timestamp = idx[pos]
assert timestamp in s.index
# it works!
s[timestamp]
assert len(s.loc[[timestamp]]) > 0
def test_indexing_unordered():
# GH 2437
rng = date_range(start="2011-01-01", end="2011-01-15")
ts = Series(np.random.rand(len(rng)), index=rng)
ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])
for t in ts.index:
expected = ts[t]
result = ts2[t]
assert expected == result
# GH 3448 (ranges)
def compare(slobj):
result = ts2[slobj].copy()
result = result.sort_index()
expected = ts[slobj]
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected)
compare(slice("2011-01-01", "2011-01-15"))
with tm.assert_produces_warning(FutureWarning):
compare(slice("2010-12-30", "2011-01-15"))
compare(slice("2011-01-01", "2011-01-16"))
# partial ranges
compare(slice("2011-01-01", "2011-01-6"))
compare(slice("2011-01-06", "2011-01-8"))
compare(slice("2011-01-06", "2011-01-12"))
# single values
result = ts2["2011"].sort_index()
expected = ts["2011"]
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected)
def test_indexing_unordered2():
# diff freq
rng = date_range(datetime(2005, 1, 1), periods=20, freq="M")
ts = Series(np.arange(len(rng)), index=rng)
ts = ts.take(np.random.permutation(20))
result = ts["2005"]
for t in result.index:
assert t.year == 2005
def test_indexing():
idx = date_range("2001-1-1", periods=20, freq="M")
ts = Series(np.random.rand(len(idx)), index=idx)
# getting
# GH 3070, make sure semantics work on Series/Frame
expected = ts["2001"]
expected.name = "A"
df = DataFrame({"A": ts})
with tm.assert_produces_warning(FutureWarning):
# GH#36179 string indexing on rows for DataFrame deprecated
result = df["2001"]["A"]
tm.assert_series_equal(expected, result)
# setting
ts["2001"] = 1
expected = ts["2001"]
expected.name = "A"
df.loc["2001", "A"] = 1
with tm.assert_produces_warning(FutureWarning):
# GH#36179 string indexing on rows for DataFrame deprecated
result = df["2001"]["A"]
tm.assert_series_equal(expected, result)
def test_getitem_str_month_with_datetimeindex():
# GH3546 (not including times on the last day)
idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H")
ts = Series(range(len(idx)), index=idx)
expected = ts["2013-05"]
tm.assert_series_equal(expected, ts)
idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="S")
ts = Series(range(len(idx)), index=idx)
expected = ts["2013-05"]
tm.assert_series_equal(expected, ts)
def test_getitem_str_year_with_datetimeindex():
idx = [
Timestamp("2013-05-31 00:00"),
Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
]
ts = Series(range(len(idx)), index=idx)
expected = ts["2013"]
tm.assert_series_equal(expected, ts)
def test_getitem_str_second_with_datetimeindex():
# GH14826, indexing with a seconds resolution string / datetime object
df = DataFrame(
np.random.rand(5, 5),
columns=["open", "high", "low", "close", "volume"],
index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
)
# this is a single date, so will raise
with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
df["2012-01-02 18:01:02"]
msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
with pytest.raises(KeyError, match=msg):
df[df.index[2]]