A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/window/test_base_indexer.py

497 lines
15 KiB

import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
Series,
concat,
date_range,
)
import pandas._testing as tm
from pandas.api.indexers import (
BaseIndexer,
FixedForwardWindowIndexer,
)
from pandas.core.indexers.objects import (
ExpandingIndexer,
FixedWindowIndexer,
VariableOffsetWindowIndexer,
)
from pandas.tseries.offsets import BusinessDay
def test_bad_get_window_bounds_signature():
    """Rolling with an indexer whose ``get_window_bounds`` takes only ``self`` raises."""

    class BadIndexer(BaseIndexer):
        # Deliberately declares no window-bound parameters.
        def get_window_bounds(self):
            return None

    broken_indexer = BadIndexer()
    expected_message = "BadIndexer does not implement"
    with pytest.raises(ValueError, match=expected_message):
        Series(range(5)).rolling(broken_indexer)
def test_expanding_indexer():
    """Rolling mean over an ``ExpandingIndexer`` equals the ``expanding()`` mean."""
    ser = Series(range(10))
    expanding_indexer = ExpandingIndexer()
    via_rolling = ser.rolling(expanding_indexer).mean()
    via_expanding = ser.expanding().mean()
    tm.assert_series_equal(via_rolling, via_expanding)
def test_indexer_constructor_arg():
# Example found in computation.rst
use_expanding = [True, False, True, False, True]
df = DataFrame({"values": range(5)})
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
if self.use_expanding[i]:
start[i] = 0
end[i] = i + 1
else:
start[i] = i
end[i] = i + self.window_size
return start, end
indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
result = df.rolling(indexer).sum()
expected = DataFrame({"values": [0.0, 1.0, 3.0, 3.0, 10.0]})
tm.assert_frame_equal(result, expected)
def test_indexer_accepts_rolling_args():
df = DataFrame({"values": range(5)})
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
if center and min_periods == 1 and closed == "both" and i == 2:
start[i] = 0
end[i] = num_values
else:
start[i] = i
end[i] = i + self.window_size
return start, end
indexer = CustomIndexer(window_size=1)
result = df.rolling(indexer, center=True, min_periods=1, closed="both").sum()
expected = DataFrame({"values": [0.0, 1.0, 10.0, 3.0, 4.0]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("constructor", [Series, DataFrame])
@pytest.mark.parametrize(
    "func,np_func,expected,np_kwargs",
    [
        ("count", len, [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, np.nan], {}),
        ("min", np.min, [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan], {}),
        ("max", np.max, [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan], {}),
        (
            "std",
            np.std,
            [1.0, 1.0, 1.0, 55.71654452, 54.85739087, 53.9845657, 1.0, 1.0, 0.70710678, np.nan],
            {"ddof": 1},
        ),
        (
            "var",
            np.var,
            [1.0, 1.0, 1.0, 3104.333333, 3009.333333, 2914.333333, 1.0, 1.0, 0.500000, np.nan],
            {"ddof": 1},
        ),
        ("median", np.median, [1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 7.0, 8.0, 8.5, np.nan], {}),
    ],
)
@pytest.mark.filterwarnings("ignore:min_periods:FutureWarning")
def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs):
    """Check a size-3 forward window: rejected arguments, explicit values, and ``apply`` parity."""
    # GH 32865
    values = np.arange(10.0)
    values[5] = 100.0
    indexer = FixedForwardWindowIndexer(window_size=3)

    def via_apply(roller):
        # Reference result: run the NumPy counterpart over every window.
        return constructor(roller.apply(lambda window: np_func(window, **np_kwargs)))

    # Rolling keywords that a forward-looking window refuses, with the message for each.
    rejected = (
        ({"center": True}, "Forward-looking windows can't have center=True"),
        (
            {"closed": "right"},
            "Forward-looking windows don't support setting the closed argument",
        ),
    )
    for bad_kwargs, message in rejected:
        with pytest.raises(ValueError, match=message):
            bad_roller = constructor(values).rolling(window=indexer, **bad_kwargs)
            getattr(bad_roller, func)()

    roller = constructor(values).rolling(window=indexer, min_periods=2)
    result = getattr(roller, func)()

    # Check that the function output matches the explicitly provided array
    tm.assert_equal(result, constructor(expected))

    # Check that the rolling function output matches applying an alternative
    # function to the rolling window object
    tm.assert_equal(result, via_apply(roller))

    # Check that the function output matches applying an alternative function
    # if min_periods isn't specified
    # GH 39604: After count-min_periods deprecation, apply(lambda x: len(x))
    # is equivalent to count after setting min_periods=0
    min_periods = {"count": 0}.get(func)
    default_roller = constructor(values).rolling(window=indexer, min_periods=min_periods)
    default_result = getattr(default_roller, func)()
    tm.assert_equal(default_result, via_apply(default_roller))
@pytest.mark.parametrize("constructor", [Series, DataFrame])
def test_rolling_forward_skewness(constructor):
    """Skewness over a size-5 forward window with ``min_periods=3`` matches known values."""
    data = np.arange(10.0)
    data[5] = 100.0

    forward = FixedForwardWindowIndexer(window_size=5)
    skewness = constructor(data).rolling(window=forward, min_periods=3).skew()

    wanted = constructor(
        [0.0, 2.232396, 2.229508, 2.228340, 2.229091, 2.231989, 0.0, 0.0, np.nan, np.nan]
    )
    tm.assert_equal(skewness, wanted)
@pytest.mark.parametrize(
    "func,expected",
    [
        ("cov", [2.0, 2.0, 2.0, 97.0, 2.0, -93.0, 2.0, 2.0, np.nan, np.nan]),
        (
            "corr",
            [
                1.0,
                1.0,
                1.0,
                0.8704775290207161,
                0.018229084250926637,
                -0.861357304646493,
                1.0,
                1.0,
                np.nan,
                np.nan,
            ],
        ),
    ],
)
def test_rolling_forward_cov_corr(func, expected):
    """Pairwise ``cov``/``corr`` of two columns under a size-3 forward window match known values."""
    first_col = np.arange(10).reshape(-1, 1)
    # The second column is derived before the outlier is written into the first.
    second_col = first_col * 2
    first_col[5, 0] = 100
    two_columns = np.concatenate([first_col, second_col], axis=1)

    forward = FixedForwardWindowIndexer(window_size=3)
    roller = DataFrame(two_columns).rolling(window=forward, min_periods=3)

    # We are interested in checking only pairwise covariance / correlation
    pairwise = getattr(roller, func)()
    cross_term = pairwise.loc[(slice(None), 1), 0]
    result = cross_term.reset_index(drop=True)

    wanted = Series(expected, name=result.name)
    tm.assert_equal(result, wanted)
@pytest.mark.parametrize(
    "closed,expected_data",
    [
        ["right", [0.0, 1.0, 2.0, 3.0, 7.0, 12.0, 6.0, 7.0, 8.0, 9.0]],
        ["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]],
    ],
)
def test_non_fixed_variable_window_indexer(closed, expected_data):
    """Rolling sum with a one-``BusinessDay`` ``VariableOffsetWindowIndexer`` for each ``closed``."""
    dates = date_range("2020", periods=10)
    frame = DataFrame(range(10), index=dates)

    one_business_day = BusinessDay(1)
    variable = VariableOffsetWindowIndexer(index=dates, offset=one_business_day)
    summed = frame.rolling(variable, closed=closed).sum()

    wanted = DataFrame(expected_data, index=dates)
    tm.assert_frame_equal(summed, wanted)
def test_fixed_forward_indexer_count():
    """``count`` over a size-2 forward window with ``min_periods=0`` on a mostly-missing column."""
    # GH: 35579
    frame = DataFrame({"b": [None, None, None, 7]})
    forward = FixedForwardWindowIndexer(window_size=2)

    counted = frame.rolling(window=forward, min_periods=0).count()

    wanted = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]})
    tm.assert_frame_equal(counted, wanted)
@pytest.mark.parametrize(
("end_value", "values"), [(1, [0.0, 1, 1, 3, 2]), (-1, [0.0, 1, 0, 3, 1])]
)
@pytest.mark.parametrize(("func", "args"), [("median", []), ("quantile", [0.5])])
def test_indexer_quantile_sum(end_value, values, func, args):
# GH 37153
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
if self.use_expanding[i]:
start[i] = 0
end[i] = max(i + end_value, 1)
else:
start[i] = i
end[i] = i + self.window_size
return start, end
use_expanding = [True, False, True, False, True]
df = DataFrame({"values": range(5)})
indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
result = getattr(df.rolling(indexer), func)(*args)
expected = DataFrame({"values": values})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "indexer_class", [FixedWindowIndexer, FixedForwardWindowIndexer, ExpandingIndexer]
)
@pytest.mark.parametrize("window_size", [1, 2, 12])
@pytest.mark.parametrize(
    "df_data",
    [
        {"a": [1, 1], "b": [0, 1]},
        {"a": [1, 2], "b": [0, 1]},
        {"a": [1] * 16, "b": [np.nan, 1, 2, np.nan] + list(range(4, 16))},
    ],
)
def test_indexers_are_reusable_after_groupby_rolling(
    indexer_class, window_size, df_data
):
    """An indexer's ``window_size`` is unchanged after repeated groupby-rolling use."""
    # GH 43267
    frame = DataFrame(df_data)
    shared_indexer = indexer_class(window_size=window_size)
    size_before = shared_indexer.window_size

    repeats = 3
    for _ in range(repeats):
        frame.groupby("a")["b"].rolling(window=shared_indexer, min_periods=1).mean()
        # Checked after every pass, not just at the end.
        assert shared_indexer.window_size == size_before
@pytest.mark.parametrize(
    "window_size, num_values, expected_start, expected_end",
    [
        (1, 1, [0], [1]),
        (1, 2, [0, 1], [1, 2]),
        (2, 1, [0], [1]),
        (2, 2, [0, 1], [2, 2]),
        (5, 12, range(12), list(range(5, 12)) + [12] * 5),
        (12, 5, range(5), [5] * 5),
        (0, 0, np.array([]), np.array([])),
        (1, 0, np.array([]), np.array([])),
        (0, 1, [0], [0]),
    ],
)
def test_fixed_forward_indexer_bounds(
    window_size, num_values, expected_start, expected_end
):
    """``FixedForwardWindowIndexer.get_window_bounds`` returns the expected start/end arrays."""
    # GH 43267
    forward = FixedForwardWindowIndexer(window_size=window_size)
    start, end = forward.get_window_bounds(num_values=num_values)

    # dtype is intentionally not compared; only the values matter here.
    for actual, wanted in ((start, expected_start), (end, expected_end)):
        tm.assert_numpy_array_equal(actual, np.array(wanted), check_dtype=False)
    assert len(start) == len(end)
@pytest.mark.parametrize(
    "df, window_size, expected",
    [
        (
            DataFrame({"b": [0, 1, 2], "a": [1, 2, 2]}),
            2,
            Series(
                [0, 1.5, 2.0],
                index=MultiIndex.from_arrays([[1, 2, 2], range(3)], names=["a", None]),
                name="b",
                dtype=np.float64,
            ),
        ),
        (
            DataFrame(
                {
                    "b": [np.nan, 1, 2, np.nan] + list(range(4, 18)),
                    "a": [1] * 7 + [2] * 11,
                    "c": range(18),
                }
            ),
            12,
            Series(
                [
                    3.6, 3.6, 4.25, 5.0, 5.0, 5.5, 6.0,
                    12.0, 12.5, 13.0, 13.5, 14.0, 14.5, 15.0, 15.5, 16.0, 16.5, 17.0,
                ],
                index=MultiIndex.from_arrays(
                    [[1] * 7 + [2] * 11, range(18)], names=["a", None]
                ),
                name="b",
                dtype=np.float64,
            ),
        ),
    ],
)
def test_rolling_groupby_with_fixed_forward_specific(df, window_size, expected):
    """Groupby-rolling mean over a forward window matches hand-written expected series."""
    # GH 43267
    forward = FixedForwardWindowIndexer(window_size=window_size)
    grouped_b = df.groupby("a")["b"]
    averaged = grouped_b.rolling(window=forward, min_periods=1).mean()
    tm.assert_series_equal(averaged, expected)
@pytest.mark.parametrize(
    "group_keys",
    [
        (1,),
        (1, 2),
        (2, 1),
        (1, 1, 2),
        (1, 2, 1),
        (1, 1, 2, 2),
        (1, 2, 3, 2, 3),
        (1, 1, 2) * 4,
        (1, 2, 3) * 5,
    ],
)
@pytest.mark.parametrize("window_size", [1, 2, 3, 4, 5, 8, 20])
def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size):
    """Groupby-rolling forward sums agree with sums computed by slicing each group by hand."""
    # GH 43267
    row_count = len(group_keys)
    df = DataFrame(
        {
            "a": np.array(list(group_keys)),
            "b": np.arange(row_count, dtype=np.float64) + 17,
            "c": np.arange(row_count, dtype=np.int64),
        }
    )

    forward = FixedForwardWindowIndexer(window_size=window_size)
    result = df.groupby("a")["b"].rolling(window=forward, min_periods=1).sum()
    result.index.names = ["a", "c"]

    def forward_sums(group):
        # Sum of "b" over the next window_size rows of this group, starting at each row.
        return [
            group["b"].iloc[pos : pos + window_size].sum(min_count=1)
            for pos in range(len(group))
        ]

    per_group = df.groupby("a")[["a", "b", "c"]]
    pieces = [group.assign(b=forward_sums(group)) for _, group in per_group]
    manual = concat(pieces).set_index(["a", "c"])["b"]

    tm.assert_series_equal(result, manual)
def test_unequal_start_end_bounds():
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed):
return np.array([1]), np.array([1, 2])
indexer = CustomIndexer()
roll = Series(1).rolling(indexer)
match = "start"
with pytest.raises(ValueError, match=match):
roll.mean()
with pytest.raises(ValueError, match=match):
next(iter(roll))
with pytest.raises(ValueError, match=match):
roll.corr(pairwise=True)
with pytest.raises(ValueError, match=match):
roll.cov(pairwise=True)
def test_unequal_bounds_to_object():
# GH 44470
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed):
return np.array([1]), np.array([2])
indexer = CustomIndexer()
roll = Series([1, 1]).rolling(indexer)
match = "start and end"
with pytest.raises(ValueError, match=match):
roll.mean()
with pytest.raises(ValueError, match=match):
next(iter(roll))
with pytest.raises(ValueError, match=match):
roll.corr(pairwise=True)
with pytest.raises(ValueError, match=match):
roll.cov(pairwise=True)