A PyQt GUI application for converting InfoLease report outputs into Excel files. It handles parsing and summarizing, learns where files are meant to be stored, and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/generic/test_duplicate_labels.py

452 lines
16 KiB

"""Tests dealing with the NDFrame.allows_duplicates."""
import operator
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
# Shared marker: decorate a test (or a single ``pytest.param``) with this to
# record it as an expected failure with the reason "Not implemented.".
not_implemented = pytest.mark.xfail(
    reason="Not implemented.",
)
# ----------------------------------------------------------------------------
# Preservation
class TestPreserves:
    """Check that ``flags.allows_duplicate_labels`` is carried through operations.

    Each test starts from a Series/DataFrame whose flag was set to ``False``
    with ``set_flags`` and asserts the flag on the result of some operation.
    Tests (or individual parameters) marked ``not_implemented`` are expected
    failures.
    """

    @pytest.mark.parametrize(
        "cls, data",
        [
            (pd.Series, np.array([])),
            (pd.Series, [1, 2]),
            (pd.DataFrame, {}),
            (pd.DataFrame, {"A": [1, 2]}),
        ],
    )
    def test_construction_ok(self, cls, data):
        """The flag is ``True`` after construction and ``False`` after ``set_flags``."""
        result = cls(data)
        assert result.flags.allows_duplicate_labels is True

        result = cls(data).set_flags(allows_duplicate_labels=False)
        assert result.flags.allows_duplicate_labels is False

    @pytest.mark.parametrize(
        "func",
        [
            operator.itemgetter(["a"]),
            operator.methodcaller("add", 1),
            operator.methodcaller("rename", str.upper),
            operator.methodcaller("rename", "name"),
            operator.methodcaller("abs"),
            np.abs,
        ],
    )
    def test_preserved_series(self, func):
        """Applying ``func`` to a flagged Series keeps the flag ``False``."""
        s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
        assert func(s).flags.allows_duplicate_labels is False

    @pytest.mark.parametrize(
        "other", [pd.Series(0, index=["a", "b", "c"]), pd.Series(0, index=["a", "b"])]
    )
    # TODO: frame
    @not_implemented
    def test_align(self, other):
        """Both outputs of ``Series.align`` should keep the flag ``False``."""
        s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
        a, b = s.align(other)
        assert a.flags.allows_duplicate_labels is False
        assert b.flags.allows_duplicate_labels is False

    def test_preserved_frame(self):
        """List-based ``.loc`` selections on a flagged frame keep the flag ``False``."""
        df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        assert df.loc[["a"]].flags.allows_duplicate_labels is False
        assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False

    @not_implemented
    def test_to_frame(self):
        """``Series.to_frame`` should keep the flag ``False``."""
        s = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False)
        assert s.to_frame().flags.allows_duplicate_labels is False

    @pytest.mark.parametrize("func", ["add", "sub"])
    @pytest.mark.parametrize(
        "frame", [False, pytest.param(True, marks=not_implemented)]
    )
    @pytest.mark.parametrize("other", [1, pd.Series([1, 2], name="A")])
    def test_binops(self, func, other, frame):
        """``add``/``sub`` with a scalar or a Series keep the flag ``False``."""
        df = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        if frame:
            df = df.to_frame()
        if isinstance(other, pd.Series) and frame:
            # Keep both operands the same kind of object in the frame case.
            other = other.to_frame()
        func = operator.methodcaller(func, other)
        assert df.flags.allows_duplicate_labels is False
        assert func(df).flags.allows_duplicate_labels is False

    @not_implemented
    def test_preserve_getitem(self):
        """``[]`` and ``.loc`` selections should keep the flag ``False``."""
        df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False)
        assert df[["A"]].flags.allows_duplicate_labels is False
        assert df["A"].flags.allows_duplicate_labels is False
        assert df.loc[0].flags.allows_duplicate_labels is False
        assert df.loc[[0]].flags.allows_duplicate_labels is False
        assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False

    @pytest.mark.xfail(reason="Unclear behavior.")
    def test_ndframe_getitem_caching_issue(self):
        """Flipping the frame's flag should be visible on a re-fetched column."""
        # NDFrame.__getitem__ will cache the first df['A']. May need to
        # invalidate that cache? Update the cached entries?
        df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False)
        assert df["A"].flags.allows_duplicate_labels is False
        df.flags.allows_duplicate_labels = True
        assert df["A"].flags.allows_duplicate_labels is True

    @pytest.mark.parametrize(
        "objs, kwargs",
        [
            # Series
            (
                [
                    pd.Series(1, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                    pd.Series(2, index=["c", "d"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                ],
                {},
            ),
            (
                [
                    pd.Series(1, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                    pd.Series(2, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                ],
                {"ignore_index": True},
            ),
            (
                [
                    pd.Series(1, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                    pd.Series(2, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                ],
                {"axis": 1},
            ),
            # Frame
            (
                [
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                    pd.DataFrame({"A": [1, 2]}, index=["c", "d"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                ],
                {},
            ),
            (
                [
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                ],
                {"ignore_index": True},
            ),
            (
                [
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                    pd.DataFrame({"B": [1, 2]}, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                ],
                {"axis": 1},
            ),
            # Series / Frame
            (
                [
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags(
                        allows_duplicate_labels=False
                    ),
                    pd.Series([1, 2], index=["a", "b"], name="B").set_flags(
                        allows_duplicate_labels=False
                    ),
                ],
                {"axis": 1},
            ),
        ],
    )
    def test_concat(self, objs, kwargs):
        """``pd.concat`` of flagged inputs yields a result with the flag ``False``."""
        result = pd.concat(objs, **kwargs)
        assert result.flags.allows_duplicate_labels is False

    @pytest.mark.parametrize(
        "left, right, kwargs, expected",
        [
            # false false false
            pytest.param(
                pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags(
                    allows_duplicate_labels=False
                ),
                pd.DataFrame({"B": [0, 1]}, index=["a", "d"]).set_flags(
                    allows_duplicate_labels=False
                ),
                {"left_index": True, "right_index": True},
                False,
                marks=not_implemented,
            ),
            # false true false
            pytest.param(
                pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags(
                    allows_duplicate_labels=False
                ),
                pd.DataFrame({"B": [0, 1]}, index=["a", "d"]),
                {"left_index": True, "right_index": True},
                False,
                marks=not_implemented,
            ),
            # true true true
            (
                pd.DataFrame({"A": [0, 1]}, index=["a", "b"]),
                pd.DataFrame({"B": [0, 1]}, index=["a", "d"]),
                {"left_index": True, "right_index": True},
                True,
            ),
        ],
    )
    def test_merge(self, left, right, kwargs, expected):
        """The merged frame's flag must be ``expected`` for the given inputs."""
        result = pd.merge(left, right, **kwargs)
        assert result.flags.allows_duplicate_labels is expected

    @not_implemented
    def test_groupby(self):
        """A groupby aggregation should keep the flag ``False``."""
        # XXX: This is under tested
        # TODO:
        #  - apply
        #  - transform
        #  - Should passing a grouper that disallows duplicates propagate?
        df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False)
        result = df.groupby([0, 0, 1]).agg("count")
        assert result.flags.allows_duplicate_labels is False

    @pytest.mark.parametrize("frame", [True, False])
    @not_implemented
    def test_window(self, frame):
        """Rolling, ewm and expanding means should keep the flag ``False``."""
        # The flag is set through ``set_flags`` like every other test in this
        # file; it is not passed to the Series constructor, so the assertions
        # below are what this (expected-failure) test actually exercises.
        df = pd.Series(
            1,
            index=pd.date_range("2000", periods=12),
            name="A",
        ).set_flags(allows_duplicate_labels=False)
        if frame:
            df = df.to_frame()
        assert df.rolling(3).mean().flags.allows_duplicate_labels is False
        assert df.ewm(3).mean().flags.allows_duplicate_labels is False
        assert df.expanding(3).mean().flags.allows_duplicate_labels is False
# ----------------------------------------------------------------------------
# Raises
class TestRaises:
    """Check that operations producing duplicate labels raise when disallowed.

    Every test expects ``pd.errors.DuplicateLabelError`` with a message
    matching "Index has duplicates.".
    """

    @pytest.mark.parametrize(
        "cls, axes",
        [
            (pd.Series, {"index": ["a", "a"], "dtype": float}),
            (pd.DataFrame, {"index": ["a", "a"]}),
            (pd.DataFrame, {"index": ["a", "a"], "columns": ["b", "b"]}),
            (pd.DataFrame, {"columns": ["b", "b"]}),
        ],
    )
    def test_set_flags_with_duplicates(self, cls, axes):
        """``set_flags(allows_duplicate_labels=False)`` rejects duplicated axes."""
        # Construction with duplicates is fine while the flag is at its default.
        built = cls(**axes)
        assert built.flags.allows_duplicate_labels is True

        with pytest.raises(
            pd.errors.DuplicateLabelError, match="Index has duplicates."
        ):
            cls(**axes).set_flags(allows_duplicate_labels=False)

    @pytest.mark.parametrize(
        "data",
        [
            pd.Series(index=[0, 0], dtype=float),
            pd.DataFrame(index=[0, 0]),
            pd.DataFrame(columns=[0, 0]),
        ],
    )
    def test_setting_allows_duplicate_labels_raises(self, data):
        """Assigning ``False`` to the flag raises and leaves the flag ``True``."""
        with pytest.raises(
            pd.errors.DuplicateLabelError, match="Index has duplicates."
        ):
            data.flags.allows_duplicate_labels = False

        assert data.flags.allows_duplicate_labels is True

    def test_series_raises(self):
        """Concatenating a flagged Series with one sharing its labels raises."""
        unflagged = pd.Series(0, index=["a", "b"])
        flagged = pd.Series([0, 1], index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        with pytest.raises(
            pd.errors.DuplicateLabelError, match="Index has duplicates."
        ):
            pd.concat([unflagged, flagged])

    @pytest.mark.parametrize(
        "getter, target",
        [
            (operator.itemgetter(["A", "A"]), None),
            # loc
            (operator.itemgetter(["a", "a"]), "loc"),
            pytest.param(
                operator.itemgetter(("a", ["A", "A"])), "loc", marks=not_implemented
            ),
            (operator.itemgetter((["a", "a"], "A")), "loc"),
            # iloc
            (operator.itemgetter([0, 0]), "iloc"),
            pytest.param(
                operator.itemgetter((0, [0, 0])), "iloc", marks=not_implemented
            ),
            pytest.param(operator.itemgetter(([0, 0], 0)), "iloc"),
        ],
    )
    def test_getitem_raises(self, getter, target):
        """Selecting the same label/position twice from a flagged frame raises."""
        df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        # ``target`` names the indexer to use ("loc"/"iloc"); a falsy value
        # means index the frame itself.
        target = getattr(df, target) if target else df

        with pytest.raises(
            pd.errors.DuplicateLabelError, match="Index has duplicates."
        ):
            getter(target)

    @pytest.mark.parametrize(
        "objs, kwargs",
        [
            (
                [
                    pd.Series(1, index=[0, 1], name="a").set_flags(
                        allows_duplicate_labels=False
                    ),
                    pd.Series(2, index=[0, 1], name="a").set_flags(
                        allows_duplicate_labels=False
                    ),
                ],
                {"axis": 1},
            )
        ],
    )
    def test_concat_raises(self, objs, kwargs):
        """Column-wise concat of two flagged Series with the same name raises."""
        with pytest.raises(
            pd.errors.DuplicateLabelError, match="Index has duplicates."
        ):
            pd.concat(objs, **kwargs)

    @not_implemented
    def test_merge_raises(self):
        """Index-merging a flagged frame with a duplicate-index frame should raise."""
        left = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags(
            allows_duplicate_labels=False
        )
        right = pd.DataFrame({"B": [0, 1, 2]}, index=["a", "b", "b"])
        with pytest.raises(
            pd.errors.DuplicateLabelError, match="Index has duplicates."
        ):
            pd.merge(left, right, left_index=True, right_index=True)
@pytest.mark.parametrize(
    "idx",
    [
        pd.Index([1, 1]),
        pd.Index(["a", "a"]),
        pd.Index([1.1, 1.1]),
        pd.PeriodIndex([pd.Period("2000", "D")] * 2),
        pd.DatetimeIndex([pd.Timestamp("2000")] * 2),
        pd.TimedeltaIndex([pd.Timedelta("1D")] * 2),
        pd.CategoricalIndex(["a", "a"]),
        pd.IntervalIndex([pd.Interval(0, 1)] * 2),
        pd.MultiIndex.from_tuples([("a", 1), ("a", 1)]),
    ],
    ids=lambda x: type(x).__name__,
)
def test_raises_basic(idx):
    """Disallowing duplicates raises for each index type holding a repeated label.

    ``idx`` is used as a Series index, a DataFrame index and DataFrame columns.
    """
    expected_message = "Index has duplicates."
    # Each builder is called inside the ``raises`` block, so construction and
    # ``set_flags`` are both covered by the expectation.
    builders = (
        lambda: pd.Series(1, index=idx),
        lambda: pd.DataFrame({"A": [1, 1]}, index=idx),
        lambda: pd.DataFrame([[1, 2]], columns=idx),
    )
    for build in builders:
        with pytest.raises(pd.errors.DuplicateLabelError, match=expected_message):
            build().set_flags(allows_duplicate_labels=False)
def test_format_duplicate_labels_message():
    """``Index._format_duplicate_message`` lists each repeated label's positions.

    For labels ``a, b, a, b, c`` the expected frame has one row per repeated
    label ("a" and "b") and a "positions" column with the matching positions.
    """
    labels = pd.Index(["a", "b", "a", "b", "c"])
    expected_index = pd.Index(["a", "b"], name="label")
    expected = pd.DataFrame({"positions": [[0, 2], [1, 3]]}, index=expected_index)

    tm.assert_frame_equal(labels._format_duplicate_message(), expected)
def test_format_duplicate_labels_message_multi():
    """``MultiIndex._format_duplicate_message`` lists repeated tuples' positions.

    The index is the product of ``["A"]`` with ``a, b, a, b, c``; the expected
    frame is indexed by the product of ``["A"]`` with ``a, b``.
    """
    labels = pd.MultiIndex.from_product([["A"], ["a", "b", "a", "b", "c"]])
    expected_index = pd.MultiIndex.from_product([["A"], ["a", "b"]])
    expected = pd.DataFrame({"positions": [[0, 2], [1, 3]]}, index=expected_index)

    tm.assert_frame_equal(labels._format_duplicate_message(), expected)
def test_dataframe_insert_raises():
    """``insert(..., allow_duplicates=True)`` is rejected on a flagged frame.

    Expects a ``ValueError`` whose message matches "Cannot specify".
    """
    frame = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False)

    with pytest.raises(ValueError, match="Cannot specify"):
        frame.insert(0, "A", [3, 4], allow_duplicates=True)
@pytest.mark.parametrize(
    "method, frame_only",
    [
        (operator.methodcaller("set_index", "A", inplace=True), True),
        (operator.methodcaller("set_axis", ["A", "B"], inplace=True), False),
        (operator.methodcaller("reset_index", inplace=True), True),
        (operator.methodcaller("rename", lambda x: x, inplace=True), False),
    ],
)
def test_inplace_raises(method, frame_only):
    """In-place methods raise ``ValueError`` on objects that disallow duplicates.

    ``method`` is always applied to the frame; it is also applied to a Series
    taken from the frame unless ``frame_only`` is true.
    """
    frame = pd.DataFrame({"A": [0, 0], "B": [1, 2]}).set_flags(
        allows_duplicate_labels=False
    )
    column = frame["A"]
    # Set the flag on the extracted column explicitly before it is tested.
    column.flags.allows_duplicate_labels = False

    targets = [frame] if frame_only else [frame, column]
    for obj in targets:
        with pytest.raises(ValueError, match="Cannot specify"):
            method(obj)
def test_pickle():
    """A flagged Series and a flagged DataFrame compare equal after a pickle round trip."""
    series = pd.Series([1, 2]).set_flags(allows_duplicate_labels=False)
    series_restored = tm.round_trip_pickle(series)
    tm.assert_series_equal(series, series_restored)

    frame = pd.DataFrame({"A": []}).set_flags(allows_duplicate_labels=False)
    frame_restored = tm.round_trip_pickle(frame)
    tm.assert_frame_equal(frame, frame_restored)