A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/frame/indexing/test_getitem.py

406 lines
12 KiB

import re
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
Series,
Timestamp,
concat,
get_dummies,
period_range,
)
import pandas._testing as tm
from pandas.core.arrays import SparseArray
class TestGetitem:
def test_getitem_unused_level_raises(self):
# GH#20410
mi = MultiIndex(
levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]],
codes=[[1, 0], [1, 0]],
)
df = DataFrame(-1, index=range(3), columns=mi)
with pytest.raises(KeyError, match="notevenone"):
df["notevenone"]
def test_getitem_periodindex(self):
rng = period_range("1/1/2000", periods=5)
df = DataFrame(np.random.randn(10, 5), columns=rng)
ts = df[rng[0]]
tm.assert_series_equal(ts, df.iloc[:, 0])
# GH#1211; smoketest unrelated to the rest of this test
repr(df)
ts = df["1/1/2000"]
tm.assert_series_equal(ts, df.iloc[:, 0])
def test_getitem_list_of_labels_categoricalindex_cols(self):
# GH#16115
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
expected = DataFrame(
[[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats
)
dummies = get_dummies(cats)
result = dummies[list(dummies.columns)]
tm.assert_frame_equal(result, expected)
def test_getitem_sparse_column_return_type_and_dtype(self):
# https://github.com/pandas-dev/pandas/issues/23559
data = SparseArray([0, 1])
df = DataFrame({"A": data})
expected = Series(data, name="A")
result = df["A"]
tm.assert_series_equal(result, expected)
# Also check iloc and loc while we're here
result = df.iloc[:, 0]
tm.assert_series_equal(result, expected)
result = df.loc[:, "A"]
tm.assert_series_equal(result, expected)
class TestGetitemListLike:
    """Tests for ``DataFrame.__getitem__`` and ``iloc`` with list-like keys."""

    def test_getitem_list_missing_key(self):
        """The ``KeyError`` message names the label absent from the columns."""
        # GH#13822, incorrect error string with non-unique columns when missing
        # column is accessed
        frame = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]})
        frame.columns = ["x", "x", "z"]

        # Check that we get the correct value in the KeyError
        with pytest.raises(KeyError, match=r"\['y'\] not in index"):
            frame[["x", "y", "z"]]

    def test_getitem_list_duplicates(self):
        """List selection beside duplicated labels keeps the columns' name."""
        # GH#1943
        frame = DataFrame(np.random.randn(4, 4), columns=list("AABC"))
        frame.columns.name = "foo"

        selected = frame[["B", "C"]]
        assert selected.columns.name == "foo"

        tm.assert_frame_equal(selected, frame.iloc[:, 2:])

    def test_getitem_dupe_cols(self):
        """A missing label in a list key raises even with duplicate columns."""
        frame = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
        msg = "\"None of [Index(['baf'], dtype='object')] are in the [columns]\""
        with pytest.raises(KeyError, match=re.escape(msg)):
            frame[["baf"]]

    @pytest.mark.parametrize(
        "idx_type",
        [
            list,
            iter,
            Index,
            set,
            lambda labels: dict(zip(labels, range(len(labels)))),
            lambda labels: dict(zip(labels, range(len(labels)))).keys(),
        ],
        ids=["list", "iter", "Index", "set", "dict", "dict_keys"],
    )
    @pytest.mark.parametrize("levels", [1, 2])
    def test_getitem_listlike(self, idx_type, levels, float_frame):
        """Select two columns through each list-like container type.

        ``idx_type`` wraps the keys in the container under test and ``levels``
        chooses between the ``float_frame`` fixture (1) and a locally built
        frame whose columns are tuples (2).
        """
        # GH#21294
        if levels != 1:
            # MultiIndex columns
            frame = DataFrame(
                np.random.randn(8, 3),
                columns=Index(
                    [("foo", "bar"), ("baz", "qux"), ("peek", "aboo")],
                    name=("sth", "sth2"),
                ),
            )
            missing = ("good", "food")
        else:
            frame = float_frame
            missing = "food"

        keys = [frame.columns[1], frame.columns[0]]
        indexer = idx_type(keys)
        # Built from a second container so a one-shot iterator is not consumed.
        ordered_keys = list(idx_type(keys))

        if isinstance(indexer, (set, dict)):
            with tm.assert_produces_warning(FutureWarning):
                result = frame[indexer]
        else:
            result = frame[indexer]

        expected = frame.loc[:, ordered_keys]
        expected.columns.names = frame.columns.names
        tm.assert_frame_equal(result, expected)

        indexer = idx_type([*keys, missing])
        with pytest.raises(KeyError, match="not in index"), tm.assert_produces_warning(
            FutureWarning
        ):
            frame[indexer]

    def test_getitem_iloc_generator(self):
        """``iloc`` accepts a generator of row positions."""
        # GH#39614
        frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        positions = (pos for pos in [1, 2])

        result = frame.iloc[positions]

        expected = DataFrame({"a": [2, 3], "b": [5, 6]}, index=[1, 2])
        tm.assert_frame_equal(result, expected)

    def test_getitem_iloc_two_dimensional_generator(self):
        """``iloc`` accepts a generator of rows together with a column position."""
        frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        positions = (pos for pos in [1, 2])

        result = frame.iloc[positions, 1]

        expected = Series([5, 6], name="b", index=[1, 2])
        tm.assert_series_equal(result, expected)
class TestGetitemCallable:
def test_getitem_callable(self, float_frame):
# GH#12533
result = float_frame[lambda x: "A"]
expected = float_frame.loc[:, "A"]
tm.assert_series_equal(result, expected)
result = float_frame[lambda x: ["A", "B"]]
expected = float_frame.loc[:, ["A", "B"]]
tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])
df = float_frame[:3]
result = df[lambda x: [True, False, True]]
expected = float_frame.iloc[[0, 2], :]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_columns_one_level(self):
# GH#29749
df = DataFrame([[1, 2]], columns=[["a", "b"]])
expected = DataFrame([1], columns=[["a"]])
result = df["a"]
tm.assert_frame_equal(result, expected)
result = df.loc[:, "a"]
tm.assert_frame_equal(result, expected)
class TestGetitemBooleanMask:
    """Tests for ``DataFrame.__getitem__`` with boolean masks."""

    def test_getitem_bool_mask_categorical_index(self):
        """Masks built by comparing a ``CategoricalIndex`` select matching rows."""

        def make_frame(ordered):
            # Same data and labels each time; only the ``ordered`` flag varies.
            return DataFrame(
                {
                    "A": np.arange(6, dtype="int64"),
                },
                index=CategoricalIndex(
                    [1, 1, 2, 1, 3, 2],
                    dtype=CategoricalDtype([3, 2, 1], ordered=ordered),
                    name="B",
                ),
            )

        ordered_df = make_frame(True)
        unordered_df = make_frame(False)

        # Equality comparisons are checked on both frames.
        for label, positions in (("a", []), (1, [0, 1, 3])):
            for frame in (ordered_df, unordered_df):
                result = frame[frame.index == label]
                expected = frame.iloc[positions]
                tm.assert_frame_equal(result, expected)

        # since we have an ordered categorical
        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #                  categories=[3, 2, 1],
        #                  ordered=True,
        #                  name='B')
        result = ordered_df[ordered_df.index < 2]
        tm.assert_frame_equal(result, ordered_df.iloc[[4]])

        result = ordered_df[ordered_df.index > 1]
        tm.assert_frame_equal(result, ordered_df.iloc[[]])

        # unordered
        # cannot be compared
        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #                  categories=[3, 2, 1],
        #                  ordered=False,
        #                  name='B')
        msg = "Unordered Categoricals can only compare equality or not"
        with pytest.raises(TypeError, match=msg):
            unordered_df[unordered_df.index < 2]
        with pytest.raises(TypeError, match=msg):
            unordered_df[unordered_df.index > 1]

    @pytest.mark.parametrize(
        "data1,data2,expected_data",
        (
            (
                [[1, 2], [3, 4]],
                [[0.5, 6], [7, 8]],
                [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]],
            ),
            (
                [[1, 2], [3, 4]],
                [[5, 6], [7, 8]],
                [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]],
            ),
        ),
    )
    def test_getitem_bool_mask_duplicate_columns_mixed_dtypes(
        self,
        data1,
        data2,
        expected_data,
    ):
        """Mask a column-wise concat of two frames with ``df > 2``.

        ``expected_data`` holds one list per expected column, in column order.
        """
        # GH#31954
        left = DataFrame(np.array(data1))
        right = DataFrame(np.array(data2))
        combined = concat([left, right], axis=1)

        result = combined[combined > 2]

        columns_by_position = dict(enumerate(map(np.array, expected_data)))
        # Relabel positions 2 and 3 so the expected frame repeats labels 0 and 1.
        expected = DataFrame(columns_by_position).rename(columns={2: 0, 3: 1})
        tm.assert_frame_equal(result, expected)

    @pytest.fixture
    def df_dup_cols(self):
        """Return a 3x4 float64 frame whose first two columns are both "A"."""
        labels = ["A", "A", "C", "D"]
        return DataFrame(np.arange(12).reshape(3, 4), columns=labels, dtype="float64")

    def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols):
        """Masking with ``df.A > 6`` on duplicated "A" columns raises."""
        # `df.A > 6` is a DataFrame with a different shape from df

        # boolean with the duplicate raises
        frame = df_dup_cols
        msg = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=msg), tm.assert_produces_warning(
            FutureWarning, match="non-unique"
        ):
            frame[frame.A > 6]

    def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
        """A boolean Series mask filters rows the same with duplicate labels."""
        # boolean indexing
        # GH#4879
        unique_frame = DataFrame(
            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
        )
        expected = unique_frame[unique_frame.C > 6]
        expected.columns = df_dup_cols.columns

        result = df_dup_cols[df_dup_cols.C > 6]

        tm.assert_frame_equal(result, expected)
        # Smoke checks: these must not raise on the duplicate-column result.
        result.dtypes
        str(result)

    def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols):
        """A same-shape boolean frame mask works with duplicate labels."""
        # where
        unique_frame = DataFrame(
            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
        )
        # `df > 6` is a DataFrame with the same shape+alignment as df
        expected = unique_frame[unique_frame > 6]
        expected.columns = df_dup_cols.columns

        result = df_dup_cols[df_dup_cols > 6]

        tm.assert_frame_equal(result, expected)
        # Smoke checks: these must not raise on the duplicate-column result.
        result.dtypes
        str(result)

    def test_getitem_empty_frame_with_boolean(self):
        """Masking an empty frame with ``df > 0`` gives back an equal frame."""
        # Test for issue GH#11859
        empty = DataFrame()
        masked = empty[empty > 0]
        tm.assert_frame_equal(empty, masked)
class TestGetitemSlice:
def test_getitem_slice_float64(self, frame_or_series):
values = np.arange(10.0, 50.0, 2)
index = Index(values)
start, end = values[[5, 15]]
data = np.random.randn(20, 3)
if frame_or_series is not DataFrame:
data = data[:, 0]
obj = frame_or_series(data, index=index)
result = obj[start:end]
expected = obj.iloc[5:16]
tm.assert_equal(result, expected)
result = obj.loc[start:end]
tm.assert_equal(result, expected)
def test_getitem_datetime_slice(self):
# GH#43223
df = DataFrame(
{"a": 0},
index=DatetimeIndex(
[
"11.01.2011 22:00",
"11.01.2011 23:00",
"12.01.2011 00:00",
"2011-01-13 00:00",
]
),
)
with tm.assert_produces_warning(FutureWarning):
result = df["2011-01-01":"2011-11-01"]
expected = DataFrame(
{"a": 0},
index=DatetimeIndex(
["11.01.2011 22:00", "11.01.2011 23:00", "2011-01-13 00:00"]
),
)
tm.assert_frame_equal(result, expected)
class TestGetitemDeprecatedIndexers:
    """Tests that set and dict keys passed to ``__getitem__`` emit a warning."""

    @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}])
    def test_getitem_dict_and_set_deprecated(self, key):
        """Indexing with a set or a dict key produces a ``FutureWarning``."""
        # GH#42825
        columns = MultiIndex.from_tuples([("a", 1), ("b", 2)])
        frame = DataFrame([[1, 2], [3, 4]], columns=columns)

        with tm.assert_produces_warning(FutureWarning):
            frame[key]