A PyQt GUI application for converting InfoLease report outputs into Excel files. It handles parsing and summarizing, learns where files are meant to be stored, and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/indexing/test_loc.py

2985 lines
101 KiB

""" test label based indexing with loc """
from collections import namedtuple
from datetime import (
date,
datetime,
time,
timedelta,
)
import re
from dateutil.tz import gettz
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
Categorical,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
IndexSlice,
MultiIndex,
Period,
Series,
SparseDtype,
Timedelta,
Timestamp,
date_range,
timedelta_range,
to_datetime,
to_timedelta,
)
import pandas._testing as tm
from pandas.api.types import is_scalar
from pandas.core.api import Float64Index
from pandas.core.indexing import (
IndexingError,
_one_ellipsis_message,
)
from pandas.tests.indexing.common import Base
class TestLoc(Base):
    """``.loc`` tests built on ``Base``.

    Most tests only forward an indexer name, a key, a ``typs`` list and a
    ``fails`` exception type to ``self.check_result``, which is not defined in
    this class.  The final three tests construct their own objects and assert
    on them directly.
    """

    def test_loc_getitem_int(self):
        # int label
        self.check_result("loc", 2, typs=["labels"], fails=KeyError)

    def test_loc_getitem_label(self):
        # label
        self.check_result("loc", "c", typs=["empty"], fails=KeyError)

    def test_loc_getitem_label_out_of_range(self):
        # out of range label
        self.check_result(
            "loc", "f", typs=["ints", "uints", "labels", "mixed", "ts"], fails=KeyError
        )
        # the "floats" case is checked once; an identical second call adds nothing
        self.check_result("loc", "f", typs=["floats"], fails=KeyError)
        self.check_result("loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError)
        self.check_result("loc", 20, typs=["labels"], fails=KeyError)
        self.check_result("loc", 20, typs=["ts"], axes=0, fails=KeyError)
        self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError)

    def test_loc_getitem_label_list(self):
        # list of labels
        self.check_result(
            "loc", [0, 1, 2], typs=["ints", "uints", "floats"], fails=KeyError
        )
        self.check_result(
            "loc", [1, 3.0, "A"], typs=["ints", "uints", "floats"], fails=KeyError
        )

    def test_loc_getitem_label_list_with_missing(self):
        self.check_result("loc", [0, 1, 2], typs=["empty"], fails=KeyError)
        self.check_result(
            "loc", [0, 2, 10], typs=["ints", "uints", "floats"], axes=0, fails=KeyError
        )
        self.check_result(
            "loc", [3, 6, 7], typs=["ints", "uints", "floats"], axes=1, fails=KeyError
        )
        # GH 17758 - MultiIndex and missing keys
        self.check_result(
            "loc", [(1, 3), (1, 4), (2, 5)], typs=["multi"], axes=0, fails=KeyError
        )

    def test_loc_getitem_label_list_fails(self):
        # fails
        self.check_result(
            "loc", [20, 30, 40], typs=["ints", "uints"], axes=1, fails=KeyError
        )

    def test_loc_getitem_label_array_like(self):
        # TODO: test something?
        # array like
        pass

    def test_loc_getitem_bool(self):
        # boolean indexers
        b = [True, False, True, False]
        self.check_result("loc", b, typs=["empty"], fails=IndexError)

    def test_loc_getitem_label_slice(self):
        # label slices (with ints)
        # real label slices
        # GH 14316
        self.check_result(
            "loc",
            slice(1, 3),
            typs=["labels", "mixed", "empty", "ts", "floats"],
            fails=TypeError,
        )
        self.check_result(
            "loc", slice("20130102", "20130104"), typs=["ts"], axes=1, fails=TypeError
        )
        self.check_result("loc", slice(2, 8), typs=["mixed"], axes=0, fails=TypeError)
        self.check_result("loc", slice(2, 8), typs=["mixed"], axes=1, fails=KeyError)
        self.check_result(
            "loc", slice(2, 4, 2), typs=["mixed"], axes=0, fails=TypeError
        )

    def test_setitem_from_duplicate_axis(self):
        """Assigning an array to a duplicated row label fills the matching rows in order."""
        # GH#34034
        df = DataFrame(
            [[20, "a"], [200, "a"], [200, "a"]],
            columns=["col1", "col2"],
            index=[10, 1, 1],
        )
        df.loc[1, "col1"] = np.arange(2)
        expected = DataFrame(
            [[20, "a"], [0, "a"], [1, "a"]], columns=["col1", "col2"], index=[10, 1, 1]
        )
        tm.assert_frame_equal(df, expected)

    def test_column_types_consistent(self):
        """Filling the NaN rows of two columns from ``df2.values`` gives the expected frame."""
        # GH 26779
        df = DataFrame(
            data={
                "channel": [1, 2, 3],
                # np.nan is the canonical spelling; the np.NaN alias is gone in NumPy 2.0
                "A": ["String 1", np.nan, "String 2"],
                "B": [
                    Timestamp("2019-06-11 11:00:00"),
                    pd.NaT,
                    Timestamp("2019-06-11 12:00:00"),
                ],
            }
        )
        df2 = DataFrame(
            data={"A": ["String 3"], "B": [Timestamp("2019-06-11 12:00:00")]}
        )
        # Change Columns A and B to df2.values wherever Column A is NaN
        df.loc[df["A"].isna(), ["A", "B"]] = df2.values
        expected = DataFrame(
            data={
                "channel": [1, 2, 3],
                "A": ["String 1", "String 3", "String 2"],
                "B": [
                    Timestamp("2019-06-11 11:00:00"),
                    Timestamp("2019-06-11 12:00:00"),
                    Timestamp("2019-06-11 12:00:00"),
                ],
            }
        )
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "obj, key, exp",
        [
            (
                DataFrame([[1]], columns=Index([False])),
                IndexSlice[:, False],
                Series([1], name=False),
            ),
            (Series([1], index=Index([False])), False, [1]),
            (DataFrame([[1]], index=Index([False])), False, Series([1], name=False)),
        ],
    )
    def test_loc_getitem_single_boolean_arg(self, obj, key, exp):
        """A single ``False`` key is looked up as a label and returns ``exp``."""
        # GH 44322
        res = obj.loc[key]
        if isinstance(exp, (DataFrame, Series)):
            tm.assert_equal(res, exp)
        else:
            # plain (non-pandas) expectation: compare with ==
            assert res == exp
class TestLocBaseIndependent:
# Tests for loc that do not depend on subclassing Base
@pytest.mark.parametrize(
"msg, key",
[
(r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")),
(r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")),
(r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")),
(
r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'",
(Period(2018), Period(2016), "bar"),
),
(r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")),
(
r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)",
(Period(2017), "foo", Period(2015)),
),
(r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")),
],
)
def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
# GH#20684
"""
parse_time_string return parameter if type not matched.
PeriodIndex.get_loc takes returned value from parse_time_string as a tuple.
If first argument is Period and a tuple has 3 items,
process go on not raise exception
"""
df = DataFrame(
{
"A": [Period(2019), "x1", "x2"],
"B": [Period(2018), Period(2016), "y1"],
"C": [Period(2017), "z1", Period(2015)],
"V1": [1, 2, 3],
"V2": [10, 20, 30],
}
).set_index(["A", "B", "C"])
with pytest.raises(KeyError, match=msg):
df.loc[key]
def test_loc_getitem_missing_unicode_key(self):
df = DataFrame({"a": [1]})
with pytest.raises(KeyError, match="\u05d0"):
df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError
def test_loc_getitem_dups(self):
# GH 5678
# repeated getitems on a dup index returning a ndarray
df = DataFrame(
np.random.random_sample((20, 5)), index=["ABCDE"[x % 5] for x in range(20)]
)
expected = df.loc["A", 0]
result = df.loc[:, 0].loc["A"]
tm.assert_series_equal(result, expected)
def test_loc_getitem_dups2(self):
# GH4726
# dup indexing with iloc/loc
df = DataFrame(
[[1, 2, "foo", "bar", Timestamp("20130101")]],
columns=["a", "a", "a", "a", "a"],
index=[1],
)
expected = Series(
[1, 2, "foo", "bar", Timestamp("20130101")],
index=["a", "a", "a", "a", "a"],
name=1,
)
result = df.iloc[0]
tm.assert_series_equal(result, expected)
result = df.loc[1]
tm.assert_series_equal(result, expected)
def test_loc_setitem_dups(self):
# GH 6541
df_orig = DataFrame(
{
"me": list("rttti"),
"foo": list("aaade"),
"bar": np.arange(5, dtype="float64") * 1.34 + 2,
"bar2": np.arange(5, dtype="float64") * -0.34 + 2,
}
).set_index("me")
indexer = (
"r",
["bar", "bar2"],
)
df = df_orig.copy()
df.loc[indexer] *= 2.0
tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
indexer = (
"r",
"bar",
)
df = df_orig.copy()
df.loc[indexer] *= 2.0
assert df.loc[indexer] == 2.0 * df_orig.loc[indexer]
indexer = (
"t",
["bar", "bar2"],
)
df = df_orig.copy()
df.loc[indexer] *= 2.0
tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
def test_loc_setitem_slice(self):
# GH10503
# assigning the same type should not change the type
df1 = DataFrame({"a": [0, 1, 1], "b": Series([100, 200, 300], dtype="uint32")})
ix = df1["a"] == 1
newb1 = df1.loc[ix, "b"] + 1
df1.loc[ix, "b"] = newb1
expected = DataFrame(
{"a": [0, 1, 1], "b": Series([100, 201, 301], dtype="uint32")}
)
tm.assert_frame_equal(df1, expected)
# assigning a new type should get the inferred type
df2 = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64")
ix = df1["a"] == 1
newb2 = df2.loc[ix, "b"]
df1.loc[ix, "b"] = newb2
expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64")
tm.assert_frame_equal(df2, expected)
def test_loc_setitem_dtype(self):
# GH31340
df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]})
cols = ["a", "b", "c"]
df.loc[:, cols] = df.loc[:, cols].astype("float32")
expected = DataFrame(
{
"id": ["A"],
"a": np.array([1.2], dtype="float32"),
"b": np.array([0.0], dtype="float32"),
"c": np.array([-2.5], dtype="float32"),
}
) # id is inferred as object
tm.assert_frame_equal(df, expected)
def test_getitem_label_list_with_missing(self):
s = Series(range(3), index=["a", "b", "c"])
# consistency
with pytest.raises(KeyError, match="not in index"):
s[["a", "d"]]
s = Series(range(3))
with pytest.raises(KeyError, match="not in index"):
s[[0, 3]]
@pytest.mark.parametrize("index", [[True, False], [True, False, True, False]])
def test_loc_getitem_bool_diff_len(self, index):
# GH26658
s = Series([1, 2, 3])
msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}"
with pytest.raises(IndexError, match=msg):
s.loc[index]
def test_loc_getitem_int_slice(self):
# TODO: test something here?
pass
def test_loc_to_fail(self):
# GH3449
df = DataFrame(
np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"]
)
# raise a KeyError?
msg = (
r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are "
r"in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
df.loc[[1, 2], [1, 2]]
def test_loc_to_fail2(self):
# GH 7496
# loc should not fallback
s = Series(dtype=object)
s.loc[1] = 1
s.loc["a"] = 2
with pytest.raises(KeyError, match=r"^-1$"):
s.loc[-1]
msg = (
r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are "
r"in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
s.loc[[-1, -2]]
msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\""
with pytest.raises(KeyError, match=msg):
s.loc[["4"]]
s.loc[-1] = 3
with pytest.raises(KeyError, match="not in index"):
s.loc[[-1, -2]]
s["a"] = 2
msg = (
r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are "
r"in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
s.loc[[-2]]
del s["a"]
with pytest.raises(KeyError, match=msg):
s.loc[[-2]] = 0
def test_loc_to_fail3(self):
# inconsistency between .loc[values] and .loc[values,:]
# GH 7999
df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"])
msg = (
r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are "
r"in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
df.loc[[3], :]
with pytest.raises(KeyError, match=msg):
df.loc[[3]]
def test_loc_getitem_list_with_fail(self):
# 15747
# should KeyError if *any* missing labels
s = Series([1, 2, 3])
s.loc[[2]]
with pytest.raises(
KeyError,
match=re.escape(
"\"None of [Int64Index([3], dtype='int64')] are in the [index]\""
),
):
s.loc[[3]]
# a non-match and a match
with pytest.raises(KeyError, match="not in index"):
s.loc[[2, 3]]
def test_loc_index(self):
# gh-17131
# a boolean index should index like a boolean numpy array
df = DataFrame(
np.random.random(size=(5, 10)),
index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"],
)
mask = df.index.map(lambda x: "alpha" in x)
expected = df.loc[np.array(mask)]
result = df.loc[mask]
tm.assert_frame_equal(result, expected)
result = df.loc[mask.values]
tm.assert_frame_equal(result, expected)
result = df.loc[pd.array(mask, dtype="boolean")]
tm.assert_frame_equal(result, expected)
def test_loc_general(self):
df = DataFrame(
np.random.rand(4, 4),
columns=["A", "B", "C", "D"],
index=["A", "B", "C", "D"],
)
# want this to work
result = df.loc[:, "A":"B"].iloc[0:2, :]
assert (result.columns == ["A", "B"]).all()
assert (result.index == ["A", "B"]).all()
# mixed type
result = DataFrame({"a": [Timestamp("20130101")], "b": [1]}).iloc[0]
expected = Series([Timestamp("20130101"), 1], index=["a", "b"], name=0)
tm.assert_series_equal(result, expected)
assert result.dtype == object
@pytest.fixture
def frame_for_consistency(self):
    """Five-row frame with a daily ``date`` column and an int64 ``val`` column 0..4."""
    dates = date_range("2000-01-01", "2000-01-5")
    values = Series(range(5), dtype=np.int64)
    return DataFrame({"date": dates, "val": values})
@pytest.mark.parametrize(
"val",
[0, np.array(0, dtype=np.int64), np.array([0, 0, 0, 0, 0], dtype=np.int64)],
)
def test_loc_setitem_consistency(self, frame_for_consistency, val):
# GH 6149
# coerce similarly for setitem and loc when rows have a null-slice
expected = DataFrame(
{
"date": Series(0, index=range(5), dtype=np.int64),
"val": Series(range(5), dtype=np.int64),
}
)
df = frame_for_consistency.copy()
df.loc[:, "date"] = val
tm.assert_frame_equal(df, expected)
def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
# GH 6149
# coerce similarly for setitem and loc when rows have a null-slice
expected = DataFrame(
{
"date": Series("foo", index=range(5)),
"val": Series(range(5), dtype=np.int64),
}
)
df = frame_for_consistency.copy()
df.loc[:, "date"] = "foo"
tm.assert_frame_equal(df, expected)
def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
# GH 6149
# coerce similarly for setitem and loc when rows have a null-slice
expected = DataFrame(
{
"date": Series(1.0, index=range(5)),
"val": Series(range(5), dtype=np.int64),
}
)
df = frame_for_consistency.copy()
df.loc[:, "date"] = 1.0
tm.assert_frame_equal(df, expected)
def test_loc_setitem_consistency_single_row(self):
# GH 15494
# setting on frame with single row
df = DataFrame({"date": Series([Timestamp("20180101")])})
df.loc[:, "date"] = "string"
expected = DataFrame({"date": Series(["string"])})
tm.assert_frame_equal(df, expected)
def test_loc_setitem_consistency_empty(self):
# empty (essentially noops)
expected = DataFrame(columns=["x", "y"])
expected["x"] = expected["x"].astype(np.int64)
df = DataFrame(columns=["x", "y"])
df.loc[:, "x"] = 1
tm.assert_frame_equal(df, expected)
df = DataFrame(columns=["x", "y"])
df["x"] = 1
tm.assert_frame_equal(df, expected)
def test_loc_setitem_consistency_slice_column_len(self):
# .loc[:,column] setting with slice == len of the column
# GH10408
levels = [
["Region_1"] * 4,
["Site_1", "Site_1", "Site_2", "Site_2"],
[3987227376, 3980680971, 3977723249, 3977723089],
]
mi = MultiIndex.from_arrays(levels, names=["Region", "Site", "RespondentID"])
clevels = [
["Respondent", "Respondent", "Respondent", "OtherCat", "OtherCat"],
["Something", "StartDate", "EndDate", "Yes/No", "SomethingElse"],
]
cols = MultiIndex.from_arrays(clevels, names=["Level_0", "Level_1"])
values = [
["A", "5/25/2015 10:59", "5/25/2015 11:22", "Yes", np.nan],
["A", "5/21/2015 9:40", "5/21/2015 9:52", "Yes", "Yes"],
["A", "5/20/2015 8:27", "5/20/2015 8:41", "Yes", np.nan],
["A", "5/20/2015 8:33", "5/20/2015 9:09", "Yes", "No"],
]
df = DataFrame(values, index=mi, columns=cols)
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
df.loc[:, ("Respondent", "StartDate")]
)
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
df.loc[:, ("Respondent", "EndDate")]
)
df.loc[:, ("Respondent", "Duration")] = (
df.loc[:, ("Respondent", "EndDate")]
- df.loc[:, ("Respondent", "StartDate")]
)
df.loc[:, ("Respondent", "Duration")] = df.loc[
:, ("Respondent", "Duration")
].astype("timedelta64[s]")
expected = Series(
[1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration")
)
tm.assert_series_equal(df[("Respondent", "Duration")], expected)
@pytest.mark.parametrize("unit", ["Y", "M", "D", "h", "m", "s", "ms", "us"])
def test_loc_assign_non_ns_datetime(self, unit):
# GH 27395, non-ns dtype assignment via .loc should work
# and return the same result when using simple assignment
df = DataFrame(
{
"timestamp": [
np.datetime64("2017-02-11 12:41:29"),
np.datetime64("1991-11-07 04:22:37"),
]
}
)
df.loc[:, unit] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]")
df["expected"] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]")
expected = Series(df.loc[:, "expected"], name=unit)
tm.assert_series_equal(df.loc[:, unit], expected)
def test_loc_modify_datetime(self):
# see gh-28837
df = DataFrame.from_dict(
{"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]}
)
df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True)
df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"]
df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"]
expected = DataFrame(
[
[1485264372711, "2017-01-24 13:26:12.711", "2017-01-24 13:26:12.711"],
[1485265925110, "2017-01-24 13:52:05.110", "2017-01-24 13:52:05.110"],
[1540215845888, "2018-10-22 13:44:05.888", "2018-10-22 13:44:05.888"],
[1540282121025, "2018-10-23 08:08:41.025", "2018-10-23 08:08:41.025"],
],
columns=["date", "date_dt", "date_dt_cp"],
)
columns = ["date_dt", "date_dt_cp"]
expected[columns] = expected[columns].apply(to_datetime)
tm.assert_frame_equal(df, expected)
def test_loc_setitem_frame_with_reindex(self, using_array_manager):
# GH#6254 setting issue
df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float)
df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
# setting integer values into a float dataframe with loc is inplace,
# so we retain float dtype
ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
if using_array_manager:
# TODO(ArrayManager) with "split" path, we still overwrite the column
# and therefore don't take the dtype of the underlying object into account
ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
expected = DataFrame({"A": ser})
tm.assert_frame_equal(df, expected)
def test_loc_setitem_frame_with_reindex_mixed(self):
# GH#40480
df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float)
df["B"] = "string"
df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
expected = DataFrame({"A": ser})
expected["B"] = "string"
tm.assert_frame_equal(df, expected)
def test_loc_setitem_frame_with_inverted_slice(self):
# GH#40480
df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float)
df["B"] = "string"
df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64")
expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3])
tm.assert_frame_equal(df, expected)
# TODO(ArrayManager) "split" path overwrites column and therefore don't take
# the dtype of the underlying object into account
@td.skip_array_manager_not_yet_implemented
def test_loc_setitem_empty_frame(self):
# GH#6252 setting with an empty frame
keys1 = ["@" + str(i) for i in range(5)]
val1 = np.arange(5, dtype="int64")
keys2 = ["@" + str(i) for i in range(4)]
val2 = np.arange(4, dtype="int64")
index = list(set(keys1).union(keys2))
df = DataFrame(index=index)
df["A"] = np.nan
df.loc[keys1, "A"] = val1
df["B"] = np.nan
df.loc[keys2, "B"] = val2
# Because df["A"] was initialized as float64, setting values into it
# is inplace, so that dtype is retained
sera = Series(val1, index=keys1, dtype=np.float64)
serb = Series(val2, index=keys2)
expected = DataFrame({"A": sera, "B": serb}).reindex(index=index)
tm.assert_frame_equal(df, expected)
def test_loc_setitem_frame(self):
df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD"))
result = df.iloc[0, 0]
df.loc["a", "A"] = 1
result = df.loc["a", "A"]
assert result == 1
result = df.iloc[0, 0]
assert result == 1
df.loc[:, "B":"D"] = 0
expected = df.loc[:, "B":"D"]
result = df.iloc[:, 1:]
tm.assert_frame_equal(result, expected)
def test_loc_setitem_frame_nan_int_coercion_invalid(self):
# GH 8669
# invalid coercion of nan -> int
df = DataFrame({"A": [1, 2, 3], "B": np.nan})
df.loc[df.B > df.A, "B"] = df.A
expected = DataFrame({"A": [1, 2, 3], "B": np.nan})
tm.assert_frame_equal(df, expected)
def test_loc_setitem_frame_mixed_labels(self):
# GH 6546
# setting with mixed labels
df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]})
result = df.loc[0, [1, 2]]
expected = Series(
[1, 3], index=Index([1, 2], dtype=object), dtype=object, name=0
)
tm.assert_series_equal(result, expected)
expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]})
df.loc[0, [1, 2]] = [5, 6]
tm.assert_frame_equal(df, expected)
def test_loc_setitem_frame_multiples(self):
# multiple setting
df = DataFrame(
{"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)}
)
rhs = df.loc[1:2]
rhs.index = df.index[0:2]
df.loc[0:1] = rhs
expected = DataFrame(
{"A": ["bar", "baz", "baz"], "B": Series([1, 2, 2], dtype=np.int64)}
)
tm.assert_frame_equal(df, expected)
# multiple setting with frame on rhs (with M8)
df = DataFrame(
{
"date": date_range("2000-01-01", "2000-01-5"),
"val": Series(range(5), dtype=np.int64),
}
)
expected = DataFrame(
{
"date": [
Timestamp("20000101"),
Timestamp("20000102"),
Timestamp("20000101"),
Timestamp("20000102"),
Timestamp("20000103"),
],
"val": Series([0, 1, 0, 1, 2], dtype=np.int64),
}
)
rhs = df.loc[0:2]
rhs.index = df.index[2:5]
df.loc[2:4] = rhs
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize(
"indexer", [["A"], slice(None, "A", None), np.array(["A"])]
)
@pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
def test_loc_setitem_with_scalar_index(self, indexer, value):
# GH #19474
# assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
# elementwisely, not using "setter('A', ['Z'])".
df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
df.loc[0, indexer] = value
result = df.loc[0, "A"]
assert is_scalar(result) and result == "Z"
@pytest.mark.parametrize(
"index,box,expected",
[
(
([0, 2], ["A", "B", "C", "D"]),
7,
DataFrame(
[[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]],
columns=["A", "B", "C", "D"],
),
),
(
(1, ["C", "D"]),
[7, 8],
DataFrame(
[[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]],
columns=["A", "B", "C", "D"],
),
),
(
(1, ["A", "B", "C"]),
np.array([7, 8, 9], dtype=np.int64),
DataFrame(
[[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], columns=["A", "B", "C"]
),
),
(
(slice(1, 3, None), ["B", "C", "D"]),
[[7, 8, 9], [10, 11, 12]],
DataFrame(
[[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]],
columns=["A", "B", "C", "D"],
),
),
(
(slice(1, 3, None), ["C", "A", "D"]),
np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64),
DataFrame(
[[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]],
columns=["A", "B", "C", "D"],
),
),
(
(slice(None, None, None), ["A", "C"]),
DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
DataFrame(
[[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
),
),
],
)
def test_loc_setitem_missing_columns(self, index, box, expected):
# GH 29334
df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
df.loc[index] = box
tm.assert_frame_equal(df, expected)
def test_loc_coercion(self):
# GH#12411
df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC"), pd.NaT]})
expected = df.dtypes
result = df.iloc[[0]]
tm.assert_series_equal(result.dtypes, expected)
result = df.iloc[[1]]
tm.assert_series_equal(result.dtypes, expected)
def test_loc_coercion2(self):
# GH#12045
import datetime
df = DataFrame(
{"date": [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]}
)
expected = df.dtypes
result = df.iloc[[0]]
tm.assert_series_equal(result.dtypes, expected)
result = df.iloc[[1]]
tm.assert_series_equal(result.dtypes, expected)
def test_loc_coercion3(self):
# GH#11594
df = DataFrame({"text": ["some words"] + [None] * 9})
expected = df.dtypes
result = df.iloc[0:2]
tm.assert_series_equal(result.dtypes, expected)
result = df.iloc[3:]
tm.assert_series_equal(result.dtypes, expected)
def test_setitem_new_key_tz(self, indexer_sl):
# GH#12862 should not raise on assigning the second value
vals = [
to_datetime(42).tz_localize("UTC"),
to_datetime(666).tz_localize("UTC"),
]
expected = Series(vals, index=["foo", "bar"])
ser = Series(dtype=object)
indexer_sl(ser)["foo"] = vals[0]
indexer_sl(ser)["bar"] = vals[1]
tm.assert_series_equal(ser, expected)
def test_loc_non_unique(self):
# GH3659
# non-unique indexer with loc slice
# https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
# these are going to raise because the we are non monotonic
df = DataFrame(
{"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]
)
msg = "'Cannot get left slice bound for non-unique label: 1'"
with pytest.raises(KeyError, match=msg):
df.loc[1:]
msg = "'Cannot get left slice bound for non-unique label: 0'"
with pytest.raises(KeyError, match=msg):
df.loc[0:]
msg = "'Cannot get left slice bound for non-unique label: 1'"
with pytest.raises(KeyError, match=msg):
df.loc[1:2]
# monotonic are ok
df = DataFrame(
{"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]
).sort_index(axis=0)
result = df.loc[1:]
expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3])
tm.assert_frame_equal(result, expected)
result = df.loc[0:]
tm.assert_frame_equal(result, df)
result = df.loc[1:2]
expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2])
tm.assert_frame_equal(result, expected)
@pytest.mark.arm_slow
def test_loc_non_unique_memory_error(self):
# GH 4280
# non_unique index with a large selection triggers a memory error
columns = list("ABCDEFG")
def gen_test(length, l2):
return pd.concat(
[
DataFrame(
np.random.randn(length, len(columns)),
index=np.arange(length),
columns=columns,
),
DataFrame(
np.ones((l2, len(columns))), index=[0] * l2, columns=columns
),
]
)
def gen_expected(df, mask):
len_mask = len(mask)
return pd.concat(
[
df.take([0]),
DataFrame(
np.ones((len_mask, len(columns))),
index=[0] * len_mask,
columns=columns,
),
df.take(mask[1:]),
]
)
df = gen_test(900, 100)
assert df.index.is_unique is False
mask = np.arange(100)
result = df.loc[mask]
expected = gen_expected(df, mask)
tm.assert_frame_equal(result, expected)
df = gen_test(900000, 100000)
assert df.index.is_unique is False
mask = np.arange(100000)
result = df.loc[mask]
expected = gen_expected(df, mask)
tm.assert_frame_equal(result, expected)
def test_loc_name(self):
# GH 3880
df = DataFrame([[1, 1], [1, 1]])
df.index.name = "index_name"
result = df.iloc[[0, 1]].index.name
assert result == "index_name"
result = df.loc[[0, 1]].index.name
assert result == "index_name"
def test_loc_empty_list_indexer_is_ok(self):
df = tm.makeCustomDataframe(5, 2)
# vertical empty
tm.assert_frame_equal(
df.loc[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True
)
# horizontal empty
tm.assert_frame_equal(
df.loc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True
)
# horizontal empty
tm.assert_frame_equal(
df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
)
def test_identity_slice_returns_new_object(self, using_array_manager, request):
# GH13873
if using_array_manager:
mark = pytest.mark.xfail(
reason="setting with .loc[:, 'a'] does not alter inplace"
)
request.node.add_marker(mark)
original_df = DataFrame({"a": [1, 2, 3]})
sliced_df = original_df.loc[:]
assert sliced_df is not original_df
assert original_df[:] is not original_df
# should be a shallow copy
assert np.shares_memory(original_df["a"]._values, sliced_df["a"]._values)
# Setting using .loc[:, "a"] sets inplace so alters both sliced and orig
original_df.loc[:, "a"] = [4, 4, 4]
assert (sliced_df["a"] == 4).all()
# These should not return copies
assert original_df is original_df.loc[:, :]
df = DataFrame(np.random.randn(10, 4))
assert df[0] is df.loc[:, 0]
# Same tests for Series
original_series = Series([1, 2, 3, 4, 5, 6])
sliced_series = original_series.loc[:]
assert sliced_series is not original_series
assert original_series[:] is not original_series
original_series[:3] = [7, 8, 9]
assert all(sliced_series[:3] == [7, 8, 9])
@pytest.mark.xfail(reason="accidental fix reverted - GH37497")
def test_loc_copy_vs_view(self):
    """In-place ``+=`` on a column taken via ``loc`` from a copy is compared with the source frame."""
    # GH 15631
    source = DataFrame(zip(range(3), range(3)), columns=["a", "b"])

    first_copy = source.copy()
    column = first_copy.loc[:, "a"]
    column += 2
    tm.assert_frame_equal(source, first_copy)

    second_copy = source.copy()
    column = second_copy.loc[source.index, "a"]
    column += 2
    tm.assert_frame_equal(source, second_copy)
def test_loc_uint64(self):
# GH20722
# Test whether loc accept uint64 max value as index.
umax = np.iinfo("uint64").max
ser = Series([1, 2], index=[umax - 1, umax])
result = ser.loc[umax - 1]
expected = ser.iloc[0]
assert result == expected
result = ser.loc[[umax - 1]]
expected = ser.iloc[[0]]
tm.assert_series_equal(result, expected)
result = ser.loc[[umax - 1, umax]]
tm.assert_series_equal(result, ser)
def test_loc_uint64_disallow_negative(self):
# GH#41775
umax = np.iinfo("uint64").max
ser = Series([1, 2], index=[umax - 1, umax])
with pytest.raises(KeyError, match="-1"):
# don't wrap around
ser.loc[-1]
with pytest.raises(KeyError, match="-1"):
# don't wrap around
ser.loc[[-1]]
def test_loc_setitem_empty_append_expands_rows(self):
# GH6173, various appends to an empty dataframe
data = [1, 2, 3]
expected = DataFrame({"x": data, "y": [None] * len(data)})
# appends to fit length of data
df = DataFrame(columns=["x", "y"])
df.loc[:, "x"] = data
tm.assert_frame_equal(df, expected)
def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self):
# GH#37932 same as test_loc_setitem_empty_append_expands_rows
# but with mixed dtype so we go through take_split_path
data = [1, 2, 3]
expected = DataFrame({"x": data, "y": [None] * len(data)})
df = DataFrame(columns=["x", "y"])
df["x"] = df["x"].astype(np.int64)
df.loc[:, "x"] = data
tm.assert_frame_equal(df, expected)
def test_loc_setitem_empty_append_single_value(self):
# only appends one value
expected = DataFrame({"x": [1.0], "y": [np.nan]})
df = DataFrame(columns=["x", "y"], dtype=float)
df.loc[0, "x"] = expected.loc[0, "x"]
tm.assert_frame_equal(df, expected)
def test_loc_setitem_empty_append_raises(self):
# GH6173, various appends to an empty dataframe
data = [1, 2]
df = DataFrame(columns=["x", "y"])
df.index = df.index.astype(np.int64)
msg = (
r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] "
r"are in the \[index\]"
)
with pytest.raises(KeyError, match=msg):
df.loc[[0, 1], "x"] = data
msg = "|".join(
[
"cannot copy sequence with size 2 to array axis with dimension 0",
r"could not broadcast input array from shape \(2,\) into shape \(0,\)",
"Must have equal len keys and value when setting with an iterable",
]
)
with pytest.raises(ValueError, match=msg):
df.loc[0:2, "x"] = data
def test_indexing_zerodim_np_array(self):
# GH24924
df = DataFrame([[1, 2], [3, 4]])
result = df.loc[np.array(0)]
s = Series([1, 2], name=0)
tm.assert_series_equal(result, s)
def test_series_indexing_zerodim_np_array(self):
# GH24924
s = Series([1, 2])
result = s.loc[np.array(0)]
assert result == 1
def test_loc_reverse_assignment(self):
# GH26939
data = [1, 2, 3, 4, 5, 6] + [None] * 4
expected = Series(data, index=range(2010, 2020))
result = Series(index=range(2010, 2020), dtype=np.float64)
result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1]
tm.assert_series_equal(result, expected)
def test_loc_setitem_str_to_small_float_conversion_type(self):
# GH#20388
np.random.seed(13)
col_data = [str(np.random.random() * 1e-12) for _ in range(5)]
result = DataFrame(col_data, columns=["A"])
expected = DataFrame(col_data, columns=["A"], dtype=object)
tm.assert_frame_equal(result, expected)
# assigning with loc/iloc attempts to set the values inplace, which
# in this case is successful
result.loc[result.index, "A"] = [float(x) for x in col_data]
expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
tm.assert_frame_equal(result, expected)
# assigning the entire column using __setitem__ swaps in the new array
# GH#???
result["A"] = [float(x) for x in col_data]
expected = DataFrame(col_data, columns=["A"], dtype=float)
tm.assert_frame_equal(result, expected)
def test_loc_getitem_time_object(self, frame_or_series):
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
mask = (rng.hour == 9) & (rng.minute == 30)
obj = DataFrame(np.random.randn(len(rng), 3), index=rng)
obj = tm.get_obj(obj, frame_or_series)
result = obj.loc[time(9, 30)]
exp = obj.loc[mask]
tm.assert_equal(result, exp)
chunk = obj.loc["1/4/2000":]
result = chunk.loc[time(9, 30)]
expected = result[-1:]
# Without resetting the freqs, these are 5 min and 1440 min, respectively
result.index = result.index._with_freq(None)
expected.index = expected.index._with_freq(None)
tm.assert_equal(result, expected)
@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
@td.skip_if_no_scipy
def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
    """``.loc`` with a ``range`` on a frame built from a scipy sparse matrix."""
    import scipy.sparse

    matrix_cls = getattr(scipy.sparse, spmatrix_t)

    # The bug is triggered by a sparse matrix with purely sparse columns. So the
    # recipe below generates a rectangular matrix of dimension (5, 7) where all the
    # diagonal cells are ones, meaning the last two columns are purely sparse.
    n_rows, n_cols = 5, 7
    dense = np.eye(n_rows, n_cols, dtype=dtype)
    spmatrix = matrix_cls(dense, dtype=dtype)
    df = DataFrame.sparse.from_spmatrix(spmatrix)
    row_range = range(2, n_rows)

    # regression test for GH#34526
    selected_values = df.loc[row_range].values
    expected_values = spmatrix.toarray()[row_range]
    tm.assert_numpy_array_equal(selected_values, expected_values)

    # regression test for GH#34540
    selected_dtypes = df.loc[row_range].dtypes.values
    expected_dtypes = np.full(n_cols, SparseDtype(dtype, fill_value=0))
    tm.assert_numpy_array_equal(selected_dtypes, expected_dtypes)
def test_loc_getitem_listlike_all_retains_sparse(self):
df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))})
result = df.loc[[0, 1]]
tm.assert_frame_equal(result, df)
@td.skip_if_no_scipy
def test_loc_getitem_sparse_frame(self):
    """``.loc`` with ``range`` keys on a sparse frame, including a chained lookup."""
    # GH34687
    from scipy.sparse import eye

    sparse_float = SparseDtype("float64", 0.0)
    df = DataFrame.sparse.from_spmatrix(eye(5))

    first_two = df.loc[range(2)]
    expected_two = DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]],
        dtype=sparse_float,
    )
    tm.assert_frame_equal(first_two, expected_two)

    # chained selection
    first_one = df.loc[range(2)].loc[range(1)]
    expected_one = DataFrame([[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=sparse_float)
    tm.assert_frame_equal(first_one, expected_one)
def test_loc_getitem_sparse_series(self):
# GH34687
s = Series([1.0, 0.0, 0.0, 0.0, 0.0], dtype=SparseDtype("float64", 0.0))
result = s.loc[range(2)]
expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0))
tm.assert_series_equal(result, expected)
result = s.loc[range(3)].loc[range(2)]
expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("key_type", [iter, np.array, Series, Index])
def test_loc_getitem_iterable(self, float_frame, key_type):
    """Column selection with various iterable key types matches a plain list key."""
    columns = key_type(["A", "B", "C"])
    by_iterable = float_frame.loc[:, columns]
    by_list = float_frame.loc[:, ["A", "B", "C"]]
    tm.assert_frame_equal(by_iterable, by_list)
def test_loc_getitem_timedelta_0seconds(self):
# GH#10583
df = DataFrame(np.random.normal(size=(10, 4)))
df.index = timedelta_range(start="0s", periods=10, freq="s")
expected = df.loc[Timedelta("0s") :, :]
result = df.loc["0s":, :]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "val,expected", [(2**63 - 1, Series([1])), (2**63, Series([2]))]
)
def test_loc_getitem_uint64_scalar(self, val, expected):
    """Scalar lookup of labels on either side of the int64 maximum."""
    # see GH#19399
    labels = [2**63 - 1, 2**63]
    frame = DataFrame([1, 2], index=labels)

    # the selected row is named after the label that was looked up
    expected.name = val
    row = frame.loc[val]
    tm.assert_series_equal(row, expected)
def test_loc_setitem_int_label_with_float64index(self):
# note labels are floats
ser = Series(["a", "b", "c"], index=[0, 0.5, 1])
expected = ser.copy()
ser.loc[1] = "zoo"
expected.iloc[2] = "zoo"
tm.assert_series_equal(ser, expected)
@pytest.mark.parametrize(
    "indexer, expected",
    [
        # The test name is a misnomer in the 0 case as df.index[indexer]
        # is a scalar.
        (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]),
        ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]),
    ],
)
def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected):
    """Set a scalar on rows chosen by labels taken from a timedelta index."""
    # GH#16637
    tdi = to_timedelta(range(10), unit="s")
    frame = DataFrame({"x": range(10)}, dtype="int64", index=tdi)

    row_labels = frame.index[indexer]
    frame.loc[row_labels, "x"] = 20

    expected = DataFrame(expected, index=tdi, columns=["x"], dtype="int64")
    tm.assert_frame_equal(expected, frame)
def test_loc_setitem_categorical_values_partial_column_slice(self):
    """Writing a Categorical into part of a column stores its values."""
    # Assigning a Category to parts of a int/... column uses the values of
    # the Categorical
    frame = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})

    frame.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
    frame.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])

    expected = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
    tm.assert_frame_equal(frame, expected)
def test_loc_setitem_single_row_categorical(self):
    """Assign a Categorical to a whole column of a one-row frame via ``.loc``."""
    # GH#25495
    frame = DataFrame({"Alpha": ["a"], "Numeric": [0]})
    cat_values = Categorical(frame["Alpha"], categories=["a", "b", "c"])

    frame.loc[:, "Alpha"] = cat_values

    expected = Series(cat_values, index=frame.index, name="Alpha")
    tm.assert_series_equal(frame["Alpha"], expected)
def test_loc_setitem_datetime_coercion(self):
    """Set ``np.datetime64`` and ``datetime.date`` values into a Timestamp column."""
    # GH#1048
    frame = DataFrame({"c": [Timestamp("2010-10-01")] * 3})

    frame.loc[0:1, "c"] = np.datetime64("2008-08-08")
    for row in (0, 1):
        assert Timestamp("2008-08-08") == frame.loc[row, "c"]

    frame.loc[2, "c"] = date(2005, 5, 5)
    with tm.assert_produces_warning(FutureWarning):
        # Comparing Timestamp to date obj is deprecated
        assert Timestamp("2005-05-05") == frame.loc[2, "c"]
    assert Timestamp("2005-05-05").date() == frame.loc[2, "c"]
@pytest.mark.parametrize("idxer", ["var", ["var"]])
def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
    """Assign a frame into a float column of a frame with an hourly index."""
    # GH#11365
    hourly = date_range(
        start="2015-07-12", periods=3, freq="H", tz=tz_naive_fixture
    )
    expected = DataFrame(1.2, index=hourly, columns=["var"])

    # if result started off with object dtype, then the .loc.__setitem__
    # below would retain object dtype
    result = DataFrame(index=hourly, columns=["var"], dtype=np.float64)
    result.loc[:, idxer] = expected
    tm.assert_frame_equal(result, expected)
def test_loc_setitem_time_key(self, using_array_manager):
index = date_range("2012-01-01", "2012-01-05", freq="30min")
df = DataFrame(np.random.randn(len(index), 5), index=index)
akey = time(12, 0, 0)
bkey = slice(time(13, 0, 0), time(14, 0, 0))
ainds = [24, 72, 120, 168]
binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]
result = df.copy()
result.loc[akey] = 0
result = result.loc[akey]
expected = df.loc[akey].copy()
expected.loc[:] = 0
if using_array_manager:
# TODO(ArrayManager) we are still overwriting columns
expected = expected.astype(float)
tm.assert_frame_equal(result, expected)
result = df.copy()
result.loc[akey] = 0
result.loc[akey] = df.iloc[ainds]
tm.assert_frame_equal(result, df)
result = df.copy()
result.loc[bkey] = 0
result = result.loc[bkey]
expected = df.loc[bkey].copy()
expected.loc[:] = 0
if using_array_manager:
# TODO(ArrayManager) we are still overwriting columns
expected = expected.astype(float)
tm.assert_frame_equal(result, expected)
result = df.copy()
result.loc[bkey] = 0
result.loc[bkey] = df.iloc[binds]
tm.assert_frame_equal(result, df)
@pytest.mark.parametrize("key", ["A", ["A"], ("A", slice(None))])
def test_loc_setitem_unsorted_multiindex_columns(self, key):
    """Set the "A" columns of a frame with MultiIndex columns, unsorted then sorted."""
    # GH#38601
    columns = MultiIndex.from_tuples([("A", 4), ("B", "3"), ("A", "2")])
    df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=columns)
    expected = DataFrame([[0, 2, 0], [0, 5, 0]], columns=columns)

    # columns in their original order
    unsorted = df.copy()
    unsorted.loc[:, key] = np.zeros((2, 2), dtype=int)
    tm.assert_frame_equal(unsorted, expected)

    # same assignment after sorting the columns
    df = df.sort_index(axis=1)
    df.loc[:, key] = np.zeros((2, 2), dtype=int)
    expected = expected.sort_index(axis=1)
    tm.assert_frame_equal(df, expected)
def test_loc_setitem_uint_drop(self, any_int_numpy_dtype):
# see GH#18311
# assigning series.loc[0] = 4 changed series.dtype to int
series = Series([1, 2, 3], dtype=any_int_numpy_dtype)
series.loc[0] = 4
expected = Series([4, 2, 3], dtype=any_int_numpy_dtype)
tm.assert_series_equal(series, expected)
def test_loc_setitem_td64_non_nano(self):
# GH#14155
ser = Series(10 * [np.timedelta64(10, "m")])
ser.loc[[1, 2, 3]] = np.timedelta64(20, "m")
expected = Series(10 * [np.timedelta64(10, "m")])
expected.loc[[1, 2, 3]] = Timedelta(np.timedelta64(20, "m"))
tm.assert_series_equal(ser, expected)
def test_loc_setitem_2d_to_1d_raises(self):
    """Assigning a 2x2 array into a length-2 Series raises ``ValueError``."""
    values_2d = np.random.randn(2, 2)
    ser = Series(range(2))

    # range key: either of these messages is accepted
    range_key_msg = "|".join(
        [
            r"shape mismatch: value array of shape \(2,2\)",
            r"cannot reshape array of size 4 into shape \(2,\)",
        ]
    )
    with pytest.raises(ValueError, match=range_key_msg):
        ser.loc[range(2)] = values_2d

    # full-slice key
    slice_key_msg = (
        r"could not broadcast input array from shape \(2,2\) into shape \(2,?\)"
    )
    with pytest.raises(ValueError, match=slice_key_msg):
        ser.loc[:] = values_2d
def test_loc_getitem_interval_index(self):
# GH#19977
index = pd.interval_range(start=0, periods=3)
df = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"]
)
expected = 1
result = df.loc[0.5, "A"]
tm.assert_almost_equal(result, expected)
def test_loc_getitem_interval_index2(self):
# GH#19977
index = pd.interval_range(start=0, periods=3, closed="both")
df = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"]
)
index_exp = pd.interval_range(start=0, periods=2, freq=1, closed="both")
expected = Series([1, 4], index=index_exp, name="A")
result = df.loc[1, "A"]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("tpl", [(1,), (1, 2)])
def test_loc_getitem_index_single_double_tuples(self, tpl):
    """Select one tuple label from a flat Index holding tuples of differing length."""
    # GH#20991
    labels = Index([(1,), (1, 2)], name="A", tupleize_cols=False)
    frame = DataFrame(index=labels)

    selected = frame.loc[[tpl]]

    expected_labels = Index([tpl], name="A", tupleize_cols=False)
    expected = DataFrame(index=expected_labels)
    tm.assert_frame_equal(selected, expected)
def test_loc_getitem_index_namedtuple(self):
IndexType = namedtuple("IndexType", ["a", "b"])
idx1 = IndexType("foo", "bar")
idx2 = IndexType("baz", "bof")
index = Index([idx1, idx2], name="composite_index", tupleize_cols=False)
df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])
result = df.loc[IndexType("foo", "bar")]["A"]
assert result == 1
def test_loc_setitem_single_column_mixed(self):
    """Set NaN into every other row of a string column in a mixed frame."""
    frame = DataFrame(
        np.random.randn(5, 3),
        index=["a", "b", "c", "d", "e"],
        columns=["foo", "bar", "baz"],
    )
    frame["str"] = "qux"

    every_other = frame.index[::2]
    frame.loc[every_other, "str"] = np.nan

    expected = np.array([np.nan, "qux", np.nan, "qux", np.nan], dtype=object)
    tm.assert_almost_equal(frame["str"].values, expected)
def test_loc_setitem_cast2(self):
    """Setting a string into an all-NaN column leaves that column object dtype."""
    # GH#7704
    # dtype conversion on setting
    frame = DataFrame(np.random.rand(30, 3), columns=tuple("ABC"))
    frame["event"] = np.nan
    frame.loc[10, "event"] = "foo"

    expected_dtypes = Series(
        [np.dtype("float64")] * 3 + [np.dtype("object")],
        index=["A", "B", "C", "event"],
    )
    tm.assert_series_equal(frame.dtypes, expected_dtypes)
def test_loc_setitem_cast3(self):
# Test that data type is preserved . GH#5782
df = DataFrame({"one": np.arange(6, dtype=np.int8)})
df.loc[1, "one"] = 6
assert df.dtypes.one == np.dtype(np.int8)
df.one = np.int8(7)
assert df.dtypes.one == np.dtype(np.int8)
class TestLocWithEllipsis:
    """Ellipsis keys passed to ``.loc``; ``.iloc`` is run through the same checks."""

    @pytest.fixture(params=[tm.loc, tm.iloc])
    def indexer(self, request):
        """Yield each of ``tm.loc`` and ``tm.iloc`` in turn."""
        # Test iloc while we're here
        return request.param

    @pytest.fixture
    def obj(self, series_with_simple_index, frame_or_series):
        """The fixture Series, converted to a frame unless a Series was requested."""
        if frame_or_series is Series:
            return series_with_simple_index
        return series_with_simple_index.to_frame()

    def test_loc_iloc_getitem_ellipsis(self, obj, indexer):
        """Indexing with a bare Ellipsis returns an equal object."""
        selected = indexer(obj)[...]
        tm.assert_equal(selected, obj)

    def test_loc_iloc_getitem_leading_ellipses(self, series_with_simple_index, indexer):
        """A leading Ellipsis followed by a list key on a Series and on its frame."""
        obj = series_with_simple_index
        using_loc = indexer is tm.loc

        if indexer is tm.iloc or len(obj) == 0:
            key = 0
        else:
            key = obj.index[0]

        if using_loc and obj.index.is_boolean():
            # passing [False] will get interpreted as a boolean mask
            # TODO: should it? unambiguous when lengths dont match?
            return

        if using_loc and isinstance(obj.index, MultiIndex):
            msg = "MultiIndex does not support indexing with Ellipsis"
            with pytest.raises(NotImplementedError, match=msg):
                result = indexer(obj)[..., [key]]
        elif len(obj) != 0:
            result = indexer(obj)[..., [key]]
            expected = indexer(obj)[[key]]
            tm.assert_series_equal(result, expected)

        # same idea on the frame, where the list key addresses the columns
        column_key = 0 if indexer is tm.iloc else obj.name
        frame = obj.to_frame()
        result = indexer(frame)[..., [column_key]]
        expected = indexer(frame)[:, [column_key]]
        tm.assert_frame_equal(result, expected)

    def test_loc_iloc_getitem_ellipses_only_one_ellipsis(self, obj, indexer):
        """Keys containing more than one Ellipsis raise ``IndexingError``."""
        # GH37750
        if indexer is tm.iloc or len(obj) == 0:
            key = 0
        else:
            key = obj.index[0]

        multi_ellipsis_keys = [
            (..., ...),
            (..., [key], ...),
            (..., ..., key),
        ]
        for bad_key in multi_ellipsis_keys:
            with pytest.raises(IndexingError, match=_one_ellipsis_message):
                indexer(obj)[bad_key]

        # one_ellipsis_message takes precedence over "Too many indexers"
        # only when the first key is Ellipsis
        with pytest.raises(IndexingError, match="Too many indexers"):
            indexer(obj)[key, ..., ...]
class TestLocWithMultiIndex:
    """``.loc`` checks centred on MultiIndex-labelled objects."""

    @pytest.mark.parametrize(
        "keys, expected",
        [
            (["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]),
            (["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]),
            ((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]),
            ((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]),
            ((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]),
            ((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]),
            ((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]),
        ],
    )
    @pytest.mark.parametrize("dim", ["index", "columns"])
    def test_loc_getitem_multilevel_index_order(self, dim, keys, expected):
        """Check the label order of the selected axis for list-like keys."""
        # GH#22797
        # Try to respect order of keys given for MultiIndex.loc
        level_arrays = [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]
        df = DataFrame(np.arange(25).reshape(5, 5), **{dim: level_arrays})
        exp_index = MultiIndex.from_arrays(expected)

        if dim == "index":
            tm.assert_index_equal(df.loc[keys, :].index, exp_index)
        elif dim == "columns":
            tm.assert_index_equal(df.loc[:, keys].columns, exp_index)

    def test_loc_preserve_names(self, multiindex_year_month_day_dataframe_random_data):
        """Remaining index level names are kept after selecting leading levels."""
        ymd = multiindex_year_month_day_dataframe_random_data

        # one level consumed: the other names remain
        by_year = ymd.loc[2000]
        by_year_col = ymd["A"].loc[2000]
        assert by_year.index.names == ymd.index.names[1:]
        assert by_year_col.index.names == ymd.index.names[1:]

        # two levels consumed: a single name remains
        by_month = ymd.loc[2000, 2]
        by_month_col = ymd["A"].loc[2000, 2]
        assert by_month.index.name == ymd.index.names[2]
        assert by_month_col.index.name == ymd.index.names[2]

    def test_loc_getitem_multiindex_nonunique_len_zero(self):
        """Empty keys on a non-unique MultiIndex Series give an empty Series."""
        # GH#13691
        mi = MultiIndex.from_product([[0], [1, 1]])
        ser = Series(0, index=mi)
        empty = ser[:0]

        tm.assert_series_equal(ser.loc[[]], empty)
        tm.assert_series_equal(ser.loc[ser.iloc[0:0]], empty)

    def test_loc_getitem_access_none_value_in_multiindex(self):
        """``None`` stored under a MultiIndex label can be read back with ``.loc``."""
        # GH#34318: test that you can access a None value using .loc
        # through a Multiindex
        single = MultiIndex.from_arrays([["Level1"], ["Level2"]])
        ser = Series([None], single)
        assert ser.loc[("Level1", "Level2")] is None

        midx = MultiIndex.from_product([["Level1"], ["Level2_a", "Level2_b"]])
        lookup = ("Level1", "Level2_a")

        ser = Series([None] * len(midx), dtype=object, index=midx)
        assert ser.loc[lookup] is None

        ser = Series([1] * len(midx), dtype=object, index=midx)
        assert ser.loc[lookup] == 1

    def test_loc_setitem_multiindex_slice(self):
        """Set a scalar over a slice bounded by two full MultiIndex labels."""
        # GH 34870
        first = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
        second = ["one", "two", "one", "two", "one", "two", "one", "two"]
        index = MultiIndex.from_tuples(zip(first, second), names=["first", "second"])

        result = Series([1, 1, 1, 1, 1, 1, 1, 1], index=index)
        result.loc[("baz", "one"):("foo", "two")] = 100

        expected = Series([1, 1, 100, 100, 100, 100, 1, 1], index=index)
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self):
        """A ``datetime`` slice wider than the index returns the whole Series."""
        times = date_range("2000-01-01", freq="10min", periods=100000)
        ser = Series(range(100000), times)

        lower, upper = datetime(1900, 1, 1), datetime(2100, 1, 1)
        result = ser.loc[lower:upper]
        tm.assert_series_equal(result, ser)

    def test_loc_getitem_datetime_string_with_datetimeindex(self):
        """A list of date strings selects the matching DatetimeIndex rows."""
        # GH 16710
        df = DataFrame(
            {"a": range(10), "b": range(10)},
            index=date_range("2010-01-01", "2010-01-10"),
        )
        wanted = ["2010-01-01", "2010-01-05"]

        result = df.loc[wanted, ["a", "b"]]
        expected = DataFrame(
            {"a": [0, 4], "b": [0, 4]},
            index=DatetimeIndex(["2010-01-01", "2010-01-05"]),
        )
        tm.assert_frame_equal(result, expected)

    def test_loc_getitem_sorted_index_level_with_duplicates(self):
        """Select a duplicated label from a sorted MultiIndex frame."""
        # GH#4516 sorting a MultiIndex with duplicates and multiple dtypes
        foo_bar = ("foo", "bar")
        bah_bam = ("bah", "bam")
        mi = MultiIndex.from_tuples(
            [foo_bar, foo_bar, bah_bam, bah_bam, foo_bar, bah_bam],
            names=["A", "B"],
        )
        rows = [[float(i), i] for i in range(1, 7)]
        df = DataFrame(rows, index=mi, columns=["C", "D"])
        df = df.sort_index(level=0)

        expected = DataFrame(
            [[1.0, 1], [2.0, 2], [5.0, 5]],
            columns=["C", "D"],
            index=mi.take([0, 1, 4]),
        )
        result = df.loc[("foo", "bar")]
        tm.assert_frame_equal(result, expected)

    def test_loc_getitem_preserves_index_level_category_dtype(self):
        """A categorical index level is unchanged after a list-like ``.loc``."""
        # GH#15166
        index = MultiIndex(
            levels=[CategoricalIndex(["a", "b"]), range(10)],
            codes=[[0] * 5 + [1] * 5, range(10)],
            names=["Index1", "Index2"],
        )
        df = DataFrame(data=np.arange(2, 22, 2), index=index)

        expected = CategoricalIndex(
            ["a", "b"],
            categories=["a", "b"],
            ordered=False,
            name="Index1",
            dtype="category",
        )

        # before selection
        tm.assert_index_equal(df.index.levels[0], expected)

        # after selection
        tm.assert_index_equal(df.loc[["a"]].index.levels[0], expected)

    @pytest.mark.parametrize("lt_value", [30, 10])
    def test_loc_multiindex_levels_contain_values_not_in_index_anymore(self, lt_value):
        """A label left only in the levels after filtering raises ``KeyError``."""
        # GH#41170
        df = DataFrame({"a": [12, 23, 34, 45]}, index=[list("aabb"), [0, 1, 2, 3]])
        filtered = df.loc[df["a"] < lt_value, :]
        with pytest.raises(KeyError, match=r"\['b'\] not in index"):
            filtered.loc[["b"], :]

    def test_loc_multiindex_null_slice_na_level(self):
        """Null slice on an all-NaN first level combined with a second-level label."""
        # GH#42055
        first_level = np.array([np.nan, np.nan])
        second_level = ["bar", "baz"]
        mi = MultiIndex.from_arrays([first_level, second_level])
        ser = Series([0, 1], index=mi)

        result = ser.loc[:, "bar"]

        # TODO: should we have name="bar"?
        expected = Series([0], index=[np.nan])
        tm.assert_series_equal(result, expected)

    def test_loc_drops_level(self):
        """Selecting a scalar on the first level drops that level from the result."""
        # Based on test_series_varied_multiindex_alignment, where
        # this used to fail to drop the first level
        mi = MultiIndex.from_product(
            [list("ab"), list("xy"), [1, 2]], names=["ab", "xy", "num"]
        )
        ser = Series(range(8), index=mi)

        selected = ser.loc["a", :, :]
        expected_index = ser.index.droplevel(0)[:4]
        tm.assert_index_equal(selected.index, expected_index)
class TestLocSetitemWithExpansion:
    """``.loc`` assignment tests, most of which add new rows or columns."""

    @pytest.mark.slow
    def test_loc_setitem_with_expansion_large_dataframe(self):
        """Append one row to a million-row int64 frame."""
        # GH#10692
        result = DataFrame({"x": range(10**6)}, dtype="int64")
        result.loc[len(result)] = len(result) + 1
        expected = DataFrame({"x": range(10**6 + 1)}, dtype="int64")
        tm.assert_frame_equal(result, expected)

    def test_loc_setitem_empty_series(self):
        """Grow an empty object Series one label at a time."""
        # GH#5226
        # partially set with an empty object series

        # integer values under integer labels
        ser = Series(dtype=object)
        ser.loc[1] = 1
        tm.assert_series_equal(ser, Series([1], index=[1]))
        ser.loc[3] = 3
        tm.assert_series_equal(ser, Series([1, 3], index=[1, 3]))

        # float values under integer labels
        ser = Series(dtype=object)
        ser.loc[1] = 1.0
        tm.assert_series_equal(ser, Series([1.0], index=[1]))
        ser.loc[3] = 3.0
        tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3]))

        # string labels, then an integer label added to them
        ser = Series(dtype=object)
        ser.loc["foo"] = 1
        tm.assert_series_equal(ser, Series([1], index=["foo"]))
        ser.loc["bar"] = 3
        tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"]))
        ser.loc[3] = 4
        tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3]))

    def test_loc_setitem_incremental_with_dst(self):
        """Add tz-aware timestamps one by one, 900 seconds apart."""
        # GH#20724
        base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific"))
        idxs = [base + timedelta(seconds=i * 900) for i in range(16)]
        result = Series([0], index=[idxs[0]])
        for ts in idxs:
            result.loc[ts] = 1
        expected = Series(1, index=idxs)
        tm.assert_series_equal(result, expected)

    def test_loc_setitem_datetime_keys_cast(self):
        """Datetime-like row keys of several types yield the same resulting frame."""
        # GH#9516
        dt1 = Timestamp("20130101 09:00:00")
        dt2 = Timestamp("20130101 10:00:00")

        # each converter produces a different representation of the same instant
        for conv in [
            lambda x: x,
            lambda x: x.to_datetime64(),
            lambda x: x.to_pydatetime(),
            lambda x: np.datetime64(x),
        ]:
            df = DataFrame()
            df.loc[conv(dt1), "one"] = 100
            df.loc[conv(dt2), "one"] = 200

            expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2])
            tm.assert_frame_equal(df, expected)

    def test_loc_setitem_categorical_column_retains_dtype(self, ordered):
        """A Categorical added as a new column compares equal to one built directly."""
        # GH16360
        result = DataFrame({"A": [1]})
        result.loc[:, "B"] = Categorical(["b"], ordered=ordered)
        expected = DataFrame({"A": [1], "B": Categorical(["b"], ordered=ordered)})
        tm.assert_frame_equal(result, expected)

    def test_loc_setitem_with_expansion_and_existing_dst(self):
        """Add a new tz-aware row to a frame whose hourly index is in Europe/Madrid."""
        # GH#18308
        start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid")
        end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid")
        ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid")
        idx = date_range(start, end, inclusive="left", freq="H")
        assert ts not in idx  # i.e. result.loc setitem is with-expansion

        result = DataFrame(index=idx, columns=["value"])
        result.loc[ts, "value"] = 12
        expected = DataFrame(
            [np.nan] * len(idx) + [12],
            index=idx.append(DatetimeIndex([ts])),
            columns=["value"],
            dtype=object,
        )
        tm.assert_frame_equal(result, expected)

    def test_setitem_with_expansion(self):
        """Set tz-aware values into a UTC column through a boolean row mask."""
        # indexing - setting an element
        df = DataFrame(
            data=to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]),
            columns=["time"],
        )
        df["new_col"] = ["new", "old"]
        df.time = df.set_index("time").index.tz_localize("UTC")
        v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific")

        # trying to set a single element on a part of a different timezone
        # this converts to object
        df2 = df.copy()
        with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
            df2.loc[df2.new_col == "new", "time"] = v

        expected = Series([v[0], df.loc[1, "time"]], name="time")
        tm.assert_series_equal(df2.time, expected)

        v = df.loc[df.new_col == "new", "time"] + Timedelta("1s")
        df.loc[df.new_col == "new", "time"] = v
        tm.assert_series_equal(df.loc[df.new_col == "new", "time"], v)

    def test_loc_setitem_with_expansion_inf_upcast_empty(self):
        """Adding an ``np.inf`` column label to integer labels gives a float index."""
        # Test with np.inf in columns
        df = DataFrame()
        df.loc[0, 0] = 1
        df.loc[1, 1] = 2
        df.loc[0, np.inf] = 3

        result = df.columns
        expected = Float64Index([0, 1, np.inf])
        tm.assert_index_equal(result, expected)

    @pytest.mark.filterwarnings("ignore:indexing past lexsort depth")
    def test_loc_setitem_with_expansion_nonunique_index(self, index, request):
        """Add a new string-labelled row to objects with a non-unique index."""
        # GH#40096
        if not len(index):
            # Report the case as skipped rather than letting it pass silently.
            pytest.skip("Not relevant for empty Index")

        index = index.repeat(2)  # ensure non-unique
        N = len(index)
        arr = np.arange(N).astype(np.int64)

        orig = DataFrame(arr, index=index, columns=[0])

        # key that will require object-dtype casting in the index
        key = "kapow"
        assert key not in index  # otherwise test is invalid
        # TODO: using a tuple key breaks here in many cases

        exp_index = index.insert(len(index), key)
        if isinstance(index, MultiIndex):
            assert exp_index[-1][0] == key
        else:
            assert exp_index[-1] == key
        exp_data = np.arange(N + 1).astype(np.float64)
        expected = DataFrame(exp_data, index=exp_index, columns=[0])

        # Add new row, but no new columns
        df = orig.copy()
        df.loc[key, 0] = N
        tm.assert_frame_equal(df, expected)

        # add new row on a Series
        ser = orig.copy()[0]
        ser.loc[key] = N
        # the series machinery lets us preserve int dtype instead of float
        expected = expected[0].astype(np.int64)
        tm.assert_series_equal(ser, expected)

        # add new row and new column
        df = orig.copy()
        df.loc[key, 1] = N
        expected = DataFrame(
            {0: list(arr) + [np.nan], 1: [np.nan] * N + [float(N)]},
            index=exp_index,
        )
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "dtype", ["Int32", "Int64", "UInt32", "UInt64", "Float32", "Float64"]
    )
    def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype):
        """A nullable-dtype column added via ``.loc`` equals the source frame."""
        # GH#42099
        ser = Series([0, 1, 2, 3], dtype=dtype)
        df = DataFrame({"data": ser})

        # assign the Series itself
        result = DataFrame(index=df.index)
        result.loc[df.index, "data"] = ser
        tm.assert_frame_equal(result, df)

        # assign the underlying values of the Series
        result = DataFrame(index=df.index)
        result.loc[df.index, "data"] = ser._values
        tm.assert_frame_equal(result, df)
class TestLocCallable:
    """``.loc`` with callable keys, compared against the equivalent plain keys."""

    def test_frame_loc_getitem_callable(self):
        """Callables returning boolean masks, labels, label lists and scalars."""
        # GH#11485
        df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})
        # iloc cannot use boolean Series (see GH3635)

        # return bool indexer
        result = df.loc[lambda x: x.A > 2]
        expected = df.loc[df.A > 2]
        tm.assert_frame_equal(result, expected)

        result = df.loc[lambda x: x.B == "b", :]
        expected = df.loc[df.B == "b", :]
        tm.assert_frame_equal(result, expected)

        result = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"]
        expected = df.loc[df.A > 2, [False, True, False]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[lambda x: x.A > 2, lambda x: "B"]
        expected = df.loc[df.A > 2, "B"]
        tm.assert_series_equal(result, expected)

        result = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]]
        expected = df.loc[df.A > 2, ["A", "B"]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]]
        expected = df.loc[df.A == 2, ["A", "B"]]
        tm.assert_frame_equal(result, expected)

        # scalar
        scalar = df.loc[lambda x: 1, lambda x: "A"]
        assert scalar == df.loc[1, "A"]

    def test_frame_loc_getitem_callable_mixture(self):
        """A callable on one axis combined with a plain key on the other."""
        # GH#11485
        df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})

        result = df.loc[lambda x: x.A > 2, ["A", "B"]]
        expected = df.loc[df.A > 2, ["A", "B"]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[[2, 3], lambda x: ["A", "B"]]
        expected = df.loc[[2, 3], ["A", "B"]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[3, lambda x: ["A", "B"]]
        expected = df.loc[3, ["A", "B"]]
        tm.assert_series_equal(result, expected)

    def test_frame_loc_getitem_callable_labels(self):
        """Callables that return label lists on a string-labelled frame."""
        # GH#11485
        df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))

        # return label
        result = df.loc[lambda x: ["A", "C"]]
        expected = df.loc[["A", "C"]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[lambda x: ["A", "C"], :]
        expected = df.loc[["A", "C"], :]
        tm.assert_frame_equal(result, expected)

        result = df.loc[lambda x: ["A", "C"], lambda x: "X"]
        expected = df.loc[["A", "C"], "X"]
        tm.assert_series_equal(result, expected)

        result = df.loc[lambda x: ["A", "C"], lambda x: ["X"]]
        expected = df.loc[["A", "C"], ["X"]]
        tm.assert_frame_equal(result, expected)

        # mixture
        result = df.loc[["A", "C"], lambda x: "X"]
        expected = df.loc[["A", "C"], "X"]
        tm.assert_series_equal(result, expected)

        result = df.loc[["A", "C"], lambda x: ["X"]]
        expected = df.loc[["A", "C"], ["X"]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[lambda x: ["A", "C"], "X"]
        expected = df.loc[["A", "C"], "X"]
        tm.assert_series_equal(result, expected)

        result = df.loc[lambda x: ["A", "C"], ["X"]]
        expected = df.loc[["A", "C"], ["X"]]
        tm.assert_frame_equal(result, expected)

    def test_frame_loc_setitem_callable(self):
        """Assignment through callable keys matches assignment through plain keys."""
        # GH#11485
        df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
        rows = ["A", "C"]

        # each case: (key using callables, equivalent plain key, value to set)
        cases = [
            # return label
            (lambda x: ["A", "C"], rows, -20),
            ((lambda x: ["A", "C"], slice(None)), (rows, slice(None)), 20),
            ((lambda x: ["A", "C"], lambda x: "X"), (rows, "X"), -1),
            ((lambda x: ["A", "C"], lambda x: ["X"]), (rows, ["X"]), [5, 10]),
            # mixture
            ((rows, lambda x: "X"), (rows, "X"), np.array([-1, -2])),
            ((rows, lambda x: ["X"]), (rows, ["X"]), 10),
            ((lambda x: ["A", "C"], "X"), (rows, "X"), -2),
            ((lambda x: ["A", "C"], ["X"]), (rows, ["X"]), -4),
        ]
        for callable_key, plain_key, value in cases:
            result = df.copy()
            result.loc[callable_key] = value

            expected = df.copy()
            expected.loc[plain_key] = value

            tm.assert_frame_equal(result, expected)
class TestPartialStringSlicing:
    """``.loc`` with string keys on datetime, period and timedelta indexes."""

    def test_loc_getitem_partial_string_slicing_datetimeindex(self):
        """A year-month string selects the rows dated in that month."""
        # GH#35509
        df = DataFrame(
            {"col1": ["a", "b", "c"], "col2": [1, 2, 3]},
            index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]),
        )
        result = df.loc["2020-08"]

        expected = DataFrame(
            {"col1": ["a", "c"], "col2": [1, 3]},
            index=to_datetime(["2020-08-01", "2020-08-05"]),
        )
        tm.assert_frame_equal(result, expected)

    def test_loc_getitem_partial_string_slicing_with_periodindex(self):
        """String upper bound on a monthly PeriodIndex Series."""
        periods = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
        ser = periods.to_series()

        up_to_december = ser.loc[:"2017-12"]
        tm.assert_series_equal(up_to_december, ser.iloc[:-1])

    def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self):
        """String upper bound on an hourly TimedeltaIndex Series."""
        deltas = timedelta_range(start="1 day", end="2 days", freq="1H")
        ser = deltas.to_series()

        up_to_one_day = ser.loc[:"1 days"]
        tm.assert_series_equal(up_to_one_day, ser.iloc[:-1])

    def test_loc_getitem_str_timedeltaindex(self):
        """A timedelta string selects the row with that label."""
        # GH#16896
        df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days"))

        first_row = df.iloc[0]
        by_string = df.loc["0 days"]
        tm.assert_series_equal(by_string, first_row)

    @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"])
    def test_loc_getitem_partial_slice_non_monotonicity(
        self, tz_aware_fixture, indexer_end, frame_or_series
    ):
        """String slice on an unordered tz-aware index, via ``[]`` and ``.loc``."""
        # GH#33146
        all_stamps = [
            Timestamp("2019-12-30"),
            Timestamp("2020-01-01"),
            Timestamp("2019-12-25"),
            Timestamp("2020-01-02 23:59:59.999999999"),
            Timestamp("2019-12-19"),
        ]
        obj = frame_or_series(
            [1] * 5,
            index=DatetimeIndex(all_stamps, tz=tz_aware_fixture),
        )

        kept_stamps = [
            Timestamp("2020-01-01"),
            Timestamp("2020-01-02 23:59:59.999999999"),
        ]
        expected = frame_or_series(
            [1] * 2,
            index=DatetimeIndex(kept_stamps, tz=tz_aware_fixture),
        )

        indexer = slice("2020-01-01", indexer_end)

        tm.assert_equal(obj[indexer], expected)
        tm.assert_equal(obj.loc[indexer], expected)
class TestLabelSlicing:
    """Label-based slicing through ``.loc`` on a range of index types."""

    def test_loc_getitem_slicing_datetimes_frame(self):
        """``datetime`` slices on frames with unique and duplicated index values."""
        # GH#7523

        def at_ten(days):
            # 10:00 on the given days of January 2001
            return [datetime(2001, 1, day, 10, 0) for day in days]

        # unique
        df_unique = DataFrame(
            np.arange(4.0, dtype="float64"),
            index=at_ten([1, 2, 3, 4]),
        )

        # duplicates
        df_dups = DataFrame(
            np.arange(5.0, dtype="float64"),
            index=at_ten([1, 2, 2, 3, 4]),
        )

        first = datetime(2001, 1, 1, 10)
        last = datetime(2001, 1, 4, 10)
        for df in [df_unique, df_dups]:
            # bounds covering the whole index return the whole frame
            tm.assert_frame_equal(df.loc[first:], df)
            tm.assert_frame_equal(df.loc[:last], df)
            tm.assert_frame_equal(df.loc[first:last], df)

            # a lower bound one hour later excludes the first row
            expected = df.iloc[1:]
            tm.assert_frame_equal(df.loc[datetime(2001, 1, 1, 11) :], expected)
            tm.assert_frame_equal(df.loc["20010101 11":], expected)

    def test_loc_getitem_label_slice_across_dst(self):
        """Slice and scalar lookup with Europe/Berlin timestamps of differing offsets."""
        # GH#21846
        idx = date_range(
            "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min"
        )
        ser = Series([0, 1, 2, 3, 4], index=idx)

        lower = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin")
        upper = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin")

        sliced = ser.loc[lower:upper]
        tm.assert_series_equal(sliced, Series([2, 3], index=idx[2:4]))

        assert ser[lower] == 2

    @pytest.mark.parametrize(
        "index",
        [
            pd.period_range(start="2017-01-01", end="2018-01-01", freq="M"),
            timedelta_range(start="1 day", end="2 days", freq="1H"),
        ],
    )
    def test_loc_getitem_label_slice_period_timedelta(self, index):
        """Slicing up to the second-to-last label drops only the last row."""
        ser = index.to_series()
        sliced = ser.loc[: index[-2]]
        tm.assert_series_equal(sliced, ser.iloc[:-1])

    def test_loc_getitem_slice_floats_inexact(self):
        """Float slice bounds that are not themselves labels in the index."""
        index = [52195.504153, 52196.303147, 52198.369883]
        df = DataFrame(np.random.rand(3, 2), index=index)

        # (upper bound, expected number of rows)
        for stop, n_rows in [(52196.5, 2), (52196.6, 2), (52198.9, 3)]:
            selected = df.loc[52195.1:stop]
            assert len(selected) == n_rows

    def test_loc_getitem_float_slice_float64index(self):
        """Open-ended float slices on a float index, before and after one label changes."""
        ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float))

        assert len(ser.loc[12.0:]) == 8
        assert len(ser.loc[12.5:]) == 7

        # change the label 12.0 to 12.2; the same counts are expected
        labels = np.arange(10, 20, dtype=float)
        labels[2] = 12.2
        ser.index = labels

        assert len(ser.loc[12.0:]) == 8
        assert len(ser.loc[12.5:]) == 7

    @pytest.mark.parametrize(
        "start,stop, expected_slice",
        [
            [np.timedelta64(0, "ns"), None, slice(0, 11)],
            [np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)],
            [None, np.timedelta64(4, "D"), slice(0, 5)],
        ],
    )
    def test_loc_getitem_slice_label_td64obj(self, start, stop, expected_slice):
        """``np.timedelta64`` slice bounds on a daily timedelta index."""
        # GH#20393
        ser = Series(range(11), timedelta_range("0 days", "10 days"))
        by_label = ser.loc[slice(start, stop)]
        by_position = ser.iloc[expected_slice]
        tm.assert_series_equal(by_label, by_position)

    @pytest.mark.parametrize("start", ["2018", "2020"])
    def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series, start):
        """String slicing an unordered datetime index emits ``FutureWarning``."""
        stamps = [Timestamp("2016"), Timestamp("2019"), Timestamp("2017")]
        obj = frame_or_series([1, 2, 3], index=stamps)
        with tm.assert_produces_warning(FutureWarning):
            obj.loc[start:"2022"]

    @pytest.mark.parametrize("value", [1, 1.5])
    def test_loc_getitem_slice_labels_int_in_object_index(self, frame_or_series, value):
        """Slice from a numeric label to a string label in a mixed-label index."""
        # GH: 26491
        labels = [value, "first", 2, "third"]
        obj = frame_or_series(range(4), index=labels)

        sliced = obj.loc[value:"third"]

        expected = frame_or_series(range(4), index=[value, "first", 2, "third"])
        tm.assert_equal(sliced, expected)

    def test_loc_getitem_slice_columns_mixed_dtype(self):
        """Column slice starting at an integer label when labels are of mixed type."""
        # GH: 20975
        df = DataFrame({"test": 1, 1: 2, 2: 3}, index=[0])
        expected = DataFrame(
            data=[[2, 3]], index=[0], columns=Index([1, 2], dtype=object)
        )
        sliced = df.loc[:, 1:]
        tm.assert_frame_equal(sliced, expected)
class TestLocBooleanLabelsAndSlices(Base):
@pytest.mark.parametrize("bool_value", [True, False])
def test_loc_bool_incompatible_index_raises(
self, index, frame_or_series, bool_value
):
# GH20432
message = f"{bool_value}: boolean label can not be used without a boolean index"
if index.inferred_type != "boolean":
obj = frame_or_series(index=index, dtype="object")
with pytest.raises(KeyError, match=message):
obj.loc[bool_value]
@pytest.mark.parametrize("bool_value", [True, False])
def test_loc_bool_should_not_raise(self, frame_or_series, bool_value):
obj = frame_or_series(
index=Index([True, False], dtype="boolean"), dtype="object"
)
obj.loc[bool_value]
def test_loc_bool_slice_raises(self, index, frame_or_series):
# GH20432
message = (
r"slice\(True, False, None\): boolean values can not be used in a slice"
)
obj = frame_or_series(index=index, dtype="object")
with pytest.raises(TypeError, match=message):
obj.loc[True:False]
class TestLocBooleanMask:
def test_loc_setitem_bool_mask_timedeltaindex(self):
# GH#14946
df = DataFrame({"x": range(10)})
df.index = to_timedelta(range(10), unit="s")
conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3]
expected_data = [
[0, 1, 2, 3, 10, 10, 10, 10, 10, 10],
[0, 1, 2, 10, 4, 5, 6, 7, 8, 9],
[10, 10, 10, 3, 4, 5, 6, 7, 8, 9],
]
for cond, data in zip(conditions, expected_data):
result = df.copy()
result.loc[cond, "x"] = 10
expected = DataFrame(
data,
index=to_timedelta(range(10), unit="s"),
columns=["x"],
dtype="int64",
)
tm.assert_frame_equal(expected, result)
@pytest.mark.parametrize("tz", [None, "UTC"])
def test_loc_setitem_mask_with_datetimeindex_tz(self, tz):
# GH#16889
# support .loc with alignment and tz-aware DatetimeIndex
mask = np.array([True, False, True, False])
idx = date_range("20010101", periods=4, tz=tz)
df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64")
result = df.copy()
result.loc[mask, :] = df.loc[mask, :]
tm.assert_frame_equal(result, df)
result = df.copy()
result.loc[mask] = df.loc[mask]
tm.assert_frame_equal(result, df)
def test_loc_setitem_mask_and_label_with_datetimeindex(self):
# GH#9478
# a datetimeindex alignment issue with partial setting
df = DataFrame(
np.arange(6.0).reshape(3, 2),
columns=list("AB"),
index=date_range("1/1/2000", periods=3, freq="1H"),
)
expected = df.copy()
expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT]
mask = df.A < 1
df.loc[mask, "C"] = df.loc[mask].index
tm.assert_frame_equal(df, expected)
def test_loc_setitem_mask_td64_series_value(self):
# GH#23462 key list of bools, value is a Series
td1 = Timedelta(0)
td2 = Timedelta(28767471428571405)
df = DataFrame({"col": Series([td1, td2])})
df_copy = df.copy()
ser = Series([td1])
expected = df["col"].iloc[1].value
df.loc[[True, False]] = ser
result = df["col"].iloc[1].value
assert expected == result
tm.assert_frame_equal(df, df_copy)
@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
def test_loc_setitem_boolean_and_column(self, float_frame):
expected = float_frame.copy()
mask = float_frame["A"] > 0
float_frame.loc[mask, "B"] = 0
expected.values[mask.values, 1] = 0
tm.assert_frame_equal(float_frame, expected)
class TestLocListlike:
    """``.loc`` lookups with list-like keys."""

    @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list])
    def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box):
        """List-like keys containing NA work only when the index holds NA."""
        # passing a list can include valid categories _or_ NA values
        ci = CategoricalIndex(["A", "B", np.nan])
        ser = Series(range(3), index=ci)
        frame = ser.to_frame()

        tm.assert_series_equal(ser.loc[box(ci)], ser)
        tm.assert_series_equal(ser[box(ci)], ser)
        tm.assert_frame_equal(frame.loc[box(ci)], frame)

        # drop the NA row from the data, keep NA in the key
        ser_without_na = ser[:-1]
        key_with_na = ci[1:]

        # but if there are no NAs present, this should raise KeyError
        msg = "not in index"
        with pytest.raises(KeyError, match=msg):
            ser_without_na.loc[box(key_with_na)]

        with pytest.raises(KeyError, match=msg):
            ser_without_na[box(key_with_na)]

        with pytest.raises(KeyError, match=msg):
            ser_without_na.to_frame().loc[box(key_with_na)]

    def test_loc_getitem_series_label_list_missing_values(self):
        """A datetime64 key array with one absent date raises KeyError."""
        # gh-11428
        dates = ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"]
        key = np.array(dates, dtype="datetime64")
        ser = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4))
        with pytest.raises(KeyError, match="not in index"):
            ser.loc[key]

    def test_loc_getitem_series_label_list_missing_integer_values(self):
        """A large-integer key array with one absent label raises KeyError."""
        # GH: 25927
        labels = np.array([9730701000001104, 10049011000001109])
        values = np.array([999000011000001104, 999000011000001104])
        ser = Series(index=labels, data=values)

        key = np.array([9730701000001104, 10047311000001102])
        with pytest.raises(KeyError, match="not in index"):
            ser.loc[key]

    @pytest.mark.parametrize("to_period", [True, False])
    def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period):
        """Lists of Timestamp/Period keys: exact, repeated, and missing labels."""
        # GH#11497

        def as_keys(stamps):
            # convert to daily Periods when testing the PeriodIndex variant
            if to_period:
                return [ts.to_period("D") for ts in stamps]
            return stamps

        idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx")
        if to_period:
            idx = idx.to_period("D")
        ser = Series([0.1, 0.2], index=idx, name="s")

        # every label once, in order
        keys = as_keys([Timestamp("2011-01-01"), Timestamp("2011-01-02")])
        result = ser.loc[keys]
        exp = Series([0.1, 0.2], index=idx, name="s")
        if not to_period:
            exp.index = exp.index._with_freq(None)
        tm.assert_series_equal(result, exp, check_index_type=True)

        # repeated and reordered labels
        keys = as_keys(
            [
                Timestamp("2011-01-02"),
                Timestamp("2011-01-02"),
                Timestamp("2011-01-01"),
            ]
        )
        exp = Series(
            [0.2, 0.2, 0.1], index=Index(keys, name="idx", dtype=idx.dtype), name="s"
        )
        result = ser.loc[keys]
        tm.assert_series_equal(result, exp, check_index_type=True)

        # 2011-01-03 is not in the index
        keys = as_keys(
            [
                Timestamp("2011-01-03"),
                Timestamp("2011-01-02"),
                Timestamp("2011-01-03"),
            ]
        )
        with pytest.raises(KeyError, match="not in index"):
            ser.loc[keys]

    def test_loc_named_index(self):
        """The name of an Index used as key is carried onto the result index."""
        # GH 42790
        df = DataFrame(
            [[1, 2], [4, 5], [7, 8]],
            index=["cobra", "viper", "sidewinder"],
            columns=["max_speed", "shield"],
        )
        key = Index(["cobra", "viper"], name="foo")

        expected = df.iloc[:2]
        expected.index.name = "foo"

        result = df.loc[key]
        tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "columns, column_key, expected_columns",
    [
        ([2011, 2012, 2013], [2011, 2012], [0, 1]),
        ([2011, 2012, "All"], [2011, 2012], [0, 1]),
        ([2011, 2012, "All"], [2011, "All"], [0, 2]),
    ],
)
def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns):
    """Integer column labels in a list key are treated as labels, not positions.

    Parameters
    ----------
    columns : list
        Column labels of the frame under test.
    column_key : list
        Labels passed to ``.loc`` on the column axis.
    expected_columns : list of int
        Positions of the columns expected in the result.
    """
    # gh-14836
    row_labels = list("ABC")
    df = DataFrame(np.random.rand(3, 3), columns=columns, index=row_labels)

    result = df.loc[["A", "B", "C"], column_key]
    tm.assert_frame_equal(
        result, df.iloc[:, expected_columns], check_column_type=True
    )
def test_loc_setitem_float_intindex():
    """Setting a float column label on integer columns appends a new column.

    After ``result.loc[:, 0.5] = np.nan`` the frame is expected to hold the
    original four columns (now labelled with floats) followed by an all-NaN
    column labelled ``0.5``.
    """
    # GH 8720
    rand_data = np.random.randn(8, 4)
    result = DataFrame(rand_data)
    result.loc[:, 0.5] = np.nan

    nan_column = np.full((8, 1), np.nan)
    expected = DataFrame(
        np.hstack((rand_data, nan_column)), columns=[0.0, 1.0, 2.0, 3.0, 0.5]
    )
    # checked once: the original repeated this exact setup/assert a second
    # time, which exercised nothing new
    tm.assert_frame_equal(result, expected)
def test_loc_axis_1_slice():
    """``df.loc(axis=1)`` accepts a slice of MultiIndex tuples on the columns."""
    # GH 10586
    row_labels = tuple("ABCDEFGHIJ")
    cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]]
    df = DataFrame(
        np.ones((10, 8)),
        index=row_labels,
        columns=MultiIndex.from_tuples(cols),
    )

    result = df.loc(axis=1)[(2014, 9):(2015, 8)]

    # both slice endpoints are included
    expected_cols = MultiIndex.from_tuples(
        [(2014, 9), (2014, 10), (2015, 7), (2015, 8)]
    )
    expected = DataFrame(np.ones((10, 4)), index=row_labels, columns=expected_cols)
    tm.assert_frame_equal(result, expected)
def test_loc_set_dataframe_multiindex():
    """Assigning a MultiIndex-column selection onto itself is a no-op."""
    # GH 14592
    columns = MultiIndex.from_product([range(2), range(2)])
    expected = DataFrame("a", index=range(2), columns=columns)

    result = expected.copy()
    key = [(0, 1)]
    result.loc[0, key] = result.loc[0, key]
    tm.assert_frame_equal(result, expected)
def test_loc_mixed_int_float():
    """An int label is found in an object index mixing int and float labels."""
    # GH#19456
    mixed_index = Index([1, 2.0], dtype=object)
    ser = Series(range(2), index=mixed_index)

    assert ser.loc[1] == 0
def test_loc_with_positional_slice_deprecation():
# GH#31840
ser = Series(range(4), index=["A", "B", "C", "D"])
with tm.assert_produces_warning(FutureWarning):
ser.loc[:3] = 2
expected = Series([2, 2, 2, 3], index=["A", "B", "C", "D"])
tm.assert_series_equal(ser, expected)
def test_loc_slice_disallows_positional():
    """Integer slices in ``.loc`` on a DatetimeIndex: getitem raises, setitem warns.

    The frame and its first column (as a Series) are both exercised. Reading
    with ``obj.loc[1:3]`` must raise TypeError; the matching assignment is
    expected to emit a FutureWarning instead.
    """
    # GH#16121, GH#24612, GH#31810
    dti = date_range("2016-01-01", periods=3)
    df = DataFrame(np.random.random((3, 2)), index=dti)

    ser = df[0]

    msg = (
        "cannot do slice indexing on DatetimeIndex with these "
        r"indexers \[1\] of type int"
    )

    for obj in [df, ser]:
        # reading with an integer slice is rejected
        with pytest.raises(TypeError, match=msg):
            obj.loc[1:3]

        with tm.assert_produces_warning(FutureWarning):
            # GH#31840 deprecated incorrect behavior
            obj.loc[1:3] = 1

    # same checks with an additional column label in the key
    with pytest.raises(TypeError, match=msg):
        df.loc[1:3, 1]

    with tm.assert_produces_warning(FutureWarning):
        # GH#31840 deprecated incorrect behavior
        df.loc[1:3, 1] = 2
def test_loc_datetimelike_mismatched_dtypes():
    """A TimedeltaIndex key never matches a DatetimeIndex, even with equal i8 values."""
    # GH#32650 dont mix and match datetime/timedelta/period dtypes
    hourly = date_range("2012", freq="H", periods=5)
    full = DataFrame(np.random.randn(5, 3), columns=["a", "b", "c"], index=hourly)

    # create dataframe with non-unique DatetimeIndex
    df = full.iloc[[0, 2, 2, 3]].copy()

    # matching i8 values
    tdi = pd.TimedeltaIndex(df.index.asi8)

    msg = r"None of \[TimedeltaIndex.* are in the \[index\]"
    with pytest.raises(KeyError, match=msg):
        df.loc[tdi]

    column_a = df["a"]
    with pytest.raises(KeyError, match=msg):
        column_a.loc[tdi]
def test_loc_with_period_index_indexer():
    """``.loc`` accepts a PeriodIndex, a list of Periods, and a PeriodIndex slice."""
    # GH#4125
    idx = pd.period_range("2002-01", "2003-12", freq="M")
    df = DataFrame(np.random.randn(24, 10), index=idx)

    # the full index as key returns the frame unchanged
    tm.assert_frame_equal(df, df.loc[idx])
    # same with a plain list of Period objects; the original repeated this
    # identical assertion three times, once is enough
    tm.assert_frame_equal(df, df.loc[list(idx)])
    # a slice of the index selects the matching leading rows
    tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]])
def test_loc_getitem_multiindex_tuple_level():
    """A tuple stored as a MultiIndex level value is treated as a single label."""
    # GH#27591
    lev1 = ["a", "b", "c"]
    lev2 = [(0, 1), (1, 0)]
    lev3 = [0, 1]
    cols = MultiIndex.from_product([lev1, lev2, lev3], names=["x", "y", "z"])
    df = DataFrame(6, index=range(5), columns=cols)

    # full key made of the first label of every level
    first_key = (lev1[0], lev2[0], lev3[0])
    every_level = [0, 1, 2]

    # the lev2[0] here should be treated as a single label, not as a sequence
    #  of labels
    result = df.loc[:, first_key]

    # TODO: i think this actually should drop levels
    expected = df.iloc[:, :1]
    tm.assert_frame_equal(result, expected)

    alt = df.xs(first_key, level=every_level, axis=1)
    tm.assert_frame_equal(alt, expected)

    # same thing on a Series
    ser = df.iloc[0]
    expected2 = ser.iloc[:1]

    alt2 = ser.xs(first_key, level=every_level, axis=0)
    tm.assert_series_equal(alt2, expected2)

    # a fully specified key on the Series returns the scalar
    result2 = ser.loc[first_key]
    assert result2 == 6
def test_loc_getitem_nullable_index_with_duplicates():
    """Scalar lookup on an Int64 index that holds repeated missing values."""
    # GH#34497
    raw = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, np.nan, np.nan]]).T
    df = DataFrame(data=raw, columns=["a", "b", "c"], dtype="Int64")

    df2 = df.set_index("c")
    assert df2.index.dtype == "Int64"

    expected = Series([1, 5], index=df2.columns, dtype="Int64", name=1)
    tm.assert_series_equal(df2.loc[1], expected)

    # pd.NA and duplicates in an object-dtype Index
    df2.index = df2.index.astype(object)
    tm.assert_series_equal(df2.loc[1], expected)
class TestLocSeries:
@pytest.mark.parametrize("val,expected", [(2**63 - 1, 3), (2**63, 4)])
def test_loc_uint64(self, val, expected):
# see GH#19399
ser = Series({2**63 - 1: 3, 2**63: 4})
assert ser.loc[val] == expected
def test_loc_getitem(self, string_series, datetime_series):
inds = string_series.index[[3, 4, 7]]
tm.assert_series_equal(string_series.loc[inds], string_series.reindex(inds))
tm.assert_series_equal(string_series.iloc[5::2], string_series[5::2])
# slice with indices
d1, d2 = datetime_series.index[[5, 15]]
result = datetime_series.loc[d1:d2]
expected = datetime_series.truncate(d1, d2)
tm.assert_series_equal(result, expected)
# boolean
mask = string_series > string_series.median()
tm.assert_series_equal(string_series.loc[mask], string_series[mask])
# ask for index value
assert datetime_series.loc[d1] == datetime_series[d1]
assert datetime_series.loc[d2] == datetime_series[d2]
def test_loc_getitem_not_monotonic(self, datetime_series):
d1, d2 = datetime_series.index[[5, 15]]
ts2 = datetime_series[::2][[1, 2, 0]]
msg = r"Timestamp\('2000-01-10 00:00:00'\)"
with pytest.raises(KeyError, match=msg):
ts2.loc[d1:d2]
with pytest.raises(KeyError, match=msg):
ts2.loc[d1:d2] = 0
def test_loc_getitem_setitem_integer_slice_keyerrors(self):
ser = Series(np.random.randn(10), index=list(range(0, 20, 2)))
# this is OK
cp = ser.copy()
cp.iloc[4:10] = 0
assert (cp.iloc[4:10] == 0).all()
# so is this
cp = ser.copy()
cp.iloc[3:11] = 0
assert (cp.iloc[3:11] == 0).values.all()
result = ser.iloc[2:6]
result2 = ser.loc[3:11]
expected = ser.reindex([4, 6, 8, 10])
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result2, expected)
# non-monotonic, raise KeyError
s2 = ser.iloc[list(range(5)) + list(range(9, 4, -1))]
with pytest.raises(KeyError, match=r"^3$"):
s2.loc[3:11]
with pytest.raises(KeyError, match=r"^3$"):
s2.loc[3:11] = 0
def test_loc_getitem_iterator(self, string_series):
idx = iter(string_series.index[:10])
result = string_series.loc[idx]
tm.assert_series_equal(result, string_series[:10])
def test_loc_setitem_boolean(self, string_series):
mask = string_series > string_series.median()
result = string_series.copy()
result.loc[mask] = 0
expected = string_series
expected[mask] = 0
tm.assert_series_equal(result, expected)
def test_loc_setitem_corner(self, string_series):
inds = list(string_series.index[[5, 8, 12]])
string_series.loc[inds] = 5
msg = r"\['foo'\] not in index"
with pytest.raises(KeyError, match=msg):
string_series.loc[inds + ["foo"]] = 5
def test_basic_setitem_with_labels(self, datetime_series):
indices = datetime_series.index[[5, 10, 15]]
cp = datetime_series.copy()
exp = datetime_series.copy()
cp[indices] = 0
exp.loc[indices] = 0
tm.assert_series_equal(cp, exp)
cp = datetime_series.copy()
exp = datetime_series.copy()
cp[indices[0] : indices[2]] = 0
exp.loc[indices[0] : indices[2]] = 0
tm.assert_series_equal(cp, exp)
def test_loc_setitem_listlike_of_ints(self):
# integer indexes, be careful
ser = Series(np.random.randn(10), index=list(range(0, 20, 2)))
inds = [0, 4, 6]
arr_inds = np.array([0, 4, 6])
cp = ser.copy()
exp = ser.copy()
ser[inds] = 0
ser.loc[inds] = 0
tm.assert_series_equal(cp, exp)
cp = ser.copy()
exp = ser.copy()
ser[arr_inds] = 0
ser.loc[arr_inds] = 0
tm.assert_series_equal(cp, exp)
inds_notfound = [0, 4, 5, 6]
arr_inds_notfound = np.array([0, 4, 5, 6])
msg = r"\[5\] not in index"
with pytest.raises(KeyError, match=msg):
ser[inds_notfound] = 0
with pytest.raises(Exception, match=msg):
ser[arr_inds_notfound] = 0
def test_loc_setitem_dt64tz_values(self):
# GH#12089
ser = Series(
date_range("2011-01-01", periods=3, tz="US/Eastern"),
index=["a", "b", "c"],
)
s2 = ser.copy()
expected = Timestamp("2011-01-03", tz="US/Eastern")
s2.loc["a"] = expected
result = s2.loc["a"]
assert result == expected
s2 = ser.copy()
s2.iloc[0] = expected
result = s2.iloc[0]
assert result == expected
s2 = ser.copy()
s2["a"] = expected
result = s2["a"]
assert result == expected
@pytest.mark.parametrize("array_fn", [np.array, pd.array, list, tuple])
@pytest.mark.parametrize("size", [0, 4, 5, 6])
def test_loc_iloc_setitem_with_listlike(self, size, array_fn):
# GH37748
# testing insertion, in a Series of size N (here 5), of a listlike object
# of size 0, N-1, N, N+1
arr = array_fn([0] * size)
expected = Series([arr, 0, 0, 0, 0], index=list("abcde"), dtype=object)
ser = Series(0, index=list("abcde"), dtype=object)
ser.loc["a"] = arr
tm.assert_series_equal(ser, expected)
ser = Series(0, index=list("abcde"), dtype=object)
ser.iloc[0] = arr
tm.assert_series_equal(ser, expected)
@pytest.mark.parametrize("indexer", [IndexSlice["A", :], ("A", slice(None))])
def test_loc_series_getitem_too_many_dimensions(self, indexer):
# GH#35349
ser = Series(
index=MultiIndex.from_tuples([("A", "0"), ("A", "1"), ("B", "0")]),
data=[21, 22, 23],
)
msg = "Too many indices"
with pytest.raises(ValueError, match=msg):
ser.loc[indexer, :]
with pytest.raises(ValueError, match=msg):
ser.loc[indexer, :] = 1
def test_loc_setitem(self, string_series):
inds = string_series.index[[3, 4, 7]]
result = string_series.copy()
result.loc[inds] = 5
expected = string_series.copy()
expected[[3, 4, 7]] = 5
tm.assert_series_equal(result, expected)
result.iloc[5:10] = 10
expected[5:10] = 10
tm.assert_series_equal(result, expected)
# set slice with indices
d1, d2 = string_series.index[[5, 15]]
result.loc[d1:d2] = 6
expected[5:16] = 6 # because it's inclusive
tm.assert_series_equal(result, expected)
# set index value
string_series.loc[d1] = 4
string_series.loc[d2] = 6
assert string_series[d1] == 4
assert string_series[d2] == 6
@pytest.mark.parametrize("dtype", ["object", "string"])
def test_loc_assign_dict_to_row(self, dtype):
# GH41044
df = DataFrame({"A": ["abc", "def"], "B": ["ghi", "jkl"]}, dtype=dtype)
df.loc[0, :] = {"A": "newA", "B": "newB"}
expected = DataFrame({"A": ["newA", "def"], "B": ["newB", "jkl"]}, dtype=dtype)
tm.assert_frame_equal(df, expected)
@td.skip_array_manager_invalid_test
def test_loc_setitem_dict_timedelta_multiple_set(self):
# GH 16309
result = DataFrame(columns=["time", "value"])
result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"}
result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"}
expected = DataFrame(
[[Timedelta(6, unit="s"), "foo"]], columns=["time", "value"], index=[1]
)
tm.assert_frame_equal(result, expected)
def test_loc_set_multiple_items_in_multiple_new_columns(self):
# GH 25594
df = DataFrame(index=[1, 2], columns=["a"])
df.loc[1, ["b", "c"]] = [6, 7]
expected = DataFrame(
{
"a": Series([np.nan, np.nan], dtype="object"),
"b": [6, np.nan],
"c": [7, np.nan],
},
index=[1, 2],
)
tm.assert_frame_equal(df, expected)
def test_getitem_loc_str_periodindex(self):
    """A date string used as a ``.loc`` key on a PeriodIndex returns a scalar."""
    # GH#33964
    index = pd.period_range(start="2000", periods=20, freq="B")
    series = Series(range(20), index=index)
    # the values are 0..19, so the expected 9 is the row at position 9
    assert series.loc["2000-01-14"] == 9