A PyQT GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be store and compiles monthly and yearly summaries.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/indexes/numeric/test_indexing.py

584 lines
22 KiB

import numpy as np
import pytest
from pandas.errors import InvalidIndexError
from pandas import (
Index,
RangeIndex,
Series,
Timestamp,
)
import pandas._testing as tm
from pandas.core.indexes.api import (
Float64Index,
Int64Index,
UInt64Index,
)
@pytest.fixture
def index_large():
# large values used in UInt64Index tests where no compat needed with Int64/Float64
large = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25]
return UInt64Index(large)
class TestGetLoc:
@pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"])
def test_get_loc(self, method):
index = Index([0, 1, 2])
warn = None if method is None else FutureWarning
with tm.assert_produces_warning(warn, match="deprecated"):
assert index.get_loc(1, method=method) == 1
if method:
with tm.assert_produces_warning(warn, match="deprecated"):
assert index.get_loc(1, method=method, tolerance=0) == 1
@pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"])
@pytest.mark.filterwarnings("ignore:Passing method:FutureWarning")
def test_get_loc_raises_bad_label(self, method):
index = Index([0, 1, 2])
if method:
msg = "not supported between"
err = TypeError
else:
msg = r"\[1, 2\]"
err = InvalidIndexError
with pytest.raises(err, match=msg):
index.get_loc([1, 2], method=method)
@pytest.mark.parametrize(
"method,loc", [("pad", 1), ("backfill", 2), ("nearest", 1)]
)
@pytest.mark.filterwarnings("ignore:Passing method:FutureWarning")
def test_get_loc_tolerance(self, method, loc):
index = Index([0, 1, 2])
assert index.get_loc(1.1, method) == loc
assert index.get_loc(1.1, method, tolerance=1) == loc
@pytest.mark.parametrize("method", ["pad", "backfill", "nearest"])
def test_get_loc_outside_tolerance_raises(self, method):
index = Index([0, 1, 2])
with pytest.raises(KeyError, match="1.1"):
with tm.assert_produces_warning(FutureWarning, match="deprecated"):
index.get_loc(1.1, method, tolerance=0.05)
def test_get_loc_bad_tolerance_raises(self):
index = Index([0, 1, 2])
with pytest.raises(ValueError, match="must be numeric"):
with tm.assert_produces_warning(FutureWarning, match="deprecated"):
index.get_loc(1.1, "nearest", tolerance="invalid")
def test_get_loc_tolerance_no_method_raises(self):
index = Index([0, 1, 2])
with pytest.raises(ValueError, match="tolerance .* valid if"):
index.get_loc(1.1, tolerance=1)
def test_get_loc_raises_missized_tolerance(self):
index = Index([0, 1, 2])
with pytest.raises(ValueError, match="tolerance size must match"):
with tm.assert_produces_warning(FutureWarning, match="deprecated"):
index.get_loc(1.1, "nearest", tolerance=[1, 1])
@pytest.mark.filterwarnings("ignore:Passing method:FutureWarning")
def test_get_loc_float64(self):
idx = Float64Index([0.0, 1.0, 2.0])
for method in [None, "pad", "backfill", "nearest"]:
assert idx.get_loc(1, method) == 1
if method is not None:
assert idx.get_loc(1, method, tolerance=0) == 1
for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]:
assert idx.get_loc(1.1, method) == loc
assert idx.get_loc(1.1, method, tolerance=0.9) == loc
with pytest.raises(KeyError, match="^'foo'$"):
idx.get_loc("foo")
with pytest.raises(KeyError, match=r"^1\.5$"):
idx.get_loc(1.5)
with pytest.raises(KeyError, match=r"^1\.5$"):
idx.get_loc(1.5, method="pad", tolerance=0.1)
with pytest.raises(KeyError, match="^True$"):
idx.get_loc(True)
with pytest.raises(KeyError, match="^False$"):
idx.get_loc(False)
with pytest.raises(ValueError, match="must be numeric"):
idx.get_loc(1.4, method="nearest", tolerance="foo")
with pytest.raises(ValueError, match="must contain numeric elements"):
idx.get_loc(1.4, method="nearest", tolerance=np.array(["foo"]))
with pytest.raises(
ValueError, match="tolerance size must match target index size"
):
idx.get_loc(1.4, method="nearest", tolerance=np.array([1, 2]))
def test_get_loc_na(self):
idx = Float64Index([np.nan, 1, 2])
assert idx.get_loc(1) == 1
assert idx.get_loc(np.nan) == 0
idx = Float64Index([np.nan, 1, np.nan])
assert idx.get_loc(1) == 1
# representable by slice [0:2:2]
msg = "'Cannot get left slice bound for non-unique label: nan'"
with pytest.raises(KeyError, match=msg):
idx.slice_locs(np.nan)
# not representable by slice
idx = Float64Index([np.nan, 1, np.nan, np.nan])
assert idx.get_loc(1) == 1
msg = "'Cannot get left slice bound for non-unique label: nan"
with pytest.raises(KeyError, match=msg):
idx.slice_locs(np.nan)
def test_get_loc_missing_nan(self):
# GH#8569
idx = Float64Index([1, 2])
assert idx.get_loc(1) == 0
with pytest.raises(KeyError, match=r"^3$"):
idx.get_loc(3)
with pytest.raises(KeyError, match="^nan$"):
idx.get_loc(np.nan)
with pytest.raises(InvalidIndexError, match=r"\[nan\]"):
# listlike/non-hashable raises TypeError
idx.get_loc([np.nan])
@pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]])
@pytest.mark.parametrize("method", ["nearest", "pad", "backfill"])
def test_get_loc_float_index_nan_with_method(self, vals, method):
# GH#39382
idx = Index(vals)
with pytest.raises(KeyError, match="nan"):
with tm.assert_produces_warning(FutureWarning, match="deprecated"):
idx.get_loc(np.nan, method=method)
@pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
def test_get_loc_numericindex_none_raises(self, dtype):
# case that goes through searchsorted and key is non-comparable to values
arr = np.arange(10**7, dtype=dtype)
idx = Index(arr)
with pytest.raises(KeyError, match="None"):
idx.get_loc(None)
def test_get_loc_overflows(self):
# unique but non-monotonic goes through IndexEngine.mapping.get_item
idx = Index([0, 2, 1])
val = np.iinfo(np.int64).max + 1
with pytest.raises(KeyError, match=str(val)):
idx.get_loc(val)
with pytest.raises(KeyError, match=str(val)):
idx._engine.get_loc(val)
class TestGetIndexer:
def test_get_indexer(self):
index1 = Index([1, 2, 3, 4, 5])
index2 = Index([2, 4, 6])
r1 = index1.get_indexer(index2)
e1 = np.array([1, 3, -1], dtype=np.intp)
tm.assert_almost_equal(r1, e1)
@pytest.mark.parametrize("reverse", [True, False])
@pytest.mark.parametrize(
"expected,method",
[
(np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"),
(np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"),
(np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"),
(np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"),
],
)
def test_get_indexer_methods(self, reverse, expected, method):
index1 = Index([1, 2, 3, 4, 5])
index2 = Index([2, 4, 6])
if reverse:
index1 = index1[::-1]
expected = expected[::-1]
result = index2.get_indexer(index1, method=method)
tm.assert_almost_equal(result, expected)
def test_get_indexer_invalid(self):
# GH10411
index = Index(np.arange(10))
with pytest.raises(ValueError, match="tolerance argument"):
index.get_indexer([1, 0], tolerance=1)
with pytest.raises(ValueError, match="limit argument"):
index.get_indexer([1, 0], limit=1)
@pytest.mark.parametrize(
"method, tolerance, indexer, expected",
[
("pad", None, [0, 5, 9], [0, 5, 9]),
("backfill", None, [0, 5, 9], [0, 5, 9]),
("nearest", None, [0, 5, 9], [0, 5, 9]),
("pad", 0, [0, 5, 9], [0, 5, 9]),
("backfill", 0, [0, 5, 9], [0, 5, 9]),
("nearest", 0, [0, 5, 9], [0, 5, 9]),
("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]),
("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]),
("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]),
("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]),
("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]),
("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]),
("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]),
("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]),
("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]),
],
)
def test_get_indexer_nearest(self, method, tolerance, indexer, expected):
index = Index(np.arange(10))
actual = index.get_indexer(indexer, method=method, tolerance=tolerance)
tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
@pytest.mark.parametrize("listtype", [list, tuple, Series, np.array])
@pytest.mark.parametrize(
"tolerance, expected",
list(
zip(
[[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]],
[[0, 2, -1], [0, -1, -1], [-1, 2, 9]],
)
),
)
def test_get_indexer_nearest_listlike_tolerance(
self, tolerance, expected, listtype
):
index = Index(np.arange(10))
actual = index.get_indexer(
[0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance)
)
tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
def test_get_indexer_nearest_error(self):
index = Index(np.arange(10))
with pytest.raises(ValueError, match="limit argument"):
index.get_indexer([1, 0], method="nearest", limit=1)
with pytest.raises(ValueError, match="tolerance size must match"):
index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3])
@pytest.mark.parametrize(
"method,expected",
[("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])],
)
def test_get_indexer_nearest_decreasing(self, method, expected):
index = Index(np.arange(10))[::-1]
actual = index.get_indexer([0, 5, 9], method=method)
tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp))
actual = index.get_indexer([0.2, 1.8, 8.5], method=method)
tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
@pytest.mark.parametrize(
"idx_class", [Int64Index, RangeIndex, Float64Index, UInt64Index]
)
@pytest.mark.parametrize("method", ["get_indexer", "get_indexer_non_unique"])
def test_get_indexer_numeric_index_boolean_target(self, method, idx_class):
# GH 16877
numeric_index = idx_class(RangeIndex(4))
other = Index([True, False, True])
result = getattr(numeric_index, method)(other)
expected = np.array([-1, -1, -1], dtype=np.intp)
if method == "get_indexer":
tm.assert_numpy_array_equal(result, expected)
else:
missing = np.arange(3, dtype=np.intp)
tm.assert_numpy_array_equal(result[0], expected)
tm.assert_numpy_array_equal(result[1], missing)
@pytest.mark.parametrize("method", ["pad", "backfill", "nearest"])
def test_get_indexer_with_method_numeric_vs_bool(self, method):
left = Index([1, 2, 3])
right = Index([True, False])
with pytest.raises(TypeError, match="Cannot compare"):
left.get_indexer(right, method=method)
with pytest.raises(TypeError, match="Cannot compare"):
right.get_indexer(left, method=method)
def test_get_indexer_numeric_vs_bool(self):
left = Index([1, 2, 3])
right = Index([True, False])
res = left.get_indexer(right)
expected = -1 * np.ones(len(right), dtype=np.intp)
tm.assert_numpy_array_equal(res, expected)
res = right.get_indexer(left)
expected = -1 * np.ones(len(left), dtype=np.intp)
tm.assert_numpy_array_equal(res, expected)
res = left.get_indexer_non_unique(right)[0]
expected = -1 * np.ones(len(right), dtype=np.intp)
tm.assert_numpy_array_equal(res, expected)
res = right.get_indexer_non_unique(left)[0]
expected = -1 * np.ones(len(left), dtype=np.intp)
tm.assert_numpy_array_equal(res, expected)
def test_get_indexer_float64(self):
idx = Float64Index([0.0, 1.0, 2.0])
tm.assert_numpy_array_equal(
idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
)
target = [-0.1, 0.5, 1.1]
tm.assert_numpy_array_equal(
idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
)
def test_get_indexer_nan(self):
# GH#7820
result = Float64Index([1, 2, np.nan]).get_indexer([np.nan])
expected = np.array([2], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_int64(self):
index = Int64Index(range(0, 20, 2))
target = Int64Index(np.arange(10))
indexer = index.get_indexer(target)
expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected)
target = Int64Index(np.arange(10))
indexer = index.get_indexer(target, method="pad")
expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected)
target = Int64Index(np.arange(10))
indexer = index.get_indexer(target, method="backfill")
expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected)
def test_get_indexer_uint64(self, index_large):
target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2**63)
indexer = index_large.get_indexer(target)
expected = np.array([0, -1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected)
target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2**63)
indexer = index_large.get_indexer(target, method="pad")
expected = np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected)
target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2**63)
indexer = index_large.get_indexer(target, method="backfill")
expected = np.array([0, 1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected)
class TestWhere:
@pytest.mark.parametrize(
"index",
[
Float64Index(np.arange(5, dtype="float64")),
Int64Index(range(0, 20, 2)),
UInt64Index(np.arange(5, dtype="uint64")),
],
)
def test_where(self, listlike_box, index):
cond = [True] * len(index)
expected = index
result = index.where(listlike_box(cond))
cond = [False] + [True] * (len(index) - 1)
expected = Float64Index([index._na_value] + index[1:].tolist())
result = index.where(listlike_box(cond))
tm.assert_index_equal(result, expected)
def test_where_uint64(self):
idx = UInt64Index([0, 6, 2])
mask = np.array([False, True, False])
other = np.array([1], dtype=np.int64)
expected = UInt64Index([1, 6, 1])
result = idx.where(mask, other)
tm.assert_index_equal(result, expected)
result = idx.putmask(~mask, other)
tm.assert_index_equal(result, expected)
class TestTake:
@pytest.mark.parametrize("klass", [Float64Index, Int64Index, UInt64Index])
def test_take_preserve_name(self, klass):
index = klass([1, 2, 3, 4], name="foo")
taken = index.take([3, 0, 1])
assert index.name == taken.name
def test_take_fill_value_float64(self):
# GH 12631
idx = Float64Index([1.0, 2.0, 3.0], name="xxx")
result = idx.take(np.array([1, 0, -1]))
expected = Float64Index([2.0, 1.0, 3.0], name="xxx")
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
expected = Float64Index([2.0, 1.0, np.nan], name="xxx")
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = Float64Index([2.0, 1.0, 3.0], name="xxx")
tm.assert_index_equal(result, expected)
msg = (
"When allow_fill=True and fill_value is not None, "
"all indices must be >= -1"
)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
msg = "index -5 is out of bounds for (axis 0 with )?size 3"
with pytest.raises(IndexError, match=msg):
idx.take(np.array([1, -5]))
@pytest.mark.parametrize("klass", [Int64Index, UInt64Index])
def test_take_fill_value_ints(self, klass):
# see gh-12631
idx = klass([1, 2, 3], name="xxx")
result = idx.take(np.array([1, 0, -1]))
expected = klass([2, 1, 3], name="xxx")
tm.assert_index_equal(result, expected)
name = klass.__name__
msg = f"Unable to fill values because {name} cannot contain NA"
# fill_value=True
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -1]), fill_value=True)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = klass([2, 1, 3], name="xxx")
tm.assert_index_equal(result, expected)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
msg = "index -5 is out of bounds for (axis 0 with )?size 3"
with pytest.raises(IndexError, match=msg):
idx.take(np.array([1, -5]))
class TestContains:
@pytest.mark.parametrize("klass", [Float64Index, Int64Index, UInt64Index])
def test_contains_none(self, klass):
# GH#35788 should return False, not raise TypeError
index = klass([0, 1, 2, 3, 4])
assert None not in index
def test_contains_float64_nans(self):
index = Float64Index([1.0, 2.0, np.nan])
assert np.nan in index
def test_contains_float64_not_nans(self):
index = Float64Index([1.0, 2.0, np.nan])
assert 1.0 in index
class TestSliceLocs:
@pytest.mark.parametrize("dtype", [int, float])
def test_slice_locs(self, dtype):
index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))
n = len(index)
assert index.slice_locs(start=2) == (2, n)
assert index.slice_locs(start=3) == (3, n)
assert index.slice_locs(3, 8) == (3, 6)
assert index.slice_locs(5, 10) == (3, n)
assert index.slice_locs(end=8) == (0, 6)
assert index.slice_locs(end=9) == (0, 7)
# reversed
index2 = index[::-1]
assert index2.slice_locs(8, 2) == (2, 6)
assert index2.slice_locs(7, 3) == (2, 5)
@pytest.mark.parametrize("dtype", [int, float])
def test_slice_locs_float_locs(self, dtype):
index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))
n = len(index)
assert index.slice_locs(5.0, 10.0) == (3, n)
assert index.slice_locs(4.5, 10.5) == (3, 8)
index2 = index[::-1]
assert index2.slice_locs(8.5, 1.5) == (2, 6)
assert index2.slice_locs(10.5, -1) == (0, n)
@pytest.mark.parametrize("dtype", [int, float])
def test_slice_locs_dup_numeric(self, dtype):
index = Index(np.array([10, 12, 12, 14], dtype=dtype))
assert index.slice_locs(12, 12) == (1, 3)
assert index.slice_locs(11, 13) == (1, 3)
index2 = index[::-1]
assert index2.slice_locs(12, 12) == (1, 3)
assert index2.slice_locs(13, 11) == (1, 3)
def test_slice_locs_na(self):
index = Index([np.nan, 1, 2])
assert index.slice_locs(1) == (1, 3)
assert index.slice_locs(np.nan) == (0, 3)
index = Index([0, np.nan, np.nan, 1, 2])
assert index.slice_locs(np.nan) == (1, 5)
def test_slice_locs_na_raises(self):
index = Index([np.nan, 1, 2])
with pytest.raises(KeyError, match=""):
index.slice_locs(start=1.5)
with pytest.raises(KeyError, match=""):
index.slice_locs(end=1.5)
class TestGetSliceBounds:
@pytest.mark.parametrize("kind", ["getitem", "loc", None])
@pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
def test_get_slice_bounds_within(self, kind, side, expected):
index = Index(range(6))
with tm.assert_produces_warning(FutureWarning, match="'kind' argument"):
result = index.get_slice_bound(4, kind=kind, side=side)
assert result == expected
@pytest.mark.parametrize("kind", ["getitem", "loc", None])
@pytest.mark.parametrize("side", ["left", "right"])
@pytest.mark.parametrize("bound, expected", [(-1, 0), (10, 6)])
def test_get_slice_bounds_outside(self, kind, side, expected, bound):
index = Index(range(6))
with tm.assert_produces_warning(FutureWarning, match="'kind' argument"):
result = index.get_slice_bound(bound, kind=kind, side=side)
assert result == expected