A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/frame/methods/test_explode.py

277 lines
8.0 KiB

import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
def test_error():
    """Check that invalid ``explode`` requests raise ``ValueError``.

    Three failure modes are exercised in turn: a column argument that nests
    a list inside a list, a column list that repeats a label, and a frame
    whose own column labels are duplicated.
    """
    nested_values = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"))
    frame = pd.DataFrame({"A": nested_values, "B": 1})

    # [["A", "A"]]: the single element is itself a list, not a scalar/tuple.
    bad_type_msg = "column must be a scalar, tuple, or list thereof"
    with pytest.raises(ValueError, match=bad_type_msg):
        frame.explode([list("AA")])

    # ["A", "A"]: the same label is requested twice.
    with pytest.raises(ValueError, match="column must be unique"):
        frame.explode(list("AA"))

    # Relabel both columns as "A" so the frame itself has duplicate labels.
    frame.columns = list("AA")
    with pytest.raises(ValueError, match="columns must be unique"):
        frame.explode("A")
@pytest.mark.parametrize(
    "input_subset, error_message",
    [
        # Row "d" holds 2 elements in "A" but 3 in "C".
        (list("AC"), "columns must have matching element counts"),
        # An empty list of columns is rejected outright.
        ([], "column must be nonempty"),
    ],
)
def test_error_multi_columns(input_subset, error_message):
    """Check that invalid multi-column ``explode`` requests raise.

    Parameters
    ----------
    input_subset : list
        Column labels passed to ``DataFrame.explode``.
    error_message : str
        Pattern the raised ``ValueError`` message must match.
    """
    # GH 39240
    frame = pd.DataFrame(
        {
            "A": [[0, 1, 2], np.nan, [], (3, 4)],
            "B": 1,
            "C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]],
        },
        index=list("abcd"),
    )
    with pytest.raises(ValueError, match=error_message):
        frame.explode(input_subset)
@pytest.mark.parametrize(
    "scalar",
    ["a", 0, 1.5, pd.Timedelta("1 days"), pd.Timestamp("2019-12-31")],
)
def test_basic(scalar):
    """Explode a single column whose label is ``scalar``.

    The label is parametrized over several scalar types (string, int,
    float, Timedelta, Timestamp).
    """
    nested_values = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"))
    frame = pd.DataFrame({scalar: nested_values, "B": 1})

    # Row "a" yields three rows, "d" yields two; the NaN row and the
    # empty-list row each yield a single NaN.
    flattened = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
    )
    expected = pd.DataFrame({scalar: flattened, "B": 1})

    result = frame.explode(scalar)
    tm.assert_frame_equal(result, expected)
def test_multi_index_rows():
    """Explode a column of a frame whose rows carry a ``MultiIndex``."""
    source_index = pd.MultiIndex.from_tuples(
        [("a", 1), ("a", 2), ("b", 1), ("b", 2)]
    )
    nested_values = np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object)
    frame = pd.DataFrame({"A": nested_values, "B": 1}, index=source_index)

    # Source row labels repeat once per produced row: 3, 1, 1 and 2 times.
    repeated_index = pd.MultiIndex.from_tuples(
        [("a", 1)] * 3 + [("a", 2), ("b", 1)] + [("b", 2)] * 2
    )
    flattened = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4],
        index=repeated_index,
        dtype=object,
    )
    expected = pd.DataFrame({"A": flattened, "B": 1})

    result = frame.explode("A")
    tm.assert_frame_equal(result, expected)
def test_multi_index_columns():
    """Explode a column addressed by a tuple label."""
    exploded_label = ("A", 1)
    constant_label = ("A", 2)

    nested_values = np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object)
    frame = pd.DataFrame({exploded_label: nested_values, constant_label: 1})

    # The default integer row labels repeat once per produced row.
    flattened = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4],
        index=pd.Index([0, 0, 0, 1, 2, 3, 3]),
        dtype=object,
    )
    expected = pd.DataFrame({exploded_label: flattened, constant_label: 1})

    result = frame.explode(exploded_label)
    tm.assert_frame_equal(result, expected)
def test_usecase():
    """Run two explode scenarios taken from issue reports."""
    # explode a single column
    # gh-10511
    range_rows = [[11, range(5), 10], [22, range(3), 20]]
    range_frame = pd.DataFrame(range_rows, columns=list("ABC")).set_index("C")

    range_expected = pd.DataFrame(
        {
            "A": [11, 11, 11, 11, 11, 22, 22, 22],
            "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=object),
            "C": [10, 10, 10, 10, 10, 20, 20, 20],
        },
        columns=list("ABC"),
    ).set_index("C")

    tm.assert_frame_equal(range_frame.explode("B"), range_expected)

    # gh-8517
    text_columns = ["dt", "name", "text"]
    text_frame = pd.DataFrame(
        [["2014-01-01", "Alice", "A B"], ["2014-01-02", "Bob", "C D"]],
        columns=text_columns,
    )
    # Split the text on spaces into lists, then explode those lists.
    tokenized = text_frame.assign(text=text_frame.text.str.split(" "))

    text_expected = pd.DataFrame(
        [
            ["2014-01-01", "Alice", "A"],
            ["2014-01-01", "Alice", "B"],
            ["2014-01-02", "Bob", "C"],
            ["2014-01-02", "Bob", "D"],
        ],
        columns=text_columns,
        index=[0, 0, 1, 1],
    )

    tm.assert_frame_equal(tokenized.explode("text"), text_expected)
@pytest.mark.parametrize(
    "input_dict, input_index, expected_dict, expected_index",
    [
        (
            {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]},
            [0, 0],
            {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]},
            [0, 0, 0, 0],
        ),
        (
            {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]},
            pd.Index([0, 0], name="my_index"),
            {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]},
            pd.Index([0, 0, 0, 0], name="my_index"),
        ),
        (
            {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]},
            pd.MultiIndex.from_arrays(
                [[0, 0], [1, 1]], names=["my_first_index", "my_second_index"]
            ),
            {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]},
            pd.MultiIndex.from_arrays(
                [[0, 0, 0, 0], [1, 1, 1, 1]],
                names=["my_first_index", "my_second_index"],
            ),
        ),
        (
            {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]},
            pd.MultiIndex.from_arrays([[0, 0], [1, 1]], names=["my_index", None]),
            {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]},
            pd.MultiIndex.from_arrays(
                [[0, 0, 0, 0], [1, 1, 1, 1]], names=["my_index", None]
            ),
        ),
    ],
)
def test_duplicate_index(input_dict, input_index, expected_dict, expected_index):
    """Explode ``col1`` of a frame whose row labels are all duplicates.

    Parameters
    ----------
    input_dict : dict
        Column data for the frame under test.
    input_index : list, pd.Index or pd.MultiIndex
        Row labels for the frame under test.
    expected_dict : dict
        Column data of the expected result.
    expected_index : list, pd.Index or pd.MultiIndex
        Row labels of the expected result.
    """
    # GH 28005
    frame = pd.DataFrame(input_dict, index=input_index)
    expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)

    tm.assert_frame_equal(frame.explode("col1"), expected)
def test_ignore_index():
    """Explode with ``ignore_index=True`` and compare against 0..3 labels."""
    # GH 34932
    frame = pd.DataFrame(
        {"id": range(0, 20, 10), "values": [list("ab"), list("cd")]}
    )
    expected = pd.DataFrame(
        {"id": [0, 0, 10, 10], "values": list("abcd")},
        index=[0, 1, 2, 3],
    )

    result = frame.explode("values", ignore_index=True)
    tm.assert_frame_equal(result, expected)
def test_explode_sets():
    """Explode a column whose single cell holds a ``set``."""
    # https://github.com/pandas-dev/pandas/issues/35614
    frame = pd.DataFrame({"a": [{"x", "y"}], "b": [1]}, index=[1])
    expected = pd.DataFrame({"a": ["x", "y"], "b": [1, 1]}, index=[1, 1])

    # The exploded rows are sorted by "a" before comparing with the
    # fixed x-then-y expectation.
    exploded = frame.explode(column="a")
    result = exploded.sort_values(by="a")
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "input_subset, expected_dict, expected_index",
    [
        (
            list("AC"),
            {
                "A": pd.Series(
                    [0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
                    index=list("aaabcdde"),
                    dtype=object,
                ),
                "B": 1,
                "C": ["a", "b", "c", "foo", np.nan, "d", "e", np.nan],
            },
            list("aaabcdde"),
        ),
        (
            list("A"),
            {
                "A": pd.Series(
                    [0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
                    index=list("aaabcdde"),
                    dtype=object,
                ),
                "B": 1,
                "C": [
                    ["a", "b", "c"],
                    ["a", "b", "c"],
                    ["a", "b", "c"],
                    "foo",
                    [],
                    ["d", "e"],
                    ["d", "e"],
                    np.nan,
                ],
            },
            list("aaabcdde"),
        ),
    ],
)
def test_multi_columns(input_subset, expected_dict, expected_index):
    """Explode the columns listed in ``input_subset``.

    Parameters
    ----------
    input_subset : list
        Column labels passed to ``DataFrame.explode``.
    expected_dict : dict
        Column data of the expected result.
    expected_index : list
        Row labels of the expected result.
    """
    # GH 39240
    source_columns = {
        "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
        "B": 1,
        "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],
    }
    frame = pd.DataFrame(source_columns, index=list("abcde"))
    expected = pd.DataFrame(expected_dict, index=expected_index)

    result = frame.explode(input_subset)
    tm.assert_frame_equal(result, expected)