A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/reshape/concat/test_index.py

308 lines
11 KiB

import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
concat,
)
import pandas._testing as tm
class TestIndexConcat:
def test_concat_ignore_index(self, sort):
frame1 = DataFrame(
{"test1": ["a", "b", "c"], "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]}
)
frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
frame1.index = Index(["x", "y", "z"])
frame2.index = Index(["x", "y", "q"])
v1 = concat([frame1, frame2], axis=1, ignore_index=True, sort=sort)
nan = np.nan
expected = DataFrame(
[
[nan, nan, nan, 4.3],
["a", 1, 4.5, 5.2],
["b", 2, 3.2, 2.2],
["c", 3, 1.2, nan],
],
index=Index(["q", "x", "y", "z"]),
)
if not sort:
expected = expected.loc[["x", "y", "z", "q"]]
tm.assert_frame_equal(v1, expected)
@pytest.mark.parametrize(
"name_in1,name_in2,name_in3,name_out",
[
("idx", "idx", "idx", "idx"),
("idx", "idx", None, None),
("idx", None, None, None),
("idx1", "idx2", None, None),
("idx1", "idx1", "idx2", None),
("idx1", "idx2", "idx3", None),
(None, None, None, None),
],
)
def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out):
# GH13475
indices = [
Index(["a", "b", "c"], name=name_in1),
Index(["b", "c", "d"], name=name_in2),
Index(["c", "d", "e"], name=name_in3),
]
frames = [
DataFrame({c: [0, 1, 2]}, index=i) for i, c in zip(indices, ["x", "y", "z"])
]
result = concat(frames, axis=1)
exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out)
expected = DataFrame(
{
"x": [0, 1, 2, np.nan, np.nan],
"y": [np.nan, 0, 1, 2, np.nan],
"z": [np.nan, np.nan, 0, 1, 2],
},
index=exp_ind,
)
tm.assert_frame_equal(result, expected)
def test_concat_rename_index(self):
a = DataFrame(
np.random.rand(3, 3),
columns=list("ABC"),
index=Index(list("abc"), name="index_a"),
)
b = DataFrame(
np.random.rand(3, 3),
columns=list("ABC"),
index=Index(list("abc"), name="index_b"),
)
result = concat([a, b], keys=["key0", "key1"], names=["lvl0", "lvl1"])
exp = concat([a, b], keys=["key0", "key1"], names=["lvl0"])
names = list(exp.index.names)
names[1] = "lvl1"
exp.index.set_names(names, inplace=True)
tm.assert_frame_equal(result, exp)
assert result.index.names == exp.index.names
def test_concat_copy_index_series(self, axis):
# GH 29879
ser = Series([1, 2])
comb = concat([ser, ser], axis=axis, copy=True)
assert comb.index is not ser.index
def test_concat_copy_index_frame(self, axis):
# GH 29879
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
comb = concat([df, df], axis=axis, copy=True)
assert comb.index is not df.index
assert comb.columns is not df.columns
def test_default_index(self):
# is_series and ignore_index
s1 = Series([1, 2, 3], name="x")
s2 = Series([4, 5, 6], name="y")
res = concat([s1, s2], axis=1, ignore_index=True)
assert isinstance(res.columns, pd.RangeIndex)
exp = DataFrame([[1, 4], [2, 5], [3, 6]])
# use check_index_type=True to check the result have
# RangeIndex (default index)
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
# is_series and all inputs have no names
s1 = Series([1, 2, 3])
s2 = Series([4, 5, 6])
res = concat([s1, s2], axis=1, ignore_index=False)
assert isinstance(res.columns, pd.RangeIndex)
exp = DataFrame([[1, 4], [2, 5], [3, 6]])
exp.columns = pd.RangeIndex(2)
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
# is_dataframe and ignore_index
df1 = DataFrame({"A": [1, 2], "B": [5, 6]})
df2 = DataFrame({"A": [3, 4], "B": [7, 8]})
res = concat([df1, df2], axis=0, ignore_index=True)
exp = DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"])
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
res = concat([df1, df2], axis=1, ignore_index=True)
exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
def test_dups_index(self):
# GH 4771
# single dtypes
df = DataFrame(
np.random.randint(0, 10, size=40).reshape(10, 4),
columns=["A", "A", "C", "C"],
)
result = concat([df, df], axis=1)
tm.assert_frame_equal(result.iloc[:, :4], df)
tm.assert_frame_equal(result.iloc[:, 4:], df)
result = concat([df, df], axis=0)
tm.assert_frame_equal(result.iloc[:10], df)
tm.assert_frame_equal(result.iloc[10:], df)
# multi dtypes
df = concat(
[
DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]),
DataFrame(
np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
),
],
axis=1,
)
result = concat([df, df], axis=1)
tm.assert_frame_equal(result.iloc[:, :6], df)
tm.assert_frame_equal(result.iloc[:, 6:], df)
result = concat([df, df], axis=0)
tm.assert_frame_equal(result.iloc[:10], df)
tm.assert_frame_equal(result.iloc[10:], df)
# append
result = df.iloc[0:8, :]._append(df.iloc[8:])
tm.assert_frame_equal(result, df)
result = df.iloc[0:8, :]._append(df.iloc[8:9])._append(df.iloc[9:10])
tm.assert_frame_equal(result, df)
expected = concat([df, df], axis=0)
result = df._append(df)
tm.assert_frame_equal(result, expected)
class TestMultiIndexConcat:
def test_concat_multiindex_with_keys(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
index = frame.index
result = concat([frame, frame], keys=[0, 1], names=["iteration"])
assert result.index.names == ("iteration",) + index.names
tm.assert_frame_equal(result.loc[0], frame)
tm.assert_frame_equal(result.loc[1], frame)
assert result.index.nlevels == 3
def test_concat_multiindex_with_none_in_index_names(self):
# GH 15787
index = MultiIndex.from_product([[1], range(5)], names=["level1", None])
df = DataFrame({"col": range(5)}, index=index, dtype=np.int32)
result = concat([df, df], keys=[1, 2], names=["level2"])
index = MultiIndex.from_product(
[[1, 2], [1], range(5)], names=["level2", "level1", None]
)
expected = DataFrame({"col": list(range(5)) * 2}, index=index, dtype=np.int32)
tm.assert_frame_equal(result, expected)
result = concat([df, df[:2]], keys=[1, 2], names=["level2"])
level2 = [1] * 5 + [2] * 2
level1 = [1] * 7
no_name = list(range(5)) + list(range(2))
tuples = list(zip(level2, level1, no_name))
index = MultiIndex.from_tuples(tuples, names=["level2", "level1", None])
expected = DataFrame({"col": no_name}, index=index, dtype=np.int32)
tm.assert_frame_equal(result, expected)
def test_concat_multiindex_rangeindex(self):
# GH13542
# when multi-index levels are RangeIndex objects
# there is a bug in concat with objects of len 1
df = DataFrame(np.random.randn(9, 2))
df.index = MultiIndex(
levels=[pd.RangeIndex(3), pd.RangeIndex(3)],
codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)],
)
res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]])
exp = df.iloc[[2, 3, 4, 5], :]
tm.assert_frame_equal(res, exp)
def test_concat_multiindex_dfs_with_deepcopy(self):
# GH 9967
from copy import deepcopy
example_multiindex1 = MultiIndex.from_product([["a"], ["b"]])
example_dataframe1 = DataFrame([0], index=example_multiindex1)
example_multiindex2 = MultiIndex.from_product([["a"], ["c"]])
example_dataframe2 = DataFrame([1], index=example_multiindex2)
example_dict = {"s1": example_dataframe1, "s2": example_dataframe2}
expected_index = MultiIndex(
levels=[["s1", "s2"], ["a"], ["b", "c"]],
codes=[[0, 1], [0, 0], [0, 1]],
names=["testname", None, None],
)
expected = DataFrame([[0], [1]], index=expected_index)
result_copy = concat(deepcopy(example_dict), names=["testname"])
tm.assert_frame_equal(result_copy, expected)
result_no_copy = concat(example_dict, names=["testname"])
tm.assert_frame_equal(result_no_copy, expected)
@pytest.mark.parametrize(
"mi1_list",
[
[["a"], range(2)],
[["b"], np.arange(2.0, 4.0)],
[["c"], ["A", "B"]],
[["d"], pd.date_range(start="2017", end="2018", periods=2)],
],
)
@pytest.mark.parametrize(
"mi2_list",
[
[["a"], range(2)],
[["b"], np.arange(2.0, 4.0)],
[["c"], ["A", "B"]],
[["d"], pd.date_range(start="2017", end="2018", periods=2)],
],
)
def test_concat_with_various_multiindex_dtypes(
self, mi1_list: list, mi2_list: list
):
# GitHub #23478
mi1 = MultiIndex.from_product(mi1_list)
mi2 = MultiIndex.from_product(mi2_list)
df1 = DataFrame(np.zeros((1, len(mi1))), columns=mi1)
df2 = DataFrame(np.zeros((1, len(mi2))), columns=mi2)
if mi1_list[0] == mi2_list[0]:
expected_mi = MultiIndex(
levels=[mi1_list[0], list(mi1_list[1])],
codes=[[0, 0, 0, 0], [0, 1, 0, 1]],
)
else:
expected_mi = MultiIndex(
levels=[
mi1_list[0] + mi2_list[0],
list(mi1_list[1]) + list(mi2_list[1]),
],
codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
)
expected_df = DataFrame(np.zeros((1, len(expected_mi))), columns=expected_mi)
with tm.assert_produces_warning(None):
result_df = concat((df1, df2), axis=1)
tm.assert_frame_equal(expected_df, result_df)