A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/frame/methods/test_append.py

285 lines
10 KiB

import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Series,
Timestamp,
date_range,
timedelta_range,
)
import pandas._testing as tm
class TestDataFrameAppend:
    """Tests for row-wise appending to a DataFrame.

    Most tests exercise the private ``DataFrame._append``; the public
    ``append`` is only called where its (deprecated) behavior or its
    ``FutureWarning`` is the thing under test.
    """

    @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning")
    def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series):
        """Splitting an object at position 5 and appending the halves restores it."""
        obj = multiindex_dataframe_random_data
        obj = tm.get_obj(obj, frame_or_series)

        a = obj[:5]
        b = obj[5:]

        # Public ``append`` on purpose; its deprecation warning is filtered above.
        result = a.append(b)
        tm.assert_equal(result, obj)

    def test_append_empty_list(self):
        """Appending an empty list yields an equal but distinct object."""
        # GH 28769
        df = DataFrame()
        result = df._append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df

        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])
        result = df._append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df  # ._append() should return a new object

    def test_append_series_dict(self):
        """Appending a Series or a dict as a row, including the error paths."""
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        # The row keeps its label (4) as its name, which collides with the
        # existing index when integrity is verified.
        series = df.loc[4]
        msg = "Indexes have overlapping values"
        with pytest.raises(ValueError, match=msg):
            df._append(series, verify_integrity=True)

        # An unnamed Series has no row label, so ignore_index=True is required.
        series.name = None
        msg = "Can only append a Series if ignore_index=True"
        with pytest.raises(TypeError, match=msg):
            df._append(series, verify_integrity=True)

        # A reversed Series must match appending the equivalent one-row frame.
        result = df._append(series[::-1], ignore_index=True)
        expected = df._append(
            DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True
        )
        tm.assert_frame_equal(result, expected)

        # dict
        result = df._append(series.to_dict(), ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # Only a subset of the columns is present in the appended row.
        result = df._append(series[::-1][:3], ignore_index=True)
        expected = df._append(
            DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True
        )
        # Compare with ``expected`` reordered to the column order of ``result``.
        tm.assert_frame_equal(result, expected.loc[:, result.columns])

        msg = "Can only append a dict if ignore_index=True"
        with pytest.raises(TypeError, match=msg):
            df._append(series.to_dict())

        # can append when name set
        row = df.loc[4]
        row.name = 5
        result = df._append(row)
        expected = df._append(df[-1:], ignore_index=True)
        tm.assert_frame_equal(result, expected)

    def test_append_list_of_series_dicts(self):
        """Appending a list of dicts matches appending the equivalent DataFrame."""
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        # Row labels are not needed, only the row contents.
        dicts = [x.to_dict() for _, x in df.iterrows()]

        result = df._append(dicts, ignore_index=True)
        expected = df._append(df, ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # different columns
        dicts = [
            {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4},
            {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8},
        ]
        result = df._append(dicts, ignore_index=True, sort=True)
        expected = df._append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_list_retain_index_name(self):
        """The index name of the target frame survives appending a Series."""
        df = DataFrame(
            [[1, 2], [3, 4]], index=pd.Index(["a", "b"], name="keepthisname")
        )

        serc = Series([5, 6], name="c")

        expected = DataFrame(
            [[1, 2], [3, 4], [5, 6]],
            index=pd.Index(["a", "b", "c"], name="keepthisname"),
        )

        # append series
        result = df._append(serc)
        tm.assert_frame_equal(result, expected)

        # append list of series
        result = df._append([serc])
        tm.assert_frame_equal(result, expected)

    def test_append_missing_cols(self):
        """Appending dicts that each lack some of the frame's columns."""
        # GH22252
        # exercise the conditional branch in append method where the data
        # to be appended is a list and does not contain all columns that are in
        # the target DataFrame
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        dicts = [{"foo": 9}, {"bar": 10}]
        result = df._append(dicts, ignore_index=True, sort=True)

        expected = df._append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_empty_dataframe(self):
        """Appending an empty DataFrame leaves the target frame unchanged."""
        # Empty df append empty df
        df1 = DataFrame()
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-empty df append empty df
        df1 = DataFrame(np.random.randn(5, 2))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Empty df with columns append empty df
        df1 = DataFrame(columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-Empty df with columns append empty df
        df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

    def test_append_dtypes(self, using_array_manager):
        """Resulting dtype when a Timestamp column meets rows of other dtypes."""
        # GH 5754
        # row appends of different dtypes (so need to do by-item)
        # can sometimes infer the correct type

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Timestamp followed by a string.
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": "foo"}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]})
        tm.assert_frame_equal(result, expected)

        # Timestamp followed by a float NaN.
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        if using_array_manager:
            # TODO(ArrayManager) decide on exact casting rules in concat
            # With ArrayManager, all-NaN float is not ignored
            expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        # Timestamp followed by an object-dtype NaN.
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        if using_array_manager:
            # With ArrayManager, all-NaN float is not ignored
            expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        # Float NaN followed by a Timestamp.
        df1 = DataFrame({"bar": np.nan}, index=range(1))
        df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")}
        )
        if using_array_manager:
            # With ArrayManager, all-NaN float is not ignored
            expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        # Timestamp followed by an object-dtype integer.
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"]
    )
    def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp):
        """Appending a frame's own first row keeps the Timestamp value intact."""
        # GH 30238
        tz = tz_naive_fixture
        df = DataFrame([Timestamp(timestamp, tz=tz)])
        result = df._append(df.iloc[0]).iloc[-1]
        expected = Series(Timestamp(timestamp, tz=tz), name=0)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "data, dtype",
        [
            ([1], pd.Int64Dtype()),
            ([1], pd.CategoricalDtype()),
            ([pd.Interval(left=0, right=5)], pd.IntervalDtype()),
            ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")),
            ([1], pd.SparseDtype()),
        ],
    )
    def test_other_dtypes(self, data, dtype):
        """The appended row keeps the dtype the frame was built with."""
        df = DataFrame(data, dtype=dtype)
        result = df._append(df.iloc[0]).iloc[-1]
        expected = Series(data, name=0, dtype=dtype)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_append_numpy_bug_1681(self, dtype):
        """Appending datetime-like data to an empty frame keeps its values."""
        # another datetime64 bug
        if dtype == "datetime64[ns]":
            index = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
        else:
            index = timedelta_range("1 days", "10 days", freq="2D")

        df = DataFrame()
        other = DataFrame({"A": "foo", "B": index}, index=index)

        result = df._append(other)
        assert (result["B"] == index).all()

    @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning")
    def test_multiindex_column_append_multiple(self):
        """Repeated column insertion plus append keeps the "multi" columns correct."""
        # GH 29699
        df = DataFrame(
            [[1, 11], [2, 12], [3, 13]],
            columns=pd.MultiIndex.from_tuples(
                [("multi", "col1"), ("multi", "col2")], names=["level1", None]
            ),
        )
        df2 = df.copy()
        for i in range(1, 10):
            df[i, "colA"] = 10
            df = df._append(df2, ignore_index=True)
            result = df["multi"]
            # After i appends the frame holds i + 1 copies of the three rows.
            expected = DataFrame(
                {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)}
            )
            tm.assert_frame_equal(result, expected)

    def test_append_raises_future_warning(self):
        """The public ``DataFrame.append`` emits a ``FutureWarning``."""
        # GH#35407
        df1 = DataFrame([[1, 2], [3, 4]])
        df2 = DataFrame([[5, 6], [7, 8]])
        with tm.assert_produces_warning(FutureWarning):
            df1.append(df2)