A PyQT GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be store and compiles monthly and yearly summaries.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/tests/frame/methods/test_diff.py

303 lines
9.6 KiB

import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDataFrameDiff:
def test_diff_requires_integer(self):
df = DataFrame(np.random.randn(2, 2))
with pytest.raises(ValueError, match="periods must be an integer"):
df.diff(1.5)
# GH#44572 np.int64 is accepted
@pytest.mark.parametrize("num", [1, np.int64(1)])
def test_diff(self, datetime_frame, num):
df = datetime_frame
the_diff = df.diff(num)
expected = df["A"] - df["A"].shift(num)
tm.assert_series_equal(the_diff["A"], expected)
def test_diff_int_dtype(self):
# int dtype
a = 10_000_000_000_000_000
b = a + 1
ser = Series([a, b])
rs = DataFrame({"s": ser}).diff()
assert rs.s[1] == 1
def test_diff_mixed_numeric(self, datetime_frame):
# mixed numeric
tf = datetime_frame.astype("float32")
the_diff = tf.diff(1)
tm.assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1))
def test_diff_axis1_nonconsolidated(self):
# GH#10907
df = DataFrame({"y": Series([2]), "z": Series([3])})
df.insert(0, "x", 1)
result = df.diff(axis=1)
expected = DataFrame({"x": np.nan, "y": Series(1), "z": Series(1)})
tm.assert_frame_equal(result, expected)
def test_diff_timedelta64_with_nat(self):
# GH#32441
arr = np.arange(6).reshape(3, 2).astype("timedelta64[ns]")
arr[:, 0] = np.timedelta64("NaT", "ns")
df = DataFrame(arr)
result = df.diff(1, axis=0)
expected = DataFrame({0: df[0], 1: [pd.NaT, pd.Timedelta(2), pd.Timedelta(2)]})
tm.assert_equal(result, expected)
result = df.diff(0)
expected = df - df
assert expected[0].isna().all()
tm.assert_equal(result, expected)
result = df.diff(-1, axis=1)
expected = df * np.nan
tm.assert_equal(result, expected)
@pytest.mark.parametrize("tz", [None, "UTC"])
def test_diff_datetime_axis0_with_nat(self, tz):
# GH#32441
dti = pd.DatetimeIndex(["NaT", "2019-01-01", "2019-01-02"], tz=tz)
ser = Series(dti)
df = ser.to_frame()
result = df.diff()
ex_index = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta(days=1)])
expected = Series(ex_index).to_frame()
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("tz", [None, "UTC"])
def test_diff_datetime_with_nat_zero_periods(self, tz):
# diff on NaT values should give NaT, not timedelta64(0)
dti = date_range("2016-01-01", periods=4, tz=tz)
ser = Series(dti)
df = ser.to_frame()
df[1] = ser.copy()
df.iloc[:, 0] = pd.NaT
expected = df - df
assert expected[0].isna().all()
result = df.diff(0, axis=0)
tm.assert_frame_equal(result, expected)
result = df.diff(0, axis=1)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("tz", [None, "UTC"])
def test_diff_datetime_axis0(self, tz):
# GH#18578
df = DataFrame(
{
0: date_range("2010", freq="D", periods=2, tz=tz),
1: date_range("2010", freq="D", periods=2, tz=tz),
}
)
result = df.diff(axis=0)
expected = DataFrame(
{
0: pd.TimedeltaIndex(["NaT", "1 days"]),
1: pd.TimedeltaIndex(["NaT", "1 days"]),
}
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("tz", [None, "UTC"])
def test_diff_datetime_axis1(self, tz):
# GH#18578
df = DataFrame(
{
0: date_range("2010", freq="D", periods=2, tz=tz),
1: date_range("2010", freq="D", periods=2, tz=tz),
}
)
result = df.diff(axis=1)
expected = DataFrame(
{
0: pd.TimedeltaIndex(["NaT", "NaT"]),
1: pd.TimedeltaIndex(["0 days", "0 days"]),
}
)
tm.assert_frame_equal(result, expected)
def test_diff_timedelta(self):
# GH#4533
df = DataFrame(
{
"time": [Timestamp("20130101 9:01"), Timestamp("20130101 9:02")],
"value": [1.0, 2.0],
}
)
res = df.diff()
exp = DataFrame(
[[pd.NaT, np.nan], [pd.Timedelta("00:01:00"), 1]], columns=["time", "value"]
)
tm.assert_frame_equal(res, exp)
def test_diff_mixed_dtype(self):
df = DataFrame(np.random.randn(5, 3))
df["A"] = np.array([1, 2, 3, 4, 5], dtype=object)
result = df.diff()
assert result[0].dtype == np.float64
def test_diff_neg_n(self, datetime_frame):
rs = datetime_frame.diff(-1)
xp = datetime_frame - datetime_frame.shift(-1)
tm.assert_frame_equal(rs, xp)
def test_diff_float_n(self, datetime_frame):
rs = datetime_frame.diff(1.0)
xp = datetime_frame.diff(1)
tm.assert_frame_equal(rs, xp)
def test_diff_axis(self):
# GH#9727
df = DataFrame([[1.0, 2.0], [3.0, 4.0]])
tm.assert_frame_equal(
df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]])
)
tm.assert_frame_equal(
df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]])
)
def test_diff_period(self):
# GH#32995 Don't pass an incorrect axis
pi = date_range("2016-01-01", periods=3).to_period("D")
df = DataFrame({"A": pi})
result = df.diff(1, axis=1)
expected = (df - pd.NaT).astype(object)
tm.assert_frame_equal(result, expected)
def test_diff_axis1_mixed_dtypes(self):
# GH#32995 operate column-wise when we have mixed dtypes and axis=1
df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})
expected = DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2})
result = df.diff(axis=1)
tm.assert_frame_equal(result, expected)
# GH#21437 mixed-float-dtypes
df = DataFrame(
{"a": np.arange(3, dtype="float32"), "b": np.arange(3, dtype="float64")}
)
result = df.diff(axis=1)
expected = DataFrame({"a": df["a"] * np.nan, "b": df["b"] * 0})
tm.assert_frame_equal(result, expected)
def test_diff_axis1_mixed_dtypes_large_periods(self):
# GH#32995 operate column-wise when we have mixed dtypes and axis=1
df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})
expected = df * np.nan
result = df.diff(axis=1, periods=3)
tm.assert_frame_equal(result, expected)
def test_diff_axis1_mixed_dtypes_negative_periods(self):
# GH#32995 operate column-wise when we have mixed dtypes and axis=1
df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})
expected = DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan})
result = df.diff(axis=1, periods=-1)
tm.assert_frame_equal(result, expected)
def test_diff_sparse(self):
# GH#28813 .diff() should work for sparse dataframes as well
sparse_df = DataFrame([[0, 1], [1, 0]], dtype="Sparse[int]")
result = sparse_df.diff()
expected = DataFrame(
[[np.nan, np.nan], [1.0, -1.0]], dtype=pd.SparseDtype("float", 0.0)
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"axis,expected",
[
(
0,
DataFrame(
{
"a": [np.nan, 0, 1, 0, np.nan, np.nan, np.nan, 0],
"b": [np.nan, 1, np.nan, np.nan, -2, 1, np.nan, np.nan],
"c": np.repeat(np.nan, 8),
"d": [np.nan, 3, 5, 7, 9, 11, 13, 15],
},
dtype="Int64",
),
),
(
1,
DataFrame(
{
"a": np.repeat(np.nan, 8),
"b": [0, 1, np.nan, 1, np.nan, np.nan, np.nan, 0],
"c": np.repeat(np.nan, 8),
"d": np.repeat(np.nan, 8),
},
dtype="Int64",
),
),
],
)
def test_diff_integer_na(self, axis, expected):
# GH#24171 IntegerNA Support for DataFrame.diff()
df = DataFrame(
{
"a": np.repeat([0, 1, np.nan, 2], 2),
"b": np.tile([0, 1, np.nan, 2], 2),
"c": np.repeat(np.nan, 8),
"d": np.arange(1, 9) ** 2,
},
dtype="Int64",
)
# Test case for default behaviour of diff
result = df.diff(axis=axis)
tm.assert_frame_equal(result, expected)
def test_diff_readonly(self):
# https://github.com/pandas-dev/pandas/issues/35559
arr = np.random.randn(5, 2)
arr.flags.writeable = False
df = DataFrame(arr)
result = df.diff()
expected = DataFrame(np.array(df)).diff()
tm.assert_frame_equal(result, expected)
def test_diff_all_int_dtype(self, any_int_numpy_dtype):
# GH 14773
df = DataFrame(range(5))
df = df.astype(any_int_numpy_dtype)
result = df.diff()
expected_dtype = (
"float32" if any_int_numpy_dtype in ("int8", "int16") else "float64"
)
expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype=expected_dtype)
tm.assert_frame_equal(result, expected)