A PyQT GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be store and compiles monthly and yearly summaries.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/plotting/_matplotlib/groupby.py

134 lines
3.9 KiB

from __future__ import annotations
import numpy as np
from pandas._typing import (
Dict,
IndexLabel,
)
from pandas.core.dtypes.missing import remove_na_arraylike
from pandas import (
DataFrame,
MultiIndex,
Series,
concat,
)
def create_iter_data_given_by(
data: DataFrame, kind: str = "hist"
) -> Dict[str, DataFrame | Series]:
"""
Create data for iteration given `by` is assigned or not, and it is only
used in both hist and boxplot.
If `by` is assigned, return a dictionary of DataFrames in which the key of
dictionary is the values in groups.
If `by` is not assigned, return input as is, and this preserves current
status of iter_data.
Parameters
----------
data : reformatted grouped data from `_compute_plot_data` method.
kind : str, plot kind. This function is only used for `hist` and `box` plots.
Returns
-------
iter_data : DataFrame or Dictionary of DataFrames
Examples
--------
If `by` is assigned:
>>> import numpy as np
>>> tuples = [('h1', 'a'), ('h1', 'b'), ('h2', 'a'), ('h2', 'b')]
>>> mi = MultiIndex.from_tuples(tuples)
>>> value = [[1, 3, np.nan, np.nan],
... [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]]
>>> data = DataFrame(value, columns=mi)
>>> create_iter_data_given_by(data)
{'h1': h1
a b
0 1.0 3.0
1 3.0 4.0
2 NaN NaN, 'h2': h2
a b
0 NaN NaN
1 NaN NaN
2 5.0 6.0}
"""
# For `hist` plot, before transformation, the values in level 0 are values
# in groups and subplot titles, and later used for column subselection and
# iteration; For `box` plot, values in level 1 are column names to show,
# and are used for iteration and as subplots titles.
if kind == "hist":
level = 0
else:
level = 1
# Select sub-columns based on the value of level of MI, and if `by` is
# assigned, data must be a MI DataFrame
assert isinstance(data.columns, MultiIndex)
return {
col: data.loc[:, data.columns.get_level_values(level) == col]
for col in data.columns.levels[level]
}
def reconstruct_data_with_by(
data: DataFrame, by: IndexLabel, cols: IndexLabel
) -> DataFrame:
"""
Internal function to group data, and reassign multiindex column names onto the
result in order to let grouped data be used in _compute_plot_data method.
Parameters
----------
data : Original DataFrame to plot
by : grouped `by` parameter selected by users
cols : columns of data set (excluding columns used in `by`)
Returns
-------
Output is the reconstructed DataFrame with MultiIndex columns. The first level
of MI is unique values of groups, and second level of MI is the columns
selected by users.
Examples
--------
>>> d = {'h': ['h1', 'h1', 'h2'], 'a': [1, 3, 5], 'b': [3, 4, 6]}
>>> df = DataFrame(d)
>>> reconstruct_data_with_by(df, by='h', cols=['a', 'b'])
h1 h2
a b a b
0 1.0 3.0 NaN NaN
1 3.0 4.0 NaN NaN
2 NaN NaN 5.0 6.0
"""
grouped = data.groupby(by)
data_list = []
for key, group in grouped:
columns = MultiIndex.from_product([[key], cols])
sub_group = group[cols]
sub_group.columns = columns
data_list.append(sub_group)
data = concat(data_list, axis=1)
return data
def reformat_hist_y_given_by(
y: Series | np.ndarray, by: IndexLabel | None
) -> Series | np.ndarray:
"""Internal function to reformat y given `by` is applied or not for hist plot.
If by is None, input y is 1-d with NaN removed; and if by is not None, groupby
will take place and input y is multi-dimensional array.
"""
if by is not None and len(y.shape) > 1:
return np.array([remove_na_arraylike(col) for col in y.T]).T
return remove_na_arraylike(y)