Source code for energy_analysis_toolbox.timeseries.create.from_intervals

"""Module converting tabular representations of interval data into timeseries."""

import numpy as np
import pandas as pd

from energy_analysis_toolbox import keywords as eatk



[docs]
def flatten_and_fill(
    data: pd.DataFrame,
    fill_values: dict | None = None,
    start_f: str = eatk.start_f,
    end_f: str = eatk.end_f,
    time_f: str = eatk.time_f,
) -> pd.DataFrame:
    """Return a table of intervals as a table of timeseries.

    Each interval of ``data`` produces two rows in the result: one indexed
    with the interval start, which carries the interval values, and one
    indexed with the interval end, which carries filler values.

    Parameters
    ----------
    data : pd.DataFrame
        A table of intervals. The interval bounds are read from the columns
        ``start_f`` and ``end_f``; every other column is treated as a value
        attached to the interval.
    fill_values : dict or None
        A dict where keys are names of value columns of ``data`` and values
        are used to fill the rows located at the interval ends, for each of
        the columns in the dict. Columns which are absent from the dict
        receive ``np.nan``.
        Default is |None| which is treated as an empty dict.
    start_f : str, default |eatk.start_f|
        The name of the column defining interval starts.
    end_f : str, default |eatk.end_f|
        The name of the column defining interval ends.
    time_f : str, default |eatk.time_f|
        The name of the index of the returned Dataframe.

    Returns
    -------
    pd.DataFrame :
        A table describing the input data as a timeseries.
        The dataframe is indexed with the times of the interval starts and ends,
        such that the rows indexed with an interval end in the dataframe contains
        only filler values. The ``start_f`` and ``end_f`` columns are not
        part of the returned table.

    Raises
    ------
    KeyError
        If ``start_f`` or ``end_f`` is not a column of ``data``, or if
        ``fill_values`` contains a key which is not a value column of ``data``.


    .. note::

        An empty table is considered to be flattened as an empty table : the
        input object is returned as is.

    .. note::

        Each filler column is first created with the dtype of the matching
        column in ``data``. When the filler value cannot be represented with
        this dtype (e.g. ``np.nan`` for an integer column), the dtype of the
        filler is inferred from the value instead of raising.


    .. warning::

        The function assumes that the intervals are correctly defined meaning
        that :

        - no interval overlaps another one,
        - intervals have no common boundary.

        The function does not check these assumptions.

    Example
    -------
    Consider e.g. the following dataframe.

    >>> table
                    start                 end col_str  col_float
    0 2022-12-05 18:00:00 2022-12-05 18:05:00    toto        0.0
    1 2022-12-05 18:10:00 2022-12-05 18:15:00    toto        2.0
    2 2022-12-05 18:20:00 2022-12-05 18:25:00    toto        4.0

    It can be flattened as follows :

    >>> names = {"start_f": "start", "end_f": "end", "time_f": "timestamp"}
    >>> flatten_and_fill(table, **names)
                        col_str  col_float
    timestamp
    2022-12-05 18:00:00    toto        0.0
    2022-12-05 18:05:00     NaN        NaN
    2022-12-05 18:10:00    toto        2.0
    2022-12-05 18:15:00     NaN        NaN
    2022-12-05 18:20:00    toto        4.0
    2022-12-05 18:25:00     NaN        NaN

    The ``nan`` values can be filled with fixed values for each column during
    the flattening process :

    >>> fill_values = {'col_str': 'tata', 'col_float': 0}
    >>> flatten_and_fill(table, fill_values=fill_values, **names)
                        col_str  col_float
    timestamp
    2022-12-05 18:00:00    toto        0.0
    2022-12-05 18:05:00    tata        0.0
    2022-12-05 18:10:00    toto        2.0
    2022-12-05 18:15:00    tata        0.0
    2022-12-05 18:20:00    toto        4.0
    2022-12-05 18:25:00    tata        0.0

    """
    if data.empty:
        return data
    requested = {} if fill_values is None else fill_values
    # Rows located at the interval ends: same value columns as the input,
    # their content is overwritten below with the filler values.
    fillers = data.set_index(end_f).drop(columns=[start_f])
    unknown = [col for col in requested if col not in fillers.columns]
    if unknown:
        msg = f"fill_values refers to columns which are not in data: {unknown}"
        raise KeyError(msg)
    for col in fillers.columns:
        # Whole-column replacement (rather than an in-place ``.loc`` set) so
        # that a filler whose dtype differs from the original column, such as
        # NaN for an integer column, is accepted as is.
        fillers[col] = _constant_filler(
            value=requested.get(col, np.nan),
            index=fillers.index,
            dtype=fillers[col].dtype,
        )
    # Rows located at the interval starts carry the actual interval values.
    starts = data.set_index(start_f).drop(columns=[end_f])
    timeseries_table = pd.concat([starts, fillers]).sort_index()
    timeseries_table.index.name = time_f
    return timeseries_table


def _constant_filler(value: object, index: pd.Index, dtype: object) -> pd.Series:
    """Return a series holding ``value`` for every label of ``index``.

    The series is created with ``dtype`` when pandas accepts it. If pandas
    rejects the cast with a ``TypeError`` or a ``ValueError``, the dtype is
    inferred from ``value`` instead.
    """
    try:
        return pd.Series(data=value, index=index, dtype=dtype)
    except (TypeError, ValueError):
        return pd.Series(data=value, index=index)