Source code for energy_analysis_toolbox.power.overconsumption.select
"""Selects overconsumption according to various criteria.
The possible criteria to select overconsumption are the following:
- :py:func:`by_individual_proportion` selects those whose energy content (over
their reference) is beyond a certain proportion of the total overconsumption
energy. It can be used to keep only "big enough" overconsumptions.
- :py:func:`by_cumulated_proportion` selects the minimum set of overconsumptions which
is necessary to explain a certain proportion of the total overconsumption
energy. It can be used when one wants to keep all the overconsumptions which explain
"most of the overconsumption", but it can include small non-significant overshoots
in certain cases.
- :py:func:`by_combined_proportions` is the combination of both previous approaches.
It enables the selection of the most significant overconsumption to explain "most of
the overconsumption".
"""
import pandas as pd
[docs]
def by_individual_proportion(
intervals_overshoot: pd.DataFrame,
proportion_tshd: float = 0.05,
energy_reference: float | None = None,
) -> pd.DataFrame:
"""Return overconsumption for which overshoot is above a proportion of the total.
Parameters
----------
intervals_overshoot : pd.DataFrame
A table of overshoot overconsumption with at least 'start', 'end' and 'energy'
columns.
proportion_tshd : float, optional
Proportion (in [0,1]) of the total energy of the intervals_overshoot.
An interval is conserved by the function only if it represents at least
(>=) this proportion of the total. Default is 5%.
energy_reference : float or None, optional
The total energy (in the same unit as the values in the ``"energy"``
column, relatively to which the proportions are computed. Default is None,
in which case the sum of the column values is used.
Returns
-------
pd.DataFrame :
A copy of the input dataframe with an additional "proportion" column, where only
the overconsumption which represent at least the specified proportion of the
total energy are conserved.
The returned overconsumption are sorted by decreasing order of overshoot energy.
"""
intervals_overshoot = intervals_overshoot.copy()
if energy_reference is None:
energy_reference = intervals_overshoot["energy"].sum()
# Select overconsumption with prominent overconsumption
intervals_overshoot["proportion"] = intervals_overshoot["energy"] / energy_reference
intervals_overshoot = intervals_overshoot.sort_values(
by="energy",
ascending=False,
)
return intervals_overshoot[intervals_overshoot["proportion"] >= proportion_tshd]
[docs]
def by_cumulated_proportion(
intervals_overshoot: pd.DataFrame,
proportion_tshd: float | None = 0.80,
energy_reference: float | None = None,
) -> pd.DataFrame:
"""Return overconsumption where total energy is a proportion of the overshoot.
Parameters
----------
intervals_overshoot : pd.DataFrame
A table of overshoot overconsumption with at least 'start', 'end' and 'energy'
columns.
proportion_tshd : float, optional
Proportion (in [0,1]) of the total energy of the intervals_overshoot.
Intervals are sorted by decreasing order of energy and conserved until
this proportion - at least (>=) - of the total energy is reached.
Default is 0.8, meaning that the minimum set of overconsumption which represent
at least 80% of the total overshoot energy is conserved.
energy_reference : float or None, optional
The total energy (in the same unit as the values in the ``"energy"``
column), relatively to which the proportions are computed. Default is None,
in which case the sum of the column values is used.
Returns
-------
pd.DataFrame :
A copy of the input dataframe with an additional "cum_energy_prop" column, where
only the overconsumption which represent together at least the specified
proportion of the total energy are conserved.
The returned overconsumption are sorted by decreasing order of overshoot energy.
"""
intervals_overshoot = intervals_overshoot.sort_values(by="energy", ascending=False)
if energy_reference is None:
energy_reference = intervals_overshoot["energy"].sum()
intervals_overshoot["cum_energy_prop"] = intervals_overshoot["energy"].cumsum()
intervals_overshoot["cum_energy_prop"] /= energy_reference
# ensure at least one value
last_selected = (
intervals_overshoot["cum_energy_prop"] < proportion_tshd
).argmin() + 1
return intervals_overshoot.iloc[:last_selected, :]
[docs]
def by_combined_proportions(
intervals_overshoot: pd.DataFrame,
proportion_tshd: float = 0.80,
proportion_indiv_tshd: float = 0.05,
energy_reference: float | None = None,
) -> pd.DataFrame:
"""Return overshoot intervals by their energy contribution based on proportions.
Parameters
----------
intervals_overshoot : pd.DataFrame
A table of overshoot overconsumption with at least 'start', 'end' and 'energy'
columns.
proportion_tshd : float, optional
Proportion (in [0,1]) of the total energy of the intervals_overshoot.
Intervals are sorted by decreasing order of energy and conserved until
this proportion - at least - of ``energy_reference`` is reached.
Default is 0.8, meaning that the minimum set of intervals which represent
at least 80% of the total overshoot energy is conserved.
proportion_indiv_tshd : float, optional
Proportion (in [0,1]) of the total energy of the intervals_overshoot.
An interval is conserved by the function only if it represents at least
this proportion of the ``energy_reference``. Default is 5%.
energy_reference : float or None, optional
The total energy (in the same unit as the values in the ``"energy"``
column, relatively to which the proportions are computed. Default is None,
in which case the sum of the column values is used.
Returns
-------
pd.DataFrame :
A copy of the input dataframe with additional "proportion" and "cum_energy_prop"
columns, where only the overconsumption which represent together and
individually at least the specified proportions of the total energy, are
conserved. The returned overconsumption are sorted by decreasing order of
overshoot energy.
"""
if energy_reference is None:
energy_reference = intervals_overshoot["energy"].sum()
intervals_overshoot = intervals_overshoot.sort_values(by="energy", ascending=False)
intervals_overshoot["cum_energy_prop"] = intervals_overshoot["energy"].cumsum()
intervals_overshoot["cum_energy_prop"] /= energy_reference
intervals_overshoot["proportion"] = intervals_overshoot["energy"] / energy_reference
# ensure at least one value
last_selected = (
intervals_overshoot["cum_energy_prop"] < proportion_tshd
).argmin() + 1
selected = intervals_overshoot.iloc[:last_selected, :]
return selected[selected["proportion"] >= proportion_indiv_tshd]