"""Convert power timeseries of flows and volumes without breaking conservation laws."""
from typing import Literal
import numpy as np
import pandas as pd
from energy_analysis_toolbox.errors import (
EATEmptySourceError,
EATEmptyTargetsError,
EATInvalidTimestepDurationError,
)
from energy_analysis_toolbox.timeseries.extract_features.basics import (
index_to_timesteps,
timestep_durations,
)
from energy_analysis_toolbox.timeseries.resample.index_transformation import (
index_to_freq,
)
from energy_analysis_toolbox.timeseries.resample.interpolate import piecewise_affine
# =============================================================================
# Resampling with volume conservation
# =============================================================================
[docs]
def deduce_flows(
volumes: pd.Series,
last_step_duration: float | None = None,
) -> pd.Series:
"""Return a timeseries of "flow-rates" from one of an extensive variable.
Parameters
----------
volumes : pd.Series
The timeseries of "volumes" (X). In practice, any extensive variable which
"flows" during the overconsumption of the series.
The series is assumed to be indexed such that the value at a certain
index is the volume which flows *until the next index*.
last_step_duration : float, optional
Duration of the last time-step in the series in (s).
The default is |None| meaning that the same duration as the former-last
one is used.
Returns
-------
pd.Series
The series of flow-rates in (X.s-1).
Notes
-----
The flow-rate is simply obtained as the ratio between the volume flowing
during each time-step VS the time-step duration.
As the values in the series are volumes consumed until the next index, the
duration to be associated with the last element in the series is ambiguous.
The chosen value is controlled by ``last_step_duration`` argument.
"""
durations = timestep_durations(volumes, last_step_duration)
return volumes / durations
[docs]
def flow_rate_conservative(
flow_rates: pd.Series,
target_instants: pd.DatetimeIndex,
last_step_duration: float | None = None,
last_target_step_duration: float | None = None,
) -> pd.Series:
"""Return the series of flow-rate on target instants.
In what follows, flow-rates and volumes are considered in a broad meaning
of these terms :
- volumes represent amounts of an extensive variable which is subjected
to a conservation law, such as actual volume of water (under certain
assumptions), or mass, or energy;
- flow-rates represent flows of this variable, such as volumic flow-rate,
mass flow-rate or power.
Parameters
----------
flow_rates : pd.Series
Timeseries of flow-rates from which to interpolate in (X.s-1) where X
is any extensive variable.
target_instants : pd.DatetimeIndex
Instants at which the flow-rates values have to be returned.
last_step_duration : float, optional
Duration of the last time-step in the series in (s).
The default is |None| in which case the duration of the former-last
time-step is used.
last_target_step_duration : float, optional
Duration of the last time-step in the returned series in (s).
The default is |None| in which case the duration of the former-last
time-step is used.
This duration is used to deduce, if any, what proportion of the volume
consumption implicitly defined by the ``flow_rates`` series located
after the last target instant should be attributed to this instant.
.. important::
The ``flow_rates`` series is assumed to be indexed such that the value
at time ``ti`` is the flow-rate during the interval ``[ti, ti+1[`` until
the next value in the series.
.. seealso::
:py:func:`volume_conservative` which resamples the consumed volumes
on each interval.
Raises
------
EATEmptySourceError :
In case ``flow_rates`` is empty.
EATEmptyTargetsError :
In case ``target_instants`` is empty.
EATInvalidTimestepDurationError :
In case ``last_step_duration <= 0``
EATInvalidTimestepDurationError :
In case ``last_target_step_duration <= 0``
.. note ::
Uncaught zero-division error will be encountered in case there is a
null timestep in the source or target values.
Returns
-------
pd.Series :
Flow-rates values in interpolated at the target instants.
Notes
-----
The flow-rates are interpolated in such a way that the "volume is conserved".
This means that the volume obtained when integrating the resampled flow-rate
between two indices in the series should be the same as when computing this
volume from the initial series.
The algorithm used by the function is the following :
- First, the interval durations are computed. The last interval duration
ambiguity is removed thanks to ``last_step_duration``. [1.]
- A series of "volumes" consumed during each timestep is created. This
is the series of the flow-rate integrals over the time-step in the
series assuming the ``flow_rates`` series defines a piecewise constant
function. [2.]
- The volumes are resampled in a conservative way using
:py:func:`energy_analysis_toolbox.timeseries.power.volume_conservative`
function. [3.]
- The resulting flow-rates are deduced as the ratio between these volumes
and the interval durations of the target series. The duration of the
last interval is defined by ``last_target_step_duration``. [4.]
"""
if flow_rates.empty:
err = (
"Resampling an empty flow-rates series to new instants is an invalid "
"(undefined) operation."
)
raise EATEmptySourceError(err)
if target_instants.empty:
err = "Target instants must be provided for the series to be resampled."
raise EATEmptyTargetsError(err)
durations = timestep_durations(flow_rates, last_step=last_step_duration) # [1.]
volumes = flow_rates * durations # [2.]
interp_volumes = volume_conservative(
volumes,
target_instants,
last_step_duration=last_step_duration,
last_target_step_duration=last_target_step_duration,
) # [3.]
target_durations = index_to_timesteps(target_instants, last_target_step_duration)
interp_flow_rates = interp_volumes / target_durations # [4.]
interp_flow_rates.name = flow_rates.name
interp_flow_rates.index.name = flow_rates.index.name
return interp_flow_rates
[docs]
def volume_conservative(
volumes: pd.Series,
target_instants: pd.DatetimeIndex,
last_step_duration: float | None = None,
last_target_step_duration: float | None = None,
) -> pd.Series:
"""Resample the volume on target instants assuming it is a conservative variable.
In what follows, flow-rates and volumes are considered in a broad meaning
of these terms :
- volumes represent amounts of an extensive variable such as actual
volume of water, or mass, or energy;
- flow-rates represent flows of this variable, such as volumic flow-rate,
mass flow-rate or power.
The resampling is conservative in the sense that :
- assuming that ``volumes`` defines a piecewise constant function defined
on ``[closed, open[`` overconsumption (i.e. constant flow-rate between
timesteps),
- the integral of the function defined by the interpolated volume series
is always equal to which of the function defined by ``volumes`` on the
same support.
Parameters
----------
volumes : pd.Series
The timeseries of volumes in (X). X is the unit of an "extensive"
variable which can flow along time.
The series is assumed to be indexed such that the value at a certain
index is the volume consumed *until the next index*.
target_instants : pd.DatetimeIndex
Instants at which the flow-rates values have to be returned.
last_step_duration : float, optional
Duration of the last time-step in the ``volume`` series in (s).
The default is |None| in which case the duration of the former-last
time-step is used.
This duration is used to deduce the flow-rate in the last time-step.
last_target_step_duration : float, optional
Duration of the last time-step in the returned series in (s).
The default is |None| in which case the duration of the former-last
time-step is used.
This duration is used to deduce, if any, what proportion of the volume
in the ``volumes`` series located after the last target instant should
be attributed to this instant.
Returns
-------
pd.Series
The volumes series sampled on the overconsumption defined in
``target_instants``.
Raises
------
EATEmptySourceError :
In case ``volumes`` is empty.
EATEmptyTargetsError :
In case ``target_instants`` is empty.
EATInvalidTimestepDurationError :
In case ``last_step_duration <= 0``.
EATInvalidTimestepDurationError :
In case ``last_target_step_duration <= 0``.
Example
-------
This function can resample extensive variables (typically, volumes) which
consumption is spread along time using non-matching indices. It assumes a
constant flux in order to ensure the conservation of the integrated variable
on the new sampling overconsumption.
>>> volumes = pd.Series(
np.array([0, 0.1, 0.05, 0.08]),
index=pd.date_range("2021-12-15", periods=4, freq='6h')
)
>>> volumes
2021-12-15 00:00:00 0.00
2021-12-15 06:00:00 0.10
2021-12-15 12:00:00 0.05
2021-12-15 18:00:00 0.08
Freq: 6H, dtype: float64
>>> target_index = pd.date_range("2021-12-15", periods=3, freq='8h')
>>> volumes_obtained = volume_conservative(volumes, target_index)
>>> volumes_obtained
2021-12-15 00:00:00 0.033333
2021-12-15 08:00:00 0.100000
2021-12-15 16:00:00 0.096667
As the timeseries of extensive variables defined a consumption spread on
an interval, the last step duration is ambiguous. An arbitrary duration
can be enforced using the ``last_step_duration`` argument.
>>> volumes = pd.Series(
np.array([0.1]),
index=pd.date_range("2021-12-15", periods=1, freq='1d')
)
2021-12-15 0.1
Freq: D, dtype: float64
>>> volumes
>>> target_index = pd.date_range("2021-12-15", periods=4, freq='6h')
>>> volumes_obtained = volume_conservative(volumes, target_index,
last_step_duration=SK.day)
>>> volumes_obtained
2021-12-15 00:00:00 0.025
2021-12-15 06:00:00 0.025
2021-12-15 12:00:00 0.025
2021-12-15 18:00:00 0.025
Freq: 6H, dtype: float64
This is also true and applicable for the last target step duration.
>>> target_index = pd.date_range("2021-12-15", periods=1, freq='12h')
>>> volumes_obtained = volume_conservative(
volumes, target_index, last_step_duration=SK.day,
last_target_step_duration=SK.day / 2)
>>> volumes_obtained
2021-12-15 0.05
Freq: 12H, dtype: float64
Padding the source series with zeros until the desired end of resampling can
also ease removing the ambiguity on the "operative" part of the input data.
Notes
-----
The volumes are resampled in such a way that for ``ti`` and ``ti+1`` two
consecutive indices in ``target_instants``, the resampled volume associated
with ``ti`` is the volume "consumed" during ``[ti, ti+1[``, assuming a
constant flow-rate during the time-overconsumption defined in ``volumes``.
Accordingly, the function computes the resampled volumes as follows :
- Compute the cumulated sum of volumes for each time-step. [1.]
- Add a virtual timestep at the end of the volume series located
``last_step_duration`` after the last sample, with dummy value. [2.]
- As the ``volumes`` series was indexed in a way that element at time
``t`` was the volume which flows between ``t`` and the next timestep
the cumulated series is shifted by one step on the right and left-padded
with a zero. This way, element at position ``t`` is the total volume
which has flowed until time ``t``. [3.]
- Also add a virtual timestep in the target series so that the cumulated
sum at the implicit end of the target series is computed as well. [4.1]
- Interpolate the series of cumulated volumes as a piecewise affine
function. Target timesteps outside the convex span of the source-indices
(including the fictive one) are assigned the corresponding border value:
* 0 before the beginning of the ``volumes`` series
* The total cumulated volume after the end of the volume series
This is consistent with the fact that no volume flow is considered
outside the overconsumption defined in the series. [4.2]
- Compute elementwise differences of the cumulated volume series, which
returns the series of volume "consumed" between each timestep and the
previous one. [5.]
- Shift the obtained result one time-step backward in order to match the
volume-series sampling convention. [6.]
- Discard the last fictive time-step which has no meaning in this convention. [7.]
As the flow-rate is the time-derivative of the cumulated consumption curve,
using a piecewise linear interpolation is equivalent to assuming a constant
flow-rate during all the time-steps of the ``volumes`` series.
.. note::
**Why does this function work ?** I.e. why does it conserve the volume ?
**Short explanation "with hands" :**
The sampling convention of the ``volumes`` series is just an indirect way
of defining a piecewise constant flow-rate function.
The current method works by resampling exactly the integral of the
flow-rate function, to, then, recover a new volume series (piecewise
constant flow-rate function). By construction, the integral of the resulting
piecewise constant flow-rate function on the overconsumption of its support of
definition matches which of the original one
.. seealso::
:py:func:`flow_rate_conservative` which resamples the flow-rate on the
time-overconsumption in a volume-conservative way.
"""
if volumes.empty:
err = (
"Resampling an empty volumes series to new instants is an "
"invalid operation."
)
raise EATEmptySourceError(err)
if target_instants.empty:
err = "Target instants must be provided for the series to be resampled."
raise EATEmptyTargetsError(err)
if (
last_step_duration is not None
and last_step_duration <= 0
or last_target_step_duration is not None
and last_target_step_duration <= 0
):
err = "Last step duration cannot be zero."
raise EATInvalidTimestepDurationError(err)
vol_index = volumes.cumsum() # [1.]
# the function deals with None last_step_duration values
durations = timestep_durations(volumes.iloc[-2:], last_step=last_step_duration)
ghost_right = vol_index.index[-1] + pd.Timedelta(seconds=durations.iloc[-1]) # [2.]
vol_index.loc[ghost_right] = np.nan # np.nan to not break dtype
vol_index = vol_index.shift(1, fill_value=0.0) # [3.]
# the function deals with None last_target_step_duration
target_durations = index_to_timesteps(
target_instants[-2:],
last_target_step_duration,
)
target_instants = target_instants.insert(
target_instants.size, # at the end
target_instants[-1] + pd.Timedelta(seconds=target_durations[-1]),
) # [4.1]
interp_vol_index = piecewise_affine(vol_index, target_instants) # [4.2]
interp_volumes = interp_vol_index.diff().shift(-1).dropna() # [5.] [6.] [7.]
interp_volumes.name = volumes.name
interp_volumes.index.name = volumes.index.name
return interp_volumes
[docs]
def volume_to_freq(
series: "pd.Series[float]",
freq: str | pd.Timedelta,
origin: None | Literal["floor", "ceil"] | pd.Timestamp = None,
last_step_duration: float | None = None,
) -> "pd.Series[float]":
"""Return a series resampled to freq such that the volume is conserved.
The last step duration of the resampled series is set to the frequency ``freq``.
Parameters
----------
series : pd.Series
A series of values of a volume-alike quantity with a DatetimeIndex.
The sampling period can be constant or not (in this case, use
``last_step_duration`` argument).
freq : str | pd.Timedelta
the freq to which the series is resampled. Must be a valid
pandas frequency.
origin : {None, 'floor, 'ceil', pd.Timestamp}, optional
What origin should be used for the target resampling range.
See :py:func:`.index_to_freq` for details.
last_step_duration : float, optional
Duration of the last time-step in the ``series`` in (s).
The default is |None| in which case the duration of the former-last
time-step is used.
See :py:func:`.index_to_freq` for details.
Returns
-------
pd.Series
The resampled series.
.. seealso::
* :py:func:`volume_conservative` which resamples the flow-rate on the
target instants.
* :py:func:`flow_rate_to_freq` which resamples the volume on the giver
frequency.
"""
target_instants = index_to_freq(series.index, freq, origin, last_step_duration)
return volume_conservative(
series,
target_instants,
last_step_duration=last_step_duration,
last_target_step_duration=pd.Timedelta(freq).total_seconds(),
)
[docs]
def flow_rate_to_freq(
series: "pd.Series[float]",
freq: str | pd.Timedelta,
origin: None | Literal["floor", "ceil"] | pd.Timestamp = None,
last_step_duration: float | None = None,
) -> "pd.Series[float]":
"""Return a series resampled to freq such that the flow rate is conserved.
The last step duration of the resampled series is set to the frequency ``freq``.
Parameters
----------
series : pd.Series
A series of values of a flow_rate-alike quantity with a DatetimeIndex.
The sampling period can be constant or not (in this case, use
``last_step_duration`` argument).
freq : str, pd.Timedelta
the freq to which the series is resampled. Must be a valid
pandas frequency.
origin : {None, 'floor, 'ceil', pd.Timestamp}, optional
What origin should be used for the target resampling range.
See :py:func:`.index_to_freq` for details.
last_step_duration : float, optional
Duration of the last time-step in the ``series`` in (s).
The default is |None| in which case the duration of the former-last
time-step is used.
This duration is used to deduce the volume in the last time-step
as well as the last index of the resampled series.
Returns
-------
pd.Series
The resampled series.
.. seealso::
* :py:func:`flow_rate_conservative` which resamples the flow-rate on
the target instants.
* :py:func:`volume_to_freq` which resamples the volume on the giver frequency.
"""
target_instants = index_to_freq(series.index, freq, origin, last_step_duration)
return flow_rate_conservative(
series,
target_instants,
last_step_duration=last_step_duration,
last_target_step_duration=pd.Timedelta(freq).total_seconds(),
)