Source code for energy_analysis_toolbox.thermosensitivity.daily_analysis

"""Analyze daily-sampled thermosensitivity data."""

import logging
from collections.abc import Callable

import numpy as np
import pandas as pd

from energy_analysis_toolbox.weather.degree_days import (
    literal_computation_dd_types,
    literal_dd_types,
)

from .thermosensitivity import CategoricalThermoSensitivity



[docs]
class DailyCategoricalThermoSensitivity(
    CategoricalThermoSensitivity,
):
    """Categorical thermosensitivity analysis performed at a daily time step.

    Extends ``CategoricalThermoSensitivity``: instead of receiving the
    categories directly, it receives a function which labels each day of the
    period covered by the data.

    Example:
    --------
    See :py:class:`DayOfWeekCategoricalThermoSensitivity`

    """

    def __init__(
        self,
        energy_data: pd.Series,
        temperature_data: pd.Series,
        categories_func: Callable[[pd.DatetimeIndex], pd.Series],
        degree_days_type: literal_dd_types = "heating",
        degree_days_base_temperature: dict | None = None,
        degree_days_computation_method: literal_computation_dd_types = "integral",
        interseason_mean_temperature: float = 20,
        base_logger_name: str | None = None,
        min_logger_level_stdout: int | str = logging.ERROR,
    ) -> None:
        """Build the daily categories and initialize the parent analysis.

        A daily index is generated from the earliest to the latest timestamp
        found in either input series (both bounds included), passed to
        ``categories_func``, and the resulting categories are forwarded to the
        parent class together with a ``"1D"`` frequency.

        Parameters
        ----------
        energy_data : pd.Series
            Time series of energy consumption data for the building.
        temperature_data : pd.Series
            Time series of outdoor temperature data.
        categories_func : Callable[[pd.DatetimeIndex], pd.Series]
            Function called once with the ``pd.DatetimeIndex`` of the days of
            the analysis period. It must return a ``pd.Series`` giving the
            category of each day (e.g., weekdays vs. weekends, seasons).
        degree_days_type : str, optional
            Type of degree days to compute (default is "heating").
            Options are:

            - "heating": heating degree days.
            - "cooling": cooling degree days.
            - "both": both heating and cooling degree days.
            - "auto": automatically detect the degree days type.

        degree_days_base_temperature : dict, optional
            Base temperature(s) for degree day calculations. ``None`` (the
            default) or any empty value is replaced by an empty dict.
            Should include keys "heating" and/or "cooling".
        degree_days_computation_method : str, optional
            Method to compute degree days (default is "integral").
            Options are:

            - "integral": integral calculation.
            - "mean": mean temperature calculation.
            - "min_max": min-max temperature calculation.
            - "pro": energy-professionals calculation.

        interseason_mean_temperature : float, optional
            Mean temperature to differentiate heating and cooling periods
            (default 20).
        base_logger_name : str, optional
            Name of the logger. By default, it is the class name. All
            following instances receive a unique identifier, based on the
            first one, with the pattern:

            - ``<base_logger_name>``
            - ``<base_logger_name>_1``
            - ...

        min_logger_level_stdout : str, int, optional
            Minimum logger level below which no message is transferred to
            stdout (i.e. not printed). Default is ``logging.ERROR``.

        """
        if not degree_days_base_temperature:
            degree_days_base_temperature = {}

        daily = "1D"
        # The daily index spans the union of the periods covered by both series.
        first_stamps = (energy_data.index.min(), temperature_data.index.min())
        last_stamps = (energy_data.index.max(), temperature_data.index.max())
        analysis_days = pd.date_range(
            start=min(first_stamps),
            end=max(last_stamps),
            freq=daily,
            inclusive="both",
        )

        parent_arguments = {
            "energy_data": energy_data,
            "temperature_data": temperature_data,
            "categories": categories_func(analysis_days),
            "frequency": daily,
            "degree_days_type": degree_days_type,
            "degree_days_base_temperature": degree_days_base_temperature,
            "degree_days_computation_method": degree_days_computation_method,
            "interseason_mean_temperature": interseason_mean_temperature,
            "base_logger_name": base_logger_name,
            "min_logger_level_stdout": min_logger_level_stdout,
        }
        super().__init__(**parent_arguments)




[docs]
class DayOfWeekCategoricalThermoSensitivity(
    DailyCategoricalThermoSensitivity,
):
    """Daily thermosensitivity with one category per day of the week.

    Based on :py:class:`DailyCategoricalThermoSensitivity`, using the English
    day name of each date as its category.
    """

    def __init__(
        self,
        energy_data: pd.Series,
        temperature_data: pd.Series,
        degree_days_type: literal_dd_types = "heating",
        degree_days_base_temperature: dict | None = None,
        degree_days_computation_method: literal_computation_dd_types = "integral",
        interseason_mean_temperature: float = 20,
        base_logger_name: str | None = None,
        min_logger_level_stdout: int | str = logging.ERROR,
    ) -> None:
        """Initialize the analysis with a day-of-week categorisation.

        All arguments are forwarded to the parent class, together with a
        function labelling each day with its day name.

        Parameters
        ----------
        energy_data : pd.Series
            Time series of energy consumption data for the building.
        temperature_data : pd.Series
            Time series of outdoor temperature data.
        degree_days_type : str, optional
            Type of degree days to compute (default is "heating").
            Options are:

            - "heating": heating degree days.
            - "cooling": cooling degree days.
            - "both": both heating and cooling degree days.
            - "auto": automatically detect the degree days type.

        degree_days_base_temperature : dict, optional
            Base temperature(s) for degree day calculations. ``None`` (the
            default) or any empty value is replaced by an empty dict.
            Should include keys "heating" and/or "cooling".
        degree_days_computation_method : str, optional
            Method to compute degree days (default is "integral").
            Options are:

            - "integral": integral calculation.
            - "mean": mean temperature calculation.
            - "min_max": min-max temperature calculation.
            - "pro": energy-professionals calculation.

        interseason_mean_temperature : float, optional
            Mean temperature to differentiate heating and cooling periods
            (default 20).
        base_logger_name : str, optional
            Name of the logger. By default, it is the class name. All
            following instances receive a unique identifier, based on the
            first one, with the pattern:

            - ``<base_logger_name>``
            - ``<base_logger_name>_1``
            - ...

        min_logger_level_stdout : str, int, optional
            Minimum logger level below which no message is transferred to
            stdout (i.e. not printed). Default is ``logging.ERROR``.

        """

        def day_of_week_categoriser(
            index: pd.DatetimeIndex,
        ) -> pd.Series:
            """Label every date of ``index`` with the name of its week day.

            Parameters
            ----------
            index : pd.DatetimeIndex
                The dates to categorize.

            Returns
            -------
            pd.Series
                Series indexed by ``index`` whose values are the day names
                ("Monday", ..., "Sunday") of the corresponding dates.

            Examples
            --------
            >>> index = pd.date_range(start="2023-01-01", periods=7, freq="D")
            >>> day_of_week_categoriser(index)
            2023-01-01       Sunday
            2023-01-02       Monday
            2023-01-03      Tuesday
            2023-01-04    Wednesday
            2023-01-05     Thursday
            2023-01-06       Friday
            2023-01-07     Saturday
            Freq: D, dtype: object

            """
            day_names = index.day_name()
            return pd.Series(day_names, index=index)

        if not degree_days_base_temperature:
            degree_days_base_temperature = {}

        parent_arguments = {
            "energy_data": energy_data,
            "temperature_data": temperature_data,
            "categories_func": day_of_week_categoriser,
            "degree_days_type": degree_days_type,
            "degree_days_base_temperature": degree_days_base_temperature,
            "degree_days_computation_method": degree_days_computation_method,
            "interseason_mean_temperature": interseason_mean_temperature,
            "base_logger_name": base_logger_name,
            "min_logger_level_stdout": min_logger_level_stdout,
        }
        super().__init__(**parent_arguments)




[docs]
class AutoCategoricalThermoSensitivity(
    DayOfWeekCategoricalThermoSensitivity,
):
    """Day-of-week thermosensitivity with automatic merging of similar days.

    Based on :py:class:`DayOfWeekCategoricalThermoSensitivity`. Once the model
    is fitted, categories whose model terms are not significantly different
    (Wald test) can be identified with :py:meth:`new_categories` and merged
    with :py:meth:`merge_and_fit`.
    """

    # Calendar order used to sort the members of a merged label.
    _WEEKDAY_ORDER = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )

    @property
    def categories(
        self,
    ) -> pd.Series:
        """The categories of the periods."""
        return self._categories

    @categories.setter
    def categories(
        self,
        value: pd.Series,
    ) -> None:
        """Set the categories and discard the data derived from the old ones.

        Parameters
        ----------
        value : pd.Series
            A pandas Series representing the new categories to assign.

        .. note::
            Setting new categories removes the following entries from the
            instance ``__dict__`` (presumably cached properties of the parent
            class, which are then recomputed on next access) and resets the
            aggregated data:

            - ``resampled_energy_temperature_category``
            - ``resampled_categories``
            - ``_aggregated_data`` (set to ``None``)

        """
        self._categories = value
        self.__dict__.pop("resampled_energy_temperature_category", None)
        self.__dict__.pop("resampled_categories", None)
        self._aggregated_data = None

    def _categories_differ(
        self,
        first: str,
        second: str,
        significant_level: float,
    ) -> bool:
        """Tell if at least one model term differs between two categories.

        For each predictor (and the intercept), the parameters named
        ``"<predictor>:<first>"`` and ``"<predictor>:<second>"`` are compared
        with a Wald test on their difference.

        Parameters
        ----------
        first, second : str
            Labels of the two categories to compare.
        significant_level : float
            A p-value strictly below this level flags a significant difference.

        Returns
        -------
        bool
            ``True`` as soon as one test is significant, ``False`` otherwise.

        """
        param_names = self.model.params.index
        for predictor in [*self.predictors, "Intercept"]:
            # Contrast testing H0: param(first) - param(second) == 0.
            contrast = np.zeros((1, len(param_names)))
            contrast[0, param_names.get_loc(predictor + ":" + first)] = 1
            contrast[0, param_names.get_loc(predictor + ":" + second)] = -1
            wald_test = self.model.wald_test(contrast, scalar=True)
            if wald_test.pvalue < significant_level:
                # One significant term is enough: no need to test the others.
                return True
        return False

    def new_categories(
        self,
        significant_level: float = 0.1,
    ) -> dict[str, str]:
        """Return a mapping from the current categories to merged categories.

        Every pair of categories is compared with Wald tests on the model
        terms of each predictor and of the intercept. Two categories with no
        significantly different term are put in the same group. Grouping is
        transitive: if A is similar to C and B is similar to C, then A, B and
        C all end up in the same group, whatever the order of the categories.

        Parameters
        ----------
        significant_level : float, optional
            The significance level for the Wald test (a p-value below this
            level is considered significant). Must be between 0 and 1. The
            higher the value, the more categories will be kept separate. Lower
            values will merge categories that are not significantly different.

        Returns
        -------
        dict
            A dictionary mapping each current category to the label of its
            group. The label is the "-"-joined list of the group members,
            sorted in calendar order (Monday first). Labels which are not
            plain week-day names (e.g. labels resulting from a previous merge)
            are ranked by their first "-"-separated component, unknown ones
            coming last in alphabetical order.

        Notes
        -----
        - The model must have been fitted before calling this method, since
          the model parameters are read.
        - The returned dictionary can be passed to ``Series.map`` to update
          the category labels, which is what :py:meth:`merge_and_fit` does.

        Example
        -------
        >>> auto = AutoCategoricalThermoSensitivity(...)
        >>> auto.fit()
        >>> auto.new_categories(significant_level=0.1)
        {'Monday': 'Monday-Wednesday-Sunday',
         'Tuesday': 'Tuesday',
         'Wednesday': 'Monday-Wednesday-Sunday',
         'Thursday': 'Thursday',
         'Friday': 'Friday',
         'Saturday': 'Saturday',
         'Sunday': 'Monday-Wednesday-Sunday'
        }

        """
        labels = [str(label) for label in self.resampled_categories.unique()]
        # Each label points to the (shared) list of the members of its group.
        groups: dict[str, list[str]] = {label: [label] for label in labels}
        for position, first in enumerate(labels):
            for second in labels[position + 1 :]:
                if groups[first] is groups[second]:
                    # Already merged through another category.
                    continue
                if self._categories_differ(first, second, significant_level):
                    continue
                # Merge the whole group of ``second`` into the group of
                # ``first`` and repoint every absorbed member, so that all the
                # members of a group always share the same label.
                kept, absorbed = groups[first], groups[second]
                kept.extend(absorbed)
                for member in absorbed:
                    groups[member] = kept

        weekday_rank = {day: rank for rank, day in enumerate(self._WEEKDAY_ORDER)}

        def calendar_key(label: str) -> tuple[int, str]:
            # Tolerates labels that are not week-day names instead of raising.
            head = label.split("-", 1)[0]
            return (weekday_rank.get(head, len(weekday_rank)), label)

        return {
            label: "-".join(sorted(members, key=calendar_key))
            for label, members in groups.items()
        }

    def merge_and_fit(
        self,
        significant_level: float = 0.1,
    ) -> None:
        """Merge similar categories and fit the model with updated categories.

        The mapping returned by :py:meth:`new_categories` is applied to the
        current ``categories`` (which resets the data derived from them), then
        ``fit`` is called again.

        Parameters
        ----------
        significant_level : float, optional
            The significance level for the Wald test (a p-value below this
            level is considered significant). Must be between 0 and 1. The
            higher the value, the more categories will be kept separate. Lower
            values will merge categories that are not significantly different.

        Returns
        -------
        None

        Notes
        -----
        - The model must have been fitted beforehand, as the current model
          parameters are used to decide which categories are merged.
        - Merging days which do not show significant thermosensitivity
          differences reduces the number of parameters of the model.

        """
        merged_labels = self.new_categories(significant_level=significant_level)
        self.categories = self.categories.map(merged_labels)
        self.fit()