Source code for energy_analysis_toolbox.thermosensitivity.daily_analysis

"""Analyze daily-sampled thermosensitivity data."""

import logging
from collections.abc import Callable

import numpy as np
import pandas as pd

from energy_analysis_toolbox.weather.degree_days import (
    literal_computation_dd_types,
    literal_dd_types,
)

from .thermosensitivity import CategoricalThermoSensitivity


class DailyCategoricalThermoSensitivity(CategoricalThermoSensitivity):
    """Categorical thermosensitivity analysis carried out at a daily period.

    Specialization of ``CategoricalThermoSensitivity`` in which the analysis
    frequency is fixed to one day (``"1D"``) and the category of each day is
    produced by a user-supplied function.

    Example:
    --------
    See :py:class:`DayOfWeekCategoricalThermoSensitivity`

    """

    def __init__(
        self,
        energy_data: pd.Series,
        temperature_data: pd.Series,
        categories_func: Callable[[pd.DatetimeIndex], pd.Series],
        degree_days_type: literal_dd_types = "heating",
        degree_days_base_temperature: dict | None = None,
        degree_days_computation_method: literal_computation_dd_types = "integral",
        interseason_mean_temperature: float = 20,
        base_logger_name: str | None = None,
        min_logger_level_stdout: int | str = logging.ERROR,
    ) -> None:
        """Build a ``DailyCategoricalThermoSensitivity`` instance.

        The days to categorize span from the earliest to the latest timestamp
        found in either ``energy_data`` or ``temperature_data`` (both bounds
        included). ``categories_func`` is called once on that range and its
        result is forwarded to the parent class as ``categories``.

        Parameters
        ----------
        energy_data : pd.Series
            Time series of energy consumption data for the building.
        temperature_data : pd.Series
            Time series of outdoor temperature data.
        categories_func : Callable[[pd.DatetimeIndex], pd.Series]
            Function receiving the ``pd.DatetimeIndex`` of the days of the
            analysis period and returning a ``pd.Series`` which associates a
            category to each day (e.g. weekdays vs. weekends, seasons).
        degree_days_type : str, optional
            Type of degree days to compute (default is "heating").
            Options are:

            - "heating": heating degree days.
            - "cooling": cooling degree days.
            - "both": both heating and cooling degree days.
            - "auto": automatically detect the degree days type.

        degree_days_base_temperature : dict, optional
            Base temperature(s) for degree day calculations. Should include
            keys "heating" and/or "cooling". ``None`` (the default) or any
            other falsy value is replaced with an empty dict.
        degree_days_computation_method : str, optional
            Method to compute degree days (default is "integral").
            Options are:

            - "integral": integral calculation.
            - "mean": mean temperature calculation.
            - "min_max": min-max temperature calculation.
            - "pro": energy-professionals calculation.

        interseason_mean_temperature : float, optional
            Mean temperature to differentiate heating and cooling periods
            (default 20).
        base_logger_name : str, optional
            Name of the logger. By default, it is the class name. All the
            following instances receive a unique identifier, based on the
            first one, with the pattern:

            - ``<base_logger_name>``
            - ``<base_logger_name>_1``
            - ...

        min_logger_level_stdout: str, int, optional
            Minimum logger level below which no message is transferred to
            stdout (i.e. not printed). Default is ``"ERROR"``.

        """
        daily_frequency = "1D"
        base_temperatures = degree_days_base_temperature or {}
        # The categorized period is the union of the two series' time spans.
        first_timestamps = (
            energy_data.index.min(),
            temperature_data.index.min(),
        )
        last_timestamps = (
            energy_data.index.max(),
            temperature_data.index.max(),
        )
        analysis_days = pd.date_range(
            start=min(first_timestamps),
            end=max(last_timestamps),
            freq=daily_frequency,
            inclusive="both",
        )
        super().__init__(
            energy_data=energy_data,
            temperature_data=temperature_data,
            categories=categories_func(analysis_days),
            frequency=daily_frequency,
            degree_days_type=degree_days_type,
            degree_days_base_temperature=base_temperatures,
            degree_days_computation_method=degree_days_computation_method,
            interseason_mean_temperature=interseason_mean_temperature,
            base_logger_name=base_logger_name,
            min_logger_level_stdout=min_logger_level_stdout,
        )
class DayOfWeekCategoricalThermoSensitivity(DailyCategoricalThermoSensitivity):
    """Thermosensitivity model with one category per day of the week.

    Based on :py:class:`DailyCategoricalThermoSensitivity`, with the
    categorizing function fixed to the name of the day of the week.
    """

    def __init__(
        self,
        energy_data: pd.Series,
        temperature_data: pd.Series,
        degree_days_type: literal_dd_types = "heating",
        degree_days_base_temperature: dict | None = None,
        degree_days_computation_method: literal_computation_dd_types = "integral",
        interseason_mean_temperature: float = 20,
        base_logger_name: str | None = None,
        min_logger_level_stdout: int | str = logging.ERROR,
    ) -> None:
        """Build a ``DayOfWeekCategoricalThermoSensitivity`` instance.

        Parameters
        ----------
        energy_data : pd.Series
            Time series of energy consumption data for the building.
        temperature_data : pd.Series
            Time series of outdoor temperature data.
        degree_days_type : str, optional
            Type of degree days to compute (default is "heating").
            Options are:

            - "heating": heating degree days.
            - "cooling": cooling degree days.
            - "both": both heating and cooling degree days.
            - "auto": automatically detect the degree days type.

        degree_days_base_temperature : dict, optional
            Base temperature(s) for degree day calculations. Should include
            keys "heating" and/or "cooling". ``None`` (the default) or any
            other falsy value is replaced with an empty dict.
        degree_days_computation_method : str, optional
            Method to compute degree days (default is "integral").
            Options are:

            - "integral": integral calculation.
            - "mean": mean temperature calculation.
            - "min_max": min-max temperature calculation.
            - "pro": energy-professionals calculation.

        interseason_mean_temperature : float, optional
            Mean temperature to differentiate heating and cooling periods
            (default 20).
        base_logger_name : str, optional
            Name of the logger. By default, it is the class name. All the
            following instances receive a unique identifier, based on the
            first one, with the pattern:

            - ``<base_logger_name>``
            - ``<base_logger_name>_1``
            - ...

        min_logger_level_stdout: str, int, optional
            Minimum logger level below which no message is transferred to
            stdout (i.e. not printed). Default is ``"ERROR"``.

        """

        def name_days_of_week(days: pd.DatetimeIndex) -> pd.Series:
            """Label every timestamp of ``days`` with its day-of-week name.

            Parameters
            ----------
            days : pd.DatetimeIndex
                The dates to categorize.

            Returns
            -------
            pd.Series
                Series indexed by ``days`` whose values are the day names
                returned by ``days.day_name()``.

            Examples
            --------
            >>> days = pd.date_range(start="2023-01-01", periods=7, freq="D")
            >>> name_days_of_week(days)
            2023-01-01       Sunday
            2023-01-02       Monday
            2023-01-03      Tuesday
            2023-01-04    Wednesday
            2023-01-05     Thursday
            2023-01-06       Friday
            2023-01-07     Saturday
            Freq: D, dtype: object

            """
            return pd.Series(data=days.day_name(), index=days)

        base_temperatures = degree_days_base_temperature or {}
        super().__init__(
            energy_data=energy_data,
            temperature_data=temperature_data,
            categories_func=name_days_of_week,
            degree_days_type=degree_days_type,
            degree_days_base_temperature=base_temperatures,
            degree_days_computation_method=degree_days_computation_method,
            interseason_mean_temperature=interseason_mean_temperature,
            base_logger_name=base_logger_name,
            min_logger_level_stdout=min_logger_level_stdout,
        )
class AutoCategoricalThermoSensitivity(
    DayOfWeekCategoricalThermoSensitivity,
):
    """Day-of-week thermosensitivity model able to merge similar categories.

    Based on :py:class:`DayOfWeekCategoricalThermoSensitivity`. Once fitted,
    categories whose model terms are not significantly different (Wald test)
    can be merged, and the model fitted again on the reduced set of
    categories.
    """

    # Calendar order used to build readable, deterministic merged labels.
    _DAYS_OF_WEEK = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )

    @property
    def categories(
        self,
    ) -> pd.Series:
        """The categories of the periods."""
        return self._categories

    @categories.setter
    def categories(
        self,
        value: pd.Series,
    ) -> None:
        """Set the categories and reset associated cached data.

        Parameters
        ----------
        value : pd.Series
            A pandas Series representing the new categories to assign.


        .. note::

            Assigning new categories also discards the following data, so
            that it is not reused with the previous categorization:

            - ``resampled_energy_temperature_category`` and
              ``resampled_categories``: removed from the instance
              ``__dict__`` if present.
            - ``_aggregated_data``: reset to ``None``.

        """
        self._categories = value
        # Removing the entries from ``__dict__`` (rather than assigning None)
        # tolerates values which have never been computed.
        self.__dict__.pop("resampled_energy_temperature_category", None)
        self.__dict__.pop("resampled_categories", None)
        self._aggregated_data = None

    def _differ_significantly(
        self,
        first: str,
        second: str,
        significant_level: float,
    ) -> bool:
        """Tell if two categories differ on at least one model term.

        For each predictor (and the intercept), a Wald test is run on the
        difference between the parameters named ``<predictor>:<first>`` and
        ``<predictor>:<second>``. The search stops at the first significant
        difference.

        Raises
        ------
        KeyError
            If an expected interaction term is missing from the parameters
            of the fitted model.

        """
        params_index = self.model.params.index
        for predictor in [*self.predictors, "Intercept"]:
            # Contrast "param(first) - param(second) == 0".
            contrast = np.zeros((1, len(params_index)))
            contrast[0, params_index.get_loc(f"{predictor}:{first}")] = 1
            contrast[0, params_index.get_loc(f"{predictor}:{second}")] = -1
            outcome = self.model.wald_test(contrast, scalar=True)
            if outcome.pvalue < significant_level:
                return True
        return False

    @classmethod
    def _merged_label(
        cls,
        members: list,
    ) -> str:
        """Return the ``"-"``-joined label of a group of categories.

        Days of the week are ordered as in the calendar. A member which is
        itself a merge of days (e.g. ``"Monday-Wednesday"``, as produced by a
        previous merge) is split back into its days. Any other label is kept
        whole and placed after the days, in alphabetical order.
        """
        day_rank = {day: rank for rank, day in enumerate(cls._DAYS_OF_WEEK)}
        components = set()
        for member in members:
            parts = member.split("-")
            if all(part in day_rank for part in parts):
                components.update(parts)
            else:
                components.add(member)
        ordered = sorted(
            components,
            key=lambda name: (day_rank.get(name, len(day_rank)), name),
        )
        return "-".join(ordered)

    def new_categories(
        self,
        significant_level: float = 0.1,
    ) -> dict:
        """Return new category mappings based on interaction term significance.

        Every pair of categories is compared with Wald tests on the model
        terms of the two categories. Two categories for which no term differs
        significantly are put in the same group.

        Parameters
        ----------
        significant_level : float, optional
            The significance level for the Wald test (a p-value below this
            level is considered significant). Must be between 0 and 1.
            The higher the value, the more categories will be kept separate.
            Lower values will merge categories that are not significantly
            different.

        Returns
        -------
        dict
            A dictionary mapping old categories to new merged categories. The
            new labels are concatenated with a "-" separator to indicate
            merged groups. All the categories of a group share the same label.

        Notes
        -----
        - The model must have been fitted before calling this method.
        - Grouping is transitive: if A is similar to B and B is similar to C,
          the three categories end up in the same group, even when A and C
          taken alone are significantly different.
        - Pairs which already belong to the same group are not tested again.

        Example
        -------
        >>> auto = AutoCategoricalThermoSensitivity(...)
        >>> auto.fit()
        >>> auto.new_categories(significant_level=0.1)
        {'Monday': 'Monday-Wednesday-Sunday',
         'Tuesday': 'Tuesday',
         'Wednesday': 'Monday-Wednesday-Sunday',
         'Thursday': 'Thursday',
         'Friday': 'Friday',
         'Saturday': 'Saturday',
         'Sunday': 'Monday-Wednesday-Sunday'
        }

        """
        categories = [str(label) for label in self.resampled_categories.unique()]
        # Each category starts alone. Merged categories reference the very
        # same list object, so that a group can be recognized by identity.
        groups = {label: [label] for label in categories}
        for position, first in enumerate(categories):
            for second in categories[position + 1 :]:
                if groups[first] is groups[second]:
                    continue
                if self._differ_significantly(first, second, significant_level):
                    continue
                # Union of the two groups: every member of the absorbed group
                # is re-pointed, which keeps all labels consistent.
                absorbed = groups[second]
                groups[first].extend(absorbed)
                for member in absorbed:
                    groups[member] = groups[first]
        return {
            label: self._merged_label(members) for label, members in groups.items()
        }

    def merge_and_fit(
        self,
        significant_level: float = 0.1,
    ) -> None:
        """Merge similar categories and fit the model with updated categories.

        The mapping returned by :py:meth:`new_categories` is applied to the
        current categories, then the model is fitted again.

        Parameters
        ----------
        significant_level : float, optional
            The significance level for the Wald test (a p-value below this
            level is considered significant). Must be between 0 and 1.
            The higher the value, the more categories will be kept separate.
            Lower values will merge categories that are not significantly
            different.

        Returns
        -------
        None

        Notes
        -----
        - Assigning the mapped categories goes through the ``categories``
          setter, hence data cached for the previous categories is discarded
          before the new fit.
        - A category which is absent from the mapping is turned into a
          missing value by ``pd.Series.map``.

        """
        merged_labels = self.new_categories(significant_level=significant_level)
        self.categories = self.categories.map(merged_labels)
        self.fit()