Source code for pycast.errors.baseerrormeasure

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

#Copyright (c) 2012-2015 Christian Schwarz
#
#Permission is hereby granted, free of charge, to any person obtaining
#a copy of this software and associated documentation files (the
#"Software"), to deal in the Software without restriction, including
#without limitation the rights to use, copy, modify, merge, publish,
#distribute, sublicense, and/or sell copies of the Software, and to
#permit persons to whom the Software is furnished to do so, subject to
#the following conditions:
#
#The above copyright notice and this permission notice shall be
#included in all copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
#NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
#LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
#WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from pycast.common import PyCastObject
from pycast.common.decorators import optimized

class BaseErrorMeasure(PyCastObject):
    """Baseclass for all error measures.

    Child classes are expected to overwrite
    :py:meth:`BaseErrorMeasure.local_error` and
    :py:meth:`BaseErrorMeasure._calculate`; both raise
    :py:exc:`NotImplementedError` in this base class.
    """

    def __init__(self, minimalErrorCalculationPercentage=60):
        """Initializes the error measure.

        :param integer minimalErrorCalculationPercentage: The number of entries
            in an original TimeSeries that have to have corresponding partners
            in the calculated TimeSeries. Corresponding partners have the same
            time stamp. Valid values are in [0.0, 100.0].

        :raise: Raises a :py:exc:`ValueError` if
            minimalErrorCalculationPercentage is not in [0.0, 100.0].
        """
        super(BaseErrorMeasure, self).__init__()

        if not 0.0 <= minimalErrorCalculationPercentage <= 100.0:
            raise ValueError("minimalErrorCalculationPercentage has to be in [0.0, 100.0].")

        # Stored as a fraction in [0.0, 1.0], not as a percentage.
        self._minimalErrorCalculationPercentage = minimalErrorCalculationPercentage / 100.0

        # Parallel lists: _errorDates[i] is the time stamp of _errorValues[i].
        self._errorValues = []
        self._errorDates = []

    @optimized
    def initialize(self, originalTimeSeries, calculatedTimeSeries):
        """Initializes the ErrorMeasure.

        During initialization, all :py:meth:`BaseErrorMeasure.local_error`
        values are calculated for entries of both TimeSeries that share the
        same time stamp.

        :param TimeSeries originalTimeSeries: TimeSeries containing the
            original data.
        :param TimeSeries calculatedTimeSeries: TimeSeries containing
            calculated data. Calculated data is smoothed or forecasted data.

        :return: Return :py:const:`True` if the error could be calculated,
            :py:const:`False` otherwise based on the
            minimalErrorCalculationPercentage.
        :rtype: boolean

        :raise: Raises a :py:exc:`StandardError` if the error measure is
            initialized multiple times.
        """
        ## ErrorMeasure was already initialized.
        if 0 < len(self._errorValues):
            raise StandardError("An ErrorMeasure can only be initialized once.")

        ## Sorting is kept: it mutates the passed TimeSeries (callers may rely
        ## on that) and the order of originalTimeSeries defines the order of
        ## the stored error values.
        originalTimeSeries.sort_timeseries()
        calculatedTimeSeries.sort_timeseries()

        ## Group the calculated entries by time stamp once, so every original
        ## entry finds its partners with a single lookup instead of rescanning
        ## the whole calculated TimeSeries.
        ## NOTE(review): assumes time stamps are hashable (epochs) and that
        ## iterating a TimeSeries yields the same pairs as indexing it.
        calculatedByDate = {}
        for calcPair in calculatedTimeSeries:
            calculatedByDate.setdefault(calcPair[0], []).append(calcPair)

        ## Performance optimization: bind the bound methods to local names.
        append = self._errorValues.append
        appendDate = self._errorDates.append
        local_error = self.local_error

        ## calculate all valid local errors; entries without a partner that
        ## has the same time stamp are skipped.
        for orgPair in originalTimeSeries:
            for calcPair in calculatedByDate.get(orgPair[0], ()):
                append(local_error(orgPair[1:], calcPair[1:]))
                appendDate(orgPair[0])

        ## return False, if the error cannot be calculated: too few usable
        ## (non-None) local errors compared to the size of the original data.
        usableErrors = sum(1 for item in self._errorValues if item is not None)
        if usableErrors < self._minimalErrorCalculationPercentage * len(originalTimeSeries):
            self._errorValues = []
            self._errorDates = []
            return False

        return True

    def _get_error_values(self, startingPercentage, endPercentage, startDate, endDate):
        """Gets the defined subset of self._errorValues.

        A given date takes precedence over the corresponding percentage. The
        percentages are expected to be valid at this point.

        :param float startingPercentage: Defines the start of the interval.
            This has to be a value in [0.0, 100.0]. It represents the value,
            where the error calculation should be started. 25.0 for example
            means that the first 25% of all calculated errors will be ignored.
        :param float endPercentage: Defines the end of the interval. This has
            to be a value in [0.0, 100.0]. It represents the value, after which
            all error values will be ignored. 90.0 for example means that the
            last 10% of all local errors will be ignored.
        :param float startDate: Epoch representing the start date used for
            error calculation.
        :param float endDate: Epoch representing the end date used in the
            error calculation.

        :return: Returns a list with the defined error values.
        :rtype: list

        :raise: Raises a ValueError if startDate or endDate do not represent
            correct boundaries for error calculation.
        """
        if startDate is not None:
            # Start at the earliest stored date that is not before startDate.
            possibleDates = [date for date in self._errorDates if date >= startDate]
            if not possibleDates:
                raise ValueError("%s does not represent a valid startDate." % startDate)

            startIdx = self._errorDates.index(min(possibleDates))
        else:
            startIdx = int((startingPercentage * len(self._errorValues)) / 100.0)

        if endDate is not None:
            # End with the latest stored date that is not after endDate;
            # +1 because the slice end is exclusive.
            possibleDates = [date for date in self._errorDates if date <= endDate]
            if not possibleDates:
                raise ValueError("%s does not represent a valid endDate." % endDate)

            endIdx = self._errorDates.index(max(possibleDates)) + 1
        else:
            endIdx = int((endPercentage * len(self._errorValues)) / 100.0)

        return self._errorValues[startIdx:endIdx]

    def get_error(self, startingPercentage=0.0, endPercentage=100.0, startDate=None, endDate=None):
        """Calculates the error for the given interval (startingPercentage,
        endPercentage) between the TimeSeries given during
        :py:meth:`BaseErrorMeasure.initialize`.

        :param float startingPercentage: Defines the start of the interval.
            This has to be a value in [0.0, 100.0]. It represents the value,
            where the error calculation should be started. 25.0 for example
            means that the first 25% of all calculated errors will be ignored.
        :param float endPercentage: Defines the end of the interval. This has
            to be a value in [0.0, 100.0]. It represents the value, after which
            all error values will be ignored. 90.0 for example means that the
            last 10% of all local errors will be ignored.
        :param float startDate: Epoch representing the start date used for
            error calculation.
        :param float endDate: Epoch representing the end date used in the
            error calculation.

        :return: Returns a float representing the error.
        :rtype: float

        :raise: Raises a :py:exc:`ValueError` in one of the following cases:

            - startingPercentage not in [0.0, 100.0]
            - endPercentage not in [0.0, 100.0]
            - endPercentage < startingPercentage

        :raise: Raises a :py:exc:`StandardError` if
            :py:meth:`BaseErrorMeasure.initialize` was not successful before.
        """
        ## not initialized:
        if len(self._errorValues) == 0:
            raise StandardError("The last call of initialize(...) was not successfull.")

        ## check for wrong parameters
        if not (0.0 <= startingPercentage <= 100.0):
            raise ValueError("startingPercentage has to be in [0.0, 100.0].")
        if not (0.0 <= endPercentage <= 100.0):
            raise ValueError("endPercentage has to be in [0.0, 100.0].")
        if endPercentage < startingPercentage:
            raise ValueError("endPercentage has to be greater or equal than startingPercentage.")

        return self._calculate(startingPercentage, endPercentage, startDate, endDate)

    def _calculate(self, startingPercentage, endPercentage, startDate, endDate):
        """This is the error calculation function that gets called by
        :py:meth:`BaseErrorMeasure.get_error`.

        The percentages have been validated by the caller at this time.

        :param float startingPercentage: Defines the start of the interval.
            This has to be a value in [0.0, 100.0]. It represents the value,
            where the error calculation should be started. 25.0 for example
            means that the first 25% of all calculated errors will be ignored.
        :param float endPercentage: Defines the end of the interval. This has
            to be a value in [0.0, 100.0]. It represents the value, after which
            all error values will be ignored. 90.0 for example means that the
            last 10% of all local errors will be ignored.
        :param float startDate: Epoch representing the start date used for
            error calculation.
        :param float endDate: Epoch representing the end date used in the
            error calculation.

        :return: Returns a float representing the error.
        :rtype: float

        :raise: Raises a :py:exc:`NotImplementedError` if the child class does
            not overwrite this method.
        """
        raise NotImplementedError

    def local_error(self, originalValue, calculatedValue):
        """Calculates the error between the two given values.

        :param list originalValue: List containing the values of the original
            data.
        :param list calculatedValue: List containing the values of the
            calculated TimeSeries that corresponds to originalValue.

        :return: Returns the error measure of the two given values.
        :rtype: numeric

        :raise: Raises a :py:exc:`NotImplementedError` if the child class does
            not overwrite this method.
        """
        raise NotImplementedError

    def confidence_interval(self, confidenceLevel):
        """Calculates for which value confidenceLevel% of the errors are
        closer to 0.

        Local errors that are :py:const:`None` are ignored. Errors equal to 0
        count as both an overestimation and an underestimation.

        :param float confidenceLevel: percentage of the errors that should be
            smaller than the returned value for overestimations and larger than
            the returned value for underestimations.
            confidenceLevel has to be in [0.0, 1.0]

        :return: return a tuple containing the underestimation and
            overestimation for the given confidenceLevel. A side without a
            usable index yields 0.0.
        :rtype: tuple

        :raise: Raises a :py:exc:`ValueError` if confidenceLevel is not in
            [0.0, 1.0].

        :warning: Index is still not calculated correctly
        """
        if not 0 <= confidenceLevel <= 1:
            raise ValueError("Parameter percentage has to be in [0,1]")

        ## None values cannot be compared against 0 and are skipped.
        knownErrors = [error for error in self._errorValues if error is not None]

        ## Want 0 errors in both lists!
        ## Sorted by growing distance from 0, so the cut-off index selects the
        ## error that bounds the requested share of values.
        overestimations = sorted(error for error in knownErrors if error >= 0)
        underestimations = sorted((error for error in knownErrors if error <= 0), reverse=True)

        overIdx = int(len(overestimations) * confidenceLevel) - 1
        underIdx = int(len(underestimations) * confidenceLevel) - 1

        ## A negative index means there is nothing to cut off at; fall back
        ## to 0.0 for that side.
        overestimation = overestimations[overIdx] if overIdx >= 0 else 0.0
        underestimation = underestimations[underIdx] if underIdx >= 0 else 0.0

        return underestimation, overestimation