#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#Copyright (c) 2012-2015 Christian Schwarz
#
#Permission is hereby granted, free of charge, to any person obtaining
#a copy of this software and associated documentation files (the
#"Software"), to deal in the Software without restriction, including
#without limitation the rights to use, copy, modify, merge, publish,
#distribute, sublicense, and/or sell copies of the Software, and to
#permit persons to whom the Software is furnished to do so, subject to
#the following conditions:
#
#The above copyright notice and this permission notice shall be
#included in all copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
#NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
#LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
#WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from pycast.common import PyCastObject
from pycast.common.decorators import optimized
class BaseErrorMeasure(PyCastObject):
    """Baseclass for all error measures.

    Child classes have to overwrite :py:meth:`BaseErrorMeasure.local_error` and
    :py:meth:`BaseErrorMeasure._calculate`; both raise a
    :py:exc:`NotImplementedError` in this class.
    """

    def __init__(self, minimalErrorCalculationPercentage=60):
        """Initializes the error measure.

        :param integer minimalErrorCalculationPercentage: The percentage of entries in an
            original TimeSeries that have to have corresponding partners in the calculated
            TimeSeries. Corresponding partners have the same time stamp.
            Valid values are in [0.0, 100.0].

        :raise: Raises a :py:exc:`ValueError` if minimalErrorCalculationPercentage is not
            in [0.0, 100.0].
        """
        super(BaseErrorMeasure, self).__init__()

        if not 0.0 <= minimalErrorCalculationPercentage <= 100.0:
            raise ValueError("minimalErrorCalculationPercentage has to be in [0.0, 100.0].")

        # Stored as a fraction in [0.0, 1.0], not as a percentage.
        self._minimalErrorCalculationPercentage = minimalErrorCalculationPercentage / 100.0

        # Local errors and the time stamps they belong to (parallel lists).
        self._errorValues = []
        self._errorDates = []

    @optimized
    def initialize(self, originalTimeSeries, calculatedTimeSeries):
        """Initializes the ErrorMeasure.

        During initialization, :py:meth:`BaseErrorMeasure.local_error` is calculated for
        every pair of entries that share the same time stamp.

        :param TimeSeries originalTimeSeries: TimeSeries containing the original data.
        :param TimeSeries calculatedTimeSeries: TimeSeries containing calculated data.
            Calculated data is smoothed or forecasted data.

        :return: Return :py:const:`True` if the error could be calculated, :py:const:`False`
            otherwise based on the minimalErrorCalculationPercentage.
        :rtype: boolean

        :raise: Raises a :py:exc:`StandardError` if the error measure is initialized multiple times.
        """
        # ErrorMeasure was already initialized.
        # NOTE: StandardError is a Python 2 builtin; it is kept because callers may catch it.
        if self._errorValues:
            raise StandardError("An ErrorMeasure can only be initialized once.")

        # Both TimeSeries are sorted in place, so the errors are collected in the
        # sorted order of the original TimeSeries.
        originalTimeSeries.sort_timeseries()
        calculatedTimeSeries.sort_timeseries()

        # Index the calculated entries by time stamp. This replaces a full scan of the
        # calculated TimeSeries for every original entry, while still pairing each
        # original entry with all calculated entries of the same time stamp, in order.
        # Assumes the time stamps are hashable (e.g. numeric epochs).
        calculatedByDate = {}
        for calcPair in calculatedTimeSeries:
            calculatedByDate.setdefault(calcPair[0], []).append(calcPair)

        # Performance optimization: bind the attribute lookups once.
        append = self._errorValues.append
        appendDate = self._errorDates.append
        local_error = self.local_error

        # calculate all valid local errors
        for orgPair in originalTimeSeries:
            for calcPair in calculatedByDate.get(orgPair[0], ()):
                append(local_error(orgPair[1:], calcPair[1:]))
                appendDate(orgPair[0])

        # return False, if the error cannot be calculated: too few local errors
        # that are not None compared to the size of the original TimeSeries.
        validErrorCount = sum(1 for value in self._errorValues if value is not None)
        if validErrorCount < self._minimalErrorCalculationPercentage * len(originalTimeSeries):
            self._errorValues = []
            self._errorDates = []
            return False

        return True

    def _get_error_values(self, startingPercentage, endPercentage, startDate, endDate):
        """Gets the defined subset of self._errorValues.

        Both percentages will be correct at this time. A given date takes precedence
        over the corresponding percentage.

        :param float startingPercentage: Defines the start of the interval. This has to be a value in [0.0, 100.0].
            It represents the value, where the error calculation should be started.
            25.0 for example means that the first 25% of all calculated errors will be ignored.
        :param float endPercentage: Defines the end of the interval. This has to be a value in [0.0, 100.0].
            It represents the value, after which all error values will be ignored. 90.0 for example means that
            the last 10% of all local errors will be ignored.
        :param float startDate: Epoch representing the start date used for error calculation.
        :param float endDate: Epoch representing the end date used in the error calculation.

        :return: Returns a list with the defined error values.
        :rtype: list

        :raise: Raises a ValueError if startDate or endDate do not represent correct boundaries for error calculation.
        """
        if startDate is not None:
            possibleDates = [date for date in self._errorDates if date >= startDate]
            if not possibleDates:
                raise ValueError("%s does not represent a valid startDate." % startDate)

            # First position of the earliest date that is not before startDate.
            startIdx = self._errorDates.index(min(possibleDates))
        else:
            startIdx = int((startingPercentage * len(self._errorValues)) / 100.0)

        if endDate is not None:
            possibleDates = [date for date in self._errorDates if date <= endDate]
            if not possibleDates:
                raise ValueError("%s does not represent a valid endDate." % endDate)

            # +1 because the slice end is exclusive.
            endIdx = self._errorDates.index(max(possibleDates)) + 1
        else:
            endIdx = int((endPercentage * len(self._errorValues)) / 100.0)

        return self._errorValues[startIdx:endIdx]

    def get_error(self, startingPercentage=0.0, endPercentage=100.0, startDate=None, endDate=None):
        """Calculates the error for the given interval (startingPercentage, endPercentage) between the TimeSeries
        given during :py:meth:`BaseErrorMeasure.initialize`.

        :param float startingPercentage: Defines the start of the interval. This has to be a value in [0.0, 100.0].
            It represents the value, where the error calculation should be started.
            25.0 for example means that the first 25% of all calculated errors will be ignored.
        :param float endPercentage: Defines the end of the interval. This has to be a value in [0.0, 100.0].
            It represents the value, after which all error values will be ignored. 90.0 for example means that
            the last 10% of all local errors will be ignored.
        :param float startDate: Epoch representing the start date used for error calculation.
        :param float endDate: Epoch representing the end date used in the error calculation.

        :return: Returns a float representing the error.
        :rtype: float

        :raise: Raises a :py:exc:`ValueError` in one of the following cases:

            - startingPercentage not in [0.0, 100.0]
            - endPercentage not in [0.0, 100.0]
            - endPercentage < startingPercentage

        :raise: Raises a :py:exc:`StandardError` if :py:meth:`BaseErrorMeasure.initialize` was not successfull before.
        """
        # not initialized:
        if not self._errorValues:
            raise StandardError("The last call of initialize(...) was not successfull.")

        # check for wrong parameters
        if not (0.0 <= startingPercentage <= 100.0):
            raise ValueError("startingPercentage has to be in [0.0, 100.0].")
        if not (0.0 <= endPercentage <= 100.0):
            raise ValueError("endPercentage has to be in [0.0, 100.0].")
        if endPercentage < startingPercentage:
            raise ValueError("endPercentage has to be greater or equal than startingPercentage.")

        return self._calculate(startingPercentage, endPercentage, startDate, endDate)

    def _calculate(self, startingPercentage, endPercentage, startDate, endDate):
        """This is the error calculation function that gets called by :py:meth:`BaseErrorMeasure.get_error`.

        Both percentages will be correct at this time.

        :param float startingPercentage: Defines the start of the interval. This has to be a value in [0.0, 100.0].
            It represents the value, where the error calculation should be started.
            25.0 for example means that the first 25% of all calculated errors will be ignored.
        :param float endPercentage: Defines the end of the interval. This has to be a value in [0.0, 100.0].
            It represents the value, after which all error values will be ignored. 90.0 for example means that
            the last 10% of all local errors will be ignored.
        :param float startDate: Epoch representing the start date used for error calculation.
        :param float endDate: Epoch representing the end date used in the error calculation.

        :return: Returns a float representing the error.
        :rtype: float

        :raise: Raises a :py:exc:`NotImplementedError` if the child class does not overwrite this method.
        """
        raise NotImplementedError

    def local_error(self, originalValue, calculatedValue):
        """Calculates the error between the two given values.

        :param list originalValue: List containing the values of the original data.
        :param list calculatedValue: List containing the values of the calculated TimeSeries that
            corresponds to originalValue.

        :return: Returns the error measure of the two given values.
        :rtype: numeric

        :raise: Raises a :py:exc:`NotImplementedError` if the child class does not overwrite this method.
        """
        raise NotImplementedError

    def confidence_interval(self, confidenceLevel):
        """Calculates for which value confidenceLevel% of the errors are closer to 0.

        Local errors that are :py:const:`None` are ignored. Errors equal to 0 count as
        both, an overestimation and an underestimation.

        :param float confidenceLevel: percentage of the errors that should be
            smaller than the returned value for overestimations and larger than
            the returned value for underestimations.
            confidenceLevel has to be in [0.0, 1.0]

        :return: return a tuple containing the underestimation and overestimation for
            the given confidenceLevel. A value is 0.0 if no matching error is available
            for the given confidenceLevel.
        :rtype: tuple

        :raise: Raises a :py:exc:`ValueError` if confidenceLevel is not in [0.0, 1.0].

        :warning: Index is still not calculated correctly
        """
        if not 0 <= confidenceLevel <= 1:
            raise ValueError("Parameter confidenceLevel has to be in [0,1]")

        underestimations = []
        overestimations = []
        for error in self._errorValues:
            # None entries carry no error information and are skipped.
            if error is None:
                continue

            # Want 0 errors in both lists!
            if error >= 0:
                overestimations.append(error)
            if error <= 0:
                underestimations.append(error)

        # Sort both lists by increasing distance to 0 and cut off at the confidence level.
        overestimations.sort()
        underestimations.sort(reverse=True)

        overIdx = int(len(overestimations) * confidenceLevel) - 1
        underIdx = int(len(underestimations) * confidenceLevel) - 1

        # A negative index means that no error lies within the confidence level.
        overestimation = 0.0
        underestimation = 0.0

        if overIdx >= 0:
            overestimation = overestimations[overIdx]
        if underIdx >= 0:
            underestimation = underestimations[underIdx]

        return underestimation, overestimation