Source code for concept_formation.continuous_value

from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
from math import sqrt
from math import exp
from math import pi

from concept_formation.utils import c4

class ContinuousValue():
    """
    This class is used to store the number of samples, the mean of the
    samples, and the squared error of the samples for :ref:`Numeric
    Values<val-num>`. It can be used to perform incremental estimation of the
    attribute's mean, std, and unbiased std.

    Initially the number of values, the mean of the values, and the squared
    errors of the values are set to 0.
    """

    def __init__(self):
        """constructor"""
        self.num = 0.0
        self.mean = 0.0
        self.meanSq = 0.0

    def __len__(self):
        """
        Always returns 1, regardless of how many samples have been added.
        """
        return 1

    def copy(self):
        """
        Returns a deep copy of itself.

        :return: a deep copy of the continuous value
        :rtype: ContinuousValue
        """
        v = ContinuousValue()
        v.num = self.num
        v.mean = self.mean
        v.meanSq = self.meanSq
        return v

    def unbiased_mean(self):
        """
        Returns an unbiased estimate of the mean.

        :return: the unbiased mean
        :rtype: float
        """
        return self.mean

    def scaled_unbiased_mean(self, shift, scale):
        """
        Returns a shifted and scaled unbiased mean. This is equivalent to
        ``(self.unbiased_mean() - shift) / scale``.

        This is used as part of numerical value scaling. If scale is less
        than or equal to 0, then scaling is disabled (i.e., scale = 1).

        :param shift: the amount to shift the mean by
        :type shift: float
        :param scale: the amount to scale the returned mean by
        :type scale: float
        :return: ``(self.mean - shift) / scale``
        :rtype: float
        """
        if scale <= 0:
            scale = 1
        return (self.mean - shift) / scale

    def biased_std(self):
        """
        Returns a biased estimate of the std (i.e., the sample std). When no
        samples have been added yet the std is estimated to be 0.0.

        :return: biased estimate of the std (i.e., the sample std)
        :rtype: float
        """
        # An empty distribution has no spread; returning 0.0 avoids a
        # ZeroDivisionError and mirrors what unbiased_std does for small n.
        if self.num <= 0:
            return 0.0
        return sqrt(self.meanSq / self.num)

    def unbiased_std(self):
        """
        Returns an unbiased estimate of the std, but for n < 2 the std is
        estimated to be 0.0.

        This implementation uses Bessel's correction and Cochran's theorem:
        `<https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation#Bias_correction>`_

        :return: an unbiased estimate of the std
        :rtype: float

        .. seealso:: :meth:`concept_formation.utils.c4`
        """
        if self.num < 2:
            return 0.0
        return sqrt(self.meanSq / (self.num - 1)) / c4(self.num)

    def scaled_biased_std(self, scale):
        """
        Returns a biased estimate of the std (see:
        :meth:`ContinuousValue.biased_std`), but also adjusts the std given a
        scale parameter.

        This is used to return std values that have been normalized by some
        value. For edge cases, if scale is less than or equal to 0, then
        scaling is disabled (i.e., scale = 1.0).

        :param scale: an amount to scale biased std estimates by
        :type scale: float
        :return: A scaled biased estimate of std
        :rtype: float
        """
        if scale <= 0:
            scale = 1.0
        return self.biased_std() / scale

    def scaled_unbiased_std(self, scale):
        """
        Returns an unbiased estimate of the std (see:
        :meth:`ContinuousValue.unbiased_std`), but also adjusts the std given
        a scale parameter.

        This is used to return std values that have been normalized by some
        value. For edge cases, if scale is less than or equal to 0, then
        scaling is disabled (i.e., scale = 1.0).

        :param scale: an amount to scale unbiased std estimates by
        :type scale: float
        :return: A scaled unbiased estimate of std
        :rtype: float
        """
        if scale <= 0:
            scale = 1.0
        return self.unbiased_std() / scale

    def __hash__(self):
        """
        This hashing function returns the hash of a constant string, so
        every continuous value has the same hash regardless of its contents.
        """
        return hash("#ContinuousValue#")

    def __repr__(self):
        """
        The textual representation of a continuous value, formatted as
        ``mean (std) [num]`` using the unbiased mean and unbiased std.
        """
        return "%0.4f (%0.4f) [%i]" % (self.unbiased_mean(),
                                       self.unbiased_std(), self.num)

    def update_batch(self, data):
        """
        Calls the update function on every value in a given dataset.

        :param data: A list of numeric values to add to the distribution
        :type data: [Number, Number, ...]
        """
        for x in data:
            self.update(x)

    def update(self, x):
        """
        Incrementally update the mean and squared mean error (meanSq) values
        in an efficient and practical (no precision problems) way.

        This uses an algorithm by Knuth found here:
        `<https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>`_

        :param x: A new value to incorporate into the distribution
        :type x: Number
        """
        self.num += 1
        delta = x - self.mean
        self.mean += delta / self.num
        # The second factor deliberately uses the already-updated mean.
        self.meanSq += delta * (x - self.mean)

    def combine(self, other):
        """
        Combine another ContinuousValue's distribution into this one in
        an efficient and practical (no precision problems) way.

        This uses the parallel algorithm by Chan et al. found at:
        `<https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm>`_

        If both distributions are empty there is nothing to merge and this
        value is left unchanged.

        :param other: Another ContinuousValue distribution to be incorporated
            into this one.
        :type other: ContinuousValue
        :raises ValueError: if ``other`` is not a ContinuousValue
        """
        if not isinstance(other, ContinuousValue):
            raise ValueError("Can only merge 2 continuous values.")

        total = self.num + other.num
        if total == 0:
            # Both sides are empty; the formulas below would divide by zero.
            return

        delta = other.mean - self.mean
        self.meanSq = (self.meanSq + other.meanSq + delta * delta *
                       ((self.num * other.num) / total))
        self.mean = ((self.num * self.mean + other.num * other.mean) / total)
        self.num += other.num

    def integral_of_gaussian_product(self, other):
        """
        Computes the integral (from -inf to inf) of the product of two
        gaussians. It adds gaussian noise to both stds, so that the integral
        of their product never exceeds 1.

        Use formula computed here:
        `<http://www.tina-vision.net/docs/memos/2003-003.pdf>`_

        :param other: the other distribution in the product
        :type other: ContinuousValue
        :return: the integral of the product of the two (noise-widened)
            gaussians
        :rtype: float
        """
        mu1 = self.unbiased_mean()
        mu2 = other.unbiased_mean()
        sd1 = self.unbiased_std()
        sd2 = other.unbiased_std()

        # Variance of the added noise; with two zero-std distributions and
        # equal means this makes the result exactly 1.
        noisy_sd_squared = 1 / (4 * pi)
        sd1 = sqrt(sd1 * sd1 + noisy_sd_squared)
        sd2 = sqrt(sd2 * sd2 + noisy_sd_squared)

        return ((1 / sqrt(2 * pi * (sd1 * sd1 + sd2 * sd2))) *
                exp(-1 * (mu1 - mu2) * (mu1 - mu2) /
                    (2 * (sd1 * sd1 + sd2 * sd2))))

    def output_json(self):
        """
        Returns a dictionary summary of the distribution.

        :return: a dict with keys ``'mean'`` (unbiased mean), ``'std'``
            (unbiased std) and ``'n'`` (number of samples)
        :rtype: dict
        """
        return {
            'mean': self.unbiased_mean(),
            'std': self.unbiased_std(),
            'n': self.num
        }