# Source code for concept_formation.continuous_value

from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
from math import sqrt
from math import exp
from math import pi

from concept_formation.utils import c4

class ContinuousValue():
    """
    Running summary of a stream of numeric samples.

    This class is used to store the number of samples (``num``), the mean of
    the samples (``mean``), and the accumulated sum of squared deviations from
    the mean (``meanSq``) for :ref:`Numeric Values<val-num>`. It can be used
    to perform incremental estimation of the attribute's mean, std, and
    unbiased std without keeping the individual samples.

    Initially the number of values, the mean of the values, and the
    squared errors of the values are set to 0.
    """

    def __init__(self):
        """Create an empty distribution (no samples, zero mean, zero meanSq)."""
        self.num = 0.0
        self.mean = 0.0
        self.meanSq = 0.0

    def __len__(self):
        """
        Always returns 1, regardless of how many samples have been
        incorporated.
        """
        return 1

    def copy(self):
        """
        Returns a deep copy of itself.

        :return: a new continuous value with the same num, mean and meanSq
        :rtype: ContinuousValue
        """
        duplicate = ContinuousValue()
        duplicate.num = self.num
        duplicate.mean = self.mean
        duplicate.meanSq = self.meanSq
        return duplicate

    def unbiased_mean(self):
        """
        Returns an unbiased estimate of the mean (the running sample mean).

        :return: the unbiased mean
        :rtype: float
        """
        return self.mean

    def scaled_unbiased_mean(self, shift, scale):
        """
        Returns a shifted and scaled unbiased mean. This is equivalent to
        ``(self.unbiased_mean() - shift) / scale``.

        This is used as part of numerical value scaling. For edge cases, if
        scale is less than or equal to 0, then scaling is disabled
        (i.e., scale = 1.0).

        :param shift: the amount to shift the mean by
        :type shift: float
        :param scale: the amount to scale the returned mean by
        :type scale: float
        :return: ``(self.mean - shift) / scale``
        :rtype: float
        """
        if scale <= 0:
            scale = 1.0
        return (self.mean - shift) / scale

    def biased_std(self):
        """
        Returns a biased estimate of the std (i.e., the sample std), computed
        as ``sqrt(meanSq / num)``.

        When no samples have been incorporated the std is estimated to be
        0.0 instead of dividing by zero, mirroring how
        :meth:`ContinuousValue.unbiased_std` treats too-small samples.

        :return: biased estimate of the std (i.e., the sample std)
        :rtype: float
        """
        if self.num <= 0:
            return 0.0
        return sqrt(self.meanSq / self.num)

    def unbiased_std(self):
        """
        Returns an unbiased estimate of the std, but for n < 2 the std is
        estimated to be 0.0.

        This implementation uses Bessel's correction and Cochran's theorem:
        `<https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation#Bias_correction>`_

        :return: an unbiased estimate of the std
        :rtype: float

        .. seealso:: :meth:`concept_formation.utils.c4`
        """
        if self.num < 2:
            return 0.0
        # Bessel-corrected std, further divided by the c4(n) correction factor.
        return sqrt(self.meanSq / (self.num - 1)) / c4(self.num)

    def scaled_biased_std(self, scale):
        """
        Returns a biased estimate of the std (see:
        :meth:`ContinuousValue.biased_std`), but also adjusts the std given a
        scale parameter.

        This is used to return std values that have been normalized by some
        value. For edge cases, if scale is less than or equal to 0, then scaling
        is disabled (i.e., scale = 1.0).

        :param scale: an amount to scale biased std estimates by
        :type scale: float
        :return: A scaled biased estimate of std
        :rtype: float
        """
        if scale <= 0:
            scale = 1.0
        return self.biased_std() / scale

    def scaled_unbiased_std(self, scale):
        """
        Returns an unbiased estimate of the std (see:
        :meth:`ContinuousValue.unbiased_std`), but also adjusts the std given a
        scale parameter.

        This is used to return std values that have been normalized by some
        value. For edge cases, if scale is less than or equal to 0, then scaling
        is disabled (i.e., scale = 1.0).

        :param scale: an amount to scale unbiased std estimates by
        :type scale: float
        :return: A scaled unbiased estimate of std
        :rtype: float
        """
        if scale <= 0:
            scale = 1.0
        return self.unbiased_std() / scale

    def __hash__(self):
        """
        This hashing function returns the hash of a constant string, so every
        continuous value hashes to the same value.

        Note that this class defines no ``__eq__``, so instances still compare
        by identity.
        """
        return hash("#ContinuousValue#")

    def __repr__(self):
        """
        The textual representation of a continuous value, formatted as
        ``mean (std) [num]`` using the unbiased mean and unbiased std.
        """
        return "%0.4f (%0.4f) [%i]" % (self.unbiased_mean(),
                                       self.unbiased_std(), self.num)

    def update_batch(self, data):
        """
        Calls the update function on every value in a given dataset.

        :param data: A list of numeric values to add to the distribution
        :type data: [Number, Number, ...]
        """
        for x in data:
            self.update(x)

    def update(self, x):
        """
        Incrementally update the mean and squared mean error (meanSq) values in
        an efficient and practical (no precision problems) way.

        This uses an algorithm by Knuth found here:
        `<https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>`_

        :param x: A new value to incorporate into the distribution
        :type x: Number
        """
        self.num += 1
        # delta is measured against the mean *before* x is incorporated ...
        delta = x - self.mean
        self.mean += delta / self.num
        # ... and multiplied by the deviation from the *updated* mean.
        self.meanSq += delta * (x - self.mean)

    def combine(self, other):
        """
        Combine another ContinuousValue's distribution into this one in
        an efficient and practical (no precision problems) way.

        This uses the parallel algorithm by Chan et al. found at:
        `<https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm>`_

        If both distributions are empty there is nothing to merge and this
        value is left unchanged.

        :param other: Another ContinuousValue distribution to be incorporated
            into this one.
        :type other: ContinuousValue
        :raises ValueError: if other is not a ContinuousValue
        """
        if not isinstance(other, ContinuousValue):
            raise ValueError("Can only merge 2 continuous values.")
        total = self.num + other.num
        if total == 0:
            # Both sides are empty; the formulas below would divide by zero.
            return
        delta = other.mean - self.mean
        self.meanSq = (self.meanSq + other.meanSq + delta * delta *
                       ((self.num * other.num) / total))
        self.mean = (self.num * self.mean + other.num * other.mean) / total
        self.num = total

    def integral_of_gaussian_product(self, other):
        """
        Computes the integral (from -inf to inf) of the product of two
        gaussians. It adds gaussian noise to both stds, so that the integral of
        their product never exceeds 1.

        Use formula computed here:
            `<http://www.tina-vision.net/docs/memos/2003-003.pdf>`_

        :param other: the other distribution in the product
        :type other: ContinuousValue
        :return: the integral of the product of the two (noise-widened)
            gaussians
        :rtype: float
        """
        mu1 = self.unbiased_mean()
        mu2 = other.unbiased_mean()
        sd1 = self.unbiased_std()
        sd2 = other.unbiased_std()

        # Each variance gets 1 / (4 * pi) of noise, so the summed variance is
        # at least 1 / (2 * pi) and the normalizing term is at most 1.
        noisy_sd_squared = 1 / (4 * pi)
        var_sum = ((sd1 * sd1 + noisy_sd_squared) +
                   (sd2 * sd2 + noisy_sd_squared))
        diff = mu1 - mu2
        return ((1 / sqrt(2 * pi * var_sum)) *
                exp(-1 * diff * diff / (2 * var_sum)))

    def output_json(self):
        """
        Returns a dictionary summary of the distribution.

        :return: a dict with keys ``'mean'`` (unbiased mean), ``'std'``
            (unbiased std) and ``'n'`` (number of samples)
        :rtype: dict
        """
        return {
            'mean': self.unbiased_mean(),
            'std': self.unbiased_std(),
            'n': self.num
        }