Coverage Summary for Class: PairedStatsAccumulator (com.google.common.math)

Class Class, % Method, % Line, %
PairedStatsAccumulator 0% (0/1) 0% (0/13) 0% (0/53)


1 /* 2  * Copyright (C) 2012 The Guava Authors 3  * 4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5  * in compliance with the License. You may obtain a copy of the License at 6  * 7  * http://www.apache.org/licenses/LICENSE-2.0 8  * 9  * Unless required by applicable law or agreed to in writing, software distributed under the License 10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11  * or implied. See the License for the specific language governing permissions and limitations under 12  * the License. 13  */ 14  15 package com.google.common.math; 16  17 import static com.google.common.base.Preconditions.checkState; 18 import static com.google.common.primitives.Doubles.isFinite; 19 import static java.lang.Double.NaN; 20 import static java.lang.Double.isNaN; 21  22 import com.google.common.annotations.Beta; 23 import com.google.common.annotations.GwtIncompatible; 24 import com.google.common.primitives.Doubles; 25  26 /** 27  * A mutable object which accumulates paired double values (e.g. points on a plane) and tracks some 28  * basic statistics over all the values added so far. This class is not thread safe. 29  * 30  * @author Pete Gillin 31  * @since 20.0 32  */ 33 @Beta 34 @GwtIncompatible 35 @ElementTypesAreNonnullByDefault 36 public final class PairedStatsAccumulator { 37  38  // These fields must satisfy the requirements of PairedStats' constructor as well as those of the 39  // stat methods of this class. 40  private final StatsAccumulator xStats = new StatsAccumulator(); 41  private final StatsAccumulator yStats = new StatsAccumulator(); 42  private double sumOfProductsOfDeltas = 0.0; 43  44  /** Adds the given pair of values to the dataset. */ 45  public void add(double x, double y) { 46  // We extend the recursive expression for the one-variable case at Art of Computer Programming 47  // vol. 2, Knuth, 4.2.2, (16) to the two-variable case. We have two value series x_i and y_i. 48  // We define the arithmetic means X_n = 1/n \sum_{i=1}^n x_i, and Y_n = 1/n \sum_{i=1}^n y_i. 49  // We also define the sum of the products of the differences from the means 50  // C_n = \sum_{i=1}^n x_i y_i - n X_n Y_n 51  // for all n >= 1. Then for all n > 1: 52  // C_{n-1} = \sum_{i=1}^{n-1} x_i y_i - (n-1) X_{n-1} Y_{n-1} 53  // C_n - C_{n-1} = x_n y_n - n X_n Y_n + (n-1) X_{n-1} Y_{n-1} 54  // = x_n y_n - X_n [ y_n + (n-1) Y_{n-1} ] + [ n X_n - x_n ] Y_{n-1} 55  // = x_n y_n - X_n y_n - x_n Y_{n-1} + X_n Y_{n-1} 56  // = (x_n - X_n) (y_n - Y_{n-1}) 57  xStats.add(x); 58  if (isFinite(x) && isFinite(y)) { 59  if (xStats.count() > 1) { 60  sumOfProductsOfDeltas += (x - xStats.mean()) * (y - yStats.mean()); 61  } 62  } else { 63  sumOfProductsOfDeltas = NaN; 64  } 65  yStats.add(y); 66  } 67  68  /** 69  * Adds the given statistics to the dataset, as if the individual values used to compute the 70  * statistics had been added directly. 71  */ 72  public void addAll(PairedStats values) { 73  if (values.count() == 0) { 74  return; 75  } 76  77  xStats.addAll(values.xStats()); 78  if (yStats.count() == 0) { 79  sumOfProductsOfDeltas = values.sumOfProductsOfDeltas(); 80  } else { 81  // This is a generalized version of the calculation in add(double, double) above. Note that 82  // non-finite inputs will have sumOfProductsOfDeltas = NaN, so non-finite values will result 83  // in NaN naturally. 84  sumOfProductsOfDeltas += 85  values.sumOfProductsOfDeltas() 86  + (values.xStats().mean() - xStats.mean()) 87  * (values.yStats().mean() - yStats.mean()) 88  * values.count(); 89  } 90  yStats.addAll(values.yStats()); 91  } 92  93  /** Returns an immutable snapshot of the current statistics. */ 94  public PairedStats snapshot() { 95  return new PairedStats(xStats.snapshot(), yStats.snapshot(), sumOfProductsOfDeltas); 96  } 97  98  /** Returns the number of pairs in the dataset. */ 99  public long count() { 100  return xStats.count(); 101  } 102  103  /** Returns an immutable snapshot of the statistics on the {@code x} values alone. */ 104  public Stats xStats() { 105  return xStats.snapshot(); 106  } 107  108  /** Returns an immutable snapshot of the statistics on the {@code y} values alone. */ 109  public Stats yStats() { 110  return yStats.snapshot(); 111  } 112  113  /** 114  * Returns the population covariance of the values. The count must be non-zero. 115  * 116  * <p>This is guaranteed to return zero if the dataset contains a single pair of finite values. It 117  * is not guaranteed to return zero when the dataset consists of the same pair of values multiple 118  * times, due to numerical errors. 119  * 120  * <h3>Non-finite values</h3> 121  * 122  * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 123  * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 124  * 125  * @throws IllegalStateException if the dataset is empty 126  */ 127  public double populationCovariance() { 128  checkState(count() != 0); 129  return sumOfProductsOfDeltas / count(); 130  } 131  132  /** 133  * Returns the sample covariance of the values. The count must be greater than one. 134  * 135  * <p>This is not guaranteed to return zero when the dataset consists of the same pair of values 136  * multiple times, due to numerical errors. 137  * 138  * <h3>Non-finite values</h3> 139  * 140  * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 141  * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 142  * 143  * @throws IllegalStateException if the dataset is empty or contains a single pair of values 144  */ 145  public final double sampleCovariance() { 146  checkState(count() > 1); 147  return sumOfProductsOfDeltas / (count() - 1); 148  } 149  150  /** 151  * Returns the <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html">Pearson's or 152  * product-moment correlation coefficient</a> of the values. The count must greater than one, and 153  * the {@code x} and {@code y} values must both have non-zero population variance (i.e. {@code 154  * xStats().populationVariance() > 0.0 && yStats().populationVariance() > 0.0}). The result is not 155  * guaranteed to be exactly +/-1 even when the data are perfectly (anti-)correlated, due to 156  * numerical errors. However, it is guaranteed to be in the inclusive range [-1, +1]. 157  * 158  * <h3>Non-finite values</h3> 159  * 160  * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 161  * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 162  * 163  * @throws IllegalStateException if the dataset is empty or contains a single pair of values, or 164  * either the {@code x} and {@code y} dataset has zero population variance 165  */ 166  public final double pearsonsCorrelationCoefficient() { 167  checkState(count() > 1); 168  if (isNaN(sumOfProductsOfDeltas)) { 169  return NaN; 170  } 171  double xSumOfSquaresOfDeltas = xStats.sumOfSquaresOfDeltas(); 172  double ySumOfSquaresOfDeltas = yStats.sumOfSquaresOfDeltas(); 173  checkState(xSumOfSquaresOfDeltas > 0.0); 174  checkState(ySumOfSquaresOfDeltas > 0.0); 175  // The product of two positive numbers can be zero if the multiplication underflowed. We 176  // force a positive value by effectively rounding up to MIN_VALUE. 177  double productOfSumsOfSquaresOfDeltas = 178  ensurePositive(xSumOfSquaresOfDeltas * ySumOfSquaresOfDeltas); 179  return ensureInUnitRange(sumOfProductsOfDeltas / Math.sqrt(productOfSumsOfSquaresOfDeltas)); 180  } 181  182  /** 183  * Returns a linear transformation giving the best fit to the data according to <a 184  * href="http://mathworld.wolfram.com/LeastSquaresFitting.html">Ordinary Least Squares linear 185  * regression</a> of {@code y} as a function of {@code x}. The count must be greater than one, and 186  * either the {@code x} or {@code y} data must have a non-zero population variance (i.e. {@code 187  * xStats().populationVariance() > 0.0 || yStats().populationVariance() > 0.0}). The result is 188  * guaranteed to be horizontal if there is variance in the {@code x} data but not the {@code y} 189  * data, and vertical if there is variance in the {@code y} data but not the {@code x} data. 190  * 191  * <p>This fit minimizes the root-mean-square error in {@code y} as a function of {@code x}. This 192  * error is defined as the square root of the mean of the squares of the differences between the 193  * actual {@code y} values of the data and the values predicted by the fit for the {@code x} 194  * values (i.e. it is the square root of the mean of the squares of the vertical distances between 195  * the data points and the best fit line). For this fit, this error is a fraction {@code sqrt(1 - 196  * R*R)} of the population standard deviation of {@code y}, where {@code R} is the Pearson's 197  * correlation coefficient (as given by {@link #pearsonsCorrelationCoefficient()}). 198  * 199  * <p>The corresponding root-mean-square error in {@code x} as a function of {@code y} is a 200  * fraction {@code sqrt(1/(R*R) - 1)} of the population standard deviation of {@code x}. This fit 201  * does not normally minimize that error: to do that, you should swap the roles of {@code x} and 202  * {@code y}. 203  * 204  * <h3>Non-finite values</h3> 205  * 206  * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 207  * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link 208  * LinearTransformation#forNaN()}. 209  * 210  * @throws IllegalStateException if the dataset is empty or contains a single pair of values, or 211  * both the {@code x} and {@code y} dataset have zero population variance 212  */ 213  public final LinearTransformation leastSquaresFit() { 214  checkState(count() > 1); 215  if (isNaN(sumOfProductsOfDeltas)) { 216  return LinearTransformation.forNaN(); 217  } 218  double xSumOfSquaresOfDeltas = xStats.sumOfSquaresOfDeltas(); 219  if (xSumOfSquaresOfDeltas > 0.0) { 220  if (yStats.sumOfSquaresOfDeltas() > 0.0) { 221  return LinearTransformation.mapping(xStats.mean(), yStats.mean()) 222  .withSlope(sumOfProductsOfDeltas / xSumOfSquaresOfDeltas); 223  } else { 224  return LinearTransformation.horizontal(yStats.mean()); 225  } 226  } else { 227  checkState(yStats.sumOfSquaresOfDeltas() > 0.0); 228  return LinearTransformation.vertical(xStats.mean()); 229  } 230  } 231  232  private double ensurePositive(double value) { 233  if (value > 0.0) { 234  return value; 235  } else { 236  return Double.MIN_VALUE; 237  } 238  } 239  240  private static double ensureInUnitRange(double value) { 241  return Doubles.constrainToRange(value, -1.0, 1.0); 242  } 243 }