Coverage Summary for Class: StatsAccumulator (com.google.common.math)

Class Class, % Method, % Line, %
StatsAccumulator 0% (0/1) 0% (0/25) 0% (0/84)


/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.math;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.math.DoubleUtils.ensureNonNegative;
import static com.google.common.primitives.Doubles.isFinite;
import static java.lang.Double.NaN;
import static java.lang.Double.isNaN;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import java.util.Iterator;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;
import java.util.stream.LongStream;

/**
 * A mutable accumulator of {@code double} values which maintains a handful of basic statistics
 * (count, mean, sum of squared deltas, minimum and maximum) over everything added to it so far.
 * Values can be fed in one at a time or in bulk. Instances are not thread safe.
 *
 * @author Pete Gillin
 * @author Kevin Bourrillion
 * @since 20.0
 */
@Beta
@GwtIncompatible
@ElementTypesAreNonnullByDefault
public final class StatsAccumulator {

  // The state below is handed straight to the Stats constructor by snapshot(), so it has to meet
  // that constructor's requirements in addition to those of the stat methods in this class.
  private long count = 0;
  // Any finite value works while the dataset is empty: it is only ever multiplied by a zero count
  // in sum().
  private double mean = 0.0;
  private double sumOfSquaresOfDeltas = 0.0;
  private double min = NaN; // any value will do
  private double max = NaN; // any value will do

  /** Adds a single value to the dataset. */
  public void add(double value) {
    if (count == 0) {
      // The very first value is at once the mean, the minimum and the maximum.
      count = 1;
      mean = value;
      min = value;
      max = value;
      if (!isFinite(value)) {
        sumOfSquaresOfDeltas = NaN;
      }
      return;
    }

    count++;
    if (!isFinite(value) || !isFinite(mean)) {
      // Something non-finite is involved: the incremental formula below cannot be used.
      mean = calculateNewMeanNonFinite(mean, value);
      sumOfSquaresOfDeltas = NaN;
    } else {
      // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
      double offsetFromOldMean = value - mean;
      mean += offsetFromOldMean / count;
      sumOfSquaresOfDeltas += offsetFromOldMean * (value - mean);
    }
    min = Math.min(min, value);
    max = Math.max(max, value);
  }

  /**
   * Adds every value of the given series to the dataset.
   *
   * @param values a series of values, each of which is converted to a {@code double} (this may
   *     cause loss of precision)
   */
  public void addAll(Iterable<? extends Number> values) {
    addAll(values.iterator());
  }

  /**
   * Adds every remaining value of the given iterator to the dataset.
   *
   * @param values a series of values, each of which is converted to a {@code double} (this may
   *     cause loss of precision)
   */
  public void addAll(Iterator<? extends Number> values) {
    while (values.hasNext()) {
      Number next = values.next();
      add(next.doubleValue());
    }
  }

  /**
   * Adds every value of the given array to the dataset.
   *
   * @param values a series of values
   */
  public void addAll(double... values) {
    for (int i = 0; i < values.length; i++) {
      add(values[i]);
    }
  }

  /**
   * Adds every value of the given array to the dataset.
   *
   * @param values a series of values
   */
  public void addAll(int... values) {
    for (int i = 0; i < values.length; i++) {
      add(values[i]);
    }
  }

  /**
   * Adds every value of the given array to the dataset.
   *
   * @param values a series of values, each of which is converted to a {@code double} (this may
   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
   */
  public void addAll(long... values) {
    for (int i = 0; i < values.length; i++) {
      add(values[i]);
    }
  }

  /**
   * Adds every value of the given stream to the dataset. This method consumes the stream
   * completely.
   *
   * @param values a series of values
   * @since 28.2
   */
  public void addAll(DoubleStream values) {
    StatsAccumulator collected =
        values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll);
    addAll(collected);
  }

  /**
   * Adds every value of the given stream to the dataset. This method consumes the stream
   * completely.
   *
   * @param values a series of values
   * @since 28.2
   */
  public void addAll(IntStream values) {
    StatsAccumulator collected =
        values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll);
    addAll(collected);
  }

  /**
   * Adds every value of the given stream to the dataset. This method consumes the stream
   * completely.
   *
   * @param values a series of values, each of which is converted to a {@code double} (this may
   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
   * @since 28.2
   */
  public void addAll(LongStream values) {
    StatsAccumulator collected =
        values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll);
    addAll(collected);
  }

  /**
   * Folds the given statistics into the dataset, as though each of the individual values behind
   * those statistics had been added directly.
   */
  public void addAll(Stats values) {
    long otherCount = values.count();
    if (otherCount != 0) {
      merge(otherCount, values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
    }
  }

  /**
   * Folds the given statistics into the dataset, as though each of the individual values behind
   * those statistics had been added directly.
   *
   * @since 28.2
   */
  public void addAll(StatsAccumulator values) {
    long otherCount = values.count();
    if (otherCount != 0) {
      merge(otherCount, values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
    }
  }

  /**
   * Combines the summary of another, non-empty dataset into this accumulator. Callers are
   * responsible for skipping empty datasets before calling this.
   */
  private void merge(
      long otherCount,
      double otherMean,
      double otherSumOfSquaresOfDeltas,
      double otherMin,
      double otherMax) {
    if (count == 0) {
      // Nothing accumulated yet: simply adopt the other summary wholesale.
      count = otherCount;
      mean = otherMean;
      sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
      min = otherMin;
      max = otherMax;
      return;
    }

    count += otherCount;
    if (!isFinite(mean) || !isFinite(otherMean)) {
      mean = calculateNewMeanNonFinite(mean, otherMean);
      sumOfSquaresOfDeltas = NaN;
    } else {
      // This is a generalized version of the calculation in add(double) above.
      double gapBetweenMeans = otherMean - mean;
      mean += gapBetweenMeans * otherCount / count;
      sumOfSquaresOfDeltas +=
          otherSumOfSquaresOfDeltas + gapBetweenMeans * (otherMean - mean) * otherCount;
    }
    min = Math.min(min, otherMin);
    max = Math.max(max, otherMax);
  }

  /** Returns an immutable snapshot of the statistics accumulated so far. */
  public Stats snapshot() {
    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
  }

  /** Returns how many values have been added. */
  public long count() {
    return count;
  }

  /**
   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
   * values. The count must be non-zero.
   *
   * <p>When the values are a sample drawn from a population, this is also an unbiased estimator
   * of the arithmetic mean of that population.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>The result is {@link Double#NaN} if the dataset contains {@link Double#NaN}, or if it
   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY}. If it
   * contains {@link Double#POSITIVE_INFINITY} and finite values only or {@link
   * Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. If it
   * contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public double mean() {
    checkState(count != 0);
    return mean;
  }

  /**
   * Returns the sum of the values.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>The result is {@link Double#NaN} if the dataset contains {@link Double#NaN}, or if it
   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY}. If it
   * contains {@link Double#POSITIVE_INFINITY} and finite values only or {@link
   * Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. If it
   * contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
   */
  public final double sum() {
    return mean * count;
  }

  /**
   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
   * variance</a> of the values. The count must be non-zero.
   *
   * <p>Zero is guaranteed when the dataset consists of exactly one finite value. Zero is not
   * guaranteed when the dataset consists of the same value repeated, because of numerical errors;
   * the result is nevertheless guaranteed never to be negative.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public final double populationVariance() {
    checkState(count != 0);
    if (isNaN(sumOfSquaresOfDeltas)) {
      return NaN;
    }
    // A single value has zero variance by definition; skip the division altogether.
    return count == 1 ? 0.0 : ensureNonNegative(sumOfSquaresOfDeltas) / count;
  }

  /**
   * Returns the <a
   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
   * population standard deviation</a> of the values. The count must be non-zero.
   *
   * <p>Zero is guaranteed when the dataset consists of exactly one finite value. Zero is not
   * guaranteed when the dataset consists of the same value repeated, because of numerical errors;
   * the result is nevertheless guaranteed never to be negative.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public final double populationStandardDeviation() {
    double variance = populationVariance();
    return Math.sqrt(variance);
  }

  /**
   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
   * variance</a> of the values. When the dataset is a sample drawn from a population, this is an
   * unbiased estimator of the population variance of that population. The count must be greater
   * than one.
   *
   * <p>Zero is not guaranteed when the dataset consists of the same value repeated, because of
   * numerical errors; the result is nevertheless guaranteed never to be negative.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
   *
   * @throws IllegalStateException if the dataset is empty or contains a single value
   */
  public final double sampleVariance() {
    checkState(count > 1);
    return isNaN(sumOfSquaresOfDeltas)
        ? NaN
        : ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
  }

  /**
   * Returns the <a
   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
   * corrected sample standard deviation</a> of the values. When the dataset is a sample drawn
   * from a population, this is an estimator of the population standard deviation of that
   * population which is less biased than {@link #populationStandardDeviation()} (the unbiased
   * estimator depends on the distribution). The count must be greater than one.
   *
   * <p>Zero is not guaranteed when the dataset consists of the same value repeated, because of
   * numerical errors; the result is nevertheless guaranteed never to be negative.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
   *
   * @throws IllegalStateException if the dataset is empty or contains a single value
   */
  public final double sampleStandardDeviation() {
    double variance = sampleVariance();
    return Math.sqrt(variance);
  }

  /**
   * Returns the lowest value in the dataset. The count must be non-zero.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
   * only then the result is the lowest finite value. If it contains {@link
   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public double min() {
    checkState(count != 0);
    return min;
  }

  /**
   * Returns the highest value in the dataset. The count must be non-zero.
   *
   * <h3>Non-finite values</h3>
   *
   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
   * only then the result is the highest finite value. If it contains {@link
   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
   *
   * @throws IllegalStateException if the dataset is empty
   */
  public double max() {
    checkState(count != 0);
    return max;
  }

  /** Exposes the raw running sum of squared deltas to the rest of the package. */
  double sumOfSquaresOfDeltas() {
    return sumOfSquaresOfDeltas;
  }

  /**
   * Works out the accumulated mean after a value is added, for the situation in which the
   * previous mean, the added value, or both are non-finite.
   */
  static double calculateNewMeanNonFinite(double previousMean, double value) {
    /*
     * The aim is to agree with what the naive mean formula would produce. The incremental update
     * formula is unsuitable here because it can end up subtracting infinities in cases where the
     * naive formula would add them.
     *
     * That leaves the following cases:
     * 1. Finite previous mean, non-finite new value: the new mean is that value (NaN or an
     *    infinity).
     * 2. Non-finite previous mean, finite new value: the mean stays as it is (NaN or an
     *    infinity).
     * 3. Both non-finite, and...
     * 3a. ...at least one is NaN (so mean != value): the new mean is NaN.
     * 3b. ...both are the same infinity (so mean == value): the mean stays as it is.
     * 3c. ...they are opposite infinities (so mean != value): the new mean is NaN.
     */
    if (isFinite(previousMean)) {
      // Case 1.
      return value;
    }
    if (isFinite(value) || previousMean == value) {
      // Case 2 or 3b.
      return previousMean;
    }
    // Case 3a or 3c.
    return NaN;
  }
}