Coverage Summary for Class: PairedStats (com.google.common.math)

Class Class, % Method, % Line, %
PairedStats 0% (0/1) 0% (0/16) 0% (0/72)


1 /* 2  * Copyright (C) 2012 The Guava Authors 3  * 4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5  * in compliance with the License. You may obtain a copy of the License at 6  * 7  * http://www.apache.org/licenses/LICENSE-2.0 8  * 9  * Unless required by applicable law or agreed to in writing, software distributed under the License 10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11  * or implied. See the License for the specific language governing permissions and limitations under 12  * the License. 13  */ 14  15 package com.google.common.math; 16  17 import static com.google.common.base.Preconditions.checkArgument; 18 import static com.google.common.base.Preconditions.checkNotNull; 19 import static com.google.common.base.Preconditions.checkState; 20 import static java.lang.Double.NaN; 21 import static java.lang.Double.doubleToLongBits; 22 import static java.lang.Double.isNaN; 23  24 import com.google.common.annotations.Beta; 25 import com.google.common.annotations.GwtIncompatible; 26 import com.google.common.base.MoreObjects; 27 import com.google.common.base.Objects; 28 import java.io.Serializable; 29 import java.nio.ByteBuffer; 30 import java.nio.ByteOrder; 31 import javax.annotation.CheckForNull; 32  33 /** 34  * An immutable value object capturing some basic statistics about a collection of paired double 35  * values (e.g. points on a plane). Build instances with {@link PairedStatsAccumulator#snapshot}. 36  * 37  * @author Pete Gillin 38  * @since 20.0 39  */ 40 @Beta 41 @GwtIncompatible 42 @ElementTypesAreNonnullByDefault 43 public final class PairedStats implements Serializable { 44  45  private final Stats xStats; 46  private final Stats yStats; 47  private final double sumOfProductsOfDeltas; 48  49  /** 50  * Internal constructor. Users should use {@link PairedStatsAccumulator#snapshot}. 51  * 52  * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the 53  * following constraints. This is the callers responsibility and is not enforced here. 54  * 55  * <ul> 56  * <li>Both {@code xStats} and {@code yStats} must have the same {@code count}. 57  * <li>If that {@code count} is 1, {@code sumOfProductsOfDeltas} must be exactly 0.0. 58  * <li>If that {@code count} is more than 1, {@code sumOfProductsOfDeltas} must be finite. 59  * </ul> 60  */ 61  PairedStats(Stats xStats, Stats yStats, double sumOfProductsOfDeltas) { 62  this.xStats = xStats; 63  this.yStats = yStats; 64  this.sumOfProductsOfDeltas = sumOfProductsOfDeltas; 65  } 66  67  /** Returns the number of pairs in the dataset. */ 68  public long count() { 69  return xStats.count(); 70  } 71  72  /** Returns the statistics on the {@code x} values alone. */ 73  public Stats xStats() { 74  return xStats; 75  } 76  77  /** Returns the statistics on the {@code y} values alone. */ 78  public Stats yStats() { 79  return yStats; 80  } 81  82  /** 83  * Returns the population covariance of the values. The count must be non-zero. 84  * 85  * <p>This is guaranteed to return zero if the dataset contains a single pair of finite values. It 86  * is not guaranteed to return zero when the dataset consists of the same pair of values multiple 87  * times, due to numerical errors. 88  * 89  * <h3>Non-finite values</h3> 90  * 91  * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 92  * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 93  * 94  * @throws IllegalStateException if the dataset is empty 95  */ 96  public double populationCovariance() { 97  checkState(count() != 0); 98  return sumOfProductsOfDeltas / count(); 99  } 100  101  /** 102  * Returns the sample covariance of the values. The count must be greater than one. 103  * 104  * <p>This is not guaranteed to return zero when the dataset consists of the same pair of values 105  * multiple times, due to numerical errors. 106  * 107  * <h3>Non-finite values</h3> 108  * 109  * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 110  * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 111  * 112  * @throws IllegalStateException if the dataset is empty or contains a single pair of values 113  */ 114  public double sampleCovariance() { 115  checkState(count() > 1); 116  return sumOfProductsOfDeltas / (count() - 1); 117  } 118  119  /** 120  * Returns the <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html">Pearson's or 121  * product-moment correlation coefficient</a> of the values. The count must greater than one, and 122  * the {@code x} and {@code y} values must both have non-zero population variance (i.e. {@code 123  * xStats().populationVariance() > 0.0 && yStats().populationVariance() > 0.0}). The result is not 124  * guaranteed to be exactly +/-1 even when the data are perfectly (anti-)correlated, due to 125  * numerical errors. However, it is guaranteed to be in the inclusive range [-1, +1]. 126  * 127  * <h3>Non-finite values</h3> 128  * 129  * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 130  * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 131  * 132  * @throws IllegalStateException if the dataset is empty or contains a single pair of values, or 133  * either the {@code x} and {@code y} dataset has zero population variance 134  */ 135  public double pearsonsCorrelationCoefficient() { 136  checkState(count() > 1); 137  if (isNaN(sumOfProductsOfDeltas)) { 138  return NaN; 139  } 140  double xSumOfSquaresOfDeltas = xStats().sumOfSquaresOfDeltas(); 141  double ySumOfSquaresOfDeltas = yStats().sumOfSquaresOfDeltas(); 142  checkState(xSumOfSquaresOfDeltas > 0.0); 143  checkState(ySumOfSquaresOfDeltas > 0.0); 144  // The product of two positive numbers can be zero if the multiplication underflowed. We 145  // force a positive value by effectively rounding up to MIN_VALUE. 146  double productOfSumsOfSquaresOfDeltas = 147  ensurePositive(xSumOfSquaresOfDeltas * ySumOfSquaresOfDeltas); 148  return ensureInUnitRange(sumOfProductsOfDeltas / Math.sqrt(productOfSumsOfSquaresOfDeltas)); 149  } 150  151  /** 152  * Returns a linear transformation giving the best fit to the data according to <a 153  * href="http://mathworld.wolfram.com/LeastSquaresFitting.html">Ordinary Least Squares linear 154  * regression</a> of {@code y} as a function of {@code x}. The count must be greater than one, and 155  * either the {@code x} or {@code y} data must have a non-zero population variance (i.e. {@code 156  * xStats().populationVariance() > 0.0 || yStats().populationVariance() > 0.0}). The result is 157  * guaranteed to be horizontal if there is variance in the {@code x} data but not the {@code y} 158  * data, and vertical if there is variance in the {@code y} data but not the {@code x} data. 159  * 160  * <p>This fit minimizes the root-mean-square error in {@code y} as a function of {@code x}. This 161  * error is defined as the square root of the mean of the squares of the differences between the 162  * actual {@code y} values of the data and the values predicted by the fit for the {@code x} 163  * values (i.e. it is the square root of the mean of the squares of the vertical distances between 164  * the data points and the best fit line). For this fit, this error is a fraction {@code sqrt(1 - 165  * R*R)} of the population standard deviation of {@code y}, where {@code R} is the Pearson's 166  * correlation coefficient (as given by {@link #pearsonsCorrelationCoefficient()}). 167  * 168  * <p>The corresponding root-mean-square error in {@code x} as a function of {@code y} is a 169  * fraction {@code sqrt(1/(R*R) - 1)} of the population standard deviation of {@code x}. This fit 170  * does not normally minimize that error: to do that, you should swap the roles of {@code x} and 171  * {@code y}. 172  * 173  * <h3>Non-finite values</h3> 174  * 175  * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 176  * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link 177  * LinearTransformation#forNaN()}. 178  * 179  * @throws IllegalStateException if the dataset is empty or contains a single pair of values, or 180  * both the {@code x} and {@code y} dataset must have zero population variance 181  */ 182  public LinearTransformation leastSquaresFit() { 183  checkState(count() > 1); 184  if (isNaN(sumOfProductsOfDeltas)) { 185  return LinearTransformation.forNaN(); 186  } 187  double xSumOfSquaresOfDeltas = xStats.sumOfSquaresOfDeltas(); 188  if (xSumOfSquaresOfDeltas > 0.0) { 189  if (yStats.sumOfSquaresOfDeltas() > 0.0) { 190  return LinearTransformation.mapping(xStats.mean(), yStats.mean()) 191  .withSlope(sumOfProductsOfDeltas / xSumOfSquaresOfDeltas); 192  } else { 193  return LinearTransformation.horizontal(yStats.mean()); 194  } 195  } else { 196  checkState(yStats.sumOfSquaresOfDeltas() > 0.0); 197  return LinearTransformation.vertical(xStats.mean()); 198  } 199  } 200  201  /** 202  * {@inheritDoc} 203  * 204  * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating 205  * point values. Two instances are guaranteed to be considered equal if one is copied from the 206  * other using {@code second = new PairedStatsAccumulator().addAll(first).snapshot()}, if both 207  * were obtained by calling {@code snapshot()} on the same {@link PairedStatsAccumulator} without 208  * adding any values in between the two calls, or if one is obtained from the other after 209  * round-tripping through java serialization. However, floating point rounding errors mean that it 210  * may be false for some instances where the statistics are mathematically equal, including 211  * instances constructed from the same values in a different order... or (in the general case) 212  * even in the same order. (It is guaranteed to return true for instances constructed from the 213  * same values in the same order if {@code strictfp} is in effect, or if the system architecture 214  * guarantees {@code strictfp}-like semantics.) 215  */ 216  @Override 217  public boolean equals(@CheckForNull Object obj) { 218  if (obj == null) { 219  return false; 220  } 221  if (getClass() != obj.getClass()) { 222  return false; 223  } 224  PairedStats other = (PairedStats) obj; 225  return xStats.equals(other.xStats) 226  && yStats.equals(other.yStats) 227  && doubleToLongBits(sumOfProductsOfDeltas) == doubleToLongBits(other.sumOfProductsOfDeltas); 228  } 229  230  /** 231  * {@inheritDoc} 232  * 233  * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics, 234  * including the floating point values. See the note on {@link #equals} for details. 235  */ 236  @Override 237  public int hashCode() { 238  return Objects.hashCode(xStats, yStats, sumOfProductsOfDeltas); 239  } 240  241  @Override 242  public String toString() { 243  if (count() > 0) { 244  return MoreObjects.toStringHelper(this) 245  .add("xStats", xStats) 246  .add("yStats", yStats) 247  .add("populationCovariance", populationCovariance()) 248  .toString(); 249  } else { 250  return MoreObjects.toStringHelper(this) 251  .add("xStats", xStats) 252  .add("yStats", yStats) 253  .toString(); 254  } 255  } 256  257  double sumOfProductsOfDeltas() { 258  return sumOfProductsOfDeltas; 259  } 260  261  private static double ensurePositive(double value) { 262  if (value > 0.0) { 263  return value; 264  } else { 265  return Double.MIN_VALUE; 266  } 267  } 268  269  private static double ensureInUnitRange(double value) { 270  if (value >= 1.0) { 271  return 1.0; 272  } 273  if (value <= -1.0) { 274  return -1.0; 275  } 276  return value; 277  } 278  279  // Serialization helpers 280  281  /** The size of byte array representation in bytes. */ 282  private static final int BYTES = Stats.BYTES * 2 + Double.SIZE / Byte.SIZE; 283  284  /** 285  * Gets a byte array representation of this instance. 286  * 287  * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 288  * versions. 289  */ 290  public byte[] toByteArray() { 291  ByteBuffer buffer = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN); 292  xStats.writeTo(buffer); 293  yStats.writeTo(buffer); 294  buffer.putDouble(sumOfProductsOfDeltas); 295  return buffer.array(); 296  } 297  298  /** 299  * Creates a {@link PairedStats} instance from the given byte representation which was obtained by 300  * {@link #toByteArray}. 301  * 302  * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 303  * versions. 304  */ 305  public static PairedStats fromByteArray(byte[] byteArray) { 306  checkNotNull(byteArray); 307  checkArgument( 308  byteArray.length == BYTES, 309  "Expected PairedStats.BYTES = %s, got %s", 310  BYTES, 311  byteArray.length); 312  ByteBuffer buffer = ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN); 313  Stats xStats = Stats.readFrom(buffer); 314  Stats yStats = Stats.readFrom(buffer); 315  double sumOfProductsOfDeltas = buffer.getDouble(); 316  return new PairedStats(xStats, yStats, sumOfProductsOfDeltas); 317  } 318  319  private static final long serialVersionUID = 0; 320 }