Coverage Summary for Class: TopKSelector (com.google.common.collect)

Class Class, % Method, % Line, %
TopKSelector 0% (0/1) 0% (0/13) 0% (0/73)


1 /* 2  * Copyright (C) 2014 The Guava Authors 3  * 4  * Licensed under the Apache License, Version 2.0 (the "License"); 5  * you may not use this file except in compliance with the License. 6  * You may obtain a copy of the License at 7  * 8  * http://www.apache.org/licenses/LICENSE-2.0 9  * 10  * Unless required by applicable law or agreed to in writing, software 11  * distributed under the License is distributed on an "AS IS" BASIS, 12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13  * See the License for the specific language governing permissions and 14  * limitations under the License. 15  */ 16  17 package com.google.common.collect; 18  19 import static com.google.common.base.Preconditions.checkArgument; 20 import static com.google.common.base.Preconditions.checkNotNull; 21  22 import com.google.common.annotations.GwtCompatible; 23 import com.google.common.math.IntMath; 24 import java.math.RoundingMode; 25 import java.util.Arrays; 26 import java.util.Collections; 27 import java.util.Comparator; 28 import java.util.Iterator; 29 import java.util.List; 30 import java.util.stream.Stream; 31 import org.checkerframework.checker.nullness.qual.Nullable; 32  33 /** 34  * An accumulator that selects the "top" {@code k} elements added to it, relative to a provided 35  * comparator. "Top" can mean the greatest or the lowest elements, specified in the factory used to 36  * create the {@code TopKSelector} instance. 37  * 38  * <p>If your input data is available as a {@link Stream}, prefer passing {@link 39  * Comparators#least(int)} to {@link Stream#collect(java.util.stream.Collector)}. If it is available 40  * as an {@link Iterable} or {@link Iterator}, prefer {@link Ordering#leastOf(Iterable, int)}. 41  * 42  * <p>This uses the same efficient implementation as {@link Ordering#leastOf(Iterable, int)}, 43  * offering expected O(n + k log k) performance (worst case O(n log k)) for n calls to {@link 44  * #offer} and a call to {@link #topK}, with O(k) memory. In comparison, quickselect has the same 45  * asymptotics but requires O(n) memory, and a {@code PriorityQueue} implementation takes O(n log 46  * k). In benchmarks, this implementation performs at least as well as either implementation, and 47  * degrades more gracefully for worst-case input. 48  * 49  * <p>The implementation does not necessarily use a <i>stable</i> sorting algorithm; when multiple 50  * equivalent elements are added to it, it is undefined which will come first in the output. 51  * 52  * @author Louis Wasserman 53  */ 54 @GwtCompatible 55 final class TopKSelector<T> { 56  57  /** 58  * Returns a {@code TopKSelector} that collects the lowest {@code k} elements added to it, 59  * relative to the natural ordering of the elements, and returns them via {@link #topK} in 60  * ascending order. 61  * 62  * @throws IllegalArgumentException if {@code k < 0} or {@code k > Integer.MAX_VALUE / 2} 63  */ 64  public static <T extends Comparable<? super T>> TopKSelector<T> least(int k) { 65  return least(k, Ordering.natural()); 66  } 67  68  /** 69  * Returns a {@code TopKSelector} that collects the lowest {@code k} elements added to it, 70  * relative to the specified comparator, and returns them via {@link #topK} in ascending order. 71  * 72  * @throws IllegalArgumentException if {@code k < 0} or {@code k > Integer.MAX_VALUE / 2} 73  */ 74  public static <T> TopKSelector<T> least(int k, Comparator<? super T> comparator) { 75  return new TopKSelector<T>(comparator, k); 76  } 77  78  /** 79  * Returns a {@code TopKSelector} that collects the greatest {@code k} elements added to it, 80  * relative to the natural ordering of the elements, and returns them via {@link #topK} in 81  * descending order. 82  * 83  * @throws IllegalArgumentException if {@code k < 0} or {@code k > Integer.MAX_VALUE / 2} 84  */ 85  public static <T extends Comparable<? super T>> TopKSelector<T> greatest(int k) { 86  return greatest(k, Ordering.natural()); 87  } 88  89  /** 90  * Returns a {@code TopKSelector} that collects the greatest {@code k} elements added to it, 91  * relative to the specified comparator, and returns them via {@link #topK} in descending order. 92  * 93  * @throws IllegalArgumentException if {@code k < 0} or {@code k > Integer.MAX_VALUE / 2} 94  */ 95  public static <T> TopKSelector<T> greatest(int k, Comparator<? super T> comparator) { 96  return new TopKSelector<T>(Ordering.from(comparator).reverse(), k); 97  } 98  99  private final int k; 100  private final Comparator<? super T> comparator; 101  102  /* 103  * We are currently considering the elements in buffer in the range [0, bufferSize) as candidates 104  * for the top k elements. Whenever the buffer is filled, we quickselect the top k elements to the 105  * range [0, k) and ignore the remaining elements. 106  */ 107  private final T[] buffer; 108  private int bufferSize; 109  110  /** 111  * The largest of the lowest k elements we've seen so far relative to this comparator. If 112  * bufferSize ? k, then we can ignore any elements greater than this value. 113  */ 114  private @Nullable T threshold; 115  116  private TopKSelector(Comparator<? super T> comparator, int k) { 117  this.comparator = checkNotNull(comparator, "comparator"); 118  this.k = k; 119  checkArgument(k >= 0, "k (%s) must be >= 0", k); 120  checkArgument(k <= Integer.MAX_VALUE / 2, "k (%s) must be <= Integer.MAX_VALUE / 2", k); 121  this.buffer = (T[]) new Object[IntMath.checkedMultiply(k, 2)]; 122  this.bufferSize = 0; 123  this.threshold = null; 124  } 125  126  /** 127  * Adds {@code elem} as a candidate for the top {@code k} elements. This operation takes amortized 128  * O(1) time. 129  */ 130  public void offer(@Nullable T elem) { 131  if (k == 0) { 132  return; 133  } else if (bufferSize == 0) { 134  buffer[0] = elem; 135  threshold = elem; 136  bufferSize = 1; 137  } else if (bufferSize < k) { 138  buffer[bufferSize++] = elem; 139  if (comparator.compare(elem, threshold) > 0) { 140  threshold = elem; 141  } 142  } else if (comparator.compare(elem, threshold) < 0) { 143  // Otherwise, we can ignore elem; we've seen k better elements. 144  buffer[bufferSize++] = elem; 145  if (bufferSize == 2 * k) { 146  trim(); 147  } 148  } 149  } 150  151  /** 152  * Quickselects the top k elements from the 2k elements in the buffer. O(k) expected time, O(k log 153  * k) worst case. 154  */ 155  private void trim() { 156  int left = 0; 157  int right = 2 * k - 1; 158  159  int minThresholdPosition = 0; 160  // The leftmost position at which the greatest of the k lower elements 161  // -- the new value of threshold -- might be found. 162  163  int iterations = 0; 164  int maxIterations = IntMath.log2(right - left, RoundingMode.CEILING) * 3; 165  while (left < right) { 166  int pivotIndex = (left + right + 1) >>> 1; 167  168  int pivotNewIndex = partition(left, right, pivotIndex); 169  170  if (pivotNewIndex > k) { 171  right = pivotNewIndex - 1; 172  } else if (pivotNewIndex < k) { 173  left = Math.max(pivotNewIndex, left + 1); 174  minThresholdPosition = pivotNewIndex; 175  } else { 176  break; 177  } 178  iterations++; 179  if (iterations >= maxIterations) { 180  // We've already taken O(k log k), let's make sure we don't take longer than O(k log k). 181  Arrays.sort(buffer, left, right, comparator); 182  break; 183  } 184  } 185  bufferSize = k; 186  187  threshold = buffer[minThresholdPosition]; 188  for (int i = minThresholdPosition + 1; i < k; i++) { 189  if (comparator.compare(buffer[i], threshold) > 0) { 190  threshold = buffer[i]; 191  } 192  } 193  } 194  195  /** 196  * Partitions the contents of buffer in the range [left, right] around the pivot element 197  * previously stored in buffer[pivotValue]. Returns the new index of the pivot element, 198  * pivotNewIndex, so that everything in [left, pivotNewIndex] is ? pivotValue and everything in 199  * (pivotNewIndex, right] is greater than pivotValue. 200  */ 201  private int partition(int left, int right, int pivotIndex) { 202  T pivotValue = buffer[pivotIndex]; 203  buffer[pivotIndex] = buffer[right]; 204  205  int pivotNewIndex = left; 206  for (int i = left; i < right; i++) { 207  if (comparator.compare(buffer[i], pivotValue) < 0) { 208  swap(pivotNewIndex, i); 209  pivotNewIndex++; 210  } 211  } 212  buffer[right] = buffer[pivotNewIndex]; 213  buffer[pivotNewIndex] = pivotValue; 214  return pivotNewIndex; 215  } 216  217  private void swap(int i, int j) { 218  T tmp = buffer[i]; 219  buffer[i] = buffer[j]; 220  buffer[j] = tmp; 221  } 222  223  TopKSelector<T> combine(TopKSelector<T> other) { 224  for (int i = 0; i < other.bufferSize; i++) { 225  this.offer(other.buffer[i]); 226  } 227  return this; 228  } 229  230  /** 231  * Adds each member of {@code elements} as a candidate for the top {@code k} elements. This 232  * operation takes amortized linear time in the length of {@code elements}. 233  * 234  * <p>If all input data to this {@code TopKSelector} is in a single {@code Iterable}, prefer 235  * {@link Ordering#leastOf(Iterable, int)}, which provides a simpler API for that use case. 236  */ 237  public void offerAll(Iterable<? extends T> elements) { 238  offerAll(elements.iterator()); 239  } 240  241  /** 242  * Adds each member of {@code elements} as a candidate for the top {@code k} elements. This 243  * operation takes amortized linear time in the length of {@code elements}. The iterator is 244  * consumed after this operation completes. 245  * 246  * <p>If all input data to this {@code TopKSelector} is in a single {@code Iterator}, prefer 247  * {@link Ordering#leastOf(Iterator, int)}, which provides a simpler API for that use case. 248  */ 249  public void offerAll(Iterator<? extends T> elements) { 250  while (elements.hasNext()) { 251  offer(elements.next()); 252  } 253  } 254  255  /** 256  * Returns the top {@code k} elements offered to this {@code TopKSelector}, or all elements if 257  * fewer than {@code k} have been offered, in the order specified by the factory used to create 258  * this {@code TopKSelector}. 259  * 260  * <p>The returned list is an unmodifiable copy and will not be affected by further changes to 261  * this {@code TopKSelector}. This method returns in O(k log k) time. 262  */ 263  public List<T> topK() { 264  Arrays.sort(buffer, 0, bufferSize, comparator); 265  if (bufferSize > k) { 266  Arrays.fill(buffer, k, buffer.length, null); 267  bufferSize = k; 268  threshold = buffer[k - 1]; 269  } 270  // we have to support null elements, so no ImmutableList for us 271  return Collections.unmodifiableList(Arrays.asList(Arrays.copyOf(buffer, bufferSize))); 272  } 273 }