Coverage Summary for Class: TopKSelector (com.google.common.collect)
| Class | Class, % | Method, % | Line, % |
|---|---|---|---|
| TopKSelector | 0% (0/1) | 0% (0/13) | 0% (0/73) |
1 /* 2 * Copyright (C) 2014 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.collect; 18 19 import static com.google.common.base.Preconditions.checkArgument; 20 import static com.google.common.base.Preconditions.checkNotNull; 21 22 import com.google.common.annotations.GwtCompatible; 23 import com.google.common.math.IntMath; 24 import java.math.RoundingMode; 25 import java.util.Arrays; 26 import java.util.Collections; 27 import java.util.Comparator; 28 import java.util.Iterator; 29 import java.util.List; 30 import java.util.stream.Stream; 31 import org.checkerframework.checker.nullness.qual.Nullable; 32 33 /** 34 * An accumulator that selects the "top" {@code k} elements added to it, relative to a provided 35 * comparator. "Top" can mean the greatest or the lowest elements, specified in the factory used to 36 * create the {@code TopKSelector} instance. 37 * 38 * <p>If your input data is available as a {@link Stream}, prefer passing {@link 39 * Comparators#least(int)} to {@link Stream#collect(java.util.stream.Collector)}. If it is available 40 * as an {@link Iterable} or {@link Iterator}, prefer {@link Ordering#leastOf(Iterable, int)}. 41 * 42 * <p>This uses the same efficient implementation as {@link Ordering#leastOf(Iterable, int)}, 43 * offering expected O(n + k log k) performance (worst case O(n log k)) for n calls to {@link 44 * #offer} and a call to {@link #topK}, with O(k) memory. In comparison, quickselect has the same 45 * asymptotics but requires O(n) memory, and a {@code PriorityQueue} implementation takes O(n log 46 * k). In benchmarks, this implementation performs at least as well as either implementation, and 47 * degrades more gracefully for worst-case input. 48 * 49 * <p>The implementation does not necessarily use a <i>stable</i> sorting algorithm; when multiple 50 * equivalent elements are added to it, it is undefined which will come first in the output. 51 * 52 * @author Louis Wasserman 53 */ 54 @GwtCompatible 55 final class TopKSelector<T> { 56 57 /** 58 * Returns a {@code TopKSelector} that collects the lowest {@code k} elements added to it, 59 * relative to the natural ordering of the elements, and returns them via {@link #topK} in 60 * ascending order. 61 * 62 * @throws IllegalArgumentException if {@code k < 0} or {@code k > Integer.MAX_VALUE / 2} 63 */ 64 public static <T extends Comparable<? super T>> TopKSelector<T> least(int k) { 65 return least(k, Ordering.natural()); 66 } 67 68 /** 69 * Returns a {@code TopKSelector} that collects the lowest {@code k} elements added to it, 70 * relative to the specified comparator, and returns them via {@link #topK} in ascending order. 71 * 72 * @throws IllegalArgumentException if {@code k < 0} or {@code k > Integer.MAX_VALUE / 2} 73 */ 74 public static <T> TopKSelector<T> least(int k, Comparator<? super T> comparator) { 75 return new TopKSelector<T>(comparator, k); 76 } 77 78 /** 79 * Returns a {@code TopKSelector} that collects the greatest {@code k} elements added to it, 80 * relative to the natural ordering of the elements, and returns them via {@link #topK} in 81 * descending order. 82 * 83 * @throws IllegalArgumentException if {@code k < 0} or {@code k > Integer.MAX_VALUE / 2} 84 */ 85 public static <T extends Comparable<? super T>> TopKSelector<T> greatest(int k) { 86 return greatest(k, Ordering.natural()); 87 } 88 89 /** 90 * Returns a {@code TopKSelector} that collects the greatest {@code k} elements added to it, 91 * relative to the specified comparator, and returns them via {@link #topK} in descending order. 92 * 93 * @throws IllegalArgumentException if {@code k < 0} or {@code k > Integer.MAX_VALUE / 2} 94 */ 95 public static <T> TopKSelector<T> greatest(int k, Comparator<? super T> comparator) { 96 return new TopKSelector<T>(Ordering.from(comparator).reverse(), k); 97 } 98 99 private final int k; 100 private final Comparator<? super T> comparator; 101 102 /* 103 * We are currently considering the elements in buffer in the range [0, bufferSize) as candidates 104 * for the top k elements. Whenever the buffer is filled, we quickselect the top k elements to the 105 * range [0, k) and ignore the remaining elements. 106 */ 107 private final T[] buffer; 108 private int bufferSize; 109 110 /** 111 * The largest of the lowest k elements we've seen so far relative to this comparator. If 112 * bufferSize ? k, then we can ignore any elements greater than this value. 113 */ 114 private @Nullable T threshold; 115 116 private TopKSelector(Comparator<? super T> comparator, int k) { 117 this.comparator = checkNotNull(comparator, "comparator"); 118 this.k = k; 119 checkArgument(k >= 0, "k (%s) must be >= 0", k); 120 checkArgument(k <= Integer.MAX_VALUE / 2, "k (%s) must be <= Integer.MAX_VALUE / 2", k); 121 this.buffer = (T[]) new Object[IntMath.checkedMultiply(k, 2)]; 122 this.bufferSize = 0; 123 this.threshold = null; 124 } 125 126 /** 127 * Adds {@code elem} as a candidate for the top {@code k} elements. This operation takes amortized 128 * O(1) time. 129 */ 130 public void offer(@Nullable T elem) { 131 if (k == 0) { 132 return; 133 } else if (bufferSize == 0) { 134 buffer[0] = elem; 135 threshold = elem; 136 bufferSize = 1; 137 } else if (bufferSize < k) { 138 buffer[bufferSize++] = elem; 139 if (comparator.compare(elem, threshold) > 0) { 140 threshold = elem; 141 } 142 } else if (comparator.compare(elem, threshold) < 0) { 143 // Otherwise, we can ignore elem; we've seen k better elements. 144 buffer[bufferSize++] = elem; 145 if (bufferSize == 2 * k) { 146 trim(); 147 } 148 } 149 } 150 151 /** 152 * Quickselects the top k elements from the 2k elements in the buffer. O(k) expected time, O(k log 153 * k) worst case. 154 */ 155 private void trim() { 156 int left = 0; 157 int right = 2 * k - 1; 158 159 int minThresholdPosition = 0; 160 // The leftmost position at which the greatest of the k lower elements 161 // -- the new value of threshold -- might be found. 162 163 int iterations = 0; 164 int maxIterations = IntMath.log2(right - left, RoundingMode.CEILING) * 3; 165 while (left < right) { 166 int pivotIndex = (left + right + 1) >>> 1; 167 168 int pivotNewIndex = partition(left, right, pivotIndex); 169 170 if (pivotNewIndex > k) { 171 right = pivotNewIndex - 1; 172 } else if (pivotNewIndex < k) { 173 left = Math.max(pivotNewIndex, left + 1); 174 minThresholdPosition = pivotNewIndex; 175 } else { 176 break; 177 } 178 iterations++; 179 if (iterations >= maxIterations) { 180 // We've already taken O(k log k), let's make sure we don't take longer than O(k log k). 181 Arrays.sort(buffer, left, right, comparator); 182 break; 183 } 184 } 185 bufferSize = k; 186 187 threshold = buffer[minThresholdPosition]; 188 for (int i = minThresholdPosition + 1; i < k; i++) { 189 if (comparator.compare(buffer[i], threshold) > 0) { 190 threshold = buffer[i]; 191 } 192 } 193 } 194 195 /** 196 * Partitions the contents of buffer in the range [left, right] around the pivot element 197 * previously stored in buffer[pivotValue]. Returns the new index of the pivot element, 198 * pivotNewIndex, so that everything in [left, pivotNewIndex] is ? pivotValue and everything in 199 * (pivotNewIndex, right] is greater than pivotValue. 200 */ 201 private int partition(int left, int right, int pivotIndex) { 202 T pivotValue = buffer[pivotIndex]; 203 buffer[pivotIndex] = buffer[right]; 204 205 int pivotNewIndex = left; 206 for (int i = left; i < right; i++) { 207 if (comparator.compare(buffer[i], pivotValue) < 0) { 208 swap(pivotNewIndex, i); 209 pivotNewIndex++; 210 } 211 } 212 buffer[right] = buffer[pivotNewIndex]; 213 buffer[pivotNewIndex] = pivotValue; 214 return pivotNewIndex; 215 } 216 217 private void swap(int i, int j) { 218 T tmp = buffer[i]; 219 buffer[i] = buffer[j]; 220 buffer[j] = tmp; 221 } 222 223 TopKSelector<T> combine(TopKSelector<T> other) { 224 for (int i = 0; i < other.bufferSize; i++) { 225 this.offer(other.buffer[i]); 226 } 227 return this; 228 } 229 230 /** 231 * Adds each member of {@code elements} as a candidate for the top {@code k} elements. This 232 * operation takes amortized linear time in the length of {@code elements}. 233 * 234 * <p>If all input data to this {@code TopKSelector} is in a single {@code Iterable}, prefer 235 * {@link Ordering#leastOf(Iterable, int)}, which provides a simpler API for that use case. 236 */ 237 public void offerAll(Iterable<? extends T> elements) { 238 offerAll(elements.iterator()); 239 } 240 241 /** 242 * Adds each member of {@code elements} as a candidate for the top {@code k} elements. This 243 * operation takes amortized linear time in the length of {@code elements}. The iterator is 244 * consumed after this operation completes. 245 * 246 * <p>If all input data to this {@code TopKSelector} is in a single {@code Iterator}, prefer 247 * {@link Ordering#leastOf(Iterator, int)}, which provides a simpler API for that use case. 248 */ 249 public void offerAll(Iterator<? extends T> elements) { 250 while (elements.hasNext()) { 251 offer(elements.next()); 252 } 253 } 254 255 /** 256 * Returns the top {@code k} elements offered to this {@code TopKSelector}, or all elements if 257 * fewer than {@code k} have been offered, in the order specified by the factory used to create 258 * this {@code TopKSelector}. 259 * 260 * <p>The returned list is an unmodifiable copy and will not be affected by further changes to 261 * this {@code TopKSelector}. This method returns in O(k log k) time. 262 */ 263 public List<T> topK() { 264 Arrays.sort(buffer, 0, bufferSize, comparator); 265 if (bufferSize > k) { 266 Arrays.fill(buffer, k, buffer.length, null); 267 bufferSize = k; 268 threshold = buffer[k - 1]; 269 } 270 // we have to support null elements, so no ImmutableList for us 271 return Collections.unmodifiableList(Arrays.asList(Arrays.copyOf(buffer, bufferSize))); 272 } 273 }