Coverage Summary for Class: BloomFilterStrategies (com.google.common.hash)
| Class | Method, % | Line, % |
|---|---|---|
| BloomFilterStrategies | 0% (0/2) | 0% (0/4) |
| BloomFilterStrategies$1 | 0% (0/3) | 0% (0/23) |
| BloomFilterStrategies$2 | 0% (0/5) | 0% (0/23) |
| BloomFilterStrategies$LockFreeBitArray | 0% (0/11) | 0% (0/53) |
| Total | 0% (0/21) | 0% (0/103) |
1 /* 2 * Copyright (C) 2011 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.common.hash; 16 17 import static com.google.common.base.Preconditions.checkArgument; 18 19 import com.google.common.math.LongMath; 20 import com.google.common.primitives.Ints; 21 import com.google.common.primitives.Longs; 22 import java.math.RoundingMode; 23 import java.util.Arrays; 24 import java.util.concurrent.atomic.AtomicLongArray; 25 import javax.annotation.CheckForNull; 26 import org.checkerframework.checker.nullness.qual.Nullable; 27 28 /** 29 * Collections of strategies of generating the k * log(M) bits required for an element to be mapped 30 * to a BloomFilter of M bits and k hash functions. These strategies are part of the serialized form 31 * of the Bloom filters that use them, thus they must be preserved as is (no updates allowed, only 32 * introduction of new versions). 33 * 34 * <p>Important: the order of the constants cannot change, and they cannot be deleted - we depend on 35 * their ordinal for BloomFilter serialization. 36 * 37 * @author Dimitris Andreou 38 * @author Kurt Alfred Kluever 39 */ 40 @ElementTypesAreNonnullByDefault 41 enum BloomFilterStrategies implements BloomFilter.Strategy { 42 /** 43 * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and Michael 44 * Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the 45 * performance of a Bloom filter (yet only needs two 32bit hash functions). 46 */ 47 MURMUR128_MITZ_32() { 48 @Override 49 public <T extends @Nullable Object> boolean put( 50 @ParametricNullness T object, 51 Funnel<? super T> funnel, 52 int numHashFunctions, 53 LockFreeBitArray bits) { 54 long bitSize = bits.bitSize(); 55 long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong(); 56 int hash1 = (int) hash64; 57 int hash2 = (int) (hash64 >>> 32); 58 59 boolean bitsChanged = false; 60 for (int i = 1; i <= numHashFunctions; i++) { 61 int combinedHash = hash1 + (i * hash2); 62 // Flip all the bits if it's negative (guaranteed positive number) 63 if (combinedHash < 0) { 64 combinedHash = ~combinedHash; 65 } 66 bitsChanged |= bits.set(combinedHash % bitSize); 67 } 68 return bitsChanged; 69 } 70 71 @Override 72 public <T extends @Nullable Object> boolean mightContain( 73 @ParametricNullness T object, 74 Funnel<? super T> funnel, 75 int numHashFunctions, 76 LockFreeBitArray bits) { 77 long bitSize = bits.bitSize(); 78 long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong(); 79 int hash1 = (int) hash64; 80 int hash2 = (int) (hash64 >>> 32); 81 82 for (int i = 1; i <= numHashFunctions; i++) { 83 int combinedHash = hash1 + (i * hash2); 84 // Flip all the bits if it's negative (guaranteed positive number) 85 if (combinedHash < 0) { 86 combinedHash = ~combinedHash; 87 } 88 if (!bits.get(combinedHash % bitSize)) { 89 return false; 90 } 91 } 92 return true; 93 } 94 }, 95 /** 96 * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks different 97 * than the implementation in MURMUR128_MITZ_32 because we're avoiding the multiplication in the 98 * loop and doing a (much simpler) += hash2. We're also changing the index to a positive number by 99 * AND'ing with Long.MAX_VALUE instead of flipping the bits. 100 */ 101 MURMUR128_MITZ_64() { 102 @Override 103 public <T extends @Nullable Object> boolean put( 104 @ParametricNullness T object, 105 Funnel<? super T> funnel, 106 int numHashFunctions, 107 LockFreeBitArray bits) { 108 long bitSize = bits.bitSize(); 109 byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal(); 110 long hash1 = lowerEight(bytes); 111 long hash2 = upperEight(bytes); 112 113 boolean bitsChanged = false; 114 long combinedHash = hash1; 115 for (int i = 0; i < numHashFunctions; i++) { 116 // Make the combined hash positive and indexable 117 bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize); 118 combinedHash += hash2; 119 } 120 return bitsChanged; 121 } 122 123 @Override 124 public <T extends @Nullable Object> boolean mightContain( 125 @ParametricNullness T object, 126 Funnel<? super T> funnel, 127 int numHashFunctions, 128 LockFreeBitArray bits) { 129 long bitSize = bits.bitSize(); 130 byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal(); 131 long hash1 = lowerEight(bytes); 132 long hash2 = upperEight(bytes); 133 134 long combinedHash = hash1; 135 for (int i = 0; i < numHashFunctions; i++) { 136 // Make the combined hash positive and indexable 137 if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) { 138 return false; 139 } 140 combinedHash += hash2; 141 } 142 return true; 143 } 144 145 private /* static */ long lowerEight(byte[] bytes) { 146 return Longs.fromBytes( 147 bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); 148 } 149 150 private /* static */ long upperEight(byte[] bytes) { 151 return Longs.fromBytes( 152 bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]); 153 } 154 }; 155 156 /** 157 * Models a lock-free array of bits. 158 * 159 * <p>We use this instead of java.util.BitSet because we need access to the array of longs and we 160 * need compare-and-swap. 161 */ 162 static final class LockFreeBitArray { 163 private static final int LONG_ADDRESSABLE_BITS = 6; 164 final AtomicLongArray data; 165 private final LongAddable bitCount; 166 167 LockFreeBitArray(long bits) { 168 checkArgument(bits > 0, "data length is zero!"); 169 // Avoid delegating to this(long[]), since AtomicLongArray(long[]) will clone its input and 170 // thus double memory usage. 171 this.data = 172 new AtomicLongArray(Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))); 173 this.bitCount = LongAddables.create(); 174 } 175 176 // Used by serialization 177 LockFreeBitArray(long[] data) { 178 checkArgument(data.length > 0, "data length is zero!"); 179 this.data = new AtomicLongArray(data); 180 this.bitCount = LongAddables.create(); 181 long bitCount = 0; 182 for (long value : data) { 183 bitCount += Long.bitCount(value); 184 } 185 this.bitCount.add(bitCount); 186 } 187 188 /** Returns true if the bit changed value. */ 189 boolean set(long bitIndex) { 190 if (get(bitIndex)) { 191 return false; 192 } 193 194 int longIndex = (int) (bitIndex >>> LONG_ADDRESSABLE_BITS); 195 long mask = 1L << bitIndex; // only cares about low 6 bits of bitIndex 196 197 long oldValue; 198 long newValue; 199 do { 200 oldValue = data.get(longIndex); 201 newValue = oldValue | mask; 202 if (oldValue == newValue) { 203 return false; 204 } 205 } while (!data.compareAndSet(longIndex, oldValue, newValue)); 206 207 // We turned the bit on, so increment bitCount. 208 bitCount.increment(); 209 return true; 210 } 211 212 boolean get(long bitIndex) { 213 return (data.get((int) (bitIndex >>> LONG_ADDRESSABLE_BITS)) & (1L << bitIndex)) != 0; 214 } 215 216 /** 217 * Careful here: if threads are mutating the atomicLongArray while this method is executing, the 218 * final long[] will be a "rolling snapshot" of the state of the bit array. This is usually good 219 * enough, but should be kept in mind. 220 */ 221 public static long[] toPlainArray(AtomicLongArray atomicLongArray) { 222 long[] array = new long[atomicLongArray.length()]; 223 for (int i = 0; i < array.length; ++i) { 224 array[i] = atomicLongArray.get(i); 225 } 226 return array; 227 } 228 229 /** Number of bits */ 230 long bitSize() { 231 return (long) data.length() * Long.SIZE; 232 } 233 234 /** 235 * Number of set bits (1s). 236 * 237 * <p>Note that because of concurrent set calls and uses of atomics, this bitCount is a (very) 238 * close *estimate* of the actual number of bits set. It's not possible to do better than an 239 * estimate without locking. Note that the number, if not exactly accurate, is *always* 240 * underestimating, never overestimating. 241 */ 242 long bitCount() { 243 return bitCount.sum(); 244 } 245 246 LockFreeBitArray copy() { 247 return new LockFreeBitArray(toPlainArray(data)); 248 } 249 250 /** 251 * Combines the two BitArrays using bitwise OR. 252 * 253 * <p>NOTE: Because of the use of atomics, if the other LockFreeBitArray is being mutated while 254 * this operation is executing, not all of those new 1's may be set in the final state of this 255 * LockFreeBitArray. The ONLY guarantee provided is that all the bits that were set in the other 256 * LockFreeBitArray at the start of this method will be set in this LockFreeBitArray at the end 257 * of this method. 258 */ 259 void putAll(LockFreeBitArray other) { 260 checkArgument( 261 data.length() == other.data.length(), 262 "BitArrays must be of equal length (%s != %s)", 263 data.length(), 264 other.data.length()); 265 for (int i = 0; i < data.length(); i++) { 266 long otherLong = other.data.get(i); 267 268 long ourLongOld; 269 long ourLongNew; 270 boolean changedAnyBits = true; 271 do { 272 ourLongOld = data.get(i); 273 ourLongNew = ourLongOld | otherLong; 274 if (ourLongOld == ourLongNew) { 275 changedAnyBits = false; 276 break; 277 } 278 } while (!data.compareAndSet(i, ourLongOld, ourLongNew)); 279 280 if (changedAnyBits) { 281 int bitsAdded = Long.bitCount(ourLongNew) - Long.bitCount(ourLongOld); 282 bitCount.add(bitsAdded); 283 } 284 } 285 } 286 287 @Override 288 public boolean equals(@CheckForNull Object o) { 289 if (o instanceof LockFreeBitArray) { 290 LockFreeBitArray lockFreeBitArray = (LockFreeBitArray) o; 291 // TODO(lowasser): avoid allocation here 292 return Arrays.equals(toPlainArray(data), toPlainArray(lockFreeBitArray.data)); 293 } 294 return false; 295 } 296 297 @Override 298 public int hashCode() { 299 // TODO(lowasser): avoid allocation here 300 return Arrays.hashCode(toPlainArray(data)); 301 } 302 } 303 }