Coverage Summary for Class: Murmur3_32HashFunction (com.google.common.hash)
| Class | Method, % | Line, % |
|---|---|---|
| Murmur3_32HashFunction | 0% (0/20) | 0% (0/110) |
| Murmur3_32HashFunction$Murmur3_32Hasher | 0% (0/10) | 0% (0/66) |
| Total | 0% (0/30) | 0% (0/176) |
1 /* 2 * Copyright (C) 2011 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 /* 16 * MurmurHash3 was written by Austin Appleby, and is placed in the public 17 * domain. The author hereby disclaims copyright to this source code. 18 */ 19 20 /* 21 * Source: 22 * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp 23 * (Modified to adapt to Guava coding conventions and to use the HashFunction interface) 24 */ 25 26 package com.google.common.hash; 27 28 import static com.google.common.base.Preconditions.checkPositionIndexes; 29 import static com.google.common.base.Preconditions.checkState; 30 import static com.google.common.primitives.UnsignedBytes.toInt; 31 32 import com.google.common.base.Charsets; 33 import com.google.common.primitives.Chars; 34 import com.google.common.primitives.Ints; 35 import com.google.common.primitives.Longs; 36 import com.google.errorprone.annotations.CanIgnoreReturnValue; 37 import com.google.errorprone.annotations.Immutable; 38 import java.io.Serializable; 39 import java.nio.ByteBuffer; 40 import java.nio.ByteOrder; 41 import java.nio.charset.Charset; 42 import javax.annotation.CheckForNull; 43 44 /** 45 * See MurmurHash3_x86_32 in <a 46 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">the C++ 47 * implementation</a>. 48 * 49 * @author Austin Appleby 50 * @author Dimitris Andreou 51 * @author Kurt Alfred Kluever 52 */ 53 @Immutable 54 @ElementTypesAreNonnullByDefault 55 final class Murmur3_32HashFunction extends AbstractHashFunction implements Serializable { 56 static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0); 57 58 static final HashFunction GOOD_FAST_HASH_32 = 59 new Murmur3_32HashFunction(Hashing.GOOD_FAST_HASH_SEED); 60 61 private static final int CHUNK_SIZE = 4; 62 63 private static final int C1 = 0xcc9e2d51; 64 private static final int C2 = 0x1b873593; 65 66 private final int seed; 67 68 Murmur3_32HashFunction(int seed) { 69 this.seed = seed; 70 } 71 72 @Override 73 public int bits() { 74 return 32; 75 } 76 77 @Override 78 public Hasher newHasher() { 79 return new Murmur3_32Hasher(seed); 80 } 81 82 @Override 83 public String toString() { 84 return "Hashing.murmur3_32(" + seed + ")"; 85 } 86 87 @Override 88 public boolean equals(@CheckForNull Object object) { 89 if (object instanceof Murmur3_32HashFunction) { 90 Murmur3_32HashFunction other = (Murmur3_32HashFunction) object; 91 return seed == other.seed; 92 } 93 return false; 94 } 95 96 @Override 97 public int hashCode() { 98 return getClass().hashCode() ^ seed; 99 } 100 101 @Override 102 public HashCode hashInt(int input) { 103 int k1 = mixK1(input); 104 int h1 = mixH1(seed, k1); 105 106 return fmix(h1, Ints.BYTES); 107 } 108 109 @Override 110 public HashCode hashLong(long input) { 111 int low = (int) input; 112 int high = (int) (input >>> 32); 113 114 int k1 = mixK1(low); 115 int h1 = mixH1(seed, k1); 116 117 k1 = mixK1(high); 118 h1 = mixH1(h1, k1); 119 120 return fmix(h1, Longs.BYTES); 121 } 122 123 @Override 124 public HashCode hashUnencodedChars(CharSequence input) { 125 int h1 = seed; 126 127 // step through the CharSequence 2 chars at a time 128 for (int i = 1; i < input.length(); i += 2) { 129 int k1 = input.charAt(i - 1) | (input.charAt(i) << 16); 130 k1 = mixK1(k1); 131 h1 = mixH1(h1, k1); 132 } 133 134 // deal with any remaining characters 135 if ((input.length() & 1) == 1) { 136 int k1 = input.charAt(input.length() - 1); 137 k1 = mixK1(k1); 138 h1 ^= k1; 139 } 140 141 return fmix(h1, Chars.BYTES * input.length()); 142 } 143 144 @SuppressWarnings("deprecation") // need to use Charsets for Android tests to pass 145 @Override 146 public HashCode hashString(CharSequence input, Charset charset) { 147 if (Charsets.UTF_8.equals(charset)) { 148 int utf16Length = input.length(); 149 int h1 = seed; 150 int i = 0; 151 int len = 0; 152 153 // This loop optimizes for pure ASCII. 154 while (i + 4 <= utf16Length) { 155 char c0 = input.charAt(i); 156 char c1 = input.charAt(i + 1); 157 char c2 = input.charAt(i + 2); 158 char c3 = input.charAt(i + 3); 159 if (c0 < 0x80 && c1 < 0x80 && c2 < 0x80 && c3 < 0x80) { 160 int k1 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); 161 k1 = mixK1(k1); 162 h1 = mixH1(h1, k1); 163 i += 4; 164 len += 4; 165 } else { 166 break; 167 } 168 } 169 170 long buffer = 0; 171 int shift = 0; 172 for (; i < utf16Length; i++) { 173 char c = input.charAt(i); 174 if (c < 0x80) { 175 buffer |= (long) c << shift; 176 shift += 8; 177 len++; 178 } else if (c < 0x800) { 179 buffer |= charToTwoUtf8Bytes(c) << shift; 180 shift += 16; 181 len += 2; 182 } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) { 183 buffer |= charToThreeUtf8Bytes(c) << shift; 184 shift += 24; 185 len += 3; 186 } else { 187 int codePoint = Character.codePointAt(input, i); 188 if (codePoint == c) { 189 // not a valid code point; let the JDK handle invalid Unicode 190 return hashBytes(input.toString().getBytes(charset)); 191 } 192 i++; 193 buffer |= codePointToFourUtf8Bytes(codePoint) << shift; 194 len += 4; 195 } 196 197 if (shift >= 32) { 198 int k1 = mixK1((int) buffer); 199 h1 = mixH1(h1, k1); 200 buffer = buffer >>> 32; 201 shift -= 32; 202 } 203 } 204 205 int k1 = mixK1((int) buffer); 206 h1 ^= k1; 207 return fmix(h1, len); 208 } else { 209 return hashBytes(input.toString().getBytes(charset)); 210 } 211 } 212 213 @Override 214 public HashCode hashBytes(byte[] input, int off, int len) { 215 checkPositionIndexes(off, off + len, input.length); 216 int h1 = seed; 217 int i; 218 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) { 219 int k1 = mixK1(getIntLittleEndian(input, off + i)); 220 h1 = mixH1(h1, k1); 221 } 222 223 int k1 = 0; 224 for (int shift = 0; i < len; i++, shift += 8) { 225 k1 ^= toInt(input[off + i]) << shift; 226 } 227 h1 ^= mixK1(k1); 228 return fmix(h1, len); 229 } 230 231 private static int getIntLittleEndian(byte[] input, int offset) { 232 return Ints.fromBytes(input[offset + 3], input[offset + 2], input[offset + 1], input[offset]); 233 } 234 235 private static int mixK1(int k1) { 236 k1 *= C1; 237 k1 = Integer.rotateLeft(k1, 15); 238 k1 *= C2; 239 return k1; 240 } 241 242 private static int mixH1(int h1, int k1) { 243 h1 ^= k1; 244 h1 = Integer.rotateLeft(h1, 13); 245 h1 = h1 * 5 + 0xe6546b64; 246 return h1; 247 } 248 249 // Finalization mix - force all bits of a hash block to avalanche 250 private static HashCode fmix(int h1, int length) { 251 h1 ^= length; 252 h1 ^= h1 >>> 16; 253 h1 *= 0x85ebca6b; 254 h1 ^= h1 >>> 13; 255 h1 *= 0xc2b2ae35; 256 h1 ^= h1 >>> 16; 257 return HashCode.fromInt(h1); 258 } 259 260 @CanIgnoreReturnValue 261 private static final class Murmur3_32Hasher extends AbstractHasher { 262 private int h1; 263 private long buffer; 264 private int shift; 265 private int length; 266 private boolean isDone; 267 268 Murmur3_32Hasher(int seed) { 269 this.h1 = seed; 270 this.length = 0; 271 isDone = false; 272 } 273 274 private void update(int nBytes, long update) { 275 // 1 <= nBytes <= 4 276 buffer |= (update & 0xFFFFFFFFL) << shift; 277 shift += nBytes * 8; 278 length += nBytes; 279 280 if (shift >= 32) { 281 h1 = mixH1(h1, mixK1((int) buffer)); 282 buffer >>>= 32; 283 shift -= 32; 284 } 285 } 286 287 @Override 288 public Hasher putByte(byte b) { 289 update(1, b & 0xFF); 290 return this; 291 } 292 293 @Override 294 public Hasher putBytes(byte[] bytes, int off, int len) { 295 checkPositionIndexes(off, off + len, bytes.length); 296 int i; 297 for (i = 0; i + 4 <= len; i += 4) { 298 update(4, getIntLittleEndian(bytes, off + i)); 299 } 300 for (; i < len; i++) { 301 putByte(bytes[off + i]); 302 } 303 return this; 304 } 305 306 @Override 307 public Hasher putBytes(ByteBuffer buffer) { 308 ByteOrder bo = buffer.order(); 309 buffer.order(ByteOrder.LITTLE_ENDIAN); 310 while (buffer.remaining() >= 4) { 311 putInt(buffer.getInt()); 312 } 313 while (buffer.hasRemaining()) { 314 putByte(buffer.get()); 315 } 316 buffer.order(bo); 317 return this; 318 } 319 320 @Override 321 public Hasher putInt(int i) { 322 update(4, i); 323 return this; 324 } 325 326 @Override 327 public Hasher putLong(long l) { 328 update(4, (int) l); 329 update(4, l >>> 32); 330 return this; 331 } 332 333 @Override 334 public Hasher putChar(char c) { 335 update(2, c); 336 return this; 337 } 338 339 @SuppressWarnings("deprecation") // need to use Charsets for Android tests to pass 340 @Override 341 public Hasher putString(CharSequence input, Charset charset) { 342 if (Charsets.UTF_8.equals(charset)) { 343 int utf16Length = input.length(); 344 int i = 0; 345 346 // This loop optimizes for pure ASCII. 347 while (i + 4 <= utf16Length) { 348 char c0 = input.charAt(i); 349 char c1 = input.charAt(i + 1); 350 char c2 = input.charAt(i + 2); 351 char c3 = input.charAt(i + 3); 352 if (c0 < 0x80 && c1 < 0x80 && c2 < 0x80 && c3 < 0x80) { 353 update(4, c0 | (c1 << 8) | (c2 << 16) | (c3 << 24)); 354 i += 4; 355 } else { 356 break; 357 } 358 } 359 360 for (; i < utf16Length; i++) { 361 char c = input.charAt(i); 362 if (c < 0x80) { 363 update(1, c); 364 } else if (c < 0x800) { 365 update(2, charToTwoUtf8Bytes(c)); 366 } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) { 367 update(3, charToThreeUtf8Bytes(c)); 368 } else { 369 int codePoint = Character.codePointAt(input, i); 370 if (codePoint == c) { 371 // fall back to JDK getBytes instead of trying to handle invalid surrogates ourselves 372 putBytes(input.subSequence(i, utf16Length).toString().getBytes(charset)); 373 return this; 374 } 375 i++; 376 update(4, codePointToFourUtf8Bytes(codePoint)); 377 } 378 } 379 return this; 380 } else { 381 return super.putString(input, charset); 382 } 383 } 384 385 @Override 386 public HashCode hash() { 387 checkState(!isDone); 388 isDone = true; 389 h1 ^= mixK1((int) buffer); 390 return fmix(h1, length); 391 } 392 } 393 394 private static long codePointToFourUtf8Bytes(int codePoint) { 395 return (((0xFL << 4) | (codePoint >>> 18)) & 0xFF) 396 | ((0x80L | (0x3F & (codePoint >>> 12))) << 8) 397 | ((0x80L | (0x3F & (codePoint >>> 6))) << 16) 398 | ((0x80L | (0x3F & codePoint)) << 24); 399 } 400 401 private static long charToThreeUtf8Bytes(char c) { 402 return (((0xF << 5) | (c >>> 12)) & 0xFF) 403 | ((0x80 | (0x3F & (c >>> 6))) << 8) 404 | ((0x80 | (0x3F & c)) << 16); 405 } 406 407 private static long charToTwoUtf8Bytes(char c) { 408 return (((0xF << 6) | (c >>> 6)) & 0xFF) | ((0x80 | (0x3F & c)) << 8); 409 } 410 411 private static final long serialVersionUID = 0L; 412 }