Coverage Summary for Class: Murmur3_32HashFunction (com.google.common.hash)

Class Method, % Line, %
Murmur3_32HashFunction 0% (0/20) 0% (0/110)
Murmur3_32HashFunction$Murmur3_32Hasher 0% (0/10) 0% (0/66)
Total 0% (0/30) 0% (0/176)


1 /* 2  * Copyright (C) 2011 The Guava Authors 3  * 4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5  * in compliance with the License. You may obtain a copy of the License at 6  * 7  * http://www.apache.org/licenses/LICENSE-2.0 8  * 9  * Unless required by applicable law or agreed to in writing, software distributed under the License 10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11  * or implied. See the License for the specific language governing permissions and limitations under 12  * the License. 13  */ 14  15 /* 16  * MurmurHash3 was written by Austin Appleby, and is placed in the public 17  * domain. The author hereby disclaims copyright to this source code. 18  */ 19  20 /* 21  * Source: 22  * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp 23  * (Modified to adapt to Guava coding conventions and to use the HashFunction interface) 24  */ 25  26 package com.google.common.hash; 27  28 import static com.google.common.base.Preconditions.checkPositionIndexes; 29 import static com.google.common.base.Preconditions.checkState; 30 import static com.google.common.primitives.UnsignedBytes.toInt; 31  32 import com.google.common.base.Charsets; 33 import com.google.common.primitives.Chars; 34 import com.google.common.primitives.Ints; 35 import com.google.common.primitives.Longs; 36 import com.google.errorprone.annotations.CanIgnoreReturnValue; 37 import com.google.errorprone.annotations.Immutable; 38 import java.io.Serializable; 39 import java.nio.ByteBuffer; 40 import java.nio.ByteOrder; 41 import java.nio.charset.Charset; 42 import javax.annotation.CheckForNull; 43  44 /** 45  * See MurmurHash3_x86_32 in <a 46  * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">the C++ 47  * implementation</a>. 48  * 49  * @author Austin Appleby 50  * @author Dimitris Andreou 51  * @author Kurt Alfred Kluever 52  */ 53 @Immutable 54 @ElementTypesAreNonnullByDefault 55 final class Murmur3_32HashFunction extends AbstractHashFunction implements Serializable { 56  static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0); 57  58  static final HashFunction GOOD_FAST_HASH_32 = 59  new Murmur3_32HashFunction(Hashing.GOOD_FAST_HASH_SEED); 60  61  private static final int CHUNK_SIZE = 4; 62  63  private static final int C1 = 0xcc9e2d51; 64  private static final int C2 = 0x1b873593; 65  66  private final int seed; 67  68  Murmur3_32HashFunction(int seed) { 69  this.seed = seed; 70  } 71  72  @Override 73  public int bits() { 74  return 32; 75  } 76  77  @Override 78  public Hasher newHasher() { 79  return new Murmur3_32Hasher(seed); 80  } 81  82  @Override 83  public String toString() { 84  return "Hashing.murmur3_32(" + seed + ")"; 85  } 86  87  @Override 88  public boolean equals(@CheckForNull Object object) { 89  if (object instanceof Murmur3_32HashFunction) { 90  Murmur3_32HashFunction other = (Murmur3_32HashFunction) object; 91  return seed == other.seed; 92  } 93  return false; 94  } 95  96  @Override 97  public int hashCode() { 98  return getClass().hashCode() ^ seed; 99  } 100  101  @Override 102  public HashCode hashInt(int input) { 103  int k1 = mixK1(input); 104  int h1 = mixH1(seed, k1); 105  106  return fmix(h1, Ints.BYTES); 107  } 108  109  @Override 110  public HashCode hashLong(long input) { 111  int low = (int) input; 112  int high = (int) (input >>> 32); 113  114  int k1 = mixK1(low); 115  int h1 = mixH1(seed, k1); 116  117  k1 = mixK1(high); 118  h1 = mixH1(h1, k1); 119  120  return fmix(h1, Longs.BYTES); 121  } 122  123  @Override 124  public HashCode hashUnencodedChars(CharSequence input) { 125  int h1 = seed; 126  127  // step through the CharSequence 2 chars at a time 128  for (int i = 1; i < input.length(); i += 2) { 129  int k1 = input.charAt(i - 1) | (input.charAt(i) << 16); 130  k1 = mixK1(k1); 131  h1 = mixH1(h1, k1); 132  } 133  134  // deal with any remaining characters 135  if ((input.length() & 1) == 1) { 136  int k1 = input.charAt(input.length() - 1); 137  k1 = mixK1(k1); 138  h1 ^= k1; 139  } 140  141  return fmix(h1, Chars.BYTES * input.length()); 142  } 143  144  @SuppressWarnings("deprecation") // need to use Charsets for Android tests to pass 145  @Override 146  public HashCode hashString(CharSequence input, Charset charset) { 147  if (Charsets.UTF_8.equals(charset)) { 148  int utf16Length = input.length(); 149  int h1 = seed; 150  int i = 0; 151  int len = 0; 152  153  // This loop optimizes for pure ASCII. 154  while (i + 4 <= utf16Length) { 155  char c0 = input.charAt(i); 156  char c1 = input.charAt(i + 1); 157  char c2 = input.charAt(i + 2); 158  char c3 = input.charAt(i + 3); 159  if (c0 < 0x80 && c1 < 0x80 && c2 < 0x80 && c3 < 0x80) { 160  int k1 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); 161  k1 = mixK1(k1); 162  h1 = mixH1(h1, k1); 163  i += 4; 164  len += 4; 165  } else { 166  break; 167  } 168  } 169  170  long buffer = 0; 171  int shift = 0; 172  for (; i < utf16Length; i++) { 173  char c = input.charAt(i); 174  if (c < 0x80) { 175  buffer |= (long) c << shift; 176  shift += 8; 177  len++; 178  } else if (c < 0x800) { 179  buffer |= charToTwoUtf8Bytes(c) << shift; 180  shift += 16; 181  len += 2; 182  } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) { 183  buffer |= charToThreeUtf8Bytes(c) << shift; 184  shift += 24; 185  len += 3; 186  } else { 187  int codePoint = Character.codePointAt(input, i); 188  if (codePoint == c) { 189  // not a valid code point; let the JDK handle invalid Unicode 190  return hashBytes(input.toString().getBytes(charset)); 191  } 192  i++; 193  buffer |= codePointToFourUtf8Bytes(codePoint) << shift; 194  len += 4; 195  } 196  197  if (shift >= 32) { 198  int k1 = mixK1((int) buffer); 199  h1 = mixH1(h1, k1); 200  buffer = buffer >>> 32; 201  shift -= 32; 202  } 203  } 204  205  int k1 = mixK1((int) buffer); 206  h1 ^= k1; 207  return fmix(h1, len); 208  } else { 209  return hashBytes(input.toString().getBytes(charset)); 210  } 211  } 212  213  @Override 214  public HashCode hashBytes(byte[] input, int off, int len) { 215  checkPositionIndexes(off, off + len, input.length); 216  int h1 = seed; 217  int i; 218  for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) { 219  int k1 = mixK1(getIntLittleEndian(input, off + i)); 220  h1 = mixH1(h1, k1); 221  } 222  223  int k1 = 0; 224  for (int shift = 0; i < len; i++, shift += 8) { 225  k1 ^= toInt(input[off + i]) << shift; 226  } 227  h1 ^= mixK1(k1); 228  return fmix(h1, len); 229  } 230  231  private static int getIntLittleEndian(byte[] input, int offset) { 232  return Ints.fromBytes(input[offset + 3], input[offset + 2], input[offset + 1], input[offset]); 233  } 234  235  private static int mixK1(int k1) { 236  k1 *= C1; 237  k1 = Integer.rotateLeft(k1, 15); 238  k1 *= C2; 239  return k1; 240  } 241  242  private static int mixH1(int h1, int k1) { 243  h1 ^= k1; 244  h1 = Integer.rotateLeft(h1, 13); 245  h1 = h1 * 5 + 0xe6546b64; 246  return h1; 247  } 248  249  // Finalization mix - force all bits of a hash block to avalanche 250  private static HashCode fmix(int h1, int length) { 251  h1 ^= length; 252  h1 ^= h1 >>> 16; 253  h1 *= 0x85ebca6b; 254  h1 ^= h1 >>> 13; 255  h1 *= 0xc2b2ae35; 256  h1 ^= h1 >>> 16; 257  return HashCode.fromInt(h1); 258  } 259  260  @CanIgnoreReturnValue 261  private static final class Murmur3_32Hasher extends AbstractHasher { 262  private int h1; 263  private long buffer; 264  private int shift; 265  private int length; 266  private boolean isDone; 267  268  Murmur3_32Hasher(int seed) { 269  this.h1 = seed; 270  this.length = 0; 271  isDone = false; 272  } 273  274  private void update(int nBytes, long update) { 275  // 1 <= nBytes <= 4 276  buffer |= (update & 0xFFFFFFFFL) << shift; 277  shift += nBytes * 8; 278  length += nBytes; 279  280  if (shift >= 32) { 281  h1 = mixH1(h1, mixK1((int) buffer)); 282  buffer >>>= 32; 283  shift -= 32; 284  } 285  } 286  287  @Override 288  public Hasher putByte(byte b) { 289  update(1, b & 0xFF); 290  return this; 291  } 292  293  @Override 294  public Hasher putBytes(byte[] bytes, int off, int len) { 295  checkPositionIndexes(off, off + len, bytes.length); 296  int i; 297  for (i = 0; i + 4 <= len; i += 4) { 298  update(4, getIntLittleEndian(bytes, off + i)); 299  } 300  for (; i < len; i++) { 301  putByte(bytes[off + i]); 302  } 303  return this; 304  } 305  306  @Override 307  public Hasher putBytes(ByteBuffer buffer) { 308  ByteOrder bo = buffer.order(); 309  buffer.order(ByteOrder.LITTLE_ENDIAN); 310  while (buffer.remaining() >= 4) { 311  putInt(buffer.getInt()); 312  } 313  while (buffer.hasRemaining()) { 314  putByte(buffer.get()); 315  } 316  buffer.order(bo); 317  return this; 318  } 319  320  @Override 321  public Hasher putInt(int i) { 322  update(4, i); 323  return this; 324  } 325  326  @Override 327  public Hasher putLong(long l) { 328  update(4, (int) l); 329  update(4, l >>> 32); 330  return this; 331  } 332  333  @Override 334  public Hasher putChar(char c) { 335  update(2, c); 336  return this; 337  } 338  339  @SuppressWarnings("deprecation") // need to use Charsets for Android tests to pass 340  @Override 341  public Hasher putString(CharSequence input, Charset charset) { 342  if (Charsets.UTF_8.equals(charset)) { 343  int utf16Length = input.length(); 344  int i = 0; 345  346  // This loop optimizes for pure ASCII. 347  while (i + 4 <= utf16Length) { 348  char c0 = input.charAt(i); 349  char c1 = input.charAt(i + 1); 350  char c2 = input.charAt(i + 2); 351  char c3 = input.charAt(i + 3); 352  if (c0 < 0x80 && c1 < 0x80 && c2 < 0x80 && c3 < 0x80) { 353  update(4, c0 | (c1 << 8) | (c2 << 16) | (c3 << 24)); 354  i += 4; 355  } else { 356  break; 357  } 358  } 359  360  for (; i < utf16Length; i++) { 361  char c = input.charAt(i); 362  if (c < 0x80) { 363  update(1, c); 364  } else if (c < 0x800) { 365  update(2, charToTwoUtf8Bytes(c)); 366  } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) { 367  update(3, charToThreeUtf8Bytes(c)); 368  } else { 369  int codePoint = Character.codePointAt(input, i); 370  if (codePoint == c) { 371  // fall back to JDK getBytes instead of trying to handle invalid surrogates ourselves 372  putBytes(input.subSequence(i, utf16Length).toString().getBytes(charset)); 373  return this; 374  } 375  i++; 376  update(4, codePointToFourUtf8Bytes(codePoint)); 377  } 378  } 379  return this; 380  } else { 381  return super.putString(input, charset); 382  } 383  } 384  385  @Override 386  public HashCode hash() { 387  checkState(!isDone); 388  isDone = true; 389  h1 ^= mixK1((int) buffer); 390  return fmix(h1, length); 391  } 392  } 393  394  private static long codePointToFourUtf8Bytes(int codePoint) { 395  return (((0xFL << 4) | (codePoint >>> 18)) & 0xFF) 396  | ((0x80L | (0x3F & (codePoint >>> 12))) << 8) 397  | ((0x80L | (0x3F & (codePoint >>> 6))) << 16) 398  | ((0x80L | (0x3F & codePoint)) << 24); 399  } 400  401  private static long charToThreeUtf8Bytes(char c) { 402  return (((0xF << 5) | (c >>> 12)) & 0xFF) 403  | ((0x80 | (0x3F & (c >>> 6))) << 8) 404  | ((0x80 | (0x3F & c)) << 16); 405  } 406  407  private static long charToTwoUtf8Bytes(char c) { 408  return (((0xF << 6) | (c >>> 6)) & 0xFF) | ((0x80 | (0x3F & c)) << 8); 409  } 410  411  private static final long serialVersionUID = 0L; 412 }