Coverage Summary for Class: Murmur3_32HashFunction (com.google.common.hash)

Class	Method, %	Line, %
Murmur3_32HashFunction	0% (0/20)	0% (0/110)
Murmur3_32HashFunction$Murmur3_32Hasher	0% (0/10)	0% (0/66)
Total	0% (0/30)	0% (0/176)
1 /*
2  * Copyright (C) 2011 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 /*
16  * MurmurHash3 was written by Austin Appleby, and is placed in the public
17  * domain. The author hereby disclaims copyright to this source code.
18  */
19 
20 /*
21  * Source:
22  * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
23  * (Modified to adapt to Guava coding conventions and to use the HashFunction interface)
24  */
25 
26 package com.google.common.hash;
27 
28 import static com.google.common.base.Preconditions.checkPositionIndexes;
29 import static com.google.common.base.Preconditions.checkState;
30 import static com.google.common.primitives.UnsignedBytes.toInt;
31 
32 import com.google.common.base.Charsets;
33 import com.google.common.primitives.Chars;
34 import com.google.common.primitives.Ints;
35 import com.google.common.primitives.Longs;
36 import com.google.errorprone.annotations.CanIgnoreReturnValue;
37 import com.google.errorprone.annotations.Immutable;
38 import java.io.Serializable;
39 import java.nio.ByteBuffer;
40 import java.nio.ByteOrder;
41 import java.nio.charset.Charset;
42 import javax.annotation.CheckForNull;
43 
44 /**
45  * See MurmurHash3_x86_32 in <a
46  * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">the C++
47  * implementation</a>.
48  *
49  * @author Austin Appleby
50  * @author Dimitris Andreou
51  * @author Kurt Alfred Kluever
52  */
53 @Immutable
54 @ElementTypesAreNonnullByDefault
55 final class Murmur3_32HashFunction extends AbstractHashFunction implements Serializable {
56   static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0);
57 
58   static final HashFunction GOOD_FAST_HASH_32 =
59       new Murmur3_32HashFunction(Hashing.GOOD_FAST_HASH_SEED);
60 
61   private static final int CHUNK_SIZE = 4;
62 
63   private static final int C1 = 0xcc9e2d51;
64   private static final int C2 = 0x1b873593;
65 
66   private final int seed;
67 
68   Murmur3_32HashFunction(int seed) {
69     this.seed = seed;
70   }
71 
72   @Override
73   public int bits() {
74     return 32;
75   }
76 
77   @Override
78   public Hasher newHasher() {
79     return new Murmur3_32Hasher(seed);
80   }
81 
82   @Override
83   public String toString() {
84     return "Hashing.murmur3_32(" + seed + ")";
85   }
86 
87   @Override
88   public boolean equals(@CheckForNull Object object) {
89     if (object instanceof Murmur3_32HashFunction) {
90       Murmur3_32HashFunction other = (Murmur3_32HashFunction) object;
91       return seed == other.seed;
92     }
93     return false;
94   }
95 
96   @Override
97   public int hashCode() {
98     return getClass().hashCode() ^ seed;
99   }
100 
101   @Override
102   public HashCode hashInt(int input) {
103     int k1 = mixK1(input);
104     int h1 = mixH1(seed, k1);
105 
106     return fmix(h1, Ints.BYTES);
107   }
108 
109   @Override
110   public HashCode hashLong(long input) {
111     int low = (int) input;
112     int high = (int) (input >>> 32);
113 
114     int k1 = mixK1(low);
115     int h1 = mixH1(seed, k1);
116 
117     k1 = mixK1(high);
118     h1 = mixH1(h1, k1);
119 
120     return fmix(h1, Longs.BYTES);
121   }
122 
123   @Override
124   public HashCode hashUnencodedChars(CharSequence input) {
125     int h1 = seed;
126 
127     // step through the CharSequence 2 chars at a time
128     for (int i = 1; i < input.length(); i += 2) {
129       int k1 = input.charAt(i - 1) | (input.charAt(i) << 16);
130       k1 = mixK1(k1);
131       h1 = mixH1(h1, k1);
132     }
133 
134     // deal with any remaining characters
135     if ((input.length() & 1) == 1) {
136       int k1 = input.charAt(input.length() - 1);
137       k1 = mixK1(k1);
138       h1 ^= k1;
139     }
140 
141     return fmix(h1, Chars.BYTES * input.length());
142   }
143 
144   @SuppressWarnings("deprecation") // need to use Charsets for Android tests to pass
145   @Override
146   public HashCode hashString(CharSequence input, Charset charset) {
147     if (Charsets.UTF_8.equals(charset)) {
148       int utf16Length = input.length();
149       int h1 = seed;
150       int i = 0;
151       int len = 0;
152 
153       // This loop optimizes for pure ASCII.
154       while (i + 4 <= utf16Length) {
155         char c0 = input.charAt(i);
156         char c1 = input.charAt(i + 1);
157         char c2 = input.charAt(i + 2);
158         char c3 = input.charAt(i + 3);
159         if (c0 < 0x80 && c1 < 0x80 && c2 < 0x80 && c3 < 0x80) {
160           int k1 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
161           k1 = mixK1(k1);
162           h1 = mixH1(h1, k1);
163           i += 4;
164           len += 4;
165         } else {
166           break;
167         }
168       }
169 
170       long buffer = 0;
171       int shift = 0;
172       for (; i < utf16Length; i++) {
173         char c = input.charAt(i);
174         if (c < 0x80) {
175           buffer |= (long) c << shift;
176           shift += 8;
177           len++;
178         } else if (c < 0x800) {
179           buffer |= charToTwoUtf8Bytes(c) << shift;
180           shift += 16;
181           len += 2;
182         } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) {
183           buffer |= charToThreeUtf8Bytes(c) << shift;
184           shift += 24;
185           len += 3;
186         } else {
187           int codePoint = Character.codePointAt(input, i);
188           if (codePoint == c) {
189             // not a valid code point; let the JDK handle invalid Unicode
190             return hashBytes(input.toString().getBytes(charset));
191           }
192           i++;
193           buffer |= codePointToFourUtf8Bytes(codePoint) << shift;
194           len += 4;
195         }
196 
197         if (shift >= 32) {
198           int k1 = mixK1((int) buffer);
199           h1 = mixH1(h1, k1);
200           buffer = buffer >>> 32;
201           shift -= 32;
202         }
203       }
204 
205       int k1 = mixK1((int) buffer);
206       h1 ^= k1;
207       return fmix(h1, len);
208     } else {
209       return hashBytes(input.toString().getBytes(charset));
210     }
211   }
212 
213   @Override
214   public HashCode hashBytes(byte[] input, int off, int len) {
215     checkPositionIndexes(off, off + len, input.length);
216     int h1 = seed;
217     int i;
218     for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) {
219       int k1 = mixK1(getIntLittleEndian(input, off + i));
220       h1 = mixH1(h1, k1);
221     }
222 
223     int k1 = 0;
224     for (int shift = 0; i < len; i++, shift += 8) {
225       k1 ^= toInt(input[off + i]) << shift;
226     }
227     h1 ^= mixK1(k1);
228     return fmix(h1, len);
229   }
230 
231   private static int getIntLittleEndian(byte[] input, int offset) {
232     return Ints.fromBytes(input[offset + 3], input[offset + 2], input[offset + 1], input[offset]);
233   }
234 
235   private static int mixK1(int k1) {
236     k1 *= C1;
237     k1 = Integer.rotateLeft(k1, 15);
238     k1 *= C2;
239     return k1;
240   }
241 
242   private static int mixH1(int h1, int k1) {
243     h1 ^= k1;
244     h1 = Integer.rotateLeft(h1, 13);
245     h1 = h1 * 5 + 0xe6546b64;
246     return h1;
247   }
248 
249   // Finalization mix - force all bits of a hash block to avalanche
250   private static HashCode fmix(int h1, int length) {
251     h1 ^= length;
252     h1 ^= h1 >>> 16;
253     h1 *= 0x85ebca6b;
254     h1 ^= h1 >>> 13;
255     h1 *= 0xc2b2ae35;
256     h1 ^= h1 >>> 16;
257     return HashCode.fromInt(h1);
258   }
259 
260   @CanIgnoreReturnValue
261   private static final class Murmur3_32Hasher extends AbstractHasher {
262     private int h1;
263     private long buffer;
264     private int shift;
265     private int length;
266     private boolean isDone;
267 
268     Murmur3_32Hasher(int seed) {
269       this.h1 = seed;
270       this.length = 0;
271       isDone = false;
272     }
273 
274     private void update(int nBytes, long update) {
275       // 1 <= nBytes <= 4
276       buffer |= (update & 0xFFFFFFFFL) << shift;
277       shift += nBytes * 8;
278       length += nBytes;
279 
280       if (shift >= 32) {
281         h1 = mixH1(h1, mixK1((int) buffer));
282         buffer >>>= 32;
283         shift -= 32;
284       }
285     }
286 
287     @Override
288     public Hasher putByte(byte b) {
289       update(1, b & 0xFF);
290       return this;
291     }
292 
293     @Override
294     public Hasher putBytes(byte[] bytes, int off, int len) {
295       checkPositionIndexes(off, off + len, bytes.length);
296       int i;
297       for (i = 0; i + 4 <= len; i += 4) {
298         update(4, getIntLittleEndian(bytes, off + i));
299       }
300       for (; i < len; i++) {
301         putByte(bytes[off + i]);
302       }
303       return this;
304     }
305 
306     @Override
307     public Hasher putBytes(ByteBuffer buffer) {
308       ByteOrder bo = buffer.order();
309       buffer.order(ByteOrder.LITTLE_ENDIAN);
310       while (buffer.remaining() >= 4) {
311         putInt(buffer.getInt());
312       }
313       while (buffer.hasRemaining()) {
314         putByte(buffer.get());
315       }
316       buffer.order(bo);
317       return this;
318     }
319 
320     @Override
321     public Hasher putInt(int i) {
322       update(4, i);
323       return this;
324     }
325 
326     @Override
327     public Hasher putLong(long l) {
328       update(4, (int) l);
329       update(4, l >>> 32);
330       return this;
331     }
332 
333     @Override
334     public Hasher putChar(char c) {
335       update(2, c);
336       return this;
337     }
338 
339     @SuppressWarnings("deprecation") // need to use Charsets for Android tests to pass
340     @Override
341     public Hasher putString(CharSequence input, Charset charset) {
342       if (Charsets.UTF_8.equals(charset)) {
343         int utf16Length = input.length();
344         int i = 0;
345 
346         // This loop optimizes for pure ASCII.
347         while (i + 4 <= utf16Length) {
348           char c0 = input.charAt(i);
349           char c1 = input.charAt(i + 1);
350           char c2 = input.charAt(i + 2);
351           char c3 = input.charAt(i + 3);
352           if (c0 < 0x80 && c1 < 0x80 && c2 < 0x80 && c3 < 0x80) {
353             update(4, c0 | (c1 << 8) | (c2 << 16) | (c3 << 24));
354             i += 4;
355           } else {
356             break;
357           }
358         }
359 
360         for (; i < utf16Length; i++) {
361           char c = input.charAt(i);
362           if (c < 0x80) {
363             update(1, c);
364           } else if (c < 0x800) {
365             update(2, charToTwoUtf8Bytes(c));
366           } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) {
367             update(3, charToThreeUtf8Bytes(c));
368           } else {
369             int codePoint = Character.codePointAt(input, i);
370             if (codePoint == c) {
371               // fall back to JDK getBytes instead of trying to handle invalid surrogates ourselves
372               putBytes(input.subSequence(i, utf16Length).toString().getBytes(charset));
373               return this;
374             }
375             i++;
376             update(4, codePointToFourUtf8Bytes(codePoint));
377           }
378         }
379         return this;
380       } else {
381         return super.putString(input, charset);
382       }
383     }
384 
385     @Override
386     public HashCode hash() {
387       checkState(!isDone);
388       isDone = true;
389       h1 ^= mixK1((int) buffer);
390       return fmix(h1, length);
391     }
392   }
393 
394   private static long codePointToFourUtf8Bytes(int codePoint) {
395     return (((0xFL << 4) | (codePoint >>> 18)) & 0xFF)
396         | ((0x80L | (0x3F & (codePoint >>> 12))) << 8)
397         | ((0x80L | (0x3F & (codePoint >>> 6))) << 16)
398         | ((0x80L | (0x3F & codePoint)) << 24);
399   }
400 
401   private static long charToThreeUtf8Bytes(char c) {
402     return (((0xF << 5) | (c >>> 12)) & 0xFF)
403         | ((0x80 | (0x3F & (c >>> 6))) << 8)
404         | ((0x80 | (0x3F & c)) << 16);
405   }
406 
407   private static long charToTwoUtf8Bytes(char c) {
408     return (((0xF << 6) | (c >>> 6)) & 0xFF) | ((0x80 | (0x3F & c)) << 8);
409   }
410 
411   private static final long serialVersionUID = 0L;
412 }