mirror of
https://github.com/Eaglercraft-Archive/Eaglercraftx-1.8.8-src.git
synced 2025-06-28 02:48:14 -05:00
Update #0 - First Release
This commit is contained in:
124
sources/main/java/com/google/common/hash/AbstractByteHasher.java
Normal file
124
sources/main/java/com/google/common/hash/AbstractByteHasher.java
Normal file
@ -0,0 +1,124 @@
|
||||
/*
|
||||
* Copyright (C) 2012 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.base.Preconditions.checkPositionIndexes;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
import com.google.common.primitives.Chars;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.google.common.primitives.Longs;
|
||||
import com.google.common.primitives.Shorts;
|
||||
|
||||
/**
|
||||
* Abstract {@link Hasher} that handles converting primitives to bytes using a
|
||||
* scratch {@code
|
||||
* ByteBuffer} and streams all bytes to a sink to compute the hash.
|
||||
*
|
||||
* @author Colin Decker
|
||||
*/
|
||||
abstract class AbstractByteHasher extends AbstractHasher {
|
||||
|
||||
private final ByteBuffer scratch = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
/**
|
||||
* Updates this hasher with the given byte.
|
||||
*/
|
||||
protected abstract void update(byte b);
|
||||
|
||||
/**
|
||||
* Updates this hasher with the given bytes.
|
||||
*/
|
||||
protected void update(byte[] b) {
|
||||
update(b, 0, b.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates this hasher with {@code len} bytes starting at {@code off} in the
|
||||
* given buffer.
|
||||
*/
|
||||
protected void update(byte[] b, int off, int len) {
|
||||
for (int i = off; i < off + len; i++) {
|
||||
update(b[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putByte(byte b) {
|
||||
update(b);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putBytes(byte[] bytes) {
|
||||
checkNotNull(bytes);
|
||||
update(bytes);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putBytes(byte[] bytes, int off, int len) {
|
||||
checkPositionIndexes(off, off + len, bytes.length);
|
||||
update(bytes, off, len);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the sink with the given number of bytes from the buffer.
|
||||
*/
|
||||
private Hasher update(int bytes) {
|
||||
try {
|
||||
update(scratch.array(), 0, bytes);
|
||||
} finally {
|
||||
scratch.clear();
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putShort(short s) {
|
||||
scratch.putShort(s);
|
||||
return update(Shorts.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putInt(int i) {
|
||||
scratch.putInt(i);
|
||||
return update(Ints.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putLong(long l) {
|
||||
scratch.putLong(l);
|
||||
return update(Longs.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putChar(char c) {
|
||||
scratch.putChar(c);
|
||||
return update(Chars.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
|
||||
funnel.funnel(instance, this);
|
||||
return this;
|
||||
}
|
||||
}
|
@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
* An abstract composition of multiple hash functions. {@linkplain #newHasher()}
|
||||
* delegates to the {@code Hasher} objects of the delegate hash functions, and
|
||||
* in the end, they are used by {@linkplain #makeHash(Hasher[])} that constructs
|
||||
* the final {@code HashCode}.
|
||||
*
|
||||
* @author Dimitris Andreou
|
||||
*/
|
||||
abstract class AbstractCompositeHashFunction extends AbstractStreamingHashFunction {
|
||||
final HashFunction[] functions;
|
||||
|
||||
AbstractCompositeHashFunction(HashFunction... functions) {
|
||||
for (HashFunction function : functions) {
|
||||
checkNotNull(function);
|
||||
}
|
||||
this.functions = functions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a {@code HashCode} from the {@code Hasher} objects of the
|
||||
* functions. Each of them has consumed the entire input and they are ready to
|
||||
* output a {@code HashCode}. The order of the hashers are the same order as the
|
||||
* functions given to the constructor.
|
||||
*/
|
||||
// this could be cleaner if it passed HashCode[], but that would create yet
|
||||
// another array...
|
||||
/* protected */ abstract HashCode makeHash(Hasher[] hashers);
|
||||
|
||||
@Override
|
||||
public Hasher newHasher() {
|
||||
final Hasher[] hashers = new Hasher[functions.length];
|
||||
for (int i = 0; i < hashers.length; i++) {
|
||||
hashers[i] = functions[i].newHasher();
|
||||
}
|
||||
return new Hasher() {
|
||||
@Override
|
||||
public Hasher putByte(byte b) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putByte(b);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putBytes(byte[] bytes) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putBytes(bytes);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putBytes(byte[] bytes, int off, int len) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putBytes(bytes, off, len);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putShort(short s) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putShort(s);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putInt(int i) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putInt(i);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putLong(long l) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putLong(l);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putFloat(float f) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putFloat(f);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putDouble(double d) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putDouble(d);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putBoolean(boolean b) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putBoolean(b);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putChar(char c) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putChar(c);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putUnencodedChars(CharSequence chars) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putUnencodedChars(chars);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putString(CharSequence chars, Charset charset) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putString(chars, charset);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
|
||||
for (Hasher hasher : hashers) {
|
||||
hasher.putObject(instance, funnel);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hash() {
|
||||
return makeHash(hashers);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0L;
|
||||
}
|
55
sources/main/java/com/google/common/hash/AbstractHasher.java
Normal file
55
sources/main/java/com/google/common/hash/AbstractHasher.java
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
* An abstract hasher, implementing {@link #putBoolean(boolean)},
|
||||
* {@link #putDouble(double)}, {@link #putFloat(float)},
|
||||
* {@link #putUnencodedChars(CharSequence)}, and
|
||||
* {@link #putString(CharSequence, Charset)} as prescribed by {@link Hasher}.
|
||||
*
|
||||
* @author Dimitris Andreou
|
||||
*/
|
||||
abstract class AbstractHasher implements Hasher {
|
||||
@Override
|
||||
public final Hasher putBoolean(boolean b) {
|
||||
return putByte(b ? (byte) 1 : (byte) 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putDouble(double d) {
|
||||
return putLong(Double.doubleToRawLongBits(d));
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putFloat(float f) {
|
||||
return putInt(Float.floatToRawIntBits(f));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putUnencodedChars(CharSequence charSequence) {
|
||||
for (int i = 0, len = charSequence.length(); i < len; i++) {
|
||||
putChar(charSequence.charAt(i));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putString(CharSequence charSequence, Charset charset) {
|
||||
return putBytes(charSequence.toString().getBytes(charset));
|
||||
}
|
||||
}
|
@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* Skeleton implementation of {@link HashFunction}, appropriate for
|
||||
* non-streaming algorithms. All the hash computation done using
|
||||
* {@linkplain #newHasher()} are delegated to the
|
||||
* {@linkplain #hashBytes(byte[], int, int)} method.
|
||||
*
|
||||
* @author Dimitris Andreou
|
||||
*/
|
||||
abstract class AbstractNonStreamingHashFunction implements HashFunction {
|
||||
@Override
|
||||
public Hasher newHasher() {
|
||||
return new BufferingHasher(32);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher newHasher(int expectedInputSize) {
|
||||
Preconditions.checkArgument(expectedInputSize >= 0);
|
||||
return new BufferingHasher(expectedInputSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> HashCode hashObject(T instance, Funnel<? super T> funnel) {
|
||||
return newHasher().putObject(instance, funnel).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashUnencodedChars(CharSequence input) {
|
||||
int len = input.length();
|
||||
Hasher hasher = newHasher(len * 2);
|
||||
for (int i = 0; i < len; i++) {
|
||||
hasher.putChar(input.charAt(i));
|
||||
}
|
||||
return hasher.hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashString(CharSequence input, Charset charset) {
|
||||
return hashBytes(input.toString().getBytes(charset));
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashInt(int input) {
|
||||
return newHasher(4).putInt(input).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashLong(long input) {
|
||||
return newHasher(8).putLong(input).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashBytes(byte[] input) {
|
||||
return hashBytes(input, 0, input.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* In-memory stream-based implementation of Hasher.
|
||||
*/
|
||||
private final class BufferingHasher extends AbstractHasher {
|
||||
final ExposedByteArrayOutputStream stream;
|
||||
static final int BOTTOM_BYTE = 0xFF;
|
||||
|
||||
BufferingHasher(int expectedInputSize) {
|
||||
this.stream = new ExposedByteArrayOutputStream(expectedInputSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putByte(byte b) {
|
||||
stream.write(b);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putBytes(byte[] bytes) {
|
||||
try {
|
||||
stream.write(bytes);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putBytes(byte[] bytes, int off, int len) {
|
||||
stream.write(bytes, off, len);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putShort(short s) {
|
||||
stream.write(s & BOTTOM_BYTE);
|
||||
stream.write((s >>> 8) & BOTTOM_BYTE);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putInt(int i) {
|
||||
stream.write(i & BOTTOM_BYTE);
|
||||
stream.write((i >>> 8) & BOTTOM_BYTE);
|
||||
stream.write((i >>> 16) & BOTTOM_BYTE);
|
||||
stream.write((i >>> 24) & BOTTOM_BYTE);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putLong(long l) {
|
||||
for (int i = 0; i < 64; i += 8) {
|
||||
stream.write((byte) ((l >>> i) & BOTTOM_BYTE));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher putChar(char c) {
|
||||
stream.write(c & BOTTOM_BYTE);
|
||||
stream.write((c >>> 8) & BOTTOM_BYTE);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
|
||||
funnel.funnel(instance, this);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hash() {
|
||||
return hashBytes(stream.byteArray(), 0, stream.length());
|
||||
}
|
||||
}
|
||||
|
||||
// Just to access the byte[] without introducing an unnecessary copy
|
||||
private static final class ExposedByteArrayOutputStream extends ByteArrayOutputStream {
|
||||
ExposedByteArrayOutputStream(int expectedInputSize) {
|
||||
super(expectedInputSize);
|
||||
}
|
||||
|
||||
byte[] byteArray() {
|
||||
return buf;
|
||||
}
|
||||
|
||||
int length() {
|
||||
return count;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,270 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* Skeleton implementation of {@link HashFunction}. Provides default
|
||||
* implementations which invokes the appropriate method on {@link #newHasher()},
|
||||
* then return the result of {@link Hasher#hash}.
|
||||
*
|
||||
* <p>
|
||||
* Invocations of {@link #newHasher(int)} also delegate to
|
||||
* {@linkplain #newHasher()}, ignoring the expected input size parameter.
|
||||
*
|
||||
* @author Kevin Bourrillion
|
||||
*/
|
||||
abstract class AbstractStreamingHashFunction implements HashFunction {
|
||||
@Override
|
||||
public <T> HashCode hashObject(T instance, Funnel<? super T> funnel) {
|
||||
return newHasher().putObject(instance, funnel).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashUnencodedChars(CharSequence input) {
|
||||
return newHasher().putUnencodedChars(input).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashString(CharSequence input, Charset charset) {
|
||||
return newHasher().putString(input, charset).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashInt(int input) {
|
||||
return newHasher().putInt(input).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashLong(long input) {
|
||||
return newHasher().putLong(input).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashBytes(byte[] input) {
|
||||
return newHasher().putBytes(input).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashBytes(byte[] input, int off, int len) {
|
||||
return newHasher().putBytes(input, off, len).hash();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher newHasher(int expectedInputSize) {
|
||||
Preconditions.checkArgument(expectedInputSize >= 0);
|
||||
return newHasher();
|
||||
}
|
||||
|
||||
/**
|
||||
* A convenience base class for implementors of {@code Hasher}; handles
|
||||
* accumulating data until an entire "chunk" (of implementation-dependent
|
||||
* length) is ready to be hashed.
|
||||
*
|
||||
* @author Kevin Bourrillion
|
||||
* @author Dimitris Andreou
|
||||
*/
|
||||
// TODO(kevinb): this class still needs some design-and-document-for-inheritance
|
||||
// love
|
||||
protected static abstract class AbstractStreamingHasher extends AbstractHasher {
|
||||
/** Buffer via which we pass data to the hash algorithm (the implementor) */
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
/** Number of bytes to be filled before process() invocation(s). */
|
||||
private final int bufferSize;
|
||||
|
||||
/** Number of bytes processed per process() invocation. */
|
||||
private final int chunkSize;
|
||||
|
||||
/**
|
||||
* Constructor for use by subclasses. This hasher instance will process chunks
|
||||
* of the specified size.
|
||||
*
|
||||
* @param chunkSize the number of bytes available per
|
||||
* {@link #process(ByteBuffer)} invocation; must be at least 4
|
||||
*/
|
||||
protected AbstractStreamingHasher(int chunkSize) {
|
||||
this(chunkSize, chunkSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor for use by subclasses. This hasher instance will process chunks
|
||||
* of the specified size, using an internal buffer of {@code bufferSize} size,
|
||||
* which must be a multiple of {@code chunkSize}.
|
||||
*
|
||||
* @param chunkSize the number of bytes available per
|
||||
* {@link #process(ByteBuffer)} invocation; must be at least 4
|
||||
* @param bufferSize the size of the internal buffer. Must be a multiple of
|
||||
* chunkSize
|
||||
*/
|
||||
protected AbstractStreamingHasher(int chunkSize, int bufferSize) {
|
||||
// TODO(kevinb): check more preconditions (as bufferSize >= chunkSize) if this
|
||||
// is ever public
|
||||
checkArgument(bufferSize % chunkSize == 0);
|
||||
|
||||
// TODO(user): benchmark performance difference with longer buffer
|
||||
this.buffer = ByteBuffer.allocate(bufferSize + 7) // always space for a single primitive
|
||||
.order(ByteOrder.LITTLE_ENDIAN);
|
||||
this.bufferSize = bufferSize;
|
||||
this.chunkSize = chunkSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes the available bytes of the buffer (at most {@code chunk} bytes).
|
||||
*/
|
||||
protected abstract void process(ByteBuffer bb);
|
||||
|
||||
/**
|
||||
* This is invoked for the last bytes of the input, which are not enough to fill
|
||||
* a whole chunk. The passed {@code ByteBuffer} is guaranteed to be non-empty.
|
||||
*
|
||||
* <p>
|
||||
* This implementation simply pads with zeros and delegates to
|
||||
* {@link #process(ByteBuffer)}.
|
||||
*/
|
||||
protected void processRemaining(ByteBuffer bb) {
|
||||
bb.position(bb.limit()); // move at the end
|
||||
bb.limit(chunkSize + 7); // get ready to pad with longs
|
||||
while (bb.position() < chunkSize) {
|
||||
bb.putLong(0);
|
||||
}
|
||||
bb.limit(chunkSize);
|
||||
bb.flip();
|
||||
process(bb);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putBytes(byte[] bytes) {
|
||||
return putBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putBytes(byte[] bytes, int off, int len) {
|
||||
return putBytes(ByteBuffer.wrap(bytes, off, len).order(ByteOrder.LITTLE_ENDIAN));
|
||||
}
|
||||
|
||||
private Hasher putBytes(ByteBuffer readBuffer) {
|
||||
// If we have room for all of it, this is easy
|
||||
if (readBuffer.remaining() <= buffer.remaining()) {
|
||||
buffer.put(readBuffer);
|
||||
munchIfFull();
|
||||
return this;
|
||||
}
|
||||
|
||||
// First add just enough to fill buffer size, and munch that
|
||||
int bytesToCopy = bufferSize - buffer.position();
|
||||
for (int i = 0; i < bytesToCopy; i++) {
|
||||
buffer.put(readBuffer.get());
|
||||
}
|
||||
munch(); // buffer becomes empty here, since chunkSize divides bufferSize
|
||||
|
||||
// Now process directly from the rest of the input buffer
|
||||
while (readBuffer.remaining() >= chunkSize) {
|
||||
process(readBuffer);
|
||||
}
|
||||
|
||||
// Finally stick the remainder back in our usual buffer
|
||||
buffer.put(readBuffer);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putUnencodedChars(CharSequence charSequence) {
|
||||
for (int i = 0; i < charSequence.length(); i++) {
|
||||
putChar(charSequence.charAt(i));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putByte(byte b) {
|
||||
buffer.put(b);
|
||||
munchIfFull();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putShort(short s) {
|
||||
buffer.putShort(s);
|
||||
munchIfFull();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putChar(char c) {
|
||||
buffer.putChar(c);
|
||||
munchIfFull();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putInt(int i) {
|
||||
buffer.putInt(i);
|
||||
munchIfFull();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Hasher putLong(long l) {
|
||||
buffer.putLong(l);
|
||||
munchIfFull();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
|
||||
funnel.funnel(instance, this);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final HashCode hash() {
|
||||
munch();
|
||||
buffer.flip();
|
||||
if (buffer.remaining() > 0) {
|
||||
processRemaining(buffer);
|
||||
}
|
||||
return makeHash();
|
||||
}
|
||||
|
||||
abstract HashCode makeHash();
|
||||
|
||||
// Process pent-up data in chunks
|
||||
private void munchIfFull() {
|
||||
if (buffer.remaining() < 8) {
|
||||
// buffer is full; not enough room for a primitive. We have at least one full
|
||||
// chunk.
|
||||
munch();
|
||||
}
|
||||
}
|
||||
|
||||
private void munch() {
|
||||
buffer.flip();
|
||||
while (buffer.remaining() >= chunkSize) {
|
||||
// we could limit the buffer to ensure process() does not read more than
|
||||
// chunkSize number of bytes, but we trust the implementations
|
||||
process(buffer);
|
||||
}
|
||||
buffer.compact(); // preserve any remaining data that do not make a full chunk
|
||||
}
|
||||
}
|
||||
}
|
420
sources/main/java/com/google/common/hash/BloomFilter.java
Normal file
420
sources/main/java/com/google/common/hash/BloomFilter.java
Normal file
@ -0,0 +1,420 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Objects;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.hash.BloomFilterStrategies.BitArray;
|
||||
|
||||
/**
|
||||
* A Bloom filter for instances of {@code T}. A Bloom filter offers an
|
||||
* approximate containment test with one-sided error: if it claims that an
|
||||
* element is contained in it, this might be in error, but if it claims that an
|
||||
* element is <i>not</i> contained in it, then this is definitely true.
|
||||
*
|
||||
* <p>
|
||||
* If you are unfamiliar with Bloom filters, this nice
|
||||
* <a href="http://llimllib.github.com/bloomfilter-tutorial/">tutorial</a> may
|
||||
* help you understand how they work.
|
||||
*
|
||||
* <p>
|
||||
* The false positive probability ({@code FPP}) of a bloom filter is defined as
|
||||
* the probability that {@linkplain #mightContain(Object)} will erroneously
|
||||
* return {@code true} for an object that has not actually been put in the
|
||||
* {@code BloomFilter}.
|
||||
*
|
||||
* <p>
|
||||
* Bloom filters are serializable. However, serial forms generated by newer
|
||||
* versions of the code may not be readable by older versions of the code (e.g.,
|
||||
* a serialized bloom filter generated today may <i>not</i> be readable by a
|
||||
* binary that was compiled 6 months ago).
|
||||
*
|
||||
* @param <T> the type of instances that the {@code BloomFilter} accepts
|
||||
* @author Dimitris Andreou
|
||||
* @author Kevin Bourrillion
|
||||
* @since 11.0
|
||||
*/
|
||||
@Beta
|
||||
public final class BloomFilter<T> implements Predicate<T>, Serializable {
|
||||
/**
|
||||
* A strategy to translate T instances, to {@code numHashFunctions} bit indexes.
|
||||
*
|
||||
* <p>
|
||||
* Implementations should be collections of pure functions (i.e. stateless).
|
||||
*/
|
||||
interface Strategy extends java.io.Serializable {
|
||||
|
||||
/**
|
||||
* Sets {@code numHashFunctions} bits of the given bit array, by hashing a user
|
||||
* element.
|
||||
*
|
||||
* <p>
|
||||
* Returns whether any bits changed as a result of this operation.
|
||||
*/
|
||||
<T> boolean put(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits);
|
||||
|
||||
/**
|
||||
* Queries {@code numHashFunctions} bits of the given bit array, by hashing a
|
||||
* user element; returns {@code true} if and only if all selected bits are set.
|
||||
*/
|
||||
<T> boolean mightContain(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits);
|
||||
|
||||
/**
|
||||
* Identifier used to encode this strategy, when marshalled as part of a
|
||||
* BloomFilter. Only values in the [-128, 127] range are valid for the compact
|
||||
* serial form. Non-negative values are reserved for enums defined in
|
||||
* BloomFilterStrategies; negative values are reserved for any custom, stateful
|
||||
* strategy we may define (e.g. any kind of strategy that would depend on user
|
||||
* input).
|
||||
*/
|
||||
int ordinal();
|
||||
}
|
||||
|
||||
/** The bit set of the BloomFilter (not necessarily power of 2!) */
|
||||
private final BitArray bits;
|
||||
|
||||
/** Number of hashes per element */
|
||||
private final int numHashFunctions;
|
||||
|
||||
/** The funnel to translate Ts to bytes */
|
||||
private final Funnel<T> funnel;
|
||||
|
||||
/**
|
||||
* The strategy we employ to map an element T to {@code numHashFunctions} bit
|
||||
* indexes.
|
||||
*/
|
||||
private final Strategy strategy;
|
||||
|
||||
/**
|
||||
* Creates a BloomFilter.
|
||||
*/
|
||||
private BloomFilter(BitArray bits, int numHashFunctions, Funnel<T> funnel, Strategy strategy) {
|
||||
checkArgument(numHashFunctions > 0, "numHashFunctions (%s) must be > 0", numHashFunctions);
|
||||
checkArgument(numHashFunctions <= 255, "numHashFunctions (%s) must be <= 255", numHashFunctions);
|
||||
this.bits = checkNotNull(bits);
|
||||
this.numHashFunctions = numHashFunctions;
|
||||
this.funnel = checkNotNull(funnel);
|
||||
this.strategy = checkNotNull(strategy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@code BloomFilter} that's a copy of this instance. The new
|
||||
* instance is equal to this instance but shares no mutable state.
|
||||
*
|
||||
* @since 12.0
|
||||
*/
|
||||
public BloomFilter<T> copy() {
|
||||
return new BloomFilter<T>(bits.copy(), numHashFunctions, funnel, strategy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if the element <i>might</i> have been put in this Bloom
|
||||
* filter, {@code false} if this is <i>definitely</i> not the case.
|
||||
*/
|
||||
public boolean mightContain(T object) {
|
||||
return strategy.mightContain(object, funnel, numHashFunctions, bits);
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Provided only to satisfy the {@link Predicate} interface; use
|
||||
* {@link #mightContain} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
@Override
|
||||
public boolean apply(T input) {
|
||||
return mightContain(input);
|
||||
}
|
||||
|
||||
/**
|
||||
* Puts an element into this {@code BloomFilter}. Ensures that subsequent
|
||||
* invocations of {@link #mightContain(Object)} with the same element will
|
||||
* always return {@code true}.
|
||||
*
|
||||
* @return true if the bloom filter's bits changed as a result of this
|
||||
* operation. If the bits changed, this is <i>definitely</i> the first
|
||||
* time {@code object} has been added to the filter. If the bits haven't
|
||||
* changed, this <i>might</i> be the first time {@code object} has been
|
||||
* added to the filter. Note that {@code put(t)} always returns the
|
||||
* <i>opposite</i> result to what {@code mightContain(t)} would have
|
||||
* returned at the time it is called.
|
||||
* @since 12.0 (present in 11.0 with {@code void} return type)
|
||||
*/
|
||||
public boolean put(T object) {
|
||||
return strategy.put(object, funnel, numHashFunctions, bits);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the probability that {@linkplain #mightContain(Object)} will
|
||||
* erroneously return {@code true} for an object that has not actually been put
|
||||
* in the {@code BloomFilter}.
|
||||
*
|
||||
* <p>
|
||||
* Ideally, this number should be close to the {@code fpp} parameter passed in
|
||||
* {@linkplain #create(Funnel, int, double)}, or smaller. If it is significantly
|
||||
* higher, it is usually the case that too many elements (more than expected)
|
||||
* have been put in the {@code BloomFilter}, degenerating it.
|
||||
*
|
||||
* @since 14.0 (since 11.0 as expectedFalsePositiveProbability())
|
||||
*/
|
||||
public double expectedFpp() {
|
||||
// You down with FPP? (Yeah you know me!) Who's down with FPP? (Every last
|
||||
// homie!)
|
||||
return Math.pow((double) bits.bitCount() / bitSize(), numHashFunctions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of bits in the underlying bit array.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
long bitSize() {
|
||||
return bits.bitSize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether a given bloom filter is compatible with this bloom filter.
|
||||
* For two bloom filters to be compatible, they must:
|
||||
*
|
||||
* <ul>
|
||||
* <li>not be the same instance
|
||||
* <li>have the same number of hash functions
|
||||
* <li>have the same bit size
|
||||
* <li>have the same strategy
|
||||
* <li>have equal funnels
|
||||
* <ul>
|
||||
*
|
||||
* @param that The bloom filter to check for compatibility.
|
||||
* @since 15.0
|
||||
*/
|
||||
public boolean isCompatible(BloomFilter<T> that) {
|
||||
checkNotNull(that);
|
||||
return (this != that) && (this.numHashFunctions == that.numHashFunctions) && (this.bitSize() == that.bitSize())
|
||||
&& (this.strategy.equals(that.strategy)) && (this.funnel.equals(that.funnel));
|
||||
}
|
||||
|
||||
/**
|
||||
* Combines this bloom filter with another bloom filter by performing a bitwise
|
||||
* OR of the underlying data. The mutations happen to <b>this</b> instance.
|
||||
* Callers must ensure the bloom filters are appropriately sized to avoid
|
||||
* saturating them.
|
||||
*
|
||||
* @param that The bloom filter to combine this bloom filter with. It is not
|
||||
* mutated.
|
||||
* @throws IllegalArgumentException if {@code isCompatible(that) == false}
|
||||
*
|
||||
* @since 15.0
|
||||
*/
|
||||
public void putAll(BloomFilter<T> that) {
|
||||
checkNotNull(that);
|
||||
checkArgument(this != that, "Cannot combine a BloomFilter with itself.");
|
||||
checkArgument(this.numHashFunctions == that.numHashFunctions,
|
||||
"BloomFilters must have the same number of hash functions (%s != %s)", this.numHashFunctions,
|
||||
that.numHashFunctions);
|
||||
checkArgument(this.bitSize() == that.bitSize(),
|
||||
"BloomFilters must have the same size underlying bit arrays (%s != %s)", this.bitSize(),
|
||||
that.bitSize());
|
||||
checkArgument(this.strategy.equals(that.strategy), "BloomFilters must have equal strategies (%s != %s)",
|
||||
this.strategy, that.strategy);
|
||||
checkArgument(this.funnel.equals(that.funnel), "BloomFilters must have equal funnels (%s != %s)", this.funnel,
|
||||
that.funnel);
|
||||
this.bits.putAll(that.bits);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(@Nullable Object object) {
|
||||
if (object == this) {
|
||||
return true;
|
||||
}
|
||||
if (object instanceof BloomFilter) {
|
||||
BloomFilter<?> that = (BloomFilter<?>) object;
|
||||
return this.numHashFunctions == that.numHashFunctions && this.funnel.equals(that.funnel)
|
||||
&& this.bits.equals(that.bits) && this.strategy.equals(that.strategy);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(numHashFunctions, funnel, strategy, bits);
|
||||
}
|
||||
|
||||
private static final Strategy DEFAULT_STRATEGY = getDefaultStrategyFromSystemProperty();
|
||||
|
||||
@VisibleForTesting
|
||||
static final String USE_MITZ32_PROPERTY = "com.google.common.hash.BloomFilter.useMitz32";
|
||||
|
||||
@VisibleForTesting
|
||||
static Strategy getDefaultStrategyFromSystemProperty() {
|
||||
return Boolean.parseBoolean(System.getProperty(USE_MITZ32_PROPERTY)) ? BloomFilterStrategies.MURMUR128_MITZ_32
|
||||
: BloomFilterStrategies.MURMUR128_MITZ_64;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link BloomFilter BloomFilter<T>} with the expected number of
|
||||
* insertions and expected false positive probability.
|
||||
*
|
||||
* <p>
|
||||
* Note that overflowing a {@code BloomFilter} with significantly more elements
|
||||
* than specified, will result in its saturation, and a sharp deterioration of
|
||||
* its false positive probability.
|
||||
*
|
||||
* <p>
|
||||
* The constructed {@code BloomFilter<T>} will be serializable if the provided
|
||||
* {@code Funnel<T>} is.
|
||||
*
|
||||
* <p>
|
||||
* It is recommended that the funnel be implemented as a Java enum. This has the
|
||||
* benefit of ensuring proper serialization and deserialization, which is
|
||||
* important since {@link #equals} also relies on object identity of funnels.
|
||||
*
|
||||
* @param funnel the funnel of T's that the constructed
|
||||
* {@code BloomFilter<T>} will use
|
||||
* @param expectedInsertions the number of expected insertions to the
|
||||
* constructed {@code BloomFilter<T>}; must be
|
||||
* positive
|
||||
* @param fpp the desired false positive probability (must be
|
||||
* positive and less than 1.0)
|
||||
* @return a {@code BloomFilter}
|
||||
*/
|
||||
public static <T> BloomFilter<T> create(Funnel<T> funnel, int expectedInsertions /* n */, double fpp) {
|
||||
return create(funnel, expectedInsertions, fpp, DEFAULT_STRATEGY);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
static <T> BloomFilter<T> create(Funnel<T> funnel, int expectedInsertions /* n */, double fpp, Strategy strategy) {
|
||||
checkNotNull(funnel);
|
||||
checkArgument(expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions);
|
||||
checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp);
|
||||
checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp);
|
||||
checkNotNull(strategy);
|
||||
|
||||
if (expectedInsertions == 0) {
|
||||
expectedInsertions = 1;
|
||||
}
|
||||
/*
|
||||
* TODO(user): Put a warning in the javadoc about tiny fpp values, since the
|
||||
* resulting size is proportional to -log(p), but there is not much of a point
|
||||
* after all, e.g. optimalM(1000, 0.0000000000000001) = 76680 which is less than
|
||||
* 10kb. Who cares!
|
||||
*/
|
||||
long numBits = optimalNumOfBits(expectedInsertions, fpp);
|
||||
int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits);
|
||||
try {
|
||||
return new BloomFilter<T>(new BitArray(numBits), numHashFunctions, funnel, strategy);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException("Could not create BloomFilter of " + numBits + " bits", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link BloomFilter BloomFilter<T>} with the expected number of
|
||||
* insertions and a default expected false positive probability of 3%.
|
||||
*
|
||||
* <p>
|
||||
* Note that overflowing a {@code BloomFilter} with significantly more elements
|
||||
* than specified, will result in its saturation, and a sharp deterioration of
|
||||
* its false positive probability.
|
||||
*
|
||||
* <p>
|
||||
* The constructed {@code BloomFilter<T>} will be serializable if the provided
|
||||
* {@code Funnel<T>} is.
|
||||
*
|
||||
* @param funnel the funnel of T's that the constructed
|
||||
* {@code BloomFilter<T>} will use
|
||||
* @param expectedInsertions the number of expected insertions to the
|
||||
* constructed {@code BloomFilter<T>}; must be
|
||||
* positive
|
||||
* @return a {@code BloomFilter}
|
||||
*/
|
||||
public static <T> BloomFilter<T> create(Funnel<T> funnel, int expectedInsertions /* n */) {
|
||||
return create(funnel, expectedInsertions, 0.03); // FYI, for 3%, we always get 5 hash functions
|
||||
}
|
||||
|
||||
/*
|
||||
* Cheat sheet:
|
||||
*
|
||||
* m: total bits n: expected insertions b: m/n, bits per insertion p: expected
|
||||
* false positive probability
|
||||
*
|
||||
* 1) Optimal k = b * ln2 2) p = (1 - e ^ (-kn/m))^k 3) For optimal k: p = 2 ^
|
||||
* (-k) ~= 0.6185^b 4) For optimal k: m = -nlnp / ((ln2) ^ 2)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Computes the optimal k (number of hashes per element inserted in Bloom
|
||||
* filter), given the expected insertions and total number of bits in the Bloom
|
||||
* filter.
|
||||
*
|
||||
* See http://en.wikipedia.org/wiki/File:Bloom_filter_fp_probability.svg for the
|
||||
* formula.
|
||||
*
|
||||
* @param n expected insertions (must be positive)
|
||||
* @param m total number of bits in Bloom filter (must be positive)
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static int optimalNumOfHashFunctions(long n, long m) {
|
||||
return Math.max(1, (int) Math.round(m / n * Math.log(2)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes m (total bits of Bloom filter) which is expected to achieve, for the
|
||||
* specified expected insertions, the required false positive probability.
|
||||
*
|
||||
* See http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
|
||||
* for the formula.
|
||||
*
|
||||
* @param n expected insertions (must be positive)
|
||||
* @param p false positive rate (must be 0 < p < 1)
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static long optimalNumOfBits(long n, double p) {
|
||||
if (p == 0) {
|
||||
p = Double.MIN_VALUE;
|
||||
}
|
||||
return (long) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
|
||||
}
|
||||
|
||||
private Object writeReplace() {
|
||||
return new SerialForm<T>(this);
|
||||
}
|
||||
|
||||
private static class SerialForm<T> implements Serializable {
|
||||
final long[] data;
|
||||
final int numHashFunctions;
|
||||
final Funnel<T> funnel;
|
||||
final Strategy strategy;
|
||||
|
||||
SerialForm(BloomFilter<T> bf) {
|
||||
this.data = bf.bits.data;
|
||||
this.numHashFunctions = bf.numHashFunctions;
|
||||
this.funnel = bf.funnel;
|
||||
this.strategy = bf.strategy;
|
||||
}
|
||||
|
||||
Object readResolve() {
|
||||
return new BloomFilter<T>(new BitArray(data), numHashFunctions, funnel, strategy);
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 1;
|
||||
}
|
||||
}
|
@ -0,0 +1,213 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
|
||||
import java.math.RoundingMode;
|
||||
import java.util.Arrays;
|
||||
|
||||
import com.google.common.math.LongMath;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.google.common.primitives.Longs;
|
||||
|
||||
/**
|
||||
* Collections of strategies of generating the k * log(M) bits required for an
|
||||
* element to be mapped to a BloomFilter of M bits and k hash functions. These
|
||||
* strategies are part of the serialized form of the Bloom filters that use
|
||||
* them, thus they must be preserved as is (no updates allowed, only
|
||||
* introduction of new versions).
|
||||
*
|
||||
* Important: the order of the constants cannot change, and they cannot be
|
||||
* deleted - we depend on their ordinal for BloomFilter serialization.
|
||||
*
|
||||
* @author Dimitris Andreou
|
||||
* @author Kurt Alfred Kluever
|
||||
*/
|
||||
enum BloomFilterStrategies implements BloomFilter.Strategy {
|
||||
/**
|
||||
* See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam
|
||||
* Kirsch and Michael Mitzenmacher. The paper argues that this trick doesn't
|
||||
* significantly deteriorate the performance of a Bloom filter (yet only needs
|
||||
* two 32bit hash functions).
|
||||
*/
|
||||
MURMUR128_MITZ_32() {
|
||||
@Override
|
||||
public <T> boolean put(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits) {
|
||||
long bitSize = bits.bitSize();
|
||||
long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
|
||||
int hash1 = (int) hash64;
|
||||
int hash2 = (int) (hash64 >>> 32);
|
||||
|
||||
boolean bitsChanged = false;
|
||||
for (int i = 1; i <= numHashFunctions; i++) {
|
||||
int combinedHash = hash1 + (i * hash2);
|
||||
// Flip all the bits if it's negative (guaranteed positive number)
|
||||
if (combinedHash < 0) {
|
||||
combinedHash = ~combinedHash;
|
||||
}
|
||||
bitsChanged |= bits.set(combinedHash % bitSize);
|
||||
}
|
||||
return bitsChanged;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> boolean mightContain(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits) {
|
||||
long bitSize = bits.bitSize();
|
||||
long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
|
||||
int hash1 = (int) hash64;
|
||||
int hash2 = (int) (hash64 >>> 32);
|
||||
|
||||
for (int i = 1; i <= numHashFunctions; i++) {
|
||||
int combinedHash = hash1 + (i * hash2);
|
||||
// Flip all the bits if it's negative (guaranteed positive number)
|
||||
if (combinedHash < 0) {
|
||||
combinedHash = ~combinedHash;
|
||||
}
|
||||
if (!bits.get(combinedHash % bitSize)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
},
|
||||
/**
|
||||
* This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing.
|
||||
* It looks different than the implementation in MURMUR128_MITZ_32 because we're
|
||||
* avoiding the multiplication in the loop and doing a (much simpler) += hash2.
|
||||
* We're also changing the index to a positive number by AND'ing with
|
||||
* Long.MAX_VALUE instead of flipping the bits.
|
||||
*/
|
||||
MURMUR128_MITZ_64() {
|
||||
@Override
|
||||
public <T> boolean put(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits) {
|
||||
long bitSize = bits.bitSize();
|
||||
byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
|
||||
long hash1 = lowerEight(bytes);
|
||||
long hash2 = upperEight(bytes);
|
||||
|
||||
boolean bitsChanged = false;
|
||||
long combinedHash = hash1;
|
||||
for (int i = 0; i < numHashFunctions; i++) {
|
||||
// Make the combined hash positive and indexable
|
||||
bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize);
|
||||
combinedHash += hash2;
|
||||
}
|
||||
return bitsChanged;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> boolean mightContain(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits) {
|
||||
long bitSize = bits.bitSize();
|
||||
byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
|
||||
long hash1 = lowerEight(bytes);
|
||||
long hash2 = upperEight(bytes);
|
||||
|
||||
long combinedHash = hash1;
|
||||
for (int i = 0; i < numHashFunctions; i++) {
|
||||
// Make the combined hash positive and indexable
|
||||
if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) {
|
||||
return false;
|
||||
}
|
||||
combinedHash += hash2;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private /* static */ long lowerEight(byte[] bytes) {
|
||||
return Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]);
|
||||
}
|
||||
|
||||
private /* static */ long upperEight(byte[] bytes) {
|
||||
return Longs.fromBytes(bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9],
|
||||
bytes[8]);
|
||||
}
|
||||
};
|
||||
|
||||
// Note: We use this instead of java.util.BitSet because we need access to the
|
||||
// long[] data field
|
||||
static final class BitArray {
|
||||
final long[] data;
|
||||
long bitCount;
|
||||
|
||||
BitArray(long bits) {
|
||||
this(new long[Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))]);
|
||||
}
|
||||
|
||||
// Used by serialization
|
||||
BitArray(long[] data) {
|
||||
checkArgument(data.length > 0, "data length is zero!");
|
||||
this.data = data;
|
||||
long bitCount = 0;
|
||||
for (long value : data) {
|
||||
bitCount += Long.bitCount(value);
|
||||
}
|
||||
this.bitCount = bitCount;
|
||||
}
|
||||
|
||||
/** Returns true if the bit changed value. */
|
||||
boolean set(long index) {
|
||||
if (!get(index)) {
|
||||
data[(int) (index >>> 6)] |= (1L << index);
|
||||
bitCount++;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean get(long index) {
|
||||
return (data[(int) (index >>> 6)] & (1L << index)) != 0;
|
||||
}
|
||||
|
||||
/** Number of bits */
|
||||
long bitSize() {
|
||||
return (long) data.length * Long.SIZE;
|
||||
}
|
||||
|
||||
/** Number of set bits (1s) */
|
||||
long bitCount() {
|
||||
return bitCount;
|
||||
}
|
||||
|
||||
BitArray copy() {
|
||||
return new BitArray(data.clone());
|
||||
}
|
||||
|
||||
/** Combines the two BitArrays using bitwise OR. */
|
||||
void putAll(BitArray array) {
|
||||
checkArgument(data.length == array.data.length, "BitArrays must be of equal length (%s != %s)", data.length,
|
||||
array.data.length);
|
||||
bitCount = 0;
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
data[i] |= array.data[i];
|
||||
bitCount += Long.bitCount(data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o instanceof BitArray) {
|
||||
BitArray bitArray = (BitArray) o;
|
||||
return Arrays.equals(data, bitArray.data);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Arrays.hashCode(data);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (C) 2012 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.zip.Checksum;
|
||||
|
||||
import com.google.common.base.Supplier;
|
||||
|
||||
/**
|
||||
* {@link HashFunction} adapter for {@link Checksum} instances.
|
||||
*
|
||||
* @author Colin Decker
|
||||
*/
|
||||
final class ChecksumHashFunction extends AbstractStreamingHashFunction implements Serializable {
|
||||
|
||||
private final Supplier<? extends Checksum> checksumSupplier;
|
||||
private final int bits;
|
||||
private final String toString;
|
||||
|
||||
ChecksumHashFunction(Supplier<? extends Checksum> checksumSupplier, int bits, String toString) {
|
||||
this.checksumSupplier = checkNotNull(checksumSupplier);
|
||||
checkArgument(bits == 32 || bits == 64, "bits (%s) must be either 32 or 64", bits);
|
||||
this.bits = bits;
|
||||
this.toString = checkNotNull(toString);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int bits() {
|
||||
return bits;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher newHasher() {
|
||||
return new ChecksumHasher(checksumSupplier.get());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return toString;
|
||||
}
|
||||
|
||||
/**
|
||||
* Hasher that updates a checksum.
|
||||
*/
|
||||
private final class ChecksumHasher extends AbstractByteHasher {
|
||||
|
||||
private final Checksum checksum;
|
||||
|
||||
private ChecksumHasher(Checksum checksum) {
|
||||
this.checksum = checkNotNull(checksum);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void update(byte b) {
|
||||
checksum.update(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void update(byte[] bytes, int off, int len) {
|
||||
checksum.update(bytes, off, len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hash() {
|
||||
long value = checksum.getValue();
|
||||
if (bits == 32) {
|
||||
/*
|
||||
* The long returned from a 32-bit Checksum will have all 0s for its second
|
||||
* word, so the cast won't lose any information and is necessary to return a
|
||||
* HashCode of the correct size.
|
||||
*/
|
||||
return HashCode.fromInt((int) value);
|
||||
} else {
|
||||
return HashCode.fromLong(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0L;
|
||||
}
|
59
sources/main/java/com/google/common/hash/Funnel.java
Normal file
59
sources/main/java/com/google/common/hash/Funnel.java
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
|
||||
/**
|
||||
* An object which can send data from an object of type {@code T} into a
|
||||
* {@code PrimitiveSink}. Implementations for common types can be found in
|
||||
* {@link Funnels}.
|
||||
*
|
||||
* <p>
|
||||
* Note that serialization of {@linkplain BloomFilter bloom filters} requires
|
||||
* the proper serialization of funnels. When possible, it is recommended that
|
||||
* funnels be implemented as a single-element enum to maintain serialization
|
||||
* guarantees. See Effective Java (2nd Edition), Item 3: "Enforce the singleton
|
||||
* property with a private constructor or an enum type". For example:
|
||||
*
|
||||
* <pre>
|
||||
* {@code
|
||||
* public enum PersonFunnel implements Funnel<Person> {
|
||||
* INSTANCE;
|
||||
* public void funnel(Person person, PrimitiveSink into) {
|
||||
* into.putUnencodedChars(person.getFirstName())
|
||||
* .putUnencodedChars(person.getLastName())
|
||||
* .putInt(person.getAge());
|
||||
* }
|
||||
* }}
|
||||
* </pre>
|
||||
*
|
||||
* @author Dimitris Andreou
|
||||
* @since 11.0
|
||||
*/
|
||||
@Beta
|
||||
public interface Funnel<T> extends Serializable {
|
||||
|
||||
/**
|
||||
* Sends a stream of data from the {@code from} object into the sink
|
||||
* {@code into}. There is no requirement that this data be complete enough to
|
||||
* fully reconstitute the object later.
|
||||
*
|
||||
* @since 12.0 (in Guava 11.0, {@code PrimitiveSink} was named {@code Sink})
|
||||
*/
|
||||
void funnel(T from, PrimitiveSink into);
|
||||
}
|
270
sources/main/java/com/google/common/hash/Funnels.java
Normal file
270
sources/main/java/com/google/common/hash/Funnels.java
Normal file
@ -0,0 +1,270 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.io.Serializable;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* Funnels for common types. All implementations are serializable.
|
||||
*
|
||||
* @author Dimitris Andreou
|
||||
* @since 11.0
|
||||
*/
|
||||
@Beta
|
||||
public final class Funnels {
|
||||
private Funnels() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a funnel that extracts the bytes from a {@code byte} array.
|
||||
*/
|
||||
public static Funnel<byte[]> byteArrayFunnel() {
|
||||
return ByteArrayFunnel.INSTANCE;
|
||||
}
|
||||
|
||||
private enum ByteArrayFunnel implements Funnel<byte[]> {
|
||||
INSTANCE;
|
||||
|
||||
public void funnel(byte[] from, PrimitiveSink into) {
|
||||
into.putBytes(from);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Funnels.byteArrayFunnel()";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a funnel that extracts the characters from a {@code CharSequence}, a
|
||||
* character at a time, without performing any encoding. If you need to use a
|
||||
* specific encoding, use {@link Funnels#stringFunnel(Charset)} instead.
|
||||
*
|
||||
* @since 15.0 (since 11.0 as {@code Funnels.stringFunnel()}.
|
||||
*/
|
||||
public static Funnel<CharSequence> unencodedCharsFunnel() {
|
||||
return UnencodedCharsFunnel.INSTANCE;
|
||||
}
|
||||
|
||||
private enum UnencodedCharsFunnel implements Funnel<CharSequence> {
|
||||
INSTANCE;
|
||||
|
||||
public void funnel(CharSequence from, PrimitiveSink into) {
|
||||
into.putUnencodedChars(from);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Funnels.unencodedCharsFunnel()";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a funnel that encodes the characters of a {@code CharSequence} with
|
||||
* the specified {@code Charset}.
|
||||
*
|
||||
* @since 15.0
|
||||
*/
|
||||
public static Funnel<CharSequence> stringFunnel(Charset charset) {
|
||||
return new StringCharsetFunnel(charset);
|
||||
}
|
||||
|
||||
private static class StringCharsetFunnel implements Funnel<CharSequence>, Serializable {
|
||||
private final Charset charset;
|
||||
|
||||
StringCharsetFunnel(Charset charset) {
|
||||
this.charset = Preconditions.checkNotNull(charset);
|
||||
}
|
||||
|
||||
public void funnel(CharSequence from, PrimitiveSink into) {
|
||||
into.putString(from, charset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Funnels.stringFunnel(" + charset.name() + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(@Nullable Object o) {
|
||||
if (o instanceof StringCharsetFunnel) {
|
||||
StringCharsetFunnel funnel = (StringCharsetFunnel) o;
|
||||
return this.charset.equals(funnel.charset);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return StringCharsetFunnel.class.hashCode() ^ charset.hashCode();
|
||||
}
|
||||
|
||||
Object writeReplace() {
|
||||
return new SerializedForm(charset);
|
||||
}
|
||||
|
||||
private static class SerializedForm implements Serializable {
|
||||
private final String charsetCanonicalName;
|
||||
|
||||
SerializedForm(Charset charset) {
|
||||
this.charsetCanonicalName = charset.name();
|
||||
}
|
||||
|
||||
private Object readResolve() {
|
||||
return stringFunnel(Charset.forName(charsetCanonicalName));
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a funnel for integers.
|
||||
*
|
||||
* @since 13.0
|
||||
*/
|
||||
public static Funnel<Integer> integerFunnel() {
|
||||
return IntegerFunnel.INSTANCE;
|
||||
}
|
||||
|
||||
private enum IntegerFunnel implements Funnel<Integer> {
|
||||
INSTANCE;
|
||||
|
||||
public void funnel(Integer from, PrimitiveSink into) {
|
||||
into.putInt(from);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Funnels.integerFunnel()";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a funnel that processes an {@code Iterable} by funneling its elements
|
||||
* in iteration order with the specified funnel. No separators are added between
|
||||
* the elements.
|
||||
*
|
||||
* @since 15.0
|
||||
*/
|
||||
public static <E> Funnel<Iterable<? extends E>> sequentialFunnel(Funnel<E> elementFunnel) {
|
||||
return new SequentialFunnel<E>(elementFunnel);
|
||||
}
|
||||
|
||||
private static class SequentialFunnel<E> implements Funnel<Iterable<? extends E>>, Serializable {
|
||||
private final Funnel<E> elementFunnel;
|
||||
|
||||
SequentialFunnel(Funnel<E> elementFunnel) {
|
||||
this.elementFunnel = Preconditions.checkNotNull(elementFunnel);
|
||||
}
|
||||
|
||||
public void funnel(Iterable<? extends E> from, PrimitiveSink into) {
|
||||
for (E e : from) {
|
||||
elementFunnel.funnel(e, into);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Funnels.sequentialFunnel(" + elementFunnel + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(@Nullable Object o) {
|
||||
if (o instanceof SequentialFunnel) {
|
||||
SequentialFunnel<?> funnel = (SequentialFunnel<?>) o;
|
||||
return elementFunnel.equals(funnel.elementFunnel);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return SequentialFunnel.class.hashCode() ^ elementFunnel.hashCode();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a funnel for longs.
|
||||
*
|
||||
* @since 13.0
|
||||
*/
|
||||
public static Funnel<Long> longFunnel() {
|
||||
return LongFunnel.INSTANCE;
|
||||
}
|
||||
|
||||
private enum LongFunnel implements Funnel<Long> {
|
||||
INSTANCE;
|
||||
|
||||
public void funnel(Long from, PrimitiveSink into) {
|
||||
into.putLong(from);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Funnels.longFunnel()";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps a {@code PrimitiveSink} as an {@link OutputStream}, so it is easy to
|
||||
* {@link Funnel#funnel funnel} an object to a {@code PrimitiveSink} if there is
|
||||
* already a way to write the contents of the object to an {@code OutputStream}.
|
||||
*
|
||||
* <p>
|
||||
* The {@code close} and {@code flush} methods of the returned
|
||||
* {@code OutputStream} do nothing, and no method throws {@code IOException}.
|
||||
*
|
||||
* @since 13.0
|
||||
*/
|
||||
public static OutputStream asOutputStream(PrimitiveSink sink) {
|
||||
return new SinkAsStream(sink);
|
||||
}
|
||||
|
||||
private static class SinkAsStream extends OutputStream {
|
||||
final PrimitiveSink sink;
|
||||
|
||||
SinkAsStream(PrimitiveSink sink) {
|
||||
this.sink = Preconditions.checkNotNull(sink);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(int b) {
|
||||
sink.putByte((byte) b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(byte[] bytes) {
|
||||
sink.putBytes(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(byte[] bytes, int off, int len) {
|
||||
sink.putBytes(bytes, off, len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Funnels.asOutputStream(" + sink + ")";
|
||||
}
|
||||
}
|
||||
}
|
384
sources/main/java/com/google/common/hash/HashCode.java
Normal file
384
sources/main/java/com/google/common/hash/HashCode.java
Normal file
@ -0,0 +1,384 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.base.Preconditions.checkState;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.security.MessageDigest;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.google.common.primitives.UnsignedInts;
|
||||
|
||||
/**
|
||||
* An immutable hash code of arbitrary bit length.
|
||||
*
|
||||
* @author Dimitris Andreou
|
||||
* @author Kurt Alfred Kluever
|
||||
* @since 11.0
|
||||
*/
|
||||
@Beta
|
||||
public abstract class HashCode {
|
||||
HashCode() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of bits in this hash code; a positive multiple of 8.
|
||||
*/
|
||||
public abstract int bits();
|
||||
|
||||
/**
|
||||
* Returns the first four bytes of {@linkplain #asBytes() this hashcode's
|
||||
* bytes}, converted to an {@code int} value in little-endian order.
|
||||
*
|
||||
* @throws IllegalStateException if {@code bits() < 32}
|
||||
*/
|
||||
public abstract int asInt();
|
||||
|
||||
/**
|
||||
* Returns the first eight bytes of {@linkplain #asBytes() this hashcode's
|
||||
* bytes}, converted to a {@code long} value in little-endian order.
|
||||
*
|
||||
* @throws IllegalStateException if {@code bits() < 64}
|
||||
*/
|
||||
public abstract long asLong();
|
||||
|
||||
/**
|
||||
* If this hashcode has enough bits, returns {@code asLong()}, otherwise returns
|
||||
* a {@code long} value with {@code asBytes()} as the least-significant bytes
|
||||
* and {@code 0x00} as the remaining most-significant bytes.
|
||||
*
|
||||
* @since 14.0 (since 11.0 as {@code Hashing.padToLong(HashCode)})
|
||||
*/
|
||||
public abstract long padToLong();
|
||||
|
||||
/**
|
||||
* Returns the value of this hash code as a byte array. The caller may modify
|
||||
* the byte array; changes to it will <i>not</i> be reflected in this
|
||||
* {@code HashCode} object or any other arrays returned by this method.
|
||||
*/
|
||||
// TODO(user): consider ByteString here, when that is available
|
||||
public abstract byte[] asBytes();
|
||||
|
||||
/**
|
||||
* Copies bytes from this hash code into {@code dest}.
|
||||
*
|
||||
* @param dest the byte array into which the hash code will be written
|
||||
* @param offset the start offset in the data
|
||||
* @param maxLength the maximum number of bytes to write
|
||||
* @return the number of bytes written to {@code dest}
|
||||
* @throws IndexOutOfBoundsException if there is not enough room in {@code dest}
|
||||
*/
|
||||
public int writeBytesTo(byte[] dest, int offset, int maxLength) {
|
||||
maxLength = Ints.min(maxLength, bits() / 8);
|
||||
Preconditions.checkPositionIndexes(offset, offset + maxLength, dest.length);
|
||||
writeBytesToImpl(dest, offset, maxLength);
|
||||
return maxLength;
|
||||
}
|
||||
|
||||
abstract void writeBytesToImpl(byte[] dest, int offset, int maxLength);
|
||||
|
||||
/**
|
||||
* Returns a mutable view of the underlying bytes for the given {@code HashCode}
|
||||
* if it is a byte-based hashcode. Otherwise it returns
|
||||
* {@link HashCode#asBytes}. Do <i>not</i> mutate this array or else you will
|
||||
* break the immutability contract of {@code HashCode}.
|
||||
*/
|
||||
byte[] getBytesInternal() {
|
||||
return asBytes();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a 32-bit {@code HashCode} representation of the given int value. The
|
||||
* underlying bytes are interpreted in little endian order.
|
||||
*
|
||||
* @since 15.0 (since 12.0 in HashCodes)
|
||||
*/
|
||||
public static HashCode fromInt(int hash) {
|
||||
return new IntHashCode(hash);
|
||||
}
|
||||
|
||||
private static final class IntHashCode extends HashCode implements Serializable {
|
||||
final int hash;
|
||||
|
||||
IntHashCode(int hash) {
|
||||
this.hash = hash;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int bits() {
|
||||
return 32;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] asBytes() {
|
||||
return new byte[] { (byte) hash, (byte) (hash >> 8), (byte) (hash >> 16), (byte) (hash >> 24) };
|
||||
}
|
||||
|
||||
@Override
|
||||
public int asInt() {
|
||||
return hash;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long asLong() {
|
||||
throw new IllegalStateException("this HashCode only has 32 bits; cannot create a long");
|
||||
}
|
||||
|
||||
@Override
|
||||
public long padToLong() {
|
||||
return UnsignedInts.toLong(hash);
|
||||
}
|
||||
|
||||
@Override
|
||||
void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
|
||||
for (int i = 0; i < maxLength; i++) {
|
||||
dest[offset + i] = (byte) (hash >> (i * 8));
|
||||
}
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a 64-bit {@code HashCode} representation of the given long value. The
|
||||
* underlying bytes are interpreted in little endian order.
|
||||
*
|
||||
* @since 15.0 (since 12.0 in HashCodes)
|
||||
*/
|
||||
public static HashCode fromLong(long hash) {
|
||||
return new LongHashCode(hash);
|
||||
}
|
||||
|
||||
private static final class LongHashCode extends HashCode implements Serializable {
|
||||
final long hash;
|
||||
|
||||
LongHashCode(long hash) {
|
||||
this.hash = hash;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int bits() {
|
||||
return 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] asBytes() {
|
||||
return new byte[] { (byte) hash, (byte) (hash >> 8), (byte) (hash >> 16), (byte) (hash >> 24),
|
||||
(byte) (hash >> 32), (byte) (hash >> 40), (byte) (hash >> 48), (byte) (hash >> 56) };
|
||||
}
|
||||
|
||||
@Override
|
||||
public int asInt() {
|
||||
return (int) hash;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long asLong() {
|
||||
return hash;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long padToLong() {
|
||||
return hash;
|
||||
}
|
||||
|
||||
@Override
|
||||
void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
|
||||
for (int i = 0; i < maxLength; i++) {
|
||||
dest[offset + i] = (byte) (hash >> (i * 8));
|
||||
}
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@code HashCode} from a byte array. The array is defensively copied
|
||||
* to preserve the immutability contract of {@code HashCode}. The array cannot
|
||||
* be empty.
|
||||
*
|
||||
* @since 15.0 (since 12.0 in HashCodes)
|
||||
*/
|
||||
public static HashCode fromBytes(byte[] bytes) {
|
||||
checkArgument(bytes.length >= 1, "A HashCode must contain at least 1 byte.");
|
||||
return fromBytesNoCopy(bytes.clone());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@code HashCode} from a byte array. The array is <i>not</i> copied
|
||||
* defensively, so it must be handed-off so as to preserve the immutability
|
||||
* contract of {@code HashCode}.
|
||||
*/
|
||||
static HashCode fromBytesNoCopy(byte[] bytes) {
|
||||
return new BytesHashCode(bytes);
|
||||
}
|
||||
|
||||
private static final class BytesHashCode extends HashCode implements Serializable {
|
||||
final byte[] bytes;
|
||||
|
||||
BytesHashCode(byte[] bytes) {
|
||||
this.bytes = checkNotNull(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int bits() {
|
||||
return bytes.length * 8;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] asBytes() {
|
||||
return bytes.clone();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int asInt() {
|
||||
checkState(bytes.length >= 4, "HashCode#asInt() requires >= 4 bytes (it only has %s bytes).", bytes.length);
|
||||
return (bytes[0] & 0xFF) | ((bytes[1] & 0xFF) << 8) | ((bytes[2] & 0xFF) << 16) | ((bytes[3] & 0xFF) << 24);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long asLong() {
|
||||
checkState(bytes.length >= 8, "HashCode#asLong() requires >= 8 bytes (it only has %s bytes).",
|
||||
bytes.length);
|
||||
return padToLong();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long padToLong() {
|
||||
long retVal = (bytes[0] & 0xFF);
|
||||
for (int i = 1; i < Math.min(bytes.length, 8); i++) {
|
||||
retVal |= (bytes[i] & 0xFFL) << (i * 8);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@Override
|
||||
void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
|
||||
System.arraycopy(bytes, 0, dest, offset, maxLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
byte[] getBytesInternal() {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@code HashCode} from a hexadecimal ({@code base 16}) encoded
|
||||
* string. The string must be at least 2 characters long, and contain only
|
||||
* valid, lower-cased hexadecimal characters.
|
||||
*
|
||||
* <p>
|
||||
* This method accepts the exact format generated by {@link #toString}. If you
|
||||
* require more lenient {@code base 16} decoding, please use
|
||||
* {@link com.google.common.io.BaseEncoding#decode} (and pass the result to
|
||||
* {@link #fromBytes}).
|
||||
*
|
||||
* @since 15.0
|
||||
*/
|
||||
public static HashCode fromString(String string) {
|
||||
checkArgument(string.length() >= 2, "input string (%s) must have at least 2 characters", string);
|
||||
checkArgument(string.length() % 2 == 0, "input string (%s) must have an even number of characters", string);
|
||||
|
||||
byte[] bytes = new byte[string.length() / 2];
|
||||
for (int i = 0; i < string.length(); i += 2) {
|
||||
int ch1 = decode(string.charAt(i)) << 4;
|
||||
int ch2 = decode(string.charAt(i + 1));
|
||||
bytes[i / 2] = (byte) (ch1 + ch2);
|
||||
}
|
||||
return fromBytesNoCopy(bytes);
|
||||
}
|
||||
|
||||
private static int decode(char ch) {
|
||||
if (ch >= '0' && ch <= '9') {
|
||||
return ch - '0';
|
||||
}
|
||||
if (ch >= 'a' && ch <= 'f') {
|
||||
return ch - 'a' + 10;
|
||||
}
|
||||
throw new IllegalArgumentException("Illegal hexadecimal character: " + ch);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean equals(@Nullable Object object) {
|
||||
if (object instanceof HashCode) {
|
||||
HashCode that = (HashCode) object;
|
||||
// Undocumented: this is a non-short-circuiting equals(), in case this is a
|
||||
// cryptographic
|
||||
// hash code, in which case we don't want to leak timing information
|
||||
return MessageDigest.isEqual(this.asBytes(), that.asBytes());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a "Java hash code" for this {@code HashCode} instance; this is
|
||||
* well-defined (so, for example, you can safely put {@code HashCode} instances
|
||||
* into a {@code
|
||||
* HashSet}) but is otherwise probably not what you want to use.
|
||||
*/
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
// If we have at least 4 bytes (32 bits), just take the first 4 bytes. Since
|
||||
// this is
|
||||
// already a (presumably) high-quality hash code, any four bytes of it will do.
|
||||
if (bits() >= 32) {
|
||||
return asInt();
|
||||
}
|
||||
// If we have less than 4 bytes, use them all.
|
||||
byte[] bytes = asBytes();
|
||||
int val = (bytes[0] & 0xFF);
|
||||
for (int i = 1; i < bytes.length; i++) {
|
||||
val |= ((bytes[i] & 0xFF) << (i * 8));
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string containing each byte of {@link #asBytes}, in order, as a
|
||||
* two-digit unsigned hexadecimal number in lower case.
|
||||
*
|
||||
* <p>
|
||||
* Note that if the output is considered to be a single hexadecimal number, this
|
||||
* hash code's bytes are the <i>big-endian</i> representation of that number.
|
||||
* This may be surprising since everything else in the hashing API uniformly
|
||||
* treats multibyte values as little-endian. But this format conveniently
|
||||
* matches that of utilities such as the UNIX {@code md5sum} command.
|
||||
*
|
||||
* <p>
|
||||
* To create a {@code HashCode} from its string representation, see
|
||||
* {@link #fromString}.
|
||||
*/
|
||||
@Override
|
||||
public final String toString() {
|
||||
byte[] bytes = asBytes();
|
||||
StringBuilder sb = new StringBuilder(2 * bytes.length);
|
||||
for (byte b : bytes) {
|
||||
sb.append(hexDigits[(b >> 4) & 0xf]).append(hexDigits[b & 0xf]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static final char[] hexDigits = "0123456789abcdef".toCharArray();
|
||||
}
|
246
sources/main/java/com/google/common/hash/HashFunction.java
Normal file
246
sources/main/java/com/google/common/hash/HashFunction.java
Normal file
@ -0,0 +1,246 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
import com.google.common.primitives.Ints;
|
||||
|
||||
/**
|
||||
* A hash function is a collision-averse pure function that maps an arbitrary
|
||||
* block of data to a number called a <i>hash code</i>.
|
||||
*
|
||||
* <h3>Definition</h3>
|
||||
*
|
||||
* <p>
|
||||
* Unpacking this definition:
|
||||
*
|
||||
* <ul>
|
||||
* <li><b>block of data:</b> the input for a hash function is always, in
|
||||
* concept, an ordered byte array. This hashing API accepts an arbitrary
|
||||
* sequence of byte and multibyte values (via {@link Hasher}), but this is
|
||||
* merely a convenience; these are always translated into raw byte sequences
|
||||
* under the covers.
|
||||
*
|
||||
* <li><b>hash code:</b> each hash function always yields hash codes of the same
|
||||
* fixed bit length (given by {@link #bits}). For example, {@link Hashing#sha1}
|
||||
* produces a 160-bit number, while {@link Hashing#murmur3_32()} yields only 32
|
||||
* bits. Because a {@code long} value is clearly insufficient to hold all hash
|
||||
* code values, this API represents a hash code as an instance of
|
||||
* {@link HashCode}.
|
||||
*
|
||||
* <li><b>pure function:</b> the value produced must depend only on the input
|
||||
* bytes, in the order they appear. Input data is never modified.
|
||||
* {@link HashFunction} instances should always be stateless, and therefore
|
||||
* thread-safe.
|
||||
*
|
||||
* <li><b>collision-averse:</b> while it can't be helped that a hash function
|
||||
* will sometimes produce the same hash code for distinct inputs (a
|
||||
* "collision"), every hash function strives to <i>some</i> degree to make this
|
||||
* unlikely. (Without this condition, a function that always returns zero could
|
||||
* be called a hash function. It is not.)
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* Summarizing the last two points: "equal yield equal <i>always</i>; unequal
|
||||
* yield unequal <i>often</i>." This is the most important characteristic of all
|
||||
* hash functions.
|
||||
*
|
||||
* <h3>Desirable properties</h3>
|
||||
*
|
||||
* <p>
|
||||
* A high-quality hash function strives for some subset of the following
|
||||
* virtues:
|
||||
*
|
||||
* <ul>
|
||||
* <li><b>collision-resistant:</b> while the definition above requires making at
|
||||
* least <i>some</i> token attempt, one measure of the quality of a hash
|
||||
* function is <i>how well</i> it succeeds at this goal. Important note: it may
|
||||
* be easy to achieve the theoretical minimum collision rate when using
|
||||
* completely <i>random</i> sample input. The true test of a hash function is
|
||||
* how it performs on representative real-world data, which tends to contain
|
||||
* many hidden patterns and clumps. The goal of a good hash function is to stamp
|
||||
* these patterns out as thoroughly as possible.
|
||||
*
|
||||
* <li><b>bit-dispersing:</b> masking out any <i>single bit</i> from a hash code
|
||||
* should yield only the expected <i>twofold</i> increase to all collision
|
||||
* rates. Informally, the "information" in the hash code should be as evenly
|
||||
* "spread out" through the hash code's bits as possible. The result is that,
|
||||
* for example, when choosing a bucket in a hash table of size 2^8, <i>any</i>
|
||||
* eight bits could be consistently used.
|
||||
*
|
||||
* <li><b>cryptographic:</b> certain hash functions such as
|
||||
* {@link Hashing#sha512} are designed to make it as infeasible as possible to
|
||||
* reverse-engineer the input that produced a given hash code, or even to
|
||||
* discover <i>any</i> two distinct inputs that yield the same result. These are
|
||||
* called <i>cryptographic hash functions</i>. But, whenever it is learned that
|
||||
* either of these feats has become computationally feasible, the function is
|
||||
* deemed "broken" and should no longer be used for secure purposes. (This is
|
||||
* the likely eventual fate of <i>all</i> cryptographic hashes.)
|
||||
*
|
||||
* <li><b>fast:</b> perhaps self-explanatory, but often the most important
|
||||
* consideration. We have published <a href="#noWeHaventYet">microbenchmark
|
||||
* results</a> for many common hash functions.
|
||||
* </ul>
|
||||
*
|
||||
* <h3>Providing input to a hash function</h3>
|
||||
*
|
||||
* <p>
|
||||
* The primary way to provide the data that your hash function should act on is
|
||||
* via a {@link Hasher}. Obtain a new hasher from the hash function using
|
||||
* {@link #newHasher}, "push" the relevant data into it using methods like
|
||||
* {@link Hasher#putBytes(byte[])}, and finally ask for the {@code HashCode}
|
||||
* when finished using {@link Hasher#hash}. (See an {@linkplain #newHasher
|
||||
* example} of this.)
|
||||
*
|
||||
* <p>
|
||||
* If all you want to hash is a single byte array, string or {@code long} value,
|
||||
* there are convenient shortcut methods defined directly on
|
||||
* {@link HashFunction} to make this easier.
|
||||
*
|
||||
* <p>
|
||||
* Hasher accepts primitive data types, but can also accept any Object of type
|
||||
* {@code
|
||||
* T} provided that you implement a {@link Funnel Funnel&lt;T&gt;} to specify how to
|
||||
* "feed" data from that object into the function. (See
|
||||
* {@linkplain Hasher#putObject an example} of this.)
|
||||
*
|
||||
* <p>
|
||||
* <b>Compatibility note:</b> Throughout this API, multibyte values are always
|
||||
* interpreted in <i>little-endian</i> order. That is, hashing the byte array
|
||||
* {@code {0x01, 0x02, 0x03, 0x04}} is equivalent to hashing the {@code int}
|
||||
* value {@code
|
||||
* 0x04030201}. If this isn't what you need, methods such as
|
||||
* {@link Integer#reverseBytes} and {@link Ints#toByteArray} will help.
|
||||
*
|
||||
* <h3>Relationship to {@link Object#hashCode}</h3>
|
||||
*
|
||||
* <p>
|
||||
* Java's baked-in concept of hash codes is constrained to 32 bits, and provides
|
||||
* no separation between hash algorithms and the data they act on, so alternate
|
||||
* hash algorithms can't be easily substituted. Also, implementations of
|
||||
* {@code hashCode} tend to be poor-quality, in part because they end up
|
||||
* depending on <i>other</i> existing poor-quality {@code hashCode}
|
||||
* implementations, including those in many JDK classes.
|
||||
*
|
||||
* <p>
|
||||
* {@code Object.hashCode} implementations tend to be very fast, but have weak
|
||||
* collision prevention and <i>no</i> expectation of bit dispersion. This leaves
|
||||
* them perfectly suitable for use in hash tables, because extra collisions
|
||||
* cause only a slight performance hit, while poor bit dispersion is easily
|
||||
* corrected using a secondary hash function (which all reasonable hash table
|
||||
* implementations in Java use). For the many uses of hash functions beyond data
|
||||
* structures, however, {@code Object.hashCode} almost always falls short --
|
||||
* hence this library.
|
||||
*
|
||||
* @author Kevin Bourrillion
|
||||
* @since 11.0
|
||||
*/
|
||||
@Beta
public interface HashFunction {
	/**
	 * Begins a new hash code computation by returning an initialized, stateful
	 * {@code Hasher} instance that is ready to receive data. Example:
	 *
	 * <pre>   {@code
	 *
	 *   HashFunction hf = Hashing.md5();
	 *   HashCode hc = hf.newHasher().putLong(id).putBoolean(isActive).hash();}</pre>
	 */
	Hasher newHasher();

	/**
	 * Begins a new hash code computation as {@link #newHasher()}, but provides a
	 * hint of the expected size of the input (in bytes). This is only important for
	 * non-streaming hash functions (hash functions that need to buffer their whole
	 * input before processing any of it).
	 */
	Hasher newHasher(int expectedInputSize);

	/**
	 * Shortcut for {@code newHasher().putInt(input).hash()}; returns the hash code
	 * for the given {@code int} value, interpreted in little-endian byte order. The
	 * implementation <i>might</i> perform better than its longhand equivalent, but
	 * should not perform worse.
	 *
	 * @since 12.0
	 */
	HashCode hashInt(int input);

	/**
	 * Shortcut for {@code newHasher().putLong(input).hash()}; returns the hash code
	 * for the given {@code long} value, interpreted in little-endian byte order.
	 * The implementation <i>might</i> perform better than its longhand equivalent,
	 * but should not perform worse.
	 */
	HashCode hashLong(long input);

	/**
	 * Shortcut for {@code newHasher().putBytes(input).hash()}. The implementation
	 * <i>might</i> perform better than its longhand equivalent, but should not
	 * perform worse.
	 */
	HashCode hashBytes(byte[] input);

	/**
	 * Shortcut for {@code newHasher().putBytes(input, off, len).hash()}. The
	 * implementation <i>might</i> perform better than its longhand equivalent, but
	 * should not perform worse.
	 *
	 * @throws IndexOutOfBoundsException if {@code off < 0} or
	 *                                   {@code off + len > input.length} or
	 *                                   {@code len < 0}
	 */
	HashCode hashBytes(byte[] input, int off, int len);

	/**
	 * Shortcut for {@code newHasher().putUnencodedChars(input).hash()}. The
	 * implementation <i>might</i> perform better than its longhand equivalent, but
	 * should not perform worse. Note that no character encoding is performed; the
	 * low byte and high byte of each {@code char} are hashed directly (in that
	 * order).
	 *
	 * @since 15.0 (since 11.0 as hashString(CharSequence)).
	 */
	HashCode hashUnencodedChars(CharSequence input);

	/**
	 * Shortcut for {@code newHasher().putString(input, charset).hash()}. Characters
	 * are encoded using the given {@link Charset}. The implementation <i>might</i>
	 * perform better than its longhand equivalent, but should not perform worse.
	 */
	HashCode hashString(CharSequence input, Charset charset);

	/**
	 * Shortcut for {@code newHasher().putObject(instance, funnel).hash()}. The
	 * implementation <i>might</i> perform better than its longhand equivalent, but
	 * should not perform worse.
	 *
	 * @since 14.0
	 */
	<T> HashCode hashObject(T instance, Funnel<? super T> funnel);

	/**
	 * Returns the number of bits (a multiple of 32) that each hash code produced by
	 * this hash function has.
	 */
	int bits();
}
|
131
sources/main/java/com/google/common/hash/Hasher.java
Normal file
131
sources/main/java/com/google/common/hash/Hasher.java
Normal file
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
|
||||
/**
|
||||
* A {@link PrimitiveSink} that can compute a hash code after reading the input.
|
||||
* Each hasher should translate all multibyte values ({@link #putInt(int)},
|
||||
* {@link #putLong(long)}, etc) to bytes in little-endian order.
|
||||
*
|
||||
* <p>
|
||||
* <b>Warning:</b> The result of calling any methods after calling {@link #hash}
|
||||
* is undefined.
|
||||
*
|
||||
* <p>
|
||||
* <b>Warning:</b> Using a specific character encoding when hashing a
|
||||
* {@link CharSequence} with {@link #putString(CharSequence, Charset)} is
|
||||
* generally only useful for cross-language compatibility (otherwise prefer
|
||||
* {@link #putUnencodedChars}). However, the character encodings must be
|
||||
* identical across languages. Also beware that {@link Charset} definitions may
|
||||
* occasionally change between Java releases.
|
||||
*
|
||||
* <p>
|
||||
* <b>Warning:</b> Chunks of data that are put into the {@link Hasher} are not
|
||||
* delimited. The resulting {@link HashCode} is dependent only on the bytes
|
||||
* inserted, and the order in which they were inserted, not how those bytes were
|
||||
* chunked into discrete put() operations. For example, the following three
|
||||
* expressions all generate colliding hash codes:
|
||||
*
|
||||
* <pre>
|
||||
* {@code
|
||||
*
|
||||
* newHasher().putByte(b1).putByte(b2).putByte(b3).hash()
|
||||
* newHasher().putByte(b1).putBytes(new byte[] { b2, b3 }).hash()
|
||||
* newHasher().putBytes(new byte[] { b1, b2, b3 }).hash()}
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* If you wish to avoid this, you should either prepend or append the size of
|
||||
* each chunk. Keep in mind that when dealing with char sequences, the encoded
|
||||
* form of two concatenated char sequences is not equivalent to the
|
||||
* concatenation of their encoded form. Therefore,
|
||||
* {@link #putString(CharSequence, Charset)} should only be used consistently
|
||||
* with <i>complete</i> sequences and not broken into chunks.
|
||||
*
|
||||
* @author Kevin Bourrillion
|
||||
* @since 11.0
|
||||
*/
|
||||
@Beta
public interface Hasher extends PrimitiveSink {
	// The put* methods below are redeclared from PrimitiveSink with a Hasher
	// return type, so that a chain of put calls can end in hash().

	@Override
	Hasher putByte(byte b);

	@Override
	Hasher putBytes(byte[] bytes);

	@Override
	Hasher putBytes(byte[] bytes, int off, int len);

	@Override
	Hasher putShort(short s);

	@Override
	Hasher putInt(int i);

	@Override
	Hasher putLong(long l);

	/**
	 * Equivalent to {@code putInt(Float.floatToRawIntBits(f))}.
	 */
	@Override
	Hasher putFloat(float f);

	/**
	 * Equivalent to {@code putLong(Double.doubleToRawLongBits(d))}.
	 */
	@Override
	Hasher putDouble(double d);

	/**
	 * Equivalent to {@code putByte(b ? (byte) 1 : (byte) 0)}.
	 */
	@Override
	Hasher putBoolean(boolean b);

	@Override
	Hasher putChar(char c);

	/**
	 * Equivalent to processing each {@code char} value in the {@code CharSequence},
	 * in order. The input must not be updated while this method is in progress.
	 *
	 * @since 15.0 (since 11.0 as putString(CharSequence)).
	 */
	@Override
	Hasher putUnencodedChars(CharSequence charSequence);

	/**
	 * Equivalent to {@code putBytes(charSequence.toString().getBytes(charset))}.
	 */
	@Override
	Hasher putString(CharSequence charSequence, Charset charset);

	/**
	 * A simple convenience for {@code funnel.funnel(object, this)}.
	 */
	<T> Hasher putObject(T instance, Funnel<? super T> funnel);

	/**
	 * Computes a hash code based on the data that have been provided to this
	 * hasher. The result is unspecified if this method is called more than once on
	 * the same instance.
	 */
	HashCode hash();
}
|
483
sources/main/java/com/google/common/hash/Hashing.java
Normal file
483
sources/main/java/com/google/common/hash/Hashing.java
Normal file
@ -0,0 +1,483 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
|
||||
import java.security.MessageDigest;
|
||||
import java.util.Iterator;
|
||||
import java.util.zip.Adler32;
|
||||
import java.util.zip.CRC32;
|
||||
import java.util.zip.Checksum;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Supplier;
|
||||
|
||||
/**
|
||||
* Static methods to obtain {@link HashFunction} instances, and other static
|
||||
* hashing-related utilities.
|
||||
*
|
||||
* <p>
|
||||
* A comparison of the various hash functions can be found
|
||||
* <a href="http://goo.gl/jS7HH">here</a>.
|
||||
*
|
||||
* @author Kevin Bourrillion
|
||||
* @author Dimitris Andreou
|
||||
* @author Kurt Alfred Kluever
|
||||
* @since 11.0
|
||||
*/
|
||||
@Beta
|
||||
public final class Hashing {
|
||||
/**
|
||||
* Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash
|
||||
* function. The algorithm the returned function implements is unspecified and
|
||||
* subject to change without notice.
|
||||
*
|
||||
* <p>
|
||||
* <b>Warning:</b> a new random seed for these functions is chosen each time the
|
||||
* {@code
|
||||
* Hashing} class is loaded. <b>Do not use this method</b> if hash codes may
|
||||
* escape the current process in any way, for example being sent over RPC, or
|
||||
* saved to disk.
|
||||
*
|
||||
* <p>
|
||||
* Repeated calls to this method on the same loaded {@code Hashing} class, using
|
||||
* the same value for {@code minimumBits}, will return identically-behaving
|
||||
* {@link HashFunction} instances.
|
||||
*
|
||||
* @param minimumBits a positive integer (can be arbitrarily large)
|
||||
* @return a hash function, described above, that produces hash codes of length
|
||||
* {@code
|
||||
* minimumBits} or greater
|
||||
*/
|
||||
public static HashFunction goodFastHash(int minimumBits) {
|
||||
int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
|
||||
|
||||
if (bits == 32) {
|
||||
return Murmur3_32Holder.GOOD_FAST_HASH_FUNCTION_32;
|
||||
}
|
||||
if (bits <= 128) {
|
||||
return Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128;
|
||||
}
|
||||
|
||||
// Otherwise, join together some 128-bit murmur3s
|
||||
int hashFunctionsNeeded = (bits + 127) / 128;
|
||||
HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
|
||||
hashFunctions[0] = Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128;
|
||||
int seed = GOOD_FAST_HASH_SEED;
|
||||
for (int i = 1; i < hashFunctionsNeeded; i++) {
|
||||
seed += 1500450271; // a prime; shouldn't matter
|
||||
hashFunctions[i] = murmur3_128(seed);
|
||||
}
|
||||
return new ConcatenatedHashFunction(hashFunctions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to randomize {@link #goodFastHash} instances, so that programs which
|
||||
* persist anything dependent on the hash codes they produce will fail sooner.
|
||||
*/
|
||||
private static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis();
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the
|
||||
* <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 32-bit
|
||||
* murmur3 algorithm, x86 variant</a> (little-endian variant), using the given
|
||||
* seed value.
|
||||
*
|
||||
* <p>
|
||||
* The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
|
||||
*/
|
||||
public static HashFunction murmur3_32(int seed) {
|
||||
return new Murmur3_32HashFunction(seed);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the
|
||||
* <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 32-bit
|
||||
* murmur3 algorithm, x86 variant</a> (little-endian variant), using a seed
|
||||
* value of zero.
|
||||
*
|
||||
* <p>
|
||||
* The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
|
||||
*/
|
||||
public static HashFunction murmur3_32() {
|
||||
return Murmur3_32Holder.MURMUR3_32;
|
||||
}
|
||||
|
||||
private static class Murmur3_32Holder {
|
||||
static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0);
|
||||
|
||||
/** Returned by {@link #goodFastHash} when {@code minimumBits <= 32}. */
|
||||
static final HashFunction GOOD_FAST_HASH_FUNCTION_32 = murmur3_32(GOOD_FAST_HASH_SEED);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the
|
||||
* <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 128-bit
|
||||
* murmur3 algorithm, x64 variant</a> (little-endian variant), using the given
|
||||
* seed value.
|
||||
*
|
||||
* <p>
|
||||
* The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
|
||||
*/
|
||||
public static HashFunction murmur3_128(int seed) {
|
||||
return new Murmur3_128HashFunction(seed);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the
|
||||
* <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 128-bit
|
||||
* murmur3 algorithm, x64 variant</a> (little-endian variant), using a seed
|
||||
* value of zero.
|
||||
*
|
||||
* <p>
|
||||
* The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
|
||||
*/
|
||||
public static HashFunction murmur3_128() {
|
||||
return Murmur3_128Holder.MURMUR3_128;
|
||||
}
|
||||
|
||||
private static class Murmur3_128Holder {
|
||||
static final HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0);
|
||||
|
||||
/** Returned by {@link #goodFastHash} when {@code 32 < minimumBits <= 128}. */
|
||||
static final HashFunction GOOD_FAST_HASH_FUNCTION_128 = murmur3_128(GOOD_FAST_HASH_SEED);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the
|
||||
* <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a> using
|
||||
* a seed value of {@code k = 00 01 02 ...}.
|
||||
*
|
||||
* @since 15.0
|
||||
*/
|
||||
public static HashFunction sipHash24() {
|
||||
return SipHash24Holder.SIP_HASH_24;
|
||||
}
|
||||
|
||||
private static class SipHash24Holder {
|
||||
static final HashFunction SIP_HASH_24 = new SipHashFunction(2, 4, 0x0706050403020100L, 0x0f0e0d0c0b0a0908L);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the
|
||||
* <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a> using
|
||||
* the given seed.
|
||||
*
|
||||
* @since 15.0
|
||||
*/
|
||||
public static HashFunction sipHash24(long k0, long k1) {
|
||||
return new SipHashFunction(2, 4, k0, k1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the MD5 hash algorithm (128 hash bits)
|
||||
* by delegating to the MD5 {@link MessageDigest}.
|
||||
*/
|
||||
public static HashFunction md5() {
|
||||
return Md5Holder.MD5;
|
||||
}
|
||||
|
||||
private static class Md5Holder {
|
||||
static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the SHA-1 algorithm (160 hash bits) by
|
||||
* delegating to the SHA-1 {@link MessageDigest}.
|
||||
*/
|
||||
public static HashFunction sha1() {
|
||||
return Sha1Holder.SHA_1;
|
||||
}
|
||||
|
||||
private static class Sha1Holder {
|
||||
static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the SHA-256 algorithm (256 hash bits) by
|
||||
* delegating to the SHA-256 {@link MessageDigest}.
|
||||
*/
|
||||
public static HashFunction sha256() {
|
||||
return Sha256Holder.SHA_256;
|
||||
}
|
||||
|
||||
private static class Sha256Holder {
|
||||
static final HashFunction SHA_256 = new MessageDigestHashFunction("SHA-256", "Hashing.sha256()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the SHA-512 algorithm (512 hash bits) by
|
||||
* delegating to the SHA-512 {@link MessageDigest}.
|
||||
*/
|
||||
public static HashFunction sha512() {
|
||||
return Sha512Holder.SHA_512;
|
||||
}
|
||||
|
||||
private static class Sha512Holder {
|
||||
static final HashFunction SHA_512 = new MessageDigestHashFunction("SHA-512", "Hashing.sha512()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the CRC-32 checksum algorithm (32 hash
|
||||
* bits) by delegating to the {@link CRC32} {@link Checksum}.
|
||||
*
|
||||
* <p>
|
||||
* To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
|
||||
* {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
|
||||
*
|
||||
* @since 14.0
|
||||
*/
|
||||
public static HashFunction crc32() {
|
||||
return Crc32Holder.CRC_32;
|
||||
}
|
||||
|
||||
private static class Crc32Holder {
|
||||
static final HashFunction CRC_32 = checksumHashFunction(ChecksumType.CRC_32, "Hashing.crc32()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash function implementing the Adler-32 checksum algorithm (32 hash
|
||||
* bits) by delegating to the {@link Adler32} {@link Checksum}.
|
||||
*
|
||||
* <p>
|
||||
* To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
|
||||
* {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
|
||||
*
|
||||
* @since 14.0
|
||||
*/
|
||||
public static HashFunction adler32() {
|
||||
return Adler32Holder.ADLER_32;
|
||||
}
|
||||
|
||||
private static class Adler32Holder {
|
||||
static final HashFunction ADLER_32 = checksumHashFunction(ChecksumType.ADLER_32, "Hashing.adler32()");
|
||||
}
|
||||
|
||||
private static HashFunction checksumHashFunction(ChecksumType type, String toString) {
|
||||
return new ChecksumHashFunction(type, type.bits, toString);
|
||||
}
|
||||
|
||||
enum ChecksumType implements Supplier<Checksum> {
|
||||
CRC_32(32) {
|
||||
@Override
|
||||
public Checksum get() {
|
||||
return new CRC32();
|
||||
}
|
||||
},
|
||||
ADLER_32(32) {
|
||||
@Override
|
||||
public Checksum get() {
|
||||
return new Adler32();
|
||||
}
|
||||
};
|
||||
|
||||
private final int bits;
|
||||
|
||||
ChecksumType(int bits) {
|
||||
this.bits = bits;
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract Checksum get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in
|
||||
* a uniform manner that minimizes the need for remapping as {@code buckets}
|
||||
* grows. That is, {@code consistentHash(h, n)} equals:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code n - 1}, with approximate probability {@code 1/n}
|
||||
* <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
|
||||
* article on consistent hashing</a> for more information.
|
||||
*/
|
||||
public static int consistentHash(HashCode hashCode, int buckets) {
|
||||
return consistentHash(hashCode.padToLong(), buckets);
|
||||
}
|
||||
|
||||
/**
|
||||
* Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a
|
||||
* uniform manner that minimizes the need for remapping as {@code buckets}
|
||||
* grows. That is, {@code consistentHash(h, n)} equals:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code n - 1}, with approximate probability {@code 1/n}
|
||||
* <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
|
||||
* article on consistent hashing</a> for more information.
|
||||
*/
|
||||
public static int consistentHash(long input, int buckets) {
|
||||
checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
|
||||
LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input);
|
||||
int candidate = 0;
|
||||
int next;
|
||||
|
||||
// Jump from bucket to bucket until we go out of range
|
||||
while (true) {
|
||||
next = (int) ((candidate + 1) / generator.nextDouble());
|
||||
if (next >= 0 && next < buckets) {
|
||||
candidate = next;
|
||||
} else {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash code, having the same bit length as each of the input hash
|
||||
* codes, that combines the information of these hash codes in an ordered
|
||||
* fashion. That is, whenever two equal hash codes are produced by two calls to
|
||||
* this method, it is <i>as likely as possible</i> that each was computed from
|
||||
* the <i>same</i> input hash codes in the <i>same</i> order.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash
|
||||
* codes do not all have the same bit length
|
||||
*/
|
||||
public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
|
||||
Iterator<HashCode> iterator = hashCodes.iterator();
|
||||
checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
|
||||
int bits = iterator.next().bits();
|
||||
byte[] resultBytes = new byte[bits / 8];
|
||||
for (HashCode hashCode : hashCodes) {
|
||||
byte[] nextBytes = hashCode.asBytes();
|
||||
checkArgument(nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length.");
|
||||
for (int i = 0; i < nextBytes.length; i++) {
|
||||
resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
|
||||
}
|
||||
}
|
||||
return HashCode.fromBytesNoCopy(resultBytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a hash code, having the same bit length as each of the input hash
|
||||
* codes, that combines the information of these hash codes in an unordered
|
||||
* fashion. That is, whenever two equal hash codes are produced by two calls to
|
||||
* this method, it is <i>as likely as possible</i> that each was computed from
|
||||
* the <i>same</i> input hash codes in <i>some</i> order.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash
|
||||
* codes do not all have the same bit length
|
||||
*/
|
||||
public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
|
||||
Iterator<HashCode> iterator = hashCodes.iterator();
|
||||
checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
|
||||
byte[] resultBytes = new byte[iterator.next().bits() / 8];
|
||||
for (HashCode hashCode : hashCodes) {
|
||||
byte[] nextBytes = hashCode.asBytes();
|
||||
checkArgument(nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length.");
|
||||
for (int i = 0; i < nextBytes.length; i++) {
|
||||
resultBytes[i] += nextBytes[i];
|
||||
}
|
||||
}
|
||||
return HashCode.fromBytesNoCopy(resultBytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the passed argument is positive, and ceils it to a multiple of
|
||||
* 32.
|
||||
*/
|
||||
static int checkPositiveAndMakeMultipleOf32(int bits) {
|
||||
checkArgument(bits > 0, "Number of bits must be positive");
|
||||
return (bits + 31) & ~31;
|
||||
}
|
||||
|
||||
// TODO(kevinb): Maybe expose this class via a static Hashing method?
|
||||
@VisibleForTesting
|
||||
static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
|
||||
private final int bits;
|
||||
|
||||
ConcatenatedHashFunction(HashFunction... functions) {
|
||||
super(functions);
|
||||
int bitSum = 0;
|
||||
for (HashFunction function : functions) {
|
||||
bitSum += function.bits();
|
||||
}
|
||||
this.bits = bitSum;
|
||||
}
|
||||
|
||||
@Override
|
||||
HashCode makeHash(Hasher[] hashers) {
|
||||
byte[] bytes = new byte[bits / 8];
|
||||
int i = 0;
|
||||
for (Hasher hasher : hashers) {
|
||||
HashCode newHash = hasher.hash();
|
||||
i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8);
|
||||
}
|
||||
return HashCode.fromBytesNoCopy(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int bits() {
|
||||
return bits;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(@Nullable Object object) {
|
||||
if (object instanceof ConcatenatedHashFunction) {
|
||||
ConcatenatedHashFunction other = (ConcatenatedHashFunction) object;
|
||||
if (bits != other.bits || functions.length != other.functions.length) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < functions.length; i++) {
|
||||
if (!functions[i].equals(other.functions[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int hash = bits;
|
||||
for (HashFunction function : functions) {
|
||||
hash ^= function.hashCode();
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Linear congruential generator (LCG) used for consistent hashing. See
|
||||
* http://en.wikipedia.org/wiki/Linear_congruential_generator
|
||||
*/
|
||||
private static final class LinearCongruentialGenerator {
|
||||
private long state;
|
||||
|
||||
public LinearCongruentialGenerator(long seed) {
|
||||
this.state = seed;
|
||||
}
|
||||
|
||||
public double nextDouble() {
|
||||
state = 2862933555777941757L * state + 1;
|
||||
return ((double) ((int) (state >>> 33) + 1)) / (0x1.0p31);
|
||||
}
|
||||
}
|
||||
|
||||
/** Not instantiable: this class only exposes static members. */
private Hashing() {
    // intentionally empty
}
|
||||
}
|
108
sources/main/java/com/google/common/hash/HashingInputStream.java
Normal file
108
sources/main/java/com/google/common/hash/HashingInputStream.java
Normal file
@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (C) 2013 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
|
||||
import java.io.FilterInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
|
||||
/**
|
||||
* An {@link InputStream} that maintains a hash of the data read from it.
|
||||
*
|
||||
* @author Qian Huang
|
||||
* @since 16.0
|
||||
*/
|
||||
@Beta
|
||||
public final class HashingInputStream extends FilterInputStream {
|
||||
private final Hasher hasher;
|
||||
|
||||
/**
|
||||
* Creates an input stream that hashes using the given {@link HashFunction} and
|
||||
* delegates all data read from it to the underlying {@link InputStream}.
|
||||
*
|
||||
* <p>
|
||||
* The {@link InputStream} should not be read from before or after the hand-off.
|
||||
*/
|
||||
public HashingInputStream(HashFunction hashFunction, InputStream in) {
|
||||
super(checkNotNull(in));
|
||||
this.hasher = checkNotNull(hashFunction.newHasher());
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the next byte of data from the underlying input stream and updates the
|
||||
* hasher with the byte read.
|
||||
*/
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
int b = in.read();
|
||||
if (b != -1) {
|
||||
hasher.putByte((byte) b);
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the specified bytes of data from the underlying input stream and
|
||||
* updates the hasher with the bytes read.
|
||||
*/
|
||||
@Override
|
||||
public int read(byte[] bytes, int off, int len) throws IOException {
|
||||
int numOfBytesRead = in.read(bytes, off, len);
|
||||
if (numOfBytesRead != -1) {
|
||||
hasher.putBytes(bytes, off, numOfBytesRead);
|
||||
}
|
||||
return numOfBytesRead;
|
||||
}
|
||||
|
||||
/**
|
||||
* mark() is not supported for HashingInputStream
|
||||
*
|
||||
* @return {@code false} always
|
||||
*/
|
||||
@Override
|
||||
public boolean markSupported() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* mark() is not supported for HashingInputStream
|
||||
*/
|
||||
@Override
|
||||
public void mark(int readlimit) {
|
||||
}
|
||||
|
||||
/**
|
||||
* reset() is not supported for HashingInputStream.
|
||||
*
|
||||
* @throws IOException this operation is not supported
|
||||
*/
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
throw new IOException("reset not supported");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link HashCode} based on the data read from this stream. The
|
||||
* result is unspecified if this method is called more than once on the same
|
||||
* instance.
|
||||
*/
|
||||
public HashCode hash() {
|
||||
return hasher.hash();
|
||||
}
|
||||
}
|
@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
|
||||
import java.io.FilterOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
|
||||
/**
|
||||
* An {@link OutputStream} that maintains a hash of the data written to it.
|
||||
*
|
||||
* @author Nick Piepmeier
|
||||
* @since 16.0
|
||||
*/
|
||||
@Beta
|
||||
public final class HashingOutputStream extends FilterOutputStream {
|
||||
private final Hasher hasher;
|
||||
|
||||
/**
|
||||
* Creates an output stream that hashes using the given {@link HashFunction},
|
||||
* and forwards all data written to it to the underlying {@link OutputStream}.
|
||||
*
|
||||
* <p>
|
||||
* The {@link OutputStream} should not be written to before or after the
|
||||
* hand-off.
|
||||
*/
|
||||
// TODO(user): Evaluate whether it makes sense to always piggyback the
|
||||
// computation of a
|
||||
// HashCode on an existing OutputStream, compared to creating a separate
|
||||
// OutputStream that could
|
||||
// be (optionally) be combined with another if needed (with something like
|
||||
// MultiplexingOutputStream).
|
||||
public HashingOutputStream(HashFunction hashFunction, OutputStream out) {
|
||||
super(checkNotNull(out));
|
||||
this.hasher = checkNotNull(hashFunction.newHasher());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(int b) throws IOException {
|
||||
hasher.putByte((byte) b);
|
||||
out.write(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(byte[] bytes, int off, int len) throws IOException {
|
||||
hasher.putBytes(bytes, off, len);
|
||||
out.write(bytes, off, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link HashCode} based on the data written to this stream. The
|
||||
* result is unspecified if this method is called more than once on the same
|
||||
* instance.
|
||||
*/
|
||||
public HashCode hash() {
|
||||
return hasher.hash();
|
||||
}
|
||||
|
||||
// Overriding close() because FilterOutputStream's close() method pre-JDK8 has
|
||||
// bad behavior:
|
||||
// it silently ignores any exception thrown by flush(). Instead, just close the
|
||||
// delegate stream.
|
||||
// It should flush itself if necessary.
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
out.close();
|
||||
}
|
||||
}
|
@ -0,0 +1,159 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.base.Preconditions.checkState;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* {@link HashFunction} adapter for {@link MessageDigest} instances.
|
||||
*
|
||||
* @author Kevin Bourrillion
|
||||
* @author Dimitris Andreou
|
||||
*/
|
||||
final class MessageDigestHashFunction extends AbstractStreamingHashFunction implements Serializable {
|
||||
private final MessageDigest prototype;
|
||||
private final int bytes;
|
||||
private final boolean supportsClone;
|
||||
private final String toString;
|
||||
|
||||
MessageDigestHashFunction(String algorithmName, String toString) {
|
||||
this.prototype = getMessageDigest(algorithmName);
|
||||
this.bytes = prototype.getDigestLength();
|
||||
this.toString = checkNotNull(toString);
|
||||
this.supportsClone = supportsClone();
|
||||
}
|
||||
|
||||
MessageDigestHashFunction(String algorithmName, int bytes, String toString) {
|
||||
this.toString = checkNotNull(toString);
|
||||
this.prototype = getMessageDigest(algorithmName);
|
||||
int maxLength = prototype.getDigestLength();
|
||||
checkArgument(bytes >= 4 && bytes <= maxLength, "bytes (%s) must be >= 4 and < %s", bytes, maxLength);
|
||||
this.bytes = bytes;
|
||||
this.supportsClone = supportsClone();
|
||||
}
|
||||
|
||||
private boolean supportsClone() {
|
||||
try {
|
||||
prototype.clone();
|
||||
return true;
|
||||
} catch (CloneNotSupportedException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int bits() {
|
||||
return bytes * Byte.SIZE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return toString;
|
||||
}
|
||||
|
||||
private static MessageDigest getMessageDigest(String algorithmName) {
|
||||
try {
|
||||
return MessageDigest.getInstance(algorithmName);
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new AssertionError(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher newHasher() {
|
||||
if (supportsClone) {
|
||||
try {
|
||||
return new MessageDigestHasher((MessageDigest) prototype.clone(), bytes);
|
||||
} catch (CloneNotSupportedException e) {
|
||||
// falls through
|
||||
}
|
||||
}
|
||||
return new MessageDigestHasher(getMessageDigest(prototype.getAlgorithm()), bytes);
|
||||
}
|
||||
|
||||
private static final class SerializedForm implements Serializable {
|
||||
private final String algorithmName;
|
||||
private final int bytes;
|
||||
private final String toString;
|
||||
|
||||
private SerializedForm(String algorithmName, int bytes, String toString) {
|
||||
this.algorithmName = algorithmName;
|
||||
this.bytes = bytes;
|
||||
this.toString = toString;
|
||||
}
|
||||
|
||||
private Object readResolve() {
|
||||
return new MessageDigestHashFunction(algorithmName, bytes, toString);
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0;
|
||||
}
|
||||
|
||||
Object writeReplace() {
|
||||
return new SerializedForm(prototype.getAlgorithm(), bytes, toString);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hasher that updates a message digest.
|
||||
*/
|
||||
private static final class MessageDigestHasher extends AbstractByteHasher {
|
||||
|
||||
private final MessageDigest digest;
|
||||
private final int bytes;
|
||||
private boolean done;
|
||||
|
||||
private MessageDigestHasher(MessageDigest digest, int bytes) {
|
||||
this.digest = digest;
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void update(byte b) {
|
||||
checkNotDone();
|
||||
digest.update(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void update(byte[] b) {
|
||||
checkNotDone();
|
||||
digest.update(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void update(byte[] b, int off, int len) {
|
||||
checkNotDone();
|
||||
digest.update(b, off, len);
|
||||
}
|
||||
|
||||
private void checkNotDone() {
|
||||
checkState(!done, "Cannot re-use a Hasher after calling hash() on it");
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hash() {
|
||||
checkNotDone();
|
||||
done = true;
|
||||
return (bytes == digest.getDigestLength()) ? HashCode.fromBytesNoCopy(digest.digest())
|
||||
: HashCode.fromBytesNoCopy(Arrays.copyOf(digest.digest(), bytes));
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,205 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
* domain. The author hereby disclaims copyright to this source code.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Source:
|
||||
* http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
|
||||
* (Modified to adapt to Guava coding conventions and to use the HashFunction interface)
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.primitives.UnsignedBytes.toInt;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
/**
|
||||
* See http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp
|
||||
* MurmurHash3_x64_128
|
||||
*
|
||||
* @author Austin Appleby
|
||||
* @author Dimitris Andreou
|
||||
*/
|
||||
final class Murmur3_128HashFunction extends AbstractStreamingHashFunction implements Serializable {
|
||||
// TODO(user): when the shortcuts are implemented, update BloomFilterStrategies
|
||||
private final int seed;
|
||||
|
||||
/** Creates a 128-bit murmur3 function whose hashers all start from {@code seed}. */
Murmur3_128HashFunction(int seed) {
    this.seed = seed;
}
|
||||
|
||||
@Override
|
||||
public int bits() {
|
||||
return 128;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher newHasher() {
|
||||
return new Murmur3_128Hasher(seed);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Hashing.murmur3_128(" + seed + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(@Nullable Object object) {
|
||||
if (object instanceof Murmur3_128HashFunction) {
|
||||
Murmur3_128HashFunction other = (Murmur3_128HashFunction) object;
|
||||
return seed == other.seed;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return getClass().hashCode() ^ seed;
|
||||
}
|
||||
|
||||
private static final class Murmur3_128Hasher extends AbstractStreamingHasher {
|
||||
private static final int CHUNK_SIZE = 16;
|
||||
private static final long C1 = 0x87c37b91114253d5L;
|
||||
private static final long C2 = 0x4cf5ad432745937fL;
|
||||
private long h1;
|
||||
private long h2;
|
||||
private int length;
|
||||
|
||||
Murmur3_128Hasher(int seed) {
|
||||
super(CHUNK_SIZE);
|
||||
this.h1 = seed;
|
||||
this.h2 = seed;
|
||||
this.length = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void process(ByteBuffer bb) {
|
||||
long k1 = bb.getLong();
|
||||
long k2 = bb.getLong();
|
||||
bmix64(k1, k2);
|
||||
length += CHUNK_SIZE;
|
||||
}
|
||||
|
||||
private void bmix64(long k1, long k2) {
|
||||
h1 ^= mixK1(k1);
|
||||
|
||||
h1 = Long.rotateLeft(h1, 27);
|
||||
h1 += h2;
|
||||
h1 = h1 * 5 + 0x52dce729;
|
||||
|
||||
h2 ^= mixK2(k2);
|
||||
|
||||
h2 = Long.rotateLeft(h2, 31);
|
||||
h2 += h1;
|
||||
h2 = h2 * 5 + 0x38495ab5;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processRemaining(ByteBuffer bb) {
|
||||
long k1 = 0;
|
||||
long k2 = 0;
|
||||
length += bb.remaining();
|
||||
switch (bb.remaining()) {
|
||||
case 15:
|
||||
k2 ^= (long) toInt(bb.get(14)) << 48; // fall through
|
||||
case 14:
|
||||
k2 ^= (long) toInt(bb.get(13)) << 40; // fall through
|
||||
case 13:
|
||||
k2 ^= (long) toInt(bb.get(12)) << 32; // fall through
|
||||
case 12:
|
||||
k2 ^= (long) toInt(bb.get(11)) << 24; // fall through
|
||||
case 11:
|
||||
k2 ^= (long) toInt(bb.get(10)) << 16; // fall through
|
||||
case 10:
|
||||
k2 ^= (long) toInt(bb.get(9)) << 8; // fall through
|
||||
case 9:
|
||||
k2 ^= (long) toInt(bb.get(8)); // fall through
|
||||
case 8:
|
||||
k1 ^= bb.getLong();
|
||||
break;
|
||||
case 7:
|
||||
k1 ^= (long) toInt(bb.get(6)) << 48; // fall through
|
||||
case 6:
|
||||
k1 ^= (long) toInt(bb.get(5)) << 40; // fall through
|
||||
case 5:
|
||||
k1 ^= (long) toInt(bb.get(4)) << 32; // fall through
|
||||
case 4:
|
||||
k1 ^= (long) toInt(bb.get(3)) << 24; // fall through
|
||||
case 3:
|
||||
k1 ^= (long) toInt(bb.get(2)) << 16; // fall through
|
||||
case 2:
|
||||
k1 ^= (long) toInt(bb.get(1)) << 8; // fall through
|
||||
case 1:
|
||||
k1 ^= (long) toInt(bb.get(0));
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Should never get here.");
|
||||
}
|
||||
h1 ^= mixK1(k1);
|
||||
h2 ^= mixK2(k2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode makeHash() {
|
||||
h1 ^= length;
|
||||
h2 ^= length;
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
h1 = fmix64(h1);
|
||||
h2 = fmix64(h2);
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
return HashCode.fromBytesNoCopy(ByteBuffer.wrap(new byte[CHUNK_SIZE]).order(ByteOrder.LITTLE_ENDIAN)
|
||||
.putLong(h1).putLong(h2).array());
|
||||
}
|
||||
|
||||
private static long fmix64(long k) {
|
||||
k ^= k >>> 33;
|
||||
k *= 0xff51afd7ed558ccdL;
|
||||
k ^= k >>> 33;
|
||||
k *= 0xc4ceb9fe1a85ec53L;
|
||||
k ^= k >>> 33;
|
||||
return k;
|
||||
}
|
||||
|
||||
private static long mixK1(long k1) {
|
||||
k1 *= C1;
|
||||
k1 = Long.rotateLeft(k1, 31);
|
||||
k1 *= C2;
|
||||
return k1;
|
||||
}
|
||||
|
||||
private static long mixK2(long k2) {
|
||||
k2 *= C2;
|
||||
k2 = Long.rotateLeft(k2, 33);
|
||||
k2 *= C1;
|
||||
return k2;
|
||||
}
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0L;
|
||||
}
|
@ -0,0 +1,190 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
* domain. The author hereby disclaims copyright to this source code.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Source:
|
||||
* http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
|
||||
* (Modified to adapt to Guava coding conventions and to use the HashFunction interface)
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.primitives.UnsignedBytes.toInt;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import com.google.common.primitives.Chars;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.google.common.primitives.Longs;
|
||||
|
||||
/**
|
||||
* See http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp
|
||||
* MurmurHash3_x86_32
|
||||
*
|
||||
* @author Austin Appleby
|
||||
* @author Dimitris Andreou
|
||||
* @author Kurt Alfred Kluever
|
||||
*/
|
||||
final class Murmur3_32HashFunction extends AbstractStreamingHashFunction implements Serializable {
|
||||
private static final int C1 = 0xcc9e2d51;
|
||||
private static final int C2 = 0x1b873593;
|
||||
|
||||
private final int seed;
|
||||
|
||||
Murmur3_32HashFunction(int seed) {
|
||||
this.seed = seed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int bits() {
|
||||
return 32;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher newHasher() {
|
||||
return new Murmur3_32Hasher(seed);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Hashing.murmur3_32(" + seed + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(@Nullable Object object) {
|
||||
if (object instanceof Murmur3_32HashFunction) {
|
||||
Murmur3_32HashFunction other = (Murmur3_32HashFunction) object;
|
||||
return seed == other.seed;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return getClass().hashCode() ^ seed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashInt(int input) {
|
||||
int k1 = mixK1(input);
|
||||
int h1 = mixH1(seed, k1);
|
||||
|
||||
return fmix(h1, Ints.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode hashLong(long input) {
|
||||
int low = (int) input;
|
||||
int high = (int) (input >>> 32);
|
||||
|
||||
int k1 = mixK1(low);
|
||||
int h1 = mixH1(seed, k1);
|
||||
|
||||
k1 = mixK1(high);
|
||||
h1 = mixH1(h1, k1);
|
||||
|
||||
return fmix(h1, Longs.BYTES);
|
||||
}
|
||||
|
||||
// TODO(user): Maybe implement #hashBytes instead?
|
||||
@Override
|
||||
public HashCode hashUnencodedChars(CharSequence input) {
|
||||
int h1 = seed;
|
||||
|
||||
// step through the CharSequence 2 chars at a time
|
||||
for (int i = 1; i < input.length(); i += 2) {
|
||||
int k1 = input.charAt(i - 1) | (input.charAt(i) << 16);
|
||||
k1 = mixK1(k1);
|
||||
h1 = mixH1(h1, k1);
|
||||
}
|
||||
|
||||
// deal with any remaining characters
|
||||
if ((input.length() & 1) == 1) {
|
||||
int k1 = input.charAt(input.length() - 1);
|
||||
k1 = mixK1(k1);
|
||||
h1 ^= k1;
|
||||
}
|
||||
|
||||
return fmix(h1, Chars.BYTES * input.length());
|
||||
}
|
||||
|
||||
private static int mixK1(int k1) {
|
||||
k1 *= C1;
|
||||
k1 = Integer.rotateLeft(k1, 15);
|
||||
k1 *= C2;
|
||||
return k1;
|
||||
}
|
||||
|
||||
private static int mixH1(int h1, int k1) {
|
||||
h1 ^= k1;
|
||||
h1 = Integer.rotateLeft(h1, 13);
|
||||
h1 = h1 * 5 + 0xe6546b64;
|
||||
return h1;
|
||||
}
|
||||
|
||||
// Finalization mix - force all bits of a hash block to avalanche
|
||||
private static HashCode fmix(int h1, int length) {
|
||||
h1 ^= length;
|
||||
h1 ^= h1 >>> 16;
|
||||
h1 *= 0x85ebca6b;
|
||||
h1 ^= h1 >>> 13;
|
||||
h1 *= 0xc2b2ae35;
|
||||
h1 ^= h1 >>> 16;
|
||||
return HashCode.fromInt(h1);
|
||||
}
|
||||
|
||||
private static final class Murmur3_32Hasher extends AbstractStreamingHasher {
|
||||
private static final int CHUNK_SIZE = 4;
|
||||
private int h1;
|
||||
private int length;
|
||||
|
||||
Murmur3_32Hasher(int seed) {
|
||||
super(CHUNK_SIZE);
|
||||
this.h1 = seed;
|
||||
this.length = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void process(ByteBuffer bb) {
|
||||
int k1 = Murmur3_32HashFunction.mixK1(bb.getInt());
|
||||
h1 = Murmur3_32HashFunction.mixH1(h1, k1);
|
||||
length += CHUNK_SIZE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processRemaining(ByteBuffer bb) {
|
||||
length += bb.remaining();
|
||||
int k1 = 0;
|
||||
for (int i = 0; bb.hasRemaining(); i += 8) {
|
||||
k1 ^= toInt(bb.get()) << i;
|
||||
}
|
||||
h1 ^= Murmur3_32HashFunction.mixK1(k1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode makeHash() {
|
||||
return Murmur3_32HashFunction.fmix(h1, length);
|
||||
}
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0L;
|
||||
}
|
105
sources/main/java/com/google/common/hash/PrimitiveSink.java
Normal file
105
sources/main/java/com/google/common/hash/PrimitiveSink.java
Normal file
@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
|
||||
/**
|
||||
* An object which can receive a stream of primitive values.
|
||||
*
|
||||
* @author Kevin Bourrillion
|
||||
* @since 12.0 (in 11.0 as {@code Sink})
|
||||
*/
|
||||
@Beta
|
||||
public interface PrimitiveSink {
|
||||
/**
|
||||
* Puts a byte into this sink.
|
||||
*
|
||||
* @param b a byte
|
||||
* @return this instance
|
||||
*/
|
||||
PrimitiveSink putByte(byte b);
|
||||
|
||||
/**
|
||||
* Puts an array of bytes into this sink.
|
||||
*
|
||||
* @param bytes a byte array
|
||||
* @return this instance
|
||||
*/
|
||||
PrimitiveSink putBytes(byte[] bytes);
|
||||
|
||||
/**
|
||||
* Puts a chunk of an array of bytes into this sink. {@code bytes[off]} is the
|
||||
* first byte written, {@code bytes[off + len - 1]} is the last.
|
||||
*
|
||||
* @param bytes a byte array
|
||||
* @param off the start offset in the array
|
||||
* @param len the number of bytes to write
|
||||
* @return this instance
|
||||
* @throws IndexOutOfBoundsException if {@code off < 0} or
|
||||
* {@code off + len > bytes.length} or
|
||||
* {@code len < 0}
|
||||
*/
|
||||
PrimitiveSink putBytes(byte[] bytes, int off, int len);
|
||||
|
||||
/**
|
||||
* Puts a short into this sink.
|
||||
*/
|
||||
PrimitiveSink putShort(short s);
|
||||
|
||||
/**
|
||||
* Puts an int into this sink.
|
||||
*/
|
||||
PrimitiveSink putInt(int i);
|
||||
|
||||
/**
|
||||
* Puts a long into this sink.
|
||||
*/
|
||||
PrimitiveSink putLong(long l);
|
||||
|
||||
/**
|
||||
* Puts a float into this sink.
|
||||
*/
|
||||
PrimitiveSink putFloat(float f);
|
||||
|
||||
/**
|
||||
* Puts a double into this sink.
|
||||
*/
|
||||
PrimitiveSink putDouble(double d);
|
||||
|
||||
/**
|
||||
* Puts a boolean into this sink.
|
||||
*/
|
||||
PrimitiveSink putBoolean(boolean b);
|
||||
|
||||
/**
|
||||
* Puts a character into this sink.
|
||||
*/
|
||||
PrimitiveSink putChar(char c);
|
||||
|
||||
/**
|
||||
* Puts each 16-bit code unit from the {@link CharSequence} into this sink.
|
||||
*
|
||||
* @since 15.0 (since 11.0 as putString(CharSequence))
|
||||
*/
|
||||
PrimitiveSink putUnencodedChars(CharSequence charSequence);
|
||||
|
||||
/**
|
||||
* Puts a string into this sink using the given charset.
|
||||
*/
|
||||
PrimitiveSink putString(CharSequence charSequence, Charset charset);
|
||||
}
|
182
sources/main/java/com/google/common/hash/SipHashFunction.java
Normal file
182
sources/main/java/com/google/common/hash/SipHashFunction.java
Normal file
@ -0,0 +1,182 @@
|
||||
/*
|
||||
* Copyright (C) 2012 The Guava Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* SipHash-c-d was designed by Jean-Philippe Aumasson and Daniel J. Bernstein and is described in
|
||||
* "SipHash: a fast short-input PRF" (available at https://131002.net/siphash/siphash.pdf).
|
||||
*/
|
||||
|
||||
package com.google.common.hash;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
/**
|
||||
* {@link HashFunction} implementation of SipHash-c-d.
|
||||
*
|
||||
* @author Kurt Alfred Kluever
|
||||
* @author Jean-Philippe Aumasson
|
||||
* @author Daniel J. Bernstein
|
||||
*/
|
||||
final class SipHashFunction extends AbstractStreamingHashFunction implements Serializable {
|
||||
|
||||
// The number of compression rounds.
|
||||
private final int c;
|
||||
// The number of finalization rounds.
|
||||
private final int d;
|
||||
// Two 64-bit keys (represent a single 128-bit key).
|
||||
private final long k0;
|
||||
private final long k1;
|
||||
|
||||
/**
|
||||
* @param c the number of compression rounds (must be positive)
|
||||
* @param d the number of finalization rounds (must be positive)
|
||||
* @param k0 the first half of the key
|
||||
* @param k1 the second half of the key
|
||||
*/
|
||||
SipHashFunction(int c, int d, long k0, long k1) {
|
||||
checkArgument(c > 0, "The number of SipRound iterations (c=%s) during Compression must be positive.", c);
|
||||
checkArgument(d > 0, "The number of SipRound iterations (d=%s) during Finalization must be positive.", d);
|
||||
this.c = c;
|
||||
this.d = d;
|
||||
this.k0 = k0;
|
||||
this.k1 = k1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int bits() {
|
||||
return 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Hasher newHasher() {
|
||||
return new SipHasher(c, d, k0, k1);
|
||||
}
|
||||
|
||||
// TODO(user): Implement and benchmark the hashFoo() shortcuts.
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Hashing.sipHash" + c + "" + d + "(" + k0 + ", " + k1 + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(@Nullable Object object) {
|
||||
if (object instanceof SipHashFunction) {
|
||||
SipHashFunction other = (SipHashFunction) object;
|
||||
return (c == other.c) && (d == other.d) && (k0 == other.k0) && (k1 == other.k1);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return (int) (getClass().hashCode() ^ c ^ d ^ k0 ^ k1);
|
||||
}
|
||||
|
||||
private static final class SipHasher extends AbstractStreamingHasher {
|
||||
private static final int CHUNK_SIZE = 8;
|
||||
|
||||
// The number of compression rounds.
|
||||
private final int c;
|
||||
// The number of finalization rounds.
|
||||
private final int d;
|
||||
|
||||
// Four 64-bit words of internal state.
|
||||
// The initial state corresponds to the ASCII string
|
||||
// "somepseudorandomlygeneratedbytes",
|
||||
// big-endian encoded. There is nothing special about this value; the only
|
||||
// requirement
|
||||
// was some asymmetry so that the initial v0 and v1 differ from v2 and v3.
|
||||
private long v0 = 0x736f6d6570736575L;
|
||||
private long v1 = 0x646f72616e646f6dL;
|
||||
private long v2 = 0x6c7967656e657261L;
|
||||
private long v3 = 0x7465646279746573L;
|
||||
|
||||
// The number of bytes in the input.
|
||||
private long b = 0;
|
||||
|
||||
// The final 64-bit chunk includes the last 0 through 7 bytes of m followed by
|
||||
// null bytes
|
||||
// and ending with a byte encoding the positive integer b mod 256.
|
||||
private long finalM = 0;
|
||||
|
||||
SipHasher(int c, int d, long k0, long k1) {
|
||||
super(CHUNK_SIZE);
|
||||
this.c = c;
|
||||
this.d = d;
|
||||
this.v0 ^= k0;
|
||||
this.v1 ^= k1;
|
||||
this.v2 ^= k0;
|
||||
this.v3 ^= k1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void process(ByteBuffer buffer) {
|
||||
b += CHUNK_SIZE;
|
||||
processM(buffer.getLong());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processRemaining(ByteBuffer buffer) {
|
||||
b += buffer.remaining();
|
||||
for (int i = 0; buffer.hasRemaining(); i += 8) {
|
||||
finalM ^= (buffer.get() & 0xFFL) << i;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashCode makeHash() {
|
||||
// End with a byte encoding the positive integer b mod 256.
|
||||
finalM ^= b << 56;
|
||||
processM(finalM);
|
||||
|
||||
// Finalization
|
||||
v2 ^= 0xFFL;
|
||||
sipRound(d);
|
||||
return HashCode.fromLong(v0 ^ v1 ^ v2 ^ v3);
|
||||
}
|
||||
|
||||
private void processM(long m) {
|
||||
v3 ^= m;
|
||||
sipRound(c);
|
||||
v0 ^= m;
|
||||
}
|
||||
|
||||
private void sipRound(int iterations) {
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
v0 += v1;
|
||||
v2 += v3;
|
||||
v1 = Long.rotateLeft(v1, 13);
|
||||
v3 = Long.rotateLeft(v3, 16);
|
||||
v1 ^= v0;
|
||||
v3 ^= v2;
|
||||
v0 = Long.rotateLeft(v0, 32);
|
||||
v2 += v1;
|
||||
v0 += v3;
|
||||
v1 = Long.rotateLeft(v1, 17);
|
||||
v3 = Long.rotateLeft(v3, 21);
|
||||
v1 ^= v2;
|
||||
v3 ^= v0;
|
||||
v2 = Long.rotateLeft(v2, 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 0L;
|
||||
}
|
27
sources/main/java/com/google/common/hash/package-info.java
Normal file
27
sources/main/java/com/google/common/hash/package-info.java
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (C) 2011 The Guava Authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
* in compliance with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
// TODO(user): when things stabilize, flesh this out
|
||||
/**
|
||||
* Hash functions and related structures.
|
||||
*
|
||||
* <p>
|
||||
* See the Guava User Guide article on
|
||||
* <a href= "http://code.google.com/p/guava-libraries/wiki/HashingExplained">
|
||||
* hashing</a>.
|
||||
*/
|
||||
@ParametersAreNonnullByDefault
|
||||
package com.google.common.hash;
|
||||
|
||||
import javax.annotation.ParametersAreNonnullByDefault;
|
Reference in New Issue
Block a user