Update #0 - First Release

This commit is contained in:
LAX1DUDE
2022-12-25 01:12:28 -08:00
commit e7179fad45
2154 changed files with 256324 additions and 0 deletions

View File

@ -0,0 +1,124 @@
/*
* Copyright (C) 2012 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndexes;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import com.google.common.primitives.Chars;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import com.google.common.primitives.Shorts;
/**
 * Skeletal {@link Hasher} for byte-oriented hash algorithms. Multi-byte
 * primitives are serialized in little-endian order into a small scratch
 * {@code ByteBuffer}; every byte is then forwarded to one of the
 * {@code update} methods, which is where subclasses consume the data.
 *
 * @author Colin Decker
 */
abstract class AbstractByteHasher extends AbstractHasher {
  /** Reusable 8-byte little-endian staging buffer (large enough for one {@code long}). */
  private final ByteBuffer scratch = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN);

  /**
   * Feeds a single byte to this hasher.
   */
  protected abstract void update(byte b);

  /**
   * Feeds the entire array to this hasher.
   */
  protected void update(byte[] b) {
    update(b, 0, b.length);
  }

  /**
   * Feeds {@code len} bytes of {@code b}, starting at index {@code off}, to this
   * hasher. This default implementation forwards them one byte at a time.
   */
  protected void update(byte[] b, int off, int len) {
    final int end = off + len;
    int index = off;
    while (index < end) {
      update(b[index]);
      index++;
    }
  }

  @Override
  public Hasher putByte(byte b) {
    update(b);
    return this;
  }

  @Override
  public Hasher putBytes(byte[] bytes) {
    checkNotNull(bytes);
    update(bytes);
    return this;
  }

  @Override
  public Hasher putBytes(byte[] bytes, int off, int len) {
    checkPositionIndexes(off, off + len, bytes.length);
    update(bytes, off, len);
    return this;
  }

  /**
   * Forwards the first {@code byteCount} bytes of the scratch buffer to
   * {@link #update(byte[], int, int)} and then resets the scratch buffer, even
   * if the update throws.
   */
  private Hasher flushScratch(int byteCount) {
    try {
      update(scratch.array(), 0, byteCount);
      return this;
    } finally {
      // Always rewind so the next primitive starts at position 0.
      scratch.clear();
    }
  }

  @Override
  public Hasher putShort(short s) {
    scratch.putShort(s);
    return flushScratch(Shorts.BYTES);
  }

  @Override
  public Hasher putInt(int i) {
    scratch.putInt(i);
    return flushScratch(Ints.BYTES);
  }

  @Override
  public Hasher putLong(long l) {
    scratch.putLong(l);
    return flushScratch(Longs.BYTES);
  }

  @Override
  public Hasher putChar(char c) {
    scratch.putChar(c);
    return flushScratch(Chars.BYTES);
  }

  @Override
  public <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
    // The funnel decomposes the instance into primitive put* calls on this hasher.
    funnel.funnel(instance, this);
    return this;
  }
}

View File

@ -0,0 +1,170 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkNotNull;
import java.nio.charset.Charset;
/**
* An abstract composition of multiple hash functions. {@linkplain #newHasher()}
* delegates to the {@code Hasher} objects of the delegate hash functions, and
* in the end, they are used by {@linkplain #makeHash(Hasher[])} that constructs
* the final {@code HashCode}.
*
* @author Dimitris Andreou
*/
abstract class AbstractCompositeHashFunction extends AbstractStreamingHashFunction {
final HashFunction[] functions;
AbstractCompositeHashFunction(HashFunction... functions) {
for (HashFunction function : functions) {
checkNotNull(function);
}
this.functions = functions;
}
/**
* Constructs a {@code HashCode} from the {@code Hasher} objects of the
* functions. Each of them has consumed the entire input and they are ready to
* output a {@code HashCode}. The order of the hashers are the same order as the
* functions given to the constructor.
*/
// this could be cleaner if it passed HashCode[], but that would create yet
// another array...
/* protected */ abstract HashCode makeHash(Hasher[] hashers);
@Override
public Hasher newHasher() {
final Hasher[] hashers = new Hasher[functions.length];
for (int i = 0; i < hashers.length; i++) {
hashers[i] = functions[i].newHasher();
}
return new Hasher() {
@Override
public Hasher putByte(byte b) {
for (Hasher hasher : hashers) {
hasher.putByte(b);
}
return this;
}
@Override
public Hasher putBytes(byte[] bytes) {
for (Hasher hasher : hashers) {
hasher.putBytes(bytes);
}
return this;
}
@Override
public Hasher putBytes(byte[] bytes, int off, int len) {
for (Hasher hasher : hashers) {
hasher.putBytes(bytes, off, len);
}
return this;
}
@Override
public Hasher putShort(short s) {
for (Hasher hasher : hashers) {
hasher.putShort(s);
}
return this;
}
@Override
public Hasher putInt(int i) {
for (Hasher hasher : hashers) {
hasher.putInt(i);
}
return this;
}
@Override
public Hasher putLong(long l) {
for (Hasher hasher : hashers) {
hasher.putLong(l);
}
return this;
}
@Override
public Hasher putFloat(float f) {
for (Hasher hasher : hashers) {
hasher.putFloat(f);
}
return this;
}
@Override
public Hasher putDouble(double d) {
for (Hasher hasher : hashers) {
hasher.putDouble(d);
}
return this;
}
@Override
public Hasher putBoolean(boolean b) {
for (Hasher hasher : hashers) {
hasher.putBoolean(b);
}
return this;
}
@Override
public Hasher putChar(char c) {
for (Hasher hasher : hashers) {
hasher.putChar(c);
}
return this;
}
@Override
public Hasher putUnencodedChars(CharSequence chars) {
for (Hasher hasher : hashers) {
hasher.putUnencodedChars(chars);
}
return this;
}
@Override
public Hasher putString(CharSequence chars, Charset charset) {
for (Hasher hasher : hashers) {
hasher.putString(chars, charset);
}
return this;
}
@Override
public <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
for (Hasher hasher : hashers) {
hasher.putObject(instance, funnel);
}
return this;
}
@Override
public HashCode hash() {
return makeHash(hashers);
}
};
}
private static final long serialVersionUID = 0L;
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import java.nio.charset.Charset;
/**
 * Partial {@link Hasher} that expresses {@link #putBoolean(boolean)},
 * {@link #putDouble(double)}, {@link #putFloat(float)},
 * {@link #putUnencodedChars(CharSequence)} and
 * {@link #putString(CharSequence, Charset)} in terms of the remaining
 * primitive {@code put*} methods.
 *
 * @author Dimitris Andreou
 */
abstract class AbstractHasher implements Hasher {
  /** Encodes {@code true} as the byte 1 and {@code false} as the byte 0. */
  @Override
  public final Hasher putBoolean(boolean b) {
    byte encoded = b ? (byte) 1 : (byte) 0;
    return putByte(encoded);
  }

  /** Hashes the raw IEEE 754 bit pattern of {@code d} as a {@code long}. */
  @Override
  public final Hasher putDouble(double d) {
    long rawBits = Double.doubleToRawLongBits(d);
    return putLong(rawBits);
  }

  /** Hashes the raw IEEE 754 bit pattern of {@code f} as an {@code int}. */
  @Override
  public final Hasher putFloat(float f) {
    int rawBits = Float.floatToRawIntBits(f);
    return putInt(rawBits);
  }

  /** Hashes each {@code char} of the sequence in order via {@link #putChar(char)}. */
  @Override
  public Hasher putUnencodedChars(CharSequence charSequence) {
    final int length = charSequence.length();
    int index = 0;
    while (index < length) {
      putChar(charSequence.charAt(index));
      index++;
    }
    return this;
  }

  /** Encodes the sequence with {@code charset} and hashes the resulting bytes. */
  @Override
  public Hasher putString(CharSequence charSequence, Charset charset) {
    byte[] encoded = charSequence.toString().getBytes(charset);
    return putBytes(encoded);
  }
}

View File

@ -0,0 +1,170 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.common.hash;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import com.google.common.base.Preconditions;
/**
 * Skeleton implementation of {@link HashFunction} for algorithms that need the
 * whole input at once. Hashers returned by {@linkplain #newHasher()} buffer
 * everything in memory and, on {@code hash()}, hand the accumulated bytes to
 * {@linkplain #hashBytes(byte[], int, int)}.
 *
 * @author Dimitris Andreou
 */
abstract class AbstractNonStreamingHashFunction implements HashFunction {
  @Override
  public Hasher newHasher() {
    return new BufferingHasher(32);
  }

  @Override
  public Hasher newHasher(int expectedInputSize) {
    Preconditions.checkArgument(expectedInputSize >= 0);
    return new BufferingHasher(expectedInputSize);
  }

  @Override
  public <T> HashCode hashObject(T instance, Funnel<? super T> funnel) {
    Hasher loaded = newHasher().putObject(instance, funnel);
    return loaded.hash();
  }

  @Override
  public HashCode hashUnencodedChars(CharSequence input) {
    final int charCount = input.length();
    // Two bytes per char.
    Hasher hasher = newHasher(charCount * 2);
    int index = 0;
    while (index < charCount) {
      hasher.putChar(input.charAt(index));
      index++;
    }
    return hasher.hash();
  }

  @Override
  public HashCode hashString(CharSequence input, Charset charset) {
    byte[] encoded = input.toString().getBytes(charset);
    return hashBytes(encoded);
  }

  @Override
  public HashCode hashInt(int input) {
    Hasher loaded = newHasher(4).putInt(input);
    return loaded.hash();
  }

  @Override
  public HashCode hashLong(long input) {
    Hasher loaded = newHasher(8).putLong(input);
    return loaded.hash();
  }

  @Override
  public HashCode hashBytes(byte[] input) {
    return hashBytes(input, 0, input.length);
  }

  /**
   * {@code Hasher} that accumulates all input in an in-memory byte stream.
   * Multi-byte primitives are written least-significant byte first.
   */
  private final class BufferingHasher extends AbstractHasher {
    final ExposedByteArrayOutputStream stream;
    static final int BOTTOM_BYTE = 0xFF;

    BufferingHasher(int expectedInputSize) {
      this.stream = new ExposedByteArrayOutputStream(expectedInputSize);
    }

    /** Appends the lowest {@code byteCount} bytes of {@code value}, low byte first. */
    private void writeLittleEndian(long value, int byteCount) {
      final int bitCount = byteCount * 8;
      for (int shift = 0; shift < bitCount; shift += 8) {
        stream.write((int) ((value >>> shift) & BOTTOM_BYTE));
      }
    }

    @Override
    public Hasher putByte(byte b) {
      stream.write(b);
      return this;
    }

    @Override
    public Hasher putBytes(byte[] bytes) {
      try {
        stream.write(bytes);
      } catch (IOException e) {
        // Declared by OutputStream.write(byte[]); rethrown unchecked.
        throw new RuntimeException(e);
      }
      return this;
    }

    @Override
    public Hasher putBytes(byte[] bytes, int off, int len) {
      stream.write(bytes, off, len);
      return this;
    }

    @Override
    public Hasher putShort(short s) {
      writeLittleEndian(s, 2);
      return this;
    }

    @Override
    public Hasher putInt(int i) {
      writeLittleEndian(i, 4);
      return this;
    }

    @Override
    public Hasher putLong(long l) {
      writeLittleEndian(l, 8);
      return this;
    }

    @Override
    public Hasher putChar(char c) {
      writeLittleEndian(c, 2);
      return this;
    }

    @Override
    public <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
      funnel.funnel(instance, this);
      return this;
    }

    @Override
    public HashCode hash() {
      // Hash straight out of the stream's backing array; only the first length() bytes are valid.
      return hashBytes(stream.byteArray(), 0, stream.length());
    }
  }

  /** {@code ByteArrayOutputStream} that exposes its backing array so hashing needs no copy. */
  private static final class ExposedByteArrayOutputStream extends ByteArrayOutputStream {
    ExposedByteArrayOutputStream(int expectedInputSize) {
      super(expectedInputSize);
    }

    /** Returns the backing array itself (not a copy); it may be longer than {@link #length()}. */
    byte[] byteArray() {
      return buf;
    }

    /** Returns the number of valid bytes in {@link #byteArray()}. */
    int length() {
      return count;
    }
  }
}

View File

@ -0,0 +1,270 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkArgument;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.Charset;
import com.google.common.base.Preconditions;
/**
* Skeleton implementation of {@link HashFunction}. Provides default
* implementations that invoke the appropriate method on {@link #newHasher()}
* and then return the result of {@link Hasher#hash}.
*
* <p>
* Invocations of {@link #newHasher(int)} also delegate to
* {@linkplain #newHasher()}, ignoring the expected input size parameter.
*
* @author Kevin Bourrillion
*/
abstract class AbstractStreamingHashFunction implements HashFunction {
/** Hashes {@code instance} by funnelling it into a fresh hasher. */
@Override
public <T> HashCode hashObject(T instance, Funnel<? super T> funnel) {
return newHasher().putObject(instance, funnel).hash();
}
/** Hashes the chars of {@code input} via a fresh hasher's {@code putUnencodedChars}. */
@Override
public HashCode hashUnencodedChars(CharSequence input) {
return newHasher().putUnencodedChars(input).hash();
}
/** Hashes {@code input} via a fresh hasher's {@code putString} with the given charset. */
@Override
public HashCode hashString(CharSequence input, Charset charset) {
return newHasher().putString(input, charset).hash();
}
/** Hashes a single {@code int} via a fresh hasher. */
@Override
public HashCode hashInt(int input) {
return newHasher().putInt(input).hash();
}
/** Hashes a single {@code long} via a fresh hasher. */
@Override
public HashCode hashLong(long input) {
return newHasher().putLong(input).hash();
}
/** Hashes the whole array via a fresh hasher. */
@Override
public HashCode hashBytes(byte[] input) {
return newHasher().putBytes(input).hash();
}
/** Hashes {@code len} bytes of {@code input} starting at {@code off} via a fresh hasher. */
@Override
public HashCode hashBytes(byte[] input, int off, int len) {
return newHasher().putBytes(input, off, len).hash();
}
/**
* Validates that {@code expectedInputSize} is non-negative and then returns
* {@link #newHasher()}; the size hint itself is not used.
*/
@Override
public Hasher newHasher(int expectedInputSize) {
Preconditions.checkArgument(expectedInputSize >= 0);
return newHasher();
}
/**
* A convenience base class for implementors of {@code Hasher}; handles
* accumulating data until an entire "chunk" (of implementation-dependent
* length) is ready to be hashed.
*
* @author Kevin Bourrillion
* @author Dimitris Andreou
*/
// TODO(kevinb): this class still needs some design-and-document-for-inheritance
// love
protected static abstract class AbstractStreamingHasher extends AbstractHasher {
/** Buffer via which we pass data to the hash algorithm (the implementor) */
private final ByteBuffer buffer;
/** Number of bytes to be filled before process() invocation(s). */
private final int bufferSize;
/** Number of bytes processed per process() invocation. */
private final int chunkSize;
/**
* Constructor for use by subclasses. This hasher instance will process chunks
* of the specified size, using an internal buffer of that same size.
*
* @param chunkSize the number of bytes available per
* {@link #process(ByteBuffer)} invocation; must be at least 4
*/
protected AbstractStreamingHasher(int chunkSize) {
this(chunkSize, chunkSize);
}
/**
* Constructor for use by subclasses. This hasher instance will process chunks
* of the specified size, using an internal buffer of {@code bufferSize} size,
* which must be a multiple of {@code chunkSize}.
*
* @param chunkSize the number of bytes available per
* {@link #process(ByteBuffer)} invocation; must be at least 4
* @param bufferSize the size of the internal buffer. Must be a multiple of
* chunkSize
* @throws IllegalArgumentException if {@code bufferSize} is not a multiple of
* {@code chunkSize}
*/
protected AbstractStreamingHasher(int chunkSize, int bufferSize) {
// TODO(kevinb): check more preconditions (as bufferSize >= chunkSize) if this
// is ever public
checkArgument(bufferSize % chunkSize == 0);
// TODO(user): benchmark performance difference with longer buffer
// The 7 extra bytes let an 8-byte primitive be written even when only one byte
// of the nominal bufferSize is still free.
this.buffer = ByteBuffer.allocate(bufferSize + 7) // always space for a single primitive
.order(ByteOrder.LITTLE_ENDIAN);
this.bufferSize = bufferSize;
this.chunkSize = chunkSize;
}
/**
* Processes the available bytes of the buffer (at most {@code chunkSize}
* bytes). The loops in this class call it repeatedly while at least
* {@code chunkSize} bytes remain, so an implementation is expected to consume
* bytes from {@code bb} on each call.
*/
protected abstract void process(ByteBuffer bb);
/**
* This is invoked for the last bytes of the input, which are not enough to fill
* a whole chunk. The passed {@code ByteBuffer} is guaranteed to be non-empty.
*
* <p>
* This implementation simply pads with zeros and delegates to
* {@link #process(ByteBuffer)}.
*/
protected void processRemaining(ByteBuffer bb) {
bb.position(bb.limit()); // move at the end
// Temporarily allow writing up to 7 bytes past chunkSize so that whole zero
// longs can be appended; this needs capacity of at least chunkSize + 7.
bb.limit(chunkSize + 7); // get ready to pad with longs
while (bb.position() < chunkSize) {
bb.putLong(0);
}
// Trim any padding overshoot, then rewind so process() sees exactly one chunk.
bb.limit(chunkSize);
bb.flip();
process(bb);
}
/** Adds the whole array; equivalent to {@code putBytes(bytes, 0, bytes.length)}. */
@Override
public final Hasher putBytes(byte[] bytes) {
return putBytes(bytes, 0, bytes.length);
}
/** Adds {@code len} bytes of {@code bytes} starting at {@code off}, read through a little-endian wrapper. */
@Override
public final Hasher putBytes(byte[] bytes, int off, int len) {
return putBytes(ByteBuffer.wrap(bytes, off, len).order(ByteOrder.LITTLE_ENDIAN));
}
/**
* Adds all remaining bytes of {@code readBuffer}. Small inputs are copied into
* the internal buffer; larger inputs first top the internal buffer up to
* {@code bufferSize}, drain it, are then processed chunk by chunk directly
* from {@code readBuffer}, and any tail shorter than a chunk is kept in the
* internal buffer.
*/
private Hasher putBytes(ByteBuffer readBuffer) {
// If we have room for all of it, this is easy
if (readBuffer.remaining() <= buffer.remaining()) {
buffer.put(readBuffer);
munchIfFull();
return this;
}
// First add just enough to fill buffer size, and munch that
int bytesToCopy = bufferSize - buffer.position();
for (int i = 0; i < bytesToCopy; i++) {
buffer.put(readBuffer.get());
}
munch(); // buffer becomes empty here, since chunkSize divides bufferSize
// Now process directly from the rest of the input buffer
while (readBuffer.remaining() >= chunkSize) {
process(readBuffer);
}
// Finally stick the remainder back in our usual buffer
buffer.put(readBuffer);
return this;
}
/** Adds each char of the sequence in order via {@link #putChar(char)}. */
@Override
public final Hasher putUnencodedChars(CharSequence charSequence) {
for (int i = 0; i < charSequence.length(); i++) {
putChar(charSequence.charAt(i));
}
return this;
}
/** Buffers one byte, draining full chunks if the buffer is nearly full. */
@Override
public final Hasher putByte(byte b) {
buffer.put(b);
munchIfFull();
return this;
}
/** Buffers a {@code short} (little-endian), draining full chunks if the buffer is nearly full. */
@Override
public final Hasher putShort(short s) {
buffer.putShort(s);
munchIfFull();
return this;
}
/** Buffers a {@code char} (little-endian), draining full chunks if the buffer is nearly full. */
@Override
public final Hasher putChar(char c) {
buffer.putChar(c);
munchIfFull();
return this;
}
/** Buffers an {@code int} (little-endian), draining full chunks if the buffer is nearly full. */
@Override
public final Hasher putInt(int i) {
buffer.putInt(i);
munchIfFull();
return this;
}
/** Buffers a {@code long} (little-endian), draining full chunks if the buffer is nearly full. */
@Override
public final Hasher putLong(long l) {
buffer.putLong(l);
munchIfFull();
return this;
}
/** Lets {@code funnel} decompose {@code instance} into {@code put*} calls on this hasher. */
@Override
public final <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
funnel.funnel(instance, this);
return this;
}
/**
* Finishes hashing: drains all complete chunks, passes any leftover bytes to
* {@link #processRemaining(ByteBuffer)}, and returns {@link #makeHash()}.
*/
@Override
public final HashCode hash() {
munch();
// munch() leaves the buffer in write mode; flip to read the leftover bytes.
buffer.flip();
if (buffer.remaining() > 0) {
processRemaining(buffer);
}
return makeHash();
}
/** Produces the final {@code HashCode}; called by {@link #hash()} after all input has been processed. */
abstract HashCode makeHash();
// Process pent-up data in chunks
private void munchIfFull() {
// Fewer than 8 free bytes means the next primitive might not fit.
if (buffer.remaining() < 8) {
// buffer is full; not enough room for a primitive. We have at least one full
// chunk.
munch();
}
}
/**
* Switches the buffer to read mode, processes every complete chunk, and then
* compacts so that unprocessed bytes move to the front and writing can resume.
*/
private void munch() {
buffer.flip();
while (buffer.remaining() >= chunkSize) {
// we could limit the buffer to ensure process() does not read more than
// chunkSize number of bytes, but we trust the implementations
process(buffer);
}
buffer.compact(); // preserve any remaining data that do not make a full chunk
}
}
}

View File

@ -0,0 +1,420 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import java.io.Serializable;
import javax.annotation.Nullable;
import com.google.common.annotations.Beta;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Objects;
import com.google.common.base.Predicate;
import com.google.common.hash.BloomFilterStrategies.BitArray;
/**
* A Bloom filter for instances of {@code T}. A Bloom filter offers an
* approximate containment test with one-sided error: if it claims that an
* element is contained in it, this might be in error, but if it claims that an
* element is <i>not</i> contained in it, then this is definitely true.
*
* <p>
* If you are unfamiliar with Bloom filters, this nice
* <a href="http://llimllib.github.com/bloomfilter-tutorial/">tutorial</a> may
* help you understand how they work.
*
* <p>
* The false positive probability ({@code FPP}) of a bloom filter is defined as
* the probability that {@linkplain #mightContain(Object)} will erroneously
* return {@code true} for an object that has not actually been put in the
* {@code BloomFilter}.
*
* <p>
* Bloom filters are serializable. However, serial forms generated by newer
* versions of the code may not be readable by older versions of the code (e.g.,
* a serialized bloom filter generated today may <i>not</i> be readable by a
* binary that was compiled 6 months ago).
*
* @param <T> the type of instances that the {@code BloomFilter} accepts
* @author Dimitris Andreou
* @author Kevin Bourrillion
* @since 11.0
*/
@Beta
public final class BloomFilter<T> implements Predicate<T>, Serializable {
/**
 * Maps an element of type T to {@code numHashFunctions} bit indexes of a bit
 * array.
 *
 * <p>
 * Implementations should be collections of pure functions (i.e. stateless).
 */
interface Strategy extends Serializable {
  /**
   * Hashes {@code object} and sets the {@code numHashFunctions} bits selected
   * for it in {@code bits}.
   *
   * @return whether any bit changed as a result of this call
   */
  <T> boolean put(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits);

  /**
   * Hashes {@code object} and tests the {@code numHashFunctions} bits selected
   * for it in {@code bits}.
   *
   * @return {@code true} if and only if all selected bits are set
   */
  <T> boolean mightContain(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits);

  /**
   * Identifier used to encode this strategy when marshalled as part of a
   * BloomFilter. Only values in the [-128, 127] range are valid for the compact
   * serial form. Non-negative values are reserved for enums defined in
   * BloomFilterStrategies; negative values are reserved for any custom, stateful
   * strategy we may define (e.g. any kind of strategy that would depend on user
   * input).
   */
  int ordinal();
}
/** The bit set of the BloomFilter (not necessarily power of 2!) */
private final BitArray bits;
/** Number of hashes per element */
private final int numHashFunctions;
/** The funnel to translate Ts to bytes */
private final Funnel<T> funnel;
/**
* The strategy we employ to map an element T to {@code numHashFunctions} bit
* indexes.
*/
private final Strategy strategy;
/**
 * Creates a BloomFilter over the given bit array.
 *
 * @throws IllegalArgumentException if {@code numHashFunctions} is not in the
 *                                  range [1, 255]
 * @throws NullPointerException     if {@code bits}, {@code funnel} or
 *                                  {@code strategy} is null
 */
private BloomFilter(BitArray bits, int numHashFunctions, Funnel<T> funnel, Strategy strategy) {
  // Validate everything first, in the same order as the assignments below.
  checkArgument(numHashFunctions > 0, "numHashFunctions (%s) must be > 0", numHashFunctions);
  checkArgument(numHashFunctions <= 255, "numHashFunctions (%s) must be <= 255", numHashFunctions);
  checkNotNull(bits);
  checkNotNull(funnel);
  checkNotNull(strategy);

  this.bits = bits;
  this.numHashFunctions = numHashFunctions;
  this.funnel = funnel;
  this.strategy = strategy;
}
/**
 * Returns a new {@code BloomFilter} built from a copy of this filter's bit
 * array and the same hash count, funnel and strategy.
 *
 * @since 12.0
 */
public BloomFilter<T> copy() {
  BitArray copiedBits = bits.copy();
  return new BloomFilter<T>(copiedBits, numHashFunctions, funnel, strategy);
}
/**
 * Returns {@code true} if the element <i>might</i> have been put in this Bloom
 * filter, {@code false} if it <i>definitely</i> has not. The decision is
 * delegated to this filter's strategy.
 */
public boolean mightContain(T object) {
  boolean possiblyPresent = strategy.mightContain(object, funnel, numHashFunctions, bits);
  return possiblyPresent;
}
/**
 * Equivalent to {@link #mightContain}.
 *
 * @deprecated Provided only to satisfy the {@link Predicate} interface; use
 *             {@link #mightContain} instead.
 */
@Deprecated
@Override
public boolean apply(T input) {
  boolean possiblyPresent = mightContain(input);
  return possiblyPresent;
}
/**
 * Puts an element into this {@code BloomFilter}. Ensures that subsequent
 * invocations of {@link #mightContain(Object)} with the same element will
 * always return {@code true}.
 *
 * @return true if the bloom filter's bits changed as a result of this
 *         operation. If the bits changed, this is <i>definitely</i> the first
 *         time {@code object} has been added to the filter. If the bits haven't
 *         changed, this <i>might</i> be the first time {@code object} has been
 *         added to the filter. Note that {@code put(t)} always returns the
 *         <i>opposite</i> result to what {@code mightContain(t)} would have
 *         returned at the time it is called.
 * @since 12.0 (present in 11.0 with {@code void} return type)
 */
public boolean put(T object) {
  boolean bitsChanged = strategy.put(object, funnel, numHashFunctions, bits);
  return bitsChanged;
}
/**
 * Returns the probability that {@linkplain #mightContain(Object)} will
 * erroneously return {@code true} for an object that has not actually been put
 * in the {@code BloomFilter}.
 *
 * <p>
 * Ideally, this number should be close to the {@code fpp} parameter passed in
 * {@linkplain #create(Funnel, int, double)}, or smaller. If it is significantly
 * higher, it is usually the case that too many elements (more than expected)
 * have been put in the {@code BloomFilter}, degenerating it.
 *
 * @since 14.0 (since 11.0 as expectedFalsePositiveProbability())
 */
public double expectedFpp() {
  // Fraction of bits currently set, raised to the number of hash functions:
  // a false positive requires all k probed bits to be set.
  double fractionOfBitsSet = (double) bits.bitCount() / bitSize();
  return Math.pow(fractionOfBitsSet, numHashFunctions);
}
/**
 * Returns the size, in bits, reported by the underlying bit array.
 */
@VisibleForTesting
long bitSize() {
  long totalBits = bits.bitSize();
  return totalBits;
}
/**
 * Determines whether a given bloom filter is compatible with this bloom filter.
 * For two bloom filters to be compatible, they must:
 *
 * <ul>
 * <li>not be the same instance
 * <li>have the same number of hash functions
 * <li>have the same bit size
 * <li>have the same strategy
 * <li>have equal funnels
 * </ul>
 *
 * @param that The bloom filter to check for compatibility.
 * @throws NullPointerException if {@code that} is null
 * @since 15.0
 */
public boolean isCompatible(BloomFilter<T> that) {
  checkNotNull(that);
  // Each guard mirrors one bullet above; the first mismatch ends the check.
  if (this == that) {
    return false;
  }
  if (this.numHashFunctions != that.numHashFunctions) {
    return false;
  }
  if (this.bitSize() != that.bitSize()) {
    return false;
  }
  if (!this.strategy.equals(that.strategy)) {
    return false;
  }
  return this.funnel.equals(that.funnel);
}
/**
 * Combines this bloom filter with another bloom filter by performing a bitwise
 * OR of the underlying data. The mutations happen to <b>this</b> instance.
 * Callers must ensure the bloom filters are appropriately sized to avoid
 * saturating them.
 *
 * @param that The bloom filter to combine this bloom filter with. It is not
 *             mutated.
 * @throws NullPointerException     if {@code that} is null
 * @throws IllegalArgumentException if {@code that} is this same instance, or
 *                                  differs in number of hash functions, bit
 *                                  size, strategy or funnel
 *
 * @since 15.0
 */
public void putAll(BloomFilter<T> that) {
  checkNotNull(that);

  boolean distinctInstances = this != that;
  checkArgument(distinctInstances, "Cannot combine a BloomFilter with itself.");

  boolean sameHashCount = this.numHashFunctions == that.numHashFunctions;
  checkArgument(sameHashCount,
      "BloomFilters must have the same number of hash functions (%s != %s)", this.numHashFunctions,
      that.numHashFunctions);

  long ownBitSize = this.bitSize();
  long otherBitSize = that.bitSize();
  checkArgument(ownBitSize == otherBitSize,
      "BloomFilters must have the same size underlying bit arrays (%s != %s)", ownBitSize,
      otherBitSize);

  boolean sameStrategy = this.strategy.equals(that.strategy);
  checkArgument(sameStrategy, "BloomFilters must have equal strategies (%s != %s)",
      this.strategy, that.strategy);

  boolean sameFunnel = this.funnel.equals(that.funnel);
  checkArgument(sameFunnel, "BloomFilters must have equal funnels (%s != %s)", this.funnel,
      that.funnel);

  // All compatibility checks passed: merge the other filter's bits into ours.
  this.bits.putAll(that.bits);
}
/**
 * Two bloom filters are equal when they have the same number of hash functions
 * and equal funnels, bit arrays and strategies.
 */
@Override
public boolean equals(@Nullable Object object) {
  if (object == this) {
    return true;
  }
  if (!(object instanceof BloomFilter)) {
    return false;
  }
  BloomFilter<?> other = (BloomFilter<?>) object;
  return this.numHashFunctions == other.numHashFunctions && this.funnel.equals(other.funnel)
      && this.bits.equals(other.bits) && this.strategy.equals(other.strategy);
}
/** Combines the hash count, funnel, strategy and bit array into one hash code. */
@Override
public int hashCode() {
  Object[] components = { numHashFunctions, funnel, strategy, bits };
  return Objects.hashCode(components);
}
/** Strategy used by the public factory methods; resolved once when this class is initialized. */
private static final Strategy DEFAULT_STRATEGY = getDefaultStrategyFromSystemProperty();

/** Name of the system property that, when it parses as {@code true}, selects the 32-bit strategy. */
@VisibleForTesting
static final String USE_MITZ32_PROPERTY = "com.google.common.hash.BloomFilter.useMitz32";

/**
 * Reads {@link #USE_MITZ32_PROPERTY} and returns
 * {@code BloomFilterStrategies.MURMUR128_MITZ_32} if it parses as
 * {@code true}; otherwise (including when unset) returns
 * {@code BloomFilterStrategies.MURMUR128_MITZ_64}.
 */
@VisibleForTesting
static Strategy getDefaultStrategyFromSystemProperty() {
  String configured = System.getProperty(USE_MITZ32_PROPERTY);
  if (Boolean.parseBoolean(configured)) {
    return BloomFilterStrategies.MURMUR128_MITZ_32;
  }
  return BloomFilterStrategies.MURMUR128_MITZ_64;
}
/**
 * Creates a {@code BloomFilter<T>} with the expected number of insertions and
 * expected false positive probability, using the default strategy.
 *
 * <p>
 * Note that overflowing a {@code BloomFilter} with significantly more elements
 * than specified, will result in its saturation, and a sharp deterioration of
 * its false positive probability.
 *
 * <p>
 * The constructed {@code BloomFilter<T>} will be serializable if the provided
 * {@code Funnel<T>} is.
 *
 * <p>
 * It is recommended that the funnel be implemented as a Java enum. This has the
 * benefit of ensuring proper serialization and deserialization, which is
 * important since {@link #equals} also relies on object identity of funnels.
 *
 * @param funnel             the funnel of T's that the constructed
 *                           {@code BloomFilter<T>} will use
 * @param expectedInsertions the number of expected insertions to the
 *                           constructed {@code BloomFilter<T>}; must be
 *                           non-negative (the strategy-taking overload this
 *                           delegates to treats 0 as 1)
 * @param fpp                the desired false positive probability (must be
 *                           positive and less than 1.0)
 * @return a {@code BloomFilter}
 */
public static <T> BloomFilter<T> create(Funnel<T> funnel, int expectedInsertions /* n */, double fpp) {
  Strategy strategy = DEFAULT_STRATEGY;
  return create(funnel, expectedInsertions, fpp, strategy);
}
/**
 * Creates a {@code BloomFilter<T>} sized for {@code expectedInsertions}
 * elements at false positive probability {@code fpp}, using the given strategy.
 *
 * @throws NullPointerException     if {@code funnel} or {@code strategy} is
 *                                  null
 * @throws IllegalArgumentException if {@code expectedInsertions} is negative,
 *                                  {@code fpp} is not strictly between 0 and 1,
 *                                  or the filter cannot be constructed with the
 *                                  computed number of bits
 */
@VisibleForTesting
static <T> BloomFilter<T> create(Funnel<T> funnel, int expectedInsertions /* n */, double fpp, Strategy strategy) {
  checkNotNull(funnel);
  checkArgument(expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions);
  checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp);
  checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp);
  checkNotNull(strategy);

  // Size an "empty" filter as if one insertion were expected.
  int sizingInsertions = (expectedInsertions == 0) ? 1 : expectedInsertions;

  /*
   * TODO(user): Put a warning in the javadoc about tiny fpp values, since the
   * resulting size is proportional to -log(p), but there is not much of a point
   * after all, e.g. optimalM(1000, 0.0000000000000001) = 76680 which is less than
   * 10kb. Who cares!
   */
  long numBits = optimalNumOfBits(sizingInsertions, fpp);
  int numHashFunctions = optimalNumOfHashFunctions(sizingInsertions, numBits);
  try {
    BitArray bitArray = new BitArray(numBits);
    return new BloomFilter<T>(bitArray, numHashFunctions, funnel, strategy);
  } catch (IllegalArgumentException e) {
    // Re-throw with the computed size in the message, keeping the original as cause.
    throw new IllegalArgumentException("Could not create BloomFilter of " + numBits + " bits", e);
  }
}
/**
 * Creates a {@code BloomFilter<T>} with the expected number of insertions and
 * a default expected false positive probability of 3%.
 *
 * <p>
 * Note that overflowing a {@code BloomFilter} with significantly more elements
 * than specified, will result in its saturation, and a sharp deterioration of
 * its false positive probability.
 *
 * <p>
 * The constructed {@code BloomFilter<T>} will be serializable if the provided
 * {@code Funnel<T>} is.
 *
 * @param funnel             the funnel of T's that the constructed
 *                           {@code BloomFilter<T>} will use
 * @param expectedInsertions the number of expected insertions to the
 *                           constructed {@code BloomFilter<T>}; must be
 *                           non-negative
 * @return a {@code BloomFilter}
 */
public static <T> BloomFilter<T> create(Funnel<T> funnel, int expectedInsertions /* n */) {
  // FYI, for 3%, we always get 5 hash functions
  double defaultFpp = 0.03;
  return create(funnel, expectedInsertions, defaultFpp);
}
/*
 * Cheat sheet:
 *
 * m: total bits
 * n: expected insertions
 * b: m/n, bits per insertion
 * p: expected false positive probability
 *
 * 1) Optimal k = b * ln2
 * 2) p = (1 - e ^ (-kn/m)) ^ k
 * 3) For optimal k: p = 2 ^ (-k) ~= 0.6185 ^ b
 * 4) For optimal k: m = -n * ln(p) / ((ln2) ^ 2)
 */
/**
* Computes the optimal k (number of hashes per element inserted in Bloom
* filter), given the expected insertions and total number of bits in the Bloom
* filter.
*
* See http://en.wikipedia.org/wiki/File:Bloom_filter_fp_probability.svg for the
* formula.
*
* @param n expected insertions (must be positive)
* @param m total number of bits in Bloom filter (must be positive)
*/
@VisibleForTesting
static int optimalNumOfHashFunctions(long n, long m) {
    // k = (m / n) * ln(2). The ratio has to be taken in floating point: with long
    // division the fractional part of bits-per-insertion is discarded before the
    // multiplication, which under-estimates k (e.g. n = 4, m = 11 yielded 1, not 2).
    final double bitsPerInsertion = (double) m / n;
    // Round to the nearest integer, but never go below one hash function.
    // n is required to be positive; n == 0 no longer throws ArithmeticException and
    // falls through to this lower bound instead.
    return Math.max(1, (int) Math.round(bitsPerInsertion * Math.log(2)));
}
/**
* Computes m (total bits of Bloom filter) which is expected to achieve, for the
* specified expected insertions, the required false positive probability.
*
* See http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
* for the formula.
*
* @param n expected insertions (must be positive)
* @param p false positive rate (must be 0 < p < 1)
*/
@VisibleForTesting
static long optimalNumOfBits(long n, double p) {
    // log(0) is negative infinity; fall back to the smallest positive double instead.
    final double probability = (p == 0) ? Double.MIN_VALUE : p;
    final double ln2 = Math.log(2);
    // m = -n * ln(p) / (ln 2)^2; the cast truncates the fractional part.
    return (long) (-n * Math.log(probability) / (ln2 * ln2));
}
/**
 * Serialization hook: returns a {@code SerialForm} built from this filter so
 * that the stand-in is written instead of the filter object itself.
 */
private Object writeReplace() {
    final SerialForm<T> standIn = new SerialForm<T>(this);
    return standIn;
}
/**
 * Serialized stand-in for a {@code BloomFilter}: it records the raw bit words,
 * the number of hash functions, the funnel and the strategy, and rebuilds an
 * equivalent filter in {@link #readResolve()}.
 */
private static class SerialForm<T> implements Serializable {
    private static final long serialVersionUID = 1;

    final long[] data;
    final int numHashFunctions;
    final Funnel<T> funnel;
    final Strategy strategy;

    /**
     * Captures the state of {@code bf}. The {@code long[]} is the filter's own
     * array (no copy is made).
     */
    SerialForm(BloomFilter<T> bf) {
        this.strategy = bf.strategy;
        this.funnel = bf.funnel;
        this.numHashFunctions = bf.numHashFunctions;
        this.data = bf.bits.data;
    }

    /** Reconstructs a {@code BloomFilter} from the captured fields. */
    Object readResolve() {
        final BitArray restoredBits = new BitArray(data);
        return new BloomFilter<T>(restoredBits, numHashFunctions, funnel, strategy);
    }
}
}

View File

@ -0,0 +1,213 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkArgument;
import java.math.RoundingMode;
import java.util.Arrays;
import com.google.common.math.LongMath;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
/**
 * Collection of strategies for deriving the k * log(M) bits needed to map an
 * element into a BloomFilter of M bits and k hash functions. The strategies are
 * part of the serialized form of the Bloom filters that use them, so existing
 * ones must be preserved as is (no updates allowed, only introduction of new
 * versions).
 *
 * Important: the order of the constants cannot change, and they cannot be
 * deleted - we depend on their ordinal for BloomFilter serialization.
 *
 * @author Dimitris Andreou
 * @author Kurt Alfred Kluever
 */
enum BloomFilterStrategies implements BloomFilter.Strategy {
    /**
     * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam
     * Kirsch and Michael Mitzenmacher. The paper argues that this trick doesn't
     * significantly deteriorate the performance of a Bloom filter (yet only needs
     * two 32bit hash functions).
     */
    MURMUR128_MITZ_32() {
        @Override
        public <T> boolean put(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits) {
            final long totalBits = bits.bitSize();
            final long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
            final int lowHalf = (int) hash64;
            final int highHalf = (int) (hash64 >>> 32);
            boolean anyChanged = false;
            for (int round = 1; round <= numHashFunctions; round++) {
                // Non-short-circuit OR: every round must set its bit.
                anyChanged |= bits.set(bitIndex(lowHalf, highHalf, round, totalBits));
            }
            return anyChanged;
        }

        @Override
        public <T> boolean mightContain(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits) {
            final long totalBits = bits.bitSize();
            final long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
            final int lowHalf = (int) hash64;
            final int highHalf = (int) (hash64 >>> 32);
            for (int round = 1; round <= numHashFunctions; round++) {
                if (!bits.get(bitIndex(lowHalf, highHalf, round, totalBits))) {
                    // One clear bit proves the element was never added.
                    return false;
                }
            }
            return true;
        }

        /**
         * Combines the two 32-bit halves for the given round and reduces the result
         * to a bit position below {@code totalBits}.
         */
        private long bitIndex(int lowHalf, int highHalf, int round, long totalBits) {
            int combined = lowHalf + (round * highHalf);
            // Flip all the bits if it's negative (guaranteed positive number)
            if (combined < 0) {
                combined = ~combined;
            }
            return combined % totalBits;
        }
    },
    /**
     * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing.
     * It looks different than the implementation in MURMUR128_MITZ_32 because we're
     * avoiding the multiplication in the loop and doing a (much simpler) += hash2.
     * We're also changing the index to a positive number by AND'ing with
     * Long.MAX_VALUE instead of flipping the bits.
     */
    MURMUR128_MITZ_64() {
        @Override
        public <T> boolean put(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits) {
            final long totalBits = bits.bitSize();
            final byte[] hashBytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
            final long start = lowerEight(hashBytes);
            final long step = upperEight(hashBytes);
            boolean anyChanged = false;
            long running = start;
            for (int remaining = numHashFunctions; remaining > 0; remaining--) {
                // Make the combined hash positive and indexable
                anyChanged |= bits.set((running & Long.MAX_VALUE) % totalBits);
                running += step;
            }
            return anyChanged;
        }

        @Override
        public <T> boolean mightContain(T object, Funnel<? super T> funnel, int numHashFunctions, BitArray bits) {
            final long totalBits = bits.bitSize();
            final byte[] hashBytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
            final long start = lowerEight(hashBytes);
            final long step = upperEight(hashBytes);
            long running = start;
            for (int remaining = numHashFunctions; remaining > 0; remaining--) {
                // Make the combined hash positive and indexable
                if (!bits.get((running & Long.MAX_VALUE) % totalBits)) {
                    return false;
                }
                running += step;
            }
            return true;
        }

        /** Assembles bytes 0..7 into a long, with byte 0 passed last to {@code Longs.fromBytes}. */
        private /* static */ long lowerEight(byte[] bytes) {
            return Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4],
                    bytes[3], bytes[2], bytes[1], bytes[0]);
        }

        /** Assembles bytes 8..15 into a long, with byte 8 passed last to {@code Longs.fromBytes}. */
        private /* static */ long upperEight(byte[] bytes) {
            return Longs.fromBytes(bytes[15], bytes[14], bytes[13], bytes[12],
                    bytes[11], bytes[10], bytes[9], bytes[8]);
        }
    };

    // Note: We use this instead of java.util.BitSet because we need access to the
    // long[] data field
    static final class BitArray {
        final long[] data;
        long bitCount;

        /** Allocates enough 64-bit words to hold {@code bits} bits (rounded up). */
        BitArray(long bits) {
            this(new long[Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))]);
        }

        // Used by serialization
        BitArray(long[] data) {
            checkArgument(data.length > 0, "data length is zero!");
            this.data = data;
            // Count the bits that are already set in the supplied words.
            long ones = 0;
            for (int word = 0; word < data.length; word++) {
                ones += Long.bitCount(data[word]);
            }
            this.bitCount = ones;
        }

        /** Returns true if the bit changed value. */
        boolean set(long index) {
            if (get(index)) {
                return false;
            }
            data[(int) (index >>> 6)] |= (1L << index);
            bitCount++;
            return true;
        }

        /** Returns whether the bit at {@code index} is set. */
        boolean get(long index) {
            final long word = data[(int) (index >>> 6)];
            final long mask = 1L << index;
            return (word & mask) != 0;
        }

        /** Number of bits */
        long bitSize() {
            return (long) data.length * Long.SIZE;
        }

        /** Number of set bits (1s) */
        long bitCount() {
            return bitCount;
        }

        /** Returns a new BitArray backed by a clone of this one's words. */
        BitArray copy() {
            final long[] cloned = data.clone();
            return new BitArray(cloned);
        }

        /** Combines the two BitArrays using bitwise OR. */
        void putAll(BitArray array) {
            checkArgument(data.length == array.data.length, "BitArrays must be of equal length (%s != %s)", data.length,
                    array.data.length);
            // Recount from scratch while merging word by word.
            bitCount = 0;
            for (int word = 0; word < data.length; word++) {
                data[word] |= array.data[word];
                bitCount += Long.bitCount(data[word]);
            }
        }

        @Override
        public boolean equals(Object o) {
            if (!(o instanceof BitArray)) {
                return false;
            }
            final BitArray other = (BitArray) o;
            return Arrays.equals(data, other.data);
        }

        @Override
        public int hashCode() {
            return Arrays.hashCode(data);
        }
    }
}

View File

@ -0,0 +1,96 @@
/*
* Copyright (C) 2012 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import java.io.Serializable;
import java.util.zip.Checksum;
import com.google.common.base.Supplier;
/**
 * {@link HashFunction} adapter for {@link Checksum} instances.
 *
 * @author Colin Decker
 */
final class ChecksumHashFunction extends AbstractStreamingHashFunction implements Serializable {
    private static final long serialVersionUID = 0L;

    private final Supplier<? extends Checksum> checksumSupplier;
    private final int bits;
    private final String toString;

    /**
     * @param checksumSupplier source of the {@link Checksum} used by each new hasher
     * @param bits             width of the produced hash code; only 32 or 64 is accepted
     * @param toString         text returned by {@link #toString()}
     */
    ChecksumHashFunction(Supplier<? extends Checksum> checksumSupplier, int bits, String toString) {
        this.checksumSupplier = checkNotNull(checksumSupplier);
        checkArgument(bits == 32 || bits == 64, "bits (%s) must be either 32 or 64", bits);
        this.bits = bits;
        this.toString = checkNotNull(toString);
    }

    @Override
    public int bits() {
        return bits;
    }

    @Override
    public Hasher newHasher() {
        // Each hasher gets whatever instance the supplier hands out.
        final Checksum checksum = checksumSupplier.get();
        return new ChecksumHasher(checksum);
    }

    @Override
    public String toString() {
        return toString;
    }

    /**
     * Hasher that updates a checksum.
     */
    private final class ChecksumHasher extends AbstractByteHasher {
        private final Checksum checksum;

        private ChecksumHasher(Checksum checksum) {
            this.checksum = checkNotNull(checksum);
        }

        @Override
        protected void update(byte b) {
            checksum.update(b);
        }

        @Override
        protected void update(byte[] bytes, int off, int len) {
            checksum.update(bytes, off, len);
        }

        @Override
        public HashCode hash() {
            final long value = checksum.getValue();
            if (bits != 32) {
                return HashCode.fromLong(value);
            }
            /*
             * The long returned from a 32-bit Checksum will have all 0s for its second
             * word, so the cast won't lose any information and is necessary to return a
             * HashCode of the correct size.
             */
            return HashCode.fromInt((int) value);
        }
    }
}

View File

@ -0,0 +1,59 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import java.io.Serializable;
import com.google.common.annotations.Beta;
/**
 * An object which can send data from an object of type {@code T} into a
 * {@code PrimitiveSink}. Implementations for common types can be found in
 * {@link Funnels}.
 *
 * <p>
 * Note that serialization of {@linkplain BloomFilter bloom filters} requires
 * the proper serialization of funnels. When possible, it is recommended that
 * funnels be implemented as a single-element enum to maintain serialization
 * guarantees. See Effective Java (2nd Edition), Item 3: "Enforce the singleton
 * property with a private constructor or an enum type". For example:
 *
 * <pre>
 * {@code
 * public enum PersonFunnel implements Funnel<Person> {
 *   INSTANCE;
 *   public void funnel(Person person, PrimitiveSink into) {
 *     into.putUnencodedChars(person.getFirstName())
 *         .putUnencodedChars(person.getLastName())
 *         .putInt(person.getAge());
 *   }
 * }}
 * </pre>
 *
 * @param <T> the type of object this funnel reads from
 * @author Dimitris Andreou
 * @since 11.0
 */
@Beta
public interface Funnel<T> extends Serializable {
/**
 * Sends a stream of data from the {@code from} object into the sink
 * {@code into}. There is no requirement that this data be complete enough to
 * fully reconstitute the object later.
 *
 * @param from the object to read data from
 * @param into the sink that receives the data
 * @since 12.0 (in Guava 11.0, {@code PrimitiveSink} was named {@code Sink})
 */
void funnel(T from, PrimitiveSink into);
}

View File

@ -0,0 +1,270 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import java.io.OutputStream;
import java.io.Serializable;
import java.nio.charset.Charset;
import javax.annotation.Nullable;
import com.google.common.annotations.Beta;
import com.google.common.base.Preconditions;
/**
 * Funnels for common types. All implementations are serializable.
 *
 * @author Dimitris Andreou
 * @since 11.0
 */
@Beta
public final class Funnels {
    /** Static factories only; never instantiated. */
    private Funnels() {
    }

    /**
     * Returns a funnel that extracts the bytes from a {@code byte} array.
     */
    public static Funnel<byte[]> byteArrayFunnel() {
        return ByteArrayFunnel.INSTANCE;
    }

    /** Single-instance funnel that passes the whole array to {@code putBytes}. */
    private enum ByteArrayFunnel implements Funnel<byte[]> {
        INSTANCE;

        public void funnel(byte[] from, PrimitiveSink into) {
            into.putBytes(from);
        }

        @Override
        public String toString() {
            return "Funnels.byteArrayFunnel()";
        }
    }

    /**
     * Returns a funnel that extracts the characters from a {@code CharSequence}, a
     * character at a time, without performing any encoding. If you need to use a
     * specific encoding, use {@link Funnels#stringFunnel(Charset)} instead.
     *
     * @since 15.0 (since 11.0 as {@code Funnels.stringFunnel()})
     */
    public static Funnel<CharSequence> unencodedCharsFunnel() {
        return UnencodedCharsFunnel.INSTANCE;
    }

    /** Single-instance funnel that passes the sequence to {@code putUnencodedChars}. */
    private enum UnencodedCharsFunnel implements Funnel<CharSequence> {
        INSTANCE;

        public void funnel(CharSequence from, PrimitiveSink into) {
            into.putUnencodedChars(from);
        }

        @Override
        public String toString() {
            return "Funnels.unencodedCharsFunnel()";
        }
    }

    /**
     * Returns a funnel that encodes the characters of a {@code CharSequence} with
     * the specified {@code Charset}.
     *
     * @since 15.0
     */
    public static Funnel<CharSequence> stringFunnel(Charset charset) {
        final Funnel<CharSequence> encodingFunnel = new StringCharsetFunnel(charset);
        return encodingFunnel;
    }

    /**
     * Funnel that passes a sequence plus a fixed charset to {@code putString}. It
     * is serialized through {@link SerializedForm}, which stores the charset name.
     */
    private static class StringCharsetFunnel implements Funnel<CharSequence>, Serializable {
        private final Charset charset;

        StringCharsetFunnel(Charset charset) {
            this.charset = Preconditions.checkNotNull(charset);
        }

        public void funnel(CharSequence from, PrimitiveSink into) {
            into.putString(from, charset);
        }

        @Override
        public String toString() {
            return "Funnels.stringFunnel(" + charset.name() + ")";
        }

        @Override
        public boolean equals(@Nullable Object o) {
            if (!(o instanceof StringCharsetFunnel)) {
                return false;
            }
            final StringCharsetFunnel other = (StringCharsetFunnel) o;
            return this.charset.equals(other.charset);
        }

        @Override
        public int hashCode() {
            final int classHash = StringCharsetFunnel.class.hashCode();
            return classHash ^ charset.hashCode();
        }

        Object writeReplace() {
            return new SerializedForm(charset);
        }

        /** Stores {@code Charset.name()} and resolves back through {@code stringFunnel}. */
        private static class SerializedForm implements Serializable {
            private static final long serialVersionUID = 0;

            private final String charsetCanonicalName;

            SerializedForm(Charset charset) {
                this.charsetCanonicalName = charset.name();
            }

            private Object readResolve() {
                final Charset resolved = Charset.forName(charsetCanonicalName);
                return stringFunnel(resolved);
            }
        }
    }

    /**
     * Returns a funnel for integers.
     *
     * @since 13.0
     */
    public static Funnel<Integer> integerFunnel() {
        return IntegerFunnel.INSTANCE;
    }

    /** Single-instance funnel that passes the value to {@code putInt}. */
    private enum IntegerFunnel implements Funnel<Integer> {
        INSTANCE;

        public void funnel(Integer from, PrimitiveSink into) {
            into.putInt(from);
        }

        @Override
        public String toString() {
            return "Funnels.integerFunnel()";
        }
    }

    /**
     * Returns a funnel that processes an {@code Iterable} by funneling its elements
     * in iteration order with the specified funnel. No separators are added between
     * the elements.
     *
     * @since 15.0
     */
    public static <E> Funnel<Iterable<? extends E>> sequentialFunnel(Funnel<E> elementFunnel) {
        final Funnel<Iterable<? extends E>> sequential = new SequentialFunnel<E>(elementFunnel);
        return sequential;
    }

    /** Funnel that applies an element funnel to each item of an {@code Iterable} in turn. */
    private static class SequentialFunnel<E> implements Funnel<Iterable<? extends E>>, Serializable {
        private final Funnel<E> elementFunnel;

        SequentialFunnel(Funnel<E> elementFunnel) {
            this.elementFunnel = Preconditions.checkNotNull(elementFunnel);
        }

        public void funnel(Iterable<? extends E> from, PrimitiveSink into) {
            for (E element : from) {
                elementFunnel.funnel(element, into);
            }
        }

        @Override
        public String toString() {
            return "Funnels.sequentialFunnel(" + elementFunnel + ")";
        }

        @Override
        public boolean equals(@Nullable Object o) {
            if (!(o instanceof SequentialFunnel)) {
                return false;
            }
            final SequentialFunnel<?> other = (SequentialFunnel<?>) o;
            return elementFunnel.equals(other.elementFunnel);
        }

        @Override
        public int hashCode() {
            final int classHash = SequentialFunnel.class.hashCode();
            return classHash ^ elementFunnel.hashCode();
        }
    }

    /**
     * Returns a funnel for longs.
     *
     * @since 13.0
     */
    public static Funnel<Long> longFunnel() {
        return LongFunnel.INSTANCE;
    }

    /** Single-instance funnel that passes the value to {@code putLong}. */
    private enum LongFunnel implements Funnel<Long> {
        INSTANCE;

        public void funnel(Long from, PrimitiveSink into) {
            into.putLong(from);
        }

        @Override
        public String toString() {
            return "Funnels.longFunnel()";
        }
    }

    /**
     * Wraps a {@code PrimitiveSink} as an {@link OutputStream}, so it is easy to
     * {@link Funnel#funnel funnel} an object to a {@code PrimitiveSink} if there is
     * already a way to write the contents of the object to an {@code OutputStream}.
     *
     * <p>
     * The {@code close} and {@code flush} methods of the returned
     * {@code OutputStream} do nothing, and no method throws {@code IOException}.
     *
     * @since 13.0
     */
    public static OutputStream asOutputStream(PrimitiveSink sink) {
        final OutputStream adapter = new SinkAsStream(sink);
        return adapter;
    }

    /** {@link OutputStream} whose write methods forward to a {@code PrimitiveSink}. */
    private static class SinkAsStream extends OutputStream {
        final PrimitiveSink sink;

        SinkAsStream(PrimitiveSink sink) {
            this.sink = Preconditions.checkNotNull(sink);
        }

        @Override
        public void write(int b) {
            // Only the low-order byte of the int is forwarded.
            sink.putByte((byte) b);
        }

        @Override
        public void write(byte[] bytes) {
            sink.putBytes(bytes);
        }

        @Override
        public void write(byte[] bytes, int off, int len) {
            sink.putBytes(bytes, off, len);
        }

        @Override
        public String toString() {
            return "Funnels.asOutputStream(" + sink + ")";
        }
    }
}

View File

@ -0,0 +1,384 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import java.io.Serializable;
import java.security.MessageDigest;
import javax.annotation.Nullable;
import com.google.common.annotations.Beta;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Ints;
import com.google.common.primitives.UnsignedInts;
/**
 * An immutable hash code of arbitrary bit length.
 *
 * @author Dimitris Andreou
 * @author Kurt Alfred Kluever
 * @since 11.0
 */
@Beta
public abstract class HashCode {
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    HashCode() {
    }

    /**
     * Returns the number of bits in this hash code; a positive multiple of 8.
     */
    public abstract int bits();

    /**
     * Returns the first four bytes of {@linkplain #asBytes() this hashcode's
     * bytes}, converted to an {@code int} value in little-endian order.
     *
     * @throws IllegalStateException if {@code bits() < 32}
     */
    public abstract int asInt();

    /**
     * Returns the first eight bytes of {@linkplain #asBytes() this hashcode's
     * bytes}, converted to a {@code long} value in little-endian order.
     *
     * @throws IllegalStateException if {@code bits() < 64}
     */
    public abstract long asLong();

    /**
     * If this hashcode has enough bits, returns {@code asLong()}, otherwise returns
     * a {@code long} value with {@code asBytes()} as the least-significant bytes
     * and {@code 0x00} as the remaining most-significant bytes.
     *
     * @since 14.0 (since 11.0 as {@code Hashing.padToLong(HashCode)})
     */
    public abstract long padToLong();

    /**
     * Returns the value of this hash code as a byte array. The caller may modify
     * the byte array; changes to it will <i>not</i> be reflected in this
     * {@code HashCode} object or any other arrays returned by this method.
     */
    // TODO(user): consider ByteString here, when that is available
    public abstract byte[] asBytes();

    /**
     * Copies bytes from this hash code into {@code dest}.
     *
     * @param dest      the byte array into which the hash code will be written
     * @param offset    the start offset in the data
     * @param maxLength the maximum number of bytes to write
     * @return the number of bytes written to {@code dest}
     * @throws IndexOutOfBoundsException if there is not enough room in {@code dest}
     */
    public int writeBytesTo(byte[] dest, int offset, int maxLength) {
        // Never write more bytes than this hash code actually has.
        final int count = Ints.min(maxLength, bits() / 8);
        Preconditions.checkPositionIndexes(offset, offset + count, dest.length);
        writeBytesToImpl(dest, offset, count);
        return count;
    }

    abstract void writeBytesToImpl(byte[] dest, int offset, int maxLength);

    /**
     * Returns a mutable view of the underlying bytes for the given {@code HashCode}
     * if it is a byte-based hashcode. Otherwise it returns
     * {@link HashCode#asBytes}. Do <i>not</i> mutate this array or else you will
     * break the immutability contract of {@code HashCode}.
     */
    byte[] getBytesInternal() {
        return asBytes();
    }

    /**
     * Creates a 32-bit {@code HashCode} representation of the given int value. The
     * underlying bytes are interpreted in little endian order.
     *
     * @since 15.0 (since 12.0 in HashCodes)
     */
    public static HashCode fromInt(int hash) {
        return new IntHashCode(hash);
    }

    /** 32-bit hash code backed by a single {@code int}. */
    private static final class IntHashCode extends HashCode implements Serializable {
        private static final long serialVersionUID = 0;

        final int hash;

        IntHashCode(int hash) {
            this.hash = hash;
        }

        @Override
        public int bits() {
            return 32;
        }

        @Override
        public byte[] asBytes() {
            // Least-significant byte first.
            final byte[] out = new byte[4];
            writeBytesToImpl(out, 0, out.length);
            return out;
        }

        @Override
        public int asInt() {
            return hash;
        }

        @Override
        public long asLong() {
            throw new IllegalStateException("this HashCode only has 32 bits; cannot create a long");
        }

        @Override
        public long padToLong() {
            return UnsignedInts.toLong(hash);
        }

        @Override
        void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
            for (int position = 0; position < maxLength; position++) {
                dest[offset + position] = (byte) (hash >> (position * 8));
            }
        }
    }

    /**
     * Creates a 64-bit {@code HashCode} representation of the given long value. The
     * underlying bytes are interpreted in little endian order.
     *
     * @since 15.0 (since 12.0 in HashCodes)
     */
    public static HashCode fromLong(long hash) {
        return new LongHashCode(hash);
    }

    /** 64-bit hash code backed by a single {@code long}. */
    private static final class LongHashCode extends HashCode implements Serializable {
        private static final long serialVersionUID = 0;

        final long hash;

        LongHashCode(long hash) {
            this.hash = hash;
        }

        @Override
        public int bits() {
            return 64;
        }

        @Override
        public byte[] asBytes() {
            // Least-significant byte first.
            final byte[] out = new byte[8];
            writeBytesToImpl(out, 0, out.length);
            return out;
        }

        @Override
        public int asInt() {
            return (int) hash;
        }

        @Override
        public long asLong() {
            return hash;
        }

        @Override
        public long padToLong() {
            return hash;
        }

        @Override
        void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
            for (int position = 0; position < maxLength; position++) {
                dest[offset + position] = (byte) (hash >> (position * 8));
            }
        }
    }

    /**
     * Creates a {@code HashCode} from a byte array. The array is defensively copied
     * to preserve the immutability contract of {@code HashCode}. The array cannot
     * be empty.
     *
     * @since 15.0 (since 12.0 in HashCodes)
     */
    public static HashCode fromBytes(byte[] bytes) {
        checkArgument(bytes.length >= 1, "A HashCode must contain at least 1 byte.");
        final byte[] privateCopy = bytes.clone();
        return fromBytesNoCopy(privateCopy);
    }

    /**
     * Creates a {@code HashCode} from a byte array. The array is <i>not</i> copied
     * defensively, so it must be handed-off so as to preserve the immutability
     * contract of {@code HashCode}.
     */
    static HashCode fromBytesNoCopy(byte[] bytes) {
        return new BytesHashCode(bytes);
    }

    /** Hash code of arbitrary length backed directly by a {@code byte[]}. */
    private static final class BytesHashCode extends HashCode implements Serializable {
        private static final long serialVersionUID = 0;

        final byte[] bytes;

        BytesHashCode(byte[] bytes) {
            this.bytes = checkNotNull(bytes);
        }

        @Override
        public int bits() {
            return bytes.length * 8;
        }

        @Override
        public byte[] asBytes() {
            return bytes.clone();
        }

        @Override
        public int asInt() {
            checkState(bytes.length >= 4, "HashCode#asInt() requires >= 4 bytes (it only has %s bytes).", bytes.length);
            // Little-endian: byte 0 is the least-significant byte.
            return (bytes[0] & 0xFF)
                    | ((bytes[1] & 0xFF) << 8)
                    | ((bytes[2] & 0xFF) << 16)
                    | ((bytes[3] & 0xFF) << 24);
        }

        @Override
        public long asLong() {
            checkState(bytes.length >= 8, "HashCode#asLong() requires >= 8 bytes (it only has %s bytes).",
                    bytes.length);
            return padToLong();
        }

        @Override
        public long padToLong() {
            long value = (bytes[0] & 0xFF);
            // Fold in at most seven further bytes, each shifted to its little-endian slot.
            final int limit = Math.min(bytes.length, 8);
            for (int position = 1; position < limit; position++) {
                value |= (bytes[position] & 0xFFL) << (position * 8);
            }
            return value;
        }

        @Override
        void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
            System.arraycopy(bytes, 0, dest, offset, maxLength);
        }

        @Override
        byte[] getBytesInternal() {
            return bytes;
        }
    }

    /**
     * Creates a {@code HashCode} from a hexadecimal ({@code base 16}) encoded
     * string. The string must be at least 2 characters long, and contain only
     * valid, lower-cased hexadecimal characters.
     *
     * <p>
     * This method accepts the exact format generated by {@link #toString}. If you
     * require more lenient {@code base 16} decoding, please use
     * {@link com.google.common.io.BaseEncoding#decode} (and pass the result to
     * {@link #fromBytes}).
     *
     * @since 15.0
     */
    public static HashCode fromString(String string) {
        checkArgument(string.length() >= 2, "input string (%s) must have at least 2 characters", string);
        checkArgument(string.length() % 2 == 0, "input string (%s) must have an even number of characters", string);
        final byte[] decoded = new byte[string.length() / 2];
        for (int out = 0; out < decoded.length; out++) {
            // Each output byte comes from two consecutive hex digits: high nibble, then low.
            final int high = decode(string.charAt(2 * out)) << 4;
            final int low = decode(string.charAt(2 * out + 1));
            decoded[out] = (byte) (high + low);
        }
        return fromBytesNoCopy(decoded);
    }

    /** Maps one lower-case hexadecimal digit to its value; anything else is rejected. */
    private static int decode(char ch) {
        if ('0' <= ch && ch <= '9') {
            return ch - '0';
        }
        if ('a' <= ch && ch <= 'f') {
            return ch - 'a' + 10;
        }
        throw new IllegalArgumentException("Illegal hexadecimal character: " + ch);
    }

    @Override
    public final boolean equals(@Nullable Object object) {
        if (!(object instanceof HashCode)) {
            return false;
        }
        final HashCode that = (HashCode) object;
        // Undocumented: this is a non-short-circuiting equals(), in case this is a
        // cryptographic hash code, in which case we don't want to leak timing
        // information
        return MessageDigest.isEqual(this.asBytes(), that.asBytes());
    }

    /**
     * Returns a "Java hash code" for this {@code HashCode} instance; this is
     * well-defined (so, for example, you can safely put {@code HashCode} instances
     * into a {@code HashSet}) but is otherwise probably not what you want to use.
     */
    @Override
    public final int hashCode() {
        // If we have at least 4 bytes (32 bits), just take the first 4 bytes. Since
        // this is already a (presumably) high-quality hash code, any four bytes of
        // it will do.
        if (bits() >= 32) {
            return asInt();
        }
        // If we have less than 4 bytes, use them all.
        final byte[] raw = asBytes();
        int folded = (raw[0] & 0xFF);
        for (int position = 1; position < raw.length; position++) {
            folded |= ((raw[position] & 0xFF) << (position * 8));
        }
        return folded;
    }

    /**
     * Returns a string containing each byte of {@link #asBytes}, in order, as a
     * two-digit unsigned hexadecimal number in lower case.
     *
     * <p>
     * Note that if the output is considered to be a single hexadecimal number, this
     * hash code's bytes are the <i>big-endian</i> representation of that number.
     * This may be surprising since everything else in the hashing API uniformly
     * treats multibyte values as little-endian. But this format conveniently
     * matches that of utilities such as the UNIX {@code md5sum} command.
     *
     * <p>
     * To create a {@code HashCode} from its string representation, see
     * {@link #fromString}.
     */
    @Override
    public final String toString() {
        final byte[] raw = asBytes();
        final StringBuilder hex = new StringBuilder(2 * raw.length);
        for (int position = 0; position < raw.length; position++) {
            final byte current = raw[position];
            hex.append(HEX_DIGITS[(current >> 4) & 0xf]);
            hex.append(HEX_DIGITS[current & 0xf]);
        }
        return hex.toString();
    }
}

View File

@ -0,0 +1,246 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import java.nio.charset.Charset;
import com.google.common.annotations.Beta;
import com.google.common.primitives.Ints;
/**
* A hash function is a collision-averse pure function that maps an arbitrary
* block of data to a number called a <i>hash code</i>.
*
* <h3>Definition</h3>
*
* <p>
* Unpacking this definition:
*
* <ul>
* <li><b>block of data:</b> the input for a hash function is always, in
* concept, an ordered byte array. This hashing API accepts an arbitrary
* sequence of byte and multibyte values (via {@link Hasher}), but this is
* merely a convenience; these are always translated into raw byte sequences
* under the covers.
*
* <li><b>hash code:</b> each hash function always yields hash codes of the same
* fixed bit length (given by {@link #bits}). For example, {@link Hashing#sha1}
* produces a 160-bit number, while {@link Hashing#murmur3_32()} yields only 32
* bits. Because a {@code long} value is clearly insufficient to hold all hash
* code values, this API represents a hash code as an instance of
* {@link HashCode}.
*
* <li><b>pure function:</b> the value produced must depend only on the input
* bytes, in the order they appear. Input data is never modified.
* {@link HashFunction} instances should always be stateless, and therefore
* thread-safe.
*
* <li><b>collision-averse:</b> while it can't be helped that a hash function
* will sometimes produce the same hash code for distinct inputs (a
* "collision"), every hash function strives to <i>some</i> degree to make this
* unlikely. (Without this condition, a function that always returns zero could
* be called a hash function. It is not.)
* </ul>
*
* <p>
* Summarizing the last two points: "equal yield equal <i>always</i>; unequal
* yield unequal <i>often</i>." This is the most important characteristic of all
* hash functions.
*
* <h3>Desirable properties</h3>
*
* <p>
* A high-quality hash function strives for some subset of the following
* virtues:
*
* <ul>
* <li><b>collision-resistant:</b> while the definition above requires making at
* least <i>some</i> token attempt, one measure of the quality of a hash
* function is <i>how well</i> it succeeds at this goal. Important note: it may
* be easy to achieve the theoretical minimum collision rate when using
* completely <i>random</i> sample input. The true test of a hash function is
* how it performs on representative real-world data, which tends to contain
* many hidden patterns and clumps. The goal of a good hash function is to stamp
* these patterns out as thoroughly as possible.
*
* <li><b>bit-dispersing:</b> masking out any <i>single bit</i> from a hash code
* should yield only the expected <i>twofold</i> increase to all collision
* rates. Informally, the "information" in the hash code should be as evenly
* "spread out" through the hash code's bits as possible. The result is that,
* for example, when choosing a bucket in a hash table of size 2^8, <i>any</i>
* eight bits could be consistently used.
*
* <li><b>cryptographic:</b> certain hash functions such as
* {@link Hashing#sha512} are designed to make it as infeasible as possible to
* reverse-engineer the input that produced a given hash code, or even to
* discover <i>any</i> two distinct inputs that yield the same result. These are
* called <i>cryptographic hash functions</i>. But, whenever it is learned that
* either of these feats has become computationally feasible, the function is
* deemed "broken" and should no longer be used for secure purposes. (This is
* the likely eventual fate of <i>all</i> cryptographic hashes.)
*
* <li><b>fast:</b> perhaps self-explanatory, but often the most important
* consideration. We have published <a href="#noWeHaventYet">microbenchmark
* results</a> for many common hash functions.
* </ul>
*
* <h3>Providing input to a hash function</h3>
*
* <p>
* The primary way to provide the data that your hash function should act on is
* via a {@link Hasher}. Obtain a new hasher from the hash function using
* {@link #newHasher}, "push" the relevant data into it using methods like
* {@link Hasher#putBytes(byte[])}, and finally ask for the {@code HashCode}
* when finished using {@link Hasher#hash}. (See an {@linkplain #newHasher
* example} of this.)
*
* <p>
* If all you want to hash is a single byte array, string or {@code long} value,
* there are convenient shortcut methods defined directly on
* {@link HashFunction} to make this easier.
*
* <p>
* Hasher accepts primitive data types, but can also accept any Object of type
* {@code T} provided that you implement a {@link Funnel Funnel&lt;T&gt;} to
* specify how to "feed" data from that object into the function. (See
* {@linkplain Hasher#putObject an example} of this.)
*
* <p>
* <b>Compatibility note:</b> Throughout this API, multibyte values are always
* interpreted in <i>little-endian</i> order. That is, hashing the byte array
* {@code {0x01, 0x02, 0x03, 0x04}} is equivalent to hashing the {@code int}
* value {@code 0x04030201}. If this isn't what you need, methods such as
* {@link Integer#reverseBytes} and {@link Ints#toByteArray} will help.
*
* <h3>Relationship to {@link Object#hashCode}</h3>
*
* <p>
* Java's baked-in concept of hash codes is constrained to 32 bits, and provides
* no separation between hash algorithms and the data they act on, so alternate
* hash algorithms can't be easily substituted. Also, implementations of
* {@code hashCode} tend to be poor-quality, in part because they end up
* depending on <i>other</i> existing poor-quality {@code hashCode}
* implementations, including those in many JDK classes.
*
* <p>
* {@code Object.hashCode} implementations tend to be very fast, but have weak
* collision prevention and <i>no</i> expectation of bit dispersion. This leaves
* them perfectly suitable for use in hash tables, because extra collisions
* cause only a slight performance hit, while poor bit dispersion is easily
* corrected using a secondary hash function (which all reasonable hash table
* implementations in Java use). For the many uses of hash functions beyond data
* structures, however, {@code Object.hashCode} almost always falls short --
* hence this library.
*
* @author Kevin Bourrillion
* @since 11.0
*/
@Beta
public interface HashFunction {
/**
* Begins a new hash code computation by returning an initialized, stateful
* {@code Hasher} instance that is ready to receive data. Example:
*
* <pre>
* {@code
* HashFunction hf = Hashing.md5();
* HashCode hc = hf.newHasher().putLong(id).putBoolean(isActive).hash();
* }
* </pre>
*
* @return a new hasher for this hash function
*/
Hasher newHasher();
/**
* Begins a new hash code computation as {@link #newHasher()}, but provides a
* hint of the expected size of the input (in bytes). This is only important for
* non-streaming hash functions (hash functions that need to buffer their whole
* input before processing any of it).
*
* @param expectedInputSize hint for the number of input bytes that will be
* supplied
* @return a new hasher for this hash function
*/
Hasher newHasher(int expectedInputSize);
/**
* Shortcut for {@code newHasher().putInt(input).hash()}; returns the hash code
* for the given {@code int} value, interpreted in little-endian byte order. The
* implementation <i>might</i> perform better than its longhand equivalent, but
* should not perform worse.
*
* @param input the {@code int} value to hash
* @return the hash code of {@code input}
* @since 12.0
*/
HashCode hashInt(int input);
/**
* Shortcut for {@code newHasher().putLong(input).hash()}; returns the hash code
* for the given {@code long} value, interpreted in little-endian byte order.
* The implementation <i>might</i> perform better than its longhand equivalent,
* but should not perform worse.
*
* @param input the {@code long} value to hash
* @return the hash code of {@code input}
*/
HashCode hashLong(long input);
/**
* Shortcut for {@code newHasher().putBytes(input).hash()}. The implementation
* <i>might</i> perform better than its longhand equivalent, but should not
* perform worse.
*
* @param input the bytes to hash
* @return the hash code of the whole array
*/
HashCode hashBytes(byte[] input);
/**
* Shortcut for {@code newHasher().putBytes(input, off, len).hash()}. The
* implementation <i>might</i> perform better than its longhand equivalent, but
* should not perform worse.
*
* @param input the array holding the bytes to hash
* @param off index of the first byte to hash
* @param len number of bytes to hash
* @return the hash code of the selected range
* @throws IndexOutOfBoundsException if {@code off < 0} or
* {@code off + len > input.length} or
* {@code len < 0}
*/
HashCode hashBytes(byte[] input, int off, int len);
/**
* Shortcut for {@code newHasher().putUnencodedChars(input).hash()}. The
* implementation <i>might</i> perform better than its longhand equivalent, but
* should not perform worse. Note that no character encoding is performed; the
* low byte and high byte of each {@code char} are hashed directly (in that
* order).
*
* @param input the characters to hash
* @return the hash code of the unencoded characters
* @since 15.0 (since 11.0 as hashString(CharSequence)).
*/
HashCode hashUnencodedChars(CharSequence input);
/**
* Shortcut for {@code newHasher().putString(input, charset).hash()}. Characters
* are encoded using the given {@link Charset}. The implementation <i>might</i>
* perform better than its longhand equivalent, but should not perform worse.
*
* @param input the characters to hash
* @param charset the charset used to encode {@code input} into bytes
* @return the hash code of the encoded characters
*/
HashCode hashString(CharSequence input, Charset charset);
/**
* Shortcut for {@code newHasher().putObject(instance, funnel).hash()}. The
* implementation <i>might</i> perform better than its longhand equivalent, but
* should not perform worse.
*
* @param instance the object to hash
* @param funnel describes how to feed {@code instance} into the hasher
* @return the hash code of the funneled data
* @since 14.0
*/
<T> HashCode hashObject(T instance, Funnel<? super T> funnel);
/**
* Returns the number of bits (a multiple of 32) that each hash code produced by
* this hash function has.
*
* @return the fixed bit length of this function's hash codes
*/
int bits();
}

View File

@ -0,0 +1,131 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import java.nio.charset.Charset;
import com.google.common.annotations.Beta;
/**
* A {@link PrimitiveSink} that can compute a hash code after reading the input.
* Each hasher should translate all multibyte values ({@link #putInt(int)},
* {@link #putLong(long)}, etc) to bytes in little-endian order.
*
* <p>
* <b>Warning:</b> The result of calling any methods after calling {@link #hash}
* is undefined.
*
* <p>
* <b>Warning:</b> Using a specific character encoding when hashing a
* {@link CharSequence} with {@link #putString(CharSequence, Charset)} is
* generally only useful for cross-language compatibility (otherwise prefer
* {@link #putUnencodedChars}). However, the character encodings must be
* identical across languages. Also beware that {@link Charset} definitions may
* occasionally change between Java releases.
*
* <p>
* <b>Warning:</b> Chunks of data that are put into the {@link Hasher} are not
* delimited. The resulting {@link HashCode} is dependent only on the bytes
* inserted, and the order in which they were inserted, not how those bytes were
* chunked into discrete put() operations. For example, the following three
* expressions all generate colliding hash codes:
*
* <pre>
* {@code
*
* newHasher().putByte(b1).putByte(b2).putByte(b3).hash()
* newHasher().putByte(b1).putBytes(new byte[] { b2, b3 }).hash()
* newHasher().putBytes(new byte[] { b1, b2, b3 }).hash()}
* </pre>
*
* <p>
* If you wish to avoid this, you should either prepend or append the size of
* each chunk. Keep in mind that when dealing with char sequences, the encoded
* form of two concatenated char sequences is not equivalent to the
* concatenation of their encoded form. Therefore,
* {@link #putString(CharSequence, Charset)} should only be used consistently
* with <i>complete</i> sequences and not broken into chunks.
*
* @author Kevin Bourrillion
* @since 11.0
*/
@Beta
public interface Hasher extends PrimitiveSink {
// Every put* method below is declared to return Hasher, so calls can be
// chained and finished with hash(), e.g.
// newHasher().putByte(b).putInt(i).hash().
/**
* Adds a single byte to the data being hashed.
*/
@Override
Hasher putByte(byte b);
/**
* Adds all bytes of the given array to the data being hashed.
*/
@Override
Hasher putBytes(byte[] bytes);
/**
* Adds {@code len} bytes of the given array, starting at index {@code off}, to
* the data being hashed.
*/
@Override
Hasher putBytes(byte[] bytes, int off, int len);
/**
* Adds a {@code short} to the data being hashed. As a multibyte value it is
* expected to be translated to bytes in little-endian order.
*/
@Override
Hasher putShort(short s);
/**
* Adds an {@code int} to the data being hashed. As a multibyte value it is
* expected to be translated to bytes in little-endian order.
*/
@Override
Hasher putInt(int i);
/**
* Adds a {@code long} to the data being hashed. As a multibyte value it is
* expected to be translated to bytes in little-endian order.
*/
@Override
Hasher putLong(long l);
/**
* Equivalent to {@code putInt(Float.floatToRawIntBits(f))}.
*/
@Override
Hasher putFloat(float f);
/**
* Equivalent to {@code putLong(Double.doubleToRawLongBits(d))}.
*/
@Override
Hasher putDouble(double d);
/**
* Equivalent to {@code putByte(b ? (byte) 1 : (byte) 0)}.
*/
@Override
Hasher putBoolean(boolean b);
/**
* Adds a {@code char} to the data being hashed. As a multibyte value it is
* expected to be translated to bytes in little-endian order.
*/
@Override
Hasher putChar(char c);
/**
* Equivalent to processing each {@code char} value in the {@code CharSequence},
* in order. The input must not be updated while this method is in progress.
*
* @since 15.0 (since 11.0 as putString(CharSequence)).
*/
@Override
Hasher putUnencodedChars(CharSequence charSequence);
/**
* Equivalent to {@code putBytes(charSequence.toString().getBytes(charset))}.
*/
@Override
Hasher putString(CharSequence charSequence, Charset charset);
/**
* A simple convenience for {@code funnel.funnel(object, this)}.
*
* @param instance the object whose data should be added
* @param funnel describes how to feed {@code instance} into this hasher
*/
<T> Hasher putObject(T instance, Funnel<? super T> funnel);
/**
* Computes a hash code based on the data that have been provided to this
* hasher. The result is unspecified if this method is called more than once on
* the same instance.
*
* @return the hash code of all data put into this hasher so far
*/
HashCode hash();
}

View File

@ -0,0 +1,483 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkArgument;
import java.security.MessageDigest;
import java.util.Iterator;
import java.util.zip.Adler32;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
import javax.annotation.Nullable;
import com.google.common.annotations.Beta;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Supplier;
/**
* Static methods to obtain {@link HashFunction} instances, and other static
* hashing-related utilities.
*
* <p>
* A comparison of the various hash functions can be found
* <a href="http://goo.gl/jS7HH">here</a>.
*
* @author Kevin Bourrillion
* @author Dimitris Andreou
* @author Kurt Alfred Kluever
* @since 11.0
*/
@Beta
public final class Hashing {
/**
 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash
 * function. The algorithm the returned function implements is unspecified and
 * subject to change without notice.
 *
 * <p>
 * <b>Warning:</b> a new random seed for these functions is chosen each time the
 * {@code Hashing} class is loaded. <b>Do not use this method</b> if hash codes
 * may escape the current process in any way, for example being sent over RPC,
 * or saved to disk.
 *
 * <p>
 * Repeated calls to this method on the same loaded {@code Hashing} class, using
 * the same value for {@code minimumBits}, will return identically-behaving
 * {@link HashFunction} instances.
 *
 * @param minimumBits a positive integer (can be arbitrarily large)
 * @return a hash function, described above, that produces hash codes of length
 *         {@code minimumBits} or greater
 * @throws IllegalArgumentException if {@code minimumBits} is not positive
 */
public static HashFunction goodFastHash(int minimumBits) {
    int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
    if (bits == 32) {
        return Murmur3_32Holder.GOOD_FAST_HASH_FUNCTION_32;
    }
    if (bits <= 128) {
        return Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128;
    }
    // Otherwise, join together some 128-bit murmur3s. The ceiling division is
    // written as (bits - 1) / 128 + 1 instead of (bits + 127) / 128 because the
    // latter overflows int (and yields a negative array size) when bits is
    // within 127 of Integer.MAX_VALUE.
    int hashFunctionsNeeded = (bits - 1) / 128 + 1;
    HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
    hashFunctions[0] = Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128;
    int seed = GOOD_FAST_HASH_SEED;
    for (int i = 1; i < hashFunctionsNeeded; i++) {
        seed += 1500450271; // a prime; shouldn't matter
        hashFunctions[i] = murmur3_128(seed);
    }
    return new ConcatenatedHashFunction(hashFunctions);
}
/**
* Used to randomize {@link #goodFastHash} instances, so that programs which
* persist anything dependent on the hash codes they produce will fail sooner.
* The value is the current time in milliseconds narrowed to {@code int} (only
* the low 32 bits survive the cast), captured once when this field is
* initialized.
*/
private static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis();
/**
* Returns a hash function implementing the
* <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 32-bit
* murmur3 algorithm, x86 variant</a> (little-endian variant), using the given
* seed value.
*
* <p>
* The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
*
* @param seed the seed handed to the new hash function
* @return a newly constructed hash function; seeded instances are not cached
*/
public static HashFunction murmur3_32(int seed) {
return new Murmur3_32HashFunction(seed);
}
/**
* Returns a hash function implementing the
* <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 32-bit
* murmur3 algorithm, x86 variant</a> (little-endian variant), using a seed
* value of zero.
*
* <p>
* The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
*
* @return the shared zero-seed instance held by {@code Murmur3_32Holder}
*/
public static HashFunction murmur3_32() {
return Murmur3_32Holder.MURMUR3_32;
}
// Holder for the shared 32-bit murmur3 instances. Being static fields of a
// nested class, they are created when this class is first initialized rather
// than when Hashing itself is.
private static class Murmur3_32Holder {
// Zero-seed instance returned by murmur3_32().
static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0);
/** Returned by {@link #goodFastHash} when {@code minimumBits <= 32}. */
static final HashFunction GOOD_FAST_HASH_FUNCTION_32 = murmur3_32(GOOD_FAST_HASH_SEED);
}
/**
* Returns a hash function implementing the
* <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 128-bit
* murmur3 algorithm, x64 variant</a> (little-endian variant), using the given
* seed value.
*
* <p>
* The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
*
* @param seed the seed handed to the new hash function
* @return a newly constructed hash function; seeded instances are not cached
*/
public static HashFunction murmur3_128(int seed) {
return new Murmur3_128HashFunction(seed);
}
/**
* Returns a hash function implementing the
* <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 128-bit
* murmur3 algorithm, x64 variant</a> (little-endian variant), using a seed
* value of zero.
*
* <p>
* The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
*
* @return the shared zero-seed instance held by {@code Murmur3_128Holder}
*/
public static HashFunction murmur3_128() {
return Murmur3_128Holder.MURMUR3_128;
}
// Holder for the shared 128-bit murmur3 instances. Being static fields of a
// nested class, they are created when this class is first initialized rather
// than when Hashing itself is.
private static class Murmur3_128Holder {
// Zero-seed instance returned by murmur3_128().
static final HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0);
/** Returned by {@link #goodFastHash} when {@code 32 < minimumBits <= 128}. */
static final HashFunction GOOD_FAST_HASH_FUNCTION_128 = murmur3_128(GOOD_FAST_HASH_SEED);
}
/**
* Returns a hash function implementing the
* <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a> using
* a seed value of {@code k = 00 01 02 ...}.
*
* @return the shared instance held by {@code SipHash24Holder}
* @since 15.0
*/
public static HashFunction sipHash24() {
return SipHash24Holder.SIP_HASH_24;
}
// Holder for the shared default-key SipHash-2-4 instance. The two long
// constants are the key halves k0 and k1; read least-significant byte first
// they spell out the bytes 00 01 02 ... 0f.
private static class SipHash24Holder {
static final HashFunction SIP_HASH_24 = new SipHashFunction(2, 4, 0x0706050403020100L, 0x0f0e0d0c0b0a0908L);
}
/**
* Returns a hash function implementing the
* <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a> using
* the given seed.
*
* @param k0 first half of the seed (key)
* @param k1 second half of the seed (key)
* @return a newly constructed hash function for the given seed
* @since 15.0
*/
public static HashFunction sipHash24(long k0, long k1) {
return new SipHashFunction(2, 4, k0, k1);
}
/**
* Returns a hash function implementing the MD5 hash algorithm (128 hash bits)
* by delegating to the MD5 {@link MessageDigest}.
*
* @return the shared instance held by {@code Md5Holder}
*/
// NOTE(review): for security-sensitive hashing, callers should presumably
// prefer sha256() or stronger over MD5 - confirm against project policy.
public static HashFunction md5() {
return Md5Holder.MD5;
}
// Holder for the shared MD5 function; the instance is created when this nested
// class is first initialized. The second constructor argument looks like the
// text used for toString() - confirm in MessageDigestHashFunction.
private static class Md5Holder {
static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()");
}
/**
* Returns a hash function implementing the SHA-1 algorithm (160 hash bits) by
* delegating to the SHA-1 {@link MessageDigest}.
*
* @return the shared instance held by {@code Sha1Holder}
*/
// NOTE(review): for security-sensitive hashing, callers should presumably
// prefer sha256() or stronger over SHA-1 - confirm against project policy.
public static HashFunction sha1() {
return Sha1Holder.SHA_1;
}
// Holder for the shared SHA-1 function; the instance is created when this
// nested class is first initialized.
private static class Sha1Holder {
static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()");
}
/**
* Returns a hash function implementing the SHA-256 algorithm (256 hash bits) by
* delegating to the SHA-256 {@link MessageDigest}.
*
* @return the shared instance held by {@code Sha256Holder}
*/
public static HashFunction sha256() {
return Sha256Holder.SHA_256;
}
// Holder for the shared SHA-256 function; the instance is created when this
// nested class is first initialized.
private static class Sha256Holder {
static final HashFunction SHA_256 = new MessageDigestHashFunction("SHA-256", "Hashing.sha256()");
}
/**
* Returns a hash function implementing the SHA-512 algorithm (512 hash bits) by
* delegating to the SHA-512 {@link MessageDigest}.
*
* @return the shared instance held by {@code Sha512Holder}
*/
public static HashFunction sha512() {
return Sha512Holder.SHA_512;
}
// Holder for the shared SHA-512 function; the instance is created when this
// nested class is first initialized.
private static class Sha512Holder {
static final HashFunction SHA_512 = new MessageDigestHashFunction("SHA-512", "Hashing.sha512()");
}
/**
* Returns a hash function implementing the CRC-32 checksum algorithm (32 hash
* bits) by delegating to the {@link CRC32} {@link Checksum}.
*
* <p>
* To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
* {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
*
* @return the shared instance held by {@code Crc32Holder}
* @since 14.0
*/
public static HashFunction crc32() {
return Crc32Holder.CRC_32;
}
// Holder for the shared CRC-32 function, built from ChecksumType.CRC_32 when
// this nested class is first initialized.
private static class Crc32Holder {
static final HashFunction CRC_32 = checksumHashFunction(ChecksumType.CRC_32, "Hashing.crc32()");
}
/**
* Returns a hash function implementing the Adler-32 checksum algorithm (32 hash
* bits) by delegating to the {@link Adler32} {@link Checksum}.
*
* <p>
* To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
* {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
*
* @return the shared instance held by {@code Adler32Holder}
* @since 14.0
*/
public static HashFunction adler32() {
return Adler32Holder.ADLER_32;
}
// Holder for the shared Adler-32 function, built from ChecksumType.ADLER_32
// when this nested class is first initialized.
private static class Adler32Holder {
static final HashFunction ADLER_32 = checksumHashFunction(ChecksumType.ADLER_32, "Hashing.adler32()");
}
/**
* Builds a {@code ChecksumHashFunction} for the given checksum type, passing
* the type itself (as the {@link Checksum} supplier), the type's bit count and
* the supplied string.
*
* @param type the checksum algorithm to wrap
* @param toString passed through to the constructor; presumably the text the
* function reports from {@code toString()} - confirm in
* ChecksumHashFunction
*/
private static HashFunction checksumHashFunction(ChecksumType type, String toString) {
return new ChecksumHashFunction(type, type.bits, toString);
}
/**
* The JDK checksum algorithms exposed by {@link #crc32()} and
* {@link #adler32()}. Each constant acts as a {@link Supplier} that creates a
* new {@link Checksum} on every {@link #get()} call, and records the bit count
* used when building the corresponding hash function.
*/
enum ChecksumType implements Supplier<Checksum> {
CRC_32(32) {
@Override
public Checksum get() {
return new CRC32();
}
},
ADLER_32(32) {
@Override
public Checksum get() {
return new Adler32();
}
};
// Bit count handed to ChecksumHashFunction by checksumHashFunction().
private final int bits;
ChecksumType(int bits) {
this.bits = bits;
}
// Redeclared abstract so that every constant must provide its own factory.
@Override
public abstract Checksum get();
}
/**
* Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in
* a uniform manner that minimizes the need for remapping as {@code buckets}
* grows. That is, {@code consistentHash(h, n)} equals:
*
* <ul>
* <li>{@code n - 1}, with approximate probability {@code 1/n}
* <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
* </ul>
*
* <p>
* See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
* article on consistent hashing</a> for more information.
*
* <p>
* This overload delegates to {@link #consistentHash(long, int)} with
* {@code hashCode.padToLong()} as the input.
*
* @param hashCode the hash code to place into a bucket
* @param buckets the number of buckets; must be positive
* @return the chosen bucket index
* @throws IllegalArgumentException if {@code buckets} is not positive
*/
public static int consistentHash(HashCode hashCode, int buckets) {
return consistentHash(hashCode.padToLong(), buckets);
}
/**
 * Maps {@code input} to a "bucket" in the range {@code [0, buckets)} such that
 * growing {@code buckets} moves as few inputs as possible. That is,
 * {@code consistentHash(h, n)} equals:
 *
 * <ul>
 * <li>{@code n - 1}, with approximate probability {@code 1/n}
 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
 * </ul>
 *
 * <p>
 * See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
 * article on consistent hashing</a> for more information.
 *
 * @param input   the value to place into a bucket; seeds the generator that
 *                drives the jumps
 * @param buckets the number of buckets; must be positive
 * @return the chosen bucket index
 * @throws IllegalArgumentException if {@code buckets} is not positive
 */
public static int consistentHash(long input, int buckets) {
    checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
    final LinearCongruentialGenerator random = new LinearCongruentialGenerator(input);
    int bucket = 0;
    // Keep jumping forward; the last bucket reached before a jump leaves
    // [0, buckets) is the answer.
    for (;;) {
        final int jump = (int) ((bucket + 1) / random.nextDouble());
        if (jump < 0 || jump >= buckets) {
            return bucket;
        }
        bucket = jump;
    }
}
/**
 * Combines several hash codes of equal bit length into one hash code of that
 * same length, in an order-sensitive way. That is, whenever two equal hash
 * codes are produced by two calls to this method, it is <i>as likely as
 * possible</i> that each was computed from the <i>same</i> input hash codes in
 * the <i>same</i> order.
 *
 * @param hashCodes the hash codes to combine, in order
 * @return the combined hash code
 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash
 *                                  codes do not all have the same bit length
 */
public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
    // This iterator is only used to reject empty input and to size the result;
    // the combining loop below walks the iterable again from the start.
    final Iterator<HashCode> probe = hashCodes.iterator();
    checkArgument(probe.hasNext(), "Must be at least 1 hash code to combine.");
    final int byteCount = probe.next().bits() / 8;
    final byte[] combined = new byte[byteCount];
    for (HashCode code : hashCodes) {
        final byte[] current = code.asBytes();
        checkArgument(current.length == combined.length, "All hashcodes must have the same bit length.");
        int index = 0;
        while (index < current.length) {
            // Multiplying the running value first makes position in the sequence matter.
            combined[index] = (byte) ((combined[index] * 37) ^ current[index]);
            index++;
        }
    }
    return HashCode.fromBytesNoCopy(combined);
}
/**
 * Combines several hash codes of equal bit length into one hash code of that
 * same length, ignoring their order. That is, whenever two equal hash codes are
 * produced by two calls to this method, it is <i>as likely as possible</i> that
 * each was computed from the <i>same</i> input hash codes in <i>some</i> order.
 *
 * @param hashCodes the hash codes to combine
 * @return the combined hash code
 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash
 *                                  codes do not all have the same bit length
 */
public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
    // This iterator is only used to reject empty input and to size the result;
    // the summing loop below walks the iterable again from the start.
    final Iterator<HashCode> probe = hashCodes.iterator();
    checkArgument(probe.hasNext(), "Must be at least 1 hash code to combine.");
    final byte[] sum = new byte[probe.next().bits() / 8];
    for (HashCode code : hashCodes) {
        final byte[] current = code.asBytes();
        checkArgument(current.length == sum.length, "All hashcodes must have the same bit length.");
        int index = 0;
        while (index < current.length) {
            // Byte-wise wrapping addition is commutative, hence order-independent.
            sum[index] = (byte) (sum[index] + current[index]);
            index++;
        }
    }
    return HashCode.fromBytesNoCopy(sum);
}
/**
 * Checks that the passed argument is positive, and rounds it up to the nearest
 * multiple of 32.
 *
 * @param bits the requested number of bits
 * @return the smallest multiple of 32 that is greater than or equal to
 *         {@code bits}
 * @throws IllegalArgumentException if {@code bits} is not positive, or is so
 *                                  large that the rounded value does not fit in
 *                                  an {@code int}
 */
static int checkPositiveAndMakeMultipleOf32(int bits) {
    checkArgument(bits > 0, "Number of bits must be positive");
    // Integer.MAX_VALUE & ~31 is the largest multiple of 32 an int can hold.
    // Above it, bits + 31 would wrap around and the result would be negative.
    checkArgument(bits <= (Integer.MAX_VALUE & ~31), "Number of bits is too large: %s", bits);
    return (bits + 31) & ~31;
}
// TODO(kevinb): Maybe expose this class via a static Hashing method?
/**
 * A hash function whose output is the outputs of several underlying hash
 * functions laid end to end, in the order the functions were given.
 */
@VisibleForTesting
static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
    /** Total output size: the sum of {@code bits()} over all functions. */
    private final int bits;

    /**
     * @param functions the functions whose outputs are concatenated, in order
     */
    ConcatenatedHashFunction(HashFunction... functions) {
        super(functions);
        int total = 0;
        for (int i = 0; i < functions.length; i++) {
            total += functions[i].bits();
        }
        this.bits = total;
    }

    /**
     * Finishes every hasher and writes each resulting hash code into one array,
     * one after another.
     */
    @Override
    HashCode makeHash(Hasher[] hashers) {
        final byte[] joined = new byte[bits / 8];
        int offset = 0;
        for (Hasher hasher : hashers) {
            final HashCode part = hasher.hash();
            offset += part.writeBytesTo(joined, offset, part.bits() / 8);
        }
        return HashCode.fromBytesNoCopy(joined);
    }

    @Override
    public int bits() {
        return bits;
    }

    /**
     * Two instances are equal when they have the same total bit count and
     * pairwise-equal functions in the same order.
     */
    @Override
    public boolean equals(@Nullable Object object) {
        if (!(object instanceof ConcatenatedHashFunction)) {
            return false;
        }
        final ConcatenatedHashFunction that = (ConcatenatedHashFunction) object;
        if (bits != that.bits || functions.length != that.functions.length) {
            return false;
        }
        for (int i = 0; i < functions.length; i++) {
            if (!functions[i].equals(that.functions[i])) {
                return false;
            }
        }
        return true;
    }

    /** XORs the bit count with the hash code of every underlying function. */
    @Override
    public int hashCode() {
        int result = bits;
        for (int i = 0; i < functions.length; i++) {
            result ^= functions[i].hashCode();
        }
        return result;
    }
}
/**
 * Linear congruential generator that drives the bucket jumps in consistent
 * hashing. See http://en.wikipedia.org/wiki/Linear_congruential_generator
 */
private static final class LinearCongruentialGenerator {
    /** Current generator state; starts as the seed and advances on every draw. */
    private long state;

    public LinearCongruentialGenerator(long seed) {
        this.state = seed;
    }

    /**
     * Advances the state by one step and derives a {@code double} from its top
     * 31 bits.
     */
    public double nextDouble() {
        state = state * 2862933555777941757L + 1L;
        final int top = (int) (state >>> 33);
        // The + 1 is intentionally done in int arithmetic, exactly as before.
        final int numerator = top + 1;
        return ((double) numerator) / (0x1.0p31);
    }
}
// Not instantiable: this class only exposes static members.
private Hashing() {
}
}

View File

@ -0,0 +1,108 @@
/*
* Copyright (C) 2013 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkNotNull;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import com.google.common.annotations.Beta;
/**
 * An {@link InputStream} wrapper that feeds every byte read through it into a
 * {@link Hasher}, so that a hash of the consumed data can be obtained from
 * {@link #hash()}.
 *
 * @author Qian Huang
 * @since 16.0
 */
@Beta
public final class HashingInputStream extends FilterInputStream {
    private final Hasher hasher;

    /**
     * Creates an input stream that hashes using the given {@link HashFunction} and
     * delegates all data read from it to the underlying {@link InputStream}.
     *
     * <p>
     * The {@link InputStream} should not be read from before or after the hand-off.
     *
     * @param hashFunction supplies the hasher that accumulates the data read
     * @param in           the stream to read from; must not be null
     */
    public HashingInputStream(HashFunction hashFunction, InputStream in) {
        super(checkNotNull(in));
        this.hasher = checkNotNull(hashFunction.newHasher());
    }

    /**
     * Reads one byte from the underlying stream and, unless end of stream was
     * reported, adds it to the hasher.
     */
    @Override
    public int read() throws IOException {
        final int value = in.read();
        if (value == -1) {
            return value;
        }
        hasher.putByte((byte) value);
        return value;
    }

    /**
     * Reads up to {@code len} bytes from the underlying stream into
     * {@code bytes} and adds exactly the bytes that were read to the hasher.
     */
    @Override
    public int read(byte[] bytes, int off, int len) throws IOException {
        final int count = in.read(bytes, off, len);
        if (count == -1) {
            return count;
        }
        hasher.putBytes(bytes, off, count);
        return count;
    }

    /**
     * mark() is not supported for HashingInputStream
     *
     * @return {@code false} always
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * mark() is not supported for HashingInputStream; this call does nothing.
     */
    @Override
    public void mark(int readlimit) {
    }

    /**
     * reset() is not supported for HashingInputStream.
     *
     * @throws IOException always, because this operation is not supported
     */
    @Override
    public void reset() throws IOException {
        throw new IOException("reset not supported");
    }

    /**
     * Returns the {@link HashCode} based on the data read from this stream. The
     * result is unspecified if this method is called more than once on the same
     * instance.
     */
    public HashCode hash() {
        return hasher.hash();
    }
}

View File

@ -0,0 +1,84 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkNotNull;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import com.google.common.annotations.Beta;
/**
 * An {@link OutputStream} wrapper that feeds every byte written through it into
 * a {@link Hasher}, so that a hash of the written data can be obtained from
 * {@link #hash()}.
 *
 * @author Nick Piepmeier
 * @since 16.0
 */
@Beta
public final class HashingOutputStream extends FilterOutputStream {
    private final Hasher hasher;

    /**
     * Creates an output stream that hashes using the given {@link HashFunction},
     * and forwards all data written to it to the underlying {@link OutputStream}.
     *
     * <p>
     * The {@link OutputStream} should not be written to before or after the
     * hand-off.
     *
     * @param hashFunction supplies the hasher that accumulates the data written
     * @param out          the stream to forward to; must not be null
     */
    // TODO(user): Evaluate whether it makes sense to always piggyback the
    // computation of a HashCode on an existing OutputStream, compared to creating
    // a separate OutputStream that could (optionally) be combined with another if
    // needed (with something like MultiplexingOutputStream).
    public HashingOutputStream(HashFunction hashFunction, OutputStream out) {
        super(checkNotNull(out));
        this.hasher = checkNotNull(hashFunction.newHasher());
    }

    /** Adds the byte to the hasher, then forwards it to the underlying stream. */
    @Override
    public void write(int b) throws IOException {
        final byte single = (byte) b;
        hasher.putByte(single);
        out.write(b);
    }

    /**
     * Adds the given range to the hasher, then forwards the same range to the
     * underlying stream.
     */
    @Override
    public void write(byte[] bytes, int off, int len) throws IOException {
        hasher.putBytes(bytes, off, len);
        out.write(bytes, off, len);
    }

    /**
     * Returns the {@link HashCode} based on the data written to this stream. The
     * result is unspecified if this method is called more than once on the same
     * instance.
     */
    public HashCode hash() {
        return hasher.hash();
    }

    // Overridden because FilterOutputStream's close() before JDK 8 silently
    // ignores any exception thrown by flush(). Closing the delegate directly
    // avoids that; the delegate should flush itself if necessary.
    @Override
    public void close() throws IOException {
        out.close();
    }
}

View File

@ -0,0 +1,159 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import java.io.Serializable;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
/**
 * {@link HashFunction} adapter for {@link MessageDigest} instances.
 *
 * <p>
 * A prototype digest is looked up once per function. Each hasher works on its own digest,
 * obtained by cloning the prototype when cloning succeeds and by a fresh algorithm lookup
 * otherwise. Serialization goes through {@code SerializedForm}, which records only the algorithm
 * name, the output length in bytes and the string form.
 *
 * @author Kevin Bourrillion
 * @author Dimitris Andreou
 */
final class MessageDigestHashFunction extends AbstractStreamingHashFunction implements Serializable {
    /** Digest used as the template for new hashers and as the source of algorithm metadata. */
    private final MessageDigest prototype;
    /** Number of digest bytes kept in the resulting hash code. */
    private final int bytes;
    /** Cached outcome of probing whether {@link #prototype} can be cloned. */
    private final boolean supportsClone;
    /** Value returned verbatim by {@link #toString()}. */
    private final String toString;

    /**
     * Creates a function that keeps the full digest output of the named algorithm.
     *
     * @param algorithmName name passed to {@link MessageDigest#getInstance(String)}
     * @param toString      string form of this function; null-checked
     * @throws AssertionError if the algorithm is not available
     */
    MessageDigestHashFunction(String algorithmName, String toString) {
        this.prototype = getMessageDigest(algorithmName);
        this.bytes = prototype.getDigestLength();
        this.toString = checkNotNull(toString);
        this.supportsClone = supportsClone();
    }

    /**
     * Creates a function that keeps only the first {@code bytes} bytes of the digest output.
     *
     * @param algorithmName name passed to {@link MessageDigest#getInstance(String)}
     * @param bytes         number of digest bytes to keep; must be between 4 and the digest
     *                      length of the algorithm, both inclusive
     * @param toString      string form of this function; null-checked
     * @throws IllegalArgumentException if {@code bytes} is outside the allowed range
     * @throws AssertionError           if the algorithm is not available
     */
    MessageDigestHashFunction(String algorithmName, int bytes, String toString) {
        this.toString = checkNotNull(toString);
        this.prototype = getMessageDigest(algorithmName);
        int maxLength = prototype.getDigestLength();
        // The upper bound is inclusive, so the message must say "<=" to match the check.
        checkArgument(bytes >= 4 && bytes <= maxLength, "bytes (%s) must be >= 4 and <= %s", bytes, maxLength);
        this.bytes = bytes;
        this.supportsClone = supportsClone();
    }

    /**
     * Probes once whether the prototype digest can be cloned.
     *
     * @return {@code true} if {@code prototype.clone()} completed without a
     *         {@link CloneNotSupportedException}
     */
    private boolean supportsClone() {
        try {
            prototype.clone();
            return true;
        } catch (CloneNotSupportedException e) {
            return false;
        }
    }

    /**
     * @return the size of the produced hash codes in bits, i.e. the kept byte count times
     *         {@link Byte#SIZE}
     */
    @Override
    public int bits() {
        return bytes * Byte.SIZE;
    }

    @Override
    public String toString() {
        return toString;
    }

    /**
     * Looks up a digest by algorithm name, converting the checked lookup failure into an error.
     *
     * @param algorithmName name passed to {@link MessageDigest#getInstance(String)}
     * @return a digest instance for the algorithm
     * @throws AssertionError wrapping the {@link NoSuchAlgorithmException} if the lookup fails
     */
    private static MessageDigest getMessageDigest(String algorithmName) {
        try {
            return MessageDigest.getInstance(algorithmName);
        } catch (NoSuchAlgorithmException e) {
            throw new AssertionError(e);
        }
    }

    /**
     * Creates a hasher backed by its own digest instance.
     *
     * @return a new hasher that truncates its output to the configured byte count
     */
    @Override
    public Hasher newHasher() {
        if (supportsClone) {
            try {
                return new MessageDigestHasher((MessageDigest) prototype.clone(), bytes);
            } catch (CloneNotSupportedException e) {
                // Deliberately ignored: fall through to a fresh lookup by algorithm name below.
            }
        }
        return new MessageDigestHasher(getMessageDigest(prototype.getAlgorithm()), bytes);
    }

    /**
     * Serialized stand-in for the enclosing function; rebuilds the function from the algorithm
     * name, kept byte count and string form when it is deserialized.
     */
    private static final class SerializedForm implements Serializable {
        private final String algorithmName;
        private final int bytes;
        private final String toString;

        private SerializedForm(String algorithmName, int bytes, String toString) {
            this.algorithmName = algorithmName;
            this.bytes = bytes;
            this.toString = toString;
        }

        /** Recreates the hash function through its three-argument constructor. */
        private Object readResolve() {
            return new MessageDigestHashFunction(algorithmName, bytes, toString);
        }

        private static final long serialVersionUID = 0;
    }

    /** Substitutes a {@code SerializedForm} for this instance during serialization. */
    Object writeReplace() {
        return new SerializedForm(prototype.getAlgorithm(), bytes, toString);
    }

    /**
     * Hasher that updates a message digest.
     */
    private static final class MessageDigestHasher extends AbstractByteHasher {
        private final MessageDigest digest;
        /** Number of digest bytes to keep in the result. */
        private final int bytes;
        /** Set once {@link #hash()} has run; any later use is rejected. */
        private boolean done;

        private MessageDigestHasher(MessageDigest digest, int bytes) {
            this.digest = digest;
            this.bytes = bytes;
        }

        @Override
        protected void update(byte b) {
            checkNotDone();
            digest.update(b);
        }

        @Override
        protected void update(byte[] b) {
            checkNotDone();
            digest.update(b);
        }

        @Override
        protected void update(byte[] b, int off, int len) {
            checkNotDone();
            digest.update(b, off, len);
        }

        /**
         * @throws IllegalStateException if {@link #hash()} has already been called
         */
        private void checkNotDone() {
            checkState(!done, "Cannot re-use a Hasher after calling hash() on it");
        }

        /**
         * Completes the digest and wraps its output, truncated to the configured byte count when
         * that is shorter than the digest length.
         *
         * @return the resulting hash code
         * @throws IllegalStateException if called more than once
         */
        @Override
        public HashCode hash() {
            checkNotDone();
            done = true;
            return (bytes == digest.getDigestLength()) ? HashCode.fromBytesNoCopy(digest.digest())
                    : HashCode.fromBytesNoCopy(Arrays.copyOf(digest.digest(), bytes));
        }
    }
}

View File

@ -0,0 +1,205 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
/*
* MurmurHash3 was written by Austin Appleby, and is placed in the public
* domain. The author hereby disclaims copyright to this source code.
*/
/*
* Source:
* http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
* (Modified to adapt to Guava coding conventions and to use the HashFunction interface)
*/
package com.google.common.hash;
import static com.google.common.primitives.UnsignedBytes.toInt;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import javax.annotation.Nullable;
/**
 * {@link HashFunction} producing 128-bit hash codes with the MurmurHash3_x64_128 variant. See
 * http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp
 *
 * @author Austin Appleby
 * @author Dimitris Andreou
 */
final class Murmur3_128HashFunction extends AbstractStreamingHashFunction implements Serializable {
    // TODO(user): when the shortcuts are implemented, update BloomFilterStrategies
    private static final long serialVersionUID = 0L;

    /** Seed handed to every hasher created by this function. */
    private final int seed;

    /**
     * @param seed initial value for both halves of the hasher state
     */
    Murmur3_128HashFunction(int seed) {
        this.seed = seed;
    }

    /** @return the hash code width in bits, always 128 */
    @Override
    public int bits() {
        return 128;
    }

    /** @return a new hasher initialised with this function's seed */
    @Override
    public Hasher newHasher() {
        return new Murmur3_128Hasher(seed);
    }

    @Override
    public String toString() {
        return new StringBuilder("Hashing.murmur3_128(").append(seed).append(")").toString();
    }

    /** Two functions of this class are equal exactly when their seeds match. */
    @Override
    public boolean equals(@Nullable Object object) {
        if (!(object instanceof Murmur3_128HashFunction)) {
            return false;
        }
        return ((Murmur3_128HashFunction) object).seed == seed;
    }

    @Override
    public int hashCode() {
        final int classHash = getClass().hashCode();
        return classHash ^ seed;
    }

    /** Streaming hasher that consumes its input in 16-byte chunks. */
    private static final class Murmur3_128Hasher extends AbstractStreamingHasher {
        private static final int CHUNK_SIZE = 16;
        private static final long C1 = 0x87c37b91114253d5L;
        private static final long C2 = 0x4cf5ad432745937fL;
        /** First half of the running state. */
        private long h1;
        /** Second half of the running state. */
        private long h2;
        /** Number of input bytes consumed so far. */
        private int length;

        Murmur3_128Hasher(int seed) {
            super(CHUNK_SIZE);
            this.length = 0;
            this.h1 = seed;
            this.h2 = seed;
        }

        /** Mixes one full chunk, read as two consecutive longs, into the state. */
        @Override
        protected void process(ByteBuffer bb) {
            // Arguments are evaluated left to right, so the first long read is k1.
            bmix64(bb.getLong(), bb.getLong());
            length += CHUNK_SIZE;
        }

        /** Folds the two halves of a chunk into {@code h1} and then {@code h2}. */
        private void bmix64(long k1, long k2) {
            // h2 depends on the already-updated h1, so the order of these statements matters.
            h1 = (Long.rotateLeft(h1 ^ mixK1(k1), 27) + h2) * 5 + 0x52dce729;
            h2 = (Long.rotateLeft(h2 ^ mixK2(k2), 31) + h1) * 5 + 0x38495ab5;
        }

        /**
         * Mixes the final partial chunk of 1 to 15 bytes into the state.
         *
         * @throws AssertionError if the buffer holds any other number of remaining bytes
         */
        @Override
        protected void processRemaining(ByteBuffer bb) {
            final int tail = bb.remaining();
            // The byte count is recorded before the range check.
            length += tail;
            if (tail < 1 || tail > 15) {
                throw new AssertionError("Should never get here.");
            }
            long k1 = 0;
            long k2 = 0;
            if (tail >= 8) {
                // Bytes at absolute indexes 8 and above go into k2, byte 8 in the lowest bits.
                for (int index = tail - 1; index >= 8; index--) {
                    k2 ^= (long) toInt(bb.get(index)) << (8 * (index - 8));
                }
                // The leading eight bytes are read as one long.
                k1 ^= bb.getLong();
            } else {
                // Fewer than eight bytes: assemble k1 byte by byte, byte 0 in the lowest bits.
                for (int index = tail - 1; index >= 0; index--) {
                    k1 ^= (long) toInt(bb.get(index)) << (8 * index);
                }
            }
            h1 ^= mixK1(k1);
            h2 ^= mixK2(k2);
        }

        /** Finalises the state and emits h1 followed by h2 as 16 little-endian bytes. */
        @Override
        public HashCode makeHash() {
            h1 ^= length;
            h2 ^= length;
            h1 += h2;
            h2 += h1;
            h1 = fmix64(h1);
            h2 = fmix64(h2);
            h1 += h2;
            h2 += h1;
            final ByteBuffer out = ByteBuffer.wrap(new byte[CHUNK_SIZE]).order(ByteOrder.LITTLE_ENDIAN);
            out.putLong(h1);
            out.putLong(h2);
            return HashCode.fromBytesNoCopy(out.array());
        }

        /** Finalization mix applied to each half of the state. */
        private static long fmix64(long k) {
            long mixed = k ^ (k >>> 33);
            mixed *= 0xff51afd7ed558ccdL;
            mixed ^= mixed >>> 33;
            mixed *= 0xc4ceb9fe1a85ec53L;
            mixed ^= mixed >>> 33;
            return mixed;
        }

        private static long mixK1(long k1) {
            return Long.rotateLeft(k1 * C1, 31) * C2;
        }

        private static long mixK2(long k2) {
            return Long.rotateLeft(k2 * C2, 33) * C1;
        }
    }
}

View File

@ -0,0 +1,190 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
/*
* MurmurHash3 was written by Austin Appleby, and is placed in the public
* domain. The author hereby disclaims copyright to this source code.
*/
/*
* Source:
* http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
* (Modified to adapt to Guava coding conventions and to use the HashFunction interface)
*/
package com.google.common.hash;
import static com.google.common.primitives.UnsignedBytes.toInt;
import java.io.Serializable;
import java.nio.ByteBuffer;
import javax.annotation.Nullable;
import com.google.common.primitives.Chars;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
/**
 * {@link HashFunction} producing 32-bit hash codes with the MurmurHash3_x86_32 variant. See
 * http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp
 *
 * @author Austin Appleby
 * @author Dimitris Andreou
 * @author Kurt Alfred Kluever
 */
final class Murmur3_32HashFunction extends AbstractStreamingHashFunction implements Serializable {
    private static final long serialVersionUID = 0L;
    private static final int C1 = 0xcc9e2d51;
    private static final int C2 = 0x1b873593;

    /** Seed used as the initial state of every hash computation. */
    private final int seed;

    /**
     * @param seed initial state for hash computations
     */
    Murmur3_32HashFunction(int seed) {
        this.seed = seed;
    }

    /** @return the hash code width in bits, always 32 */
    @Override
    public int bits() {
        return 32;
    }

    /** @return a new streaming hasher initialised with this function's seed */
    @Override
    public Hasher newHasher() {
        return new Murmur3_32Hasher(seed);
    }

    @Override
    public String toString() {
        return new StringBuilder("Hashing.murmur3_32(").append(seed).append(")").toString();
    }

    /** Two functions of this class are equal exactly when their seeds match. */
    @Override
    public boolean equals(@Nullable Object object) {
        if (!(object instanceof Murmur3_32HashFunction)) {
            return false;
        }
        return ((Murmur3_32HashFunction) object).seed == seed;
    }

    @Override
    public int hashCode() {
        final int classHash = getClass().hashCode();
        return classHash ^ seed;
    }

    /** Shortcut that hashes a single int as one block. */
    @Override
    public HashCode hashInt(int input) {
        return fmix(mixH1(seed, mixK1(input)), Ints.BYTES);
    }

    /** Shortcut that hashes a long as two blocks: the low 32 bits first, then the high 32. */
    @Override
    public HashCode hashLong(long input) {
        final int lowBits = (int) input;
        final int highBits = (int) (input >>> 32);
        int state = mixH1(seed, mixK1(lowBits));
        state = mixH1(state, mixK1(highBits));
        return fmix(state, Longs.BYTES);
    }

    // TODO(user): Maybe implement #hashBytes instead?
    /** Shortcut that hashes the chars of {@code input} directly, two chars per block. */
    @Override
    public HashCode hashUnencodedChars(CharSequence input) {
        int state = seed;
        // 'second' indexes the second char of each pair; the first char fills the low 16 bits.
        int second = 1;
        while (second < input.length()) {
            final int packed = input.charAt(second - 1) | (input.charAt(second) << 16);
            state = mixH1(state, mixK1(packed));
            second += 2;
        }
        // An odd-length sequence leaves one trailing char; it is mixed in without the mixH1 step.
        if ((input.length() & 1) == 1) {
            final int last = input.charAt(input.length() - 1);
            state ^= mixK1(last);
        }
        return fmix(state, Chars.BYTES * input.length());
    }

    private static int mixK1(int k1) {
        return Integer.rotateLeft(k1 * C1, 15) * C2;
    }

    private static int mixH1(int h1, int k1) {
        return Integer.rotateLeft(h1 ^ k1, 13) * 5 + 0xe6546b64;
    }

    // Finalization mix - force all bits of a hash block to avalanche
    private static HashCode fmix(int h1, int length) {
        int mixed = h1 ^ length;
        mixed ^= mixed >>> 16;
        mixed *= 0x85ebca6b;
        mixed ^= mixed >>> 13;
        mixed *= 0xc2b2ae35;
        mixed ^= mixed >>> 16;
        return HashCode.fromInt(mixed);
    }

    /** Streaming hasher that consumes its input in 4-byte chunks. */
    private static final class Murmur3_32Hasher extends AbstractStreamingHasher {
        private static final int CHUNK_SIZE = 4;
        /** Running state. */
        private int h1;
        /** Number of input bytes consumed so far. */
        private int length;

        Murmur3_32Hasher(int seed) {
            super(CHUNK_SIZE);
            this.length = 0;
            this.h1 = seed;
        }

        /** Mixes one full chunk, read as a single int, into the state. */
        @Override
        protected void process(ByteBuffer bb) {
            final int block = bb.getInt();
            h1 = Murmur3_32HashFunction.mixH1(h1, Murmur3_32HashFunction.mixK1(block));
            length += CHUNK_SIZE;
        }

        /** Mixes the trailing bytes into the state, the first remaining byte in the lowest bits. */
        @Override
        protected void processRemaining(ByteBuffer bb) {
            length += bb.remaining();
            int tail = 0;
            int shift = 0;
            while (bb.hasRemaining()) {
                tail ^= toInt(bb.get()) << shift;
                shift += 8;
            }
            h1 ^= Murmur3_32HashFunction.mixK1(tail);
        }

        @Override
        public HashCode makeHash() {
            return Murmur3_32HashFunction.fmix(h1, length);
        }
    }
}

View File

@ -0,0 +1,105 @@
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.hash;
import java.nio.charset.Charset;
import com.google.common.annotations.Beta;
/**
* An object which can receive a stream of primitive values.
*
* <p>
* Every {@code put} method returns a {@code PrimitiveSink}, so calls can be
* chained.
*
* @author Kevin Bourrillion
* @since 12.0 (in 11.0 as {@code Sink})
*/
@Beta
public interface PrimitiveSink {
/**
* Puts a byte into this sink.
*
* @param b a byte
* @return this instance
*/
PrimitiveSink putByte(byte b);
/**
* Puts an array of bytes into this sink.
*
* @param bytes a byte array
* @return this instance
*/
PrimitiveSink putBytes(byte[] bytes);
/**
* Puts a chunk of an array of bytes into this sink. {@code bytes[off]} is the
* first byte written, {@code bytes[off + len - 1]} is the last.
*
* @param bytes a byte array
* @param off the start offset in the array
* @param len the number of bytes to write
* @return this instance
* @throws IndexOutOfBoundsException if {@code off < 0} or
* {@code off + len > bytes.length} or
* {@code len < 0}
*/
PrimitiveSink putBytes(byte[] bytes, int off, int len);
/**
* Puts a short into this sink.
*
* @param s a short
* @return the sink to continue writing to; expected to be this instance, as
* documented for {@link #putByte(byte)}
*/
PrimitiveSink putShort(short s);
/**
* Puts an int into this sink.
*
* @param i an int
* @return the sink to continue writing to; expected to be this instance, as
* documented for {@link #putByte(byte)}
*/
PrimitiveSink putInt(int i);
/**
* Puts a long into this sink.
*
* @param l a long
* @return the sink to continue writing to; expected to be this instance, as
* documented for {@link #putByte(byte)}
*/
PrimitiveSink putLong(long l);
/**
* Puts a float into this sink.
*
* @param f a float
* @return the sink to continue writing to; expected to be this instance, as
* documented for {@link #putByte(byte)}
*/
PrimitiveSink putFloat(float f);
/**
* Puts a double into this sink.
*
* @param d a double
* @return the sink to continue writing to; expected to be this instance, as
* documented for {@link #putByte(byte)}
*/
PrimitiveSink putDouble(double d);
/**
* Puts a boolean into this sink.
*
* @param b a boolean
* @return the sink to continue writing to; expected to be this instance, as
* documented for {@link #putByte(byte)}
*/
PrimitiveSink putBoolean(boolean b);
/**
* Puts a character into this sink.
*
* @param c a character
* @return the sink to continue writing to; expected to be this instance, as
* documented for {@link #putByte(byte)}
*/
PrimitiveSink putChar(char c);
/**
* Puts each 16-bit code unit from the {@link CharSequence} into this sink.
*
* @param charSequence the sequence whose code units are written
* @return the sink to continue writing to; expected to be this instance, as
* documented for {@link #putByte(byte)}
* @since 15.0 (since 11.0 as putString(CharSequence))
*/
PrimitiveSink putUnencodedChars(CharSequence charSequence);
/**
* Puts a string into this sink using the given charset.
*
* @param charSequence the text to write
* @param charset the charset used for the text
* @return the sink to continue writing to; expected to be this instance, as
* documented for {@link #putByte(byte)}
*/
PrimitiveSink putString(CharSequence charSequence, Charset charset);
}

View File

@ -0,0 +1,182 @@
/*
* Copyright (C) 2012 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
/*
* SipHash-c-d was designed by Jean-Philippe Aumasson and Daniel J. Bernstein and is described in
* "SipHash: a fast short-input PRF" (available at https://131002.net/siphash/siphash.pdf).
*/
package com.google.common.hash;
import static com.google.common.base.Preconditions.checkArgument;
import java.io.Serializable;
import java.nio.ByteBuffer;
import javax.annotation.Nullable;
/**
 * {@link HashFunction} implementation of SipHash-c-d, producing 64-bit hash codes from a 128-bit
 * key supplied as two longs.
 *
 * @author Kurt Alfred Kluever
 * @author Jean-Philippe Aumasson
 * @author Daniel J. Bernstein
 */
final class SipHashFunction extends AbstractStreamingHashFunction implements Serializable {
    private static final long serialVersionUID = 0L;

    /** Number of compression rounds. */
    private final int c;
    /** Number of finalization rounds. */
    private final int d;
    /** First half of the 128-bit key. */
    private final long k0;
    /** Second half of the 128-bit key. */
    private final long k1;

    /**
     * @param c  the number of compression rounds (must be positive)
     * @param d  the number of finalization rounds (must be positive)
     * @param k0 the first half of the key
     * @param k1 the second half of the key
     */
    SipHashFunction(int c, int d, long k0, long k1) {
        checkArgument(c > 0, "The number of SipRound iterations (c=%s) during Compression must be positive.", c);
        checkArgument(d > 0, "The number of SipRound iterations (d=%s) during Finalization must be positive.", d);
        this.k0 = k0;
        this.k1 = k1;
        this.c = c;
        this.d = d;
    }

    /** @return the hash code width in bits, always 64 */
    @Override
    public int bits() {
        return 64;
    }

    /** @return a new hasher configured with this function's round counts and key */
    @Override
    public Hasher newHasher() {
        return new SipHasher(c, d, k0, k1);
    }

    // TODO(user): Implement and benchmark the hashFoo() shortcuts.
    @Override
    public String toString() {
        return new StringBuilder("Hashing.sipHash").append(c).append(d).append("(").append(k0).append(", ")
                .append(k1).append(")").toString();
    }

    /** Two functions of this class are equal when round counts and both key halves match. */
    @Override
    public boolean equals(@Nullable Object object) {
        if (!(object instanceof SipHashFunction)) {
            return false;
        }
        final SipHashFunction that = (SipHashFunction) object;
        return (c == that.c) && (d == that.d) && (k0 == that.k0) && (k1 == that.k1);
    }

    @Override
    public int hashCode() {
        // The xor is carried out in long arithmetic and only the low 32 bits are kept.
        final long combined = getClass().hashCode() ^ c ^ d ^ k0 ^ k1;
        return (int) combined;
    }

    /** Streaming hasher that consumes its input in 8-byte chunks. */
    private static final class SipHasher extends AbstractStreamingHasher {
        private static final int CHUNK_SIZE = 8;
        /** Number of compression rounds. */
        private final int c;
        /** Number of finalization rounds. */
        private final int d;
        // Four 64-bit words of internal state. The initial state corresponds to the ASCII string
        // "somepseudorandomlygeneratedbytes", big-endian encoded. There is nothing special about
        // this value; the only requirement was some asymmetry so that the initial v0 and v1
        // differ from v2 and v3.
        private long v0 = 0x736f6d6570736575L;
        private long v1 = 0x646f72616e646f6dL;
        private long v2 = 0x6c7967656e657261L;
        private long v3 = 0x7465646279746573L;
        /** The number of bytes in the input. */
        private long b = 0;
        // The final 64-bit chunk includes the last 0 through 7 bytes of m followed by null bytes
        // and ending with a byte encoding the positive integer b mod 256.
        private long finalM = 0;

        SipHasher(int c, int d, long k0, long k1) {
            super(CHUNK_SIZE);
            this.c = c;
            this.d = d;
            // Fold the key into the initial state: k0 into v0 and v2, k1 into v1 and v3.
            this.v0 ^= k0;
            this.v1 ^= k1;
            this.v2 ^= k0;
            this.v3 ^= k1;
        }

        /** Counts one full chunk and compresses it, read as a single long, into the state. */
        @Override
        protected void process(ByteBuffer buffer) {
            b += CHUNK_SIZE;
            final long word = buffer.getLong();
            processM(word);
        }

        /** Counts the trailing bytes and packs them into {@code finalM}, first byte lowest. */
        @Override
        protected void processRemaining(ByteBuffer buffer) {
            b += buffer.remaining();
            int shift = 0;
            while (buffer.hasRemaining()) {
                finalM ^= (buffer.get() & 0xFFL) << shift;
                shift += 8;
            }
        }

        /** Compresses the final chunk, runs the finalization rounds and combines the state. */
        @Override
        public HashCode makeHash() {
            // End with a byte encoding the positive integer b mod 256.
            finalM ^= b << 56;
            processM(finalM);
            // Finalization
            v2 ^= 0xFFL;
            sipRound(d);
            final long digest = v0 ^ v1 ^ v2 ^ v3;
            return HashCode.fromLong(digest);
        }

        /** Compresses one 64-bit message word: xor into v3, run c rounds, xor into v0. */
        private void processM(long m) {
            v3 ^= m;
            sipRound(c);
            v0 ^= m;
        }

        /** Applies the round function {@code iterations} times; statement order is significant. */
        private void sipRound(int iterations) {
            int remaining = iterations;
            while (remaining > 0) {
                v0 += v1;
                v2 += v3;
                v1 = Long.rotateLeft(v1, 13);
                v3 = Long.rotateLeft(v3, 16);
                v1 ^= v0;
                v3 ^= v2;
                v0 = Long.rotateLeft(v0, 32);
                v2 += v1;
                v0 += v3;
                v1 = Long.rotateLeft(v1, 17);
                v3 = Long.rotateLeft(v3, 21);
                v1 ^= v2;
                v3 ^= v0;
                v2 = Long.rotateLeft(v2, 32);
                remaining--;
            }
        }
    }
}

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2011 The Guava Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
// TODO(user): when things stabilize, flesh this out
/**
* Hash functions and related structures.
*
* <p>
* See the Guava User Guide article on
* <a href= "http://code.google.com/p/guava-libraries/wiki/HashingExplained">
* hashing</a>.
*
* <p>
* The package is annotated with {@code @ParametersAreNonnullByDefault}, so
* method parameters in this package are to be treated as non-null unless a
* parameter is explicitly annotated otherwise (for example with
* {@code @Nullable}).
*/
@ParametersAreNonnullByDefault
package com.google.common.hash;
import javax.annotation.ParametersAreNonnullByDefault;