Skip to content
22 changes: 21 additions & 1 deletion fastfilter/src/main/java/org/fastfilter/Filter.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.fastfilter;

import java.nio.ByteBuffer;

/**
* An approximate membership filter.
*/
Expand All @@ -14,7 +16,7 @@ public interface Filter {
boolean mayContain(long key);

/**
* Get the number of bits in thhe filter.
* Get the number of bits in the filter.
*
* @return the number of bits
*/
Expand Down Expand Up @@ -65,4 +67,22 @@ default long cardinality() {
return -1;
}

/**
* Get the serialized size of the filter.
* <p>
* The default implementation returns {@code -1}.
* NOTE(review): presumably {@code -1} signals that the implementation does not
* support serialization - confirm against the callers.
*
* @return the size in bytes, or {@code -1} from the default implementation
*/
default int getSerializedSize() {
return -1;
}

/**
* Serializes the filter state into the provided {@code ByteBuffer}.
* <p>
* The default implementation always throws {@link UnsupportedOperationException};
* a filter that can be serialized has to override this method.
*
* @param buffer the byte buffer where the serialized state of the filter will be written
* @throws UnsupportedOperationException if the operation is not supported by the filter implementation
*/
default void serialize(ByteBuffer buffer) {
throw new UnsupportedOperationException();
}
}
12 changes: 8 additions & 4 deletions fastfilter/src/main/java/org/fastfilter/utils/Hash.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

import java.util.Random;

public class Hash {
public final class Hash {
private Hash() {

private static Random random = new Random();
}

private static final Random random = new Random();

/**
* Re-seed the random number generator shared by this class.
*
* @param seed the new seed
*/
public static void setSeed(long seed) {
// shouldn't we use ThreadLocalRandom.current() instead?
// NOTE(review): per the JDK documentation ThreadLocalRandom does not allow
// setting a seed (its setSeed throws UnsupportedOperationException), so it
// could not back this method as-is - confirm before switching.
random.setSeed(seed);
}

Expand All @@ -23,7 +27,7 @@ public static long randomSeed() {
}

/**
* Shrink the hash to a value 0..n. Kind of like modulo, but using
* Shrink the hash to value 0..n. Kind of like modulo, but using
* multiplication and shift, which are faster to compute.
*
* @param hash the hash
Expand All @@ -37,7 +41,7 @@ public static int reduce(int hash, int n) {

/**
* Multiply two unsigned 64-bit values.
* See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8188044
* See <a href="https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8188044">JDK-8188044</a>
*
* @param a the first value
* @param b the second value
Expand Down
85 changes: 67 additions & 18 deletions fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.fastfilter.xor;

import java.lang.reflect.Constructor;
import java.nio.ByteBuffer;
import java.util.Arrays;

import org.fastfilter.Filter;
Expand All @@ -20,27 +22,33 @@ public class XorBinaryFuse16 implements Filter {
private final short[] fingerprints;
private long seed;

public XorBinaryFuse16(int segmentCount, int segmentLength) {
private XorBinaryFuse16(int segmentCount, int segmentLength, long seed, short[] fingerprints) {
if (segmentLength < 0 || Integer.bitCount(segmentLength) != 1) {
throw new IllegalArgumentException("Segment length needs to be a power of 2, is " + segmentLength);
}
if (segmentCount <= 0) {
throw new IllegalArgumentException("Illegal segment count: " + segmentCount);
}
this.segmentLength = segmentLength;

this.segmentCount = segmentCount;
this.segmentLengthMask = segmentLength - 1;
this.segmentCountLength = segmentCount * segmentLength;
this.arrayLength = (segmentCount + ARITY - 1) * segmentLength;
this.fingerprints = new short[arrayLength];
this.segmentLength = segmentLength;
this.segmentLengthMask = segmentLength - 1;
this.arrayLength = fingerprints.length;
this.fingerprints = fingerprints;
this.seed = seed;
}

public XorBinaryFuse16(int segmentCount, int segmentLength) {
this(segmentCount, segmentLength, 0L, new short[(segmentCount + ARITY - 1) * segmentLength]);
}

/**
 * Report the size of the fingerprint table in bits.
 *
 * @return the array length multiplied by 16 bits per entry
 */
public long getBitCount() {
    final long bitsPerEntry = Short.SIZE;
    return bitsPerEntry * arrayLength;
}

static int calculateSegmentLength(int arity, int size) {
int segmentLength;
final int segmentLength;
if (arity == 3) {
segmentLength = 1 << (int) Math.floor(Math.log(size) / Math.log(3.33) + 2.11);
} else if (arity == 4) {
Expand All @@ -53,7 +61,7 @@ static int calculateSegmentLength(int arity, int size) {
}

static double calculateSizeFactor(int arity, int size) {
double sizeFactor;
final double sizeFactor;
if (arity == 3) {
sizeFactor = Math.max(1.125, 0.875 + 0.25 * Math.log(1000000) / Math.log(size));
} else if (arity == 4) {
Expand Down Expand Up @@ -143,8 +151,7 @@ private void addAll(long[] keys) {
countMask |= t2count[index];
}
}
startPos = null;
if (countMask < 0) {
if (countMask < 0) {
// we have a possible counter overflow
continue mainloop;
}
Expand Down Expand Up @@ -202,19 +209,14 @@ private void addAll(long[] keys) {
// if construction doesn't succeed eventually,
// then there is likely a problem with the hash function
// let us not crash the system:
for(int i = 0; i < fingerprints.length; i++) {
fingerprints[i] = (short)0xFFFF;
}
Arrays.fill(fingerprints, (short) 0xFFFF);
return;
}
// use a new random numbers
// use a new random number
seed = Hash.randomSeed();
}
alone = null;
t2count = null;
t2hash = null;

for (int i = reverseOrderPos - 1; i >= 0; i--) {
for (int i = reverseOrderPos - 1; i >= 0; i--) {
long hash = reverseOrder[i];
int found = reverseH[i];
short xor2 = fingerprint(hash);
Expand Down Expand Up @@ -261,4 +263,51 @@ private short fingerprint(long hash) {
return (short) hash;
}

}
/**
 * Compute the number of bytes {@code serialize} writes for this filter:
 * three ints and one long of header, followed by two bytes per fingerprint.
 *
 * @return the serialized size in bytes
 */
@Override
public int getSerializedSize() {
    // header: segmentLength, segmentCountLength and the fingerprint count (ints) plus the seed (long)
    final int headerBytes = 3 * Integer.BYTES + Long.BYTES;
    final int payloadBytes = Short.BYTES * fingerprints.length;
    return headerBytes + payloadBytes;
}

/**
 * Write this filter into the buffer, starting at the buffer's current position.
 * The layout is: segment length (int), segment count length (int), seed (long),
 * number of fingerprints (int), then every fingerprint as a short.
 *
 * @param buffer the target buffer
 * @throws IllegalArgumentException if the buffer has fewer remaining bytes than
 *         {@link #getSerializedSize()}
 */
@Override
public void serialize(ByteBuffer buffer) {
    final int available = buffer.remaining();
    if (available < getSerializedSize()) {
        throw new IllegalArgumentException("Buffer too small");
    }

    final int count = fingerprints.length;
    buffer.putInt(segmentLength)
            .putInt(segmentCountLength)
            .putLong(seed)
            .putInt(count);
    for (int i = 0; i < count; i++) {
        buffer.putShort(fingerprints[i]);
    }
}

/**
 * Reconstruct a filter from the binary form written by {@link #serialize(ByteBuffer)}.
 * Reading starts at the buffer's current position. The expected layout is:
 * segment length (int), segment count length (int), seed (long), number of
 * fingerprints (int), then every fingerprint as a short.
 * <p>
 * All header fields are validated before anything is allocated, so that a truncated
 * or corrupted buffer is rejected with an {@code IllegalArgumentException} rather than
 * failing with an arithmetic or array-size error, or producing a filter whose
 * fingerprint array does not match its geometry.
 *
 * @param buffer the buffer to read from
 * @return the deserialized filter
 * @throws IllegalArgumentException if the buffer is too small or the header is inconsistent
 */
public static XorBinaryFuse16 deserialize(ByteBuffer buffer) {
    // header: 2 ints (segment length, segment count length) + 1 long (seed) + 1 int (fingerprint count)
    final int headerBytes = 3 * Integer.BYTES + Long.BYTES;
    if (buffer.remaining() < headerBytes) {
        throw new IllegalArgumentException("Buffer too small");
    }

    final int segmentLength = buffer.getInt();
    final int segmentCountLength = buffer.getInt();
    final long seed = buffer.getLong();
    final int len = buffer.getInt();

    // must be checked before the division below; a zero would otherwise raise ArithmeticException
    if (segmentLength <= 0 || Integer.bitCount(segmentLength) != 1) {
        throw new IllegalArgumentException("Segment length needs to be a power of 2, is " + segmentLength);
    }
    if (segmentCountLength <= 0 || segmentCountLength % segmentLength != 0) {
        throw new IllegalArgumentException("Illegal segment count length: " + segmentCountLength);
    }
    final int segmentCount = segmentCountLength / segmentLength;

    // the fingerprint array of a filter always has (segmentCount + ARITY - 1) * segmentLength
    // entries; anything else (including a negative count) means the data is corrupted
    final long expectedLen = ((long) segmentCount + ARITY - 1) * segmentLength;
    if (len != expectedLen) {
        throw new IllegalArgumentException(
                "Illegal fingerprint count: " + len + ", expected " + expectedLen);
    }

    // 64-bit arithmetic: len * Short.BYTES overflows an int for large filters
    if (buffer.remaining() < (long) len * Short.BYTES) {
        throw new IllegalArgumentException("Buffer too small");
    }

    final short[] fingerprints = new short[len];
    for (int i = 0; i < len; i++) {
        fingerprints[i] = buffer.getShort();
    }

    return new XorBinaryFuse16(segmentCount, segmentLength, seed, fingerprints);
}
}
Loading