-
Notifications
You must be signed in to change notification settings - Fork 478
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[COLLECTIONS-843] Implement Layered Bloom filter (#402)
* Adjusted tests to handle bloom filter implementations that utilized automatic decay. * cleaned up spacing * fixed indent * updated for layered testing * removed spaces * fixed merge issue * initial checkin * cleaned up tests * fixed timing on test * fixed formatting * added javadoc * fixed typos * removed blank lines * fixed javadocs * Fix Javadoc * Add Javadoc @since 4.5 * Add Javadoc @since 4.5 * updated tests and added BloomFilterProducer code * Cleaned up javadoc and BiPredicate<BloomFilter,BloomFilter> processing * fixed javadoc issues * fixed typography issue * Fixed a documentation error * code format cleanup * code simplification and documentation * added isEmpty and associated tests * Changes as requested by review * cleaned up formatting errors * fixed javadoc issues * added LayeredBloomFilter to overview. * added coco driven test cases. * attempt to fix formatting * cleaned up javadoc differences * cleaned up javadoc * Made flatten() part of BloomFilterProducer * fixed since tag. * changed X() methods to setX() * updated javadoc * fixed javadoc errors * merged changes from master * renamed to Test to CellProducerFromLayeredBloomFilterTest * changed to jupiter from junit. * added override for uniqueIndices as optimization. * fixed checkstyle issue * modified as per review * Updated tests as per review * fixed variable initialization issues * made suggested test changes * fixed broken test * Remove dead comments per code reviews --------- Co-authored-by: Gary Gregory <[email protected]>
- Loading branch information
1 parent
3b8dce4
commit 0438ede
Showing
27 changed files
with
2,379 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
143 changes: 143 additions & 0 deletions
143
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.commons.collections4.bloomfilter; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import java.util.function.BiPredicate; | ||
import java.util.function.Predicate; | ||
|
||
/** | ||
* Produces Bloom filters from a collection (e.g. LayeredBloomFilter). | ||
* | ||
* @since 4.5 | ||
*/ | ||
public interface BloomFilterProducer { | ||
|
||
/** | ||
* Executes a Bloom filter Predicate on each Bloom filter in the collection. The | ||
* ordering of the Bloom filters is not specified by this interface. | ||
* | ||
* @param bloomFilterPredicate the predicate to evaluate each Bloom filter with. | ||
* @return {@code false} when the first filter fails the predicate test. Returns | ||
* {@code true} if all filters pass the test. | ||
*/ | ||
boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate); | ||
|
||
/** | ||
* Return an array of the Bloom filters in the collection. | ||
* <p><em>Implementations should specify if the array contains deep copies, immutable instances, | ||
* or references to the filters in the collection.</em></p> | ||
* <p>The default method returns a deep copy of the enclosed filters.</p> | ||
* | ||
* @return An array of Bloom filters. | ||
*/ | ||
default BloomFilter[] asBloomFilterArray() { | ||
final List<BloomFilter> filters = new ArrayList<>(); | ||
forEachBloomFilter(f -> filters.add(f.copy())); | ||
return filters.toArray(new BloomFilter[0]); | ||
} | ||
|
||
/** | ||
* Applies the {@code func} to each Bloom filter pair in order. Will apply all | ||
* of the Bloom filters from the other BloomFilterProducer to this producer. If | ||
* either {@code this} producer or {@code other} producer has fewer BloomFilters | ||
* ths method will provide {@code null} for all excess calls to the {@code func}. | ||
* | ||
* <p><em>This implementation returns references to the Bloom filter. Other implementations | ||
* should specify if the array contains deep copies, immutable instances, | ||
* or references to the filters in the collection.</em></p> | ||
* | ||
* @param other The other BloomFilterProducer that provides the y values in the | ||
* (x,y) pair. | ||
* @param func The function to apply. | ||
* @return {@code true} if the {@code func} returned {@code true} for every pair, | ||
* {@code false} otherwise. | ||
*/ | ||
default boolean forEachBloomFilterPair(final BloomFilterProducer other, | ||
final BiPredicate<BloomFilter, BloomFilter> func) { | ||
final CountingPredicate<BloomFilter> p = new CountingPredicate<>(asBloomFilterArray(), func); | ||
return other.forEachBloomFilter(p) && p.forEachRemaining(); | ||
} | ||
|
||
/** | ||
* Create a standard (non-layered) Bloom filter by merging all of the layers. If | ||
* the filter is empty this method will return an empty Bloom filter. | ||
* | ||
* @return the merged bloom filter. | ||
*/ | ||
default BloomFilter flatten() { | ||
BloomFilter[] bf = {null}; | ||
forEachBloomFilter( x -> { | ||
if (bf[0] == null) { | ||
bf[0] = new SimpleBloomFilter( x.getShape()); | ||
} | ||
return bf[0].merge( x ); | ||
}); | ||
return bf[0]; | ||
} | ||
|
||
/** | ||
* Creates a BloomFilterProducer from an array of Bloom filters. | ||
* | ||
* <ul> | ||
* <li>The asBloomFilterArray() method returns a copy of the original array | ||
* with references to the original filters.</li> | ||
* <li>The forEachBloomFilterPair() method uses references to the original filters.</li> | ||
* </ul> | ||
* <p><em>All modifications to the Bloom filters are reflected in the original filters</em></p> | ||
* | ||
* @param filters The filters to be returned by the producer. | ||
* @return THe BloomFilterProducer containing the filters. | ||
*/ | ||
static BloomFilterProducer fromBloomFilterArray(BloomFilter... filters) { | ||
Objects.requireNonNull(filters, "filters"); | ||
return new BloomFilterProducer() { | ||
@Override | ||
public boolean forEachBloomFilter(final Predicate<BloomFilter> predicate) { | ||
for (final BloomFilter filter : filters) { | ||
if (!predicate.test(filter)) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
/** | ||
* This implementation returns a copy the original array, the contained Bloom filters | ||
* are references to the originals, any modifications to them are reflected in the original | ||
* filters. | ||
*/ | ||
@Override | ||
public BloomFilter[] asBloomFilterArray() { | ||
return filters.clone(); | ||
} | ||
|
||
/** | ||
* This implementation uses references to the original filters. Any modifications to the | ||
* filters are reflected in the originals. | ||
*/ | ||
@Override | ||
public boolean forEachBloomFilterPair(final BloomFilterProducer other, | ||
final BiPredicate<BloomFilter, BloomFilter> func) { | ||
final CountingPredicate<BloomFilter> p = new CountingPredicate<>(filters, func); | ||
return other.forEachBloomFilter(p) && p.forEachRemaining(); | ||
} | ||
}; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
src/main/java/org/apache/commons/collections4/bloomfilter/CountingPredicate.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.commons.collections4.bloomfilter; | ||
|
||
import java.util.function.BiPredicate; | ||
import java.util.function.Predicate; | ||
|
||
/**
 * A stateful predicate that pairs every value handed to {@code test()} with the next
 * unused element of the array {@code ary} and passes the pair to the two-argument
 * test {@code func}, array element first.
 *
 * <p>Once {@code ary} is exhausted, further calls to {@code test()} invoke {@code func}
 * with {@code null} in place of the array element. If the calls to {@code test()} stop
 * before {@code ary} is exhausted, {@code forEachRemaining()} can be called to invoke
 * {@code func} with each remaining array element and {@code null} as the second
 * argument.</p>
 *
 * @param <T> the type of object being compared.
 * @since 4.5
 */
class CountingPredicate<T> implements Predicate<T> {
    /** Index of the next element of {@code ary} that has not been paired yet. */
    private int idx;
    /** The values supplied as the first argument of {@code func}. */
    private final T[] ary;
    /** The two-argument test applied to every pair. */
    private final BiPredicate<T, T> func;

    /**
     * Constructs an instance that pairs the elements of {@code ary} with the values
     * later passed to {@code test()}. The function is called as
     * {@code func.test(idxValue, otherValue)}. If there are more {@code otherValue}
     * values than elements in {@code ary} then {@code func} is called as
     * {@code func.test(null, otherValue)}.
     *
     * @param ary  The array of values to compare.
     * @param func The function to apply to the pairs of values.
     */
    CountingPredicate(final T[] ary, final BiPredicate<T, T> func) {
        this.ary = ary;
        this.func = func;
    }

    /**
     * Pairs {@code other} with the next unused array element, or with {@code null} when
     * the array has been used up, and returns the result of {@code func} for that pair.
     *
     * @param other the value used as the second argument of {@code func}.
     * @return the result of {@code func} for the pair.
     */
    @Override
    public boolean test(final T other) {
        final T current;
        if (idx < ary.length) {
            current = ary[idx];
            idx++;
        } else {
            // Array used up: every further value is paired with null.
            current = null;
        }
        return func.test(current, other);
    }

    /**
     * Calls {@code func} for each array element that has not been paired yet, using
     * {@code null} as the second argument. Intended to be invoked after this predicate
     * has been passed to a producer's {@code forEach} method, to consume any unpaired
     * elements. The internal position is not advanced by this method.
     *
     * @return {@code true} if {@code func} returned {@code true} for every remaining
     *         element, {@code false} as soon as one call returned {@code false}.
     */
    boolean forEachRemaining() {
        for (int pos = idx; pos < ary.length; pos++) {
            if (!func.test(ary[pos], null)) {
                return false;
            }
        }
        return true;
    }
}
Oops, something went wrong.