29 #ifndef HYPERTABLE_BLOOM_FILTER_H
30 #define HYPERTABLE_BLOOM_FILTER_H
49 template <
class HasherT = MurmurHash2>
67 "Num elements=%lu false_positive_prob=%.3f",
68 (
Lu)items_estimate, false_positive_prob);
77 <<
" bits per element=" << double(
m_num_bits) / items_estimate
94 m_num_bits = (size_t)((
double)items_estimate * (double)bits_per_item);
97 (
Lu)items_estimate, bits_per_item);
118 int64_t length,
size_t num_hashes) {
126 "Estimated items=%lu actual items=%lu length=%lld num hashes=%lu",
127 (
Lu)items_estimate, (
Lu)items_actual, (
Lld)length,
159 void insert(
const void *key,
size_t len) {
164 m_bloom_bits[hash / CHAR_BIT] |= (1 << (hash % CHAR_BIT));
174 insert(key.c_str(), key.length());
196 byte_mask = (1 << (hash % CHAR_BIT));
198 if ((byte & byte_mask) == 0) {
290 #endif // HYPERTABLE_BLOOM_FILTER_H
size_t get_num_hashes()
Getter for the number of hash functions.
A memory buffer of static size.
size_t get_items_estimate()
Getter for the estimated number of items.
void set(uint8_t *data, uint32_t len, bool take_ownership=true)
Sets data pointer; the existing buffer is discarded and deleted.
uint8_t * m_bloom_bits
The actual bloom filter bit-array.
MurmurHash2 digest routine.
size_t get_items_actual()
Getter for the actual number of items.
std::string String
A String is simply a typedef to std::string.
~BasicBloomFilter()
Destructor; releases resources.
size_t get_length_bits()
Getter for the number of bits.
uint8_t * ptr()
Getter for the bloom filter data.
size_t m_num_bytes
Number of bytes (approx.
void insert(const String &key)
Overloaded insert function for Strings.
size_t m_items_estimate
Estimated number of items.
float m_false_positive_prob
Probability of returning a false positive.
size_t m_num_bits
Number of bits.
BasicBloomFilter(size_t items_estimate, size_t items_actual, int64_t length, size_t num_hashes)
Alternative constructor.
size_t size()
Getter for the bloom filter size.
BasicBloomFilter(size_t items_estimate, float false_positive_prob)
Constructor.
Logging routines and macros.
BasicBloomFilter BloomFilter
A memory buffer of static size.
bool may_contain(const void *key, size_t len) const
Checks if the data set "may" contain the key.
long long int Lld
Shortcut for printf formats.
size_t m_num_hash_functions
Number of hash functions.
BasicBloomFilter(size_t items_estimate, float bits_per_item, size_t num_hashes)
Alternative constructor.
#define HT_THROWF(_code_, _fmt_,...)
void serialize(StaticBuffer &buf)
Serializes the BloomFilter into a static memory buffer.
size_t m_items_actual
Actual number of items.
long unsigned int Lu
Shortcut for printf formats.
void insert(const void *key, size_t len)
Inserts a new blob into the hash.
A space-efficent probabilistic set for membership test, false postives are possible, but false negatives are not.
String extensions and helpers: sets, maps, append operators etc.
HasherT m_hasher
The hash function implementation.
bool may_contain(const String &key) const
Overloaded may_contain function for Strings.