29 #ifndef HYPERTABLE_BLOOM_FILTER_WITH_CHECKSUM_H
30 #define HYPERTABLE_BLOOM_FILTER_WITH_CHECKSUM_H
53 template <
class HasherT = MurmurHash2>
63 float false_positive_prob) {
72 "Num elements=%lu false_positive_prob=%.3f",
73 (
Lu)items_estimate, false_positive_prob);
98 m_num_bits = (size_t)((
double)items_estimate * (double)bits_per_item);
101 (
Lu)items_estimate, bits_per_item);
122 int64_t length,
size_t num_hashes) {
130 "Estimated items=%lu actual items=%lu length=%lld num hashes=%lu",
131 (
Lu)items_estimate, (
Lu)items_actual, (
Lld)length, (
Lu)num_hashes);
162 void insert(
const void *key,
size_t len) {
167 m_bloom_bits[hash / CHAR_BIT] |= (1 << (hash % CHAR_BIT));
177 insert(key.c_str(), key.length());
199 byte_mask = (1 << (hash % CHAR_BIT));
201 if ((byte & byte_mask) == 0) {
223 uint8_t *ptr = buf.
base;
246 if (stored_checksum != computed_checksum)
322 #endif // HYPERTABLE_BLOOM_FILTER_WITH_CHECKSUM_H
size_t get_num_hashes()
Getter for the number of hash functions.
size_t get_items_actual()
Getter for the actual number of items.
A memory buffer of static size.
Retrieves system information (hardware, installation directory, etc)
#define HT_IO_ALIGNMENT_PADDING(size)
void set(uint8_t *data, uint32_t len, bool take_ownership=true)
Sets data pointer; the existing buffer is discarded and deleted.
MurmurHash2 digest routine.
Abstract base class for a filesystem.
size_t get_items_estimate()
Getter for the estimated number of items.
std::string String
A String is simply a typedef to std::string.
bool may_contain(const String &key) const
Overloaded may_contain function for Strings.
A space-efficent probabilistic set for membership test, false postives are possible, but false negatives are not.
size_t m_num_bits
Number of bits.
uint32_t decode_i32(const uint8_t **bufp, size_t *remainp)
Decode a 32-bit integer in little-endian order.
size_t m_num_hash_functions
Number of hash functions.
uint32_t fletcher32(const void *data8, size_t len8)
Compute fletcher32 checksum for arbitary data.
void serialize(StaticBuffer &buf)
Serializes the BloomFilter into a static memory buffer.
size_t total_size()
Getter for the total size (including checksum and metadata)
BasicBloomFilterWithChecksum(size_t items_estimate, size_t items_actual, int64_t length, size_t num_hashes)
Alternative constructor.
Logging routines and macros.
void encode_i32(uint8_t **bufp, uint32_t val)
Encode a 32-bit integer in little-endian order.
size_t m_items_estimate
Estimated number of items.
void insert(const void *key, size_t len)
Inserts a new blob into the hash.
Functions to serialize/deserialize primitives to/from a memory buffer.
uint8_t * m_bloom_base
The serialized bloom filter data, including metadata and checksums.
A memory buffer of static size.
float m_false_positive_prob
Probability of returning a false positive.
BasicBloomFilterWithChecksum(size_t items_estimate, float false_positive_prob)
Constructor.
size_t get_length_bits()
Getter for the number of bits.
size_t size()
Getter for the bloom filter size.
long long int Lld
Shortcut for printf formats.
Implementation of checksum routines.
~BasicBloomFilterWithChecksum()
Destructor; releases resources.
HasherT m_hasher
The hash function implementation.
uint8_t * m_bloom_bits
The actual bloom filter bit-array.
void validate(String &filename)
Validates the checksum of the BloomFilter.
#define HT_THROWF(_code_, _fmt_,...)
size_t m_items_actual
Actual number of items.
BasicBloomFilterWithChecksum(size_t items_estimate, float bits_per_item, size_t num_hashes)
Alternative constructor.
void insert(const String &key)
Overloaded insert function for Strings.
long unsigned int Lu
Shortcut for printf formats.
BasicBloomFilterWithChecksum BloomFilterWithChecksum
String extensions and helpers: sets, maps, append operators etc.
#define HT_THROW(_code_, _msg_)
bool may_contain(const void *key, size_t len) const
Checks if the data set "may" contain the key.
size_t m_num_bytes
Number of bytes (approx.
uint8_t * base()
Getter for the serialized bloom filter data, including metadata and checksums.