25 #include <boost/algorithm/string.hpp> 
   26 #include <boost/scoped_array.hpp> 
   55   const uint32_t MAX_APPENDS_OUTSTANDING = 3;
 
   56   const uint16_t BLOCK_HEADER_FORMAT = 0;
 
   61   : m_filesys(filesys) {
 
   63   assert(
sizeof(
float) == 4);
 
   67   : m_filesys(filesys), m_schema(schema) {
 
   69   assert(
sizeof(
float) == 4);
 
  112     return make_shared<CellStoreScanner<CellStoreBlockIndexArray<int64_t>>>(shared_from_this(), scan_ctx, need_index ? &
m_index_map64 : 0);
 
  113   return make_shared<CellStoreScanner<CellStoreBlockIndexArray<uint32_t>>>(shared_from_this(), scan_ctx, need_index ? &
m_index_map32 : 0);
 
  120   int32_t replication = props->get_i32(
"replication", int32_t(-1));
 
  121   int64_t blocksize = props->get(
"blocksize", 0);
 
  128   if (replication == -1 && 
Config::has(
"Hypertable.RangeServer.CellStore.DefaultReplication"))
 
  129     replication = Config::get_i32(
"Hypertable.RangeServer.CellStore.DefaultReplication");
 
  132     blocksize = Config::get_i32(
"Hypertable.RangeServer.CellStore" 
  133                                 ".DefaultBlockSize");
 
  134   if (compressor.empty())
 
  135     compressor = Config::get_str(
"Hypertable.RangeServer.CellStore" 
  136                                  ".DefaultCompressor");
 
  137   if (!props->has(
"bloom-filter-mode")) {
 
  140         ".CellStore.DefaultBloomFilter"), props);
 
  163   for (
size_t i=0; i<column_family_specs.size(); i++) {
 
  164     if (column_family_specs[i]->get_option_ttl()) {
 
  169       m_column_ttl[ column_family_specs[i]->get_id() ] = column_family_specs[i]->get_option_ttl() * 1000000000LL;
 
  192     bool has_num_hashes = props->has(
"num-hashes");
 
  193     bool has_bits_per_item = props->has(
"bits-per-item");
 
  195     if (has_num_hashes || has_bits_per_item) {
 
  196       if (!(has_num_hashes && has_bits_per_item)) {
 
  197         HT_WARN(
"Bloom filter option --bits-per-item must be used with " 
  198                 "--num-hashes, defaulting to false probability of 0.01");
 
  211       <<
" max-approx-items="<< m_max_approx_items <<
" false-positive=" 
  219   HT_DEBUG_OUT << 
"Creating new BloomFilter for CellStore '" 
  220     << 
m_filename <<
"' for "<< (is_approx ? 
"estimated " : 
"")
 
  232     HT_FATAL_OUT << 
"Error creating new BloomFilter for CellStore '" 
  233                  << m_filename <<
"' for "<< (is_approx ? 
"estimated " : 
"")
 
  234                  << m_trailer.filter_items_estimate << 
" items - "<< e << 
HT_END;
 
  237   for (
const auto &blob : *m_bloom_filter_items)
 
  238     m_bloom_filter->insert(blob.start, blob.size);
 
  240   delete m_bloom_filter_items;
 
  241   m_bloom_filter_items = 0;
 
  243   HT_DEBUG_OUT << 
"Created new BloomFilter for CellStore '" 
  244     << m_filename <<
"'"<< 
HT_END;
 
  248 void CellStoreV4::load_bloom_filter() {
 
  263     HT_FATAL_OUT << 
"Error loading BloomFilter for CellStore '" 
  265                  << 
" items -"<< e << 
HT_END;
 
  270     bool second_try = 
false;
 
  290                 "CellStore '%s' : tried to read %lld but only got %lld",
 
  306   uint64_t memory_purged = 0;
 
  326   return memory_purged;
 
  363              "Problem writing to FS file '%s' : %s", 
m_filename.c_str(),
 
  366                   "Problem writing to FS file '%s'", 
m_filename.c_str());
 
  376     size_t zlen = zbuf.
fill();
 
  392   size_t value_len = value.
length();
 
  409       m_bloom_filter_items->insert(key.
row, key.
row_len);
 
  412         m_bloom_filter_items->insert(key.
row, key.
row_len + 2);
 
  418           HT_INFOF(
"max_entries = %lld, max_approx_items = %lld, bloom_filter_items_size = %lld",
 
  445   int64_t index_memory = 0;
 
  466                   "Problem finalizing CellStore file '%s' : %s",
 
  541     if (m_bloom_filter_items && m_bloom_filter_items->size() > 0) {
 
  633   if (!
m_bigint && offset >= 4294967296LL) {
 
  636     uint8_t *dst = tmp_buf.
base;
 
  650   size_t key_len = key_compressor->length_uncompressed();
 
  673   base = m_fixed.release(&len);
 
  674   m_fixed.reserve(len);
 
  675   m_fixed.add_unchecked(base, len);
 
  678   base = m_variable.release(&len);
 
  679   m_variable.reserve(len);
 
  680   m_variable.add_unchecked(base, len);
 
  688                   const String &end_row, int32_t fd, int64_t file_length,
 
  717               "Bad index offsets in CellStore trailer fd=%u fix=%lld, var=%lld, " 
  725   int64_t amount, index_amount;
 
  729   bool inflating_fixed=
true;
 
  730   bool second_try = 
false;
 
  749                 "CellStore '%s' : tried to read %lld but only got %lld",
 
  757     inflating_fixed = 
false;
 
  766     vbuf.
ptr = buf.
ptr + amount;
 
  777     if (inflating_fixed) {
 
  778       msg = 
String(
"Error inflating FIXED index for cellstore '")
 
  783       msg = 
"Error inflating VARIABLE index for cellstore '" + 
m_filename + 
"'";
 
  787         << index_amount << 
")\n" << 
HT_END;
 
  832         size_t rowlen = scan_ctx->
start_row.length();
 
  833         boost::scoped_array<char> rowcol(
new char[rowlen + 2]);
 
  834         memcpy(rowcol.get(), scan_ctx->
start_row.c_str(), rowlen + 1);
 
  837           uint8_t column_family_id = schema->get_column_family(col)->get_id();
 
  838           rowcol[rowlen + 1] = column_family_id;
 
  846       HT_ASSERT(!
"unpossible bloom filter mode!");
 
  879   return BLOCK_HEADER_FORMAT;
 
size_t get_num_hashes()
Getter for the number of hash functions. 
void free()
Frees resources. 
size_t get_items_actual()
Getter for the actual number of items. 
uint64_t bloom_filter_access_counter
static const char INDEX_FIXED_BLOCK_MAGIC[10]
#define HT_THROW2F(_code_, _ex_, _fmt_,...)
A memory buffer of static size. 
Retrieves system information (hardware, installation directory, etc) 
virtual void clear()
Clears the contents of this trailer.
virtual void append(int fd, StaticBuffer &buffer, Flags flags, DispatchHandler *handler)=0
Appends data to a file asynchronously. 
Abstract base class for cell store trailer. 
virtual void close(int fd, DispatchHandler *handler)=0
Closes a file asynchronously. 
#define HT_IO_ALIGNMENT_PADDING(size)
void create_bloom_filter(bool is_approx=false)
virtual void pread(int fd, size_t amount, uint64_t offset, bool verify_checksum, DispatchHandler *handler)=0
Reads data from a file at the specified position asynchronously. 
static const char INDEX_VARIABLE_BLOCK_MAGIC[10]
BloomFilterMode m_bloom_filter_mode
static int32_t response_code(const Event *event)
Returns the response code from an event generated in response to a request message.
PropertiesPtr properties
This singleton map stores all options. 
std::string String
A String is simply a typedef to std::string. 
#define HT_IO_ALIGNED(size)
DynamicBuffer & variable_buf()
static String string_format_message(const Event *event)
Returns the error message decoded from a standard error MESSAGE generated in response to a request message...
String format(const char *fmt,...)
Returns a String using printf-like format facilities. Vanilla snprintf is about 1.5x faster than this...
BlockCompressionCodec * m_compressor
std::shared_ptr< KeyCompressor > KeyCompressorPtr
long long unsigned int Llu
Shortcut for printf formats. 
KeyDecompressor * create_key_decompressor() override
Creates a key decompressor suitable for decompressing the keys stored in this cell store...
Declarations for CellStoreScanner. 
static void parse_bloom_filter(const std::string &spec, PropertiesPtr &props)
Parses a bloom filter specification and sets properties.
std::shared_ptr< Event > EventPtr
Smart pointer to Event. 
float m_bloom_bits_per_item
Scan context information. 
BlockCompressionCodec::Args m_compressor_args
CellStoreBlockIndexArray< uint32_t > m_index_map32
Type
Enumeration for compression type. 
uint8_t * ptr
Pointer to the end of the used part of the buffer. 
KeyCompressorPtr m_key_compressor
bool wait_for_reply(EventPtr &event)
This method is used by a client to synchronize. 
A dynamic, resizable and reference counted memory buffer. 
uint32_t decode_i32(const uint8_t **bufp, size_t *remainp)
Decode a 32-bit integer in little-endian order. 
bool has(const String &name)
Check existence of a configuration value. 
int64_t m_max_approx_items
A class managing one or more serializable ByteStrings. 
virtual size_t size()
Returns the serialized size of the trailer. 
float m_filter_false_positive_prob
void serialize(StaticBuffer &buf)
Serializes the BloomFilter into a static memory buffer. 
IndexMemoryStats m_index_stats
virtual void serialize(uint8_t *buf)
Serializes this trailer to the given buffer.
static BlockCompressionCodec::Type parse_block_codec_spec(const std::string &spec, BlockCompressionCodec::Args &args)
Given a block codec config string return its type and put config. 
size_t total_size()
Getter for the total size (including checksum and metadata) 
static Hypertable::MemoryTracker * memory_tracker
uint64_t block_index_access_counter
CellStoreBlockIndexArray< int64_t > m_index_map64
bool may_contain(const void *ptr, size_t len)
static uint64_t access_counter
std::shared_ptr< Properties > PropertiesPtr
uint32_t size
The size of the allocated memory buffer (base) 
Logging routines and macros. 
BloomFilterMode
Enumeration for bloom filter modes. 
void add_entry(KeyCompressorPtr &key_compressor, int64_t offset)
Compatibility Macros for C/C++. 
int64_t block_index_memory
void insert(const void *key, size_t len)
Inserts a new blob into the hash. 
void encode_i64(uint8_t **bufp, uint64_t val)
Encode a 64-bit integer in little-endian order. 
static const int64_t TIMESTAMP_NULL
size_t length() const 
Retrieves the length of the serialized string. 
uint8_t bloom_filter_mode
static BlockCompressionCodec * create_block_codec(BlockCompressionCodec::Type, const BlockCompressionCodec::Args &args=BlockCompressionCodec::Args())
int64_t filter_items_estimate
void add(const Key &key, const ByteString value) override
Inserts a key/value pair into the cell list. 
Time related declarations. 
size_t get_length_bits()
Getter for the number of bits. 
void create(const char *fname, size_t max_entries, PropertiesPtr &props, const TableIdentifier *table_id=0) override
Creates a new cell store. 
BloomFilterItems * m_bloom_filter_items
bool own
If true then the buffer (base) will be released when going out of scope; if false then the caller has...
const uint8_t * ptr
The pointer to the serialized data. 
void finalize(TableIdentifier *table_identifier) override
Finalizes the creation of a cell store, by writing block index and metadata trailer. 
long long int Lld
Shortcut for printf formats. 
void load(DynamicBuffer &fixed, DynamicBuffer &variable, int64_t end_of_data, const String &start_row="", const String &end_row="")
void clear()
Clears the buffer. 
uint32_t table_generation
CellListScannerPtr create_scanner(ScanContext *scan_ctx) override
Creates a scanner on this cell list. 
uint32_t m_outstanding_appends
DispatchHandlerSynchronizer m_sync_handler
void validate(String &filename)
Validates the checksum of the BloomFilter. 
Declarations for Protocol. 
#define HT_INFOF(msg,...)
#define HT_THROWF(_code_, _fmt_,...)
Provides access to internal components of opaque key. 
uint8_t * base
Pointer to the allocated memory buffer. 
CellStoreV4(Filesystem *filesys)
size_t fill() const 
Returns the size of the used portion. 
virtual void deflate(const DynamicBuffer &input, DynamicBuffer &output, BlockHeader &header, size_t reserve=0)=0
Compresses a buffer. 
DynamicBuffer & fixed_buf()
static const char DATA_BLOCK_MAGIC[10]
Request/response message event. 
void subtract(int64_t amount)
Subtract from memory used.
float m_uncompressed_data
This is a generic exception class for Hypertable. 
CellStoreTrailerV4 m_trailer
BlockCompressionCodec * create_block_compression_codec() override
Creates a block compression codec suitable for decompressing the cell store's blocks. 
uint64_t purge_indexes() override
Purges bloom filter and block indexes. 
uint8_t * release(size_t *lenp=0)
Moves ownership of the buffer to the caller. 
std::shared_ptr< Schema > SchemaPtr
Smart pointer to Schema. 
uint8_t column_family_code
std::shared_ptr< CellListScanner > CellListScannerPtr
BloomFilterWithChecksum * m_bloom_filter
virtual void open(const String &name, uint32_t flags, DispatchHandler *handler)=0
Opens a file asynchronously. 
uint16_t key_compression_scheme
uint8_t bloom_filter_hash_count
std::vector< ColumnFamilySpec * > ColumnFamilySpecs
Vector of ColumnFamilySpec pointers. 
BasicBloomFilterWithChecksum BloomFilterWithChecksum
int64_t bloom_filter_memory
BlobHashSet BloomFilterItems
void display_block_info() override
Displays block information to stdout. 
virtual void inflate(const DynamicBuffer &input, DynamicBuffer &output, BlockHeader &header)=0
Decompresses a buffer. 
Error codes, Exception handling, error logging. 
#define HT_THROW(_code_, _msg_)
static const char * END_ROW_MARKER
int64_t m_uncompressed_blocksize
void add(int64_t amount)
Add to memory used. 
bool may_contain(const void *key, size_t len) const 
Checks if the data set "may" contain the key. 
void ensure(size_t len)
Ensure space for additional data. Will grow the space to 1.5 of the needed space with existing data un...
static int get_next_file_id()
uint8_t * add_unchecked(const void *data, size_t len)
Adds additional data without boundary checks. 
uint16_t block_header_format() override
virtual void create(const String &name, uint32_t flags, int32_t bufsz, int32_t replication, int64_t blksz, DispatchHandler *handler)=0
Creates a file asynchronously. 
uint16_t compression_type
uint8_t * base()
Getter for the serialized bloom filter data, including metadata and checksums. 
int64_t filter_items_actual
int64_t get_ts64()
Returns the current time in nanoseconds as a 64bit number. 
#define HT_DIRECT_IO_ALIGNMENT
Abstract base class for block compression codecs. 
Abstract base class for a filesystem. 
void open(const String &fname, const String &start_row, const String &end_row, int32_t fd, int64_t file_length, CellStoreTrailer *trailer) override
Opens a cell store with possibly a restricted view. 
int code() const 
Returns the error code. 
IndexBuilder m_index_builder
void reserve(size_t len, bool nocopy=false)
Reserve space for additional data. Will grow the space to exactly what's needed.
#define HT_THROW2(_code_, _ex_, _msg_)