57 m_location(rsc->location()), m_rsc(rsc), m_restart(flags==RESTART) {
74 vector<MetaLog::EntityPtr> removable_move_ops;
78 HT_INFOF(
"Entering RecoverServer %s state=%s this=%p",
106 m_rsc->set_recovering(
false);
123 m_rsc->set_recovering(
true);
129 m_context->conn_manager->remove(comm_addr);
132 comm_addr.set_proxy(
m_rsc->location());
133 m_context->conn_manager->remove(comm_addr);
142 subject =
format(
"NOTICE: Recovery of %s (%s) starting",
144 message =
format(
"Failure of range server %s (%s) has been detected. "
148 m_context->notification_hook(subject, message);
156 subject =
format(
"ERROR: Recovery of %s (%s)",
158 message =
format(
"Problem reading RSML - %s - %s",
160 time_t last_notification = 0;
162 time_t now = time(0);
163 int32_t notify_interval =
164 m_context->props->get_i32(
"Hypertable.Master.NotificationInterval");
165 if (last_notification + notify_interval <= now) {
166 m_context->notification_hook(subject, message);
167 last_notification = time(0);
170 this_thread::sleep_for(chrono::milliseconds(30000));
186 HT_INFOF(
"Number of root ranges to recover for location %s = %u",
192 HT_INFOF(
"Number of metadata ranges to recover for location %s = %u",
198 HT_INFOF(
"Number of system ranges to recover for location %s = %d",
204 HT_INFOF(
"Number of user ranges to recover for location %s = %d",
228 std::vector<MetaLog::EntityPtr> additional;
229 m_rsc->mark_for_removal();
230 additional.push_back(
m_rsc);
237 subject =
format(
"NOTICE: Recovery of %s (%s) succeeded",
239 message =
format(
"Recovery of range server %s (%s) has succeeded.",
241 m_context->notification_hook(subject, message);
245 HT_FATALF(
"Unrecognized state %d", state);
249 HT_INFOF(
"Leaving RecoverServer %s state=%s this=%p",
273 handle =
m_context->hyperspace->open(fname, oflags);
279 HT_INFOF(
"Couldn't obtain lock on '%s' due to conflict, lock_status=0x%x",
280 fname.c_str(), lock_status);
283 subject =
format(
"NOTICE: Recovery of %s (%s) aborted",
285 message =
format(
"Aborting recovery of range server %s (%s) because "
289 m_context->notification_hook(subject, message);
296 m_context->hyperspace->attr_set(handle,
"removed",
"", 0);
302 HT_INFOF(
"Acquired lock on '%s', starting recovery...", fname.c_str());
306 <<
" hyperspace lock (" << e <<
"), aborting..." <<
HT_END;
349 = make_shared<MetaLog::DefinitionRangeServer>(
m_location.c_str());
352 vector<MetaLog::EntityPtr> entities;
356 + rsml_definition->name();
358 make_shared<MetaLog::Reader>(
m_context->dfs, rsml_definition, logfile);
360 rsml_reader->get_entities(entities);
363 for (
auto &entity : entities) {
364 if ((range = dynamic_cast<MetaLogEntityRange *>(entity.get())) != 0) {
368 std::stringstream sout;
372 sout <<
"Skipping PHANTOM range " << *range;
379 range->get_table_identifier(table);
380 if (!table.
is_system() && valid_tables.count(table.
id) == 0) {
381 if (missing_tables.count(table.
id) == 0) {
382 if (!
m_context->namemap->id_to_name(table.
id, tablename))
383 missing_tables.insert(table.
id);
385 valid_tables.insert(table.
id);
387 if (missing_tables.count(table.
id) != 0) {
388 sout <<
"Range " << *range <<
": table does not exist, skipping ...";
394 sout <<
"Range " << *range <<
": not PHANTOM; including";
400 range->get_table_identifier(spec.
table);
401 range->get_range_spec(range_spec);
402 range->get_range_state(range_state);
404 spec.
range = range_spec;
424 else if ((ack_task = dynamic_cast<MetaLog::EntityTaskAcknowledgeRelinquish *>(entity.get())) != 0) {
426 make_shared<OperationRelinquishAcknowledge>(
m_context,
431 operation->execute();
442 vector<MetaLog::EntityPtr> &removable_move_ops) {
443 String start_row, end_row;
446 bool split_off_high = split_row.compare(old_boundary_row) < 0;
454 if (split_off_high) {
469 if (operation->remove_approval_add(0x01)) {
470 m_context->remove_move_operation(operation);
471 removable_move_ops.push_back(operation);
531 for (
int ii = 0; ii < nn; ++ii) {
532 spec.
decode(bufp, remainp);
534 state.
decode(bufp, remainp);
538 for (
int ii = 0; ii < nn; ++ii) {
539 spec.
decode(bufp, remainp);
541 state.
decode(bufp, remainp);
545 for (
int ii = 0; ii < nn; ++ii) {
546 spec.
decode(bufp, remainp);
548 state.
decode(bufp, remainp);
552 for (
int ii = 0; ii < nn; ++ii) {
553 spec.
decode(bufp, remainp);
555 state.
decode(bufp, remainp);
568 for (
int ii = 0; ii < nn; ++ii) {
575 for (
int ii = 0; ii < nn; ++ii) {
582 for (
int ii = 0; ii < nn; ++ii) {
589 for (
int ii = 0; ii < nn; ++ii) {
std::set< String > StringSet
STL Set managing Strings.
char * decode_vstr(const uint8_t **bufp, size_t *remainp)
Decode a vstr (vint64, data, null).
void decode_state_old(uint8_t version, const uint8_t **bufp, size_t *remainp) override
Lock successfully granted.
#define HT_WARNF(msg,...)
The FailureInducer simulates errors.
vector< RangeState > m_system_states
ContextPtr m_context
Pointer to Master context.
int64_t md5_hash(const char *input)
Returns a 64-bit hash checksum of a null terminated input buffer.
std::shared_ptr< RangeServerConnection > RangeServerConnectionPtr
std::string String
A String is simply a typedef to std::string.
String format(const char *fmt,...)
Returns a String using printf-like format facilities. Vanilla snprintf is about 1.5x faster than this...
bool validate_subops()
Handles the results of sub operations.
virtual size_t encoded_length() const
Returns serialized object length.
vector< RangeState > m_user_states
Exclusive lock attempt failed because another has it locked.
virtual int64_t hash_code() const
size_t encoded_length_vstr(size_t len)
Computes the encoded length of vstr (vint64, data, null)
#define HT_ON_SCOPE_EXIT(...)
const char * get_text(int32_t state)
const char * RECOVER_SERVER
uint32_t decode_i32(const uint8_t **bufp, size_t *remainp)
Decode a 32-bit integer in little-endian order.
Declarations for ReferenceManager.
int64_t m_hash_code
Hash code uniquely identifying operation.
std::shared_ptr< Context > ContextPtr
Smart pointer to Context.
void set_start_row(const std::string &s)
vector< QualifiedRangeSpec > m_root_specs
void remove_recovery_plan(const String &location)
Removes a recovery plan for a failed range server.
void read_rsml(std::vector< MetaLog::EntityPtr > &removable_move_ops)
const String name() override
Name of operation used for exclusivity.
static time_point now() noexcept
const String label() override
Human readable label for operation.
void set_state(int32_t state)
const char * get_text(int error)
Returns a descriptive error message.
void display_state(std::ostream &os) override
Write human readable operation state to output stream.
uint16_t decode_i16(const uint8_t **bufp, size_t *remainp)
Decode a 16-bit integer in little-endian order.
RangeServerConnectionPtr m_rsc
void set_proxy(const String &str)
Sets address type to CommAddress::PROXY and proxy name to str.
void decode_state(uint8_t version, const uint8_t **bufp, size_t *remainp) override
Decode operation state.
void encode_i32(uint8_t **bufp, uint32_t val)
Encode a 32-bit integer in little-endian order.
void handle_split_shrunk(MetaLogEntityRange *range_entity, std::vector< MetaLog::EntityPtr > &removable_move_ops)
bool acquire_server_lock()
void close_handle_ptr(SessionPtr hyperspace, uint64_t *handlep)
Compatibility Macros for C/C++.
void stage_subop(std::shared_ptr< Operation > operation)
Stages a sub operation for execution.
void execute() override
Executes (carries out) the operation.
void record_state()
Records operation state to the MML.
vector< RangeState > m_root_states
OperationRecover(ContextPtr &context, RangeServerConnectionPtr &rsc, int flags=0)
Constructor.
virtual void decode(const uint8_t **bufp, size_t *remainp)
Reads serialized representation of object from a buffer.
virtual bool is_root() const
void create_recovery_plan()
vector< QualifiedRangeSpec > m_system_specs
uint64_t m_hyperspace_handle
const char * RECOVERY_BLOCKER
void encode_vstr(uint8_t **bufp, const void *buf, size_t len)
Encode a buffer as variable length string (vint64, data, null)
#define HT_FATALF(msg,...)
uint8_t encoding_version_state() const override
Returns version of encoding format of state.
std::shared_ptr< Reader > ReaderPtr
Smart pointer to Reader.
DependencySet m_obstructions
Set of obstructions.
vector< RangeState > m_metadata_states
Declarations for general-purpose utility functions.
void create_recovery_plan(const String &location, const vector< QualifiedRangeSpec > &root_specs, const vector< RangeState > &root_states, const vector< QualifiedRangeSpec > &metadata_specs, const vector< RangeState > &metadata_states, const vector< QualifiedRangeSpec > &system_specs, const vector< RangeState > &system_states, const vector< QualifiedRangeSpec > &user_specs, const vector< RangeState > &user_states)
Creates a recovery plan for a failed range server.
void legacy_decode(const uint8_t **bufp, size_t *remainp, BalancePlan *plan)
Central authority for balance plans.
vector< QualifiedRangeSpec > m_metadata_specs
DependencySet m_dependencies
Set of dependencies.
#define HT_INFOF(msg,...)
void clear_server_state()
Abstract base class for master operations.
This is a generic exception class for Hypertable.
Qualified (with table identifier) range specification.
virtual ~OperationRecover()
#define HT_ERRORF(msg,...)
void shutdown_rangeserver(ContextPtr &context, CommAddress &addr)
Sends a shutdown command to a rangeserver.
DependencySet m_exclusivities
Set of exclusivities.
void set_end_row(const std::string &e)
vector< QualifiedRangeSpec > m_user_specs
#define HT_MAYBE_FAIL(_label_)
void complete_ok(std::vector< MetaLog::EntityPtr > &additional)
Declarations for BalancePlanAuthority.
std::shared_ptr< Operation > OperationPtr
Smart pointer to Operation.
Error codes, Exception handling, error logging.
Wrapper for RangeSpec providing member storage.
Address abstraction to hold either proxy name or IPv4:port address.
Range state with memory management.
size_t encoded_length_state() const override
Encoded length of operation state.
std::shared_ptr< Definition > DefinitionPtr
Smart pointer to Definition.
void encode_state(uint8_t **bufp) const override
Encode operation state.
int code() const
Returns the error code.
ClockT::time_point m_expiration_time
Expiration time (used by ResponseManager)
Executes user-defined functions when leaving the current scope.