44 String extract_page(
char *request) {
46 const char *base, *ptr;
47 if (!strncmp(request,
"GET ", 4)) {
49 if ((ptr = strchr(base,
' ')) != 0)
50 retstr =
String(base, ptr-base);
64 String format_timestamp(
struct tm tm) {
65 return format(
"%d-%02d-%02d %02d:%02d:%02d", tm.tm_year+1900, tm.tm_mon+1,
66 tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
80 std::vector<std::pair<Cell, int> > failed_mutations;
82 mutator_ptr->get_failed(failed_mutations);
83 if (!failed_mutations.empty()) {
84 for (
size_t i=0; i<failed_mutations.size(); i++) {
85 cerr <<
"Failed: (" << failed_mutations[i].first.row_key <<
","
86 << failed_mutations[i].first.column_family;
87 if (failed_mutations[i].first.column_qualifier)
88 cerr <<
":" << failed_mutations[i].first.column_qualifier;
89 cerr <<
"," << failed_mutations[i].first.timestamp <<
") - "
97 " usage: apache_log_load [--time-order] <file>\n"
99 " Loads the Apache web log <file> into the LogDb\n"
100 " table. By default, the row key is constructed\n"
103 " <page> <timestamp>\n"
105 " This format facilitates queries that return\n"
106 " the click history for a specific page. If\n"
107 " the --time-order switch is supplied, then\n"
108 " the row key is constructed as:\n"
110 " <timestamp> <page>\n"
112 " This format facilitates queries that return\n"
113 " a historical portion of the log.\n";
115 const int RETRY_TIMEOUT = 30;
144 int main(
int argc,
char **argv) {
152 const char *inputfile;
153 bool time_order =
false;
158 else if (argc == 3 && !strcmp(argv[1],
"--time-order")) {
163 cout << usage << endl;
173 namespace_ptr = client_ptr->open_namespace(
"/");
176 table_ptr = namespace_ptr->open_table(
"LogDb");
180 mutator_ptr.reset(table_ptr->create_mutator());
190 parser.
load(inputfile);
194 while (parser.
next(entry)) {
198 row = format_timestamp(entry.
tm);
200 row += extract_page(entry.
request);
203 row = extract_page(entry.
request);
205 row += format_timestamp(entry.
tm);
208 key.
row = row.c_str();
215 mutator_ptr->set(key, entry.
userid);
217 mutator_ptr->set(key, entry.
request);
223 mutator_ptr->set(key, entry.
referer);
230 if (!mutator_ptr->need_retry())
231 quick_exit(EXIT_FAILURE);
232 handle_mutation_failure(mutator_ptr);
233 }
while (!mutator_ptr->retry(RETRY_TIMEOUT));
239 mutator_ptr->flush();
244 if (!mutator_ptr->need_retry())
245 quick_exit(EXIT_FAILURE);
246 handle_mutation_failure(mutator_ptr);
247 }
while (!mutator_ptr->retry(RETRY_TIMEOUT));
Retrieves system information (hardware, installation directory, etc.).
std::string String
A String is simply a typedef to std::string.
String format(const char *fmt,...)
Returns a String using printf-like format facilities. Vanilla snprintf is about 1.5x faster than this...
int main(int argc, char **argv)
This program is designed to parse an Apache web server log and insert the values into a table called LogDb.
bool next(ApacheLogEntry &entry)
std::shared_ptr< Namespace > NamespacePtr
Shared smart pointer to Namespace.
std::shared_ptr< Client > ClientPtr
std::shared_ptr< TableMutator > TableMutatorPtr
Smart pointer to TableMutator.
const char * get_text(int error)
Returns a descriptive error message.
Compatibility Macros for C/C++.
static String locate_install_dir(const char *argv0)
Returns the installation directory.
void load(std::string filename)
This is a generic exception class for Hypertable.
const char * column_family
Error codes, Exception handling, error logging.
std::shared_ptr< Table > TablePtr