27 #include <boost/algorithm/string.hpp>
28 #include <boost/algorithm/string/predicate.hpp>
39 using namespace boost;
42 : m_generator(generator), m_keys_only(generator->m_keys_only),
43 m_amount(0), m_count(0), m_last_data_size(0) {
55 HT_ASSERT(!
"Unrecognized row component type");
122 for (
int i=1; i<n; i++)
131 string rowkey_distribution;
132 unsigned int rowkey_seed;
134 std::map<String, int> column_map;
136 if (
has(
"DataGenerator.MaxBytes"))
139 m_max_bytes =
m_props->get_i64(
"DataGenerator.MaxBytes", std::numeric_limits< ::int64_t >::max());
141 if (
has(
"DataGenerator.MaxKeys"))
142 m_max_keys = get_i64(
"DataGenerator.MaxKeys");
144 m_max_keys =
m_props->get_i64(
"DataGenerator.MaxKeys", std::numeric_limits< ::int64_t >::max());
146 if (
has(
"DataGenerator.Seed"))
147 m_seed = get_i32(
"DataGenerator.Seed");
152 rowkey_distribution =
m_props->get_str(
"rowkey.distribution",
"uniform");
153 rowkey_seed =
m_props->get_i32(
"rowkey.seed", 1);
155 std::vector<String> names;
161 for (
size_t i=0; i<names.size(); i++) {
162 if (starts_with(names[i],
"rowkey.component.")) {
163 index = strtol(names[i].c_str()+17, &ptr, 0);
164 if (index < 0 || index > 100)
168 if (!strcmp(ptr,
".type")) {
169 str =
m_props->get_str(names[i]);
170 if (!strcasecmp(str.c_str(),
"integer"))
172 else if (!strcasecmp(str.c_str(),
"string"))
174 else if (!strcasecmp(str.c_str(),
"timestamp"))
179 else if (!strcmp(ptr,
".format")) {
183 else if (!strcmp(ptr,
".min")) {
186 else if (!strcmp(ptr,
".max")) {
189 else if (!strcmp(ptr,
".values")) {
190 str =
m_props->get_str(names[i]);
193 else if (ends_with(ptr,
".order")) {
196 else if (ends_with(ptr,
".distribution")) {
199 else if (ends_with(ptr,
".seed")) {
200 str =
m_props->get_str(names[i]);
203 else if (ends_with(ptr,
".length.min")) {
204 str =
m_props->get_str(names[i]);
207 else if (ends_with(ptr,
".length.max")) {
208 str =
m_props->get_str(names[i]);
215 else if (strstr(names[i].c_str(),
".qualifier.") || strstr(names[i].c_str(),
".value.")) {
217 name =
String(
"") + names[i];
218 tptr = strchr((
char *)name.c_str(),
'.');
221 std::map<String, int>::iterator iter = column_map.find((
String)name.c_str());
222 if (iter == column_map.end()) {
223 columni = column_map.size();
224 column_map[(
String)name.c_str()] = columni;
229 columni = (*iter).second;
231 str =
m_props->get_str(names[i]);
233 if (!strcmp(tptr,
"qualifier.type")) {
234 if (!strcasecmp(str.c_str(),
"STRING"))
238 str.c_str(), names[i].c_str()));
240 else if (!strcmp(tptr,
"qualifier.size")) {
243 else if (!strcmp(tptr,
"qualifier.charset")) {
245 boost::trim_if(
m_column_specs[columni].qualifier.charset, boost::is_any_of(
"'\""));
247 else if (!strcmp(tptr,
"value.random")) {
248 if (!strcasecmp(str.c_str(),
"false"))
250 else if (!strcasecmp(str.c_str(),
"true"))
255 else if (!strcmp(tptr,
"value.seed")) {
258 else if (!strcmp(tptr,
"value.size")) {
261 else if (!strcmp(tptr,
"value.source")) {
264 else if (!strcmp(tptr,
"value.source.words")) {
265 if (!strcasecmp(str.c_str(),
"true"))
267 else if (!strcasecmp(str.c_str(),
"false"))
272 else if (!strcmp(tptr,
"value.cooked-source")) {
275 else if (!strcmp(tptr,
"value.fixed")) {
276 if (!strcasecmp(str.c_str(),
"false"))
278 else if (!strcasecmp(str.c_str(),
"true"))
294 format(
"No qualifier size specified for column '%s'",
298 format(
"No qualifier charset specified for column '%s'",
303 format(
"No value size specified for column '%s'",
315 HT_FATALF(
"Missing type for component %lu", (
Lu)i);
319 HT_FATALF(
"Format sequence (%s) must contain 'lld'",
327 if (!strcasecmp(str.c_str(),
"ascending"))
329 else if (!strcasecmp(str.c_str(),
"random"))
std::string String
A String is simply a typedef to std::string.
DataGeneratorIterator & operator++()
String format(const char *fmt,...)
Returns a String using printf like format facilities Vanilla snprintf is about 1.5x faster than this...
std::vector< RowComponent * > m_row_components
std::vector< Column * > m_columns
Po::typed_value< String > * str(String *v=0)
int32_t random_int32(int32_t maximum)
Generate random 32-bit integer.
const char * column_qualifier
std::vector< RowComponentSpec > m_row_component_specs
std::vector< ColumnSpec > m_column_specs
bool has(const String &name)
Check existence of a configuration value.
std::shared_ptr< Properties > PropertiesPtr
Compatibility Macros for C/C++.
DataGeneratorIterator(DataGenerator *generator)
#define HT_FATALF(msg,...)
int parse_order(const std::string &str)
Provides an STL-style iterator on DataGenerator objects.
const char * column_family
DataGenerator(PropertiesPtr &props, bool keys_only=false)
long unsigned int Lu
Shortcut for printf formats.
#define HT_THROW(_code_, _msg_)
unsigned long m_last_data_size