0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
XmlParser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
26 
27 #include <Common/Compat.h>
28 #include "XmlParser.h"
29 
30 #include <Common/Error.h>
31 #include <Common/Logger.h>
32 
33 #include <boost/algorithm/string.hpp>
34 
35 #include <cctype>
36 #include <cstdlib>
37 #include <iostream>
38 #include <limits>
39 
40 using namespace Hypertable;
41 using namespace std;
42 
// Constructs a parser over the buffer [base, base + len).
// Only the pointer and length are stored (m_base, m_length); the buffer is
// not copied here, and later code reads from m_base when parsing.
//   base - start of the content to be parsed
//   len  - number of bytes at base
43 XmlParser::XmlParser(const char *base, int len) : m_base(base), m_length(len) {
    // Create the expat parser, requesting the US-ASCII encoding.
    // NOTE(review): the result of XML_ParserCreate is not checked here;
    // confirm whether a null parser needs to be handled.
44  m_parser = XML_ParserCreate("US-ASCII");
    // Route expat callbacks to the static trampolines of this class.
45  XML_SetElementHandler(m_parser, &start_element_handler, &end_element_handler);
46  XML_SetCharacterDataHandler(m_parser, &character_data_handler);
    // The trampolines receive this pointer back as their userdata argument
    // and cast it to XmlParser* to reach the instance.
47  XML_SetUserData(m_parser, this);
48 }
49 
// Constructs a parser over [base, base + len) with a list of sub-parser
// element names.
// Delegates to the (base, len) constructor for the expat setup, then copies
// sub_parsers into m_sub_parsers. open_element() compares incoming element
// names against that list (case-insensitively) to decide whether an element
// starts a sub parse.
50 XmlParser::XmlParser(const char *base, int len,
51  const std::initializer_list<std::string> &sub_parsers) :
52  XmlParser(base, len) {
53  m_sub_parsers = sub_parsers;
54 }
55 
56 
// NOTE(review): listing line 57 (the signature of this definition) is missing
// from this extract. The symbol index at the end of the page places
// ~XmlParser() at XmlParser.cc:57, so this is presumably the destructor body.
    // Releases the expat parser that the constructor created in m_parser.
58  XML_ParserFree(m_parser);
59 }
60 
62 
// NOTE(review): listing line 61 (the signature of this definition) is missing
// from this extract; presumably this is the parse entry point - confirm
// against XmlParser.h. Listing lines 80, 84 and 85 are missing as well.
//
// Runs expat over the whole buffer and, on failure, builds a message that
// combines the expat error string with some context in parentheses.
63  try {
    // Feed all m_length bytes in a single call; the trailing 1 is the
    // isFinal flag of XML_Parse. A zero return is treated as failure.
64  if (XML_Parse(m_parser, m_base, m_length, 1) == 0) {
65  string msg;
    // For a tag mismatch the context is the element currently being parsed.
66  if (XML_GetErrorCode(m_parser) == XML_ERROR_TAG_MISMATCH)
67  msg = format("%s (%s)",
68  (const char *)XML_ErrorString(XML_GetErrorCode(m_parser)),
69  m_current_element.c_str());
70  else {
    // Otherwise the context is the run of characters starting at the byte
    // index reported by expat and ending at the next whitespace character.
71  const char *base = m_base + XML_GetCurrentByteIndex(m_parser);
72  const char *end = base;
    // NOTE(review): this scan stops at a NUL byte or whitespace, not at
    // m_base + m_length, so it assumes the buffer is NUL-terminated - confirm.
    // NOTE(review): isspace is given a plain char here; confirm that bytes
    // with the high bit set cannot reach this point.
73  for (; *end; ++end)
74  if (isspace(*end))
75  break;
76  msg = format("%s (%s)",
77  (const char *)XML_ErrorString(XML_GetErrorCode(m_parser)),
78  string(base, end-base).c_str());
79  }
    // NOTE(review): listing line 80 is missing here; presumably it throws an
    // exception carrying msg, since msg is not used otherwise - confirm.
81  }
82  }
83  catch (Exception &e) {
    // NOTE(review): listing lines 84-85 are missing here. The surviving
    // statement rethrows the caught exception unchanged.
86  throw;
87  }
88 }
89 
// Converts element content to an int64_t using strtoll in base 10.
//   name    - element name, used only in the error message text
//   content - text to convert
// Returns the parsed value.
// Empty content, a nonzero errno after strtoll, or unparsed trailing
// characters each take an error path.
// NOTE(review): listing lines 93 and 99 are missing from this extract; only
// the argument lines of those statements survive. They presumably begin an
// HT_THROWF call (that macro is listed in the symbol index of this page);
// the error code passed is not visible here.
90 int64_t XmlParser::content_to_i64(const std::string &name,
91  const std::string &content) {
92  if (content.empty())
94  "Empty content for %s", name.c_str());
95  char *end;
    // Clear errno first so that a nonzero value afterwards comes from strtoll.
96  errno = 0;
97  int64_t val = strtoll(content.c_str(), &end, 10);
    // *end is nonzero when strtoll stopped before the end of the string.
98  if (errno || *end)
100  "Invalid value for %s (%s)", name.c_str(), content.c_str());
101  return val;
102 }
103 
// Converts element content to an int32_t.
//   name    - element name, used only in the error message text
//   content - text to convert
// Parses through content_to_i64(), then takes the error path if the value
// lies outside the int32_t range; otherwise returns the narrowed value.
// NOTE(review): listing line 109 is missing from this extract; it presumably
// begins the HT_THROWF call whose arguments appear on listing line 110.
104 int32_t XmlParser::content_to_i32(const std::string &name,
105  const std::string &content) {
106  int64_t val64 = content_to_i64(name, content);
107  if (val64 > (int64_t)numeric_limits<int32_t>::max() ||
108  val64 < (int64_t)numeric_limits<int32_t>::min())
110  "Invalid value for %s (%s)", name.c_str(), content.c_str());
111  return (int32_t)val64;
112 }
113 
// Converts element content to an int16_t.
//   name    - element name, used only in the error message text
//   content - text to convert
// Parses through content_to_i64(), then takes the error path if the value
// lies outside the int16_t range; otherwise returns the narrowed value.
// NOTE(review): listing line 119 is missing from this extract; it presumably
// begins the HT_THROWF call whose arguments appear on listing line 120.
114 int16_t XmlParser::content_to_i16(const std::string &name,
115  const std::string &content) {
116  int64_t val64 = content_to_i64(name, content);
117  if (val64 > (int64_t)numeric_limits<int16_t>::max() ||
118  val64 < (int64_t)numeric_limits<int16_t>::min())
120  "Invalid value for %s (%s)", name.c_str(), content.c_str());
121  return (int16_t)val64;
122 }
123 
// Converts element content to a boolean.
//   name    - element name, used only in the error message text
//   content - text to convert
// Returns true for the text true and false for the text false, compared
// case-insensitively with strcasecmp. Empty content and any other text take
// an error path.
// NOTE(review): listing lines 127 and 133 are missing from this extract; they
// presumably begin the HT_THROWF calls whose arguments appear on listing
// lines 128 and 134.
124 bool XmlParser::content_to_bool(const std::string &name,
125  const std::string &content) {
126  if (content.empty())
128  "Empty content for %s", name.c_str());
129  if (!strcasecmp(content.c_str(), "true"))
130  return true;
131  else if (!strcasecmp(content.c_str(), "false"))
132  return false;
134  "Invalid boolean value for %s (%s)", name.c_str(), content.c_str());
135 }
136 
// Validates element content against a fixed set of allowed text values.
//   name    - element name, used only in the error message text
//   content - text to validate
//   valid   - the allowed values
// Returns the entry of valid that matches content case-insensitively. The
// returned string is the spelling from valid, not the spelling of content.
// Empty content and content that matches no entry take an error path.
// NOTE(review): listing lines 141 and 147 are missing from this extract; they
// presumably begin the HT_THROWF calls whose arguments appear on listing
// lines 142 and 148.
137 const std::string XmlParser::content_to_text(const std::string &name,
138  const std::string &content,
139  const std::initializer_list<std::string> &valid) {
140  if (content.empty())
142  "Empty content for %s", name.c_str());
143  for (auto &v : valid) {
144  if (!strcasecmp(v.c_str(), content.c_str()))
145  return v;
146  }
148  "Invalid value for %s (%s)", name.c_str(), content.c_str());
149 }
150 
151 
// Decides whether an opening element is handled normally or belongs to a
// sub parse.
//   name - name of the element being opened
// Returns true when the caller should process the element itself; in that
// case nothing is pushed here (start_element_handler pushes it).
// Returns false when the element is inside, or starts, a sub parse; in that
// case the element has already been pushed onto the element stack here.
// NOTE(review): listing lines 154 and 161 are missing from this extract.
152 bool XmlParser::open_element(const XML_Char *name) {
    // A non-negative m_sub_parse_toplevel is treated as a sub parse being in
    // progress: the element is only tracked on the stack.
153  if (m_sub_parse_toplevel >= 0) {
155  push_element(name);
156  return false;
157  }
    // Otherwise check whether this element name (case-insensitive) is one of
    // the registered sub-parser names.
158  for (auto &sp : m_sub_parsers) {
159  if (!strcasecmp(sp.c_str(), name)) {
    // Remember where in the raw buffer the sub parse begins.
160  m_sub_parse_base_offset = XML_GetCurrentByteIndex(m_parser);
    // NOTE(review): the missing line 161 presumably records the toplevel of
    // the sub parse in m_sub_parse_toplevel - confirm against the original.
162  push_element(name);
163  return false;
164  }
165  }
166  return true;
167 }
168 
// Handles a closing element with respect to sub parsing.
//   name - name of the element being closed
// Returns true when no sub parse is active and the caller should process the
// end of the element itself; returns false when the element belongs to a sub
// parse. end_element_handler calls pop_element() before this function, so
// the stack size tested below is the size after the pop.
// NOTE(review): listing lines 176, 178 and 182 are missing from this extract.
// NOTE(review): this function tests m_sub_parse_toplevel > 0 whereas
// open_element tests >= 0; confirm that the difference is intended.
169 bool XmlParser::close_element(const XML_Char *name) {
170  if (m_sub_parse_toplevel > 0) {
    // Only the element that sits at the recorded toplevel ends the sub parse.
171  if (m_sub_parse_toplevel == (int)m_element_stack.size()) {
    // Scan forward from the current byte index, bounded by m_length, for the
    // character that terminates the close tag.
172  int offset = XML_GetCurrentByteIndex(m_parser);
173  for (; offset < m_length; ++offset) {
174  if (m_base[offset] == '>') {
    // Step past the terminator so that the computed length includes it.
175  offset++;
    // NOTE(review): the missing line 176 presumably begins a sub_parse call
    // (listed in the symbol index as sub_parse(name, base, len)); the line
    // below supplies the length measured from the start of the sub parse.
177  offset - m_sub_parse_base_offset);
    // NOTE(review): the missing line 178 presumably resets the sub parse
    // state - confirm against the original.
179  return false;
180  }
181  }
    // Reached only when the scan above ran to m_length without a match.
    // NOTE(review): the missing line 182 presumably begins an HT_THROWF call.
183  "Unable to find '>' in close tag '%s'", name);
184  }
185  return false;
186  }
187  return true;
188 }
189 
// Stores len characters starting at s as the collected text.
//   s   - start of the character data (not assumed to be NUL-terminated,
//         since an explicit length is passed)
//   len - number of characters at s
// NOTE(review): assign() replaces the previously collected text rather than
// appending to it. The expat documentation says a single run of text may be
// delivered in several callbacks; confirm that keeping only the last chunk
// is the intended behavior.
190 void XmlParser::add_text(const XML_Char *s, int len) {
191  m_collected_text.assign(s, len);
192 }
193 
// Pushes an element onto the element stack and makes it the current element.
//   name - name of the element being entered
// Also clears the collected text, so text seen before this element opened
// is discarded.
194 void XmlParser::push_element(const XML_Char *name) {
195  m_element_stack.push(name);
    // clear() followed by append() leaves m_current_element equal to name.
196  m_current_element.clear();
197  m_current_element.append(name);
198  m_collected_text.clear();
199 }
200 
// NOTE(review): listing line 201 (the signature of this definition) is
// missing from this extract. The symbol index at the end of the page places
// pop_element() at XmlParser.cc:201, so this is presumably its body.
    // Removes the top of the element stack. No emptiness check is made here
    // before the pop.
202  m_element_stack.pop();
    // The current element becomes the new top of the stack, or the empty
    // string when the stack is now empty. The collected text is not touched.
203  m_current_element.clear();
204  if (!m_element_stack.empty())
205  m_current_element.append(m_element_stack.top());
206 }
207 
// expat start-element callback (registered in the constructor).
//   userdata - the XmlParser instance that was passed to XML_SetUserData
//   name     - name of the element being opened
//   atts     - attribute array supplied by expat, forwarded unchanged
208 void XmlParser::start_element_handler(void *userdata, const XML_Char *name,
209  const XML_Char **atts) {
210  XmlParser *parser = reinterpret_cast<XmlParser *>(userdata);
    // A false return means the element belongs to a sub parse; open_element
    // has already pushed it, and start_element() is not invoked for it.
211  if (!parser->open_element(name))
212  return;
    // Normal path: notify the start_element() callback first, then push, so
    // the callback still sees the enclosing element as the current element.
213  parser->start_element(name, atts);
214  parser->push_element(name);
215 }
216 
// expat end-element callback (registered in the constructor).
//   userdata - the XmlParser instance that was passed to XML_SetUserData
//   name     - name of the element being closed
217 void XmlParser::end_element_handler(void *userdata, const XML_Char *name) {
218  XmlParser *parser = reinterpret_cast<XmlParser *>(userdata);
    // Pop first: close_element() compares against the stack size after the
    // pop.
219  parser->pop_element();
    // A false return means the element was part of a sub parse, so the
    // end_element() callback is not invoked for it.
220  if (!parser->close_element(name))
221  return;
    // Hand the collected text to end_element() with leading and trailing
    // whitespace removed. The trim is applied to a local copy.
222  string content = parser->collected_text();
223  boost::trim(content);
224  parser->end_element(name, content);
225 }
226 
// expat character-data callback (registered in the constructor).
//   userdata - the XmlParser instance that was passed to XML_SetUserData
//   s        - start of the character data
//   len      - number of characters at s
// Forwards the data to add_text() on the instance.
227 void XmlParser::character_data_handler(void *userdata, const XML_Char *s, int len) {
228  XmlParser *parser = reinterpret_cast<XmlParser *>(userdata);
229  parser->add_text(s, len);
230 }
231 
232 
std::string m_collected_text
Collected element text.
Definition: XmlParser.h:302
const char * m_base
Pointer to buffer holding content to be parsed.
Definition: XmlParser.h:209
std::stack< std::string > m_element_stack
Element stack.
Definition: XmlParser.h:215
String format(const char *fmt,...)
Returns a String using printf-like format facilities. Vanilla snprintf is about 1.5x faster than this...
Definition: String.cc:37
int m_sub_parse_base_offset
Raw text offset (from m_base) of beginning of sub parse.
Definition: XmlParser.h:314
static bool content_to_bool(const std::string &name, const std::string &content)
Helper function to convert content to a boolean value.
Definition: XmlParser.cc:124
static void end_element_handler(void *userdata, const XML_Char *name)
eXpat end element handler.
Definition: XmlParser.cc:217
virtual void end_element(const XML_Char *name, const std::string &content)
End element callback member function.
Definition: XmlParser.h:136
STL namespace.
static int32_t content_to_i32(const std::string &name, const std::string &content)
Helper function to convert content to an int32_t value.
Definition: XmlParser.cc:104
bool open_element(const XML_Char *name)
Determines if element should be parsed or included in a sub parse.
Definition: XmlParser.cc:152
void pop_element()
Pops element from element stack.
Definition: XmlParser.cc:201
#define HT_ASSERT(_e_)
Definition: Logger.h:396
void push_element(const XML_Char *name)
Push element onto element stack.
Definition: XmlParser.cc:194
Definitions for XmlParser.
static void character_data_handler(void *userdata, const XML_Char *s, int len)
eXpat character data handler; add_text() is called to add len characters starting at s to the collected text.
Definition: XmlParser.cc:227
Logging routines and macros.
Compatibility Macros for C/C++.
virtual ~XmlParser()
Destructor.
Definition: XmlParser.cc:57
bool close_element(const XML_Char *name)
Checks for and performs sub parse.
Definition: XmlParser.cc:169
int m_length
Length of data at m_base to be parsed.
Definition: XmlParser.h:212
XmlParser(const char *base, int len)
Constructor.
Definition: XmlParser.cc:43
static int64_t content_to_i64(const std::string &name, const std::string &content)
Helper function to convert content to an int64_t value.
Definition: XmlParser.cc:90
Hypertable definitions
void add_text(const XML_Char *s, int len)
Collects text.
Definition: XmlParser.cc:190
int m_sub_parse_toplevel
Toplevel element of current sub parse.
Definition: XmlParser.h:311
static int16_t content_to_i16(const std::string &name, const std::string &content)
Helper function to convert content to an int16_t value.
Definition: XmlParser.cc:114
const std::string collected_text()
Returns collected text.
Definition: XmlParser.h:269
std::vector< std::string > m_sub_parsers
List of element names for which there is a sub-parser.
Definition: XmlParser.h:305
Base class for XML document parsers.
Definition: XmlParser.h:94
static const std::string content_to_text(const std::string &name, const std::string &content, const std::initializer_list< std::string > &valid)
Helper function to convert content to one of a set of valid text values.
Definition: XmlParser.cc:137
virtual void start_element(const XML_Char *name, const XML_Char **atts)
Start element callback member function.
Definition: XmlParser.h:128
#define HT_THROWF(_code_, _fmt_,...)
Definition: Error.h:490
XML_Parser m_parser
eXpat parser
Definition: XmlParser.h:206
This is a generic exception class for Hypertable.
Definition: Error.h:314
static void start_element_handler(void *userdata, const XML_Char *name, const XML_Char **atts)
eXpat start element handler.
Definition: XmlParser.cc:208
virtual void sub_parse(const XML_Char *name, const char *base, int len)
Performs a sub-parse.
Definition: XmlParser.h:149
Error codes, Exception handling, error logging.
#define HT_THROW(_code_, _msg_)
Definition: Error.h:478
std::string m_current_element
Current element being parsed.
Definition: XmlParser.h:308
int code() const
Returns the error code.
Definition: Error.h:391