libfilezilla
xml.hpp
Go to the documentation of this file.
1 #ifndef LIBFILEZILLA_XML_HEADER
2 #define LIBFILEZILLA_XML_HEADER
3 
10 #include <functional>
11 #include <string>
12 #include <tuple>
13 #include <vector>
14 
15 #include "buffer.hpp"
16 #include "logger.hpp"
17 
18 namespace fz {
19 
20 namespace xml {
21 
/// Types of callbacks when parsing XML.
enum class callback_event
{
	/// An element is opened, passed name.
	open,

	/// An element is closed, passed name.
	close,

	/// Attribute in name and value.
	attribute,

	/// Value event.
	value,

	/// Parsing instruction, includes the <?xml?> declaration.
	/// Until first space in name, rest in value.
	parsing_instruction,

	/// Doctype event.
	doctype,
};
43 
53 class FZ_PUBLIC_SYMBOL parser final
54 {
55 public:
62  typedef std::function<bool(callback_event type, std::string_view path, std::string_view name, std::string && value)> callback_t;
63 
64  parser();
65  parser(callback_t const& cb);
66  parser(callback_t && cb);
67 
70  void set_callback(callback_t && cb);
71  void set_callback(callback_t const& cb);
72 
74  bool parse(std::string_view data);
75 
78  bool finalize();
79 
81  std::string get_error() const;
82 
85  void set_limits(size_t value_size_limit, size_t path_size_limit);
86 
87 private:
88  bool FZ_PRIVATE_SYMBOL decode_ref();
89  bool FZ_PRIVATE_SYMBOL is_valid_tag_or_attr(std::string_view s) const;
90  bool FZ_PRIVATE_SYMBOL normalize_value();
91 
92  bool FZ_PRIVATE_SYMBOL parse_valid_utf8(std::string_view data);
93  bool FZ_PRIVATE_SYMBOL parse(char const* const begin, char const* const end);
94  void FZ_PRIVATE_SYMBOL set_error(std::string_view msg, size_t offset);
95 
96  bool FZ_PRIVATE_SYMBOL deduce_encoding(std::string_view & data);
97 
98  enum class state {
99  content,
100  tag_start, // Just after reading <
101  tag_name, // Reading tag name
102  tag_closing, // In a closing tag, matching the tag name
103  tag_end, // Just before reading >
104 
105  attributes,
106  attribute_name,
107  attribute_equal,
108  attribute_quote,
109  attribute_value,
110 
111  // <?xml and other parsing intructions
112  pi,
113  pi_value,
114 
115  // entity and character references
116  reference,
117  attrvalue_reference,
118 
119  comment_start,
120  comment_end,
121 
122  doctype_start,
123  doctype_name,
124  doctype_value,
125 
126  cdata_start,
127  cdata_end,
128 
129  done,
130  error
131  };
132 
133  callback_t cb_;
134 
135  std::string path_;
136  std::vector<size_t> nodes_;
137  std::string name_;
138  std::string value_;
139  size_t processed_{};
140  std::string converted_{};
141 
142  size_t path_size_limit_{1024*1024};
143  size_t value_size_limit_{10*1024*1024};
144 
145  union {
146  size_t utf8_state_{};
147  uint32_t utf16_state_;
148  };
149 
150  state s_{ state::content };
151 
152  enum class encoding {
153  unknown,
154  utf8,
155  utf16le,
156  utf16be
157  };
158  encoding encoding_{};
159 
160  union {
161  size_t tag_match_pos_{};
162  char quotes_;
163  unsigned char dashes_;
164  };
165 
166  bool got_xmldecl_{};
167  bool got_doctype_{};
168  bool got_element_{};
169 };
170 
183 class FZ_PUBLIC_SYMBOL namespace_parser final
184 {
185 public:
189 
190  void set_callback(parser::callback_t && cb);
191  void set_callback(parser::callback_t const& cb);
192 
193  bool parse(std::string_view data);
194  bool finalize();
195 
196  std::string get_error() const;
197 
199  typedef std::function<bool(callback_event type, std::string_view path, std::string_view name, std::string_view value)> raw_callback_t;
200  void set_raw_callback(raw_callback_t && cb);
201  void set_raw_callback(raw_callback_t const& cb);
202 private:
203  std::string_view FZ_PRIVATE_SYMBOL apply_namespaces(std::string_view in);
204  bool FZ_PRIVATE_SYMBOL apply_namespace_to_path();
205 
206  bool FZ_PRIVATE_SYMBOL on_callback(callback_event type, std::string_view path, std::string_view name, std::string && value);
207 
208  parser parser_;
209 
210  parser::callback_t cb_;
211  raw_callback_t raw_cb_;
212 
213  std::string path_;
214  fz::buffer applied_;
215  std::vector<size_t> nodes_;
216  std::vector<std::pair<std::string, std::string>> attributes_;
217  std::vector<std::tuple<size_t, std::string, std::string>> namespaces_;
218  bool needs_namespace_expansion_{};
219  bool error_{};
220 };
221 
223 class FZ_PUBLIC_SYMBOL pretty_printer
224 {
225 public:
226  pretty_printer() = default;
227  virtual ~pretty_printer();
228 
229  void log(callback_event type, std::string_view, std::string_view name, std::string_view value);
230 
231 protected:
232  virtual void on_line(std::string_view line) = 0;
233 
234 private:
235  void FZ_PRIVATE_SYMBOL finish_line();
236  void FZ_PRIVATE_SYMBOL print_line();
237 
238  size_t depth_{};
239  std::string value_;
240  std::string line_;
241 };
242 
244 class FZ_PUBLIC_SYMBOL pretty_logger final : public pretty_printer
245 {
246 public:
248 
249 protected:
250  virtual void on_line(std::string_view line) override;
251 
252  logmsg::type level_;
253  logger_interface & logger_;
254 };
255 
256 
257 }
258 }
259 
260 #endif
Declares fz::buffer.
The buffer class is a simple buffer where data can be appended at the end and consumed at the front....
Definition: buffer.hpp:27
Abstract interface for logging strings.
Definition: logger.hpp:51
A streamable XML parser that resolves namespace declarations and namespace prefixes.
Definition: xml.hpp:184
std::function< bool(callback_event type, std::string_view path, std::string_view name, std::string_view value)> raw_callback_t
Additional raw callback to look at events before namespace processing takes place.
Definition: xml.hpp:199
A streaming XML parser.
Definition: xml.hpp:54
void set_callback(callback_t &&cb)
std::string get_error() const
Returns an error description. Empty if parsing was stopped by a callback.
void set_limits(size_t value_size_limit, size_t path_size_limit)
bool parse(std::string_view data)
Processes the block of data. Can be partial.
std::function< bool(callback_event type, std::string_view path, std::string_view name, std::string &&value)> callback_t
Definition: xml.hpp:62
Pretty-prints XML as it is being parsed to a logger.
Definition: xml.hpp:245
A slow pretty printer for XML as it is being parsed.
Definition: xml.hpp:224
Interface for logging.
type
Definition: logger.hpp:16
The namespace used by libfilezilla.
Definition: apply.hpp:17
callback_event
Types of callbacks when parsing XML.
Definition: xml.hpp:24
@ close
An element is closed, passed name.
@ parsing_instruction
Parsing instruction, includes the <?xml?> declaration. Until first space in name, rest in value.
@ open
An element is opened, passed name.
@ attribute
Attribute in name and value.