10 merge_separators =
false;
13 whitespaces =
" \t\n";
29 end = p + std::string(p).size();
62 merge_separators = merge;
74 merge_separators = merge;
78bool tokenizer::handle_skip(
token& result)
80 if (result.end >= end)
82 std::size_t i = begin_skip.find_first_of(*result.end);
83 if (i == std::string::npos)
86 if (escape_skip.size() > i) {
87 bool last_was_escape =
false;
88 while (result.end < end) {
90 last_was_escape =
false;
91 else if (*result.end == escape_skip[i])
92 last_was_escape =
true;
93 else if (*result.end == end_skip[i])
99 while (result.end < end) {
100 if (*result.end == end_skip[i])
110bool tokenizer::reverse_handle_skip(
token& result)
112 if (result.begin <=
begin)
114 std::size_t i = end_skip.find_first_of(*(result.begin-1));
115 if (i == std::string::npos)
118 if (escape_skip.size() > i) {
119 while (result.begin >
begin) {
120 if (*(result.begin-1) == begin_skip[i]) {
121 unsigned nr_escape = 0;
122 while (result.begin-nr_escape-1 >
begin && *(result.begin-nr_escape-2) == escape_skip[i])
124 if ((nr_escape & 1) == 1)
131 while (result.begin >
begin) {
132 if (*(result.begin-1) == begin_skip[i])
142bool tokenizer::handle_separators(
token& result,
bool check_skip)
145 bool did_skip =
false;
147 did_skip = handle_skip(result);
148 if (result.end < end &&
is_element(*result.end, separators)) {
149 if (merge_separators && !did_skip) {
154 begin = result.end = result.end+1;
160bool tokenizer::reverse_handle_separators(
token& result,
bool check_skip)
163 bool did_skip =
false;
165 did_skip = reverse_handle_skip(result);
167 if (result.begin >
begin &&
is_element(*(result.begin-1), separators)) {
168 if (merge_separators && !did_skip) {
173 end = result.begin = result.begin+1;
185 if (handle_separators(result)) {
189 if (result.end == end) {
194 while (++result.end < end) {
196 const char* tmp_end = result.end;
197 if (handle_skip(result) && result.end == end)
199 if (result.end < end &&
202 begin = result.end = tmp_end;
214 token result(end, end);
216 if (reverse_handle_separators(result))
223 const char* tmp_begin = result.
begin;
224 reverse_handle_skip(result);
228 end = result.
begin = tmp_begin;
239 std::vector<int> nesting;
240 nesting.resize(open_parenthesis.size());
241 std::fill(nesting.begin(),nesting.end(),0);
247 if (handle_separators(result,
false))
250 bool inside_token =
true;
252 while (result.end < end) {
254 std::size_t i_close = close_parenthesis.find_first_of(*result.end);
255 std::size_t i_open = open_parenthesis.find_first_of(*result.end);
256 if (i_close != std::string::npos || i_open != std::string::npos) {
258 inside_token =
false;
260 if (i_open == i_close) {
261 if (nesting[i_open] == 0) {
266 if (--nesting[i_open] == 0)
270 else if (i_close != std::string::npos) {
271 if (nesting[i_close] == 0)
273 if (--nesting[i_close] == 0)
277 if (++nesting[i_open] == 1)
283 const char* tmp_end = result.end;
286 (
is_element(*result.end, whitespaces) && nr_nested == 0) ) {
287 begin = result.end = tmp_end;
288 return nr_nested == 0;
292 if (!inside_token && nr_nested == 0)
295 if (nr_nested == 0) {
313void tokenizer::bite_all(std::vector<token>& result)
316 result.push_back(
bite());
the tokenizer allows text to be split into tokens in a convenient way.
void skip_whitespaces()
skip whitespaces at the front
void reverse_skip_whitespaces()
skip whitespaces at the back
bool balanced_bite(token &result, const std::string &open_parenthesis, const std::string &close_parenthesis, bool wait_for_sep=false)
bite one token until all potentially nested opened parentheses have been closed again
tokenizer & set_sep(const std::string &sep, bool merge)
set the list of separators and specify whether consecutive separators are merged into single tokens
tokenizer & set_sep_merge(bool merge)
specify whether consecutive separators are merged into single tokens
tokenizer & set_ws(const std::string &ws)
set the list of whitespace characters that separate tokens and are skipped
bool skip_ws_check_empty()
skip whitespaces at the front and return whether the complete text has been processed
token bite()
bite away a single token from the front
token reverse_bite()
bite away a single token from the back
tokenizer()
construct empty tokenizer
tokenizer & set_skip(const std::string &open, const std::string &close)
set several character pairs that enclose tokens that are not split
bool is_element(char c, const std::string &s)
check if char c occurs in string s
Helper functions to process strings.
representation of a token in a text by two pointers begin and end, that point to the first character ...
void skip(const std::string &skip_chars)
set begin by skipping all instances of the given character set
const char * begin
pointers that define the range of characters
token()
construct with both pointers set to 0
void reverse_skip(const std::string &skip_chars)
set end by skipping all instances of the given character set