7 const char* begin,
const char* end,
8 std::vector<token>& tokens,
9 const std::string& separators,
10 bool merge_separators,
11 const std::string& open_parenthesis,
const std::string& close_parenthesis,
12 const std::string& whitespaces,
13 unsigned int max_nr_tokens)
15 const char* b = begin;
18 bool last_is_sep =
false;
21 bool is_whitespace =
false;
22 bool create_token =
false;
23 if (p == end || (is_whitespace =
is_element(*p, whitespaces)))
28 if (!(last_is_sep && merge_separators))
38 tokens.push_back(
token(b, p));
44 if ((pos = open_parenthesis.find_first_of(*p)) != std::string::npos) {
46 tokens.push_back(
token(b, p));
48 while (p != end && *p != close_parenthesis[pos])
50 tokens.push_back(
token(b, p));
56 if (tokens.size() >= max_nr_tokens)
60 tokens.push_back(
token(b,p));
66 std::vector<line>& lines,
67 bool truncate_trailing_spaces)
69 const char* ptr = global_begin;
70 while (ptr < global_end) {
71 const char* begin = ptr;
72 while (ptr < global_end && *ptr !=
'\n' )
74 const char* end = ptr;
75 if (truncate_trailing_spaces) {
76 while (end > begin &&
is_space(end[-1]))
80 if (end > begin && end[-1] ==
'\r')
82 lines.push_back(
line(begin,end));
88 const char* begin,
const char* end,
90 char open_parenthesis,
char close_parenthesis)
95 if (*begin != open_parenthesis)
98 content.
begin = begin;
99 const char* p = begin;
100 int nesting_level = 1;
102 if (*p == close_parenthesis) {
103 if (--nesting_level == 0) {
108 else if (*p == open_parenthesis)
118 char string_open = 0;
119 bool last_was_escape =
false;
120 bool last_was_slash =
false;
121 bool in_line_comment =
false;
122 bool in_block_comment =
false;
123 std::string filtered_source;
124 for (
auto c : source) {
125 if (in_line_comment) {
128 in_line_comment =
false;
132 else if (in_block_comment) {
133 if (last_was_slash) {
138 in_block_comment =
false;
140 last_was_slash =
false;
147 last_was_slash =
true;
152 else if (string_open) {
153 if (last_was_escape) {
154 last_was_escape =
false;
157 if (c == string_open) {
161 last_was_escape =
true;
165 if (last_was_slash) {
174 in_line_comment =
true;
177 in_block_comment =
true;
180 if (!in_line_comment && !in_block_comment) {
181 filtered_source.push_back(
'/');
182 filtered_source.push_back(c);
184 last_was_slash =
false;
195 last_was_slash =
true;
199 filtered_source.push_back(c);
203 if (correct_new_lines) {
204 std::string temp = filtered_source;
205 filtered_source =
"";
206 for (
auto C : temp) {
209 filtered_source.push_back(C);
212 return filtered_source;
More advanced text processing for splitting text into lines or tokens.
void split_to_tokens(const char *begin, const char *end, std::vector< token > &tokens, const std::string &separators, bool merge_separators, const std::string &open_parenthesis, const std::string &close_parenthesis, const std::string &whitespaces, unsigned int max_nr_tokens)
This function splits the text range given by begin and end into tokens and appends them to the tokens vector.
void split_to_lines(const char *global_begin, const char *global_end, std::vector< line > &lines, bool truncate_trailing_spaces)
this function splits a text range at the newline characters into single lines.
bool is_space(char c)
Check whether the character c is a whitespace character.
std::string strip_cpp_comments(const std::string &source, bool correct_new_lines)
Remove C++-style comments (line comments and block comments) from the source string and return the filtered string.
bool balanced_find_content(const char *begin, const char *end, token &content, char open_parenthesis, char close_parenthesis)
the input range must begin with an open parenthesis.
bool is_element(char c, const std::string &s)
Check whether the character c occurs in the string s.
const char * skip_spaces(const char *begin, const char *end)
Return the new start pointer obtained by skipping the spaces at the beginning of the range.
a line in a text is simply represented as a token
representation of a token in a text by two pointers begin and end, that point to the first character ...
const char * begin
pointers that define the range of characters