cgv
Loading...
Searching...
No Matches
advanced_scan.cxx
1#include "advanced_scan.h"
2
3namespace cgv {
4 namespace utils {
5
7 const char* begin, const char* end,
8 std::vector<token>& tokens,
9 const std::string& separators,
10 bool merge_separators,
11 const std::string& open_parenthesis, const std::string& close_parenthesis,
12 const std::string& whitespaces,
13 unsigned int max_nr_tokens)
14{
15 const char* b = begin;
16 const char* p = b;
17 size_t pos;
18 bool last_is_sep = false;
19 while (p < end) {
20 bool is_sep = false;
21 bool is_whitespace = false;
22 bool create_token = false;
23 if (p == end || (is_whitespace = is_element(*p, whitespaces)))
24 create_token = true;
25 else {
26 is_sep=is_element(*p, separators);
27 if (is_sep) {
28 if (!(last_is_sep && merge_separators))
29 create_token = true;
30 }
31 else
32 if (last_is_sep)
33 create_token = true;
34 }
35 last_is_sep = is_sep;
36 if (create_token) {
37 if (p > b)
38 tokens.push_back(token(b, p));
39 if (is_whitespace)
40 b = p+1;
41 else
42 b = p;
43 }
44 if ((pos = open_parenthesis.find_first_of(*p)) != std::string::npos) {
45 if (p > b)
46 tokens.push_back(token(b, p));
47 b = ++p;
48 while (p != end && *p != close_parenthesis[pos])
49 ++p;
50 tokens.push_back(token(b, p));
51 b = p + 1;
52 }
53 if (p == end)
54 break;
55 ++p;
56 if (tokens.size() >= max_nr_tokens)
57 break;
58 if (p == end) {
59 if (p > b)
60 tokens.push_back(token(b,p));
61 }
62 };
63}
64
65void split_to_lines(const char* global_begin, const char* global_end,
66 std::vector<line>& lines,
67 bool truncate_trailing_spaces)
68{
69 const char* ptr = global_begin;
70 while (ptr < global_end) {
71 const char* begin = ptr;
72 while (ptr < global_end && *ptr != '\n' )
73 ++ptr;
74 const char* end = ptr;
75 if (truncate_trailing_spaces) {
76 while (end > begin && is_space(end[-1]))
77 --end;
78 }
79 else
80 if (end > begin && end[-1] == '\r')
81 --end;
82 lines.push_back(line(begin,end));
83 ++ptr;
84 }
85}
86
88 const char* begin, const char* end,
89 token& content,
90 char open_parenthesis, char close_parenthesis)
91{
92 begin = skip_spaces(begin, end);
93 if (end-begin < 2)
94 return false;
95 if (*begin != open_parenthesis)
96 return false;
97 ++begin;
98 content.begin = begin;
99 const char* p = begin;
100 int nesting_level = 1;
101 while (p < end) {
102 if (*p == close_parenthesis) {
103 if (--nesting_level == 0) {
104 content.end = p;
105 return true;
106 }
107 }
108 else if (*p == open_parenthesis)
109 ++nesting_level;
110 ++p;
111 }
112 return false;
113}
114
/// Remove C/C++ comments from \c source and return the filtered text.
///
/// Line comments are removed from the "//" up to, but not including, the
/// terminating '\n', so the line structure after a line comment is kept.
/// Block comments are removed completely, including any newlines inside
/// them, and nothing is inserted in their place. Comment markers inside
/// string ("...") or character ('...') literals are not treated as comments;
/// a backslash inside a literal escapes the following character. Literals
/// are copied to the output unchanged.
///
/// \param source             text to filter
/// \param correct_new_lines  if true, all '\r' characters are dropped from the result
/// \return the text without comments
std::string strip_cpp_comments(const std::string& source, bool correct_new_lines)
{
	char string_open = 0;          // quote character of the literal being copied, 0 if none
	bool last_was_escape = false;  // inside a literal: previous character was an unescaped backslash
	bool pending_slash = false;    // a '/' was read outside of comments/literals but not yet emitted
	bool last_was_star = false;    // inside a block comment: previous character was '*'
	bool in_line_comment = false;
	bool in_block_comment = false;

	std::string filtered_source;
	filtered_source.reserve(source.size());
	// single place where output is produced, so '\r' filtering applies uniformly
	auto emit = [&filtered_source, correct_new_lines](char ch) {
		if (!(correct_new_lines && ch == '\r'))
			filtered_source.push_back(ch);
	};

	for (char c : source) {
		if (in_line_comment) {
			// the newline ends the comment but is not part of it
			if (c == '\n') {
				in_line_comment = false;
				emit(c);
			}
		}
		else if (in_block_comment) {
			if (last_was_star && c == '/')
				in_block_comment = false;
			last_was_star = in_block_comment && c == '*';
		}
		else if (string_open) {
			// literal content is copied verbatim, including the closing quote
			emit(c);
			if (last_was_escape)
				last_was_escape = false;
			else if (c == string_open)
				string_open = 0;
			else if (c == '\\')
				last_was_escape = true;
		}
		else if (pending_slash) {
			pending_slash = false;
			if (c == '/')
				in_line_comment = true;
			else if (c == '*')
				in_block_comment = true;
			else {
				// the slash was an ordinary character: emit it together with its successor
				emit('/');
				emit(c);
				if (c == '"' || c == '\'')
					string_open = c;
			}
		}
		else if (c == '/') {
			// hold the slash back until the next character shows whether a comment starts
			pending_slash = true;
		}
		else {
			if (c == '"' || c == '\'')
				string_open = c;
			emit(c);
		}
	}
	// a slash at the very end of the input cannot start a comment anymore
	if (pending_slash)
		emit('/');
	return filtered_source;
}
214
215 }
216}
More advanced text processing for splitting text into lines or tokens.
void split_to_tokens(const char *begin, const char *end, std::vector< token > &tokens, const std::string &separators, bool merge_separators, const std::string &open_parenthesis, const std::string &close_parenthesis, const std::string &whitespaces, unsigned int max_nr_tokens)
this function splits a text range into tokens.
void split_to_lines(const char *global_begin, const char *global_end, std::vector< line > &lines, bool truncate_trailing_spaces)
this function splits a text range at the newline characters into single lines.
bool is_space(char c)
check if char is a whitespace
Definition scan.cxx:12
std::string strip_cpp_comments(const std::string &source, bool correct_new_lines)
remove cpp-style comments from string
bool balanced_find_content(const char *begin, const char *end, token &content, char open_parenthesis, char close_parenthesis)
the input range must begin with an open parenthesis.
bool is_element(char c, const std::string &s)
check if char c arises in string s
Definition scan.cxx:291
const char * skip_spaces(const char *begin, const char *end)
return new start pointer by skipping spaces at begin
Definition scan.cxx:683
the cgv namespace
Definition print.h:11
a line in a text is simply represented as a token
representation of a token in a text by two pointers begin and end, that point to the first character ...
Definition token.h:18
const char * begin
pointers that define the range of characters
Definition token.h:20