22 ExpressionStartForceLstrip,
27 StatementStartNoLstrip,
28 StatementStartForceLstrip,
31 CommentStartForceLstrip,
35 enum class MinusState {
43 MinusState minus_state;
44 std::string_view m_in;
// Scans one token from the body of an expression, statement or line statement.
//
// close      - closing delimiter of the current body; matching it yields a token of kind closeKind.
// closeKind  - token kind produced for either closing delimiter.
// close_trim - optional alternative closing delimiter (empty by default). It is tested before
//              `close`; after it matches, skip_whitespaces_and_newlines() is called.
// trim       - defaults to false. NOTE(review): its use is not visible here; presumably it gates
//              the skip_whitespaces_and_first_newline() call after `close` - confirm.
//
// Returns an Eof token when tok_start is at or past the end of the input.
48 Token scan_body(std::string_view close, Token::Kind closeKind, std::string_view close_trim = std::string_view(),
bool trim =
false) {
51 if (tok_start >= m_in.size()) {
52 return make_token(Token::Kind::Eof);
// First character of the candidate token.
54 const char ch = m_in[tok_start];
// Space, tab and carriage return are singled out here. '\n' is not in this set; line statements
// pass "\n" as their `close` delimiter.
55 if (ch ==
' ' || ch ==
'\t' || ch ==
'\r') {
// The trimming variant of the closing delimiter is checked before the regular one.
61 if (!close_trim.empty() && inja::string_view::starts_with(m_in.substr(tok_start), close_trim)) {
63 pos = tok_start + close_trim.size();
64 const Token tok = make_token(closeKind);
65 skip_whitespaces_and_newlines();
// Regular closing delimiter.
69 if (inja::string_view::starts_with(m_in.substr(tok_start), close)) {
71 pos = tok_start + close.size();
72 const Token tok = make_token(closeKind);
74 skip_whitespaces_and_first_newline();
// An alphabetic character sets minus_state to Operator, so a '-' that follows is returned as a
// Minus token (see the Minus / scan_number() choice below).
// NOTE(review): ch is a plain char; std::isalpha has undefined behavior for negative arguments,
// so bytes >= 0x80 on signed-char platforms should be cast to unsigned char first.
86 if (std::isalpha(ch)) {
87 minus_state = MinusState::Operator;
// Remember the state this character was reached with, then reset Operator back to Number.
// Only the branches below that assign Operator again keep a following '-' an operator.
91 const MinusState current_minus_state = minus_state;
92 if (minus_state == MinusState::Operator) {
93 minus_state = MinusState::Number;
98 return make_token(Token::Kind::Plus);
// With a saved state of Operator a Minus token is produced; otherwise scanning is delegated to
// scan_number().
100 if (current_minus_state == MinusState::Operator) {
101 return make_token(Token::Kind::Minus);
103 return scan_number();
105 return make_token(Token::Kind::Times);
107 return make_token(Token::Kind::Slash);
109 return make_token(Token::Kind::Power);
111 return make_token(Token::Kind::Percent);
113 return make_token(Token::Kind::Dot);
115 return make_token(Token::Kind::Comma);
117 return make_token(Token::Kind::Colon);
119 return make_token(Token::Kind::Pipe);
// Opening brackets leave minus_state untouched; each closing bracket sets it to Operator before
// returning its token.
121 return make_token(Token::Kind::LeftParen);
123 minus_state = MinusState::Operator;
124 return make_token(Token::Kind::RightParen);
126 return make_token(Token::Kind::LeftBracket);
128 minus_state = MinusState::Operator;
129 return make_token(Token::Kind::RightBracket);
131 return make_token(Token::Kind::LeftBrace);
133 minus_state = MinusState::Operator;
134 return make_token(Token::Kind::RightBrace);
// Two-character operators: a following '=' (bounds-checked against m_in.size()) selects
// GreaterEqual / LessEqual / Equal / NotEqual. Without it the token is GreaterThan / LessThan /
// Unknown / Unknown respectively.
136 if (pos < m_in.size() && m_in[pos] ==
'=') {
138 return make_token(Token::Kind::GreaterEqual);
140 return make_token(Token::Kind::GreaterThan);
142 if (pos < m_in.size() && m_in[pos] ==
'=') {
144 return make_token(Token::Kind::LessEqual);
146 return make_token(Token::Kind::LessThan);
148 if (pos < m_in.size() && m_in[pos] ==
'=') {
150 return make_token(Token::Kind::Equal);
152 return make_token(Token::Kind::Unknown);
154 if (pos < m_in.size() && m_in[pos] ==
'=') {
156 return make_token(Token::Kind::NotEqual);
158 return make_token(Token::Kind::Unknown);
160 return scan_string();
// minus_state is set to Operator before handing over to scan_number(), so a '-' after the number
// is returned as a Minus token.
171 minus_state = MinusState::Operator;
172 return scan_number();
176 minus_state = MinusState::Operator;
// Fallback token kind for input not handled by an earlier return.
179 return make_token(Token::Kind::Unknown);
185 if (pos >= m_in.size()) {
188 const char ch = m_in[pos];
189 if (!std::isalnum(ch) && ch !=
'.' && ch !=
'/' && ch !=
'_' && ch !=
'-') {
194 return make_token(Token::Kind::Id);
// Scans a numeric literal and returns it as a Number token spanning tok_start..pos.
//
// A character is accepted as part of the number when it is a digit, '.', 'e' or 'E', or a
// '+' / '-' that sits at index 0 of the input or directly after an 'e' / 'E' (exponent sign).
// The check is purely per character, so the well-formedness of the resulting text is not
// validated by the visible code.
197 Token scan_number() {
// End-of-input guard before the character at pos is read.
199 if (pos >= m_in.size()) {
202 const char ch = m_in[pos];
// NOTE(review): ch is a plain char; std::isdigit has undefined behavior for negative arguments,
// so a cast to unsigned char would be safer for bytes >= 0x80.
// `pos == 0` is tested before m_in[pos-1], which keeps that index from underflowing.
204 if (!(std::isdigit(ch) || ch ==
'.' || ch ==
'e' || ch ==
'E' || (ch ==
'+' && (pos == 0 || m_in[pos-1] ==
'e' || m_in[pos-1] ==
'E')) || (ch ==
'-' && (pos == 0 || m_in[pos-1] ==
'e' || m_in[pos-1] ==
'E')))) {
209 return make_token(Token::Kind::Number);
// Scans a quoted string literal and returns it as a String token.
// The terminating quote is whichever character sits at m_in[tok_start], i.e. the same character
// the token began with, and it only terminates the string when `escape` is false.
// NOTE(review): the declaration and update of `escape` are not visible here - presumably it is
// set by a preceding backslash; confirm.
212 Token scan_string() {
// End-of-input guard before the next character is consumed.
215 if (pos >= m_in.size()) {
// Reads the current character and advances pos past it in one step.
218 const char ch = m_in[pos++];
221 }
else if (!escape && ch == m_in[tok_start]) {
227 return make_token(Token::Kind::String);
// Builds a Token of the given kind whose text is the slice of m_in described by tok_start and pos
// (presumably the half-open range [tok_start, pos) - confirm against string_view::slice).
// Declared const, so it does not modify the lexer.
230 Token make_token(Token::Kind kind)
const {
231 return Token(kind, string_view::slice(m_in, tok_start, pos));
// Skips a run of whitespace starting at pos: spaces, tabs, line feeds and carriage returns.
// The loop condition is bounds-checked against m_in.size(); the loop body is expected to advance
// pos (not visible here).
234 void skip_whitespaces_and_newlines() {
// NOTE(review): this outer check repeats the first half of the loop condition below.
235 if (pos < m_in.size()) {
236 while (pos < m_in.size() && (m_in[pos] ==
' ' || m_in[pos] ==
'\t' || m_in[pos] ==
'\n' || m_in[pos] ==
'\r')) {
// Skips horizontal whitespace (spaces and tabs) starting at pos and then examines a single
// following character for a line break.
242 void skip_whitespaces_and_first_newline() {
243 if (pos < m_in.size()) {
// Only ' ' and '\t' are matched here; newlines are handled separately below.
244 while (pos < m_in.size() && (m_in[pos] ==
' ' || m_in[pos] ==
'\t')) {
// Inspect the character after the horizontal whitespace, if any is left.
249 if (pos < m_in.size()) {
250 const char ch = m_in[pos];
253 }
// A carriage return is checked for a directly following '\n', i.e. a CRLF pair.
else if (ch ==
'\r') {
255 if (pos < m_in.size() && m_in[pos] ==
'\n') {
// Works backwards from the end of `text` on a copy of the view, dropping trailing spaces and tabs
// one character at a time. The underlying characters are never modified; only the view shrinks.
// NOTE(review): what happens on '\n' / '\r' and on any other character is not visible here -
// presumably the trimmed view is returned at a line break and the original text otherwise; confirm.
262 static std::string_view clear_final_line_if_whitespace(std::string_view text) {
263 std::string_view result = text;
264 while (!result.empty()) {
// back() is safe here because the loop condition guarantees a non-empty view.
265 const char ch = result.back();
266 if (ch ==
' ' || ch ==
'\t') {
267 result.remove_suffix(1);
268 }
// A line break ('\n' or '\r') is treated differently from spaces and tabs.
else if (ch ==
'\n' || ch ==
'\r') {
// Constructs a lexer bound to `config`. The lexer starts in State::Text with
// minus_state = MinusState::Number and both tok_start and pos at 0.
// `explicit` prevents implicit conversion from LexerConfig.
278 explicit Lexer(
const LexerConfig& config): config(config), state(State::Text), minus_state(MinusState::Number), tok_start(0), pos(0) {}
281 return get_source_location(m_in, tok_start);
// Prepares the lexer for scanning `input`.
// The visible part resets minus_state to MinusState::Number and strips a leading UTF-8
// byte-order mark from m_in.
// NOTE(review): the assignment of `input` to m_in and any other resets are not visible here.
284 void start(std::string_view input) {
289 minus_state = MinusState::Number;
// "\xEF\xBB\xBF" is the 3-byte UTF-8 encoding of the byte-order mark; drop it when present.
292 if (inja::string_view::starts_with(m_in,
"\xEF\xBB\xBF")) {
293 m_in = m_in.substr(3);
301 if (tok_start >= m_in.size()) {
302 return make_token(Token::Kind::Eof);
309 const size_t open_start = m_in.substr(pos).find_first_of(config.open_chars);
310 if (open_start == std::string_view::npos) {
313 return make_token(Token::Kind::Text);
318 const std::string_view open_str = m_in.substr(pos);
319 bool must_lstrip =
false;
320 if (inja::string_view::starts_with(open_str, config.expression_open)) {
321 if (inja::string_view::starts_with(open_str, config.expression_open_force_lstrip)) {
322 state = State::ExpressionStartForceLstrip;
325 state = State::ExpressionStart;
327 }
else if (inja::string_view::starts_with(open_str, config.statement_open)) {
328 if (inja::string_view::starts_with(open_str, config.statement_open_no_lstrip)) {
329 state = State::StatementStartNoLstrip;
330 }
else if (inja::string_view::starts_with(open_str, config.statement_open_force_lstrip)) {
331 state = State::StatementStartForceLstrip;
334 state = State::StatementStart;
335 must_lstrip = config.lstrip_blocks;
337 }
else if (inja::string_view::starts_with(open_str, config.comment_open)) {
338 if (inja::string_view::starts_with(open_str, config.comment_open_force_lstrip)) {
339 state = State::CommentStartForceLstrip;
342 state = State::CommentStart;
343 must_lstrip = config.lstrip_blocks;
345 }
else if ((pos == 0 || m_in[pos - 1] ==
'\n') && inja::string_view::starts_with(open_str, config.line_statement)) {
346 state = State::LineStart;
352 std::string_view text = string_view::slice(m_in, tok_start, pos);
354 text = clear_final_line_if_whitespace(text);
360 return Token(Token::Kind::Text, text);
362 case State::ExpressionStart: {
363 state = State::ExpressionBody;
364 pos += config.expression_open.size();
365 return make_token(Token::Kind::ExpressionOpen);
367 case State::ExpressionStartForceLstrip: {
368 state = State::ExpressionBody;
369 pos += config.expression_open_force_lstrip.size();
370 return make_token(Token::Kind::ExpressionOpen);
372 case State::LineStart: {
373 state = State::LineBody;
374 pos += config.line_statement.size();
375 return make_token(Token::Kind::LineStatementOpen);
377 case State::StatementStart: {
378 state = State::StatementBody;
379 pos += config.statement_open.size();
380 return make_token(Token::Kind::StatementOpen);
382 case State::StatementStartNoLstrip: {
383 state = State::StatementBody;
384 pos += config.statement_open_no_lstrip.size();
385 return make_token(Token::Kind::StatementOpen);
387 case State::StatementStartForceLstrip: {
388 state = State::StatementBody;
389 pos += config.statement_open_force_lstrip.size();
390 return make_token(Token::Kind::StatementOpen);
392 case State::CommentStart: {
393 state = State::CommentBody;
394 pos += config.comment_open.size();
395 return make_token(Token::Kind::CommentOpen);
397 case State::CommentStartForceLstrip: {
398 state = State::CommentBody;
399 pos += config.comment_open_force_lstrip.size();
400 return make_token(Token::Kind::CommentOpen);
402 case State::ExpressionBody:
403 return scan_body(config.expression_close, Token::Kind::ExpressionClose, config.expression_close_force_rstrip);
404 case State::LineBody:
405 return scan_body(
"\n", Token::Kind::LineStatementClose);
406 case State::StatementBody:
407 return scan_body(config.statement_close, Token::Kind::StatementClose, config.statement_close_force_rstrip, config.trim_blocks);
408 case State::CommentBody: {
410 const size_t end = m_in.substr(pos).find(config.comment_close);
411 if (end == std::string_view::npos) {
413 return make_token(Token::Kind::Eof);
417 const bool must_rstrip = inja::string_view::starts_with(m_in.substr(pos + end - 1), config.comment_close_force_rstrip);
421 pos += end + config.comment_close.size();
422 Token tok = make_token(Token::Kind::CommentClose);
424 if (must_rstrip || config.trim_blocks) {
425 skip_whitespaces_and_first_newline();