1 #ifndef INCLUDE_INJA_LEXER_HPP_
2 #define INCLUDE_INJA_LEXER_HPP_
// Fragment of the lexer's State enumeration (the enum header and its other
// enumerators are not part of this listing). The entries below are the opener
// variants selected by the scanning code further down when the input starts
// with the matching `*_force_lstrip` / `*_no_lstrip` delimiter from the config.
20 ExpressionStartForceLstrip,
25 StatementStartNoLstrip,
26 StatementStartForceLstrip,
29 CommentStartForceLstrip,
// Mode used by scan_body() to disambiguate a minus sign: when the value
// captured there is Operator a Minus token is returned, otherwise the text is
// handed to scan_number(). (The enumerator list is not part of this listing;
// the values used in the visible code are Operator and Number.)
33 enum class MinusState {
// Current minus-handling mode of this lexer instance.
41 MinusState minus_state;
// The input being lexed; tok_start and pos are used as indices into it.
42 std::string_view m_in;
// Scans the next token inside an expression, statement or line-statement body.
//
//   close       delimiter that terminates the body
//   closeKind   token kind produced when a closing delimiter is matched
//   close_trim  optional alternative closing delimiter (empty by default);
//               when it matches, all following whitespace and newlines are
//               skipped after the close token has been built
//   trim        defaults to false. NOTE(review): the line that reads it is
//               missing from this listing - presumably it gates the
//               skip_whitespaces_and_first_newline() call below; confirm.
//
// NOTE: this listing omits several source lines (labels, `case` labels,
// closing braces), so the comments describe the visible statements only.
46 Token scan_body(std::string_view close, Token::Kind closeKind, std::string_view close_trim = std::string_view(),
bool trim =
false) {
// Nothing left to read: report end of input.
49 if (tok_start >= m_in.size()) {
50 return make_token(Token::Kind::Eof);
52 const char ch = m_in[tok_start];
// Space, tab and carriage return are tested here. '\n' is not in the set;
// note that the line-statement body is scanned with "\n" as its `close`.
53 if (ch ==
' ' || ch ==
'\t' || ch ==
'\r') {
// The trimming closer is tried first. On a match pos is moved just past it,
// the close token is built, and trailing whitespace/newlines are skipped.
59 if (!close_trim.empty() && inja::string_view::starts_with(m_in.substr(tok_start), close_trim)) {
61 pos = tok_start + close_trim.size();
62 const Token tok = make_token(closeKind);
63 skip_whitespaces_and_newlines();
// Regular closer: same token kind, but at most blanks plus one newline are
// skipped afterwards.
67 if (inja::string_view::starts_with(m_in.substr(tok_start), close)) {
69 pos = tok_start + close.size();
70 const Token tok = make_token(closeKind);
72 skip_whitespaces_and_first_newline();
// A token starting with a letter switches the lexer to Operator mode.
// NOTE(review): `ch` is a plain char; std::isalpha is undefined for negative
// values other than EOF, so bytes >= 0x80 should be converted with
// static_cast<unsigned char>(ch) first - verify and fix in the real source.
84 if (std::isalpha(ch)) {
85 minus_state = MinusState::Operator;
// Remember the mode for the minus decision below, then fall back to Number
// mode if it was Operator.
89 const MinusState current_minus_state = minus_state;
90 if (minus_state == MinusState::Operator) {
91 minus_state = MinusState::Number;
// Operator and punctuation tokens (their `case` labels are not part of this
// listing).
96 return make_token(Token::Kind::Plus);
// In Operator mode this is a Minus token; otherwise the text is scanned as a
// number.
98 if (current_minus_state == MinusState::Operator) {
99 return make_token(Token::Kind::Minus);
101 return scan_number();
103 return make_token(Token::Kind::Times);
105 return make_token(Token::Kind::Slash);
107 return make_token(Token::Kind::Power);
109 return make_token(Token::Kind::Percent);
111 return make_token(Token::Kind::Dot);
113 return make_token(Token::Kind::Comma);
115 return make_token(Token::Kind::Colon);
117 return make_token(Token::Kind::LeftParen);
// Each closing bracket kind switches to Operator mode before its token is
// returned, so a minus sign that follows it yields a Minus token.
119 minus_state = MinusState::Operator;
120 return make_token(Token::Kind::RightParen);
122 return make_token(Token::Kind::LeftBracket);
124 minus_state = MinusState::Operator;
125 return make_token(Token::Kind::RightBracket);
127 return make_token(Token::Kind::LeftBrace);
129 minus_state = MinusState::Operator;
130 return make_token(Token::Kind::RightBrace);
// Comparison operators: a bounds-checked look at m_in[pos] for '=' selects
// the two-character form.
132 if (pos < m_in.size() && m_in[pos] ==
'=') {
134 return make_token(Token::Kind::GreaterEqual);
136 return make_token(Token::Kind::GreaterThan);
138 if (pos < m_in.size() && m_in[pos] ==
'=') {
140 return make_token(Token::Kind::LessEqual);
142 return make_token(Token::Kind::LessThan);
// For the next two operators only the form followed by '=' is a token; the
// single character alone is reported as Unknown.
144 if (pos < m_in.size() && m_in[pos] ==
'=') {
146 return make_token(Token::Kind::Equal);
148 return make_token(Token::Kind::Unknown);
150 if (pos < m_in.size() && m_in[pos] ==
'=') {
152 return make_token(Token::Kind::NotEqual);
154 return make_token(Token::Kind::Unknown);
156 return scan_string();
// Numeric literal: switch to Operator mode, then scan the number.
167 minus_state = MinusState::Operator;
168 return scan_number();
// Mode switch for another token class; the statement that follows it is not
// part of this listing.
172 minus_state = MinusState::Operator;
// Final visible exit: an Unknown token (presumably the default case).
175 return make_token(Token::Kind::Unknown);
// Fragment of the identifier scanner (its signature line and loop header are
// not part of this listing); it ends by returning an Id token.
// End-of-input guard.
181 if (pos >= m_in.size()) {
184 const char ch = m_in[pos];
// Characters accepted in an identifier are alphanumerics plus '.', '/', '_'
// and '-'; this condition is true for every other character.
// NOTE(review): `ch` is a plain char; std::isalnum is undefined for negative
// values other than EOF - cast to unsigned char first; verify in the source.
185 if (!std::isalnum(ch) && ch !=
'.' && ch !=
'/' && ch !=
'_' && ch !=
'-') {
190 return make_token(Token::Kind::Id);
// Scans a numeric literal and returns it as a Number token.
// (The loop header and the statements inside the two `if` bodies are not part
// of this listing.)
193 Token scan_number() {
// End-of-input guard.
195 if (pos >= m_in.size()) {
198 const char ch = m_in[pos];
// Accepted characters: digits, '.', 'e', 'E', and a '+' or '-' only when it is
// at index 0 or directly follows 'e'/'E' (exponent sign). The condition is
// true for anything else.
// NOTE(review): `ch` is a plain char; std::isdigit is undefined for negative
// values other than EOF - cast to unsigned char first; verify in the source.
200 if (!(std::isdigit(ch) || ch ==
'.' || ch ==
'e' || ch ==
'E' || (ch ==
'+' && (pos == 0 || m_in[pos-1] ==
'e' || m_in[pos-1] ==
'E')) || (ch ==
'-' && (pos == 0 || m_in[pos-1] ==
'e' || m_in[pos-1] ==
'E')))) {
205 return make_token(Token::Kind::Number);
// Scans a string literal and returns it as a String token.
// (The loop header and the escape bookkeeping are not part of this listing.)
208 Token scan_string() {
// End-of-input guard.
211 if (pos >= m_in.size()) {
// Read one character and advance past it.
214 const char ch = m_in[pos++];
217 }
// Tests for an unescaped character equal to the one at tok_start, i.e. the
// character this token began with.
else if (!escape && ch == m_in[tok_start]) {
223 return make_token(Token::Kind::String);
// Builds a token of the given kind whose text is the slice of the input
// delimited by tok_start and pos. Does not modify the lexer (const).
226 Token make_token(Token::Kind kind)
const {
227 return Token(kind, string_view::slice(m_in, tok_start, pos));
// Moves over a run of spaces, tabs, line feeds and carriage returns starting
// at pos, never reading past the end of the input.
// (The loop body is not part of this listing - presumably it advances pos.)
230 void skip_whitespaces_and_newlines() {
// The loop condition repeats this bounds check on every iteration.
231 if (pos < m_in.size()) {
232 while (pos < m_in.size() && (m_in[pos] ==
' ' || m_in[pos] ==
'\t' || m_in[pos] ==
'\n' || m_in[pos] ==
'\r')) {
// Moves over a run of spaces and tabs starting at pos and then inspects the
// next character for a line ending. Every read is bounds-checked.
// (Several statements, including the first branch of the line-ending test,
// are not part of this listing.)
238 void skip_whitespaces_and_first_newline() {
239 if (pos < m_in.size()) {
// Blanks only - newlines are handled separately below.
240 while (pos < m_in.size() && (m_in[pos] ==
' ' || m_in[pos] ==
'\t')) {
245 if (pos < m_in.size()) {
246 const char ch = m_in[pos];
249 }
// Carriage return: additionally check for a '\n' right after it, so that a
// "\r\n" pair can be treated as one line ending.
else if (ch ==
'\r') {
251 if (pos < m_in.size() && m_in[pos] ==
'\n') {
// Works backwards from the end of `text` on a copy of the view: trailing
// spaces and tabs are removed one at a time, and a '\n' or '\r' is treated as
// a separate case. The underlying characters are never modified.
// (The handling of the line-ending case, of any other character and the
// return statement are not part of this listing.)
258 static std::string_view clear_final_line_if_whitespace(std::string_view text) {
259 std::string_view result = text;
260 while (!result.empty()) {
261 const char ch = result.back();
// Blank at the end: drop it and keep looking.
262 if (ch ==
' ' || ch ==
'\t') {
263 result.remove_suffix(1);
264 }
// Reached a line ending.
else if (ch ==
'\n' || ch ==
'\r') {
// Creates a lexer bound to the given configuration. The lexer starts in the
// Text state with minus handling in Number mode. `explicit` prevents implicit
// conversion from LexerConfig.
274 explicit Lexer(
const LexerConfig& config): config(config), state(State::Text), minus_state(MinusState::Number) {}
// Body of a position accessor (its signature line is not part of this
// listing): returns the source location computed from the input and the start
// index of the current token.
277 return get_source_location(m_in, tok_start);
// Begins lexing a new input.
// (The statements that store `input` and reset the other fields are not part
// of this listing.)
280 void start(std::string_view input) {
// Minus handling starts in Number mode.
285 minus_state = MinusState::Number;
// Drop a leading UTF-8 byte order mark (EF BB BF, three bytes) if present.
288 if (inja::string_view::starts_with(m_in,
"\xEF\xBB\xBF")) {
289 m_in = m_in.substr(3);
// Body of the main token-scanning routine (its signature line, the `switch`
// header, the Text `case` label and a number of closing braces are not part of
// this listing). Behaviour depends on the current `state`.
// End of input.
297 if (tok_start >= m_in.size()) {
298 return make_token(Token::Kind::Eof);
// Plain text: look for the next character that could begin an opener.
305 const size_t open_start = m_in.substr(pos).find_first_of(config.open_chars);
// No candidate found: a Text token is returned.
306 if (open_start == std::string_view::npos) {
309 return make_token(Token::Kind::Text);
// Classify the opener at pos. Inside each generic prefix match the more
// specific variants (force-lstrip / no-lstrip) are tested first.
314 std::string_view open_str = m_in.substr(pos);
315 bool must_lstrip =
false;
316 if (inja::string_view::starts_with(open_str, config.expression_open)) {
317 if (inja::string_view::starts_with(open_str, config.expression_open_force_lstrip)) {
318 state = State::ExpressionStartForceLstrip;
321 state = State::ExpressionStart;
323 }
else if (inja::string_view::starts_with(open_str, config.statement_open)) {
324 if (inja::string_view::starts_with(open_str, config.statement_open_no_lstrip)) {
325 state = State::StatementStartNoLstrip;
326 }
else if (inja::string_view::starts_with(open_str, config.statement_open_force_lstrip)) {
327 state = State::StatementStartForceLstrip;
// Plain statement opener: left-stripping follows the lstrip_blocks setting.
330 state = State::StatementStart;
331 must_lstrip = config.lstrip_blocks;
333 }
else if (inja::string_view::starts_with(open_str, config.comment_open)) {
334 if (inja::string_view::starts_with(open_str, config.comment_open_force_lstrip)) {
335 state = State::CommentStartForceLstrip;
// Plain comment opener: left-stripping follows the lstrip_blocks setting.
338 state = State::CommentStart;
339 must_lstrip = config.lstrip_blocks;
341 }
// A line statement is only recognised at index 0 or directly after '\n'.
else if ((pos == 0 || m_in[pos - 1] ==
'\n') && inja::string_view::starts_with(open_str, config.line_statement)) {
342 state = State::LineStart;
// Text preceding the opener is returned as a Text token; its final line may
// be cleared when it holds only whitespace. NOTE(review): the condition
// guarding the clearing is missing from this listing - presumably
// must_lstrip; confirm.
348 std::string_view text = string_view::slice(m_in, tok_start, pos);
350 text = clear_final_line_if_whitespace(text);
356 return Token(Token::Kind::Text, text);
// Opener states: switch to the matching body state, advance pos by the length
// of the opener variant that was matched, and return the Open token. The
// lstrip variants differ only in the delimiter length used.
358 case State::ExpressionStart: {
359 state = State::ExpressionBody;
360 pos += config.expression_open.size();
361 return make_token(Token::Kind::ExpressionOpen);
363 case State::ExpressionStartForceLstrip: {
364 state = State::ExpressionBody;
365 pos += config.expression_open_force_lstrip.size();
366 return make_token(Token::Kind::ExpressionOpen);
368 case State::LineStart: {
369 state = State::LineBody;
370 pos += config.line_statement.size();
371 return make_token(Token::Kind::LineStatementOpen);
373 case State::StatementStart: {
374 state = State::StatementBody;
375 pos += config.statement_open.size();
376 return make_token(Token::Kind::StatementOpen);
378 case State::StatementStartNoLstrip: {
379 state = State::StatementBody;
380 pos += config.statement_open_no_lstrip.size();
381 return make_token(Token::Kind::StatementOpen);
383 case State::StatementStartForceLstrip: {
384 state = State::StatementBody;
385 pos += config.statement_open_force_lstrip.size();
386 return make_token(Token::Kind::StatementOpen);
388 case State::CommentStart: {
389 state = State::CommentBody;
390 pos += config.comment_open.size();
391 return make_token(Token::Kind::CommentOpen);
393 case State::CommentStartForceLstrip: {
394 state = State::CommentBody;
395 pos += config.comment_open_force_lstrip.size();
396 return make_token(Token::Kind::CommentOpen);
// Body states delegate to scan_body() with the delimiters of that construct.
// A line statement is closed by "\n" and has no trimming closer; only the
// statement body passes the trim_blocks setting.
398 case State::ExpressionBody:
399 return scan_body(config.expression_close, Token::Kind::ExpressionClose, config.expression_close_force_rstrip);
400 case State::LineBody:
401 return scan_body(
"\n", Token::Kind::LineStatementClose);
402 case State::StatementBody:
403 return scan_body(config.statement_close, Token::Kind::StatementClose, config.statement_close_force_rstrip, config.trim_blocks);
// Comment body: search for the closing delimiter.
404 case State::CommentBody: {
406 const size_t end = m_in.substr(pos).find(config.comment_close);
// No closing delimiter found: the token returned is Eof.
407 if (end == std::string_view::npos) {
409 return make_token(Token::Kind::Eof);
// The force-rstrip closer is probed one character before the position where
// the regular closer was found. NOTE(review): with end == 0 the probe starts
// at pos - 1, i.e. on the last character before the comment body - confirm
// that is intended.
413 const bool must_rstrip = inja::string_view::starts_with(m_in.substr(pos + end - 1), config.comment_close_force_rstrip);
// Advance past the closer and build the CommentClose token.
417 pos += end + config.comment_close.size();
418 Token tok = make_token(Token::Kind::CommentClose);
// Skip blanks and one line ending when forced or when trim_blocks is set.
420 if (must_rstrip || config.trim_blocks) {
421 skip_whitespaces_and_first_newline();
Class for lexing an inja Template.
Definition: lexer.hpp:16
Class for lexer configuration.
Definition: config.hpp:14
Definition: exceptions.hpp:9
Helper-class for the inja Lexer.
Definition: token.hpp:12