Inja  3.4.0
A Template Engine for Modern C++
parser.hpp
1 #ifndef INCLUDE_INJA_PARSER_HPP_
2 #define INCLUDE_INJA_PARSER_HPP_
3 
4 #include <limits>
5 #include <stack>
6 #include <string>
7 #include <utility>
8 #include <vector>
9 
10 #include "config.hpp"
11 #include "exceptions.hpp"
12 #include "function_storage.hpp"
13 #include "lexer.hpp"
14 #include "node.hpp"
15 #include "template.hpp"
16 #include "token.hpp"
17 #include "utils.hpp"
18 
19 namespace inja {
20 
/*!
 * \brief Class for parsing an inja Template.
 */
24 class Parser {
// Operand list that parse_expression() builds up while reading an expression.
25  using Arguments = std::vector<std::shared_ptr<ExpressionNode>>;
// Operators of the current expression that have not been applied to their operands yet.
26  using OperatorStack = std::stack<std::shared_ptr<FunctionNode>>;
27 
// Parser settings; read for search_included_templates_in_files and include_callback.
28  const ParserConfig& config;
29 
// Token source, plus the storages that includes are added to and functions are looked up in.
30  Lexer lexer;
31  TemplateStorage& template_storage;
32  const FunctionStorage& function_storage;
33 
// Current token and a one-token lookahead; peek_tok is only meaningful while have_peek_tok is true.
34  Token tok, peek_tok;
35  bool have_peek_tok {false};
36 
// Text of the token that opened the literal currently being collected (consumed by add_literal()).
37  std::string_view literal_start;
38 
// Block that newly parsed nodes are appended to, and the expression list that receives the next parsed expression.
39  BlockNode* current_block {nullptr};
40  ExpressionListNode* current_expression_list {nullptr};
41 
// Statements that have been opened but not yet closed, innermost on top.
42  std::stack<IfStatementNode*> if_statement_stack;
43  std::stack<ForStatementNode*> for_statement_stack;
44  std::stack<BlockStatementNode*> block_statement_stack;
45 
46  inline void throw_parser_error(const std::string& message) const {
47  INJA_THROW(ParserError(message, lexer.current_position()));
48  }
49 
50  inline void get_next_token() {
51  if (have_peek_tok) {
52  tok = peek_tok;
53  have_peek_tok = false;
54  } else {
55  tok = lexer.scan();
56  }
57  }
58 
59  inline void get_peek_token() {
60  if (!have_peek_tok) {
61  peek_tok = lexer.scan();
62  have_peek_tok = true;
63  }
64  }
65 
66  inline void add_literal(Arguments &arguments, const char* content_ptr) {
67  std::string_view data_text(literal_start.data(), tok.text.data() - literal_start.data() + tok.text.size());
68  arguments.emplace_back(std::make_shared<LiteralNode>(data_text, data_text.data() - content_ptr));
69  }
70 
71  inline void add_operator(Arguments &arguments, OperatorStack &operator_stack) {
72  auto function = operator_stack.top();
73  operator_stack.pop();
74 
75  if (static_cast<int>(arguments.size()) < function->number_args) {
76  throw_parser_error("too few arguments");
77  }
78 
79  for (int i = 0; i < function->number_args; ++i) {
80  function->arguments.insert(function->arguments.begin(), arguments.back());
81  arguments.pop_back();
82  }
83  arguments.emplace_back(function);
84  }
85 
86  void add_to_template_storage(std::string_view path, std::string& template_name) {
87  if (template_storage.find(template_name) != template_storage.end()) {
88  return;
89  }
90 
91  std::string original_path = static_cast<std::string>(path);
92  std::string original_name = template_name;
93 
94  if (config.search_included_templates_in_files) {
95  // Build the relative path
96  template_name = original_path + original_name;
97  if (template_name.compare(0, 2, "./") == 0) {
98  template_name.erase(0, 2);
99  }
100 
101  if (template_storage.find(template_name) == template_storage.end()) {
102  // Load file
103  std::ifstream file;
104  file.open(template_name);
105  if (!file.fail()) {
106  std::string text((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
107 
108  auto include_template = Template(text);
109  template_storage.emplace(template_name, include_template);
110  parse_into_template(template_storage[template_name], template_name);
111  return;
112  } else if (!config.include_callback) {
113  INJA_THROW(FileError("failed accessing file at '" + template_name + "'"));
114  }
115  }
116  }
117 
118  // Try include callback
119  if (config.include_callback) {
120  auto include_template = config.include_callback(original_path, original_name);
121  template_storage.emplace(template_name, include_template);
122  }
123  }
124 
125  std::string parse_filename() const {
126  if (tok.kind != Token::Kind::String) {
127  throw_parser_error("expected string, got '" + tok.describe() + "'");
128  }
129 
130  if (tok.text.length() < 2) {
131  throw_parser_error("expected filename, got '" + static_cast<std::string>(tok.text) + "'");
132  }
133 
134  // Remove first and last character ""
135  return std::string {tok.text.substr(1, tok.text.length() - 2)};
136  }
137 
138  bool parse_expression(Template& tmpl, Token::Kind closing) {
139  current_expression_list->root = parse_expression(tmpl);
140  return tok.kind == closing;
141  }
142 
/*!
 * \brief Parses a single expression starting at the current token.
 *
 * Tokens are consumed until Eof, a comma outside of any [...] / {...} literal, or a token kind
 * this function does not handle; that token is left in `tok` for the caller to inspect.
 *
 * Operands are collected in `arguments`, operators in `operator_stack`. Before a new operator is
 * pushed, stacked operators with a higher precedence (or an equal precedence when the new operator
 * is left-associative) are applied to their operands via add_operator().
 *
 *  - String and number tokens and the ids true / false / null become a LiteralNode.
 *  - An outermost [...] or {...} span becomes one LiteralNode covering the opening token up to
 *    the matching closing token; tokens inside such a span create no nodes of their own.
 *  - An id followed by '(' is a function call; its arguments are parsed recursively and the
 *    function is looked up in `function_storage` by name and argument count.
 *  - Any other id becomes a DataNode.
 *  - '(' starts a recursively parsed sub-expression that must end on ')'.
 *
 * \param tmpl Template being parsed; node positions are offsets into `tmpl.content`.
 * \return The root of the expression, or an empty pointer when no operand was found.
 * \throws ParserError on unexpected ']' / '}' / ':', missing ')', unknown function,
 *         too few operands for an operator, or more than one operand left over.
 */
143  std::shared_ptr<ExpressionNode> parse_expression(Template& tmpl) {
144  size_t current_bracket_level {0};
145  size_t current_brace_level {0};
146  Arguments arguments;
147  OperatorStack operator_stack;
148 
149  while (tok.kind != Token::Kind::Eof) {
150  // Literals
151  switch (tok.kind) {
152  case Token::Kind::String: {
153  if (current_brace_level == 0 && current_bracket_level == 0) {
154  literal_start = tok.text;
155  add_literal(arguments, tmpl.content.c_str());
156  }
157  } break;
158  case Token::Kind::Number: {
159  if (current_brace_level == 0 && current_bracket_level == 0) {
160  literal_start = tok.text;
161  add_literal(arguments, tmpl.content.c_str());
162  }
163  } break;
// Opening bracket / brace: remember where an outermost array / object literal starts.
164  case Token::Kind::LeftBracket: {
165  if (current_brace_level == 0 && current_bracket_level == 0) {
166  literal_start = tok.text;
167  }
168  current_bracket_level += 1;
169  } break;
170  case Token::Kind::LeftBrace: {
171  if (current_brace_level == 0 && current_bracket_level == 0) {
172  literal_start = tok.text;
173  }
174  current_brace_level += 1;
175  } break;
// Closing bracket / brace: once both nesting levels are back to zero the whole span is one literal.
176  case Token::Kind::RightBracket: {
177  if (current_bracket_level == 0) {
178  throw_parser_error("unexpected ']'");
179  }
180 
181  current_bracket_level -= 1;
182  if (current_brace_level == 0 && current_bracket_level == 0) {
183  add_literal(arguments, tmpl.content.c_str());
184  }
185  } break;
186  case Token::Kind::RightBrace: {
187  if (current_brace_level == 0) {
188  throw_parser_error("unexpected '}'");
189  }
190 
191  current_brace_level -= 1;
192  if (current_brace_level == 0 && current_bracket_level == 0) {
193  add_literal(arguments, tmpl.content.c_str());
194  }
195  } break;
196  case Token::Kind::Id: {
// The lookahead is needed to tell a function call (id followed by '(') from a variable.
197  get_peek_token();
198 
199  // Data Literal
200  if (tok.text == static_cast<decltype(tok.text)>("true") || tok.text == static_cast<decltype(tok.text)>("false") ||
201  tok.text == static_cast<decltype(tok.text)>("null")) {
202  if (current_brace_level == 0 && current_bracket_level == 0) {
203  literal_start = tok.text;
204  add_literal(arguments, tmpl.content.c_str());
205  }
206 
207  // Operator
208  } else if (tok.text == "and" || tok.text == "or" || tok.text == "in" || tok.text == "not") {
// Word operators share the handling of the symbolic operators below.
209  goto parse_operator;
210 
211  // Functions
212  } else if (peek_tok.kind == Token::Kind::LeftParen) {
213  auto func = std::make_shared<FunctionNode>(tok.text, tok.text.data() - tmpl.content.c_str());
// Consume the '(' that was peeked; each loop iteration then skips '(' or ',' and parses one argument.
214  get_next_token();
215  do {
216  get_next_token();
217  auto expr = parse_expression(tmpl);
218  if (!expr) {
219  break;
220  }
221  func->number_args += 1;
222  func->arguments.emplace_back(expr);
223  } while (tok.kind == Token::Kind::Comma);
224  if (tok.kind != Token::Kind::RightParen) {
225  throw_parser_error("expected right parenthesis, got '" + tok.describe() + "'");
226  }
227 
228  auto function_data = function_storage.find_function(func->name, func->number_args);
229  if (function_data.operation == FunctionStorage::Operation::None) {
230  throw_parser_error("unknown function " + func->name);
231  }
232  func->operation = function_data.operation;
233  if (function_data.operation == FunctionStorage::Operation::Callback) {
234  func->callback = function_data.callback;
235  }
236  arguments.emplace_back(func);
237 
238  // Variables
239  } else {
240  arguments.emplace_back(std::make_shared<DataNode>(static_cast<std::string>(tok.text), tok.text.data() - tmpl.content.c_str()));
241  }
242 
243  // Operators
244  } break;
245  case Token::Kind::Equal:
246  case Token::Kind::NotEqual:
247  case Token::Kind::GreaterThan:
248  case Token::Kind::GreaterEqual:
249  case Token::Kind::LessThan:
250  case Token::Kind::LessEqual:
251  case Token::Kind::Plus:
252  case Token::Kind::Minus:
253  case Token::Kind::Times:
254  case Token::Kind::Slash:
255  case Token::Kind::Power:
256  case Token::Kind::Percent:
257  case Token::Kind::Dot: {
258 
259  parse_operator:
// Map the token (symbol, or and / or / in / not) to its operation.
260  FunctionStorage::Operation operation;
261  switch (tok.kind) {
262  case Token::Kind::Id: {
263  if (tok.text == "and") {
264  operation = FunctionStorage::Operation::And;
265  } else if (tok.text == "or") {
266  operation = FunctionStorage::Operation::Or;
267  } else if (tok.text == "in") {
268  operation = FunctionStorage::Operation::In;
269  } else if (tok.text == "not") {
270  operation = FunctionStorage::Operation::Not;
271  } else {
272  throw_parser_error("unknown operator in parser.");
273  }
274  } break;
275  case Token::Kind::Equal: {
276  operation = FunctionStorage::Operation::Equal;
277  } break;
278  case Token::Kind::NotEqual: {
279  operation = FunctionStorage::Operation::NotEqual;
280  } break;
281  case Token::Kind::GreaterThan: {
282  operation = FunctionStorage::Operation::Greater;
283  } break;
284  case Token::Kind::GreaterEqual: {
285  operation = FunctionStorage::Operation::GreaterEqual;
286  } break;
287  case Token::Kind::LessThan: {
288  operation = FunctionStorage::Operation::Less;
289  } break;
290  case Token::Kind::LessEqual: {
291  operation = FunctionStorage::Operation::LessEqual;
292  } break;
293  case Token::Kind::Plus: {
294  operation = FunctionStorage::Operation::Add;
295  } break;
296  case Token::Kind::Minus: {
297  operation = FunctionStorage::Operation::Subtract;
298  } break;
299  case Token::Kind::Times: {
300  operation = FunctionStorage::Operation::Multiplication;
301  } break;
302  case Token::Kind::Slash: {
303  operation = FunctionStorage::Operation::Division;
304  } break;
305  case Token::Kind::Power: {
306  operation = FunctionStorage::Operation::Power;
307  } break;
308  case Token::Kind::Percent: {
309  operation = FunctionStorage::Operation::Modulo;
310  } break;
311  case Token::Kind::Dot: {
312  operation = FunctionStorage::Operation::AtId;
313  } break;
314  default: {
315  throw_parser_error("unknown operator in parser.");
316  }
317  }
318  auto function_node = std::make_shared<FunctionNode>(operation, tok.text.data() - tmpl.content.c_str());
319 
// Apply stacked operators that bind at least as tightly before stacking the new one.
320  while (!operator_stack.empty() &&
321  ((operator_stack.top()->precedence > function_node->precedence) ||
322  (operator_stack.top()->precedence == function_node->precedence && function_node->associativity == FunctionNode::Associativity::Left))) {
323  add_operator(arguments, operator_stack);
324  }
325 
326  operator_stack.emplace(function_node);
327  } break;
// A comma ends the expression unless it is inside an array / object literal.
328  case Token::Kind::Comma: {
329  if (current_brace_level == 0 && current_bracket_level == 0) {
330  goto break_loop;
331  }
332  } break;
333  case Token::Kind::Colon: {
334  if (current_brace_level == 0 && current_bracket_level == 0) {
335  throw_parser_error("unexpected ':'");
336  }
337  } break;
338  case Token::Kind::LeftParen: {
339  get_next_token();
340  auto expr = parse_expression(tmpl);
341  if (tok.kind != Token::Kind::RightParen) {
342  throw_parser_error("expected right parenthesis, got '" + tok.describe() + "'");
343  }
344  if (!expr) {
345  throw_parser_error("empty expression in parentheses");
346  }
347  arguments.emplace_back(expr);
348  } break;
// Any other token ends the expression and stays in `tok` for the caller.
349  default:
350  goto break_loop;
351  }
352 
353  get_next_token();
354  }
355 
356  break_loop:
// Apply the operators that are still pending.
357  while (!operator_stack.empty()) {
358  add_operator(arguments, operator_stack);
359  }
360 
// Exactly one operand is the result, none yields an empty pointer, more than one is an error.
361  std::shared_ptr<ExpressionNode> expr;
362  if (arguments.size() == 1) {
363  expr = arguments[0];
364  arguments = {};
365  } else if (arguments.size() > 1) {
366  throw_parser_error("malformed expression");
367  }
368  return expr;
369  }
370 
/*!
 * \brief Parses the statement whose keyword is the current token.
 *
 * Handled keywords: if, else (optionally followed by if), endif, block, endblock, for, endfor,
 * include, extends and set. Opening statements append their node to `current_block`, push it on
 * the matching statement stack and redirect `current_block` into the new node's body; closing
 * statements restore `current_block` to the node's parent and pop the stack.
 *
 * \param tmpl    Template being parsed; node positions are offsets into `tmpl.content`.
 * \param closing Token kind that has to follow an expression inside the statement.
 * \param path    Directory prefix handed to add_to_template_storage() for include / extends.
 * \return false when the current token is not an id, the keyword is unknown, or the expression of
 *         an if / else if / for / set statement is not followed by `closing`; true otherwise.
 * \throws ParserError on unmatched else / endif / endblock / endfor, missing names, a duplicate
 *         block name, a missing 'in' or '=', and on errors raised while parsing expressions.
 */
371  bool parse_statement(Template& tmpl, Token::Kind closing, std::string_view path) {
372  if (tok.kind != Token::Kind::Id) {
373  return false;
374  }
375 
376  if (tok.text == static_cast<decltype(tok.text)>("if")) {
377  get_next_token();
378 
379  auto if_statement_node = std::make_shared<IfStatementNode>(current_block, tok.text.data() - tmpl.content.c_str());
380  current_block->nodes.emplace_back(if_statement_node);
381  if_statement_stack.emplace(if_statement_node.get());
382  current_block = &if_statement_node->true_statement;
383  current_expression_list = &if_statement_node->condition;
384 
385  if (!parse_expression(tmpl, closing)) {
386  return false;
387  }
388  } else if (tok.text == static_cast<decltype(tok.text)>("else")) {
389  if (if_statement_stack.empty()) {
390  throw_parser_error("else without matching if");
391  }
392  auto& if_statement_data = if_statement_stack.top();
393  get_next_token();
394 
395  if_statement_data->has_false_statement = true;
396  current_block = &if_statement_data->false_statement;
397 
398  // Chained else if
// The chained if is created inside the false branch; the leading `true` argument presumably
// sets is_nested, which the endif handling uses to pop it - confirm against IfStatementNode.
399  if (tok.kind == Token::Kind::Id && tok.text == static_cast<decltype(tok.text)>("if")) {
400  get_next_token();
401 
402  auto if_statement_node = std::make_shared<IfStatementNode>(true, current_block, tok.text.data() - tmpl.content.c_str());
403  current_block->nodes.emplace_back(if_statement_node);
404  if_statement_stack.emplace(if_statement_node.get());
405  current_block = &if_statement_node->true_statement;
406  current_expression_list = &if_statement_node->condition;
407 
408  if (!parse_expression(tmpl, closing)) {
409  return false;
410  }
411  }
412  } else if (tok.text == static_cast<decltype(tok.text)>("endif")) {
413  if (if_statement_stack.empty()) {
414  throw_parser_error("endif without matching if");
415  }
416 
417  // Nested if statements
// One endif closes every stacked entry flagged is_nested plus the entry below them.
418  while (if_statement_stack.top()->is_nested) {
419  if_statement_stack.pop();
420  }
421 
422  auto& if_statement_data = if_statement_stack.top();
423  get_next_token();
424 
425  current_block = if_statement_data->parent;
426  if_statement_stack.pop();
427  } else if (tok.text == static_cast<decltype(tok.text)>("block")) {
428  get_next_token();
429 
430  if (tok.kind != Token::Kind::Id) {
431  throw_parser_error("expected block name, got '" + tok.describe() + "'");
432  }
433 
434  const std::string block_name = static_cast<std::string>(tok.text);
435 
436  auto block_statement_node = std::make_shared<BlockStatementNode>(current_block, block_name, tok.text.data() - tmpl.content.c_str());
437  current_block->nodes.emplace_back(block_statement_node);
438  block_statement_stack.emplace(block_statement_node.get());
439  current_block = &block_statement_node->block;
// Block names must be unique within one template: emplace reports an already existing key.
440  auto success = tmpl.block_storage.emplace(block_name, block_statement_node);
441  if (!success.second) {
442  throw_parser_error("block with the name '" + block_name + "' does already exist");
443  }
444 
445  get_next_token();
446  } else if (tok.text == static_cast<decltype(tok.text)>("endblock")) {
447  if (block_statement_stack.empty()) {
448  throw_parser_error("endblock without matching block");
449  }
450 
451  auto& block_statement_data = block_statement_stack.top();
452  get_next_token();
453 
454  current_block = block_statement_data->parent;
455  block_statement_stack.pop();
456  } else if (tok.text == static_cast<decltype(tok.text)>("for")) {
457  get_next_token();
458 
459  // options: for a in arr; for a, b in obj
460  if (tok.kind != Token::Kind::Id) {
461  throw_parser_error("expected id, got '" + tok.describe() + "'");
462  }
463 
464  Token value_token = tok;
465  get_next_token();
466 
467  // Object type
468  std::shared_ptr<ForStatementNode> for_statement_node;
469  if (tok.kind == Token::Kind::Comma) {
470  get_next_token();
471  if (tok.kind != Token::Kind::Id) {
472  throw_parser_error("expected id, got '" + tok.describe() + "'");
473  }
474 
// With two loop variables the first id names the key and the second the value.
475  Token key_token = std::move(value_token);
476  value_token = tok;
477  get_next_token();
478 
479  for_statement_node = std::make_shared<ForObjectStatementNode>(static_cast<std::string>(key_token.text), static_cast<std::string>(value_token.text),
480  current_block, tok.text.data() - tmpl.content.c_str());
481 
482  // Array type
483  } else {
484  for_statement_node =
485  std::make_shared<ForArrayStatementNode>(static_cast<std::string>(value_token.text), current_block, tok.text.data() - tmpl.content.c_str());
486  }
487 
488  current_block->nodes.emplace_back(for_statement_node);
489  for_statement_stack.emplace(for_statement_node.get());
490  current_block = &for_statement_node->body;
491  current_expression_list = &for_statement_node->condition;
492 
493  if (tok.kind != Token::Kind::Id || tok.text != static_cast<decltype(tok.text)>("in")) {
494  throw_parser_error("expected 'in', got '" + tok.describe() + "'");
495  }
496  get_next_token();
497 
498  if (!parse_expression(tmpl, closing)) {
499  return false;
500  }
501  } else if (tok.text == static_cast<decltype(tok.text)>("endfor")) {
502  if (for_statement_stack.empty()) {
503  throw_parser_error("endfor without matching for");
504  }
505 
506  auto& for_statement_data = for_statement_stack.top();
507  get_next_token();
508 
509  current_block = for_statement_data->parent;
510  for_statement_stack.pop();
511  } else if (tok.text == static_cast<decltype(tok.text)>("include")) {
512  get_next_token();
513 
// add_to_template_storage() may rewrite template_name; the node stores the rewritten name.
514  std::string template_name = parse_filename();
515  add_to_template_storage(path, template_name);
516 
517  current_block->nodes.emplace_back(std::make_shared<IncludeStatementNode>(template_name, tok.text.data() - tmpl.content.c_str()));
518 
519  get_next_token();
520  } else if (tok.text == static_cast<decltype(tok.text)>("extends")) {
521  get_next_token();
522 
523  std::string template_name = parse_filename();
524  add_to_template_storage(path, template_name);
525 
526  current_block->nodes.emplace_back(std::make_shared<ExtendsStatementNode>(template_name, tok.text.data() - tmpl.content.c_str()));
527 
528  get_next_token();
529  } else if (tok.text == static_cast<decltype(tok.text)>("set")) {
530  get_next_token();
531 
532  if (tok.kind != Token::Kind::Id) {
533  throw_parser_error("expected variable name, got '" + tok.describe() + "'");
534  }
535 
536  std::string key = static_cast<std::string>(tok.text);
537  get_next_token();
538 
539  auto set_statement_node = std::make_shared<SetStatementNode>(key, tok.text.data() - tmpl.content.c_str());
540  current_block->nodes.emplace_back(set_statement_node);
541  current_expression_list = &set_statement_node->expression;
542 
// The assignment sign is recognised by its text rather than by a token kind.
543  if (tok.text != static_cast<decltype(tok.text)>("=")) {
544  throw_parser_error("expected '=', got '" + tok.describe() + "'");
545  }
546  get_next_token();
547 
548  if (!parse_expression(tmpl, closing)) {
549  return false;
550  }
551  } else {
552  return false;
553  }
554  return true;
555  }
556 
557  void parse_into(Template& tmpl, std::string_view path) {
558  lexer.start(tmpl.content);
559  current_block = &tmpl.root;
560 
561  for (;;) {
562  get_next_token();
563  switch (tok.kind) {
564  case Token::Kind::Eof: {
565  if (!if_statement_stack.empty()) {
566  throw_parser_error("unmatched if");
567  }
568  if (!for_statement_stack.empty()) {
569  throw_parser_error("unmatched for");
570  }
571  }
572  return;
573  case Token::Kind::Text: {
574  current_block->nodes.emplace_back(std::make_shared<TextNode>(tok.text.data() - tmpl.content.c_str(), tok.text.size()));
575  } break;
576  case Token::Kind::StatementOpen: {
577  get_next_token();
578  if (!parse_statement(tmpl, Token::Kind::StatementClose, path)) {
579  throw_parser_error("expected statement, got '" + tok.describe() + "'");
580  }
581  if (tok.kind != Token::Kind::StatementClose) {
582  throw_parser_error("expected statement close, got '" + tok.describe() + "'");
583  }
584  } break;
585  case Token::Kind::LineStatementOpen: {
586  get_next_token();
587  if (!parse_statement(tmpl, Token::Kind::LineStatementClose, path)) {
588  throw_parser_error("expected statement, got '" + tok.describe() + "'");
589  }
590  if (tok.kind != Token::Kind::LineStatementClose && tok.kind != Token::Kind::Eof) {
591  throw_parser_error("expected line statement close, got '" + tok.describe() + "'");
592  }
593  } break;
594  case Token::Kind::ExpressionOpen: {
595  get_next_token();
596 
597  auto expression_list_node = std::make_shared<ExpressionListNode>(tok.text.data() - tmpl.content.c_str());
598  current_block->nodes.emplace_back(expression_list_node);
599  current_expression_list = expression_list_node.get();
600 
601  if (!parse_expression(tmpl, Token::Kind::ExpressionClose)) {
602  throw_parser_error("expected expression close, got '" + tok.describe() + "'");
603  }
604  } break;
605  case Token::Kind::CommentOpen: {
606  get_next_token();
607  if (tok.kind != Token::Kind::CommentClose) {
608  throw_parser_error("expected comment close, got '" + tok.describe() + "'");
609  }
610  } break;
611  default: {
612  throw_parser_error("unexpected token '" + tok.describe() + "'");
613  } break;
614  }
615  }
616  }
617 
618 public:
619  explicit Parser(const ParserConfig& parser_config, const LexerConfig& lexer_config, TemplateStorage& template_storage,
620  const FunctionStorage& function_storage)
621  : config(parser_config), lexer(lexer_config), template_storage(template_storage), function_storage(function_storage) {}
622 
623  Template parse(std::string_view input, std::string_view path) {
624  auto result = Template(static_cast<std::string>(input));
625  parse_into(result, path);
626  return result;
627  }
628 
629  void parse_into_template(Template& tmpl, std::string_view filename) {
630  std::string_view path = filename.substr(0, filename.find_last_of("/\\") + 1);
631 
632  // StringRef path = sys::path::parent_path(filename);
633  auto sub_parser = Parser(config, lexer.get_config(), template_storage, function_storage);
634  sub_parser.parse_into(tmpl, path);
635  }
636 
637  static std::string load_file(const std::string& filename) {
638  std::ifstream file;
639  file.open(filename);
640  if (file.fail()) {
641  INJA_THROW(FileError("failed accessing file at '" + filename + "'"));
642  }
643  std::string text((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
644  return text;
645  }
646 };
647 
648 } // namespace inja
649 
650 #endif // INCLUDE_INJA_PARSER_HPP_
Definition: node.hpp:66
Definition: node.hpp:251
Class for builtin functions and user-defined callbacks.
Definition: function_storage.hpp:16
Class for lexing an inja Template.
Definition: lexer.hpp:16
Class for parsing an inja Template.
Definition: parser.hpp:24
Definition: exceptions.hpp:36
Class for lexer configuration.
Definition: config.hpp:14
Class for parser configuration.
Definition: config.hpp:66
Definition: exceptions.hpp:28
The main inja Template.
Definition: template.hpp:17
Helper-class for the inja Lexer.
Definition: token.hpp:12