1#ifndef INCLUDE_INJA_LEXER_HPP_
2#define INCLUDE_INJA_LEXER_HPP_
20 ExpressionStartForceLstrip,
25 StatementStartNoLstrip,
26 StatementStartForceLstrip,
29 CommentStartForceLstrip,
33 enum class MinusState {
41 MinusState minus_state;
42 nonstd::string_view m_in;
// Scans the next token inside a template body (expression, statement or line statement).
//
// Parameters:
//   close      - delimiter that terminates the body.
//   closeKind  - token kind built when a closing delimiter is found at tok_start.
//   close_trim - alternative closing delimiter; the default (empty view) disables that check.
//   trim       - defaults to false. NOTE(review): its use is not visible in this excerpt;
//                presumably it gates the whitespace skip after `close` - confirm.
//
// Returns an Eof token when tok_start is at or past the end of the input; otherwise
// operator, punctuation, comparison, Number, String or Unknown tokens as produced by the
// returns below. A closeKind token is built for either closing delimiter (presumably
// returned afterwards - the return statements are not visible here).
//
// Side effects: advances pos and updates minus_state, which decides whether a later
// '-' becomes a Minus token or the start of a number.
47 Token scan_body(nonstd::string_view close, Token::Kind closeKind, nonstd::string_view close_trim = nonstd::string_view(),
bool trim =
false) {
// Nothing left to scan.
50 if (tok_start >= m_in.size()) {
51 return make_token(Token::Kind::Eof);
53 const char ch = m_in[tok_start];
// Space, tab and carriage return are singled out here.
// NOTE(review): the branch body is not visible; presumably the character is skipped - confirm.
54 if (ch ==
' ' || ch ==
'\t' || ch ==
'\r') {
// The trimming close delimiter is tested before the plain one; after it, all
// following whitespace and newlines are skipped.
60 if (!close_trim.empty() && inja::string_view::starts_with(m_in.substr(tok_start), close_trim)) {
62 pos = tok_start + close_trim.size();
63 const Token tok = make_token(closeKind);
64 skip_whitespaces_and_newlines();
// Plain close delimiter: pos is moved just past it before the token is built.
// NOTE(review): the skip below is presumably conditional on `trim` - confirm.
68 if (inja::string_view::starts_with(m_in.substr(tok_start), close)) {
70 pos = tok_start + close.size();
71 const Token tok = make_token(closeKind);
73 skip_whitespaces_and_first_newline();
// NOTE(review): ch is a plain char; std::isalpha has undefined behaviour for values not
// representable as unsigned char (other than EOF) - consider static_cast<unsigned char>(ch).
85 if (std::isalpha(ch)) {
86 minus_state = MinusState::Operator;
// Remember the state seen on entry; an Operator state is reset to Number before the
// per-character handling below.
90 const MinusState current_minus_state = minus_state;
91 if (minus_state == MinusState::Operator) {
92 minus_state = MinusState::Number;
97 return make_token(Token::Kind::Plus);
// Minus token when the state on entry was Operator; otherwise the text is scanned as a number.
99 if (current_minus_state == MinusState::Operator) {
100 return make_token(Token::Kind::Minus);
102 return scan_number();
104 return make_token(Token::Kind::Times);
106 return make_token(Token::Kind::Slash);
108 return make_token(Token::Kind::Power);
110 return make_token(Token::Kind::Percent);
112 return make_token(Token::Kind::Dot);
114 return make_token(Token::Kind::Comma);
116 return make_token(Token::Kind::Colon);
118 return make_token(Token::Kind::LeftParen);
// Closing parenthesis, bracket and brace each set minus_state to Operator.
120 minus_state = MinusState::Operator;
121 return make_token(Token::Kind::RightParen);
123 return make_token(Token::Kind::LeftBracket);
125 minus_state = MinusState::Operator;
126 return make_token(Token::Kind::RightBracket);
128 return make_token(Token::Kind::LeftBrace);
130 minus_state = MinusState::Operator;
131 return make_token(Token::Kind::RightBrace);
// Comparison operators: the character at pos is peeked (bounds-checked) and an '='
// selects the two-character token kind.
133 if (pos < m_in.size() && m_in[pos] ==
'=') {
135 return make_token(Token::Kind::GreaterEqual);
137 return make_token(Token::Kind::GreaterThan);
139 if (pos < m_in.size() && m_in[pos] ==
'=') {
141 return make_token(Token::Kind::LessEqual);
143 return make_token(Token::Kind::LessThan);
// Without a following '=' these two cases yield Unknown rather than a one-character token.
145 if (pos < m_in.size() && m_in[pos] ==
'=') {
147 return make_token(Token::Kind::Equal);
149 return make_token(Token::Kind::Unknown);
151 if (pos < m_in.size() && m_in[pos] ==
'=') {
153 return make_token(Token::Kind::NotEqual);
155 return make_token(Token::Kind::Unknown);
157 return scan_string();
168 minus_state = MinusState::Operator;
169 return scan_number();
173 minus_state = MinusState::Operator;
// Fallback token kind for input that matched none of the handled cases.
176 return make_token(Token::Kind::Unknown);
182 if (pos >= m_in.size()) {
185 const char ch = m_in[pos];
186 if (!std::isalnum(ch) && ch !=
'.' && ch !=
'/' && ch !=
'_' && ch !=
'-') {
191 return make_token(Token::Kind::Id);
// Scans a numeric literal and returns it as a Number token.
//
// The character at pos is tested against the accepted set: decimal digits plus
// '.', 'e', 'E', '+' and '-'.
// NOTE(review): the surrounding loop and the actions taken at end of input or on a
// non-matching character are not visible here; presumably scanning stops there - confirm.
194 Token scan_number() {
196 if (pos >= m_in.size()) {
199 const char ch = m_in[pos];
// NOTE(review): ch is a plain char; std::isdigit has undefined behaviour for values not
// representable as unsigned char (other than EOF) - consider static_cast<unsigned char>(ch).
201 if (!std::isdigit(ch) && ch !=
'.' && ch !=
'e' && ch !=
'E' && ch !=
'+' && ch !=
'-') {
206 return make_token(Token::Kind::Number);
// Scans a string literal and returns it as a String token.
//
// Each step reads the character at pos and advances pos past it. The character at
// tok_start is used as the delimiter: an occurrence of it while `escape` is false
// matches the else-if branch below.
// NOTE(review): the loop, the declaration/update of `escape` and the branch bodies are
// not visible here; presumably the unescaped delimiter ends the string - confirm.
209 Token scan_string() {
212 if (pos >= m_in.size()) {
215 const char ch = m_in[pos++];
218 }
else if (!escape && ch == m_in[tok_start]) {
224 return make_token(Token::Kind::String);
227 Token make_token(Token::Kind kind)
const {
return Token(kind, string_view::slice(m_in, tok_start, pos)); }
// Skips whitespace starting at pos: the loop runs while pos is inside the input and
// the character there is a space, tab, line feed or carriage return.
// NOTE(review): the loop body is not visible here; presumably it advances pos by one - confirm.
229 void skip_whitespaces_and_newlines() {
230 if (pos < m_in.size()) {
231 while (pos < m_in.size() && (m_in[pos] ==
' ' || m_in[pos] ==
'\t' || m_in[pos] ==
'\n' || m_in[pos] ==
'\r')) {
// Skips horizontal whitespace (spaces and tabs) starting at pos, then inspects the
// next character for a line ending.
// NOTE(review): several branch bodies are not visible here. Presumably a single
// "\n", "\r" or "\r\n" is consumed after the blanks - confirm.
237 void skip_whitespaces_and_first_newline() {
238 if (pos < m_in.size()) {
239 while (pos < m_in.size() && (m_in[pos] ==
' ' || m_in[pos] ==
'\t')) {
// Re-check the bounds: the loop above may have reached the end of the input.
244 if (pos < m_in.size()) {
245 const char ch = m_in[pos];
248 }
else if (ch ==
'\r') {
// A carriage return may be followed by a line feed; the bounds check guards the peek.
250 if (pos < m_in.size() && m_in[pos] ==
'\n') {
// Works on a copy of `text` (a non-owning view; the underlying characters are never
// modified) and walks it from the back, dropping trailing spaces and tabs one at a time.
// NOTE(review): the handling of a line break and of any other character, as well as the
// return statements, are not visible here - confirm against the full function.
257 static nonstd::string_view clear_final_line_if_whitespace(nonstd::string_view text) {
258 nonstd::string_view result = text;
259 while (!result.empty()) {
260 const char ch = result.back();
261 if (ch ==
' ' || ch ==
'\t') {
// Shrink the view by one character from the end.
262 result.remove_suffix(1);
263 }
else if (ch ==
'\n' || ch ==
'\r') {
// Constructs a lexer from the given configuration. The initial state is State::Text
// and minus_state starts as MinusState::Number.
// NOTE(review): whether `config` is stored by reference or copied depends on the
// member's declaration, which is not visible here - if it is a reference, the
// LexerConfig must outlive the lexer.
273 explicit Lexer(
const LexerConfig &config) : config(config), state(State::Text), minus_state(MinusState::Number) {}
276 return get_source_location(m_in, tok_start);
// Prepares the lexer for a new input: minus_state is reset to MinusState::Number and a
// leading UTF-8 byte order mark (bytes EF BB BF) is removed from m_in.
// NOTE(review): the lines that store `input` and reset the other scanning fields are
// not visible here - confirm against the full function.
279 void start(nonstd::string_view input) {
284 minus_state = MinusState::Number;
// Drop the 3-byte UTF-8 BOM so it is not treated as template text.
287 if (inja::string_view::starts_with(m_in,
"\xEF\xBB\xBF")) {
288 m_in = m_in.substr(3);
296 if (tok_start >= m_in.size()) {
297 return make_token(Token::Kind::Eof);
304 const size_t open_start = m_in.substr(pos).find_first_of(config.open_chars);
305 if (open_start == nonstd::string_view::npos) {
308 return make_token(Token::Kind::Text);
313 nonstd::string_view open_str = m_in.substr(pos);
314 bool must_lstrip =
false;
315 if (inja::string_view::starts_with(open_str, config.expression_open)) {
316 if (inja::string_view::starts_with(open_str, config.expression_open_force_lstrip)) {
317 state = State::ExpressionStartForceLstrip;
320 state = State::ExpressionStart;
322 }
else if (inja::string_view::starts_with(open_str, config.statement_open)) {
323 if (inja::string_view::starts_with(open_str, config.statement_open_no_lstrip)) {
324 state = State::StatementStartNoLstrip;
325 }
else if (inja::string_view::starts_with(open_str, config.statement_open_force_lstrip )) {
326 state = State::StatementStartForceLstrip;
329 state = State::StatementStart;
330 must_lstrip = config.lstrip_blocks;
332 }
else if (inja::string_view::starts_with(open_str, config.comment_open)) {
333 if (inja::string_view::starts_with(open_str, config.comment_open_force_lstrip)) {
334 state = State::CommentStartForceLstrip;
337 state = State::CommentStart;
338 must_lstrip = config.lstrip_blocks;
340 }
else if ((pos == 0 || m_in[pos - 1] ==
'\n') && inja::string_view::starts_with(open_str, config.line_statement)) {
341 state = State::LineStart;
347 nonstd::string_view text = string_view::slice(m_in, tok_start, pos);
349 text = clear_final_line_if_whitespace(text);
355 return Token(Token::Kind::Text, text);
357 case State::ExpressionStart: {
358 state = State::ExpressionBody;
359 pos += config.expression_open.size();
360 return make_token(Token::Kind::ExpressionOpen);
362 case State::ExpressionStartForceLstrip: {
363 state = State::ExpressionBody;
364 pos += config.expression_open_force_lstrip.size();
365 return make_token(Token::Kind::ExpressionOpen);
367 case State::LineStart: {
368 state = State::LineBody;
369 pos += config.line_statement.size();
370 return make_token(Token::Kind::LineStatementOpen);
372 case State::StatementStart: {
373 state = State::StatementBody;
374 pos += config.statement_open.size();
375 return make_token(Token::Kind::StatementOpen);
377 case State::StatementStartNoLstrip: {
378 state = State::StatementBody;
379 pos += config.statement_open_no_lstrip.size();
380 return make_token(Token::Kind::StatementOpen);
382 case State::StatementStartForceLstrip: {
383 state = State::StatementBody;
384 pos += config.statement_open_force_lstrip.size();
385 return make_token(Token::Kind::StatementOpen);
387 case State::CommentStart: {
388 state = State::CommentBody;
389 pos += config.comment_open.size();
390 return make_token(Token::Kind::CommentOpen);
392 case State::CommentStartForceLstrip: {
393 state = State::CommentBody;
394 pos += config.comment_open_force_lstrip.size();
395 return make_token(Token::Kind::CommentOpen);
397 case State::ExpressionBody:
398 return scan_body(config.expression_close, Token::Kind::ExpressionClose, config.expression_close_force_rstrip);
399 case State::LineBody:
400 return scan_body(
"\n", Token::Kind::LineStatementClose);
401 case State::StatementBody:
402 return scan_body(config.statement_close, Token::Kind::StatementClose, config.statement_close_force_rstrip, config.trim_blocks);
403 case State::CommentBody: {
405 const size_t end = m_in.substr(pos).find(config.comment_close);
406 if (end == nonstd::string_view::npos) {
408 return make_token(Token::Kind::Eof);
412 const bool must_rstrip = inja::string_view::starts_with(m_in.substr(pos + end - 1), config.comment_close_force_rstrip);
416 pos += end + config.comment_close.size();
417 Token tok = make_token(Token::Kind::CommentClose);
419 if (must_rstrip || config.trim_blocks) {
420 skip_whitespaces_and_first_newline();
Class for lexing an inja Template.
Definition: lexer.hpp:16
Class for lexer configuration.
Definition: config.hpp:14
Definition: exceptions.hpp:9
Helper-class for the inja Lexer.
Definition: token.hpp:13