XACC
lexer.h
Go to the documentation of this file.
1 /*
2  * This file is part of Quantum++.
3  *
4  * MIT License
5  *
6  * Copyright (c) 2013 - 2020 Vlad Gheorghiu.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24  * SOFTWARE.
25  *
26  * Adapted from Bruno Schmitt's Tweedledee library
27  */
28 
34 #ifndef QASM_LEXER_H_
35 #define QASM_LEXER_H_
36 
37 namespace qpp {
38 namespace qasm {
39 
44 class Lexer {
45  public:
49  Lexer(const Lexer&) = delete;
50 
54  Lexer& operator=(const Lexer&) = delete;
55 
62  Lexer(std::shared_ptr<std::istream> buffer, const std::string& fname = "")
63  : loc_(fname, 1, 1), buf_(buffer) {}
64 
72  Token next_token() { return lex(); }
73 
74  private:
75  Location loc_;
76  std::shared_ptr<std::istream> buf_;
77 
83  void skip_char(int n = 1) {
84  buf_->ignore(n);
85  loc_.advance_column(n);
86  }
87 
93  bool skip_whitespace() {
94  int consumed = 0;
95 
96  while (buf_->peek() == ' ' || buf_->peek() == '\t') {
97  buf_->ignore();
98  ++consumed;
99  }
100 
101  loc_.advance_column(consumed);
102  if (consumed != 0)
103  return true;
104  else
105  return false;
106  }
107 
111  void skip_line_comment() {
112  int consumed = 0;
113 
114  while (buf_->peek() != 0 && buf_->peek() != '\n' &&
115  buf_->peek() != '\r') {
116  buf_->ignore();
117  ++consumed;
118  }
119 
120  loc_.advance_column(consumed);
121  }
122 
131  Token lex_numeric_constant(Location tok_start) {
132  std::string str;
133  str.reserve(64); // Reserve space to avoid reallocation
134 
135  while (std::isdigit(buf_->peek())) {
136  str.push_back(buf_->peek());
137  skip_char();
138  }
139 
140  if (buf_->peek() != '.') {
141  return Token(tok_start, Token::Kind::nninteger, str);
142  }
143 
144  // lex decimal part
145  str.push_back(buf_->peek());
146  skip_char();
147  while (std::isdigit(buf_->peek())) {
148  str.push_back(buf_->peek());
149  skip_char();
150  }
151 
152  return Token(tok_start, Token::Kind::real, str);
153  }
154 
163  Token lex_identifier(Location tok_start) {
164  std::string str;
165  str.reserve(64); // Reserve space to avoid reallocation
166 
167  while (std::isalpha(buf_->peek()) || std::isdigit(buf_->peek()) ||
168  buf_->peek() == '_') {
169  str.push_back(buf_->peek());
170  skip_char();
171  }
172 
173  // Check if the identifier is a known keyword
174  auto keyword = keywords.find(str);
175  if (keyword != keywords.end()) {
176  return Token(tok_start, keyword->second, str);
177  }
178 
179  return Token(tok_start, Token::Kind::identifier, str);
180  }
181 
190  Token lex_string(Location tok_start) {
191  std::string str;
192  str.reserve(64); // Reserve space to avoid reallocation
193 
194  while (buf_->peek() != '"' && buf_->peek() != '\n' &&
195  buf_->peek() != '\r') {
196  str.push_back(buf_->peek());
197  skip_char();
198  }
199 
200  if (buf_->peek() != '"') {
201  std::cerr << "Unmatched \", strings must on the same line\n";
202  return Token(tok_start, Token::Kind::unknown, str);
203  }
204 
205  skip_char();
206  return Token(tok_start, Token::Kind::string, str);
207  }
208 
216  Token lex() {
217  Location tok_start = loc_;
218  skip_whitespace();
219 
220  switch (buf_->peek()) {
221  case EOF:
222  skip_char();
223  return Token(tok_start, Token::Kind::eof, "");
224 
225  case '\r':
226  skip_char();
227  if (buf_->peek() != '\n') {
228  buf_->ignore();
229  loc_.advance_line();
230  return lex();
231  }
232  // FALLTHROUGH
233  case '\n':
234  buf_->ignore();
235  loc_.advance_line();
236  return lex();
237 
238  case '/':
239  skip_char();
240  if (buf_->peek() == '/') {
241  skip_line_comment();
242  return lex();
243  }
244  return Token(tok_start, Token::Kind::slash, "/");
245 
246  // clang-format off
247  case '0': case '1': case '2': case '3': case '4':
248  case '5': case '6': case '7': case '8': case '9':
249  return lex_numeric_constant(tok_start);
250  // clang-format on
251 
252  case 'C':
253  skip_char();
254  if (buf_->peek() == 'X') {
255  skip_char();
256  return Token(tok_start, Token::Kind::kw_cx, "CX");
257  }
258 
259  loc_.advance_column();
260  return Token(tok_start, Token::Kind::unknown,
261  std::string({'C', (char) buf_->get()}));
262 
263  case 'U':
264  skip_char();
265  return Token(tok_start, Token::Kind::kw_u, "U");
266 
267  // clang-format off
268  case 'O':
269  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
270  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
271  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
272  case 'v': case 'w': case 'x': case 'y': case 'z':
273  return lex_identifier(tok_start);
274  // clang-format on
275 
276  case '[':
277  skip_char();
278  return Token(tok_start, Token::Kind::l_square, "[");
279 
280  case ']':
281  skip_char();
282  return Token(tok_start, Token::Kind::r_square, "]");
283 
284  case '(':
285  skip_char();
286  return Token(tok_start, Token::Kind::l_paren, "(");
287 
288  case ')':
289  skip_char();
290  return Token(tok_start, Token::Kind::r_paren, ")");
291 
292  case '{':
293  skip_char();
294  return Token(tok_start, Token::Kind::l_brace, "{");
295 
296  case '}':
297  skip_char();
298  return Token(tok_start, Token::Kind::r_brace, "}");
299 
300  case '*':
301  skip_char();
302  return Token(tok_start, Token::Kind::star, "*");
303 
304  case '+':
305  skip_char();
306  return Token(tok_start, Token::Kind::plus, "+");
307 
308  case '-':
309  skip_char();
310 
311  if (buf_->peek() == '>') {
312  skip_char();
313  return Token(tok_start, Token::Kind::arrow, "->");
314  }
315 
316  return Token(tok_start, Token::Kind::minus, "-");
317 
318  case '^':
319  skip_char();
320  return Token(tok_start, Token::Kind::caret, "^");
321 
322  case ';':
323  skip_char();
324  return Token(tok_start, Token::Kind::semicolon, ";");
325 
326  case '=':
327  skip_char();
328  if (buf_->peek() == '=') {
329  skip_char();
330  return Token(tok_start, Token::Kind::equalequal, "==");
331  }
332 
333  loc_.advance_column();
334  return Token(tok_start, Token::Kind::unknown,
335  std::string({'=', (char) buf_->get()}));
336 
337  case ',':
338  skip_char();
339  return Token(tok_start, Token::Kind::comma, ";");
340 
341  case '"':
342  skip_char();
343  return lex_string(tok_start);
344 
345  default:
346  loc_.advance_column();
347  return Token(tok_start, Token::Kind::unknown,
348  std::string({(char) buf_->get()}));
349  }
350  }
351 };
352 
353 } /* namespace qasm */
354 } /* namespace qpp */
355 
356 #endif /* QASM_LEXER_H_ */
Quantum++ main namespace.
Definition: circuits.h:35