Ninja
lexer.in.cc
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "lexer.h"
16 
17 #include <stdio.h>
18 
19 #include "eval_env.h"
20 #include "util.h"
21 
22 using namespace std;
23 
24 bool Lexer::Error(const string& message, string* err) {
25  // Compute line/column.
26  int line = 1;
27  const char* line_start = input_.str_;
28  for (const char* p = input_.str_; p < last_token_; ++p) {
29  if (*p == '\n') {
30  ++line;
31  line_start = p + 1;
32  }
33  }
34  int col = last_token_ ? (int)(last_token_ - line_start) : 0;
35 
36  char buf[1024];
37  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
38  *err = buf;
39  *err += message + "\n";
40 
41  // Add some context to the message.
42  const int kTruncateColumn = 72;
43  if (col > 0 && col < kTruncateColumn) {
44  int len;
45  bool truncated = true;
46  for (len = 0; len < kTruncateColumn; ++len) {
47  if (line_start[len] == 0 || line_start[len] == '\n') {
48  truncated = false;
49  break;
50  }
51  }
52  *err += string(line_start, len);
53  if (truncated)
54  *err += "...";
55  *err += "\n";
56  *err += string(col, ' ');
57  *err += "^ near here";
58  }
59 
60  return false;
61 }
62 
// Convenience constructor for lexing a bare string; error messages use
// the placeholder filename "input".
Lexer::Lexer(const char* input) {
  Start("input", input);
}
66 
// Start parsing some input.  |filename| is used only for error messages;
// |input| must outlive the Lexer (the lexer keeps raw pointers into it,
// and relies on a trailing '\0' to detect EOF — see the `nul` re2c rule).
void Lexer::Start(StringPiece filename, StringPiece input) {
  filename_ = filename;
  input_ = input;
  ofs_ = input_.str_;   // read cursor begins at the start of the input
  last_token_ = NULL;   // nothing has been read yet
}
73 
74 const char* Lexer::TokenName(Token t) {
75  switch (t) {
76  case ERROR: return "lexing error";
77  case BUILD: return "'build'";
78  case COLON: return "':'";
79  case DEFAULT: return "'default'";
80  case EQUALS: return "'='";
81  case IDENT: return "identifier";
82  case INCLUDE: return "'include'";
83  case INDENT: return "indent";
84  case NEWLINE: return "newline";
85  case PIPE2: return "'||'";
86  case PIPE: return "'|'";
87  case PIPEAT: return "'|@'";
88  case POOL: return "'pool'";
89  case RULE: return "'rule'";
90  case SUBNINJA: return "'subninja'";
91  case TEOF: return "eof";
92  }
93  return NULL; // not reached
94 }
95 
96 const char* Lexer::TokenErrorHint(Token expected) {
97  switch (expected) {
98  case COLON:
99  return " ($ also escapes ':')";
100  default:
101  return "";
102  }
103 }
104 
105 string Lexer::DescribeLastError() {
106  if (last_token_) {
107  switch (last_token_[0]) {
108  case '\t':
109  return "tabs are not allowed, use spaces";
110  }
111  }
112  return "lexing error";
113 }
114 
// Rewind to the last read Token, so the next ReadToken() returns it
// again.  Only the most recent token is remembered, so this can only
// step back once.
void Lexer::UnreadToken() {
  ofs_ = last_token_;
}
118 
120  const char* p = ofs_;
121  const char* q;
122  const char* start;
123  Lexer::Token token;
124  for (;;) {
125  start = p;
126  /*!re2c
127  re2c:define:YYCTYPE = "unsigned char";
128  re2c:define:YYCURSOR = p;
129  re2c:define:YYMARKER = q;
130  re2c:yyfill:enable = 0;
131 
132  nul = "\000";
133  simple_varname = [a-zA-Z0-9_-]+;
134  varname = [a-zA-Z0-9_.-]+;
135 
136  [ ]*"#"[^\000\n]*"\n" { continue; }
137  [ ]*"\r\n" { token = NEWLINE; break; }
138  [ ]*"\n" { token = NEWLINE; break; }
139  [ ]+ { token = INDENT; break; }
140  "build" { token = BUILD; break; }
141  "pool" { token = POOL; break; }
142  "rule" { token = RULE; break; }
143  "default" { token = DEFAULT; break; }
144  "=" { token = EQUALS; break; }
145  ":" { token = COLON; break; }
146  "|@" { token = PIPEAT; break; }
147  "||" { token = PIPE2; break; }
148  "|" { token = PIPE; break; }
149  "include" { token = INCLUDE; break; }
150  "subninja" { token = SUBNINJA; break; }
151  varname { token = IDENT; break; }
152  nul { token = TEOF; break; }
153  [^] { token = ERROR; break; }
154  */
155  }
156 
157  last_token_ = start;
158  ofs_ = p;
159  if (token != NEWLINE && token != TEOF)
160  EatWhitespace();
161  return token;
162 }
163 
164 bool Lexer::PeekToken(Token token) {
165  Token t = ReadToken();
166  if (t == token)
167  return true;
168  UnreadToken();
169  return false;
170 }
171 
// Skip past whitespace (called after each read token/ident/etc.).
// Consumes runs of spaces and $-escaped line continuations; stops at
// (and does not consume) anything else, including end of input.
void Lexer::EatWhitespace() {
  const char* p = ofs_;
  const char* q;  // YYMARKER scratch for re2c backtracking
  for (;;) {
    ofs_ = p;  // commit everything consumed so far
    /*!re2c
    [ ]+ { continue; }
    "$\r\n" { continue; }
    "$\n" { continue; }
    nul { break; }
    [^] { break; }
    */
  }
}
186 
// Read a simple identifier (a rule or variable name, matching the
// `varname` class [a-zA-Z0-9_.-]+) into *out, then eat trailing
// whitespace.  Returns false (leaving ofs_ unchanged) if the next
// character cannot start an identifier.
bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);
      break;
    }
    [^] {
      last_token_ = start;
      return false;
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  EatWhitespace();
  return true;
}
208 
// Read a $-escaped string into *eval.  If |path| is true, the string is
// a path: it additionally ends at (without consuming) space, ':', '|',
// or a newline; otherwise it reads up to and including the newline.
// On a lexing error, fills *err and returns false.
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\r\n|\000]+ {
      // A run of plain literal text.
      eval->AddText(StringPiece(start, p - start));
      continue;
    }
    "\r\n" {
      if (path)
        p = start;  // back up: leave the newline unconsumed for the caller
      break;
    }
    [ :|\n] {
      if (path) {
        // Path delimiters terminate the string; back up so the caller
        // still sees the delimiter character.
        p = start;
        break;
      } else {
        if (*start == '\n')
          break;
        eval->AddText(StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      // "$$" is the escape for a literal '$'.
      eval->AddText(StringPiece("$", 1));
      continue;
    }
    "$ " {
      // "$ " is the escape for a literal space.
      eval->AddText(StringPiece(" ", 1));
      continue;
    }
    "$\r\n"[ ]* {
      // $-escaped newline: line continuation; swallow leading indent.
      continue;
    }
    "$\n"[ ]* {
      continue;
    }
    "${"varname"}" {
      // ${name}: variable reference, braces stripped.
      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      // $name: variable reference, '$' stripped.
      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$:" {
      // "$:" is the escape for a literal ':'.
      eval->AddText(StringPiece(":", 1));
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      last_token_ = start;
      return Error(DescribeLastError(), err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}
Definition: hash_map.h:26
A tokenized string that contains variable references.
Definition: eval_env.h:35
static const char * TokenName(Token t)
Return a human-readable form of a token, used in error messages.
Definition: lexer.cc:75
Token
Definition: lexer.h:32
bool PeekToken(Token token)
If the next token is token, read it and return true.
Definition: lexer.cc:463
Lexer()
Definition: lexer.h:28
std::string DescribeLastError()
If the last token read was an ERROR token, provide more info or the empty string.
Definition: lexer.cc:106
void UnreadToken()
Rewind to the last read Token.
Definition: lexer.cc:116
bool ReadEvalString(EvalString *eval, bool path, std::string *err)
Read a $-escaped string.
Definition: lexer.cc:623
void Start(StringPiece filename, StringPiece input)
Start parsing some input.
Definition: lexer.cc:68
Token ReadToken()
Read a Token from the Token enum.
Definition: lexer.cc:120
static const char * TokenErrorHint(Token expected)
Return a human-readable token hint, used in error messages.
Definition: lexer.cc:97
void EatWhitespace()
Skip past whitespace (called after each read token/ident/etc.).
Definition: lexer.cc:471
bool ReadIdent(std::string *out)
Read a simple identifier (a rule or variable name).
Definition: lexer.cc:554
bool Error(const std::string &message, std::string *err)
Construct an error message with context.
Definition: lexer.cc:25
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:25
const char * str_
Definition: string_piece.h:70