Ninja
depfile_parser.cc
Go to the documentation of this file.
1 /* Generated by re2c */
2 // Copyright 2011 Google Inc. All Rights Reserved.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include "depfile_parser.h"
17 #include "util.h"
18 
19 #include <algorithm>
20 
21 using namespace std;
22 
24  : options_(options)
25 {
26 }
27 
28 // A note on backslashes in Makefiles, from reading the docs:
29 // Backslash-newline is the line continuation character.
30 // Backslash-# escapes a # (otherwise meaningful as a comment start).
31 // Backslash-% escapes a % (otherwise meaningful as a special).
32 // Finally, quoting the GNU manual, "Backslashes that are not in danger
33 // of quoting ‘%’ characters go unmolested."
34 // How do you end a line with a backslash? The netbsd Make docs suggest
35 // reading the result of a shell command echoing a backslash!
36 //
37 // Rather than implement all of the above, we follow what GCC/Clang produces:
38 // Backslashes escape a space or hash sign.
39 // When a space is preceded by 2N+1 backslashes, it represents N backslashes
40 // followed by space.
41 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
42 // the end of a filename.
43 // A hash sign is escaped by a single backslash. All other backslashes remain
44 // unchanged.
45 //
46 // If anyone actually has depfiles that rely on the more complicated
47 // behavior we can adjust this.
//
// Parse() scans the text in |content| and rewrites it in place while
// de-escaping, so the buffer is modified. Each parsed filename is recorded
// as a StringPiece that points into that buffer: names seen while parsing
// targets go to outs_, names seen after a ':' go to ins_. Duplicates are
// not recorded twice.
// Returns false and sets |*err| when a new input appears on a rule whose
// target was already recorded as an input, or when the input contains at
// least one filename but no ':' at all. Returns true otherwise.
48 bool DepfileParser::Parse(string* content, string* err) {
49  // in: current parser input point.
50  // end: end of input.
51  // parsing_targets: whether we are parsing targets or dependencies.
 // have_target: set once a filename ending in ':' has been seen.
 // poisoned_input: set when a target of the current rule was already
 // recorded in ins_; a new input on that rule is then an error.
 // is_empty: stays true until a non-empty filename has been parsed.
52  char* in = &(*content)[0];
53  char* end = in + content->size();
54  bool have_target = false;
55  bool parsing_targets = true;
56  bool poisoned_input = false;
57  bool is_empty = true;
 // Each iteration of this loop parses one filename.
58  while (in < end) {
59  bool have_newline = false;
60  // out: current output point (typically same as in, but can fall behind
61  // as we de-escape backslashes).
62  char* out = in;
63  // filename: start of the current parsed filename.
64  char* filename = out;
 // Each iteration of this loop matches one token. Actions that `continue`
 // extend the current filename; actions that `break` end it.
65  for (;;) {
66  // start: beginning of the current parsed span.
67  const char* start = in;
 // yymarker: backtrack position, set just after a backslash in yy8.
68  char* yymarker = NULL;
69 
70  {
71  unsigned char yych;
 // Byte classification table: entries with bit 128 set are bytes that
 // extend a plain-text span (tested with `& 128` below and in yy5).
72  static const unsigned char yybm[] = {
73  0, 0, 0, 0, 0, 0, 0, 0,
74  0, 0, 0, 0, 0, 0, 0, 0,
75  0, 0, 0, 0, 0, 0, 0, 0,
76  0, 0, 0, 0, 0, 0, 0, 0,
77  0, 128, 128, 0, 0, 128, 128, 128,
78  128, 128, 0, 128, 128, 128, 128, 128,
79  128, 128, 128, 128, 128, 128, 128, 128,
80  128, 128, 128, 0, 0, 128, 0, 128,
81  128, 128, 128, 128, 128, 128, 128, 128,
82  128, 128, 128, 128, 128, 128, 128, 128,
83  128, 128, 128, 128, 128, 128, 128, 128,
84  128, 128, 128, 128, 0, 128, 0, 128,
85  0, 128, 128, 128, 128, 128, 128, 128,
86  128, 128, 128, 128, 128, 128, 128, 128,
87  128, 128, 128, 128, 128, 128, 128, 128,
88  128, 128, 128, 128, 0, 128, 128, 0,
89  128, 128, 128, 128, 128, 128, 128, 128,
90  128, 128, 128, 128, 128, 128, 128, 128,
91  128, 128, 128, 128, 128, 128, 128, 128,
92  128, 128, 128, 128, 128, 128, 128, 128,
93  128, 128, 128, 128, 128, 128, 128, 128,
94  128, 128, 128, 128, 128, 128, 128, 128,
95  128, 128, 128, 128, 128, 128, 128, 128,
96  128, 128, 128, 128, 128, 128, 128, 128,
97  128, 128, 128, 128, 128, 128, 128, 128,
98  128, 128, 128, 128, 128, 128, 128, 128,
99  128, 128, 128, 128, 128, 128, 128, 128,
100  128, 128, 128, 128, 128, 128, 128, 128,
101  128, 128, 128, 128, 128, 128, 128, 128,
102  128, 128, 128, 128, 128, 128, 128, 128,
103  128, 128, 128, 128, 128, 128, 128, 128,
104  128, 128, 128, 128, 128, 128, 128, 128,
105  };
 // Dispatch on the first byte of the token.
106  yych = *in;
107  if (yybm[0+yych] & 128) {
108  goto yy5;
109  }
110  if (yych <= '\r') {
111  if (yych <= '\t') {
112  if (yych >= 0x01) goto yy1;
113  } else {
114  if (yych <= '\n') goto yy3;
115  if (yych <= '\f') goto yy1;
116  goto yy4;
117  }
118  } else {
119  if (yych <= '$') {
120  if (yych <= '#') goto yy1;
121  goto yy7;
122  } else {
123  if (yych <= '>') goto yy1;
124  if (yych <= '\\') goto yy8;
125  goto yy1;
126  }
127  }
 // Only a 0x00 byte falls through to here: consume it and end the
 // current filename.
128  ++in;
129  {
130  break;
131  }
132 yy1:
133  ++in;
134 yy2:
135  {
136  // For any other character (e.g. whitespace), swallow it here,
137  // allowing the outer logic to loop around again.
138  break;
139  }
140 yy3:
141  ++in;
142  {
143  // A newline ends the current file name and the current rule.
144  have_newline = true;
145  break;
146  }
 // '\r': counts as a newline only when immediately followed by '\n';
 // otherwise it is swallowed like any other character.
147 yy4:
148  yych = *++in;
149  if (yych == '\n') goto yy3;
150  goto yy2;
 // Consume the longest run of plain-text bytes.
151 yy5:
152  yych = *++in;
153  if (yybm[0+yych] & 128) {
154  goto yy5;
155  }
156 yy6:
157  {
158  // Got a span of plain text.
159  int len = (int)(in - start);
160  // Need to shift it over if we're overwriting backslashes.
161  if (out < start)
162  memmove(out, start, len);
163  out += len;
164  continue;
165  }
 // '$': "$$" is de-escaped in yy9; a lone '$' is swallowed via yy2.
166 yy7:
167  yych = *++in;
168  if (yych == '$') goto yy9;
169  goto yy2;
 // First backslash: remember the position after it in yymarker, then
 // dispatch on the following byte.
170 yy8:
171  yych = *(yymarker = ++in);
172  if (yych <= ' ') {
173  if (yych <= '\n') {
174  if (yych <= 0x00) goto yy2;
175  if (yych <= '\t') goto yy10;
176  goto yy11;
177  } else {
178  if (yych == '\r') goto yy12;
179  if (yych <= 0x1F) goto yy10;
180  goto yy13;
181  }
182  } else {
183  if (yych <= '9') {
184  if (yych == '#') goto yy14;
185  goto yy10;
186  } else {
187  if (yych <= ':') goto yy15;
188  if (yych == '\\') goto yy17;
189  goto yy10;
190  }
191  }
192 yy9:
193  ++in;
194  {
195  // De-escape dollar character.
196  *out++ = '$';
197  continue;
198  }
 // Backslash(es) followed by an ordinary byte: consume that byte and
 // keep the whole span as plain text.
199 yy10:
200  ++in;
201  goto yy6;
202 yy11:
203  ++in;
204  {
205  // A line continuation ends the current file name.
206  break;
207  }
 // Backslash followed by '\r': a line continuation if '\n' comes next;
 // otherwise rewind to just after the backslash and swallow it.
208 yy12:
209  yych = *++in;
210  if (yych == '\n') goto yy11;
211  in = yymarker;
212  goto yy2;
213 yy13:
214  ++in;
215  {
216  // 2N+1 backslashes plus space -> N backslashes plus space.
217  int len = (int)(in - start);
218  int n = len / 2 - 1;
219  if (out < start)
220  memset(out, '\\', n);
221  out += n;
222  *out++ = ' ';
223  continue;
224  }
225 yy14:
226  ++in;
227  {
228  // De-escape hash sign, but preserve other leading backslashes.
229  int len = (int)(in - start);
230  if (len > 2 && out < start)
231  memset(out, '\\', len - 2);
232  out += len - 2;
233  *out++ = '#';
234  continue;
235  }
 // Backslash(es) followed by ':': look at the next byte to decide
 // whether the colon is escaped (yy16) or ends the name (yy18).
236 yy15:
237  yych = *++in;
238  if (yych <= '\f') {
239  if (yych <= 0x00) goto yy18;
240  if (yych <= 0x08) goto yy16;
241  if (yych <= '\n') goto yy18;
242  } else {
243  if (yych <= '\r') goto yy18;
244  if (yych == ' ') goto yy18;
245  }
246 yy16:
247  {
248  // De-escape colon sign, but preserve other leading backslashes.
249  // Regular expression uses lookahead to make sure that no whitespace
250  // nor EOF follows. In that case it'd be the : at the end of a target
251  int len = (int)(in - start);
252  if (len > 2 && out < start)
253  memset(out, '\\', len - 2);
254  out += len - 2;
255  *out++ = ':';
256  continue;
257  }
 // Second backslash of a run (even count so far): dispatch on the
 // following byte.
258 yy17:
259  yych = *++in;
260  if (yych <= ' ') {
261  if (yych <= '\n') {
262  if (yych <= 0x00) goto yy6;
263  if (yych <= '\t') goto yy10;
264  goto yy6;
265  } else {
266  if (yych == '\r') goto yy6;
267  if (yych <= 0x1F) goto yy10;
268  goto yy19;
269  }
270  } else {
271  if (yych <= '9') {
272  if (yych == '#') goto yy14;
273  goto yy10;
274  } else {
275  if (yych <= ':') goto yy15;
276  if (yych == '\\') goto yy20;
277  goto yy10;
278  }
279  }
280 yy18:
281  ++in;
282  {
283  // Backslash followed by : and whitespace.
284  // It is therefore normal text and not an escaped colon
 // len leaves out the whitespace/NUL byte that was just consumed.
285  int len = (int)(in - start - 1);
286  // Need to shift it over if we're overwriting backslashes.
287  if (out < start)
288  memmove(out, start, len);
289  out += len;
290  if (*(in - 1) == '\n')
291  have_newline = true;
292  break;
293  }
294 yy19:
295  ++in;
296  {
297  // 2N backslashes plus space -> 2N backslashes, end of filename.
298  int len = (int)(in - start);
299  if (out < start)
300  memset(out, '\\', len - 1);
301  out += len - 1;
302  break;
303  }
 // Odd count of backslashes so far (three or more): dispatch on the
 // following byte.
304 yy20:
305  yych = *++in;
306  if (yych <= ' ') {
307  if (yych <= '\n') {
308  if (yych <= 0x00) goto yy6;
309  if (yych <= '\t') goto yy10;
310  goto yy6;
311  } else {
312  if (yych == '\r') goto yy6;
313  if (yych <= 0x1F) goto yy10;
314  goto yy13;
315  }
316  } else {
317  if (yych <= '9') {
318  if (yych == '#') goto yy14;
319  goto yy10;
320  } else {
321  if (yych <= ':') goto yy15;
322  if (yych == '\\') goto yy17;
323  goto yy10;
324  }
325  }
326  }
327 
328  }
329 
 // The de-escaped filename now occupies [filename, out).
330  int len = (int)(out - filename);
 // Classify this name before a trailing ':' can flip parsing_targets.
331  const bool is_dependency = !parsing_targets;
332  if (len > 0 && filename[len - 1] == ':') {
333  len--; // Strip off trailing colon, if any.
334  parsing_targets = false;
335  have_target = true;
336  }
337 
338  if (len > 0) {
339  is_empty = false;
340  StringPiece piece = StringPiece(filename, len);
341  // If we've seen this as an input before, skip it.
342  std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
343  if (pos == ins_.end()) {
344  if (is_dependency) {
345  if (poisoned_input) {
346  *err = "inputs may not also have inputs";
347  return false;
348  }
349  // New input.
350  ins_.push_back(piece);
351  } else {
352  // Check for a new output.
353  if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
354  outs_.push_back(piece);
355  }
356  } else if (!is_dependency) {
357  // We've passed an input on the left side; reject new inputs.
358  poisoned_input = true;
359  }
360  }
361 
362  if (have_newline) {
363  // A newline ends a rule so the next filename will be a new target.
364  parsing_targets = true;
365  poisoned_input = false;
366  }
367  }
 // Input that contained filenames but never a ':' is malformed.
368  if (!have_target && !is_empty) {
369  *err = "expected ':' in depfile";
370  return false;
371  }
372  return true;
373 }
Definition: hash_map.h:26
bool Parse(std::string *content, std::string *err)
Parse an input file.
DepfileParser(DepfileParserOptions options=DepfileParserOptions())
std::vector< StringPiece > outs_
std::vector< StringPiece > ins_
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:25