Ninja
depfile_parser.in.cc
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "depfile_parser.h"
16 #include "util.h"
17 
18 #include <algorithm>
19 
20 using namespace std;
21 
23  : options_(options)
24 {
25 }
26 
27 // A note on backslashes in Makefiles, from reading the docs:
28 // Backslash-newline is the line continuation character.
29 // Backslash-# escapes a # (otherwise meaningful as a comment start).
30 // Backslash-% escapes a % (otherwise meaningful as a special).
31 // Finally, quoting the GNU manual, "Backslashes that are not in danger
32 // of quoting ‘%’ characters go unmolested."
33 // How do you end a line with a backslash? The netbsd Make docs suggest
34 // reading the result of a shell command echoing a backslash!
35 //
36 // Rather than implement all of the above, we follow what GCC/Clang produces:
37 // Backslashes escape a space or hash sign.
38 // When a space is preceded by 2N+1 backslashes, it represents N backslashes
39 // followed by space.
40 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
41 // the end of a filename.
42 // A hash sign is escaped by a single backslash. All other backslashes remain
43 // unchanged.
44 //
45 // If anyone actually has depfiles that rely on the more complicated
46 // behavior we can adjust this.
// Parses the depfile text held in *content, appending target names to outs_
// and dependency names to ins_ (each name is recorded at most once).
// The buffer is rewritten in place as escapes are removed, and the recorded
// StringPiece entries point into that buffer instead of owning copies.
// Returns true on success. Returns false with *err set when a rule adds a
// new dependency after naming, as a target, something already present in
// ins_, or when at least one file name was parsed but none ended with ':'.
47 bool DepfileParser::Parse(string* content, string* err) {
48  // in: current parser input point.
49  // end: end of input.
50  // parsing_targets: whether we are parsing targets or dependencies.
51  char* in = &(*content)[0];
52  char* end = in + content->size();
53  bool have_target = false;  // becomes true once a name ending in ':' is seen
54  bool parsing_targets = true;
55  bool poisoned_input = false;  // current rule named a known input as a target
56  bool is_empty = true;  // stays true until a non-empty name is parsed
57  while (in < end) {
58  bool have_newline = false;
59  // out: current output point (typically same as in, but can fall behind
60  // as we de-escape backslashes).
61  char* out = in;
62  // filename: start of the current parsed filename.
63  char* filename = out;
64  for (;;) {
65  // start: beginning of the current parsed span.
66  const char* start = in;
67  char* yymarker = NULL;  // bound to YYMARKER in the re2c definitions below
68  /*!re2c
69  re2c:define:YYCTYPE = "unsigned char";
70  re2c:define:YYCURSOR = in;
71  re2c:define:YYLIMIT = end;
72  re2c:define:YYMARKER = yymarker;
73 
74  re2c:yyfill:enable = 0;
75 
76  re2c:indent:top = 2;
77  re2c:indent:string = " ";
78 
79  nul = "\000";
80  newline = '\r'?'\n';
81 
82  '\\\\'* '\\ ' {
83  // 2N+1 backslashes plus space -> N backslashes plus space.
84  int len = (int)(in - start);
85  int n = len / 2 - 1;
86  if (out < start)
87  memset(out, '\\', n);
88  out += n;
89  *out++ = ' ';
90  continue;
91  }
92  '\\\\'+ ' ' {
93  // 2N backslashes plus space -> 2N backslashes, end of filename.
94  int len = (int)(in - start);
95  if (out < start)
96  memset(out, '\\', len - 1);
97  out += len - 1;
98  break;
99  }
100  '\\'+ '#' {
101  // De-escape hash sign, but preserve other leading backslashes.
102  int len = (int)(in - start);
103  if (len > 2 && out < start)
104  memset(out, '\\', len - 2);
105  out += len - 2;
106  *out++ = '#';
107  continue;
108  }
109  '\\'+ ':' [\x00\x20\r\n\t] {
110  // Backslash followed by : and whitespace.
111  // It is therefore normal text and not an escaped colon
112  int len = (int)(in - start - 1);
113  // Need to shift it over if we're overwriting backslashes.
114  if (out < start)
115  memmove(out, start, len);
116  out += len;
117  if (*(in - 1) == '\n')
118  have_newline = true;
119  break;
120  }
121  '\\'+ ':' {
122  // De-escape colon sign, but preserve other leading backslashes.
123  // Regular expression uses lookahead to make sure that no whitespace
124  // nor EOF follows. In that case it'd be the : at the end of a target
125  int len = (int)(in - start);
126  if (len > 2 && out < start)
127  memset(out, '\\', len - 2);
128  out += len - 2;
129  *out++ = ':';
130  continue;
131  }
132  '$$' {
133  // De-escape dollar character.
134  *out++ = '$';
135  continue;
136  }
137  '\\'+ [^\000\r\n] | [a-zA-Z0-9+?"'&,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ {
138  // Got a span of plain text.
139  int len = (int)(in - start);
140  // Need to shift it over if we're overwriting backslashes.
141  if (out < start)
142  memmove(out, start, len);
143  out += len;
144  continue;
145  }
146  nul {
147  break;
148  }
149  '\\' newline {
150  // A line continuation ends the current file name.
151  break;
152  }
153  newline {
154  // A newline ends the current file name and the current rule.
155  have_newline = true;
156  break;
157  }
158  [^] {
159  // For any other character (e.g. whitespace), swallow it here,
160  // allowing the outer logic to loop around again.
161  break;
162  }
163  */
164  }
165 
166  int len = (int)(out - filename);  // length of the de-escaped name
167  const bool is_dependency = !parsing_targets;  // sampled before the ':' check below flips parsing_targets
168  if (len > 0 && filename[len - 1] == ':') {
169  len--; // Strip off trailing colon, if any.
170  parsing_targets = false;
171  have_target = true;
172  }
173 
174  if (len > 0) {
175  is_empty = false;
176  StringPiece piece = StringPiece(filename, len);
177  // If we've seen this as an input before, skip it.
178  std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
179  if (pos == ins_.end()) {
180  if (is_dependency) {
181  if (poisoned_input) {
182  *err = "inputs may not also have inputs";
183  return false;
184  }
185  // New input.
186  ins_.push_back(piece);
187  } else {
188  // Check for a new output.
189  if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
190  outs_.push_back(piece);
191  }
192  } else if (!is_dependency) {
193  // We've passed an input on the left side; reject new inputs.
194  poisoned_input = true;
195  }
196  }
197 
198  if (have_newline) {
199  // A newline ends a rule so the next filename will be a new target.
200  parsing_targets = true;
201  poisoned_input = false;
202  }
203  }
204  if (!have_target && !is_empty) {  // names were parsed but no ':' ever ended one
205  *err = "expected ':' in depfile";
206  return false;
207  }
208  return true;
209 }
Definition: hash_map.h:26
bool Parse(std::string *content, std::string *err)
Parse an input file.
DepfileParser(DepfileParserOptions options=DepfileParserOptions())
std::vector< StringPiece > outs_
std::vector< StringPiece > ins_
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:25