Ninja
deps_log.cc
Go to the documentation of this file.
1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "deps_log.h"
16 
17 #include <assert.h>
18 #include <errno.h>
19 #include <stdint.h>
20 #include <stdio.h>
21 #include <string.h>
22 #ifndef _WIN32
23 #include <unistd.h>
24 #elif defined(_MSC_VER) && (_MSC_VER < 1900)
25 typedef __int32 int32_t;
26 typedef unsigned __int32 uint32_t;
27 #endif
28 
29 #include "graph.h"
30 #include "metrics.h"
31 #include "state.h"
32 #include "util.h"
33 
34 using namespace std;
35 
36 // The version is stored as 4 bytes after the signature and also serves as a
37 // byte order mark. Signature and version combined are 16 bytes long.
38 static const char kFileSignature[] = "# ninjadeps\n";
39 static const size_t kFileSignatureSize = sizeof(kFileSignature) - 1u;
40 
41 static const int32_t kCurrentVersion = 4;
42 
43 // Record size is currently limited to less than the full 32 bit, due to
44 // internal buffers having to have this size.
45 static constexpr size_t kMaxRecordSize = (1 << 19) - 1;
46 
48  Close();
49 }
50 
51 bool DepsLog::OpenForWrite(const string& path, string* err) {
52  if (needs_recompaction_) {
53  if (!Recompact(path, err))
54  return false;
55  }
56 
57  assert(!file_);
58  file_path_ = path; // we don't actually open the file right now, but will do
59  // so on the first write attempt
60  return true;
61 }
62 
63 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
64  const vector<Node*>& nodes) {
65  return RecordDeps(node, mtime, static_cast<int>(nodes.size()), nodes.data());
66 }
67 
68 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime, int node_count,
69  Node* const* nodes) {
70  // Track whether there's any new data to be recorded.
71  bool made_change = false;
72 
73  // Assign ids to all nodes that are missing one.
74  if (node->id() < 0) {
75  if (!RecordId(node))
76  return false;
77  made_change = true;
78  }
79  for (int i = 0; i < node_count; ++i) {
80  if (nodes[i]->id() < 0) {
81  if (!RecordId(nodes[i]))
82  return false;
83  made_change = true;
84  }
85  }
86 
87  // See if the new data is different than the existing data, if any.
88  if (!made_change) {
89  Deps* deps = GetDeps(node);
90  if (!deps ||
91  deps->mtime != mtime ||
92  deps->node_count != node_count) {
93  made_change = true;
94  } else {
95  for (int i = 0; i < node_count; ++i) {
96  if (deps->nodes[i] != nodes[i]) {
97  made_change = true;
98  break;
99  }
100  }
101  }
102  }
103 
104  // Don't write anything if there's no new info.
105  if (!made_change)
106  return true;
107 
108  // Update on-disk representation.
109  unsigned size = 4 * (1 + 2 + node_count);
110  if (size > kMaxRecordSize) {
111  errno = ERANGE;
112  return false;
113  }
114 
115  if (!OpenForWriteIfNeeded()) {
116  return false;
117  }
118  size |= 0x80000000; // Deps record: set high bit.
119  if (fwrite(&size, 4, 1, file_) < 1)
120  return false;
121  int id = node->id();
122  if (fwrite(&id, 4, 1, file_) < 1)
123  return false;
124  uint32_t mtime_part = static_cast<uint32_t>(mtime & 0xffffffff);
125  if (fwrite(&mtime_part, 4, 1, file_) < 1)
126  return false;
127  mtime_part = static_cast<uint32_t>((mtime >> 32) & 0xffffffff);
128  if (fwrite(&mtime_part, 4, 1, file_) < 1)
129  return false;
130  for (int i = 0; i < node_count; ++i) {
131  id = nodes[i]->id();
132  if (fwrite(&id, 4, 1, file_) < 1)
133  return false;
134  }
135  if (fflush(file_) != 0)
136  return false;
137 
138  // Update in-memory representation.
139  Deps* deps = new Deps(mtime, node_count);
140  for (int i = 0; i < node_count; ++i)
141  deps->nodes[i] = nodes[i];
142  UpdateDeps(node->id(), deps);
143 
144  return true;
145 }
146 
148  OpenForWriteIfNeeded(); // create the file even if nothing has been recorded
149  if (file_)
150  fclose(file_);
151  file_ = NULL;
152 }
153 
154 LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
155  METRIC_RECORD(".ninja_deps load");
156  char buf[kMaxRecordSize + 1];
157  FILE* f = fopen(path.c_str(), "rb");
158  if (!f) {
159  if (errno == ENOENT)
160  return LOAD_NOT_FOUND;
161  *err = strerror(errno);
162  return LOAD_ERROR;
163  }
164 
165  bool valid_header = fread(buf, kFileSignatureSize, 1, f) == 1 &&
166  !memcmp(buf, kFileSignature, kFileSignatureSize);
167 
168  int32_t version = 0;
169  bool valid_version =
170  fread(&version, 4, 1, f) == 1 && version == kCurrentVersion;
171 
172  // Note: For version differences, this should migrate to the new format.
173  // But the v1 format could sometimes (rarely) end up with invalid data, so
174  // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
175  // and there was no release with it, so pretend that it never happened.)
176  if (!valid_header || !valid_version) {
177  if (version == 1)
178  *err = "deps log version change; rebuilding";
179  else
180  *err = "bad deps log signature or version; starting over";
181  fclose(f);
182  platformAwareUnlink(path.c_str());
183  // Don't report this as a failure. An empty deps log will cause
184  // us to rebuild the outputs anyway.
185  return LOAD_SUCCESS;
186  }
187 
188  long offset = ftell(f);
189  bool read_failed = false;
190  int unique_dep_record_count = 0;
191  int total_dep_record_count = 0;
192  for (;;) {
193  unsigned size;
194  if (fread(&size, sizeof(size), 1, f) < 1) {
195  if (!feof(f))
196  read_failed = true;
197  break;
198  }
199  bool is_deps = (size >> 31) != 0;
200  size = size & 0x7FFFFFFF;
201 
202  if (size > kMaxRecordSize || fread(buf, size, 1, f) < 1) {
203  read_failed = true;
204  break;
205  }
206  offset += size + sizeof(size);
207 
208  if (is_deps) {
209  if ((size % 4) != 0) {
210  read_failed = true;
211  break;
212  }
213  int* deps_data = reinterpret_cast<int*>(buf);
214  int out_id = deps_data[0];
215  TimeStamp mtime;
216  mtime = (TimeStamp)(((uint64_t)(unsigned int)deps_data[2] << 32) |
217  (uint64_t)(unsigned int)deps_data[1]);
218  deps_data += 3;
219  int deps_count = (size / 4) - 3;
220 
221  for (int i = 0; i < deps_count; ++i) {
222  int node_id = deps_data[i];
223  if (node_id >= (int)nodes_.size() || !nodes_[node_id]) {
224  read_failed = true;
225  break;
226  }
227  }
228  if (read_failed)
229  break;
230 
231  Deps* deps = new Deps(mtime, deps_count);
232  for (int i = 0; i < deps_count; ++i) {
233  deps->nodes[i] = nodes_[deps_data[i]];
234  }
235 
236  total_dep_record_count++;
237  if (!UpdateDeps(out_id, deps))
238  ++unique_dep_record_count;
239  } else {
240  int path_size = size - 4;
241  if (path_size <= 0) {
242  read_failed = true;
243  break;
244  }
245  // There can be up to 3 bytes of padding.
246  if (buf[path_size - 1] == '\0') --path_size;
247  if (buf[path_size - 1] == '\0') --path_size;
248  if (buf[path_size - 1] == '\0') --path_size;
249  StringPiece subpath(buf, path_size);
250  // It is not necessary to pass in a correct slash_bits here. It will
251  // either be a Node that's in the manifest (in which case it will already
252  // have a correct slash_bits that GetNode will look up), or it is an
253  // implicit dependency from a .d which does not affect the build command
254  // (and so need not have its slashes maintained).
255  Node* node = state->GetNode(subpath, 0);
256 
257  // Check that the expected index matches the actual index. This can only
258  // happen if two ninja processes write to the same deps log concurrently.
259  // (This uses unary complement to make the checksum look less like a
260  // dependency record entry.)
261  unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
262  int expected_id = ~checksum;
263  int id = static_cast<int>(nodes_.size());
264  if (id != expected_id || node->id() >= 0) {
265  read_failed = true;
266  break;
267  }
268  node->set_id(id);
269  nodes_.push_back(node);
270  }
271  }
272 
273  if (read_failed) {
274  // An error occurred while loading; try to recover by truncating the
275  // file to the last fully-read record.
276  if (ferror(f)) {
277  *err = strerror(ferror(f));
278  } else {
279  *err = "premature end of file";
280  }
281  fclose(f);
282 
283  if (!Truncate(path, offset, err))
284  return LOAD_ERROR;
285 
286  // The truncate succeeded; we'll just report the load error as a
287  // warning because the build can proceed.
288  *err += "; recovering";
289  return LOAD_SUCCESS;
290  }
291 
292  fclose(f);
293 
294  // Rebuild the log if there are too many dead records.
295  int kMinCompactionEntryCount = 1000;
296  int kCompactionRatio = 3;
297  if (total_dep_record_count > kMinCompactionEntryCount &&
298  total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
299  needs_recompaction_ = true;
300  }
301 
302  return LOAD_SUCCESS;
303 }
304 
306  // Abort if the node has no id (never referenced in the deps) or if
307  // there's no deps recorded for the node.
308  if (node->id() < 0 || node->id() >= (int)deps_.size())
309  return NULL;
310  return deps_[node->id()];
311 }
312 
314  for (size_t id = 0; id < deps_.size(); ++id) {
315  Deps* deps = deps_[id];
316  if (!deps)
317  continue;
318  for (int i = 0; i < deps->node_count; ++i) {
319  if (deps->nodes[i] == node)
320  return nodes_[id];
321  }
322  }
323  return NULL;
324 }
325 
326 bool DepsLog::Recompact(const string& path, string* err) {
327  METRIC_RECORD(".ninja_deps recompact");
328 
329  Close();
330  string temp_path = path + ".recompact";
331 
332  // OpenForWrite() opens for append. Make sure it's not appending to a
333  // left-over file from a previous recompaction attempt that crashed somehow.
334  platformAwareUnlink(temp_path.c_str());
335 
336  DepsLog new_log;
337  if (!new_log.OpenForWrite(temp_path, err))
338  return false;
339 
340  // Clear all known ids so that new ones can be reassigned. The new indices
341  // will refer to the ordering in new_log, not in the current log.
342  for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
343  (*i)->set_id(-1);
344 
345  // Write out all deps again.
346  for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
347  Deps* deps = deps_[old_id];
348  if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps.
349 
350  if (!IsDepsEntryLiveFor(nodes_[old_id]))
351  continue;
352 
353  if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
354  deps->node_count, deps->nodes)) {
355  new_log.Close();
356  return false;
357  }
358  }
359 
360  new_log.Close();
361 
362  // All nodes now have ids that refer to new_log, so steal its data.
363  deps_.swap(new_log.deps_);
364  nodes_.swap(new_log.nodes_);
365 
366  if (platformAwareUnlink(path.c_str()) < 0) {
367  *err = strerror(errno);
368  return false;
369  }
370 
371  if (rename(temp_path.c_str(), path.c_str()) < 0) {
372  *err = strerror(errno);
373  return false;
374  }
375 
376  return true;
377 }
378 
380  // Skip entries that don't have in-edges or whose edges don't have a
381  // "deps" attribute. They were in the deps log from previous builds, but
382  // the the files they were for were removed from the build and their deps
383  // entries are no longer needed.
384  // (Without the check for "deps", a chain of two or more nodes that each
385  // had deps wouldn't be collected in a single recompaction.)
386  return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
387 }
388 
389 bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
390  if (out_id >= (int)deps_.size())
391  deps_.resize(out_id + 1);
392 
393  bool delete_old = deps_[out_id] != NULL;
394  if (delete_old)
395  delete deps_[out_id];
396  deps_[out_id] = deps;
397  return delete_old;
398 }
399 
400 bool DepsLog::RecordId(Node* node) {
401  int path_size = static_cast<int>(node->path().size());
402  assert(path_size > 0 && "Trying to record empty path Node!");
403  int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.
404 
405  unsigned size = path_size + padding + 4;
406  if (size > kMaxRecordSize) {
407  errno = ERANGE;
408  return false;
409  }
410 
411  if (!OpenForWriteIfNeeded()) {
412  return false;
413  }
414  if (fwrite(&size, 4, 1, file_) < 1)
415  return false;
416  if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
417  return false;
418  }
419  if (padding && fwrite("\0\0", padding, 1, file_) < 1)
420  return false;
421  int id = static_cast<int>(nodes_.size());
422  unsigned checksum = ~(unsigned)id;
423  if (fwrite(&checksum, 4, 1, file_) < 1)
424  return false;
425  if (fflush(file_) != 0)
426  return false;
427 
428  node->set_id(id);
429  nodes_.push_back(node);
430 
431  return true;
432 }
433 
435  if (file_path_.empty()) {
436  return true;
437  }
438  file_ = fopen(file_path_.c_str(), "ab");
439  if (!file_) {
440  return false;
441  }
442  // Set the buffer size to this and flush the file buffer after every record
443  // to make sure records aren't written partially.
444  if (setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1) != 0) {
445  return false;
446  }
447  SetCloseOnExec(fileno(file_));
448 
449  // Opening a file in append mode doesn't set the file pointer to the file's
450  // end on Windows. Do that explicitly.
451  fseek(file_, 0, SEEK_END);
452 
453  if (ftell(file_) == 0) {
454  if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) {
455  return false;
456  }
457  if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
458  return false;
459  }
460  }
461  if (fflush(file_) != 0) {
462  return false;
463  }
464  file_path_.clear();
465  return true;
466 }
static const size_t kFileSignatureSize
Definition: deps_log.cc:39
static const int32_t kCurrentVersion
Definition: deps_log.cc:41
static constexpr size_t kMaxRecordSize
Definition: deps_log.cc:45
static const char kFileSignature[]
Definition: deps_log.cc:38
LoadStatus
Definition: load_status.h:18
@ LOAD_ERROR
Definition: load_status.h:19
@ LOAD_SUCCESS
Definition: load_status.h:20
@ LOAD_NOT_FOUND
Definition: load_status.h:21
#define METRIC_RECORD(name)
The primary interface to metrics.
Definition: metrics.h:83
Definition: hash_map.h:26
TimeStamp mtime
Definition: deps_log.h:84
Node ** nodes
Definition: deps_log.h:86
int node_count
Definition: deps_log.h:85
As build commands run they can output extra dependency information (e.g.
Definition: deps_log.h:68
Deps * GetDeps(Node *node)
Definition: deps_log.cc:305
bool UpdateDeps(int out_id, Deps *deps)
Definition: deps_log.cc:389
Node * GetFirstReverseDepsNode(Node *node)
Definition: deps_log.cc:313
bool OpenForWriteIfNeeded()
Should be called before using file_.
Definition: deps_log.cc:434
bool Recompact(const std::string &path, std::string *err)
Rewrite the known log entries, throwing away old data.
Definition: deps_log.cc:326
~DepsLog()
Definition: deps_log.cc:47
void Close()
Definition: deps_log.cc:147
bool OpenForWrite(const std::string &path, std::string *err)
Definition: deps_log.cc:51
static bool IsDepsEntryLiveFor(const Node *node)
Returns if the deps entry for a node is still reachable from the manifest.
Definition: deps_log.cc:379
std::vector< Node * > nodes_
Maps id -> Node.
Definition: deps_log.h:123
bool RecordId(Node *node)
Definition: deps_log.cc:400
bool RecordDeps(Node *node, TimeStamp mtime, const std::vector< Node * > &nodes)
std::vector< Deps * > deps_
Maps id -> deps of that id.
Definition: deps_log.h:125
LoadStatus Load(const std::string &path, State *state, std::string *err)
Definition: deps_log.cc:154
std::string GetBinding(const std::string &key) const
Returns the shell-escaped value of |key|.
Definition: graph.cc:511
Information about a node in the dependency graph: the file, whether it's dirty, mtime,...
Definition: graph.h:42
int id() const
Definition: graph.h:111
void set_id(int id)
Definition: graph.h:112
const std::string & path() const
Definition: graph.h:82
Edge * in_edge() const
Definition: graph.h:100
Global state (file status) for a single run.
Definition: state.h:95
Node * GetNode(StringPiece path, uint64_t slash_bits)
Definition: state.cc:95
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:25
int64_t TimeStamp
Definition: timestamp.h:31
int platformAwareUnlink(const char *filename)
Definition: util.cc:1025
bool Truncate(const string &path, size_t size, string *err)
Definition: util.cc:1007
void SetCloseOnExec(int fd)
Mark a file descriptor to not be inherited on exec()s.
Definition: util.cc:480
unsigned long long uint64_t
Definition: win32port.h:29