Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(203)

Side by Side Diff: src/platform/update_engine/delta_diff_generator.cc

Issue 545072: AU: Gut code for old updater. New protobuf for v2 updater. (Closed)
Patch Set: better comments Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "update_engine/delta_diff_generator.h" 5 #include "update_engine/delta_diff_generator.h"
6 #include <dirent.h>
7 #include <endian.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <unistd.h>
11 #include <algorithm>
12 #include <map>
13 #include <set>
14 #include <string>
15 #include <vector>
16 #include <tr1/memory>
17 #include <zlib.h>
18 #include "chromeos/obsolete_logging.h"
19 #include "base/scoped_ptr.h"
20 #include "update_engine/delta_diff_parser.h"
21 #include "update_engine/gzip.h"
22 #include "update_engine/subprocess.h"
23 #include "update_engine/utils.h"
24
25 using std::map;
26 using std::set;
27 using std::string;
28 using std::vector;
29 using std::tr1::shared_ptr;
30 using chromeos_update_engine::DeltaArchiveManifest;
31
32 namespace chromeos_update_engine {
33
34 namespace {
35
36 const char* kBsdiffPath = "/usr/bin/bsdiff";
37
38 // These structs and methods are helpers for EncodeDataToDeltaFile()
39
40 // Before moving the data into a proto buffer, the data is stored in
41 // memory in these Node and Child structures.
42
43 // Each Node struct represents a file on disk (which can be regular file,
44 // directory, fifo, socket, symlink, etc). Nodes that contain children
45 // (just directories) will have a vector of Child objects. Each child
46 // object has a filename and a pointer to the associated Node. Thus,
47 // filenames for files are stored with their parents, not as part of
48 // the file itself.
49
50 // These structures are easier to work with than the protobuf format
51 // when adding files. When generating a delta file, we add an entry
52 // for each file to a root Node object. Then, we sort each Node's
53 // children vector so the children are stored alphabetically. Then,
54 // we assign an index value to the idx field of each Node by a preorder
55 // tree traversal. The index value assigned to a Node is the index it
56 // will have in the DeltaArchiveManifest protobuf.
57 // Finally, we add each Node to a DeltaArchiveManifest protobuf.
58
59 struct Node;
60
61 struct Child {
62 Child(const string& the_name,
63 Node* the_node)
64 : name(the_name),
65 node(the_node) {}
66 string name;
67 // Use shared_ptr here rather than scoped_ptr b/c this struct will be copied
68 // in stl containers
69 scoped_ptr<Node> node;
70 };
71
72 // For the C++ sort() function.
73 struct ChildLessThan {
74 bool operator()(const shared_ptr<Child>& a, const shared_ptr<Child>& b) {
75 return a->name < b->name;
76 }
77 };
78
79 struct Node {
80 Node()
81 : mode(0),
82 uid(0),
83 gid(0),
84 nlink(0),
85 inode(0),
86 compressed(false),
87 offset(-1),
88 length(0),
89 idx(0) {}
90
91 mode_t mode;
92 uid_t uid;
93 gid_t gid;
94
95 // a file may be a potential hardlink if it's not a directory
96 // and it has a link count > 1.
97 bool IsPotentialHardlink() const {
98 return !S_ISDIR(mode) && nlink > 1;
99 }
100 nlink_t nlink; // number of hard links
101 ino_t inode;
102
103 // data
104 bool compressed;
105 int offset; // -1 means no data
106 int length;
107
108 vector<shared_ptr<Child> > children;
109 int idx;
110 };
111
112 // This function sets *node's variables to match what's at path.
113 // This includes calling this function recursively on all children. Children
114 // not on the same device as the original node will not be considered.
115 // Returns true on success.
116 bool UpdateNodeFromPath(const string& path, Node* node) {
117 // Set metadata
118 struct stat stbuf;
119 TEST_AND_RETURN_FALSE_ERRNO(lstat(path.c_str(), &stbuf) == 0);
120 const dev_t dev = stbuf.st_dev;
121 node->mode = stbuf.st_mode;
122 node->uid = stbuf.st_uid;
123 node->gid = stbuf.st_gid;
124 node->nlink = stbuf.st_nlink;
125 node->inode = stbuf.st_ino;
126 if (!S_ISDIR(node->mode)) {
127 return true;
128 }
129
130 DIR* dir = opendir(path.c_str());
131 TEST_AND_RETURN_FALSE(dir);
132
133 struct dirent entry;
134 struct dirent* dir_entry;
135
136 for (;;) {
137 TEST_AND_RETURN_FALSE_ERRNO(readdir_r(dir, &entry, &dir_entry) == 0);
138 if (!dir_entry) {
139 // done
140 break;
141 }
142 if (!strcmp(".", dir_entry->d_name))
143 continue;
144 if (!strcmp("..", dir_entry->d_name))
145 continue;
146
147 string child_path = path + "/" + dir_entry->d_name;
148 struct stat child_stbuf;
149 TEST_AND_RETURN_FALSE_ERRNO(lstat(child_path.c_str(), &child_stbuf) == 0);
150 // make sure it's on the same dev
151 if (child_stbuf.st_dev != dev)
152 continue;
153 shared_ptr<Child> child(new Child(dir_entry->d_name, new Node));
154 node->children.push_back(child);
155 TEST_AND_RETURN_FALSE(UpdateNodeFromPath(path + "/" + child->name,
156 child->node.get()));
157 }
158 TEST_AND_RETURN_FALSE_ERRNO(closedir(dir) == 0);
159 // Done with all subdirs. sort children.
160 sort(node->children.begin(), node->children.end(), ChildLessThan());
161 return true;
162 }
163
164 // We go through n setting the index value of each Node to
165 // *next_index_value, then increment next_index_value.
166 // We then recursively assign index values to children.
167 // The first caller should call this with *next_index_value == 0 and
168 // the root Node, thus setting the root Node's index to 0.
169 void PopulateChildIndexes(Node* n, int* next_index_value) {
170 n->idx = (*next_index_value)++;
171 for (unsigned int i = 0; i < n->children.size(); i++) {
172 PopulateChildIndexes(n->children[i]->node.get(), next_index_value);
173 }
174 }
175
176 // This converts a Node tree rooted at n into a DeltaArchiveManifest.
177 void NodeToDeltaArchiveManifest(Node* n, DeltaArchiveManifest* archive,
178 map<ino_t, string>* hard_links,
179 const string& path) {
180 DeltaArchiveManifest_File *f = archive->add_files();
181 f->set_mode(n->mode);
182 f->set_uid(n->uid);
183 f->set_gid(n->gid);
184 if (utils::MapContainsKey(*hard_links, n->inode)) {
185 // We have a hard link
186 CHECK(!S_ISDIR(n->mode));
187 f->set_hardlink_path((*hard_links)[n->inode]);
188 } else if (n->IsPotentialHardlink()) {
189 (*hard_links)[n->inode] = path;
190 }
191 if (!S_ISDIR(n->mode))
192 return;
193 for (unsigned int i = 0; i < n->children.size(); i++) {
194 DeltaArchiveManifest_File_Child* child = f->add_children();
195 child->set_name(n->children[i]->name);
196 child->set_index(n->children[i]->node->idx);
197 }
198 for (unsigned int i = 0; i < n->children.size(); i++) {
199 NodeToDeltaArchiveManifest(n->children[i]->node.get(), archive, hard_links,
200 path + "/" + n->children[i]->name);
201 }
202 }
203
204 } // namespace {}
205
206 // For each file in archive, write a delta for it into out_file
207 // and update 'file' to refer to the delta.
208 // This is a recursive function. Returns true on success.
209 bool DeltaDiffGenerator::WriteFileDiffsToDeltaFile(
210 DeltaArchiveManifest* archive,
211 DeltaArchiveManifest_File* file,
212 const string& file_name,
213 const string& old_path,
214 const string& new_path,
215 FileWriter* out_file_writer,
216 int* out_file_length,
217 set<string> always_full_target_paths,
218 const string& force_compress_dev_path) {
219 TEST_AND_RETURN_FALSE(file->has_mode());
220
221 // Stat the actual file, too
222 struct stat stbuf;
223 TEST_AND_RETURN_FALSE_ERRNO(lstat((new_path + "/" + file_name).c_str(),
224 &stbuf) == 0);
225 TEST_AND_RETURN_FALSE(stbuf.st_mode == file->mode());
226
227 // See if we're a directory or not
228 if (S_ISDIR(file->mode())) {
229 for (int i = 0; i < file->children_size(); i++) {
230 DeltaArchiveManifest_File_Child* child = file->mutable_children(i);
231 DeltaArchiveManifest_File* child_file =
232 archive->mutable_files(child->index());
233 string recurse_old_path = old_path;
234 string recurse_new_path = new_path;
235 if (!file_name.empty()) {
236 recurse_new_path += "/" + file_name;
237 recurse_old_path += "/" + file_name;
238 }
239 TEST_AND_RETURN_FALSE(WriteFileDiffsToDeltaFile(
240 archive,
241 child_file,
242 child->name(),
243 recurse_old_path,
244 recurse_new_path,
245 out_file_writer,
246 out_file_length,
247 always_full_target_paths,
248 force_compress_dev_path));
249 }
250 return true;
251 }
252
253 if (S_ISFIFO(file->mode()) || S_ISSOCK(file->mode()) ||
254 file->has_hardlink_path()) {
255 // These don't store any additional data
256 return true;
257 }
258
259 vector<char> data;
260 bool should_compress = true;
261 bool format_set = false;
262 DeltaArchiveManifest_File_DataFormat format;
263 if (S_ISLNK(file->mode())) {
264 TEST_AND_RETURN_FALSE(EncodeLink(new_path + "/" + file_name, &data));
265 } else if (S_ISCHR(file->mode()) || S_ISBLK(file->mode())) {
266 TEST_AND_RETURN_FALSE(EncodeDev(stbuf, &data, &format,
267 new_path + "/" + file_name ==
268 force_compress_dev_path));
269 format_set = true;
270 } else if (S_ISREG(file->mode())) {
271 // regular file. We may use a delta here.
272 const bool avoid_diff = utils::SetContainsKey(always_full_target_paths,
273 new_path + "/" + file_name);
274 bool no_change = false;
275 TEST_AND_RETURN_FALSE(EncodeFile(old_path, new_path, file_name,
276 avoid_diff, &format, &data, &no_change));
277 if (no_change) {
278 // No data change. We're done!
279 return true;
280 }
281 should_compress = false;
282 format_set = true;
283 if ((format == DeltaArchiveManifest_File_DataFormat_BSDIFF) ||
284 (format == DeltaArchiveManifest_File_DataFormat_FULL_GZ))
285 TEST_AND_RETURN_FALSE(!data.empty());
286 } else {
287 // Should never get here; unhandled mode type.
288 LOG(ERROR) << "Unhandled mode type: " << file->mode();
289 return false;
290 }
291
292 if (!format_set) {
293 // Pick a format now
294 vector<char> compressed_data;
295 TEST_AND_RETURN_FALSE(GzipCompress(data, &compressed_data));
296 if (compressed_data.size() < data.size()) {
297 format = DeltaArchiveManifest_File_DataFormat_FULL_GZ;
298 data.swap(compressed_data);
299 } else {
300 format = DeltaArchiveManifest_File_DataFormat_FULL;
301 }
302 format_set = true;
303 }
304
305 TEST_AND_RETURN_FALSE(format_set);
306 file->set_data_format(format);
307 file->set_data_offset(*out_file_length);
308 TEST_AND_RETURN_FALSE(static_cast<ssize_t>(data.size()) ==
309 out_file_writer->Write(&data[0], data.size()));
310 file->set_data_length(data.size());
311 *out_file_length += data.size();
312 return true;
313 }
314
315 bool DeltaDiffGenerator::EncodeLink(const string& path, vector<char>* out) {
316 // Store symlink path as file data
317 vector<char> link_data(4096);
318 int rc = readlink(path.c_str(), &link_data[0], link_data.size());
319 TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
320 link_data.resize(rc);
321 out->swap(link_data);
322 return true;
323 }
324
325 bool DeltaDiffGenerator::EncodeDev(
326 const struct stat& stbuf,
327 vector<char>* out,
328 DeltaArchiveManifest_File_DataFormat* format,
329 bool force_compression) {
330 LinuxDevice dev;
331 dev.set_major(major(stbuf.st_rdev));
332 dev.set_minor(minor(stbuf.st_rdev));
333 out->resize(dev.ByteSize());
334 TEST_AND_RETURN_FALSE(dev.SerializeToArray(&(*out)[0], out->size()));
335 if (force_compression) {
336 vector<char> compressed;
337 TEST_AND_RETURN_FALSE(GzipCompress(*out, &compressed));
338 out->swap(compressed);
339 *format = DeltaArchiveManifest_File_DataFormat_FULL_GZ;
340 } else {
341 *format = DeltaArchiveManifest_File_DataFormat_FULL;
342 }
343 return true;
344 }
345
346 // Encode the file at new_path + "/" + file_name. It may be a binary diff
347 // based on old_path + "/" + file_name. out_data_format will be set to
348 // the format used. out_data_format may not be NULL.
349 bool DeltaDiffGenerator::EncodeFile(
350 const string& old_dir,
351 const string& new_dir,
352 const string& file_name,
353 const bool avoid_diff,
354 DeltaArchiveManifest_File_DataFormat* out_data_format,
355 vector<char>* out,
356 bool* no_change) {
357 TEST_AND_RETURN_FALSE(out_data_format);
358 vector<char> ret;
359 vector<char> full_data;
360 {
361 // First, see the full length:
362 TEST_AND_RETURN_FALSE(utils::ReadFile(new_dir + "/" + file_name,
363 &full_data));
364 vector<char> gz_data;
365 if (!full_data.empty()) {
366 TEST_AND_RETURN_FALSE(GzipCompress(full_data, &gz_data));
367 }
368
369 if (gz_data.size() < full_data.size()) {
370 *out_data_format = DeltaArchiveManifest_File_DataFormat_FULL_GZ;
371 ret.swap(gz_data);
372 } else {
373 *out_data_format = DeltaArchiveManifest_File_DataFormat_FULL;
374 ret = full_data;
375 }
376 }
377
378 if (avoid_diff) {
379 out->swap(ret);
380 return true;
381 }
382
383 struct stat old_stbuf;
384 if ((stat((old_dir + "/" + file_name).c_str(), &old_stbuf) < 0) ||
385 (!S_ISREG(old_stbuf.st_mode))) {
386 // stat() failed or old file is not a regular file. Just send back
387 // the full contents
388 out->swap(ret);
389 return true;
390 }
391 // We have an old file.
392 // First see if the data is _exactly_ the same
393 {
394 vector<char> original_data;
395 TEST_AND_RETURN_FALSE(utils::ReadFile(old_dir + "/" + file_name,
396 &original_data));
397 if (original_data == full_data) {
398 // Original data unchanged in new file.
399 *no_change = true;
400 return true;
401 }
402 }
403
404 // Do a binary diff. For now use bsdiff.
405 const string kPatchFile = "/tmp/delta.patchXXXXXX";
406 vector<char> patch_file_path(kPatchFile.begin(), kPatchFile.end());
407 patch_file_path.push_back('\0');
408
409 int fd = mkstemp(&patch_file_path[0]);
410 if (fd >= 0)
411 close(fd);
412 TEST_AND_RETURN_FALSE(fd != -1);
413
414 vector<string> cmd;
415 cmd.push_back(kBsdiffPath);
416 cmd.push_back(old_dir + "/" + file_name);
417 cmd.push_back(new_dir + "/" + file_name);
418 cmd.push_back(&patch_file_path[0]);
419
420 int rc = 1;
421 vector<char> patch_file;
422 TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc));
423 TEST_AND_RETURN_FALSE(rc == 0);
424 TEST_AND_RETURN_FALSE(utils::ReadFile(&patch_file_path[0], &patch_file));
425 unlink(&patch_file_path[0]);
426
427 if (patch_file.size() < ret.size()) {
428 *out_data_format = DeltaArchiveManifest_File_DataFormat_BSDIFF;
429 ret.swap(patch_file);
430 }
431 out->swap(ret);
432 return true;
433 }
434
435 DeltaArchiveManifest* DeltaDiffGenerator::EncodeMetadataToProtoBuffer(
436 const char* new_path) {
437 Node node;
438 if (!UpdateNodeFromPath(new_path, &node))
439 return NULL;
440 int index = 0;
441 PopulateChildIndexes(&node, &index);
442 DeltaArchiveManifest *ret = new DeltaArchiveManifest;
443 map<ino_t, string> hard_links; // inode -> first found path for inode
444 NodeToDeltaArchiveManifest(&node, ret, &hard_links, "");
445 return ret;
446 }
447
448 bool DeltaDiffGenerator::EncodeDataToDeltaFile(
449 DeltaArchiveManifest* archive,
450 const string& old_path,
451 const string& new_path,
452 const string& out_file,
453 const set<string>& nondiff_paths,
454 const string& force_compress_dev_path) {
455 DirectFileWriter out_writer;
456 int r = out_writer.Open(out_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
457 TEST_AND_RETURN_FALSE_ERRNO(r >= 0);
458 ScopedFileWriterCloser closer(&out_writer);
459 TEST_AND_RETURN_FALSE(out_writer.Write(DeltaDiffParser::kFileMagic,
460 strlen(DeltaDiffParser::kFileMagic))
461 == static_cast<ssize_t>(
462 strlen(DeltaDiffParser::kFileMagic)));
463 // Write 8 null bytes. This will be filled in w/ the offset of
464 // the protobuf.
465 TEST_AND_RETURN_FALSE(out_writer.Write("\0\0\0\0\0\0\0\0", 8) == 8);
466 // 8 more bytes will be filled w/ the protobuf length.
467 TEST_AND_RETURN_FALSE(out_writer.Write("\0\0\0\0\0\0\0\0", 8) == 8);
468 int out_file_length = strlen(DeltaDiffParser::kFileMagic) + 16;
469
470 TEST_AND_RETURN_FALSE(archive->files_size() > 0);
471 DeltaArchiveManifest_File* file = archive->mutable_files(0);
472
473 // nondiff_paths is passed in w/ paths relative to the installed
474 // system (e.g. /etc/fstab), but WriteFileDiffsToDeltaFile requires them
475 // to be the entire path of the new file. We create a new set
476 // here with nondiff_paths expanded.
477 set<string> always_full_target_paths;
478 for (set<string>::const_iterator it = nondiff_paths.begin();
479 it != nondiff_paths.end(); ++it) {
480 always_full_target_paths.insert(new_path + *it);
481 }
482
483 TEST_AND_RETURN_FALSE(WriteFileDiffsToDeltaFile(archive,
484 file,
485 "",
486 old_path,
487 new_path,
488 &out_writer,
489 &out_file_length,
490 always_full_target_paths,
491 force_compress_dev_path));
492
493 // Finally, write the protobuf to the end of the file
494 string encoded_archive;
495 TEST_AND_RETURN_FALSE(archive->SerializeToString(&encoded_archive));
496
497 // Compress the protobuf (which contains filenames)
498 vector<char> compressed_encoded_archive;
499 TEST_AND_RETURN_FALSE(GzipCompressString(encoded_archive,
500 &compressed_encoded_archive));
501
502 TEST_AND_RETURN_FALSE(out_writer.Write(compressed_encoded_archive.data(),
503 compressed_encoded_archive.size()) ==
504 static_cast<ssize_t>(
505 compressed_encoded_archive.size()));
506
507 // write offset of protobut to just after the file magic
508 int64 big_endian_protobuf_offset = htobe64(out_file_length);
509 TEST_AND_RETURN_FALSE(pwrite(out_writer.fd(),
510 &big_endian_protobuf_offset,
511 sizeof(big_endian_protobuf_offset),
512 strlen(DeltaDiffParser::kFileMagic)) ==
513 sizeof(big_endian_protobuf_offset));
514 // Write the size just after the offset
515 int64 pb_length = htobe64(compressed_encoded_archive.size());
516 TEST_AND_RETURN_FALSE(pwrite(out_writer.fd(),
517 &pb_length,
518 sizeof(pb_length),
519 strlen(DeltaDiffParser::kFileMagic) +
520 sizeof(big_endian_protobuf_offset)) ==
521 sizeof(pb_length));
522 return true;
523 }
524
525 } // namespace chromeos_update_engine
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698