OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "update_engine/delta_diff_generator.h" | 5 #include "update_engine/delta_diff_generator.h" |
6 #include <dirent.h> | |
7 #include <endian.h> | |
8 #include <errno.h> | |
9 #include <stdio.h> | |
10 #include <unistd.h> | |
11 #include <algorithm> | |
12 #include <map> | |
13 #include <set> | |
14 #include <string> | |
15 #include <vector> | |
16 #include <tr1/memory> | |
17 #include <zlib.h> | |
18 #include "chromeos/obsolete_logging.h" | |
19 #include "base/scoped_ptr.h" | |
20 #include "update_engine/delta_diff_parser.h" | |
21 #include "update_engine/gzip.h" | |
22 #include "update_engine/subprocess.h" | |
23 #include "update_engine/utils.h" | |
24 | |
25 using std::map; | |
26 using std::set; | |
27 using std::string; | |
28 using std::vector; | |
29 using std::tr1::shared_ptr; | |
30 using chromeos_update_engine::DeltaArchiveManifest; | |
31 | |
32 namespace chromeos_update_engine { | |
33 | |
34 namespace { | |
35 | |
36 const char* kBsdiffPath = "/usr/bin/bsdiff"; | |
37 | |
38 // These structs and methods are helpers for EncodeDataToDeltaFile() | |
39 | |
40 // Before moving the data into a proto buffer, the data is stored in | |
41 // memory in these Node and Child structures. | |
42 | |
43 // Each Node struct represents a file on disk (which can be regular file, | |
44 // directory, fifo, socket, symlink, etc). Nodes that contain children | |
45 // (just directories) will have a vector of Child objects. Each child | |
46 // object has a filename and a pointer to the associated Node. Thus, | |
47 // filenames for files are stored with their parents, not as part of | |
48 // the file itself. | |
49 | |
50 // These structures are easier to work with than the protobuf format | |
51 // when adding files. When generating a delta file, we add an entry | |
52 // for each file to a root Node object. Then, we sort each Node's | |
53 // children vector so the children are stored alphabetically. Then, | |
54 // we assign an index value to the idx field of each Node by a preorder | |
55 // tree traversal. The index value assigned to a Node is the index it | |
56 // will have in the DeltaArchiveManifest protobuf. | |
57 // Finally, we add each Node to a DeltaArchiveManifest protobuf. | |
58 | |
59 struct Node; | |
60 | |
61 struct Child { | |
62 Child(const string& the_name, | |
63 Node* the_node) | |
64 : name(the_name), | |
65 node(the_node) {} | |
66 string name; | |
67 // Use shared_ptr here rather than scoped_ptr b/c this struct will be copied | |
68 // in stl containers | |
69 scoped_ptr<Node> node; | |
70 }; | |
71 | |
72 // For the C++ sort() function. | |
73 struct ChildLessThan { | |
74 bool operator()(const shared_ptr<Child>& a, const shared_ptr<Child>& b) { | |
75 return a->name < b->name; | |
76 } | |
77 }; | |
78 | |
79 struct Node { | |
80 Node() | |
81 : mode(0), | |
82 uid(0), | |
83 gid(0), | |
84 nlink(0), | |
85 inode(0), | |
86 compressed(false), | |
87 offset(-1), | |
88 length(0), | |
89 idx(0) {} | |
90 | |
91 mode_t mode; | |
92 uid_t uid; | |
93 gid_t gid; | |
94 | |
95 // a file may be a potential hardlink if it's not a directory | |
96 // and it has a link count > 1. | |
97 bool IsPotentialHardlink() const { | |
98 return !S_ISDIR(mode) && nlink > 1; | |
99 } | |
100 nlink_t nlink; // number of hard links | |
101 ino_t inode; | |
102 | |
103 // data | |
104 bool compressed; | |
105 int offset; // -1 means no data | |
106 int length; | |
107 | |
108 vector<shared_ptr<Child> > children; | |
109 int idx; | |
110 }; | |
111 | |
112 // This function sets *node's variables to match what's at path. | |
113 // This includes calling this function recursively on all children. Children | |
114 // not on the same device as the original node will not be considered. | |
115 // Returns true on success. | |
116 bool UpdateNodeFromPath(const string& path, Node* node) { | |
117 // Set metadata | |
118 struct stat stbuf; | |
119 TEST_AND_RETURN_FALSE_ERRNO(lstat(path.c_str(), &stbuf) == 0); | |
120 const dev_t dev = stbuf.st_dev; | |
121 node->mode = stbuf.st_mode; | |
122 node->uid = stbuf.st_uid; | |
123 node->gid = stbuf.st_gid; | |
124 node->nlink = stbuf.st_nlink; | |
125 node->inode = stbuf.st_ino; | |
126 if (!S_ISDIR(node->mode)) { | |
127 return true; | |
128 } | |
129 | |
130 DIR* dir = opendir(path.c_str()); | |
131 TEST_AND_RETURN_FALSE(dir); | |
132 | |
133 struct dirent entry; | |
134 struct dirent* dir_entry; | |
135 | |
136 for (;;) { | |
137 TEST_AND_RETURN_FALSE_ERRNO(readdir_r(dir, &entry, &dir_entry) == 0); | |
138 if (!dir_entry) { | |
139 // done | |
140 break; | |
141 } | |
142 if (!strcmp(".", dir_entry->d_name)) | |
143 continue; | |
144 if (!strcmp("..", dir_entry->d_name)) | |
145 continue; | |
146 | |
147 string child_path = path + "/" + dir_entry->d_name; | |
148 struct stat child_stbuf; | |
149 TEST_AND_RETURN_FALSE_ERRNO(lstat(child_path.c_str(), &child_stbuf) == 0); | |
150 // make sure it's on the same dev | |
151 if (child_stbuf.st_dev != dev) | |
152 continue; | |
153 shared_ptr<Child> child(new Child(dir_entry->d_name, new Node)); | |
154 node->children.push_back(child); | |
155 TEST_AND_RETURN_FALSE(UpdateNodeFromPath(path + "/" + child->name, | |
156 child->node.get())); | |
157 } | |
158 TEST_AND_RETURN_FALSE_ERRNO(closedir(dir) == 0); | |
159 // Done with all subdirs. sort children. | |
160 sort(node->children.begin(), node->children.end(), ChildLessThan()); | |
161 return true; | |
162 } | |
163 | |
164 // We go through n setting the index value of each Node to | |
165 // *next_index_value, then increment next_index_value. | |
166 // We then recursively assign index values to children. | |
167 // The first caller should call this with *next_index_value == 0 and | |
168 // the root Node, thus setting the root Node's index to 0. | |
169 void PopulateChildIndexes(Node* n, int* next_index_value) { | |
170 n->idx = (*next_index_value)++; | |
171 for (unsigned int i = 0; i < n->children.size(); i++) { | |
172 PopulateChildIndexes(n->children[i]->node.get(), next_index_value); | |
173 } | |
174 } | |
175 | |
176 // This converts a Node tree rooted at n into a DeltaArchiveManifest. | |
177 void NodeToDeltaArchiveManifest(Node* n, DeltaArchiveManifest* archive, | |
178 map<ino_t, string>* hard_links, | |
179 const string& path) { | |
180 DeltaArchiveManifest_File *f = archive->add_files(); | |
181 f->set_mode(n->mode); | |
182 f->set_uid(n->uid); | |
183 f->set_gid(n->gid); | |
184 if (utils::MapContainsKey(*hard_links, n->inode)) { | |
185 // We have a hard link | |
186 CHECK(!S_ISDIR(n->mode)); | |
187 f->set_hardlink_path((*hard_links)[n->inode]); | |
188 } else if (n->IsPotentialHardlink()) { | |
189 (*hard_links)[n->inode] = path; | |
190 } | |
191 if (!S_ISDIR(n->mode)) | |
192 return; | |
193 for (unsigned int i = 0; i < n->children.size(); i++) { | |
194 DeltaArchiveManifest_File_Child* child = f->add_children(); | |
195 child->set_name(n->children[i]->name); | |
196 child->set_index(n->children[i]->node->idx); | |
197 } | |
198 for (unsigned int i = 0; i < n->children.size(); i++) { | |
199 NodeToDeltaArchiveManifest(n->children[i]->node.get(), archive, hard_links, | |
200 path + "/" + n->children[i]->name); | |
201 } | |
202 } | |
203 | |
204 } // namespace {} | |
205 | |
206 // For each file in archive, write a delta for it into out_file | |
207 // and update 'file' to refer to the delta. | |
208 // This is a recursive function. Returns true on success. | |
209 bool DeltaDiffGenerator::WriteFileDiffsToDeltaFile( | |
210 DeltaArchiveManifest* archive, | |
211 DeltaArchiveManifest_File* file, | |
212 const string& file_name, | |
213 const string& old_path, | |
214 const string& new_path, | |
215 FileWriter* out_file_writer, | |
216 int* out_file_length, | |
217 set<string> always_full_target_paths, | |
218 const string& force_compress_dev_path) { | |
219 TEST_AND_RETURN_FALSE(file->has_mode()); | |
220 | |
221 // Stat the actual file, too | |
222 struct stat stbuf; | |
223 TEST_AND_RETURN_FALSE_ERRNO(lstat((new_path + "/" + file_name).c_str(), | |
224 &stbuf) == 0); | |
225 TEST_AND_RETURN_FALSE(stbuf.st_mode == file->mode()); | |
226 | |
227 // See if we're a directory or not | |
228 if (S_ISDIR(file->mode())) { | |
229 for (int i = 0; i < file->children_size(); i++) { | |
230 DeltaArchiveManifest_File_Child* child = file->mutable_children(i); | |
231 DeltaArchiveManifest_File* child_file = | |
232 archive->mutable_files(child->index()); | |
233 string recurse_old_path = old_path; | |
234 string recurse_new_path = new_path; | |
235 if (!file_name.empty()) { | |
236 recurse_new_path += "/" + file_name; | |
237 recurse_old_path += "/" + file_name; | |
238 } | |
239 TEST_AND_RETURN_FALSE(WriteFileDiffsToDeltaFile( | |
240 archive, | |
241 child_file, | |
242 child->name(), | |
243 recurse_old_path, | |
244 recurse_new_path, | |
245 out_file_writer, | |
246 out_file_length, | |
247 always_full_target_paths, | |
248 force_compress_dev_path)); | |
249 } | |
250 return true; | |
251 } | |
252 | |
253 if (S_ISFIFO(file->mode()) || S_ISSOCK(file->mode()) || | |
254 file->has_hardlink_path()) { | |
255 // These don't store any additional data | |
256 return true; | |
257 } | |
258 | |
259 vector<char> data; | |
260 bool should_compress = true; | |
261 bool format_set = false; | |
262 DeltaArchiveManifest_File_DataFormat format; | |
263 if (S_ISLNK(file->mode())) { | |
264 TEST_AND_RETURN_FALSE(EncodeLink(new_path + "/" + file_name, &data)); | |
265 } else if (S_ISCHR(file->mode()) || S_ISBLK(file->mode())) { | |
266 TEST_AND_RETURN_FALSE(EncodeDev(stbuf, &data, &format, | |
267 new_path + "/" + file_name == | |
268 force_compress_dev_path)); | |
269 format_set = true; | |
270 } else if (S_ISREG(file->mode())) { | |
271 // regular file. We may use a delta here. | |
272 const bool avoid_diff = utils::SetContainsKey(always_full_target_paths, | |
273 new_path + "/" + file_name); | |
274 bool no_change = false; | |
275 TEST_AND_RETURN_FALSE(EncodeFile(old_path, new_path, file_name, | |
276 avoid_diff, &format, &data, &no_change)); | |
277 if (no_change) { | |
278 // No data change. We're done! | |
279 return true; | |
280 } | |
281 should_compress = false; | |
282 format_set = true; | |
283 if ((format == DeltaArchiveManifest_File_DataFormat_BSDIFF) || | |
284 (format == DeltaArchiveManifest_File_DataFormat_FULL_GZ)) | |
285 TEST_AND_RETURN_FALSE(!data.empty()); | |
286 } else { | |
287 // Should never get here; unhandled mode type. | |
288 LOG(ERROR) << "Unhandled mode type: " << file->mode(); | |
289 return false; | |
290 } | |
291 | |
292 if (!format_set) { | |
293 // Pick a format now | |
294 vector<char> compressed_data; | |
295 TEST_AND_RETURN_FALSE(GzipCompress(data, &compressed_data)); | |
296 if (compressed_data.size() < data.size()) { | |
297 format = DeltaArchiveManifest_File_DataFormat_FULL_GZ; | |
298 data.swap(compressed_data); | |
299 } else { | |
300 format = DeltaArchiveManifest_File_DataFormat_FULL; | |
301 } | |
302 format_set = true; | |
303 } | |
304 | |
305 TEST_AND_RETURN_FALSE(format_set); | |
306 file->set_data_format(format); | |
307 file->set_data_offset(*out_file_length); | |
308 TEST_AND_RETURN_FALSE(static_cast<ssize_t>(data.size()) == | |
309 out_file_writer->Write(&data[0], data.size())); | |
310 file->set_data_length(data.size()); | |
311 *out_file_length += data.size(); | |
312 return true; | |
313 } | |
314 | |
315 bool DeltaDiffGenerator::EncodeLink(const string& path, vector<char>* out) { | |
316 // Store symlink path as file data | |
317 vector<char> link_data(4096); | |
318 int rc = readlink(path.c_str(), &link_data[0], link_data.size()); | |
319 TEST_AND_RETURN_FALSE_ERRNO(rc >= 0); | |
320 link_data.resize(rc); | |
321 out->swap(link_data); | |
322 return true; | |
323 } | |
324 | |
325 bool DeltaDiffGenerator::EncodeDev( | |
326 const struct stat& stbuf, | |
327 vector<char>* out, | |
328 DeltaArchiveManifest_File_DataFormat* format, | |
329 bool force_compression) { | |
330 LinuxDevice dev; | |
331 dev.set_major(major(stbuf.st_rdev)); | |
332 dev.set_minor(minor(stbuf.st_rdev)); | |
333 out->resize(dev.ByteSize()); | |
334 TEST_AND_RETURN_FALSE(dev.SerializeToArray(&(*out)[0], out->size())); | |
335 if (force_compression) { | |
336 vector<char> compressed; | |
337 TEST_AND_RETURN_FALSE(GzipCompress(*out, &compressed)); | |
338 out->swap(compressed); | |
339 *format = DeltaArchiveManifest_File_DataFormat_FULL_GZ; | |
340 } else { | |
341 *format = DeltaArchiveManifest_File_DataFormat_FULL; | |
342 } | |
343 return true; | |
344 } | |
345 | |
346 // Encode the file at new_path + "/" + file_name. It may be a binary diff | |
347 // based on old_path + "/" + file_name. out_data_format will be set to | |
348 // the format used. out_data_format may not be NULL. | |
349 bool DeltaDiffGenerator::EncodeFile( | |
350 const string& old_dir, | |
351 const string& new_dir, | |
352 const string& file_name, | |
353 const bool avoid_diff, | |
354 DeltaArchiveManifest_File_DataFormat* out_data_format, | |
355 vector<char>* out, | |
356 bool* no_change) { | |
357 TEST_AND_RETURN_FALSE(out_data_format); | |
358 vector<char> ret; | |
359 vector<char> full_data; | |
360 { | |
361 // First, see the full length: | |
362 TEST_AND_RETURN_FALSE(utils::ReadFile(new_dir + "/" + file_name, | |
363 &full_data)); | |
364 vector<char> gz_data; | |
365 if (!full_data.empty()) { | |
366 TEST_AND_RETURN_FALSE(GzipCompress(full_data, &gz_data)); | |
367 } | |
368 | |
369 if (gz_data.size() < full_data.size()) { | |
370 *out_data_format = DeltaArchiveManifest_File_DataFormat_FULL_GZ; | |
371 ret.swap(gz_data); | |
372 } else { | |
373 *out_data_format = DeltaArchiveManifest_File_DataFormat_FULL; | |
374 ret = full_data; | |
375 } | |
376 } | |
377 | |
378 if (avoid_diff) { | |
379 out->swap(ret); | |
380 return true; | |
381 } | |
382 | |
383 struct stat old_stbuf; | |
384 if ((stat((old_dir + "/" + file_name).c_str(), &old_stbuf) < 0) || | |
385 (!S_ISREG(old_stbuf.st_mode))) { | |
386 // stat() failed or old file is not a regular file. Just send back | |
387 // the full contents | |
388 out->swap(ret); | |
389 return true; | |
390 } | |
391 // We have an old file. | |
392 // First see if the data is _exactly_ the same | |
393 { | |
394 vector<char> original_data; | |
395 TEST_AND_RETURN_FALSE(utils::ReadFile(old_dir + "/" + file_name, | |
396 &original_data)); | |
397 if (original_data == full_data) { | |
398 // Original data unchanged in new file. | |
399 *no_change = true; | |
400 return true; | |
401 } | |
402 } | |
403 | |
404 // Do a binary diff. For now use bsdiff. | |
405 const string kPatchFile = "/tmp/delta.patchXXXXXX"; | |
406 vector<char> patch_file_path(kPatchFile.begin(), kPatchFile.end()); | |
407 patch_file_path.push_back('\0'); | |
408 | |
409 int fd = mkstemp(&patch_file_path[0]); | |
410 if (fd >= 0) | |
411 close(fd); | |
412 TEST_AND_RETURN_FALSE(fd != -1); | |
413 | |
414 vector<string> cmd; | |
415 cmd.push_back(kBsdiffPath); | |
416 cmd.push_back(old_dir + "/" + file_name); | |
417 cmd.push_back(new_dir + "/" + file_name); | |
418 cmd.push_back(&patch_file_path[0]); | |
419 | |
420 int rc = 1; | |
421 vector<char> patch_file; | |
422 TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc)); | |
423 TEST_AND_RETURN_FALSE(rc == 0); | |
424 TEST_AND_RETURN_FALSE(utils::ReadFile(&patch_file_path[0], &patch_file)); | |
425 unlink(&patch_file_path[0]); | |
426 | |
427 if (patch_file.size() < ret.size()) { | |
428 *out_data_format = DeltaArchiveManifest_File_DataFormat_BSDIFF; | |
429 ret.swap(patch_file); | |
430 } | |
431 out->swap(ret); | |
432 return true; | |
433 } | |
434 | |
435 DeltaArchiveManifest* DeltaDiffGenerator::EncodeMetadataToProtoBuffer( | |
436 const char* new_path) { | |
437 Node node; | |
438 if (!UpdateNodeFromPath(new_path, &node)) | |
439 return NULL; | |
440 int index = 0; | |
441 PopulateChildIndexes(&node, &index); | |
442 DeltaArchiveManifest *ret = new DeltaArchiveManifest; | |
443 map<ino_t, string> hard_links; // inode -> first found path for inode | |
444 NodeToDeltaArchiveManifest(&node, ret, &hard_links, ""); | |
445 return ret; | |
446 } | |
447 | |
448 bool DeltaDiffGenerator::EncodeDataToDeltaFile( | |
449 DeltaArchiveManifest* archive, | |
450 const string& old_path, | |
451 const string& new_path, | |
452 const string& out_file, | |
453 const set<string>& nondiff_paths, | |
454 const string& force_compress_dev_path) { | |
455 DirectFileWriter out_writer; | |
456 int r = out_writer.Open(out_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); | |
457 TEST_AND_RETURN_FALSE_ERRNO(r >= 0); | |
458 ScopedFileWriterCloser closer(&out_writer); | |
459 TEST_AND_RETURN_FALSE(out_writer.Write(DeltaDiffParser::kFileMagic, | |
460 strlen(DeltaDiffParser::kFileMagic)) | |
461 == static_cast<ssize_t>( | |
462 strlen(DeltaDiffParser::kFileMagic))); | |
463 // Write 8 null bytes. This will be filled in w/ the offset of | |
464 // the protobuf. | |
465 TEST_AND_RETURN_FALSE(out_writer.Write("\0\0\0\0\0\0\0\0", 8) == 8); | |
466 // 8 more bytes will be filled w/ the protobuf length. | |
467 TEST_AND_RETURN_FALSE(out_writer.Write("\0\0\0\0\0\0\0\0", 8) == 8); | |
468 int out_file_length = strlen(DeltaDiffParser::kFileMagic) + 16; | |
469 | |
470 TEST_AND_RETURN_FALSE(archive->files_size() > 0); | |
471 DeltaArchiveManifest_File* file = archive->mutable_files(0); | |
472 | |
473 // nondiff_paths is passed in w/ paths relative to the installed | |
474 // system (e.g. /etc/fstab), but WriteFileDiffsToDeltaFile requires them | |
475 // to be the entire path of the new file. We create a new set | |
476 // here with nondiff_paths expanded. | |
477 set<string> always_full_target_paths; | |
478 for (set<string>::const_iterator it = nondiff_paths.begin(); | |
479 it != nondiff_paths.end(); ++it) { | |
480 always_full_target_paths.insert(new_path + *it); | |
481 } | |
482 | |
483 TEST_AND_RETURN_FALSE(WriteFileDiffsToDeltaFile(archive, | |
484 file, | |
485 "", | |
486 old_path, | |
487 new_path, | |
488 &out_writer, | |
489 &out_file_length, | |
490 always_full_target_paths, | |
491 force_compress_dev_path)); | |
492 | |
493 // Finally, write the protobuf to the end of the file | |
494 string encoded_archive; | |
495 TEST_AND_RETURN_FALSE(archive->SerializeToString(&encoded_archive)); | |
496 | |
497 // Compress the protobuf (which contains filenames) | |
498 vector<char> compressed_encoded_archive; | |
499 TEST_AND_RETURN_FALSE(GzipCompressString(encoded_archive, | |
500 &compressed_encoded_archive)); | |
501 | |
502 TEST_AND_RETURN_FALSE(out_writer.Write(compressed_encoded_archive.data(), | |
503 compressed_encoded_archive.size()) == | |
504 static_cast<ssize_t>( | |
505 compressed_encoded_archive.size())); | |
506 | |
507 // write offset of protobut to just after the file magic | |
508 int64 big_endian_protobuf_offset = htobe64(out_file_length); | |
509 TEST_AND_RETURN_FALSE(pwrite(out_writer.fd(), | |
510 &big_endian_protobuf_offset, | |
511 sizeof(big_endian_protobuf_offset), | |
512 strlen(DeltaDiffParser::kFileMagic)) == | |
513 sizeof(big_endian_protobuf_offset)); | |
514 // Write the size just after the offset | |
515 int64 pb_length = htobe64(compressed_encoded_archive.size()); | |
516 TEST_AND_RETURN_FALSE(pwrite(out_writer.fd(), | |
517 &pb_length, | |
518 sizeof(pb_length), | |
519 strlen(DeltaDiffParser::kFileMagic) + | |
520 sizeof(big_endian_protobuf_offset)) == | |
521 sizeof(pb_length)); | |
522 return true; | |
523 } | |
524 | |
525 } // namespace chromeos_update_engine | |
OLD | NEW |