Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(69)

Side by Side Diff: metadata.cc

Issue 5684002: Add support for bsdiff of file system metadata blocks (Closed) Base URL: http://git.chromium.org/git/update_engine.git@master
Patch Set: Forgot newly added metadata processing files. Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <string>
7 #include <vector>
8
9 #include <base/string_util.h>
10 #include <et/com_err.h>
11 #include <ext2fs/ext2_io.h>
12 #include <ext2fs/ext2fs.h>
13
14 #include "update_engine/bzip.h"
15 #include "update_engine/delta_diff_generator.h"
16 #include "update_engine/extent_ranges.h"
17 #include "update_engine/graph_utils.h"
18 #include "update_engine/metadata.h"
19 #include "update_engine/utils.h"
20
21 using std::min;
22 using std::string;
23 using std::vector;
24
25 namespace chromeos_update_engine {
26
27 namespace {
28 const size_t kBlockSize = 4096;
adlr 2010/12/15 03:11:23 unindent
thieule 2010/12/15 19:57:01 Done.
29 } // namespace {}
30
31 typedef DeltaDiffGenerator::Block Block;
32
33 // Read data from the specified extents.
34 bool ReadExtentsData(const ext2_filsys fs,
petkov 2010/12/15 18:40:32 this and all other internal only routines should b
thieule 2010/12/15 19:57:01 Done.
35 const vector<Extent>& extents,
36 vector<char>* data) {
37 // Resize the data buffer to hold all data in the extents
38 size_t num_data_blocks = 0;
39 for (vector<Extent>::const_iterator it = extents.begin();
40 it != extents.end(); it++) {
41 num_data_blocks += it->num_blocks();
42 }
43
44 data->resize(num_data_blocks * kBlockSize);
45
46 // Read in the data blocks
adlr 2010/12/15 03:11:23 would "metadata blocks" be more appropriate?
thieule 2010/12/15 19:57:01 ReadExtentsData is generic and just reads data fro
47 const size_t max_read_blocks = 256;
adlr 2010/12/15 03:11:23 kMaxReadBlocks
thieule 2010/12/15 19:57:01 Done.
48 vector<Block>::size_type blocks_copied_count = 0;
49 for (vector<Extent>::const_iterator it = extents.begin();
50 it != extents.end(); it++) {
51 vector<Block>::size_type blocks_read = 0;
52 while (blocks_read < it->num_blocks()) {
53 const int copy_block_cnt =
54 min(max_read_blocks,
55 static_cast<vector<char>::size_type>(
adlr 2010/12/15 03:11:23 why vector<char>? you could just use size_t or vec
thieule 2010/12/15 19:57:01 Had a synapse misfire here. On 2010/12/15 03:11:2
56 it->num_blocks() - blocks_read));
57 TEST_AND_RETURN_FALSE_ERRCODE(
58 io_channel_read_blk(fs->io,
59 it->start_block() + blocks_read,
60 copy_block_cnt,
61 &(*data)[blocks_copied_count * kBlockSize]));
62 blocks_read += copy_block_cnt;
63 blocks_copied_count += copy_block_cnt;
64 }
65 }
66
67 return true;
68 }
69
70 // Compute the bsdiff between two metadata blobs.
71 bool ComputeMetadataBsdiff(const vector<char>& old_metadata,
72 const vector<char>& new_metadata,
73 vector<char>* bsdiff_delta) {
74 const string kTempFileTemplate("/tmp/CrAU_temp_data.XXXXXX");
75
76 // Write the metadata buffers to temporary files
77 int old_fd;
78 string temp_old_file_path;
79 TEST_AND_RETURN_FALSE(
80 utils::MakeTempFile(kTempFileTemplate, &temp_old_file_path, &old_fd));
81 TEST_AND_RETURN_FALSE(old_fd >= 0);
82 ScopedPathUnlinker temp_old_file_path_unlinker(temp_old_file_path);
83 ScopedFdCloser old_fd_closer(&old_fd);
84 TEST_AND_RETURN_FALSE(utils::WriteAll(old_fd,
85 &old_metadata[0],
86 old_metadata.size()));
87
88 int new_fd;
89 string temp_new_file_path;
90 TEST_AND_RETURN_FALSE(
91 utils::MakeTempFile(kTempFileTemplate, &temp_new_file_path, &new_fd));
92 TEST_AND_RETURN_FALSE(new_fd >= 0);
93 ScopedPathUnlinker temp_new_file_path_unlinker(temp_new_file_path);
94 ScopedFdCloser new_fd_closer(&new_fd);
95 TEST_AND_RETURN_FALSE(utils::WriteAll(new_fd,
96 &new_metadata[0],
97 new_metadata.size()));
98
99 // Perform bsdiff on these files
100 TEST_AND_RETURN_FALSE(
101 BsdiffFiles(temp_old_file_path, temp_new_file_path, bsdiff_delta));
102 CHECK_GT(bsdiff_delta->size(), 0);
adlr 2010/12/15 03:11:23 do you really want to crash if this fails? would T
thieule 2010/12/15 19:57:01 I've removed this, it's a redundant check. The be
103
104 return true;
105 }
106
107 // Add the specified metadata extents to the graph and blocks vector.
108 bool AddMetadataExtents(Graph* graph,
109 vector<Block>* blocks,
110 const ext2_filsys fs_old,
111 const ext2_filsys fs_new,
112 const string& metadata_name,
113 const vector<Extent>& extents,
114 int data_fd,
115 off_t* data_file_size) {
116 vector<char> data; // Data blob that will be written to delta file.
117
118 // Read in the metadata blocks from the old and new image.
adlr 2010/12/15 03:11:23 add { (more below)
thieule 2010/12/15 19:57:01 Done.
119 vector<char> old_data;
120 TEST_AND_RETURN_FALSE(ReadExtentsData(fs_old, extents, &old_data));
121
122 vector<char> new_data;
123 TEST_AND_RETURN_FALSE(ReadExtentsData(fs_new, extents, &new_data));
124
125 // Determine the best way to compress this.
126 vector<char> new_data_bz;
127 TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz));
128 CHECK(!new_data_bz.empty());
129
130 DeltaArchiveManifest_InstallOperation op;
131 size_t current_best_size = 0;
132 if (new_data.size() <= new_data_bz.size()) {
133 op.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
134 current_best_size = new_data.size();
135 data = new_data;
136 } else {
137 op.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
138 current_best_size = new_data_bz.size();
139 data = new_data_bz;
140 }
141
142 if (old_data == new_data) {
143 // No change in data.
144 op.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
145 current_best_size = 0;
146 data.clear();
147 } else {
148 // Try bsdiff of old to new data
149 vector<char> bsdiff_delta;
150 TEST_AND_RETURN_FALSE(ComputeMetadataBsdiff(old_data,
151 new_data,
152 &bsdiff_delta));
153 CHECK_GT(bsdiff_delta.size(), 0);
154
155 if (bsdiff_delta.size() < current_best_size) {
156 op.set_type(DeltaArchiveManifest_InstallOperation_Type_BSDIFF);
157 current_best_size = bsdiff_delta.size();
158 data = bsdiff_delta;
159 }
160 }
161
adlr 2010/12/15 03:11:23 add } here this deallocates the extra copy/copies
thieule 2010/12/15 19:57:01 Done.
162 CHECK_EQ(data.size(), current_best_size);
163
164 // Set the source and dest extents to be the same since the filesystem
165 // structures are identical
166 if (op.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE ||
167 op.type() == DeltaArchiveManifest_InstallOperation_Type_BSDIFF) {
168 DeltaDiffGenerator::StoreExtents(extents, op.mutable_src_extents());
169 op.set_src_length(old_data.size());
170 }
171
172 DeltaDiffGenerator::StoreExtents(extents, op.mutable_dst_extents());
173 op.set_dst_length(new_data.size());
174
175 // Write data to output file
176 if (op.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE) {
177 op.set_data_offset(*data_file_size);
178 op.set_data_length(data.size());
179 }
180
181 TEST_AND_RETURN_FALSE(utils::WriteAll(data_fd, &data[0], data.size()));
182 *data_file_size += data.size();
183
184 // Now, insert into graph and blocks vector
185 graph->resize(graph->size() + 1);
186 Vertex::Index vertex = graph->size() - 1;
187 (*graph)[vertex].op = op;
188 CHECK((*graph)[vertex].op.has_type());
189 (*graph)[vertex].file_name = metadata_name;
190
191 TEST_AND_RETURN_FALSE(AddInstallOpToBlocksVector((*graph)[vertex].op,
192 blocks,
193 *graph,
194 vertex));
195
196 return true;
197 }
198
199 // Reads the file system metadata extents.
200 bool ReadFilesystemMetadata(Graph* graph,
201 vector<Block>* blocks,
202 const ext2_filsys fs_old,
203 const ext2_filsys fs_new,
204 int data_fd,
205 off_t* data_file_size) {
206 LOG(INFO) << "Processing <rootfs-metadata>";
207
208 // Read all the extents that belong to the main file system metadata.
209 // The metadata blocks are at the start of each block group and goes
210 // until the end of the inode table.
211 for (dgrp_t bg = 0; bg < fs_old->group_desc_count; bg++) {
212 struct ext2_group_desc* group_desc = &fs_old->group_desc[bg];
213 __u32 num_metadata_blocks = (group_desc->bg_inode_table +
214 fs_old->inode_blocks_per_group) -
215 (bg * fs_old->super->s_blocks_per_group);
216 __u32 bg_start_block = bg * fs_old->super->s_blocks_per_group;
217
218 // Due to bsdiff slowness, we're going to break each block group down
219 // into metadata chunks and feed them to bsdiff.
220 __u32 num_chunks = 4;
221 __u32 blocks_per_chunk = num_metadata_blocks / num_chunks;
222 __u32 curr_block = bg_start_block;
223 for (__u32 chunk = 0; chunk < num_chunks; chunk++) {
224 Extent extent;
225 if (chunk < num_chunks - 1) {
226 extent = ExtentForRange(curr_block, blocks_per_chunk);
227 } else {
228 extent = ExtentForRange(curr_block,
229 bg_start_block + num_metadata_blocks -
230 curr_block);
231 }
232
233 vector<Extent> extents;
234 extents.push_back(extent);
235
236 string metadata_name = StringPrintf("<rootfs-bg-%d-%d-metadata>",
237 bg, chunk);
238
239 LOG(INFO) << "Processing " << metadata_name;
240
241 TEST_AND_RETURN_FALSE(AddMetadataExtents(graph,
242 blocks,
243 fs_old,
244 fs_new,
245 metadata_name,
246 extents,
247 data_fd,
248 data_file_size));
249
250 curr_block += blocks_per_chunk;
251 }
252 }
253
254 return true;
255 }
256
257 // Processes all blocks belonging to an inode
258 int ProcessInodeAllBlocks(ext2_filsys fs,
259 blk_t* blocknr,
260 e2_blkcnt_t blockcnt,
261 blk_t ref_blk,
262 int ref_offset,
263 void* priv) {
264 vector<Extent>* extents = static_cast<vector<Extent>*>(priv);
265 graph_utils::AppendBlockToExtents(extents, *blocknr);
266 return 0;
267 }
268
269 // Processes only indirect, double indirect or triple indirect metadata
270 // blocks belonging to an inode
271 int ProcessInodeMetadataBlocks(ext2_filsys fs,
272 blk_t* blocknr,
273 e2_blkcnt_t blockcnt,
274 blk_t ref_blk,
275 int ref_offset,
276 void* priv) {
277 vector<Extent>* extents = static_cast<vector<Extent>*>(priv);
278 if (blockcnt < 0) {
279 graph_utils::AppendBlockToExtents(extents, *blocknr);
280 }
281 return 0;
282 }
283
284 // Read inode metadata blocks.
285 bool ReadInodeMetadata(Graph* graph,
286 vector<Block>* blocks,
287 const ext2_filsys fs_old,
288 const ext2_filsys fs_new,
289 int data_fd,
290 off_t* data_file_size) {
291 TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_read_inode_bitmap(fs_old));
292
petkov 2010/12/15 18:40:32 remove blank line
thieule 2010/12/15 19:57:01 Done.
293 TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_read_inode_bitmap(fs_new));
294
295 ext2_inode_scan iscan;
296 TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_open_inode_scan(fs_old, 0, &iscan));
297
298 ext2_ino_t ino;
299 ext2_inode old_inode;
300 while (true) {
301 // Get the next inode on both file systems
302 errcode_t error = ext2fs_get_next_inode(iscan, &ino, &old_inode);
adlr 2010/12/15 03:11:23 is there a TEST_AND_RETURN_FALSE* thing you can us
thieule 2010/12/15 19:57:01 If we return false here, it will cause the delta_g
303 if (error) {
304 LOG(ERROR) << "Failed to retrieve next inode (" << error << ")";
305 break;
306 }
307
308 if (ino == 0) {
309 break;
310 }
311
312 if (ino == EXT2_RESIZE_INO) {
313 continue;
314 }
315
316 ext2_inode new_inode;
317 error = ext2fs_read_inode(fs_new, ino, &new_inode);
318 if (error) {
319 LOG(ERROR) << "Failed to retrieve new inode (" << error << ")";
320 continue;
321 }
322
323 // Skip inodes that are not in use
324 if (!ext2fs_test_inode_bitmap(fs_old->inode_map, ino) ||
325 !ext2fs_test_inode_bitmap(fs_new->inode_map, ino)) {
326 continue;
327 }
328
329 // Skip inodes that have no data blocks
330 if (old_inode.i_blocks == 0 || new_inode.i_blocks == 0) {
331 continue;
332 }
333
334 // Skip inodes that are not the same type
335 bool is_old_dir = (ext2fs_check_directory(fs_old, ino) == 0);
336 bool is_new_dir = (ext2fs_check_directory(fs_new, ino) == 0);
337 if (is_old_dir != is_new_dir) {
338 continue;
339 }
340
341 // Process the inodes metadata blocks
342 // For normal files, metadata blocks are indirect, double indirect
343 // and triple indirect blocks (no data blocks). For directories and
344 // the journal, all blocks are considered metadata blocks.
345 LOG(INFO) << "Processing inode " << ino << " metadata";
346
347 bool all_blocks = ((ino == EXT2_JOURNAL_INO) || is_old_dir || is_new_dir);
348
349 vector<Extent> old_extents;
350 error = ext2fs_block_iterate2(fs_old, ino, 0, NULL,
351 all_blocks ? ProcessInodeAllBlocks :
352 ProcessInodeMetadataBlocks,
353 &old_extents);
354 if (error) {
355 LOG(ERROR) << "Failed to enumerate old inode " << ino
356 << " blocks (" << error << ")";
357 continue;
358 }
359
360 vector<Extent> new_extents;
361 error = ext2fs_block_iterate2(fs_new, ino, 0, NULL,
362 all_blocks ? ProcessInodeAllBlocks :
363 ProcessInodeMetadataBlocks,
364 &new_extents);
365 if (error) {
366 LOG(ERROR) << "Failed to enumerate new inode " << ino
367 << " blocks (" << error << ")";
368 continue;
369 }
370
371 // Skip inode if there are no metadata blocks
372 if (old_extents.size() == 0 || new_extents.size() == 0) {
373 continue;
374 }
375
376 // Make sure the two inodes have the same metadata blocks
377 if (old_extents.size() != new_extents.size()) {
378 continue;
379 }
380
381 bool same_metadata_extents = true;
382 vector<Extent>::iterator it_old;
383 vector<Extent>::iterator it_new;
384 for (it_old = old_extents.begin(),
385 it_new = new_extents.begin();
386 it_old != old_extents.end() && it_new != new_extents.end();
387 it_old++, it_new++) {
388 if (it_old->start_block() != it_new->start_block() ||
389 it_old->num_blocks() != it_new->num_blocks()) {
390 same_metadata_extents = false;
391 break;
392 }
393 }
394
395 if (!same_metadata_extents) {
396 continue;
397 }
398
399 // We have identical inode metadata blocks, we can now add them to
400 // our graph and blocks vector
401 string metadata_name = StringPrintf("<rootfs-inode-%d-metadata>", ino);
402 TEST_AND_RETURN_FALSE(AddMetadataExtents(graph,
403 blocks,
404 fs_old,
405 fs_new,
406 metadata_name,
407 old_extents,
408 data_fd,
409 data_file_size));
410 }
411
412 ext2fs_close_inode_scan(iscan);
413
414 return true;
415 }
416
417 // Reads metadata from old image and new image and determines
418 // the smallest way to encode the metadata for the diff.
419 // If there's no change in the metadata, it creates a MOVE
420 // operation. If there is a change, the smallest of REPLACE, REPLACE_BZ,
421 // or BSDIFF wins. It writes the diff to data_fd and updates data_file_size
422 // accordingly. It also adds the required operation to the graph and adds the
423 // metadata extents to blocks.
424 // Returns true on success.
425 bool DeltaReadMetadata(Graph* graph,
426 vector<Block>* blocks,
427 const string& old_image,
428 const string& new_image,
429 int data_fd,
430 off_t* data_file_size) {
431 // Open the two file systems.
432 ext2_filsys fs_old;
433 TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_open(old_image.c_str(), 0, 0, 0,
434 unix_io_manager, &fs_old));
435 ScopedExt2fsCloser fs_old_closer(fs_old);
436
437 ext2_filsys fs_new;
438 TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_open(new_image.c_str(), 0, 0, 0,
439 unix_io_manager, &fs_new));
440 ScopedExt2fsCloser fs_new_closer(fs_new);
441
442 // Make sure these two file systems are the same.
443 // If they are not the same, the metadata blocks will be packaged up in its
444 // entirety by ReadUnwrittenBlocks().
445 if (fs_old->blocksize != fs_new->blocksize ||
446 fs_old->fragsize != fs_new->fragsize ||
447 fs_old->group_desc_count != fs_new->group_desc_count ||
448 fs_old->inode_blocks_per_group != fs_new->inode_blocks_per_group ||
449 fs_old->super->s_inodes_count != fs_new->super->s_inodes_count ||
450 fs_old->super->s_blocks_count != fs_new->super->s_blocks_count) {
451 return true;
452 }
453
454 // Process the main file system metadata (superblock, inode tables, etc)
455 TEST_AND_RETURN_FALSE(ReadFilesystemMetadata(graph,
456 blocks,
457 fs_old,
458 fs_new,
459 data_fd,
460 data_file_size));
461
462 // Process each inode metadata blocks.
463 TEST_AND_RETURN_FALSE(ReadInodeMetadata(graph,
464 blocks,
465 fs_old,
466 fs_new,
467 data_fd,
468 data_file_size));
469
470 return true;
471 }
472
473 }; // namespace chromeos_update_engine
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698