Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1394)

Unified Diff: delta_diff_generator.cc

Issue 5684002: Add support for bsdiff of file system metadata blocks (Closed) Base URL: http://git.chromium.org/git/update_engine.git@master
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: delta_diff_generator.cc
diff --git a/delta_diff_generator.cc b/delta_diff_generator.cc
index 21a4c03ad84b98decd39afa9c7a94bfd6e4f29d1..cfcfdfac73d17ec5a3876464ef47737a14cff32e 100644
--- a/delta_diff_generator.cc
+++ b/delta_diff_generator.cc
@@ -10,6 +10,13 @@
#include <sys/stat.h>
#include <sys/types.h>
+#include <base/logging.h>
petkov 2010/12/10 00:46:55 move this block of includes after the c++ headers
thieule 2010/12/14 23:11:21 Done.
+#include <base/string_util.h>
+#include <bzlib.h>
+
petkov 2010/12/10 00:46:55 no need for blank line
thieule 2010/12/14 23:11:21 Done.
+#include <ext2fs/ext2_io.h>
+#include <ext2fs/ext2fs.h>
+
#include <algorithm>
#include <map>
#include <set>
@@ -17,10 +24,6 @@
#include <utility>
#include <vector>
-#include <base/logging.h>
-#include <base/string_util.h>
-#include <bzlib.h>
-
#include "update_engine/bzip.h"
#include "update_engine/cycle_breaker.h"
#include "update_engine/extent_mapper.h"
@@ -241,6 +244,392 @@ bool DeltaReadFiles(Graph* graph,
return true;
}
+// Read data from the specified extents.
+bool ReadExtentsData(const ext2_filsys fs,
+ const vector<Extent>& extents,
+ vector<char>* data) {
+ // Resize the data buffer to hold all data in the extents
+ size_t num_data_blocks = 0;
+ for (vector<Extent>::const_iterator it = extents.begin();
+ it != extents.end(); it++) {
+ num_data_blocks += it->num_blocks();
+ }
+
+ data->resize(num_data_blocks * kBlockSize);
+
+ // Read in the data blocks
+ const size_t max_read_blocks = 256;
+ vector<Block>::size_type blocks_copied_count = 0;
+ for (vector<Extent>::const_iterator it = extents.begin();
+ it != extents.end(); it++) {
+ vector<Block>::size_type blocks_read = 0;
+ while (blocks_read < it->num_blocks()) {
+ const int copy_block_cnt =
+ min(max_read_blocks,
+ static_cast<vector<char>::size_type>(
+ it->num_blocks() - blocks_read));
+ TEST_AND_RETURN_FALSE_ERRCODE(
+ io_channel_read_blk(fs->io,
+ it->start_block() + blocks_read,
+ copy_block_cnt,
+ &(*data)[blocks_copied_count * kBlockSize]));
+ blocks_read += copy_block_cnt;
+ blocks_copied_count += copy_block_cnt;
+ }
+ }
+
+ return true;
+}
+
+// Compute the bsdiff between two metadata blobs.
+bool ComputeMetadataBsdiff(const vector<char>& old_metadata,
+ const vector<char>& new_metadata,
+ vector<char>* bsdiff_delta) {
+ const string kTempFileTemplate("/tmp/CrAU_temp_data.XXXXXX");
+
+ // Write the metadata buffers to temporary files
+ int old_fd;
+ string temp_old_file_path;
+ TEST_AND_RETURN_FALSE(
+ utils::MakeTempFile(kTempFileTemplate, &temp_old_file_path, &old_fd));
+ TEST_AND_RETURN_FALSE(old_fd >= 0);
+ ScopedPathUnlinker temp_old_file_path_unlinker(temp_old_file_path);
+ ScopedFdCloser old_fd_closer(&old_fd);
+ TEST_AND_RETURN_FALSE(utils::WriteAll(old_fd,
+ &old_metadata[0],
+ old_metadata.size()));
+
+ int new_fd;
+ string temp_new_file_path;
+ TEST_AND_RETURN_FALSE(
+ utils::MakeTempFile(kTempFileTemplate, &temp_new_file_path, &new_fd));
+ TEST_AND_RETURN_FALSE(new_fd >= 0);
+ ScopedPathUnlinker temp_new_file_path_unlinker(temp_new_file_path);
+ ScopedFdCloser new_fd_closer(&new_fd);
+ TEST_AND_RETURN_FALSE(utils::WriteAll(new_fd,
+ &new_metadata[0],
+ new_metadata.size()));
+
+ // Perform bsdiff on these files
+ TEST_AND_RETURN_FALSE(
+ BsdiffFiles(temp_old_file_path, temp_new_file_path, bsdiff_delta));
+ CHECK_GT(bsdiff_delta->size(), 0);
+
+ return true;
+}
+
+// Add the specified metadata extents to the graph and blocks vector.
+bool AddMetadataExtents(Graph* graph,
+ vector<Block>* blocks,
+ const ext2_filsys fs_old,
+ const ext2_filsys fs_new,
+ const string& metadata_name,
+ const vector<Extent>& extents,
+ int data_fd,
+ off_t* data_file_size) {
+ vector<char> data; // Data blob that will be written to delta file.
+
+ // Read in the metadata blocks from the old and new image.
+ vector<char> old_data;
+ TEST_AND_RETURN_FALSE(ReadExtentsData(fs_old, extents, &old_data));
+
+ vector<char> new_data;
+ TEST_AND_RETURN_FALSE(ReadExtentsData(fs_new, extents, &new_data));
+
+ // Determine the best way to compress this.
+ vector<char> new_data_bz;
+ TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz));
+ CHECK(!new_data_bz.empty());
+
+ DeltaArchiveManifest_InstallOperation op;
+ size_t current_best_size = 0;
+ if (new_data.size() <= new_data_bz.size()) {
+ op.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
+ current_best_size = new_data.size();
+ data = new_data;
+ } else {
+ op.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
+ current_best_size = new_data_bz.size();
+ data = new_data_bz;
+ }
+
+ if (old_data == new_data) {
+ // No change in data.
+ op.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
+ current_best_size = 0;
+ data.clear();
+ } else {
+ // Try bsdiff of old to new data
+ vector<char> bsdiff_delta;
+ TEST_AND_RETURN_FALSE(ComputeMetadataBsdiff(old_data,
+ new_data,
+ &bsdiff_delta));
+ CHECK_GT(bsdiff_delta.size(), 0);
+
+ if (bsdiff_delta.size() < current_best_size) {
+ op.set_type(DeltaArchiveManifest_InstallOperation_Type_BSDIFF);
+ current_best_size = bsdiff_delta.size();
+ data = bsdiff_delta;
+ }
+ }
+
+ CHECK_EQ(data.size(), current_best_size);
+
+ // Set the source and dest extents to be the same since the filesystem
+ // structures are identical
+ if (op.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE ||
+ op.type() == DeltaArchiveManifest_InstallOperation_Type_BSDIFF) {
+ DeltaDiffGenerator::StoreExtents(extents, op.mutable_src_extents());
+ op.set_src_length(old_data.size());
+ }
+
+ DeltaDiffGenerator::StoreExtents(extents, op.mutable_dst_extents());
+ op.set_dst_length(new_data.size());
+
+ // Write data to output file
+ if (op.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE) {
+ op.set_data_offset(*data_file_size);
+ op.set_data_length(data.size());
+ }
+
+ TEST_AND_RETURN_FALSE(utils::WriteAll(data_fd, &data[0], data.size()));
+ *data_file_size += data.size();
+
+ // Now, insert into graph and blocks vector
+ graph->resize(graph->size() + 1);
+ Vertex::Index vertex = graph->size() - 1;
+ (*graph)[vertex].op = op;
+ CHECK((*graph)[vertex].op.has_type());
+ (*graph)[vertex].file_name = metadata_name;
+
+ TEST_AND_RETURN_FALSE(AddInstallOpToBlocksVector((*graph)[vertex].op,
+ blocks,
+ *graph,
+ vertex));
+
+ return true;
+}
+
+// Reads the file system metadata extents.
+bool ReadFilesystemMetadata(Graph* graph,
+ vector<Block>* blocks,
+ const ext2_filsys fs_old,
+ const ext2_filsys fs_new,
+ int data_fd,
+ off_t* data_file_size) {
+ LOG(INFO) << "Processing <rootfs-metadata>";
+
+ // Read all the extents that belong to the main file system metadata.
+ // The metadata blocks are at the the start of each block group and goes
petkov 2010/12/10 00:46:55 typo: the the
thieule 2010/12/14 23:11:21 Done.
+ // until the end of the inode table.
+ for (dgrp_t bg = 0; bg < fs_old->group_desc_count; bg++) {
+ struct ext2_group_desc* group_desc = &fs_old->group_desc[bg];
+ __u32 num_metadata_blocks = (group_desc->bg_inode_table +
+ fs_old->inode_blocks_per_group) -
+ (bg * fs_old->super->s_blocks_per_group);
+ __u32 bg_start_block = bg * fs_old->super->s_blocks_per_group;
+
+ // Due to bsdiff slowness, we're going to break each block group down
petkov 2010/12/10 00:46:55 would it be better for the payload size to not bre
thieule 2010/12/14 23:11:21 I ran this without breaking up into chunks and bsd
+ // into metadata chunks and feed them to bsdiff.
+ __u32 num_chunks = 4;
+ __u32 blocks_per_chunk = num_metadata_blocks / num_chunks;
+ __u32 curr_block = bg_start_block;
+ for (__u32 chunk = 0; chunk < num_chunks; chunk++) {
+
petkov 2010/12/10 00:46:55 remove blank line
thieule 2010/12/14 23:11:21 Done.
+ __u32 end_block = 0;
+ if (chunk < num_chunks - 1) {
+ end_block = curr_block + blocks_per_chunk;
+ } else {
+ end_block = bg_start_block + num_metadata_blocks;
+ }
+
+ vector<Extent> extents;
+ for (__u32 i = curr_block; i < end_block; i++) {
petkov 2010/12/10 00:46:55 there's an ExtentForRange utility in extent_ranges
thieule 2010/12/14 23:11:21 Done.
+ graph_utils::AppendBlockToExtents(&extents, i);
+ }
+
+ std::stringstream metadata_name;
+ metadata_name << "<rootfs-bg-" << bg << "-" << chunk << "-metadata>";
petkov 2010/12/10 00:46:55 maybe use StringPrintf instead?
thieule 2010/12/14 23:11:21 Done.
+
+ LOG(INFO) << "Processing " << metadata_name.str();
+
+ TEST_AND_RETURN_FALSE(AddMetadataExtents(graph,
+ blocks,
+ fs_old,
+ fs_new,
+ metadata_name.str(),
+ extents,
+ data_fd,
+ data_file_size));
+
+ curr_block += blocks_per_chunk;
+ }
+ }
+
+ return true;
+}
+
+// Processes all blocks belonging to an inode
+int ProcessInodeAllBlocks(ext2_filsys fs,
+ blk_t* blocknr,
+ e2_blkcnt_t blockcnt,
+ blk_t ref_blk,
+ int ref_offset,
+ void* priv) {
+ vector<Extent>* extents = static_cast<vector<Extent>*>(priv);
+ graph_utils::AppendBlockToExtents(extents, *blocknr);
+ return 0;
+}
+
+// Processes only indirect, double indirect or triple indirect metadata
+// blocks belonging to an inode
+int ProcessInodeMetadataBlocks(ext2_filsys fs,
+ blk_t* blocknr,
+ e2_blkcnt_t blockcnt,
+ blk_t ref_blk,
+ int ref_offset,
+ void* priv) {
+ vector<Extent>* extents = static_cast<vector<Extent>*>(priv);
+ if (blockcnt < 0) {
+ graph_utils::AppendBlockToExtents(extents, *blocknr);
+ }
+ return 0;
+}
+
+// Read inode metadata blocks.
+bool ReadInodeMetadata(Graph* graph,
+ vector<Block>* blocks,
+ const ext2_filsys fs_old,
+ const ext2_filsys fs_new,
+ int data_fd,
+ off_t* data_file_size) {
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_read_inode_bitmap(fs_old));
+
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_read_inode_bitmap(fs_new));
+
+ ext2_inode_scan iscan;
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_open_inode_scan(fs_old, 0, &iscan));
+
+ ext2_ino_t ino;
+ ext2_inode old_inode;
+ while (true) {
+ // Get the next inode on both file systems
+ errcode_t error = ext2fs_get_next_inode(iscan, &ino, &old_inode);
+ if (error) {
+ LOG(ERROR) << "Failed to retrieve next inode (" << error << ")";
+ break;
+ }
+
+ if (ino == 0) {
+ break;
+ }
+
+ if (ino == EXT2_RESIZE_INO) {
+ continue;
+ }
+
+ ext2_inode new_inode;
+ error = ext2fs_read_inode(fs_new, ino, &new_inode);
+ if (error) {
+ LOG(ERROR) << "Failed to retrieve new inode (" << error << ")";
+ continue;
+ }
+
+ // Skip inodes that are not in use
+ if (!ext2fs_test_inode_bitmap(fs_old->inode_map, ino) ||
+ !ext2fs_test_inode_bitmap(fs_new->inode_map, ino)) {
+ continue;
+ }
+
+ // Skip inodes that have no data blocks
+ if (old_inode.i_blocks == 0 || new_inode.i_blocks == 0) {
+ continue;
+ }
+
+ // Skip inodes that are not the same type
+ bool is_old_dir = (ext2fs_check_directory(fs_old, ino) == 0);
+ bool is_new_dir = (ext2fs_check_directory(fs_new, ino) == 0);
+ if (is_old_dir != is_new_dir) {
+ continue;
+ }
+
+ // Process the inodes metadata blocks
+ // For normal files, metadata blocks are indirect, double indirect
+ // and triple indirect blocks (no data blocks). For directories and
+ // the journal, all blocks are considered metadata blocks.
+ LOG(INFO) << "Processing inode " << ino << " metadata";
+
+ bool all_blocks = ((ino == EXT2_JOURNAL_INO) || is_old_dir || is_new_dir);
+
+ vector<Extent> old_extents;
+ error = ext2fs_block_iterate2(fs_old, ino, 0, NULL,
+ all_blocks ? ProcessInodeAllBlocks :
+ ProcessInodeMetadataBlocks,
+ &old_extents);
+ if (error) {
+ LOG(ERROR) << "Failed to enumerate old inode " << ino
+ << " blocks (" << error << ")";
+ continue;
+ }
+
+ vector<Extent> new_extents;
+ error = ext2fs_block_iterate2(fs_new, ino, 0, NULL,
+ all_blocks ? ProcessInodeAllBlocks :
+ ProcessInodeMetadataBlocks,
+ &new_extents);
+ if (error) {
+ LOG(ERROR) << "Failed to enumerate new inode " << ino
+ << " blocks (" << error << ")";
+ continue;
+ }
+
+ // Skip inode if there are no metadata blocks
+ if (old_extents.size() == 0 || new_extents.size() == 0) {
+ continue;
+ }
+
+ // Make sure the two inodes have the same metadata blocks
+ if (old_extents.size() != new_extents.size()) {
+ continue;
+ }
+
+ bool same_metadata_extents = true;
+ vector<Extent>::iterator it_old;
+ vector<Extent>::iterator it_new;
+ for (it_old = old_extents.begin(),
+ it_new = new_extents.begin();
+ it_old != old_extents.end() && it_new != new_extents.end();
+ it_old++, it_new++) {
+ if (it_old->start_block() != it_new->start_block() ||
+ it_old->num_blocks() != it_new->num_blocks()) {
+ same_metadata_extents = false;
+ break;
+ }
+ }
+
+ if (!same_metadata_extents) {
+ continue;
+ }
+
+ // We have identical inode metadata blocks, we can now add them to
+ // our graph and blocks vector
+ std::stringstream metadata_name;
+ metadata_name << "<rootfs-inode-" << ino << "-metadata>";
+ TEST_AND_RETURN_FALSE(AddMetadataExtents(graph,
+ blocks,
+ fs_old,
+ fs_new,
+ metadata_name.str(),
+ old_extents,
+ data_fd,
+ data_file_size));
+ }
+
+ ext2fs_close_inode_scan(iscan);
+
+ return true;
+}
+
// This class allocates non-existent temp blocks, starting from
// kTempBlockStart. Other code is responsible for converting these
// temp blocks into real blocks, as the client can't read or write to
@@ -620,6 +1009,52 @@ bool DeltaDiffGenerator::ReadFileToDiff(
return true;
}
+bool DeltaDiffGenerator::DeltaReadMetadata(Graph* graph,
+ vector<Block>* blocks,
+ const string& old_image,
+ const string& new_image,
+ int data_fd,
+ off_t* data_file_size) {
+ // Open the two file systems.
+ ext2_filsys fs_old;
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_open(old_image.c_str(), 0, 0, 0,
+ unix_io_manager, &fs_old));
+ ScopedExt2fsCloser fs_old_closer(fs_old);
+
+ ext2_filsys fs_new;
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_open(new_image.c_str(), 0, 0, 0,
+ unix_io_manager, &fs_new));
+ ScopedExt2fsCloser fs_new_closer(fs_new);
+
+ // Make sure these two file systems are the same.
petkov 2010/12/10 00:46:55 clarify that if they aren't the same we'll just se
thieule 2010/12/14 23:11:21 Done.
+ if (fs_old->blocksize != fs_new->blocksize ||
+ fs_old->fragsize != fs_new->fragsize ||
+ fs_old->group_desc_count != fs_new->group_desc_count ||
+ fs_old->inode_blocks_per_group != fs_new->inode_blocks_per_group ||
+ fs_old->super->s_inodes_count != fs_new->super->s_inodes_count ||
+ fs_old->super->s_blocks_count != fs_new->super->s_blocks_count) {
+ return true;
+ }
+
+ // Process the main file system metadata (superblock, inode tables, etc)
+ TEST_AND_RETURN_FALSE(ReadFilesystemMetadata(graph,
+ blocks,
+ fs_old,
+ fs_new,
+ data_fd,
+ data_file_size));
+
+ // Process each inode metadata blocks.
+ TEST_AND_RETURN_FALSE(ReadInodeMetadata(graph,
+ blocks,
+ fs_old,
+ fs_new,
+ data_fd,
+ data_file_size));
+
+ return true;
+}
+
bool DeltaDiffGenerator::InitializePartitionInfo(bool is_kernel,
const string& partition,
PartitionInfo* info) {
@@ -1390,6 +1825,16 @@ bool DeltaDiffGenerator::GenerateDeltaUpdateFile(
LOG(INFO) << "done reading normal files";
CheckGraph(graph);
+ LOG(INFO) << "Starting metadata processing";
+ TEST_AND_RETURN_FALSE(DeltaReadMetadata(&graph,
+ &blocks,
+ old_image,
+ new_image,
+ fd,
+ &data_file_size));
+ LOG(INFO) << "Done metadata processing";
+ CheckGraph(graph);
+
graph.resize(graph.size() + 1);
TEST_AND_RETURN_FALSE(ReadUnwrittenBlocks(blocks,
fd,

Powered by Google App Engine
This is Rietveld 408576698