Index: delta_diff_generator.cc |
diff --git a/delta_diff_generator.cc b/delta_diff_generator.cc |
index 21a4c03ad84b98decd39afa9c7a94bfd6e4f29d1..cfcfdfac73d17ec5a3876464ef47737a14cff32e 100644 |
--- a/delta_diff_generator.cc |
+++ b/delta_diff_generator.cc |
@@ -10,6 +10,13 @@ |
#include <sys/stat.h> |
#include <sys/types.h> |
+#include <base/logging.h> |
petkov
2010/12/10 00:46:55
move this block of includes after the c++ headers
thieule
2010/12/14 23:11:21
Done.
|
+#include <base/string_util.h> |
+#include <bzlib.h> |
+ |
petkov
2010/12/10 00:46:55
no need for blank line
thieule
2010/12/14 23:11:21
Done.
|
+#include <ext2fs/ext2_io.h> |
+#include <ext2fs/ext2fs.h> |
+ |
#include <algorithm> |
#include <map> |
#include <set> |
@@ -17,10 +24,6 @@ |
#include <utility> |
#include <vector> |
-#include <base/logging.h> |
-#include <base/string_util.h> |
-#include <bzlib.h> |
- |
#include "update_engine/bzip.h" |
#include "update_engine/cycle_breaker.h" |
#include "update_engine/extent_mapper.h" |
@@ -241,6 +244,392 @@ bool DeltaReadFiles(Graph* graph, |
return true; |
} |
+// Read data from the specified extents. |
+bool ReadExtentsData(const ext2_filsys fs, |
+ const vector<Extent>& extents, |
+ vector<char>* data) { |
+ // Resize the data buffer to hold all data in the extents |
+ size_t num_data_blocks = 0; |
+ for (vector<Extent>::const_iterator it = extents.begin(); |
+ it != extents.end(); it++) { |
+ num_data_blocks += it->num_blocks(); |
+ } |
+ |
+ data->resize(num_data_blocks * kBlockSize); |
+ |
+ // Read in the data blocks |
+ const size_t max_read_blocks = 256; |
+ vector<Block>::size_type blocks_copied_count = 0; |
+ for (vector<Extent>::const_iterator it = extents.begin(); |
+ it != extents.end(); it++) { |
+ vector<Block>::size_type blocks_read = 0; |
+ while (blocks_read < it->num_blocks()) { |
+ const int copy_block_cnt = |
+ min(max_read_blocks, |
+ static_cast<vector<char>::size_type>( |
+ it->num_blocks() - blocks_read)); |
+ TEST_AND_RETURN_FALSE_ERRCODE( |
+ io_channel_read_blk(fs->io, |
+ it->start_block() + blocks_read, |
+ copy_block_cnt, |
+ &(*data)[blocks_copied_count * kBlockSize])); |
+ blocks_read += copy_block_cnt; |
+ blocks_copied_count += copy_block_cnt; |
+ } |
+ } |
+ |
+ return true; |
+} |
+ |
+// Compute the bsdiff between two metadata blobs. |
+bool ComputeMetadataBsdiff(const vector<char>& old_metadata, |
+ const vector<char>& new_metadata, |
+ vector<char>* bsdiff_delta) { |
+ const string kTempFileTemplate("/tmp/CrAU_temp_data.XXXXXX"); |
+ |
+ // Write the metadata buffers to temporary files |
+ int old_fd; |
+ string temp_old_file_path; |
+ TEST_AND_RETURN_FALSE( |
+ utils::MakeTempFile(kTempFileTemplate, &temp_old_file_path, &old_fd)); |
+ TEST_AND_RETURN_FALSE(old_fd >= 0); |
+ ScopedPathUnlinker temp_old_file_path_unlinker(temp_old_file_path); |
+ ScopedFdCloser old_fd_closer(&old_fd); |
+ TEST_AND_RETURN_FALSE(utils::WriteAll(old_fd, |
+ &old_metadata[0], |
+ old_metadata.size())); |
+ |
+ int new_fd; |
+ string temp_new_file_path; |
+ TEST_AND_RETURN_FALSE( |
+ utils::MakeTempFile(kTempFileTemplate, &temp_new_file_path, &new_fd)); |
+ TEST_AND_RETURN_FALSE(new_fd >= 0); |
+ ScopedPathUnlinker temp_new_file_path_unlinker(temp_new_file_path); |
+ ScopedFdCloser new_fd_closer(&new_fd); |
+ TEST_AND_RETURN_FALSE(utils::WriteAll(new_fd, |
+ &new_metadata[0], |
+ new_metadata.size())); |
+ |
+ // Perform bsdiff on these files |
+ TEST_AND_RETURN_FALSE( |
+ BsdiffFiles(temp_old_file_path, temp_new_file_path, bsdiff_delta)); |
+ CHECK_GT(bsdiff_delta->size(), 0); |
+ |
+ return true; |
+} |
+ |
+// Add the specified metadata extents to the graph and blocks vector. |
+bool AddMetadataExtents(Graph* graph, |
+ vector<Block>* blocks, |
+ const ext2_filsys fs_old, |
+ const ext2_filsys fs_new, |
+ const string& metadata_name, |
+ const vector<Extent>& extents, |
+ int data_fd, |
+ off_t* data_file_size) { |
+ vector<char> data; // Data blob that will be written to delta file. |
+ |
+ // Read in the metadata blocks from the old and new image. |
+ vector<char> old_data; |
+ TEST_AND_RETURN_FALSE(ReadExtentsData(fs_old, extents, &old_data)); |
+ |
+ vector<char> new_data; |
+ TEST_AND_RETURN_FALSE(ReadExtentsData(fs_new, extents, &new_data)); |
+ |
+ // Determine the best way to compress this. |
+ vector<char> new_data_bz; |
+ TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz)); |
+ CHECK(!new_data_bz.empty()); |
+ |
+ DeltaArchiveManifest_InstallOperation op; |
+ size_t current_best_size = 0; |
+ if (new_data.size() <= new_data_bz.size()) { |
+ op.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE); |
+ current_best_size = new_data.size(); |
+ data = new_data; |
+ } else { |
+ op.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ); |
+ current_best_size = new_data_bz.size(); |
+ data = new_data_bz; |
+ } |
+ |
+ if (old_data == new_data) { |
+ // No change in data. |
+ op.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE); |
+ current_best_size = 0; |
+ data.clear(); |
+ } else { |
+ // Try bsdiff of old to new data |
+ vector<char> bsdiff_delta; |
+ TEST_AND_RETURN_FALSE(ComputeMetadataBsdiff(old_data, |
+ new_data, |
+ &bsdiff_delta)); |
+ CHECK_GT(bsdiff_delta.size(), 0); |
+ |
+ if (bsdiff_delta.size() < current_best_size) { |
+ op.set_type(DeltaArchiveManifest_InstallOperation_Type_BSDIFF); |
+ current_best_size = bsdiff_delta.size(); |
+ data = bsdiff_delta; |
+ } |
+ } |
+ |
+ CHECK_EQ(data.size(), current_best_size); |
+ |
+ // Set the source and dest extents to be the same since the filesystem |
+ // structures are identical |
+ if (op.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE || |
+ op.type() == DeltaArchiveManifest_InstallOperation_Type_BSDIFF) { |
+ DeltaDiffGenerator::StoreExtents(extents, op.mutable_src_extents()); |
+ op.set_src_length(old_data.size()); |
+ } |
+ |
+ DeltaDiffGenerator::StoreExtents(extents, op.mutable_dst_extents()); |
+ op.set_dst_length(new_data.size()); |
+ |
+ // Write data to output file |
+ if (op.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE) { |
+ op.set_data_offset(*data_file_size); |
+ op.set_data_length(data.size()); |
+ } |
+ |
+ TEST_AND_RETURN_FALSE(utils::WriteAll(data_fd, &data[0], data.size())); |
+ *data_file_size += data.size(); |
+ |
+ // Now, insert into graph and blocks vector |
+ graph->resize(graph->size() + 1); |
+ Vertex::Index vertex = graph->size() - 1; |
+ (*graph)[vertex].op = op; |
+ CHECK((*graph)[vertex].op.has_type()); |
+ (*graph)[vertex].file_name = metadata_name; |
+ |
+ TEST_AND_RETURN_FALSE(AddInstallOpToBlocksVector((*graph)[vertex].op, |
+ blocks, |
+ *graph, |
+ vertex)); |
+ |
+ return true; |
+} |
+ |
+// Reads the file system metadata extents. |
+bool ReadFilesystemMetadata(Graph* graph, |
+ vector<Block>* blocks, |
+ const ext2_filsys fs_old, |
+ const ext2_filsys fs_new, |
+ int data_fd, |
+ off_t* data_file_size) { |
+ LOG(INFO) << "Processing <rootfs-metadata>"; |
+ |
+ // Read all the extents that belong to the main file system metadata. |
+ // The metadata blocks are at the the start of each block group and goes |
petkov
2010/12/10 00:46:55
typo: the the
thieule
2010/12/14 23:11:21
Done.
|
+ // until the end of the inode table. |
+ for (dgrp_t bg = 0; bg < fs_old->group_desc_count; bg++) { |
+ struct ext2_group_desc* group_desc = &fs_old->group_desc[bg]; |
+ __u32 num_metadata_blocks = (group_desc->bg_inode_table + |
+ fs_old->inode_blocks_per_group) - |
+ (bg * fs_old->super->s_blocks_per_group); |
+ __u32 bg_start_block = bg * fs_old->super->s_blocks_per_group; |
+ |
+ // Due to bsdiff slowness, we're going to break each block group down |
petkov
2010/12/10 00:46:55
would it be better for the payload size to not bre
thieule
2010/12/14 23:11:21
I ran this without breaking up into chunks and bsd
|
+ // into metadata chunks and feed them to bsdiff. |
+ __u32 num_chunks = 4; |
+ __u32 blocks_per_chunk = num_metadata_blocks / num_chunks; |
+ __u32 curr_block = bg_start_block; |
+ for (__u32 chunk = 0; chunk < num_chunks; chunk++) { |
+ |
petkov
2010/12/10 00:46:55
remove blank line
thieule
2010/12/14 23:11:21
Done.
|
+ __u32 end_block = 0; |
+ if (chunk < num_chunks - 1) { |
+ end_block = curr_block + blocks_per_chunk; |
+ } else { |
+ end_block = bg_start_block + num_metadata_blocks; |
+ } |
+ |
+ vector<Extent> extents; |
+ for (__u32 i = curr_block; i < end_block; i++) { |
petkov
2010/12/10 00:46:55
there's an ExtentForRange utility in extent_ranges
thieule
2010/12/14 23:11:21
Done.
|
+ graph_utils::AppendBlockToExtents(&extents, i); |
+ } |
+ |
+ std::stringstream metadata_name; |
+ metadata_name << "<rootfs-bg-" << bg << "-" << chunk << "-metadata>"; |
petkov
2010/12/10 00:46:55
maybe use StringPrintf instead?
thieule
2010/12/14 23:11:21
Done.
|
+ |
+ LOG(INFO) << "Processing " << metadata_name.str(); |
+ |
+ TEST_AND_RETURN_FALSE(AddMetadataExtents(graph, |
+ blocks, |
+ fs_old, |
+ fs_new, |
+ metadata_name.str(), |
+ extents, |
+ data_fd, |
+ data_file_size)); |
+ |
+ curr_block += blocks_per_chunk; |
+ } |
+ } |
+ |
+ return true; |
+} |
+ |
+// Processes all blocks belonging to an inode |
+int ProcessInodeAllBlocks(ext2_filsys fs, |
+ blk_t* blocknr, |
+ e2_blkcnt_t blockcnt, |
+ blk_t ref_blk, |
+ int ref_offset, |
+ void* priv) { |
+ vector<Extent>* extents = static_cast<vector<Extent>*>(priv); |
+ graph_utils::AppendBlockToExtents(extents, *blocknr); |
+ return 0; |
+} |
+ |
+// Processes only indirect, double indirect or triple indirect metadata |
+// blocks belonging to an inode |
+int ProcessInodeMetadataBlocks(ext2_filsys fs, |
+ blk_t* blocknr, |
+ e2_blkcnt_t blockcnt, |
+ blk_t ref_blk, |
+ int ref_offset, |
+ void* priv) { |
+ vector<Extent>* extents = static_cast<vector<Extent>*>(priv); |
+ if (blockcnt < 0) { |
+ graph_utils::AppendBlockToExtents(extents, *blocknr); |
+ } |
+ return 0; |
+} |
+ |
+// Read inode metadata blocks. |
+bool ReadInodeMetadata(Graph* graph, |
+ vector<Block>* blocks, |
+ const ext2_filsys fs_old, |
+ const ext2_filsys fs_new, |
+ int data_fd, |
+ off_t* data_file_size) { |
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_read_inode_bitmap(fs_old)); |
+ |
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_read_inode_bitmap(fs_new)); |
+ |
+ ext2_inode_scan iscan; |
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_open_inode_scan(fs_old, 0, &iscan)); |
+ |
+ ext2_ino_t ino; |
+ ext2_inode old_inode; |
+ while (true) { |
+ // Get the next inode on both file systems |
+ errcode_t error = ext2fs_get_next_inode(iscan, &ino, &old_inode); |
+ if (error) { |
+ LOG(ERROR) << "Failed to retrieve next inode (" << error << ")"; |
+ break; |
+ } |
+ |
+ if (ino == 0) { |
+ break; |
+ } |
+ |
+ if (ino == EXT2_RESIZE_INO) { |
+ continue; |
+ } |
+ |
+ ext2_inode new_inode; |
+ error = ext2fs_read_inode(fs_new, ino, &new_inode); |
+ if (error) { |
+ LOG(ERROR) << "Failed to retrieve new inode (" << error << ")"; |
+ continue; |
+ } |
+ |
+ // Skip inodes that are not in use |
+ if (!ext2fs_test_inode_bitmap(fs_old->inode_map, ino) || |
+ !ext2fs_test_inode_bitmap(fs_new->inode_map, ino)) { |
+ continue; |
+ } |
+ |
+ // Skip inodes that have no data blocks |
+ if (old_inode.i_blocks == 0 || new_inode.i_blocks == 0) { |
+ continue; |
+ } |
+ |
+ // Skip inodes that are not the same type |
+ bool is_old_dir = (ext2fs_check_directory(fs_old, ino) == 0); |
+ bool is_new_dir = (ext2fs_check_directory(fs_new, ino) == 0); |
+ if (is_old_dir != is_new_dir) { |
+ continue; |
+ } |
+ |
+ // Process the inodes metadata blocks |
+ // For normal files, metadata blocks are indirect, double indirect |
+ // and triple indirect blocks (no data blocks). For directories and |
+ // the journal, all blocks are considered metadata blocks. |
+ LOG(INFO) << "Processing inode " << ino << " metadata"; |
+ |
+ bool all_blocks = ((ino == EXT2_JOURNAL_INO) || is_old_dir || is_new_dir); |
+ |
+ vector<Extent> old_extents; |
+ error = ext2fs_block_iterate2(fs_old, ino, 0, NULL, |
+ all_blocks ? ProcessInodeAllBlocks : |
+ ProcessInodeMetadataBlocks, |
+ &old_extents); |
+ if (error) { |
+ LOG(ERROR) << "Failed to enumerate old inode " << ino |
+ << " blocks (" << error << ")"; |
+ continue; |
+ } |
+ |
+ vector<Extent> new_extents; |
+ error = ext2fs_block_iterate2(fs_new, ino, 0, NULL, |
+ all_blocks ? ProcessInodeAllBlocks : |
+ ProcessInodeMetadataBlocks, |
+ &new_extents); |
+ if (error) { |
+ LOG(ERROR) << "Failed to enumerate new inode " << ino |
+ << " blocks (" << error << ")"; |
+ continue; |
+ } |
+ |
+ // Skip inode if there are no metadata blocks |
+ if (old_extents.size() == 0 || new_extents.size() == 0) { |
+ continue; |
+ } |
+ |
+ // Make sure the two inodes have the same metadata blocks |
+ if (old_extents.size() != new_extents.size()) { |
+ continue; |
+ } |
+ |
+ bool same_metadata_extents = true; |
+ vector<Extent>::iterator it_old; |
+ vector<Extent>::iterator it_new; |
+ for (it_old = old_extents.begin(), |
+ it_new = new_extents.begin(); |
+ it_old != old_extents.end() && it_new != new_extents.end(); |
+ it_old++, it_new++) { |
+ if (it_old->start_block() != it_new->start_block() || |
+ it_old->num_blocks() != it_new->num_blocks()) { |
+ same_metadata_extents = false; |
+ break; |
+ } |
+ } |
+ |
+ if (!same_metadata_extents) { |
+ continue; |
+ } |
+ |
+ // We have identical inode metadata blocks, we can now add them to |
+ // our graph and blocks vector |
+ std::stringstream metadata_name; |
+ metadata_name << "<rootfs-inode-" << ino << "-metadata>"; |
+ TEST_AND_RETURN_FALSE(AddMetadataExtents(graph, |
+ blocks, |
+ fs_old, |
+ fs_new, |
+ metadata_name.str(), |
+ old_extents, |
+ data_fd, |
+ data_file_size)); |
+ } |
+ |
+ ext2fs_close_inode_scan(iscan); |
+ |
+ return true; |
+} |
+ |
// This class allocates non-existent temp blocks, starting from |
// kTempBlockStart. Other code is responsible for converting these |
// temp blocks into real blocks, as the client can't read or write to |
@@ -620,6 +1009,52 @@ bool DeltaDiffGenerator::ReadFileToDiff( |
return true; |
} |
+bool DeltaDiffGenerator::DeltaReadMetadata(Graph* graph, |
+ vector<Block>* blocks, |
+ const string& old_image, |
+ const string& new_image, |
+ int data_fd, |
+ off_t* data_file_size) { |
+ // Open the two file systems. |
+ ext2_filsys fs_old; |
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_open(old_image.c_str(), 0, 0, 0, |
+ unix_io_manager, &fs_old)); |
+ ScopedExt2fsCloser fs_old_closer(fs_old); |
+ |
+ ext2_filsys fs_new; |
+ TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_open(new_image.c_str(), 0, 0, 0, |
+ unix_io_manager, &fs_new)); |
+ ScopedExt2fsCloser fs_new_closer(fs_new); |
+ |
+ // Make sure these two file systems are the same. |
petkov
2010/12/10 00:46:55
clarify that if they aren't the same we'll just se
thieule
2010/12/14 23:11:21
Done.
|
+ if (fs_old->blocksize != fs_new->blocksize || |
+ fs_old->fragsize != fs_new->fragsize || |
+ fs_old->group_desc_count != fs_new->group_desc_count || |
+ fs_old->inode_blocks_per_group != fs_new->inode_blocks_per_group || |
+ fs_old->super->s_inodes_count != fs_new->super->s_inodes_count || |
+ fs_old->super->s_blocks_count != fs_new->super->s_blocks_count) { |
+ return true; |
+ } |
+ |
+ // Process the main file system metadata (superblock, inode tables, etc) |
+ TEST_AND_RETURN_FALSE(ReadFilesystemMetadata(graph, |
+ blocks, |
+ fs_old, |
+ fs_new, |
+ data_fd, |
+ data_file_size)); |
+ |
+ // Process each inode metadata blocks. |
+ TEST_AND_RETURN_FALSE(ReadInodeMetadata(graph, |
+ blocks, |
+ fs_old, |
+ fs_new, |
+ data_fd, |
+ data_file_size)); |
+ |
+ return true; |
+} |
+ |
bool DeltaDiffGenerator::InitializePartitionInfo(bool is_kernel, |
const string& partition, |
PartitionInfo* info) { |
@@ -1390,6 +1825,16 @@ bool DeltaDiffGenerator::GenerateDeltaUpdateFile( |
LOG(INFO) << "done reading normal files"; |
CheckGraph(graph); |
+ LOG(INFO) << "Starting metadata processing"; |
+ TEST_AND_RETURN_FALSE(DeltaReadMetadata(&graph, |
+ &blocks, |
+ old_image, |
+ new_image, |
+ fd, |
+ &data_file_size)); |
+ LOG(INFO) << "Done metadata processing"; |
+ CheckGraph(graph); |
+ |
graph.resize(graph.size() + 1); |
TEST_AND_RETURN_FALSE(ReadUnwrittenBlocks(blocks, |
fd, |