Index: delta_diff_generator.cc |
diff --git a/delta_diff_generator.cc b/delta_diff_generator.cc |
index 03b59d937bb06608486d2b48bfe1ac1ae9a1ff4f..9cd1e48e54d972e8c83b614c5d358df485fee49c 100644 |
--- a/delta_diff_generator.cc |
+++ b/delta_diff_generator.cc |
@@ -40,6 +40,7 @@ using std::make_pair; |
using std::map; |
using std::max; |
using std::min; |
+using std::pair; |
using std::set; |
using std::string; |
using std::vector; |
@@ -923,6 +924,162 @@ bool TempBlocksExistInExtents(const T& extents) { |
return false; |
} |
+bool ConvertCutsToFull( |
petkov
2010/10/23 04:52:52
Might be useful to add a docstring.
|
+ Graph* graph, |
+ const string& new_root, |
+ int data_fd, |
+ off_t* data_file_size, |
+ vector<Vertex::Index>* op_indexes, |
+ vector<vector<Vertex::Index>::size_type>* reverse_op_indexes, |
+ const vector<CutEdgeVertexes>& cuts) { |
+ CHECK(!cuts.empty()); |
+ set<Vertex::Index> deleted_nodes; |
+ for (vector<CutEdgeVertexes>::const_iterator it = cuts.begin(), |
+ e = cuts.end(); it != e; ++it) { |
+ TEST_AND_RETURN_FALSE(DeltaDiffGenerator::ConvertCutToFullOp( |
+ graph, |
+ *it, |
+ new_root, |
+ data_fd, |
+ data_file_size)); |
+ deleted_nodes.insert(it->new_vertex); |
+ } |
+ deleted_nodes.insert(cuts[0].old_dst); |
+ |
+ vector<Vertex::Index> new_op_indexes; |
+ new_op_indexes.reserve(op_indexes->size()); |
+ for (vector<Vertex::Index>::iterator it = op_indexes->begin(), |
+ e = op_indexes->end(); it != e; ++it) { |
+ if (utils::SetContainsKey(deleted_nodes, *it)) |
+ continue; |
+ new_op_indexes.push_back(*it); |
+ } |
+ new_op_indexes.push_back(cuts[0].old_dst); |
+ op_indexes->swap(new_op_indexes); |
+ DeltaDiffGenerator::GenerateReverseTopoOrderMap(*op_indexes, |
+ reverse_op_indexes); |
+ return true; |
+} |
+ |
+// All cuts must have the same old_dst. |
+bool AssignBlockForAdjoiningCuts( |
petkov
2010/10/23 04:52:52
A docstring might be useful. Also, it might make s[… comment truncated in review export]
|
+ Graph* graph, |
+ const string& new_root, |
+ int data_fd, |
+ off_t* data_file_size, |
+ vector<Vertex::Index>* op_indexes, |
+ vector<vector<Vertex::Index>::size_type>* reverse_op_indexes, |
+ const vector<CutEdgeVertexes>& cuts) { |
+ CHECK(!cuts.empty()); |
+ const Vertex::Index old_dst = cuts[0].old_dst; |
+ // Calculate # of blocks needed |
+ uint64_t blocks_needed = 0; |
+ map<const CutEdgeVertexes*, uint64_t> cuts_blocks_needed; |
+ for (vector<CutEdgeVertexes>::const_iterator it = cuts.begin(), |
+ e = cuts.end(); it != e; ++it) { |
+ uint64_t cut_blocks_needed = 0; |
+ for (vector<Extent>::const_iterator jt = it->tmp_extents.begin(), |
+ je = it->tmp_extents.end(); jt != je; ++jt) { |
+ cut_blocks_needed += jt->num_blocks(); |
+ } |
+ blocks_needed += cut_blocks_needed; |
+ cuts_blocks_needed[&*it] = cut_blocks_needed; |
+ } |
+ LOG(INFO) << "Need to find " << blocks_needed << " blocks for " |
+ << cuts.size() << " cuts"; |
+ |
+ // Find enough blocks |
+ ExtentRanges scratch_ranges; |
+ // Each block that's supplying temp blocks and the corresponding blocks: |
+ typedef vector<pair<Vertex::Index, ExtentRanges> > SupplierVector; |
+ SupplierVector block_suppliers; |
+ uint64_t scratch_blocks_found = 0; |
+ LOG(INFO) << "scan from " << (*reverse_op_indexes)[old_dst] + 1 |
+ << " to " << op_indexes->size(); |
+ for (vector<Vertex::Index>::size_type i = (*reverse_op_indexes)[old_dst] + 1, |
+ e = op_indexes->size(); i < e; ++i) { |
+ Vertex::Index test_node = (*op_indexes)[i]; |
+ if (!(*graph)[test_node].valid) |
+ continue; |
+ // See if this node has sufficient blocks |
+ ExtentRanges ranges; |
+ ranges.AddRepeatedExtents((*graph)[test_node].op.dst_extents()); |
+ ranges.SubtractExtent(ExtentForRange( |
+ kTempBlockStart, kSparseHole - kTempBlockStart)); |
+ ranges.SubtractRepeatedExtents((*graph)[test_node].op.src_extents()); |
+ // For now, for simplicity, subtract out all blocks in read-before |
+ // dependencies. |
+ for (Vertex::EdgeMap::const_iterator edge_i = |
+ (*graph)[test_node].out_edges.begin(), |
+ edge_e = (*graph)[test_node].out_edges.end(); |
+ edge_i != edge_e; ++edge_i) { |
+ ranges.SubtractExtents(edge_i->second.extents); |
+ } |
+ if (ranges.blocks() == 0) |
+ continue; |
+ |
+ if (ranges.blocks() + scratch_blocks_found > blocks_needed) { |
+ // trim down ranges |
+ vector<Extent> new_ranges = ranges.GetExtentsForBlockCount( |
+ blocks_needed - scratch_blocks_found); |
+ ranges = ExtentRanges(); |
+ ranges.AddExtents(new_ranges); |
+ } |
+ scratch_ranges.AddRanges(ranges); |
+ block_suppliers.push_back(make_pair(test_node, ranges)); |
+ scratch_blocks_found += ranges.blocks(); |
+ LOG(INFO) << "Adding " << ranges.blocks() << " blocks. Now have " |
+ << scratch_ranges.blocks(); |
+ if (scratch_ranges.blocks() >= blocks_needed) |
+ break; |
+ } |
+ if (scratch_ranges.blocks() < blocks_needed) { |
+ LOG(INFO) << "Unable to find sufficient scratch"; |
+ TEST_AND_RETURN_FALSE(ConvertCutsToFull(graph, |
+ new_root, |
+ data_fd, |
+ data_file_size, |
+ op_indexes, |
+ reverse_op_indexes, |
+ cuts)); |
+ return true; |
+ } |
+ // Use the scratch we found |
+ TEST_AND_RETURN_FALSE(scratch_ranges.blocks() == scratch_blocks_found); |
+ |
+ // Make all the suppliers depend on this node |
+ for (SupplierVector::iterator it = block_suppliers.begin(), |
+ e = block_suppliers.end(); it != e; ++it) { |
+ graph_utils::AddReadBeforeDepExtents( |
+ &(*graph)[it->first], |
+ old_dst, |
+ it->second.GetExtentsForBlockCount(it->second.blocks())); |
+ } |
+ |
+ // Replace temp blocks in each cut |
+ for (vector<CutEdgeVertexes>::const_iterator it = cuts.begin(), |
+ e = cuts.end(); it != e; ++it) { |
+ vector<Extent> real_extents = |
+ scratch_ranges.GetExtentsForBlockCount(cuts_blocks_needed[&*it]); |
+ scratch_ranges.SubtractExtents(real_extents); |
+ |
+ // Fix the old dest node w/ the real blocks |
+ DeltaDiffGenerator::SubstituteBlocks(&(*graph)[old_dst], |
+ it->tmp_extents, |
+ real_extents); |
+ |
+ // Fix the new node w/ the real blocks. Since the new node is just a |
+ // copy operation, we can replace all the dest extents w/ the real |
+ // blocks. |
+ DeltaArchiveManifest_InstallOperation *op = |
+ &(*graph)[it->new_vertex].op; |
+ op->clear_dst_extents(); |
+ DeltaDiffGenerator::StoreExtents(real_extents, op->mutable_dst_extents()); |
+ } |
+ LOG(INFO) << "Done subbing in for cut set"; |
+ return true; |
+} |
+ |
} // namespace {} |
// Returns true if |op| is a no-op operation that doesn't do any useful work |
@@ -940,119 +1097,47 @@ bool DeltaDiffGenerator::AssignTempBlocks( |
off_t* data_file_size, |
vector<Vertex::Index>* op_indexes, |
vector<vector<Vertex::Index>::size_type>* reverse_op_indexes, |
- vector<CutEdgeVertexes>& cuts) { |
+ const vector<CutEdgeVertexes>& cuts) { |
CHECK(!cuts.empty()); |
+ |
+ // group of cuts w/ the same old_dst: |
+ vector<CutEdgeVertexes> cuts_group; |
+ |
for (vector<CutEdgeVertexes>::size_type i = cuts.size() - 1, e = 0; |
true ; --i) { |
LOG(INFO) << "Fixing temp blocks in cut " << i |
<< ": old dst: " << cuts[i].old_dst << " new vertex: " |
- << cuts[i].new_vertex; |
- const uint64_t blocks_needed = |
- graph_utils::BlocksInExtents(cuts[i].tmp_extents); |
- LOG(INFO) << "Scanning for usable blocks (" << blocks_needed << " needed)"; |
- // For now, just look for a single op w/ sufficient blocks, not |
- // considering blocks from outgoing read-before deps. |
- Vertex::Index node = cuts[i].old_dst; |
- DeltaArchiveManifest_InstallOperation_Type node_type = |
- (*graph)[node].op.type(); |
- if (node_type == DeltaArchiveManifest_InstallOperation_Type_REPLACE || |
- node_type == DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ) { |
- LOG(INFO) << "This was already converted to full, so skipping."; |
- // Delete the temp node and pointer to it from old src |
- if (!(*graph)[cuts[i].old_src].out_edges.erase(cuts[i].new_vertex)) { |
- LOG(INFO) << "Odd. node " << cuts[i].old_src << " didn't point to " |
- << cuts[i].new_vertex; |
- } |
- (*graph)[cuts[i].new_vertex].valid = false; |
- vector<Vertex::Index>::size_type new_topo_idx = |
- (*reverse_op_indexes)[cuts[i].new_vertex]; |
- op_indexes->erase(op_indexes->begin() + new_topo_idx); |
- GenerateReverseTopoOrderMap(*op_indexes, reverse_op_indexes); |
- continue; |
- } |
- bool found_node = false; |
- for (vector<Vertex::Index>::size_type j = (*reverse_op_indexes)[node] + 1, |
- je = op_indexes->size(); j < je; ++j) { |
- Vertex::Index test_node = (*op_indexes)[j]; |
- // See if this node has sufficient blocks |
- ExtentRanges ranges; |
- ranges.AddRepeatedExtents((*graph)[test_node].op.dst_extents()); |
- ranges.SubtractExtent(ExtentForRange( |
- kTempBlockStart, kSparseHole - kTempBlockStart)); |
- ranges.SubtractRepeatedExtents((*graph)[test_node].op.src_extents()); |
- // For now, for simplicity, subtract out all blocks in read-before |
- // dependencies. |
- for (Vertex::EdgeMap::const_iterator edge_i = |
- (*graph)[test_node].out_edges.begin(), |
- edge_e = (*graph)[test_node].out_edges.end(); |
- edge_i != edge_e; ++edge_i) { |
- ranges.SubtractExtents(edge_i->second.extents); |
- } |
+ << cuts[i].new_vertex << " path: " |
+ << (*graph)[cuts[i].old_dst].file_name; |
- uint64_t blocks_found = ranges.blocks(); |
- if (blocks_found < blocks_needed) { |
- if (blocks_found > 0) |
- LOG(INFO) << "insufficient blocks found in topo node " << j |
- << " (node " << (*op_indexes)[j] << "). Found only " |
- << blocks_found; |
- continue; |
- } |
- found_node = true; |
- LOG(INFO) << "Found sufficient blocks in topo node " << j |
- << " (node " << (*op_indexes)[j] << ")"; |
- // Sub in the blocks, and make the node supplying the blocks |
- // depend on old_dst. |
- vector<Extent> real_extents = |
- ranges.GetExtentsForBlockCount(blocks_needed); |
- |
- // Fix the old dest node w/ the real blocks |
- SubstituteBlocks(&(*graph)[node], |
- cuts[i].tmp_extents, |
- real_extents); |
- |
- // Fix the new node w/ the real blocks. Since the new node is just a |
- // copy operation, we can replace all the dest extents w/ the real |
- // blocks. |
- DeltaArchiveManifest_InstallOperation *op = |
- &(*graph)[cuts[i].new_vertex].op; |
- op->clear_dst_extents(); |
- StoreExtents(real_extents, op->mutable_dst_extents()); |
- |
- // Add an edge from the real-block supplier to the old dest block. |
- graph_utils::AddReadBeforeDepExtents(&(*graph)[test_node], |
- node, |
- real_extents); |
- break; |
+ if (cuts_group.empty() || (cuts_group[0].old_dst == cuts[i].old_dst)) { |
+ cuts_group.push_back(cuts[i]); |
+ } else { |
+ CHECK(!cuts_group.empty()); |
+ TEST_AND_RETURN_FALSE(AssignBlockForAdjoiningCuts(graph, |
+ new_root, |
+ data_fd, |
+ data_file_size, |
+ op_indexes, |
+ reverse_op_indexes, |
+ cuts_group)); |
+ cuts_group.clear(); |
+ cuts_group.push_back(cuts[i]); |
} |
- if (!found_node) { |
- // convert to full op |
- LOG(WARNING) << "Failed to find enough temp blocks for cut " << i |
- << " with old dest (graph node " << node |
- << "). Converting to a full op, at the expense of a " |
- << "good compression ratio."; |
- TEST_AND_RETURN_FALSE(ConvertCutToFullOp(graph, |
- cuts[i], |
- new_root, |
- data_fd, |
- data_file_size)); |
- // move the full op to the back |
- vector<Vertex::Index> new_op_indexes; |
- for (vector<Vertex::Index>::const_iterator iter_i = op_indexes->begin(), |
- iter_e = op_indexes->end(); iter_i != iter_e; ++iter_i) { |
- if ((*iter_i == cuts[i].old_dst) || (*iter_i == cuts[i].new_vertex)) |
- continue; |
- new_op_indexes.push_back(*iter_i); |
- } |
- new_op_indexes.push_back(cuts[i].old_dst); |
- op_indexes->swap(new_op_indexes); |
- GenerateReverseTopoOrderMap(*op_indexes, reverse_op_indexes); |
- } |
if (i == e) { |
// break out of for() loop |
break; |
} |
} |
+ CHECK(!cuts_group.empty()); |
+ TEST_AND_RETURN_FALSE(AssignBlockForAdjoiningCuts(graph, |
+ new_root, |
+ data_fd, |
+ data_file_size, |
+ op_indexes, |
+ reverse_op_indexes, |
+ cuts_group)); |
return true; |
} |
@@ -1132,29 +1217,35 @@ bool DeltaDiffGenerator::ConvertCutToFullOp(Graph* graph, |
// Drop all incoming edges, keep all outgoing edges |
// Keep all outgoing edges |
- Vertex::EdgeMap out_edges = (*graph)[cut.old_dst].out_edges; |
- graph_utils::DropWriteBeforeDeps(&out_edges); |
+ if ((*graph)[cut.old_dst].op.type() != |
+ DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ && |
+ (*graph)[cut.old_dst].op.type() != |
+ DeltaArchiveManifest_InstallOperation_Type_REPLACE) { |
+ Vertex::EdgeMap out_edges = (*graph)[cut.old_dst].out_edges; |
+ graph_utils::DropWriteBeforeDeps(&out_edges); |
- TEST_AND_RETURN_FALSE(DeltaReadFile(graph, |
- cut.old_dst, |
- NULL, |
- "/-!@:&*nonexistent_path", |
- new_root, |
- (*graph)[cut.old_dst].file_name, |
- data_fd, |
- data_file_size)); |
+ TEST_AND_RETURN_FALSE(DeltaReadFile(graph, |
+ cut.old_dst, |
+ NULL, |
+ "/-!@:&*nonexistent_path", |
+ new_root, |
+ (*graph)[cut.old_dst].file_name, |
+ data_fd, |
+ data_file_size)); |
- (*graph)[cut.old_dst].out_edges = out_edges; |
+ (*graph)[cut.old_dst].out_edges = out_edges; |
- // Right now we don't have doubly-linked edges, so we have to scan |
- // the whole graph. |
- graph_utils::DropIncomingEdgesTo(graph, cut.old_dst); |
+ // Right now we don't have doubly-linked edges, so we have to scan |
+ // the whole graph. |
+ graph_utils::DropIncomingEdgesTo(graph, cut.old_dst); |
+ } |
// Delete temp node |
(*graph)[cut.old_src].out_edges.erase(cut.new_vertex); |
CHECK((*graph)[cut.old_dst].out_edges.find(cut.new_vertex) == |
(*graph)[cut.old_dst].out_edges.end()); |
(*graph)[cut.new_vertex].valid = false; |
+ LOG(INFO) << "marked node invalid: " << cut.new_vertex; |
return true; |
} |
@@ -1191,6 +1282,8 @@ bool DeltaDiffGenerator::ConvertGraphToDag(Graph* graph, |
vector<vector<Vertex::Index>::size_type> inverse_final_order; |
GenerateReverseTopoOrderMap(*final_order, &inverse_final_order); |
+ SortCutsByTopoOrder(*final_order, &cuts); |
+ |
if (!cuts.empty()) |
TEST_AND_RETURN_FALSE(AssignTempBlocks(graph, |
new_root, |
@@ -1200,6 +1293,7 @@ bool DeltaDiffGenerator::ConvertGraphToDag(Graph* graph, |
&inverse_final_order, |
cuts)); |
LOG(INFO) << "Making sure all temp blocks have been allocated"; |
+ |
graph_utils::DumpGraph(*graph); |
CHECK(NoTempBlocksRemain(*graph)); |
LOG(INFO) << "done making sure all temp blocks are allocated"; |
@@ -1423,8 +1517,8 @@ bool DeltaDiffGenerator::GenerateDeltaUpdateFile( |
i - manifest.install_operations_size()); |
if (op->has_data_offset()) { |
if (op->data_offset() != next_blob_offset) { |
- LOG(FATAL) << "bad blob offset! " << op->data_offset() << " != " |
- << next_blob_offset; |
+ LOG(WARNING) << "bad blob offset! " << op->data_offset() << " != " |
petkov
2010/10/23 04:52:52
why is this not FATAL any more?
|
+ << next_blob_offset; |
} |
next_blob_offset += op->data_length(); |
} |