Chromium Code Reviews

Unified Diff: ui/base/resource/data_pack.cc

Issue 2969123002: Add deduplication logic to .pak files (Closed)
Patch Set: fix resource_sizes computation Created 3 years, 5 months ago
Index: ui/base/resource/data_pack.cc
diff --git a/ui/base/resource/data_pack.cc b/ui/base/resource/data_pack.cc
index 3d07d985a2a7acb35e10dab926e17cc515fafa57..299b5d13b5d2f3c76ed55fed337b3c663994256f 100644
--- a/ui/base/resource/data_pack.cc
+++ b/ui/base/resource/data_pack.cc
@@ -24,31 +24,10 @@
namespace {
-static const uint32_t kFileFormatVersion = 4;
-// Length of file header: version, entry count and text encoding type.
-static const size_t kHeaderLength = 2 * sizeof(uint32_t) + sizeof(uint8_t);
-
-#pragma pack(push, 2)
-struct DataPackEntry {
- uint16_t resource_id;
- uint32_t file_offset;
-
- static int CompareById(const void* void_key, const void* void_entry) {
- uint16_t key = *reinterpret_cast<const uint16_t*>(void_key);
- const DataPackEntry* entry =
- reinterpret_cast<const DataPackEntry*>(void_entry);
- if (key < entry->resource_id) {
- return -1;
- } else if (key > entry->resource_id) {
- return 1;
- } else {
- return 0;
- }
- }
-};
-#pragma pack(pop)
-
-static_assert(sizeof(DataPackEntry) == 6, "size of entry must be six");
+static const uint32_t kFileFormatVersionWithoutAliases = 4;
+static const uint32_t kFileFormatVersionWithAliases = 5;
+static const size_t kHeaderLengthV4 = 2 * sizeof(uint32_t) + sizeof(uint8_t);
+static const size_t kHeaderLengthV5 = 4 * sizeof(uint16_t);
flackr 2017/07/07 18:54:12 This seems to be masquerading the actual data it r
agrieve 2017/07/07 20:47:08 I think it's actually simpler to just say encoding
// We're crashing when trying to load a pak file on Windows. Add some error
// codes for logging.
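For reference, a sketch (not part of this change) of the two header layouts these constants imply, reconstructed from the parsing code in LoadImpl() below. The width of the v5 version field and the unused byte 3 are assumptions; LoadImpl() only ever reads data[0] for the version and never touches byte 3.

#pragma pack(push, 1)
struct PakHeaderV4 {          // kHeaderLengthV4 == 9 bytes
  uint32_t version;           // always 4
  uint32_t resource_count;
  uint8_t text_encoding;      // UTF8, UTF16 or BINARY
};
struct PakHeaderV5 {          // kHeaderLengthV5 == 8 bytes
  uint16_t version;           // 5; only the low byte is examined below
  uint8_t text_encoding;
  uint8_t padding;            // assumed unused; never read in this patch set
  uint16_t resource_count;
  uint16_t alias_count;
};
#pragma pack(pop)
static_assert(sizeof(PakHeaderV4) == 9, "must equal kHeaderLengthV4");
static_assert(sizeof(PakHeaderV5) == 8, "must equal kHeaderLengthV5");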
@@ -102,6 +81,30 @@ void MaybePrintResourceId(uint16_t resource_id) {
namespace ui {
+#pragma pack(push, 2)
+struct DataPack::Entry {
+ uint16_t resource_id;
+ uint32_t file_offset;
+
+ static int CompareById(const void* void_key, const void* void_entry) {
+ uint16_t key = *reinterpret_cast<const uint16_t*>(void_key);
+ const Entry* entry = reinterpret_cast<const Entry*>(void_entry);
+ return key - entry->resource_id;
+ }
+};
+
+struct DataPack::Alias {
+ uint16_t resource_id;
+ uint16_t entry_index;
+
+ static int CompareById(const void* void_key, const void* void_entry) {
+ uint16_t key = *reinterpret_cast<const uint16_t*>(void_key);
+ const Alias* entry = reinterpret_cast<const Alias*>(void_entry);
+ return key - entry->resource_id;
+ }
+};
+#pragma pack(pop)
+
// Abstraction of a data source (memory mapped file or in-memory buffer).
class DataPack::DataSource {
public:
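To make the deduplication mechanism concrete, an illustrative example (ids, offsets and index values are made up; local struct copies are used because DataPack's nested types are private):

struct Entry { uint16_t resource_id; uint32_t file_offset; };
struct Alias { uint16_t resource_id; uint16_t entry_index; };

// Two resources with identical bytes: id 300 owns the data, id 301 is an
// alias holding the table index of 300's entry. Only one copy is stored.
const Entry kEntries[] = {{300, 0x80}, {302, 0xA0}, {0, 0xC0}};  // last = end marker
const Alias kAliases[] = {{301, 0}};  // duplicate id 301 -> kEntries[0]

// LookupEntryById(301) misses the entry table, falls back to the alias table,
// and returns the same Entry as LookupEntryById(300), so both ids yield the
// same data.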
@@ -149,9 +152,15 @@ class DataPack::BufferDataSource : public DataPack::DataSource {
};
DataPack::DataPack(ui::ScaleFactor scale_factor)
- : resource_count_(0),
+ : resource_table_(nullptr),
+ resource_count_(0),
+ alias_table_(nullptr),
+ alias_count_(0),
text_encoding_type_(BINARY),
scale_factor_(scale_factor) {
+ // Static asserts must be within a DataPack member to appease visibility rules.
+ static_assert(sizeof(Entry) == 6, "size of Entry must be 6");
+ static_assert(sizeof(Alias) == 4, "size of Alias must be 4");
}
DataPack::~DataPack() {
@@ -193,29 +202,37 @@ bool DataPack::LoadFromBuffer(base::StringPiece buffer) {
}
bool DataPack::LoadImpl(std::unique_ptr<DataPack::DataSource> data_source) {
- // Sanity check the header of the file.
- if (kHeaderLength > data_source->GetLength()) {
+ // Sanity check the header of the file. There will be at least one entry in
+ // the resource table, so check against the larger of the two header lengths.
+ if (data_source->GetLength() < kHeaderLengthV4) {
flackr 2017/07/07 18:54:12 This means we don't get HEADER_TRUNCATED for an 8
agrieve 2017/07/07 20:47:09 I think the main point of this check is just to en
flackr 2017/07/18 20:21:23 It does mean that on a truncated v4 file with just
agrieve 2017/07/19 02:34:04 Done.
DLOG(ERROR) << "Data pack file corruption: incomplete file header.";
LogDataPackError(HEADER_TRUNCATED);
return false;
}
// Parse the header of the file.
- // First uint32_t: version; second: resource count;
- const uint32_t* ptr =
- reinterpret_cast<const uint32_t*>(data_source->GetData());
- uint32_t version = ptr[0];
- if (version != kFileFormatVersion) {
+ const uint8_t* data = data_source->GetData();
+ int version = data[0];
flackr 2017/07/07 18:54:12 I think we should keep the version length the same
agrieve 2017/07/07 20:47:08 I'm not sure what your concern is here. The code a
flackr 2017/07/10 14:07:49 I'm worried it's bad practice to shorten the versi
agrieve 2017/07/18 19:30:29 Yeah, I suppose a few bytes per-file isn't worth c
+ size_t header_length;
+ if (version == kFileFormatVersionWithoutAliases) {
+ resource_count_ = reinterpret_cast<const uint32_t*>(data)[1];
+ alias_count_ = 0;
+ text_encoding_type_ = static_cast<TextEncodingType>(data[8]);
+ header_length = kHeaderLengthV4;
+ } else if (version == kFileFormatVersionWithAliases) {
+ // Version 5 added the alias table and changed the header format.
+ text_encoding_type_ = static_cast<TextEncodingType>(data[2]);
+ resource_count_ = reinterpret_cast<const uint16_t*>(data)[2];
+ alias_count_ = reinterpret_cast<const uint16_t*>(data)[3];
+ header_length = kHeaderLengthV5;
+ } else {
LOG(ERROR) << "Bad data pack version: got " << version << ", expected "
- << kFileFormatVersion;
+ << kFileFormatVersionWithoutAliases << " or "
+ << kFileFormatVersionWithAliases;
LogDataPackError(BAD_VERSION);
return false;
}
- resource_count_ = ptr[1];
- // third: text encoding.
- const uint8_t* ptr_encoding = reinterpret_cast<const uint8_t*>(ptr + 2);
- text_encoding_type_ = static_cast<TextEncodingType>(*ptr_encoding);
if (text_encoding_type_ != UTF8 && text_encoding_type_ != UTF16 &&
text_encoding_type_ != BINARY) {
LOG(ERROR) << "Bad data pack text encoding: got " << text_encoding_type_
@@ -227,35 +244,52 @@ bool DataPack::LoadImpl(std::unique_ptr<DataPack::DataSource> data_source) {
// Sanity check the file.
// 1) Check we have enough entries. There's an extra entry after the last item
// which gives the length of the last item.
- if (kHeaderLength + (resource_count_ + 1) * sizeof(DataPackEntry) >
+ size_t resource_table_size = (resource_count_ + 1) * sizeof(Entry);
+ size_t alias_table_size = alias_count_ * sizeof(Alias);
+ if (header_length + resource_table_size + alias_table_size >
data_source->GetLength()) {
- LOG(ERROR) << "Data pack file corruption: too short for number of "
- "entries specified.";
+ LOG(ERROR) << "Data pack file corruption: "
+ << "too short for number of entries.";
LogDataPackError(INDEX_TRUNCATED);
return false;
}
+
+ resource_table_ = reinterpret_cast<const Entry*>(&data[header_length]);
+ alias_table_ = reinterpret_cast<const Alias*>(
+ &data[header_length + resource_table_size]);
+
// 2) Verify the entries are within the appropriate bounds. There's an extra
// entry after the last item which gives us the length of the last item.
flackr 2017/07/07 18:54:12 Also verify the indices of the alias table are wit
agrieve 2017/07/07 20:47:08 Done.
for (size_t i = 0; i < resource_count_ + 1; ++i) {
- const DataPackEntry* entry = reinterpret_cast<const DataPackEntry*>(
- data_source->GetData() + kHeaderLength + (i * sizeof(DataPackEntry)));
- if (entry->file_offset > data_source->GetLength()) {
- LOG(ERROR) << "Entry #" << i << " in data pack points off end of file. "
- << "Was the file corrupted?";
+ if (resource_table_[i].file_offset > data_source->GetLength()) {
+ LOG(ERROR) << "Data pack file corruption: "
+ << "Entry #" << i << " past end of file.";
LogDataPackError(ENTRY_NOT_FOUND);
return false;
}
}
data_source_ = std::move(data_source);
-
return true;
}
+const DataPack::Entry* DataPack::LookupEntryById(uint16_t resource_id) const {
+ const Entry* ret = reinterpret_cast<const Entry*>(
flackr 2017/07/07 18:54:12 nit: Add a comment explaining that we search the r
agrieve 2017/07/07 20:47:08 Done.
+ bsearch(&resource_id, resource_table_, resource_count_, sizeof(Entry),
+ Entry::CompareById));
+ if (ret == nullptr) {
+ const Alias* alias = reinterpret_cast<const Alias*>(
+ bsearch(&resource_id, alias_table_, alias_count_, sizeof(Alias),
+ Alias::CompareById));
+ if (alias != nullptr) {
+ ret = &resource_table_[alias->entry_index];
+ }
+ }
+ return ret;
+}
+
bool DataPack::HasResource(uint16_t resource_id) const {
- return !!bsearch(&resource_id, data_source_->GetData() + kHeaderLength,
- resource_count_, sizeof(DataPackEntry),
- DataPackEntry::CompareById);
+ return !!LookupEntryById(resource_id);
}
bool DataPack::GetStringPiece(uint16_t resource_id,
@@ -271,20 +305,19 @@ bool DataPack::GetStringPiece(uint16_t resource_id,
#error DataPack assumes little endian
#endif
- const DataPackEntry* target = reinterpret_cast<const DataPackEntry*>(bsearch(
- &resource_id, data_source_->GetData() + kHeaderLength, resource_count_,
- sizeof(DataPackEntry), DataPackEntry::CompareById));
- if (!target) {
+ const Entry* target = LookupEntryById(resource_id);
+ if (!target)
return false;
- }
- const DataPackEntry* next_entry = target + 1;
+ const Entry* next_entry = target + 1;
// If the next entry points beyond the end of the file this data pack's entry
// table is corrupt. Log an error and return false. See
// http://crbug.com/371301.
- if (next_entry->file_offset > data_source_->GetLength()) {
- size_t entry_index = target - reinterpret_cast<const DataPackEntry*>(
- data_source_->GetData() + kHeaderLength);
+ size_t entry_offset =
+ reinterpret_cast<const uint8_t*>(next_entry) - data_source_->GetData();
+ size_t pak_size = data_source_->GetLength();
+ if (entry_offset > pak_size || next_entry->file_offset > pak_size) {
+ size_t entry_index = target - resource_table_;
LOG(ERROR) << "Entry #" << entry_index << " in data pack points off end "
<< "of file. This should have been caught when loading. Was the "
<< "file modified?";
@@ -320,9 +353,7 @@ ui::ScaleFactor DataPack::GetScaleFactor() const {
void DataPack::CheckForDuplicateResources(
const std::vector<std::unique_ptr<ResourceHandle>>& packs) {
for (size_t i = 0; i < resource_count_ + 1; ++i) {
- const DataPackEntry* entry = reinterpret_cast<const DataPackEntry*>(
- data_source_->GetData() + kHeaderLength + (i * sizeof(DataPackEntry)));
- const uint16_t resource_id = entry->resource_id;
+ const uint16_t resource_id = resource_table_[i].resource_id;
const float resource_scale = GetScaleForScaleFactor(scale_factor_);
for (const auto& handle : packs) {
if (GetScaleForScaleFactor(handle->GetScaleFactor()) != resource_scale)
@@ -343,7 +374,9 @@ bool DataPack::WritePack(const base::FilePath& path,
if (!file)
return false;
- if (fwrite(&kFileFormatVersion, sizeof(kFileFormatVersion), 1, file) != 1) {
+ // TODO(agrieve): Is there any benefit to writing a v5 file (with aliases)?
flackr 2017/07/07 18:54:12 Oh this is interesting. Maybe we should change thi
agrieve 2017/07/07 20:47:09 Sounds good. Just heading out now, but will addres
agrieve 2017/07/18 19:30:29 Done.
+ if (fwrite(&kFileFormatVersionWithoutAliases,
+ sizeof(kFileFormatVersionWithoutAliases), 1, file) != 1) {
LOG(ERROR) << "Failed to write file version";
base::CloseFile(file);
return false;
@@ -375,8 +408,8 @@ bool DataPack::WritePack(const base::FilePath& path,
// Each entry is a uint16_t + a uint32_t. We have an extra entry after the
// last item so we can compute the size of the list item.
- uint32_t index_length = (entry_count + 1) * sizeof(DataPackEntry);
- uint32_t data_offset = kHeaderLength + index_length;
+ uint32_t index_length = (entry_count + 1) * sizeof(Entry);
+ uint32_t data_offset = kHeaderLengthV4 + index_length;
for (std::map<uint16_t, base::StringPiece>::const_iterator it =
resources.begin();
it != resources.end(); ++it) {
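A worked example of the offset arithmetic above (the resource count is hypothetical): with 100 resources the index occupies (100 + 1) * sizeof(Entry) = 101 * 6 = 606 bytes, so data_offset = kHeaderLengthV4 + 606 = 9 + 606 = 615, i.e. the first resource's bytes start 615 bytes into the file.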
