Chromium Code Reviews

Unified Diff: ui/base/resource/data_pack.cc

Issue 2969123002: Add deduplication logic to .pak files (Closed)
Patch Set: fix resource_sizes computation Created 3 years, 5 months ago
Index: ui/base/resource/data_pack.cc
diff --git a/ui/base/resource/data_pack.cc b/ui/base/resource/data_pack.cc
index 3d07d985a2a7acb35e10dab926e17cc515fafa57..299b5d13b5d2f3c76ed55fed337b3c663994256f 100644
--- a/ui/base/resource/data_pack.cc
+++ b/ui/base/resource/data_pack.cc
@@ -24,31 +24,10 @@
namespace {
-static const uint32_t kFileFormatVersion = 4;
-// Length of file header: version, entry count and text encoding type.
-static const size_t kHeaderLength = 2 * sizeof(uint32_t) + sizeof(uint8_t);
-
-#pragma pack(push, 2)
-struct DataPackEntry {
- uint16_t resource_id;
- uint32_t file_offset;
-
- static int CompareById(const void* void_key, const void* void_entry) {
- uint16_t key = *reinterpret_cast<const uint16_t*>(void_key);
- const DataPackEntry* entry =
- reinterpret_cast<const DataPackEntry*>(void_entry);
- if (key < entry->resource_id) {
- return -1;
- } else if (key > entry->resource_id) {
- return 1;
- } else {
- return 0;
- }
- }
-};
-#pragma pack(pop)
-
-static_assert(sizeof(DataPackEntry) == 6, "size of entry must be six");
+static const uint32_t kFileFormatVersionWithoutAliases = 4;
+static const uint32_t kFileFormatVersionWithAliases = 5;
+static const size_t kHeaderLengthV4 = 2 * sizeof(uint32_t) + sizeof(uint8_t);
+static const size_t kHeaderLengthV5 = 4 * sizeof(uint16_t);
flackr 2017/07/07 18:54:12 This seems to be masquerading the actual data it r
agrieve 2017/07/07 20:47:08 I think it's actually simpler to just say encoding
// We're crashing when trying to load a pak file on Windows. Add some error
// codes for logging.
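For reference, a sketch (not part of this change) of the two header layouts these constants imply, reconstructed from the parsing code in LoadImpl() below. The width of the v5 version field and the unused byte 3 are assumptions; LoadImpl() only ever reads data[0] for the version and never touches byte 3.

#pragma pack(push, 1)
struct PakHeaderV4 {          // kHeaderLengthV4 == 9 bytes
  uint32_t version;           // always 4
  uint32_t resource_count;
  uint8_t text_encoding;      // UTF8, UTF16 or BINARY
};
struct PakHeaderV5 {          // kHeaderLengthV5 == 8 bytes
  uint16_t version;           // 5; only the low byte is examined below
  uint8_t text_encoding;
  uint8_t padding;            // assumed unused; never read in this patch set
  uint16_t resource_count;
  uint16_t alias_count;
};
#pragma pack(pop)
static_assert(sizeof(PakHeaderV4) == 9, "must equal kHeaderLengthV4");
static_assert(sizeof(PakHeaderV5) == 8, "must equal kHeaderLengthV5");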
@@ -102,6 +81,30 @@ void MaybePrintResourceId(uint16_t resource_id) {
namespace ui {
+#pragma pack(push, 2)
+struct DataPack::Entry {
+ uint16_t resource_id;
+ uint32_t file_offset;
+
+ static int CompareById(const void* void_key, const void* void_entry) {
+ uint16_t key = *reinterpret_cast<const uint16_t*>(void_key);
+ const Entry* entry = reinterpret_cast<const Entry*>(void_entry);
+ return key - entry->resource_id;
+ }
+};
+
+struct DataPack::Alias {
+ uint16_t resource_id;
+ uint16_t entry_index;
+
+ static int CompareById(const void* void_key, const void* void_entry) {
+ uint16_t key = *reinterpret_cast<const uint16_t*>(void_key);
+ const Alias* entry = reinterpret_cast<const Alias*>(void_entry);
+ return key - entry->resource_id;
+ }
+};
+#pragma pack(pop)
+
// Abstraction of a data source (memory mapped file or in-memory buffer).
class DataPack::DataSource {
public:
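To make the deduplication mechanism concrete, an illustrative example (ids, offsets and index values are made up; local struct copies are used because DataPack's nested types are private):

struct Entry { uint16_t resource_id; uint32_t file_offset; };
struct Alias { uint16_t resource_id; uint16_t entry_index; };

// Two resources with identical bytes: id 300 owns the data, id 301 is an
// alias holding the table index of 300's entry. Only one copy is stored.
const Entry kEntries[] = {{300, 0x80}, {302, 0xA0}, {0, 0xC0}};  // last = end marker
const Alias kAliases[] = {{301, 0}};  // duplicate id 301 -> kEntries[0]

// LookupEntryById(301) misses the entry table, falls back to the alias table,
// and returns the same Entry as LookupEntryById(300), so both ids yield the
// same data.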
@@ -149,9 +152,15 @@ class DataPack::BufferDataSource : public DataPack::DataSource {
};
DataPack::DataPack(ui::ScaleFactor scale_factor)
- : resource_count_(0),
+ : resource_table_(nullptr),
+ resource_count_(0),
+ alias_table_(nullptr),
+ alias_count_(0),
text_encoding_type_(BINARY),
scale_factor_(scale_factor) {
+ // Static asserts must be within a DataPack member to appease visibility rules.
+ static_assert(sizeof(Entry) == 6, "size of Entry must be 6");
+ static_assert(sizeof(Alias) == 4, "size of Alias must be 4");
}
DataPack::~DataPack() {
@@ -193,29 +202,37 @@ bool DataPack::LoadFromBuffer(base::StringPiece buffer) {
}
bool DataPack::LoadImpl(std::unique_ptr<DataPack::DataSource> data_source) {
- // Sanity check the header of the file.
- if (kHeaderLength > data_source->GetLength()) {
+ // Sanity check the header of the file. There will be at least one entry in
+ // the resource table, so check against the larger of the two header lengths.
+ if (data_source->GetLength() < kHeaderLengthV4) {
flackr 2017/07/07 18:54:12 This means we don't get HEADER_TRUNCATED for an 8
agrieve 2017/07/07 20:47:09 I think the main point of this check is just to en
flackr 2017/07/18 20:21:23 It does mean that on a truncated v4 file with just
agrieve 2017/07/19 02:34:04 Done.
DLOG(ERROR) << "Data pack file corruption: incomplete file header.";
LogDataPackError(HEADER_TRUNCATED);
return false;
}
// Parse the header of the file.
- // First uint32_t: version; second: resource count;
- const uint32_t* ptr =
- reinterpret_cast<const uint32_t*>(data_source->GetData());
- uint32_t version = ptr[0];
- if (version != kFileFormatVersion) {
+ const uint8_t* data = data_source->GetData();
+ int version = data[0];
flackr 2017/07/07 18:54:12 I think we should keep the version length the same
agrieve 2017/07/07 20:47:08 I'm not sure what your concern is here. The code a
flackr 2017/07/10 14:07:49 I'm worried it's bad practice to shorten the versi
agrieve 2017/07/18 19:30:29 Yeah, I suppose a few bytes per-file isn't worth c
+ size_t header_length;
+ if (version == kFileFormatVersionWithoutAliases) {
+ resource_count_ = reinterpret_cast<const uint32_t*>(data)[1];
+ alias_count_ = 0;
+ text_encoding_type_ = static_cast<TextEncodingType>(data[8]);
+ header_length = kHeaderLengthV4;
+ } else if (version == kFileFormatVersionWithAliases) {
+ // Version 5 added the alias table and changed the header format.
+ text_encoding_type_ = static_cast<TextEncodingType>(data[2]);
+ resource_count_ = reinterpret_cast<const uint16_t*>(data)[2];
+ alias_count_ = reinterpret_cast<const uint16_t*>(data)[3];
+ header_length = kHeaderLengthV5;
+ } else {
LOG(ERROR) << "Bad data pack version: got " << version << ", expected "
- << kFileFormatVersion;
+ << kFileFormatVersionWithoutAliases << " or "
+ << kFileFormatVersionWithAliases;
LogDataPackError(BAD_VERSION);
return false;
}
- resource_count_ = ptr[1];
- // third: text encoding.
- const uint8_t* ptr_encoding = reinterpret_cast<const uint8_t*>(ptr + 2);
- text_encoding_type_ = static_cast<TextEncodingType>(*ptr_encoding);
if (text_encoding_type_ != UTF8 && text_encoding_type_ != UTF16 &&
text_encoding_type_ != BINARY) {
LOG(ERROR) << "Bad data pack text encoding: got " << text_encoding_type_
@@ -227,35 +244,52 @@ bool DataPack::LoadImpl(std::unique_ptr<DataPack::DataSource> data_source) {
// Sanity check the file.
// 1) Check we have enough entries. There's an extra entry after the last item
// which gives the length of the last item.
- if (kHeaderLength + (resource_count_ + 1) * sizeof(DataPackEntry) >
+ size_t resource_table_size = (resource_count_ + 1) * sizeof(Entry);
+ size_t alias_table_size = alias_count_ * sizeof(Alias);
+ if (header_length + resource_table_size + alias_table_size >
data_source->GetLength()) {
- LOG(ERROR) << "Data pack file corruption: too short for number of "
- "entries specified.";
+ LOG(ERROR) << "Data pack file corruption: "
+ << "too short for number of entries.";
LogDataPackError(INDEX_TRUNCATED);
return false;
}
+
+ resource_table_ = reinterpret_cast<const Entry*>(&data[header_length]);
+ alias_table_ = reinterpret_cast<const Alias*>(
+ &data[header_length + resource_table_size]);
+
// 2) Verify the entries are within the appropriate bounds. There's an extra
// entry after the last item which gives us the length of the last item.
flackr 2017/07/07 18:54:12 Also verify the indices of the alias table are wit
agrieve 2017/07/07 20:47:08 Done.
for (size_t i = 0; i < resource_count_ + 1; ++i) {
- const DataPackEntry* entry = reinterpret_cast<const DataPackEntry*>(
- data_source->GetData() + kHeaderLength + (i * sizeof(DataPackEntry)));
- if (entry->file_offset > data_source->GetLength()) {
- LOG(ERROR) << "Entry #" << i << " in data pack points off end of file. "
- << "Was the file corrupted?";
+ if (resource_table_[i].file_offset > data_source->GetLength()) {
+ LOG(ERROR) << "Data pack file corruption: "
+ << "Entry #" << i << " past end of file.";
LogDataPackError(ENTRY_NOT_FOUND);
return false;
}
}
data_source_ = std::move(data_source);
-
return true;
}
+const DataPack::Entry* DataPack::LookupEntryById(uint16_t resource_id) const {
+ const Entry* ret = reinterpret_cast<const Entry*>(
flackr 2017/07/07 18:54:12 nit: Add a comment explaining that we search the r
agrieve 2017/07/07 20:47:08 Done.
+ bsearch(&resource_id, resource_table_, resource_count_, sizeof(Entry),
+ Entry::CompareById));
+ if (ret == nullptr) {
+ const Alias* alias = reinterpret_cast<const Alias*>(
+ bsearch(&resource_id, alias_table_, alias_count_, sizeof(Alias),
+ Alias::CompareById));
+ if (alias != nullptr) {
+ ret = &resource_table_[alias->entry_index];
+ }
+ }
+ return ret;
+}
+
bool DataPack::HasResource(uint16_t resource_id) const {
- return !!bsearch(&resource_id, data_source_->GetData() + kHeaderLength,
- resource_count_, sizeof(DataPackEntry),
- DataPackEntry::CompareById);
+ return !!LookupEntryById(resource_id);
}
bool DataPack::GetStringPiece(uint16_t resource_id,
@@ -271,20 +305,19 @@ bool DataPack::GetStringPiece(uint16_t resource_id,
#error DataPack assumes little endian
#endif
- const DataPackEntry* target = reinterpret_cast<const DataPackEntry*>(bsearch(
- &resource_id, data_source_->GetData() + kHeaderLength, resource_count_,
- sizeof(DataPackEntry), DataPackEntry::CompareById));
- if (!target) {
+ const Entry* target = LookupEntryById(resource_id);
+ if (!target)
return false;
- }
- const DataPackEntry* next_entry = target + 1;
+ const Entry* next_entry = target + 1;
// If the next entry points beyond the end of the file this data pack's entry
// table is corrupt. Log an error and return false. See
// http://crbug.com/371301.
- if (next_entry->file_offset > data_source_->GetLength()) {
- size_t entry_index = target - reinterpret_cast<const DataPackEntry*>(
- data_source_->GetData() + kHeaderLength);
+ size_t entry_offset =
+ reinterpret_cast<const uint8_t*>(next_entry) - data_source_->GetData();
+ size_t pak_size = data_source_->GetLength();
+ if (entry_offset > pak_size || next_entry->file_offset > pak_size) {
+ size_t entry_index = target - resource_table_;
LOG(ERROR) << "Entry #" << entry_index << " in data pack points off end "
<< "of file. This should have been caught when loading. Was the "
<< "file modified?";
@@ -320,9 +353,7 @@ ui::ScaleFactor DataPack::GetScaleFactor() const {
void DataPack::CheckForDuplicateResources(
const std::vector<std::unique_ptr<ResourceHandle>>& packs) {
for (size_t i = 0; i < resource_count_ + 1; ++i) {
- const DataPackEntry* entry = reinterpret_cast<const DataPackEntry*>(
- data_source_->GetData() + kHeaderLength + (i * sizeof(DataPackEntry)));
- const uint16_t resource_id = entry->resource_id;
+ const uint16_t resource_id = resource_table_[i].resource_id;
const float resource_scale = GetScaleForScaleFactor(scale_factor_);
for (const auto& handle : packs) {
if (GetScaleForScaleFactor(handle->GetScaleFactor()) != resource_scale)
@@ -343,7 +374,9 @@ bool DataPack::WritePack(const base::FilePath& path,
if (!file)
return false;
- if (fwrite(&kFileFormatVersion, sizeof(kFileFormatVersion), 1, file) != 1) {
+ // TODO(agrieve): Is there any benefit to writing a v5 file (with aliases)?
flackr 2017/07/07 18:54:12 Oh this is interesting. Maybe we should change thi
agrieve 2017/07/07 20:47:09 Sounds good. Just heading out now, but will addres
agrieve 2017/07/18 19:30:29 Done.
+ if (fwrite(&kFileFormatVersionWithoutAliases,
+ sizeof(kFileFormatVersionWithoutAliases), 1, file) != 1) {
LOG(ERROR) << "Failed to write file version";
base::CloseFile(file);
return false;
@@ -375,8 +408,8 @@ bool DataPack::WritePack(const base::FilePath& path,
// Each entry is a uint16_t + a uint32_t. We have an extra entry after the
// last item so we can compute the size of the list item.
- uint32_t index_length = (entry_count + 1) * sizeof(DataPackEntry);
- uint32_t data_offset = kHeaderLength + index_length;
+ uint32_t index_length = (entry_count + 1) * sizeof(Entry);
+ uint32_t data_offset = kHeaderLengthV4 + index_length;
for (std::map<uint16_t, base::StringPiece>::const_iterator it =
resources.begin();
it != resources.end(); ++it) {
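A worked example of the offset arithmetic above (the resource count is hypothetical): with 100 resources the index occupies (100 + 1) * sizeof(Entry) = 101 * 6 = 606 bytes, so data_offset = kHeaderLengthV4 + 606 = 9 + 606 = 615, i.e. the first resource's bytes start 615 bytes into the file.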
