Chromium Code Reviews

Index: tools/grit/grit/format/data_pack.py
diff --git a/tools/grit/grit/format/data_pack.py b/tools/grit/grit/format/data_pack.py
index f9bfc845f11b78ac8f69294026dd7233efa479d6..dd9ec170a16a9d7494da6a662a9fd08ae12b5c07 100755
--- a/tools/grit/grit/format/data_pack.py
+++ b/tools/grit/grit/format/data_pack.py
@@ -21,9 +21,7 @@ from grit.node import message
 from grit.node import structure
-PACK_FILE_VERSION = 4
-HEADER_LENGTH = 2 * 4 + 1  # Two uint32s. (file version, number of entries) and
-                           # one uint8 (encoding of text resources)
+PACK_FILE_VERSION = 5
 BINARY, UTF8, UTF16 = range(3)
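For reference, the version 4 header is two uint32s plus a uint8 (9 bytes), while the version 5 header introduced below is four uint16s (8 bytes). A minimal sketch, using only the struct formats this patch itself uses:

    import struct

    # Version 4 header: uint32 version, uint32 resource count, uint8 encoding.
    assert struct.calcsize('<IIB') == 9
    # Version 5 header: uint16 version, uint16 encoding,
    # uint16 resource count, uint16 alias count.
    assert struct.calcsize('<HHHH') == 8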
@@ -31,6 +29,10 @@ class WrongFileVersion(Exception):
   pass
+class CorruptDataPack(Exception):
+  pass
+
+
 DataPackContents = collections.namedtuple(
     'DataPackContents', 'resources encoding')
@@ -49,56 +51,100 @@ def Format(root, lang='en', output_dir='.'):
 def ReadDataPack(input_file):
+  return ReadDataPackFromString(util.ReadFile(input_file, util.BINARY))
+
+
+def ReadDataPackFromString(data):
   """Reads a data pack file and returns a dictionary."""
-  data = util.ReadFile(input_file, util.BINARY)
   original_data = data
   # Read the header.
-  version, num_entries, encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
-  if version != PACK_FILE_VERSION:
-    print 'Wrong file version in ', input_file
-    raise WrongFileVersion
+  version = struct.unpack('<H', data[:2])[0]
+  if version == 4:
+    resource_count, encoding = struct.unpack('<IB', data[4:9])
+    alias_count = 0
+    data = data[9:]
+  elif version == 5:
+    encoding, resource_count, alias_count = struct.unpack('<HHH', data[2:8])
+    data = data[8:]
+  else:
+    raise WrongFileVersion('Found version: ' + str(version))
   resources = {}
-  if num_entries == 0:
-    return DataPackContents(resources, encoding)
-
-  # Read the index and data.
-  data = data[HEADER_LENGTH:]
   kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
-  for _ in range(num_entries):
-    id, offset = struct.unpack('<HI', data[:kIndexEntrySize])
-    data = data[kIndexEntrySize:]
-    next_id, next_offset = struct.unpack('<HI', data[:kIndexEntrySize])
-    resources[id] = original_data[offset:next_offset]
+  def entry_at_index(idx):
+    offset = idx * kIndexEntrySize
+    return struct.unpack('<HI', data[offset:offset + kIndexEntrySize])
+
+  # Read the main table in reverse so that prev_offset > offset.
+  prev_offset = entry_at_index(resource_count)[1]
flackr (2017/07/07 18:54:12):
  It's a little strange to not read in a forwards direction…

agrieve (2017/07/07 20:47:08):
  ah, yeah, that's nicer :)
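The forwards variant the reviewer is hinting at could reuse entry_at_index and the sentinel entry written after the main table; a hypothetical sketch, not the code in this snapshot:

    # Read the main table forwards: the sentinel entry at index
    # resource_count supplies the end offset of the last resource.
    for i in xrange(resource_count):
      resource_id, offset = entry_at_index(i)
      end_offset = entry_at_index(i + 1)[1]
      resources[resource_id] = original_data[offset:end_offset]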
+  for i in xrange(resource_count - 1, -1, -1):
+    resource_id, offset = entry_at_index(i)
+    resources[resource_id] = original_data[offset:prev_offset]
+    prev_offset = offset
+
+  # Read the alias table.
+  alias_data = data[(resource_count + 1) * kIndexEntrySize:]
+  kAliasEntrySize = 2 + 2  # uint16, uint16
+  def alias_at_index(idx):
+    offset = idx * kAliasEntrySize
+    return struct.unpack('<HH', alias_data[offset:offset + kAliasEntrySize])
+
+  for i in xrange(alias_count):
+    resource_id, index = alias_at_index(i)
+    aliased_id = entry_at_index(index)[0]
+    resources[resource_id] = resources[aliased_id]
   return DataPackContents(resources, encoding)
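To make the version 5 layout concrete, here is a hand-assembled pack and what reading it yields; a sketch assuming Python 2 byte strings, with an arbitrary resource id of 40:

    import struct

    # Header (8 bytes): version 5, UTF8, 1 resource, 0 aliases.
    header = struct.pack('<HHHH', PACK_FILE_VERSION, UTF8, 1, 0)
    # Main table: one entry plus the sentinel; data starts at 8 + 2 * 6 = 20.
    index = struct.pack('<HI', 40, 20) + struct.pack('<HI', 0, 22)
    contents = ReadDataPackFromString(header + index + 'hi')
    assert contents.resources == {40: 'hi'}
    assert contents.encoding == UTF8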
 def WriteDataPackToString(resources, encoding):
   """Returns a string with a map of id=>data in the data pack format."""
-  ids = sorted(resources.keys())
   ret = []
-  # Write file header.
-  ret.append(struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding))
-  HEADER_LENGTH = 2 * 4 + 1  # Two uint32s and one uint8.
-
-  # Each entry is a uint16 + a uint32s. We have one extra entry for the last
-  # item.
-  index_length = (len(ids) + 1) * (2 + 4)
-
-  # Write index.
-  data_offset = HEADER_LENGTH + index_length
-  for id in ids:
-    ret.append(struct.pack('<HI', id, data_offset))
-    data_offset += len(resources[id])
+  # Compute alias map.
+  resource_ids = sorted(resources)
+  id_by_data = {resources[k]: k for k in reversed(resource_ids)}
flackr (2017/07/07 18:54:12):
  Comment why we use reversed - presumably want lower ids…

agrieve (2017/07/07 20:47:08):
  Done.
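A toy run of the two comprehensions shows why the order matters: iterating ids from high to low means the lowest id holding a given payload wins the id_by_data slot, so every alias maps to an id smaller than its own (example values made up):

    resources = {1: 'a', 2: 'b', 3: 'a'}
    resource_ids = sorted(resources)
    # reversed() makes lower ids overwrite higher ones on ties.
    id_by_data = {resources[k]: k for k in reversed(resource_ids)}
    assert id_by_data == {'a': 1, 'b': 2}
    alias_map = {k: id_by_data[v] for k, v in resources.iteritems()
                 if id_by_data[v] != k}
    assert alias_map == {3: 1}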
+  # Map of resource_id -> resource_id, where value < key.
+  alias_map = {k: id_by_data[v] for k, v in resources.iteritems()
+               if id_by_data[v] != k}
+  # Write file header.
+  resource_count = len(resources) - len(alias_map)
+  # Note: 2nd and 4th byte are always 0 since version and encoding < 256.
flackr (2017/07/07 18:54:12):
  Version I could see may someday be > 256, I can ju…

agrieve (2017/07/07 20:47:08):
  Expanded the comment. I just speculated that thing…

flackr (2017/07/10 14:07:49):
  I suppose that could help, but it's better IMO to…

agrieve (2017/07/18 19:30:29):
  Done.
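The note under discussion is easy to check against the raw bytes; packing version 5 and encoding 1 as little-endian uint16s gives (Python 2 repr):

    >>> struct.pack('<HH', 5, 1)
    '\x05\x00\x01\x00'

The 2nd and 4th bytes stay zero exactly as long as both values are below 256.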
+  ret.append(struct.pack('<HHHH', PACK_FILE_VERSION, encoding,
+                         resource_count, len(alias_map)))
+  HEADER_LENGTH = 2 + 2 + 2 + 2
+
+  # Each main table entry is a uint16 + a uint32.
flackr (2017/07/07 18:54:12):
  Preserve the comment that we have an extra entry for the last item.

agrieve (2017/07/07 20:47:08):
  Done.
+  # Each alias table entry is a uint16 + a uint16.
+  data_offset = HEADER_LENGTH + (resource_count + 1) * 6 + len(alias_map) * 4
+
+  # Write main table.
+  index_by_id = {}
+  deduped_data = []
+  index = 0
+  for resource_id in resource_ids:
+    if resource_id in alias_map:
+      continue
+    data = resources[resource_id]
+    index_by_id[resource_id] = index
+    ret.append(struct.pack('<HI', resource_id, data_offset))
+    data_offset += len(data)
+    deduped_data.append(data)
+    index += 1
+
+  assert index == resource_count
+  # Add an extra entry at the end.
   ret.append(struct.pack('<HI', 0, data_offset))
+  # Write alias table.
+  for resource_id in sorted(alias_map):
+    index = index_by_id[alias_map[resource_id]]
+    ret.append(struct.pack('<HH', resource_id, index))
+
   # Write data.
-  for id in ids:
-    ret.append(resources[id])
+  ret.extend(deduped_data)
   return ''.join(ret)