Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: tools/grit/grit/format/data_pack.py

Issue 2969123002: Add deduplication logic to .pak files (Closed)
Patch Set: fix resource_sizes computation Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Support for formatting a data pack file used for platform agnostic resource 6 """Support for formatting a data pack file used for platform agnostic resource
7 files. 7 files.
8 """ 8 """
9 9
10 import collections 10 import collections
11 import exceptions 11 import exceptions
12 import os 12 import os
13 import struct 13 import struct
14 import sys 14 import sys
15 if __name__ == '__main__': 15 if __name__ == '__main__':
16 sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) 16 sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
17 17
18 from grit import util 18 from grit import util
19 from grit.node import include 19 from grit.node import include
20 from grit.node import message 20 from grit.node import message
21 from grit.node import structure 21 from grit.node import structure
22 22
23 23
24 PACK_FILE_VERSION = 4 24 PACK_FILE_VERSION = 5
25 HEADER_LENGTH = 2 * 4 + 1 # Two uint32s. (file version, number of entries) and
26 # one uint8 (encoding of text resources)
27 BINARY, UTF8, UTF16 = range(3) 25 BINARY, UTF8, UTF16 = range(3)
28 26
29 27
30 class WrongFileVersion(Exception): 28 class WrongFileVersion(Exception):
31 pass 29 pass
32 30
33 31
32 class CorruptDataPack(Exception):
33 pass
34
35
34 DataPackContents = collections.namedtuple( 36 DataPackContents = collections.namedtuple(
35 'DataPackContents', 'resources encoding') 37 'DataPackContents', 'resources encoding')
36 38
37 39
38 def Format(root, lang='en', output_dir='.'): 40 def Format(root, lang='en', output_dir='.'):
39 """Writes out the data pack file format (platform agnostic resource file).""" 41 """Writes out the data pack file format (platform agnostic resource file)."""
40 data = {} 42 data = {}
41 for node in root.ActiveDescendants(): 43 for node in root.ActiveDescendants():
42 with node: 44 with node:
43 if isinstance(node, (include.IncludeNode, message.MessageNode, 45 if isinstance(node, (include.IncludeNode, message.MessageNode,
44 structure.StructureNode)): 46 structure.StructureNode)):
45 id, value = node.GetDataPackPair(lang, UTF8) 47 id, value = node.GetDataPackPair(lang, UTF8)
46 if value is not None: 48 if value is not None:
47 data[id] = value 49 data[id] = value
48 return WriteDataPackToString(data, UTF8) 50 return WriteDataPackToString(data, UTF8)
49 51
50 52
51 def ReadDataPack(input_file): 53 def ReadDataPack(input_file):
54 return ReadDataPackFromString(util.ReadFile(input_file, util.BINARY))
55
56
57 def ReadDataPackFromString(data):
52 """Reads a data pack file and returns a dictionary.""" 58 """Reads a data pack file and returns a dictionary."""
53 data = util.ReadFile(input_file, util.BINARY)
54 original_data = data 59 original_data = data
55 60
56 # Read the header. 61 # Read the header.
57 version, num_entries, encoding = struct.unpack('<IIB', data[:HEADER_LENGTH]) 62 version = struct.unpack('<H', data[:2])[0]
58 if version != PACK_FILE_VERSION: 63 if version == 4:
59 print 'Wrong file version in ', input_file 64 resource_count, encoding = struct.unpack('<IB', data[4:9])
60 raise WrongFileVersion 65 alias_count = 0
66 data = data[9:]
67 elif version == 5:
68 encoding, resource_count, alias_count = struct.unpack('<HHH', data[2:8])
69 data = data[8:]
70 else:
71 raise WrongFileVersion('Found version: ' + str(version))
61 72
62 resources = {} 73 resources = {}
63 if num_entries == 0: 74 kIndexEntrySize = 2 + 4 # Each entry is a uint16 and a uint32.
64 return DataPackContents(resources, encoding) 75 def entry_at_index(idx):
76 offset = idx * kIndexEntrySize
77 return struct.unpack('<HI', data[offset:offset + kIndexEntrySize])
65 78
66 # Read the index and data. 79 # Read the main table in reverse so that prev_offset > offset.
67 data = data[HEADER_LENGTH:] 80 prev_offset = entry_at_index(resource_count)[1]
flackr 2017/07/07 18:54:12 It's a little strange to not read in a forwards direction.
agrieve 2017/07/07 20:47:08 ah, yeah, that's nicer :)
68 kIndexEntrySize = 2 + 4 # Each entry is a uint16 and a uint32. 81 for i in xrange(resource_count - 1, -1, -1):
69 for _ in range(num_entries): 82 resource_id, offset = entry_at_index(i)
70 id, offset = struct.unpack('<HI', data[:kIndexEntrySize]) 83 resources[resource_id] = original_data[offset:prev_offset]
71 data = data[kIndexEntrySize:] 84 prev_offset = offset
72 next_id, next_offset = struct.unpack('<HI', data[:kIndexEntrySize]) 85
73 resources[id] = original_data[offset:next_offset] 86 # Read the alias table.
87 alias_data = data[(resource_count + 1) * kIndexEntrySize:]
88 kAliasEntrySize = 2 + 2 # uint16, uint16
89 def alias_at_index(idx):
90 offset = idx * kAliasEntrySize
91 return struct.unpack('<HH', alias_data[offset:offset + kAliasEntrySize])
92
93 for i in xrange(alias_count):
94 resource_id, index = alias_at_index(i)
95 aliased_id = entry_at_index(index)[0]
96 resources[resource_id] = resources[aliased_id]
74 97
75 return DataPackContents(resources, encoding) 98 return DataPackContents(resources, encoding)
76 99
77 100
78 def WriteDataPackToString(resources, encoding): 101 def WriteDataPackToString(resources, encoding):
79 """Returns a string with a map of id=>data in the data pack format.""" 102 """Returns a string with a map of id=>data in the data pack format."""
80 ids = sorted(resources.keys())
81 ret = [] 103 ret = []
82 104
105 # Compute alias map.
106 resource_ids = sorted(resources)
107 id_by_data = {resources[k]: k for k in reversed(resource_ids)}
flackr 2017/07/07 18:54:12 Comment why we use reversed - presumably want lowest id to be the canonical one?
agrieve 2017/07/07 20:47:08 Done.
108 # Map of resource_id -> resource_id, where value < key.
109 alias_map = {k: id_by_data[v] for k, v in resources.iteritems()
110 if id_by_data[v] != k}
111
83 # Write file header. 112 # Write file header.
84 ret.append(struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding)) 113 resource_count = len(resources) - len(alias_map)
85 HEADER_LENGTH = 2 * 4 + 1 # Two uint32s and one uint8. 114 # Note: 2nd and 4th byte are always 0 since version and encoding < 256.
flackr 2017/07/07 18:54:12 Version I could see may someday be > 256, I can ju
agrieve 2017/07/07 20:47:08 Expanded the comment. I just speculated that thing
flackr 2017/07/10 14:07:49 I suppose that could help, but it's better IMO to
agrieve 2017/07/18 19:30:29 Done.
115 ret.append(struct.pack('<HHHH', PACK_FILE_VERSION, encoding,
116 resource_count, len(alias_map)))
117 HEADER_LENGTH = 2 + 2 + 2 + 2
86 118
87 # Each entry is a uint16 + a uint32s. We have one extra entry for the last 119 # Each main table entry is a uint16 + a uint32.
flackr 2017/07/07 18:54:12 Preserve the comment that we have an extra entry for the last item.
agrieve 2017/07/07 20:47:08 Done.
88 # item. 120 # Each alias table entry is a uint16 + a uint16.
89 index_length = (len(ids) + 1) * (2 + 4) 121 data_offset = HEADER_LENGTH + (resource_count + 1) * 6 + len(alias_map) * 4
90 122
91 # Write index. 123 # Write main table.
92 data_offset = HEADER_LENGTH + index_length 124 index_by_id = {}
93 for id in ids: 125 deduped_data = []
94 ret.append(struct.pack('<HI', id, data_offset)) 126 index = 0
95 data_offset += len(resources[id]) 127 for resource_id in resource_ids:
128 if resource_id in alias_map:
129 continue
130 data = resources[resource_id]
131 index_by_id[resource_id] = index
132 ret.append(struct.pack('<HI', resource_id, data_offset))
133 data_offset += len(data)
134 deduped_data.append(data)
135 index += 1
96 136
137 assert index == resource_count
138 # Add an extra entry at the end.
97 ret.append(struct.pack('<HI', 0, data_offset)) 139 ret.append(struct.pack('<HI', 0, data_offset))
98 140
141 # Write alias table.
142 for resource_id in sorted(alias_map):
143 index = index_by_id[alias_map[resource_id]]
144 ret.append(struct.pack('<HH', resource_id, index))
145
99 # Write data. 146 # Write data.
100 for id in ids: 147 ret.extend(deduped_data)
101 ret.append(resources[id])
102 return ''.join(ret) 148 return ''.join(ret)
103 149
104 150
105 def WriteDataPack(resources, output_file, encoding): 151 def WriteDataPack(resources, output_file, encoding):
106 """Writes a map of id=>data into output_file as a data pack.""" 152 """Writes a map of id=>data into output_file as a data pack."""
107 content = WriteDataPackToString(resources, encoding) 153 content = WriteDataPackToString(resources, encoding)
108 with open(output_file, 'wb') as file: 154 with open(output_file, 'wb') as file:
109 file.write(content) 155 file.write(content)
110 156
111 157
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
202 # Write a simple file. 248 # Write a simple file.
203 data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''} 249 data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''}
204 WriteDataPack(data, 'datapack1.pak', UTF8) 250 WriteDataPack(data, 'datapack1.pak', UTF8)
205 data2 = {1000: 'test', 5: 'five'} 251 data2 = {1000: 'test', 5: 'five'}
206 WriteDataPack(data2, 'datapack2.pak', UTF8) 252 WriteDataPack(data2, 'datapack2.pak', UTF8)
207 print 'wrote datapack1 and datapack2 to current directory.' 253 print 'wrote datapack1 and datapack2 to current directory.'
208 254
209 255
210 if __name__ == '__main__': 256 if __name__ == '__main__':
211 main() 257 main()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698