Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(72)

Side by Side Diff: tools/grit/grit/format/data_pack.py

Issue 2969123002: Add deduplication logic to .pak files (Closed)
Patch Set: sizeof() Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « build/android/resource_sizes.py ('k') | tools/grit/grit/format/data_pack_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Support for formatting a data pack file used for platform agnostic resource 6 """Support for formatting a data pack file used for platform agnostic resource
7 files. 7 files.
8 """ 8 """
9 9
10 import collections 10 import collections
11 import exceptions 11 import exceptions
12 import os 12 import os
13 import struct 13 import struct
14 import sys 14 import sys
15 if __name__ == '__main__': 15 if __name__ == '__main__':
16 sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) 16 sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
17 17
18 from grit import util 18 from grit import util
19 from grit.node import include 19 from grit.node import include
20 from grit.node import message 20 from grit.node import message
21 from grit.node import structure 21 from grit.node import structure
22 22
23 23
# Version of the .pak file format this module writes.  Version 5 replaces the
# version-4 header and adds an alias table for deduplicating identical
# resource payloads (see WriteDataPackToString / ReadDataPackFromString).
PACK_FILE_VERSION = 5
# Encoding of text resources stored in a pack file.
BINARY, UTF8, UTF16 = range(3)
28 26
29 27
class WrongFileVersion(Exception):
  """Raised when a pack file declares a version this reader cannot parse."""
32 30
33 31
class CorruptDataPack(Exception):
  """Raised when a pack file's contents are structurally invalid."""
34
35
# Parsed pack file:
#   resources: dict mapping resource id (int) -> raw resource data.
#   encoding: one of BINARY, UTF8, UTF16 for the pack's text resources.
DataPackContents = collections.namedtuple(
    'DataPackContents', 'resources encoding')
36 38
37 39
def Format(root, lang='en', output_dir='.'):
  """Writes out the data pack file format (platform agnostic resource file)."""
  # Node types whose payloads get packed into the .pak file.
  packable_types = (include.IncludeNode, message.MessageNode,
                    structure.StructureNode)
  resources = {}
  for node in root.ActiveDescendants():
    with node:
      if not isinstance(node, packable_types):
        continue
      resource_id, value = node.GetDataPackPair(lang, UTF8)
      if value is not None:
        resources[resource_id] = value
  return WriteDataPackToString(resources, UTF8)
49 51
50 52
def ReadDataPack(input_file):
  """Reads a data pack file from disk and parses it.

  Args:
    input_file: Path of the .pak file to read.

  Returns:
    A DataPackContents namedtuple (resources dict, encoding).
  """
  return ReadDataPackFromString(util.ReadFile(input_file, util.BINARY))
56
def ReadDataPackFromString(data):
  """Reads a data pack file and returns a dictionary.

  Understands two header layouts:
    version 4: uint32 version, uint32 resource count, uint8 encoding.
    version 5: uint32 version, uint8 encoding, 3 pad bytes, uint16 resource
               count, uint16 alias count (aliases deduplicate payloads).

  Args:
    data: Raw contents of a .pak file.

  Returns:
    A DataPackContents namedtuple (resources dict, encoding).

  Raises:
    WrongFileVersion: If the version field is neither 4 nor 5.
  """
  original_data = data

  # Read the header.
  version = struct.unpack('<I', data[:4])[0]
  if version == 4:
    resource_count, encoding = struct.unpack('<IB', data[4:9])
    alias_count = 0  # Version 4 has no alias table.
    data = data[9:]
  elif version == 5:
    # The 'xxx' pad bytes keep the uint16 fields aligned.
    encoding, resource_count, alias_count = struct.unpack('<BxxxHH', data[4:12])
    data = data[12:]
  else:
    raise WrongFileVersion('Found version: ' + str(version))

  resources = {}
  kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
  def entry_at_index(idx):
    offset = idx * kIndexEntrySize
    return struct.unpack('<HI', data[offset:offset + kIndexEntrySize])

  # The index has resource_count + 1 entries; the extra sentinel entry only
  # delimits the end of the last resource, so each payload is the span
  # between consecutive offsets.
  prev_resource_id, prev_offset = entry_at_index(0)
  # NOTE: range (not Py2-only xrange) iterates identically here and keeps
  # this function portable to Python 3.
  for i in range(1, resource_count + 1):
    resource_id, offset = entry_at_index(i)
    resources[prev_resource_id] = original_data[prev_offset:offset]
    prev_resource_id, prev_offset = resource_id, offset

  # Read the alias table: each entry maps an aliased resource id to the
  # main-table *index* of the entry that owns the shared payload.
  alias_data = data[(resource_count + 1) * kIndexEntrySize:]
  kAliasEntrySize = 2 + 2  # uint16, uint16
  def alias_at_index(idx):
    offset = idx * kAliasEntrySize
    return struct.unpack('<HH', alias_data[offset:offset + kAliasEntrySize])

  for i in range(alias_count):
    resource_id, index = alias_at_index(i)
    aliased_id = entry_at_index(index)[0]
    resources[resource_id] = resources[aliased_id]

  return DataPackContents(resources, encoding)
76 98
77 99
def WriteDataPackToString(resources, encoding):
  """Returns a string with a map of id=>data in the data pack format.

  Identical payloads are stored once: every duplicate becomes an alias-table
  entry pointing at the main-table index of the canonical (lowest-id) copy.

  Args:
    resources: Dict mapping resource id (int) -> resource data (byte string).
    encoding: One of BINARY, UTF8, UTF16.

  Returns:
    The serialized version-5 pack file contents.
  """
  ret = []

  # Compute alias map.
  resource_ids = sorted(resources)
  # Use reversed() so that for duplicates lower IDs clobber higher ones.
  id_by_data = {resources[k]: k for k in reversed(resource_ids)}
  # Map of resource_id -> resource_id, where value < key.
  # (items() instead of Py2-only iteritems() keeps this Python-3 portable.)
  alias_map = {k: id_by_data[v] for k, v in resources.items()
               if id_by_data[v] != k}

  # Write file header.
  resource_count = len(resources) - len(alias_map)
  # Padding bytes ('xxx') added for alignment of the uint16 fields.
  ret.append(struct.pack('<IBxxxHH', PACK_FILE_VERSION, encoding,
                         resource_count, len(alias_map)))
  HEADER_LENGTH = 4 + 4 + 2 + 2

  # Each main table entry is: uint16 + uint32 (and an extra entry at the end).
  # Each alias table entry is: uint16 + uint16.
  data_offset = HEADER_LENGTH + (resource_count + 1) * 6 + len(alias_map) * 4

  # Write main table: only non-aliased entries get an index slot and payload.
  index_by_id = {}
  deduped_data = []
  index = 0
  for resource_id in resource_ids:
    if resource_id in alias_map:
      continue
    data = resources[resource_id]
    index_by_id[resource_id] = index
    ret.append(struct.pack('<HI', resource_id, data_offset))
    data_offset += len(data)
    deduped_data.append(data)
    index += 1

  assert index == resource_count
  # Add an extra entry at the end (sentinel delimiting the last payload).
  ret.append(struct.pack('<HI', 0, data_offset))

  # Write alias table, sorted by aliased id so readers can binary-search.
  for resource_id in sorted(alias_map):
    index = index_by_id[alias_map[resource_id]]
    ret.append(struct.pack('<HH', resource_id, index))

  # Write data.  b''.join == ''.join on Python 2 byte strings, and keeps the
  # output bytes (rather than silently promoting to unicode) elsewhere.
  ret.extend(deduped_data)
  return b''.join(ret)
103 149
104 150
def WriteDataPack(resources, output_file, encoding):
  """Writes a map of id=>data into output_file as a data pack."""
  serialized = WriteDataPackToString(resources, encoding)
  # 'out' rather than 'file' avoids shadowing the builtin.
  with open(output_file, 'wb') as out:
    out.write(serialized)
110 156
111 157
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
202 # Write a simple file. 248 # Write a simple file.
203 data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''} 249 data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''}
204 WriteDataPack(data, 'datapack1.pak', UTF8) 250 WriteDataPack(data, 'datapack1.pak', UTF8)
205 data2 = {1000: 'test', 5: 'five'} 251 data2 = {1000: 'test', 5: 'five'}
206 WriteDataPack(data2, 'datapack2.pak', UTF8) 252 WriteDataPack(data2, 'datapack2.pak', UTF8)
207 print 'wrote datapack1 and datapack2 to current directory.' 253 print 'wrote datapack1 and datapack2 to current directory.'
208 254
209 255
# Script entry point: exercises the pack writer when run directly (see main()).
if __name__ == '__main__':
  main()
OLDNEW
« no previous file with comments | « build/android/resource_sizes.py ('k') | tools/grit/grit/format/data_pack_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698