Chromium Code Reviews

Unified Diff: grit/format/data_pack.py

Issue 217273003: Add whitelist support to repack. (Closed) Base URL: https://chromium.googlesource.com/external/grit-i18n.git@master
Patch Set: Whitespace fix (created 6 years, 8 months ago)
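For context: with this patch, RePack(output_file, input_files, whitelist_file=None) reads each input pack, optionally filters resources against a whitelist file containing one integer resource ID per line, and writes the combined pack. A minimal usage sketch (the file names here are hypothetical, not taken from the change):

  from grit.format import data_pack

  # whitelist.txt is assumed to hold one resource ID per line, e.g.
  #   4
  #   6
  data_pack.RePack('combined.pak', ['datapack1.pak', 'datapack2.pak'],
                   whitelist_file='whitelist.txt')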
 #!/usr/bin/env python
 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

-'''Support for formatting a data pack file used for platform agnostic resource
+"""Support for formatting a data pack file used for platform agnostic resource
 files.
-'''
+"""

 import collections
 import exceptions
 import os
 import struct
 import sys
 if __name__ == '__main__':
   sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))

 from grit import util
 from grit.node import include
 from grit.node import message
 from grit.node import structure
-from grit.node import misc


 PACK_FILE_VERSION = 4
 HEADER_LENGTH = 2 * 4 + 1  # Two uint32s (file version, number of entries) and
                            # one uint8 (encoding of text resources)
 BINARY, UTF8, UTF16 = range(3)


 class WrongFileVersion(Exception):
   pass


 DataPackContents = collections.namedtuple(
     'DataPackContents', 'resources encoding')


 def Format(root, lang='en', output_dir='.'):
-  '''Writes out the data pack file format (platform agnostic resource file).'''
+  """Writes out the data pack file format (platform agnostic resource file)."""
   data = {}
   for node in root.ActiveDescendants():
     with node:
       if isinstance(node, (include.IncludeNode, message.MessageNode,
                            structure.StructureNode)):
         id, value = node.GetDataPackPair(lang, UTF8)
         if value is not None:
           data[id] = value
   return WriteDataPackToString(data, UTF8)


 def ReadDataPack(input_file):
   """Reads a data pack file and returns a dictionary."""
   data = util.ReadFile(input_file, util.BINARY)
   original_data = data

   # Read the header.
-  version, num_entries, encoding = struct.unpack("<IIB", data[:HEADER_LENGTH])
+  version, num_entries, encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
   if version != PACK_FILE_VERSION:
-    print "Wrong file version in ", input_file
+    print 'Wrong file version in ', input_file
     raise WrongFileVersion

   resources = {}
   if num_entries == 0:
     return DataPackContents(resources, encoding)

   # Read the index and data.
   data = data[HEADER_LENGTH:]
   kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
   for _ in range(num_entries):
-    id, offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    id, offset = struct.unpack('<HI', data[:kIndexEntrySize])
     data = data[kIndexEntrySize:]
-    next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    next_id, next_offset = struct.unpack('<HI', data[:kIndexEntrySize])
     resources[id] = original_data[offset:next_offset]

   return DataPackContents(resources, encoding)


 def WriteDataPackToString(resources, encoding):
-  """Write a map of id=>data into a string in the data pack format and return
-  it."""
+  """Returns a string with a map of id=>data in the data pack format."""
   ids = sorted(resources.keys())
   ret = []

   # Write file header.
-  ret.append(struct.pack("<IIB", PACK_FILE_VERSION, len(ids), encoding))
+  ret.append(struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding))
   HEADER_LENGTH = 2 * 4 + 1  # Two uint32s and one uint8.

   # Each entry is a uint16 + a uint32. We have one extra entry for the last
   # item.
   index_length = (len(ids) + 1) * (2 + 4)

   # Write index.
   data_offset = HEADER_LENGTH + index_length
   for id in ids:
-    ret.append(struct.pack("<HI", id, data_offset))
+    ret.append(struct.pack('<HI', id, data_offset))
     data_offset += len(resources[id])

-  ret.append(struct.pack("<HI", 0, data_offset))
+  ret.append(struct.pack('<HI', 0, data_offset))

   # Write data.
   for id in ids:
     ret.append(resources[id])
   return ''.join(ret)


 def WriteDataPack(resources, output_file, encoding):
-  """Write a map of id=>data into output_file as a data pack."""
+  """Writes a map of id=>data into output_file as a data pack."""
   content = WriteDataPackToString(resources, encoding)
-  with open(output_file, "wb") as file:
+  with open(output_file, 'wb') as file:
     file.write(content)


-def RePack(output_file, input_files):
-  """Write a new data pack to |output_file| based on a list of filenames
-  (|input_files|)"""
+def RePack(output_file, input_files, whitelist_file=None):
+  """Write a new data pack file by combining input pack files.
+
+  Args:
+    output_file: path to the new data pack file.
+    input_files: a list of paths to the data pack files to combine.
+    whitelist_file: path to the file that contains the list of resource IDs
+                    that should be kept in the output file, or None to include
+                    all resources.
+
+  Raises:
+    KeyError: if there are duplicate keys or resource encoding is
+              inconsistent.
+  """
+  input_data_packs = [ReadDataPack(filename) for filename in input_files]
+  whitelist = None
+  if whitelist_file:
+    whitelist = util.ReadFile(whitelist_file, util.RAW_TEXT).strip().split('\n')
+    whitelist = map(int, whitelist)
Nico 2014/04/21 18:21:03: Should this be whitelist = set(map(int, whitelist))?
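(Editorial note on that suggestion, not part of the patch or the thread: RePackFromDataPackStrings below tests "key in whitelist" for every resource, and a set makes that membership test an average O(1) lookup instead of a linear scan over a list. A minimal sketch of the suggested variant:

  # Hypothetical variant of the whitelist parsing above (reviewer's suggestion).
  # A set keeps the later 'key in whitelist' checks O(1) on average.
  whitelist = set(map(int, whitelist))
)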
+  resources, encoding = RePackFromDataPackStrings(input_data_packs, whitelist)
+  WriteDataPack(resources, output_file, encoding)
+
+
+def RePackFromDataPackStrings(inputs, whitelist):
+  """Returns a data pack string that combines the resources from inputs.
+
+  Args:
+    inputs: a list of data pack strings that need to be combined.
+    whitelist: a list of resource IDs that should be kept in the output string,
+               or None to include all resources.
+
+  Returns:
+    DataPackContents: a tuple containing the new combined data pack and its
+                      encoding.
+
+  Raises:
+    KeyError: if there are duplicate keys or resource encoding is
+              inconsistent.
+  """
   resources = {}
   encoding = None
-  for filename in input_files:
-    new_content = ReadDataPack(filename)
-
+  for content in inputs:
     # Make sure we have no dups.
-    duplicate_keys = set(new_content.resources.keys()) & set(resources.keys())
-    if len(duplicate_keys) != 0:
-      raise exceptions.KeyError("Duplicate keys: " + str(list(duplicate_keys)))
+    duplicate_keys = set(content.resources.keys()) & set(resources.keys())
+    if duplicate_keys:
+      raise exceptions.KeyError('Duplicate keys: ' + str(list(duplicate_keys)))

     # Make sure encoding is consistent.
     if encoding in (None, BINARY):
-      encoding = new_content.encoding
-    elif new_content.encoding not in (BINARY, encoding):
-      raise exceptions.KeyError("Inconsistent encodings: " +
-                                str(encoding) + " vs " +
-                                str(new_content.encoding))
+      encoding = content.encoding
+    elif content.encoding not in (BINARY, encoding):
+      raise exceptions.KeyError('Inconsistent encodings: ' + str(encoding) +
+                                ' vs ' + str(content.encoding))

-    resources.update(new_content.resources)
+    if whitelist:
+      whitelisted_resources = dict([(key, content.resources[key])
+                                    for key in content.resources.keys()
+                                    if key in whitelist])
+      resources.update(whitelisted_resources)
+    else:
+      resources.update(content.resources)

   # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16
   if encoding is None:
     encoding = BINARY
-  WriteDataPack(resources, output_file, encoding)
+  return DataPackContents(resources, encoding)


 # Temporary hack for external programs that import data_pack.
 # TODO(benrg): Remove this.
 class DataPack(object):
   pass
 DataPack.ReadDataPack = staticmethod(ReadDataPack)
 DataPack.WriteDataPackToString = staticmethod(WriteDataPackToString)
 DataPack.WriteDataPack = staticmethod(WriteDataPack)
 DataPack.RePack = staticmethod(RePack)


 def main():
   if len(sys.argv) > 1:
     # When an argument is given, read and explode the file to text
     # format, for easier diffing.
     data = ReadDataPack(sys.argv[1])
     print data.encoding
     for (resource_id, text) in data.resources.iteritems():
-      print "%s: %s" % (resource_id, text)
+      print '%s: %s' % (resource_id, text)
   else:
     # Just write a simple file.
-    data = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" }
-    WriteDataPack(data, "datapack1.pak", UTF8)
-    data2 = { 1000: "test", 5: "five" }
-    WriteDataPack(data2, "datapack2.pak", UTF8)
-    print "wrote datapack1 and datapack2 to current directory."
+    data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''}
+    WriteDataPack(data, 'datapack1.pak', UTF8)
+    data2 = {1000: 'test', 5: 'five'}
+    WriteDataPack(data2, 'datapack2.pak', UTF8)
+    print 'wrote datapack1 and datapack2 to current directory.'


 if __name__ == '__main__':
   main()