 Chromium Code Reviews
 Chromium Code Reviews Issue 217273003:
  Add whitelist support to repack.  (Closed) 
  Base URL: https://chromium.googlesource.com/external/grit-i18n.git@master
    
  
    Issue 217273003:
  Add whitelist support to repack.  (Closed) 
Base URL: https://chromium.googlesource.com/external/grit-i18n.git@master

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python | 
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be | 
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. | 
| 5 | 5 | 
| 6 '''Support for formatting a data pack file used for platform agnostic resource | 6 """Support for formatting a data pack file used for platform agnostic resource | 
| 7 files. | 7 files. | 
| 8 ''' | 8 """ | 
| 9 | 9 | 
| 10 import collections | 10 import collections | 
| 11 import exceptions | 11 import exceptions | 
| 12 import os | 12 import os | 
| 13 import struct | 13 import struct | 
| 14 import sys | 14 import sys | 
| 15 if __name__ == '__main__': | 15 if __name__ == '__main__': | 
| 16 sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) | 16 sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) | 
| 17 | 17 | 
| 18 from grit import util | 18 from grit import util | 
| 19 from grit.node import include | 19 from grit.node import include | 
| 20 from grit.node import message | 20 from grit.node import message | 
| 21 from grit.node import structure | 21 from grit.node import structure | 
| 22 from grit.node import misc | |
| 23 | 22 | 
| 24 | 23 | 
# Version of the .pak file format this module reads and writes.
PACK_FILE_VERSION = 4
# Header: two uint32s (file version, number of entries) and one uint8
# (encoding of text resources).
HEADER_LENGTH = 2 * 4 + 1
# Encodings of the text resources in a pack file (stored in the header byte).
BINARY, UTF8, UTF16 = range(3)
| 29 | 28 | 
| 30 | 29 | 
class WrongFileVersion(Exception):
  """Raised when a pack file's version does not match PACK_FILE_VERSION."""
| 33 | 32 | 
| 34 | 33 | 
# Parsed contents of a data pack: a dict mapping resource id to raw data,
# plus the text-resource encoding byte from the file header.
DataPackContents = collections.namedtuple('DataPackContents',
                                          ['resources', 'encoding'])
| 37 | 36 | 
| 38 | 37 | 
def Format(root, lang='en', output_dir='.'):
  """Writes out the data pack file format (platform agnostic resource file)."""
  resources = {}
  # Only these node types contribute entries to the pack.
  packable_types = (include.IncludeNode, message.MessageNode,
                    structure.StructureNode)
  for node in root.ActiveDescendants():
    with node:
      if not isinstance(node, packable_types):
        continue
      resource_id, value = node.GetDataPackPair(lang, UTF8)
      if value is not None:
        resources[resource_id] = value
  return WriteDataPackToString(resources, UTF8)
| 50 | 49 | 
| 51 | 50 | 
| 52 def ReadDataPack(input_file): | 51 def ReadDataPack(input_file): | 
| 53 """Reads a data pack file and returns a dictionary.""" | 52 """Reads a data pack file and returns a dictionary.""" | 
| 54 data = util.ReadFile(input_file, util.BINARY) | 53 data = util.ReadFile(input_file, util.BINARY) | 
| 55 original_data = data | 54 original_data = data | 
| 56 | 55 | 
| 57 # Read the header. | 56 # Read the header. | 
| 58 version, num_entries, encoding = struct.unpack("<IIB", data[:HEADER_LENGTH]) | 57 version, num_entries, encoding = struct.unpack('<IIB', data[:HEADER_LENGTH]) | 
| 59 if version != PACK_FILE_VERSION: | 58 if version != PACK_FILE_VERSION: | 
| 60 print "Wrong file version in ", input_file | 59 print 'Wrong file version in ', input_file | 
| 61 raise WrongFileVersion | 60 raise WrongFileVersion | 
| 62 | 61 | 
| 63 resources = {} | 62 resources = {} | 
| 64 if num_entries == 0: | 63 if num_entries == 0: | 
| 65 return DataPackContents(resources, encoding) | 64 return DataPackContents(resources, encoding) | 
| 66 | 65 | 
| 67 # Read the index and data. | 66 # Read the index and data. | 
| 68 data = data[HEADER_LENGTH:] | 67 data = data[HEADER_LENGTH:] | 
| 69 kIndexEntrySize = 2 + 4 # Each entry is a uint16 and a uint32. | 68 kIndexEntrySize = 2 + 4 # Each entry is a uint16 and a uint32. | 
| 70 for _ in range(num_entries): | 69 for _ in range(num_entries): | 
| 71 id, offset = struct.unpack("<HI", data[:kIndexEntrySize]) | 70 id, offset = struct.unpack('<HI', data[:kIndexEntrySize]) | 
| 72 data = data[kIndexEntrySize:] | 71 data = data[kIndexEntrySize:] | 
| 73 next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize]) | 72 next_id, next_offset = struct.unpack('<HI', data[:kIndexEntrySize]) | 
| 74 resources[id] = original_data[offset:next_offset] | 73 resources[id] = original_data[offset:next_offset] | 
| 75 | 74 | 
| 76 return DataPackContents(resources, encoding) | 75 return DataPackContents(resources, encoding) | 
| 77 | 76 | 
| 78 | 77 | 
def WriteDataPackToString(resources, encoding):
  """Returns a string with a map of id=>data in the data pack format."""
  ids = sorted(resources)

  # File header: two uint32s (version, entry count) and one uint8 (encoding).
  header = struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding)

  # The index holds one (uint16 id, uint32 offset) entry per resource plus
  # a sentinel entry whose offset marks the end of the last resource.
  index_entry_size = 2 + 4
  data_offset = len(header) + (len(ids) + 1) * index_entry_size

  index = []
  for resource_id in ids:
    index.append(struct.pack('<HI', resource_id, data_offset))
    data_offset += len(resources[resource_id])
  index.append(struct.pack('<HI', 0, data_offset))

  payload = [resources[resource_id] for resource_id in ids]
  return ''.join([header] + index + payload)
| 105 | 103 | 
| 106 | 104 | 
def WriteDataPack(resources, output_file, encoding):
  """Writes a map of id=>data into output_file as a data pack."""
  with open(output_file, 'wb') as out:
    out.write(WriteDataPackToString(resources, encoding))
| 112 | 110 | 
| 113 | 111 | 
def RePack(output_file, input_files, whitelist_file=None):
  """Writes a new data pack file by combining input pack files.

  Args:
      output_file: path to the new data pack file.
      input_files: a list of paths to the data pack files to combine.
      whitelist_file: path to the file that contains the list of resource IDs
                      (one decimal ID per line) that should be kept in the
                      output file, or None to include all resources.

  Raises:
      KeyError: if there are duplicate keys or resource encoding is
                inconsistent.
  """
  input_data_packs = [ReadDataPack(filename) for filename in input_files]
  whitelist = None
  if whitelist_file:
    lines = util.ReadFile(whitelist_file, util.RAW_TEXT).strip().split('\n')
    # Use a set so the per-resource membership test in
    # RePackFromDataPackStrings is O(1) instead of O(len(whitelist)).
    whitelist = set(map(int, lines))
  resources, encoding = RePackFromDataPackStrings(input_data_packs, whitelist)
  WriteDataPack(resources, output_file, encoding)
| 133 | |
| 134 | |
def RePackFromDataPackStrings(inputs, whitelist):
  """Combines the resources from already-parsed data packs.

  Args:
      inputs: a list of DataPackContents tuples (as returned by ReadDataPack)
              to combine.
      whitelist: a collection of resource IDs that should be kept in the
                 output, or None to include all resources.

  Returns:
      DataPackContents: a tuple containing the combined resource map and its
                        encoding.

  Raises:
      KeyError: if there are duplicate keys or resource encoding is
                inconsistent.
  """
  resources = {}
  encoding = None
  for content in inputs:
    # Make sure we have no dups.
    duplicate_keys = set(content.resources) & set(resources)
    if duplicate_keys:
      raise KeyError('Duplicate keys: ' + str(list(duplicate_keys)))

    # Make sure encoding is consistent: BINARY packs are compatible with
    # anything, otherwise all packs must share one text encoding.
    if encoding in (None, BINARY):
      encoding = content.encoding
    elif content.encoding not in (BINARY, encoding):
      raise KeyError('Inconsistent encodings: ' + str(encoding) +
                     ' vs ' + str(content.encoding))

    if whitelist:
      whitelisted_resources = dict([(key, content.resources[key])
                                    for key in content.resources
                                    if key in whitelist])
      resources.update(whitelisted_resources)
    else:
      resources.update(content.resources)

  # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16.
  if encoding is None:
    encoding = BINARY
  return DataPackContents(resources, encoding)
| 141 | 178 | 
| 142 | 179 | 
# Temporary hack for external programs that import data_pack and expect a
# DataPack class with static methods rather than module-level functions.
# TODO(benrg): Remove this.
class DataPack(object):
  pass
DataPack.ReadDataPack = staticmethod(ReadDataPack)
DataPack.WriteDataPackToString = staticmethod(WriteDataPackToString)
DataPack.WriteDataPack = staticmethod(WriteDataPack)
DataPack.RePack = staticmethod(RePack)
| 151 | 188 | 
| 152 | 189 | 
| 153 def main(): | 190 def main(): | 
| 154 if len(sys.argv) > 1: | 191 if len(sys.argv) > 1: | 
| 155 # When an argument is given, read and explode the file to text | 192 # When an argument is given, read and explode the file to text | 
| 156 # format, for easier diffing. | 193 # format, for easier diffing. | 
| 157 data = ReadDataPack(sys.argv[1]) | 194 data = ReadDataPack(sys.argv[1]) | 
| 158 print data.encoding | 195 print data.encoding | 
| 159 for (resource_id, text) in data.resources.iteritems(): | 196 for (resource_id, text) in data.resources.iteritems(): | 
| 160 print "%s: %s" % (resource_id, text) | 197 print '%s: %s' % (resource_id, text) | 
| 161 else: | 198 else: | 
| 162 # Just write a simple file. | 199 # Just write a simple file. | 
| 163 data = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" } | 200 data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''} | 
| 164 WriteDataPack(data, "datapack1.pak", UTF8) | 201 WriteDataPack(data, 'datapack1.pak', UTF8) | 
| 165 data2 = { 1000: "test", 5: "five" } | 202 data2 = {1000: 'test', 5: 'five'} | 
| 166 WriteDataPack(data2, "datapack2.pak", UTF8) | 203 WriteDataPack(data2, 'datapack2.pak', UTF8) | 
| 167 print "wrote datapack1 and datapack2 to current directory." | 204 print 'wrote datapack1 and datapack2 to current directory.' | 
| 168 | 205 | 
| 169 | 206 | 
| 170 if __name__ == '__main__': | 207 if __name__ == '__main__': | 
| 171 main() | 208 main() | 
| OLD | NEW |