Chromium Code Reviews

Unified Diff: grit/format/data_pack.py

Issue 217273003: Add whitelist support to repack. (Closed) Base URL: https://chromium.googlesource.com/external/grit-i18n.git@master
Patch Set: Whitespace fix (created 6 years, 8 months ago)
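For context: with this patch, RePack(output_file, input_files, whitelist_file=None) reads each input pack, optionally filters resources against a whitelist file containing one integer resource ID per line, and writes the combined pack. A minimal usage sketch (the file names here are hypothetical, not taken from the change):

  from grit.format import data_pack

  # whitelist.txt is assumed to hold one resource ID per line, e.g.
  #   4
  #   6
  data_pack.RePack('combined.pak', ['datapack1.pak', 'datapack2.pak'],
                   whitelist_file='whitelist.txt')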
 #!/usr/bin/env python
 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

-'''Support for formatting a data pack file used for platform agnostic resource
+"""Support for formatting a data pack file used for platform agnostic resource
 files.
-'''
+"""

 import collections
 import exceptions
 import os
 import struct
 import sys
 if __name__ == '__main__':
   sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))

 from grit import util
 from grit.node import include
 from grit.node import message
 from grit.node import structure
-from grit.node import misc


 PACK_FILE_VERSION = 4
 HEADER_LENGTH = 2 * 4 + 1  # Two uint32s (file version, number of entries) and
                            # one uint8 (encoding of text resources)
 BINARY, UTF8, UTF16 = range(3)


 class WrongFileVersion(Exception):
   pass


 DataPackContents = collections.namedtuple(
     'DataPackContents', 'resources encoding')


 def Format(root, lang='en', output_dir='.'):
-  '''Writes out the data pack file format (platform agnostic resource file).'''
+  """Writes out the data pack file format (platform agnostic resource file)."""
   data = {}
   for node in root.ActiveDescendants():
     with node:
       if isinstance(node, (include.IncludeNode, message.MessageNode,
                            structure.StructureNode)):
         id, value = node.GetDataPackPair(lang, UTF8)
         if value is not None:
           data[id] = value
   return WriteDataPackToString(data, UTF8)


 def ReadDataPack(input_file):
   """Reads a data pack file and returns a dictionary."""
   data = util.ReadFile(input_file, util.BINARY)
   original_data = data

   # Read the header.
-  version, num_entries, encoding = struct.unpack("<IIB", data[:HEADER_LENGTH])
+  version, num_entries, encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
   if version != PACK_FILE_VERSION:
-    print "Wrong file version in ", input_file
+    print 'Wrong file version in ', input_file
     raise WrongFileVersion

   resources = {}
   if num_entries == 0:
     return DataPackContents(resources, encoding)

   # Read the index and data.
   data = data[HEADER_LENGTH:]
   kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
   for _ in range(num_entries):
-    id, offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    id, offset = struct.unpack('<HI', data[:kIndexEntrySize])
     data = data[kIndexEntrySize:]
-    next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    next_id, next_offset = struct.unpack('<HI', data[:kIndexEntrySize])
     resources[id] = original_data[offset:next_offset]

   return DataPackContents(resources, encoding)


 def WriteDataPackToString(resources, encoding):
-  """Write a map of id=>data into a string in the data pack format and return
-  it."""
+  """Returns a string with a map of id=>data in the data pack format."""
   ids = sorted(resources.keys())
   ret = []

   # Write file header.
-  ret.append(struct.pack("<IIB", PACK_FILE_VERSION, len(ids), encoding))
+  ret.append(struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding))
   HEADER_LENGTH = 2 * 4 + 1  # Two uint32s and one uint8.

   # Each entry is a uint16 + a uint32. We have one extra entry for the last
   # item.
   index_length = (len(ids) + 1) * (2 + 4)

   # Write index.
   data_offset = HEADER_LENGTH + index_length
   for id in ids:
-    ret.append(struct.pack("<HI", id, data_offset))
+    ret.append(struct.pack('<HI', id, data_offset))
     data_offset += len(resources[id])

-  ret.append(struct.pack("<HI", 0, data_offset))
+  ret.append(struct.pack('<HI', 0, data_offset))

   # Write data.
   for id in ids:
     ret.append(resources[id])
   return ''.join(ret)


 def WriteDataPack(resources, output_file, encoding):
-  """Write a map of id=>data into output_file as a data pack."""
+  """Writes a map of id=>data into output_file as a data pack."""
   content = WriteDataPackToString(resources, encoding)
-  with open(output_file, "wb") as file:
+  with open(output_file, 'wb') as file:
     file.write(content)


-def RePack(output_file, input_files):
-  """Write a new data pack to |output_file| based on a list of filenames
-  (|input_files|)"""
+def RePack(output_file, input_files, whitelist_file=None):
+  """Write a new data pack file by combining input pack files.
+
+  Args:
+    output_file: path to the new data pack file.
+    input_files: a list of paths to the data pack files to combine.
+    whitelist_file: path to the file that contains the list of resource IDs
+                    that should be kept in the output file, or None to include
+                    all resources.
+
+  Raises:
+    KeyError: if there are duplicate keys or resource encoding is
+              inconsistent.
+  """
+  input_data_packs = [ReadDataPack(filename) for filename in input_files]
+  whitelist = None
+  if whitelist_file:
+    whitelist = util.ReadFile(whitelist_file, util.RAW_TEXT).strip().split('\n')
+    whitelist = map(int, whitelist)
Nico 2014/04/21 18:21:03: Should this be whitelist = set(map(int, whitelist))?
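(Editorial note on that suggestion, not part of the patch or the thread: RePackFromDataPackStrings below tests "key in whitelist" for every resource, and a set makes that membership test an average O(1) lookup instead of a linear scan over a list. A minimal sketch of the suggested variant:

  # Hypothetical variant of the whitelist parsing above (reviewer's suggestion).
  # A set keeps the later 'key in whitelist' checks O(1) on average.
  whitelist = set(map(int, whitelist))
)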
+  resources, encoding = RePackFromDataPackStrings(input_data_packs, whitelist)
+  WriteDataPack(resources, output_file, encoding)
+
+
+def RePackFromDataPackStrings(inputs, whitelist):
+  """Returns a data pack string that combines the resources from inputs.
+
+  Args:
+    inputs: a list of data pack strings that need to be combined.
+    whitelist: a list of resource IDs that should be kept in the output string,
+               or None to include all resources.
+
+  Returns:
+    DataPackContents: a tuple containing the new combined data pack and its
+                      encoding.
+
+  Raises:
+    KeyError: if there are duplicate keys or resource encoding is
+              inconsistent.
+  """
   resources = {}
   encoding = None
-  for filename in input_files:
-    new_content = ReadDataPack(filename)
-
+  for content in inputs:
     # Make sure we have no dups.
-    duplicate_keys = set(new_content.resources.keys()) & set(resources.keys())
-    if len(duplicate_keys) != 0:
-      raise exceptions.KeyError("Duplicate keys: " + str(list(duplicate_keys)))
+    duplicate_keys = set(content.resources.keys()) & set(resources.keys())
+    if duplicate_keys:
+      raise exceptions.KeyError('Duplicate keys: ' + str(list(duplicate_keys)))

     # Make sure encoding is consistent.
     if encoding in (None, BINARY):
-      encoding = new_content.encoding
-    elif new_content.encoding not in (BINARY, encoding):
-      raise exceptions.KeyError("Inconsistent encodings: " +
-                                str(encoding) + " vs " +
-                                str(new_content.encoding))
+      encoding = content.encoding
+    elif content.encoding not in (BINARY, encoding):
+      raise exceptions.KeyError('Inconsistent encodings: ' + str(encoding) +
+                                ' vs ' + str(content.encoding))

-    resources.update(new_content.resources)
+    if whitelist:
+      whitelisted_resources = dict([(key, content.resources[key])
+                                    for key in content.resources.keys()
+                                    if key in whitelist])
+      resources.update(whitelisted_resources)
+    else:
+      resources.update(content.resources)

   # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16
   if encoding is None:
     encoding = BINARY
-  WriteDataPack(resources, output_file, encoding)
+  return DataPackContents(resources, encoding)


 # Temporary hack for external programs that import data_pack.
 # TODO(benrg): Remove this.
 class DataPack(object):
   pass
 DataPack.ReadDataPack = staticmethod(ReadDataPack)
 DataPack.WriteDataPackToString = staticmethod(WriteDataPackToString)
 DataPack.WriteDataPack = staticmethod(WriteDataPack)
 DataPack.RePack = staticmethod(RePack)


 def main():
   if len(sys.argv) > 1:
     # When an argument is given, read and explode the file to text
     # format, for easier diffing.
     data = ReadDataPack(sys.argv[1])
     print data.encoding
     for (resource_id, text) in data.resources.iteritems():
-      print "%s: %s" % (resource_id, text)
+      print '%s: %s' % (resource_id, text)
   else:
     # Just write a simple file.
-    data = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" }
-    WriteDataPack(data, "datapack1.pak", UTF8)
-    data2 = { 1000: "test", 5: "five" }
-    WriteDataPack(data2, "datapack2.pak", UTF8)
-    print "wrote datapack1 and datapack2 to current directory."
+    data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''}
+    WriteDataPack(data, 'datapack1.pak', UTF8)
+    data2 = {1000: 'test', 5: 'five'}
+    WriteDataPack(data2, 'datapack2.pak', UTF8)
+    print 'wrote datapack1 and datapack2 to current directory.'


 if __name__ == '__main__':
   main()