Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Side by Side Diff: grit/format/data_pack.py

Issue 217273003: Add whitelist support to repack. (Closed) Base URL: https://chromium.googlesource.com/external/grit-i18n.git@master
Patch Set: Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | grit/format/repack.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 '''Support for formatting a data pack file used for platform agnostic resource 6 """Support for formatting a data pack file used for platform agnostic resource
7 files. 7 files.
8 ''' 8 """
9 9
10 import collections 10 import collections
11 import exceptions 11 import exceptions
12 import os 12 import os
13 import struct 13 import struct
14 import sys 14 import sys
15 if __name__ == '__main__': 15 if __name__ == '__main__':
16 sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) 16 sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
17 17
18 from grit import util 18 from grit import util
19 from grit.node import include 19 from grit.node import include
20 from grit.node import message 20 from grit.node import message
21 from grit.node import structure 21 from grit.node import structure
22 from grit.node import misc
23 22
24 23
PACK_FILE_VERSION = 4
# Size of the pack file header: two uint32s (file version, number of
# entries) followed by one uint8 (encoding of text resources).
HEADER_LENGTH = 2 * 4 + 1
# Possible encodings of the text resources stored in a pack file.
BINARY, UTF8, UTF16 = 0, 1, 2
29 28
30 29
class WrongFileVersion(Exception):
  """Raised when a pack file's version is not PACK_FILE_VERSION."""
33 32
34 33
# A parsed data pack: |resources| maps resource id to its raw data string,
# |encoding| is one of BINARY, UTF8 or UTF16.
DataPackContents = collections.namedtuple(
    'DataPackContents', ['resources', 'encoding'])
37 36
38 37
def Format(root, lang='en', output_dir='.'):
  """Writes out the data pack file format (platform agnostic resource file).

  Collects the data pack pairs of every active include, message and
  structure node under |root| and serializes them as a UTF-8 data pack.
  """
  packable_types = (include.IncludeNode, message.MessageNode,
                    structure.StructureNode)
  entries = {}
  for node in root.ActiveDescendants():
    with node:
      if not isinstance(node, packable_types):
        continue
      resource_id, value = node.GetDataPackPair(lang, UTF8)
      if value is not None:
        entries[resource_id] = value
  return WriteDataPackToString(entries, UTF8)
50 49
51 50
52 def ReadDataPack(input_file): 51 def ReadDataPack(input_file):
53 """Reads a data pack file and returns a dictionary.""" 52 """Reads a data pack file and returns a dictionary."""
54 data = util.ReadFile(input_file, util.BINARY) 53 data = util.ReadFile(input_file, util.BINARY)
55 original_data = data 54 original_data = data
56 55
57 # Read the header. 56 # Read the header.
58 version, num_entries, encoding = struct.unpack("<IIB", data[:HEADER_LENGTH]) 57 version, num_entries, encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
59 if version != PACK_FILE_VERSION: 58 if version != PACK_FILE_VERSION:
60 print "Wrong file version in ", input_file 59 print 'Wrong file version in ', input_file
61 raise WrongFileVersion 60 raise WrongFileVersion
62 61
63 resources = {} 62 resources = {}
64 if num_entries == 0: 63 if num_entries == 0:
65 return DataPackContents(resources, encoding) 64 return DataPackContents(resources, encoding)
66 65
67 # Read the index and data. 66 # Read the index and data.
68 data = data[HEADER_LENGTH:] 67 data = data[HEADER_LENGTH:]
69 kIndexEntrySize = 2 + 4 # Each entry is a uint16 and a uint32. 68 kIndexEntrySize = 2 + 4 # Each entry is a uint16 and a uint32.
70 for _ in range(num_entries): 69 for _ in range(num_entries):
71 id, offset = struct.unpack("<HI", data[:kIndexEntrySize]) 70 id, offset = struct.unpack('<HI', data[:kIndexEntrySize])
72 data = data[kIndexEntrySize:] 71 data = data[kIndexEntrySize:]
73 next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize]) 72 next_id, next_offset = struct.unpack('<HI', data[:kIndexEntrySize])
74 resources[id] = original_data[offset:next_offset] 73 resources[id] = original_data[offset:next_offset]
75 74
76 return DataPackContents(resources, encoding) 75 return DataPackContents(resources, encoding)
77 76
78 77
def WriteDataPackToString(resources, encoding):
  """Returns a string with a map of id=>data in the data pack format.

  Args:
    resources: a dict of resource id -> raw data string.
    encoding: encoding of the text resources (BINARY, UTF8 or UTF16).
  """
  ids = sorted(resources.keys())
  ret = []

  # Write file header: two uint32s (version, entry count) and one uint8
  # (encoding). Its size is the module-level HEADER_LENGTH; the old local
  # re-definition of the same constant was redundant and shadowed it.
  ret.append(struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding))

  # Each index entry is a uint16 (id) + a uint32 (offset). One extra entry
  # marks the end of the last item's data.
  index_length = (len(ids) + 1) * (2 + 4)

  # Write index. |resource_id| rather than |id|: don't shadow the builtin.
  data_offset = HEADER_LENGTH + index_length
  for resource_id in ids:
    ret.append(struct.pack('<HI', resource_id, data_offset))
    data_offset += len(resources[resource_id])
  ret.append(struct.pack('<HI', 0, data_offset))

  # Write data in the same (sorted) order as the index.
  for resource_id in ids:
    ret.append(resources[resource_id])
  return ''.join(ret)
105 103
106 104
def WriteDataPack(resources, output_file, encoding):
  """Writes a map of id=>data into |output_file| as a data pack.

  Args:
    resources: a dict of resource id -> raw data string.
    output_file: path of the file to write.
    encoding: encoding of the text resources (BINARY, UTF8 or UTF16).
  """
  content = WriteDataPackToString(resources, encoding)
  # |f| rather than |file|: don't shadow the builtin.
  with open(output_file, 'wb') as f:
    f.write(content)
112 110
113 111
def RePack(output_file, input_files, whitelist_file=None):
  """Writes a new data pack file by combining input pack files.

  Args:
    output_file: path to the new data pack file.
    input_files: a list of paths to the data pack files to combine.
    whitelist_file: path to a file listing the resource IDs (one decimal ID
        per line) that should be kept in the output file, or None to include
        all resources.

  Raises:
    KeyError: if there are duplicate keys or resource encoding is
        inconsistent.
  """
  # Parse the whitelist once, up front, instead of re-reading the file for
  # every input pack. A set makes the per-resource membership test O(1).
  whitelist = None
  if whitelist_file:
    whitelist = set(
        map(int, util.ReadFile(whitelist_file, util.RAW_TEXT).strip()
            .split('\n')))

  resources = {}
  encoding = None
  for filename in input_files:
    new_content = ReadDataPack(filename)

    # Make sure we have no dups.
    duplicate_keys = set(new_content.resources.keys()) & set(resources.keys())
    if duplicate_keys:
      # Builtin KeyError is the same object as exceptions.KeyError in
      # Python 2, and the exceptions module is gone in Python 3.
      raise KeyError('Duplicate keys: ' + str(list(duplicate_keys)))

    # Make sure encoding is consistent.
    if encoding in (None, BINARY):
      encoding = new_content.encoding
    elif new_content.encoding not in (BINARY, encoding):
      raise KeyError('Inconsistent encodings: ' + str(encoding) +
                     ' vs ' + str(new_content.encoding))

    if whitelist is None:
      resources.update(new_content.resources)
    else:
      for key, value in new_content.resources.items():
        if key in whitelist:
          resources[key] = value

  # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16.
  if encoding is None:
    encoding = BINARY
  WriteDataPack(resources, output_file, encoding)
141 157
142 158
# Temporary hack for external programs that import data_pack.
# TODO(benrg): Remove this.
class DataPack(object):
  """Deprecated namespace wrapper; use the module-level functions."""
  ReadDataPack = staticmethod(ReadDataPack)
  WriteDataPackToString = staticmethod(WriteDataPackToString)
  WriteDataPack = staticmethod(WriteDataPack)
  RePack = staticmethod(RePack)
151 167
152 168
153 def main(): 169 def main():
154 if len(sys.argv) > 1: 170 if len(sys.argv) > 1:
155 # When an argument is given, read and explode the file to text 171 # When an argument is given, read and explode the file to text
156 # format, for easier diffing. 172 # format, for easier diffing.
157 data = ReadDataPack(sys.argv[1]) 173 data = ReadDataPack(sys.argv[1])
158 print data.encoding 174 print data.encoding
159 for (resource_id, text) in data.resources.iteritems(): 175 for (resource_id, text) in data.resources.iteritems():
160 print "%s: %s" % (resource_id, text) 176 print '%s: %s' % (resource_id, text)
161 else: 177 else:
162 # Just write a simple file. 178 # Just write a simple file.
163 data = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" } 179 data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''}
164 WriteDataPack(data, "datapack1.pak", UTF8) 180 WriteDataPack(data, 'datapack1.pak', UTF8)
165 data2 = { 1000: "test", 5: "five" } 181 data2 = {1000: 'test', 5: 'five'}
166 WriteDataPack(data2, "datapack2.pak", UTF8) 182 WriteDataPack(data2, 'datapack2.pak', UTF8)
167 print "wrote datapack1 and datapack2 to current directory." 183 print 'wrote datapack1 and datapack2 to current directory.'
168 184
169 185
# Allow running this formatter directly for debugging.
if __name__ == '__main__':
  main()
OLDNEW
« no previous file with comments | « no previous file | grit/format/repack.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698