Chromium Code Reviews

Index: grit/format/data_pack.py
diff --git a/grit/format/data_pack.py b/grit/format/data_pack.py
index 0cdbbd8d341d442d16d2f51ef9d441ff0da60282..779a8624517ec054a15213d220938e7475f6444e 100755
--- a/grit/format/data_pack.py
+++ b/grit/format/data_pack.py
@@ -3,9 +3,9 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
-'''Support for formatting a data pack file used for platform agnostic resource
+"""Support for formatting a data pack file used for platform agnostic resource
 files.
-'''
+"""
 import collections
 import exceptions
@@ -19,7 +19,6 @@ from grit import util
 from grit.node import include
 from grit.node import message
 from grit.node import structure
-from grit.node import misc
 PACK_FILE_VERSION = 4
@@ -37,7 +36,7 @@ DataPackContents = collections.namedtuple(
 def Format(root, lang='en', output_dir='.'):
-  '''Writes out the data pack file format (platform agnostic resource file).'''
+  """Writes out the data pack file format (platform agnostic resource file)."""
   data = {}
   for node in root.ActiveDescendants():
     with node:
@@ -55,9 +54,9 @@ def ReadDataPack(input_file):
   original_data = data
   # Read the header.
-  version, num_entries, encoding = struct.unpack("<IIB", data[:HEADER_LENGTH])
+  version, num_entries, encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
   if version != PACK_FILE_VERSION:
-    print "Wrong file version in ", input_file
+    print 'Wrong file version in ', input_file
     raise WrongFileVersion
   resources = {}
@@ -68,22 +67,21 @@ def ReadDataPack(input_file):
   data = data[HEADER_LENGTH:]
   kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
   for _ in range(num_entries):
-    id, offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    id, offset = struct.unpack('<HI', data[:kIndexEntrySize])
     data = data[kIndexEntrySize:]
-    next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    next_id, next_offset = struct.unpack('<HI', data[:kIndexEntrySize])
     resources[id] = original_data[offset:next_offset]
   return DataPackContents(resources, encoding)
 def WriteDataPackToString(resources, encoding):
-  """Write a map of id=>data into a string in the data pack format and return
-  it."""
+  """Returns a string with a map of id=>data in the data pack format."""
   ids = sorted(resources.keys())
   ret = []
   # Write file header.
-  ret.append(struct.pack("<IIB", PACK_FILE_VERSION, len(ids), encoding))
+  ret.append(struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding))
   HEADER_LENGTH = 2 * 4 + 1  # Two uint32s and one uint8.
   # Each entry is a uint16 + a uint32. We have one extra entry for the last
@@ -93,10 +91,10 @@ def WriteDataPackToString(resources, encoding):
   # Write index.
   data_offset = HEADER_LENGTH + index_length
   for id in ids:
-    ret.append(struct.pack("<HI", id, data_offset))
+    ret.append(struct.pack('<HI', id, data_offset))
     data_offset += len(resources[id])
-  ret.append(struct.pack("<HI", 0, data_offset))
+  ret.append(struct.pack('<HI', 0, data_offset))
   # Write data.
   for id in ids:
@@ -105,39 +103,78 @@ def WriteDataPackToString(resources, encoding):
 def WriteDataPack(resources, output_file, encoding):
-  """Write a map of id=>data into output_file as a data pack."""
+  """Writes a map of id=>data into output_file as a data pack."""
   content = WriteDataPackToString(resources, encoding)
-  with open(output_file, "wb") as file:
+  with open(output_file, 'wb') as file:
     file.write(content)
-def RePack(output_file, input_files):
-  """Write a new data pack to |output_file| based on a list of filenames
-  (|input_files|)"""
+def RePack(output_file, input_files, whitelist_file=None):
+  """Write a new data pack file by combining input pack files.
+
+  Args:
+    output_file: path to the new data pack file.
+    input_files: a list of paths to the data pack files to combine.
+    whitelist_file: path to the file that contains the list of resource IDs
+                    that should be kept in the output file or None to include
+                    all resources.
+
+  Raises:
+    KeyError: if there are duplicate keys or resource encoding is
+              inconsistent.
+  """
+  input_data_packs = [ReadDataPack(filename) for filename in input_files]
+  whitelist = None
+  if whitelist_file:
+    whitelist = util.ReadFile(whitelist_file, util.RAW_TEXT).strip().split('\n')
+    whitelist = map(int, whitelist)
|
Nico  2014/04/21 18:21:03
Should this be
  whitelist = set(map(int, whitelist))
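For context on the review comment: whitelist is consumed below by the per-resource "key in whitelist" tests in RePackFromDataPackStrings, so leaving it as a list makes each test an O(n) scan, while a set makes it an average O(1) hash lookup. A minimal sketch of the suggested parsing step (illustrative only; _ParseWhitelist is a hypothetical helper name, not part of this CL):

  # Hypothetical helper sketching the reviewer's suggestion: build a set so
  # that each "key in whitelist" membership test is O(1) instead of O(n).
  def _ParseWhitelist(text):
    """Turns a string like '4\n1000\n' into set([4, 1000])."""
    return set(int(line) for line in text.strip().split('\n'))

RePack would then assign whitelist = _ParseWhitelist(util.ReadFile(whitelist_file, util.RAW_TEXT)) in place of the split/map pair above.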
+  resources, encoding = RePackFromDataPackStrings(input_data_packs, whitelist)
+  WriteDataPack(resources, output_file, encoding)
+
+
+def RePackFromDataPackStrings(inputs, whitelist):
| + """Returns a data pack string that combines the resources from inputs. |
| + |
| + Args: |
| + inputs: a list of data pack strings that need to be combined. |
| + whitelist: a list of resource IDs that should be kep in the output string |
| + or None to include all resources. |
+
+  Returns:
+    DataPackContents: a tuple containing the new combined data pack and its
+                      encoding.
+
+  Raises:
+    KeyError: if there are duplicate keys or resource encoding is
+              inconsistent.
+  """
   resources = {}
   encoding = None
-  for filename in input_files:
-    new_content = ReadDataPack(filename)
-
+  for content in inputs:
     # Make sure we have no dups.
-    duplicate_keys = set(new_content.resources.keys()) & set(resources.keys())
-    if len(duplicate_keys) != 0:
-      raise exceptions.KeyError("Duplicate keys: " + str(list(duplicate_keys)))
+    duplicate_keys = set(content.resources.keys()) & set(resources.keys())
+    if duplicate_keys:
+      raise exceptions.KeyError('Duplicate keys: ' + str(list(duplicate_keys)))
     # Make sure encoding is consistent.
     if encoding in (None, BINARY):
-      encoding = new_content.encoding
-    elif new_content.encoding not in (BINARY, encoding):
-      raise exceptions.KeyError("Inconsistent encodings: " +
-                                str(encoding) + " vs " +
-                                str(new_content.encoding))
-
-    resources.update(new_content.resources)
+      encoding = content.encoding
+    elif content.encoding not in (BINARY, encoding):
+      raise exceptions.KeyError('Inconsistent encodings: ' + str(encoding) +
+                                ' vs ' + str(content.encoding))
+
+    if whitelist:
+      whitelisted_resources = dict([(key, content.resources[key])
+                                    for key in content.resources.keys()
+                                    if key in whitelist])
+      resources.update(whitelisted_resources)
+    else:
+      resources.update(content.resources)
   # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16
   if encoding is None:
     encoding = BINARY
-  WriteDataPack(resources, output_file, encoding)
+  return DataPackContents(resources, encoding)
 # Temporary hack for external programs that import data_pack.
@@ -157,14 +194,14 @@ def main():
     data = ReadDataPack(sys.argv[1])
     print data.encoding
     for (resource_id, text) in data.resources.iteritems():
-      print "%s: %s" % (resource_id, text)
+      print '%s: %s' % (resource_id, text)
   else:
     # Just write a simple file.
-    data = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" }
-    WriteDataPack(data, "datapack1.pak", UTF8)
-    data2 = { 1000: "test", 5: "five" }
-    WriteDataPack(data2, "datapack2.pak", UTF8)
-    print "wrote datapack1 and datapack2 to current directory."
+    data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''}
+    WriteDataPack(data, 'datapack1.pak', UTF8)
+    data2 = {1000: 'test', 5: 'five'}
+    WriteDataPack(data2, 'datapack2.pak', UTF8)
+    print 'wrote datapack1 and datapack2 to current directory.'
 if __name__ == '__main__':
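
For reference, a minimal end-to-end sketch of the new whitelist path added by this CL (file names are placeholders; it assumes data_pack is importable, e.g. run from grit/format/):

  # Usage sketch of RePack with a whitelist (hypothetical file names).
  import data_pack

  # Write two small input packs, much like main() above does.
  data_pack.WriteDataPack({1: '', 4: 'this is id 4'}, 'in1.pak', data_pack.UTF8)
  data_pack.WriteDataPack({1000: 'test'}, 'in2.pak', data_pack.UTF8)

  # The whitelist file holds one decimal resource ID per line.
  with open('whitelist.txt', 'w') as f:
    f.write('4\n1000\n')

  # Combine the packs; resource 1 is dropped because it is not whitelisted.
  data_pack.RePack('out.pak', ['in1.pak', 'in2.pak'],
                   whitelist_file='whitelist.txt')

  combined = data_pack.ReadDataPack('out.pak')
  assert sorted(combined.resources.keys()) == [4, 1000]
  # Per the comment in the code, encoding 1 means UTF8.
  assert combined.encoding == data_pack.UTF8

Without whitelist_file, RePack keeps every resource from every input, and raises KeyError if two inputs share a resource ID or disagree on a non-BINARY encoding.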