build/win/message_compiler.py - Issue 2523593002: Strip invalid utf-8 characters from mc.exe header output

Unified Diff: build/win/message_compiler.py

Issue 2523593002: Strip invalid utf-8 characters from mc.exe header output (Closed)

Patch Set: Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: build/win/message_compiler.py

diff --git a/build/win/message_compiler.py b/build/win/message_compiler.py

index 86aba4f93009f0a8cee7c1eee3deaf2962ddc1c2..b6fd03ec8ae7d3c86d0da1d436ac7920659e0f27 100644

--- a/build/win/message_compiler.py

+++ b/build/win/message_compiler.py

@@ -7,22 +7,57 @@

# Usage: message_compiler.py <environment_file> [<args to mc.exe>*]

+import os

import subprocess

import sys

-# Read the environment block from the file. This is stored in the format used

-# by CreateProcess. Drop last 2 NULs, one for list terminator, one for trailing

-# vs. separator.

-env_pairs = open(sys.argv[1]).read()[:-2].split('\0')

-env_dict = dict([item.split('=', 1) for item in env_pairs])

+def main():

+ # Read the environment block from the file. This is stored in the format used

+ # by CreateProcess. Drop the last 2 NULs: one terminates the list, and one is

+ # the final entry's terminator (entries are NUL-terminated, not separated).

+ env_pairs = open(sys.argv[1]).read()[:-2].split('\0')

+ env_dict = dict([item.split('=', 1) for item in env_pairs])

-# mc writes to stderr, so this explicitly redirects to stdout and eats it.

-try:

- # This needs shell=True to search the path in env_dict for the mc executable.

- subprocess.check_output(["mc.exe"] + sys.argv[2:],

- env=env_dict,

- stderr=subprocess.STDOUT,

- shell=True)

-except subprocess.CalledProcessError as e:

- print e.output

- sys.exit(e.returncode)

+ # mc writes to stderr, so this explicitly redirects to stdout and eats it.

+ try:

+ # This needs shell=True to search the path in env_dict for the mc

+ # executable.

+ rest = sys.argv[2:]

+ subprocess.check_output(['mc.exe'] + rest,

+ env=env_dict,

+ stderr=subprocess.STDOUT,

+ shell=True)

+ # We require all source code (in particular, the header generated here) to

+ # be UTF-8. jinja can output the intermediate .mc file in UTF-8 or UTF-16LE.

+ # However, mc.exe only supports Unicode via the -u flag, and it assumes when

+ # that is specified that the input is UTF-16LE (and errors out on UTF-8

+ # files, assuming they're ANSI). Even with -u specified and UTF-16LE input,

+ # it generates an ANSI header, and includes broken versions of the message

+ # text in the comment before the value. To work around this, for any invalid

+ # // comment lines, we simply drop the line in the header after building it.

Nico 2016/11/22 17:45:21 (did you file an msconnect for getting support for…

+ header_dir = None

+ input_file = None

+ for i, arg in enumerate(rest):

+ if arg == '-h' and len(rest) > i + 1:

+ assert header_dir == None

+ header_dir = rest[i + 1]

+ elif arg.endswith('.mc') or arg.endswith('.man'):

+ assert input_file == None

+ input_file = arg

+ if header_dir:

+ header_file = os.path.join(

+ header_dir, os.path.splitext(os.path.basename(input_file))[0] + '.h')

+ header_contents = []

+ with open(header_file, 'rb') as f:

+ for line in f.readlines():

+ if line.startswith('//') and '?' in line:

+ continue

+ header_contents.append(line)

+ with open(header_file, 'wb') as f:

+ f.write(''.join(header_contents))

+ except subprocess.CalledProcessError as e:

+ print e.output

+ sys.exit(e.returncode)

+if __name__ == '__main__':

+ main()

« no previous file with comments | « no previous file | no next file » | no next file with comments »