Index: grit/tool/xmb.py |
diff --git a/grit/tool/xmb.py b/grit/tool/xmb.py |
index aaefeecad4b54b554402fb21049d89f22ea30072..0e7950ccde1e237eec79b5e115ff9d7e95ce26f1 100644 |
--- a/grit/tool/xmb.py |
+++ b/grit/tool/xmb.py |
@@ -28,8 +28,10 @@ _XML_QUOTE_ESCAPES = { |
u"'": u'&apos;',
u'"': u'&quot;',
} |
+# See http://www.w3.org/TR/xml/#charsets |
_XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D' |
- u'\u0020-\uD7FF\uE000-\uFFFD]') |
+ u'\u0020-\uD7FF\uE000-\uFFFD' |
+ u'\U00010000-\U0010FFFF]') |
def _XmlEscape(s): |
@@ -40,7 +42,11 @@ def _XmlEscape(s): |
if not type(s) == unicode: |
s = unicode(s) |
result = saxutils.escape(s, _XML_QUOTE_ESCAPES) |
- return _XML_BAD_CHAR_REGEX.sub(u'', result).encode('utf-8') |
+ illegal_chars = _XML_BAD_CHAR_REGEX.search(result) |
+ if illegal_chars: |
newt (away)
2015/11/10 17:22:52
All of Chrome's grd files pass this stricter check.
|
+ raise Exception('String contains characters disallowed in XML: %s' % |
+ repr(result)) |
+ return result.encode('utf-8') |
def _WriteAttribute(file, name, value): |