OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python2.4 |
| 2 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 '''Fast and efficient parser for XTB files. |
| 7 ''' |
| 8 |
| 9 |
| 10 import sys |
| 11 import xml.sax |
| 12 import xml.sax.handler |
| 13 |
| 14 |
class XtbContentHandler(xml.sax.handler.ContentHandler):
  '''A content handler that calls a given callback function for each
  translation in the XTB file.

  The callback is invoked as callback(msg_id, parts) when a <translation>
  element ends.  'msg_id' is the value of the element's 'id' attribute and
  'parts' is a list of (is_placeholder, text) tuples in document order.

  Translations may be wrapped in a single, non-nested <if expr="..."> element;
  the callback then only fires when the expression evaluates to a true value.

  NOTE(review): the 'expr' attribute is evaluated with eval(), so XTB files
  must come from a trusted source.
  '''

  def __init__(self, callback, defs=None, debug=False):
    '''Initializes the handler.

    Args:
      callback: def Callback(msg_id, parts): pass
      defs: optional dictionary of defines made available to <if> expressions;
        a false value (None or empty) is replaced by a fresh empty dictionary.
      debug: stored on the instance; not read by this class.
    '''
    # ContentHandler is an old-style class on Python 2, so the base initializer
    # is called explicitly rather than through super().
    xml.sax.handler.ContentHandler.__init__(self)
    self.callback = callback
    self.debug = debug
    # 0 if we are not currently parsing a translation, otherwise the message
    # ID of that translation.
    self.current_id = 0
    # Empty if we are not currently parsing a translation, otherwise the
    # parts we have for that translation - a list of tuples
    # (is_placeholder, text)
    self.current_structure = []
    # Set to the language ID when we see the <translationbundle> node.
    self.language = ''
    # Keep track of the if block we're inside.  We can't nest ifs.
    self.if_expr = None
    # Root defines to be used with if expr.
    if defs:
      self.defines = defs
    else:
      self.defines = {}

  def startElement(self, name, attrs):
    '''Records the state implied by an opening tag.

    Elements other than <translation>, <ph>, <translationbundle> and <if> are
    ignored.
    '''
    if name == 'translation':
      assert self.current_id == 0 and len(self.current_structure) == 0, (
          "Didn't expect a <translation> element here.")
      self.current_id = attrs.getValue('id')
    elif name == 'ph':
      assert self.current_id != 0, "Didn't expect a <ph> element here."
      self.current_structure.append((True, attrs.getValue('name')))
    elif name == 'translationbundle':
      self.language = attrs.getValue('lang')
    elif name == 'if':
      assert self.if_expr is None, "Can't nest <if> in xtb files"
      self.if_expr = attrs.getValue('expr')

  def endElement(self, name):
    '''Emits the finished translation, or leaves the current <if> block.'''
    if name == 'translation':
      assert self.current_id != 0

      # If we're in an if block, only call the callback (add the translation)
      # if the expression is True.
      if self._ShouldRunCallback():
        self.callback(self.current_id, self.current_structure)

      # Reset the per-translation state; a new list is bound so the one handed
      # to the callback is never mutated afterwards.
      self.current_id = 0
      self.current_structure = []
    elif name == 'if':
      assert self.if_expr is not None
      self.if_expr = None

  def characters(self, content):
    '''Appends character data to the translation currently being parsed.

    Character data outside a <translation> element is dropped.
    '''
    if self.current_id != 0:
      # We are inside a <translation> node so just add the characters to our
      # structure.
      #
      # This naive way of handling characters is OK because in the XTB format,
      # <ph> nodes are always empty (always <ph name="XXX"/>) and whitespace
      # inside the <translation> node should be preserved.
      self.current_structure.append((False, content))

  def _ShouldRunCallback(self):
    '''Returns a true value if the enclosing <if> (if any) allows the callback.

    With no active <if> expression (None or empty string) the answer is True.
    Otherwise the expression is evaluated with these names in scope:
      os: sys.platform
      defs: the defines dictionary
      pp_ifdef(define): whether 'define' is a key of defs
      pp_if(define): whether 'define' is a key of defs with a true value
    '''
    if not self.if_expr:
      return True

    defs = self.defines
    def pp_ifdef(define):
      return define in defs
    def pp_if(define):
      return define in defs and defs[define]

    # NOTE(review): eval() runs arbitrary Python taken verbatim from the XTB
    # file.  Only parse files from trusted sources.
    return eval(self.if_expr, {},
                {'os': sys.platform,
                 'defs' : defs,
                 'pp_ifdef' : pp_ifdef,
                 'pp_if' : pp_if})
| 91 |
| 92 |
class XtbErrorHandler(xml.sax.handler.ErrorHandler):
  '''SAX error handler that ignores warnings and recoverable errors and
  re-raises fatal errors.
  '''

  def warning(self, exception):
    # Warnings are deliberately ignored.
    return None

  def error(self, exception):
    # Recoverable errors are deliberately ignored.
    return None

  def fatalError(self, exception):
    # Fatal errors are propagated to the caller unchanged.
    raise exception
| 102 |
| 103 |
def Parse(xtb_file, callback_function, defs=None, debug=False):
  '''Parse xtb_file, making a call to callback_function for every translation
  in the XTB file.

  The callback function must have the signature as described below.  The 'parts'
  parameter is a list of tuples (is_placeholder, text).  The 'text' part is
  either the raw text (if is_placeholder is False) or the name of the
  placeholder (if is_placeholder is True).

  Args:
    xtb_file: file('fr.xtb')
    callback_function: def Callback(msg_id, parts): pass
    defs: optional dictionary of defines, forwarded to XtbContentHandler for
      use by <if> expressions.  None (the default) means no defines.
    debug: forwarded to XtbContentHandler.

  Return:
    The language of the XTB, e.g. 'fr'
  '''
  # Start by advancing the file pointer past the DOCTYPE thing, as the TC
  # uses a path to the DTD that only works in Unix.
  # TODO(joi) Remove this ugly hack by getting the TC gang to change the
  # XTB files somehow?
  front_of_file = xtb_file.read(1024)

  # Match the needle to the buffer type so that files opened in binary mode
  # (bytes on Python 3) can be searched as well as text-mode files.
  root_marker = '<translationbundle'
  if not isinstance(front_of_file, str):
    root_marker = root_marker.encode('ascii')

  root_offset = front_of_file.find(root_marker)
  if root_offset < 0:
    # The root element is not within the first 1024 units read.  Rewind to the
    # start instead of seeking to -1 so that the SAX parser gets to report any
    # real problem with the file.
    root_offset = 0
  # NOTE(review): for a text-mode file this uses a character index as a seek
  # position, which assumes the preamble is single-byte and newline-untranslated
  # - confirm how callers open the file.
  xtb_file.seek(root_offset)

  handler = XtbContentHandler(callback=callback_function, defs=defs,
                              debug=debug)
  xml.sax.parse(xtb_file, handler)
  assert handler.language != ''
  return handler.language
| 132 |
OLD | NEW |