Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1498)

Side by Side Diff: grit/util.py

Issue 1442863002: Remove contents of grit's SVN repository. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « grit/tool/xmb_unittest.py ('k') | grit/util_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 '''Utilities used by GRIT.
7 '''
8
9 import codecs
10 import htmlentitydefs
11 import os
12 import re
13 import shutil
14 import sys
15 import tempfile
16 import time
17 import types
18 from xml.sax import saxutils
19
20 from grit import lazy_re
21
22 _root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
23
24
# Sentinel values accepted by ReadFile() in place of a codec name.
BINARY, RAW_TEXT = 0, 1


# Sentinel values naming the data pack encodings understood by Encode().
# The first slot (value 0) is deliberately left unnamed.
_, UTF8, UTF16 = 0, 1, 2


def Encode(message, encoding):
  '''Converts |message| to the byte representation selected by |encoding|.

  Args:
    message: the text to convert.
    encoding: UTF8 or UTF16; any other value leaves |message| untouched.

  Returns:
    The encoded bytes for UTF8/UTF16, otherwise |message| itself.
  '''
  if encoding == UTF16:
    # The 'utf16' codec emits a two-byte BOM first; data packs do not want it.
    with_bom = message.encode('utf16')
    return with_bom[2:]
  elif encoding == UTF8:
    return message.encode('utf8')
  else:
    # Anything else is treated as binary and passed through as-is.
    return message


# Regular expression matching every flavor of line break (CRLF, LF, CR).
LINEBREAKS = re.compile('\r\n|\n|\r')
48
def MakeRelativePath(base_path, path_to_make_relative):
  """Computes a path leading from base_path to path_to_make_relative.

  Joining base_path with the result (os.path.join) names the same location
  as path_to_make_relative.  When the two paths share no leading text at
  all, path_to_make_relative is returned unchanged.

  Args:
    base_path: the root path
    path_to_make_relative: an absolute path that is on the same drive
                           as base_path

  Returns:
    A string made of zero or more '..' + separator hops followed by the
    part of path_to_make_relative below the common directory.
  """
  sep = os.path.sep

  def _TailAfter(prefix_path, path_with_prefix):
    """Returns what follows prefix_path in path_with_prefix, normalized and
    without leading/trailing separators ('' when nothing is left)."""
    assert path_with_prefix.startswith(prefix_path)
    tail = path_with_prefix[len(prefix_path):].strip(sep)
    tail = os.path.normpath(tail)
    return '' if tail == '.' else tail

  def _SharedDirectory(*paths):
    """Returns the longest leading directory common to all of |paths|
    (at least one of which should be a directory)."""
    # commonprefix() compares character by character, so the result may stop
    # in the middle of a path component.
    shared = os.path.commonprefix(paths)
    if shared.endswith(sep):
      return shared

    # Every path has to be inspected: for "c:\a" and "c:\abc" the textual
    # prefix "c:\a" looks like a directory for the first path only.
    for candidate in paths:
      assert len(candidate) >= len(shared)
      # A path equal to the prefix, or continuing with a separator right
      # after it, is consistent with the prefix being a directory.
      if candidate != shared and candidate[len(shared)] != sep:
        # Cut back to the last separator (kept).  rfind() yielding -1 turns
        # the slice into [:0], i.e. the empty prefix.
        return shared[:shared.rfind(sep) + 1]
    return shared

  common = _SharedDirectory(base_path, path_to_make_relative)
  if not common:
    # Nothing in common: hand back the path untouched, which keeps the
    # historical absolute-path behavior.
    return path_to_make_relative

  # One '..' hop for every non-empty component between the common directory
  # and base_path (handles both "" and "foo\\bar").
  base_tail = _TailAfter(common, base_path)
  hops = sum(1 for piece in base_tail.split(sep) if piece)

  # Then descend from the common directory to the target.
  target_tail = _TailAfter(common, path_to_make_relative)
  return ('..' + sep) * hops + target_tail
141
142
# Every identifier ever passed to SetupSystemIdentifiers().
KNOWN_SYSTEM_IDENTIFIERS = set()

# Compiled pattern matching any known identifier; rebuilt on every
# SetupSystemIdentifiers() call.
SYSTEM_IDENTIFIERS = None


def SetupSystemIdentifiers(ids):
  '''Registers more system identifiers and rebuilds SYSTEM_IDENTIFIERS.

  May be called repeatedly; identifiers accumulate across calls.

  Args:
    ids: an iterable of strings
  '''
  global SYSTEM_IDENTIFIERS
  KNOWN_SYSTEM_IDENTIFIERS.update(ids)
  word_patterns = []
  for identifier in KNOWN_SYSTEM_IDENTIFIERS:
    word_patterns.append(r'\b%s\b' % identifier)
  # The pattern is compiled with re.VERBOSE, so the blanks around '|' are
  # ignored by the regex engine.
  SYSTEM_IDENTIFIERS = lazy_re.compile(' | '.join(word_patterns), re.VERBOSE)


# Matches all of the resource IDs predefined by Windows.
SetupSystemIdentifiers((
    'IDOK', 'IDCANCEL', 'IDC_STATIC', 'IDYES', 'IDNO',
    'ID_FILE_NEW', 'ID_FILE_OPEN', 'ID_FILE_CLOSE', 'ID_FILE_SAVE',
    'ID_FILE_SAVE_AS', 'ID_FILE_PAGE_SETUP', 'ID_FILE_PRINT_SETUP',
    'ID_FILE_PRINT', 'ID_FILE_PRINT_DIRECT', 'ID_FILE_PRINT_PREVIEW',
    'ID_FILE_UPDATE', 'ID_FILE_SAVE_COPY_AS', 'ID_FILE_SEND_MAIL',
    'ID_FILE_MRU_FIRST', 'ID_FILE_MRU_LAST',
    'ID_EDIT_CLEAR', 'ID_EDIT_CLEAR_ALL', 'ID_EDIT_COPY',
    'ID_EDIT_CUT', 'ID_EDIT_FIND', 'ID_EDIT_PASTE', 'ID_EDIT_PASTE_LINK',
    'ID_EDIT_PASTE_SPECIAL', 'ID_EDIT_REPEAT', 'ID_EDIT_REPLACE',
    'ID_EDIT_SELECT_ALL', 'ID_EDIT_UNDO', 'ID_EDIT_REDO',
    'VS_VERSION_INFO', 'IDRETRY',
    'ID_APP_ABOUT', 'ID_APP_EXIT',
    'ID_NEXT_PANE', 'ID_PREV_PANE',
    'ID_WINDOW_NEW', 'ID_WINDOW_ARRANGE', 'ID_WINDOW_CASCADE',
    'ID_WINDOW_TILE_HORZ', 'ID_WINDOW_TILE_VERT', 'ID_WINDOW_SPLIT',
    'ATL_IDS_SCSIZE', 'ATL_IDS_SCMOVE', 'ATL_IDS_SCMINIMIZE',
    'ATL_IDS_SCMAXIMIZE', 'ATL_IDS_SCNEXTWINDOW', 'ATL_IDS_SCPREVWINDOW',
    'ATL_IDS_SCCLOSE', 'ATL_IDS_SCRESTORE', 'ATL_IDS_SCTASKLIST',
    'ATL_IDS_MDICHILD', 'ATL_IDS_IDLEMESSAGE', 'ATL_IDS_MRU_FILE',
))
183
184
# Matches character entities, whether specified by name, decimal or hex.
_HTML_ENTITY = lazy_re.compile(
    '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
    re.IGNORECASE)

# Matches characters that should be HTML-escaped. This is ", <, > and &, but
# the & only if it is not the start of an HTML character entity.
#
# The terminating ';' is required for all three entity spellings (the whole
# alternation is grouped before it), and named entities may contain digits,
# mirroring _HTML_ENTITY.  Hence '&#12 ' (no semicolon) is escaped while
# '&sup2;' is recognized as an entity and left alone.
_HTML_CHARS_TO_ESCAPE = lazy_re.compile(
    '"|<|>|&(?!(?:#[0-9]+|#x[0-9a-f]+|[a-z0-9]+);)',
    re.IGNORECASE | re.MULTILINE)
195
196
def ReadFile(filename, encoding):
  '''Returns the complete contents of the file at |filename|.

  Args:
    filename: The path to the file.
    encoding: Either a Python codec name, in which case the data is decoded
              with it, or one of the special values BINARY (open in binary
              mode, no decoding) and RAW_TEXT (open with universal newlines,
              no decoding).
  '''
  if encoding == BINARY:
    mode = 'rb'
  else:
    mode = 'rU'
  with open(filename, mode) as f:
    contents = f.read()
  if encoding in (BINARY, RAW_TEXT):
    return contents
  return contents.decode(encoding)
212
213
def WrapOutputStream(stream, encoding='utf-8'):
  '''Wraps |stream| in a codecs stream writer so that characters written to
  the returned object reach |stream| encoded as |encoding|.'''
  writer_factory = codecs.getwriter(encoding)
  return writer_factory(stream)
218
219
def ChangeStdoutEncoding(encoding='utf-8'):
  '''Replaces sys.stdout with a wrapper (see WrapOutputStream) that encodes
  everything printed using |encoding|.'''
  wrapped_stdout = WrapOutputStream(sys.stdout, encoding)
  sys.stdout = wrapped_stdout
223
224
def EscapeHtml(text, escape_quotes=False):
  '''Escapes <, > and & in 'text' as named HTML entities; double quotes are
  escaped too when 'escape_quotes' is set.  Which ampersands count as
  needing escaping is decided by the _HTML_CHARS_TO_ESCAPE pattern, whose
  purpose is to leave existing character entities alone.  Suitable for
  text destined for HTML, not for XML.
  '''
  named_entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;'}

  def Replace(match):
    char = match.group()
    if char == '"':
      # The pattern always matches quotes; only rewrite them on request.
      return '&quot;' if escape_quotes else char
    # The pattern can only produce one of the characters in the table.
    assert char in named_entities
    return named_entities[char]

  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)
241
242
def UnescapeHtml(text, replace_nbsp=True):
  '''Returns 'text' with all HTML character entities (both named character
  entities and those specified by decimal or hexadecimal Unicode ordinal)
  replaced by their Unicode characters.

  Entities that cannot be converted are left untouched: unknown names, and
  numeric references whose value unichr() rejects.  &nbsp; is also left
  untouched when 'replace_nbsp' is False.

  Args:
    text: the string to unescape.
    replace_nbsp: whether &nbsp; should be replaced like any other entity.

  Returns:
    The unescaped string.
  '''
  def Replace(match):
    groups = match.groupdict()
    try:
      if groups['hex']:
        return unichr(int(groups['hex'], 16))
      if groups['decimal']:
        return unichr(int(groups['decimal'], 10))
    except (ValueError, OverflowError):
      # The ordinal is outside what unichr() accepts (e.g. '&#9999999999;');
      # keep the reference verbatim instead of aborting the whole call.
      return match.group()

    # Neither numeric group matched, so the pattern matched a named entity.
    name = groups['named']
    assert name is not None
    if name == 'nbsp' and not replace_nbsp:
      return match.group()  # Don't replace &nbsp;
    codepoint = htmlentitydefs.name2codepoint.get(name)
    if codepoint is None:
      return match.group()  # Unknown HTML character entity - don't replace
    return unichr(codepoint)

  return _HTML_ENTITY.sub(Replace, text)
269
270
def EncodeCdata(cdata):
  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
  format, depending on which is more appropriate for easy editing.  The data
  is escaped for inclusion in an XML element's body.

  The CDATA form is chosen when the text contains more than one '<' or more
  than one '>', but never when it contains ']]>': that sequence would
  terminate the CDATA section early, so such text is always entity-escaped.

  Args:
    cdata: 'If x < y and y < z then x < z'

  Return:
    '<![CDATA[If x < y and y < z then x < z]]>'
  '''
  many_angle_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
  # The parenthesization matters: 'and' binds tighter than 'or', so the
  # ']]>' check must apply to both bracket counts.
  if many_angle_brackets and ']]>' not in cdata:
    return '<![CDATA[%s]]>' % cdata
  return saxutils.escape(cdata)
286
287
def FixupNamedParam(function, param_name, param_value):
  '''Wraps 'function' so that the keyword argument 'param_name' defaults to
  'param_value' whenever the caller does not pass it explicitly.

  Args:
    function: callable
    param_name: 'bingo'
    param_value: 'bongo' (any type)

  Return:
    callable
  '''
  def FixupClosure(*args, **kw):
    # setdefault() only fills the value in when the caller left it out.
    kw.setdefault(param_name, param_value)
    return function(*args, **kw)
  return FixupClosure
306
307
def PathFromRoot(path):
  r'''Resolves a path given relative to _root_dir (the parent of the
  directory containing this module) into a normalized path that can be used
  to open the file.

  Args:
    path: 'rel_dir\file.ext'

  Return:
    'c:\src\tools\rel_dir\file.ext'
  '''
  joined = os.path.join(_root_dir, path)
  return os.path.normpath(joined)
320
321
def ParseGrdForUnittest(body, base_dir=None):
  '''Wraps |body| in a minimal .grd document and parses it, for unit tests.

  Args:
    body: XML that goes inside the <release> element.
    base_dir: The base_dir attribute of the <grit> tag; defaults to
              PathFromRoot('.').

  Returns:
    Whatever grd_reader.Parse() returns for the assembled document.
  '''
  import StringIO
  from grit import grd_reader
  if base_dir is None:
    base_dir = PathFromRoot('.')
  if isinstance(body, unicode):
    # The skeleton declares UTF-8, so hand the parser UTF-8 bytes.
    body = body.encode('utf-8')
  skeleton = '''<?xml version="1.0" encoding="UTF-8"?>
<grit latest_public_release="2" current_release="3" source_lang_id="en" base_dir ="%s">
<outputs>
</outputs>
<release seq="3">
%s
</release>
</grit>'''
  grd_xml = skeleton % (base_dir, body)
  return grd_reader.Parse(StringIO.StringIO(grd_xml), dir=".")
344
345
def StripBlankLinesAndComments(text):
  '''Drops empty lines and lines that begin with '//' from |text| (C source
  code in unit tests) and returns the remaining lines joined by newlines.'''
  kept = []
  for line in text.splitlines():
    if not line or line.startswith('//'):
      continue
    kept.append(line)
  return '\n'.join(kept)
350
351
def dirname(filename):
  '''Like os.path.dirname(), except that '.' is returned wherever
  os.path.dirname() would return the empty string.
  '''
  parent = os.path.dirname(filename)
  return '.' if parent == '' else parent
360
361
def normpath(path):
  '''Like os.path.normpath(), but backward slashes are first turned into
  forward slashes so that they act as separators when not running on
  Windows.
  '''
  # Harmless on Windows too: os.path.normpath there converts the forward
  # slashes back into backward slashes.
  with_forward_slashes = path.replace('\\', '/')
  return os.path.normpath(with_forward_slashes)
370
371
# Separators accepted between the parts of a language code.
_LANGUAGE_SPLIT_RE = lazy_re.compile('-|_|/')


def CanonicalLanguage(code):
  '''Canonicalizes a language code: the parts are joined with dashes and
  every part after the first is upper-cased.  A one-part code comes back
  unchanged.

  Args:
    code: 'zh_cn'

  Return:
    code: 'zh-CN'
  '''
  parts = _LANGUAGE_SPLIT_RE.split(code)
  canonical_parts = parts[:1] + [part.upper() for part in parts[1:]]
  return '-'.join(canonical_parts)
390
391
# Codepage number for each language, keyed by canonical language code.
_LANG_TO_CODEPAGE = {
  'en'    : 1252,
  'fr'    : 1252,
  'it'    : 1252,
  'de'    : 1252,
  'es'    : 1252,
  'nl'    : 1252,
  'sv'    : 1252,
  'no'    : 1252,
  'da'    : 1252,
  'fi'    : 1252,
  'pt-BR' : 1252,
  'ru'    : 1251,
  'ja'    : 932,
  'zh-TW' : 950,
  'zh-CN' : 936,
  'ko'    : 949,
}


def LanguageToCodepage(lang):
  '''Looks up the codepage _number_ for 'lang', which may be written in any
  form CanonicalLanguage() accepts ('en', 'pt_br', 'pt-BR', ...).

  Languages missing from the table fall back to 1252, and a notice is
  printed to standard output.

  Args:
    lang: 'de'

  Return:
    1252
  '''
  canonical = CanonicalLanguage(lang)
  codepage = _LANG_TO_CODEPAGE.get(canonical)
  if codepage is None:
    print("Not sure which codepage to use for %s, assuming cp1252" % canonical)
    return 1252
  return codepage
430
def NewClassInstance(class_name, class_type):
  '''Imports and instantiates the class named by class_name.

  Args:
    class_name: the fully qualified, dot separated package + classname,
      i.e. "my.package.name.MyClass". Short class names are not supported.
    class_type: the class or superclass this object must implement

  Return:
    An instance of the class, or None when class_name has no module part,
    the module lacks the attribute, or the instance is not a class_type.
  '''
  module_name, _, short_name = class_name.rpartition('.')
  if not module_name:
    return None
  # A non-empty fromlist makes __import__ return the innermost module
  # rather than the top-level package.
  module = __import__(module_name, globals(), locals(), [''])
  if not hasattr(module, short_name):
    return None
  candidate = getattr(module, short_name)()
  if isinstance(candidate, class_type):
    return candidate
  return None
455
456
def FixLineEnd(text, line_end):
  '''Returns |text| with every line break (CRLF, CR or LF) replaced by
  |line_end|.'''
  # Collapse everything to '\n' first (CRLF before lone CR, so a CRLF pair
  # counts as one break), then expand to the requested terminator.
  normalized = text.replace('\r\n', '\n').replace('\r', '\n')
  return normalized.replace('\n', line_end)
464
465
def BoolToString(bool):
  '''Returns 'true' when the argument is truthy and 'false' otherwise.'''
  return 'true' if bool else 'false'
471
472
# Module-level verbosity flags, both off by default.  Nothing in this file
# assigns them after this point -- presumably callers set them; verify.
verbose = False
extra_verbose = False

def IsVerbose():
  '''Returns the current value of the module-level 'verbose' flag.'''
  return verbose

def IsExtraVerbose():
  '''Returns the current value of the module-level 'extra_verbose' flag.'''
  return extra_verbose
481
def ParseDefine(define):
  '''Splits a define argument into its name and value.

  Accepted forms are "NAME=VAL" and "NAME"; a bare name gets the value True.
  The values "1" and "0" become True and False respectively.  Whitespace
  around the name and around the value is stripped.

  Args:
    define: a string of the form "NAME=VAL" or "NAME".

  Returns:
    A (name, value) pair. name is a string, value a string or boolean.
  '''
  raw_name, equals, raw_value = define.partition('=')
  name = raw_name.strip()
  if not equals:
    # No '=' at all: the define is simply switched on.
    return (name, True)
  value = raw_value.strip()
  if value == "1":
    value = True
  elif value == "0":
    value = False
  return (name, value)
503
504
class Substituter(object):
  '''Finds and substitutes variable names in text strings.

  Given a dictionary of variable names and values, prepares to
  search for patterns of the form [VAR_NAME] in a text.
  The search expression is cached and only rebuilt after the set of
  substitutions changes.
  Also applies to tclib.Message objects.
  '''

  def __init__(self):
    '''Create an empty substituter.'''
    self.substitutions_ = {}
    # True whenever self.exp is out of date with respect to substitutions_.
    self.dirty_ = True
    self.exp = None

  def AddSubstitutions(self, subs):
    '''Add new values to the substitutor.

    Args:
      subs: A dictionary of new substitutions.
    '''
    self.substitutions_.update(subs)
    self.dirty_ = True

  def AddMessages(self, messages, lang):
    '''Adds substitutions extracted from node.Message objects.

    Args:
      messages: a list of node.Message objects.
      lang: The translation language to use in substitutions.
    '''
    subs = dict((str(msg.attrs['name']), msg.Translate(lang))
                for msg in messages)
    # AddSubstitutions() also marks the cached expression as stale.
    self.AddSubstitutions(subs)

  def GetExp(self):
    '''Obtain a regular expression that will find substitution keys in text.

    Create and cache if the substituter has been updated. Use the cached value
    otherwise. Keys will be enclosed in [square brackets] in text.

    Returns:
      A regular expression object.
    '''
    if self.dirty_:
      if self.substitutions_:
        # Keys are matched literally, so escape any regex metacharacters.
        components = [r'\[%s\]' % re.escape(k) for k in self.substitutions_]
        pattern = '(%s)' % '|'.join(components)
      else:
        # With no keys there is nothing to find; use a pattern that can never
        # match instead of one that matches the empty string everywhere.
        pattern = '(?!)'
      self.exp = re.compile(pattern)
      self.dirty_ = False
    return self.exp

  def Substitute(self, text):
    '''Substitute the variable values in the given text.

    Text of the form [message_name] will be replaced by the message's value.

    Args:
      text: A string of text.

    Returns:
      A string of text with substitutions done.
    '''
    # Splitting on a capturing group keeps the matched keys in the result, so
    # every fragment is either plain text or exactly one '[key]'.
    return ''.join([self._SubFragment(f) for f in self.GetExp().split(text)])

  def _SubFragment(self, fragment):
    '''Utility function for Substitute.

    Performs a simple substitution if the fragment is exactly of the form
    [message_name].

    Args:
      fragment: A simple string.

    Returns:
      A string with the substitution done.
    '''
    if len(fragment) > 2 and fragment[0] == '[' and fragment[-1] == ']':
      sub = self.substitutions_.get(fragment[1:-1], None)
      if sub is not None:
        return sub
    return fragment

  def SubstituteMessage(self, msg):
    '''Apply substitutions to a tclib.Message object.

    Text of the form [message_name] will be replaced by a new placeholder,
    whose presentation will take the form the message_name_{UsageCount}, and
    whose example will be the message's value. Existing placeholders are
    not affected.

    Args:
      msg: A tclib.Message object.

    Returns:
      A tclib.Message object, with substitutions done; |msg| itself when
      nothing was substituted.
    '''
    from grit import tclib  # avoid circular import
    counts = {}
    placeholders = []
    pieces = []
    for fragment in self.GetExp().split(msg.GetPresentableContent()):
      sub = self._SubFragment(fragment)
      if fragment == sub:
        # Plain text, or a key whose value is identical to '[key]'.
        pieces.append(fragment)
        continue
      fragment = str(fragment)
      # Number each use of a key so that placeholder names stay unique.
      counts[fragment] = counts.get(fragment, 0) + 1
      name = "%s_%d" % (fragment[1:-1], counts[fragment])
      placeholders.append(tclib.Placeholder(name, fragment, sub))
      pieces.append(name)
    if not placeholders:
      return msg
    return tclib.Message(''.join(pieces), msg.GetPlaceholders() + placeholders,
                         msg.GetDescription(), msg.GetMeaning())
620
621
class TempDir(object):
  '''Creates files with the specified contents in a temporary directory,
  for unit testing.

  Usable as a context manager: the directory is removed on exit.
  '''
  def __init__(self, file_data):
    '''Creates the temporary directory and writes the given files into it.

    Args:
      file_data: a dictionary mapping file names (relative to the temporary
                 directory, possibly with subdirectories) to the text to
                 write into each file.
    '''
    self._tmp_dir_name = tempfile.mkdtemp()
    populated = False
    try:
      assert not os.listdir(self.GetPath())
      for name, contents in file_data.items():
        file_path = self.GetPath(name)
        dir_path = os.path.dirname(file_path)
        if not os.path.exists(dir_path):
          os.makedirs(dir_path)
        with open(file_path, 'w') as f:
          f.write(contents)
      populated = True
    finally:
      if not populated:
        # The caller never receives the object when construction fails, so
        # nobody else could clean the directory up.
        shutil.rmtree(self._tmp_dir_name, ignore_errors=True)

  def __enter__(self):
    return self

  def __exit__(self, *exc_info):
    self.CleanUp()

  def CleanUp(self):
    '''Deletes the temporary directory and everything inside it.'''
    shutil.rmtree(self.GetPath())

  def GetPath(self, name=''):
    '''Returns the path of |name| inside the temporary directory, or of the
    directory itself when |name| is omitted.'''
    name = os.path.join(self._tmp_dir_name, name)
    # os.path.join() discards the directory when |name| is absolute.
    assert name.startswith(self._tmp_dir_name)
    return name

  def AsCurrentDir(self):
    '''Returns a context manager that makes the temporary directory the
    current working directory and restores the previous one on exit.'''
    return self._AsCurrentDirClass(self.GetPath())

  class _AsCurrentDirClass(object):
    '''Context manager that switches the working directory to |path|.'''
    def __init__(self, path):
      self.path = path
    def __enter__(self):
      self.oldpath = os.getcwd()
      os.chdir(self.path)
    def __exit__(self, *exc_info):
      os.chdir(self.oldpath)
OLDNEW
« no previous file with comments | « grit/tool/xmb_unittest.py ('k') | grit/util_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698