grit/util.py - Issue 1442863002: Remove contents of grit's SVN repository.

Side by Side Diff: grit/util.py

Issue 1442863002: Remove contents of grit's SVN repository. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 #!/usr/bin/env python

2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.

5

6 '''Utilities used by GRIT.

7 '''

8

9 import codecs

10 import htmlentitydefs

11 import os

12 import re

13 import shutil

14 import sys

15 import tempfile

16 import time

17 import types

18 from xml.sax import saxutils

19

20 from grit import lazy_re

21

# Absolute path of the directory one level above the directory that contains
# this file. PathFromRoot() joins relative paths onto it.
_root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

23

24

# Special 'encoding' values understood by ReadFile(): BINARY reads the file in
# binary mode, RAW_TEXT reads it as text without decoding to Unicode.
BINARY, RAW_TEXT = 0, 1

27

28

# Distinct values identifying data pack encodings, as consumed by Encode().
# The value 0 is deliberately left unnamed (bound to the throwaway name '_').
_, UTF8, UTF16 = 0, 1, 2

31

32

def Encode(message, encoding):
  '''Returns a byte stream that represents |message| in the given |encoding|.

  Args:
    message: a python unicode string.
    encoding: UTF8 or UTF16; any other value means "binary" and returns
        |message| untouched.
  '''
  if encoding == UTF16:
    # The first 2 bytes produced by the 'utf16' codec are the BOM, which is
    # skipped for datapack text resources.
    return message.encode('utf16')[2:]
  elif encoding == UTF8:
    return message.encode('utf8')
  else:
    # Default is BINARY: hand the data back unchanged.
    return message

44

45

# Matches all different types of linebreaks: '\r\n' (tried first so that it is
# consumed as a single break), '\n' and '\r'.
LINEBREAKS = re.compile('\r\n|\n|\r')

48

def MakeRelativePath(base_path, path_to_make_relative):
  """Returns a relative path from the base_path to
  the path_to_make_relative.

  In other words, os.path.join(base_path,
    MakeRelativePath(base_path, path_to_make_relative))
  is the same location as path_to_make_relative.

  If the two paths share no common prefix at all, path_to_make_relative is
  returned unchanged.

  Args:
    base_path: the root path
    path_to_make_relative: an absolute path that is on the same drive
      as base_path
  """

  def _GetPathAfterPrefix(prefix_path, path_with_prefix):
    """Gets the subpath within prefix_path for the path_with_prefix
    with no beginning or trailing path separators.

    Args:
      prefix_path: the base path
      path_with_prefix: a path that starts with prefix_path
    """
    assert path_with_prefix.startswith(prefix_path)
    path_without_prefix = path_with_prefix[len(prefix_path):]
    normalized_path = os.path.normpath(path_without_prefix.strip(os.path.sep))
    # normpath('') yields '.', which would otherwise leak into the result.
    if normalized_path == '.':
      normalized_path = ''
    return normalized_path

  def _GetCommonBaseDirectory(*args):
    """Returns the common prefix directory for the given paths

    Args:
      The list of paths (at least one of which should be a directory)
    """
    prefix = os.path.commonprefix(args)
    # prefix is a character-by-character prefix (i.e. it does not end
    # on a directory bound, so this code fixes that)

    # if the prefix ends with the separator, then it is perfect.
    if len(prefix) > 0 and prefix[-1] == os.path.sep:
      return prefix

    # We need to loop through all paths or else we can get
    # tripped up by "c:\a" and "c:\abc".  The common prefix
    # is "c:\a" which is a directory and looks good with
    # respect to the first directory but it is clear that
    # isn't a common directory when the second path is
    # examined.
    for path in args:
      assert len(path) >= len(prefix)
      # If the prefix is the same length as the path,
      # then the prefix must be a directory (since one
      # of the arguments should be a directory).
      if path == prefix:
        continue
      # if the character after the prefix in the path
      # is the separator, then the prefix appears to be a
      # valid directory as well for the given path
      if path[len(prefix)] == os.path.sep:
        continue
      # Otherwise, the prefix is not a directory, so it needs
      # to be shortened to be one
      index_sep = prefix.rfind(os.path.sep)
      # Use "index_sep + 1" because it includes the final sep
      # and it handles the case when the index_sep is -1 as well
      prefix = prefix[:index_sep + 1]
      # At this point we backed up to a directory bound which is
      # common to all paths, so we can quit going through all of
      # the paths.
      break
    return prefix

  prefix = _GetCommonBaseDirectory(base_path, path_to_make_relative)
  # If the paths had no commonality at all, then return the absolute path
  # because it is the best that can be done.  If the path had to be relative
  # then eventually this absolute path will be discovered (when a build breaks)
  # and an appropriate fix can be made, but having this allows for the best
  # backward compatibility with the absolute path behavior in the past.
  if len(prefix) <= 0:
    return path_to_make_relative
  # Build a path from the base dir to the common prefix
  remaining_base_path = _GetPathAfterPrefix(prefix, base_path)

  # The following handles two cases: "" and "foo\\bar"
  path_pieces = remaining_base_path.split(os.path.sep)
  base_depth_from_prefix = len([d for d in path_pieces if len(d)])
  # One ".." per directory level between the common prefix and base_path.
  base_to_prefix = (".." + os.path.sep) * base_depth_from_prefix

  # Add in the path from the prefix to the path_to_make_relative
  remaining_other_path = _GetPathAfterPrefix(prefix, path_to_make_relative)
  return base_to_prefix + remaining_other_path

141

142

# Every identifier ever passed to SetupSystemIdentifiers(); grows over time.
KNOWN_SYSTEM_IDENTIFIERS = set()

# Regexp built from KNOWN_SYSTEM_IDENTIFIERS; reassigned by each call to
# SetupSystemIdentifiers().
SYSTEM_IDENTIFIERS = None

146

def SetupSystemIdentifiers(ids):
  '''Adds ids to a regexp of known system identifiers.

  Can be called many times, ids will be accumulated.  After each call the
  module-level SYSTEM_IDENTIFIERS regexp matches any accumulated identifier
  as a whole word.

  Args:
    ids: an iterable of strings
  '''
  global SYSTEM_IDENTIFIERS
  KNOWN_SYSTEM_IDENTIFIERS.update(ids)
  # The alternatives are joined with an unescaped '|' (alternation); the
  # spaces around it are ignored because the pattern is compiled in VERBOSE
  # mode.  An escaped pipe here would match a literal '|' character instead.
  SYSTEM_IDENTIFIERS = lazy_re.compile(
      ' | '.join([r'\b%s\b' % i for i in KNOWN_SYSTEM_IDENTIFIERS]),
      re.VERBOSE)

160

161

# Registers all of the resource IDs predefined by Windows, so that
# SYSTEM_IDENTIFIERS matches them as soon as this module is imported.
SetupSystemIdentifiers((
  'IDOK', 'IDCANCEL', 'IDC_STATIC', 'IDYES', 'IDNO',
  'ID_FILE_NEW', 'ID_FILE_OPEN', 'ID_FILE_CLOSE', 'ID_FILE_SAVE',
  'ID_FILE_SAVE_AS', 'ID_FILE_PAGE_SETUP', 'ID_FILE_PRINT_SETUP',
  'ID_FILE_PRINT', 'ID_FILE_PRINT_DIRECT', 'ID_FILE_PRINT_PREVIEW',
  'ID_FILE_UPDATE', 'ID_FILE_SAVE_COPY_AS', 'ID_FILE_SEND_MAIL',
  'ID_FILE_MRU_FIRST', 'ID_FILE_MRU_LAST',
  'ID_EDIT_CLEAR', 'ID_EDIT_CLEAR_ALL', 'ID_EDIT_COPY',
  'ID_EDIT_CUT', 'ID_EDIT_FIND', 'ID_EDIT_PASTE', 'ID_EDIT_PASTE_LINK',
  'ID_EDIT_PASTE_SPECIAL', 'ID_EDIT_REPEAT', 'ID_EDIT_REPLACE',
  'ID_EDIT_SELECT_ALL', 'ID_EDIT_UNDO', 'ID_EDIT_REDO',
  'VS_VERSION_INFO', 'IDRETRY',
  'ID_APP_ABOUT', 'ID_APP_EXIT',
  'ID_NEXT_PANE', 'ID_PREV_PANE',
  'ID_WINDOW_NEW', 'ID_WINDOW_ARRANGE', 'ID_WINDOW_CASCADE',
  'ID_WINDOW_TILE_HORZ', 'ID_WINDOW_TILE_VERT', 'ID_WINDOW_SPLIT',
  'ATL_IDS_SCSIZE', 'ATL_IDS_SCMOVE', 'ATL_IDS_SCMINIMIZE',
  'ATL_IDS_SCMAXIMIZE', 'ATL_IDS_SCNEXTWINDOW', 'ATL_IDS_SCPREVWINDOW',
  'ATL_IDS_SCCLOSE', 'ATL_IDS_SCRESTORE', 'ATL_IDS_SCTASKLIST',
  'ATL_IDS_MDICHILD', 'ATL_IDS_IDLEMESSAGE', 'ATL_IDS_MRU_FILE' ))

183

184

# Matches character entities, whether specified by name, decimal or hex.
# Exactly one of the named groups 'decimal', 'hex' and 'named' is set for any
# match; the alternatives must be separated by unescaped '|' for that to work.
_HTML_ENTITY = lazy_re.compile(
  '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
  re.IGNORECASE)

189

# Matches characters that should be HTML-escaped.  This is ", <, > and &, but
# only if the & is not the start of an HTML character entity.  The lookahead
# groups its alternatives so that the terminating ';' is required for every
# entity form (decimal, hex and named), and named entities may contain digits
# to agree with _HTML_ENTITY.
_HTML_CHARS_TO_ESCAPE = lazy_re.compile(
  '"|<|>|&(?!(?:#[0-9]+|#x[0-9a-f]+|[a-z0-9]+);)',
  re.IGNORECASE | re.MULTILINE)

195

196

def ReadFile(filename, encoding):
  '''Reads and returns the entire contents of the given file.

  Args:
    filename: The path to the file.
    encoding: A Python codec name or one of two special values: BINARY to read
              the file in binary mode, or RAW_TEXT to read it with newline
              conversion but without decoding to Unicode.

  Returns:
    The raw data for BINARY and RAW_TEXT, otherwise the data decoded with
    the given codec.
  '''
  if encoding == BINARY:
    mode = 'rb'
  else:
    # 'U' asks for universal newline conversion.
    mode = 'rU'
  with open(filename, mode) as f:
    contents = f.read()
  if encoding in (BINARY, RAW_TEXT):
    return contents
  return contents.decode(encoding)

212

213

def WrapOutputStream(stream, encoding = 'utf-8'):
  '''Wraps 'stream' in a codecs writer, so that characters written to the
  returned stream are encoded using 'encoding' before reaching 'stream'.'''
  writer_class = codecs.getwriter(encoding)
  return writer_class(stream)

218

219

def ChangeStdoutEncoding(encoding = 'utf-8'):
  '''Replaces sys.stdout with a wrapper (see WrapOutputStream) that writes
  characters using the specified encoding.'''
  encoded_stdout = WrapOutputStream(sys.stdout, encoding)
  sys.stdout = encoded_stdout

223

224

def EscapeHtml(text, escape_quotes = False):
  '''Returns 'text' with <, > and & (and optionally ") escaped to named HTML
  entities.  Any existing named entity or HTML entity defined by decimal or
  hex code will be left untouched.  This is appropriate for escaping text for
  inclusion in HTML, but not for XML.

  Args:
    text: the string to escape.
    escape_quotes: if True, double quotes are replaced by &quot; as well.

  Returns:
    The escaped string.
  '''
  def Replace(match):
    # Each matched character maps to its named entity; returning the character
    # itself (as opposed to the entity) would make this function a no-op.
    char = match.group()
    if char == '&':
      return '&amp;'
    elif char == '<':
      return '&lt;'
    elif char == '>':
      return '&gt;'
    elif char == '"':
      if escape_quotes:
        return '&quot;'
      return char
    # _HTML_CHARS_TO_ESCAPE only matches the four characters handled above.
    raise AssertionError('unexpected match: %r' % char)
  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)

241

242

def UnescapeHtml(text, replace_nbsp=True):
  '''Returns 'text' with all HTML character entities (both named character
  entities and those specified by decimal or hexadecimal Unicode ordinal)
  replaced by their Unicode characters (or latin1 characters if possible).

  The only exception is that &nbsp; will not be replaced if 'replace_nbsp' is
  False.  Unknown named entities, and numeric references that do not denote a
  valid character, are left in the text unchanged.

  Args:
    text: the string to unescape.
    replace_nbsp: whether &nbsp; should be replaced like any other entity.

  Returns:
    The unescaped string.
  '''
  def Replace(match):
    groups = match.groupdict()
    try:
      if groups['hex']:
        return unichr(int(groups['hex'], 16))
      elif groups['decimal']:
        return unichr(int(groups['decimal'], 10))
    except (ValueError, OverflowError):
      # The ordinal is outside the range unichr() accepts - don't replace.
      return match.group()

    name = groups['named']
    assert name is not None
    if name == 'nbsp' and not replace_nbsp:
      return match.group()  # Don't replace
    # Look the name up in the dict directly rather than scanning its keys.
    codepoint = htmlentitydefs.name2codepoint.get(name)
    if codepoint is None:
      return match.group()  # Unknown HTML character entity - don't replace
    return unichr(codepoint)

  return _HTML_ENTITY.sub(Replace, text)

269

270

def EncodeCdata(cdata):
  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
  format, depending on which is more appropriate for easy editing.  The data
  is escaped for inclusion in an XML element's body.

  The CDATA form is used when the text contains more than one '<' or more
  than one '>', but never when it contains ']]>' since that sequence would
  terminate the CDATA section early.

  Args:
    cdata: 'If x < y and y < z then x < z'

  Return:
    '<![CDATA[If x < y and y < z then x < z]]>'
  '''
  many_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
  # The ']]>' check must apply to both bracket conditions, hence the explicit
  # grouping ('and' binds tighter than 'or').
  if many_brackets and ']]>' not in cdata:
    return '<![CDATA[%s]]>' % cdata
  return saxutils.escape(cdata)

286

287

def FixupNamedParam(function, param_name, param_value):
  '''Returns a closure that is identical to 'function' but ensures that the
  named parameter 'param_name' is always set to 'param_value' unless explicitly
  set by the caller.

  Args:
    function: callable
    param_name: 'bingo'
    param_value: 'bongo' (any type)

  Return:
    callable
  '''
  def FixupClosure(*args, **kw):
    # 'kw' must be the keyword-argument dict so the default can be injected;
    # positional arguments are forwarded untouched.
    if param_name not in kw:
      kw[param_name] = param_value
    return function(*args, **kw)
  return FixupClosure

306

307

def PathFromRoot(path):
  '''Takes a path relative to the root directory for GRIT (the one that grit.py
  resides in) and returns a path that is either absolute or relative to the
  current working directory (i.e. a path you can use to open the file).

  Args:
    path: 'rel_dir\\file.ext'

  Return:
    'c:\\src\\tools\\rel_dir\\file.ext'
  '''
  joined = os.path.join(_root_dir, path)
  return os.path.normpath(joined)

320

321

def ParseGrdForUnittest(body, base_dir=None):
  '''Parse a skeleton .grd file and return it, for use in unit tests.

  The given body is wrapped in a fixed <grit>/<release> skeleton and the
  resulting document is handed to grd_reader.Parse().

  Args:
    body: XML that goes inside the <release> element.
    base_dir: The base_dir attribute of the <grit> tag.  Defaults to
        PathFromRoot('.').

  Returns:
    Whatever grd_reader.Parse() returns for the assembled document.
  '''
  # Imported here rather than at module level.
  import StringIO
  from grit import grd_reader
  # The skeleton below is a byte string, so unicode bodies are encoded first.
  if isinstance(body, unicode):
    body = body.encode('utf-8')
  if base_dir is None:
    base_dir = PathFromRoot('.')
  body = '''<?xml version="1.0" encoding="UTF-8"?>
<grit latest_public_release="2" current_release="3" source_lang_id="en" base_dir ="%s">
<outputs>
</outputs>
<release seq="3">
%s
</release>
</grit>''' % (base_dir, body)
  return grd_reader.Parse(StringIO.StringIO(body), dir=".")

344

345

def StripBlankLinesAndComments(text):
  '''Strips blank lines and comments from C source code, for unit tests.

  Only completely empty lines and lines that begin with '//' are removed;
  the remaining lines are joined with a single newline character.
  '''
  kept_lines = []
  for line in text.splitlines():
    if not line or line.startswith('//'):
      continue
    kept_lines.append(line)
  return '\n'.join(kept_lines)

350

351

def dirname(filename):
  '''Version of os.path.dirname() that never returns empty paths (returns
  '.' if the result of os.path.dirname() is empty).
  '''
  directory = os.path.dirname(filename)
  return '.' if directory == '' else directory

360

361

def normpath(path):
  '''Version of os.path.normpath that also changes backward slashes to
  forward slashes when not running on Windows.
  '''
  # Converting unconditionally is safe: the Windows version of os.path.normpath
  # turns the forward slashes back into backward slashes.
  with_forward_slashes = path.replace('\\', '/')
  return os.path.normpath(with_forward_slashes)

370

371

# Separators that may appear between the parts of a language code: '-', '_'
# or '/'.  The alternatives must be joined by an unescaped '|'.
_LANGUAGE_SPLIT_RE = lazy_re.compile('-|_|/')

373

374

def CanonicalLanguage(code):
  '''Canonicalizes two-part language codes by using a dash and making the
  second part upper case.  Returns one-part language codes unchanged.

  Args:
    code: 'zh_cn'

  Return:
    code: 'zh-CN'
  '''
  pieces = _LANGUAGE_SPLIT_RE.split(code)
  # The first piece is kept as is; every following piece is upper-cased.
  canonical = pieces[:1] + [piece.upper() for piece in pieces[1:]]
  return '-'.join(canonical)

390

391

# Maps language codes, in the canonical form produced by CanonicalLanguage(),
# to the codepage number that LanguageToCodepage() returns for them.
_LANG_TO_CODEPAGE = {
  'en' : 1252,
  'fr' : 1252,
  'it' : 1252,
  'de' : 1252,
  'es' : 1252,
  'nl' : 1252,
  'sv' : 1252,
  'no' : 1252,
  'da' : 1252,
  'fi' : 1252,
  'pt-BR' : 1252,
  'ru' : 1251,
  'ja' : 932,
  'zh-TW' : 950,
  'zh-CN' : 936,
  'ko' : 949,
}

410

411

def LanguageToCodepage(lang):
  '''Returns the codepage _number_ that can be used to represent 'lang', which
  may be either in formats such as 'en', 'pt_br', 'pt-BR', etc.

  The codepage returned will be one of the 'cpXXXX' codepage numbers.  For a
  language that is not in the table, a notice is printed and 1252 is returned.

  Args:
    lang: 'de'

  Return:
    1252
  '''
  lang = CanonicalLanguage(lang)
  codepage = _LANG_TO_CODEPAGE.get(lang)
  if codepage is None:
    print("Not sure which codepage to use for %s, assuming cp1252" % lang)
    return 1252
  return codepage

430

def NewClassInstance(class_name, class_type):
  '''Returns an instance of the class specified in classname

  The class is instantiated with no arguments, and the instance is only
  returned if it is an instance of class_type.

  Args:
    class_name: the fully qualified, dot separated package + classname,
    i.e. "my.package.name.MyClass". Short class names are not supported.
    class_type: the class or superclass this object must implement

  Return:
    An instance of the class, or None if none was found
  '''
  # Everything before the last dot names the module, the rest the class.
  module_name, _, short_name = class_name.rpartition('.')
  if not module_name:
    return None
  # The non-empty fromlist makes __import__ return the innermost module.
  module = __import__(module_name, globals(), locals(), [''])
  if not hasattr(module, short_name):
    return None
  candidate = getattr(module, short_name)()
  if not isinstance(candidate, class_type):
    return None
  return candidate

455

456

def FixLineEnd(text, line_end):
  '''Returns 'text' with every line break ('\\r\\n', '\\r' or '\\n') replaced
  by 'line_end'.'''
  # Normalize everything to '\n' first ('\r\n' before the lone '\r'), then
  # convert to the requested line ending.
  normalized = text.replace('\r\n', '\n').replace('\r', '\n')
  return normalized.replace('\n', line_end)

464

465

def BoolToString(bool):
  '''Returns 'true' if the argument is truthy, 'false' otherwise.'''
  return 'true' if bool else 'false'

471

472

# Module-level verbosity flags, reported by IsVerbose() and IsExtraVerbose().
# Both are off by default.
verbose = False
extra_verbose = False

475

def IsVerbose():
  '''Returns the current value of the module-level 'verbose' flag.'''
  return verbose

478

def IsExtraVerbose():
  '''Returns the current value of the module-level 'extra_verbose' flag.'''
  return extra_verbose

481

def ParseDefine(define):
  '''Parses a define argument and returns the name and value.

  The format is either "NAME=VAL" or "NAME", using True as the default value.
  Values of "1" and "0" are transformed to True and False respectively.
  Whitespace around the name and around the value is stripped.

  Args:
    define: a string of the form "NAME=VAL" or "NAME".

  Returns:
    A (name, value) pair. name is a string, value a string or boolean.
  '''
  # Only the first '=' separates name from value; later ones belong to VAL.
  name, separator, value = define.partition('=')
  name = name.strip()
  if not separator:
    return (name, True)
  value = value.strip()
  if value == "1":
    value = True
  elif value == "0":
    value = False
  return (name, value)

503

504

class Substituter(object):
  '''Finds and substitutes variable names in text strings.

  Given a dictionary of variable names and values, prepares to
  search for patterns of the form [VAR_NAME] in a text.
  The value will be substituted back efficiently.
  Also applies to tclib.Message objects.
  '''

  def __init__(self):
    '''Create an empty substituter.'''
    self.substitutions_ = {}
    # Cached regular expression; (re)built lazily by GetExp().
    self.exp = None
    self.dirty_ = True

  def AddSubstitutions(self, subs):
    '''Add new values to the substitutor.

    Args:
      subs: A dictionary of new substitutions.
    '''
    self.substitutions_.update(subs)
    self.dirty_ = True

  def AddMessages(self, messages, lang):
    '''Adds substitutions extracted from node.Message objects.

    Args:
      messages: a list of node.Message objects.
      lang: The translation language to use in substitutions.
    '''
    subs = [(str(msg.attrs['name']), msg.Translate(lang)) for msg in messages]
    # AddSubstitutions() marks the cached expression as stale.
    self.AddSubstitutions(dict(subs))

  def GetExp(self):
    '''Obtain a regular expression that will find substitution keys in text.

    Create and cache if the substituter has been updated. Use the cached value
    otherwise. Keys will be enclosed in [square brackets] in text.

    Returns:
      A regular expression object.
    '''
    if self.dirty_:
      # Keys are escaped so that ones containing regexp metacharacters match
      # literally, and the alternatives are joined by an unescaped '|'.
      components = [r'\[%s\]' % re.escape(k) for k in self.substitutions_]
      if components:
        self.exp = re.compile('(%s)' % '|'.join(components))
      else:
        # Nothing to substitute: use an expression that never matches, so
        # split() hands the text back as a single fragment.
        self.exp = re.compile('(?!)')
      self.dirty_ = False
    return self.exp

  def Substitute(self, text):
    '''Substitute the variable values in the given text.

    Text of the form [message_name] will be replaced by the message's value.

    Args:
      text: A string of text.

    Returns:
      A string of text with substitutions done.
    '''
    return ''.join([self._SubFragment(f) for f in self.GetExp().split(text)])

  def _SubFragment(self, fragment):
    '''Utility function for Substitute.

    Performs a simple substitution if the fragment is exactly of the form
    [message_name].

    Args:
      fragment: A simple string.

    Returns:
      A string with the substitution done.
    '''
    if len(fragment) > 2 and fragment[0] == '[' and fragment[-1] == ']':
      sub = self.substitutions_.get(fragment[1:-1], None)
      if sub is not None:
        return sub
    return fragment

  def SubstituteMessage(self, msg):
    '''Apply substitutions to a tclib.Message object.

    Text of the form [message_name] will be replaced by a new placeholder,
    whose presentation will take the form the message_name_{UsageCount}, and
    whose example will be the message's value. Existing placeholders are
    not affected.

    Args:
      msg: A tclib.Message object.

    Returns:
      A tclib.Message object, with substitutions done.
    '''
    from grit import tclib  # avoid circular import
    counts = {}
    text = msg.GetPresentableContent()
    placeholders = []
    pieces = []
    for f in self.GetExp().split(text):
      sub = self._SubFragment(f)
      if f != sub:
        # The fragment is a known [key]: replace it by a numbered placeholder.
        f = str(f)
        count = counts.get(f, 0) + 1
        counts[f] = count
        name = "%s_%d" % (f[1:-1], count)
        placeholders.append(tclib.Placeholder(name, f, sub))
        pieces.append(name)
      else:
        pieces.append(f)
    if placeholders:
      return tclib.Message(''.join(pieces),
                           msg.GetPlaceholders() + placeholders,
                           msg.GetDescription(), msg.GetMeaning())
    else:
      return msg

620

621

class TempDir(object):
  '''Creates files with the specified contents in a temporary directory,
  for unit testing.

  Can be used as a context manager, in which case the directory is removed
  when the 'with' block is left.
  '''
  def __init__(self, file_data):
    '''Creates the temporary directory and populates it.

    Args:
      file_data: a dictionary mapping file names (relative to the temporary
          directory, possibly containing sub-directories) to file contents.
    '''
    self._tmp_dir_name = tempfile.mkdtemp()
    assert not os.listdir(self.GetPath())
    try:
      for name, contents in file_data.items():
        file_path = self.GetPath(name)
        dir_path = os.path.split(file_path)[0]
        if not os.path.exists(dir_path):
          os.makedirs(dir_path)
        with open(file_path, 'w') as f:
          f.write(contents)
    except Exception:
      # Don't leave a half-populated directory behind if setup fails.
      self.CleanUp()
      raise

  def __enter__(self):
    return self

  def __exit__(self, *exc_info):
    self.CleanUp()

  def CleanUp(self):
    '''Removes the temporary directory and everything in it.'''
    shutil.rmtree(self.GetPath())

  def GetPath(self, name=''):
    '''Returns the path of 'name' inside the temporary directory, or the path
    of the directory itself if no name is given.'''
    name = os.path.join(self._tmp_dir_name, name)
    # os.path.join() discards the directory if 'name' is absolute.
    assert name.startswith(self._tmp_dir_name)
    return name

  def AsCurrentDir(self):
    '''Returns a context manager that makes the temporary directory the
    current working directory for the duration of a 'with' block.'''
    return self._AsCurrentDirClass(self.GetPath())

  class _AsCurrentDirClass(object):
    '''Context manager that changes into 'path' on entry and restores the
    previous working directory on exit.'''
    def __init__(self, path):
      self.path = path
    def __enter__(self):
      self.oldpath = os.getcwd()
      os.chdir(self.path)
    def __exit__(self, *exc_info):
      os.chdir(self.oldpath)

OLD	NEW

« no previous file with comments | « grit/tool/xmb_unittest.py ('k') | grit/util_unittest.py » ('j') | no next file with comments »