grit/gather/tr_html_unittest.py - Issue 7994004: Initial source commit to grit-i18n project.

Side by Side Diff: grit/gather/tr_html_unittest.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/

Patch Set: Created 9 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 #!/usr/bin/python2.4

	2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 '''Unit tests for grit.gather.tr_html'''

	7

	8

	9 import os

	10 import sys

	11 if __name__ == '__main__':

	12 sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '../..'))

	13

	14 import types

	15 import unittest

	16

	17 from grit.gather import tr_html

	18 from grit import clique

	19 from grit import util

	20

	21

	22 class ParserUnittest(unittest.TestCase):

	23 def testChunking(self):

	24 p = tr_html.HtmlChunks()

	25 chunks = p.Parse('<p>Hello <b>dear</b> how <i>are</i>you?<p>Fine!')

	26 self.failUnless(chunks == [

	27 (False, '<p>', ''), (True, 'Hello <b>dear</b> how <i>are</i>you?', ''),

	28 (False, '<p>', ''), (True, 'Fine!', '')])

	29

	30 chunks = p.Parse('<p> Hello <b>dear</b> how <i>are</i>you? <p>Fine!')

	31 self.failUnless(chunks == [

	32 (False, '<p> ', ''), (True, 'Hello <b>dear</b> how <i>are</i>you?', ''),

	33 (False, ' <p>', ''), (True, 'Fine!', '')])

	34

	35 chunks = p.Parse('<p> Hello <b>dear how <i>are you? <p> Fine!')

	36 self.failUnless(chunks == [

	37 (False, '<p> ', ''), (True, 'Hello <b>dear how <i>are you?', ''),

	38 (False, ' <p> ', ''), (True, 'Fine!', '')])

	39

	40 # Ensure translateable sections that start with inline tags contain

	41 # the starting inline tag.

	42 chunks = p.Parse('<b>Hello!</b> how are you?<p><i>I am fine.</i>')

	43 self.failUnless(chunks == [

	44 (True, '<b>Hello!</b> how are you?', ''), (False, '<p>', ''),

	45 (True, '<i>I am fine.</i>', '')])

	46

	47 # Ensure translateable sections that end with inline tags contain

	48 # the ending inline tag.

	49 chunks = p.Parse("Hello! How are <b>you?</b><p><i>I'm fine!</i>")

	50 self.failUnless(chunks == [

	51 (True, 'Hello! How are <b>you?</b>', ''), (False, '<p>', ''),

	52 (True, "<i>I'm fine!</i>", '')])

	53

	54 # Check capitals and explicit descriptions

	55 chunks = p.Parse('<!-- desc=bingo! --><B>Hello!</B> how are you?<P><I>I am f ine.</I>')

	56 self.failUnless(chunks == [

	57 (True, '<B>Hello!</B> how are you?', 'bingo!'), (False, '<P>', ''),

	58 (True, '<I>I am fine.</I>', '')])

	59 chunks = p.Parse('<B><!-- desc=bingo! -->Hello!</B> how are you?<P><I>I am f ine.</I>')

	60 self.failUnless(chunks == [

	61 (True, '<B>Hello!</B> how are you?', 'bingo!'), (False, '<P>', ''),

	62 (True, '<I>I am fine.</I>', '')])

	63 # Linebreaks get changed to spaces just like any other HTML content

	64 chunks = p.Parse('<B>Hello!</B> <!-- desc=bi\nngo\n! -->how are you?<P><I>I am fine.</I>')

	65 self.failUnless(chunks == [

	66 (True, '<B>Hello!</B> how are you?', 'bi ngo !'), (False, '<P>', ''),

	67 (True, '<I>I am fine.</I>', '')])

	68

	69 # In this case, because the explicit description appears after the first

	70 # translateable, it will actually apply to the second translateable.

	71 chunks = p.Parse('<B>Hello!</B> how are you?<!-- desc=bingo! --><P><I>I am f ine.</I>')

	72 self.failUnless(chunks == [

	73 (True, '<B>Hello!</B> how are you?', ''), (False, '<P>', ''),

	74 (True, '<I>I am fine.</I>', 'bingo!')])

	75

	76 # Check that replaceables within block tags (where attributes would go) are

	77 # handled correctly.

	78 chunks = p.Parse('<b>Hello!</b> how are you?<p [BINGO] [$~BONGO~$]>'

	79 '<i>I am fine.</i>')

	80 self.failUnless(chunks == [

	81 (True, '<b>Hello!</b> how are you?', ''),

	82 (False, '<p [BINGO] [$~BONGO~$]>', ''),

	83 (True, '<i>I am fine.</i>', '')])

	84

	85 # Check that the contents of preformatted tags preserve line breaks.

	86 chunks = p.Parse('<textarea>Hello\nthere\nhow\nare\nyou?</textarea>')

	87 self.failUnless(chunks == [(False, '<textarea>', ''),

	88 (True, 'Hello\nthere\nhow\nare\nyou?', ''), (False, '</textarea>', '')])

	89

	90 # ...and that other tags' line breaks are converted to spaces

	91 chunks = p.Parse('<p>Hello\nthere\nhow\nare\nyou?</p>')

	92 self.failUnless(chunks == [(False, '<p>', ''),

	93 (True, 'Hello there how are you?', ''), (False, '</p>', '')])

	94

	95 def testTranslateableAttributes(self):

	96 p = tr_html.HtmlChunks()

	97

	98 # Check that the translateable attributes in <img>, <submit>, <button> and

	99 # <text> elements buttons are handled correctly.

	100 chunks = p.Parse('<img src=bingo.jpg alt="hello there">'

	101 '<input type=submit value="hello">'

	102 '<input type="button" value="hello">'

	103 '<input type=\'text\' value=\'Howdie\'>')

	104 self.failUnless(chunks == [

	105 (False, '<img src=bingo.jpg alt="', ''), (True, 'hello there', ''),

	106 (False, '"><input type=submit value="', ''), (True, 'hello', ''),

	107 (False, '"><input type="button" value="', ''), (True, 'hello', ''),

	108 (False, '"><input type=\'text\' value=\'', ''), (True, 'Howdie', ''),

	109 (False, '\'>', '')])

	110

	111

	112 def testTranslateableHtmlToMessage(self):

	113 msg = tr_html.HtmlToMessage(

	114 'Hello <b>[USERNAME]</b>, <how> <i>are</i> you?')

	115 pres = msg.GetPresentableContent()

	116 self.failUnless(pres ==

	117 'Hello BEGIN_BOLDX_USERNAME_XEND_BOLD, '

	118 '<how> BEGIN_ITALICareEND_ITALIC you?')

	119

	120 msg = tr_html.HtmlToMessage('<b>Hello</b><I>Hello</I><b>Hello</b>')

	121 pres = msg.GetPresentableContent()

	122 self.failUnless(pres ==

	123 'BEGIN_BOLD_1HelloEND_BOLD_1BEGIN_ITALICHelloEND_ITALIC'

	124 'BEGIN_BOLD_2HelloEND_BOLD_2')

	125

	126 # Check that nesting (of the <font> tags) is handled correctly - i.e. that

	127 # the closing placeholder numbers match the opening placeholders.

	128 msg = tr_html.HtmlToMessage(

	129 '''<font size=-1><font color=#FF0000>Update!</font> '''

	130 '''<a href='http://desktop.google.com/whatsnew.html?hl=[$~LANG~$]'>'''

	131 '''New Features</a>: Now search PDFs, MP3s, Firefox web history, and '''

	132 '''more</font>''')

	133 pres = msg.GetPresentableContent()

	134 self.failUnless(pres ==

	135 'BEGIN_FONT_1BEGIN_FONT_2Update!END_FONT_2 BEGIN_LINK'

	136 'New FeaturesEND_LINK: Now search PDFs, MP3s, Firefox '

	137 'web history, and moreEND_FONT_1')

	138

	139 msg = tr_html.HtmlToMessage('''<a href='[$~URL~$]'><b>[NUM][CAT]</b></a>''')

	140 pres = msg.GetPresentableContent()

	141 self.failUnless(pres == 'BEGIN_LINKBEGIN_BOLDX_NUM_XX_CAT_XEND_BOLDEND_LINK' )

	142

	143 msg = tr_html.HtmlToMessage(

	144 '''<font size=-1><a class=q onClick='return window.qs?qs(this):1' '''

	145 '''href='http://[WEBSERVER][SEARCH_URI]'>Desktop</a></font>  '''

	146 '''  ''')

	147 pres = msg.GetPresentableContent()

	148 self.failUnless(pres ==

	149 '''BEGIN_FONTBEGIN_LINKDesktopEND_LINKEND_FONTSPACE''')

	150

	151 msg = tr_html.HtmlToMessage(

	152 '''<br><br><center><font size=-2>©2005 Google </font></center>''', 1)

	153 pres = msg.GetPresentableContent()

	154 self.failUnless(pres ==

	155 u'BEGIN_BREAK_1BEGIN_BREAK_2BEGIN_CENTERBEGIN_FONT\xa92005'

	156 u' Google END_FONTEND_CENTER')

	157

	158 msg = tr_html.HtmlToMessage(

	159 ''' - <a class=c href=[$~CACHE~$]>Cached</a>''')

	160 pres = msg.GetPresentableContent()

	161 self.failUnless(pres ==

	162 ' - BEGIN_LINKCachedEND_LINK')

	163

	164 # Check that upper-case tags are handled correctly.

	165 msg = tr_html.HtmlToMessage(

	166 '''You can read the <A HREF='http://desktop.google.com/privacypolicy.'''

	167 '''html?hl=[LANG_CODE]'>Privacy Policy</A> and <A HREF='http://desktop'''

	168 '''.google.com/privacyfaq.html?hl=[LANG_CODE]'>Privacy FAQ</A> online.''')

	169 pres = msg.GetPresentableContent()

	170 self.failUnless(pres ==

	171 'You can read the BEGIN_LINK_1Privacy PolicyEND_LINK_1 and '

	172 'BEGIN_LINK_2Privacy FAQEND_LINK_2 online.')

	173

	174 # Check that tags with linebreaks immediately preceding them are handled

	175 # correctly.

	176 msg = tr_html.HtmlToMessage(

	177 '''You can read the

	178 <A HREF='http://desktop.google.com/privacypolicy.html?hl=[LANG_CODE]'>Privacy Po licy</A>

	179 and <A HREF='http://desktop.google.com/privacyfaq.html?hl=[LANG_CODE]'>Privacy F AQ</A> online.''')

	180 pres = msg.GetPresentableContent()

	181 self.failUnless(pres == '''You can read the

	182 BEGIN_LINK_1Privacy PolicyEND_LINK_1

	183 and BEGIN_LINK_2Privacy FAQEND_LINK_2 online.''')

	184

	185

	186

	187 class TrHtmlUnittest(unittest.TestCase):

	188 def testTable(self):

	189 html = tr_html.TrHtml('''<table class="shaded-header"><tr>

	190 <td class="header-element b expand">Preferences</td>

	191 <td class="header-element s">

	192 <a href="http://desktop.google.com/preferences.html">Preferences Help</a>

	193 </td>

	194 </tr></table>''')

	195 html.Parse()

	196 self.failUnless(html.skeleton_[3].GetMessage().GetPresentableContent() ==

	197 'BEGIN_LINKPreferences HelpEND_LINK')

	198

	199 def testSubmitAttribute(self):

	200 html = tr_html.TrHtml('''</td>

	201 <td class="header-element"><input type=submit value="Save Preferences"

	202 name=submit2></td>

	203 </tr></table>''')

	204 html.Parse()

	205 self.failUnless(html.skeleton_[1].GetMessage().GetPresentableContent() ==

	206 'Save Preferences')

	207

	208 def testWhitespaceAfterInlineTag(self):

	209 '''Test that even if there is whitespace after an inline tag at the start

	210 of a translateable section the inline tag will be included.

	211 '''

	212 html = tr_html.TrHtml('''<label for=DISPLAYNONE><font size=-1> Hello</font>' '')

	213 html.Parse()

	214 self.failUnless(html.skeleton_[1].GetMessage().GetRealContent() ==

	215 '<font size=-1> Hello</font>')

	216

	217 def testSillyHeader(self):

	218 html = tr_html.TrHtml('''[!]

	219 title\tHello

	220 bingo

	221 bongo

	222 bla

	223

	224 <p>Other stuff</p>''')

	225 html.Parse()

	226 content = html.skeleton_[1].GetMessage().GetRealContent()

	227 self.failUnless(content == 'Hello')

	228 self.failUnless(html.skeleton_[-1] == '</p>')

	229 # Right after the translateable the nontranslateable should start with

	230 # a linebreak (this catches a bug we had).

	231 self.failUnless(html.skeleton_[2][0] == '\n')

	232

	233

	234 def testExplicitDescriptions(self):

	235 html = tr_html.TrHtml('Hello [USER]<br/><!-- desc=explicit --><input type="b utton">Go!</input>')

	236 html.Parse()

	237 msg = html.GetCliques()[1].GetMessage()

	238 self.failUnless(msg.GetDescription() == 'explicit')

	239 self.failUnless(msg.GetRealContent() == 'Go!')

	240

	241

	242 def testRegressionInToolbarAbout(self):

	243 html = tr_html.TrHtml.FromFile(

	244 util.PathFromRoot(r'grit/testdata/toolbar_about.html'))

	245 html.Parse()

	246 cliques = html.GetCliques()

	247 for cl in cliques:

	248 content = cl.GetMessage().GetRealContent()

	249 if content.count('De parvis grandis acervus erit'):

	250 self.failIf(content.count('$/translate'))

	251

	252

	253 def HtmlFromFileWithManualCheck(self, f):

	254 html = tr_html.TrHtml.FromFile(f)

	255 html.Parse()

	256

	257 # For manual results inspection only...

	258 list = []

	259 for item in html.skeleton_:

	260 if isinstance(item, types.StringTypes):

	261 list.append(item)

	262 else:

	263 list.append(item.GetMessage().GetPresentableContent())

	264

	265 return html

	266

	267

	268 def testPrivacyHtml(self):

	269 html = self.HtmlFromFileWithManualCheck(

	270 util.PathFromRoot(r'grit/testdata/privacy.html'))

	271

	272 self.failUnless(html.skeleton_[1].GetMessage().GetRealContent() ==

	273 'Privacy and Google Desktop Search')

	274 self.failUnless(html.skeleton_[3].startswith('<'))

	275 self.failUnless(len(html.skeleton_) > 10)

	276

	277

	278 def testPreferencesHtml(self):

	279 html = self.HtmlFromFileWithManualCheck(

	280 util.PathFromRoot(r'grit/testdata/preferences.html'))

	281

	282 # Verify that we don't get '[STATUS-MESSAGE]' as the original content of

	283 # one of the MessageClique objects (it would be a placeholder-only message

	284 # and we're supposed to have stripped those).

	285

	286 for item in filter(lambda x: isinstance(x, clique.MessageClique),

	287 html.skeleton_):

	288 if (item.GetMessage().GetRealContent() == '[STATUS-MESSAGE]' or

	289 item.GetMessage().GetRealContent() == '[ADDIN-DO] [ADDIN-OPTIONS]'):

	290 self.fail()

	291

	292 self.failUnless(len(html.skeleton_) > 100)

	293

	294 def AssertNumberOfTranslateables(self, files, num):

	295 '''Fails if any of the files in files don't have exactly

	296 num translateable sections.

	297

	298 Args:

	299 files: ['file1', 'file2']

	300 num: 3

	301 '''

	302 for f in files:

	303 f = util.PathFromRoot(r'grit/testdata/%s' % f)

	304 html = self.HtmlFromFileWithManualCheck(f)

	305 self.failUnless(len(html.GetCliques()) == num)

	306

	307 def testFewTranslateables(self):

	308 self.AssertNumberOfTranslateables(['browser.html', 'email_thread.html',

	309 'header.html', 'mini.html',

	310 'oneclick.html', 'script.html',

	311 'time_related.html', 'versions.html'], 0)

	312 self.AssertNumberOfTranslateables(['footer.html', 'hover.html'], 1)

	313

	314 def testOtherHtmlFilesForManualInspection(self):

	315 files = [

	316 'about.html', 'bad_browser.html', 'cache_prefix.html',

	317 'cache_prefix_file.html', 'chat_result.html', 'del_footer.html',

	318 'del_header.html', 'deleted.html', 'details.html', 'email_result.html',

	319 'error.html', 'explicit_web.html', 'footer.html',

	320 'homepage.html', 'indexing_speed.html',

	321 'install_prefs.html', 'install_prefs2.html',

	322 'oem_enable.html', 'oem_non_admin.html', 'onebox.html',

	323 'password.html', 'quit_apps.html', 'recrawl.html',

	324 'searchbox.html', 'sidebar_h.html', 'sidebar_v.html', 'status.html',

	325 ]

	326 for f in files:

	327 self.HtmlFromFileWithManualCheck(

	328 util.PathFromRoot(r'grit/testdata/%s' % f))

	329

	330 def testTranslate(self):

	331 # Note that the English translation of documents that use character

	332 # literals (e.g. ©) will not be the same as the original document

	333 # because the character literal will be transformed into the Unicode

	334 # character itself. So for this test we choose some relatively complex

	335 # HTML without character entities (but with   because that's handled

	336 # specially).

	337 html = tr_html.TrHtml(''' <script>

	338 <!--

	339 function checkOffice() { var w = document.getElementById("h7");

	340 var e = document.getElementById("h8"); var o = document.getElementById("h1 0");

	341 if (!(w.checked \|\| e.checked)) { o.checked=0;o.disabled=1;} else {o.disabl ed=0;} }

	342 // -->

	343 </script>

	344 <input type=checkbox [CHECK-DOC] name=DOC id=h7 onclick='checkOffice()'>

	345 <label for=h7> Word</label><br>

	346 <input type=checkbox [CHECK-XLS] name=XLS id=h8 onclick='checkOffice()'>

	347 <label for=h8> Excel</label><br>

	348 <input type=checkbox [CHECK-PPT] name=PPT id=h9>

	349 <label for=h9> PowerPoint</label><br>

	350 </span></td><td nowrap valign=top><span class="s">

	351 <input type=checkbox [CHECK-PDF] name=PDF id=hpdf>

	352 <label for=hpdf> PDF</label><br>

	353 <input type=checkbox [CHECK-TXT] name=TXT id=h6>

	354 <label for=h6> Text, media, and other files</label><br>

	355 </tr>

	356 <tr><td nowrap valign=top colspan=3><span class="s"><br />

	357 <input type=checkbox [CHECK-SECUREOFFICE] name=SECUREOFFICE id=h10>

	358 <label for=h10> Password-protected Office documents (Word, Excel)</label ><br />

	359 <input type=checkbox [DISABLED-HTTPS] [CHECK-HTTPS] name=HTTPS id=h12><l abel

	360 for=h12> Secure pages (HTTPS) in web history</label></span></td></tr>

	361 </table>''')

	362 html.Parse()

	363 trans = html.Translate('en')

	364 if (html.GetText() != trans):

	365 self.fail()

	366

	367

	368 def testHtmlToMessageWithBlockTags(self):

	369 msg = tr_html.HtmlToMessage(

	370 'Hello<p>Howdie<img alt="bingo" src="image.gif">', True)

	371 result = msg.GetPresentableContent()

	372 self.failUnless(

	373 result == 'HelloBEGIN_PARAGRAPHHowdieBEGIN_BLOCKbingoEND_BLOCK')

	374

	375 msg = tr_html.HtmlToMessage(

	376 'Hello<p>Howdie<input type="button" value="bingo">', True)

	377 result = msg.GetPresentableContent()

	378 self.failUnless(

	379 result == 'HelloBEGIN_PARAGRAPHHowdieBEGIN_BLOCKbingoEND_BLOCK')

	380

	381

	382 def testHtmlToMessageRegressions(self):

	383 msg = tr_html.HtmlToMessage(' - ', True)

	384 result = msg.GetPresentableContent()

	385 self.failUnless(result == ' - ')

	386

	387

	388 def testEscapeUnescaped(self):

	389 text = '©  & "<hello>"'

	390 unescaped = util.UnescapeHtml(text)

	391 self.failUnless(unescaped == u'\u00a9\u00a0 & "<hello>"')

	392 escaped_unescaped = util.EscapeHtml(unescaped, True)

	393 self.failUnless(escaped_unescaped ==

	394 u'\u00a9\u00a0 & "<hello>"')

	395

	396 def testRegressionCjkHtmlFile(self):

	397 # TODO(joi) Fix this problem where unquoted attributes that

	398 # have a value that is CJK characters causes the regular expression

	399 # match never to return. (culprit is the _ELEMENT regexp(

	400 if False:

	401 html = self.HtmlFromFileWithManualCheck(util.PathFromRoot(

	402 r'grit/testdata/ko_oem_enable_bug.html'))

	403 self.failUnless(True)

	404

	405 def testRegressionCpuHang(self):

	406 # If this regression occurs, the unit test will never return

	407 html = tr_html.TrHtml(

	408 '''<input type=text size=12 id=advFileTypeEntry [~SHOW-FILETYPE-BOX~] valu e="[EXT]" name=ext>''')

	409 html.Parse()

	410

# Run every TestCase defined in this module when executed as a script.
if __name__ == '__main__':
  unittest.main()

OLD	NEW

« no previous file with comments | « grit/gather/tr_html.py ('k') | grit/gather/txt.py » ('j') | no next file with comments »