OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python2.4 |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 '''Unit tests for grit.gather.tr_html''' |
| 7 |
| 8 |
| 9 import os |
| 10 import sys |
| 11 if __name__ == '__main__': |
| 12 sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '../..')) |
| 13 |
| 14 import types |
| 15 import unittest |
| 16 |
| 17 from grit.gather import tr_html |
| 18 from grit import clique |
| 19 from grit import util |
| 20 |
| 21 |
class ParserUnittest(unittest.TestCase):
  '''Tests for tr_html.HtmlChunks.Parse() and tr_html.HtmlToMessage().

  Equality checks use assertEqual() instead of failUnless(a == b) so that a
  failing run reports both the actual and the expected value.
  '''

  def testChunking(self):
    # Parse() is compared against lists of 3-tuples.  Judging by the cases
    # below, the fields appear to be (is_translateable, text, description).
    parser = tr_html.HtmlChunks()

    chunks = parser.Parse('<p>Hello <b>dear</b> how <i>are</i>you?<p>Fine!')
    self.assertEqual(chunks, [
      (False, '<p>', ''), (True, 'Hello <b>dear</b> how <i>are</i>you?', ''),
      (False, '<p>', ''), (True, 'Fine!', '')])

    chunks = parser.Parse('<p> Hello <b>dear</b> how <i>are</i>you? <p>Fine!')
    self.assertEqual(chunks, [
      (False, '<p> ', ''), (True, 'Hello <b>dear</b> how <i>are</i>you?', ''),
      (False, ' <p>', ''), (True, 'Fine!', '')])

    chunks = parser.Parse('<p> Hello <b>dear how <i>are you? <p> Fine!')
    self.assertEqual(chunks, [
      (False, '<p> ', ''), (True, 'Hello <b>dear how <i>are you?', ''),
      (False, ' <p> ', ''), (True, 'Fine!', '')])

    # Ensure translateable sections that start with inline tags contain
    # the starting inline tag.
    chunks = parser.Parse('<b>Hello!</b> how are you?<p><i>I am fine.</i>')
    self.assertEqual(chunks, [
      (True, '<b>Hello!</b> how are you?', ''), (False, '<p>', ''),
      (True, '<i>I am fine.</i>', '')])

    # Ensure translateable sections that end with inline tags contain
    # the ending inline tag.
    chunks = parser.Parse("Hello! How are <b>you?</b><p><i>I'm fine!</i>")
    self.assertEqual(chunks, [
      (True, 'Hello! How are <b>you?</b>', ''), (False, '<p>', ''),
      (True, "<i>I'm fine!</i>", '')])

    # Check capitals and explicit descriptions.
    chunks = parser.Parse(
      '<!-- desc=bingo! --><B>Hello!</B> how are you?<P><I>I am fine.</I>')
    self.assertEqual(chunks, [
      (True, '<B>Hello!</B> how are you?', 'bingo!'), (False, '<P>', ''),
      (True, '<I>I am fine.</I>', '')])
    chunks = parser.Parse(
      '<B><!-- desc=bingo! -->Hello!</B> how are you?<P><I>I am fine.</I>')
    self.assertEqual(chunks, [
      (True, '<B>Hello!</B> how are you?', 'bingo!'), (False, '<P>', ''),
      (True, '<I>I am fine.</I>', '')])
    # Linebreaks get changed to spaces just like any other HTML content.
    chunks = parser.Parse(
      '<B>Hello!</B> <!-- desc=bi\nngo\n! -->how are you?'
      '<P><I>I am fine.</I>')
    self.assertEqual(chunks, [
      (True, '<B>Hello!</B> how are you?', 'bi ngo !'), (False, '<P>', ''),
      (True, '<I>I am fine.</I>', '')])

    # In this case, because the explicit description appears after the first
    # translateable, it will actually apply to the second translateable.
    chunks = parser.Parse(
      '<B>Hello!</B> how are you?<!-- desc=bingo! --><P><I>I am fine.</I>')
    self.assertEqual(chunks, [
      (True, '<B>Hello!</B> how are you?', ''), (False, '<P>', ''),
      (True, '<I>I am fine.</I>', 'bingo!')])

    # Check that replaceables within block tags (where attributes would go) are
    # handled correctly.
    chunks = parser.Parse('<b>Hello!</b> how are you?<p [BINGO] [$~BONGO~$]>'
                          '<i>I am fine.</i>')
    self.assertEqual(chunks, [
      (True, '<b>Hello!</b> how are you?', ''),
      (False, '<p [BINGO] [$~BONGO~$]>', ''),
      (True, '<i>I am fine.</i>', '')])

    # Check that the contents of preformatted tags preserve line breaks.
    chunks = parser.Parse('<textarea>Hello\nthere\nhow\nare\nyou?</textarea>')
    self.assertEqual(chunks, [
      (False, '<textarea>', ''),
      (True, 'Hello\nthere\nhow\nare\nyou?', ''),
      (False, '</textarea>', '')])

    # ...and that other tags' line breaks are converted to spaces.
    chunks = parser.Parse('<p>Hello\nthere\nhow\nare\nyou?</p>')
    self.assertEqual(chunks, [
      (False, '<p>', ''),
      (True, 'Hello there how are you?', ''),
      (False, '</p>', '')])

  def testTranslateableAttributes(self):
    parser = tr_html.HtmlChunks()

    # Check that the translateable attributes in <img>, <submit>, <button> and
    # <text> elements are handled correctly.
    chunks = parser.Parse('<img src=bingo.jpg alt="hello there">'
                          '<input type=submit value="hello">'
                          '<input type="button" value="hello">'
                          '<input type=\'text\' value=\'Howdie\'>')
    self.assertEqual(chunks, [
      (False, '<img src=bingo.jpg alt="', ''), (True, 'hello there', ''),
      (False, '"><input type=submit value="', ''), (True, 'hello', ''),
      (False, '"><input type="button" value="', ''), (True, 'hello', ''),
      (False, '"><input type=\'text\' value=\'', ''), (True, 'Howdie', ''),
      (False, '\'>', '')])

  def testTranslateableHtmlToMessage(self):
    # NOTE(review): several literals in this test look as if HTML entities
    # were decoded when this copy of the file was produced (for example the
    # raw copyright sign, and the input that is expected to yield SPACE).
    # They are reproduced exactly as given -- confirm against the repository.
    msg = tr_html.HtmlToMessage(
      'Hello <b>[USERNAME]</b>, <how> <i>are</i> you?')
    self.assertEqual(
      msg.GetPresentableContent(),
      'Hello BEGIN_BOLDX_USERNAME_XEND_BOLD, '
      '<how> BEGIN_ITALICareEND_ITALIC you?')

    msg = tr_html.HtmlToMessage('<b>Hello</b><I>Hello</I><b>Hello</b>')
    self.assertEqual(
      msg.GetPresentableContent(),
      'BEGIN_BOLD_1HelloEND_BOLD_1BEGIN_ITALICHelloEND_ITALIC'
      'BEGIN_BOLD_2HelloEND_BOLD_2')

    # Check that nesting (of the <font> tags) is handled correctly - i.e. that
    # the closing placeholder numbers match the opening placeholders.
    msg = tr_html.HtmlToMessage(
      '''<font size=-1><font color=#FF0000>Update!</font> '''
      '''<a href='http://desktop.google.com/whatsnew.html?hl=[$~LANG~$]'>'''
      '''New Features</a>: Now search PDFs, MP3s, Firefox web history, and '''
      '''more</font>''')
    self.assertEqual(
      msg.GetPresentableContent(),
      'BEGIN_FONT_1BEGIN_FONT_2Update!END_FONT_2 BEGIN_LINK'
      'New FeaturesEND_LINK: Now search PDFs, MP3s, Firefox '
      'web history, and moreEND_FONT_1')

    msg = tr_html.HtmlToMessage(
      '''<a href='[$~URL~$]'><b>[NUM][CAT]</b></a>''')
    self.assertEqual(msg.GetPresentableContent(),
                     'BEGIN_LINKBEGIN_BOLDX_NUM_XX_CAT_XEND_BOLDEND_LINK')

    msg = tr_html.HtmlToMessage(
      '''<font size=-1><a class=q onClick='return window.qs?qs(this):1' '''
      '''href='http://[WEBSERVER][SEARCH_URI]'>Desktop</a></font> '''
      ''' ''')
    self.assertEqual(
      msg.GetPresentableContent(),
      '''BEGIN_FONTBEGIN_LINKDesktopEND_LINKEND_FONTSPACE''')

    msg = tr_html.HtmlToMessage(
      '''<br><br><center><font size=-2>©2005 Google </font></center>''', 1)
    self.assertEqual(
      msg.GetPresentableContent(),
      u'BEGIN_BREAK_1BEGIN_BREAK_2BEGIN_CENTERBEGIN_FONT\xa92005'
      u' Google END_FONTEND_CENTER')

    msg = tr_html.HtmlToMessage(
      ''' - <a class=c href=[$~CACHE~$]>Cached</a>''')
    self.assertEqual(msg.GetPresentableContent(),
                     ' - BEGIN_LINKCachedEND_LINK')

    # Check that upper-case tags are handled correctly.
    msg = tr_html.HtmlToMessage(
      '''You can read the <A HREF='http://desktop.google.com/privacypolicy.'''
      '''html?hl=[LANG_CODE]'>Privacy Policy</A> and <A HREF='http://desktop'''
      '''.google.com/privacyfaq.html?hl=[LANG_CODE]'>Privacy FAQ</A> online.''')
    self.assertEqual(
      msg.GetPresentableContent(),
      'You can read the BEGIN_LINK_1Privacy PolicyEND_LINK_1 and '
      'BEGIN_LINK_2Privacy FAQEND_LINK_2 online.')

    # Check that tags with linebreaks immediately preceding them are handled
    # correctly.  The linebreaks are spelled as explicit '\n' escapes so the
    # exact whitespace in the message is visible.
    msg = tr_html.HtmlToMessage(
      'You can read the\n'
      "<A HREF='http://desktop.google.com/privacypolicy.html?hl=[LANG_CODE]'>"
      'Privacy Policy</A>\n'
      "and <A HREF='http://desktop.google.com/privacyfaq.html?hl=[LANG_CODE]'>"
      'Privacy FAQ</A> online.')
    self.assertEqual(
      msg.GetPresentableContent(),
      'You can read the\n'
      'BEGIN_LINK_1Privacy PolicyEND_LINK_1\n'
      'and BEGIN_LINK_2Privacy FAQEND_LINK_2 online.')
| 184 |
| 185 |
| 186 |
class TrHtmlUnittest(unittest.TestCase):
  '''Tests for tr_html.TrHtml plus a few HtmlToMessage/util regressions.

  Equality checks use assertEqual() instead of failUnless(a == b) so that a
  failing run reports both the actual and the expected value.
  '''

  def testTable(self):
    html = tr_html.TrHtml(
      '<table class="shaded-header"><tr>\n'
      '<td class="header-element b expand">Preferences</td>\n'
      '<td class="header-element s">\n'
      '<a href="http://desktop.google.com/preferences.html">'
      'Preferences Help</a>\n'
      '</td>\n'
      '</tr></table>')
    html.Parse()
    self.assertEqual(
      html.skeleton_[3].GetMessage().GetPresentableContent(),
      'BEGIN_LINKPreferences HelpEND_LINK')

  def testSubmitAttribute(self):
    html = tr_html.TrHtml(
      '</td>\n'
      '<td class="header-element">'
      '<input type=submit value="Save Preferences"\n'
      'name=submit2></td>\n'
      '</tr></table>')
    html.Parse()
    self.assertEqual(
      html.skeleton_[1].GetMessage().GetPresentableContent(),
      'Save Preferences')

  def testWhitespaceAfterInlineTag(self):
    '''Test that even if there is whitespace after an inline tag at the start
    of a translateable section the inline tag will be included.
    '''
    html = tr_html.TrHtml(
      '''<label for=DISPLAYNONE><font size=-1> Hello</font>''')
    html.Parse()
    self.assertEqual(html.skeleton_[1].GetMessage().GetRealContent(),
                     '<font size=-1> Hello</font>')

  def testSillyHeader(self):
    html = tr_html.TrHtml(
      '[!]\n'
      'title\tHello\n'
      'bingo\n'
      'bongo\n'
      'bla\n'
      '\n'
      '<p>Other stuff</p>')
    html.Parse()
    content = html.skeleton_[1].GetMessage().GetRealContent()
    self.assertEqual(content, 'Hello')
    self.assertEqual(html.skeleton_[-1], '</p>')
    # Right after the translateable the nontranslateable should start with
    # a linebreak (this catches a bug we had).
    self.assertEqual(html.skeleton_[2][0], '\n')

  def testExplicitDescriptions(self):
    html = tr_html.TrHtml(
      'Hello [USER]<br/><!-- desc=explicit -->'
      '<input type="button">Go!</input>')
    html.Parse()
    msg = html.GetCliques()[1].GetMessage()
    self.assertEqual(msg.GetDescription(), 'explicit')
    self.assertEqual(msg.GetRealContent(), 'Go!')

  def testRegressionInToolbarAbout(self):
    html = tr_html.TrHtml.FromFile(
      util.PathFromRoot(r'grit/testdata/toolbar_about.html'))
    html.Parse()
    for cl in html.GetCliques():
      content = cl.GetMessage().GetRealContent()
      if 'De parvis grandis acervus erit' in content:
        self.assertFalse(
          '$/translate' in content,
          'Message unexpectedly contains "$/translate": %r' % content)

  def HtmlFromFileWithManualCheck(self, f):
    '''Loads and parses the HTML file f and returns the TrHtml object.

    Every skeleton item is also converted to its presentable form.  The
    collected values are not asserted on; they exist for manual inspection
    (e.g. in a debugger), and building them exercises GetMessage() and
    GetPresentableContent() on each non-string item.

    Args:
      f: path of the HTML file to load.

    Returns:
      The parsed tr_html.TrHtml instance.
    '''
    html = tr_html.TrHtml.FromFile(f)
    html.Parse()

    # For manual results inspection only.  Named 'presentable' rather than
    # 'list' so the builtin is not shadowed.
    presentable = []
    for item in html.skeleton_:
      if isinstance(item, types.StringTypes):
        presentable.append(item)
      else:
        presentable.append(item.GetMessage().GetPresentableContent())

    return html

  def testPrivacyHtml(self):
    html = self.HtmlFromFileWithManualCheck(
      util.PathFromRoot(r'grit/testdata/privacy.html'))

    self.assertEqual(html.skeleton_[1].GetMessage().GetRealContent(),
                     'Privacy and Google Desktop Search')
    self.assertTrue(html.skeleton_[3].startswith('<'))
    self.assertTrue(len(html.skeleton_) > 10)

  def testPreferencesHtml(self):
    html = self.HtmlFromFileWithManualCheck(
      util.PathFromRoot(r'grit/testdata/preferences.html'))

    # Verify that we don't get '[STATUS-MESSAGE]' as the original content of
    # one of the MessageClique objects (it would be a placeholder-only message
    # and we're supposed to have stripped those).
    placeholder_only = ('[STATUS-MESSAGE]', '[ADDIN-DO] [ADDIN-OPTIONS]')
    for item in html.skeleton_:
      if not isinstance(item, clique.MessageClique):
        continue
      content = item.GetMessage().GetRealContent()
      if content in placeholder_only:
        self.fail('Placeholder-only message was not stripped: %r' % content)

    self.assertTrue(len(html.skeleton_) > 100)

  def AssertNumberOfTranslateables(self, files, num):
    '''Fails if any of the files in files don't have exactly
    num translateable sections.

    Args:
      files: ['file1', 'file2']
      num: 3
    '''
    for f in files:
      path = util.PathFromRoot(r'grit/testdata/%s' % f)
      html = self.HtmlFromFileWithManualCheck(path)
      count = len(html.GetCliques())
      # Name the offending file; a bare comparison gives no hint which of the
      # files had the wrong count.
      self.assertEqual(
        count, num,
        '%s: expected %r translateable sections, found %r' % (f, num, count))

  def testFewTranslateables(self):
    self.AssertNumberOfTranslateables(['browser.html', 'email_thread.html',
                                       'header.html', 'mini.html',
                                       'oneclick.html', 'script.html',
                                       'time_related.html', 'versions.html'], 0)
    self.AssertNumberOfTranslateables(['footer.html', 'hover.html'], 1)

  def testOtherHtmlFilesForManualInspection(self):
    files = [
      'about.html', 'bad_browser.html', 'cache_prefix.html',
      'cache_prefix_file.html', 'chat_result.html', 'del_footer.html',
      'del_header.html', 'deleted.html', 'details.html', 'email_result.html',
      'error.html', 'explicit_web.html', 'footer.html',
      'homepage.html', 'indexing_speed.html',
      'install_prefs.html', 'install_prefs2.html',
      'oem_enable.html', 'oem_non_admin.html', 'onebox.html',
      'password.html', 'quit_apps.html', 'recrawl.html',
      'searchbox.html', 'sidebar_h.html', 'sidebar_v.html', 'status.html',
    ]
    for f in files:
      self.HtmlFromFileWithManualCheck(
        util.PathFromRoot(r'grit/testdata/%s' % f))

  def testTranslate(self):
    # Note that the English translation of documents that use character
    # literals (e.g. the copyright sign) will not be the same as the original
    # document because the character literal will be transformed into the
    # Unicode character itself.  So for this test we choose some relatively
    # complex HTML without character entities.
    # NOTE(review): the original comment names one exception that is "handled
    # specially", but the word is missing from this copy of the file; it is
    # presumably the non-breaking-space entity -- confirm.
    # NOTE(review): indentation inside the HTML literal was not preserved in
    # this copy; the check below is a round-trip, so it does not depend on it.
    html = tr_html.TrHtml(''' <script>
<!--
function checkOffice() { var w = document.getElementById("h7");
var e = document.getElementById("h8"); var o = document.getElementById("h10");
if (!(w.checked || e.checked)) { o.checked=0;o.disabled=1;} else {o.disabled=0;} }
// -->
</script>
<input type=checkbox [CHECK-DOC] name=DOC id=h7 onclick='checkOffice()'>
<label for=h7> Word</label><br>
<input type=checkbox [CHECK-XLS] name=XLS id=h8 onclick='checkOffice()'>
<label for=h8> Excel</label><br>
<input type=checkbox [CHECK-PPT] name=PPT id=h9>
<label for=h9> PowerPoint</label><br>
</span></td><td nowrap valign=top><span class="s">
<input type=checkbox [CHECK-PDF] name=PDF id=hpdf>
<label for=hpdf> PDF</label><br>
<input type=checkbox [CHECK-TXT] name=TXT id=h6>
<label for=h6> Text, media, and other files</label><br>
</tr>
<tr><td nowrap valign=top colspan=3><span class="s"><br />
<input type=checkbox [CHECK-SECUREOFFICE] name=SECUREOFFICE id=h10>
<label for=h10> Password-protected Office documents (Word, Excel)</label><br />
<input type=checkbox [DISABLED-HTTPS] [CHECK-HTTPS] name=HTTPS id=h12><label
for=h12> Secure pages (HTTPS) in web history</label></span></td></tr>
</table>''')
    html.Parse()
    trans = html.Translate('en')
    # Translating to English must reproduce the original text.
    self.assertEqual(html.GetText(), trans)

  def testHtmlToMessageWithBlockTags(self):
    msg = tr_html.HtmlToMessage(
      'Hello<p>Howdie<img alt="bingo" src="image.gif">', True)
    self.assertEqual(msg.GetPresentableContent(),
                     'HelloBEGIN_PARAGRAPHHowdieBEGIN_BLOCKbingoEND_BLOCK')

    msg = tr_html.HtmlToMessage(
      'Hello<p>Howdie<input type="button" value="bingo">', True)
    self.assertEqual(msg.GetPresentableContent(),
                     'HelloBEGIN_PARAGRAPHHowdieBEGIN_BLOCKbingoEND_BLOCK')

  def testHtmlToMessageRegressions(self):
    # NOTE(review): the literals here may originally have contained HTML
    # entities that were decoded in this copy of the file -- confirm.
    msg = tr_html.HtmlToMessage(' - ', True)
    self.assertEqual(msg.GetPresentableContent(), ' - ')

  def testEscapeUnescaped(self):
    # NOTE(review): 'text' and the final expected value look as if their HTML
    # entities were decoded when this copy of the file was produced (the first
    # assertion expects a \u00a0 that the visible input cannot yield).  They
    # are reproduced exactly as given -- confirm against the repository.
    text = '© & "<hello>"'
    unescaped = util.UnescapeHtml(text)
    self.assertEqual(unescaped, u'\u00a9\u00a0 & "<hello>"')
    escaped_unescaped = util.EscapeHtml(unescaped, True)
    self.assertEqual(escaped_unescaped, u'\u00a9\u00a0 & "<hello>"')

  def testRegressionCjkHtmlFile(self):
    # TODO(joi) Fix this problem where unquoted attributes that
    # have a value that is CJK characters causes the regular expression
    # match never to return (culprit is the _ELEMENT regexp).
    # The check stays disabled until then; enabling it would hang the run.
    if False:
      self.HtmlFromFileWithManualCheck(util.PathFromRoot(
        r'grit/testdata/ko_oem_enable_bug.html'))

  def testRegressionCpuHang(self):
    # If this regression occurs, the unit test will never return.
    html = tr_html.TrHtml(
      '''<input type=text size=12 id=advFileTypeEntry '''
      '''[~SHOW-FILETYPE-BOX~] value="[EXT]" name=ext>''')
    html.Parse()
| 410 |
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
  unittest.main()
OLD | NEW |