Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: grit/gather/tr_html_unittest.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « grit/gather/tr_html.py ('k') | grit/gather/txt.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 #!/usr/bin/python2.4
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 '''Unit tests for grit.gather.tr_html'''
7
8
9 import os
10 import sys
11 if __name__ == '__main__':
12 sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
13
14 import types
15 import unittest
16
17 from grit.gather import tr_html
18 from grit import clique
19 from grit import util
20
21
class ParserUnittest(unittest.TestCase):
  '''Tests for tr_html.HtmlChunks.Parse and tr_html.HtmlToMessage.'''

  def testChunking(self):
    '''Checks the chunk list returned by HtmlChunks.Parse.

    Each expected chunk is a 3-tuple.  Judging by the cases below, the first
    item is True for translateable text, the second is the text of the chunk
    and the third is the explicit description taken from a
    <!-- desc=... --> comment ('' when there is none).
    '''
    p = tr_html.HtmlChunks()
    chunks = p.Parse('<p>Hello <b>dear</b> how <i>are</i>you?<p>Fine!')
    self.assertEqual(chunks, [
      (False, '<p>', ''), (True, 'Hello <b>dear</b> how <i>are</i>you?', ''),
      (False, '<p>', ''), (True, 'Fine!', '')])

    chunks = p.Parse('<p> Hello <b>dear</b> how <i>are</i>you? <p>Fine!')
    self.assertEqual(chunks, [
      (False, '<p> ', ''), (True, 'Hello <b>dear</b> how <i>are</i>you?', ''),
      (False, ' <p>', ''), (True, 'Fine!', '')])

    chunks = p.Parse('<p> Hello <b>dear how <i>are you? <p> Fine!')
    self.assertEqual(chunks, [
      (False, '<p> ', ''), (True, 'Hello <b>dear how <i>are you?', ''),
      (False, ' <p> ', ''), (True, 'Fine!', '')])

    # Ensure translateable sections that start with inline tags contain
    # the starting inline tag.
    chunks = p.Parse('<b>Hello!</b> how are you?<p><i>I am fine.</i>')
    self.assertEqual(chunks, [
      (True, '<b>Hello!</b> how are you?', ''), (False, '<p>', ''),
      (True, '<i>I am fine.</i>', '')])

    # Ensure translateable sections that end with inline tags contain
    # the ending inline tag.
    chunks = p.Parse("Hello! How are <b>you?</b><p><i>I'm fine!</i>")
    self.assertEqual(chunks, [
      (True, 'Hello! How are <b>you?</b>', ''), (False, '<p>', ''),
      (True, "<i>I'm fine!</i>", '')])

    # Check capitals and explicit descriptions
    chunks = p.Parse('<!-- desc=bingo! --><B>Hello!</B> how are you?<P>'
                     '<I>I am fine.</I>')
    self.assertEqual(chunks, [
      (True, '<B>Hello!</B> how are you?', 'bingo!'), (False, '<P>', ''),
      (True, '<I>I am fine.</I>', '')])
    chunks = p.Parse('<B><!-- desc=bingo! -->Hello!</B> how are you?<P>'
                     '<I>I am fine.</I>')
    self.assertEqual(chunks, [
      (True, '<B>Hello!</B> how are you?', 'bingo!'), (False, '<P>', ''),
      (True, '<I>I am fine.</I>', '')])
    # Linebreaks get changed to spaces just like any other HTML content
    chunks = p.Parse('<B>Hello!</B> <!-- desc=bi\nngo\n! -->how are you?<P>'
                     '<I>I am fine.</I>')
    self.assertEqual(chunks, [
      (True, '<B>Hello!</B> how are you?', 'bi ngo !'), (False, '<P>', ''),
      (True, '<I>I am fine.</I>', '')])

    # In this case, because the explicit description appears after the first
    # translateable, it will actually apply to the second translateable.
    chunks = p.Parse('<B>Hello!</B> how are you?<!-- desc=bingo! --><P>'
                     '<I>I am fine.</I>')
    self.assertEqual(chunks, [
      (True, '<B>Hello!</B> how are you?', ''), (False, '<P>', ''),
      (True, '<I>I am fine.</I>', 'bingo!')])

    # Check that replaceables within block tags (where attributes would go)
    # are handled correctly.
    chunks = p.Parse('<b>Hello!</b> how are you?<p [BINGO] [$~BONGO~$]>'
                     '<i>I am fine.</i>')
    self.assertEqual(chunks, [
      (True, '<b>Hello!</b> how are you?', ''),
      (False, '<p [BINGO] [$~BONGO~$]>', ''),
      (True, '<i>I am fine.</i>', '')])

    # Check that the contents of preformatted tags preserve line breaks.
    chunks = p.Parse('<textarea>Hello\nthere\nhow\nare\nyou?</textarea>')
    self.assertEqual(chunks, [
      (False, '<textarea>', ''),
      (True, 'Hello\nthere\nhow\nare\nyou?', ''),
      (False, '</textarea>', '')])

    # ...and that other tags' line breaks are converted to spaces
    chunks = p.Parse('<p>Hello\nthere\nhow\nare\nyou?</p>')
    self.assertEqual(chunks, [
      (False, '<p>', ''),
      (True, 'Hello there how are you?', ''),
      (False, '</p>', '')])

  def testTranslateableAttributes(self):
    '''Checks that attribute values are split out as translateable chunks.'''
    p = tr_html.HtmlChunks()

    # Check that the translateable attributes of <img> elements and of
    # submit, button and text <input> elements are handled correctly.
    chunks = p.Parse('<img src=bingo.jpg alt="hello there">'
                     '<input type=submit value="hello">'
                     '<input type="button" value="hello">'
                     '<input type=\'text\' value=\'Howdie\'>')
    self.assertEqual(chunks, [
      (False, '<img src=bingo.jpg alt="', ''), (True, 'hello there', ''),
      (False, '"><input type=submit value="', ''), (True, 'hello', ''),
      (False, '"><input type="button" value="', ''), (True, 'hello', ''),
      (False, '"><input type=\'text\' value=\'', ''), (True, 'Howdie', ''),
      (False, '\'>', '')])

  def testTranslateableHtmlToMessage(self):
    '''Checks the presentable content of messages built by HtmlToMessage.

    The expectations show inline tags and [REPLACEABLE] markers turned into
    named placeholders (BEGIN_BOLD, X_USERNAME_X, ...), with numeric suffixes
    when the same kind of tag occurs more than once in a message.
    '''
    msg = tr_html.HtmlToMessage(
      'Hello <b>[USERNAME]</b>, &lt;how&gt;&nbsp;<i>are</i> you?')
    pres = msg.GetPresentableContent()
    self.assertEqual(pres,
                     'Hello BEGIN_BOLDX_USERNAME_XEND_BOLD, '
                     '<how>&nbsp;BEGIN_ITALICareEND_ITALIC you?')

    msg = tr_html.HtmlToMessage('<b>Hello</b><I>Hello</I><b>Hello</b>')
    pres = msg.GetPresentableContent()
    self.assertEqual(pres,
                     'BEGIN_BOLD_1HelloEND_BOLD_1BEGIN_ITALICHelloEND_ITALIC'
                     'BEGIN_BOLD_2HelloEND_BOLD_2')

    # Check that nesting (of the <font> tags) is handled correctly - i.e. that
    # the closing placeholder numbers match the opening placeholders.
    msg = tr_html.HtmlToMessage(
      '''<font size=-1><font color=#FF0000>Update!</font> '''
      '''<a href='http://desktop.google.com/whatsnew.html?hl=[$~LANG~$]'>'''
      '''New Features</a>: Now search PDFs, MP3s, Firefox web history, and '''
      '''more</font>''')
    pres = msg.GetPresentableContent()
    self.assertEqual(pres,
                     'BEGIN_FONT_1BEGIN_FONT_2Update!END_FONT_2 BEGIN_LINK'
                     'New FeaturesEND_LINK: Now search PDFs, MP3s, Firefox '
                     'web history, and moreEND_FONT_1')

    msg = tr_html.HtmlToMessage(
      '''<a href='[$~URL~$]'><b>[NUM][CAT]</b></a>''')
    pres = msg.GetPresentableContent()
    self.assertEqual(pres,
                     'BEGIN_LINKBEGIN_BOLDX_NUM_XX_CAT_XEND_BOLDEND_LINK')

    msg = tr_html.HtmlToMessage(
      '''<font size=-1><a class=q onClick='return window.qs?qs(this):1' '''
      '''href='http://[WEBSERVER][SEARCH_URI]'>Desktop</a></font>&nbsp;&nbsp;'''
      '''&nbsp;&nbsp;''')
    pres = msg.GetPresentableContent()
    self.assertEqual(pres,
                     '''BEGIN_FONTBEGIN_LINKDesktopEND_LINKEND_FONTSPACE''')

    msg = tr_html.HtmlToMessage(
      '''<br><br><center><font size=-2>&copy;2005 Google </font></center>''',
      1)
    pres = msg.GetPresentableContent()
    self.assertEqual(pres,
                     u'BEGIN_BREAK_1BEGIN_BREAK_2BEGIN_CENTERBEGIN_FONT\xa92005'
                     u' Google END_FONTEND_CENTER')

    msg = tr_html.HtmlToMessage(
      '''&nbsp;-&nbsp;<a class=c href=[$~CACHE~$]>Cached</a>''')
    pres = msg.GetPresentableContent()
    self.assertEqual(pres, '&nbsp;-&nbsp;BEGIN_LINKCachedEND_LINK')

    # Check that upper-case tags are handled correctly.
    msg = tr_html.HtmlToMessage(
      '''You can read the <A HREF='http://desktop.google.com/privacypolicy.'''
      '''html?hl=[LANG_CODE]'>Privacy Policy</A> and <A HREF='http://desktop'''
      '''.google.com/privacyfaq.html?hl=[LANG_CODE]'>Privacy FAQ</A> online.''')
    pres = msg.GetPresentableContent()
    self.assertEqual(pres,
                     'You can read the BEGIN_LINK_1Privacy PolicyEND_LINK_1 and '
                     'BEGIN_LINK_2Privacy FAQEND_LINK_2 online.')

    # Check that tags with linebreaks immediately preceding them are handled
    # correctly.  The line breaks are spelled out as '\n' so that the long
    # HREF lines can be wrapped in this source file without changing the
    # string itself.
    msg = tr_html.HtmlToMessage(
      'You can read the\n'
      "<A HREF='http://desktop.google.com/privacypolicy.html?hl=[LANG_CODE]'>"
      'Privacy Policy</A>\n'
      "and <A HREF='http://desktop.google.com/privacyfaq.html?hl=[LANG_CODE]'>"
      'Privacy FAQ</A> online.')
    pres = msg.GetPresentableContent()
    self.assertEqual(pres,
                     'You can read the\n'
                     'BEGIN_LINK_1Privacy PolicyEND_LINK_1\n'
                     'and BEGIN_LINK_2Privacy FAQEND_LINK_2 online.')
184
185
186
class TrHtmlUnittest(unittest.TestCase):
  '''Tests for the tr_html.TrHtml gatherer and related helpers.'''

  def testTable(self):
    '''Checks that a link inside a table cell becomes its own message.'''
    html = tr_html.TrHtml('''<table class="shaded-header"><tr>
<td class="header-element b expand">Preferences</td>
<td class="header-element s">
<a href="http://desktop.google.com/preferences.html">Preferences&nbsp;Help</a>
</td>
</tr></table>''')
    html.Parse()
    self.assertEqual(html.skeleton_[3].GetMessage().GetPresentableContent(),
                     'BEGIN_LINKPreferences&nbsp;HelpEND_LINK')

  def testSubmitAttribute(self):
    '''Checks that the value of a submit button is extracted as a message.'''
    html = tr_html.TrHtml('''</td>
<td class="header-element"><input type=submit value="Save Preferences"
name=submit2></td>
</tr></table>''')
    html.Parse()
    self.assertEqual(html.skeleton_[1].GetMessage().GetPresentableContent(),
                     'Save Preferences')

  def testWhitespaceAfterInlineTag(self):
    '''Test that even if there is whitespace after an inline tag at the start
    of a translateable section the inline tag will be included.
    '''
    html = tr_html.TrHtml(
      '''<label for=DISPLAYNONE><font size=-1> Hello</font>''')
    html.Parse()
    self.assertEqual(html.skeleton_[1].GetMessage().GetRealContent(),
                     '<font size=-1> Hello</font>')

  def testSillyHeader(self):
    '''Checks parsing of a document that starts with a '[!]' header block.'''
    html = tr_html.TrHtml('''[!]
title\tHello
bingo
bongo
bla

<p>Other stuff</p>''')
    html.Parse()
    content = html.skeleton_[1].GetMessage().GetRealContent()
    self.assertEqual(content, 'Hello')
    self.assertEqual(html.skeleton_[-1], '</p>')
    # Right after the translateable the nontranslateable should start with
    # a linebreak (this catches a bug we had).
    self.assertEqual(html.skeleton_[2][0], '\n')

  def testExplicitDescriptions(self):
    '''Checks that a <!-- desc=... --> comment sets the description of the
    message that follows it.
    '''
    html = tr_html.TrHtml('Hello [USER]<br/><!-- desc=explicit -->'
                          '<input type="button">Go!</input>')
    html.Parse()
    msg = html.GetCliques()[1].GetMessage()
    self.assertEqual(msg.GetDescription(), 'explicit')
    self.assertEqual(msg.GetRealContent(), 'Go!')

  def testRegressionInToolbarAbout(self):
    '''Checks that the message containing the Latin motto in
    toolbar_about.html does not also contain '$/translate'.
    '''
    html = tr_html.TrHtml.FromFile(
      util.PathFromRoot(r'grit/testdata/toolbar_about.html'))
    html.Parse()
    for cl in html.GetCliques():
      content = cl.GetMessage().GetRealContent()
      if 'De parvis grandis acervus erit' in content:
        self.assertFalse('$/translate' in content,
                         "'$/translate' leaked into message: %r" % content)

  def HtmlFromFileWithManualCheck(self, f):
    '''Loads and parses the HTML file f and returns the TrHtml object.

    Also renders every skeleton item, so GetPresentableContent() is called
    on each message even though the rendered pieces are discarded.

    Args:
      f: path of the HTML file to load

    Return:
      the parsed tr_html.TrHtml object
    '''
    html = tr_html.TrHtml.FromFile(f)
    html.Parse()

    # For manual results inspection only (e.g. from a debugger); the name
    # deliberately avoids shadowing the builtin 'list'.
    presentable = []
    for item in html.skeleton_:
      if isinstance(item, types.StringTypes):
        presentable.append(item)
      else:
        presentable.append(item.GetMessage().GetPresentableContent())

    return html

  def testPrivacyHtml(self):
    '''Spot-checks the skeleton produced for privacy.html.'''
    html = self.HtmlFromFileWithManualCheck(
      util.PathFromRoot(r'grit/testdata/privacy.html'))

    self.assertEqual(html.skeleton_[1].GetMessage().GetRealContent(),
                     'Privacy and Google Desktop Search')
    self.assertTrue(html.skeleton_[3].startswith('<'))
    self.assertTrue(len(html.skeleton_) > 10)

  def testPreferencesHtml(self):
    '''Checks that placeholder-only messages are stripped from
    preferences.html.
    '''
    html = self.HtmlFromFileWithManualCheck(
      util.PathFromRoot(r'grit/testdata/preferences.html'))

    # Verify that we don't get '[STATUS-MESSAGE]' as the original content of
    # one of the MessageClique objects (it would be a placeholder-only message
    # and we're supposed to have stripped those).
    placeholder_only = ('[STATUS-MESSAGE]', '[ADDIN-DO] [ADDIN-OPTIONS]')
    for item in html.skeleton_:
      if not isinstance(item, clique.MessageClique):
        continue
      content = item.GetMessage().GetRealContent()
      self.assertFalse(content in placeholder_only,
                       'placeholder-only message not stripped: %r' % content)

    self.assertTrue(len(html.skeleton_) > 100)

  def AssertNumberOfTranslateables(self, files, num):
    '''Fails if any of the files in files don't have exactly
    num translateable sections.

    Args:
      files: ['file1', 'file2']
      num: 3
    '''
    for name in files:
      path = util.PathFromRoot(r'grit/testdata/%s' % name)
      html = self.HtmlFromFileWithManualCheck(path)
      found = len(html.GetCliques())
      self.assertEqual(found, num,
                       '%s: expected %d translateable sections, found %d' %
                       (name, num, found))

  def testFewTranslateables(self):
    '''Checks test files that have zero or one translateable section.'''
    self.AssertNumberOfTranslateables(['browser.html', 'email_thread.html',
                                       'header.html', 'mini.html',
                                       'oneclick.html', 'script.html',
                                       'time_related.html', 'versions.html'], 0)
    self.AssertNumberOfTranslateables(['footer.html', 'hover.html'], 1)

  def testOtherHtmlFilesForManualInspection(self):
    '''Parses a batch of test files; passes as long as nothing raises.'''
    files = [
      'about.html', 'bad_browser.html', 'cache_prefix.html',
      'cache_prefix_file.html', 'chat_result.html', 'del_footer.html',
      'del_header.html', 'deleted.html', 'details.html', 'email_result.html',
      'error.html', 'explicit_web.html', 'footer.html',
      'homepage.html', 'indexing_speed.html',
      'install_prefs.html', 'install_prefs2.html',
      'oem_enable.html', 'oem_non_admin.html', 'onebox.html',
      'password.html', 'quit_apps.html', 'recrawl.html',
      'searchbox.html', 'sidebar_h.html', 'sidebar_v.html', 'status.html',
    ]
    for name in files:
      self.HtmlFromFileWithManualCheck(
        util.PathFromRoot(r'grit/testdata/%s' % name))

  def testTranslate(self):
    '''Checks that translating to 'en' reproduces the original text.'''
    # Note that the English translation of documents that use character
    # literals (e.g. &copy;) will not be the same as the original document
    # because the character literal will be transformed into the Unicode
    # character itself.  So for this test we choose some relatively complex
    # HTML without character entities (but with &nbsp; because that's handled
    # specially).
    html = tr_html.TrHtml(''' <script>
<!--
function checkOffice() { var w = document.getElementById("h7");
var e = document.getElementById("h8"); var o = document.getElementById("h10");
if (!(w.checked || e.checked)) { o.checked=0;o.disabled=1;} else {o.disabled=0;} }
// -->
</script>
<input type=checkbox [CHECK-DOC] name=DOC id=h7 onclick='checkOffice()'>
<label for=h7> Word</label><br>
<input type=checkbox [CHECK-XLS] name=XLS id=h8 onclick='checkOffice()'>
<label for=h8> Excel</label><br>
<input type=checkbox [CHECK-PPT] name=PPT id=h9>
<label for=h9> PowerPoint</label><br>
</span></td><td nowrap valign=top><span class="s">
<input type=checkbox [CHECK-PDF] name=PDF id=hpdf>
<label for=hpdf> PDF</label><br>
<input type=checkbox [CHECK-TXT] name=TXT id=h6>
<label for=h6> Text, media, and other files</label><br>
</tr>&nbsp;&nbsp;
<tr><td nowrap valign=top colspan=3><span class="s"><br />
<input type=checkbox [CHECK-SECUREOFFICE] name=SECUREOFFICE id=h10>
<label for=h10> Password-protected Office documents (Word, Excel)</label><br />
<input type=checkbox [DISABLED-HTTPS] [CHECK-HTTPS] name=HTTPS id=h12><label
for=h12> Secure pages (HTTPS) in web history</label></span></td></tr>
</table>''')
    html.Parse()
    trans = html.Translate('en')
    self.assertEqual(html.GetText(), trans)

  def testHtmlToMessageWithBlockTags(self):
    '''Checks HtmlToMessage when called with a true second argument; the
    expectations show block tags (<p>, <img>, <input>) as placeholders.
    '''
    msg = tr_html.HtmlToMessage(
      'Hello<p>Howdie<img alt="bingo" src="image.gif">', True)
    result = msg.GetPresentableContent()
    self.assertEqual(
      result, 'HelloBEGIN_PARAGRAPHHowdieBEGIN_BLOCKbingoEND_BLOCK')

    msg = tr_html.HtmlToMessage(
      'Hello<p>Howdie<input type="button" value="bingo">', True)
    result = msg.GetPresentableContent()
    self.assertEqual(
      result, 'HelloBEGIN_PARAGRAPHHowdieBEGIN_BLOCKbingoEND_BLOCK')

  def testHtmlToMessageRegressions(self):
    '''Checks that a message consisting of ' - ' is kept verbatim.'''
    msg = tr_html.HtmlToMessage(' - ', True)
    result = msg.GetPresentableContent()
    self.assertEqual(result, ' - ')

  def testEscapeUnescaped(self):
    '''Checks util.UnescapeHtml and re-escaping with util.EscapeHtml.'''
    text = '&copy;&nbsp; & &quot;&lt;hello&gt;&quot;'
    unescaped = util.UnescapeHtml(text)
    self.assertEqual(unescaped, u'\u00a9\u00a0 & "<hello>"')
    escaped_unescaped = util.EscapeHtml(unescaped, True)
    self.assertEqual(escaped_unescaped,
                     u'\u00a9\u00a0 &amp; &quot;&lt;hello&gt;&quot;')

  def testRegressionCjkHtmlFile(self):
    '''Placeholder for a disabled regression test; currently always passes.'''
    # TODO(joi) Fix this problem where unquoted attributes that
    # have a value that is CJK characters causes the regular expression
    # match never to return (culprit is the _ELEMENT regexp).
    # The body stays disabled because enabling it would hang the test run.
    if False:
      self.HtmlFromFileWithManualCheck(util.PathFromRoot(
        r'grit/testdata/ko_oem_enable_bug.html'))

  def testRegressionCpuHang(self):
    '''Parses input that used to make the parser hang.'''
    # If this regression occurs, the unit test will never return
    html = tr_html.TrHtml(
      '''<input type=text size=12 id=advFileTypeEntry '''
      '''[~SHOW-FILETYPE-BOX~] value="[EXT]" name=ext>''')
    html.Parse()
410
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()
OLDNEW
« no previous file with comments | « grit/gather/tr_html.py ('k') | grit/gather/txt.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698