OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Flattens a HTML file by inlining its external resources. | 6 """Flattens a HTML file by inlining its external resources. |
7 | 7 |
8 This is a small script that takes a HTML file, looks for src attributes | 8 This is a small script that takes a HTML file, looks for src attributes |
9 and inlines the specified file, producing one HTML file with no external | 9 and inlines the specified file, producing one HTML file with no external |
10 dependencies. It recursively inlines the included files. | 10 dependencies. It recursively inlines the included files. |
11 """ | 11 """ |
12 | 12 |
13 import os | 13 import os |
14 import re | 14 import re |
15 import sys | 15 import sys |
16 import base64 | 16 import base64 |
17 import mimetypes | 17 import mimetypes |
18 | 18 |
19 from grit import lazy_re | 19 from grit import lazy_re |
20 from grit import util | 20 from grit import util |
| 21 from grit.format import minifier |
21 | 22 |
22 # There is a python bug that makes mimetypes crash if the Windows | 23 # There is a python bug that makes mimetypes crash if the Windows |
23 # registry contains non-Latin keys ( http://bugs.python.org/issue9291 | 24 # registry contains non-Latin keys ( http://bugs.python.org/issue9291 |
24 # ). Initing manually and blocking external mime-type databases will | 25 # ). Initing manually and blocking external mime-type databases will |
25 # prevent that bug and if we add svg manually, it will still give us | 26 # prevent that bug and if we add svg manually, it will still give us |
26 # the data we need. | 27 # the data we need. |
27 mimetypes.init([]) | 28 mimetypes.init([]) |
28 mimetypes.add_type('image/svg+xml', '.svg') | 29 mimetypes.add_type('image/svg+xml', '.svg') |
29 | 30 |
30 DIST_DEFAULT = 'chromium' | 31 DIST_DEFAULT = 'chromium' |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
122 | 123 |
123 Holds the inlined data and the set of filenames of all the inlined | 124 Holds the inlined data and the set of filenames of all the inlined |
124 files. | 125 files. |
125 """ | 126 """ |
126 def __init__(self, inlined_data, inlined_files): | 127 def __init__(self, inlined_data, inlined_files): |
127 self.inlined_data = inlined_data | 128 self.inlined_data = inlined_data |
128 self.inlined_files = inlined_files | 129 self.inlined_files = inlined_files |
129 | 130 |
130 def DoInline( | 131 def DoInline( |
131 input_filename, grd_node, allow_external_script=False, | 132 input_filename, grd_node, allow_external_script=False, |
132 preprocess_only=False, names_only=False, rewrite_function=None, | 133 preprocess_only=False, names_only=False, strip_whitespace=False, |
133 filename_expansion_function=None): | 134 rewrite_function=None, filename_expansion_function=None): |
134 """Helper function that inlines the resources in a specified file. | 135 """Helper function that inlines the resources in a specified file. |
135 | 136 |
136 Reads input_filename, finds all the src attributes and attempts to | 137 Reads input_filename, finds all the src attributes and attempts to |
137 inline the files they are referring to, then returns the result and | 138 inline the files they are referring to, then returns the result and |
138 the set of inlined files. | 139 the set of inlined files. |
139 | 140 |
140 Args: | 141 Args: |
141 input_filename: name of file to read in | 142 input_filename: name of file to read in |
142 grd_node: html node from the grd file for this include tag | 143 grd_node: html node from the grd file for this include tag |
143 preprocess_only: Skip all HTML processing, only handle <if> and <include>. | 144 preprocess_only: Skip all HTML processing, only handle <if> and <include>. |
144 names_only: |None| will be returned for the inlined contents (faster). | 145 names_only: |None| will be returned for the inlined contents (faster). |
| 146 strip_whitespace: remove whitespace and comments in the input files. |
145 rewrite_function: function(filepath, text, distribution) which will be | 147 rewrite_function: function(filepath, text, distribution) which will be |
146 called to rewrite html content before inlining images. | 148 called to rewrite html content before inlining images. |
147 filename_expansion_function: function(filename) which will be called to | 149 filename_expansion_function: function(filename) which will be called to |
148 rewrite filenames before attempting to read them. | 150 rewrite filenames before attempting to read them. |
149 Returns: | 151 Returns: |
150 a tuple of the inlined data as a string and the set of filenames | 152 a tuple of the inlined data as a string and the set of filenames |
151 of all the inlined files | 153 of all the inlined files |
152 """ | 154 """ |
153 if filename_expansion_function: | 155 if filename_expansion_function: |
154 input_filename = filename_expansion_function(input_filename) | 156 input_filename = filename_expansion_function(input_filename) |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
213 pos = next_if.end() | 215 pos = next_if.end() |
214 | 216 |
215 content = str[content_start:end_if.start()] | 217 content = str[content_start:end_if.start()] |
216 trailing = str[end_if.end():] | 218 trailing = str[end_if.end():] |
217 | 219 |
218 if condition_satisfied: | 220 if condition_satisfied: |
219 str = leading + CheckConditionalElements(content) + trailing | 221 str = leading + CheckConditionalElements(content) + trailing |
220 else: | 222 else: |
221 str = leading + trailing | 223 str = leading + trailing |
222 | 224 |
223 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): | 225 def InlineFileContents(src_match, |
| 226 pattern, |
| 227 inlined_files=inlined_files, |
| 228 strip_whitespace=False): |
224 """Helper function to inline external files of various types""" | 229 """Helper function to inline external files of various types""" |
225 filepath = GetFilepath(src_match) | 230 filepath = GetFilepath(src_match) |
226 if filepath is None: | 231 if filepath is None: |
227 return src_match.group(0) | 232 return src_match.group(0) |
228 inlined_files.add(filepath) | 233 inlined_files.add(filepath) |
229 | 234 |
230 if names_only: | 235 if names_only: |
231 inlined_files.update(GetResourceFilenames( | 236 inlined_files.update(GetResourceFilenames( |
232 filepath, | 237 filepath, |
233 allow_external_script, | 238 allow_external_script, |
234 rewrite_function, | 239 rewrite_function, |
235 filename_expansion_function=filename_expansion_function)) | 240 filename_expansion_function=filename_expansion_function)) |
236 return "" | 241 return "" |
237 | 242 |
238 return pattern % InlineToString( | 243 return pattern % InlineToString( |
239 filepath, grd_node, allow_external_script=allow_external_script, | 244 filepath, grd_node, allow_external_script=allow_external_script, |
| 245 strip_whitespace=strip_whitespace, |
240 filename_expansion_function=filename_expansion_function) | 246 filename_expansion_function=filename_expansion_function) |
241 | 247 |
242 def InlineIncludeFiles(src_match): | 248 def InlineIncludeFiles(src_match): |
243 """Helper function to directly inline generic external files (without | 249 """Helper function to directly inline generic external files (without |
244 wrapping them with any kind of tags). | 250 wrapping them with any kind of tags). |
245 """ | 251 """ |
246 return InlineFileContents(src_match, '%s') | 252 return InlineFileContents(src_match, '%s') |
247 | 253 |
248 def InlineScript(match): | 254 def InlineScript(match): |
249 """Helper function to inline external script files""" | 255 """Helper function to inline external script files""" |
250 attrs = (match.group('attrs1') + match.group('attrs2')).strip() | 256 attrs = (match.group('attrs1') + match.group('attrs2')).strip() |
251 if attrs: | 257 if attrs: |
252 attrs = ' ' + attrs | 258 attrs = ' ' + attrs |
253 return InlineFileContents(match, '<script' + attrs + '>%s</script>') | 259 return InlineFileContents(match, '<script' + attrs + '>%s</script>', |
| 260 strip_whitespace=True) |
254 | 261 |
255 def InlineCSSText(text, css_filepath): | 262 def InlineCSSText(text, css_filepath): |
256 """Helper function that inlines external resources in CSS text""" | 263 """Helper function that inlines external resources in CSS text""" |
257 filepath = os.path.dirname(css_filepath) | 264 filepath = os.path.dirname(css_filepath) |
258 # Allow custom modifications before inlining images. | 265 # Allow custom modifications before inlining images. |
259 if rewrite_function: | 266 if rewrite_function: |
260 text = rewrite_function(filepath, text, distribution) | 267 text = rewrite_function(filepath, text, distribution) |
261 text = InlineCSSImages(text, filepath) | 268 text = InlineCSSImages(text, filepath) |
262 return InlineCSSImports(text, filepath) | 269 return InlineCSSImports(text, filepath) |
263 | 270 |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
314 | 321 |
315 | 322 |
316 flat_text = util.ReadFile(input_filename, util.BINARY) | 323 flat_text = util.ReadFile(input_filename, util.BINARY) |
317 | 324 |
318 # Check conditional elements, remove unsatisfied ones from the file. We do | 325 # Check conditional elements, remove unsatisfied ones from the file. We do |
319 # this twice. The first pass is so that we don't even bother calling | 326 # this twice. The first pass is so that we don't even bother calling |
320 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually | 327 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually |
321 # going to throw out anyway. | 328 # going to throw out anyway. |
322 flat_text = CheckConditionalElements(flat_text) | 329 flat_text = CheckConditionalElements(flat_text) |
323 | 330 |
| 331 flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text) |
| 332 |
324 if not preprocess_only: | 333 if not preprocess_only: |
| 334 if strip_whitespace: |
| 335 flat_text = minifier.Minify(flat_text, |
| 336 os.path.splitext(input_filename)[1]) |
| 337 |
325 if not allow_external_script: | 338 if not allow_external_script: |
326 # We need to inline css and js before we inline images so that image | 339 # We need to inline css and js before we inline images so that image |
327 # references get inlined in the css and js | 340 # references get inlined in the css and js |
328 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + | 341 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + |
329 '(?P<attrs2>.*?)></script>', | 342 '(?P<attrs2>.*?)></script>', |
330 InlineScript, | 343 InlineScript, |
331 flat_text) | 344 flat_text) |
332 | 345 |
333 flat_text = _STYLESHEET_RE.sub( | 346 flat_text = _STYLESHEET_RE.sub( |
334 lambda m: InlineCSSFile(m, '<style>%s</style>'), | 347 lambda m: InlineCSSFile(m, '<style>%s</style>'), |
335 flat_text) | 348 flat_text) |
336 | 349 |
337 flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text) | |
338 | |
339 # Check conditional elements, second pass. This catches conditionals in any | 350 # Check conditional elements, second pass. This catches conditionals in any |
340 # of the text we just inlined. | 351 # of the text we just inlined. |
341 flat_text = CheckConditionalElements(flat_text) | 352 flat_text = CheckConditionalElements(flat_text) |
342 | 353 |
343 if not preprocess_only: | 354 if not preprocess_only: |
344 # Allow custom modifications before inlining images. | 355 # Allow custom modifications before inlining images. |
345 if rewrite_function: | 356 if rewrite_function: |
346 flat_text = rewrite_function(input_filepath, flat_text, distribution) | 357 flat_text = rewrite_function(input_filepath, flat_text, distribution) |
347 flat_text = _SRC_RE.sub(SrcReplace, flat_text) | 358 flat_text = _SRC_RE.sub(SrcReplace, flat_text) |
348 | 359 |
349 # TODO(arv): Only do this inside <style> tags. | 360 # TODO(arv): Only do this inside <style> tags. |
350 flat_text = InlineCSSImages(flat_text) | 361 flat_text = InlineCSSImages(flat_text) |
351 | 362 |
352 flat_text = _ICON_RE.sub(SrcReplace, flat_text) | 363 flat_text = _ICON_RE.sub(SrcReplace, flat_text) |
353 | 364 |
354 if names_only: | 365 if names_only: |
355 flat_text = None # Will contain garbage if the flag is set anyway. | 366 flat_text = None # Will contain garbage if the flag is set anyway. |
356 return InlinedData(flat_text, inlined_files) | 367 return InlinedData(flat_text, inlined_files) |
357 | 368 |
358 | 369 |
359 def InlineToString(input_filename, grd_node, preprocess_only = False, | 370 def InlineToString(input_filename, grd_node, preprocess_only = False, |
360 allow_external_script=False, rewrite_function=None, | 371 allow_external_script=False, strip_whitespace=False, |
361 filename_expansion_function=None): | 372 rewrite_function=None, filename_expansion_function=None): |
362 """Inlines the resources in a specified file and returns it as a string. | 373 """Inlines the resources in a specified file and returns it as a string. |
363 | 374 |
364 Args: | 375 Args: |
365 input_filename: name of file to read in | 376 input_filename: name of file to read in |
366 grd_node: html node from the grd file for this include tag | 377 grd_node: html node from the grd file for this include tag |
367 Returns: | 378 Returns: |
368 the inlined data as a string | 379 the inlined data as a string |
369 """ | 380 """ |
370 try: | 381 try: |
371 return DoInline( | 382 return DoInline( |
372 input_filename, | 383 input_filename, |
373 grd_node, | 384 grd_node, |
374 preprocess_only=preprocess_only, | 385 preprocess_only=preprocess_only, |
375 allow_external_script=allow_external_script, | 386 allow_external_script=allow_external_script, |
| 387 strip_whitespace=strip_whitespace, |
376 rewrite_function=rewrite_function, | 388 rewrite_function=rewrite_function, |
377 filename_expansion_function=filename_expansion_function).inlined_data | 389 filename_expansion_function=filename_expansion_function).inlined_data |
378 except IOError, e: | 390 except IOError, e: |
379 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % | 391 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
380 (e.filename, input_filename, e.strerror)) | 392 (e.filename, input_filename, e.strerror)) |
381 | 393 |
382 | 394 |
383 def InlineToFile(input_filename, output_filename, grd_node): | 395 def InlineToFile(input_filename, output_filename, grd_node): |
384 """Inlines the resources in a specified file and writes it. | 396 """Inlines the resources in a specified file and writes it. |
385 | 397 |
(...skipping 18 matching lines...) Expand all Loading... |
404 rewrite_function=None, | 416 rewrite_function=None, |
405 filename_expansion_function=None): | 417 filename_expansion_function=None): |
406 """For a grd file, returns a set of all the files that would be inlined.""" | 418 """For a grd file, returns a set of all the files that would be inlined.""" |
407 try: | 419 try: |
408 return DoInline( | 420 return DoInline( |
409 filename, | 421 filename, |
410 None, | 422 None, |
411 names_only=True, | 423 names_only=True, |
412 preprocess_only=False, | 424 preprocess_only=False, |
413 allow_external_script=allow_external_script, | 425 allow_external_script=allow_external_script, |
| 426 strip_whitespace=False, |
414 rewrite_function=rewrite_function, | 427 rewrite_function=rewrite_function, |
415 filename_expansion_function=filename_expansion_function).inlined_files | 428 filename_expansion_function=filename_expansion_function).inlined_files |
416 except IOError, e: | 429 except IOError, e: |
417 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % | 430 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
418 (e.filename, filename, e.strerror)) | 431 (e.filename, filename, e.strerror)) |
419 | 432 |
420 | 433 |
421 def main(): | 434 def main(): |
422 if len(sys.argv) <= 2: | 435 if len(sys.argv) <= 2: |
423 print "Flattens a HTML file by inlining its external resources.\n" | 436 print "Flattens a HTML file by inlining its external resources.\n" |
424 print "html_inline.py inputfile outputfile" | 437 print "html_inline.py inputfile outputfile" |
425 else: | 438 else: |
426 InlineToFile(sys.argv[1], sys.argv[2], None) | 439 InlineToFile(sys.argv[1], sys.argv[2], None) |
427 | 440 |
428 if __name__ == '__main__': | 441 if __name__ == '__main__': |
429 main() | 442 main() |
OLD | NEW |