Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Flattens a HTML file by inlining its external resources. | 6 """Flattens a HTML file by inlining its external resources. |
| 7 | 7 |
| 8 This is a small script that takes a HTML file, looks for src attributes | 8 This is a small script that takes a HTML file, looks for src attributes |
| 9 and inlines the specified file, producing one HTML file with no external | 9 and inlines the specified file, producing one HTML file with no external |
| 10 dependencies. It recursively inlines the included files. | 10 dependencies. It recursively inlines the included files. |
| 11 """ | 11 """ |
| 12 | 12 |
| 13 import os | 13 import os |
| 14 import re | 14 import re |
| 15 import sys | 15 import sys |
| 16 import base64 | 16 import base64 |
| 17 import mimetypes | 17 import mimetypes |
| 18 | 18 |
| 19 from grit import lazy_re | 19 from grit import lazy_re |
| 20 from grit import util | 20 from grit import util |
| 21 from grit.format import minifier | |
| 21 | 22 |
| 22 # There is a python bug that makes mimetypes crash if the Windows | 23 # There is a python bug that makes mimetypes crash if the Windows |
| 23 # registry contains non-Latin keys ( http://bugs.python.org/issue9291 | 24 # registry contains non-Latin keys ( http://bugs.python.org/issue9291 |
| 24 # ). Initing manually and blocking external mime-type databases will | 25 # ). Initing manually and blocking external mime-type databases will |
| 25 # prevent that bug and if we add svg manually, it will still give us | 26 # prevent that bug and if we add svg manually, it will still give us |
| 26 # the data we need. | 27 # the data we need. |
| 27 mimetypes.init([]) | 28 mimetypes.init([]) |
| 28 mimetypes.add_type('image/svg+xml', '.svg') | 29 mimetypes.add_type('image/svg+xml', '.svg') |
| 29 | 30 |
| 30 DIST_DEFAULT = 'chromium' | 31 DIST_DEFAULT = 'chromium' |
| (...skipping 91 matching lines...) | |
| 122 | 123 |
| 123 Holds the inlined data and the set of filenames of all the inlined | 124 Holds the inlined data and the set of filenames of all the inlined |
| 124 files. | 125 files. |
| 125 """ | 126 """ |
| 126 def __init__(self, inlined_data, inlined_files): | 127 def __init__(self, inlined_data, inlined_files): |
| 127 self.inlined_data = inlined_data | 128 self.inlined_data = inlined_data |
| 128 self.inlined_files = inlined_files | 129 self.inlined_files = inlined_files |
| 129 | 130 |
| 130 def DoInline( | 131 def DoInline( |
| 131 input_filename, grd_node, allow_external_script=False, | 132 input_filename, grd_node, allow_external_script=False, |
| 132 preprocess_only=False, names_only=False, rewrite_function=None, | 133 preprocess_only=False, names_only=False, strip_whitespace = False, |
|
Dirk Pranke
2016/07/25 21:12:36
nit: no spaces around `=` in keyword arguments; write `strip_whitespace=False`.
aberent
2016/07/27 09:46:36
Done.
| |
| 133 filename_expansion_function=None): | 134 rewrite_function=None, filename_expansion_function=None): |
| 134 """Helper function that inlines the resources in a specified file. | 135 """Helper function that inlines the resources in a specified file. |
| 135 | 136 |
| 136 Reads input_filename, finds all the src attributes and attempts to | 137 Reads input_filename, finds all the src attributes and attempts to |
| 137 inline the files they are referring to, then returns the result and | 138 inline the files they are referring to, then returns the result and |
| 138 the set of inlined files. | 139 the set of inlined files. |
| 139 | 140 |
| 140 Args: | 141 Args: |
| 141 input_filename: name of file to read in | 142 input_filename: name of file to read in |
| 142 grd_node: html node from the grd file for this include tag | 143 grd_node: html node from the grd file for this include tag |
| 143 preprocess_only: Skip all HTML processing, only handle <if> and <include>. | 144 preprocess_only: Skip all HTML processing, only handle <if> and <include>. |
| 144 names_only: |nil| will be returned for the inlined contents (faster). | 145 names_only: |nil| will be returned for the inlined contents (faster). |
| 146 strip_whitespace: remove whitespace and comments in the input files. | |
| 145 rewrite_function: function(filepath, text, distribution) which will be | 147 rewrite_function: function(filepath, text, distribution) which will be |
| 146 called to rewrite html content before inlining images. | 148 called to rewrite html content before inlining images. |
| 147 filename_expansion_function: function(filename) which will be called to | 149 filename_expansion_function: function(filename) which will be called to |
| 148 rewrite filenames before attempting to read them. | 150 rewrite filenames before attempting to read them. |
| 149 Returns: | 151 Returns: |
| 150 a tuple of the inlined data as a string and the set of filenames | 152 a tuple of the inlined data as a string and the set of filenames |
| 151 of all the inlined files | 153 of all the inlined files |
| 152 """ | 154 """ |
| 153 if filename_expansion_function: | 155 if filename_expansion_function: |
| 154 input_filename = filename_expansion_function(input_filename) | 156 input_filename = filename_expansion_function(input_filename) |
| (...skipping 58 matching lines...) | |
| 213 pos = next_if.end() | 215 pos = next_if.end() |
| 214 | 216 |
| 215 content = str[content_start:end_if.start()] | 217 content = str[content_start:end_if.start()] |
| 216 trailing = str[end_if.end():] | 218 trailing = str[end_if.end():] |
| 217 | 219 |
| 218 if condition_satisfied: | 220 if condition_satisfied: |
| 219 str = leading + CheckConditionalElements(content) + trailing | 221 str = leading + CheckConditionalElements(content) + trailing |
| 220 else: | 222 else: |
| 221 str = leading + trailing | 223 str = leading + trailing |
| 222 | 224 |
| 223 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): | 225 def InlineFileContents(src_match, |
| 226 pattern, | |
| 227 inlined_files=inlined_files, | |
| 228 strip_whitespace=True): | |
| 224 """Helper function to inline external files of various types""" | 229 """Helper function to inline external files of various types""" |
| 225 filepath = GetFilepath(src_match) | 230 filepath = GetFilepath(src_match) |
| 226 if filepath is None: | 231 if filepath is None: |
| 227 return src_match.group(0) | 232 return src_match.group(0) |
| 228 inlined_files.add(filepath) | 233 inlined_files.add(filepath) |
| 229 | 234 |
| 230 if names_only: | 235 if names_only: |
| 231 inlined_files.update(GetResourceFilenames( | 236 inlined_files.update(GetResourceFilenames( |
| 232 filepath, | 237 filepath, |
| 233 allow_external_script, | 238 allow_external_script, |
| 234 rewrite_function, | 239 rewrite_function, |
| 235 filename_expansion_function=filename_expansion_function)) | 240 filename_expansion_function=filename_expansion_function)) |
| 236 return "" | 241 return "" |
| 237 | 242 |
| 238 return pattern % InlineToString( | 243 return pattern % InlineToString( |
| 239 filepath, grd_node, allow_external_script=allow_external_script, | 244 filepath, grd_node, allow_external_script=allow_external_script, |
| 240 filename_expansion_function=filename_expansion_function) | 245 filename_expansion_function=filename_expansion_function) |
| 241 | 246 |
| 242 def InlineIncludeFiles(src_match): | 247 def InlineIncludeFiles(src_match): |
| 243 """Helper function to directly inline generic external files (without | 248 """Helper function to directly inline generic external files (without |
| 244 wrapping them with any kind of tags). | 249 wrapping them with any kind of tags). |
| 245 """ | 250 """ |
| 246 return InlineFileContents(src_match, '%s') | 251 return InlineFileContents(src_match, '%s', strip_whitespace=False) |
| 247 | 252 |
| 248 def InlineScript(match): | 253 def InlineScript(match): |
| 249 """Helper function to inline external script files""" | 254 """Helper function to inline external script files""" |
| 250 attrs = (match.group('attrs1') + match.group('attrs2')).strip() | 255 attrs = (match.group('attrs1') + match.group('attrs2')).strip() |
| 251 if attrs: | 256 if attrs: |
| 252 attrs = ' ' + attrs | 257 attrs = ' ' + attrs |
| 253 return InlineFileContents(match, '<script' + attrs + '>%s</script>') | 258 return InlineFileContents(match, '<script' + attrs + '>%s</script>', |
| 259 strip_whitespace=strip_whitespace) | |
| 254 | 260 |
| 255 def InlineCSSText(text, css_filepath): | 261 def InlineCSSText(text, css_filepath): |
| 256 """Helper function that inlines external resources in CSS text""" | 262 """Helper function that inlines external resources in CSS text""" |
| 257 filepath = os.path.dirname(css_filepath) | 263 filepath = os.path.dirname(css_filepath) |
| 258 # Allow custom modifications before inlining images. | 264 # Allow custom modifications before inlining images. |
| 259 if rewrite_function: | 265 if rewrite_function: |
| 260 text = rewrite_function(filepath, text, distribution) | 266 text = rewrite_function(filepath, text, distribution) |
| 261 text = InlineCSSImages(text, filepath) | 267 text = InlineCSSImages(text, filepath) |
| 262 return InlineCSSImports(text, filepath) | 268 return InlineCSSImports(text, filepath) |
| 263 | 269 |
| (...skipping 50 matching lines...) | |
| 314 | 320 |
| 315 | 321 |
| 316 flat_text = util.ReadFile(input_filename, util.BINARY) | 322 flat_text = util.ReadFile(input_filename, util.BINARY) |
| 317 | 323 |
| 318 # Check conditional elements, remove unsatisfied ones from the file. We do | 324 # Check conditional elements, remove unsatisfied ones from the file. We do |
| 319 # this twice. The first pass is so that we don't even bother calling | 325 # this twice. The first pass is so that we don't even bother calling |
| 320 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually | 326 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually |
| 321 # going to throw out anyway. | 327 # going to throw out anyway. |
| 322 flat_text = CheckConditionalElements(flat_text) | 328 flat_text = CheckConditionalElements(flat_text) |
| 323 | 329 |
| 330 flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text) | |
| 331 | |
| 324 if not preprocess_only: | 332 if not preprocess_only: |
| 333 if strip_whitespace: | |
| 334 flat_text = minifier.Minify(flat_text, | |
| 335 os.path.splitext(input_filename)[1]) | |
| 336 | |
| 325 if not allow_external_script: | 337 if not allow_external_script: |
| 326 # We need to inline css and js before we inline images so that image | 338 # We need to inline css and js before we inline images so that image |
| 327 # references gets inlined in the css and js | 339 # references gets inlined in the css and js |
| 328 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + | 340 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + |
| 329 '(?P<attrs2>.*?)></script>', | 341 '(?P<attrs2>.*?)></script>', |
| 330 InlineScript, | 342 InlineScript, |
| 331 flat_text) | 343 flat_text) |
| 332 | 344 |
| 333 flat_text = _STYLESHEET_RE.sub( | 345 flat_text = _STYLESHEET_RE.sub( |
| 334 lambda m: InlineCSSFile(m, '<style>%s</style>'), | 346 lambda m: InlineCSSFile(m, '<style>%s</style>'), |
| 335 flat_text) | 347 flat_text) |
| 336 | 348 |
| 337 flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text) | |
| 338 | |
| 339 # Check conditional elements, second pass. This catches conditionals in any | 349 # Check conditional elements, second pass. This catches conditionals in any |
| 340 # of the text we just inlined. | 350 # of the text we just inlined. |
| 341 flat_text = CheckConditionalElements(flat_text) | 351 flat_text = CheckConditionalElements(flat_text) |
| 342 | 352 |
| 343 if not preprocess_only: | 353 if not preprocess_only: |
| 344 # Allow custom modifications before inlining images. | 354 # Allow custom modifications before inlining images. |
| 345 if rewrite_function: | 355 if rewrite_function: |
| 346 flat_text = rewrite_function(input_filepath, flat_text, distribution) | 356 flat_text = rewrite_function(input_filepath, flat_text, distribution) |
| 347 flat_text = _SRC_RE.sub(SrcReplace, flat_text) | 357 flat_text = _SRC_RE.sub(SrcReplace, flat_text) |
| 348 | 358 |
| 349 # TODO(arv): Only do this inside <style> tags. | 359 # TODO(arv): Only do this inside <style> tags. |
| 350 flat_text = InlineCSSImages(flat_text) | 360 flat_text = InlineCSSImages(flat_text) |
| 351 | 361 |
| 352 flat_text = _ICON_RE.sub(SrcReplace, flat_text) | 362 flat_text = _ICON_RE.sub(SrcReplace, flat_text) |
| 353 | 363 |
| 354 if names_only: | 364 if names_only: |
| 355 flat_text = None # Will contains garbage if the flag is set anyway. | 365 flat_text = None # Will contains garbage if the flag is set anyway. |
| 356 return InlinedData(flat_text, inlined_files) | 366 return InlinedData(flat_text, inlined_files) |
| 357 | 367 |
| 358 | 368 |
| 359 def InlineToString(input_filename, grd_node, preprocess_only = False, | 369 def InlineToString(input_filename, grd_node, preprocess_only = False, |
| 360 allow_external_script=False, rewrite_function=None, | 370 allow_external_script=False, strip_whitespace = True, |
|
Dirk Pranke
2016/07/25 21:12:36
same nit.
aberent
2016/07/27 09:46:36
Done.
| |
| 361 filename_expansion_function=None): | 371 rewrite_function=None, filename_expansion_function=None): |
| 362 """Inlines the resources in a specified file and returns it as a string. | 372 """Inlines the resources in a specified file and returns it as a string. |
| 363 | 373 |
| 364 Args: | 374 Args: |
| 365 input_filename: name of file to read in | 375 input_filename: name of file to read in |
| 366 grd_node: html node from the grd file for this include tag | 376 grd_node: html node from the grd file for this include tag |
| 367 Returns: | 377 Returns: |
| 368 the inlined data as a string | 378 the inlined data as a string |
| 369 """ | 379 """ |
| 370 try: | 380 try: |
| 371 return DoInline( | 381 return DoInline( |
| 372 input_filename, | 382 input_filename, |
| 373 grd_node, | 383 grd_node, |
| 374 preprocess_only=preprocess_only, | 384 preprocess_only=preprocess_only, |
| 375 allow_external_script=allow_external_script, | 385 allow_external_script=allow_external_script, |
| 386 strip_whitespace = strip_whitespace, | |
|
Dirk Pranke
2016/07/25 21:12:36
same nit.
aberent
2016/07/27 09:46:36
Done.
| |
| 376 rewrite_function=rewrite_function, | 387 rewrite_function=rewrite_function, |
| 377 filename_expansion_function=filename_expansion_function).inlined_data | 388 filename_expansion_function=filename_expansion_function).inlined_data |
| 378 except IOError, e: | 389 except IOError, e: |
| 379 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % | 390 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
| 380 (e.filename, input_filename, e.strerror)) | 391 (e.filename, input_filename, e.strerror)) |
| 381 | 392 |
| 382 | 393 |
| 383 def InlineToFile(input_filename, output_filename, grd_node): | 394 def InlineToFile(input_filename, output_filename, grd_node): |
| 384 """Inlines the resources in a specified file and writes it. | 395 """Inlines the resources in a specified file and writes it. |
| 385 | 396 |
| (...skipping 18 matching lines...) | |
| 404 rewrite_function=None, | 415 rewrite_function=None, |
| 405 filename_expansion_function=None): | 416 filename_expansion_function=None): |
| 406 """For a grd file, returns a set of all the files that would be inline.""" | 417 """For a grd file, returns a set of all the files that would be inline.""" |
| 407 try: | 418 try: |
| 408 return DoInline( | 419 return DoInline( |
| 409 filename, | 420 filename, |
| 410 None, | 421 None, |
| 411 names_only=True, | 422 names_only=True, |
| 412 preprocess_only=False, | 423 preprocess_only=False, |
| 413 allow_external_script=allow_external_script, | 424 allow_external_script=allow_external_script, |
| 425 strip_whitespace = False, | |
|
Dirk Pranke
2016/07/25 21:12:36
same nit.
aberent
2016/07/27 09:46:36
Done.
| |
| 414 rewrite_function=rewrite_function, | 426 rewrite_function=rewrite_function, |
| 415 filename_expansion_function=filename_expansion_function).inlined_files | 427 filename_expansion_function=filename_expansion_function).inlined_files |
| 416 except IOError, e: | 428 except IOError, e: |
| 417 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % | 429 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
| 418 (e.filename, filename, e.strerror)) | 430 (e.filename, filename, e.strerror)) |
| 419 | 431 |
| 420 | 432 |
| 421 def main(): | 433 def main(): |
| 422 if len(sys.argv) <= 2: | 434 if len(sys.argv) <= 2: |
| 423 print "Flattens a HTML file by inlining its external resources.\n" | 435 print "Flattens a HTML file by inlining its external resources.\n" |
| 424 print "html_inline.py inputfile outputfile" | 436 print "html_inline.py inputfile outputfile" |
| 425 else: | 437 else: |
| 426 InlineToFile(sys.argv[1], sys.argv[2], None) | 438 InlineToFile(sys.argv[1], sys.argv[2], None) |
| 427 | 439 |
| 428 if __name__ == '__main__': | 440 if __name__ == '__main__': |
| 429 main() | 441 main() |
| OLD | NEW |