OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Flattens a HTML file by inlining its external resources. | 6 """Flattens a HTML file by inlining its external resources. |
7 | 7 |
8 This is a small script that takes a HTML file, looks for src attributes | 8 This is a small script that takes a HTML file, looks for src attributes |
9 and inlines the specified file, producing one HTML file with no external | 9 and inlines the specified file, producing one HTML file with no external |
10 dependencies. It recursively inlines the included files. | 10 dependencies. It recursively inlines the included files. |
11 """ | 11 """ |
12 | 12 |
13 import os | 13 import os |
14 import re | 14 import re |
15 import sys | 15 import sys |
16 import base64 | 16 import base64 |
17 import mimetypes | 17 import mimetypes |
18 | 18 |
19 from grit import lazy_re | 19 from grit import lazy_re |
20 from grit import util | 20 from grit import util |
21 from grit.format import minifier | |
21 | 22 |
22 # There is a python bug that makes mimetypes crash if the Windows | 23 # There is a python bug that makes mimetypes crash if the Windows |
23 # registry contains non-Latin keys ( http://bugs.python.org/issue9291 | 24 # registry contains non-Latin keys ( http://bugs.python.org/issue9291 |
24 # ). Initing manually and blocking external mime-type databases will | 25 # ). Initing manually and blocking external mime-type databases will |
25 # prevent that bug and if we add svg manually, it will still give us | 26 # prevent that bug and if we add svg manually, it will still give us |
26 # the data we need. | 27 # the data we need. |
27 mimetypes.init([]) | 28 mimetypes.init([]) |
28 mimetypes.add_type('image/svg+xml', '.svg') | 29 mimetypes.add_type('image/svg+xml', '.svg') |
29 | 30 |
30 DIST_DEFAULT = 'chromium' | 31 DIST_DEFAULT = 'chromium' |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
122 | 123 |
123 Holds the inlined data and the set of filenames of all the inlined | 124 Holds the inlined data and the set of filenames of all the inlined |
124 files. | 125 files. |
125 """ | 126 """ |
126 def __init__(self, inlined_data, inlined_files): | 127 def __init__(self, inlined_data, inlined_files): |
127 self.inlined_data = inlined_data | 128 self.inlined_data = inlined_data |
128 self.inlined_files = inlined_files | 129 self.inlined_files = inlined_files |
129 | 130 |
130 def DoInline( | 131 def DoInline( |
131 input_filename, grd_node, allow_external_script=False, | 132 input_filename, grd_node, allow_external_script=False, |
132 preprocess_only=False, names_only=False, rewrite_function=None, | 133 preprocess_only=False, names_only=False, strip_whitespace = False, |
Dirk Pranke
2016/07/25 21:12:36
nit: no spaces around `=` in a default parameter value; this should be `strip_whitespace=False`.
aberent
2016/07/27 09:46:36
Done.
| |
133 filename_expansion_function=None): | 134 rewrite_function=None, filename_expansion_function=None): |
134 """Helper function that inlines the resources in a specified file. | 135 """Helper function that inlines the resources in a specified file. |
135 | 136 |
136 Reads input_filename, finds all the src attributes and attempts to | 137 Reads input_filename, finds all the src attributes and attempts to |
137 inline the files they are referring to, then returns the result and | 138 inline the files they are referring to, then returns the result and |
138 the set of inlined files. | 139 the set of inlined files. |
139 | 140 |
140 Args: | 141 Args: |
141 input_filename: name of file to read in | 142 input_filename: name of file to read in |
142 grd_node: html node from the grd file for this include tag | 143 grd_node: html node from the grd file for this include tag |
143 preprocess_only: Skip all HTML processing, only handle <if> and <include>. | 144 preprocess_only: Skip all HTML processing, only handle <if> and <include>. |
144 names_only: |nil| will be returned for the inlined contents (faster). | 145 names_only: |nil| will be returned for the inlined contents (faster). |
146 strip_whitespace: remove whitespace and comments in the input files. | |
145 rewrite_function: function(filepath, text, distribution) which will be | 147 rewrite_function: function(filepath, text, distribution) which will be |
146 called to rewrite html content before inlining images. | 148 called to rewrite html content before inlining images. |
147 filename_expansion_function: function(filename) which will be called to | 149 filename_expansion_function: function(filename) which will be called to |
148 rewrite filenames before attempting to read them. | 150 rewrite filenames before attempting to read them. |
149 Returns: | 151 Returns: |
150 a tuple of the inlined data as a string and the set of filenames | 152 a tuple of the inlined data as a string and the set of filenames |
151 of all the inlined files | 153 of all the inlined files |
152 """ | 154 """ |
153 if filename_expansion_function: | 155 if filename_expansion_function: |
154 input_filename = filename_expansion_function(input_filename) | 156 input_filename = filename_expansion_function(input_filename) |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
213 pos = next_if.end() | 215 pos = next_if.end() |
214 | 216 |
215 content = str[content_start:end_if.start()] | 217 content = str[content_start:end_if.start()] |
216 trailing = str[end_if.end():] | 218 trailing = str[end_if.end():] |
217 | 219 |
218 if condition_satisfied: | 220 if condition_satisfied: |
219 str = leading + CheckConditionalElements(content) + trailing | 221 str = leading + CheckConditionalElements(content) + trailing |
220 else: | 222 else: |
221 str = leading + trailing | 223 str = leading + trailing |
222 | 224 |
223 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): | 225 def InlineFileContents(src_match, |
226 pattern, | |
227 inlined_files=inlined_files, | |
228 strip_whitespace=True): | |
224 """Helper function to inline external files of various types""" | 229 """Helper function to inline external files of various types""" |
225 filepath = GetFilepath(src_match) | 230 filepath = GetFilepath(src_match) |
226 if filepath is None: | 231 if filepath is None: |
227 return src_match.group(0) | 232 return src_match.group(0) |
228 inlined_files.add(filepath) | 233 inlined_files.add(filepath) |
229 | 234 |
230 if names_only: | 235 if names_only: |
231 inlined_files.update(GetResourceFilenames( | 236 inlined_files.update(GetResourceFilenames( |
232 filepath, | 237 filepath, |
233 allow_external_script, | 238 allow_external_script, |
234 rewrite_function, | 239 rewrite_function, |
235 filename_expansion_function=filename_expansion_function)) | 240 filename_expansion_function=filename_expansion_function)) |
236 return "" | 241 return "" |
237 | 242 |
238 return pattern % InlineToString( | 243 return pattern % InlineToString( |
239 filepath, grd_node, allow_external_script=allow_external_script, | 244 filepath, grd_node, allow_external_script=allow_external_script, |
240 filename_expansion_function=filename_expansion_function) | 245 filename_expansion_function=filename_expansion_function) |
241 | 246 |
242 def InlineIncludeFiles(src_match): | 247 def InlineIncludeFiles(src_match): |
243 """Helper function to directly inline generic external files (without | 248 """Helper function to directly inline generic external files (without |
244 wrapping them with any kind of tags). | 249 wrapping them with any kind of tags). |
245 """ | 250 """ |
246 return InlineFileContents(src_match, '%s') | 251 return InlineFileContents(src_match, '%s', strip_whitespace=False) |
247 | 252 |
248 def InlineScript(match): | 253 def InlineScript(match): |
249 """Helper function to inline external script files""" | 254 """Helper function to inline external script files""" |
250 attrs = (match.group('attrs1') + match.group('attrs2')).strip() | 255 attrs = (match.group('attrs1') + match.group('attrs2')).strip() |
251 if attrs: | 256 if attrs: |
252 attrs = ' ' + attrs | 257 attrs = ' ' + attrs |
253 return InlineFileContents(match, '<script' + attrs + '>%s</script>') | 258 return InlineFileContents(match, '<script' + attrs + '>%s</script>', |
259 strip_whitespace=strip_whitespace) | |
254 | 260 |
255 def InlineCSSText(text, css_filepath): | 261 def InlineCSSText(text, css_filepath): |
256 """Helper function that inlines external resources in CSS text""" | 262 """Helper function that inlines external resources in CSS text""" |
257 filepath = os.path.dirname(css_filepath) | 263 filepath = os.path.dirname(css_filepath) |
258 # Allow custom modifications before inlining images. | 264 # Allow custom modifications before inlining images. |
259 if rewrite_function: | 265 if rewrite_function: |
260 text = rewrite_function(filepath, text, distribution) | 266 text = rewrite_function(filepath, text, distribution) |
261 text = InlineCSSImages(text, filepath) | 267 text = InlineCSSImages(text, filepath) |
262 return InlineCSSImports(text, filepath) | 268 return InlineCSSImports(text, filepath) |
263 | 269 |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
314 | 320 |
315 | 321 |
316 flat_text = util.ReadFile(input_filename, util.BINARY) | 322 flat_text = util.ReadFile(input_filename, util.BINARY) |
317 | 323 |
318 # Check conditional elements, remove unsatisfied ones from the file. We do | 324 # Check conditional elements, remove unsatisfied ones from the file. We do |
319 # this twice. The first pass is so that we don't even bother calling | 325 # this twice. The first pass is so that we don't even bother calling |
320 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually | 326 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually |
321 # going to throw out anyway. | 327 # going to throw out anyway. |
322 flat_text = CheckConditionalElements(flat_text) | 328 flat_text = CheckConditionalElements(flat_text) |
323 | 329 |
330 flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text) | |
331 | |
324 if not preprocess_only: | 332 if not preprocess_only: |
333 if strip_whitespace: | |
334 flat_text = minifier.Minify(flat_text, | |
335 os.path.splitext(input_filename)[1]) | |
336 | |
325 if not allow_external_script: | 337 if not allow_external_script: |
326 # We need to inline css and js before we inline images so that image | 338 # We need to inline css and js before we inline images so that image |
327 # references gets inlined in the css and js | 339 # references gets inlined in the css and js |
328 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + | 340 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + |
329 '(?P<attrs2>.*?)></script>', | 341 '(?P<attrs2>.*?)></script>', |
330 InlineScript, | 342 InlineScript, |
331 flat_text) | 343 flat_text) |
332 | 344 |
333 flat_text = _STYLESHEET_RE.sub( | 345 flat_text = _STYLESHEET_RE.sub( |
334 lambda m: InlineCSSFile(m, '<style>%s</style>'), | 346 lambda m: InlineCSSFile(m, '<style>%s</style>'), |
335 flat_text) | 347 flat_text) |
336 | 348 |
337 flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text) | |
338 | |
339 # Check conditional elements, second pass. This catches conditionals in any | 349 # Check conditional elements, second pass. This catches conditionals in any |
340 # of the text we just inlined. | 350 # of the text we just inlined. |
341 flat_text = CheckConditionalElements(flat_text) | 351 flat_text = CheckConditionalElements(flat_text) |
342 | 352 |
343 if not preprocess_only: | 353 if not preprocess_only: |
344 # Allow custom modifications before inlining images. | 354 # Allow custom modifications before inlining images. |
345 if rewrite_function: | 355 if rewrite_function: |
346 flat_text = rewrite_function(input_filepath, flat_text, distribution) | 356 flat_text = rewrite_function(input_filepath, flat_text, distribution) |
347 flat_text = _SRC_RE.sub(SrcReplace, flat_text) | 357 flat_text = _SRC_RE.sub(SrcReplace, flat_text) |
348 | 358 |
349 # TODO(arv): Only do this inside <style> tags. | 359 # TODO(arv): Only do this inside <style> tags. |
350 flat_text = InlineCSSImages(flat_text) | 360 flat_text = InlineCSSImages(flat_text) |
351 | 361 |
352 flat_text = _ICON_RE.sub(SrcReplace, flat_text) | 362 flat_text = _ICON_RE.sub(SrcReplace, flat_text) |
353 | 363 |
354 if names_only: | 364 if names_only: |
355 flat_text = None # Will contains garbage if the flag is set anyway. | 365 flat_text = None # Will contains garbage if the flag is set anyway. |
356 return InlinedData(flat_text, inlined_files) | 366 return InlinedData(flat_text, inlined_files) |
357 | 367 |
358 | 368 |
359 def InlineToString(input_filename, grd_node, preprocess_only = False, | 369 def InlineToString(input_filename, grd_node, preprocess_only = False, |
360 allow_external_script=False, rewrite_function=None, | 370 allow_external_script=False, strip_whitespace = True, |
Dirk Pranke
2016/07/25 21:12:36
same nit: no spaces around `=` in a default parameter value; this should be `strip_whitespace=True`.
aberent
2016/07/27 09:46:36
Done.
| |
361 filename_expansion_function=None): | 371 rewrite_function=None, filename_expansion_function=None): |
362 """Inlines the resources in a specified file and returns it as a string. | 372 """Inlines the resources in a specified file and returns it as a string. |
363 | 373 |
364 Args: | 374 Args: |
365 input_filename: name of file to read in | 375 input_filename: name of file to read in |
366 grd_node: html node from the grd file for this include tag | 376 grd_node: html node from the grd file for this include tag |
367 Returns: | 377 Returns: |
368 the inlined data as a string | 378 the inlined data as a string |
369 """ | 379 """ |
370 try: | 380 try: |
371 return DoInline( | 381 return DoInline( |
372 input_filename, | 382 input_filename, |
373 grd_node, | 383 grd_node, |
374 preprocess_only=preprocess_only, | 384 preprocess_only=preprocess_only, |
375 allow_external_script=allow_external_script, | 385 allow_external_script=allow_external_script, |
386 strip_whitespace = strip_whitespace, | |
Dirk Pranke
2016/07/25 21:12:36
same nit: no spaces around `=` in a keyword argument; this should be `strip_whitespace=strip_whitespace`.
aberent
2016/07/27 09:46:36
Done.
| |
376 rewrite_function=rewrite_function, | 387 rewrite_function=rewrite_function, |
377 filename_expansion_function=filename_expansion_function).inlined_data | 388 filename_expansion_function=filename_expansion_function).inlined_data |
378 except IOError, e: | 389 except IOError, e: |
379 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % | 390 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
380 (e.filename, input_filename, e.strerror)) | 391 (e.filename, input_filename, e.strerror)) |
381 | 392 |
382 | 393 |
383 def InlineToFile(input_filename, output_filename, grd_node): | 394 def InlineToFile(input_filename, output_filename, grd_node): |
384 """Inlines the resources in a specified file and writes it. | 395 """Inlines the resources in a specified file and writes it. |
385 | 396 |
(...skipping 18 matching lines...) Expand all Loading... | |
404 rewrite_function=None, | 415 rewrite_function=None, |
405 filename_expansion_function=None): | 416 filename_expansion_function=None): |
406 """For a grd file, returns a set of all the files that would be inline.""" | 417 """For a grd file, returns a set of all the files that would be inline.""" |
407 try: | 418 try: |
408 return DoInline( | 419 return DoInline( |
409 filename, | 420 filename, |
410 None, | 421 None, |
411 names_only=True, | 422 names_only=True, |
412 preprocess_only=False, | 423 preprocess_only=False, |
413 allow_external_script=allow_external_script, | 424 allow_external_script=allow_external_script, |
425 strip_whitespace = False, | |
Dirk Pranke
2016/07/25 21:12:36
same nit: no spaces around `=` in a keyword argument; this should be `strip_whitespace=False`.
aberent
2016/07/27 09:46:36
Done.
| |
414 rewrite_function=rewrite_function, | 426 rewrite_function=rewrite_function, |
415 filename_expansion_function=filename_expansion_function).inlined_files | 427 filename_expansion_function=filename_expansion_function).inlined_files |
416 except IOError, e: | 428 except IOError, e: |
417 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % | 429 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
418 (e.filename, filename, e.strerror)) | 430 (e.filename, filename, e.strerror)) |
419 | 431 |
420 | 432 |
421 def main(): | 433 def main(): |
422 if len(sys.argv) <= 2: | 434 if len(sys.argv) <= 2: |
423 print "Flattens a HTML file by inlining its external resources.\n" | 435 print "Flattens a HTML file by inlining its external resources.\n" |
424 print "html_inline.py inputfile outputfile" | 436 print "html_inline.py inputfile outputfile" |
425 else: | 437 else: |
426 InlineToFile(sys.argv[1], sys.argv[2], None) | 438 InlineToFile(sys.argv[1], sys.argv[2], None) |
427 | 439 |
428 if __name__ == '__main__': | 440 if __name__ == '__main__': |
429 main() | 441 main() |
OLD | NEW |