Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(32)

Side by Side Diff: tools/grit/grit/format/html_inline.py

Issue 2179033002: Strip comments and whitespace from javascript resources (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: One minor fix Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Flattens a HTML file by inlining its external resources. 6 """Flattens a HTML file by inlining its external resources.
7 7
8 This is a small script that takes a HTML file, looks for src attributes 8 This is a small script that takes a HTML file, looks for src attributes
9 and inlines the specified file, producing one HTML file with no external 9 and inlines the specified file, producing one HTML file with no external
10 dependencies. It recursively inlines the included files. 10 dependencies. It recursively inlines the included files.
11 """ 11 """
12 12
13 import os 13 import os
14 import re 14 import re
15 import sys 15 import sys
16 import base64 16 import base64
17 import mimetypes 17 import mimetypes
18 18
19 from grit import lazy_re 19 from grit import lazy_re
20 from grit import util 20 from grit import util
21 from grit.format import minifier
21 22
22 # There is a python bug that makes mimetypes crash if the Windows 23 # There is a python bug that makes mimetypes crash if the Windows
23 # registry contains non-Latin keys ( http://bugs.python.org/issue9291 24 # registry contains non-Latin keys ( http://bugs.python.org/issue9291
24 # ). Initing manually and blocking external mime-type databases will 25 # ). Initing manually and blocking external mime-type databases will
25 # prevent that bug and if we add svg manually, it will still give us 26 # prevent that bug and if we add svg manually, it will still give us
26 # the data we need. 27 # the data we need.
27 mimetypes.init([]) 28 mimetypes.init([])
28 mimetypes.add_type('image/svg+xml', '.svg') 29 mimetypes.add_type('image/svg+xml', '.svg')
29 30
30 DIST_DEFAULT = 'chromium' 31 DIST_DEFAULT = 'chromium'
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 123
123 Holds the inlined data and the set of filenames of all the inlined 124 Holds the inlined data and the set of filenames of all the inlined
124 files. 125 files.
125 """ 126 """
126 def __init__(self, inlined_data, inlined_files): 127 def __init__(self, inlined_data, inlined_files):
127 self.inlined_data = inlined_data 128 self.inlined_data = inlined_data
128 self.inlined_files = inlined_files 129 self.inlined_files = inlined_files
129 130
130 def DoInline( 131 def DoInline(
131 input_filename, grd_node, allow_external_script=False, 132 input_filename, grd_node, allow_external_script=False,
132 preprocess_only=False, names_only=False, rewrite_function=None, 133 preprocess_only=False, names_only=False, strip_whitespace = False,
Dirk Pranke 2016/07/25 21:12:36 nit: no spaces around the '=' in a keyword argument, i.e. strip_whitespace=False.
aberent 2016/07/27 09:46:36 Done.
133 filename_expansion_function=None): 134 rewrite_function=None, filename_expansion_function=None):
134 """Helper function that inlines the resources in a specified file. 135 """Helper function that inlines the resources in a specified file.
135 136
136 Reads input_filename, finds all the src attributes and attempts to 137 Reads input_filename, finds all the src attributes and attempts to
137 inline the files they are referring to, then returns the result and 138 inline the files they are referring to, then returns the result and
138 the set of inlined files. 139 the set of inlined files.
139 140
140 Args: 141 Args:
141 input_filename: name of file to read in 142 input_filename: name of file to read in
142 grd_node: html node from the grd file for this include tag 143 grd_node: html node from the grd file for this include tag
143 preprocess_only: Skip all HTML processing, only handle <if> and <include>. 144 preprocess_only: Skip all HTML processing, only handle <if> and <include>.
144 names_only: |nil| will be returned for the inlined contents (faster). 145 names_only: |nil| will be returned for the inlined contents (faster).
146 strip_whitespace: remove whitespace and comments in the input files.
145 rewrite_function: function(filepath, text, distribution) which will be 147 rewrite_function: function(filepath, text, distribution) which will be
146 called to rewrite html content before inlining images. 148 called to rewrite html content before inlining images.
147 filename_expansion_function: function(filename) which will be called to 149 filename_expansion_function: function(filename) which will be called to
148 rewrite filenames before attempting to read them. 150 rewrite filenames before attempting to read them.
149 Returns: 151 Returns:
150 a tuple of the inlined data as a string and the set of filenames 152 a tuple of the inlined data as a string and the set of filenames
151 of all the inlined files 153 of all the inlined files
152 """ 154 """
153 if filename_expansion_function: 155 if filename_expansion_function:
154 input_filename = filename_expansion_function(input_filename) 156 input_filename = filename_expansion_function(input_filename)
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
213 pos = next_if.end() 215 pos = next_if.end()
214 216
215 content = str[content_start:end_if.start()] 217 content = str[content_start:end_if.start()]
216 trailing = str[end_if.end():] 218 trailing = str[end_if.end():]
217 219
218 if condition_satisfied: 220 if condition_satisfied:
219 str = leading + CheckConditionalElements(content) + trailing 221 str = leading + CheckConditionalElements(content) + trailing
220 else: 222 else:
221 str = leading + trailing 223 str = leading + trailing
222 224
223 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): 225 def InlineFileContents(src_match,
226 pattern,
227 inlined_files=inlined_files,
228 strip_whitespace=True):
224 """Helper function to inline external files of various types""" 229 """Helper function to inline external files of various types"""
225 filepath = GetFilepath(src_match) 230 filepath = GetFilepath(src_match)
226 if filepath is None: 231 if filepath is None:
227 return src_match.group(0) 232 return src_match.group(0)
228 inlined_files.add(filepath) 233 inlined_files.add(filepath)
229 234
230 if names_only: 235 if names_only:
231 inlined_files.update(GetResourceFilenames( 236 inlined_files.update(GetResourceFilenames(
232 filepath, 237 filepath,
233 allow_external_script, 238 allow_external_script,
234 rewrite_function, 239 rewrite_function,
235 filename_expansion_function=filename_expansion_function)) 240 filename_expansion_function=filename_expansion_function))
236 return "" 241 return ""
237 242
238 return pattern % InlineToString( 243 return pattern % InlineToString(
239 filepath, grd_node, allow_external_script=allow_external_script, 244 filepath, grd_node, allow_external_script=allow_external_script,
240 filename_expansion_function=filename_expansion_function) 245 filename_expansion_function=filename_expansion_function)
241 246
242 def InlineIncludeFiles(src_match): 247 def InlineIncludeFiles(src_match):
243 """Helper function to directly inline generic external files (without 248 """Helper function to directly inline generic external files (without
244 wrapping them with any kind of tags). 249 wrapping them with any kind of tags).
245 """ 250 """
246 return InlineFileContents(src_match, '%s') 251 return InlineFileContents(src_match, '%s', strip_whitespace=False)
247 252
248 def InlineScript(match): 253 def InlineScript(match):
249 """Helper function to inline external script files""" 254 """Helper function to inline external script files"""
250 attrs = (match.group('attrs1') + match.group('attrs2')).strip() 255 attrs = (match.group('attrs1') + match.group('attrs2')).strip()
251 if attrs: 256 if attrs:
252 attrs = ' ' + attrs 257 attrs = ' ' + attrs
253 return InlineFileContents(match, '<script' + attrs + '>%s</script>') 258 return InlineFileContents(match, '<script' + attrs + '>%s</script>',
259 strip_whitespace=strip_whitespace)
254 260
255 def InlineCSSText(text, css_filepath): 261 def InlineCSSText(text, css_filepath):
256 """Helper function that inlines external resources in CSS text""" 262 """Helper function that inlines external resources in CSS text"""
257 filepath = os.path.dirname(css_filepath) 263 filepath = os.path.dirname(css_filepath)
258 # Allow custom modifications before inlining images. 264 # Allow custom modifications before inlining images.
259 if rewrite_function: 265 if rewrite_function:
260 text = rewrite_function(filepath, text, distribution) 266 text = rewrite_function(filepath, text, distribution)
261 text = InlineCSSImages(text, filepath) 267 text = InlineCSSImages(text, filepath)
262 return InlineCSSImports(text, filepath) 268 return InlineCSSImports(text, filepath)
263 269
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
314 320
315 321
316 flat_text = util.ReadFile(input_filename, util.BINARY) 322 flat_text = util.ReadFile(input_filename, util.BINARY)
317 323
318 # Check conditional elements, remove unsatisfied ones from the file. We do 324 # Check conditional elements, remove unsatisfied ones from the file. We do
319 # this twice. The first pass is so that we don't even bother calling 325 # this twice. The first pass is so that we don't even bother calling
320 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually 326 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually
321 # going to throw out anyway. 327 # going to throw out anyway.
322 flat_text = CheckConditionalElements(flat_text) 328 flat_text = CheckConditionalElements(flat_text)
323 329
330 flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text)
331
324 if not preprocess_only: 332 if not preprocess_only:
333 if strip_whitespace:
334 flat_text = minifier.Minify(flat_text,
335 os.path.splitext(input_filename)[1])
336
325 if not allow_external_script: 337 if not allow_external_script:
326 # We need to inline css and js before we inline images so that image 338 # We need to inline css and js before we inline images so that image
327 # references gets inlined in the css and js 339 # references gets inlined in the css and js
328 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + 340 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' +
329 '(?P<attrs2>.*?)></script>', 341 '(?P<attrs2>.*?)></script>',
330 InlineScript, 342 InlineScript,
331 flat_text) 343 flat_text)
332 344
333 flat_text = _STYLESHEET_RE.sub( 345 flat_text = _STYLESHEET_RE.sub(
334 lambda m: InlineCSSFile(m, '<style>%s</style>'), 346 lambda m: InlineCSSFile(m, '<style>%s</style>'),
335 flat_text) 347 flat_text)
336 348
337 flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text)
338
339 # Check conditional elements, second pass. This catches conditionals in any 349 # Check conditional elements, second pass. This catches conditionals in any
340 # of the text we just inlined. 350 # of the text we just inlined.
341 flat_text = CheckConditionalElements(flat_text) 351 flat_text = CheckConditionalElements(flat_text)
342 352
343 if not preprocess_only: 353 if not preprocess_only:
344 # Allow custom modifications before inlining images. 354 # Allow custom modifications before inlining images.
345 if rewrite_function: 355 if rewrite_function:
346 flat_text = rewrite_function(input_filepath, flat_text, distribution) 356 flat_text = rewrite_function(input_filepath, flat_text, distribution)
347 flat_text = _SRC_RE.sub(SrcReplace, flat_text) 357 flat_text = _SRC_RE.sub(SrcReplace, flat_text)
348 358
349 # TODO(arv): Only do this inside <style> tags. 359 # TODO(arv): Only do this inside <style> tags.
350 flat_text = InlineCSSImages(flat_text) 360 flat_text = InlineCSSImages(flat_text)
351 361
352 flat_text = _ICON_RE.sub(SrcReplace, flat_text) 362 flat_text = _ICON_RE.sub(SrcReplace, flat_text)
353 363
354 if names_only: 364 if names_only:
355 flat_text = None # Will contains garbage if the flag is set anyway. 365 flat_text = None # Will contains garbage if the flag is set anyway.
356 return InlinedData(flat_text, inlined_files) 366 return InlinedData(flat_text, inlined_files)
357 367
358 368
359 def InlineToString(input_filename, grd_node, preprocess_only = False, 369 def InlineToString(input_filename, grd_node, preprocess_only = False,
360 allow_external_script=False, rewrite_function=None, 370 allow_external_script=False, strip_whitespace = True,
Dirk Pranke 2016/07/25 21:12:36 same nit.
aberent 2016/07/27 09:46:36 Done.
361 filename_expansion_function=None): 371 rewrite_function=None, filename_expansion_function=None):
362 """Inlines the resources in a specified file and returns it as a string. 372 """Inlines the resources in a specified file and returns it as a string.
363 373
364 Args: 374 Args:
365 input_filename: name of file to read in 375 input_filename: name of file to read in
366 grd_node: html node from the grd file for this include tag 376 grd_node: html node from the grd file for this include tag
367 Returns: 377 Returns:
368 the inlined data as a string 378 the inlined data as a string
369 """ 379 """
370 try: 380 try:
371 return DoInline( 381 return DoInline(
372 input_filename, 382 input_filename,
373 grd_node, 383 grd_node,
374 preprocess_only=preprocess_only, 384 preprocess_only=preprocess_only,
375 allow_external_script=allow_external_script, 385 allow_external_script=allow_external_script,
386 strip_whitespace = strip_whitespace,
Dirk Pranke 2016/07/25 21:12:36 same nit.
aberent 2016/07/27 09:46:36 Done.
376 rewrite_function=rewrite_function, 387 rewrite_function=rewrite_function,
377 filename_expansion_function=filename_expansion_function).inlined_data 388 filename_expansion_function=filename_expansion_function).inlined_data
378 except IOError, e: 389 except IOError, e:
379 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % 390 raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
380 (e.filename, input_filename, e.strerror)) 391 (e.filename, input_filename, e.strerror))
381 392
382 393
383 def InlineToFile(input_filename, output_filename, grd_node): 394 def InlineToFile(input_filename, output_filename, grd_node):
384 """Inlines the resources in a specified file and writes it. 395 """Inlines the resources in a specified file and writes it.
385 396
(...skipping 18 matching lines...) Expand all
404 rewrite_function=None, 415 rewrite_function=None,
405 filename_expansion_function=None): 416 filename_expansion_function=None):
406 """For a grd file, returns a set of all the files that would be inline.""" 417 """For a grd file, returns a set of all the files that would be inline."""
407 try: 418 try:
408 return DoInline( 419 return DoInline(
409 filename, 420 filename,
410 None, 421 None,
411 names_only=True, 422 names_only=True,
412 preprocess_only=False, 423 preprocess_only=False,
413 allow_external_script=allow_external_script, 424 allow_external_script=allow_external_script,
425 strip_whitespace = False,
Dirk Pranke 2016/07/25 21:12:36 same nit.
aberent 2016/07/27 09:46:36 Done.
414 rewrite_function=rewrite_function, 426 rewrite_function=rewrite_function,
415 filename_expansion_function=filename_expansion_function).inlined_files 427 filename_expansion_function=filename_expansion_function).inlined_files
416 except IOError, e: 428 except IOError, e:
417 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % 429 raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
418 (e.filename, filename, e.strerror)) 430 (e.filename, filename, e.strerror))
419 431
420 432
421 def main(): 433 def main():
422 if len(sys.argv) <= 2: 434 if len(sys.argv) <= 2:
423 print "Flattens a HTML file by inlining its external resources.\n" 435 print "Flattens a HTML file by inlining its external resources.\n"
424 print "html_inline.py inputfile outputfile" 436 print "html_inline.py inputfile outputfile"
425 else: 437 else:
426 InlineToFile(sys.argv[1], sys.argv[2], None) 438 InlineToFile(sys.argv[1], sys.argv[2], None)
427 439
428 if __name__ == '__main__': 440 if __name__ == '__main__':
429 main() 441 main()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698