| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Flattens a HTML file by inlining its external resources. | 6 """Flattens a HTML file by inlining its external resources. |
| 7 | 7 |
| 8 This is a small script that takes a HTML file, looks for src attributes | 8 This is a small script that takes a HTML file, looks for src attributes |
| 9 and inlines the specified file, producing one HTML file with no external | 9 and inlines the specified file, producing one HTML file with no external |
| 10 dependencies. It recursively inlines the included files. | 10 dependencies. It recursively inlines the included files. |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 88 """Helper class holding the results from DoInline(). | 88 """Helper class holding the results from DoInline(). |
| 89 | 89 |
| 90 Holds the inlined data and the set of filenames of all the inlined | 90 Holds the inlined data and the set of filenames of all the inlined |
| 91 files. | 91 files. |
| 92 """ | 92 """ |
| 93 def __init__(self, inlined_data, inlined_files): | 93 def __init__(self, inlined_data, inlined_files): |
| 94 self.inlined_data = inlined_data | 94 self.inlined_data = inlined_data |
| 95 self.inlined_files = inlined_files | 95 self.inlined_files = inlined_files |
| 96 | 96 |
| 97 def DoInline( | 97 def DoInline( |
| 98 input_filename, grd_node, allow_external_script=False, names_only=False): | 98 input_filename, grd_node, allow_external_script=False, names_only=False, |
| 99 rewrite_function=None): |
| 99 """Helper function that inlines the resources in a specified file. | 100 """Helper function that inlines the resources in a specified file. |
| 100 | 101 |
| 101 Reads input_filename, finds all the src attributes and attempts to | 102 Reads input_filename, finds all the src attributes and attempts to |
| 102 inline the files they are referring to, then returns the result and | 103 inline the files they are referring to, then returns the result and |
| 103 the set of inlined files. | 104 the set of inlined files. |
| 104 | 105 |
| 105 Args: | 106 Args: |
| 106 input_filename: name of file to read in | 107 input_filename: name of file to read in |
| 107 grd_node: html node from the grd file for this include tag | 108 grd_node: html node from the grd file for this include tag |
| 108 names_only: if set, None is returned for the inlined contents (faster). | 109 names_only: if set, None is returned for the inlined contents (faster). |
| 110 rewrite_function: function(filepath, text, distribution) returning the |
| 111 rewritten text; called on html and css content before inlining images. |
| 109 Returns: | 112 Returns: |
| 110 an InlinedData object holding the inlined data as a string and the | 113 an InlinedData object holding the inlined data as a string and the |
| 111 set of filenames of all the inlined files | 114 set of filenames of all the inlined files |
| 112 """ | 115 """ |
| 113 input_filepath = os.path.dirname(input_filename) | 116 input_filepath = os.path.dirname(input_filename) |
| 114 | 117 |
| 115 distribution = DIST_DEFAULT | 118 distribution = DIST_DEFAULT |
| 116 if DIST_ENV_VAR in os.environ.keys(): | 119 if DIST_ENV_VAR in os.environ.keys(): |
| 117 distribution = os.environ[DIST_ENV_VAR] | 120 distribution = os.environ[DIST_ENV_VAR] |
| 118 if len(distribution) > 1 and distribution[0] == '_': | 121 if len(distribution) > 1 and distribution[0] == '_': |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 201 def InlineScript(match): | 204 def InlineScript(match): |
| 202 """Helper function to inline external script files""" | 205 """Helper function to inline external script files""" |
| 203 attrs = (match.group('attrs1') + match.group('attrs2')).strip() | 206 attrs = (match.group('attrs1') + match.group('attrs2')).strip() |
| 204 if attrs: | 207 if attrs: |
| 205 attrs = ' ' + attrs | 208 attrs = ' ' + attrs |
| 206 return InlineFileContents(match, '<script' + attrs + '>%s</script>') | 209 return InlineFileContents(match, '<script' + attrs + '>%s</script>') |
| 207 | 210 |
| 208 def InlineCSSText(text, css_filepath): | 211 def InlineCSSText(text, css_filepath): |
| 209 """Helper function that inlines external resources in CSS text""" | 212 """Helper function that inlines external resources in CSS text""" |
| 210 filepath = os.path.dirname(css_filepath) | 213 filepath = os.path.dirname(css_filepath) |
| 214 # Allow custom modifications before inlining images. |
| 215 if rewrite_function: |
| 216 text = rewrite_function(filepath, text, distribution) |
| 211 return InlineCSSImages(text, filepath) | 217 return InlineCSSImages(text, filepath) |
| 212 | 218 |
| 213 def InlineCSSFile(src_match, inlined_files=inlined_files): | 219 def InlineCSSFile(src_match, inlined_files=inlined_files): |
| 214 """Helper function to inline external css files. | 220 """Helper function to inline external css files. |
| 215 | 221 |
| 216 Args: | 222 Args: |
| 217 src_match: A regular expression match with a named group named "filename". | 223 src_match: A regular expression match with a named group named "filename". |
| 218 | 224 |
| 219 Returns: | 225 Returns: |
| 220 The text that should replace the reference to the CSS file. | 226 The text that should replace the reference to the CSS file. |
| 221 """ | 227 """ |
| 222 filepath = GetFilepath(src_match) | 228 filepath = GetFilepath(src_match) |
| 223 if filepath is None: | 229 if filepath is None: |
| 224 return src_match.group(0) | 230 return src_match.group(0) |
| 225 | 231 |
| 226 # Even if names_only is set, the CSS file needs to be opened, because it | 232 # Even if names_only is set, the CSS file needs to be opened, because it |
| 227 # can link to images that need to be added to the file set. | 233 # can link to images that need to be added to the file set. |
| 228 inlined_files.add(filepath) | 234 inlined_files.add(filepath) |
| 229 # When resolving CSS files we need to pass in the path so that relative URLs | 235 # When resolving CSS files we need to pass in the path so that relative URLs |
| 230 # can be resolved. | 236 # can be resolved. |
| 231 return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath) | 237 return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath) |
| 232 | 238 |
| 233 def InlineCSSImages(text, filepath=input_filepath): | 239 def InlineCSSImages(text, filepath=input_filepath): |
| 234 """Helper function that inlines external images in CSS backgrounds.""" | 240 """Helper function that inlines external images in CSS backgrounds.""" |
| 235 # Replace contents of url() for css attributes: content, background, | 241 # Replace contents of url() for css attributes: content, background, |
| 236 # or *-image. | 242 # or *-image. |
| 237 return re.sub('(?:content|background|[\w-]*-image):[ ]*' + | 243 return re.sub('(?:content|background|[\w-]*-image):[^;]*' + |
| 238 'url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")', | 244 '(?:url\((?:\'|\")([^"\'\)\(]*)(?:\'|\")\)|' + |
| 245 'image-set\(' + |
| 246 '([ ]*url\((?:\'|\")([^"\'\)\(]*)(?:\'|\")\)' + |
| 247 '[ ]*[0-9.]*x[ ]*(,[ ]*)?)*\))', |
| 248 lambda m: InlineCSSUrls(m, filepath), |
| 249 text) |
| 250 |
| 251 def InlineCSSUrls(src_match, filepath=input_filepath): |
| 252 """Helper function that inlines each url on a CSS image rule match.""" |
| 253 # Replace contents of url() references in matches. |
| 254 return re.sub('url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")', |
| 239 lambda m: SrcReplace(m, filepath), | 255 lambda m: SrcReplace(m, filepath), |
| 240 text) | 256 src_match.group(0)) |
| 257 |
| 258 |
| 241 | 259 |
| 242 flat_text = ReadFile(input_filename) | 260 flat_text = ReadFile(input_filename) |
| 243 | 261 |
| 244 if not allow_external_script: | 262 if not allow_external_script: |
| 245 # We need to inline css and js before we inline images so that image | 263 # We need to inline css and js before we inline images so that image |
| 246 # references get inlined in the css and js | 264 # references get inlined in the css and js |
| 247 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + | 265 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + |
| 248 '(?P<attrs2>.*?)></script>', | 266 '(?P<attrs2>.*?)></script>', |
| 249 InlineScript, | 267 InlineScript, |
| 250 flat_text) | 268 flat_text) |
| 251 | 269 |
| 252 flat_text = re.sub( | 270 flat_text = re.sub( |
| 253 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>', | 271 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>', |
| 254 InlineCSSFile, | 272 InlineCSSFile, |
| 255 flat_text) | 273 flat_text) |
| 256 | 274 |
| 257 flat_text = re.sub( | 275 flat_text = re.sub( |
| 258 '<include\s+src="(?P<filename>[^"\']*)".*>', | 276 '<include\s+src="(?P<filename>[^"\']*)".*>', |
| 259 InlineIncludeFiles, | 277 InlineIncludeFiles, |
| 260 flat_text) | 278 flat_text) |
| 261 | 279 |
| 262 # Check conditional elements, remove unsatisfied ones from the file. | 280 # Check conditional elements, remove unsatisfied ones from the file. |
| 263 flat_text = CheckConditionalElements(flat_text) | 281 flat_text = CheckConditionalElements(flat_text) |
| 264 | 282 |
| 265 flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"', | 283 flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"', |
| 266 SrcReplace, | 284 SrcReplace, |
| 267 flat_text) | 285 flat_text) |
| 268 | 286 |
| 287 # Allow custom modifications before inlining images. |
| 288 if rewrite_function: |
| 289 flat_text = rewrite_function(input_filepath, flat_text, distribution) |
| 290 |
| 269 # TODO(arv): Only do this inside <style> tags. | 291 # TODO(arv): Only do this inside <style> tags. |
| 270 flat_text = InlineCSSImages(flat_text) | 292 flat_text = InlineCSSImages(flat_text) |
| 271 | 293 |
| 272 flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"', | 294 flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"', |
| 273 SrcReplace, | 295 SrcReplace, |
| 274 flat_text) | 296 flat_text) |
| 275 | 297 |
| 276 if names_only: | 298 if names_only: |
| 277 flat_text = None # Will contain garbage if the flag is set anyway. | 299 flat_text = None # Will contain garbage if the flag is set anyway. |
| 278 return InlinedData(flat_text, inlined_files) | 300 return InlinedData(flat_text, inlined_files) |
| 279 | 301 |
| 280 | 302 |
| 281 def InlineToString(input_filename, grd_node, allow_external_script=False): | 303 def InlineToString(input_filename, grd_node, allow_external_script=False, |
| 304 rewrite_function=None): |
| 282 """Inlines the resources in a specified file and returns it as a string. | 305 """Inlines the resources in a specified file and returns it as a string. |
| 283 | 306 |
| 284 Args: | 307 Args: |
| 285 input_filename: name of file to read in | 308 input_filename: name of file to read in |
| 286 grd_node: html node from the grd file for this include tag | 309 grd_node: html node from the grd file for this include tag |
| 287 Returns: | 310 Returns: |
| 288 the inlined data as a string | 311 the inlined data as a string |
| 289 """ | 312 """ |
| 290 try: | 313 try: |
| 291 return DoInline(input_filename, | 314 return DoInline(input_filename, |
| 292 grd_node, | 315 grd_node, |
| 293 allow_external_script=allow_external_script).inlined_data | 316 allow_external_script=allow_external_script, |
| 317 rewrite_function=rewrite_function).inlined_data |
| 294 except IOError, e: | 318 except IOError, e: |
| 295 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % | 319 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
| 296 (e.filename, input_filename, e.strerror)) | 320 (e.filename, input_filename, e.strerror)) |
| 297 | 321 |
| 298 | 322 |
| 299 def InlineToFile(input_filename, output_filename, grd_node): | 323 def InlineToFile(input_filename, output_filename, grd_node): |
| 300 """Inlines the resources in a specified file and writes it. | 324 """Inlines the resources in a specified file and writes it. |
| 301 | 325 |
| 302 Reads input_filename, finds all the src attributes and attempts to | 326 Reads input_filename, finds all the src attributes and attempts to |
| 303 inline the files they are referring to, then writes the result | 327 inline the files they are referring to, then writes the result |
| (...skipping 24 matching lines...) Expand all Loading... |
| 328 | 352 |
| 329 def main(): | 353 def main(): |
| 330 if len(sys.argv) <= 2: | 354 if len(sys.argv) <= 2: |
| 331 print "Flattens a HTML file by inlining its external resources.\n" | 355 print "Flattens a HTML file by inlining its external resources.\n" |
| 332 print "html_inline.py inputfile outputfile" | 356 print "html_inline.py inputfile outputfile" |
| 333 else: | 357 else: |
| 334 InlineToFile(sys.argv[1], sys.argv[2], None) | 358 InlineToFile(sys.argv[1], sys.argv[2], None) |
| 335 | 359 |
| 336 if __name__ == '__main__': | 360 if __name__ == '__main__': |
| 337 main() | 361 main() |
| OLD | NEW |