OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Flattens a HTML file by inlining its external resources. | 6 """Flattens a HTML file by inlining its external resources. |
7 | 7 |
8 This is a small script that takes a HTML file, looks for src attributes | 8 This is a small script that takes a HTML file, looks for src attributes |
9 and inlines the specified file, producing one HTML file with no external | 9 and inlines the specified file, producing one HTML file with no external |
10 dependencies. It recursively inlines the included files. | 10 dependencies. It recursively inlines the included files. |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
67 string | 67 string |
68 """ | 68 """ |
69 filename = src_match.group('filename') | 69 filename = src_match.group('filename') |
70 quote = src_match.group('quote') | 70 quote = src_match.group('quote') |
71 | 71 |
72 if filename.find(':') != -1: | 72 if filename.find(':') != -1: |
73 # filename is probably a URL, which we don't want to bother inlining | 73 # filename is probably a URL, which we don't want to bother inlining |
74 return src_match.group(0) | 74 return src_match.group(0) |
75 | 75 |
76 filename = filename.replace(DIST_SUBSTR , distribution) | 76 filename = filename.replace(DIST_SUBSTR , distribution) |
77 filepath = os.path.join(base_path, filename) | 77 filepath = os.path.normpath(os.path.join(base_path, filename)) |
78 inlined_files.add(filepath) | 78 inlined_files.add(filepath) |
79 | 79 |
80 if names_only: | 80 if names_only: |
81 return "" | 81 return "" |
82 | 82 |
83 mimetype = mimetypes.guess_type(filename)[0] or 'text/plain' | 83 mimetype = mimetypes.guess_type(filename)[0] or 'text/plain' |
84 inline_data = base64.standard_b64encode(util.ReadFile(filepath, util.BINARY)) | 84 inline_data = base64.standard_b64encode(util.ReadFile(filepath, util.BINARY)) |
85 | 85 |
86 prefix = src_match.string[src_match.start():src_match.start('filename')] | 86 prefix = src_match.string[src_match.start():src_match.start('filename')] |
87 suffix = src_match.string[src_match.end('filename'):src_match.end()] | 87 suffix = src_match.string[src_match.end('filename'):src_match.end()] |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
122 | 122 |
123 # Keep track of all the files we inline. | 123 # Keep track of all the files we inline. |
124 inlined_files = set() | 124 inlined_files = set() |
125 | 125 |
126 def SrcReplace(src_match, filepath=input_filepath, | 126 def SrcReplace(src_match, filepath=input_filepath, |
127 inlined_files=inlined_files): | 127 inlined_files=inlined_files): |
128 """Helper function to provide SrcInlineAsDataURL with the base file path""" | 128 """Helper function to provide SrcInlineAsDataURL with the base file path""" |
129 return SrcInlineAsDataURL( | 129 return SrcInlineAsDataURL( |
130 src_match, filepath, distribution, inlined_files, names_only=names_only) | 130 src_match, filepath, distribution, inlined_files, names_only=names_only) |
131 | 131 |
132 def GetFilepath(src_match): | 132 def GetFilepath(src_match, base_path = input_filepath): |
133 filename = src_match.group('filename') | 133 filename = src_match.group('filename') |
134 | 134 |
135 if filename.find(':') != -1: | 135 if filename.find(':') != -1: |
136 # filename is probably a URL, which we don't want to bother inlining | 136 # filename is probably a URL, which we don't want to bother inlining |
137 return None | 137 return None |
138 | 138 |
139 filename = filename.replace('%DISTRIBUTION%', distribution) | 139 filename = filename.replace('%DISTRIBUTION%', distribution) |
140 return os.path.join(input_filepath, filename) | 140 return os.path.normpath(os.path.join(base_path, filename)) |
141 | 141 |
142 def IsConditionSatisfied(src_match): | 142 def IsConditionSatisfied(src_match): |
143 expression = src_match.group('expression') | 143 expression = src_match.group('expression') |
144 return grd_node is None or grd_node.EvaluateCondition(expression) | 144 return grd_node is None or grd_node.EvaluateCondition(expression) |
145 | 145 |
146 def CheckConditionalElements(str): | 146 def CheckConditionalElements(str): |
147 """Helper function to conditionally inline inner elements""" | 147 """Helper function to conditionally inline inner elements""" |
148 while True: | 148 while True: |
149 begin_if = _BEGIN_IF_BLOCK.search(str) | 149 begin_if = _BEGIN_IF_BLOCK.search(str) |
150 if begin_if is None: | 150 if begin_if is None: |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
210 | 210 |
211 def InlineCSSText(text, css_filepath): | 211 def InlineCSSText(text, css_filepath): |
212 """Helper function that inlines external resources in CSS text""" | 212 """Helper function that inlines external resources in CSS text""" |
213 filepath = os.path.dirname(css_filepath) | 213 filepath = os.path.dirname(css_filepath) |
214 # Allow custom modifications before inlining images. | 214 # Allow custom modifications before inlining images. |
215 if rewrite_function: | 215 if rewrite_function: |
216 text = rewrite_function(filepath, text, distribution) | 216 text = rewrite_function(filepath, text, distribution) |
217 text = InlineCSSImages(text, filepath) | 217 text = InlineCSSImages(text, filepath) |
218 return InlineCSSImports(text, filepath) | 218 return InlineCSSImports(text, filepath) |
219 | 219 |
220 def InlineCSSFile(src_match, inlined_files=inlined_files): | 220 def InlineCSSFile(src_match, pattern, base_path=input_filepath): |
221 """Helper function to inline external css files. | 221 """Helper function to inline external CSS files. |
222 | 222 |
223 Args: | 223 Args: |
224 src_match: A regular expression match with a named group named "filename". | 224 src_match: A regular expression match with a named group named "filename". |
| 225 pattern: The pattern to replace with the contents of the CSS file. |
| 226 base_path: The base path to use for resolving the CSS file. |
225 | 227 |
226 Returns: | 228 Returns: |
227 The text that should replace the reference to the CSS file. | 229 The text that should replace the reference to the CSS file. |
228 """ | 230 """ |
229 filepath = GetFilepath(src_match) | 231 filepath = GetFilepath(src_match, base_path) |
230 if filepath is None: | 232 if filepath is None: |
231 return src_match.group(0) | 233 return src_match.group(0) |
232 | 234 |
233 # Even if names_only is set, the CSS file needs to be opened, because it | 235 # Even if names_only is set, the CSS file needs to be opened, because it |
234 # can link to images that need to be added to the file set. | 236 # can link to images that need to be added to the file set. |
235 inlined_files.add(filepath) | 237 inlined_files.add(filepath) |
236 # When resolving CSS files we need to pass in the path so that relative URLs | 238 # When resolving CSS files we need to pass in the path so that relative URLs |
237 # can be resolved. | 239 # can be resolved. |
238 return InlineCSSText(util.ReadFile(filepath, util.BINARY), filepath) | 240 return pattern % InlineCSSText(util.ReadFile(filepath, util.BINARY), |
| 241 filepath) |
239 | 242 |
240 def InlineCSSImages(text, filepath=input_filepath): | 243 def InlineCSSImages(text, filepath=input_filepath): |
241 """Helper function that inlines external images in CSS backgrounds.""" | 244 """Helper function that inlines external images in CSS backgrounds.""" |
242 # Replace contents of url() for css attributes: content, background, | 245 # Replace contents of url() for css attributes: content, background, |
243 # or *-image. | 246 # or *-image. |
244 return re.sub('(content|background|[\w-]*-image):[^;]*' + | 247 return re.sub('(content|background|[\w-]*-image):[^;]*' + |
245 '(url\((?P<quote1>"|\'|)[^"\'()]*(?P=quote1)\)|' + | 248 '(url\((?P<quote1>"|\'|)[^"\'()]*(?P=quote1)\)|' + |
246 'image-set\(' + | 249 'image-set\(' + |
247 '([ ]*url\((?P<quote2>"|\'|)[^"\'()]*(?P=quote2)\)' + | 250 '([ ]*url\((?P<quote2>"|\'|)[^"\'()]*(?P=quote2)\)' + |
248 '[ ]*[0-9.]*x[ ]*(,[ ]*)?)+\))', | 251 '[ ]*[0-9.]*x[ ]*(,[ ]*)?)+\))', |
249 lambda m: InlineCSSUrls(m, filepath), | 252 lambda m: InlineCSSUrls(m, filepath), |
250 text) | 253 text) |
251 | 254 |
252 def InlineCSSUrls(src_match, filepath=input_filepath): | 255 def InlineCSSUrls(src_match, filepath=input_filepath): |
253 """Helper function that inlines each url on a CSS image rule match.""" | 256 """Helper function that inlines each url on a CSS image rule match.""" |
254 # Replace contents of url() references in matches. | 257 # Replace contents of url() references in matches. |
255 return re.sub('url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)(?P=quote)\)', | 258 return re.sub('url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)(?P=quote)\)', |
256 lambda m: SrcReplace(m, filepath), | 259 lambda m: SrcReplace(m, filepath), |
257 src_match.group(0)) | 260 src_match.group(0)) |
258 | 261 |
259 def InlineCSSImports(text, filepath=input_filepath): | 262 def InlineCSSImports(text, filepath=input_filepath): |
260 """Helper function that inlines CSS files included via the @import | 263 """Helper function that inlines CSS files included via the @import |
261 directive. | 264 directive. |
262 """ | 265 """ |
263 return re.sub('@import\s+url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)' + | 266 return re.sub('@import\s+url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)' + |
264 '(?P=quote)\)', | 267 '(?P=quote)\);', |
265 InlineCSSFile, | 268 lambda m: InlineCSSFile(m, '%s', filepath), |
266 text) | 269 text) |
267 | 270 |
268 | 271 |
269 flat_text = util.ReadFile(input_filename, util.BINARY) | 272 flat_text = util.ReadFile(input_filename, util.BINARY) |
270 | 273 |
271 # Check conditional elements, remove unsatisfied ones from the file. We do | 274 # Check conditional elements, remove unsatisfied ones from the file. We do |
272 # this twice. The first pass is so that we don't even bother calling | 275 # this twice. The first pass is so that we don't even bother calling |
273 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually | 276 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually |
274 # going to throw out anyway. | 277 # going to throw out anyway. |
275 flat_text = CheckConditionalElements(flat_text) | 278 flat_text = CheckConditionalElements(flat_text) |
276 | 279 |
277 if not allow_external_script: | 280 if not allow_external_script: |
278 # We need to inline css and js before we inline images so that image | 281 # We need to inline css and js before we inline images so that image |
279 # references gets inlined in the css and js | 282 # references gets inlined in the css and js |
280 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + | 283 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + |
281 '(?P<attrs2>.*?)></script>', | 284 '(?P<attrs2>.*?)></script>', |
282 InlineScript, | 285 InlineScript, |
283 flat_text) | 286 flat_text) |
284 | 287 |
285 flat_text = re.sub( | 288 flat_text = re.sub( |
286 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>', | 289 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>', |
287 lambda m: '<style>%s</style>' % InlineCSSFile(m), | 290 lambda m: InlineCSSFile(m, '<style>%s</style>'), |
288 flat_text) | 291 flat_text) |
289 | 292 |
290 flat_text = re.sub( | 293 flat_text = re.sub( |
291 '<include\s+src="(?P<filename>[^"\']*)".*>', | 294 '<include\s+src="(?P<filename>[^"\']*)".*>', |
292 InlineIncludeFiles, | 295 InlineIncludeFiles, |
293 flat_text) | 296 flat_text) |
294 | 297 |
295 # Check conditional elements, second pass. This catches conditionals in any | 298 # Check conditional elements, second pass. This catches conditionals in any |
296 # of the text we just inlined. | 299 # of the text we just inlined. |
297 flat_text = CheckConditionalElements(flat_text) | 300 flat_text = CheckConditionalElements(flat_text) |
(...skipping 21 matching lines...) Expand all Loading... |
319 def InlineToString(input_filename, grd_node, allow_external_script=False, | 322 def InlineToString(input_filename, grd_node, allow_external_script=False, |
320 rewrite_function=None): | 323 rewrite_function=None): |
321 """Inlines the resources in a specified file and returns it as a string. | 324 """Inlines the resources in a specified file and returns it as a string. |
322 | 325 |
323 Args: | 326 Args: |
324 input_filename: name of file to read in | 327 input_filename: name of file to read in |
325 grd_node: html node from the grd file for this include tag | 328 grd_node: html node from the grd file for this include tag |
326 Returns: | 329 Returns: |
327 the inlined data as a string | 330 the inlined data as a string |
328 """ | 331 """ |
329 return DoInline(input_filename, | 332 try: |
330 grd_node, | 333 return DoInline(input_filename, |
331 allow_external_script=allow_external_script, | 334 grd_node, |
332 rewrite_function=rewrite_function).inlined_data | 335 allow_external_script=allow_external_script, |
| 336 rewrite_function=rewrite_function).inlined_data |
| 337 except IOError, e: |
| 338 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
| 339 (e.filename, input_filename, e.strerror)) |
333 | 340 |
334 | 341 |
335 def InlineToFile(input_filename, output_filename, grd_node): | 342 def InlineToFile(input_filename, output_filename, grd_node): |
336 """Inlines the resources in a specified file and writes it. | 343 """Inlines the resources in a specified file and writes it. |
337 | 344 |
338 Reads input_filename, finds all the src attributes and attempts to | 345 Reads input_filename, finds all the src attributes and attempts to |
339 inline the files they are referring to, then writes the result | 346 inline the files they are referring to, then writes the result |
340 to output_filename. | 347 to output_filename. |
341 | 348 |
342 Args: | 349 Args: |
(...skipping 25 matching lines...) Expand all Loading... |
368 | 375 |
369 def main(): | 376 def main(): |
370 if len(sys.argv) <= 2: | 377 if len(sys.argv) <= 2: |
371 print "Flattens a HTML file by inlining its external resources.\n" | 378 print "Flattens a HTML file by inlining its external resources.\n" |
372 print "html_inline.py inputfile outputfile" | 379 print "html_inline.py inputfile outputfile" |
373 else: | 380 else: |
374 InlineToFile(sys.argv[1], sys.argv[2], None) | 381 InlineToFile(sys.argv[1], sys.argv[2], None) |
375 | 382 |
376 if __name__ == '__main__': | 383 if __name__ == '__main__': |
377 main() | 384 main() |
OLD | NEW |