OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Flattens a HTML file by inlining its external resources. | 6 """Flattens a HTML file by inlining its external resources. |
7 | 7 |
8 This is a small script that takes a HTML file, looks for src attributes | 8 This is a small script that takes a HTML file, looks for src attributes |
9 and inlines the specified file, producing one HTML file with no external | 9 and inlines the specified file, producing one HTML file with no external |
10 dependencies. It recursively inlines the included files. | 10 dependencies. It recursively inlines the included files. |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
82 Returns: | 82 Returns: |
83 string | 83 string |
84 """ | 84 """ |
85 filename = src_match.group('filename') | 85 filename = src_match.group('filename') |
86 quote = src_match.group('quote') | 86 quote = src_match.group('quote') |
87 | 87 |
88 if filename.find(':') != -1: | 88 if filename.find(':') != -1: |
89 # filename is probably a URL, which we don't want to bother inlining | 89 # filename is probably a URL, which we don't want to bother inlining |
90 return src_match.group(0) | 90 return src_match.group(0) |
91 | 91 |
92 filename = filename.replace(DIST_SUBSTR , distribution) | 92 filename = filename.replace(DIST_SUBSTR , distribution) |
flackr
2013/06/06 14:07:19
It looks to me like this won't do the filename rep
dconnelly
2013/06/07 09:10:38
Good catch, thanks. I added the function paramete
| |
93 filepath = os.path.normpath(os.path.join(base_path, filename)) | 93 filepath = os.path.normpath(os.path.join(base_path, filename)) |
94 inlined_files.add(filepath) | 94 inlined_files.add(filepath) |
95 | 95 |
96 if names_only: | 96 if names_only: |
97 return "" | 97 return "" |
98 | 98 |
99 mimetype = mimetypes.guess_type(filename)[0] or 'text/plain' | 99 mimetype = mimetypes.guess_type(filename)[0] or 'text/plain' |
100 inline_data = base64.standard_b64encode(util.ReadFile(filepath, util.BINARY)) | 100 inline_data = base64.standard_b64encode(util.ReadFile(filepath, util.BINARY)) |
101 | 101 |
102 prefix = src_match.string[src_match.start():src_match.start('filename')] | 102 prefix = src_match.string[src_match.start():src_match.start('filename')] |
103 suffix = src_match.string[src_match.end('filename'):src_match.end()] | 103 suffix = src_match.string[src_match.end('filename'):src_match.end()] |
104 return '%sdata:%s;base64,%s%s' % (prefix, mimetype, inline_data, suffix) | 104 return '%sdata:%s;base64,%s%s' % (prefix, mimetype, inline_data, suffix) |
105 | 105 |
106 | 106 |
class InlinedData(object):
  """Helper class holding the results from DoInline().

  Holds the inlined data and the set of filenames of all the inlined
  files.

  Attributes:
    inlined_data: the flattened contents as a string, or None when DoInline()
        was called with names_only=True.
    inlined_files: set of file paths of every file that was inlined.
  """
  def __init__(self, inlined_data, inlined_files):
    self.inlined_data = inlined_data
    self.inlined_files = inlined_files
116 | 116 |
117 def DoInline( | 117 def DoInline( |
118 input_filename, grd_node, allow_external_script=False, names_only=False, | 118 input_filename, grd_node, allow_external_script=False, names_only=False, |
119 rewrite_function=None): | 119 rewrite_function=None, filename_expansion_function=None): |
120 """Helper function that inlines the resources in a specified file. | 120 """Helper function that inlines the resources in a specified file. |
121 | 121 |
122 Reads input_filename, finds all the src attributes and attempts to | 122 Reads input_filename, finds all the src attributes and attempts to |
123 inline the files they are referring to, then returns the result and | 123 inline the files they are referring to, then returns the result and |
124 the set of inlined files. | 124 the set of inlined files. |
125 | 125 |
126 Args: | 126 Args: |
127 input_filename: name of file to read in | 127 input_filename: name of file to read in |
128 grd_node: html node from the grd file for this include tag | 128 grd_node: html node from the grd file for this include tag |
129 names_only: if True, |None| is returned for the inlined contents (faster). | 129 names_only: if True, |None| is returned for the inlined contents (faster). |
130 rewrite_function: function(filepath, text, distribution) which will be | 130 rewrite_function: function(filepath, text, distribution) which will be |
131 called to rewrite html content before inlining images. | 131 called to rewrite html content before inlining images. |
132 filename_expansion_function: function(filename) which will be called to | |
133 rewrite filenames before attempting to read them. | |
132 Returns: | 134 Returns: |
133 a tuple of the inlined data as a string and the set of filenames | 135 a tuple of the inlined data as a string and the set of filenames |
134 of all the inlined files | 136 of all the inlined files |
135 """ | 137 """ |
138 if filename_expansion_function: | |
139 input_filename = filename_expansion_function(input_filename) | |
136 input_filepath = os.path.dirname(input_filename) | 140 input_filepath = os.path.dirname(input_filename) |
137 distribution = GetDistribution() | 141 distribution = GetDistribution() |
138 | 142 |
139 # Keep track of all the files we inline. | 143 # Keep track of all the files we inline. |
140 inlined_files = set() | 144 inlined_files = set() |
141 | 145 |
142 def SrcReplace(src_match, filepath=input_filepath, | 146 def SrcReplace(src_match, filepath=input_filepath, |
143 inlined_files=inlined_files): | 147 inlined_files=inlined_files): |
144 """Helper function to provide SrcInlineAsDataURL with the base file path""" | 148 """Helper function to provide SrcInlineAsDataURL with the base file path""" |
145 return SrcInlineAsDataURL( | 149 return SrcInlineAsDataURL( |
146 src_match, filepath, distribution, inlined_files, names_only=names_only) | 150 src_match, filepath, distribution, inlined_files, names_only=names_only) |
147 | 151 |
148 def GetFilepath(src_match, base_path = input_filepath): | 152 def GetFilepath(src_match, base_path = input_filepath): |
149 filename = src_match.group('filename') | 153 filename = src_match.group('filename') |
150 | 154 |
151 if filename.find(':') != -1: | 155 if filename.find(':') != -1: |
152 # filename is probably a URL, which we don't want to bother inlining | 156 # filename is probably a URL, which we don't want to bother inlining |
153 return None | 157 return None |
154 | 158 |
155 filename = filename.replace('%DISTRIBUTION%', distribution) | 159 filename = filename.replace('%DISTRIBUTION%', distribution) |
160 if filename_expansion_function: | |
161 filename = filename_expansion_function(filename) | |
156 return os.path.normpath(os.path.join(base_path, filename)) | 162 return os.path.normpath(os.path.join(base_path, filename)) |
157 | 163 |
158 def IsConditionSatisfied(src_match): | 164 def IsConditionSatisfied(src_match): |
159 expression = src_match.group('expression') | 165 expression = src_match.group('expression') |
160 return grd_node is None or grd_node.EvaluateCondition(expression) | 166 return grd_node is None or grd_node.EvaluateCondition(expression) |
161 | 167 |
162 def CheckConditionalElements(str): | 168 def CheckConditionalElements(str): |
163 """Helper function to conditionally inline inner elements""" | 169 """Helper function to conditionally inline inner elements""" |
164 while True: | 170 while True: |
165 begin_if = _BEGIN_IF_BLOCK.search(str) | 171 begin_if = _BEGIN_IF_BLOCK.search(str) |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
197 str = leading + trailing | 203 str = leading + trailing |
198 | 204 |
199 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): | 205 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): |
200 """Helper function to inline external files of various types""" | 206 """Helper function to inline external files of various types""" |
201 filepath = GetFilepath(src_match) | 207 filepath = GetFilepath(src_match) |
202 if filepath is None: | 208 if filepath is None: |
203 return src_match.group(0) | 209 return src_match.group(0) |
204 inlined_files.add(filepath) | 210 inlined_files.add(filepath) |
205 | 211 |
206 if names_only: | 212 if names_only: |
207 inlined_files.update(GetResourceFilenames(filepath, | 213 inlined_files.update(GetResourceFilenames( |
208 allow_external_script, | 214 filepath, |
209 rewrite_function)) | 215 allow_external_script, |
216 rewrite_function, | |
217 filename_expansion_function=filename_expansion_function)) | |
210 return "" | 218 return "" |
211 | 219 |
212 return pattern % InlineToString(filepath, grd_node, allow_external_script) | 220 return pattern % InlineToString( |
221 filepath, grd_node, allow_external_script, | |
222 filename_expansion_function=filename_expansion_function) | |
213 | 223 |
214 def InlineIncludeFiles(src_match): | 224 def InlineIncludeFiles(src_match): |
215 """Helper function to directly inline generic external files (without | 225 """Helper function to directly inline generic external files (without |
216 wrapping them with any kind of tags). | 226 wrapping them with any kind of tags). |
217 """ | 227 """ |
218 return InlineFileContents(src_match, '%s') | 228 return InlineFileContents(src_match, '%s') |
219 | 229 |
220 def InlineScript(match): | 230 def InlineScript(match): |
221 """Helper function to inline external script files""" | 231 """Helper function to inline external script files""" |
222 attrs = (match.group('attrs1') + match.group('attrs2')).strip() | 232 attrs = (match.group('attrs1') + match.group('attrs2')).strip() |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
321 flat_text = InlineCSSImages(flat_text) | 331 flat_text = InlineCSSImages(flat_text) |
322 | 332 |
323 flat_text = _ICON_RE.sub(SrcReplace, flat_text) | 333 flat_text = _ICON_RE.sub(SrcReplace, flat_text) |
324 | 334 |
325 if names_only: | 335 if names_only: |
326 flat_text = None # Will contain garbage if the flag is set anyway. | 336 flat_text = None # Will contain garbage if the flag is set anyway. |
327 return InlinedData(flat_text, inlined_files) | 337 return InlinedData(flat_text, inlined_files) |
328 | 338 |
329 | 339 |
def InlineToString(input_filename, grd_node, allow_external_script=False,
                   rewrite_function=None, filename_expansion_function=None):
  """Inlines the resources in a specified file and returns it as a string.

  Thin wrapper around DoInline() that returns only the flattened text and
  converts IOError into an Exception naming both the missing file and the
  file being flattened.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag
    allow_external_script: passed through unchanged to DoInline().
    rewrite_function: function(filepath, text, distribution) which will be
        called to rewrite html content before inlining images.
    filename_expansion_function: function(filename) which will be called to
        rewrite filenames before attempting to read them.
  Returns:
    the inlined data as a string
  Raises:
    Exception: if any file could not be opened while flattening.
  """
  try:
    return DoInline(
        input_filename,
        grd_node,
        allow_external_script=allow_external_script,
        rewrite_function=rewrite_function,
        filename_expansion_function=filename_expansion_function).inlined_data
  except IOError as e:
    # Re-raise with the top-level file name so the failure can be traced back
    # to the document that referenced the unreadable resource.
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, input_filename, e.strerror))
348 | 360 |
349 | 361 |
def InlineToFile(input_filename, output_filename, grd_node):
  """Inlines the resources in a specified file and writes it.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then writes the result
  to output_filename.

  Args:
    input_filename: name of file to read in
    output_filename: name of file to be written to
    grd_node: html node from the grd file for this include tag
  Returns:
    None. The flattened contents are only written to output_filename.
  Raises:
    Exception: propagated from InlineToString() when a file cannot be opened.
  """
  inlined_data = InlineToString(input_filename, grd_node)
  with open(output_filename, 'wb') as out_file:
    # inlined_data is a single string; write() emits it in one call, whereas
    # writelines() would iterate over it character by character.
    out_file.write(inlined_data)
367 | 379 |
368 | 380 |
def GetResourceFilenames(filename,
                         allow_external_script=False,
                         rewrite_function=None,
                         filename_expansion_function=None):
  """Returns a set of all the files that would be inlined for a file.

  Runs DoInline() with names_only=True and no grd node, so only the file
  names are collected.

  Args:
    filename: name of the file whose inlined resources should be listed.
    allow_external_script: passed through unchanged to DoInline().
    rewrite_function: function(filepath, text, distribution) which will be
        called to rewrite html content before inlining images.
    filename_expansion_function: function(filename) which will be called to
        rewrite filenames before attempting to read them.
  Returns:
    the set of filenames of all the files that would be inlined
  Raises:
    Exception: if any file could not be opened while scanning.
  """
  try:
    return DoInline(
        filename,
        None,
        names_only=True,
        allow_external_script=allow_external_script,
        rewrite_function=rewrite_function,
        filename_expansion_function=filename_expansion_function).inlined_files
  except IOError as e:
    # Report both the unreadable file and the file that referenced it.
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, filename, e.strerror))
382 | 397 |
383 | 398 |
def main():
  """Command-line entry point: html_inline.py inputfile outputfile.

  Prints a short usage message when fewer than two arguments are given;
  otherwise flattens the input file into the output file with no grd node.
  """
  if len(sys.argv) <= 2:
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Flattens a HTML file by inlining its external resources.\n")
    print("html_inline.py inputfile outputfile")
  else:
    InlineToFile(sys.argv[1], sys.argv[2], None)

if __name__ == '__main__':
  main()
OLD | NEW |