OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Flattens a HTML file by inlining its external resources. | 6 """Flattens a HTML file by inlining its external resources. |
7 | 7 |
8 This is a small script that takes a HTML file, looks for src attributes | 8 This is a small script that takes a HTML file, looks for src attributes |
9 and inlines the specified file, producing one HTML file with no external | 9 and inlines the specified file, producing one HTML file with no external |
10 dependencies. It recursively inlines the included files. | 10 dependencies. It recursively inlines the included files. |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
65 """ | 65 """ |
66 distribution = DIST_DEFAULT | 66 distribution = DIST_DEFAULT |
67 if DIST_ENV_VAR in os.environ.keys(): | 67 if DIST_ENV_VAR in os.environ.keys(): |
68 distribution = os.environ[DIST_ENV_VAR] | 68 distribution = os.environ[DIST_ENV_VAR] |
69 if len(distribution) > 1 and distribution[0] == '_': | 69 if len(distribution) > 1 and distribution[0] == '_': |
70 distribution = distribution[1:].lower() | 70 distribution = distribution[1:].lower() |
71 return distribution | 71 return distribution |
72 | 72 |
73 | 73 |
74 def SrcInlineAsDataURL( | 74 def SrcInlineAsDataURL( |
75 src_match, base_path, distribution, inlined_files, names_only=False): | 75 src_match, base_path, distribution, inlined_files, names_only=False, |
| 76 filename_expansion_function=None): |
76 """regex replace function. | 77 """regex replace function. |
77 | 78 |
78 Takes a regex match for src="filename", attempts to read the file | 79 Takes a regex match for src="filename", attempts to read the file |
79 at 'filename' and returns the src attribute with the file inlined | 80 at 'filename' and returns the src attribute with the file inlined |
80 as a data URI. If it finds DIST_SUBSTR string in file name, replaces | 81 as a data URI. If it finds DIST_SUBSTR string in file name, replaces |
81 it with distribution. | 82 it with distribution. |
82 | 83 |
83 Args: | 84 Args: |
84 src_match: regex match object with 'filename' and 'quote' named capturing | 85 src_match: regex match object with 'filename' and 'quote' named capturing |
85 groups | 86 groups |
86 base_path: path that to look for files in | 87 base_path: path that to look for files in |
87 distribution: string that should replace DIST_SUBSTR | 88 distribution: string that should replace DIST_SUBSTR |
88 inlined_files: The name of the opened file is appended to this list. | 89 inlined_files: The name of the opened file is appended to this list. |
89 names_only: If true, the function will not read the file but just return "". | 90 names_only: If true, the function will not read the file but just return "". |
90 It will still add the filename to |inlined_files|. | 91 It will still add the filename to |inlined_files|. |
91 | 92 |
92 Returns: | 93 Returns: |
93 string | 94 string |
94 """ | 95 """ |
95 filename = src_match.group('filename') | 96 filename = src_match.group('filename') |
| 97 if filename_expansion_function: |
| 98 filename = filename_expansion_function(filename) |
96 quote = src_match.group('quote') | 99 quote = src_match.group('quote') |
97 | 100 |
98 if filename.find(':') != -1: | 101 if filename.find(':') != -1: |
99 # filename is probably a URL, which we don't want to bother inlining | 102 # filename is probably a URL, which we don't want to bother inlining |
100 return src_match.group(0) | 103 return src_match.group(0) |
101 | 104 |
102 filename = filename.replace(DIST_SUBSTR , distribution) | 105 filename = filename.replace(DIST_SUBSTR , distribution) |
103 filepath = os.path.normpath(os.path.join(base_path, filename)) | 106 filepath = os.path.normpath(os.path.join(base_path, filename)) |
104 inlined_files.add(filepath) | 107 inlined_files.add(filepath) |
105 | 108 |
(...skipping 13 matching lines...) Expand all Loading... |
119 | 122 |
120 Holds the inlined data and the set of filenames of all the inlined | 123 Holds the inlined data and the set of filenames of all the inlined |
121 files. | 124 files. |
122 """ | 125 """ |
123 def __init__(self, inlined_data, inlined_files): | 126 def __init__(self, inlined_data, inlined_files): |
124 self.inlined_data = inlined_data | 127 self.inlined_data = inlined_data |
125 self.inlined_files = inlined_files | 128 self.inlined_files = inlined_files |
126 | 129 |
127 def DoInline( | 130 def DoInline( |
128 input_filename, grd_node, allow_external_script=False, names_only=False, | 131 input_filename, grd_node, allow_external_script=False, names_only=False, |
129 rewrite_function=None): | 132 rewrite_function=None, filename_expansion_function=None): |
130 """Helper function that inlines the resources in a specified file. | 133 """Helper function that inlines the resources in a specified file. |
131 | 134 |
132 Reads input_filename, finds all the src attributes and attempts to | 135 Reads input_filename, finds all the src attributes and attempts to |
133 inline the files they are referring to, then returns the result and | 136 inline the files they are referring to, then returns the result and |
134 the set of inlined files. | 137 the set of inlined files. |
135 | 138 |
136 Args: | 139 Args: |
137 input_filename: name of file to read in | 140 input_filename: name of file to read in |
138 grd_node: html node from the grd file for this include tag | 141 grd_node: html node from the grd file for this include tag |
139 names_only: |nil| will be returned for the inlined contents (faster). | 142 names_only: |nil| will be returned for the inlined contents (faster). |
140 rewrite_function: function(filepath, text, distribution) which will be | 143 rewrite_function: function(filepath, text, distribution) which will be |
141 called to rewrite html content before inlining images. | 144 called to rewrite html content before inlining images. |
| 145 filename_expansion_function: function(filename) which will be called to |
| 146 rewrite filenames before attempting to read them. |
142 Returns: | 147 Returns: |
143 a tuple of the inlined data as a string and the set of filenames | 148 a tuple of the inlined data as a string and the set of filenames |
144 of all the inlined files | 149 of all the inlined files |
145 """ | 150 """ |
| 151 if filename_expansion_function: |
| 152 input_filename = filename_expansion_function(input_filename) |
146 input_filepath = os.path.dirname(input_filename) | 153 input_filepath = os.path.dirname(input_filename) |
147 distribution = GetDistribution() | 154 distribution = GetDistribution() |
148 | 155 |
149 # Keep track of all the files we inline. | 156 # Keep track of all the files we inline. |
150 inlined_files = set() | 157 inlined_files = set() |
151 | 158 |
152 def SrcReplace(src_match, filepath=input_filepath, | 159 def SrcReplace(src_match, filepath=input_filepath, |
153 inlined_files=inlined_files): | 160 inlined_files=inlined_files): |
154 """Helper function to provide SrcInlineAsDataURL with the base file path""" | 161 """Helper function to provide SrcInlineAsDataURL with the base file path""" |
155 return SrcInlineAsDataURL( | 162 return SrcInlineAsDataURL( |
156 src_match, filepath, distribution, inlined_files, names_only=names_only) | 163 src_match, filepath, distribution, inlined_files, names_only=names_only, |
| 164 filename_expansion_function=filename_expansion_function) |
157 | 165 |
158 def GetFilepath(src_match, base_path = input_filepath): | 166 def GetFilepath(src_match, base_path = input_filepath): |
159 filename = src_match.group('filename') | 167 filename = src_match.group('filename') |
160 | 168 |
161 if filename.find(':') != -1: | 169 if filename.find(':') != -1: |
162 # filename is probably a URL, which we don't want to bother inlining | 170 # filename is probably a URL, which we don't want to bother inlining |
163 return None | 171 return None |
164 | 172 |
165 filename = filename.replace('%DISTRIBUTION%', distribution) | 173 filename = filename.replace('%DISTRIBUTION%', distribution) |
| 174 if filename_expansion_function: |
| 175 filename = filename_expansion_function(filename) |
166 return os.path.normpath(os.path.join(base_path, filename)) | 176 return os.path.normpath(os.path.join(base_path, filename)) |
167 | 177 |
168 def IsConditionSatisfied(src_match): | 178 def IsConditionSatisfied(src_match): |
169 expression = src_match.group('expression') | 179 expression = src_match.group('expression') |
170 return grd_node is None or grd_node.EvaluateCondition(expression) | 180 return grd_node is None or grd_node.EvaluateCondition(expression) |
171 | 181 |
172 def CheckConditionalElements(str): | 182 def CheckConditionalElements(str): |
173 """Helper function to conditionally inline inner elements""" | 183 """Helper function to conditionally inline inner elements""" |
174 while True: | 184 while True: |
175 begin_if = _BEGIN_IF_BLOCK.search(str) | 185 begin_if = _BEGIN_IF_BLOCK.search(str) |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
207 str = leading + trailing | 217 str = leading + trailing |
208 | 218 |
209 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): | 219 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): |
210 """Helper function to inline external files of various types""" | 220 """Helper function to inline external files of various types""" |
211 filepath = GetFilepath(src_match) | 221 filepath = GetFilepath(src_match) |
212 if filepath is None: | 222 if filepath is None: |
213 return src_match.group(0) | 223 return src_match.group(0) |
214 inlined_files.add(filepath) | 224 inlined_files.add(filepath) |
215 | 225 |
216 if names_only: | 226 if names_only: |
217 inlined_files.update(GetResourceFilenames(filepath, | 227 inlined_files.update(GetResourceFilenames( |
218 allow_external_script, | 228 filepath, |
219 rewrite_function)) | 229 allow_external_script, |
| 230 rewrite_function, |
| 231 filename_expansion_function=filename_expansion_function)) |
220 return "" | 232 return "" |
221 | 233 |
222 return pattern % InlineToString(filepath, grd_node, allow_external_script) | 234 return pattern % InlineToString( |
| 235 filepath, grd_node, allow_external_script, |
| 236 filename_expansion_function=filename_expansion_function) |
223 | 237 |
224 def InlineIncludeFiles(src_match): | 238 def InlineIncludeFiles(src_match): |
225 """Helper function to directly inline generic external files (without | 239 """Helper function to directly inline generic external files (without |
226 wrapping them with any kind of tags). | 240 wrapping them with any kind of tags). |
227 """ | 241 """ |
228 return InlineFileContents(src_match, '%s') | 242 return InlineFileContents(src_match, '%s') |
229 | 243 |
230 def InlineScript(match): | 244 def InlineScript(match): |
231 """Helper function to inline external script files""" | 245 """Helper function to inline external script files""" |
232 attrs = (match.group('attrs1') + match.group('attrs2')).strip() | 246 attrs = (match.group('attrs1') + match.group('attrs2')).strip() |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
331 flat_text = InlineCSSImages(flat_text) | 345 flat_text = InlineCSSImages(flat_text) |
332 | 346 |
333 flat_text = _ICON_RE.sub(SrcReplace, flat_text) | 347 flat_text = _ICON_RE.sub(SrcReplace, flat_text) |
334 | 348 |
335 if names_only: | 349 if names_only: |
336 flat_text = None # Will contains garbage if the flag is set anyway. | 350 flat_text = None # Will contains garbage if the flag is set anyway. |
337 return InlinedData(flat_text, inlined_files) | 351 return InlinedData(flat_text, inlined_files) |
338 | 352 |
339 | 353 |
def InlineToString(input_filename, grd_node, allow_external_script=False,
                   rewrite_function=None, filename_expansion_function=None):
  """Inlines the resources in a specified file and returns it as a string.

  Thin wrapper around DoInline that returns only the flattened text and
  converts low-level IOErrors into a more descriptive Exception.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag
    allow_external_script: forwarded unchanged to DoInline.
    rewrite_function: function(filepath, text, distribution), forwarded
        unchanged to DoInline.
    filename_expansion_function: function(filename), forwarded unchanged to
        DoInline.
  Returns:
    the inlined data as a string
  Raises:
    Exception: if an IOError is raised while inlining. The message names the
        file that could not be opened, the file being flattened and the
        error string of the underlying IOError.
  """
  try:
    return DoInline(
        input_filename,
        grd_node,
        allow_external_script=allow_external_script,
        rewrite_function=rewrite_function,
        filename_expansion_function=filename_expansion_function).inlined_data
  except IOError as e:
    # "except ... as" is accepted by Python 2.6+ and Python 3, whereas the
    # older "except IOError, e" form is a syntax error on Python 3.
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, input_filename, e.strerror))
358 | 374 |
359 | 375 |
def InlineToFile(input_filename, output_filename, grd_node):
  """Inlines the resources in a specified file and writes it.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then writes the result
  to output_filename.

  Args:
    input_filename: name of file to read in
    output_filename: name of file to be written to
    grd_node: html node from the grd file for this include tag
  Returns:
    None. The flattened data is only written to output_filename.
  """
  inlined_data = InlineToString(input_filename, grd_node)
  with open(output_filename, 'wb') as out_file:
    # inlined_data is a single string, so write() is the right call;
    # writelines() would iterate over it one character at a time.
    out_file.write(inlined_data)
377 | 393 |
378 | 394 |
def GetResourceFilenames(filename,
                         allow_external_script=False,
                         rewrite_function=None,
                         filename_expansion_function=None):
  """Returns the set of all the files that would be inlined into |filename|.

  Runs DoInline in names_only mode with no grd node and returns only the
  collected set of file names.

  Args:
    filename: name of the file whose inlined resources should be listed
    allow_external_script: forwarded unchanged to DoInline.
    rewrite_function: function(filepath, text, distribution), forwarded
        unchanged to DoInline.
    filename_expansion_function: function(filename), forwarded unchanged to
        DoInline.
  Returns:
    the inlined_files set produced by DoInline
  Raises:
    Exception: if an IOError is raised while scanning. The message names the
        file that could not be opened, the file being flattened and the
        error string of the underlying IOError.
  """
  try:
    return DoInline(
        filename,
        None,
        names_only=True,
        allow_external_script=allow_external_script,
        rewrite_function=rewrite_function,
        filename_expansion_function=filename_expansion_function).inlined_files
  except IOError as e:
    # "except ... as" is accepted by Python 2.6+ and Python 3, whereas the
    # older "except IOError, e" form is a syntax error on Python 3.
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, filename, e.strerror))
392 | 411 |
393 | 412 |
def main():
  """Command-line entry point: html_inline.py inputfile outputfile.

  Prints a short usage message when fewer than two arguments are given;
  otherwise flattens sys.argv[1] into sys.argv[2] with no grd node.
  """
  if len(sys.argv) <= 2:
    # Single-argument parenthesised print produces identical output whether
    # print is a statement (Python 2) or a function (Python 3).
    print("Flattens a HTML file by inlining its external resources.\n")
    print("html_inline.py inputfile outputfile")
  else:
    InlineToFile(sys.argv[1], sys.argv[2], None)


if __name__ == '__main__':
  main()
OLD | NEW |