| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Flattens a HTML file by inlining its external resources. | 6 """Flattens a HTML file by inlining its external resources. |
| 7 | 7 |
| 8 This is a small script that takes a HTML file, looks for src attributes | 8 This is a small script that takes a HTML file, looks for src attributes |
| 9 and inlines the specified file, producing one HTML file with no external | 9 and inlines the specified file, producing one HTML file with no external |
| 10 dependencies. It recursively inlines the included files. | 10 dependencies. It recursively inlines the included files. |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 65 """ | 65 """ |
| 66 distribution = DIST_DEFAULT | 66 distribution = DIST_DEFAULT |
| 67 if DIST_ENV_VAR in os.environ.keys(): | 67 if DIST_ENV_VAR in os.environ.keys(): |
| 68 distribution = os.environ[DIST_ENV_VAR] | 68 distribution = os.environ[DIST_ENV_VAR] |
| 69 if len(distribution) > 1 and distribution[0] == '_': | 69 if len(distribution) > 1 and distribution[0] == '_': |
| 70 distribution = distribution[1:].lower() | 70 distribution = distribution[1:].lower() |
| 71 return distribution | 71 return distribution |
| 72 | 72 |
| 73 | 73 |
| 74 def SrcInlineAsDataURL( | 74 def SrcInlineAsDataURL( |
| 75 src_match, base_path, distribution, inlined_files, names_only=False): | 75 src_match, base_path, distribution, inlined_files, names_only=False, |
| 76 filename_expansion_function=None): |
| 76 """regex replace function. | 77 """regex replace function. |
| 77 | 78 |
| 78 Takes a regex match for src="filename", attempts to read the file | 79 Takes a regex match for src="filename", attempts to read the file |
| 79 at 'filename' and returns the src attribute with the file inlined | 80 at 'filename' and returns the src attribute with the file inlined |
| 80 as a data URI. If it finds DIST_SUBSTR string in file name, replaces | 81 as a data URI. If it finds DIST_SUBSTR string in file name, replaces |
| 81 it with distribution. | 82 it with distribution. |
| 82 | 83 |
| 83 Args: | 84 Args: |
| 84 src_match: regex match object with 'filename' and 'quote' named capturing | 85 src_match: regex match object with 'filename' and 'quote' named capturing |
| 85 groups | 86 groups |
| 86 base_path: path that to look for files in | 87 base_path: path that to look for files in |
| 87 distribution: string that should replace DIST_SUBSTR | 88 distribution: string that should replace DIST_SUBSTR |
| 88 inlined_files: The name of the opened file is appended to this list. | 89 inlined_files: The name of the opened file is appended to this list. |
| 89 names_only: If true, the function will not read the file but just return "". | 90 names_only: If true, the function will not read the file but just return "". |
| 90 It will still add the filename to |inlined_files|. | 91 It will still add the filename to |inlined_files|. |
| 91 | 92 |
| 92 Returns: | 93 Returns: |
| 93 string | 94 string |
| 94 """ | 95 """ |
| 95 filename = src_match.group('filename') | 96 filename = src_match.group('filename') |
| 97 if filename_expansion_function: |
| 98 filename = filename_expansion_function(filename) |
| 96 quote = src_match.group('quote') | 99 quote = src_match.group('quote') |
| 97 | 100 |
| 98 if filename.find(':') != -1: | 101 if filename.find(':') != -1: |
| 99 # filename is probably a URL, which we don't want to bother inlining | 102 # filename is probably a URL, which we don't want to bother inlining |
| 100 return src_match.group(0) | 103 return src_match.group(0) |
| 101 | 104 |
| 102 filename = filename.replace(DIST_SUBSTR , distribution) | 105 filename = filename.replace(DIST_SUBSTR , distribution) |
| 103 filepath = os.path.normpath(os.path.join(base_path, filename)) | 106 filepath = os.path.normpath(os.path.join(base_path, filename)) |
| 104 inlined_files.add(filepath) | 107 inlined_files.add(filepath) |
| 105 | 108 |
| (...skipping 13 matching lines...) Expand all Loading... |
| 119 | 122 |
| 120 Holds the inlined data and the set of filenames of all the inlined | 123 Holds the inlined data and the set of filenames of all the inlined |
| 121 files. | 124 files. |
| 122 """ | 125 """ |
| 123 def __init__(self, inlined_data, inlined_files): | 126 def __init__(self, inlined_data, inlined_files): |
| 124 self.inlined_data = inlined_data | 127 self.inlined_data = inlined_data |
| 125 self.inlined_files = inlined_files | 128 self.inlined_files = inlined_files |
| 126 | 129 |
| 127 def DoInline( | 130 def DoInline( |
| 128 input_filename, grd_node, allow_external_script=False, names_only=False, | 131 input_filename, grd_node, allow_external_script=False, names_only=False, |
| 129 rewrite_function=None): | 132 rewrite_function=None, filename_expansion_function=None): |
| 130 """Helper function that inlines the resources in a specified file. | 133 """Helper function that inlines the resources in a specified file. |
| 131 | 134 |
| 132 Reads input_filename, finds all the src attributes and attempts to | 135 Reads input_filename, finds all the src attributes and attempts to |
| 133 inline the files they are referring to, then returns the result and | 136 inline the files they are referring to, then returns the result and |
| 134 the set of inlined files. | 137 the set of inlined files. |
| 135 | 138 |
| 136 Args: | 139 Args: |
| 137 input_filename: name of file to read in | 140 input_filename: name of file to read in |
| 138 grd_node: html node from the grd file for this include tag | 141 grd_node: html node from the grd file for this include tag |
| 139 names_only: |nil| will be returned for the inlined contents (faster). | 142 names_only: |nil| will be returned for the inlined contents (faster). |
| 140 rewrite_function: function(filepath, text, distribution) which will be | 143 rewrite_function: function(filepath, text, distribution) which will be |
| 141 called to rewrite html content before inlining images. | 144 called to rewrite html content before inlining images. |
| 145 filename_expansion_function: function(filename) which will be called to |
| 146 rewrite filenames before attempting to read them. |
| 142 Returns: | 147 Returns: |
| 143 a tuple of the inlined data as a string and the set of filenames | 148 a tuple of the inlined data as a string and the set of filenames |
| 144 of all the inlined files | 149 of all the inlined files |
| 145 """ | 150 """ |
| 151 if filename_expansion_function: |
| 152 input_filename = filename_expansion_function(input_filename) |
| 146 input_filepath = os.path.dirname(input_filename) | 153 input_filepath = os.path.dirname(input_filename) |
| 147 distribution = GetDistribution() | 154 distribution = GetDistribution() |
| 148 | 155 |
| 149 # Keep track of all the files we inline. | 156 # Keep track of all the files we inline. |
| 150 inlined_files = set() | 157 inlined_files = set() |
| 151 | 158 |
| 152 def SrcReplace(src_match, filepath=input_filepath, | 159 def SrcReplace(src_match, filepath=input_filepath, |
| 153 inlined_files=inlined_files): | 160 inlined_files=inlined_files): |
| 154 """Helper function to provide SrcInlineAsDataURL with the base file path""" | 161 """Helper function to provide SrcInlineAsDataURL with the base file path""" |
| 155 return SrcInlineAsDataURL( | 162 return SrcInlineAsDataURL( |
| 156 src_match, filepath, distribution, inlined_files, names_only=names_only) | 163 src_match, filepath, distribution, inlined_files, names_only=names_only, |
| 164 filename_expansion_function=filename_expansion_function) |
| 157 | 165 |
| 158 def GetFilepath(src_match, base_path = input_filepath): | 166 def GetFilepath(src_match, base_path = input_filepath): |
| 159 filename = src_match.group('filename') | 167 filename = src_match.group('filename') |
| 160 | 168 |
| 161 if filename.find(':') != -1: | 169 if filename.find(':') != -1: |
| 162 # filename is probably a URL, which we don't want to bother inlining | 170 # filename is probably a URL, which we don't want to bother inlining |
| 163 return None | 171 return None |
| 164 | 172 |
| 165 filename = filename.replace('%DISTRIBUTION%', distribution) | 173 filename = filename.replace('%DISTRIBUTION%', distribution) |
| 174 if filename_expansion_function: |
| 175 filename = filename_expansion_function(filename) |
| 166 return os.path.normpath(os.path.join(base_path, filename)) | 176 return os.path.normpath(os.path.join(base_path, filename)) |
| 167 | 177 |
| 168 def IsConditionSatisfied(src_match): | 178 def IsConditionSatisfied(src_match): |
| 169 expression = src_match.group('expression') | 179 expression = src_match.group('expression') |
| 170 return grd_node is None or grd_node.EvaluateCondition(expression) | 180 return grd_node is None or grd_node.EvaluateCondition(expression) |
| 171 | 181 |
| 172 def CheckConditionalElements(str): | 182 def CheckConditionalElements(str): |
| 173 """Helper function to conditionally inline inner elements""" | 183 """Helper function to conditionally inline inner elements""" |
| 174 while True: | 184 while True: |
| 175 begin_if = _BEGIN_IF_BLOCK.search(str) | 185 begin_if = _BEGIN_IF_BLOCK.search(str) |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 207 str = leading + trailing | 217 str = leading + trailing |
| 208 | 218 |
| 209 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): | 219 def InlineFileContents(src_match, pattern, inlined_files=inlined_files): |
| 210 """Helper function to inline external files of various types""" | 220 """Helper function to inline external files of various types""" |
| 211 filepath = GetFilepath(src_match) | 221 filepath = GetFilepath(src_match) |
| 212 if filepath is None: | 222 if filepath is None: |
| 213 return src_match.group(0) | 223 return src_match.group(0) |
| 214 inlined_files.add(filepath) | 224 inlined_files.add(filepath) |
| 215 | 225 |
| 216 if names_only: | 226 if names_only: |
| 217 inlined_files.update(GetResourceFilenames(filepath, | 227 inlined_files.update(GetResourceFilenames( |
| 218 allow_external_script, | 228 filepath, |
| 219 rewrite_function)) | 229 allow_external_script, |
| 230 rewrite_function, |
| 231 filename_expansion_function=filename_expansion_function)) |
| 220 return "" | 232 return "" |
| 221 | 233 |
| 222 return pattern % InlineToString(filepath, grd_node, allow_external_script) | 234 return pattern % InlineToString( |
| 235 filepath, grd_node, allow_external_script, |
| 236 filename_expansion_function=filename_expansion_function) |
| 223 | 237 |
| 224 def InlineIncludeFiles(src_match): | 238 def InlineIncludeFiles(src_match): |
| 225 """Helper function to directly inline generic external files (without | 239 """Helper function to directly inline generic external files (without |
| 226 wrapping them with any kind of tags). | 240 wrapping them with any kind of tags). |
| 227 """ | 241 """ |
| 228 return InlineFileContents(src_match, '%s') | 242 return InlineFileContents(src_match, '%s') |
| 229 | 243 |
| 230 def InlineScript(match): | 244 def InlineScript(match): |
| 231 """Helper function to inline external script files""" | 245 """Helper function to inline external script files""" |
| 232 attrs = (match.group('attrs1') + match.group('attrs2')).strip() | 246 attrs = (match.group('attrs1') + match.group('attrs2')).strip() |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 331 flat_text = InlineCSSImages(flat_text) | 345 flat_text = InlineCSSImages(flat_text) |
| 332 | 346 |
| 333 flat_text = _ICON_RE.sub(SrcReplace, flat_text) | 347 flat_text = _ICON_RE.sub(SrcReplace, flat_text) |
| 334 | 348 |
| 335 if names_only: | 349 if names_only: |
| 336 flat_text = None # Will contains garbage if the flag is set anyway. | 350 flat_text = None # Will contains garbage if the flag is set anyway. |
| 337 return InlinedData(flat_text, inlined_files) | 351 return InlinedData(flat_text, inlined_files) |
| 338 | 352 |
| 339 | 353 |
def InlineToString(input_filename, grd_node, allow_external_script=False,
                   rewrite_function=None, filename_expansion_function=None):
  """Inlines the resources in a specified file and returns it as a string.

  Delegates the work to DoInline() and returns only the flattened text
  (the |inlined_data| attribute of its result).

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag
    allow_external_script: passed through unchanged to DoInline().
    rewrite_function: function(filepath, text, distribution) passed through
        to DoInline(), which calls it to rewrite html content before
        inlining images.
    filename_expansion_function: function(filename) passed through to
        DoInline(), which calls it to rewrite filenames before attempting
        to read them.
  Returns:
    the inlined data as a string
  Raises:
    Exception: if DoInline() raised an IOError; the message names the file
        that could not be opened and the file being flattened.
  """
  try:
    return DoInline(
        input_filename,
        grd_node,
        allow_external_script=allow_external_script,
        rewrite_function=rewrite_function,
        filename_expansion_function=filename_expansion_function).inlined_data
  except IOError as e:
    # "except X as e" is accepted by Python 2.6+ and Python 3 alike, unlike
    # the older "except X, e" spelling.
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, input_filename, e.strerror))
| 358 | 374 |
| 359 | 375 |
def InlineToFile(input_filename, output_filename, grd_node):
  """Inlines the resources in a specified file and writes it.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then writes the result
  to output_filename.

  Args:
    input_filename: name of file to read in
    output_filename: name of file to be written to
    grd_node: html node from the grd file for this include tag
  Returns:
    None. Only the flattened text is obtained (via InlineToString()), so
    the set of inlined filenames is not available here; use
    GetResourceFilenames() to get it.
  """
  inlined_data = InlineToString(input_filename, grd_node)
  with open(output_filename, 'wb') as out_file:
    # inlined_data is a single string, so write() is the right call;
    # writelines() would iterate over it one character at a time.
    out_file.write(inlined_data)
| 377 | 393 |
| 378 | 394 |
def GetResourceFilenames(filename,
                         allow_external_script=False,
                         rewrite_function=None,
                         filename_expansion_function=None):
  """Returns a set of all the files that would be inlined into |filename|.

  Runs DoInline() with names_only=True and no grd node, and returns the
  |inlined_files| attribute of its result.

  Args:
    filename: name of the file whose inlined resources should be listed
    allow_external_script: passed through unchanged to DoInline().
    rewrite_function: function(filepath, text, distribution) passed through
        to DoInline(), which calls it to rewrite html content before
        inlining images.
    filename_expansion_function: function(filename) passed through to
        DoInline(), which calls it to rewrite filenames before attempting
        to read them.
  Returns:
    the set of filenames of all the files that would be inlined
  Raises:
    Exception: if DoInline() raised an IOError; the message names the file
        that could not be opened and the file being flattened.
  """
  try:
    return DoInline(
        filename,
        None,
        names_only=True,
        allow_external_script=allow_external_script,
        rewrite_function=rewrite_function,
        filename_expansion_function=filename_expansion_function).inlined_files
  except IOError as e:
    # "except X as e" is accepted by Python 2.6+ and Python 3 alike, unlike
    # the older "except X, e" spelling.
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, filename, e.strerror))
| 392 | 411 |
| 393 | 412 |
def main():
  """Command-line entry point: html_inline.py inputfile outputfile.

  With fewer than two arguments after the script name, prints a usage
  message. Otherwise flattens sys.argv[1] into sys.argv[2] via
  InlineToFile(), passing None as the grd node.
  """
  if len(sys.argv) <= 2:
    # Single-argument parenthesised print emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Flattens a HTML file by inlining its external resources.\n")
    print("html_inline.py inputfile outputfile")
  else:
    InlineToFile(sys.argv[1], sys.argv[2], None)


if __name__ == '__main__':
  main()
| OLD | NEW |