OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Flattens an HTML file by inlining its external resources. |
| 7 |
| 8 This is a small script that takes an HTML file, looks for src attributes |
| 9 and inlines the specified file, producing one HTML file with no external |
| 10 dependencies. |
| 11 |
| 12 This does not inline anything referenced from an inlined file. |
| 13 """ |
| 14 |
| 15 import os |
| 16 import re |
| 17 import sys |
| 18 import base64 |
| 19 import mimetypes |
| 20 |
| 21 from grit.node import base |
| 22 |
# Distribution name used when DIST_ENV_VAR is not set in the environment.
DIST_DEFAULT = 'chromium'
# Environment variable that selects the distribution.
DIST_ENV_VAR = 'CHROMIUM_BUILD'
# Placeholder in resource file names that is replaced by the distribution.
DIST_SUBSTR = '%DISTRIBUTION%'

# Matches beginning of an "if" block with trailing spaces.
# Raw strings keep "\s" a regex escape instead of an invalid string escape.
_BEGIN_IF_BLOCK = re.compile(
    r'<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*')

# Matches ending of an "if" block with preceding spaces.
_END_IF_BLOCK = re.compile(r'\s*</if>')
| 32 |
def ReadFile(input_filename):
  """Helper function that returns the contents of input_filename.

  The file is opened in binary mode, so the bytes are returned exactly as
  stored on disk (no newline translation).

  Args:
    input_filename: name of file to be read

  Returns:
    string with the complete file contents

  Raises:
    IOError: if the file cannot be opened or read
  """
  # "with" guarantees the handle is closed even if read() raises.
  with open(input_filename, 'rb') as f:
    return f.read()
| 46 |
def SrcInlineAsDataURL(
    src_match, base_path, distribution, inlined_files, names_only=False):
  """Regex replace function that inlines a referenced file as a data URI.

  Takes a regex match for src="filename", attempts to read the file
  at 'filename' and returns the src attribute with the file inlined
  as a data URI. If it finds DIST_SUBSTR string in file name, replaces
  it with distribution.

  Args:
    src_match: regex match object with 'filename' named capturing group
    base_path: path to look for files in
    distribution: string that should replace DIST_SUBSTR
    inlined_files: set to which the path of the referenced file is added
    names_only: If true, the function will not read the file but just return "".
        It will still add the filename to |inlined_files|.

  Returns:
    string: the unmodified matched text when the filename contains ':', ""
    when names_only is set, otherwise the matched text with the quoted
    filename replaced by a double-quoted base64 data URI.
  """
  filename = src_match.group('filename')

  if ':' in filename:
    # filename is probably a URL, which we don't want to bother inlining
    return src_match.group(0)

  # Use the shared placeholder constant rather than repeating the literal.
  filename = filename.replace(DIST_SUBSTR, distribution)
  filepath = os.path.join(base_path, filename)
  inlined_files.add(filepath)

  if names_only:
    return ""

  # Fall back to text/plain when the type cannot be guessed from the name.
  mimetype = mimetypes.guess_type(filename)[0] or 'text/plain'
  inline_data = base64.standard_b64encode(ReadFile(filepath))

  # Everything from the start of the match up to, but not including, the
  # character (the opening quote) immediately before the filename.
  prefix = src_match.string[src_match.start():src_match.start('filename') - 1]
  return '%s"data:%s;base64,%s"' % (prefix, mimetype, inline_data)
| 85 |
| 86 |
class InlinedData(object):
  """Helper class holding the results from DoInline().

  Holds the inlined data and the set of filenames of all the inlined
  files.
  """
  def __init__(self, inlined_data, inlined_files):
    # The flattened contents (DoInline() stores None here for names_only).
    self.inlined_data = inlined_data
    # The set of file paths that were (or would be) inlined.
    self.inlined_files = inlined_files
| 96 |
def DoInline(
    input_filename, grd_node, allow_external_script=False, names_only=False):
  """Helper function that inlines the resources in a specified file.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then returns the result and
  the set of inlined files.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag; may be None,
        in which case every <if> block is treated as satisfied
    allow_external_script: if true, <script src="..."> references are left
        in place instead of being replaced by the script contents
    names_only: if true, only the set of referenced files is collected and
        None is returned for the inlined contents (faster)

  Returns:
    an InlinedData object holding the inlined data as a string (None when
    names_only is set) and the set of filenames of all the inlined files

  Raises:
    Exception: if an <if> block has no matching </if>
    IOError: propagated from ReadFile when a file cannot be read
  """
  input_filepath = os.path.dirname(input_filename)

  distribution = DIST_DEFAULT
  if DIST_ENV_VAR in os.environ:
    distribution = os.environ[DIST_ENV_VAR]
    # A value such as "_google_chrome" becomes "google_chrome".
    if len(distribution) > 1 and distribution[0] == '_':
      distribution = distribution[1:].lower()

  # Keep track of all the files we inline.
  inlined_files = set()

  def SrcReplace(src_match, filepath=input_filepath,
                 inlined_files=inlined_files):
    """Helper function to provide SrcInlineAsDataURL with the base file path"""
    return SrcInlineAsDataURL(
        src_match, filepath, distribution, inlined_files, names_only=names_only)

  def GetFilepath(src_match):
    """Returns the path of the matched file, or None if it looks like a URL"""
    filename = src_match.group('filename')

    if ':' in filename:
      # filename is probably a URL, which we don't want to bother inlining
      return None

    filename = filename.replace(DIST_SUBSTR, distribution)
    return os.path.join(input_filepath, filename)

  def IsConditionSatisfied(src_match):
    """Evaluates the expr attribute of a matched <if> tag via grd_node"""
    expression = src_match.group('expression')
    return grd_node is None or grd_node.EvaluateCondition(expression)

  def CheckConditionalElements(text):
    """Helper function to conditionally inline inner elements"""
    while True:
      begin_if = _BEGIN_IF_BLOCK.search(text)
      if begin_if is None:
        return text

      condition_satisfied = IsConditionSatisfied(begin_if)
      leading = text[0:begin_if.start()]
      content_start = begin_if.end()

      # Find matching "if" block end. |count| is the current nesting depth.
      count = 1
      pos = begin_if.end()
      while True:
        end_if = _END_IF_BLOCK.search(text, pos)
        if end_if is None:
          raise Exception('Unmatched <if>')

        next_if = _BEGIN_IF_BLOCK.search(text, pos)
        if next_if is None or next_if.start() >= end_if.end():
          # The next tag is a closing one: leave one nesting level.
          count = count - 1
          if count == 0:
            break
          pos = end_if.end()
        else:
          # A nested <if> opens before the next </if>.
          count = count + 1
          pos = next_if.end()

      content = text[content_start:end_if.start()]
      trailing = text[end_if.end():]

      if condition_satisfied:
        text = leading + CheckConditionalElements(content) + trailing
      else:
        text = leading + trailing

  def InlineFileContents(src_match, pattern, inlined_files=inlined_files):
    """Helper function to inline external script and css files"""
    filepath = GetFilepath(src_match)
    if filepath is None:
      return src_match.group(0)
    inlined_files.add(filepath)

    # Even if names_only is set, html files needs to be opened, because it
    # can link to images that need to be added to the file set.
    if names_only and not filepath.endswith('.html'):
      return ""

    return pattern % ReadFile(filepath)

  def InlineIncludeFiles(src_match):
    """Helper function to inline files referenced by <include> tags"""
    return InlineFileContents(src_match, '%s')

  def InlineScript(src_match):
    """Helper function to inline external script files"""
    return InlineFileContents(src_match, '<script>%s</script>')

  def InlineCSSText(text, css_filepath):
    """Helper function that inlines external resources in CSS text"""
    filepath = os.path.dirname(css_filepath)
    return InlineCSSImages(text, filepath)

  def InlineCSSFile(src_match, inlined_files=inlined_files):
    """Helper function to inline external css files.

    Args:
      src_match: A regular expression match with a named group named "filename".

    Returns:
      The text that should replace the reference to the CSS file.
    """
    filepath = GetFilepath(src_match)
    if filepath is None:
      return src_match.group(0)

    # Even if names_only is set, the CSS file needs to be opened, because it
    # can link to images that need to be added to the file set.
    inlined_files.add(filepath)
    # When resolving CSS files we need to pass in the path so that relative URLs
    # can be resolved.
    return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath)

  def InlineCSSImages(text, filepath=input_filepath):
    """Helper function that inlines external images in CSS backgrounds."""
    # Replace contents of url() for css attributes: content, background,
    # or *-image.
    return re.sub(r'(?:content|background|[\w-]*-image):[ ]*' +
                  r'url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")',
                  lambda m: SrcReplace(m, filepath),
                  text)

  flat_text = ReadFile(input_filename)

  if not allow_external_script:
    # We need to inline css and js before we inline images so that image
    # references gets inlined in the css and js
    flat_text = re.sub(r'<script .*?src="(?P<filename>[^"\']*)".*?></script>',
                       InlineScript,
                       flat_text)

  flat_text = re.sub(
      r'<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>',
      InlineCSSFile,
      flat_text)

  # NOTE(review): the trailing ".*>" is greedy, so everything up to the last
  # ">" on the same line is replaced together with the <include> tag.
  # Confirm that is intended before tightening it.
  flat_text = re.sub(
      r'<include\s+src="(?P<filename>[^"\']*)".*>',
      InlineIncludeFiles,
      flat_text)

  # Check conditional elements, remove unsatisfied ones from the file.
  flat_text = CheckConditionalElements(flat_text)

  flat_text = re.sub(r'<(?!script)[^>]+?src="(?P<filename>[^"\']*)"',
                     SrcReplace,
                     flat_text)

  # TODO(arv): Only do this inside <style> tags.
  flat_text = InlineCSSImages(flat_text)

  flat_text = re.sub(r'<link rel="icon".+?href="(?P<filename>[^"\']*)"',
                     SrcReplace,
                     flat_text)

  if names_only:
    flat_text = None  # Will contains garbage if the flag is set anyway.
  return InlinedData(flat_text, inlined_files)
| 273 |
| 274 |
def InlineToString(input_filename, grd_node, allow_external_script=False):
  """Inlines the resources in a specified file and returns it as a string.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag
    allow_external_script: passed through to DoInline()

  Returns:
    the inlined data as a string

  Raises:
    Exception: if an IOError occurs while flattening; the message names the
        file that failed and the file being flattened
  """
  try:
    return DoInline(input_filename,
                    grd_node,
                    allow_external_script=allow_external_script).inlined_data
  except IOError as e:
    # "as" is accepted by Python 2.6+ and Python 3, unlike "except X, e".
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, input_filename, e.strerror))
| 291 |
| 292 |
def InlineToFile(input_filename, output_filename, grd_node):
  """Inlines the resources in a specified file and writes it.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then writes the result
  to output_filename.

  Args:
    input_filename: name of file to read in
    output_filename: name of file to be written to
    grd_node: html node from the grd file for this include tag

  Returns:
    None; the flattened contents are written to output_filename.
  """
  inlined_data = InlineToString(input_filename, grd_node)
  # "with" closes the file even if the write fails; write() emits the string
  # in one call instead of writelines() iterating it character by character.
  with open(output_filename, 'wb') as out_file:
    out_file.write(inlined_data)
| 311 |
| 312 |
def GetResourceFilenames(filename):
  """Returns the set of all files that would be inlined into |filename|.

  Runs DoInline() with names_only=True and no grd node.

  Args:
    filename: name of the file whose references should be collected

  Returns:
    a set of filenames of all the files that would be inlined

  Raises:
    Exception: if an IOError occurs while scanning; the message names the
        file that failed and the file being flattened
  """
  try:
    return DoInline(filename, None, names_only=True).inlined_files
  except IOError as e:
    # "as" is accepted by Python 2.6+ and Python 3, unlike "except X, e".
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, filename, e.strerror))
| 320 |
| 321 |
def main():
  """Command-line entry point.

  With fewer than two arguments prints a usage message; otherwise flattens
  sys.argv[1] into sys.argv[2] without a grd node.
  """
  if len(sys.argv) <= 2:
    # Parenthesised single-argument print behaves the same as the print
    # statement in Python 2 and is also valid in Python 3.
    print("Flattens a HTML file by inlining its external resources.\n")
    print("html_inline.py inputfile outputfile")
  else:
    InlineToFile(sys.argv[1], sys.argv[2], None)

if __name__ == '__main__':
  main()
OLD | NEW |