Index: grit/format/html_inline.py |
=================================================================== |
--- grit/format/html_inline.py (revision 0) |
+++ grit/format/html_inline.py (revision 0) |
@@ -0,0 +1,330 @@ |
+#!/usr/bin/python |
+# Copyright (c) 2011 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Flattens a HTML file by inlining its external resources. |
+ |
+This is a small script that takes a HTML file, looks for src attributes |
+and inlines the specified file, producing one HTML file with no external |
+dependencies. |
+ |
+This does not inline anything referenced from an inlined file. |
+""" |
+ |
+import os |
+import re |
+import sys |
+import base64 |
+import mimetypes |
+ |
+from grit.node import base |
+ |
+DIST_DEFAULT = 'chromium' |
+DIST_ENV_VAR = 'CHROMIUM_BUILD' |
+DIST_SUBSTR = '%DISTRIBUTION%' |
+ |
+# Matches beginning of an "if" block with trailing spaces. |
+_BEGIN_IF_BLOCK = re.compile('<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*') |
+ |
+# Matches ending of an "if" block with preceding spaces. |
+_END_IF_BLOCK = re.compile('\s*</if>') |
+ |
+def ReadFile(input_filename): |
+ """Helper function that returns input_filename as a string. |
+ |
+ Args: |
+ input_filename: name of file to be read |
+ |
+ Returns: |
+ string |
+ """ |
+ f = open(input_filename, 'rb') |
+ file_contents = f.read() |
+ f.close() |
+ return file_contents |
+ |
+def SrcInlineAsDataURL( |
+ src_match, base_path, distribution, inlined_files, names_only=False): |
+ """regex replace function. |
+ |
+ Takes a regex match for src="filename", attempts to read the file |
+ at 'filename' and returns the src attribute with the file inlined |
+ as a data URI. If it finds DIST_SUBSTR string in file name, replaces |
+ it with distribution. |
+ |
+ Args: |
+ src_match: regex match object with 'filename' named capturing group |
+ base_path: path that to look for files in |
+ distribution: string that should replace DIST_SUBSTR |
+ inlined_files: The name of the opened file is appended to this list. |
+ names_only: If true, the function will not read the file but just return "". |
+ It will still add the filename to |inlined_files|. |
+ |
+ Returns: |
+ string |
+ """ |
+ filename = src_match.group('filename') |
+ |
+ if filename.find(':') != -1: |
+ # filename is probably a URL, which we don't want to bother inlining |
+ return src_match.group(0) |
+ |
+ filename = filename.replace('%DISTRIBUTION%', distribution) |
+ filepath = os.path.join(base_path, filename) |
+ inlined_files.add(filepath) |
+ |
+ if names_only: |
+ return "" |
+ |
+ mimetype = mimetypes.guess_type(filename)[0] or 'text/plain' |
+ inline_data = base64.standard_b64encode(ReadFile(filepath)) |
+ |
+ prefix = src_match.string[src_match.start():src_match.start('filename')-1] |
+ return "%s\"data:%s;base64,%s\"" % (prefix, mimetype, inline_data) |
+ |
+ |
+class InlinedData: |
+ """Helper class holding the results from DoInline(). |
+ |
+ Holds the inlined data and the set of filenames of all the inlined |
+ files. |
+ """ |
+ def __init__(self, inlined_data, inlined_files): |
+ self.inlined_data = inlined_data |
+ self.inlined_files = inlined_files |
+ |
+def DoInline( |
+ input_filename, grd_node, allow_external_script=False, names_only=False): |
+ """Helper function that inlines the resources in a specified file. |
+ |
+ Reads input_filename, finds all the src attributes and attempts to |
+ inline the files they are referring to, then returns the result and |
+ the set of inlined files. |
+ |
+ Args: |
+ input_filename: name of file to read in |
+ grd_node: html node from the grd file for this include tag |
+ names_only: |nil| will be returned for the inlined contents (faster). |
+ Returns: |
+ a tuple of the inlined data as a string and the set of filenames |
+ of all the inlined files |
+ """ |
+ input_filepath = os.path.dirname(input_filename) |
+ |
+ distribution = DIST_DEFAULT |
+ if DIST_ENV_VAR in os.environ.keys(): |
+ distribution = os.environ[DIST_ENV_VAR] |
+ if len(distribution) > 1 and distribution[0] == '_': |
+ distribution = distribution[1:].lower() |
+ |
+ # Keep track of all the files we inline. |
+ inlined_files = set() |
+ |
+ def SrcReplace(src_match, filepath=input_filepath, |
+ inlined_files=inlined_files): |
+ """Helper function to provide SrcInlineAsDataURL with the base file path""" |
+ return SrcInlineAsDataURL( |
+ src_match, filepath, distribution, inlined_files, names_only=names_only) |
+ |
+ def GetFilepath(src_match): |
+ filename = src_match.group('filename') |
+ |
+ if filename.find(':') != -1: |
+ # filename is probably a URL, which we don't want to bother inlining |
+ return None |
+ |
+ filename = filename.replace('%DISTRIBUTION%', distribution) |
+ return os.path.join(input_filepath, filename) |
+ |
+ def IsConditionSatisfied(src_match): |
+ expression = src_match.group('expression') |
+ return grd_node is None or grd_node.EvaluateCondition(expression) |
+ |
+ def CheckConditionalElements(str): |
+ """Helper function to conditionally inline inner elements""" |
+ while True: |
+ begin_if = _BEGIN_IF_BLOCK.search(str) |
+ if begin_if is None: |
+ return str |
+ |
+ condition_satisfied = IsConditionSatisfied(begin_if) |
+ leading = str[0:begin_if.start()] |
+ content_start = begin_if.end() |
+ |
+ # Find matching "if" block end. |
+ count = 1 |
+ pos = begin_if.end() |
+ while True: |
+ end_if = _END_IF_BLOCK.search(str, pos) |
+ if end_if is None: |
+ raise Exception('Unmatched <if>') |
+ |
+ next_if = _BEGIN_IF_BLOCK.search(str, pos) |
+ if next_if is None or next_if.start() >= end_if.end(): |
+ count = count - 1 |
+ if count == 0: |
+ break |
+ pos = end_if.end() |
+ else: |
+ count = count + 1 |
+ pos = next_if.end() |
+ |
+ content = str[content_start:end_if.start()] |
+ trailing = str[end_if.end():] |
+ |
+ if condition_satisfied: |
+ str = leading + CheckConditionalElements(content) + trailing |
+ else: |
+ str = leading + trailing |
+ |
+ def InlineFileContents(src_match, pattern, inlined_files=inlined_files): |
+ """Helper function to inline external script and css files""" |
+ filepath = GetFilepath(src_match) |
+ if filepath is None: |
+ return src_match.group(0) |
+ inlined_files.add(filepath) |
+ |
+ # Even if names_only is set, html files needs to be opened, because it |
+ # can link to images that need to be added to the file set. |
+ if names_only and not filepath.endswith('.html'): |
+ return "" |
+ |
+ return pattern % ReadFile(filepath) |
+ |
+ def InlineIncludeFiles(src_match): |
+ """Helper function to inline external script files""" |
+ return InlineFileContents(src_match, '%s') |
+ |
+ def InlineScript(src_match): |
+ """Helper function to inline external script files""" |
+ return InlineFileContents(src_match, '<script>%s</script>') |
+ |
+ def InlineCSSText(text, css_filepath): |
+ """Helper function that inlines external resources in CSS text""" |
+ filepath = os.path.dirname(css_filepath) |
+ return InlineCSSImages(text, filepath) |
+ |
+ def InlineCSSFile(src_match, inlined_files=inlined_files): |
+ """Helper function to inline external css files. |
+ |
+ Args: |
+ src_match: A regular expression match with a named group named "filename". |
+ |
+ Returns: |
+ The text that should replace the reference to the CSS file. |
+ """ |
+ filepath = GetFilepath(src_match) |
+ if filepath is None: |
+ return src_match.group(0) |
+ |
+ # Even if names_only is set, the CSS file needs to be opened, because it |
+ # can link to images that need to be added to the file set. |
+ inlined_files.add(filepath) |
+ # When resolving CSS files we need to pass in the path so that relative URLs |
+ # can be resolved. |
+ return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath) |
+ |
+ def InlineCSSImages(text, filepath=input_filepath): |
+ """Helper function that inlines external images in CSS backgrounds.""" |
+ # Replace contents of url() for css attributes: content, background, |
+ # or *-image. |
+ return re.sub('(?:content|background|[\w-]*-image):[ ]*' + |
+ 'url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")', |
+ lambda m: SrcReplace(m, filepath), |
+ text) |
+ |
+ flat_text = ReadFile(input_filename) |
+ |
+ if not allow_external_script: |
+ # We need to inline css and js before we inline images so that image |
+ # references gets inlined in the css and js |
+ flat_text = re.sub('<script .*?src="(?P<filename>[^"\']*)".*?></script>', |
+ InlineScript, |
+ flat_text) |
+ |
+ flat_text = re.sub( |
+ '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>', |
+ InlineCSSFile, |
+ flat_text) |
+ |
+ flat_text = re.sub( |
+ '<include\s+src="(?P<filename>[^"\']*)".*>', |
+ InlineIncludeFiles, |
+ flat_text) |
+ |
+ # Check conditional elements, remove unsatisfied ones from the file. |
+ flat_text = CheckConditionalElements(flat_text) |
+ |
+ flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"', |
+ SrcReplace, |
+ flat_text) |
+ |
+ # TODO(arv): Only do this inside <style> tags. |
+ flat_text = InlineCSSImages(flat_text) |
+ |
+ flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"', |
+ SrcReplace, |
+ flat_text) |
+ |
+ if names_only: |
+ flat_text = None # Will contains garbage if the flag is set anyway. |
+ return InlinedData(flat_text, inlined_files) |
+ |
+ |
+def InlineToString(input_filename, grd_node, allow_external_script=False): |
+ """Inlines the resources in a specified file and returns it as a string. |
+ |
+ Args: |
+ input_filename: name of file to read in |
+ grd_node: html node from the grd file for this include tag |
+ Returns: |
+ the inlined data as a string |
+ """ |
+ try: |
+ return DoInline(input_filename, |
+ grd_node, |
+ allow_external_script=allow_external_script).inlined_data |
+ except IOError, e: |
+ raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
+ (e.filename, input_filename, e.strerror)) |
+ |
+ |
+def InlineToFile(input_filename, output_filename, grd_node): |
+ """Inlines the resources in a specified file and writes it. |
+ |
+ Reads input_filename, finds all the src attributes and attempts to |
+ inline the files they are referring to, then writes the result |
+ to output_filename. |
+ |
+ Args: |
+ input_filename: name of file to read in |
+ output_filename: name of file to be written to |
+ grd_node: html node from the grd file for this include tag |
+ Returns: |
+ a set of filenames of all the inlined files |
+ """ |
+ inlined_data = InlineToString(input_filename, grd_node) |
+ out_file = open(output_filename, 'wb') |
+ out_file.writelines(inlined_data) |
+ out_file.close() |
+ |
+ |
+def GetResourceFilenames(filename): |
+ """For a grd file, returns a set of all the files that would be inline.""" |
+ try: |
+ return DoInline(filename, None, names_only=True).inlined_files |
+ except IOError, e: |
+ raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
+ (e.filename, filename, e.strerror)) |
+ |
+ |
+def main(): |
+ if len(sys.argv) <= 2: |
+ print "Flattens a HTML file by inlining its external resources.\n" |
+ print "html_inline.py inputfile outputfile" |
+ else: |
+ InlineToFile(sys.argv[1], sys.argv[2], None) |
+ |
+if __name__ == '__main__': |
+ main() |
Property changes on: grit/format/html_inline.py |
___________________________________________________________________ |
Added: svn:executable |
+ * |
Added: svn:eol-style |
+ LF |