| Index: grit/format/html_inline.py
|
| ===================================================================
|
| --- grit/format/html_inline.py (revision 202)
|
| +++ grit/format/html_inline.py (working copy)
|
| @@ -1,423 +0,0 @@
|
| -#!/usr/bin/env python
|
| -# Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -# Use of this source code is governed by a BSD-style license that can be
|
| -# found in the LICENSE file.
|
| -
|
| -"""Flattens a HTML file by inlining its external resources.
|
| -
|
| -This is a small script that takes a HTML file, looks for src attributes
|
| -and inlines the specified file, producing one HTML file with no external
|
| -dependencies. It recursively inlines the included files.
|
| -"""
|
| -
|
| -import os
|
| -import re
|
| -import sys
|
| -import base64
|
| -import mimetypes
|
| -
|
| -from grit import lazy_re
|
| -from grit import util
|
| -
|
| -# There is a python bug that makes mimetypes crash if the Windows
|
| -# registry contains non-Latin keys ( http://bugs.python.org/issue9291
|
| -# ). Initing manually and blocking external mime-type databases will
|
| -# prevent that bug and if we add svg manually, it will still give us
|
| -# the data we need.
|
| -mimetypes.init([])
|
| -mimetypes.add_type('image/svg+xml', '.svg')
|
| -
|
| -DIST_DEFAULT = 'chromium'
|
| -DIST_ENV_VAR = 'CHROMIUM_BUILD'
|
| -DIST_SUBSTR = '%DISTRIBUTION%'
|
| -
|
| -# Matches beginning of an "if" block with trailing spaces.
|
| -_BEGIN_IF_BLOCK = lazy_re.compile(
|
| - '<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*')
|
| -
|
| -# Matches ending of an "if" block with preceding spaces.
|
| -_END_IF_BLOCK = lazy_re.compile('\s*</if>')
|
| -
|
| -# Used by DoInline to replace various links with inline content.
|
| -_STYLESHEET_RE = lazy_re.compile(
|
| - '<link rel="stylesheet"[^>]+?href="(?P<filename>[^"]*)".*?>(\s*</link>)?',
|
| - re.DOTALL)
|
| -_INCLUDE_RE = lazy_re.compile(
|
| - '<include[^>]+?src="(?P<filename>[^"\']*)".*?>(\s*</include>)?',
|
| - re.DOTALL)
|
| -_SRC_RE = lazy_re.compile(
|
| - r'<(?!script)(?:[^>]+?\s)src=(?P<quote>")(?!\[\[|{{)(?P<filename>[^"\']*)\1',
|
| - re.MULTILINE)
|
| -_ICON_RE = lazy_re.compile(
|
| - r'<link rel="icon"\s(?:[^>]+?\s)?'
|
| - 'href=(?P<quote>")(?P<filename>[^"\']*)\1',
|
| - re.MULTILINE)
|
| -
|
| -
|
| -def GetDistribution():
|
| - """Helper function that gets the distribution we are building.
|
| -
|
| - Returns:
|
| - string
|
| - """
|
| - distribution = DIST_DEFAULT
|
| - if DIST_ENV_VAR in os.environ.keys():
|
| - distribution = os.environ[DIST_ENV_VAR]
|
| - if len(distribution) > 1 and distribution[0] == '_':
|
| - distribution = distribution[1:].lower()
|
| - return distribution
|
| -
|
| -
|
| -def SrcInlineAsDataURL(
|
| - src_match, base_path, distribution, inlined_files, names_only=False,
|
| - filename_expansion_function=None):
|
| - """regex replace function.
|
| -
|
| - Takes a regex match for src="filename", attempts to read the file
|
| - at 'filename' and returns the src attribute with the file inlined
|
| - as a data URI. If it finds DIST_SUBSTR string in file name, replaces
|
| - it with distribution.
|
| -
|
| - Args:
|
| - src_match: regex match object with 'filename' and 'quote' named capturing
|
| - groups
|
| - base_path: path that to look for files in
|
| - distribution: string that should replace DIST_SUBSTR
|
| - inlined_files: The name of the opened file is appended to this list.
|
| - names_only: If true, the function will not read the file but just return "".
|
| - It will still add the filename to |inlined_files|.
|
| -
|
| - Returns:
|
| - string
|
| - """
|
| - filename = src_match.group('filename')
|
| - if filename_expansion_function:
|
| - filename = filename_expansion_function(filename)
|
| - quote = src_match.group('quote')
|
| -
|
| - if filename.find(':') != -1:
|
| - # filename is probably a URL, which we don't want to bother inlining
|
| - return src_match.group(0)
|
| -
|
| - filename = filename.replace(DIST_SUBSTR , distribution)
|
| - filepath = os.path.normpath(os.path.join(base_path, filename))
|
| - inlined_files.add(filepath)
|
| -
|
| - if names_only:
|
| - return ""
|
| -
|
| - mimetype = mimetypes.guess_type(filename)[0]
|
| - if mimetype is None:
|
| - raise Exception('%s is of an an unknown type and '
|
| - 'cannot be stored in a data url.' % filename)
|
| - inline_data = base64.standard_b64encode(util.ReadFile(filepath, util.BINARY))
|
| -
|
| - prefix = src_match.string[src_match.start():src_match.start('filename')]
|
| - suffix = src_match.string[src_match.end('filename'):src_match.end()]
|
| - return '%sdata:%s;base64,%s%s' % (prefix, mimetype, inline_data, suffix)
|
| -
|
| -
|
| -class InlinedData:
|
| - """Helper class holding the results from DoInline().
|
| -
|
| - Holds the inlined data and the set of filenames of all the inlined
|
| - files.
|
| - """
|
| - def __init__(self, inlined_data, inlined_files):
|
| - self.inlined_data = inlined_data
|
| - self.inlined_files = inlined_files
|
| -
|
| -def DoInline(
|
| - input_filename, grd_node, allow_external_script=False, names_only=False,
|
| - rewrite_function=None, filename_expansion_function=None):
|
| - """Helper function that inlines the resources in a specified file.
|
| -
|
| - Reads input_filename, finds all the src attributes and attempts to
|
| - inline the files they are referring to, then returns the result and
|
| - the set of inlined files.
|
| -
|
| - Args:
|
| - input_filename: name of file to read in
|
| - grd_node: html node from the grd file for this include tag
|
| - names_only: |nil| will be returned for the inlined contents (faster).
|
| - rewrite_function: function(filepath, text, distribution) which will be
|
| - called to rewrite html content before inlining images.
|
| - filename_expansion_function: function(filename) which will be called to
|
| - rewrite filenames before attempting to read them.
|
| - Returns:
|
| - a tuple of the inlined data as a string and the set of filenames
|
| - of all the inlined files
|
| - """
|
| - if filename_expansion_function:
|
| - input_filename = filename_expansion_function(input_filename)
|
| - input_filepath = os.path.dirname(input_filename)
|
| - distribution = GetDistribution()
|
| -
|
| - # Keep track of all the files we inline.
|
| - inlined_files = set()
|
| -
|
| - def SrcReplace(src_match, filepath=input_filepath,
|
| - inlined_files=inlined_files):
|
| - """Helper function to provide SrcInlineAsDataURL with the base file path"""
|
| - return SrcInlineAsDataURL(
|
| - src_match, filepath, distribution, inlined_files, names_only=names_only,
|
| - filename_expansion_function=filename_expansion_function)
|
| -
|
| - def GetFilepath(src_match, base_path = input_filepath):
|
| - filename = src_match.group('filename')
|
| -
|
| - if filename.find(':') != -1:
|
| - # filename is probably a URL, which we don't want to bother inlining
|
| - return None
|
| -
|
| - filename = filename.replace('%DISTRIBUTION%', distribution)
|
| - if filename_expansion_function:
|
| - filename = filename_expansion_function(filename)
|
| - return os.path.normpath(os.path.join(base_path, filename))
|
| -
|
| - def IsConditionSatisfied(src_match):
|
| - expression = src_match.group('expression')
|
| - return grd_node is None or grd_node.EvaluateCondition(expression)
|
| -
|
| - def CheckConditionalElements(str):
|
| - """Helper function to conditionally inline inner elements"""
|
| - while True:
|
| - begin_if = _BEGIN_IF_BLOCK.search(str)
|
| - if begin_if is None:
|
| - if _END_IF_BLOCK.search(str) is not None:
|
| - raise Exception('Unmatched </if>')
|
| - return str
|
| -
|
| - condition_satisfied = IsConditionSatisfied(begin_if)
|
| - leading = str[0:begin_if.start()]
|
| - content_start = begin_if.end()
|
| -
|
| - # Find matching "if" block end.
|
| - count = 1
|
| - pos = begin_if.end()
|
| - while True:
|
| - end_if = _END_IF_BLOCK.search(str, pos)
|
| - if end_if is None:
|
| - raise Exception('Unmatched <if>')
|
| -
|
| - next_if = _BEGIN_IF_BLOCK.search(str, pos)
|
| - if next_if is None or next_if.start() >= end_if.end():
|
| - count = count - 1
|
| - if count == 0:
|
| - break
|
| - pos = end_if.end()
|
| - else:
|
| - count = count + 1
|
| - pos = next_if.end()
|
| -
|
| - content = str[content_start:end_if.start()]
|
| - trailing = str[end_if.end():]
|
| -
|
| - if condition_satisfied:
|
| - str = leading + CheckConditionalElements(content) + trailing
|
| - else:
|
| - str = leading + trailing
|
| -
|
| - def InlineFileContents(src_match, pattern, inlined_files=inlined_files):
|
| - """Helper function to inline external files of various types"""
|
| - filepath = GetFilepath(src_match)
|
| - if filepath is None:
|
| - return src_match.group(0)
|
| - inlined_files.add(filepath)
|
| -
|
| - if names_only:
|
| - inlined_files.update(GetResourceFilenames(
|
| - filepath,
|
| - allow_external_script,
|
| - rewrite_function,
|
| - filename_expansion_function=filename_expansion_function))
|
| - return ""
|
| -
|
| - return pattern % InlineToString(
|
| - filepath, grd_node, allow_external_script,
|
| - filename_expansion_function=filename_expansion_function)
|
| -
|
| - def InlineIncludeFiles(src_match):
|
| - """Helper function to directly inline generic external files (without
|
| - wrapping them with any kind of tags).
|
| - """
|
| - return InlineFileContents(src_match, '%s')
|
| -
|
| - def InlineScript(match):
|
| - """Helper function to inline external script files"""
|
| - attrs = (match.group('attrs1') + match.group('attrs2')).strip()
|
| - if attrs:
|
| - attrs = ' ' + attrs
|
| - return InlineFileContents(match, '<script' + attrs + '>%s</script>')
|
| -
|
| - def InlineCSSText(text, css_filepath):
|
| - """Helper function that inlines external resources in CSS text"""
|
| - filepath = os.path.dirname(css_filepath)
|
| - # Allow custom modifications before inlining images.
|
| - if rewrite_function:
|
| - text = rewrite_function(filepath, text, distribution)
|
| - text = InlineCSSImages(text, filepath)
|
| - return InlineCSSImports(text, filepath)
|
| -
|
| - def InlineCSSFile(src_match, pattern, base_path=input_filepath):
|
| - """Helper function to inline external CSS files.
|
| -
|
| - Args:
|
| - src_match: A regular expression match with a named group named "filename".
|
| - pattern: The pattern to replace with the contents of the CSS file.
|
| - base_path: The base path to use for resolving the CSS file.
|
| -
|
| - Returns:
|
| - The text that should replace the reference to the CSS file.
|
| - """
|
| - filepath = GetFilepath(src_match, base_path)
|
| - if filepath is None:
|
| - return src_match.group(0)
|
| -
|
| - # Even if names_only is set, the CSS file needs to be opened, because it
|
| - # can link to images that need to be added to the file set.
|
| - inlined_files.add(filepath)
|
| - # When resolving CSS files we need to pass in the path so that relative URLs
|
| - # can be resolved.
|
| - return pattern % InlineCSSText(util.ReadFile(filepath, util.BINARY),
|
| - filepath)
|
| -
|
| - def InlineCSSImages(text, filepath=input_filepath):
|
| - """Helper function that inlines external images in CSS backgrounds."""
|
| - # Replace contents of url() for css attributes: content, background,
|
| - # or *-image.
|
| - return re.sub('(content|background|[\w-]*-image):[^;]*' +
|
| - '(url\((?P<quote1>"|\'|)[^"\'()]*(?P=quote1)\)|' +
|
| - 'image-set\(' +
|
| - '([ ]*url\((?P<quote2>"|\'|)[^"\'()]*(?P=quote2)\)' +
|
| - '[ ]*[0-9.]*x[ ]*(,[ ]*)?)+\))',
|
| - lambda m: InlineCSSUrls(m, filepath),
|
| - text)
|
| -
|
| - def InlineCSSUrls(src_match, filepath=input_filepath):
|
| - """Helper function that inlines each url on a CSS image rule match."""
|
| - # Replace contents of url() references in matches.
|
| - return re.sub('url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)(?P=quote)\)',
|
| - lambda m: SrcReplace(m, filepath),
|
| - src_match.group(0))
|
| -
|
| - def InlineCSSImports(text, filepath=input_filepath):
|
| - """Helper function that inlines CSS files included via the @import
|
| - directive.
|
| - """
|
| - return re.sub('@import\s+url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)' +
|
| - '(?P=quote)\);',
|
| - lambda m: InlineCSSFile(m, '%s', filepath),
|
| - text)
|
| -
|
| -
|
| - flat_text = util.ReadFile(input_filename, util.BINARY)
|
| -
|
| - # Check conditional elements, remove unsatisfied ones from the file. We do
|
| - # this twice. The first pass is so that we don't even bother calling
|
| - # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually
|
| - # going to throw out anyway.
|
| - flat_text = CheckConditionalElements(flat_text)
|
| -
|
| - if not allow_external_script:
|
| - # We need to inline css and js before we inline images so that image
|
| - # references gets inlined in the css and js
|
| - flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' +
|
| - '(?P<attrs2>.*?)></script>',
|
| - InlineScript,
|
| - flat_text)
|
| -
|
| - flat_text = _STYLESHEET_RE.sub(
|
| - lambda m: InlineCSSFile(m, '<style>%s</style>'),
|
| - flat_text)
|
| -
|
| - flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text)
|
| -
|
| - # Check conditional elements, second pass. This catches conditionals in any
|
| - # of the text we just inlined.
|
| - flat_text = CheckConditionalElements(flat_text)
|
| -
|
| - # Allow custom modifications before inlining images.
|
| - if rewrite_function:
|
| - flat_text = rewrite_function(input_filepath, flat_text, distribution)
|
| -
|
| - flat_text = _SRC_RE.sub(SrcReplace, flat_text)
|
| -
|
| - # TODO(arv): Only do this inside <style> tags.
|
| - flat_text = InlineCSSImages(flat_text)
|
| -
|
| - flat_text = _ICON_RE.sub(SrcReplace, flat_text)
|
| -
|
| - if names_only:
|
| - flat_text = None # Will contains garbage if the flag is set anyway.
|
| - return InlinedData(flat_text, inlined_files)
|
| -
|
| -
|
| -def InlineToString(input_filename, grd_node, allow_external_script=False,
|
| - rewrite_function=None, filename_expansion_function=None):
|
| - """Inlines the resources in a specified file and returns it as a string.
|
| -
|
| - Args:
|
| - input_filename: name of file to read in
|
| - grd_node: html node from the grd file for this include tag
|
| - Returns:
|
| - the inlined data as a string
|
| - """
|
| - try:
|
| - return DoInline(
|
| - input_filename,
|
| - grd_node,
|
| - allow_external_script=allow_external_script,
|
| - rewrite_function=rewrite_function,
|
| - filename_expansion_function=filename_expansion_function).inlined_data
|
| - except IOError, e:
|
| - raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
|
| - (e.filename, input_filename, e.strerror))
|
| -
|
| -
|
| -def InlineToFile(input_filename, output_filename, grd_node):
|
| - """Inlines the resources in a specified file and writes it.
|
| -
|
| - Reads input_filename, finds all the src attributes and attempts to
|
| - inline the files they are referring to, then writes the result
|
| - to output_filename.
|
| -
|
| - Args:
|
| - input_filename: name of file to read in
|
| - output_filename: name of file to be written to
|
| - grd_node: html node from the grd file for this include tag
|
| - Returns:
|
| - a set of filenames of all the inlined files
|
| - """
|
| - inlined_data = InlineToString(input_filename, grd_node)
|
| - with open(output_filename, 'wb') as out_file:
|
| - out_file.writelines(inlined_data)
|
| -
|
| -
|
| -def GetResourceFilenames(filename,
|
| - allow_external_script=False,
|
| - rewrite_function=None,
|
| - filename_expansion_function=None):
|
| - """For a grd file, returns a set of all the files that would be inline."""
|
| - try:
|
| - return DoInline(
|
| - filename,
|
| - None,
|
| - names_only=True,
|
| - allow_external_script=allow_external_script,
|
| - rewrite_function=rewrite_function,
|
| - filename_expansion_function=filename_expansion_function).inlined_files
|
| - except IOError, e:
|
| - raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
|
| - (e.filename, filename, e.strerror))
|
| -
|
| -
|
| -def main():
|
| - if len(sys.argv) <= 2:
|
| - print "Flattens a HTML file by inlining its external resources.\n"
|
| - print "html_inline.py inputfile outputfile"
|
| - else:
|
| - InlineToFile(sys.argv[1], sys.argv[2], None)
|
| -
|
| -if __name__ == '__main__':
|
| - main()
|
|
|