Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(405)

Unified Diff: grit/format/html_inline.py

Issue 1442863002: Remove contents of grit's SVN repository. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « grit/format/data_pack_unittest.py ('k') | grit/format/html_inline_unittest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: grit/format/html_inline.py
===================================================================
--- grit/format/html_inline.py (revision 202)
+++ grit/format/html_inline.py (working copy)
@@ -1,423 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2012 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Flattens a HTML file by inlining its external resources.
-
-This is a small script that takes a HTML file, looks for src attributes
-and inlines the specified file, producing one HTML file with no external
-dependencies. It recursively inlines the included files.
-"""
-
-import os
-import re
-import sys
-import base64
-import mimetypes
-
-from grit import lazy_re
-from grit import util
-
-# There is a python bug that makes mimetypes crash if the Windows
-# registry contains non-Latin keys ( http://bugs.python.org/issue9291
-# ). Initing manually and blocking external mime-type databases will
-# prevent that bug and if we add svg manually, it will still give us
-# the data we need.
-mimetypes.init([])
-mimetypes.add_type('image/svg+xml', '.svg')
-
-DIST_DEFAULT = 'chromium'
-DIST_ENV_VAR = 'CHROMIUM_BUILD'
-DIST_SUBSTR = '%DISTRIBUTION%'
-
-# Matches beginning of an "if" block with trailing spaces.
-_BEGIN_IF_BLOCK = lazy_re.compile(
- '<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*')
-
-# Matches ending of an "if" block with preceding spaces.
-_END_IF_BLOCK = lazy_re.compile('\s*</if>')
-
-# Used by DoInline to replace various links with inline content.
-_STYLESHEET_RE = lazy_re.compile(
- '<link rel="stylesheet"[^>]+?href="(?P<filename>[^"]*)".*?>(\s*</link>)?',
- re.DOTALL)
-_INCLUDE_RE = lazy_re.compile(
- '<include[^>]+?src="(?P<filename>[^"\']*)".*?>(\s*</include>)?',
- re.DOTALL)
-_SRC_RE = lazy_re.compile(
- r'<(?!script)(?:[^>]+?\s)src=(?P<quote>")(?!\[\[|{{)(?P<filename>[^"\']*)\1',
- re.MULTILINE)
-_ICON_RE = lazy_re.compile(
- r'<link rel="icon"\s(?:[^>]+?\s)?'
- 'href=(?P<quote>")(?P<filename>[^"\']*)\1',
- re.MULTILINE)
-
-
-def GetDistribution():
- """Helper function that gets the distribution we are building.
-
- Returns:
- string
- """
- distribution = DIST_DEFAULT
- if DIST_ENV_VAR in os.environ.keys():
- distribution = os.environ[DIST_ENV_VAR]
- if len(distribution) > 1 and distribution[0] == '_':
- distribution = distribution[1:].lower()
- return distribution
-
-
-def SrcInlineAsDataURL(
- src_match, base_path, distribution, inlined_files, names_only=False,
- filename_expansion_function=None):
- """regex replace function.
-
- Takes a regex match for src="filename", attempts to read the file
- at 'filename' and returns the src attribute with the file inlined
- as a data URI. If it finds DIST_SUBSTR string in file name, replaces
- it with distribution.
-
- Args:
- src_match: regex match object with 'filename' and 'quote' named capturing
- groups
- base_path: path that to look for files in
- distribution: string that should replace DIST_SUBSTR
- inlined_files: The name of the opened file is appended to this list.
- names_only: If true, the function will not read the file but just return "".
- It will still add the filename to |inlined_files|.
-
- Returns:
- string
- """
- filename = src_match.group('filename')
- if filename_expansion_function:
- filename = filename_expansion_function(filename)
- quote = src_match.group('quote')
-
- if filename.find(':') != -1:
- # filename is probably a URL, which we don't want to bother inlining
- return src_match.group(0)
-
- filename = filename.replace(DIST_SUBSTR , distribution)
- filepath = os.path.normpath(os.path.join(base_path, filename))
- inlined_files.add(filepath)
-
- if names_only:
- return ""
-
- mimetype = mimetypes.guess_type(filename)[0]
- if mimetype is None:
- raise Exception('%s is of an an unknown type and '
- 'cannot be stored in a data url.' % filename)
- inline_data = base64.standard_b64encode(util.ReadFile(filepath, util.BINARY))
-
- prefix = src_match.string[src_match.start():src_match.start('filename')]
- suffix = src_match.string[src_match.end('filename'):src_match.end()]
- return '%sdata:%s;base64,%s%s' % (prefix, mimetype, inline_data, suffix)
-
-
-class InlinedData:
- """Helper class holding the results from DoInline().
-
- Holds the inlined data and the set of filenames of all the inlined
- files.
- """
- def __init__(self, inlined_data, inlined_files):
- self.inlined_data = inlined_data
- self.inlined_files = inlined_files
-
-def DoInline(
- input_filename, grd_node, allow_external_script=False, names_only=False,
- rewrite_function=None, filename_expansion_function=None):
- """Helper function that inlines the resources in a specified file.
-
- Reads input_filename, finds all the src attributes and attempts to
- inline the files they are referring to, then returns the result and
- the set of inlined files.
-
- Args:
- input_filename: name of file to read in
- grd_node: html node from the grd file for this include tag
- names_only: |nil| will be returned for the inlined contents (faster).
- rewrite_function: function(filepath, text, distribution) which will be
- called to rewrite html content before inlining images.
- filename_expansion_function: function(filename) which will be called to
- rewrite filenames before attempting to read them.
- Returns:
- a tuple of the inlined data as a string and the set of filenames
- of all the inlined files
- """
- if filename_expansion_function:
- input_filename = filename_expansion_function(input_filename)
- input_filepath = os.path.dirname(input_filename)
- distribution = GetDistribution()
-
- # Keep track of all the files we inline.
- inlined_files = set()
-
- def SrcReplace(src_match, filepath=input_filepath,
- inlined_files=inlined_files):
- """Helper function to provide SrcInlineAsDataURL with the base file path"""
- return SrcInlineAsDataURL(
- src_match, filepath, distribution, inlined_files, names_only=names_only,
- filename_expansion_function=filename_expansion_function)
-
- def GetFilepath(src_match, base_path = input_filepath):
- filename = src_match.group('filename')
-
- if filename.find(':') != -1:
- # filename is probably a URL, which we don't want to bother inlining
- return None
-
- filename = filename.replace('%DISTRIBUTION%', distribution)
- if filename_expansion_function:
- filename = filename_expansion_function(filename)
- return os.path.normpath(os.path.join(base_path, filename))
-
- def IsConditionSatisfied(src_match):
- expression = src_match.group('expression')
- return grd_node is None or grd_node.EvaluateCondition(expression)
-
- def CheckConditionalElements(str):
- """Helper function to conditionally inline inner elements"""
- while True:
- begin_if = _BEGIN_IF_BLOCK.search(str)
- if begin_if is None:
- if _END_IF_BLOCK.search(str) is not None:
- raise Exception('Unmatched </if>')
- return str
-
- condition_satisfied = IsConditionSatisfied(begin_if)
- leading = str[0:begin_if.start()]
- content_start = begin_if.end()
-
- # Find matching "if" block end.
- count = 1
- pos = begin_if.end()
- while True:
- end_if = _END_IF_BLOCK.search(str, pos)
- if end_if is None:
- raise Exception('Unmatched <if>')
-
- next_if = _BEGIN_IF_BLOCK.search(str, pos)
- if next_if is None or next_if.start() >= end_if.end():
- count = count - 1
- if count == 0:
- break
- pos = end_if.end()
- else:
- count = count + 1
- pos = next_if.end()
-
- content = str[content_start:end_if.start()]
- trailing = str[end_if.end():]
-
- if condition_satisfied:
- str = leading + CheckConditionalElements(content) + trailing
- else:
- str = leading + trailing
-
- def InlineFileContents(src_match, pattern, inlined_files=inlined_files):
- """Helper function to inline external files of various types"""
- filepath = GetFilepath(src_match)
- if filepath is None:
- return src_match.group(0)
- inlined_files.add(filepath)
-
- if names_only:
- inlined_files.update(GetResourceFilenames(
- filepath,
- allow_external_script,
- rewrite_function,
- filename_expansion_function=filename_expansion_function))
- return ""
-
- return pattern % InlineToString(
- filepath, grd_node, allow_external_script,
- filename_expansion_function=filename_expansion_function)
-
- def InlineIncludeFiles(src_match):
- """Helper function to directly inline generic external files (without
- wrapping them with any kind of tags).
- """
- return InlineFileContents(src_match, '%s')
-
- def InlineScript(match):
- """Helper function to inline external script files"""
- attrs = (match.group('attrs1') + match.group('attrs2')).strip()
- if attrs:
- attrs = ' ' + attrs
- return InlineFileContents(match, '<script' + attrs + '>%s</script>')
-
- def InlineCSSText(text, css_filepath):
- """Helper function that inlines external resources in CSS text"""
- filepath = os.path.dirname(css_filepath)
- # Allow custom modifications before inlining images.
- if rewrite_function:
- text = rewrite_function(filepath, text, distribution)
- text = InlineCSSImages(text, filepath)
- return InlineCSSImports(text, filepath)
-
- def InlineCSSFile(src_match, pattern, base_path=input_filepath):
- """Helper function to inline external CSS files.
-
- Args:
- src_match: A regular expression match with a named group named "filename".
- pattern: The pattern to replace with the contents of the CSS file.
- base_path: The base path to use for resolving the CSS file.
-
- Returns:
- The text that should replace the reference to the CSS file.
- """
- filepath = GetFilepath(src_match, base_path)
- if filepath is None:
- return src_match.group(0)
-
- # Even if names_only is set, the CSS file needs to be opened, because it
- # can link to images that need to be added to the file set.
- inlined_files.add(filepath)
- # When resolving CSS files we need to pass in the path so that relative URLs
- # can be resolved.
- return pattern % InlineCSSText(util.ReadFile(filepath, util.BINARY),
- filepath)
-
- def InlineCSSImages(text, filepath=input_filepath):
- """Helper function that inlines external images in CSS backgrounds."""
- # Replace contents of url() for css attributes: content, background,
- # or *-image.
- return re.sub('(content|background|[\w-]*-image):[^;]*' +
- '(url\((?P<quote1>"|\'|)[^"\'()]*(?P=quote1)\)|' +
- 'image-set\(' +
- '([ ]*url\((?P<quote2>"|\'|)[^"\'()]*(?P=quote2)\)' +
- '[ ]*[0-9.]*x[ ]*(,[ ]*)?)+\))',
- lambda m: InlineCSSUrls(m, filepath),
- text)
-
- def InlineCSSUrls(src_match, filepath=input_filepath):
- """Helper function that inlines each url on a CSS image rule match."""
- # Replace contents of url() references in matches.
- return re.sub('url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)(?P=quote)\)',
- lambda m: SrcReplace(m, filepath),
- src_match.group(0))
-
- def InlineCSSImports(text, filepath=input_filepath):
- """Helper function that inlines CSS files included via the @import
- directive.
- """
- return re.sub('@import\s+url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)' +
- '(?P=quote)\);',
- lambda m: InlineCSSFile(m, '%s', filepath),
- text)
-
-
- flat_text = util.ReadFile(input_filename, util.BINARY)
-
- # Check conditional elements, remove unsatisfied ones from the file. We do
- # this twice. The first pass is so that we don't even bother calling
- # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually
- # going to throw out anyway.
- flat_text = CheckConditionalElements(flat_text)
-
- if not allow_external_script:
- # We need to inline css and js before we inline images so that image
- # references gets inlined in the css and js
- flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' +
- '(?P<attrs2>.*?)></script>',
- InlineScript,
- flat_text)
-
- flat_text = _STYLESHEET_RE.sub(
- lambda m: InlineCSSFile(m, '<style>%s</style>'),
- flat_text)
-
- flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text)
-
- # Check conditional elements, second pass. This catches conditionals in any
- # of the text we just inlined.
- flat_text = CheckConditionalElements(flat_text)
-
- # Allow custom modifications before inlining images.
- if rewrite_function:
- flat_text = rewrite_function(input_filepath, flat_text, distribution)
-
- flat_text = _SRC_RE.sub(SrcReplace, flat_text)
-
- # TODO(arv): Only do this inside <style> tags.
- flat_text = InlineCSSImages(flat_text)
-
- flat_text = _ICON_RE.sub(SrcReplace, flat_text)
-
- if names_only:
- flat_text = None # Will contains garbage if the flag is set anyway.
- return InlinedData(flat_text, inlined_files)
-
-
-def InlineToString(input_filename, grd_node, allow_external_script=False,
- rewrite_function=None, filename_expansion_function=None):
- """Inlines the resources in a specified file and returns it as a string.
-
- Args:
- input_filename: name of file to read in
- grd_node: html node from the grd file for this include tag
- Returns:
- the inlined data as a string
- """
- try:
- return DoInline(
- input_filename,
- grd_node,
- allow_external_script=allow_external_script,
- rewrite_function=rewrite_function,
- filename_expansion_function=filename_expansion_function).inlined_data
- except IOError, e:
- raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
- (e.filename, input_filename, e.strerror))
-
-
-def InlineToFile(input_filename, output_filename, grd_node):
- """Inlines the resources in a specified file and writes it.
-
- Reads input_filename, finds all the src attributes and attempts to
- inline the files they are referring to, then writes the result
- to output_filename.
-
- Args:
- input_filename: name of file to read in
- output_filename: name of file to be written to
- grd_node: html node from the grd file for this include tag
- Returns:
- a set of filenames of all the inlined files
- """
- inlined_data = InlineToString(input_filename, grd_node)
- with open(output_filename, 'wb') as out_file:
- out_file.writelines(inlined_data)
-
-
-def GetResourceFilenames(filename,
- allow_external_script=False,
- rewrite_function=None,
- filename_expansion_function=None):
- """For a grd file, returns a set of all the files that would be inline."""
- try:
- return DoInline(
- filename,
- None,
- names_only=True,
- allow_external_script=allow_external_script,
- rewrite_function=rewrite_function,
- filename_expansion_function=filename_expansion_function).inlined_files
- except IOError, e:
- raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
- (e.filename, filename, e.strerror))
-
-
-def main():
- if len(sys.argv) <= 2:
- print "Flattens a HTML file by inlining its external resources.\n"
- print "html_inline.py inputfile outputfile"
- else:
- InlineToFile(sys.argv[1], sys.argv[2], None)
-
-if __name__ == '__main__':
- main()
« no previous file with comments | « grit/format/data_pack_unittest.py ('k') | grit/format/html_inline_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698