Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(108)

Side by Side Diff: grit/format/html_inline.py

Issue 1442863002: Remove contents of grit's SVN repository. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « grit/format/data_pack_unittest.py ('k') | grit/format/html_inline_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Flattens a HTML file by inlining its external resources.
7
8 This is a small script that takes a HTML file, looks for src attributes
9 and inlines the specified file, producing one HTML file with no external
10 dependencies. It recursively inlines the included files.
11 """
12
13 import os
14 import re
15 import sys
16 import base64
17 import mimetypes
18
19 from grit import lazy_re
20 from grit import util
21
# Work around a Python bug ( http://bugs.python.org/issue9291 ) that makes
# mimetypes crash if the Windows registry contains non-Latin keys.
# Initializing manually with an empty file list blocks the external
# mime-type databases, which prevents that bug; SVG is then registered by
# hand so that we still get the data we need.
mimetypes.init([])
mimetypes.add_type('image/svg+xml', '.svg')
29
# Distribution name GetDistribution() falls back to when DIST_ENV_VAR is not
# present in the environment.
DIST_DEFAULT = 'chromium'
# Name of the environment variable GetDistribution() reads the distribution
# from.
DIST_ENV_VAR = 'CHROMIUM_BUILD'
# Placeholder inside file names that is replaced with the distribution name
# before the file is looked up.
DIST_SUBSTR = '%DISTRIBUTION%'
33
# All patterns below are raw strings so that regex escapes such as \s and
# backreferences such as \1 reach the regex engine untouched.

# Matches beginning of an "if" block with trailing spaces.
_BEGIN_IF_BLOCK = lazy_re.compile(
    r'<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*')

# Matches ending of an "if" block with preceding spaces.
_END_IF_BLOCK = lazy_re.compile(r'\s*</if>')

# Used by DoInline to replace various links with inline content.
_STYLESHEET_RE = lazy_re.compile(
    r'<link rel="stylesheet"[^>]+?href="(?P<filename>[^"]*)".*?>(\s*</link>)?',
    re.DOTALL)
_INCLUDE_RE = lazy_re.compile(
    r'<include[^>]+?src="(?P<filename>[^"\']*)".*?>(\s*</include>)?',
    re.DOTALL)
_SRC_RE = lazy_re.compile(
    r'<(?!script)(?:[^>]+?\s)src=(?P<quote>")(?!\[\[|{{)(?P<filename>[^"\']*)\1',
    re.MULTILINE)
# Both fragments must be raw: in a plain string literal '\1' is the octal
# escape for the character U+0001 rather than a backreference to the opening
# quote, which would keep this pattern from matching ordinary markup.
_ICON_RE = lazy_re.compile(
    r'<link rel="icon"\s(?:[^>]+?\s)?'
    r'href=(?P<quote>")(?P<filename>[^"\']*)\1',
    re.MULTILINE)
55
56
def GetDistribution():
  """Helper function that gets the distribution we are building.

  Reads the DIST_ENV_VAR environment variable. When it is not set,
  DIST_DEFAULT is returned. When it is set to a value longer than one
  character that starts with an underscore, the underscore is dropped and the
  remainder is lower-cased; any other value is returned unchanged.

  Returns:
    string
  """
  # A single lookup replaces the membership test on os.environ.keys() followed
  # by a second indexing operation.
  distribution = os.environ.get(DIST_ENV_VAR)
  if distribution is None:
    return DIST_DEFAULT
  if len(distribution) > 1 and distribution[0] == '_':
    distribution = distribution[1:].lower()
  return distribution
69
70
def SrcInlineAsDataURL(
    src_match, base_path, distribution, inlined_files, names_only=False,
    filename_expansion_function=None):
  """regex replace function.

  Takes a regex match for src="filename", attempts to read the file
  at 'filename' and returns the matched text with the file name replaced by
  the file contents encoded as a base64 data URI. If it finds DIST_SUBSTR
  string in file name, replaces it with distribution.

  Args:
    src_match: regex match object with a 'filename' named capturing group.
    base_path: path to look for files in
    distribution: string that should replace DIST_SUBSTR
    inlined_files: set of file paths; the normalized path of the referenced
        file is added to it.
    names_only: If true, the function will not read the file but just return "".
        It will still add the filename to |inlined_files|.
    filename_expansion_function: optional function(filename) applied to the
        matched file name before anything else is done with it.

  Returns:
    string: the original matched text when the file name contains a ':',
    "" when names_only is set, otherwise the matched text with the file name
    replaced by a data URI.

  Raises:
    Exception: if no mime type can be guessed from the file name.
  """
  filename = src_match.group('filename')
  if filename_expansion_function:
    filename = filename_expansion_function(filename)

  if ':' in filename:
    # filename is probably a URL, which we don't want to bother inlining
    return src_match.group(0)

  filename = filename.replace(DIST_SUBSTR, distribution)
  filepath = os.path.normpath(os.path.join(base_path, filename))
  inlined_files.add(filepath)

  if names_only:
    return ""

  mimetype = mimetypes.guess_type(filename)[0]
  if mimetype is None:
    raise Exception('%s is of an an unknown type and '
                    'cannot be stored in a data url.' % filename)
  inline_data = base64.standard_b64encode(util.ReadFile(filepath, util.BINARY))

  # Keep everything the pattern matched around the file name (tag text,
  # attribute name, quotes) and swap only the file name for the data URI.
  prefix = src_match.string[src_match.start():src_match.start('filename')]
  suffix = src_match.string[src_match.end('filename'):src_match.end()]
  return '%sdata:%s;base64,%s%s' % (prefix, mimetype, inline_data, suffix)
118
119
class InlinedData(object):
  """Helper class holding the results from DoInline().

  Holds the inlined data and the set of filenames of all the inlined
  files.
  """
  def __init__(self, inlined_data, inlined_files):
    # The flattened text as produced by DoInline().
    self.inlined_data = inlined_data
    # The collection of file paths DoInline() recorded while flattening.
    self.inlined_files = inlined_files
129
def DoInline(
    input_filename, grd_node, allow_external_script=False, names_only=False,
    rewrite_function=None, filename_expansion_function=None):
  """Helper function that inlines the resources in a specified file.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then returns the result and
  the set of inlined files.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag. May be None,
        in which case every <if> condition is treated as satisfied.
    allow_external_script: if true, <script src="..."> references are left
        alone instead of being replaced by the script contents.
    names_only: if true, None is returned for the inlined contents (faster);
        only the set of file names is meaningful.
    rewrite_function: function(filepath, text, distribution) which will be
        called to rewrite html content before inlining images.
    filename_expansion_function: function(filename) which will be called to
        rewrite filenames before attempting to read them.
  Returns:
    an InlinedData object holding the inlined data as a string (None when
    names_only is set) and the set of filenames of all the inlined files
  """
  if filename_expansion_function:
    input_filename = filename_expansion_function(input_filename)
  input_filepath = os.path.dirname(input_filename)
  distribution = GetDistribution()

  # Keep track of all the files we inline.
  inlined_files = set()

  def SrcReplace(src_match, filepath=input_filepath,
                 inlined_files=inlined_files):
    """Helper function to provide SrcInlineAsDataURL with the base file path"""
    return SrcInlineAsDataURL(
        src_match, filepath, distribution, inlined_files, names_only=names_only,
        filename_expansion_function=filename_expansion_function)

  def GetFilepath(src_match, base_path=input_filepath):
    """Returns the normalized path for a match, or None for URL-like names."""
    filename = src_match.group('filename')

    if ':' in filename:
      # filename is probably a URL, which we don't want to bother inlining
      return None

    # Use the shared constant rather than repeating the placeholder literal.
    filename = filename.replace(DIST_SUBSTR, distribution)
    if filename_expansion_function:
      filename = filename_expansion_function(filename)
    return os.path.normpath(os.path.join(base_path, filename))

  def IsConditionSatisfied(src_match):
    """Evaluates the 'expression' group of an <if> match against grd_node."""
    expression = src_match.group('expression')
    return grd_node is None or grd_node.EvaluateCondition(expression)

  def CheckConditionalElements(text):
    """Helper function to conditionally inline inner elements"""
    # The parameter is named |text| so that the builtin str is not shadowed.
    while True:
      begin_if = _BEGIN_IF_BLOCK.search(text)
      if begin_if is None:
        if _END_IF_BLOCK.search(text) is not None:
          raise Exception('Unmatched </if>')
        return text

      condition_satisfied = IsConditionSatisfied(begin_if)
      leading = text[0:begin_if.start()]
      content_start = begin_if.end()

      # Find matching "if" block end. |count| is the current nesting depth.
      count = 1
      pos = begin_if.end()
      while True:
        end_if = _END_IF_BLOCK.search(text, pos)
        if end_if is None:
          raise Exception('Unmatched <if>')

        next_if = _BEGIN_IF_BLOCK.search(text, pos)
        if next_if is None or next_if.start() >= end_if.end():
          # The next tag is a closing one: leave one nesting level.
          count = count - 1
          if count == 0:
            break
          pos = end_if.end()
        else:
          # Another <if> opens before the next </if>: enter a nesting level.
          count = count + 1
          pos = next_if.end()

      content = text[content_start:end_if.start()]
      trailing = text[end_if.end():]

      if condition_satisfied:
        text = leading + CheckConditionalElements(content) + trailing
      else:
        text = leading + trailing

  def InlineFileContents(src_match, pattern, inlined_files=inlined_files):
    """Helper function to inline external files of various types"""
    filepath = GetFilepath(src_match)
    if filepath is None:
      return src_match.group(0)
    inlined_files.add(filepath)

    if names_only:
      inlined_files.update(GetResourceFilenames(
          filepath,
          allow_external_script,
          rewrite_function,
          filename_expansion_function=filename_expansion_function))
      return ""

    return pattern % InlineToString(
        filepath, grd_node, allow_external_script,
        filename_expansion_function=filename_expansion_function)

  def InlineIncludeFiles(src_match):
    """Helper function to directly inline generic external files (without
       wrapping them with any kind of tags).
    """
    return InlineFileContents(src_match, '%s')

  def InlineScript(match):
    """Helper function to inline external script files"""
    attrs = (match.group('attrs1') + match.group('attrs2')).strip()
    if attrs:
      attrs = ' ' + attrs
    return InlineFileContents(match, '<script' + attrs + '>%s</script>')

  def InlineCSSText(text, css_filepath):
    """Helper function that inlines external resources in CSS text"""
    filepath = os.path.dirname(css_filepath)
    # Allow custom modifications before inlining images.
    if rewrite_function:
      text = rewrite_function(filepath, text, distribution)
    text = InlineCSSImages(text, filepath)
    return InlineCSSImports(text, filepath)

  def InlineCSSFile(src_match, pattern, base_path=input_filepath):
    """Helper function to inline external CSS files.

    Args:
      src_match: A regular expression match with a named group named "filename".
      pattern: The pattern to replace with the contents of the CSS file.
      base_path: The base path to use for resolving the CSS file.

    Returns:
      The text that should replace the reference to the CSS file.
    """
    filepath = GetFilepath(src_match, base_path)
    if filepath is None:
      return src_match.group(0)

    # Even if names_only is set, the CSS file needs to be opened, because it
    # can link to images that need to be added to the file set.
    inlined_files.add(filepath)
    # When resolving CSS files we need to pass in the path so that relative URLs
    # can be resolved.
    return pattern % InlineCSSText(util.ReadFile(filepath, util.BINARY),
                                   filepath)

  def InlineCSSImages(text, filepath=input_filepath):
    """Helper function that inlines external images in CSS backgrounds."""
    # Replace contents of url() for css attributes: content, background,
    # or *-image.
    return re.sub(r'(content|background|[\w-]*-image):[^;]*' +
                  r'(url\((?P<quote1>"|\'|)[^"\'()]*(?P=quote1)\)|' +
                  r'image-set\(' +
                  r'([ ]*url\((?P<quote2>"|\'|)[^"\'()]*(?P=quote2)\)' +
                  r'[ ]*[0-9.]*x[ ]*(,[ ]*)?)+\))',
                  lambda m: InlineCSSUrls(m, filepath),
                  text)

  def InlineCSSUrls(src_match, filepath=input_filepath):
    """Helper function that inlines each url on a CSS image rule match."""
    # Replace contents of url() references in matches.
    return re.sub(r'url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)(?P=quote)\)',
                  lambda m: SrcReplace(m, filepath),
                  src_match.group(0))

  def InlineCSSImports(text, filepath=input_filepath):
    """Helper function that inlines CSS files included via the @import
       directive.
    """
    return re.sub(r'@import\s+url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)' +
                  r'(?P=quote)\);',
                  lambda m: InlineCSSFile(m, '%s', filepath),
                  text)


  flat_text = util.ReadFile(input_filename, util.BINARY)

  # Check conditional elements, remove unsatisfied ones from the file. We do
  # this twice. The first pass is so that we don't even bother calling
  # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually
  # going to throw out anyway.
  flat_text = CheckConditionalElements(flat_text)

  if not allow_external_script:
    # We need to inline css and js before we inline images so that image
    # references gets inlined in the css and js
    flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' +
                       '(?P<attrs2>.*?)></script>',
                       InlineScript,
                       flat_text)

  flat_text = _STYLESHEET_RE.sub(
      lambda m: InlineCSSFile(m, '<style>%s</style>'),
      flat_text)

  flat_text = _INCLUDE_RE.sub(InlineIncludeFiles, flat_text)

  # Check conditional elements, second pass. This catches conditionals in any
  # of the text we just inlined.
  flat_text = CheckConditionalElements(flat_text)

  # Allow custom modifications before inlining images.
  if rewrite_function:
    flat_text = rewrite_function(input_filepath, flat_text, distribution)

  flat_text = _SRC_RE.sub(SrcReplace, flat_text)

  # TODO(arv): Only do this inside <style> tags.
  flat_text = InlineCSSImages(flat_text)

  flat_text = _ICON_RE.sub(SrcReplace, flat_text)

  if names_only:
    flat_text = None  # Would contain garbage if the flag is set anyway.
  return InlinedData(flat_text, inlined_files)
354
355
def InlineToString(input_filename, grd_node, allow_external_script=False,
                   rewrite_function=None, filename_expansion_function=None):
  """Inlines the resources in a specified file and returns it as a string.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag
    allow_external_script: forwarded unchanged to DoInline().
    rewrite_function: forwarded unchanged to DoInline().
    filename_expansion_function: forwarded unchanged to DoInline().
  Returns:
    the inlined data as a string
  Raises:
    Exception: if an IOError occurs while flattening; the message names the
        file that failed and the file being flattened.
  """
  try:
    return DoInline(
        input_filename,
        grd_node,
        allow_external_script=allow_external_script,
        rewrite_function=rewrite_function,
        filename_expansion_function=filename_expansion_function).inlined_data
  # "except X as e" is accepted from Python 2.6 on, unlike the comma form
  # which only older-style Python 2 code can use.
  except IOError as e:
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, input_filename, e.strerror))
376
377
def InlineToFile(input_filename, output_filename, grd_node):
  """Inlines the resources in a specified file and writes it.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then writes the result
  to output_filename.

  Args:
    input_filename: name of file to read in
    output_filename: name of file to be written to
    grd_node: html node from the grd file for this include tag
  Returns:
    None. Use GetResourceFilenames() to obtain the set of inlined files.
  """
  inlined_data = InlineToString(input_filename, grd_node)
  with open(output_filename, 'wb') as out_file:
    # write() emits the string in one call; writelines() would iterate over
    # it and write one character at a time.
    out_file.write(inlined_data)
395
396
def GetResourceFilenames(filename,
                         allow_external_script=False,
                         rewrite_function=None,
                         filename_expansion_function=None):
  """For a grd file, returns a set of all the files that would be inline.

  Runs DoInline() with names_only=True and no grd node.

  Args:
    filename: name of the file to scan.
    allow_external_script: forwarded unchanged to DoInline().
    rewrite_function: forwarded unchanged to DoInline().
    filename_expansion_function: forwarded unchanged to DoInline().
  Returns:
    the set of file paths recorded by DoInline().
  Raises:
    Exception: if an IOError occurs while scanning; the message names the
        file that failed and the file being flattened.
  """
  try:
    return DoInline(
        filename,
        None,
        names_only=True,
        allow_external_script=allow_external_script,
        rewrite_function=rewrite_function,
        filename_expansion_function=filename_expansion_function).inlined_files
  # "except X as e" is accepted from Python 2.6 on, unlike the comma form.
  except IOError as e:
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, filename, e.strerror))
413
414
def main():
  """Command-line entry point.

  With fewer than two arguments after the script name, prints a short usage
  message. Otherwise flattens sys.argv[1] into sys.argv[2] without a grd node.
  """
  if len(sys.argv) <= 2:
    # A parenthesized single string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("Flattens a HTML file by inlining its external resources.\n")
    print("html_inline.py inputfile outputfile")
  else:
    InlineToFile(sys.argv[1], sys.argv[2], None)

if __name__ == '__main__':
  main()
OLDNEW
« no previous file with comments | « grit/format/data_pack_unittest.py ('k') | grit/format/html_inline_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698