Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(257)

Side by Side Diff: grit/format/html_inline.py

Issue 7994004: Initial source commit to grit-i18n project. (Closed) Base URL: http://grit-i18n.googlecode.com/svn/trunk/
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « grit/format/data_pack_unittest.py ('k') | grit/format/interface.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:executable
+ *
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Flattens a HTML file by inlining its external resources.
7
8 This is a small script that takes a HTML file, looks for src attributes
9 and inlines the specified file, producing one HTML file with no external
10 dependencies.
11
12 This does not inline anything referenced from an inlined file.
13 """
14
15 import os
16 import re
17 import sys
18 import base64
19 import mimetypes
20
21 from grit.node import base
22
# Distribution name used when the DIST_ENV_VAR environment variable is not set.
DIST_DEFAULT = 'chromium'
# Name of the environment variable that can override the distribution.
DIST_ENV_VAR = 'CHROMIUM_BUILD'
# Placeholder in resource file names that is replaced by the distribution.
DIST_SUBSTR = '%DISTRIBUTION%'

# Matches beginning of an "if" block with trailing spaces.
# Raw strings keep "\s" as a regex escape instead of an (invalid) string escape.
_BEGIN_IF_BLOCK = re.compile(
    r'<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*')

# Matches ending of an "if" block with preceding spaces.
_END_IF_BLOCK = re.compile(r'\s*</if>')
32
def ReadFile(input_filename):
    """Helper function that returns the contents of input_filename.

    The file is opened in binary mode ('rb'), so its contents are returned
    exactly as stored, without any newline translation.

    Args:
      input_filename: name of file to be read

    Returns:
      the complete contents of the file as a (byte) string
    """
    # The "with" block closes the file even when read() raises.
    with open(input_filename, 'rb') as f:
        return f.read()
46
def SrcInlineAsDataURL(
        src_match, base_path, distribution, inlined_files, names_only=False):
    """regex replace function.

    Takes a regex match for src="filename", attempts to read the file
    at 'filename' and returns the src attribute with the file inlined
    as a data URI. If it finds DIST_SUBSTR string in file name, replaces
    it with distribution.

    Args:
      src_match: regex match object with 'filename' named capturing group
      base_path: path to look for files in
      distribution: string that should replace DIST_SUBSTR
      inlined_files: set to which the path of the referenced file is added
      names_only: If true, the function will not read the file but just
          return "". It will still add the filename to |inlined_files|.

    Returns:
      the replacement string: the unchanged match when the filename contains
      a ':', "" when names_only is set, otherwise the matched prefix followed
      by a quoted base64 data URI
    """
    filename = src_match.group('filename')

    if ':' in filename:
        # filename is probably a URL, which we don't want to bother inlining
        return src_match.group(0)

    filename = filename.replace(DIST_SUBSTR, distribution)
    filepath = os.path.join(base_path, filename)
    inlined_files.add(filepath)

    if names_only:
        return ""

    # Fall back to text/plain when the type cannot be guessed from the name.
    mimetype = mimetypes.guess_type(filename)[0] or 'text/plain'
    inline_data = base64.standard_b64encode(ReadFile(filepath))

    # Keep everything that was matched up to, but not including, the character
    # right before the filename (its opening quote); a new quote is emitted
    # below together with the data URI.
    prefix = src_match.string[
        src_match.start():src_match.start('filename') - 1]
    return "%s\"data:%s;base64,%s\"" % (prefix, mimetype, inline_data)
85
86
class InlinedData(object):
    """Helper class holding the results from DoInline().

    Holds the inlined data and the set of filenames of all the inlined
    files.
    """

    def __init__(self, inlined_data, inlined_files):
        # The flattened text (DoInline() stores None here when it is asked
        # for names only).
        self.inlined_data = inlined_data
        # Set of paths of every file that was (or would be) inlined.
        self.inlined_files = inlined_files
96
def DoInline(
        input_filename, grd_node, allow_external_script=False,
        names_only=False):
    """Helper function that inlines the resources in a specified file.

    Reads input_filename, finds all the src attributes and attempts to
    inline the files they are referring to, then returns the result and
    the set of inlined files.

    Args:
      input_filename: name of file to read in
      grd_node: html node from the grd file for this include tag; when it is
          None every <if> condition is treated as satisfied
      allow_external_script: if true, <script src="..."> tags are not
          replaced by the contents of the script file
      names_only: if true, None is returned for the inlined contents (faster)

    Returns:
      an InlinedData object holding the inlined data as a string (None when
      names_only is set) and the set of filenames of all the inlined files

    Raises:
      Exception: if an <if> block has no matching </if>
    """
    input_filepath = os.path.dirname(input_filename)

    distribution = DIST_DEFAULT
    if DIST_ENV_VAR in os.environ:
        distribution = os.environ[DIST_ENV_VAR]
        # Drop a leading underscore and lower-case the rest of the value.
        if len(distribution) > 1 and distribution[0] == '_':
            distribution = distribution[1:].lower()

    # Keep track of all the files we inline.
    inlined_files = set()

    def SrcReplace(src_match, filepath=input_filepath,
                   inlined_files=inlined_files):
        """Helper function to provide SrcInlineAsDataURL with the base file
        path"""
        return SrcInlineAsDataURL(
            src_match, filepath, distribution, inlined_files,
            names_only=names_only)

    def GetFilepath(src_match):
        """Returns the path of the matched file, or None if it looks like a
        URL"""
        filename = src_match.group('filename')

        if ':' in filename:
            # filename is probably a URL, which we don't want to bother
            # inlining
            return None

        filename = filename.replace(DIST_SUBSTR, distribution)
        return os.path.join(input_filepath, filename)

    def IsConditionSatisfied(src_match):
        """Evaluates the expr attribute of a matched <if> tag via grd_node"""
        expression = src_match.group('expression')
        return grd_node is None or grd_node.EvaluateCondition(expression)

    def CheckConditionalElements(text):
        """Helper function to conditionally inline inner elements"""
        while True:
            begin_if = _BEGIN_IF_BLOCK.search(text)
            if begin_if is None:
                return text

            condition_satisfied = IsConditionSatisfied(begin_if)
            leading = text[0:begin_if.start()]
            content_start = begin_if.end()

            # Find matching "if" block end.  |count| is the current nesting
            # depth; it reaches zero at the </if> that closes |begin_if|.
            count = 1
            pos = begin_if.end()
            while True:
                end_if = _END_IF_BLOCK.search(text, pos)
                if end_if is None:
                    raise Exception('Unmatched <if>')

                next_if = _BEGIN_IF_BLOCK.search(text, pos)
                if next_if is None or next_if.start() >= end_if.end():
                    # The next tag is a closing one.
                    count = count - 1
                    if count == 0:
                        break
                    pos = end_if.end()
                else:
                    # Another <if> opens before the next </if>.
                    count = count + 1
                    pos = next_if.end()

            content = text[content_start:end_if.start()]
            trailing = text[end_if.end():]

            if condition_satisfied:
                text = leading + CheckConditionalElements(content) + trailing
            else:
                text = leading + trailing

    def InlineFileContents(src_match, pattern, inlined_files=inlined_files):
        """Helper function to inline external script and css files"""
        filepath = GetFilepath(src_match)
        if filepath is None:
            return src_match.group(0)
        inlined_files.add(filepath)

        # Even if names_only is set, html files needs to be opened, because it
        # can link to images that need to be added to the file set.
        if names_only and not filepath.endswith('.html'):
            return ""

        return pattern % ReadFile(filepath)

    def InlineIncludeFiles(src_match):
        """Helper function to inline files referenced by <include> tags"""
        return InlineFileContents(src_match, '%s')

    def InlineScript(src_match):
        """Helper function to inline external script files"""
        return InlineFileContents(src_match, '<script>%s</script>')

    def InlineCSSText(text, css_filepath):
        """Helper function that inlines external resources in CSS text"""
        filepath = os.path.dirname(css_filepath)
        return InlineCSSImages(text, filepath)

    def InlineCSSFile(src_match, inlined_files=inlined_files):
        """Helper function to inline external css files.

        Args:
          src_match: A regular expression match with a named group named
              "filename".

        Returns:
          The text that should replace the reference to the CSS file.
        """
        filepath = GetFilepath(src_match)
        if filepath is None:
            return src_match.group(0)

        # Even if names_only is set, the CSS file needs to be opened, because
        # it can link to images that need to be added to the file set.
        inlined_files.add(filepath)
        # When resolving CSS files we need to pass in the path so that
        # relative URLs can be resolved.
        return '<style>%s</style>' % InlineCSSText(
            ReadFile(filepath), filepath)

    def InlineCSSImages(text, filepath=input_filepath):
        """Helper function that inlines external images in CSS backgrounds."""
        # Replace contents of url() for css attributes: content, background,
        # or *-image.
        return re.sub(r'(?:content|background|[\w-]*-image):[ ]*' +
                      r'url\((?:\'|")(?P<filename>[^"\'\)\(]*)(?:\'|")',
                      lambda m: SrcReplace(m, filepath),
                      text)

    flat_text = ReadFile(input_filename)

    if not allow_external_script:
        # We need to inline css and js before we inline images so that image
        # references gets inlined in the css and js
        flat_text = re.sub(
            '<script .*?src="(?P<filename>[^"\']*)".*?></script>',
            InlineScript,
            flat_text)

    flat_text = re.sub(
        '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>',
        InlineCSSFile,
        flat_text)

    # NOTE(review): the trailing ".*>" is greedy, so everything up to the last
    # ">" on the same line is replaced together with the tag -- confirm this
    # is intended before tightening the pattern.
    flat_text = re.sub(
        r'<include\s+src="(?P<filename>[^"\']*)".*>',
        InlineIncludeFiles,
        flat_text)

    # Check conditional elements, remove unsatisfied ones from the file.
    flat_text = CheckConditionalElements(flat_text)

    flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"',
                       SrcReplace,
                       flat_text)

    # TODO(arv): Only do this inside <style> tags.
    flat_text = InlineCSSImages(flat_text)

    flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"',
                       SrcReplace,
                       flat_text)

    if names_only:
        flat_text = None  # Will contain garbage if the flag is set anyway.
    return InlinedData(flat_text, inlined_files)
273
274
def InlineToString(input_filename, grd_node, allow_external_script=False):
    """Inlines the resources in a specified file and returns it as a string.

    Args:
      input_filename: name of file to read in
      grd_node: html node from the grd file for this include tag
      allow_external_script: passed through to DoInline()

    Returns:
      the inlined data as a string

    Raises:
      Exception: if DoInline() raises an IOError; the message names the file
          that failed and the file being flattened
    """
    try:
        return DoInline(
            input_filename,
            grd_node,
            allow_external_script=allow_external_script).inlined_data
    except IOError as e:
        raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                        (e.filename, input_filename, e.strerror))
291
292
def InlineToFile(input_filename, output_filename, grd_node):
    """Inlines the resources in a specified file and writes it.

    Reads input_filename, finds all the src attributes and attempts to
    inline the files they are referring to, then writes the result
    to output_filename.

    Args:
      input_filename: name of file to read in
      output_filename: name of file to be written to
      grd_node: html node from the grd file for this include tag

    Returns:
      None
    """
    inlined_data = InlineToString(input_filename, grd_node)
    # The "with" block closes the output file even when writing fails, and a
    # single write() avoids handing the string over one character at a time.
    with open(output_filename, 'wb') as out_file:
        out_file.write(inlined_data)
311
312
def GetResourceFilenames(filename):
    """Returns a set of all the files that would be inlined into |filename|.

    Runs DoInline() with names_only set and without a grd node.

    Args:
      filename: name of the file whose references are collected

    Returns:
      the set of paths of all the files that would be inlined

    Raises:
      Exception: if DoInline() raises an IOError; the message names the file
          that failed and the file being flattened
    """
    try:
        return DoInline(filename, None, names_only=True).inlined_files
    except IOError as e:
        raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                        (e.filename, filename, e.strerror))
320
321
def main():
    """Command line entry point.

    With fewer than two arguments a short usage message is printed; otherwise
    the first argument is flattened into the file named by the second one.
    """
    if len(sys.argv) <= 2:
        # Single-argument print calls behave the same with or without the
        # print statement.
        print("Flattens a HTML file by inlining its external resources.\n")
        print("html_inline.py inputfile outputfile")
    else:
        InlineToFile(sys.argv[1], sys.argv[2], None)


if __name__ == '__main__':
    main()
OLDNEW
« no previous file with comments | « grit/format/data_pack_unittest.py ('k') | grit/format/interface.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698