Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(251)

Side by Side Diff: grit/format/html_inline.py

Issue 12261055: Correctly resolve relative paths when inlining @import directives in CSS files. (Closed) Base URL: https://grit-i18n.googlecode.com/svn/trunk
Patch Set: Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | grit/format/html_inline_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Flattens a HTML file by inlining its external resources. 6 """Flattens a HTML file by inlining its external resources.
7 7
8 This is a small script that takes a HTML file, looks for src attributes 8 This is a small script that takes a HTML file, looks for src attributes
9 and inlines the specified file, producing one HTML file with no external 9 and inlines the specified file, producing one HTML file with no external
10 dependencies. It recursively inlines the included files. 10 dependencies. It recursively inlines the included files.
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
67 string 67 string
68 """ 68 """
69 filename = src_match.group('filename') 69 filename = src_match.group('filename')
70 quote = src_match.group('quote') 70 quote = src_match.group('quote')
71 71
72 if filename.find(':') != -1: 72 if filename.find(':') != -1:
73 # filename is probably a URL, which we don't want to bother inlining 73 # filename is probably a URL, which we don't want to bother inlining
74 return src_match.group(0) 74 return src_match.group(0)
75 75
76 filename = filename.replace(DIST_SUBSTR , distribution) 76 filename = filename.replace(DIST_SUBSTR , distribution)
77 filepath = os.path.join(base_path, filename) 77 filepath = os.path.normpath(os.path.join(base_path, filename))
78 inlined_files.add(filepath) 78 inlined_files.add(filepath)
79 79
80 if names_only: 80 if names_only:
81 return "" 81 return ""
82 82
83 mimetype = mimetypes.guess_type(filename)[0] or 'text/plain' 83 mimetype = mimetypes.guess_type(filename)[0] or 'text/plain'
84 inline_data = base64.standard_b64encode(util.ReadFile(filepath, util.BINARY)) 84 inline_data = base64.standard_b64encode(util.ReadFile(filepath, util.BINARY))
85 85
86 prefix = src_match.string[src_match.start():src_match.start('filename')] 86 prefix = src_match.string[src_match.start():src_match.start('filename')]
87 suffix = src_match.string[src_match.end('filename'):src_match.end()] 87 suffix = src_match.string[src_match.end('filename'):src_match.end()]
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 122
123 # Keep track of all the files we inline. 123 # Keep track of all the files we inline.
124 inlined_files = set() 124 inlined_files = set()
125 125
126 def SrcReplace(src_match, filepath=input_filepath, 126 def SrcReplace(src_match, filepath=input_filepath,
127 inlined_files=inlined_files): 127 inlined_files=inlined_files):
128 """Helper function to provide SrcInlineAsDataURL with the base file path""" 128 """Helper function to provide SrcInlineAsDataURL with the base file path"""
129 return SrcInlineAsDataURL( 129 return SrcInlineAsDataURL(
130 src_match, filepath, distribution, inlined_files, names_only=names_only) 130 src_match, filepath, distribution, inlined_files, names_only=names_only)
131 131
132 def GetFilepath(src_match): 132 def GetFilepath(src_match, base_path = input_filepath):
133 filename = src_match.group('filename') 133 filename = src_match.group('filename')
134 134
135 if filename.find(':') != -1: 135 if filename.find(':') != -1:
136 # filename is probably a URL, which we don't want to bother inlining 136 # filename is probably a URL, which we don't want to bother inlining
137 return None 137 return None
138 138
139 filename = filename.replace('%DISTRIBUTION%', distribution) 139 filename = filename.replace('%DISTRIBUTION%', distribution)
140 return os.path.join(input_filepath, filename) 140 return os.path.normpath(os.path.join(base_path, filename))
141 141
142 def IsConditionSatisfied(src_match): 142 def IsConditionSatisfied(src_match):
143 expression = src_match.group('expression') 143 expression = src_match.group('expression')
144 return grd_node is None or grd_node.EvaluateCondition(expression) 144 return grd_node is None or grd_node.EvaluateCondition(expression)
145 145
146 def CheckConditionalElements(str): 146 def CheckConditionalElements(str):
147 """Helper function to conditionally inline inner elements""" 147 """Helper function to conditionally inline inner elements"""
148 while True: 148 while True:
149 begin_if = _BEGIN_IF_BLOCK.search(str) 149 begin_if = _BEGIN_IF_BLOCK.search(str)
150 if begin_if is None: 150 if begin_if is None:
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
210 210
211 def InlineCSSText(text, css_filepath): 211 def InlineCSSText(text, css_filepath):
212 """Helper function that inlines external resources in CSS text""" 212 """Helper function that inlines external resources in CSS text"""
213 filepath = os.path.dirname(css_filepath) 213 filepath = os.path.dirname(css_filepath)
214 # Allow custom modifications before inlining images. 214 # Allow custom modifications before inlining images.
215 if rewrite_function: 215 if rewrite_function:
216 text = rewrite_function(filepath, text, distribution) 216 text = rewrite_function(filepath, text, distribution)
217 text = InlineCSSImages(text, filepath) 217 text = InlineCSSImages(text, filepath)
218 return InlineCSSImports(text, filepath) 218 return InlineCSSImports(text, filepath)
219 219
220 def InlineCSSFile(src_match, inlined_files=inlined_files): 220 def InlineCSSFile(src_match, pattern, base_path=input_filepath):
221 """Helper function to inline external css files. 221 """Helper function to inline external CSS files.
222 222
223 Args: 223 Args:
224 src_match: A regular expression match with a named group named "filename". 224 src_match: A regular expression match with a named group named "filename".
225 pattern: The pattern to replace with the contents of the CSS file.
226 base_path: The base path to use for resolving the CSS file.
225 227
226 Returns: 228 Returns:
227 The text that should replace the reference to the CSS file. 229 The text that should replace the reference to the CSS file.
228 """ 230 """
229 filepath = GetFilepath(src_match) 231 filepath = GetFilepath(src_match, base_path)
230 if filepath is None: 232 if filepath is None:
231 return src_match.group(0) 233 return src_match.group(0)
232 234
233 # Even if names_only is set, the CSS file needs to be opened, because it 235 # Even if names_only is set, the CSS file needs to be opened, because it
234 # can link to images that need to be added to the file set. 236 # can link to images that need to be added to the file set.
235 inlined_files.add(filepath) 237 inlined_files.add(filepath)
236 # When resolving CSS files we need to pass in the path so that relative URLs 238 # When resolving CSS files we need to pass in the path so that relative URLs
237 # can be resolved. 239 # can be resolved.
238 return InlineCSSText(util.ReadFile(filepath, util.BINARY), filepath) 240 return pattern % InlineCSSText(util.ReadFile(filepath, util.BINARY),
241 filepath)
239 242
240 def InlineCSSImages(text, filepath=input_filepath): 243 def InlineCSSImages(text, filepath=input_filepath):
241 """Helper function that inlines external images in CSS backgrounds.""" 244 """Helper function that inlines external images in CSS backgrounds."""
242 # Replace contents of url() for css attributes: content, background, 245 # Replace contents of url() for css attributes: content, background,
243 # or *-image. 246 # or *-image.
244 return re.sub('(content|background|[\w-]*-image):[^;]*' + 247 return re.sub('(content|background|[\w-]*-image):[^;]*' +
245 '(url\((?P<quote1>"|\'|)[^"\'()]*(?P=quote1)\)|' + 248 '(url\((?P<quote1>"|\'|)[^"\'()]*(?P=quote1)\)|' +
246 'image-set\(' + 249 'image-set\(' +
247 '([ ]*url\((?P<quote2>"|\'|)[^"\'()]*(?P=quote2)\)' + 250 '([ ]*url\((?P<quote2>"|\'|)[^"\'()]*(?P=quote2)\)' +
248 '[ ]*[0-9.]*x[ ]*(,[ ]*)?)+\))', 251 '[ ]*[0-9.]*x[ ]*(,[ ]*)?)+\))',
249 lambda m: InlineCSSUrls(m, filepath), 252 lambda m: InlineCSSUrls(m, filepath),
250 text) 253 text)
251 254
252 def InlineCSSUrls(src_match, filepath=input_filepath): 255 def InlineCSSUrls(src_match, filepath=input_filepath):
253 """Helper function that inlines each url on a CSS image rule match.""" 256 """Helper function that inlines each url on a CSS image rule match."""
254 # Replace contents of url() references in matches. 257 # Replace contents of url() references in matches.
255 return re.sub('url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)(?P=quote)\)', 258 return re.sub('url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)(?P=quote)\)',
256 lambda m: SrcReplace(m, filepath), 259 lambda m: SrcReplace(m, filepath),
257 src_match.group(0)) 260 src_match.group(0))
258 261
259 def InlineCSSImports(text, filepath=input_filepath): 262 def InlineCSSImports(text, filepath=input_filepath):
260 """Helper function that inlines CSS files included via the @import 263 """Helper function that inlines CSS files included via the @import
261 directive. 264 directive.
262 """ 265 """
263 return re.sub('@import\s+url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)' + 266 return re.sub('@import\s+url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)' +
264 '(?P=quote)\)', 267 '(?P=quote)\);',
265 InlineCSSFile, 268 lambda m: InlineCSSFile(m, '%s', filepath),
266 text) 269 text)
267 270
268 271
269 flat_text = util.ReadFile(input_filename, util.BINARY) 272 flat_text = util.ReadFile(input_filename, util.BINARY)
270 273
271 # Check conditional elements, remove unsatisfied ones from the file. We do 274 # Check conditional elements, remove unsatisfied ones from the file. We do
272 # this twice. The first pass is so that we don't even bother calling 275 # this twice. The first pass is so that we don't even bother calling
273 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually 276 # InlineScript, InlineCSSFile and InlineIncludeFiles on text we're eventually
274 # going to throw out anyway. 277 # going to throw out anyway.
275 flat_text = CheckConditionalElements(flat_text) 278 flat_text = CheckConditionalElements(flat_text)
276 279
277 if not allow_external_script: 280 if not allow_external_script:
278 # We need to inline css and js before we inline images so that image 281 # We need to inline css and js before we inline images so that image
279 # references gets inlined in the css and js 282 # references gets inlined in the css and js
280 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + 283 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' +
281 '(?P<attrs2>.*?)></script>', 284 '(?P<attrs2>.*?)></script>',
282 InlineScript, 285 InlineScript,
283 flat_text) 286 flat_text)
284 287
285 flat_text = re.sub( 288 flat_text = re.sub(
286 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>', 289 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>',
287 lambda m: '<style>%s</style>' % InlineCSSFile(m), 290 lambda m: InlineCSSFile(m, '<style>%s</style>'),
288 flat_text) 291 flat_text)
289 292
290 flat_text = re.sub( 293 flat_text = re.sub(
291 '<include\s+src="(?P<filename>[^"\']*)".*>', 294 '<include\s+src="(?P<filename>[^"\']*)".*>',
292 InlineIncludeFiles, 295 InlineIncludeFiles,
293 flat_text) 296 flat_text)
294 297
295 # Check conditional elements, second pass. This catches conditionals in any 298 # Check conditional elements, second pass. This catches conditionals in any
296 # of the text we just inlined. 299 # of the text we just inlined.
297 flat_text = CheckConditionalElements(flat_text) 300 flat_text = CheckConditionalElements(flat_text)
(...skipping 21 matching lines...) Expand all
319 def InlineToString(input_filename, grd_node, allow_external_script=False, 322 def InlineToString(input_filename, grd_node, allow_external_script=False,
320 rewrite_function=None): 323 rewrite_function=None):
321 """Inlines the resources in a specified file and returns it as a string. 324 """Inlines the resources in a specified file and returns it as a string.
322 325
323 Args: 326 Args:
324 input_filename: name of file to read in 327 input_filename: name of file to read in
325 grd_node: html node from the grd file for this include tag 328 grd_node: html node from the grd file for this include tag
326 Returns: 329 Returns:
327 the inlined data as a string 330 the inlined data as a string
328 """ 331 """
329 return DoInline(input_filename, 332 try:
330 grd_node, 333 return DoInline(input_filename,
331 allow_external_script=allow_external_script, 334 grd_node,
332 rewrite_function=rewrite_function).inlined_data 335 allow_external_script=allow_external_script,
336 rewrite_function=rewrite_function).inlined_data
337 except IOError, e:
338 raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
339 (e.filename, input_filename, e.strerror))
333 340
334 341
335 def InlineToFile(input_filename, output_filename, grd_node): 342 def InlineToFile(input_filename, output_filename, grd_node):
336 """Inlines the resources in a specified file and writes it. 343 """Inlines the resources in a specified file and writes it.
337 344
338 Reads input_filename, finds all the src attributes and attempts to 345 Reads input_filename, finds all the src attributes and attempts to
339 inline the files they are referring to, then writes the result 346 inline the files they are referring to, then writes the result
340 to output_filename. 347 to output_filename.
341 348
342 Args: 349 Args:
(...skipping 25 matching lines...) Expand all
368 375
369 def main(): 376 def main():
370 if len(sys.argv) <= 2: 377 if len(sys.argv) <= 2:
371 print "Flattens a HTML file by inlining its external resources.\n" 378 print "Flattens a HTML file by inlining its external resources.\n"
372 print "html_inline.py inputfile outputfile" 379 print "html_inline.py inputfile outputfile"
373 else: 380 else:
374 InlineToFile(sys.argv[1], sys.argv[2], None) 381 InlineToFile(sys.argv[1], sys.argv[2], None)
375 382
376 if __name__ == '__main__': 383 if __name__ == '__main__':
377 main() 384 main()
OLD NEW
« no previous file with comments | « no previous file | grit/format/html_inline_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698