| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2013 Google Inc. All Rights Reserved. | |
| 3 # | |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 # you may not use this file except in compliance with the License. | |
| 6 # You may obtain a copy of the License at | |
| 7 # | |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 | |
| 9 # | |
| 10 # Unless required by applicable law or agreed to in writing, software | |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, | |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 13 # See the License for the specific language governing permissions and | |
| 14 # limitations under the License. | |
| 15 | |
| 16 """Inject javascript into html page source code.""" | |
| 17 | |
| 18 import logging | |
| 19 import os | |
| 20 import re | |
| 21 import util | |
| 22 import third_party.jsmin as jsmin | |
| 23 | |
# Patterns used to find the injection point near the start of a document.
#
# Each pattern is anchored at the beginning of the content and allows up to
# 256 arbitrary characters, an optional HTML comment, and up to 256 further
# characters before the tag of interest. With re.DOTALL the '.' also matches
# newlines, so the comment part is greedy across lines.
#
# For <html> and <head>, the tag name must be followed by whitespace, '/' or
# '>' so that longer tag names sharing the prefix (e.g. <header>) are not
# mistaken for the tag itself.
DOCTYPE_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<!doctype html>',
                        re.IGNORECASE | re.DOTALL)
HTML_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<html(?=[\s/>]).*?>',
                     re.IGNORECASE | re.DOTALL)
HEAD_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<head(?=[\s/>]).*?>',
                     re.IGNORECASE | re.DOTALL)
| 30 | |
| 31 | |
def GetInjectScript(scripts):
  """Loads |scripts| from disk and returns a string of their content.

  Args:
    scripts: a list of script paths, or a single string of comma-separated
        paths. Each path is looked up on the filesystem first and, failing
        that, through util.resource_exists / util.resource_string.

  Returns:
    The concatenated script sources after being passed through jsmin.jsmin.
    When |scripts| is falsy, the empty string is passed to jsmin.jsmin.

  Raises:
    Exception: if a script is found neither on disk nor as a resource.
  """
  sources = []
  if scripts:
    if not isinstance(scripts, list):
      scripts = scripts.split(',')
    for script in scripts:
      if os.path.exists(script):
        with open(script) as f:
          # Append the whole file as one piece; extending the list with a
          # string would split it into single characters.
          sources.append(f.read())
      elif util.resource_exists(script):
        sources.append(util.resource_string(script))
      else:
        # Format the message here: Exception does not interpolate extra
        # arguments the way the logging functions do.
        raise Exception('Script does not exist: %s' % script)

  return jsmin.jsmin(''.join(sources), quote_chars="'\"`")
| 48 | |
| 49 | |
| 50 def _IsHtmlContent(content): | |
| 51 content = content.strip() | |
| 52 return content.startswith('<') and content.endswith('>') | |
| 53 | |
| 54 | |
def InjectScript(text_chunks, content_type, script_to_inject):
  """Inject |script_to_inject| into |text_chunks| for 'text/html' content.

  The chunks are joined to locate the injection point: immediately after the
  first of <head>, <html> or <!doctype html> that is found (tried in that
  order). If none of them is found, the script is placed at the very
  beginning of the first chunk.

  Nothing is injected when |content_type| is not 'text/html', when the joined
  content is empty or does not look like HTML, or when the joined content
  already contains |script_to_inject|.

  Args:
    text_chunks: a list of strings that together form the response body.
    content_type: the content type of the response.
    script_to_inject: the script source to wrap in a <script> element.

  Returns:
    text_chunks, just_injected
      |text_chunks| is a copy with exactly one chunk changed if the script
        was injected, otherwise the original object.
      |just_injected| indicates if |script_to_inject| was just injected in
        the content.
  """
  if content_type != 'text/html':
    return text_chunks, False

  joined = ''.join(text_chunks)
  if not joined:
    return text_chunks, False
  if not _IsHtmlContent(joined):
    return text_chunks, False
  if script_to_inject in joined:
    return text_chunks, False

  for pattern in (HEAD_RE, HTML_RE, DOCTYPE_RE):
    found = pattern.search(joined)
    if not found:
      continue
    # Convert the offset in the joined content into an offset inside the
    # chunk that contains it by walking past the preceding chunks.
    remaining = found.end(0)
    for index, chunk in enumerate(text_chunks):
      chunk_len = len(chunk)
      if remaining > chunk_len:
        remaining -= chunk_len
        continue
      patched = text_chunks[:]
      patched[index] = '%s<script>%s</script>%s' % (
          chunk[0:remaining], script_to_inject, chunk[remaining:])
      return patched, True

  # No known tag was found: fall back to the start of the first chunk.
  patched = text_chunks[:]
  patched[0] = '<script>%s</script>%s' % (script_to_inject, text_chunks[0])
  logging.warning('Inject at the very beginning, because no tag of '
                  '<head>, <html> or <!doctype html> is found.')
  return patched, True
| OLD | NEW |