Index: tracing/third_party/tvcm/tvcm/parse_html_deps.py |
diff --git a/tracing/third_party/tvcm/tvcm/parse_html_deps.py b/tracing/third_party/tvcm/tvcm/parse_html_deps.py |
deleted file mode 100644 |
index 8866ba18ab2dc02a56e89eda6c566fc244eec7bf..0000000000000000000000000000000000000000 |
--- a/tracing/third_party/tvcm/tvcm/parse_html_deps.py |
+++ /dev/null |
@@ -1,219 +0,0 @@ |
-# Copyright (c) 2013 The Chromium Authors. All rights reserved. |
-# Use of this source code is governed by a BSD-style license that can be |
-# found in the LICENSE file. |
- |
-import os |
-import sys |
- |
-from tvcm import module |
-from tvcm import strip_js_comments |
-from tvcm import html_generation_controller |
- |
- |
-def _AddToPathIfNeeded(path): |
- if path not in sys.path: |
- sys.path.insert(0, path) |
- |
- |
-def _InitBeautifulSoup(): |
- catapult_path = os.path.abspath(os.path.join(os.path.dirname(__file__), |
- '..', '..', '..', '..')) |
- bs_path = os.path.join(catapult_path, 'third_party', 'beautifulsoup4') |
- _AddToPathIfNeeded(bs_path) |
- |
- html5lib_path = os.path.join(catapult_path, 'third_party', 'html5lib-python') |
- _AddToPathIfNeeded(html5lib_path) |
- |
- |
-_InitBeautifulSoup() |
-import bs4 |
- |
-class InlineScript(object): |
- def __init__(self, soup): |
- if not soup: |
- raise module.DepsException('InlineScript created without soup') |
- self._soup = soup |
- self._stripped_contents = None |
- self._open_tags = None |
- |
- @property |
- def contents(self): |
- return unicode(self._soup.string) |
- |
- @property |
- def stripped_contents(self): |
- if not self._stripped_contents: |
- self._stripped_contents = strip_js_comments.StripJSComments( |
- self.contents) |
- return self._stripped_contents |
- |
- @property |
- def open_tags(self): |
- if self._open_tags: |
- return self._open_tags |
- open_tags = [] |
- cur = self._soup.parent |
- while cur: |
- if isinstance(cur, bs4.BeautifulSoup): |
- break |
- |
- open_tags.append(_Tag(cur.name, cur.attrs)) |
- cur = cur.parent |
- |
- open_tags.reverse() |
- assert open_tags[-1].tag == 'script' |
- del open_tags[-1] |
- |
- self._open_tags = open_tags |
- return self._open_tags |
- |
- |
-def _IsDoctype(x): |
- if not isinstance(x, bs4.Doctype): |
- return False |
- return x == 'html' or x == 'HTML' |
- |
-def _CreateSoupWithoutHeadOrBody(html): |
- soupCopy = bs4.BeautifulSoup(html, 'html5lib') |
- soup = bs4.BeautifulSoup() |
- soup.reset() |
- if soupCopy.head: |
- for n in soupCopy.head.contents: |
- n.extract() |
- soup.append(n) |
- if soupCopy.body: |
- for n in soupCopy.body.contents: |
- n.extract() |
- soup.append(n) |
- return soup |
- |
-class HTMLModuleParserResults(object): |
- def __init__(self, html): |
- self._soup = bs4.BeautifulSoup(html, 'html5lib') |
- self._inline_scripts = None |
- |
- @property |
- def has_decl(self): |
- decls = [x for x in self._soup.contents |
- if _IsDoctype(x)] |
- return len(decls) == 1 |
- |
- @property |
- def scripts_external(self): |
- tags = self._soup.findAll('script', src=True) |
- return [t['src'] for t in tags] |
- |
- @property |
- def inline_scripts(self): |
- if not self._inline_scripts: |
- tags = self._soup.findAll('script', src=None) |
- self._inline_scripts = [InlineScript(t.string) for t in tags] |
- return self._inline_scripts |
- |
- @property |
- def imports(self): |
- tags = self._soup.findAll('link', rel='import') |
- return [t['href'] for t in tags] |
- |
- @property |
- def stylesheets(self): |
- tags = self._soup.findAll('link', rel='stylesheet') |
- return [t['href'] for t in tags] |
- |
- @property |
- def inline_stylesheets(self): |
- tags = self._soup.findAll('style') |
- return [unicode(t.string) for t in tags] |
- |
- def YieldHTMLInPieces(self, controller, minify=False): |
- yield self.GenerateHTML(controller, minify) |
- |
- def GenerateHTML(self, controller, minify=False, prettify=False): |
- soup = _CreateSoupWithoutHeadOrBody(unicode(self._soup)) |
- |
- # Remove declaration. |
- for x in soup.contents: |
- if isinstance(x, bs4.Doctype): |
- x.extract() |
- |
- # Remove declaration. |
- for x in soup.contents: |
- if isinstance(x, bs4.Declaration): |
- x.extract() |
- |
- # Remove all imports. |
- imports = soup.findAll('link', rel='import') |
- for imp in imports: |
- imp.extract() |
- |
- # Remove all script links. |
- scripts_external = soup.findAll('script', src=True) |
- for script in scripts_external: |
- script.extract() |
- |
- # Remove all in-line scripts. |
- scripts_external = soup.findAll('script', src=None) |
- for script in scripts_external: |
- script.extract() |
- |
- # Process all in-line styles. |
- inline_styles = soup.findAll('style') |
- for style in inline_styles: |
- html = controller.GetHTMLForInlineStylesheet(unicode(style.string)) |
- if html: |
- ns = soup.new_tag('style') |
- ns.append(bs4.NavigableString(html)) |
- style.replaceWith(ns) |
- else: |
- style.extract() |
- |
- # Rewrite all external stylesheet hrefs or remove, as needed. |
- stylesheet_links = soup.findAll('link', rel='stylesheet') |
- for stylesheet_link in stylesheet_links: |
- html = controller.GetHTMLForStylesheetHRef(stylesheet_link['href']) |
- if html: |
- tmp = bs4.BeautifulSoup(html, 'html5lib').findAll('style') |
- assert len(tmp) == 1 |
- stylesheet_link.replaceWith(tmp[0]) |
- else: |
- stylesheet_link.extract() |
- |
- # Remove comments if minifying. |
- if minify: |
- comments = soup.findAll( |
- text=lambda text: isinstance(text, bs4.Comment)) |
- for comment in comments: |
- comment.extract() |
- if prettify: |
- return soup.prettify('utf-8').strip() |
- |
- # We are done. |
- return unicode(soup).strip() |
- |
- @property |
- def html_contents_without_links_and_script(self): |
- return self.GenerateHTML( |
- html_generation_controller.HTMLGenerationController()) |
- |
- |
-class _Tag(object): |
- |
- def __init__(self, tag, attrs): |
- self.tag = tag |
- self.attrs = attrs |
- |
- def __repr__(self): |
- attr_string = ' '.join(['%s="%s"' % (x[0], x[1]) for x in self.attrs]) |
- return '<%s %s>' % (self.tag, attr_string) |
- |
- |
-class HTMLModuleParser(): |
- |
- def Parse(self, html): |
- if html is None: |
- html = '' |
- else: |
- if html.find('< /script>') != -1: |
- raise Exception('Escape script tags with <\/script>') |
- |
- return HTMLModuleParserResults(html) |