Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Unified Diff: tracing/third_party/tvcm/tvcm/parse_html_deps.py

Issue 1376953005: Move tracing/third_party/tvcm -> third_party/py_vulcanize. (Closed) Base URL: git@github.com:catapult-project/catapult.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tracing/third_party/tvcm/tvcm/parse_html_deps.py
diff --git a/tracing/third_party/tvcm/tvcm/parse_html_deps.py b/tracing/third_party/tvcm/tvcm/parse_html_deps.py
deleted file mode 100644
index 8866ba18ab2dc02a56e89eda6c566fc244eec7bf..0000000000000000000000000000000000000000
--- a/tracing/third_party/tvcm/tvcm/parse_html_deps.py
+++ /dev/null
@@ -1,219 +0,0 @@
-# Copyright (c) 2013 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import os
-import sys
-
-from tvcm import module
-from tvcm import strip_js_comments
-from tvcm import html_generation_controller
-
-
def _AddToPathIfNeeded(path):
  """Puts |path| at the front of sys.path unless it is already listed."""
  if path in sys.path:
    return
  sys.path.insert(0, path)
-
-
def _InitBeautifulSoup():
  """Makes the bundled beautifulsoup4 and html5lib-python dirs importable.

  Both directories are looked up under third_party/ in the directory four
  levels above the one containing this file.
  """
  this_dir = os.path.dirname(__file__)
  catapult_path = os.path.abspath(
      os.path.join(this_dir, '..', '..', '..', '..'))
  # Same insertion order as before: beautifulsoup4 first, then html5lib.
  for package_dir in ('beautifulsoup4', 'html5lib-python'):
    _AddToPathIfNeeded(
        os.path.join(catapult_path, 'third_party', package_dir))
-
-
-_InitBeautifulSoup()
-import bs4
-
class InlineScript(object):
  """One inline script found in a parsed document.

  Wraps the bs4 node handed to the constructor and lazily derives the
  comment-stripped source and the chain of tags that enclose the script.
  """

  def __init__(self, soup):
    """Stores |soup|; raises module.DepsException if it is falsy."""
    if not soup:
      raise module.DepsException('InlineScript created without soup')
    self._soup = soup
    # Lazily computed caches; None means "not computed yet".
    self._stripped_contents = None
    self._open_tags = None

  @property
  def contents(self):
    """The script text, as unicode."""
    return unicode(self._soup.string)

  @property
  def stripped_contents(self):
    """The script text after strip_js_comments.StripJSComments()."""
    # Compare against None rather than truthiness so that an empty result
    # is cached instead of being recomputed on every access.
    if self._stripped_contents is None:
      self._stripped_contents = strip_js_comments.StripJSComments(
          self.contents)
    return self._stripped_contents

  @property
  def open_tags(self):
    """List of _Tag for the tags enclosing the script, outermost first.

    The <script> tag itself is not included.
    """
    # An empty list is a valid cached value, hence the explicit None check.
    if self._open_tags is not None:
      return self._open_tags

    # Walk up the parent chain until the document root is reached.
    open_tags = []
    cur = self._soup.parent
    while cur:
      if isinstance(cur, bs4.BeautifulSoup):
        break

      open_tags.append(_Tag(cur.name, cur.attrs))
      cur = cur.parent

    # The walk collected innermost-first; flip it, then drop the trailing
    # <script> entry that is the node's own parent.
    open_tags.reverse()
    assert open_tags[-1].tag == 'script'
    del open_tags[-1]

    self._open_tags = open_tags
    return self._open_tags
-
-
def _IsDoctype(x):
  """True if |x| is a bs4.Doctype that compares equal to 'html' or 'HTML'."""
  return isinstance(x, bs4.Doctype) and (x == 'html' or x == 'HTML')
-
def _CreateSoupWithoutHeadOrBody(html):
  """Parses |html| and returns a soup holding only head and body children.

  The children of <head> (if any) are moved first, followed by the children
  of <body> (if any); the <head> and <body> wrappers are not carried over.

  Args:
    html: Markup to parse with the html5lib tree builder.

  Returns:
    A bs4.BeautifulSoup whose top-level nodes are the moved children.
  """
  soupCopy = bs4.BeautifulSoup(html, 'html5lib')
  soup = bs4.BeautifulSoup()
  soup.reset()
  for section in (soupCopy.head, soupCopy.body):
    if not section:
      continue
    # extract() deletes the node from section.contents. Iterating that live
    # list while deleting from it skips every other child, so walk a
    # snapshot to make sure each child is moved.
    for n in list(section.contents):
      n.extract()
      soup.append(n)
  return soup
-
class HTMLModuleParserResults(object):
  """Queryable result of parsing one HTML module with html5lib.

  Exposes the scripts, imports and stylesheets found in the document and can
  regenerate the markup with those dependencies stripped or rewritten.
  """

  def __init__(self, html):
    """Parses |html| with the html5lib tree builder."""
    self._soup = bs4.BeautifulSoup(html, 'html5lib')
    # Lazily built list of InlineScript; None means "not computed yet".
    self._inline_scripts = None

  @property
  def has_decl(self):
    """True when exactly one top-level html/HTML doctype is present."""
    decls = [x for x in self._soup.contents
             if _IsDoctype(x)]
    return len(decls) == 1

  @property
  def scripts_external(self):
    """The src value of every <script> that has a src attribute."""
    tags = self._soup.findAll('script', src=True)
    return [t['src'] for t in tags]

  @property
  def inline_scripts(self):
    """An InlineScript for every <script> found with src=None."""
    # Explicit None check: a document with no inline scripts yields an empty
    # list, which should be cached rather than recomputed on each access.
    if self._inline_scripts is None:
      tags = self._soup.findAll('script', src=None)
      self._inline_scripts = [InlineScript(t.string) for t in tags]
    return self._inline_scripts

  @property
  def imports(self):
    """The href value of every <link rel="import">."""
    tags = self._soup.findAll('link', rel='import')
    return [t['href'] for t in tags]

  @property
  def stylesheets(self):
    """The href value of every <link rel="stylesheet">."""
    tags = self._soup.findAll('link', rel='stylesheet')
    return [t['href'] for t in tags]

  @property
  def inline_stylesheets(self):
    """The text of every <style> element, as unicode."""
    tags = self._soup.findAll('style')
    return [unicode(t.string) for t in tags]

  def YieldHTMLInPieces(self, controller, minify=False):
    """Yields the output of GenerateHTML() as a single piece."""
    yield self.GenerateHTML(controller, minify)

  def GenerateHTML(self, controller, minify=False, prettify=False):
    """Returns the document markup with dependencies removed or rewritten.

    Imports and all scripts are dropped. Inline and linked stylesheets are
    replaced by whatever |controller| returns for them, or dropped when it
    returns something falsy.

    Args:
      controller: Object providing GetHTMLForInlineStylesheet(contents) and
          GetHTMLForStylesheetHRef(href).
      minify: When true, HTML comments are removed as well.
      prettify: When true, the result comes from soup.prettify('utf-8')
          instead of unicode(soup).

    Returns:
      The generated markup with surrounding whitespace stripped.
    """
    soup = _CreateSoupWithoutHeadOrBody(unicode(self._soup))

    # Remove doctypes and declarations. extract() mutates soup.contents, so
    # iterate over a snapshot to avoid skipping the node after a removed one.
    for x in list(soup.contents):
      if isinstance(x, (bs4.Doctype, bs4.Declaration)):
        x.extract()

    # Remove all imports.
    imports = soup.findAll('link', rel='import')
    for imp in imports:
      imp.extract()

    # Remove all script links.
    scripts_external = soup.findAll('script', src=True)
    for script in scripts_external:
      script.extract()

    # Remove all in-line scripts.
    scripts_inline = soup.findAll('script', src=None)
    for script in scripts_inline:
      script.extract()

    # Process all in-line styles.
    inline_styles = soup.findAll('style')
    for style in inline_styles:
      html = controller.GetHTMLForInlineStylesheet(unicode(style.string))
      if html:
        ns = soup.new_tag('style')
        ns.append(bs4.NavigableString(html))
        style.replaceWith(ns)
      else:
        style.extract()

    # Rewrite all external stylesheet hrefs or remove, as needed.
    stylesheet_links = soup.findAll('link', rel='stylesheet')
    for stylesheet_link in stylesheet_links:
      html = controller.GetHTMLForStylesheetHRef(stylesheet_link['href'])
      if html:
        # The controller is expected to hand back exactly one <style>.
        tmp = bs4.BeautifulSoup(html, 'html5lib').findAll('style')
        assert len(tmp) == 1
        stylesheet_link.replaceWith(tmp[0])
      else:
        stylesheet_link.extract()

    # Remove comments if minifying.
    if minify:
      comments = soup.findAll(
          text=lambda text: isinstance(text, bs4.Comment))
      for comment in comments:
        comment.extract()
    if prettify:
      return soup.prettify('utf-8').strip()

    # We are done.
    return unicode(soup).strip()

  @property
  def html_contents_without_links_and_script(self):
    """GenerateHTML() output using a default HTMLGenerationController."""
    return self.GenerateHTML(
        html_generation_controller.HTMLGenerationController())
-
-
class _Tag(object):
  """Name and attributes of a single HTML opening tag."""

  def __init__(self, tag, attrs):
    """Stores the tag name and its attributes.

    Args:
      tag: The tag name, e.g. 'div'.
      attrs: Either a dict of attribute name to value, or a sequence of
          (name, value) pairs.
    """
    self.tag = tag
    self.attrs = attrs

  def __repr__(self):
    attrs = self.attrs or ()
    # Iterating a dict yields only its keys, so indexing [0] and [1] would
    # pick characters out of the attribute name. Go through items() when the
    # attributes are a mapping; a sequence of pairs is used as is.
    pairs = attrs.items() if hasattr(attrs, 'items') else attrs
    attr_string = ' '.join(['%s="%s"' % (x[0], x[1]) for x in pairs])
    return '<%s %s>' % (self.tag, attr_string)
-
-
class HTMLModuleParser(object):
  """Entry point that turns HTML text into HTMLModuleParserResults."""

  def Parse(self, html):
    """Parses |html| and returns the results object.

    Args:
      html: The markup to parse; None is treated as an empty document.

    Returns:
      An HTMLModuleParserResults for the markup.

    Raises:
      Exception: If the markup contains the literal text '< /script>'.
    """
    if html is None:
      html = ''
    elif '< /script>' in html:
      # The backslash is escaped explicitly; the message text is unchanged.
      raise Exception('Escape script tags with <\\/script>')

    return HTMLModuleParserResults(html)

Powered by Google App Engine
This is Rietveld 408576698