tracing/third_party/tvcm/tvcm/parse_html_deps.py - Issue 1376953005: Move tracing/third_party/tvcm -> third_party/py_vulcanize.

Unified Diff: tracing/third_party/tvcm/tvcm/parse_html_deps.py

Issue 1376953005: Move tracing/third_party/tvcm -> third_party/py_vulcanize. (Closed) Base URL: git@github.com:catapult-project/catapult.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« third_party/py_vulcanize/README.chromium ('K') | « tracing/third_party/tvcm/tvcm/module_unittest.py ('k') | tracing/third_party/tvcm/tvcm/parse_html_deps_unittest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: tracing/third_party/tvcm/tvcm/parse_html_deps.py

diff --git a/tracing/third_party/tvcm/tvcm/parse_html_deps.py b/tracing/third_party/tvcm/tvcm/parse_html_deps.py

deleted file mode 100644

index 8866ba18ab2dc02a56e89eda6c566fc244eec7bf..0000000000000000000000000000000000000000

--- a/tracing/third_party/tvcm/tvcm/parse_html_deps.py

+++ /dev/null

@@ -1,219 +0,0 @@

-# Use of this source code is governed by a BSD-style license that can be

-# found in the LICENSE file.

-import os

-import sys

-from tvcm import module

-from tvcm import strip_js_comments

-from tvcm import html_generation_controller

def _AddToPathIfNeeded(path):
  """Prepends |path| to sys.path unless it is already present.

  Args:
    path: directory to make importable.
  """
  if path in sys.path:
    return
  # Insert at the front so this directory wins over later sys.path entries.
  sys.path.insert(0, path)

def _InitBeautifulSoup():
  """Makes the bundled beautifulsoup4 and html5lib-python importable.

  The checkout root is taken to be four directories above the directory
  containing this file; both libraries are looked up under its
  'third_party' subdirectory and prepended to sys.path if missing.
  """
  this_dir = os.path.dirname(__file__)
  catapult_path = os.path.abspath(
      os.path.join(this_dir, '..', '..', '..', '..'))
  third_party_path = os.path.join(catapult_path, 'third_party')
  # beautifulsoup4 first, then html5lib-python: each is inserted at index 0,
  # so html5lib-python ends up ahead of beautifulsoup4 on sys.path.
  for library_dir in ('beautifulsoup4', 'html5lib-python'):
    _AddToPathIfNeeded(os.path.join(third_party_path, library_dir))

-_InitBeautifulSoup()

-import bs4

class InlineScript(object):
  """An inline script found while parsing an HTML module.

  Wraps the node handed in by the caller (HTMLModuleParserResults passes the
  `.string` of each src-less <script> tag) and lazily derives the
  comment-stripped text and the chain of enclosing tags.
  """

  def __init__(self, soup):
    """Stores the script node.

    Args:
      soup: node holding the script text; must be truthy.

    Raises:
      module.DepsException: if |soup| is falsy (e.g. None or empty).
    """
    if not soup:
      raise module.DepsException('InlineScript created without soup')
    self._soup = soup
    # Both caches use None as the "not computed yet" marker.
    self._stripped_contents = None
    self._open_tags = None

  @property
  def contents(self):
    """The script text as a unicode string."""
    return unicode(self._soup.string)

  @property
  def stripped_contents(self):
    """The script text passed through strip_js_comments.StripJSComments."""
    # Compare against None rather than truthiness so that an empty result
    # is cached instead of being recomputed on every access.
    if self._stripped_contents is None:
      self._stripped_contents = strip_js_comments.StripJSComments(
          self.contents)
    return self._stripped_contents

  @property
  def open_tags(self):
    """List of _Tag objects for the ancestors of the script, outermost first.

    The enclosing <script> tag itself is not included.
    """
    # Compare against None rather than truthiness so that an empty ancestor
    # list is cached too.
    if self._open_tags is not None:
      return self._open_tags

    ancestors = []
    node = self._soup.parent
    # Walk upwards until the document object (or a missing parent) is hit.
    while node and not isinstance(node, bs4.BeautifulSoup):
      ancestors.append(_Tag(node.name, node.attrs))
      node = node.parent

    ancestors.reverse()
    # The innermost ancestor is the <script> element holding the text; it is
    # dropped so only the surrounding tags remain.
    assert ancestors[-1].tag == 'script'
    del ancestors[-1]

    self._open_tags = ancestors
    return self._open_tags

def _IsDoctype(x):
  """Returns True if |x| is a bs4.Doctype node equal to 'html' or 'HTML'.

  Args:
    x: any node taken from a parsed document.
  """
  if not isinstance(x, bs4.Doctype):
    return False
  return x == 'html' or x == 'HTML'

def _CreateSoupWithoutHeadOrBody(html):
  """Parses |html| and returns a soup holding only head and body children.

  The markup is parsed with html5lib; the children of <head> (if any) and
  then of <body> (if any) are moved, in order, into a fresh, empty
  BeautifulSoup object so the result carries no <html>/<head>/<body>
  wrappers.

  Args:
    html: markup to parse.

  Returns:
    A bs4.BeautifulSoup object containing the moved nodes.
  """
  parsed = bs4.BeautifulSoup(html, 'html5lib')
  soup = bs4.BeautifulSoup()
  soup.reset()

  for container in (parsed.head, parsed.body):
    if not container:
      continue
    # extract() detaches the node from container.contents, so iterate over a
    # snapshot; looping over the live list would skip every other child.
    for node in list(container.contents):
      node.extract()
      soup.append(node)

  return soup

class HTMLModuleParserResults(object):
  """Result of parsing one HTML module.

  Exposes the dependencies declared in the markup (scripts, imports,
  stylesheets) and can regenerate the markup with those dependencies removed
  or rewritten.
  """

  def __init__(self, html):
    """Parses |html| with the html5lib tree builder."""
    self._soup = bs4.BeautifulSoup(html, 'html5lib')
    # Lazily built list of InlineScript objects; None until first requested.
    self._inline_scripts = None

  @property
  def has_decl(self):
    """True if exactly one top-level html doctype node is present."""
    decls = [x for x in self._soup.contents if _IsDoctype(x)]
    return len(decls) == 1

  @property
  def scripts_external(self):
    """The src attribute of every <script> tag that has one."""
    tags = self._soup.findAll('script', src=True)
    return [t['src'] for t in tags]

  @property
  def inline_scripts(self):
    """InlineScript objects for every <script> tag without a src attribute.

    Raises:
      module.DepsException: from InlineScript if a script has no usable
        `.string` (for example an empty <script> element).
    """
    # Compare against None so an empty result is cached as well.
    if self._inline_scripts is None:
      tags = self._soup.findAll('script', src=None)
      self._inline_scripts = [InlineScript(t.string) for t in tags]
    return self._inline_scripts

  @property
  def imports(self):
    """The href of every <link rel="import"> tag."""
    tags = self._soup.findAll('link', rel='import')
    return [t['href'] for t in tags]

  @property
  def stylesheets(self):
    """The href of every <link rel="stylesheet"> tag."""
    tags = self._soup.findAll('link', rel='stylesheet')
    return [t['href'] for t in tags]

  @property
  def inline_stylesheets(self):
    """The text of every <style> tag, as unicode strings."""
    tags = self._soup.findAll('style')
    return [unicode(t.string) for t in tags]

  def YieldHTMLInPieces(self, controller, minify=False):
    """Yields the generated HTML as a single piece.

    Args:
      controller: forwarded to GenerateHTML.
      minify: forwarded to GenerateHTML.
    """
    yield self.GenerateHTML(controller, minify)

  def GenerateHTML(self, controller, minify=False, prettify=False):
    """Returns the module markup with dependencies stripped or rewritten.

    Doctype and declaration nodes, HTML imports and all <script> tags are
    removed. Inline <style> contents and stylesheet links are handed to
    |controller|, which decides their replacement.

    Args:
      controller: object providing GetHTMLForInlineStylesheet(text) and
        GetHTMLForStylesheetHRef(href). A falsy return value from either
        causes the corresponding element to be dropped.
      minify: if True, HTML comments are removed as well.
      prettify: if True, the result of soup.prettify('utf-8') is returned
        (stripped) instead of the plain unicode serialization.

    Returns:
      The generated markup with surrounding whitespace stripped.
    """
    soup = _CreateSoupWithoutHeadOrBody(unicode(self._soup))

    # Remove doctype and declaration nodes. extract() edits soup.contents,
    # so walk a snapshot; iterating the live list would skip the node that
    # follows each removed one.
    for node in list(soup.contents):
      if isinstance(node, (bs4.Doctype, bs4.Declaration)):
        node.extract()

    # Remove all imports.
    for imp in soup.findAll('link', rel='import'):
      imp.extract()

    # Remove all script links.
    for script in soup.findAll('script', src=True):
      script.extract()

    # Remove all in-line scripts.
    for script in soup.findAll('script', src=None):
      script.extract()

    # Process all in-line styles.
    for style in soup.findAll('style'):
      html = controller.GetHTMLForInlineStylesheet(unicode(style.string))
      if html:
        ns = soup.new_tag('style')
        ns.append(bs4.NavigableString(html))
        style.replaceWith(ns)
      else:
        style.extract()

    # Rewrite all external stylesheet hrefs or remove, as needed.
    for stylesheet_link in soup.findAll('link', rel='stylesheet'):
      html = controller.GetHTMLForStylesheetHRef(stylesheet_link['href'])
      if html:
        # The controller is expected to hand back exactly one <style> tag.
        tmp = bs4.BeautifulSoup(html, 'html5lib').findAll('style')
        assert len(tmp) == 1
        stylesheet_link.replaceWith(tmp[0])
      else:
        stylesheet_link.extract()

    # Remove comments if minifying.
    if minify:
      comments = soup.findAll(
          text=lambda text: isinstance(text, bs4.Comment))
      for comment in comments:
        comment.extract()

    if prettify:
      return soup.prettify('utf-8').strip()

    # We are done.
    return unicode(soup).strip()

  @property
  def html_contents_without_links_and_script(self):
    """GenerateHTML output using a default HTMLGenerationController."""
    return self.GenerateHTML(
        html_generation_controller.HTMLGenerationController())

class _Tag(object):
  """Lightweight record of an element's name and attributes."""

  def __init__(self, tag, attrs):
    """Stores the tag name and its attributes.

    Args:
      tag: element name, e.g. 'div'.
      attrs: attributes, either a mapping of name -> value or a sequence of
        (name, value) pairs.
    """
    self.tag = tag
    self.attrs = attrs

  def __repr__(self):
    """Returns '<tag name="value" ...>' for debugging output."""
    attrs = self.attrs
    # Iterating a mapping directly yields only its keys, which would turn
    # each key's first two characters into a bogus name/value pair; use
    # items() when available and fall back to a sequence of pairs.
    pairs = attrs.items() if hasattr(attrs, 'items') else attrs
    attr_string = ' '.join(
        '%s="%s"' % (name, value) for name, value in pairs)
    return '<%s %s>' % (self.tag, attr_string)

class HTMLModuleParser(object):
  """Entry point that turns raw HTML text into HTMLModuleParserResults."""

  def Parse(self, html):
    """Parses |html| and returns the results object.

    Args:
      html: markup to parse; None is treated as the empty string.

    Returns:
      An HTMLModuleParserResults built from the markup.

    Raises:
      Exception: if the markup contains the literal text '< /script>'.
    """
    if html is None:
      html = ''
    elif '< /script>' in html:
      # The doubled backslash spells the same message without relying on the
      # invalid '\/' escape sequence.
      raise Exception('Escape script tags with <\\/script>')

    return HTMLModuleParserResults(html)