third_party/closure_linter/closure_linter/common/htmlutil.py - Issue 2592193002: Remove closure_linter from Chrome

Unified Diff: third_party/closure_linter/closure_linter/common/htmlutil.py

Issue 2592193002: Remove closure_linter from Chrome (Closed)

Patch Set: Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « third_party/closure_linter/closure_linter/common/filetestcase.py ('k') | third_party/closure_linter/closure_linter/common/lintrunner.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/closure_linter/closure_linter/common/htmlutil.py

diff --git a/third_party/closure_linter/closure_linter/common/htmlutil.py b/third_party/closure_linter/closure_linter/common/htmlutil.py

deleted file mode 100755

index 26d44c5908353d89d56a131d01846f1328b20485..0000000000000000000000000000000000000000

--- a/third_party/closure_linter/closure_linter/common/htmlutil.py

+++ /dev/null

@@ -1,170 +0,0 @@

-#!/usr/bin/env python

-# Licensed under the Apache License, Version 2.0 (the "License");

-# you may not use this file except in compliance with the License.

-# You may obtain a copy of the License at

-# http://www.apache.org/licenses/LICENSE-2.0

-# Unless required by applicable law or agreed to in writing, software

-# distributed under the License is distributed on an "AS-IS" BASIS,

-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-# See the License for the specific language governing permissions and

-# limitations under the License.

-"""Utilities for dealing with HTML."""

-__author__ = ('robbyw@google.com (Robert Walker)')

-import cStringIO

-import formatter

-import htmllib

-import HTMLParser

-import re

-class ScriptExtractor(htmllib.HTMLParser):

- """Subclass of HTMLParser that extracts script contents from an HTML file.

- Also inserts appropriate blank lines so that line numbers in the extracted

- code match the line numbers in the original HTML.

- """

- def __init__(self):

- """Initialize a ScriptExtractor."""

- htmllib.HTMLParser.__init__(self, formatter.NullFormatter())

- self._in_script = False

- self._text = ''

- def start_script(self, attrs):

- """Internal handler for the start of a script tag.

- Args:

- attrs: The attributes of the script tag, as a list of tuples.

- """

- for attribute in attrs:

- if attribute[0].lower() == 'src':

- # Skip script tags with a src specified.

- return

- self._in_script = True

- def end_script(self):

- """Internal handler for the end of a script tag."""

- self._in_script = False

- def handle_data(self, data):

- """Internal handler for character data.

- Args:

- data: The character data from the HTML file.

- """

- if self._in_script:

- # If the last line contains whitespace only, i.e. is just there to

- # properly align a </script> tag, strip the whitespace.

- if data.rstrip(' \t') != data.rstrip(' \t\n\r\f'):

- data = data.rstrip(' \t')

- self._text += data

- else:

- self._AppendNewlines(data)

- def handle_comment(self, data):

- """Internal handler for HTML comments.

- Args:

- data: The text of the comment.

- """

- self._AppendNewlines(data)

- def _AppendNewlines(self, data):

- """Count the number of newlines in the given string and append them.

- This ensures line numbers are correct for reported errors.

- Args:

- data: The data to count newlines in.

- """

- # We append 'x' to both sides of the string to ensure that splitlines

- # gives us an accurate count.

- for i in xrange(len(('x' + data + 'x').splitlines()) - 1):

- self._text += '\n'

- def GetScriptLines(self):

- """Return the extracted script lines.

- Returns:

- The extracted script lines as a list of strings.

- """

- return self._text.splitlines()

-def GetScriptLines(f):

- """Extract script tag contents from the given HTML file.

- Args:

- f: The HTML file.

- Returns:

- Lines in the HTML file that are from script tags.

- """

- extractor = ScriptExtractor()

- # The HTML parser chokes on text like Array.<!string>, so we patch

- # that bug by replacing the < with < - escaping all text inside script

- # tags would be better but it's a bit of a catch 22.

- contents = f.read()

- contents = re.sub(r'<([^\s\w/])',

- lambda x: '<%s' % x.group(1),

- contents)

- extractor.feed(contents)

- extractor.close()

- return extractor.GetScriptLines()

-def StripTags(str):

- """Returns the string with HTML tags stripped.

- Args:

- str: An html string.

- Returns:

- The html string with all tags stripped. If there was a parse error, returns

- the text successfully parsed so far.

- """

- # Brute force approach to stripping as much HTML as possible. If there is a

- # parsing error, don't strip text before parse error position, and continue

- # trying from there.

- final_text = ''

- finished = False

- while not finished:

- try:

- strip = _HtmlStripper()

- strip.feed(str)

- strip.close()

- str = strip.get_output()

- final_text += str

- finished = True

- except HTMLParser.HTMLParseError, e:

- final_text += str[:e.offset]

- str = str[e.offset + 1:]

- return final_text

-class _HtmlStripper(HTMLParser.HTMLParser):

- """Simple class to strip tags from HTML.

- Does so by doing nothing when encountering tags, and appending character data

- to a buffer when that is encountered.

- """

- def __init__(self):

- self.reset()

- self.__output = cStringIO.StringIO()

- def handle_data(self, d):

- self.__output.write(d)

- def get_output(self):

- return self.__output.getvalue()