Index: third_party/closure_linter/closure_linter/common/htmlutil.py |
diff --git a/third_party/closure_linter/closure_linter/common/htmlutil.py b/third_party/closure_linter/closure_linter/common/htmlutil.py |
deleted file mode 100755 |
index 26d44c5908353d89d56a131d01846f1328b20485..0000000000000000000000000000000000000000 |
--- a/third_party/closure_linter/closure_linter/common/htmlutil.py |
+++ /dev/null |
@@ -1,170 +0,0 @@ |
-#!/usr/bin/env python |
-# |
-# Copyright 2007 The Closure Linter Authors. All Rights Reserved. |
-# |
-# Licensed under the Apache License, Version 2.0 (the "License"); |
-# you may not use this file except in compliance with the License. |
-# You may obtain a copy of the License at |
-# |
-# http://www.apache.org/licenses/LICENSE-2.0 |
-# |
-# Unless required by applicable law or agreed to in writing, software |
-# distributed under the License is distributed on an "AS-IS" BASIS, |
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
-# See the License for the specific language governing permissions and |
-# limitations under the License. |
- |
-"""Utilities for dealing with HTML.""" |
- |
-__author__ = ('robbyw@google.com (Robert Walker)') |
- |
-import cStringIO |
-import formatter |
-import htmllib |
-import HTMLParser |
-import re |
- |
- |
-class ScriptExtractor(htmllib.HTMLParser): |
- """Subclass of HTMLParser that extracts script contents from an HTML file. |
- |
- Also inserts appropriate blank lines so that line numbers in the extracted |
- code match the line numbers in the original HTML. |
- """ |
- |
- def __init__(self): |
- """Initialize a ScriptExtractor.""" |
- htmllib.HTMLParser.__init__(self, formatter.NullFormatter()) |
- self._in_script = False |
- self._text = '' |
- |
- def start_script(self, attrs): |
- """Internal handler for the start of a script tag. |
- |
- Args: |
- attrs: The attributes of the script tag, as a list of tuples. |
- """ |
- for attribute in attrs: |
- if attribute[0].lower() == 'src': |
- # Skip script tags with a src specified. |
- return |
- self._in_script = True |
- |
- def end_script(self): |
- """Internal handler for the end of a script tag.""" |
- self._in_script = False |
- |
- def handle_data(self, data): |
- """Internal handler for character data. |
- |
- Args: |
- data: The character data from the HTML file. |
- """ |
- if self._in_script: |
- # If the last line contains whitespace only, i.e. is just there to |
- # properly align a </script> tag, strip the whitespace. |
- if data.rstrip(' \t') != data.rstrip(' \t\n\r\f'): |
- data = data.rstrip(' \t') |
- self._text += data |
- else: |
- self._AppendNewlines(data) |
- |
- def handle_comment(self, data): |
- """Internal handler for HTML comments. |
- |
- Args: |
- data: The text of the comment. |
- """ |
- self._AppendNewlines(data) |
- |
- def _AppendNewlines(self, data): |
- """Count the number of newlines in the given string and append them. |
- |
- This ensures line numbers are correct for reported errors. |
- |
- Args: |
- data: The data to count newlines in. |
- """ |
- # We append 'x' to both sides of the string to ensure that splitlines |
- # gives us an accurate count. |
- for i in xrange(len(('x' + data + 'x').splitlines()) - 1): |
- self._text += '\n' |
- |
- def GetScriptLines(self): |
- """Return the extracted script lines. |
- |
- Returns: |
- The extracted script lines as a list of strings. |
- """ |
- return self._text.splitlines() |
- |
- |
-def GetScriptLines(f): |
- """Extract script tag contents from the given HTML file. |
- |
- Args: |
- f: The HTML file. |
- |
- Returns: |
- Lines in the HTML file that are from script tags. |
- """ |
- extractor = ScriptExtractor() |
- |
- # The HTML parser chokes on text like Array.<!string>, so we patch |
- # that bug by replacing the < with &lt; - escaping all text inside script |
- # tags would be better but it's a bit of a catch 22. |
- contents = f.read() |
- contents = re.sub(r'<([^\s\w/])', |
- lambda x: '&lt;%s' % x.group(1), |
- contents) |
- |
- extractor.feed(contents) |
- extractor.close() |
- return extractor.GetScriptLines() |
- |
- |
-def StripTags(str): |
- """Returns the string with HTML tags stripped. |
- |
- Args: |
- str: An html string. |
- |
- Returns: |
- The html string with all tags stripped. If there was a parse error, returns |
- the text successfully parsed so far. |
- """ |
- # Brute force approach to stripping as much HTML as possible. If there is a |
- # parsing error, don't strip text before parse error position, and continue |
- # trying from there. |
- final_text = '' |
- finished = False |
- while not finished: |
- try: |
- strip = _HtmlStripper() |
- strip.feed(str) |
- strip.close() |
- str = strip.get_output() |
- final_text += str |
- finished = True |
- except HTMLParser.HTMLParseError, e: |
- final_text += str[:e.offset] |
- str = str[e.offset + 1:] |
- |
- return final_text |
- |
- |
-class _HtmlStripper(HTMLParser.HTMLParser): |
- """Simple class to strip tags from HTML. |
- |
- Does so by doing nothing when encountering tags, and appending character data |
- to a buffer when that is encountered. |
- """ |
- def __init__(self): |
- self.reset() |
- self.__output = cStringIO.StringIO() |
- |
- def handle_data(self, d): |
- self.__output.write(d) |
- |
- def get_output(self): |
- return self.__output.getvalue() |