| Index: third_party/mozdownload/mozdownload/parser.py
|
| ===================================================================
|
| --- third_party/mozdownload/mozdownload/parser.py (revision 0)
|
| +++ third_party/mozdownload/mozdownload/parser.py (revision 0)
|
| @@ -0,0 +1,48 @@
|
| +# This Source Code Form is subject to the terms of the Mozilla Public
|
| +# License, v. 2.0. If a copy of the MPL was not distributed with this
|
| +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
| +
|
| +"""Module to parse directory listings on a remote FTP server."""
|
| +
|
| +from HTMLParser import HTMLParser
|
| +import re
|
| +import urllib
|
| +
|
| +
|
| +class DirectoryParser(HTMLParser):
|
| + """Class to parse directory listings"""
|
| +
|
| + def __init__(self, url):
|
| + HTMLParser.__init__(self)
|
| +
|
| + self.entries = [ ]
|
| + self.active_url = None
|
| +
|
| + req = urllib.urlopen(url)
|
| + self.feed(req.read())
|
| +
|
| + def filter(self, regex):
|
| + pattern = re.compile(regex, re.IGNORECASE)
|
| + return [entry for entry in self.entries if pattern.match(entry)]
|
| +
|
| + def handle_starttag(self, tag, attrs):
|
| + if not tag == 'a':
|
| + return
|
| +
|
| + for attr in attrs:
|
| + if attr[0] == 'href':
|
| + self.active_url = attr[1].strip('/')
|
| + return
|
| +
|
| + def handle_endtag(self, tag):
|
| + if tag == 'a':
|
| + self.active_url = None
|
| +
|
| + def handle_data(self, data):
|
| + # Only process the data when we are in an active a tag and have an URL
|
| + if not self.active_url:
|
| + return
|
| +
|
| + name = urllib.quote(data.strip('/'))
|
| + if self.active_url == name:
|
| + self.entries.append(self.active_url)
|
|
|
| Property changes on: third_party/mozdownload/mozdownload/parser.py
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|