Index: third_party/mozdownload/mozdownload/parser.py |
=================================================================== |
--- third_party/mozdownload/mozdownload/parser.py (revision 0) |
+++ third_party/mozdownload/mozdownload/parser.py (revision 0) |
@@ -0,0 +1,48 @@ |
+# This Source Code Form is subject to the terms of the Mozilla Public |
+# License, v. 2.0. If a copy of the MPL was not distributed with this |
+# file, You can obtain one at http://mozilla.org/MPL/2.0/. |
+ |
+"""Module to parse directory listings on a remote FTP server.""" |
+ |
+from HTMLParser import HTMLParser |
+import re |
+import urllib |
+ |
+ |
class DirectoryParser(HTMLParser):
    """Collect the entry names found in an HTML directory listing.

    The page at ``url`` is downloaded and parsed while the instance is
    being constructed.  An anchor is recorded as an entry only when its
    visible text, once URL-quoted, is identical to its ``href`` (both with
    leading/trailing slashes removed).  Anchors whose text differs from
    their target are therefore ignored.

    Attributes:
        entries: list of entry names, in document order, without
            surrounding slashes.
        active_url: slash-stripped ``href`` of the anchor currently being
            parsed, or ``None`` when the parser is not inside such an
            anchor.
    """

    def __init__(self, url):
        """Download ``url`` and fill ``self.entries`` from its content.

        Args:
            url: address of the directory listing, passed unchanged to
                ``urllib.urlopen``.
        """
        HTMLParser.__init__(self)

        self.entries = []
        self.active_url = None

        req = urllib.urlopen(url)
        try:
            self.feed(req.read())
        finally:
            # Release the connection even when reading or parsing fails.
            req.close()

    def filter(self, regex):
        """Return the entries that match ``regex``.

        Matching is case-insensitive and uses ``match``, so the pattern is
        anchored at the beginning of each entry only.

        Args:
            regex: regular expression source string.

        Returns:
            A new list with the matching entries, in their original order.
        """
        pattern = re.compile(regex, re.IGNORECASE)
        return [entry for entry in self.entries if pattern.match(entry)]

    def handle_starttag(self, tag, attrs):
        """Remember the target of an opening ``<a>`` tag.

        Args:
            tag: tag name as reported by HTMLParser.
            attrs: list of ``(name, value)`` attribute pairs.
        """
        if tag != 'a':
            return

        # Every anchor starts from a clean state so that the target of an
        # earlier, unclosed anchor cannot be attributed to this one.
        self.active_url = None

        for name, value in attrs:
            if name == 'href':
                # HTMLParser reports a value-less attribute (<a href>) as
                # None; such an anchor has no usable target.
                if value is not None:
                    self.active_url = value.strip('/')
                # Only the first href attribute is considered.
                return

    def handle_endtag(self, tag):
        """Forget the current target once its ``</a>`` tag is reached."""
        if tag == 'a':
            self.active_url = None

    def handle_data(self, data):
        """Record the current anchor when its text equals its target.

        Args:
            data: text chunk as reported by HTMLParser.
        """
        # Only process the data when we are in an active a tag and have an URL
        if not self.active_url:
            return

        # The href is compared in its quoted form, so quote the visible
        # text the same way before comparing.
        name = urllib.quote(data.strip('/'))
        if self.active_url == name:
            self.entries.append(self.active_url)
Property changes on: third_party/mozdownload/mozdownload/parser.py |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |