Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3035)

Unified Diff: Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py

Issue 18418010: Check in the thirdparty libs needed for webkitpy. (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py
diff --git a/Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py b/Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py
new file mode 100644
index 0000000000000000000000000000000000000000..91fcd197f4ec173ef00c5e05555c7936d7fbc4ed
--- /dev/null
+++ b/Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py
@@ -0,0 +1,245 @@
+"""RFC 3986 URI parsing and relative reference resolution / absolutization.
+
+(aka splitting and joining)
+
+Copyright 2006 John J. Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
+included with the distribution).
+
+"""
+
+# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM.
+
+import re, urllib
+
+## def chr_range(a, b):
+## return "".join(map(chr, range(ord(a), ord(b)+1)))
+
+## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+## "abcdefghijklmnopqrstuvwxyz"
+## "0123456789"
+## "-_.~")
+## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]"
+## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%'
# Matches any character that is *not* in URI_CHARS, i.e. not in RFC 3986's
# unreserved or reserved sets and not "%" (see commented-out sets above).
BAD_URI_CHARS_RE = re.compile(r"[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]")
+
+
def clean_url(url, encoding):
    """Strip whitespace and percent-encode illegal URI characters.

    url may be a bytestring (decoded using *encoding*, with undecodable
    bytes replaced) or a unicode string.  Returns a percent-encoded
    bytestring in *encoding*.
    """
    # Somebody else's comments (lost the attribution):
    #  - IE will return you the url in the encoding you send it
    #  - Mozilla/Firefox will send you latin-1 if there's no non latin-1
    #    characters in your link.  It will send you utf-8 however if there
    #    are...
    # XXX Trying to come up with test cases for this gave me a headache,
    # revisit when we do switch to unicode.
    if isinstance(url, str):
        # isinstance (rather than an exact type() comparison) so that str
        # subclasses are decoded too
        url = url.decode(encoding, "replace")
    url = url.strip()
    # for second param to urllib.quote(), we want URI_CHARS, minus the
    # 'always_safe' characters that urllib.quote() never percent-encodes
    return urllib.quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~")
+
def is_clean_uri(uri):
    """Return True iff uri contains only legal URI characters.

    >>> is_clean_uri("ABC!")
    True
    >>> is_clean_uri(u"ABC!")
    True
    >>> is_clean_uri("ABC|")
    False
    >>> is_clean_uri(u"ABC|")
    False
    >>> is_clean_uri("http://example.com/0")
    True
    >>> is_clean_uri(u"http://example.com/0")
    True
    """
    # Module re treats bytestrings as though they were decoded as latin-1,
    # so this function accepts both unicode strings and bytestrings.
    return BAD_URI_CHARS_RE.search(uri) is None
+
+
SPLIT_MATCH = re.compile(
    r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match
def urlsplit(absolute_uri):
    """Split a URI into a (scheme, authority, path, query, fragment) tuple.

    Missing components come back as None; the path is always a string
    (possibly empty).
    """
    m = SPLIT_MATCH(absolute_uri)
    if m is None:
        return None
    # groups 0, 2, 5 and 7 include the delimiters (":", "//", "?", "#")
    _, scheme, _, authority, path, _, query, _, fragment = m.groups()
    return scheme, authority, path, query, fragment
+
def urlunsplit(parts):
    """Reassemble a (scheme, authority, path, query, fragment) tuple.

    Inverse of urlsplit(); None components are omitted along with their
    delimiters, while empty strings are kept (e.g. a trailing "?").
    """
    scheme, authority, path, query, fragment = parts
    pieces = []
    if scheme is not None:
        pieces.append(scheme + ":")
    if authority is not None:
        pieces.append("//" + authority)
    pieces.append(path)
    if query is not None:
        pieces.append("?" + query)
    if fragment is not None:
        pieces.append("#" + fragment)
    return "".join(pieces)
+
def urljoin(base_uri, uri_reference):
    """Join a base URI with a URI reference and return the resulting URI.

    See RFC 3986, section 5.
    """
    base_parts = urlsplit(base_uri)
    reference_parts = urlsplit(uri_reference)
    return urlunsplit(urljoin_parts(base_parts, reference_parts))
+
+# oops, this doesn't do the same thing as the literal translation
+# from the RFC below
+## import posixpath
+## def urljoin_parts(base_parts, reference_parts):
+## scheme, authority, path, query, fragment = base_parts
+## rscheme, rauthority, rpath, rquery, rfragment = reference_parts
+
+## # compute target URI path
+## if rpath == "":
+## tpath = path
+## else:
+## tpath = rpath
+## if not tpath.startswith("/"):
+## tpath = merge(authority, path, tpath)
+## tpath = posixpath.normpath(tpath)
+
+## if rscheme is not None:
+## return (rscheme, rauthority, tpath, rquery, rfragment)
+## elif rauthority is not None:
+## return (scheme, rauthority, tpath, rquery, rfragment)
+## elif rpath == "":
+## if rquery is not None:
+## tquery = rquery
+## else:
+## tquery = query
+## return (scheme, authority, tpath, tquery, rfragment)
+## else:
+## return (scheme, authority, tpath, rquery, rfragment)
+
def urljoin_parts(base_parts, reference_parts):
    """Resolve split URI reference parts against split base URI parts.

    Both arguments and the return value are (scheme, authority, path,
    query, fragment) 5-tuples as produced by urlsplit().  Implements the
    transformation of RFC 3986, section 5.2.2.
    """
    scheme, authority, path, query, fragment = base_parts
    rscheme, rauthority, rpath, rquery, rfragment = reference_parts

    # A reference repeating the base scheme is treated as scheme-less
    # ("non-strict" resolution in RFC 3986 terms).
    if rscheme == scheme:
        rscheme = None

    if rscheme is not None:
        # Absolute reference: every component comes from the reference.
        return (rscheme, rauthority, remove_dot_segments(rpath), rquery,
                rfragment)
    if rauthority is not None:
        # Network-path reference: only the scheme is inherited.
        return (scheme, rauthority, remove_dot_segments(rpath), rquery,
                rfragment)
    if rpath == "":
        # Same-document reference: keep the base path, and the base query
        # unless the reference supplies its own.
        if rquery is not None:
            tquery = rquery
        else:
            tquery = query
        return (scheme, authority, path, tquery, rfragment)
    if rpath.startswith("/"):
        # Absolute-path reference.
        tpath = remove_dot_segments(rpath)
    else:
        # Relative-path reference: merge with the base path first.
        tpath = remove_dot_segments(merge(authority, path, rpath))
    return (scheme, authority, tpath, rquery, rfragment)
+
+# um, something *vaguely* like this is what I want, but I have to generate
+# lots of test cases first, if only to understand what it is that
+# remove_dot_segments really does...
+## def remove_dot_segments(path):
+## if path == '':
+## return ''
+## comps = path.split('/')
+## new_comps = []
+## for comp in comps:
+## if comp in ['.', '']:
+## if not new_comps or new_comps[-1]:
+## new_comps.append('')
+## continue
+## if comp != '..':
+## new_comps.append(comp)
+## elif new_comps:
+## new_comps.pop()
+## return '/'.join(new_comps)
+
+
def remove_dot_segments(path):
    """Remove "." and ".." segments from path.

    Literal implementation of RFC 3986, section 5.2.4; the A-E labels
    below mirror the steps of the algorithm in the RFC.
    """
    output = []
    while path:
        # A: a leading "../" or "./" prefix is dropped.
        if path.startswith("../"):
            path = path[3:]
        elif path.startswith("./"):
            path = path[2:]
        # B: a "/./" prefix or a bare "/." collapses to "/".
        elif path.startswith("/./"):
            path = path[2:]
        elif path == "/.":
            path = "/"
        # C: a "/../" prefix or a bare "/.." collapses to "/" and removes
        # the last segment already moved to the output.
        elif path.startswith("/../"):
            path = path[3:]
            if output:
                output.pop()
        elif path == "/..":
            path = "/"
            if output:
                output.pop()
        # D: a path consisting solely of "." or ".." is dropped entirely.
        elif path == ".":
            path = ""
        elif path == "..":
            path = ""
        # E: move the first segment (including any leading "/") to output.
        else:
            if path.startswith("/"):
                cut = path.find("/", 1)
            else:
                cut = path.find("/")
            if cut < 0:
                output.append(path)
                path = ""
            else:
                output.append(path[:cut])
                path = path[cut:]
    return "".join(output)
+
def merge(base_authority, base_path, ref_path):
    """Merge a relative-path reference with the base path (RFC 3986, 5.3).

    XXXX Oddly, the sample Perl implementation of this by Roy Fielding
    doesn't even take base_authority as a parameter, despite the wording
    in the RFC suggesting otherwise.  Perhaps I'm missing some obvious
    identity -- base_authority is currently unused here.
    """
    if base_path == "":
        return "/" + ref_path
    slash = base_path.rfind("/")
    if slash < 0:
        # No directory component: the reference replaces the whole path.
        return ref_path
    # Keep everything up to and including the last "/".
    return base_path[:slash + 1] + ref_path
+
+if __name__ == "__main__":
+ import doctest
+ doctest.testmod()
« no previous file with comments | « Tools/Scripts/webkitpy/thirdparty/mechanize/_response.py ('k') | Tools/Scripts/webkitpy/thirdparty/mechanize/_sgmllib_copy.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698