Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py - Issue 18418010: Check in the thirdparty libs needed for webkitpy.

Unified Diff: Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py

Issue 18418010: Check in the thirdparty libs needed for webkitpy. (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « Tools/Scripts/webkitpy/thirdparty/mechanize/_response.py ('k') | Tools/Scripts/webkitpy/thirdparty/mechanize/_sgmllib_copy.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py

diff --git a/Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py b/Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py

new file mode 100644

index 0000000000000000000000000000000000000000..91fcd197f4ec173ef00c5e05555c7936d7fbc4ed

--- /dev/null

+++ b/Tools/Scripts/webkitpy/thirdparty/mechanize/_rfc3986.py

@@ -0,0 +1,245 @@

+"""RFC 3986 URI parsing and relative reference resolution / absolutization.

+(aka splitting and joining)

+This code is free software; you can redistribute it and/or modify it under

+the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt

+included with the distribution).

+"""

+# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM.

+import re, urllib

+## def chr_range(a, b):

+## return "".join(map(chr, range(ord(a), ord(b)+1)))

+## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"

+## "abcdefghijklmnopqrstuvwxyz"

+## "0123456789"

+## "-_.~")

+## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]"

+## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%'

# Matches any single character that is not in URI_CHARS (see the
# commented-out definitions above): i.e. anything other than ASCII letters,
# digits, "-_.~", the reserved characters "!*'();:@&=+$,/?#[]" and "%".
# A raw string is used so that "\-" and "\]" reach the regex engine as
# written instead of being (invalid) string-literal escape sequences.
BAD_URI_CHARS_RE = re.compile(r"[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]")

def clean_url(url, encoding):
    """Percent-encode the characters of *url* that are illegal in a URI.

    Args:
        url: The URL, either as text or as a byte string encoded in
            *encoding*.
        encoding: Codec name.  Byte-string input is decoded with it
            (undecodable bytes are replaced), and the stripped text is
            encoded with it again before being quoted.

    Returns:
        The URL with surrounding whitespace stripped and every byte that is
        not a legal URI character replaced by its "%XX" escape.

    Raises:
        UnicodeEncodeError: If the text cannot be represented in *encoding*.
    """
    # Trying to come up with test cases for this gave me a headache; revisit
    # when we switch to unicode.
    # Somebody else's comments (lost the attribution):
    #  - IE will return you the url in the encoding you send it
    #  - Mozilla/Firefox will send you latin-1 if there's no non latin-1
    #    characters in your link. It will send you utf-8 however if there
    #    are...

    # quote() lives in urllib on Python 2 and in urllib.parse on Python 3.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    # Only byte strings need decoding; text is used as-is.  On Python 2
    # ``bytes`` is ``str``, so this matches the historical behaviour there.
    if isinstance(url, bytes):
        url = url.decode(encoding, "replace")
    url = url.strip()
    # For the second param to quote(), we want URI_CHARS, minus the
    # 'always_safe' characters that quote() never percent-encodes.
    return quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~")

def is_clean_uri(uri):
    """Return True if *uri* contains no character matched by BAD_URI_CHARS_RE.

    >>> is_clean_uri("ABC!")
    True
    >>> is_clean_uri(u"ABC!")
    True
    >>> is_clean_uri("ABC|")
    False
    >>> is_clean_uri(u"ABC|")
    False
    >>> is_clean_uri("http://example.com/0")
    True
    >>> is_clean_uri(u"http://example.com/0")
    True

    """
    # Note: module re treats bytestrings as though they were decoded as
    # latin-1, so this function accepts both unicode and bytestrings.
    return BAD_URI_CHARS_RE.search(uri) is None

# Bound ``match`` method of the generic URI-reference regex.  The odd-numbered
# groups wrap a component together with its delimiter; the components
# themselves are groups 2 (scheme), 4 (authority), 5 (path), 7 (query) and
# 9 (fragment).
SPLIT_MATCH = re.compile(
    r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?"
).match


def urlsplit(absolute_uri):
    """Split a URI into (scheme, authority, path, query, fragment).

    Components that are absent from the input are returned as None, except
    for the path, which is always a (possibly empty) string.
    """
    found = SPLIT_MATCH(absolute_uri)
    if not found:
        return None
    # Discard the delimiter-including wrapper groups, keep the bare parts.
    _, scheme, _, authority, path, _, query, _, fragment = found.groups()
    return scheme, authority, path, query, fragment

def urlunsplit(parts):
    """Recompose a URI string from its five components.

    *parts* is a (scheme, authority, path, query, fragment) sequence.  A
    component that is None is omitted together with its delimiter; an empty
    string is kept, so its delimiter is still emitted.  The path is always
    included.
    """
    scheme, authority, path, query, fragment = parts
    pieces = []
    if scheme is not None:
        pieces += [scheme, ":"]
    if authority is not None:
        pieces += ["//", authority]
    pieces.append(path)
    if query is not None:
        pieces += ["?", query]
    if fragment is not None:
        pieces += ["#", fragment]
    return "".join(pieces)

def urljoin(base_uri, uri_reference):
    """Resolve *uri_reference* against *base_uri* and return the result.

    Both strings are split into components with urlsplit(), combined by
    urljoin_parts() and recomposed with urlunsplit().

    See RFC 3986.

    """
    base_parts = urlsplit(base_uri)
    reference_parts = urlsplit(uri_reference)
    target_parts = urljoin_parts(base_parts, reference_parts)
    return urlunsplit(target_parts)

+# oops, this doesn't do the same thing as the literal translation

+# from the RFC below

+## import posixpath

+## def urljoin_parts(base_parts, reference_parts):

+## scheme, authority, path, query, fragment = base_parts

+## rscheme, rauthority, rpath, rquery, rfragment = reference_parts

+## # compute target URI path

+## if rpath == "":

+## tpath = path

+## else:

+## tpath = rpath

+## if not tpath.startswith("/"):

+## tpath = merge(authority, path, tpath)

+## tpath = posixpath.normpath(tpath)

+## if rscheme is not None:

+## return (rscheme, rauthority, tpath, rquery, rfragment)

+## elif rauthority is not None:

+## return (scheme, rauthority, tpath, rquery, rfragment)

+## elif rpath == "":

+## if rquery is not None:

+## tquery = rquery

+## else:

+## tquery = query

+## return (scheme, authority, tpath, tquery, rfragment)

+## else:

+## return (scheme, authority, tpath, rquery, rfragment)

def urljoin_parts(base_parts, reference_parts):
    """Resolve split reference components against split base components.

    Both arguments are (scheme, authority, path, query, fragment) tuples as
    produced by urlsplit(); the return value is a tuple of the same shape
    describing the target URI.  The fragment always comes from the
    reference.
    """
    scheme, authority, path, query, _ = base_parts
    rscheme, rauthority, rpath, rquery, rfragment = reference_parts

    # A reference that repeats the base scheme is handled as if it had no
    # scheme at all.
    if rscheme == scheme:
        rscheme = None

    # Reference carries its own scheme: only its dot segments are cleaned.
    if rscheme is not None:
        return (rscheme, rauthority, remove_dot_segments(rpath), rquery,
                rfragment)

    # Reference carries its own authority: inherit just the base scheme.
    if rauthority is not None:
        return (scheme, rauthority, remove_dot_segments(rpath), rquery,
                rfragment)

    # Empty reference path: keep the base path, and the base query too
    # unless the reference supplies one.
    if rpath == "":
        tquery = rquery if rquery is not None else query
        return (scheme, authority, path, tquery, rfragment)

    # Non-empty reference path: absolute paths stand alone, relative ones
    # are merged onto the base path first.
    if rpath.startswith("/"):
        tpath = remove_dot_segments(rpath)
    else:
        tpath = remove_dot_segments(merge(authority, path, rpath))
    return (scheme, authority, tpath, rquery, rfragment)

+# um, something *vaguely* like this is what I want, but I have to generate

+# lots of test cases first, if only to understand what it is that

+# remove_dot_segments really does...

+## def remove_dot_segments(path):

+## if path == '':

+## return ''

+## comps = path.split('/')

+## new_comps = []

+## for comp in comps:

+## if comp in ['.', '']:

+## if not new_comps or new_comps[-1]:

+## new_comps.append('')

+## continue

+## if comp != '..':

+## new_comps.append(comp)

+## elif new_comps:

+## new_comps.pop()

+## return '/'.join(new_comps)

def remove_dot_segments(path):
    """Return *path* with its "." and ".." segments interpreted and removed.

    The input is consumed from the left, one rule application per loop
    iteration, while finished segments accumulate in an output list.  The
    rule letters in the comments follow the labels used by the original
    implementation.
    """
    output = []
    remaining = path
    while remaining:
        if remaining.startswith("../"):
            # A: drop a leading "../"
            remaining = remaining[3:]
        elif remaining.startswith("./"):
            # A: drop a leading "./"
            remaining = remaining[2:]
        elif remaining.startswith("/./") or remaining == "/.":
            # B: "/./" and a final "/." both collapse to "/"
            remaining = remaining[2:] or "/"
        elif remaining.startswith("/../") or remaining == "/..":
            # C: "/../" and a final "/.." collapse to "/" and discard the
            # most recently emitted segment, if there is one
            remaining = remaining[3:] or "/"
            if output:
                output.pop()
        elif remaining in (".", ".."):
            # D: nothing left but a lone dot segment
            remaining = ""
        else:
            # E: move the first segment (with its leading "/", if any) to
            # the output.  Searching from index 1 skips that leading slash;
            # a path without one cannot have a "/" at index 0 anyway.
            cut = remaining.find("/", 1)
            if cut < 0:
                output.append(remaining)
                break
            output.append(remaining[:cut])
            remaining = remaining[cut:]
    return "".join(output)

def merge(base_authority, base_path, ref_path):
    """Merge a relative reference path onto a base path.

    An empty base path yields "/" + *ref_path*.  Otherwise everything after
    the right-most "/" of *base_path* is replaced by *ref_path*; a base path
    without any "/" is discarded entirely.  *base_authority* is accepted but
    not consulted.
    """
    # XXXX Oddly, the sample Perl implementation of this by Roy Fielding
    # doesn't even take base_authority as a parameter, despite the wording in
    # the RFC suggesting otherwise.  Perhaps I'm missing some obvious
    # identity.  The RFC-worded condition would have been:
    #     base_authority is not None and base_path == ""
    if base_path == "":
        return "/" + ref_path
    # rpartition() yields ("", "", base_path) when there is no "/", so the
    # concatenation below then reduces to ref_path alone.
    directory, slash, _ = base_path.rpartition("/")
    return directory + slash + ref_path

# When executed as a script, run the doctests embedded in this module.
if __name__ == "__main__":
    from doctest import testmod
    testmod()