Index: pylib/simplejson/decoder.py |
=================================================================== |
--- pylib/simplejson/decoder.py (revision 0) |
+++ pylib/simplejson/decoder.py (revision 0) |
@@ -0,0 +1,421 @@ |
+"""Implementation of JSONDecoder |
+""" |
+import re |
+import sys |
+import struct |
+ |
+from simplejson.scanner import make_scanner |
+def _import_c_scanstring(): |
+ try: |
+ from simplejson._speedups import scanstring |
+ return scanstring |
+ except ImportError: |
+ return None |
+c_scanstring = _import_c_scanstring() |
+ |
# Names exported by ``from simplejson.decoder import *``.
__all__ = ['JSONDecoder']

# Flags passed to every regular expression compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
+ |
+def _floatconstants(): |
+ _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') |
+ # The struct module in Python 2.4 would get frexp() out of range here |
+ # when an endian is specified in the format string. Fixed in Python 2.5+ |
+ if sys.byteorder != 'big': |
+ _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] |
+ nan, inf = struct.unpack('dd', _BYTES) |
+ return nan, inf, -inf |
+ |
+NaN, PosInf, NegInf = _floatconstants() |
+ |
+ |
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        # The ValueError message is the fully formatted text; the raw
        # pieces stay available as attributes.
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.end = end
        self.lineno, self.colno = linecol(doc, pos)
        if end is not None:
            self.endlineno, self.endcolno = linecol(doc, end)
        else:
            self.endlineno, self.endcolno = None, None

    def __reduce__(self):
        # The default exception reduction replays ``self.args`` (just the
        # formatted message) into __init__, which needs msg, doc and pos, so
        # unpickling or copying would fail.  Rebuild from the original
        # constructor arguments instead.
        return self.__class__, (self.msg, self.doc, self.pos, self.end)
+ |
+ |
def linecol(doc, pos):
    """Translate the index ``pos`` in ``doc`` into a line and column.

    Returns a ``(lineno, colno)`` tuple.  Both numbers are 1-based: the
    first character of every line, including the first one, is column 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() gives -1 when no newline precedes pos, so the first line gets
    # pos + 1 and uses the same 1-based columns as every later line.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
+ |
+ |
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location of ``pos`` in ``doc``.

    When ``end`` is given the text describes the range from ``pos`` to
    ``end``; otherwise it describes the single position ``pos``.
    """
    # Note that this function is called from _speedups
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '%s: line %d column %d (char %d)' % (
        msg, start_line, start_col, pos)
+ |
+ |
# Constant names mapped to the float value each one decodes to.
_CONSTANTS = {
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}
+ |
# Group 1: a (possibly empty) run of ordinary characters.  Group 2: the
# character that ended the run -- a quote, a backslash or a control character.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes mapped to the character they stand for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

DEFAULT_ENCODING = "utf-8"

# Exactly four hexadecimal digits, as required after ``\u``.
_HEX4 = re.compile(r'[0-9A-Fa-f]{4}')


def _scan_hex4(s, pos, _match=_HEX4.match):
    """Return the value of the four hex digits at ``s[pos:pos + 4]``.

    Returns None when fewer than four characters remain or any of them is
    not a hexadecimal digit.  Matching first keeps int() from accepting
    forms such as ``0x12``, ``+123`` or digits padded with whitespace.
    """
    found = _match(s, pos)
    if found is None:
        return None
    return int(found.group(), 16)


def py_scanstring(s, end, encoding=None, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises
    JSONDecodeError (a ValueError subclass) on attempt to decode an invalid
    string. If strict is False then literal control characters are allowed
    in the string.

    ``encoding`` is used to decode chunks that are not already unicode;
    it defaults to DEFAULT_ENCODING.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting unterminated strings
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence: exactly four hex digits must follow
            uni = _scan_hex4(s, end + 1)
            if uni is None:
                msg = "Invalid \\uXXXX escape"
                raise JSONDecodeError(msg, s, end)
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if s[end + 5:end + 7] != '\\u':
                    raise JSONDecodeError(msg, s, end)
                uni2 = _scan_hex4(s, end + 7)
                # The second half must be a low surrogate; any other value
                # would combine into an unrelated code point.
                if uni2 is None or not 0xdc00 <= uni2 <= 0xdfff:
                    raise JSONDecodeError(msg, s, end)
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
+ |
+ |
# Use speedup if available
scanstring = c_scanstring or py_scanstring

WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'


def JSONObject(state, encoding, strict, scan_once, object_hook,
               object_pairs_hook, memo=None,
               _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Decode the members of a JSON object.

    ``state`` is an ``(s, end)`` pair: the document and the index at which
    to start looking for the first property name or the closing ``}``.  It
    is taken as one positional argument and unpacked here, because tuple
    parameters in the signature are not valid on Python 3.

    ``encoding`` and ``strict`` are passed through to ``scanstring`` for
    every key.  ``scan_once(s, idx)`` must return ``(value, end)`` and raise
    StopIteration when no value starts at ``idx``.  ``memo`` is a dict used
    to share equal key strings; a fresh one is created when it is None.

    Returns ``(result, end)`` where ``end`` is the index just past the
    closing ``}``.  ``result`` is ``object_pairs_hook(pairs)`` when that
    hook is given (``pairs`` being the ordered list of ``(key, value)``
    tuples); otherwise it is a dict, passed through ``object_hook`` when
    that hook is given.

    Raises JSONDecodeError when a property name, ``:``, value or ``,`` is
    missing.
    """
    s, end = state
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault
    pairs = []
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        # Reuse the first string object seen for each distinct key
        key = memo_get(key, key)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting : delimiter", s, end)

        end += 1

        # Skip whitespace after the colon; running off the end of the
        # document here is reported by scan_once below.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        pairs.append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            # end was already advanced past nextchar, so step back to it
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end - 1)

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
+ |
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Decode the elements of a JSON array.

    ``state`` is an ``(s, end)`` pair: the document and the index at which
    to start looking for the first element or the closing ``]``.  It is
    taken as one positional argument and unpacked here, because tuple
    parameters in the signature are not valid on Python 3.

    ``scan_once(s, idx)`` must return ``(value, end)`` and raise
    StopIteration when no value starts at ``idx``.

    Returns ``(values, end)`` where ``values`` is the list of decoded
    elements and ``end`` is the index just past the closing ``]``.

    Raises JSONDecodeError when an element or a ``,`` is missing.
    """
    s, end = state
    values = []
    # Slicing instead of indexing yields '' at the end of the document
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past nextchar; report the offending
            # character itself, as JSONObject does.
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        # Skip whitespace after the comma; running off the end of the
        # document here is reported by scan_once on the next iteration.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
+ |
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default JSON values are decoded to these Python types:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    Although they are not part of the JSON spec, ``NaN``, ``Infinity`` and
    ``-Infinity`` are also accepted and decoded to the matching ``float``
    values.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """
        *encoding* names the encoding used to interpret :class:`str`
        input (``'utf-8'`` by default); it has no effect on
        :class:`unicode` input. Only encodings that are a superset of
        ASCII currently work, so text in any other encoding should be
        passed in as :class:`unicode`.

        *object_hook*, if given, is called with the :class:`dict` built for
        each decoded JSON object, and its return value is used in place of
        that dict. This allows custom deserializations (e.g. JSON-RPC class
        hinting).

        *object_pairs_hook*, if given, is called with the ordered list of
        key/value pairs of each decoded object literal, and its return value
        is used instead of the :class:`dict`. It is meant for decoders that
        depend on the order in which pairs appear (for example
        :func:`collections.OrderedDict` remembers insertion order). When
        *object_hook* is given as well, *object_pairs_hook* takes priority.

        *parse_float*, if given, is called with the string of every JSON
        float to be decoded. The default is equivalent to
        ``float(num_str)``. Use it to plug in another datatype or parser
        (e.g. :class:`decimal.Decimal`).

        *parse_int*, if given, is called with the string of every JSON int
        to be decoded. The default is equivalent to ``int(num_str)``. Use it
        to plug in another datatype or parser (e.g. :class:`float`).

        *parse_constant*, if given, is called with one of the strings
        ``'-Infinity'``, ``'Infinity'`` or ``'NaN'``. It can be used to
        raise an exception when invalid JSON numbers are encountered.

        *strict* decides what happens when a string contains an unescaped
        control character: with the default ``True`` it is a parse error,
        with ``False`` the character is allowed in the string.

        """
        # Replace missing (falsy) parsers with the default behaviour.
        if not parse_float:
            parse_float = float
        if not parse_int:
            parse_int = int
        if not parse_constant:
            parse_constant = _CONSTANTS.__getitem__

        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last, after every attribute above has been assigned.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s`` (a ``str`` or ``unicode`` instance holding one JSON
        document) and return the resulting Python object.

        Whitespace before and after the document is skipped; anything else
        left over raises JSONDecodeError.

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, end, len(s))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that ``s`` (a ``str`` or ``unicode``)
        begins with, starting at index ``idx``, and return a 2-tuple of the
        Python object and the index in ``s`` where the document ended.

        Unlike :meth:`decode`, this tolerates extraneous data after the
        document.

        """
        try:
            decoded = self.scan_once(s, idx)
        except StopIteration:
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        obj, end = decoded
        return obj, end