Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(247)

Unified Diff: third_party/twisted_8_1/twisted/web/sux.py

Issue 12261012: Remove third_party/twisted_8_1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build
Patch Set: Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/twisted_8_1/twisted/web/static.py ('k') | third_party/twisted_8_1/twisted/web/tap.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/twisted_8_1/twisted/web/sux.py
diff --git a/third_party/twisted_8_1/twisted/web/sux.py b/third_party/twisted_8_1/twisted/web/sux.py
deleted file mode 100644
index 6f8fea1dc3b1f80e6afa31b46694810e8b279ecc..0000000000000000000000000000000000000000
--- a/third_party/twisted_8_1/twisted/web/sux.py
+++ /dev/null
@@ -1,657 +0,0 @@
-# -*- test-case-name: twisted.web.test.test_xml -*-
-#
-# Copyright (c) 2001-2004 Twisted Matrix Laboratories.
-# See LICENSE for details.
-
-
-"""
-*S*mall, *U*ncomplicated *X*ML.
-
-This is a very simple implementation of XML/HTML as a network
-protocol. It is not at all clever. Its main features are that it
-does not:
-
- - support namespaces
- - mung mnemonic entity references
- - validate
- - perform *any* external actions (such as fetching URLs or writing files)
- under *any* circumstances
- It does, however, have lots and lots of horrible hacks for supporting broken
- HTML (as an option; they're not on by default).
-"""
-
-from twisted.internet.protocol import Protocol, FileWrapper
-from twisted.python.reflect import prefixedMethodNames
-
-
-
-# Elements of the three-tuples in the state table.
-BEGIN_HANDLER = 0
-DO_HANDLER = 1
-END_HANDLER = 2
-
-identChars = '.-_:'
-lenientIdentChars = identChars + ';+#/%~'
-
-def nop(*args, **kw):
- "Do nothing."
-
-
-def unionlist(*args):
- l = []
- for x in args:
- l.extend(x)
- d = dict([(x, 1) for x in l])
- return d.keys()
-
-
-def zipfndict(*args, **kw):
- default = kw.get('default', nop)
- d = {}
- for key in unionlist(*[fndict.keys() for fndict in args]):
- d[key] = tuple([x.get(key, default) for x in args])
- return d
-
-
-def prefixedMethodClassDict(clazz, prefix):
- return dict([(name, getattr(clazz, prefix + name)) for name in prefixedMethodNames(clazz, prefix)])
-
-
-def prefixedMethodObjDict(obj, prefix):
- return dict([(name, getattr(obj, prefix + name)) for name in prefixedMethodNames(obj.__class__, prefix)])
-
-
-class ParseError(Exception):
-
- def __init__(self, filename, line, col, message):
- self.filename = filename
- self.line = line
- self.col = col
- self.message = message
-
- def __str__(self):
- return "%s:%s:%s: %s" % (self.filename, self.line, self.col,
- self.message)
-
-class XMLParser(Protocol):
-
- state = None
- encodings = None
- filename = "<xml />"
- beExtremelyLenient = 0
- _prepend = None
-
- # _leadingBodyData will sometimes be set before switching to the
- # 'bodydata' state, when we "accidentally" read a byte of bodydata
- # in a different state.
- _leadingBodyData = None
-
- def connectionMade(self):
- self.lineno = 1
- self.colno = 0
- self.encodings = []
-
- def saveMark(self):
- '''Get the line number and column of the last character parsed'''
- # This gets replaced during dataReceived, restored afterwards
- return (self.lineno, self.colno)
-
- def _parseError(self, message):
- raise ParseError(*((self.filename,)+self.saveMark()+(message,)))
-
- def _buildStateTable(self):
- '''Return a dictionary of begin, do, end state function tuples'''
- # _buildStateTable is probably slow, so the table is meant to be cached and
- # built once per class. NOTE(review): getattr() looks up the literal name
- # '__stateTable' but the assignment is name-mangled, so this may never hit.
- stateTable = getattr(self.__class__, '__stateTable', None)
- if stateTable is None:
- stateTable = self.__class__.__stateTable = zipfndict(
- *[prefixedMethodObjDict(self, prefix)
- for prefix in ('begin_', 'do_', 'end_')])
- return stateTable
-
- def _decode(self, data):
- if 'UTF-16' in self.encodings or 'UCS-2' in self.encodings:
- assert not len(data) & 1, 'UTF-16 must come in pairs for now'
- if self._prepend:
- data = self._prepend + data
- for encoding in self.encodings:
- data = unicode(data, encoding)
- return data
-
- def maybeBodyData(self):
- if self.endtag:
- return 'bodydata'
-
- # Get ready for fun! We're going to allow
- # <script>if (foo < bar)</script> to work!
- # We do this by making everything between <script> and
- # </script> a Text
- # BUT <script src="foo"> will be special-cased to do regular,
- # lenient behavior, because those may not have </script>
- # -radix
-
- if (self.tagName == 'script'
- and not self.tagAttributes.has_key('src')):
- # we do this ourselves rather than having begin_waitforendscript
- # because that can get called multiple times and we don't want
- # bodydata to get reset other than the first time.
- self.begin_bodydata(None)
- return 'waitforendscript'
- return 'bodydata'
-
-
-
- def dataReceived(self, data):
- stateTable = self._buildStateTable()
- if not self.state:
- # detect UTF-16 input by its leading byte-order mark (either byte order)
- if data.startswith('\xff\xfe'):
- self._prepend = '\xff\xfe'
- self.encodings.append('UTF-16')
- data = data[2:]
- elif data.startswith('\xfe\xff'):
- self._prepend = '\xfe\xff'
- self.encodings.append('UTF-16')
- data = data[2:]
- self.state = 'begin'
- if self.encodings:
- data = self._decode(data)
- # bring state, lineno, colno into local scope
- lineno, colno = self.lineno, self.colno
- curState = self.state
- # replace saveMark with a nested scope function
- _saveMark = self.saveMark
- def saveMark():
- return (lineno, colno)
- self.saveMark = saveMark
- # fetch functions from the stateTable
- beginFn, doFn, endFn = stateTable[curState]
- try:
- for byte in data:
- # do newline stuff
- if byte == '\n':
- lineno += 1
- colno = 0
- else:
- colno += 1
- newState = doFn(byte)
- if newState is not None and newState != curState:
- # this is the endFn from the previous state
- endFn()
- curState = newState
- beginFn, doFn, endFn = stateTable[curState]
- beginFn(byte)
- finally:
- self.saveMark = _saveMark
- self.lineno, self.colno = lineno, colno
- # state doesn't make sense if there's an exception..
- self.state = curState
-
-
- def connectionLost(self, reason):
- """
- End the last state we were in.
- """
- stateTable = self._buildStateTable()
- stateTable[self.state][END_HANDLER]()
-
-
- # state methods
-
- def do_begin(self, byte):
- if byte.isspace():
- return
- if byte != '<':
- if self.beExtremelyLenient:
- self._leadingBodyData = byte
- return 'bodydata'
- self._parseError("First char of document [%r] wasn't <" % (byte,))
- return 'tagstart'
-
- def begin_comment(self, byte):
- self.commentbuf = ''
-
- def do_comment(self, byte):
- self.commentbuf += byte
- if self.commentbuf.endswith('-->'):
- self.gotComment(self.commentbuf[:-3])
- return 'bodydata'
-
- def begin_tagstart(self, byte):
- self.tagName = '' # name of the tag
- self.tagAttributes = {} # attributes of the tag
- self.termtag = 0 # is the tag self-terminating
- self.endtag = 0
-
- def do_tagstart(self, byte):
- if byte.isalnum() or byte in identChars:
- self.tagName += byte
- if self.tagName == '!--':
- return 'comment'
- elif byte.isspace():
- if self.tagName:
- if self.endtag:
- # properly strict thing to do here is probably to only
- # accept whitespace
- return 'waitforgt'
- return 'attrs'
- else:
- self._parseError("Whitespace before tag-name")
- elif byte == '>':
- if self.endtag:
- self.gotTagEnd(self.tagName)
- return 'bodydata'
- else:
- self.gotTagStart(self.tagName, {})
- return (not self.beExtremelyLenient) and 'bodydata' or self.maybeBodyData()
- elif byte == '/':
- if self.tagName:
- return 'afterslash'
- else:
- self.endtag = 1
- elif byte in '!?':
- if self.tagName:
- if not self.beExtremelyLenient:
- self._parseError("Invalid character in tag-name")
- else:
- self.tagName += byte
- self.termtag = 1
- elif byte == '[':
- if self.tagName == '!':
- return 'expectcdata'
- else:
- self._parseError("Invalid '[' in tag-name")
- else:
- if self.beExtremelyLenient:
- self.bodydata = '<'
- return 'unentity'
- self._parseError('Invalid tag character: %r'% byte)
-
- def begin_unentity(self, byte):
- self.bodydata += byte
-
- def do_unentity(self, byte):
- self.bodydata += byte
- return 'bodydata'
-
- def end_unentity(self):
- self.gotText(self.bodydata)
-
- def begin_expectcdata(self, byte):
- self.cdatabuf = byte
-
- def do_expectcdata(self, byte):
- self.cdatabuf += byte
- cdb = self.cdatabuf
- cd = '[CDATA['
- if len(cd) > len(cdb):
- if cd.startswith(cdb):
- return
- elif self.beExtremelyLenient:
- ## WHAT THE CRAP!? MSWord9 generates HTML that includes these
- ## bizarre <![if !foo]> <![endif]> chunks, so I've gotta ignore
- ## 'em as best I can. this should really be a separate parse
- ## state but I don't even have any idea what these _are_.
- return 'waitforgt'
- else:
- self._parseError("Mal-formed CDATA header")
- if cd == cdb:
- self.cdatabuf = ''
- return 'cdata'
- self._parseError("Mal-formed CDATA header")
-
- def do_cdata(self, byte):
- self.cdatabuf += byte
- if self.cdatabuf.endswith("]]>"):
- self.cdatabuf = self.cdatabuf[:-3]
- return 'bodydata'
-
- def end_cdata(self):
- self.gotCData(self.cdatabuf)
- self.cdatabuf = ''
-
- def do_attrs(self, byte):
- if byte.isalnum() or byte in identChars:
- # XXX FIXME really handle !DOCTYPE at some point
- if self.tagName == '!DOCTYPE':
- return 'doctype'
- if self.tagName[0] in '!?':
- return 'waitforgt'
- return 'attrname'
- elif byte.isspace():
- return
- elif byte == '>':
- self.gotTagStart(self.tagName, self.tagAttributes)
- return (not self.beExtremelyLenient) and 'bodydata' or self.maybeBodyData()
- elif byte == '/':
- return 'afterslash'
- elif self.beExtremelyLenient:
- # discard and move on? Only case I've seen of this so far was:
- # <foo bar="baz"">
- return
- self._parseError("Unexpected character: %r" % byte)
-
- def begin_doctype(self, byte):
- self.doctype = byte
-
- def do_doctype(self, byte):
- if byte == '>':
- return 'bodydata'
- self.doctype += byte
-
- def end_doctype(self):
- self.gotDoctype(self.doctype)
- self.doctype = None
-
- def do_waitforgt(self, byte):
- if byte == '>':
- if self.endtag or not self.beExtremelyLenient:
- return 'bodydata'
- return self.maybeBodyData()
-
- def begin_attrname(self, byte):
- self.attrname = byte
- self._attrname_termtag = 0
-
- def do_attrname(self, byte):
- if byte.isalnum() or byte in identChars:
- self.attrname += byte
- return
- elif byte == '=':
- return 'beforeattrval'
- elif byte.isspace():
- return 'beforeeq'
- elif self.beExtremelyLenient:
- if byte in '"\'':
- return 'attrval'
- if byte in lenientIdentChars or byte.isalnum():
- self.attrname += byte
- return
- if byte == '/':
- self._attrname_termtag = 1
- return
- if byte == '>':
- self.attrval = 'True'
- self.tagAttributes[self.attrname] = self.attrval
- self.gotTagStart(self.tagName, self.tagAttributes)
- if self._attrname_termtag:
- self.gotTagEnd(self.tagName)
- return 'bodydata'
- return self.maybeBodyData()
- # something is really broken. let's leave this attribute where it
- # is and move on to the next thing
- return
- self._parseError("Invalid attribute name: %r %r" % (self.attrname, byte))
-
- def do_beforeattrval(self, byte):
- if byte in '"\'':
- return 'attrval'
- elif byte.isspace():
- return
- elif self.beExtremelyLenient:
- if byte in lenientIdentChars or byte.isalnum():
- return 'messyattr'
- if byte == '>':
- self.attrval = 'True'
- self.tagAttributes[self.attrname] = self.attrval
- self.gotTagStart(self.tagName, self.tagAttributes)
- return self.maybeBodyData()
- if byte == '\\':
- # I saw this in actual HTML once:
- # <font size=\"3\"><sup>SM</sup></font>
- return
- self._parseError("Invalid initial attribute value: %r; Attribute values must be quoted." % byte)
-
- attrname = ''
- attrval = ''
-
- def begin_beforeeq(self,byte):
- self._beforeeq_termtag = 0
-
- def do_beforeeq(self, byte):
- if byte == '=':
- return 'beforeattrval'
- elif byte.isspace():
- return
- elif self.beExtremelyLenient:
- if byte.isalnum() or byte in identChars:
- self.attrval = 'True'
- self.tagAttributes[self.attrname] = self.attrval
- return 'attrname'
- elif byte == '>':
- self.attrval = 'True'
- self.tagAttributes[self.attrname] = self.attrval
- self.gotTagStart(self.tagName, self.tagAttributes)
- if self._beforeeq_termtag:
- self.gotTagEnd(self.tagName)
- return 'bodydata'
- return self.maybeBodyData()
- elif byte == '/':
- self._beforeeq_termtag = 1
- return
- self._parseError("Invalid attribute")
-
- def begin_attrval(self, byte):
- self.quotetype = byte
- self.attrval = ''
-
- def do_attrval(self, byte):
- if byte == self.quotetype:
- return 'attrs'
- self.attrval += byte
-
- def end_attrval(self):
- self.tagAttributes[self.attrname] = self.attrval
- self.attrname = self.attrval = ''
-
- def begin_messyattr(self, byte):
- self.attrval = byte
-
- def do_messyattr(self, byte):
- if byte.isspace():
- return 'attrs'
- elif byte == '>':
- endTag = 0
- if self.attrval.endswith('/'):
- endTag = 1
- self.attrval = self.attrval[:-1]
- self.tagAttributes[self.attrname] = self.attrval
- self.gotTagStart(self.tagName, self.tagAttributes)
- if endTag:
- self.gotTagEnd(self.tagName)
- return 'bodydata'
- return self.maybeBodyData()
- else:
- self.attrval += byte
-
- def end_messyattr(self):
- if self.attrval:
- self.tagAttributes[self.attrname] = self.attrval
-
- def begin_afterslash(self, byte):
- self._after_slash_closed = 0
-
- def do_afterslash(self, byte):
- # this state is only after a self-terminating slash, e.g. <foo/>
- if self._after_slash_closed:
- self._parseError("Mal-formed")#XXX When does this happen??
- if byte != '>':
- if self.beExtremelyLenient:
- return
- else:
- self._parseError("No data allowed after '/'")
- self._after_slash_closed = 1
- self.gotTagStart(self.tagName, self.tagAttributes)
- self.gotTagEnd(self.tagName)
- # don't need maybeBodyData here because there better not be
- # any javascript code after a <script/>... we'll see :(
- return 'bodydata'
-
- def begin_bodydata(self, byte):
- if self._leadingBodyData:
- self.bodydata = self._leadingBodyData
- del self._leadingBodyData
- else:
- self.bodydata = ''
-
- def do_bodydata(self, byte):
- if byte == '<':
- return 'tagstart'
- if byte == '&':
- return 'entityref'
- self.bodydata += byte
-
- def end_bodydata(self):
- self.gotText(self.bodydata)
- self.bodydata = ''
-
- def do_waitforendscript(self, byte):
- if byte == '<':
- return 'waitscriptendtag'
- self.bodydata += byte
-
- def begin_waitscriptendtag(self, byte):
- self.temptagdata = ''
- self.tagName = ''
- self.endtag = 0
-
- def do_waitscriptendtag(self, byte):
- # 1 enforce / as first byte read
- # 2 enforce following bytes to be subset of "script" until
- # tagName == "script"
- # 2a when that happens, gotText(self.bodydata) and gotTagEnd(self.tagName)
- # 3 spaces can happen anywhere, they're ignored
- # e.g. < / script >
- # 4 anything else causes all data I've read to be moved to the
- # bodydata, and switch back to waitforendscript state
-
- # If it turns out this _isn't_ a </script>, we need to
- # remember all the data we've been through so we can append it
- # to bodydata
- self.temptagdata += byte
-
- # 1
- if byte == '/':
- self.endtag = True
- elif not self.endtag:
- self.bodydata += "<" + self.temptagdata
- return 'waitforendscript'
- # 2
- elif byte.isalnum() or byte in identChars:
- self.tagName += byte
- if not 'script'.startswith(self.tagName):
- self.bodydata += "<" + self.temptagdata
- return 'waitforendscript'
- elif self.tagName == 'script':
- self.gotText(self.bodydata)
- self.gotTagEnd(self.tagName)
- return 'waitforgt'
- # 3
- elif byte.isspace():
- return 'waitscriptendtag'
- # 4
- else:
- self.bodydata += "<" + self.temptagdata
- return 'waitforendscript'
-
-
- def begin_entityref(self, byte):
- self.erefbuf = ''
- self.erefextra = '' # extra bit for lenient mode
-
- def do_entityref(self, byte):
- if byte.isspace() or byte == "<":
- if self.beExtremelyLenient:
- # '&foo' probably was '&amp;foo'
- if self.erefbuf and self.erefbuf != "amp":
- self.erefextra = self.erefbuf
- self.erefbuf = "amp"
- if byte == "<":
- return "tagstart"
- else:
- self.erefextra += byte
- return 'spacebodydata'
- self._parseError("Bad entity reference")
- elif byte != ';':
- self.erefbuf += byte
- else:
- return 'bodydata'
-
- def end_entityref(self):
- self.gotEntityReference(self.erefbuf)
-
- # Hacky support for whitespace after '&' in an entity reference; this state
- # should only be entered when beExtremelyLenient is set.
- def begin_spacebodydata(self, byte):
- self.bodydata = self.erefextra
- self.erefextra = None
- do_spacebodydata = do_bodydata
- end_spacebodydata = end_bodydata
-
- # Sorta SAX-ish API
-
- def gotTagStart(self, name, attributes):
- '''Encountered an opening tag.
-
- Default behaviour is to print.'''
- print 'begin', name, attributes
-
- def gotText(self, data):
- '''Encountered text
-
- Default behaviour is to print.'''
- print 'text:', repr(data)
-
- def gotEntityReference(self, entityRef):
- '''Encountered mnemonic entity reference
-
- Default behaviour is to print.'''
- print 'entityRef: &%s;' % entityRef
-
- def gotComment(self, comment):
- '''Encountered comment.
-
- Default behaviour is to ignore.'''
- pass
-
- def gotCData(self, cdata):
- '''Encountered CDATA
-
- Default behaviour is to call the gotText method'''
- self.gotText(cdata)
-
- def gotDoctype(self, doctype):
- """Encountered DOCTYPE
-
- This is really grotty: it basically just gives you everything between
- '<!DOCTYPE' and '>' as an argument.
- """
- print '!DOCTYPE', repr(doctype)
-
- def gotTagEnd(self, name):
- '''Encountered closing tag
-
- Default behaviour is to print.'''
- print 'end', name
-
-if __name__ == '__main__':
- from cStringIO import StringIO
- testDocument = '''
-
- <!DOCTYPE ignore all this shit, hah its malformed!!!!@$>
- <?xml version="suck it"?>
- <foo>
- &#65;
- <bar />
- <baz boz="buz">boz &zop;</baz>
- <![CDATA[ foo bar baz ]]>
- </foo>
- '''
- x = XMLParser()
- x.makeConnection(FileWrapper(StringIO()))
- # fn = "/home/glyph/Projects/Twisted/doc/howto/ipc10paper.html"
- fn = "/home/glyph/gruesome.xml"
- # testDocument = open(fn).read()
- x.dataReceived(testDocument)
« no previous file with comments | « third_party/twisted_8_1/twisted/web/static.py ('k') | third_party/twisted_8_1/twisted/web/tap.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698