| OLD | NEW |
| (Empty) |
| 1 """Helper functions for XML. | |
| 2 | |
| 3 This module has misc. helper functions for working with XML DOM nodes.""" | |
| 4 | |
| 5 import re | |
| 6 from compat import * | |
| 7 | |
| 8 import os | |
#Pick an XML parser for the running platform and expose it as parseDocument().
#os.name is "java" under Jython, where the Java DOM parser is used; everywhere
#else the standard-library minidom is used.
if os.name == "java":
    from javax.xml.parsers import *
    import java

    #One DocumentBuilder is created at import time and reused for every parse
    builder = DocumentBuilderFactory.newInstance().newDocumentBuilder()

    def parseDocument(s):
        """Parse the XML string s with the Java DOM parser and return the document."""
        #The Java parser reads from a stream, so wrap the string's bytes in one
        rawBytes = java.lang.String(s).getBytes()
        return builder.parse(java.io.ByteArrayInputStream(rawBytes))
else:
    from xml.dom import minidom
    from xml.sax import saxutils

    def parseDocument(s):
        """Parse the XML string s with minidom and return the document."""
        return minidom.parseString(s)
| 24 | |
def parseAndStripWhitespace(s):
    """Parse the XML string s and return its root element, whitespace-stripped.

    The document is parsed with parseDocument(); any failure raised while
    parsing is re-raised as a SyntaxError carrying the original message.
    The root element is then passed through stripWhitespace(), which raises
    SyntaxError itself when the indentation is not in the expected form.
    """
    import sys
    try:
        element = parseDocument(s).documentElement
    except BaseException:
        #The exception is fetched through sys.exc_info() so that this compiles
        #on every Python version: "except X, e" is a syntax error on Python 3.
        #NOTE(review): BaseException is kept exactly as before; narrowing it
        #might stop translating parser errors on some platforms - confirm
        #before changing.
        raise SyntaxError(str(sys.exc_info()[1]))
    stripWhitespace(element)
    return element
| 32 | |
| 33 #Goes through a DOM tree and removes whitespace besides child elements, | |
| 34 #as long as this whitespace is correctly tab-ified | |
def stripWhitespace(element, tab=0):
    """Remove indentation-only text nodes from element's subtree, in place.

    tab is the nesting depth of element. Whitespace between child elements
    must be a newline followed by tab+1 tab characters, and the whitespace
    before element's closing tag must be a newline followed by tab tabs.

    Raises SyntaxError for an element with no children, for whitespace that
    does not have the expected form, and for child nodes that are neither
    elements nor text.
    """
    element.normalize()

    closingIndent = "\n" + ("\t" * tab)
    childIndent = closingIndent + "\t"

    #Zero children aren't allowed (i.e. <empty/>)
    #This makes writing output simpler, and matches Canonical XML
    #DON'T DO len(element.childNodes) - doesn't work in Jython
    childCount = element.childNodes.length
    if childCount == 0:
        raise SyntaxError("Empty XML elements not allowed")

    #A single child must be text content
    if childCount == 1:
        onlyChild = element.firstChild
        if onlyChild.nodeType == onlyChild.ELEMENT_NODE:
            raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
        if onlyChild.nodeType != onlyChild.TEXT_NODE:
            raise SyntaxError("Unexpected node type in XML document")
        #Text that is nothing but the closing indentation is dropped
        if onlyChild.data == closingIndent:
            element.removeChild(onlyChild)
        return

    #Otherwise walk the children: recurse into elements, validate and drop text
    node = element.firstChild
    while node:
        #Remember the sibling first, since a text node is unlinked below
        following = node.nextSibling
        if node.nodeType == node.ELEMENT_NODE:
            stripWhitespace(node, tab + 1)
        elif node.nodeType == node.TEXT_NODE:
            if node == element.lastChild:
                expected = closingIndent
            else:
                expected = childIndent
            if node.data != expected:
                raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
            element.removeChild(node)
        else:
            raise SyntaxError("Unexpected node type in XML document")
        node = following
| 76 | |
| 77 | |
def checkName(element, name):
    """Verify that element is an element node called name.

    A name of None accepts any tag name, but the node must still be an
    element. Raises SyntaxError when either check fails; returns nothing.
    """
    if not (element.nodeType == element.ELEMENT_NODE):
        raise SyntaxError("Missing element: '%s'" % name)

    #Only compare tag names when the caller asked for a specific one
    if name is not None and element.tagName != name:
        raise SyntaxError("Wrong element name: should be '%s', is '%s'" % (name, element.tagName))
| 87 | |
def getChild(element, index, name=None):
    """Return the child node of element at position index.

    The child is validated with checkName(child, name) before it is
    returned. Raises SyntaxError if element is not an element node, if there
    is no child at index, or if checkName() rejects the child.
    """
    isElement = element.nodeType == element.ELEMENT_NODE
    if not isElement:
        raise SyntaxError("Wrong node type in getChild()")

    found = element.childNodes.item(index)
    if found is None:
        raise SyntaxError("Missing child: '%s'" % name)
    checkName(found, name)
    return found
| 97 | |
def getChildIter(element, index):
    """Return a cursor over element's child nodes, starting at position index.

    The returned object has two methods:
      next()     - return the child at the current position and advance, or
                   return None once every child has been consumed.
      checkEnd() - raise SyntaxError unless every child has been consumed.
    """
    class ChildIter:
        def __init__(self, element, index):
            self.element = element
            self.index = index

        def next(self):
            #Use childNodes.length, not len(childNodes) - len() doesn't work
            #in Jython
            if self.index < self.element.childNodes.length:
                retVal = self.element.childNodes.item(self.index)
                self.index += 1
            else:
                retVal = None
            return retVal

        def checkEnd(self):
            if self.index != self.element.childNodes.length:
                raise SyntaxError("Too many elements under: '%s'" % self.element.tagName)
    return ChildIter(element, index)
| 116 | |
def getChildOrNone(element, index):
    """Return the child node of element at position index.

    The result is whatever childNodes.item(index) gives back; no name check
    is applied to the child. Raises SyntaxError if element is not an
    element node.
    """
    if element.nodeType != element.ELEMENT_NODE:
        #The message names this function (it previously said getChild())
        raise SyntaxError("Wrong node type in getChildOrNone()")
    return element.childNodes.item(index)
| 122 | |
def getLastChild(element, index, name=None):
    """Return the child of element at position index, which must be the last child.

    The child is validated with checkName(child, name) before it is
    returned. Raises SyntaxError if element is not an element node, if there
    is no child at index, if further children follow it, or if checkName()
    rejects the child.
    """
    isElement = element.nodeType == element.ELEMENT_NODE
    if not isElement:
        raise SyntaxError("Wrong node type in getLastChild()")

    found = element.childNodes.item(index)
    if found is None:
        raise SyntaxError("Missing child: '%s'" % name)
    #Anything after the requested child means the element has extra content
    if found != element.lastChild:
        raise SyntaxError("Too many elements under: '%s'" % element.tagName)
    checkName(found, name)
    return found
| 134 | |
| 135 #Regular expressions for syntax-checking attribute and element content | |
#These patterns are applied with re.match(), which anchors only at the start
#of the value; each pattern therefore ends in \Z to anchor the end as well.
#Raw strings keep the backslashes away from Python's own escape handling.
nsRegEx = r"http://trevp\.net/cryptoID\Z" #the dot is escaped so it only matches a literal "."
cryptoIDRegEx = r"([a-km-z3-9]{5}\.){3}[a-km-z3-9]{5}\Z"
urlRegEx = r"http(s)?://.{1,100}\Z"
sha1Base64RegEx = r"[A-Za-z0-9+/]{27}=\Z"
base64RegEx = r"[A-Za-z0-9+/]+={0,4}\Z"
certsListRegEx = r"(0)?(1)?(2)?(3)?(4)?(5)?(6)?(7)?(8)?(9)?\Z"
keyRegEx = r"[A-Z]\Z"
keysListRegEx = r"(A)?(B)?(C)?(D)?(E)?(F)?(G)?(H)?(I)?(J)?(K)?(L)?(M)?(N)?(O)?(P)?(Q)?(R)?(S)?(T)?(U)?(V)?(W)?(X)?(Y)?(Z)?\Z"
dateTimeRegEx = r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ\Z"
shortStringRegEx = r".{1,100}\Z"
exprRegEx = r"[a-zA-Z0-9 ,()]{1,200}\Z"
#A number from 0 to (1 billion)-1. The alternation is wrapped in a
#non-capturing group so that \Z applies to both branches (otherwise any value
#starting with "0" would match) while group 1 keeps its number.
notAfterDeltaRegEx = r"(?:0|([1-9][0-9]{0,8}))\Z"
#Wrapped and end-anchored for the same reason; groups 1 and 2 are unchanged
booleanRegEx = r"(?:(true)|(false))\Z"
| 149 | |
def getReqAttribute(element, attrName, regEx=""):
    """Read, validate and remove a mandatory attribute of element.

    The attribute's value must be non-empty and must match regEx (tested
    with re.match). On success the attribute is removed from element and its
    value is returned as a str.

    Raises SyntaxError if element is not an element node, if the attribute
    is missing or empty, or if its value does not match regEx.
    """
    if not (element.nodeType == element.ELEMENT_NODE):
        raise SyntaxError("Wrong node type in getReqAttribute()")

    attrValue = element.getAttribute(attrName)
    if not attrValue:
        raise SyntaxError("Missing Attribute: " + attrName)
    if re.match(regEx, attrValue) is None:
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, attrValue))
    #Consume the attribute so checkNoMoreAttributes() can spot leftovers
    element.removeAttribute(attrName)
    #de-unicode it; this is needed for bsddb, for example
    return str(attrValue)
| 161 | |
def getAttribute(element, attrName, regEx=""):
    """Read, validate and remove an optional attribute of element.

    If the attribute has a non-empty value, that value must match regEx
    (tested with re.match); the attribute is then removed from element and
    its value is returned as a str.

    Raises SyntaxError if element is not an element node, or if a present
    value does not match regEx.
    """
    #NOTE(review): the indentation of this function was reconstructed. The
    #final return is placed inside "if value:", so an absent attribute yields
    #None; if the original had it one level out, the result would be ""
    #instead - confirm against the pristine file.
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getAttribute()")

    value = element.getAttribute(attrName)
    if value:
        if not re.match(regEx, value):
            raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, value))
        element.removeAttribute(attrName)
        return str(value) #de-unicode it; this is needed for bsddb, for example
| 172 | |
def checkNoMoreAttributes(element):
    """Verify that element has no attributes left on it.

    Raises SyntaxError if element is not an element node or if it still
    carries at least one attribute; returns nothing otherwise.
    """
    isElement = element.nodeType == element.ELEMENT_NODE
    if not isElement:
        raise SyntaxError("Wrong node type in checkNoMoreAttributes()")

    remaining = element.attributes.length
    if remaining != 0:
        raise SyntaxError("Extra attributes on '%s'" % element.tagName)
| 179 | |
def getText(element, regEx=""):
    """Return the text held by element's first child, as a str.

    The first child must be a text node and its data must match regEx
    (tested with re.match).

    Raises SyntaxError if element has no children, if the first child is not
    a text node, or if the text does not match regEx.
    """
    first = element.firstChild
    if first is None:
        raise SyntaxError("Empty element '%s'" % element.tagName)
    if not (first.nodeType == first.TEXT_NODE):
        raise SyntaxError("Non-text node: '%s'" % element.tagName)

    content = first.data
    if re.match(regEx, content) is None:
        raise SyntaxError("Bad Text Value for '%s': '%s' " % (element.tagName, content))
    #de-unicode it; this is needed for bsddb, for example
    return str(content)
| 189 | |
| 190 #Function for adding tabs to a string | |
def indent(s, steps, ch="\t"):
    """Return s with every line prefixed by steps copies of ch.

    A prefix is inserted at the start of s and after every newline, except
    after a newline that ends s, so a trailing newline stays the last
    character. An empty s, or an empty prefix (steps <= 0 or ch == ""),
    returns s unchanged.
    """
    tabs = ch * steps
    #Nothing to add: returning early also avoids s[-1] on an empty string and
    #the s[:-0] slice, which would otherwise wipe out the whole result
    if not s or not tabs:
        return s
    result = tabs + s.replace("\n", "\n" + tabs)
    if s.endswith("\n"):
        #Drop the prefix that was appended after the final newline
        result = result[:-len(tabs)]
    return result
| 199 | |
def escape(s):
    """Return s escaped by xml.sax.saxutils.escape()."""
    #NOTE(review): saxutils is only imported on the non-Jython branch at the
    #top of this file - confirm this function is not needed under Jython
    escaped = saxutils.escape(s)
    return escaped
| OLD | NEW |