OLD | NEW |
| (Empty) |
1 """Helper functions for XML. | |
2 | |
3 This module has misc. helper functions for working with XML DOM nodes.""" | |
4 | |
5 import re | |
6 from compat import * | |
7 | |
8 import os | |
#Choose a DOM parser for the running platform. Either branch defines
#parseDocument(s), which parses the XML text 's' and returns a DOM Document.
if os.name == "java":
    #Jython: parse through the Java JAXP DocumentBuilder API
    from javax.xml.parsers import *
    import java

    builder = DocumentBuilderFactory.newInstance().newDocumentBuilder()

    def parseDocument(s):
        """Parse the XML text 's' with the shared JAXP builder."""
        rawBytes = java.lang.String(s).getBytes()
        return builder.parse(java.io.ByteArrayInputStream(rawBytes))
else:
    from xml.dom import minidom
    from xml.sax import saxutils

    def parseDocument(s):
        """Parse the XML text 's' with xml.dom.minidom."""
        return minidom.parseString(s)
24 | |
def parseAndStripWhitespace(s):
    """Parse the XML text 's' and return its whitespace-stripped root element.

    The document is parsed with parseDocument() and the root element is then
    passed through stripWhitespace(), which modifies the tree in place.

    Raises SyntaxError if parsing fails (carrying the parser's message) or
    if stripWhitespace() rejects the document's layout.
    """
    #Local import so this function does not depend on a module-level 'sys'
    import sys

    try:
        element = parseDocument(s).documentElement
    except (KeyboardInterrupt, SystemExit):
        #Never disguise an interrupt or interpreter exit as a parse error
        raise
    except BaseException:
        #sys.exc_info() is used instead of "except X, e" / "except X as e"
        #so that this parses on every Python version
        raise SyntaxError(str(sys.exc_info()[1]))
    stripWhitespace(element)
    return element
32 | |
def stripWhitespace(element, tab=0):
    """Remove the indentation text nodes from a DOM tree, in place.

    The tree must be laid out with exactly one tab per nesting level:
    between the children of 'element' each text node must be a newline
    followed by tab+1 tabs, and the text node before the closing tag must be
    a newline followed by 'tab' tabs.  Such text nodes are removed and child
    elements are processed recursively with tab+1.

    An element with a single text child keeps that text as its content,
    unless the text is just the closing indentation, in which case it is
    removed.

    Raises SyntaxError for an element with no children at all (i.e.
    <empty/>), for whitespace that doesn't follow the layout above, and for
    any node that is neither an element nor text.
    """
    element.normalize()

    closingIndent = "\n" + ("\t" * tab)
    childIndent = closingIndent + "\t"

    #Zero children aren't allowed (i.e. <empty/>)
    #This makes writing output simpler, and matches Canonical XML
    #DON'T DO len(element.childNodes) - doesn't work in Jython
    childCount = element.childNodes.length
    if childCount == 0:
        raise SyntaxError("Empty XML elements not allowed")

    #A single child must be text content
    if childCount == 1:
        only = element.firstChild
        if only.nodeType == only.ELEMENT_NODE:
            raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
        if only.nodeType != only.TEXT_NODE:
            raise SyntaxError("Unexpected node type in XML document")
        #Text that is only the closing indentation means "empty": drop it
        if only.data == closingIndent:
            element.removeChild(only)
        return

    #Otherwise there are multiple children
    node = element.firstChild
    while node:
        #Remember the successor first, since 'node' may be removed below
        following = node.nextSibling
        if node.nodeType == node.ELEMENT_NODE:
            stripWhitespace(node, tab + 1)
        elif node.nodeType == node.TEXT_NODE:
            if node == element.lastChild:
                expected = closingIndent
            else:
                expected = childIndent
            if node.data != expected:
                raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
            element.removeChild(node)
        else:
            raise SyntaxError("Unexpected node type in XML document")
        node = following
76 | |
77 | |
def checkName(element, name):
    """Verify that 'element' is an element node called 'name'.

    A 'name' of None accepts any element name.  Raises SyntaxError if
    'element' is not an element node or if its tag name differs from 'name'.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Missing element: '%s'" % name)

    #No particular name requested, or the name matches: nothing to report
    if name is None or element.tagName == name:
        return

    raise SyntaxError("Wrong element name: should be '%s', is '%s'" % (name, element.tagName))
87 | |
def getChild(element, index, name=None):
    """Return the child node of 'element' at position 'index'.

    The child is validated with checkName(), so it must be an element node
    and, when 'name' is given, must carry that tag name.

    Raises SyntaxError if 'element' is not an element node, if there is no
    child at 'index', or if checkName() rejects the child.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getChild()")

    found = element.childNodes.item(index)
    if found is None:
        raise SyntaxError("Missing child: '%s'" % name)

    checkName(found, name)
    return found
97 | |
def getChildIter(element, index):
    """Return a cursor over the child nodes of 'element', starting at 'index'.

    The returned object offers two methods:
      next()     - returns the child at the current position and advances,
                   or returns None once every child has been consumed.
      checkEnd() - raises SyntaxError unless every child has been consumed.
    """
    class ChildIter:
        def __init__(self, element, index):
            self.element = element
            self.index = index

        def next(self):
            #Use .length, not len(): len() on childNodes doesn't work in Jython
            if self.index < self.element.childNodes.length:
                retVal = self.element.childNodes.item(self.index)
                self.index += 1
            else:
                retVal = None
            return retVal

        def checkEnd(self):
            if self.index != self.element.childNodes.length:
                raise SyntaxError("Too many elements under: '%s'" % self.element.tagName)

    return ChildIter(element, index)
116 | |
def getChildOrNone(element, index):
    """Return the child node of 'element' at position 'index'.

    The result is whatever element.childNodes.item(index) yields, so a
    position with no child gives None instead of an error.

    Raises SyntaxError if 'element' is not an element node.
    """
    if element.nodeType != element.ELEMENT_NODE:
        #Name this function (not getChild) so the error points at the real caller
        raise SyntaxError("Wrong node type in getChildOrNone()")
    return element.childNodes.item(index)
122 | |
def getLastChild(element, index, name=None):
    """Return the child of 'element' at 'index', which must be its last child.

    The child is validated with checkName(), so it must be an element node
    and, when 'name' is given, must carry that tag name.

    Raises SyntaxError if 'element' is not an element node, if there is no
    child at 'index', if further children follow it, or if checkName()
    rejects the child.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getLastChild()")

    found = element.childNodes.item(index)
    if found is None:
        raise SyntaxError("Missing child: '%s'" % name)

    #Anything after the requested child is unexpected
    if found != element.lastChild:
        raise SyntaxError("Too many elements under: '%s'" % element.tagName)

    checkName(found, name)
    return found
134 | |
#Regular expressions for syntax-checking attribute and element content.
#They are applied with re.match(), which anchors at the start of the value;
#every pattern ends in \Z so that the whole value must match.
#Raw strings keep the backslash escapes (\Z, \d, \.) intact for the regex
#engine on every Python version.
nsRegEx = r"http://trevp\.net/cryptoID\Z"
cryptoIDRegEx = r"([a-km-z3-9]{5}\.){3}[a-km-z3-9]{5}\Z"
urlRegEx = r"http(s)?://.{1,100}\Z"
sha1Base64RegEx = r"[A-Za-z0-9+/]{27}=\Z"
base64RegEx = r"[A-Za-z0-9+/]+={0,4}\Z"
certsListRegEx = r"(0)?(1)?(2)?(3)?(4)?(5)?(6)?(7)?(8)?(9)?\Z"
keyRegEx = r"[A-Z]\Z"
keysListRegEx = r"(A)?(B)?(C)?(D)?(E)?(F)?(G)?(H)?(I)?(J)?(K)?(L)?(M)?(N)?(O)?(P)?(Q)?(R)?(S)?(T)?(U)?(V)?(W)?(X)?(Y)?(Z)?\Z"
dateTimeRegEx = r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ\Z"
shortStringRegEx = r".{1,100}\Z"
exprRegEx = r"[a-zA-Z0-9 ,()]{1,200}\Z"
#A number from 0 to (1 billion)-1.  The alternation is wrapped in a
#non-capturing group so that \Z applies to the "0" branch as well.
notAfterDeltaRegEx = r"(?:0|([1-9][0-9]{0,8}))\Z"
#The non-capturing group anchors both alternatives with \Z while keeping
#"true" as group 1 and "false" as group 2.
booleanRegEx = r"(?:(true)|(false))\Z"
149 | |
def getReqAttribute(element, attrName, regEx=""):
    """Consume and return a mandatory attribute of 'element'.

    The attribute value must be non-empty and must match 'regEx' (applied
    with re.match).  On success the attribute is removed from the element
    and its value is returned as a plain str.

    Raises SyntaxError if 'element' is not an element node, if the
    attribute is missing or empty, or if its value doesn't match.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getReqAttribute()")

    found = element.getAttribute(attrName)
    if not found:
        raise SyntaxError("Missing Attribute: " + attrName)
    if re.match(regEx, found) is None:
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, found))

    #Remove it so that checkNoMoreAttributes() can spot unexpected leftovers
    element.removeAttribute(attrName)

    #de-unicode it; this is needed for bsddb, for example
    return str(found)
161 | |
def getAttribute(element, attrName, regEx=""):
    """Consume and return an optional attribute of 'element'.

    If the attribute has a non-empty value, that value must match 'regEx'
    (applied with re.match); the attribute is then removed from the element.
    The value is returned as a plain str; an absent or empty attribute is
    returned as-is (converted with str) without any check or removal.

    Raises SyntaxError if 'element' is not an element node or if a present
    value doesn't match.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getAttribute()")

    found = element.getAttribute(attrName)

    #Nothing to validate or remove when the attribute is absent/empty
    if not found:
        return str(found)

    if re.match(regEx, found) is None:
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, found))
    element.removeAttribute(attrName)

    #de-unicode it; this is needed for bsddb, for example
    return str(found)
172 | |
def checkNoMoreAttributes(element):
    """Verify that 'element' has no attributes left.

    Raises SyntaxError if 'element' is not an element node or if it still
    carries at least one attribute.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in checkNoMoreAttributes()")

    leftover = element.attributes.length
    if leftover != 0:
        raise SyntaxError("Extra attributes on '%s'" % element.tagName)
179 | |
def getText(element, regEx=""):
    """Return the text content of 'element' as a plain str.

    Only the first child of 'element' is examined: it must be a text node
    whose data matches 'regEx' (applied with re.match).

    Raises SyntaxError if the element has no children, if its first child
    is not a text node, or if the text doesn't match.
    """
    content = element.firstChild
    if content is None:
        raise SyntaxError("Empty element '%s'" % element.tagName)
    if content.nodeType != content.TEXT_NODE:
        raise SyntaxError("Non-text node: '%s'" % element.tagName)

    text = content.data
    if re.match(regEx, text) is None:
        raise SyntaxError("Bad Text Value for '%s': '%s' " % (element.tagName, text))

    #de-unicode it; this is needed for bsddb, for example
    return str(text)
189 | |
def indent(s, steps, ch="\t"):
    """Return 's' with every line prefixed by 'ch' repeated 'steps' times.

    A trailing newline in 's' is kept as the end of the text; no
    indentation is added after it.  Blank lines inside 's' are indented like
    any other line.  An empty 's', or an empty indentation (steps <= 0 or
    empty 'ch'), returns 's' unchanged.
    """
    tabs = ch * steps

    #Nothing to do; also avoids indexing into an empty string and the
    #s[:-0] slice that would otherwise wipe out the whole result
    if not s or not tabs:
        return s

    if s.endswith("\n"):
        #Indent the lines before the final newline, then put it back
        return tabs + s[:-1].replace("\n", "\n" + tabs) + "\n"
    return tabs + s.replace("\n", "\n" + tabs)
199 | |
def escape(s):
    """Return 's' with the XML special characters &, < and > escaped.

    Uses xml.sax.saxutils.escape() when that module can be imported.  The
    import is done here because the module-level 'saxutils' name is only
    bound on non-Jython platforms; if the import fails, the same three
    replacements are applied by hand.
    """
    try:
        from xml.sax.saxutils import escape as saxEscape
    except ImportError:
        #'&' must be replaced first so the other entities aren't re-escaped
        return s.replace("&", "&amp;").replace(">", "&gt;").replace("<", "&lt;")
    return saxEscape(s)
OLD | NEW |