OLD | NEW |
| (Empty) |
1 # Copyright (c) 2001-2004 Twisted Matrix Laboratories. | |
2 # See LICENSE for details. | |
3 | |
4 # | |
5 | |
6 from twisted.lore import tree, process | |
7 from twisted.web import domhelpers, microdom | |
8 from twisted.python import reflect | |
9 | |
10 import parser, urlparse, os.path | |
11 | |
# parser.suite in Python 2.3 raises SyntaxError, <2.3 raises parser.ParserError.
# Both are collected in one tuple so that the except clauses in
# check_pre_py_listing catch a parse failure whichever of the two is raised.
parserErrors = (SyntaxError, parser.ParserError)
14 | |
class TagChecker:
    """
    Base class for document lint checkers.

    Subclasses add methods whose names start with 'check_'; each one is
    called with the parsed document and the file name, and reports problems
    through _reportError.
    """

    # Class-level default so the flag can be read even when a check_ method
    # is invoked directly, without going through check() first.
    hadErrors = 0

    def check(self, dom, filename):
        """
        Run every 'check_' method of this object against dom.

        dom is the parsed document and filename the name used in the
        reported messages.  Raises process.ProcessingFailure once all the
        checks have run if any of them reported an error.
        """
        self.hadErrors = 0
        for method in reflect.prefixedMethods(self, 'check_'):
            method(dom, filename)
        if self.hadErrors:
            raise process.ProcessingFailure("invalid format")

    def _reportError(self, filename, element, error):
        """
        Print error for element and remember that an error happened.

        Nothing is printed or recorded when the element carries
        hlint="off".  The position comes from the element's _markpos
        attribute (presumably a (line, column) pair -- verify against the
        parser) and falls back to (0, 0) when it is missing or empty.
        """
        hlint = element.hasAttribute('hlint') and element.getAttribute('hlint')
        if hlint == 'off':
            return
        self.hadErrors = 1
        pos = getattr(element, '_markpos', None) or (0, 0)
        # A single parenthesised expression, so the Python 2 print statement
        # emits exactly the same line as before.
        print("%s:%s:%s: %s" % ((filename,) + pos + (error,)))
30 | |
31 | |
class DefaultTagChecker(TagChecker):
    """
    Checker implementing the default set of documentation lint rules.

    allowedTags is a callable taking a tag name; a false result makes the
    tag 'unrecommended'.  allowedClasses maps a tag name to a callable that
    takes the value of a class attribute and returns true when that class is
    acceptable on the tag.
    """

    def __init__(self, allowedTags, allowedClasses):
        self.allowedTags = allowedTags
        self.allowedClasses = allowedClasses

    def _readListing(self, filename, node):
        """
        Read the file that the href attribute of node points at.

        The href is resolved relative to the directory of filename.
        Returns a (path, lines) tuple.  Errors from building the path or
        from opening and reading the file propagate to the caller; the file
        is closed in either case.
        """
        path = os.path.join(os.path.dirname(filename),
                            node.getAttribute('href'))
        listing = open(path, 'r')
        try:
            return path, listing.readlines()
        finally:
            listing.close()

    def check_disallowedElements(self, dom, filename):
        """Report every element whose tag name allowedTags rejects."""
        def disallowed(node):
            return not self.allowedTags(node.tagName)
        for element in domhelpers.findElements(dom, disallowed):
            self._reportError(filename, element,
                              'unrecommended tag %s' % element.tagName)

    def check_disallowedClasses(self, dom, filename):
        """
        Report elements whose class attribute is not accepted for their tag.

        A tag with no entry in allowedClasses accepts no class at all.
        """
        def unknownClass(element):
            if not element.hasAttribute('class'):
                return 0
            checker = self.allowedClasses.get(element.tagName, lambda x: 0)
            return not checker(element.getAttribute('class'))
        for element in domhelpers.findElements(dom, unknownClass):
            self._reportError(filename, element,
                              'unknown class %s' %
                              element.getAttribute('class'))

    def check_quote(self, dom, filename):
        """
        Report nodes whose data contains a double quote character.

        Comments are exempt, and so are nodes that have a pre or code
        element among the parents examined.
        """
        def matcher(node):
            if '"' not in getattr(node, 'data', ''):
                return 0
            if isinstance(node, microdom.Comment):
                return 0
            # NOTE(review): the [1:-1] slice presumably drops the node
            # itself and the document from the parent chain -- confirm
            # against domhelpers.getParents.
            for parent in domhelpers.getParents(node)[1:-1]:
                if parent.tagName in ('pre', 'code'):
                    return 0
            return 1
        for node in domhelpers.findNodes(dom, matcher):
            self._reportError(filename, node.parentNode, 'contains quote')

    def check_styleattr(self, dom, filename):
        """Report every element that has a style attribute."""
        for node in domhelpers.findElementsWithAttribute(dom, 'style'):
            self._reportError(filename, node, 'explicit style')

    def check_align(self, dom, filename):
        """Report every element that has an align attribute."""
        for node in domhelpers.findElementsWithAttribute(dom, 'align'):
            self._reportError(filename, node, 'explicit alignment')

    def check_style(self, dom, filename):
        """Report style elements that contain any text."""
        for node in domhelpers.findNodesNamed(dom, 'style'):
            if domhelpers.getNodeText(node) != '':
                self._reportError(filename, node, 'hand hacked style')

    def check_title(self, dom, filename):
        """
        Require exactly one title and one h1, both with the same text.

        Only the first problem found is reported.
        """
        doc = dom.documentElement
        title = domhelpers.findNodesNamed(dom, 'title')
        if len(title) != 1:
            return self._reportError(filename, doc, 'not exactly one title')
        h1 = domhelpers.findNodesNamed(dom, 'h1')
        if len(h1) != 1:
            return self._reportError(filename, doc, 'not exactly one h1')
        if domhelpers.getNodeText(h1[0]) != domhelpers.getNodeText(title[0]):
            self._reportError(filename, h1[0], 'title and h1 text differ')

    def check_80_columns(self, dom, filename):
        """
        Report lines longer than 80 columns, ignoring trailing whitespace.

        Both the text of pre elements and the files referenced by anchors
        whose class ends in 'listing' are checked; one error is reported per
        offending line.  A listing that cannot be read is reported as a bad
        href instead.
        """
        for node in domhelpers.findNodesNamed(dom, 'pre'):
            # the ps/pdf output is in a font that cuts off at 80 characters,
            # so this is enforced to make sure the interesting parts (which
            # are likely to be on the right-hand edge) stay on the printed
            # page.
            for line in domhelpers.gatherTextNodes(node, 1).split('\n'):
                if len(line.rstrip()) > 80:
                    self._reportError(filename, node,
                                      'text wider than 80 columns in pre')
        for node in domhelpers.findNodesNamed(dom, 'a'):
            if not node.getAttribute('class', '').endswith('listing'):
                continue
            try:
                lines = self._readListing(filename, node)[1]
            except Exception:
                # Missing href, unreadable file, ...: all of them mean the
                # listing cannot be checked.  Unlike a bare except this lets
                # KeyboardInterrupt and SystemExit through on Pythons where
                # they do not derive from Exception.
                self._reportError(filename, node,
                                  'bad listing href: %r' %
                                  node.getAttribute('href'))
                continue
            for line in lines:
                if len(line.rstrip()) > 80:
                    self._reportError(filename, node,
                                      'listing wider than 80 columns')

    def check_pre_py_listing(self, dom, filename):
        """
        Report pre elements of class 'python' whose text does not parse.

        Blank lines and common leading spaces are removed before parsing,
        and a listing that only fails because of the '...' idiom is
        accepted.  A listing without any non-blank line is skipped.
        """
        import sys
        for node in domhelpers.findNodesNamed(dom, 'pre'):
            if node.getAttribute('class') != 'python':
                continue
            text = domhelpers.getNodeText(node)
            # Turn the &gt; and &lt; entities back into > and <
            text = text.replace('&gt;', '>').replace('&lt;', '<')
            # Strip blank lines
            stripped = [raw.rstrip() for raw in text.split('\n')]
            lines = [line for line in stripped if line]
            if not lines:
                # Nothing to parse; without this guard the loop below would
                # never terminate on an empty list.
                continue
            # Strip leading space, one column at a time, for as long as
            # every line starts with a space
            while not [1 for line in lines if line[:1] not in ('', ' ')]:
                lines = [line[1:] for line in lines]
            text = '\n'.join(lines) + '\n'
            try:
                try:
                    parser.suite(text)
                except parserErrors:
                    # Pretend the "..." idiom is syntactically valid
                    text = text.replace("...", "'...'")
                    parser.suite(text)
            except parserErrors:
                # sys.exc_info() rather than "except X, e" / "except X as e"
                # so the same source is accepted by old and new parsers.
                e = sys.exc_info()[1]
                self._reportError(filename, node,
                                  'invalid python code:' + str(e))

    def check_anchor_in_heading(self, dom, filename):
        """Report h1 to h6 elements that contain an a element."""
        headingNames = ['h%d' % n for n in range(1, 7)]
        for hname in headingNames:
            for node in domhelpers.findNodesNamed(dom, hname):
                if domhelpers.findNodesNamed(node, 'a'):
                    self._reportError(filename, node, 'anchor in heading')

    def check_texturl_matches_href(self, dom, filename):
        """
        Report links whose text looks like a URL but differs from the href.

        Text counts as a URL when urlparse finds a scheme in it and it
        contains no space.  Anchors without an href are ignored.
        """
        for node in domhelpers.findNodesNamed(dom, 'a'):
            if not node.hasAttribute('href'):
                continue
            text = domhelpers.getNodeText(node)
            proto = urlparse.urlparse(text)[0]
            if proto and ' ' not in text:
                if text != node.getAttribute('href', ''):
                    self._reportError(filename, node,
                                      'link text does not match href')

    def check_a_py_listing(self, dom, filename):
        """
        Report py-listing files that still show licence boilerplate.

        Lines before the anchor's skipLines attribute (default 0) are
        ignored.  For each remaining line containing the licence address an
        error is reported, suggesting the skipLines value that would hide
        that line.
        """
        for node in domhelpers.findNodesNamed(dom, 'a'):
            if node.getAttribute('class') != 'py-listing':
                continue
            try:
                fn, lines = self._readListing(filename, node)
            except Exception:
                # 'py-listing' ends in 'listing', so check_80_columns
                # already reports this anchor as a bad listing href; there
                # is nothing left to inspect here.
                continue
            skipped = int(node.getAttribute('skipLines', 0))
            for num, line in enumerate(lines[skipped:]):
                if line.count('59 Temple Place, Suite 330, Boston'):
                    self._reportError(filename, node,
                        'included source file %s has licence boilerplate.'
                        ' Use skipLines="%d".'
                        % (fn, skipped + num + 1))

    def check_lists(self, dom, filename):
        """
        Report ul and ol elements that are empty or have non-li children.

        One error is reported for each child whose node name is not 'li'.
        """
        for node in (domhelpers.findNodesNamed(dom, 'ul') +
                     domhelpers.findNodesNamed(dom, 'ol')):
            if not node.childNodes:
                self._reportError(filename, node, 'empty list')
            for child in node.childNodes:
                if child.nodeName != 'li':
                    self._reportError(filename, node,
                                      'only list items allowed in lists')
179 | |
180 | |
def list2dict(l):
    """
    Return a dictionary with one key per element of l, every value None.

    Used to build the membership tables of this module.
    """
    return dict.fromkeys(l)
186 | |
# Values accepted in the class attribute of <code> elements.
classes = list2dict(['shell', 'API', 'python', 'py-prototype', 'py-filename',
                     'py-src-string', 'py-signature', 'py-src-parameter',
                     'py-src-identifier', 'py-src-keyword'])

# Tag names the default checker does not flag as unrecommended.
tags = list2dict(["html", "title", "head", "body", "h1", "h2", "h3", "ol", "ul",
                  "dl", "li", "dt", "dd", "p", "code", "img", "blockquote", "a",
                  "cite", "div", "span", "strong", "em", "pre", "q", "table",
                  "tr", "td", "th", "style", "sub", "sup", "link"])

# Values accepted in the class attribute of <span>, <div>, <a> and <pre>.
span = list2dict(['footnote', 'manhole-output', 'index'])

div = list2dict(['note', 'boxed', 'doit'])

a = list2dict(['listing', 'py-listing', 'html-listing', 'absolute'])

pre = list2dict(['python', 'shell', 'python-interpreter', 'elisp'])

# Tag name -> predicate over the value of the class attribute.
# dict.__contains__ is used instead of dict.has_key: it answers the same
# question and, unlike has_key, still exists in Python 3.
allowed = {'code': classes.__contains__, 'span': span.__contains__,
           'div': div.__contains__, 'a': a.__contains__,
           'pre': pre.__contains__, 'ul': lambda x: x == 'toc',
           'ol': lambda x: x == 'toc', 'li': lambda x: x == 'ignoretoc'}
207 | |
def getDefaultChecker():
    """
    Return a DefaultTagChecker configured from the module-level tables.

    Membership in tags decides which tag names are accepted, and allowed
    supplies the per-tag class predicates.
    """
    # tags.__contains__ behaves like tags.has_key but is not removed in
    # Python 3.
    return DefaultTagChecker(tags.__contains__, allowed)
210 | |
def doFile(file, checker):
    """
    Parse file and run checker over the resulting document.

    Nothing is checked when tree.parseFileAndReport returns a false value.
    Whatever checker.check raises propagates to the caller.
    """
    dom = tree.parseFileAndReport(file)
    if not dom:
        return
    checker.check(dom, file)
OLD | NEW |