| OLD | NEW |
| (Empty) |
| 1 # Copyright (c) 2001-2004 Twisted Matrix Laboratories. | |
| 2 # See LICENSE for details. | |
| 3 | |
| 4 # | |
| 5 | |
| 6 from twisted.lore import tree, process | |
| 7 from twisted.web import domhelpers, microdom | |
| 8 from twisted.python import reflect | |
| 9 | |
| 10 import parser, urlparse, os.path | |
| 11 | |
| 12 # Exceptions parser.suite() can raise for invalid code: SyntaxError on Python 2.3, parser.ParserError on versions before 2.3. | |
| 13 parserErrors = (SyntaxError, parser.ParserError) | |
| 14 | |
class TagChecker:
    """
    Base class for document checkers made of C{check_}-prefixed methods.

    Each C{check_*} method is called with the document and its filename and
    reports problems through L{_reportError}.
    """

    def check(self, dom, filename):
        """
        Run every C{check_*} method of this object against a document.

        @param dom: the parsed document to check.
        @param filename: the name of the file the document came from; it is
            passed to each check and used in error reports.
        @raise process.ProcessingFailure: if any check reported an error.
        """
        self.hadErrors = 0
        checks = reflect.prefixedMethods(self, 'check_')
        for runCheck in checks:
            runCheck(dom, filename)
        if not self.hadErrors:
            return
        raise process.ProcessingFailure("invalid format")

    def _reportError(self, filename, element, error):
        """
        Print an error for C{element} and remember that an error occurred.

        Nothing is printed or recorded when the element carries the attribute
        C{hlint="off"}.

        @param filename: the name of the file being checked.
        @param element: the node the error relates to; its C{_markpos}
            attribute, when present and true, supplies the two position
            fields of the report, otherwise C{(0, 0)} is used.
        @param error: the message to print.
        """
        if element.hasAttribute('hlint') and \
               element.getAttribute('hlint') == 'off':
            # The document author switched linting off for this element.
            return
        self.hadErrors = 1
        position = getattr(element, '_markpos', None) or (0, 0)
        fields = (filename,) + position + (error,)
        print("%s:%s:%s: %s" % fields)
| 30 | |
| 31 | |
class DefaultTagChecker(TagChecker):
    """
    A checker bundling a set of markup checks for HTML documents.

    @ivar allowedTags: a one-argument callable; it is passed a tag name and
        returns a true value if that tag may be used.
    @ivar allowedClasses: a mapping from tag name to a one-argument callable;
        the callable is passed the value of a C{class} attribute and returns
        a true value if that class is allowed on that tag.
    """

    def __init__(self, allowedTags, allowedClasses):
        self.allowedTags = allowedTags
        self.allowedClasses = allowedClasses

    def _readListing(self, filename, node):
        """
        Read the file named by the C{href} attribute of C{node}.

        The href is resolved relative to the directory of C{filename}.  The
        file is always closed again before returning.

        @return: a C{(path, lines)} tuple.  C{lines} is C{None} if the node
            has no href or the file could not be read; C{path} is C{None}
            only when there is no href.
        """
        href = node.getAttribute('href', None)
        if href is None:
            return None, None
        path = os.path.join(os.path.dirname(filename), href)
        try:
            listing = open(path, 'r')
            try:
                return path, listing.readlines()
            finally:
                listing.close()
        except (IOError, OSError):
            return path, None

    def check_disallowedElements(self, dom, filename):
        """Report every element whose tag is rejected by C{allowedTags}."""
        def isDisallowed(node):
            return not self.allowedTags(node.tagName)
        for element in domhelpers.findElements(dom, isDisallowed):
            self._reportError(filename, element,
                              'unrecommended tag %s' % element.tagName)

    def check_disallowedClasses(self, dom, filename):
        """
        Report every element whose C{class} attribute is not allowed for its
        tag.  Tags without an entry in C{allowedClasses} allow no class.
        """
        def hasUnknownClass(element):
            if not element.hasAttribute('class'):
                return 0
            checker = self.allowedClasses.get(element.tagName, lambda x: 0)
            return not checker(element.getAttribute('class'))
        for element in domhelpers.findElements(dom, hasUnknownClass):
            self._reportError(filename, element,
                              'unknown class %s' % element.getAttribute('class'))

    def check_quote(self, dom, filename):
        """
        Report nodes whose data contains a double quote character, except
        comments and anything nested inside C{pre} or C{code}.
        """
        def hasBareQuote(node):
            if '"' not in getattr(node, 'data', ''):
                return False
            if isinstance(node, microdom.Comment):
                return False
            # Skip the first and last entries returned by getParents, as the
            # original slice [1:-1] did.
            for parent in domhelpers.getParents(node)[1:-1]:
                if parent.tagName in ('pre', 'code'):
                    return False
            return True
        for node in domhelpers.findNodes(dom, hasBareQuote):
            self._reportError(filename, node.parentNode, 'contains quote')

    def check_styleattr(self, dom, filename):
        """Report every element carrying a C{style} attribute."""
        for node in domhelpers.findElementsWithAttribute(dom, 'style'):
            self._reportError(filename, node, 'explicit style')

    def check_align(self, dom, filename):
        """Report every element carrying an C{align} attribute."""
        for node in domhelpers.findElementsWithAttribute(dom, 'align'):
            self._reportError(filename, node, 'explicit alignment')

    def check_style(self, dom, filename):
        """Report every C{style} element that has non-empty text."""
        for node in domhelpers.findNodesNamed(dom, 'style'):
            if domhelpers.getNodeText(node) != '':
                self._reportError(filename, node, 'hand hacked style')

    def check_title(self, dom, filename):
        """
        Require exactly one C{title} and exactly one C{h1}, with equal text.
        """
        doc = dom.documentElement
        title = domhelpers.findNodesNamed(dom, 'title')
        if len(title) != 1:
            return self._reportError(filename, doc, 'not exactly one title')
        h1 = domhelpers.findNodesNamed(dom, 'h1')
        if len(h1) != 1:
            return self._reportError(filename, doc, 'not exactly one h1')
        if domhelpers.getNodeText(h1[0]) != domhelpers.getNodeText(title[0]):
            self._reportError(filename, h1[0], 'title and h1 text differ')

    def check_80_columns(self, dom, filename):
        """
        Report lines wider than 80 columns in C{pre} elements and in files
        referenced by links whose class ends with C{listing}.  One error is
        reported per offending line.
        """
        for node in domhelpers.findNodesNamed(dom, 'pre'):
            # the ps/pdf output is in a font that cuts off at 80 characters,
            # so this is enforced to make sure the interesting parts (which
            # are likely to be on the right-hand edge) stay on the printed
            # page.
            for line in domhelpers.gatherTextNodes(node, 1).split('\n'):
                if len(line.rstrip()) > 80:
                    self._reportError(filename, node,
                                      'text wider than 80 columns in pre')
        for node in domhelpers.findNodesNamed(dom, 'a'):
            if not node.getAttribute('class', '').endswith('listing'):
                continue
            lines = self._readListing(filename, node)[1]
            if lines is None:
                self._reportError(filename, node,
                                  'bad listing href: %r' %
                                  node.getAttribute('href'))
                continue
            for line in lines:
                if len(line.rstrip()) > 80:
                    self._reportError(filename, node,
                                      'listing wider than 80 columns')

    def check_pre_py_listing(self, dom, filename):
        """
        Report C{pre} elements of class C{python} whose text is not valid
        Python source, after un-escaping angle brackets, dropping blank lines
        and removing the indentation shared by all lines.
        """
        # Imported here because the exception is fetched with sys.exc_info(),
        # which avoids the version-specific "except ..., e" / "except ... as e"
        # syntax.
        import sys
        for node in domhelpers.findNodesNamed(dom, 'pre'):
            if node.getAttribute('class') != 'python':
                continue
            text = domhelpers.getNodeText(node)
            # Fix &lt; and &gt; (presumably getNodeText returns them still
            # escaped -- TODO confirm against domhelpers).
            text = text.replace('&gt;', '>').replace('&lt;', '<')
            # Strip blank lines
            stripped = [raw.rstrip() for raw in text.split('\n')]
            lines = [line for line in stripped if line]
            if not lines:
                # Nothing to parse; without this guard the loop below would
                # never terminate on an empty list.
                continue
            # Strip leading space: drop one column at a time for as long as
            # every line starts with a space.
            while not [1 for line in lines if line[:1] != ' ']:
                lines = [line[1:] for line in lines]
            text = '\n'.join(lines) + '\n'
            try:
                try:
                    parser.suite(text)
                except parserErrors:
                    # Pretend the "..." idiom is syntactically valid
                    parser.suite(text.replace("...", "'...'"))
            except parserErrors:
                self._reportError(filename, node,
                                  'invalid python code:' +
                                  str(sys.exc_info()[1]))

    def check_anchor_in_heading(self, dom, filename):
        """Report C{h1}..C{h6} elements that contain an C{a} element."""
        headingNames = ['h%d' % n for n in range(1, 7)]
        for hname in headingNames:
            for node in domhelpers.findNodesNamed(dom, hname):
                if domhelpers.findNodesNamed(node, 'a'):
                    self._reportError(filename, node, 'anchor in heading')

    def check_texturl_matches_href(self, dom, filename):
        """
        Report links whose text parses as a URL with a scheme, contains no
        space, and differs from the link's C{href}.
        """
        for node in domhelpers.findNodesNamed(dom, 'a'):
            if not node.hasAttribute('href'):
                continue
            text = domhelpers.getNodeText(node)
            proto = urlparse.urlparse(text)[0]
            if proto and ' ' not in text:
                if text != node.getAttribute('href', ''):
                    self._reportError(filename, node,
                                      'link text does not match href')

    def check_a_py_listing(self, dom, filename):
        """
        Report C{py-listing} links whose included lines (those after
        C{skipLines}) contain the licence boilerplate address line.
        """
        for node in domhelpers.findNodesNamed(dom, 'a'):
            if node.getAttribute('class') != 'py-listing':
                continue
            fn, lines = self._readListing(filename, node)
            if lines is None:
                # An unreadable listing is reported by check_80_columns as a
                # 'bad listing href'; do not abort the remaining checks.
                continue
            skip = int(node.getAttribute('skipLines', 0))
            for num, line in enumerate(lines[skip:]):
                if '59 Temple Place, Suite 330, Boston' in line:
                    self._reportError(filename, node,
                        'included source file %s has licence boilerplate.'
                        ' Use skipLines="%d".'
                        % (fn, skip + num + 1))

    def check_lists(self, dom, filename):
        """
        Report C{ul} and C{ol} elements that are empty or that have a child
        node which is not an C{li}.
        """
        for node in (domhelpers.findNodesNamed(dom, 'ul') +
                     domhelpers.findNodesNamed(dom, 'ol')):
            if not node.childNodes:
                self._reportError(filename, node, 'empty list')
            for child in node.childNodes:
                if child.nodeName != 'li':
                    self._reportError(filename, node,
                                      'only list items allowed in lists')
| 179 | |
| 180 | |
def list2dict(l):
    """
    Return a dictionary whose keys are the items of C{l}, each mapped to
    C{None}.
    """
    return dict.fromkeys(l)
| 186 | |
# Class names accepted on <code> elements.
classes = list2dict([
    'shell', 'API', 'python', 'py-prototype', 'py-filename',
    'py-src-string', 'py-signature', 'py-src-parameter',
    'py-src-identifier', 'py-src-keyword'])

# Tag names the default checker accepts.
tags = list2dict([
    "html", "title", "head", "body", "h1", "h2", "h3", "ol", "ul",
    "dl", "li", "dt", "dd", "p", "code", "img", "blockquote", "a",
    "cite", "div", "span", "strong", "em", "pre", "q", "table",
    "tr", "td", "th", "style", "sub", "sup", "link"])

# Class names accepted on <span>, <div>, <a> and <pre> elements respectively.
span = list2dict(['footnote', 'manhole-output', 'index'])

div = list2dict(['note', 'boxed', 'doit'])

a = list2dict(['listing', 'py-listing', 'html-listing', 'absolute'])

pre = list2dict(['python', 'shell', 'python-interpreter', 'elisp'])

# Maps a tag name to a predicate that accepts a class attribute value and
# returns a true value if that class is allowed on the tag.
allowed = {
    'code': classes.__contains__,
    'span': span.__contains__,
    'div': div.__contains__,
    'a': a.__contains__,
    'pre': pre.__contains__,
    'ul': lambda value: value == 'toc',
    'ol': lambda value: value == 'toc',
    'li': lambda value: value == 'ignoretoc',
}
| 207 | |
def getDefaultChecker():
    """
    Return a L{DefaultTagChecker} that accepts the tags listed in C{tags}
    and the per-tag classes described by C{allowed}.
    """
    isAllowedTag = tags.__contains__
    return DefaultTagChecker(isAllowedTag, allowed)
| 210 | |
def doFile(file, checker):
    """
    Parse C{file} with C{tree.parseFileAndReport} and, if that yields a true
    value, run C{checker.check} on the result.

    @param file: the name of the file to check.
    @param checker: an object with a C{check(dom, filename)} method.
    """
    document = tree.parseFileAndReport(file)
    if not document:
        # Nothing usable came back from the parser, so there is nothing to check.
        return
    checker.check(document, file)
| OLD | NEW |