Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(316)

Side by Side Diff: sky/tools/webkitpy/thirdparty/coverage/parser.py

Issue 946753002: Delete a bunch of dead python code. (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 """Code parsing for Coverage."""
2
3 import opcode, re, sys, token, tokenize
4
5 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
6 from coverage.backward import open_source
7 from coverage.bytecode import ByteCodes, CodeObjects
8 from coverage.misc import nice_pair, expensive, join_regex
9 from coverage.misc import CoverageException, NoSource, NotPython
10
11
12 class CodeParser(object):
13 """Parse code to find executable lines, excluded lines, etc."""
14
15 def __init__(self, text=None, filename=None, exclude=None):
16 """
17 Source can be provided as `text`, the text itself, or `filename`, from
18 which the text will be read. Excluded lines are those that match
19 `exclude`, a regex.
20
21 """
22 assert text or filename, "CodeParser needs either text or filename"
23 self.filename = filename or "<code>"
24 self.text = text
25 if not self.text:
26 try:
27 sourcef = open_source(self.filename)
28 try:
29 self.text = sourcef.read()
30 finally:
31 sourcef.close()
32 except IOError:
33 _, err, _ = sys.exc_info()
34 raise NoSource(
35 "No source for code: %r: %s" % (self.filename, err)
36 )
37
38 self.exclude = exclude
39
40 self.show_tokens = False
41
42 # The text lines of the parsed code.
43 self.lines = self.text.split('\n')
44
45 # The line numbers of excluded lines of code.
46 self.excluded = set()
47
48 # The line numbers of docstring lines.
49 self.docstrings = set()
50
51 # The line numbers of class definitions.
52 self.classdefs = set()
53
54 # A dict mapping line numbers to (lo,hi) for multi-line statements.
55 self.multiline = {}
56
57 # The line numbers that start statements.
58 self.statement_starts = set()
59
60 # Lazily-created ByteParser
61 self._byte_parser = None
62
63 def _get_byte_parser(self):
64 """Create a ByteParser on demand."""
65 if not self._byte_parser:
66 self._byte_parser = \
67 ByteParser(text=self.text, filename=self.filename)
68 return self._byte_parser
69 byte_parser = property(_get_byte_parser)
70
71 def lines_matching(self, *regexes):
72 """Find the lines matching one of a list of regexes.
73
74 Returns a set of line numbers, the lines that contain a match for one
75 of the regexes in `regexes`. The entire line needn't match, just a
76 part of it.
77
78 """
79 regex_c = re.compile(join_regex(regexes))
80 matches = set()
81 for i, ltext in enumerate(self.lines):
82 if regex_c.search(ltext):
83 matches.add(i+1)
84 return matches
85
86 def _raw_parse(self):
87 """Parse the source to find the interesting facts about its lines.
88
89 A handful of member fields are updated.
90
91 """
92 # Find lines which match an exclusion pattern.
93 if self.exclude:
94 self.excluded = self.lines_matching(self.exclude)
95
96 # Tokenize, to find excluded suites, to find docstrings, and to find
97 # multi-line statements.
98 indent = 0
99 exclude_indent = 0
100 excluding = False
101 prev_toktype = token.INDENT
102 first_line = None
103 empty = True
104
105 tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
106 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
107 if self.show_tokens: # pragma: no cover
108 print("%10s %5s %-20r %r" % (
109 tokenize.tok_name.get(toktype, toktype),
110 nice_pair((slineno, elineno)), ttext, ltext
111 ))
112 if toktype == token.INDENT:
113 indent += 1
114 elif toktype == token.DEDENT:
115 indent -= 1
116 elif toktype == token.NAME and ttext == 'class':
117 # Class definitions look like branches in the byte code, so
118 # we need to exclude them. The simplest way is to note the
119 # lines with the 'class' keyword.
120 self.classdefs.add(slineno)
121 elif toktype == token.OP and ttext == ':':
122 if not excluding and elineno in self.excluded:
123 # Start excluding a suite. We trigger off of the colon
124 # token so that the #pragma comment will be recognized on
125 # the same line as the colon.
126 exclude_indent = indent
127 excluding = True
128 elif toktype == token.STRING and prev_toktype == token.INDENT:
129 # Strings that are first on an indented line are docstrings.
130 # (a trick from trace.py in the stdlib.) This works for
131 # 99.9999% of cases. For the rest (!) see:
132 # http://stackoverflow.com/questions/1769332/x/1769794#1769794
133 for i in range(slineno, elineno+1):
134 self.docstrings.add(i)
135 elif toktype == token.NEWLINE:
136 if first_line is not None and elineno != first_line:
137 # We're at the end of a line, and we've ended on a
138 # different line than the first line of the statement,
139 # so record a multi-line range.
140 rng = (first_line, elineno)
141 for l in range(first_line, elineno+1):
142 self.multiline[l] = rng
143 first_line = None
144
145 if ttext.strip() and toktype != tokenize.COMMENT:
146 # A non-whitespace token.
147 empty = False
148 if first_line is None:
149 # The token is not whitespace, and is the first in a
150 # statement.
151 first_line = slineno
152 # Check whether to end an excluded suite.
153 if excluding and indent <= exclude_indent:
154 excluding = False
155 if excluding:
156 self.excluded.add(elineno)
157
158 prev_toktype = toktype
159
160 # Find the starts of the executable statements.
161 if not empty:
162 self.statement_starts.update(self.byte_parser._find_statements())
163
164 def first_line(self, line):
165 """Return the first line number of the statement including `line`."""
166 rng = self.multiline.get(line)
167 if rng:
168 first_line = rng[0]
169 else:
170 first_line = line
171 return first_line
172
173 def first_lines(self, lines, ignore=None):
174 """Map the line numbers in `lines` to the correct first line of the
175 statement.
176
177 Skip any line mentioned in `ignore`.
178
179 Returns a sorted list of the first lines.
180
181 """
182 ignore = ignore or []
183 lset = set()
184 for l in lines:
185 if l in ignore:
186 continue
187 new_l = self.first_line(l)
188 if new_l not in ignore:
189 lset.add(new_l)
190 return sorted(lset)
191
192 def parse_source(self):
193 """Parse source text to find executable lines, excluded lines, etc.
194
195 Return values are 1) a sorted list of executable line numbers, and
196 2) a sorted list of excluded line numbers.
197
198 Reported line numbers are normalized to the first line of multi-line
199 statements.
200
201 """
202 self._raw_parse()
203
204 excluded_lines = self.first_lines(self.excluded)
205 ignore = excluded_lines + list(self.docstrings)
206 lines = self.first_lines(self.statement_starts, ignore)
207
208 return lines, excluded_lines
209
210 def arcs(self):
211 """Get information about the arcs available in the code.
212
213 Returns a sorted list of line number pairs. Line numbers have been
214 normalized to the first line of multiline statements.
215
216 """
217 all_arcs = []
218 for l1, l2 in self.byte_parser._all_arcs():
219 fl1 = self.first_line(l1)
220 fl2 = self.first_line(l2)
221 if fl1 != fl2:
222 all_arcs.append((fl1, fl2))
223 return sorted(all_arcs)
224 arcs = expensive(arcs)
225
226 def exit_counts(self):
227 """Get a mapping from line numbers to count of exits from that line.
228
229 Excluded lines are excluded.
230
231 """
232 excluded_lines = self.first_lines(self.excluded)
233 exit_counts = {}
234 for l1, l2 in self.arcs():
235 if l1 < 0:
236 # Don't ever report -1 as a line number
237 continue
238 if l1 in excluded_lines:
239 # Don't report excluded lines as line numbers.
240 continue
241 if l2 in excluded_lines:
242 # Arcs to excluded lines shouldn't count.
243 continue
244 if l1 not in exit_counts:
245 exit_counts[l1] = 0
246 exit_counts[l1] += 1
247
248 # Class definitions have one extra exit, so remove one for each:
249 for l in self.classdefs:
250 # Ensure key is there: classdefs can include excluded lines.
251 if l in exit_counts:
252 exit_counts[l] -= 1
253
254 return exit_counts
255 exit_counts = expensive(exit_counts)
256
257
258 ## Opcodes that guide the ByteParser.
259
260 def _opcode(name):
261 """Return the opcode by name from the opcode module."""
262 return opcode.opmap[name]
263
264 def _opcode_set(*names):
265 """Return a set of opcodes by the names in `names`."""
266 s = set()
267 for name in names:
268 try:
269 s.add(_opcode(name))
270 except KeyError:
271 pass
272 return s
273
274 # Opcodes that leave the code object.
275 OPS_CODE_END = _opcode_set('RETURN_VALUE')
276
277 # Opcodes that unconditionally end the code chunk.
278 OPS_CHUNK_END = _opcode_set(
279 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
280 'BREAK_LOOP', 'CONTINUE_LOOP',
281 )
282
283 # Opcodes that unconditionally begin a new code chunk. By starting new chunks
284 # with unconditional jump instructions, we neatly deal with jumps to jumps
285 # properly.
286 OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
287
288 # Opcodes that push a block on the block stack.
289 OPS_PUSH_BLOCK = _opcode_set(
290 'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
291 )
292
293 # Block types for exception handling.
294 OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
295
296 # Opcodes that pop a block from the block stack.
297 OPS_POP_BLOCK = _opcode_set('POP_BLOCK')
298
299 # Opcodes that have a jump destination, but aren't really a jump.
300 OPS_NO_JUMP = OPS_PUSH_BLOCK
301
302 # Individual opcodes we need below.
303 OP_BREAK_LOOP = _opcode('BREAK_LOOP')
304 OP_END_FINALLY = _opcode('END_FINALLY')
305 OP_COMPARE_OP = _opcode('COMPARE_OP')
306 COMPARE_EXCEPTION = 10 # just have to get this const from the code.
307 OP_LOAD_CONST = _opcode('LOAD_CONST')
308 OP_RETURN_VALUE = _opcode('RETURN_VALUE')
309
310
311 class ByteParser(object):
312 """Parse byte codes to understand the structure of code."""
313
314 def __init__(self, code=None, text=None, filename=None):
315 if code:
316 self.code = code
317 self.text = text
318 else:
319 if not text:
320 assert filename, "If no code or text, need a filename"
321 sourcef = open_source(filename)
322 try:
323 text = sourcef.read()
324 finally:
325 sourcef.close()
326 self.text = text
327
328 try:
329 # Python 2.3 and 2.4 don't like partial last lines, so be sure
330 # the text ends nicely for them.
331 self.code = compile(text + '\n', filename, "exec")
332 except SyntaxError:
333 _, synerr, _ = sys.exc_info()
334 raise NotPython(
335 "Couldn't parse '%s' as Python source: '%s' at line %d" %
336 (filename, synerr.msg, synerr.lineno)
337 )
338
339 # Alternative Python implementations don't always provide all the
340 # attributes on code objects that we need to do the analysis.
341 for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
342 if not hasattr(self.code, attr):
343 raise CoverageException(
344 "This implementation of Python doesn't support code "
345 "analysis.\n"
346 "Run coverage.py under CPython for this command."
347 )
348
349 def child_parsers(self):
350 """Iterate over all the code objects nested within this one.
351
352 The iteration includes `self` as its first value.
353
354 """
355 children = CodeObjects(self.code)
356 return [ByteParser(code=c, text=self.text) for c in children]
357
358 # Getting numbers from the lnotab value changed in Py3.0.
359 if sys.version_info >= (3, 0):
360 def _lnotab_increments(self, lnotab):
361 """Return a list of ints from the lnotab bytes in 3.x"""
362 return list(lnotab)
363 else:
364 def _lnotab_increments(self, lnotab):
365 """Return a list of ints from the lnotab string in 2.x"""
366 return [ord(c) for c in lnotab]
367
368 def _bytes_lines(self):
369 """Map byte offsets to line numbers in `code`.
370
371 Uses co_lnotab described in Python/compile.c to map byte offsets to
372 line numbers. Returns a list: [(b0, l0), (b1, l1), ...]
373
374 """
375 # Adapted from dis.py in the standard library.
376 byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
377 line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
378
379 bytes_lines = []
380 last_line_num = None
381 line_num = self.code.co_firstlineno
382 byte_num = 0
383 for byte_incr, line_incr in zip(byte_increments, line_increments):
384 if byte_incr:
385 if line_num != last_line_num:
386 bytes_lines.append((byte_num, line_num))
387 last_line_num = line_num
388 byte_num += byte_incr
389 line_num += line_incr
390 if line_num != last_line_num:
391 bytes_lines.append((byte_num, line_num))
392 return bytes_lines
393
394 def _find_statements(self):
395 """Find the statements in `self.code`.
396
397 Return a set of line numbers that start statements. Recurses into all
398 code objects reachable from `self.code`.
399
400 """
401 stmts = set()
402 for bp in self.child_parsers():
403 # Get all of the lineno information from this code.
404 for _, l in bp._bytes_lines():
405 stmts.add(l)
406 return stmts
407
408 def _split_into_chunks(self):
409 """Split the code object into a list of `Chunk` objects.
410
411 Each chunk is only entered at its first instruction, though there can
412 be many exits from a chunk.
413
414 Returns a list of `Chunk` objects.
415
416 """
417
418 # The list of chunks so far, and the one we're working on.
419 chunks = []
420 chunk = None
421 bytes_lines_map = dict(self._bytes_lines())
422
423 # The block stack: loops and try blocks get pushed here for the
424 # implicit jumps that can occur.
425 # Each entry is a tuple: (block type, destination)
426 block_stack = []
427
428 # Some op codes are followed by branches that should be ignored. This
429 # is a count of how many ignores are left.
430 ignore_branch = 0
431
432 # We have to handle the last two bytecodes specially.
433 ult = penult = None
434
435 for bc in ByteCodes(self.code.co_code):
436 # Maybe have to start a new chunk
437 if bc.offset in bytes_lines_map:
438 # Start a new chunk for each source line number.
439 if chunk:
440 chunk.exits.add(bc.offset)
441 chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
442 chunks.append(chunk)
443 elif bc.op in OPS_CHUNK_BEGIN:
444 # Jumps deserve their own unnumbered chunk. This fixes
445 # problems with jumps to jumps getting confused.
446 if chunk:
447 chunk.exits.add(bc.offset)
448 chunk = Chunk(bc.offset)
449 chunks.append(chunk)
450
451 if not chunk:
452 chunk = Chunk(bc.offset)
453 chunks.append(chunk)
454
455 # Look at the opcode
456 if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
457 if ignore_branch:
458 # Someone earlier wanted us to ignore this branch.
459 ignore_branch -= 1
460 else:
461 # The opcode has a jump, it's an exit for this chunk.
462 chunk.exits.add(bc.jump_to)
463
464 if bc.op in OPS_CODE_END:
465 # The opcode can exit the code object.
466 chunk.exits.add(-self.code.co_firstlineno)
467 if bc.op in OPS_PUSH_BLOCK:
468 # The opcode adds a block to the block_stack.
469 block_stack.append((bc.op, bc.jump_to))
470 if bc.op in OPS_POP_BLOCK:
471 # The opcode pops a block from the block stack.
472 block_stack.pop()
473 if bc.op in OPS_CHUNK_END:
474 # This opcode forces the end of the chunk.
475 if bc.op == OP_BREAK_LOOP:
476 # A break is implicit: jump where the top of the
477 # block_stack points.
478 chunk.exits.add(block_stack[-1][1])
479 chunk = None
480 if bc.op == OP_END_FINALLY:
481 if block_stack:
482 # A break that goes through a finally will jump to whatever
483 # block is on top of the stack.
484 chunk.exits.add(block_stack[-1][1])
485 # For the finally clause we need to find the closest exception
486 # block, and use its jump target as an exit.
487 for iblock in range(len(block_stack)-1, -1, -1):
488 if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
489 chunk.exits.add(block_stack[iblock][1])
490 break
491 if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
492 # This is an except clause. We want to overlook the next
493 # branch, so that except's don't count as branches.
494 ignore_branch += 1
495
496 penult = ult
497 ult = bc
498
499 if chunks:
500 # The last two bytecodes could be a dummy "return None" that
501 # shouldn't be counted as real code. Every Python code object seems
502 # to end with a return, and a "return None" is inserted if there
503 # isn't an explicit return in the source.
504 if ult and penult:
505 if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
506 if self.code.co_consts[penult.arg] is None:
507 # This is "return None", but is it dummy? A real line
508 # would be a last chunk all by itself.
509 if chunks[-1].byte != penult.offset:
510 ex = -self.code.co_firstlineno
511 # Split the last chunk
512 last_chunk = chunks[-1]
513 last_chunk.exits.remove(ex)
514 last_chunk.exits.add(penult.offset)
515 chunk = Chunk(penult.offset)
516 chunk.exits.add(ex)
517 chunks.append(chunk)
518
519 # Give all the chunks a length.
520 chunks[-1].length = bc.next_offset - chunks[-1].byte
521 for i in range(len(chunks)-1):
522 chunks[i].length = chunks[i+1].byte - chunks[i].byte
523
524 return chunks
525
526 def _arcs(self):
527 """Find the executable arcs in the code.
528
529 Returns a set of pairs, (from,to). From and to are integer line
530 numbers. If from is < 0, then the arc is an entrance into the code
531 object. If to is < 0, the arc is an exit from the code object.
532
533 """
534 chunks = self._split_into_chunks()
535
536 # A map from byte offsets to chunks jumped into.
537 byte_chunks = dict([(c.byte, c) for c in chunks])
538
539 # Build a map from byte offsets to actual lines reached.
540 byte_lines = {}
541 bytes_to_add = set([c.byte for c in chunks])
542
543 while bytes_to_add:
544 byte_to_add = bytes_to_add.pop()
545 if byte_to_add in byte_lines or byte_to_add < 0:
546 continue
547
548 # Which lines does this chunk lead to?
549 bytes_considered = set()
550 bytes_to_consider = [byte_to_add]
551 lines = set()
552
553 while bytes_to_consider:
554 byte = bytes_to_consider.pop()
555 bytes_considered.add(byte)
556
557 # Find chunk for byte
558 try:
559 ch = byte_chunks[byte]
560 except KeyError:
561 for ch in chunks:
562 if ch.byte <= byte < ch.byte+ch.length:
563 break
564 else:
565 # No chunk for this byte!
566 raise Exception("Couldn't find chunk @ %d" % byte)
567 byte_chunks[byte] = ch
568
569 if ch.line:
570 lines.add(ch.line)
571 else:
572 for ex in ch.exits:
573 if ex < 0:
574 lines.add(ex)
575 elif ex not in bytes_considered:
576 bytes_to_consider.append(ex)
577
578 bytes_to_add.update(ch.exits)
579
580 byte_lines[byte_to_add] = lines
581
582 # Figure out for each chunk where the exits go.
583 arcs = set()
584 for chunk in chunks:
585 if chunk.line:
586 for ex in chunk.exits:
587 if ex < 0:
588 exit_lines = [ex]
589 else:
590 exit_lines = byte_lines[ex]
591 for exit_line in exit_lines:
592 if chunk.line != exit_line:
593 arcs.add((chunk.line, exit_line))
594 for line in byte_lines[0]:
595 arcs.add((-1, line))
596
597 return arcs
598
599 def _all_chunks(self):
600 """Returns a list of `Chunk` objects for this code and its children.
601
602 See `_split_into_chunks` for details.
603
604 """
605 chunks = []
606 for bp in self.child_parsers():
607 chunks.extend(bp._split_into_chunks())
608
609 return chunks
610
611 def _all_arcs(self):
612 """Get the set of all arcs in this code object and its children.
613
614 See `_arcs` for details.
615
616 """
617 arcs = set()
618 for bp in self.child_parsers():
619 arcs.update(bp._arcs())
620
621 return arcs
622
623
624 class Chunk(object):
625 """A sequence of bytecodes with a single entrance.
626
627 To analyze byte code, we have to divide it into chunks, sequences of byte
628 codes such that each basic block has only one entrance, the first
629 instruction in the block.
630
631 This is almost the CS concept of `basic block`_, except that we're willing
632 to have many exits from a chunk, and "basic block" is a more cumbersome
633 term.
634
635 .. _basic block: http://en.wikipedia.org/wiki/Basic_block
636
637 An exit < 0 means the chunk can leave the code (return). The exit is
638 the negative of the starting line number of the code block.
639
640 """
641 def __init__(self, byte, line=0):
642 self.byte = byte
643 self.line = line
644 self.length = 0
645 self.exits = set()
646
647 def __repr__(self):
648 return "<%d+%d @%d %r>" % (
649 self.byte, self.length, self.line, list(self.exits)
650 )
OLDNEW
« no previous file with comments | « sky/tools/webkitpy/thirdparty/coverage/misc.py ('k') | sky/tools/webkitpy/thirdparty/coverage/phystokens.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698