OLD | NEW |
1 """Code parsing for Coverage.""" | 1 """Code parsing for Coverage.""" |
2 | 2 |
3 import opcode, re, sys, token, tokenize | 3 import dis, re, sys, token, tokenize |
4 | 4 |
5 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622 | 5 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622 |
6 from coverage.backward import open_source | 6 from coverage.backward import open_source, range # pylint: disable=W0622 |
| 7 from coverage.backward import reversed # pylint: disable=W0622 |
| 8 from coverage.backward import bytes_to_ints |
7 from coverage.bytecode import ByteCodes, CodeObjects | 9 from coverage.bytecode import ByteCodes, CodeObjects |
8 from coverage.misc import nice_pair, expensive, join_regex | 10 from coverage.misc import nice_pair, expensive, join_regex |
9 from coverage.misc import CoverageException, NoSource, NotPython | 11 from coverage.misc import CoverageException, NoSource, NotPython |
10 | 12 |
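Note: `bytes_to_ints` is pulled in from coverage.backward but its definition is not part of this diff. A plausible shim, assuming it papers over the py2/py3 difference in iterating byte strings (it mirrors the `_lnotab_increments` helpers this change deletes further down):

    import sys

    if sys.version_info >= (3, 0):
        def bytes_to_ints(byte_values):
            # Iterating a 3.x bytes object yields ints directly.
            return byte_values
    else:
        def bytes_to_ints(byte_values):
            # In 2.x, each element of a byte string is a str of length 1.
            return [ord(c) for c in byte_values]
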
11 | 13 |
12 class CodeParser(object): | 14 class CodeParser(object): |
13 """Parse code to find executable lines, excluded lines, etc.""" | 15 """Parse code to find executable lines, excluded lines, etc.""" |
14 | 16 |
15 def __init__(self, text=None, filename=None, exclude=None): | 17 def __init__(self, text=None, filename=None, exclude=None): |
16 """ | 18 """ |
(...skipping 82 matching lines...) |
99 | 101 |
100 # Tokenize, to find excluded suites, to find docstrings, and to find | 102 # Tokenize, to find excluded suites, to find docstrings, and to find |
101 # multi-line statements. | 103 # multi-line statements. |
102 indent = 0 | 104 indent = 0 |
103 exclude_indent = 0 | 105 exclude_indent = 0 |
104 excluding = False | 106 excluding = False |
105 prev_toktype = token.INDENT | 107 prev_toktype = token.INDENT |
106 first_line = None | 108 first_line = None |
107 empty = True | 109 empty = True |
108 | 110 |
109 tokgen = tokenize.generate_tokens(StringIO(self.text).readline) | 111 tokgen = generate_tokens(self.text) |
110 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: | 112 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: |
111 if self.show_tokens: # pragma: not covered | 113 if self.show_tokens: # pragma: not covered |
112 print("%10s %5s %-20r %r" % ( | 114 print("%10s %5s %-20r %r" % ( |
113 tokenize.tok_name.get(toktype, toktype), | 115 tokenize.tok_name.get(toktype, toktype), |
114 nice_pair((slineno, elineno)), ttext, ltext | 116 nice_pair((slineno, elineno)), ttext, ltext |
115 )) | 117 )) |
116 if toktype == token.INDENT: | 118 if toktype == token.INDENT: |
117 indent += 1 | 119 indent += 1 |
118 elif toktype == token.DEDENT: | 120 elif toktype == token.DEDENT: |
119 indent -= 1 | 121 indent -= 1 |
120 elif toktype == token.NAME and ttext == 'class': | 122 elif toktype == token.NAME and ttext == 'class': |
121 # Class definitions look like branches in the byte code, so | 123 # Class definitions look like branches in the byte code, so |
122 # we need to exclude them. The simplest way is to note the | 124 # we need to exclude them. The simplest way is to note the |
123 # lines with the 'class' keyword. | 125 # lines with the 'class' keyword. |
124 self.classdefs.add(slineno) | 126 self.classdefs.add(slineno) |
125 elif toktype == token.OP and ttext == ':': | 127 elif toktype == token.OP and ttext == ':': |
126 if not excluding and elineno in self.excluded: | 128 if not excluding and elineno in self.excluded: |
127 # Start excluding a suite. We trigger off of the colon | 129 # Start excluding a suite. We trigger off of the colon |
128 # token so that the #pragma comment will be recognized on | 130 # token so that the #pragma comment will be recognized on |
129 # the same line as the colon. | 131 # the same line as the colon. |
130 exclude_indent = indent | 132 exclude_indent = indent |
131 excluding = True | 133 excluding = True |
132 elif toktype == token.STRING and prev_toktype == token.INDENT: | 134 elif toktype == token.STRING and prev_toktype == token.INDENT: |
133 # Strings that are first on an indented line are docstrings. | 135 # Strings that are first on an indented line are docstrings. |
134 # (a trick from trace.py in the stdlib.) This works for | 136 # (a trick from trace.py in the stdlib.) This works for |
135 # 99.9999% of cases. For the rest (!) see: | 137 # 99.9999% of cases. For the rest (!) see: |
136 # http://stackoverflow.com/questions/1769332/x/1769794#1769794 | 138 # http://stackoverflow.com/questions/1769332/x/1769794#1769794 |
137 for i in range(slineno, elineno+1): | 139 self.docstrings.update(range(slineno, elineno+1)) |
138 self.docstrings.add(i) | |
139 elif toktype == token.NEWLINE: | 140 elif toktype == token.NEWLINE: |
140 if first_line is not None and elineno != first_line: | 141 if first_line is not None and elineno != first_line: |
141 # We're at the end of a line, and we've ended on a | 142 # We're at the end of a line, and we've ended on a |
142 # different line than the first line of the statement, | 143 # different line than the first line of the statement, |
143 # so record a multi-line range. | 144 # so record a multi-line range. |
144 rng = (first_line, elineno) | 145 rng = (first_line, elineno) |
145 for l in range(first_line, elineno+1): | 146 for l in range(first_line, elineno+1): |
146 self.multiline[l] = rng | 147 self.multiline[l] = rng |
147 first_line = None | 148 first_line = None |
148 | 149 |
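Note: the docstring trick above (a STRING token immediately after an INDENT) can be exercised in isolation; a minimal sketch, with the sample source invented for illustration:

    import token, tokenize
    from io import StringIO

    SRC = 'def f():\n    "doc"\n    return 1\n'

    prev_toktype = token.INDENT
    for toktype, _, (slineno, _), (elineno, _), _ in \
            tokenize.generate_tokens(StringIO(SRC).readline):
        if toktype == token.STRING and prev_toktype == token.INDENT:
            print("docstring on lines %d-%d" % (slineno, elineno))
        prev_toktype = toktype

For SRC this prints "docstring on lines 2-2"; a string that merely continues a statement does not follow an INDENT and is correctly skipped.
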
(...skipping 18 matching lines...) |
167 | 168 |
168 def first_line(self, line): | 169 def first_line(self, line): |
169 """Return the first line number of the statement including `line`.""" | 170 """Return the first line number of the statement including `line`.""" |
170 rng = self.multiline.get(line) | 171 rng = self.multiline.get(line) |
171 if rng: | 172 if rng: |
172 first_line = rng[0] | 173 first_line = rng[0] |
173 else: | 174 else: |
174 first_line = line | 175 first_line = line |
175 return first_line | 176 return first_line |
176 | 177 |
177 def first_lines(self, lines, ignore=None): | 178 def first_lines(self, lines, *ignores): |
178 """Map the line numbers in `lines` to the correct first line of the | 179 """Map the line numbers in `lines` to the correct first line of the |
179 statement. | 180 statement. |
180 | 181 |
181 Skip any line mentioned in `ignore`. | 182 Skip any line mentioned in any of the sequences in `ignores`. |
182 | 183 |
183 Returns a sorted list of the first lines. | 184 Returns a set of the first lines. |
184 | 185 |
185 """ | 186 """ |
186 ignore = ignore or [] | 187 ignore = set() |
| 188 for ign in ignores: |
| 189 ignore.update(ign) |
187 lset = set() | 190 lset = set() |
188 for l in lines: | 191 for l in lines: |
189 if l in ignore: | 192 if l in ignore: |
190 continue | 193 continue |
191 new_l = self.first_line(l) | 194 new_l = self.first_line(l) |
192 if new_l not in ignore: | 195 if new_l not in ignore: |
193 lset.add(new_l) | 196 lset.add(new_l) |
194 return sorted(lset) | 197 return lset |
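Note: concretely, `_raw_parse` records a statement spanning lines 3-5 as `multiline = {3: (3, 5), 4: (3, 5), 5: (3, 5)}`, so `first_line` collapses any of the three to 3. A hand-built sketch of that behavior:

    multiline = {3: (3, 5), 4: (3, 5), 5: (3, 5)}

    def first_line(line):
        rng = multiline.get(line)
        return rng[0] if rng else line

    assert first_line(4) == 3   # inside the multi-line statement
    assert first_line(7) == 7   # a single-line statement maps to itself
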
195 | 198 |
196 def parse_source(self): | 199 def parse_source(self): |
197 """Parse source text to find executable lines, excluded lines, etc. | 200 """Parse source text to find executable lines, excluded lines, etc. |
198 | 201 |
199 Return values are 1) a sorted list of executable line numbers, and | 202 Return values are 1) a set of executable line numbers, and 2) a set of |
200 2) a sorted list of excluded line numbers. | 203 excluded line numbers. |
201 | 204 |
202 Reported line numbers are normalized to the first line of multi-line | 205 Reported line numbers are normalized to the first line of multi-line |
203 statements. | 206 statements. |
204 | 207 |
205 """ | 208 """ |
206 try: | 209 try: |
207 self._raw_parse() | 210 self._raw_parse() |
208 except (tokenize.TokenError, IndentationError): | 211 except (tokenize.TokenError, IndentationError): |
209 _, tokerr, _ = sys.exc_info() | 212 _, tokerr, _ = sys.exc_info() |
210 msg, lineno = tokerr.args | 213 msg, lineno = tokerr.args |
211 raise NotPython( | 214 raise NotPython( |
212 "Couldn't parse '%s' as Python source: '%s' at %s" % | 215 "Couldn't parse '%s' as Python source: '%s' at %s" % |
213 (self.filename, msg, lineno) | 216 (self.filename, msg, lineno) |
214 ) | 217 ) |
215 | 218 |
216 excluded_lines = self.first_lines(self.excluded) | 219 excluded_lines = self.first_lines(self.excluded) |
217 ignore = excluded_lines + list(self.docstrings) | 220 lines = self.first_lines( |
218 lines = self.first_lines(self.statement_starts, ignore) | 221 self.statement_starts, |
| 222 excluded_lines, |
| 223 self.docstrings |
| 224 ) |
219 | 225 |
220 return lines, excluded_lines | 226 return lines, excluded_lines |
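Note: from a caller's point of view, the visible change here is the return type. A usage sketch (the source text and exclude regex are invented for illustration):

    source = (
        "a = 1\n"
        "if a:   # pragma: no cover\n"
        "    b = 2\n"
    )
    parser = CodeParser(text=source, filename="<example>",
                        exclude=r"#\s*pragma:\s*no cover")
    lines, excluded = parser.parse_source()
    # Both values are now sets rather than sorted lists, so callers
    # that relied on ordering must sort for themselves:
    print(sorted(lines), sorted(excluded))
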
221 | 227 |
222 def arcs(self): | 228 def arcs(self): |
223 """Get information about the arcs available in the code. | 229 """Get information about the arcs available in the code. |
224 | 230 |
225 Returns a sorted list of line number pairs. Line numbers have been | 231 Returns a sorted list of line number pairs. Line numbers have been |
226 normalized to the first line of multiline statements. | 232 normalized to the first line of multiline statements. |
227 | 233 |
228 """ | 234 """ |
(...skipping 34 matching lines...) |
263 if l in exit_counts: | 269 if l in exit_counts: |
264 exit_counts[l] -= 1 | 270 exit_counts[l] -= 1 |
265 | 271 |
266 return exit_counts | 272 return exit_counts |
267 exit_counts = expensive(exit_counts) | 273 exit_counts = expensive(exit_counts) |
268 | 274 |
269 | 275 |
270 ## Opcodes that guide the ByteParser. | 276 ## Opcodes that guide the ByteParser. |
271 | 277 |
272 def _opcode(name): | 278 def _opcode(name): |
273 """Return the opcode by name from the opcode module.""" | 279 """Return the opcode by name from the dis module.""" |
274 return opcode.opmap[name] | 280 return dis.opmap[name] |
275 | 281 |
276 def _opcode_set(*names): | 282 def _opcode_set(*names): |
277 """Return a set of opcodes by the names in `names`.""" | 283 """Return a set of opcodes by the names in `names`.""" |
278 s = set() | 284 s = set() |
279 for name in names: | 285 for name in names: |
280 try: | 286 try: |
281 s.add(_opcode(name)) | 287 s.add(_opcode(name)) |
282 except KeyError: | 288 except KeyError: |
283 pass | 289 pass |
284 return s | 290 return s |
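Note: the KeyError guard is what keeps one opcode list usable across interpreter versions; a name that doesn't exist in the running Python is simply skipped. A standalone illustration (these opcode names exist only in some versions, which is the point):

    import dis

    ops = set()
    for name in ('SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'):
        try:
            ops.add(dis.opmap[name])   # numeric opcode in this version
        except KeyError:
            pass                       # name absent here; set is smaller
    print(sorted(ops))
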
(...skipping 75 matching lines...) |
360 | 366 |
361 def child_parsers(self): | 367 def child_parsers(self): |
362 """Iterate over all the code objects nested within this one. | 368 """Iterate over all the code objects nested within this one. |
363 | 369 |
364 The iteration includes `self` as its first value. | 370 The iteration includes `self` as its first value. |
365 | 371 |
366 """ | 372 """ |
367 children = CodeObjects(self.code) | 373 children = CodeObjects(self.code) |
368 return [ByteParser(code=c, text=self.text) for c in children] | 374 return [ByteParser(code=c, text=self.text) for c in children] |
369 | 375 |
370 # Getting numbers from the lnotab value changed in Py3.0. | |
371 if sys.version_info >= (3, 0): | |
372 def _lnotab_increments(self, lnotab): | |
373 """Return a list of ints from the lnotab bytes in 3.x""" | |
374 return list(lnotab) | |
375 else: | |
376 def _lnotab_increments(self, lnotab): | |
377 """Return a list of ints from the lnotab string in 2.x""" | |
378 return [ord(c) for c in lnotab] | |
379 | |
380 def _bytes_lines(self): | 376 def _bytes_lines(self): |
381 """Map byte offsets to line numbers in `code`. | 377 """Map byte offsets to line numbers in `code`. |
382 | 378 |
383 Uses co_lnotab described in Python/compile.c to map byte offsets to | 379 Uses co_lnotab described in Python/compile.c to map byte offsets to |
384 line numbers. Returns a list: [(b0, l0), (b1, l1), ...] | 380 line numbers. Produces a sequence: (b0, l0), (b1, l1), ... |
| 381 |
| 382 Only byte offsets that correspond to line numbers are included in the |
| 383 results. |
385 | 384 |
386 """ | 385 """ |
387 # Adapted from dis.py in the standard library. | 386 # Adapted from dis.py in the standard library. |
388 byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2]) | 387 byte_increments = bytes_to_ints(self.code.co_lnotab[0::2]) |
389 line_increments = self._lnotab_increments(self.code.co_lnotab[1::2]) | 388 line_increments = bytes_to_ints(self.code.co_lnotab[1::2]) |
390 | 389 |
391 bytes_lines = [] | |
392 last_line_num = None | 390 last_line_num = None |
393 line_num = self.code.co_firstlineno | 391 line_num = self.code.co_firstlineno |
394 byte_num = 0 | 392 byte_num = 0 |
395 for byte_incr, line_incr in zip(byte_increments, line_increments): | 393 for byte_incr, line_incr in zip(byte_increments, line_increments): |
396 if byte_incr: | 394 if byte_incr: |
397 if line_num != last_line_num: | 395 if line_num != last_line_num: |
398 bytes_lines.append((byte_num, line_num)) | 396 yield (byte_num, line_num) |
399 last_line_num = line_num | 397 last_line_num = line_num |
400 byte_num += byte_incr | 398 byte_num += byte_incr |
401 line_num += line_incr | 399 line_num += line_incr |
402 if line_num != last_line_num: | 400 if line_num != last_line_num: |
403 bytes_lines.append((byte_num, line_num)) | 401 yield (byte_num, line_num) |
404 return bytes_lines | |
405 | 402 |
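Note: the pairs this generator produces can be cross-checked against `dis.findlinestarts`, which performs the same co_lnotab walk in the stdlib (much later Pythons replace co_lnotab entirely, outside the range this code targets):

    import dis

    def f(a):
        b = a + 1
        return b

    # Yields the same (byte offset, line number) pairs that
    # _bytes_lines derives from co_lnotab.
    for byte_off, line_no in dis.findlinestarts(f.__code__):
        print(byte_off, line_no)
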
406 def _find_statements(self): | 403 def _find_statements(self): |
407 """Find the statements in `self.code`. | 404 """Find the statements in `self.code`. |
408 | 405 |
409 Return a set of line numbers that start statements. Recurses into all | 406 Produce a sequence of line numbers that start statements. Recurses |
410 code objects reachable from `self.code`. | 407 into all code objects reachable from `self.code`. |
411 | 408 |
412 """ | 409 """ |
413 stmts = set() | |
414 for bp in self.child_parsers(): | 410 for bp in self.child_parsers(): |
415 # Get all of the lineno information from this code. | 411 # Get all of the lineno information from this code. |
416 for _, l in bp._bytes_lines(): | 412 for _, l in bp._bytes_lines(): |
417 stmts.add(l) | 413 yield l |
418 return stmts | 414 |
| 415 def _block_stack_repr(self, block_stack): |
| 416 """Get a string version of `block_stack`, for debugging.""" |
| 417 blocks = ", ".join( |
| 418 ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack] |
| 419 ) |
| 420 return "[" + blocks + "]" |
419 | 421 |
420 def _split_into_chunks(self): | 422 def _split_into_chunks(self): |
421 """Split the code object into a list of `Chunk` objects. | 423 """Split the code object into a list of `Chunk` objects. |
422 | 424 |
423 Each chunk is only entered at its first instruction, though there can | 425 Each chunk is only entered at its first instruction, though there can |
424 be many exits from a chunk. | 426 be many exits from a chunk. |
425 | 427 |
426 Returns a list of `Chunk` objects. | 428 Returns a list of `Chunk` objects. |
427 | 429 |
428 """ | 430 """ |
429 | |
430 # The list of chunks so far, and the one we're working on. | 431 # The list of chunks so far, and the one we're working on. |
431 chunks = [] | 432 chunks = [] |
432 chunk = None | 433 chunk = None |
| 434 |
| 435 # A dict mapping byte offsets of line starts to the line numbers. |
433 bytes_lines_map = dict(self._bytes_lines()) | 436 bytes_lines_map = dict(self._bytes_lines()) |
434 | 437 |
435 # The block stack: loops and try blocks get pushed here for the | 438 # The block stack: loops and try blocks get pushed here for the |
436 # implicit jumps that can occur. | 439 # implicit jumps that can occur. |
437 # Each entry is a tuple: (block type, destination) | 440 # Each entry is a tuple: (block type, destination) |
438 block_stack = [] | 441 block_stack = [] |
439 | 442 |
440 # Some op codes are followed by branches that should be ignored. This | 443 # Some op codes are followed by branches that should be ignored. This |
441 # is a count of how many ignores are left. | 444 # is a count of how many ignores are left. |
442 ignore_branch = 0 | 445 ignore_branch = 0 |
443 | 446 |
444 # We have to handle the last two bytecodes specially. | 447 # We have to handle the last two bytecodes specially. |
445 ult = penult = None | 448 ult = penult = None |
446 | 449 |
447 for bc in ByteCodes(self.code.co_code): | 450 # Get a set of all of the jump-to points. |
| 451 jump_to = set() |
| 452 bytecodes = list(ByteCodes(self.code.co_code)) |
| 453 for bc in bytecodes: |
| 454 if bc.jump_to >= 0: |
| 455 jump_to.add(bc.jump_to) |
| 456 |
| 457 chunk_lineno = 0 |
| 458 |
| 459 # Walk the byte codes building chunks. |
| 460 for bc in bytecodes: |
448 # Maybe have to start a new chunk | 461 # Maybe have to start a new chunk |
| 462 start_new_chunk = False |
| 463 first_chunk = False |
449 if bc.offset in bytes_lines_map: | 464 if bc.offset in bytes_lines_map: |
450 # Start a new chunk for each source line number. | 465 # Start a new chunk for each source line number. |
451 if chunk: | 466 start_new_chunk = True |
452 chunk.exits.add(bc.offset) | 467 chunk_lineno = bytes_lines_map[bc.offset] |
453 chunk = Chunk(bc.offset, bytes_lines_map[bc.offset]) | 468 first_chunk = True |
454 chunks.append(chunk) | 469 elif bc.offset in jump_to: |
| 470 # To make chunks have a single entrance, we have to make a new |
| 471 # chunk when we get to a place some bytecode jumps to. |
| 472 start_new_chunk = True |
455 elif bc.op in OPS_CHUNK_BEGIN: | 473 elif bc.op in OPS_CHUNK_BEGIN: |
456 # Jumps deserve their own unnumbered chunk. This fixes | 474 # Jumps deserve their own unnumbered chunk. This fixes |
457 # problems with jumps to jumps getting confused. | 475 # problems with jumps to jumps getting confused. |
| 476 start_new_chunk = True |
| 477 |
| 478 if not chunk or start_new_chunk: |
458 if chunk: | 479 if chunk: |
459 chunk.exits.add(bc.offset) | 480 chunk.exits.add(bc.offset) |
460 chunk = Chunk(bc.offset) | 481 chunk = Chunk(bc.offset, chunk_lineno, first_chunk) |
461 chunks.append(chunk) | |
462 | |
463 if not chunk: | |
464 chunk = Chunk(bc.offset) | |
465 chunks.append(chunk) | 482 chunks.append(chunk) |
466 | 483 |
467 # Look at the opcode | 484 # Look at the opcode |
468 if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP: | 485 if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP: |
469 if ignore_branch: | 486 if ignore_branch: |
470 # Someone earlier wanted us to ignore this branch. | 487 # Someone earlier wanted us to ignore this branch. |
471 ignore_branch -= 1 | 488 ignore_branch -= 1 |
472 else: | 489 else: |
473 # The opcode has a jump, it's an exit for this chunk. | 490 # The opcode has a jump, it's an exit for this chunk. |
474 chunk.exits.add(bc.jump_to) | 491 chunk.exits.add(bc.jump_to) |
475 | 492 |
476 if bc.op in OPS_CODE_END: | 493 if bc.op in OPS_CODE_END: |
477 # The opcode can exit the code object. | 494 # The opcode can exit the code object. |
478 chunk.exits.add(-self.code.co_firstlineno) | 495 chunk.exits.add(-self.code.co_firstlineno) |
479 if bc.op in OPS_PUSH_BLOCK: | 496 if bc.op in OPS_PUSH_BLOCK: |
480 # The opcode adds a block to the block_stack. | 497 # The opcode adds a block to the block_stack. |
481 block_stack.append((bc.op, bc.jump_to)) | 498 block_stack.append((bc.op, bc.jump_to)) |
482 if bc.op in OPS_POP_BLOCK: | 499 if bc.op in OPS_POP_BLOCK: |
483 # The opcode pops a block from the block stack. | 500 # The opcode pops a block from the block stack. |
484 block_stack.pop() | 501 block_stack.pop() |
485 if bc.op in OPS_CHUNK_END: | 502 if bc.op in OPS_CHUNK_END: |
486 # This opcode forces the end of the chunk. | 503 # This opcode forces the end of the chunk. |
487 if bc.op == OP_BREAK_LOOP: | 504 if bc.op == OP_BREAK_LOOP: |
488 # A break is implicit: jump where the top of the | 505 # A break is implicit: jump where the top of the |
489 # block_stack points. | 506 # block_stack points. |
490 chunk.exits.add(block_stack[-1][1]) | 507 chunk.exits.add(block_stack[-1][1]) |
491 chunk = None | 508 chunk = None |
492 if bc.op == OP_END_FINALLY: | 509 if bc.op == OP_END_FINALLY: |
493 if block_stack: | |
494 # A break that goes through a finally will jump to whatever | |
495 # block is on top of the stack. | |
496 chunk.exits.add(block_stack[-1][1]) | |
497 # For the finally clause we need to find the closest exception | 510 # For the finally clause we need to find the closest exception |
498 # block, and use its jump target as an exit. | 511 # block, and use its jump target as an exit. |
499 for iblock in range(len(block_stack)-1, -1, -1): | 512 for block in reversed(block_stack): |
500 if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS: | 513 if block[0] in OPS_EXCEPT_BLOCKS: |
501 chunk.exits.add(block_stack[iblock][1]) | 514 chunk.exits.add(block[1]) |
502 break | 515 break |
503 if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION: | 516 if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION: |
504 # This is an except clause. We want to overlook the next | 517 # This is an except clause. We want to overlook the next |
505 # branch, so that except clauses don't count as branches. | 518 # branch, so that except clauses don't count as branches. |
506 ignore_branch += 1 | 519 ignore_branch += 1 |
507 | 520 |
508 penult = ult | 521 penult = ult |
509 ult = bc | 522 ult = bc |
510 | 523 |
511 if chunks: | 524 if chunks: |
512 # The last two bytecodes could be a dummy "return None" that | 525 # The last two bytecodes could be a dummy "return None" that |
513 # shouldn't be counted as real code. Every Python code object seems | 526 # shouldn't be counted as real code. Every Python code object seems |
514 # to end with a return, and a "return None" is inserted if there | 527 # to end with a return, and a "return None" is inserted if there |
515 # isn't an explicit return in the source. | 528 # isn't an explicit return in the source. |
516 if ult and penult: | 529 if ult and penult: |
517 if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE: | 530 if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE: |
518 if self.code.co_consts[penult.arg] is None: | 531 if self.code.co_consts[penult.arg] is None: |
519 # This is "return None", but is it dummy? A real line | 532 # This is "return None", but is it dummy? A real line |
520 # would be a last chunk all by itself. | 533 # would be a last chunk all by itself. |
521 if chunks[-1].byte != penult.offset: | 534 if chunks[-1].byte != penult.offset: |
522 ex = -self.code.co_firstlineno | 535 ex = -self.code.co_firstlineno |
523 # Split the last chunk | 536 # Split the last chunk |
524 last_chunk = chunks[-1] | 537 last_chunk = chunks[-1] |
525 last_chunk.exits.remove(ex) | 538 last_chunk.exits.remove(ex) |
526 last_chunk.exits.add(penult.offset) | 539 last_chunk.exits.add(penult.offset) |
527 chunk = Chunk(penult.offset) | 540 chunk = Chunk( |
| 541 penult.offset, last_chunk.line, False |
| 542 ) |
528 chunk.exits.add(ex) | 543 chunk.exits.add(ex) |
529 chunks.append(chunk) | 544 chunks.append(chunk) |
530 | 545 |
531 # Give all the chunks a length. | 546 # Give all the chunks a length. |
532 chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301 | 547 chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301 |
533 for i in range(len(chunks)-1): | 548 for i in range(len(chunks)-1): |
534 chunks[i].length = chunks[i+1].byte - chunks[i].byte | 549 chunks[i].length = chunks[i+1].byte - chunks[i].byte |
535 | 550 |
| 551 #self.validate_chunks(chunks) |
536 return chunks | 552 return chunks |
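Note: to see the chunking on a real code object, a sketch using the `ByteParser(code=..., text=...)` form that `child_parsers` uses above (the empty text is a stand-in, assuming it is only consulted for error messages):

    def f(a):
        if a:
            return 1
        return 2

    bp = ByteParser(code=f.__code__, text="")
    for ch in bp._split_into_chunks():
        print(ch)   # reprs like <0+6 @2! [9, 12]>; values vary by version
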
537 | 553 |
| 554 def validate_chunks(self, chunks): |
| 555 """Validate the rule that chunks have a single entrance.""" |
| 556 # starts is the entrances to the chunks |
| 557 starts = set([ch.byte for ch in chunks]) |
| 558 for ch in chunks: |
| 559 assert all([(ex in starts or ex < 0) for ex in ch.exits]) |
| 560 |
538 def _arcs(self): | 561 def _arcs(self): |
539 """Find the executable arcs in the code. | 562 """Find the executable arcs in the code. |
540 | 563 |
541 Returns a set of pairs, (from,to). From and to are integer line | 564 Yields pairs: (from,to). From and to are integer line numbers. If |
542 numbers. If from is < 0, then the arc is an entrance into the code | 565 from is < 0, then the arc is an entrance into the code object. If to |
543 object. If to is < 0, the arc is an exit from the code object. | 566 is < 0, the arc is an exit from the code object. |
544 | 567 |
545 """ | 568 """ |
546 chunks = self._split_into_chunks() | 569 chunks = self._split_into_chunks() |
547 | 570 |
548 # A map from byte offsets to chunks jumped into. | 571 # A map from byte offsets to chunks jumped into. |
549 byte_chunks = dict([(c.byte, c) for c in chunks]) | 572 byte_chunks = dict([(c.byte, c) for c in chunks]) |
550 | 573 |
551 # Build a map from byte offsets to actual lines reached. | 574 # There's always an entrance at the first chunk. |
552 byte_lines = {} | 575 yield (-1, byte_chunks[0].line) |
553 bytes_to_add = set([c.byte for c in chunks]) | |
554 | 576 |
555 while bytes_to_add: | 577 # Traverse from the first chunk in each line, and yield arcs where |
556 byte_to_add = bytes_to_add.pop() | 578 # the trace function will be invoked. |
557 if byte_to_add in byte_lines or byte_to_add < 0: | 579 for chunk in chunks: |
| 580 if not chunk.first: |
558 continue | 581 continue |
559 | 582 |
560 # Which lines does this chunk lead to? | 583 chunks_considered = set() |
561 bytes_considered = set() | 584 chunks_to_consider = [chunk] |
562 bytes_to_consider = [byte_to_add] | 585 while chunks_to_consider: |
563 lines = set() | 586 # Get the chunk we're considering, and make sure we don't |
| 587 # consider it again |
| 588 this_chunk = chunks_to_consider.pop() |
| 589 chunks_considered.add(this_chunk) |
564 | 590 |
565 while bytes_to_consider: | 591 # For each exit, add the line number if the trace function |
566 byte = bytes_to_consider.pop() | 592 # would be triggered, or add the chunk to those being |
567 bytes_considered.add(byte) | 593 # considered if not. |
| 594 for ex in this_chunk.exits: |
| 595 if ex < 0: |
| 596 yield (chunk.line, ex) |
| 597 else: |
| 598 next_chunk = byte_chunks[ex] |
| 599 if next_chunk in chunks_considered: |
| 600 continue |
568 | 601 |
569 # Find chunk for byte | 602 # The trace function is invoked if visiting the first |
570 try: | 603 # bytecode in a line, or if the transition is a |
571 ch = byte_chunks[byte] | 604 # backward jump. |
572 except KeyError: | 605 backward_jump = next_chunk.byte < this_chunk.byte |
573 for ch in chunks: | 606 if next_chunk.first or backward_jump: |
574 if ch.byte <= byte < ch.byte+ch.length: | 607 if next_chunk.line != chunk.line: |
575 break | 608 yield (chunk.line, next_chunk.line) |
576 else: | 609 else: |
577 # No chunk for this byte! | 610 chunks_to_consider.append(next_chunk) |
578 raise Exception("Couldn't find chunk @ %d" % byte) | |
579 byte_chunks[byte] = ch # pylint: disable=W0631 | |
580 | |
581 if ch.line: | |
582 lines.add(ch.line) | |
583 else: | |
584 for ex in ch.exits: | |
585 if ex < 0: | |
586 lines.add(ex) | |
587 elif ex not in bytes_considered: | |
588 bytes_to_consider.append(ex) | |
589 | |
590 bytes_to_add.update(ch.exits) | |
591 | |
592 byte_lines[byte_to_add] = lines | |
593 | |
594 # Figure out for each chunk where the exits go. | |
595 arcs = set() | |
596 for chunk in chunks: | |
597 if chunk.line: | |
598 for ex in chunk.exits: | |
599 if ex < 0: | |
600 exit_lines = [ex] | |
601 else: | |
602 exit_lines = byte_lines[ex] | |
603 for exit_line in exit_lines: | |
604 if chunk.line != exit_line: | |
605 arcs.add((chunk.line, exit_line)) | |
606 for line in byte_lines[0]: | |
607 arcs.add((-1, line)) | |
608 | |
609 return arcs | |
610 | 611 |
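Note: at the public level these arcs surface through `CodeParser.arcs()` shown earlier, sorted and normalized. A usage sketch with an invented snippet:

    source = (
        "a = 1\n"
        "if a:\n"
        "    b = 2\n"
        "else:\n"
        "    b = 3\n"
    )
    parser = CodeParser(text=source, filename="<example>")
    parser.parse_source()             # populate multi-line info first
    print(parser.arcs())
    # Expect pairs such as (-1, 1) for the module entrance, (2, 3)
    # and (2, 5) for the branch, and negative "to" values for exits.
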
611 def _all_chunks(self): | 612 def _all_chunks(self): |
612 """Returns a list of `Chunk` objects for this code and its children. | 613 """Returns a list of `Chunk` objects for this code and its children. |
613 | 614 |
614 See `_split_into_chunks` for details. | 615 See `_split_into_chunks` for details. |
615 | 616 |
616 """ | 617 """ |
617 chunks = [] | 618 chunks = [] |
618 for bp in self.child_parsers(): | 619 for bp in self.child_parsers(): |
619 chunks.extend(bp._split_into_chunks()) | 620 chunks.extend(bp._split_into_chunks()) |
620 | 621 |
621 return chunks | 622 return chunks |
622 | 623 |
623 def _all_arcs(self): | 624 def _all_arcs(self): |
624 """Get the set of all arcs in this code object and its children. | 625 """Get the set of all arcs in this code object and its children. |
625 | 626 |
626 See `_arcs` for details. | 627 See `_arcs` for details. |
627 | 628 |
628 """ | 629 """ |
629 arcs = set() | 630 arcs = set() |
630 for bp in self.child_parsers(): | 631 for bp in self.child_parsers(): |
631 arcs.update(bp._arcs()) | 632 arcs.update(bp._arcs()) |
632 | 633 |
633 return arcs | 634 return arcs |
634 | 635 |
635 | 636 |
636 class Chunk(object): | 637 class Chunk(object): |
637 """A sequence of bytecodes with a single entrance. | 638 """A sequence of byte codes with a single entrance. |
638 | 639 |
639 To analyze byte code, we have to divide it into chunks, sequences of byte | 640 To analyze byte code, we have to divide it into chunks, sequences of byte |
640 codes such that each basic block has only one entrance, the first | 641 codes such that each chunk has only one entrance, the first instruction in |
641 instruction in the block. | 642 the block. |
642 | 643 |
643 This is almost the CS concept of `basic block`_, except that we're willing | 644 This is almost the CS concept of `basic block`_, except that we're willing |
644 to have many exits from a chunk, and "basic block" is a more cumbersome | 645 to have many exits from a chunk, and "basic block" is a more cumbersome |
645 term. | 646 term. |
646 | 647 |
647 .. _basic block: http://en.wikipedia.org/wiki/Basic_block | 648 .. _basic block: http://en.wikipedia.org/wiki/Basic_block |
648 | 649 |
| 650 `line` is the source line number containing this chunk. |
| 651 |
| 652 `first` is true if this is the first chunk in the source line. |
| 653 |
649 An exit < 0 means the chunk can leave the code (return). The exit is | 654 An exit < 0 means the chunk can leave the code (return). The exit is |
650 the negative of the starting line number of the code block. | 655 the negative of the starting line number of the code block. |
651 | 656 |
652 """ | 657 """ |
653 def __init__(self, byte, line=0): | 658 def __init__(self, byte, line, first): |
654 self.byte = byte | 659 self.byte = byte |
655 self.line = line | 660 self.line = line |
| 661 self.first = first |
656 self.length = 0 | 662 self.length = 0 |
657 self.exits = set() | 663 self.exits = set() |
658 | 664 |
659 def __repr__(self): | 665 def __repr__(self): |
660 return "<%d+%d @%d %r>" % ( | 666 if self.first: |
661 self.byte, self.length, self.line, list(self.exits) | 667 bang = "!" |
| 668 else: |
| 669 bang = "" |
| 670 return "<%d+%d @%d%s %r>" % ( |
| 671 self.byte, self.length, self.line, bang, list(self.exits) |
662 ) | 672 ) |
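Note: a quick illustration of the new repr, with invented values; the "!" marks a chunk that starts its source line:

    chunk = Chunk(byte=0, line=1, first=True)
    chunk.length = 6
    chunk.exits.add(6)
    print(chunk)   # <0+6 @1! [6]>
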
| 673 |
| 674 |
| 675 class CachedTokenizer(object): |
| 676 """A one-element cache around tokenize.generate_tokens. |
| 677 |
| 678 When reporting, coverage.py tokenizes files twice, once to find the |
| 679 structure of the file, and once to syntax-color it. Tokenizing is |
| 680 expensive, and easily cached. |
| 681 |
| 682 This is a one-element cache so that our twice-in-a-row tokenizing doesn't |
| 683 actually tokenize twice. |
| 684 |
| 685 """ |
| 686 def __init__(self): |
| 687 self.last_text = None |
| 688 self.last_tokens = None |
| 689 |
| 690 def generate_tokens(self, text): |
| 691 """A stand-in for `tokenize.generate_tokens`.""" |
| 692 if text != self.last_text: |
| 693 self.last_text = text |
| 694 self.last_tokens = list( |
| 695 tokenize.generate_tokens(StringIO(text).readline) |
| 696 ) |
| 697 return self.last_tokens |
| 698 |
| 699 # Create our generate_tokens cache as a callable replacement function. |
| 700 generate_tokens = CachedTokenizer().generate_tokens |
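Note: usage sketch of the cache; because reporting tokenizes the same text twice in a row, the second call is a cache hit and returns the very same list:

    text = "a = 1\nb = 2\n"
    first = generate_tokens(text)
    second = generate_tokens(text)
    assert first is second   # one tokenization, two consumers

Caching forces the token stream into a list, so callers iterate a list of 5-tuples instead of a lazy generator; the for-loop in `_raw_parse` above handles either transparently.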