OLD | NEW |
(Empty) | |
| 1 """Code parsing for Coverage.""" |
| 2 |
| 3 import dis, re, sys, token, tokenize |
| 4 |
| 5 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622 |
| 6 from coverage.backward import open_source, range # pylint: disable=W0622 |
| 7 from coverage.backward import reversed # pylint: disable=W0622 |
| 8 from coverage.backward import bytes_to_ints |
| 9 from coverage.bytecode import ByteCodes, CodeObjects |
| 10 from coverage.misc import nice_pair, expensive, join_regex |
| 11 from coverage.misc import CoverageException, NoSource, NotPython |
| 12 |
| 13 |
class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, text=None, filename=None, exclude=None):
        """
        Source can be provided as `text`, the text itself, or `filename`, from
        which the text will be read. Excluded lines are those that match
        `exclude`, a regex.

        """
        assert text or filename, "CodeParser needs either text or filename"
        self.filename = filename or "<code>"
        self.text = text
        if not self.text:
            # No text supplied: read it from `filename`, translating read
            # failures into our own NoSource exception.
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    sourcef.close()
            except IOError:
                _, err, _ = sys.exc_info()
                raise NoSource(
                    "No source for code: '%s': %s" % (self.filename, err)
                    )

        # Scrap the BOM if it exists.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]

        self.exclude = exclude

        # Debugging aid: set True to dump tokens while parsing.
        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = self.text.split('\n')

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # The line numbers of class definitions.
        self.classdefs = set()

        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

        # Lazily-created ByteParser (see `byte_parser` property below).
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand."""
        if not self._byte_parser:
            self._byte_parser = \
                ByteParser(text=self.text, filename=self.filename)
        return self._byte_parser
    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Find the lines matching one of a list of regexes.

        Returns a set of line numbers, the lines that contain a match for one
        of the regexes in `regexes`.  The entire line needn't match, just a
        part of it.

        """
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                # Line numbers are 1-based; enumerate is 0-based.
                matches.add(i+1)
        return matches

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated: `excluded`, `classdefs`,
        `docstrings`, `multiline`, and `statement_starts`.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0                  # Current indentation depth.
        exclude_indent = 0          # Indent level where exclusion started.
        excluding = False           # Are we currently excluding a suite?
        # Start as if preceded by an INDENT so a module-level docstring is
        # caught by the STRING-after-INDENT rule below.
        prev_toktype = token.INDENT
        first_line = None           # First line of the current statement.
        empty = True                # True until a real (non-comment) token.

        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: not covered
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Class definitions look like branches in the byte code, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.)  This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                self.docstrings.update(range(slineno, elineno+1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return sorted(lset)

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a sorted list of executable line numbers, and
        2) a sorted list of excluded line numbers.

        Reported line numbers are normalized to the first line of multi-line
        statements.

        Raises `NotPython` if the source can't be tokenized.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            _, tokerr, _ = sys.exc_info()
            # NOTE(review): for tokenize.TokenError, args is
            # (msg, (line, col)), so `lineno` here can be a pair; it is only
            # ever formatted with %s below, so that's harmless.
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                    (self.filename, msg, lineno)
                )

        excluded_lines = self.first_lines(self.excluded)
        # Docstring lines are ignored outright, not reported as excluded.
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)

        return lines, excluded_lines

    def arcs(self):
        """Get information about the arcs available in the code.

        Returns a sorted list of line number pairs.  Line numbers have been
        normalized to the first line of multiline statements.

        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                # Only keep arcs between distinct statements.
                all_arcs.append((fl1, fl2))
        return sorted(all_arcs)
    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.

        Excluded lines are excluded.

        """
        excluded_lines = self.first_lines(self.excluded)
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                # Don't ever report -1 as a line number
                continue
            if l1 in excluded_lines:
                # Don't report excluded lines as line numbers.
                continue
            if l2 in excluded_lines:
                # Arcs to excluded lines shouldn't count.
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        # Class definitions have one extra exit, so remove one for each:
        for l in self.classdefs:
            # Ensure key is there: classdefs can include excluded lines.
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts
    exit_counts = expensive(exit_counts)
| 269 |
| 270 |
| 271 ## Opcodes that guide the ByteParser. |
| 272 |
| 273 def _opcode(name): |
| 274 """Return the opcode by name from the dis module.""" |
| 275 return dis.opmap[name] |
| 276 |
def _opcode_set(*names):
    """Return the set of opcode numbers for the `names` given.

    Names that don't exist on this version of Python are silently
    skipped, so callers can mention opcodes from any Python version.

    """
    found = set()
    for opcode_name in names:
        if opcode_name in dis.opmap:
            found.add(dis.opmap[opcode_name])
    return found
| 286 |
# Opcodes that leave the code object.
OPS_CODE_END = _opcode_set('RETURN_VALUE')

# Opcodes that unconditionally end the code chunk.
OPS_CHUNK_END = _opcode_set(
    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
    'BREAK_LOOP', 'CONTINUE_LOOP',
    )

# Opcodes that unconditionally begin a new code chunk.  By starting new chunks
# with unconditional jump instructions, we neatly deal with jumps to jumps
# properly.
OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')

# Opcodes that push a block on the block stack.
OPS_PUSH_BLOCK = _opcode_set(
    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
    )

# Block types for exception handling.
OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Opcodes that pop a block from the block stack.
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

# Opcodes that have a jump destination, but aren't really a jump.
OPS_NO_JUMP = OPS_PUSH_BLOCK

# Individual opcodes we need below.
# NOTE(review): unlike _opcode_set, _opcode does not tolerate names missing
# from this interpreter's dis.opmap -- these lookups assume the names exist
# on every Python version this module supports; verify before porting.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
COMPARE_EXCEPTION = 10  # just have to get this const from the code.
OP_LOAD_CONST = _opcode('LOAD_CONST')
OP_RETURN_VALUE = _opcode('RETURN_VALUE')
| 322 |
| 323 |
class ByteParser(object):
    """Parse byte codes to understand the structure of code."""

    def __init__(self, code=None, text=None, filename=None):
        # Either an existing code object is supplied via `code`, or source is
        # compiled from `text` (read from `filename` if `text` isn't given).
        if code:
            self.code = code
            self.text = text
        else:
            if not text:
                assert filename, "If no code or text, need a filename"
                sourcef = open_source(filename)
                try:
                    text = sourcef.read()
                finally:
                    sourcef.close()
            self.text = text

            try:
                # Python 2.3 and 2.4 don't like partial last lines, so be sure
                # the text ends nicely for them.
                self.code = compile(text + '\n', filename, "exec")
            except SyntaxError:
                _, synerr, _ = sys.exc_info()
                raise NotPython(
                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
                        (filename, synerr.msg, synerr.lineno)
                    )

        # Alternative Python implementations don't always provide all the
        # attributes on code objects that we need to do the analysis.
        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
            if not hasattr(self.code, attr):
                raise CoverageException(
                    "This implementation of Python doesn't support code "
                    "analysis.\n"
                    "Run coverage.py under CPython for this command."
                    )

    def child_parsers(self):
        """Iterate over all the code objects nested within this one.

        The iteration includes `self` as its first value.

        """
        children = CodeObjects(self.code)
        return [ByteParser(code=c, text=self.text) for c in children]

    def _bytes_lines(self):
        """Map byte offsets to line numbers in `code`.

        Uses co_lnotab described in Python/compile.c to map byte offsets to
        line numbers.  Produces a sequence: (b0, l0), (b1, l1), ...

        Only byte offsets that correspond to line numbers are included in the
        results.

        """
        # Adapted from dis.py in the standard library.
        # co_lnotab interleaves byte-offset deltas (even indexes) with
        # line-number deltas (odd indexes).
        byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
        line_increments = bytes_to_ints(self.code.co_lnotab[1::2])

        last_line_num = None
        line_num = self.code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                # Moving to a new byte offset: emit the pending pair if the
                # line actually changed since the last one we yielded.
                if line_num != last_line_num:
                    yield (byte_num, line_num)
                    last_line_num = line_num
                byte_num += byte_incr
            line_num += line_incr
        if line_num != last_line_num:
            yield (byte_num, line_num)

    def _find_statements(self):
        """Find the statements in `self.code`.

        Produce a sequence of line numbers that start statements.  Recurses
        into all code objects reachable from `self.code`.

        """
        for bp in self.child_parsers():
            # Get all of the lineno information from this code.
            for _, l in bp._bytes_lines():
                yield l

    def _block_stack_repr(self, block_stack):
        """Get a string version of `block_stack`, for debugging."""
        blocks = ", ".join(
            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
            )
        return "[" + blocks + "]"

    def _split_into_chunks(self):
        """Split the code object into a list of `Chunk` objects.

        Each chunk is only entered at its first instruction, though there can
        be many exits from a chunk.

        Returns a list of `Chunk` objects.

        """
        # The list of chunks so far, and the one we're working on.
        chunks = []
        chunk = None

        # A dict mapping byte offsets of line starts to the line numbers.
        bytes_lines_map = dict(self._bytes_lines())

        # The block stack: loops and try blocks get pushed here for the
        # implicit jumps that can occur.
        # Each entry is a tuple: (block type, destination)
        block_stack = []

        # Some op codes are followed by branches that should be ignored.  This
        # is a count of how many ignores are left.
        ignore_branch = 0

        # We have to handle the last two bytecodes specially.
        ult = penult = None

        # Get a set of all of the jump-to points.
        jump_to = set()
        for bc in ByteCodes(self.code.co_code):
            if bc.jump_to >= 0:
                jump_to.add(bc.jump_to)

        chunk_lineno = 0

        # Walk the byte codes building chunks.
        for bc in ByteCodes(self.code.co_code):
            # Maybe have to start a new chunk
            start_new_chunk = False
            first_chunk = False
            if bc.offset in bytes_lines_map:
                # Start a new chunk for each source line number.
                start_new_chunk = True
                chunk_lineno = bytes_lines_map[bc.offset]
                first_chunk = True
            elif bc.offset in jump_to:
                # To make chunks have a single entrance, we have to make a new
                # chunk when we get to a place some bytecode jumps to.
                start_new_chunk = True
            elif bc.op in OPS_CHUNK_BEGIN:
                # Jumps deserve their own unnumbered chunk.  This fixes
                # problems with jumps to jumps getting confused.
                start_new_chunk = True

            if not chunk or start_new_chunk:
                if chunk:
                    # Falling through to the new chunk counts as an exit.
                    chunk.exits.add(bc.offset)
                chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
                chunks.append(chunk)

            # Look at the opcode
            if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                if ignore_branch:
                    # Someone earlier wanted us to ignore this branch.
                    ignore_branch -= 1
                else:
                    # The opcode has a jump, it's an exit for this chunk.
                    chunk.exits.add(bc.jump_to)

            if bc.op in OPS_CODE_END:
                # The opcode can exit the code object.  Exits out of the code
                # object are encoded as the negative of its first line number.
                chunk.exits.add(-self.code.co_firstlineno)
            if bc.op in OPS_PUSH_BLOCK:
                # The opcode adds a block to the block_stack.
                block_stack.append((bc.op, bc.jump_to))
            if bc.op in OPS_POP_BLOCK:
                # The opcode pops a block from the block stack.
                block_stack.pop()
            if bc.op in OPS_CHUNK_END:
                # This opcode forces the end of the chunk.
                if bc.op == OP_BREAK_LOOP:
                    # A break is implicit: jump where the top of the
                    # block_stack points.
                    chunk.exits.add(block_stack[-1][1])
                chunk = None
            if bc.op == OP_END_FINALLY:
                # For the finally clause we need to find the closest exception
                # block, and use its jump target as an exit.
                for block in reversed(block_stack):
                    if block[0] in OPS_EXCEPT_BLOCKS:
                        chunk.exits.add(block[1])
                        break
            if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                # This is an except clause.  We want to overlook the next
                # branch, so that except's don't count as branches.
                ignore_branch += 1

            penult = ult
            ult = bc

        if chunks:
            # The last two bytecodes could be a dummy "return None" that
            # shouldn't be counted as real code.  Every Python code object
            # seems to end with a return, and a "return None" is inserted if
            # there isn't an explicit return in the source.
            if ult and penult:
                if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                    if self.code.co_consts[penult.arg] is None:
                        # This is "return None", but is it dummy?  A real line
                        # would be a last chunk all by itself.
                        if chunks[-1].byte != penult.offset:
                            ex = -self.code.co_firstlineno
                            # Split the last chunk: the dummy return becomes
                            # its own unnumbered chunk carrying the code exit.
                            last_chunk = chunks[-1]
                            last_chunk.exits.remove(ex)
                            last_chunk.exits.add(penult.offset)
                            chunk = Chunk(
                                penult.offset, last_chunk.line, False
                                )
                            chunk.exits.add(ex)
                            chunks.append(chunk)

            # Give all the chunks a length.
            chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
            for i in range(len(chunks)-1):
                chunks[i].length = chunks[i+1].byte - chunks[i].byte

        #self.validate_chunks(chunks)
        return chunks

    def validate_chunks(self, chunks):
        """Validate the rule that chunks have a single entrance."""
        # starts is the entrances to the chunks
        starts = set([ch.byte for ch in chunks])
        for ch in chunks:
            assert all([(ex in starts or ex < 0) for ex in ch.exits])

    def _arcs(self):
        """Find the executable arcs in the code.

        Yields pairs: (from,to).  From and to are integer line numbers.  If
        from is < 0, then the arc is an entrance into the code object.  If to
        is < 0, the arc is an exit from the code object.

        """
        chunks = self._split_into_chunks()

        # A map from byte offsets to chunks jumped into.
        byte_chunks = dict([(c.byte, c) for c in chunks])

        # There's always an entrance at the first chunk.
        yield (-1, byte_chunks[0].line)

        # Traverse from the first chunk in each line, and yield arcs where
        # the trace function will be invoked.
        for chunk in chunks:
            if not chunk.first:
                continue

            chunks_considered = set()
            chunks_to_consider = [chunk]
            while chunks_to_consider:
                # Get the chunk we're considering, and make sure we don't
                # consider it again
                this_chunk = chunks_to_consider.pop()
                chunks_considered.add(this_chunk)

                # For each exit, add the line number if the trace function
                # would be triggered, or add the chunk to those being
                # considered if not.
                for ex in this_chunk.exits:
                    if ex < 0:
                        # A negative exit leaves the code object entirely.
                        yield (chunk.line, ex)
                    else:
                        next_chunk = byte_chunks[ex]
                        if next_chunk in chunks_considered:
                            continue

                        # The trace function is invoked if visiting the first
                        # bytecode in a line, or if the transition is a
                        # backward jump.
                        backward_jump = next_chunk.byte < this_chunk.byte
                        if next_chunk.first or backward_jump:
                            if next_chunk.line != chunk.line:
                                yield (chunk.line, next_chunk.line)
                        else:
                            chunks_to_consider.append(next_chunk)

    def _all_chunks(self):
        """Returns a list of `Chunk` objects for this code and its children.

        See `_split_into_chunks` for details.

        """
        chunks = []
        for bp in self.child_parsers():
            chunks.extend(bp._split_into_chunks())

        return chunks

    def _all_arcs(self):
        """Get the set of all arcs in this code object and its children.

        See `_arcs` for details.

        """
        arcs = set()
        for bp in self.child_parsers():
            arcs.update(bp._arcs())

        return arcs
| 629 |
| 630 |
class Chunk(object):
    """A run of byte codes with a single entrance.

    Byte code is analyzed by dividing it into chunks: runs of byte codes
    entered only at their first instruction.  This is almost the CS notion
    of a `basic block`_, except a chunk may have many exits, and "basic
    block" is the clumsier name.

    .. _basic block: http://en.wikipedia.org/wiki/Basic_block

    `byte` is the offset of the chunk's first instruction, `line` the source
    line containing it, and `first` is true when the chunk is the first one
    on its source line.  `length` starts at 0 and is filled in later.

    `exits` holds the places control can go next: non-negative entries are
    byte offsets of other chunks, while a negative entry means the chunk can
    leave the code object (return); its value is the negative of the code
    block's starting line number.

    """
    def __init__(self, byte, line, first):
        self.byte = byte        # Byte offset of the first instruction.
        self.line = line        # Source line number containing the chunk.
        self.first = first      # First chunk on its source line?
        self.length = 0         # Length in bytes, assigned after splitting.
        self.exits = set()      # Byte offsets / negative line-number exits.

    def __repr__(self):
        # "!" flags a chunk that begins its source line.
        bang = "!" if self.first else ""
        return "<%d+%d @%d%s %r>" % (
            self.byte, self.length, self.line, bang, list(self.exits)
            )
OLD | NEW |