| OLD | NEW |
| (Empty) |
| 1 """Code parsing for Coverage.""" | |
| 2 | |
| 3 import opcode, re, sys, token, tokenize | |
| 4 | |
| 5 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622 | |
| 6 from coverage.backward import open_source | |
| 7 from coverage.bytecode import ByteCodes, CodeObjects | |
| 8 from coverage.misc import nice_pair, expensive, join_regex | |
| 9 from coverage.misc import CoverageException, NoSource, NotPython | |
| 10 | |
| 11 | |
class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, text=None, filename=None, exclude=None):
        """
        Source can be provided as `text`, the text itself, or `filename`, from
        which the text will be read. Excluded lines are those that match
        `exclude`, a regex.

        """
        assert text or filename, "CodeParser needs either text or filename"
        self.filename = filename or "<code>"
        self.text = text
        if not self.text:
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    sourcef.close()
            except IOError:
                # The sys.exc_info dance keeps this syntax valid on both
                # Python 2 and Python 3 (no "except ... as err").
                _, err, _ = sys.exc_info()
                raise NoSource(
                    "No source for code: '%s': %s" % (self.filename, err)
                    )

        # Scrap the BOM if it exists.
        # NOTE(review): ord() == 0xfeff only matches a decoded U+FEFF, so this
        # assumes open_source returned already-decoded text -- confirm.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]

        self.exclude = exclude

        # Debugging aid: set to True to print each token while parsing.
        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = self.text.split('\n')

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # The line numbers of class definitions.
        self.classdefs = set()

        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

        # Lazily-created ByteParser
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand."""
        if not self._byte_parser:
            self._byte_parser = \
                            ByteParser(text=self.text, filename=self.filename)
        return self._byte_parser
    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Find the lines matching one of a list of regexes.

        Returns a set of line numbers, the lines that contain a match for one
        of the regexes in `regexes`. The entire line needn't match, just a
        part of it.

        """
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                # Line numbers are 1-based; enumerate is 0-based.
                matches.add(i+1)
        return matches

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated: `excluded`, `docstrings`,
        `classdefs`, `multiline`, and `statement_starts`.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0                  # Current indentation depth.
        exclude_indent = 0          # Indent depth where an exclusion began.
        excluding = False           # Are we inside an excluded suite?
        prev_toktype = token.INDENT
        first_line = None           # First line of the current statement.
        empty = True                # Have we seen any real code yet?

        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: not covered
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Class definitions look like branches in the byte code, so
                # we need to exclude them. The simplest way is to note the
                # lines with the 'class' keyword.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite. We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.) This works for
                # 99.9999% of cases. For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                for i in range(slineno, elineno+1):
                    self.docstrings.add(i)
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return sorted(lset)

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a sorted list of executable line numbers, and
        2) a sorted list of excluded line numbers.

        Reported line numbers are normalized to the first line of multi-line
        statements.

        Raises `NotPython` if the source can't be tokenized.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                (self.filename, msg, lineno)
                )

        excluded_lines = self.first_lines(self.excluded)
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)

        return lines, excluded_lines

    def arcs(self):
        """Get information about the arcs available in the code.

        Returns a sorted list of line number pairs. Line numbers have been
        normalized to the first line of multiline statements.

        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                # Arcs within a single (multi-line) statement are dropped.
                all_arcs.append((fl1, fl2))
        return sorted(all_arcs)
    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.

        Excluded lines are excluded.

        """
        excluded_lines = self.first_lines(self.excluded)
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                # Don't ever report -1 as a line number
                continue
            if l1 in excluded_lines:
                # Don't report excluded lines as line numbers.
                continue
            if l2 in excluded_lines:
                # Arcs to excluded lines shouldn't count.
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        # Class definitions have one extra exit, so remove one for each:
        for l in self.classdefs:
            # Ensure key is there: classdefs can include excluded lines.
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts
    exit_counts = expensive(exit_counts)
| 268 | |
| 269 | |
| 270 ## Opcodes that guide the ByteParser. | |
| 271 | |
| 272 def _opcode(name): | |
| 273 """Return the opcode by name from the opcode module.""" | |
| 274 return opcode.opmap[name] | |
| 275 | |
| 276 def _opcode_set(*names): | |
| 277 """Return a set of opcodes by the names in `names`.""" | |
| 278 s = set() | |
| 279 for name in names: | |
| 280 try: | |
| 281 s.add(_opcode(name)) | |
| 282 except KeyError: | |
| 283 pass | |
| 284 return s | |
| 285 | |
# Opcodes that leave the code object.
OPS_CODE_END = _opcode_set('RETURN_VALUE')

# Opcodes that unconditionally end the code chunk.
OPS_CHUNK_END = _opcode_set(
    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
    'BREAK_LOOP', 'CONTINUE_LOOP',
    )

# Opcodes that unconditionally begin a new code chunk. By starting new chunks
# with unconditional jump instructions, we neatly deal with jumps to jumps
# properly.
OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')

# Opcodes that push a block on the block stack.
OPS_PUSH_BLOCK = _opcode_set(
    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
    )

# Block types for exception handling.
OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Opcodes that pop a block from the block stack.
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

# Opcodes that have a jump destination, but aren't really a jump.
OPS_NO_JUMP = OPS_PUSH_BLOCK

# Individual opcodes we need below.
# NOTE(review): unlike _opcode_set, _opcode raises KeyError for a missing
# name, so these lookups assume the names exist in the running Python's
# opcode map -- confirm against the interpreter versions this file targets.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
COMPARE_EXCEPTION = 10  # just have to get this const from the code.
OP_LOAD_CONST = _opcode('LOAD_CONST')
OP_RETURN_VALUE = _opcode('RETURN_VALUE')
| 321 | |
| 322 | |
class ByteParser(object):
    """Parse byte codes to understand the structure of code."""

    def __init__(self, code=None, text=None, filename=None):
        """Use a `code` object directly, or compile `text` or `filename`.

        Raises `NotPython` if the source can't be compiled, and
        `CoverageException` if the code objects lack the attributes the
        analysis needs (e.g. on alternative Python implementations).

        """
        if code:
            self.code = code
            self.text = text
        else:
            if not text:
                assert filename, "If no code or text, need a filename"
                sourcef = open_source(filename)
                try:
                    text = sourcef.read()
                finally:
                    sourcef.close()
            self.text = text

            try:
                # Python 2.3 and 2.4 don't like partial last lines, so be sure
                # the text ends nicely for them.
                self.code = compile(text + '\n', filename, "exec")
            except SyntaxError:
                # sys.exc_info keeps the except syntax 2.x/3.x compatible.
                _, synerr, _ = sys.exc_info()
                raise NotPython(
                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
                    (filename, synerr.msg, synerr.lineno)
                    )

        # Alternative Python implementations don't always provide all the
        # attributes on code objects that we need to do the analysis.
        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
            if not hasattr(self.code, attr):
                raise CoverageException(
                    "This implementation of Python doesn't support code "
                    "analysis.\n"
                    "Run coverage.py under CPython for this command."
                    )

    def child_parsers(self):
        """Iterate over all the code objects nested within this one.

        The iteration includes `self` as its first value.

        """
        children = CodeObjects(self.code)
        return [ByteParser(code=c, text=self.text) for c in children]

    # Getting numbers from the lnotab value changed in Py3.0.
    if sys.version_info >= (3, 0):
        def _lnotab_increments(self, lnotab):
            """Return a list of ints from the lnotab bytes in 3.x"""
            return list(lnotab)
    else:
        def _lnotab_increments(self, lnotab):
            """Return a list of ints from the lnotab string in 2.x"""
            return [ord(c) for c in lnotab]

    def _bytes_lines(self):
        """Map byte offsets to line numbers in `code`.

        Uses co_lnotab described in Python/compile.c to map byte offsets to
        line numbers. Returns a list: [(b0, l0), (b1, l1), ...]

        """
        # Adapted from dis.py in the standard library.
        # co_lnotab alternates byte increments (even indexes) with line
        # increments (odd indexes).
        byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
        line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])

        bytes_lines = []
        last_line_num = None
        line_num = self.code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                if line_num != last_line_num:
                    bytes_lines.append((byte_num, line_num))
                    last_line_num = line_num
                byte_num += byte_incr
            line_num += line_incr
        if line_num != last_line_num:
            bytes_lines.append((byte_num, line_num))
        return bytes_lines

    def _find_statements(self):
        """Find the statements in `self.code`.

        Return a set of line numbers that start statements. Recurses into all
        code objects reachable from `self.code`.

        """
        stmts = set()
        for bp in self.child_parsers():
            # Get all of the lineno information from this code.
            for _, l in bp._bytes_lines():
                stmts.add(l)
        return stmts

    def _split_into_chunks(self):
        """Split the code object into a list of `Chunk` objects.

        Each chunk is only entered at its first instruction, though there can
        be many exits from a chunk.

        Returns a list of `Chunk` objects.

        """

        # The list of chunks so far, and the one we're working on.
        chunks = []
        chunk = None
        bytes_lines_map = dict(self._bytes_lines())

        # The block stack: loops and try blocks get pushed here for the
        # implicit jumps that can occur.
        # Each entry is a tuple: (block type, destination)
        block_stack = []

        # Some op codes are followed by branches that should be ignored. This
        # is a count of how many ignores are left.
        ignore_branch = 0

        # We have to handle the last two bytecodes specially.
        ult = penult = None

        for bc in ByteCodes(self.code.co_code):
            # Maybe have to start a new chunk
            if bc.offset in bytes_lines_map:
                # Start a new chunk for each source line number.
                if chunk:
                    chunk.exits.add(bc.offset)
                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
                chunks.append(chunk)
            elif bc.op in OPS_CHUNK_BEGIN:
                # Jumps deserve their own unnumbered chunk. This fixes
                # problems with jumps to jumps getting confused.
                if chunk:
                    chunk.exits.add(bc.offset)
                chunk = Chunk(bc.offset)
                chunks.append(chunk)

            if not chunk:
                chunk = Chunk(bc.offset)
                chunks.append(chunk)

            # Look at the opcode
            if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                if ignore_branch:
                    # Someone earlier wanted us to ignore this branch.
                    ignore_branch -= 1
                else:
                    # The opcode has a jump, it's an exit for this chunk.
                    chunk.exits.add(bc.jump_to)

            if bc.op in OPS_CODE_END:
                # The opcode can exit the code object.
                # Exits < 0 mean "leave the code object"; see `Chunk`.
                chunk.exits.add(-self.code.co_firstlineno)
            if bc.op in OPS_PUSH_BLOCK:
                # The opcode adds a block to the block_stack.
                block_stack.append((bc.op, bc.jump_to))
            if bc.op in OPS_POP_BLOCK:
                # The opcode pops a block from the block stack.
                block_stack.pop()
            if bc.op in OPS_CHUNK_END:
                # This opcode forces the end of the chunk.
                if bc.op == OP_BREAK_LOOP:
                    # A break is implicit: jump where the top of the
                    # block_stack points.
                    chunk.exits.add(block_stack[-1][1])
                chunk = None
            if bc.op == OP_END_FINALLY:
                if block_stack:
                    # A break that goes through a finally will jump to whatever
                    # block is on top of the stack.
                    chunk.exits.add(block_stack[-1][1])
                # For the finally clause we need to find the closest exception
                # block, and use its jump target as an exit.
                for iblock in range(len(block_stack)-1, -1, -1):
                    if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
                        chunk.exits.add(block_stack[iblock][1])
                        break
            if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                # This is an except clause. We want to overlook the next
                # branch, so that except's don't count as branches.
                ignore_branch += 1

            penult = ult
            ult = bc

        if chunks:
            # The last two bytecodes could be a dummy "return None" that
            # shouldn't be counted as real code. Every Python code object seems
            # to end with a return, and a "return None" is inserted if there
            # isn't an explicit return in the source.
            if ult and penult:
                if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                    if self.code.co_consts[penult.arg] is None:
                        # This is "return None", but is it dummy? A real line
                        # would be a last chunk all by itself.
                        if chunks[-1].byte != penult.offset:
                            ex = -self.code.co_firstlineno
                            # Split the last chunk
                            last_chunk = chunks[-1]
                            last_chunk.exits.remove(ex)
                            last_chunk.exits.add(penult.offset)
                            chunk = Chunk(penult.offset)
                            chunk.exits.add(ex)
                            chunks.append(chunk)

            # Give all the chunks a length.
            chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
            for i in range(len(chunks)-1):
                chunks[i].length = chunks[i+1].byte - chunks[i].byte

        return chunks

    def _arcs(self):
        """Find the executable arcs in the code.

        Returns a set of pairs, (from,to). From and to are integer line
        numbers. If from is < 0, then the arc is an entrance into the code
        object. If to is < 0, the arc is an exit from the code object.

        """
        chunks = self._split_into_chunks()

        # A map from byte offsets to chunks jumped into.
        byte_chunks = dict([(c.byte, c) for c in chunks])

        # Build a map from byte offsets to actual lines reached.
        byte_lines = {}
        bytes_to_add = set([c.byte for c in chunks])

        while bytes_to_add:
            byte_to_add = bytes_to_add.pop()
            if byte_to_add in byte_lines or byte_to_add < 0:
                continue

            # Which lines does this chunk lead to?
            bytes_considered = set()
            bytes_to_consider = [byte_to_add]
            lines = set()

            while bytes_to_consider:
                byte = bytes_to_consider.pop()
                bytes_considered.add(byte)

                # Find chunk for byte
                try:
                    ch = byte_chunks[byte]
                except KeyError:
                    # The byte lands mid-chunk: fall back to a linear scan,
                    # then cache the answer for next time.
                    for ch in chunks:
                        if ch.byte <= byte < ch.byte+ch.length:
                            break
                    else:
                        # No chunk for this byte!
                        raise Exception("Couldn't find chunk @ %d" % byte)
                    byte_chunks[byte] = ch # pylint: disable=W0631

                if ch.line:
                    lines.add(ch.line)
                else:
                    # Unnumbered chunk: follow its exits transitively.
                    for ex in ch.exits:
                        if ex < 0:
                            lines.add(ex)
                        elif ex not in bytes_considered:
                            bytes_to_consider.append(ex)

                bytes_to_add.update(ch.exits)

            byte_lines[byte_to_add] = lines

        # Figure out for each chunk where the exits go.
        arcs = set()
        for chunk in chunks:
            if chunk.line:
                for ex in chunk.exits:
                    if ex < 0:
                        exit_lines = [ex]
                    else:
                        exit_lines = byte_lines[ex]
                    for exit_line in exit_lines:
                        if chunk.line != exit_line:
                            arcs.add((chunk.line, exit_line))
        # Byte offset 0 is where execution enters the code object.
        for line in byte_lines[0]:
            arcs.add((-1, line))

        return arcs

    def _all_chunks(self):
        """Returns a list of `Chunk` objects for this code and its children.

        See `_split_into_chunks` for details.

        """
        chunks = []
        for bp in self.child_parsers():
            chunks.extend(bp._split_into_chunks())

        return chunks

    def _all_arcs(self):
        """Get the set of all arcs in this code object and its children.

        See `_arcs` for details.

        """
        arcs = set()
        for bp in self.child_parsers():
            arcs.update(bp._arcs())

        return arcs
| 633 return arcs | |
| 634 | |
| 635 | |
class Chunk(object):
    """A sequence of byte codes entered only at its first instruction.

    Byte code is divided into chunks so that each chunk has a single
    entrance (its first instruction) but possibly many exits -- nearly the
    CS notion of a `basic block`_, relaxed to allow multiple exits.

    .. _basic block: http://en.wikipedia.org/wiki/Basic_block

    An exit < 0 means the chunk can leave the code (return). The exit is
    the negative of the starting line number of the code block.

    """
    def __init__(self, byte, line=0):
        # Byte offset where the chunk starts.
        self.byte = byte
        # Source line number for the chunk, or 0 for synthetic chunks.
        self.line = line
        # Length in bytes; filled in once all chunks are known.
        self.length = 0
        # Byte offsets (or negative line numbers) this chunk can jump to.
        self.exits = set()

    def __repr__(self):
        details = (self.byte, self.length, self.line, list(self.exits))
        return "<%d+%d @%d %r>" % details
| OLD | NEW |