tools/isolate/trace_inputs.py - Issue 9834052: [strace] Add support for interrupted calls and proper chdir handling.

Side by Side Diff: tools/isolate/trace_inputs.py

Issue 9834052: [strace] Add support for interrupted calls and proper chdir handling. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fix isolate.py --mode=trace to use the proper variables Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Runs strace or dtrace on a test and processes the logs to extract the	6 """Runs strace or dtrace on a test and processes the logs to extract the

7 dependencies from the source tree.	7 dependencies from the source tree.

8	8

9 Automatically extracts directories where all the files are used to make the	9 Automatically extracts directories where all the files are used to make the

10 dependencies list more compact.	10 dependencies list more compact.

(...skipping 11 matching lines...) Expand all Loading...
22 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))	22 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))

23	23

24	24

25 def isEnabledFor(level):	25 def isEnabledFor(level):

26 return logging.getLogger().isEnabledFor(level)	26 return logging.getLogger().isEnabledFor(level)

27	27

28	28

29 class Strace(object):	29 class Strace(object):

30 """strace implies linux."""	30 """strace implies linux."""

31 IGNORED = (	31 IGNORED = (

	32 '/bin',

32 '/dev',	33 '/dev',

33 '/etc',	34 '/etc',

34 '/lib',	35 '/lib',

35 '/proc',	36 '/proc',

36 '/sys',	37 '/sys',

37 '/tmp',	38 '/tmp',

38 '/usr',	39 '/usr',

39 '/var',	40 '/var',

40 )	41 )

41	42

42 @staticmethod	43 class _Context(object):

43 def gen_trace(cmd, cwd, logname):	44 """Processes a strace log line and keeps the list of existent and non

	45 existent files accessed.

	46

	47 Ignores directories.

	48 """

	49 # This is the most common format. pid function(args) = result

	50 RE_HEADER = re.compile(r'^(\d+)\s+([^$]+)\((.+?)$\s+= (.+)$')

	51 # An interrupted function call, only grab the minimal header.

	52 RE_UNFINISHED = re.compile(r'^(\d+)\s+([^\(]+).*$')

	53 UNFINISHED = ' <unfinished ...>'

	54 # A resumed function call.

	55 RE_RESUMED = re.compile(r'^(\d+)\s+<\.\.\. ([^ ]+) resumed> (.+)$')

	56 # A process received a signal.

	57 RE_SIGNAL = re.compile(r'^\d+\s+--- SIG[A-Z]+ .+ ---')

	58 # A process didn't handle a signal.

	59 RE_KILLED = re.compile(r'^(\d+) \+\+\+ killed by ([A-Z]+) \+\+\+$')

	60

	61 # Arguments parsing.

	62 RE_CHDIR = re.compile(r'^\"(.+?)\"$')

	63 RE_EXECVE = re.compile(r'^\"(.+?)\", \[.+?\], \[.+?\]$')

	64 RE_OPEN2 = re.compile(r'^\"(.*?)\", ([A-Z\_\\|]+)$')

	65 RE_OPEN3 = re.compile(r'^\"(.*?)\", ([A-Z\_\\|]+), (\d+)$')

	66 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')

	67

	68 def __init__(self, blacklist):

	69 self._cwd = {}

	70 self.blacklist = blacklist

	71 self.files = set()

	72 self.non_existent = set()

	73 # Key is a tuple(pid, function name)

	74 self._pending_calls = {}

	75

	76 @classmethod

	77 def traces(cls):

	78 prefix = 'handle_'

	79 return [i[len(prefix):] for i in dir(cls) if i.startswith(prefix)]

	80

	81 def on_line(self, line):

	82 line = line.strip()

	83 if self.RE_SIGNAL.match(line):

	84 # Ignore signals.

	85 return

	86

	87 m = self.RE_KILLED.match(line)

	88 if m:

	89 self.handle_exit_group(int(m.group(1)), m.group(2), None, None)

	90 return

	91

	92 if line.endswith(self.UNFINISHED):

	93 line = line[:-len(self.UNFINISHED)]

	94 m = self.RE_UNFINISHED.match(line)

	95 assert m, line

	96 self._pending_calls[(m.group(1), m.group(2))] = line

	97 return

	98

	99 m = self.RE_RESUMED.match(line)

	100 if m:

	101 pending = self._pending_calls.pop((m.group(1), m.group(2)))

	102 # Reconstruct the line.

	103 line = pending + m.group(3)

	104

	105 m = self.RE_HEADER.match(line)

	106 assert m, line

	107 return getattr(self, 'handle_%s' % m.group(2))(

	108 int(m.group(1)),

	109 m.group(2),

	110 m.group(3),

	111 m.group(4))

	112

	113 def handle_chdir(self, pid, _function, args, result):

	114 """Updates cwd."""

	115 if result.startswith('0'):

	116 cwd = self.RE_CHDIR.match(args).group(1)

	117 if not cwd.startswith('/'):

	118 cwd2 = os.path.join(self._cwd[pid], cwd)

	119 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))

	120 self._cwd[pid] = cwd2

	121 else:

	122 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))

	123 self._cwd[pid] = cwd

	124 else:

	125 assert False, 'Unexecpected fail: %s' % result

	126

	127 def handle_clone(self, pid, _function, _args, result):

	128 """Transfers cwd."""

	129 if result == '? ERESTARTNOINTR (To be restarted)':

	130 return

	131 self._cwd[int(result)] = self._cwd[pid]

	132

	133 def handle_execve(self, pid, _function, args, result):

	134 self._handle_file(pid, self.RE_EXECVE.match(args).group(1), result)

	135

	136 def handle_exit_group(self, pid, _function, _args, _result):

	137 """Removes cwd."""

	138 del self._cwd[pid]

	139

	140 @staticmethod

	141 def handle_fork(_pid, _function, args, result):

	142 assert False, (args, result)

	143

	144 def handle_open(self, pid, _function, args, result):

	145 args = (self.RE_OPEN3.match(args) or self.RE_OPEN2.match(args)).groups()

	146 if 'O_DIRECTORY' in args[1]:

	147 return

	148 self._handle_file(pid, args[0], result)

	149

	150 def handle_rename(self, pid, _function, args, result):

	151 args = self.RE_RENAME.match(args).groups()

	152 self._handle_file(pid, args[0], result)

	153 self._handle_file(pid, args[1], result)

	154

	155 @staticmethod

	156 def handle_stat64(_pid, _function, args, result):

	157 assert False, (args, result)

	158

	159 @staticmethod

	160 def handle_vfork(_pid, _function, args, result):

	161 assert False, (args, result)

	162

	163 def _handle_file(self, pid, filepath, result):

	164 if result.startswith('-1'):

	165 return

	166 if not filepath.startswith('/'):

	167 filepath2 = os.path.join(self._cwd[pid], filepath)

	168 logging.debug('_handle_file(%d, %s) -> %s' % (pid, filepath, filepath2))

	169 filepath = filepath2

	170 else:

	171 logging.debug('_handle_file(%d, %s)' % (pid, filepath))

	172

	173 if self.blacklist(filepath):

	174 return

	175 if filepath not in self.files and filepath not in self.non_existent:

	176 if os.path.isfile(filepath):

	177 self.files.add(filepath)

	178 else:

	179 self.non_existent.add(filepath)

	180

	181 @classmethod

	182 def gen_trace(cls, cmd, cwd, logname):
	Roger Tawa (OOO till Jul 10th), 2012/03/27 15:52:53: For my own info, any reason to use classmethod instead of staticmethod?
	M-A Ruel, 2012/03/27 15:58:56: (On 2012/03/27 15:52:53, Roger Tawa wrote: > for my own info, any reason to use classmethod instead of staticmethod?) So cls can be used at line 190.
44 """Runs strace on an executable."""	183 """Runs strace on an executable."""

45 logging.info('gen_trace(%s, %s, %s)' % (cmd, cwd, logname))	184 logging.info('gen_trace(%s, %s, %s)' % (cmd, cwd, logname))

46 silent = not isEnabledFor(logging.INFO)	185 silent = not isEnabledFor(logging.INFO)

47 stdout = stderr = None	186 stdout = stderr = None

48 if silent:	187 if silent:

49 stdout = subprocess.PIPE	188 stdout = subprocess.PIPE

50 stderr = subprocess.PIPE	189 stderr = subprocess.PIPE

51 trace_cmd = ['strace', '-f', '-e', 'trace=open,chdir', '-o', logname]	190 traces = ','.join(cls._Context.traces())

	191 trace_cmd = ['strace', '-f', '-e', 'trace=%s' % traces, '-o', logname]

52 p = subprocess.Popen(	192 p = subprocess.Popen(

53 trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr)	193 trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr)

54 out, err = p.communicate()	194 out, err = p.communicate()

55 # Once it's done, inject a chdir() call to cwd to be able to reconstruct	195 # Once it's done, inject a chdir() call to cwd to be able to reconstruct

56 # the full paths.	196 # the full paths.

57 # TODO(maruel): cwd should be saved at each process creation, so forks needs	197 # TODO(maruel): cwd should be saved at each process creation, so forks needs

58 # to be traced properly.	198 # to be traced properly.

59 if os.path.isfile(logname):	199 if os.path.isfile(logname):

60 with open(logname) as f:	200 with open(logname) as f:

61 content = f.read()	201 content = f.read()

62 with open(logname, 'w') as f:	202 with open(logname, 'w') as f:

63 f.write('0 chdir("%s") = 0\n' % cwd)	203 pid = content.split(' ', 1)[0]

	204 f.write('%s chdir("%s") = 0\n' % (pid, cwd))

64 f.write(content)	205 f.write(content)

65	206

66 if p.returncode != 0:	207 if p.returncode != 0:

67 print 'Failure: %d' % p.returncode	208 print 'Failure: %d' % p.returncode

68 # pylint: disable=E1103	209 # pylint: disable=E1103

69 if out:	210 if out:

70 print ''.join(out.splitlines(True)[-100:])	211 print ''.join(out.splitlines(True)[-100:])

71 if err:	212 if err:

72 print ''.join(err.splitlines(True)[-100:])	213 print ''.join(err.splitlines(True)[-100:])

73 return p.returncode	214 return p.returncode

74	215

75 @staticmethod	216 @classmethod

76 def parse_log(filename, blacklist):	217 def parse_log(cls, filename, blacklist):

77 """Processes a strace log and returns the files opened and the files that do	218 """Processes a strace log and returns the files opened and the files that do

78 not exist.	219 not exist.

79	220

	221 It does not track directories.

	222

80 Most of the time, files that do not exist are temporary test files that	223 Most of the time, files that do not exist are temporary test files that

81 should be put in /tmp instead. See http://crbug.com/116251	224 should be put in /tmp instead. See http://crbug.com/116251

82 """	225 """

83 logging.info('parse_log(%s, %s)' % (filename, blacklist))	226 logging.info('parse_log(%s, %s)' % (filename, blacklist))

84 files = set()	227 context = cls._Context(blacklist)

85 non_existent = set()

86 # 1=pid, 2=filepath, 3=mode, 4=result

87 re_open = re.compile(

88 # PID open(PATH, MODE) = RESULT

89 r'^(\d+)\s+open$"([^"]+)", ([^$]+)\)\s+= (.+)$')

90 # 1=pid 2=path 3=result

91 re_chdir = re.compile(

92 # PID chdir(PATH) = RESULT

93 r'^(\d+)\s+chdir$"([^"]+)"$\s+= (.+)$')

94

95 # TODO(maruel): This code is totally wrong. cwd is a process local variable

96 # so this needs to be a dict with key = pid.

97 cwd = None

98 for line in open(filename):	228 for line in open(filename):

99 m = re_open.match(line)	229 context.on_line(line)

100 if m:	230 # Resolve any symlink we hit.

101 if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3):	231 return (

102 # Not present or a directory.	232 set(os.path.realpath(f) for f in context.files),

103 continue	233 set(os.path.realpath(f) for f in context.non_existent))

104 filepath = m.group(2)

105 if not filepath.startswith('/'):

106 filepath = os.path.join(cwd, filepath)

107 if blacklist(filepath):

108 continue

109 if filepath not in files and filepath not in non_existent:

110 if os.path.isfile(filepath):

111 files.add(filepath)

112 else:

113 non_existent.add(filepath)

114 m = re_chdir.match(line)

115 if m:

116 if m.group(3).startswith('0'):

117 cwd = m.group(2)

118 else:

119 assert False, 'Unexecpected fail: %s' % line

120

121 return files, non_existent

122	234

123	235

124 class Dtrace(object):	236 class Dtrace(object):

125 """Uses DTrace framework through dtrace. Requires root access.	237 """Uses DTrace framework through dtrace. Requires root access.

126	238

127 Implies Mac OSX.	239 Implies Mac OSX.

128	240

129 dtruss can't be used because it has compatibility issues with python.	241 dtruss can't be used because it has compatibility issues with python.

130 """	242 """

131 IGNORED = (	243 IGNORED = (

(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
309	421

310 def relevant_files(files, root):	422 def relevant_files(files, root):

311 """Trims the list of files to keep the expected files and unexpected files.	423 """Trims the list of files to keep the expected files and unexpected files.

312	424

313 Unexpected files are files that are not based inside the |root| directory.	425 Unexpected files are files that are not based inside the |root| directory.

314 """	426 """

315 expected = []	427 expected = []

316 unexpected = []	428 unexpected = []

317 for f in files:	429 for f in files:

318 if f.startswith(root):	430 if f.startswith(root):

319 expected.append(f[len(root):])	431 f = f[len(root):]

	432 assert f

	433 expected.append(f)

320 else:	434 else:

321 unexpected.append(f)	435 unexpected.append(f)

322 return sorted(set(expected)), sorted(set(unexpected))	436 return sorted(set(expected)), sorted(set(unexpected))

323	437

324	438

325 def extract_directories(files, root):	439 def extract_directories(files, root):

326 """Detects if all the files in a directory were loaded and if so, replace the	440 """Detects if all the files in a directory were loaded and if so, replace the

327 individual files by the directory entry.	441 individual files by the directory entry.

328 """	442 """

329 directories = set(os.path.dirname(f) for f in files)	443 directories = set(os.path.dirname(f) for f in files)

(...skipping 10 matching lines...) Expand all Loading...
340 return sorted(files)	454 return sorted(files)

341	455

342	456

343 def trace_inputs(	457 def trace_inputs(

344 logfile, cmd, root_dir, gyp_proj_dir, product_dir, force_trace):	458 logfile, cmd, root_dir, gyp_proj_dir, product_dir, force_trace):

345 """Tries to load the logs if available. If not, trace the test.	459 """Tries to load the logs if available. If not, trace the test.

346	460

347 Symlinks are not processed at all.	461 Symlinks are not processed at all.

348 """	462 """

349 logging.debug(	463 logging.debug(

350 'trace_inputs(%s, %s, %s, %s, %s)' % (	464 'trace_inputs(%s, %s, %s, %s, %s, %s)' % (

351 logfile, cmd, root_dir, gyp_proj_dir, product_dir))	465 logfile, cmd, root_dir, gyp_proj_dir, product_dir, force_trace))

352	466

353 # It is important to have unambiguous path.	467 # It is important to have unambiguous path.

354 assert os.path.isabs(root_dir), root_dir	468 assert os.path.isabs(root_dir), root_dir

355 assert os.path.isabs(logfile), logfile	469 assert os.path.isabs(logfile), logfile

356 assert os.path.isabs(cmd[0]), cmd[0]	470 assert (

	471 (os.path.isfile(logfile) and not force_trace) or os.path.isabs(cmd[0])

	472 ), cmd[0]

	473 # Resolve any symlink

	474 root_dir = os.path.realpath(root_dir)

357	475

358 def print_if(txt):	476 def print_if(txt):

359 if gyp_proj_dir is None:	477 if gyp_proj_dir is None:

360 print(txt)	478 print(txt)

361	479

362 if sys.platform == 'linux2':	480 if sys.platform == 'linux2':

363 api = Strace()	481 api = Strace()

364 elif sys.platform == 'darwin':	482 elif sys.platform == 'darwin':

365 api = Dtrace()	483 api = Dtrace()

366 else:	484 else:

(...skipping 26 matching lines...) Expand all Loading...
393 for f in unexpected:	511 for f in unexpected:

394 print_if(' %s' % f)	512 print_if(' %s' % f)

395	513

396 simplified = extract_directories(expected, root_dir)	514 simplified = extract_directories(expected, root_dir)

397 print_if('Interesting: %d reduced to %d' % (len(expected), len(simplified)))	515 print_if('Interesting: %d reduced to %d' % (len(expected), len(simplified)))

398 for f in simplified:	516 for f in simplified:

399 print_if(' %s' % f)	517 print_if(' %s' % f)

400	518

401 if gyp_proj_dir is not None:	519 if gyp_proj_dir is not None:

402 def cleanuppath(x):	520 def cleanuppath(x):

	521 """Cleans up a relative path."""

403 if x:	522 if x:

404 x = x.rstrip('/')	523 x = x.rstrip('/')

405 if x == '.':	524 if x == '.':

406 x = ''	525 x = ''

407 if x:	526 if x:

408 x += '/'	527 x += '/'

409 return x	528 return x

410	529

411 gyp_proj_dir = cleanuppath(gyp_proj_dir)	530 gyp_proj_dir = cleanuppath(gyp_proj_dir)

412 product_dir = cleanuppath(product_dir)	531 product_dir = cleanuppath(product_dir)

413	532

414 def fix(f):	533 def fix(f):

415 """Bases the file on the most restrictive variable."""	534 """Bases the file on the most restrictive variable."""

	535 logging.debug('fix(%s)' % f)

416 if product_dir and f.startswith(product_dir):	536 if product_dir and f.startswith(product_dir):

417 return '<(PRODUCT_DIR)/%s' % f[len(product_dir):]	537 return '<(PRODUCT_DIR)/%s' % f[len(product_dir):]

418 elif gyp_proj_dir and f.startswith(gyp_proj_dir):	538 elif gyp_proj_dir and f.startswith(gyp_proj_dir):

419 return f[len(gyp_proj_dir):]	539 # May be empty if the whole directory containing the gyp file is needed.

	540 return f[len(gyp_proj_dir):] or './'

420 else:	541 else:

421 return '<(DEPTH)/%s' % f	542 return '<(DEPTH)/%s' % f

422	543

423 corrected = [fix(f) for f in simplified]	544 corrected = [fix(f) for f in simplified]

424 files = [f for f in corrected if not f.endswith('/')]	545 files = [f for f in corrected if not f.endswith('/')]

425 dirs = [f for f in corrected if f.endswith('/')]	546 dirs = [f for f in corrected if f.endswith('/')]

426 # Constructs the python code manually.	547 # Constructs the python code manually.

427 print(	548 print(

428 '{\n'	549 '{\n'

429 ' \'variables\': {\n'	550 ' \'variables\': {\n'

(...skipping 19 matching lines...) Expand all Loading...
449 '-g', '--gyp',	570 '-g', '--gyp',

450 help='When specified, outputs the inputs files in a way compatible for '	571 help='When specified, outputs the inputs files in a way compatible for '

451 'gyp processing. Should be set to the relative path containing the '	572 'gyp processing. Should be set to the relative path containing the '

452 'gyp file, e.g. \'chrome\' or \'net\'')	573 'gyp file, e.g. \'chrome\' or \'net\'')

453 parser.add_option(	574 parser.add_option(

454 '-p', '--product-dir', default='out/Release',	575 '-p', '--product-dir', default='out/Release',

455 help='Directory for PRODUCT_DIR. Default: %default')	576 help='Directory for PRODUCT_DIR. Default: %default')

456 parser.add_option(	577 parser.add_option(

457 '--root-dir', default=ROOT_DIR,	578 '--root-dir', default=ROOT_DIR,

458 help='Root directory to base everything off. Default: %default')	579 help='Root directory to base everything off. Default: %default')

459 parser.add_option('-f', '--force', help='Force to retrace the file')	580 parser.add_option(

	581 '-f', '--force', action='store_true', help='Force to retrace the file')

460	582

461 options, args = parser.parse_args()	583 options, args = parser.parse_args()

462 level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]	584 level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]

463 logging.basicConfig(	585 logging.basicConfig(

464 level=level,	586 level=level,

465 format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s')	587 format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s')

466	588

467 if not args:

468 parser.error('Must supply a command to run')

469 if not options.log:	589 if not options.log:

470 parser.error('Must supply a log file with -l')	590 parser.error('Must supply a log file with -l')

	591 if not args:

	592 if not os.path.isfile(options.log) or options.force:

	593 parser.error('Must supply a command to run')

	594 else:

	595 args[0] = os.path.abspath(args[0])

471	596

472 args[0] = os.path.abspath(args[0])	597 if options.root_dir:

	598 options.root_dir = os.path.abspath(options.root_dir)

	599

473 return trace_inputs(	600 return trace_inputs(

474 os.path.abspath(options.log),	601 os.path.abspath(options.log),

475 args,	602 args,

476 options.root_dir,	603 options.root_dir,

477 options.gyp,	604 options.gyp,

478 options.product_dir,	605 options.product_dir,

479 options.force)	606 options.force)

480	607

481	608

482 if __name__ == '__main__':	609 if __name__ == '__main__':

483 sys.exit(main())	610 sys.exit(main())

OLD	NEW

« no previous file with comments | « tools/isolate/isolate_test.py ('k') | tools/isolate/trace_inputs_test.py » ('j') | no next file with comments »