Chromium Code Reviews

Side by Side Diff: swarm_client/isolate.py

Issue 69143004: Delete swarm_client. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/
Patch Set: Created 7 years, 1 month ago
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Front end tool to operate on .isolate files.
7
8 This includes creating, merging or compiling them to generate a .isolated file.
9
10 See more information at
11 https://code.google.com/p/swarming/wiki/IsolateDesign
12 https://code.google.com/p/swarming/wiki/IsolateUserGuide
13 """
14 # Run ./isolate.py --help for more detailed information.
15
16 import ast
17 import copy
18 import itertools
19 import logging
20 import optparse
21 import os
22 import posixpath
23 import re
24 import stat
25 import subprocess
26 import sys
27
28 import isolateserver
29 import run_isolated
30 import trace_inputs
31
32 # Import here directly so isolate is easier to use as a library.
33 from run_isolated import get_flavor
34
35 from third_party import colorama
36 from third_party.depot_tools import fix_encoding
37 from third_party.depot_tools import subcommand
38
39 from utils import file_path
40 from utils import tools
41 from utils import short_expression_finder
42
43
44 __version__ = '0.1.1'
45
46
47 PATH_VARIABLES = ('DEPTH', 'PRODUCT_DIR')
48
49 # Files that should be 0-length when mapped.
50 KEY_TOUCHED = 'isolate_dependency_touched'
51 # Files that should be tracked by the build tool.
52 KEY_TRACKED = 'isolate_dependency_tracked'
53 # Files that should not be tracked by the build tool.
54 KEY_UNTRACKED = 'isolate_dependency_untracked'
55
56
57 class ExecutionError(Exception):
58 """A generic error occurred."""
59 def __str__(self):
60 return self.args[0]
61
62
63 ### Path handling code.
64
65
66 DEFAULT_BLACKLIST = (
67 # Temporary vim or python files.
68 r'^.+\.(?:pyc|swp)$',
69 # .git or .svn directory.
70 r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
71 )
72
73
74 # Chromium-specific.
75 DEFAULT_BLACKLIST += (
76 r'^.+\.(?:run_test_cases)$',
77 r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
78 )
79
80
81 def relpath(path, root):
82 """os.path.relpath() that keeps trailing os.path.sep."""
83 out = os.path.relpath(path, root)
84 if path.endswith(os.path.sep):
85 out += os.path.sep
86 return out
87
88
89 def safe_relpath(filepath, basepath):
90 """Do not throw on Windows when filepath and basepath are on different drives.
91
92 Differs from relpath() above in that it doesn't keep the trailing
93 os.path.sep; it also swallows the exception raised on Windows and returns
94 the original absolute path when the drives differ.
95 """
96 try:
97 return os.path.relpath(filepath, basepath)
98 except ValueError:
99 assert sys.platform == 'win32'
100 return filepath
101
102
103 def normpath(path):
104 """os.path.normpath() that keeps trailing os.path.sep."""
105 out = os.path.normpath(path)
106 if path.endswith(os.path.sep):
107 out += os.path.sep
108 return out
109
110
111 def posix_relpath(path, root):
112 """posix.relpath() that keeps trailing slash."""
113 out = posixpath.relpath(path, root)
114 if path.endswith('/'):
115 out += '/'
116 return out
117
118
119 def cleanup_path(x):
120 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows."""
121 if x:
122 x = x.rstrip(os.path.sep).replace(os.path.sep, '/')
123 if x == '.':
124 x = ''
125 if x:
126 x += '/'
127 return x
128
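A minimal sketch of the trailing-separator contract these helpers share (POSIX paths assumed; on Windows os.path.sep is '\\' and cleanup_path() converts it to '/'):

  print relpath('/a/b/c/', '/a')   # 'b/c/' -- trailing separator preserved
  print normpath('a//b/./c/')      # 'a/b/c/'
  print cleanup_path('.')          # ''   -- '.' collapses to the empty string
  print cleanup_path('foo/bar')    # 'foo/bar/' -- always ends with '/'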
129
130 def is_url(path):
131 return bool(re.match(r'^https?://.+$', path))
132
133
134 def path_starts_with(prefix, path):
135 """Returns true if the components of the path |prefix| are the same as the
136 initial components of |path| (or all of the components of |path|). The paths
137 must be absolute.
138 """
139 assert os.path.isabs(prefix) and os.path.isabs(path)
140 prefix = os.path.normpath(prefix)
141 path = os.path.normpath(path)
142 assert prefix == file_path.get_native_path_case(prefix), prefix
143 assert path == file_path.get_native_path_case(path), path
144 prefix = prefix.rstrip(os.path.sep) + os.path.sep
145 path = path.rstrip(os.path.sep) + os.path.sep
146 return path.startswith(prefix)
147
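A quick sketch of the component-wise semantics, assuming a case-sensitive filesystem where file_path.get_native_path_case() returns paths unchanged:

  print path_starts_with('/a/b', '/a/b/c')  # True
  print path_starts_with('/a/b', '/a/bc')   # False -- matches whole path
                                            # components, not a string prefix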
148
149 def fix_native_path_case(root, path):
150 """Ensures that each component of |path| has the proper native case by
151 iterating slowly over the directory elements of |path|."""
152 native_case_path = root
153 for raw_part in path.split(os.sep):
154 if not raw_part or raw_part == '.':
155 break
156
157 part = file_path.find_item_native_case(native_case_path, raw_part)
158 if not part:
159 raise isolateserver.MappingError(
160 'Input file %s doesn\'t exist' %
161 os.path.join(native_case_path, raw_part))
162 native_case_path = os.path.join(native_case_path, part)
163
164 return os.path.normpath(native_case_path)
165
166
167 def expand_symlinks(indir, relfile):
168 """Follows symlinks in |relfile|, but treating symlinks that point outside the
169 build tree as if they were ordinary directories/files. Returns the final
170 symlink-free target and a list of paths to symlinks encountered in the
171 process.
172
173 The rule about symlinks outside the build tree is for the benefit of the
174 Chromium OS ebuild, which symlinks the output directory to an unrelated path
175 in the chroot.
176
177 Fails when a directory loop is detected, although in theory we could support
178 that case.
179 """
180 is_directory = relfile.endswith(os.path.sep)
181 done = indir
182 todo = relfile.strip(os.path.sep)
183 symlinks = []
184
185 while todo:
186 pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
187 done, todo)
188 if not symlink:
189 todo = fix_native_path_case(done, todo)
190 done = os.path.join(done, todo)
191 break
192 symlink_path = os.path.join(done, pre_symlink, symlink)
193 post_symlink = post_symlink.lstrip(os.path.sep)
194 # readlink doesn't exist on Windows.
195 # pylint: disable=E1101
196 target = os.path.normpath(os.path.join(done, pre_symlink))
197 symlink_target = os.readlink(symlink_path)
198 if os.path.isabs(symlink_target):
199 # Absolute paths are considered normal directories. The use case is
200 # generally someone who puts the output directory on a separate drive.
201 target = symlink_target
202 else:
203 # The symlink itself could be using the wrong path case.
204 target = fix_native_path_case(target, symlink_target)
205
206 if not os.path.exists(target):
207 raise isolateserver.MappingError(
208 'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
209 target = file_path.get_native_path_case(target)
210 if not path_starts_with(indir, target):
211 done = symlink_path
212 todo = post_symlink
213 continue
214 if path_starts_with(target, symlink_path):
215 raise isolateserver.MappingError(
216 'Can\'t map recursive symlink reference %s -> %s' %
217 (symlink_path, target))
218 logging.info('Found symlink: %s -> %s', symlink_path, target)
219 symlinks.append(os.path.relpath(symlink_path, indir))
220 # Treat the common prefix of the old and new paths as done, and start
221 # scanning again.
222 target = target.split(os.path.sep)
223 symlink_path = symlink_path.split(os.path.sep)
224 prefix_length = 0
225 for target_piece, symlink_path_piece in zip(target, symlink_path):
226 if target_piece == symlink_path_piece:
227 prefix_length += 1
228 else:
229 break
230 done = os.path.sep.join(target[:prefix_length])
231 todo = os.path.join(
232 os.path.sep.join(target[prefix_length:]), post_symlink)
233
234 relfile = os.path.relpath(done, indir)
235 relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
236 return relfile, symlinks
237
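A minimal sketch of the return value (POSIX only, since os.symlink() is unavailable on Windows); the 'out' link stays inside the build tree, so it is resolved and reported:

  import tempfile
  indir = tempfile.mkdtemp()
  os.mkdir(os.path.join(indir, 'build'))
  open(os.path.join(indir, 'build', 'foo.txt'), 'w').close()
  os.symlink('build', os.path.join(indir, 'out'))
  print expand_symlinks(indir, os.path.join('out', 'foo.txt'))
  # -> ('build/foo.txt', ['out'])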
238
239 def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
240 """Expands a single input. It can result in multiple outputs.
241
242 This function is recursive when relfile is a directory.
243
244 Note: this code doesn't properly handle recursive symlinks like the one
245 created with:
246 ln -s .. foo
247 """
248 if os.path.isabs(relfile):
249 raise isolateserver.MappingError(
250 'Can\'t map absolute path %s' % relfile)
251
252 infile = normpath(os.path.join(indir, relfile))
253 if not infile.startswith(indir):
254 raise isolateserver.MappingError(
255 'Can\'t map file %s outside %s' % (infile, indir))
256
257 filepath = os.path.join(indir, relfile)
258 native_filepath = file_path.get_native_path_case(filepath)
259 if filepath != native_filepath:
260 # Special case './'.
261 if filepath != native_filepath + '.' + os.path.sep:
262 # Give up enforcing strict path case on OSX. Really, it's that sad. The
263 # case where it happens is very specific and hard to reproduce:
264 # get_native_path_case(
265 # u'Foo.framework/Versions/A/Resources/Something.nib') will return
266 # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
267 #
268 # Note that this is really something deep in OSX because running
269 # ls Foo.framework/Versions/A
270 # will print out 'Resources', while file_path.get_native_path_case()
271 # returns a lower case 'r'.
272 #
273 # So *something* is happening under the hood that causes the command 'ls'
274 # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
275 # have no idea why.
276 if sys.platform != 'darwin':
277 raise isolateserver.MappingError(
278 'File path doesn\'t equal native file path\n%s != %s' %
279 (filepath, native_filepath))
280
281 symlinks = []
282 if follow_symlinks:
283 relfile, symlinks = expand_symlinks(indir, relfile)
284
285 if relfile.endswith(os.path.sep):
286 if not os.path.isdir(infile):
287 raise isolateserver.MappingError(
288 '%s is not a directory but ends with "%s"' % (infile, os.path.sep))
289
290 # Special case './'.
291 if relfile.startswith('.' + os.path.sep):
292 relfile = relfile[2:]
293 outfiles = symlinks
294 try:
295 for filename in os.listdir(infile):
296 inner_relfile = os.path.join(relfile, filename)
297 if blacklist(inner_relfile):
298 continue
299 if os.path.isdir(os.path.join(indir, inner_relfile)):
300 inner_relfile += os.path.sep
301 outfiles.extend(
302 expand_directory_and_symlink(indir, inner_relfile, blacklist,
303 follow_symlinks))
304 return outfiles
305 except OSError as e:
306 raise isolateserver.MappingError(
307 'Unable to iterate over directory %s.\n%s' % (infile, e))
308 else:
309 # Always add individual files even if they were blacklisted.
310 if os.path.isdir(infile):
311 raise isolateserver.MappingError(
312 'Input directory %s must have a trailing slash' % infile)
313
314 if not os.path.isfile(infile):
315 raise isolateserver.MappingError(
316 'Input file %s doesn\'t exist' % infile)
317
318 return symlinks + [relfile]
319
320
321 def expand_directories_and_symlinks(indir, infiles, blacklist,
322 follow_symlinks, ignore_broken_items):
323 """Expands the directories and the symlinks, applies the blacklist and
324 verifies files exist.
325
326 Files are specified using the OS-native path separator.
327 """
328 outfiles = []
329 for relfile in infiles:
330 try:
331 outfiles.extend(expand_directory_and_symlink(indir, relfile, blacklist,
332 follow_symlinks))
333 except isolateserver.MappingError as e:
334 if ignore_broken_items:
335 logging.info('warning: %s', e)
336 else:
337 raise
338 return outfiles
339
340
341 def recreate_tree(outdir, indir, infiles, action, as_hash):
342 """Creates a new tree with only the input files in it.
343
344 Arguments:
345 outdir: Output directory to create the files in.
346 indir: Root directory the infiles are based in.
347 infiles: dict of files to map from |indir| to |outdir|.
348 action: One of the accepted actions of run_isolated.link_file().
349 as_hash: Output filename is the hash instead of relfile.
350 """
351 logging.info(
352 'recreate_tree(outdir=%s, indir=%s, files=%d, action=%s, as_hash=%s)' %
353 (outdir, indir, len(infiles), action, as_hash))
354
355 assert os.path.isabs(outdir) and outdir == os.path.normpath(outdir), outdir
356 if not os.path.isdir(outdir):
357 logging.info('Creating %s' % outdir)
358 os.makedirs(outdir)
359
360 for relfile, metadata in infiles.iteritems():
361 infile = os.path.join(indir, relfile)
362 if as_hash:
363 # Do the hashtable specific checks.
364 if 'l' in metadata:
365 # Skip links when storing a hashtable.
366 continue
367 outfile = os.path.join(outdir, metadata['h'])
368 if os.path.isfile(outfile):
369 # Just do a quick check that the file size matches. No need to stat()
370 # the input file again; grab the value from the dict.
371 if not 's' in metadata:
372 raise isolateserver.MappingError(
373 'Misconfigured item %s: %s' % (relfile, metadata))
374 if metadata['s'] == os.stat(outfile).st_size:
375 continue
376 else:
377 logging.warn('Overwriting %s' % metadata['h'])
378 os.remove(outfile)
379 else:
380 outfile = os.path.join(outdir, relfile)
381 outsubdir = os.path.dirname(outfile)
382 if not os.path.isdir(outsubdir):
383 os.makedirs(outsubdir)
384
385 # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
386 # if metadata.get('T') == True:
387 # open(outfile, 'ab').close()
388 if 'l' in metadata:
389 pointed = metadata['l']
390 logging.debug('Symlink: %s -> %s' % (outfile, pointed))
391 # symlink doesn't exist on Windows.
392 os.symlink(pointed, outfile) # pylint: disable=E1101
393 else:
394 run_isolated.link_file(outfile, infile, action)
395
396
397 def process_input(filepath, prevdict, read_only, flavor, algo):
398 """Processes an input file, a dependency, and return meta data about it.
399
400 Arguments:
401 - filepath: File to act on.
402 - prevdict: the previous dictionary. It is used to retrieve the cached sha-1
403 to skip recalculating the hash.
404 - read_only: If True, the file mode is manipulated. In practice, only one
405 of 4 modes is saved: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
406 Windows, mode is not set since all files are 'executable' by
407 default.
408 - algo: Hashing algorithm used.
409
410 Behaviors:
411 - Retrieves the file mode, file size, file timestamp and file link
412 destination if it is a file link, and calculates the SHA-1 of the file's
413 content if the path points to a file and not a symlink.
414 """
415 out = {}
416 # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
417 # if prevdict.get('T') == True:
418 # # The file's content is ignored. Skip the time and hard code mode.
419 # if get_flavor() != 'win':
420 # out['m'] = stat.S_IRUSR | stat.S_IRGRP
421 # out['s'] = 0
422 # out['h'] = algo().hexdigest()
423 # out['T'] = True
424 # return out
425
426 # Always check the file stat and check if it is a link. The timestamp is used
427 # to know if the file's content/symlink destination should be looked into.
428 # E.g. only reuse from prevdict if the timestamp hasn't changed.
429 # There is the risk of the file's timestamp being reset to its last value
430 # manually while its content changed. We don't protect against that use case.
431 try:
432 filestats = os.lstat(filepath)
433 except OSError:
434 # The file is not present.
435 raise isolateserver.MappingError('%s is missing' % filepath)
436 is_link = stat.S_ISLNK(filestats.st_mode)
437
438 if flavor != 'win':
439 # Ignore file mode on Windows since it's not really useful there.
440 filemode = stat.S_IMODE(filestats.st_mode)
441 # Remove write access for group and all access to 'others'.
442 filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
443 if read_only:
444 filemode &= ~stat.S_IWUSR
445 if filemode & stat.S_IXUSR:
446 filemode |= stat.S_IXGRP
447 else:
448 filemode &= ~stat.S_IXGRP
449 if not is_link:
450 out['m'] = filemode
451
452 # Used to skip recalculating the hash or link destination. Use the most recent
453 # update time.
454 # TODO(maruel): Save it in the .state file instead of .isolated so the
455 # .isolated file is deterministic.
456 out['t'] = int(round(filestats.st_mtime))
457
458 if not is_link:
459 out['s'] = filestats.st_size
460 # If the timestamp wasn't updated and the file size is still the same, carry
461 # over the sha-1.
462 if (prevdict.get('t') == out['t'] and
463 prevdict.get('s') == out['s']):
464 # Reuse the previous hash if available.
465 out['h'] = prevdict.get('h')
466 if not out.get('h'):
467 out['h'] = isolateserver.hash_file(filepath, algo)
468 else:
469 # If the timestamp wasn't updated, carry over the link destination.
470 if prevdict.get('t') == out['t']:
471 # Reuse the previous link destination if available.
472 out['l'] = prevdict.get('l')
473 if out.get('l') is None:
474 # The link could be in an incorrect path case. In practice, this only
475 # happens on OSX on case-insensitive HFS.
476 # TODO(maruel): It'd be better if it was only done once, in
477 # expand_directory_and_symlink(), so it would not be necessary to do it
478 # again here.
479 symlink_value = os.readlink(filepath) # pylint: disable=E1101
480 filedir = file_path.get_native_path_case(os.path.dirname(filepath))
481 native_dest = fix_native_path_case(filedir, symlink_value)
482 out['l'] = os.path.relpath(native_dest, filedir)
483 return out
484
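A sketch of the metadata produced for a regular file, assuming the current directory contains isolate.py (POSIX; the 'm' key is omitted on Windows and for symlinks, and the exact mode depends on the sanitization above):

  import hashlib
  meta = process_input('isolate.py', {}, False, get_flavor(), hashlib.sha1)
  # -> {'m': <sanitized mode>, 's': <file size>, 't': <mtime>, 'h': '<sha-1 hex>'}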
485
486 ### Variable stuff.
487
488
489 def isolatedfile_to_state(filename):
490 """Replaces the file's extension."""
491 return filename + '.state'
492
493
494 def determine_root_dir(relative_root, infiles):
495 """For a list of infiles, determines the deepest root directory that is
496 referenced indirectly.
497
498 All arguments must be using os.path.sep.
499 """
500 # The trick used to determine the root directory is to look at how far back
501 # up each file reference reaches.
502 deepest_root = relative_root
503 for i in infiles:
504 x = relative_root
505 while i.startswith('..' + os.path.sep):
506 i = i[3:]
507 assert not i.startswith(os.path.sep)
508 x = os.path.dirname(x)
509 if deepest_root.startswith(x):
510 deepest_root = x
511 logging.debug(
512 'determine_root_dir(%s, %d files) -> %s' % (
513 relative_root, len(infiles), deepest_root))
514 return deepest_root
515
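A worked example (POSIX separators assumed): one file reaches up one level and another reaches up two, so the deepest root covering both is two directories up:

  print determine_root_dir('/src/chrome/out',
                           ['../data/a.txt', '../../base/b.cc'])
  # -> '/src'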
516
517 def replace_variable(part, variables):
518 m = re.match(r'<\(([A-Z_]+)\)', part)
519 if m:
520 if m.group(1) not in variables:
521 raise ExecutionError(
522 'Variable "%s" was not found in %s.\nDid you forget to specify '
523 '--variable?' % (m.group(1), variables))
524 return variables[m.group(1)]
525 return part
526
527
528 def process_variables(cwd, variables, relative_base_dir):
529 """Processes path variables as a special case and returns a copy of the dict.
530
531 For each 'path' variable: first normalizes it based on |cwd|, verifies it
532 exists, then makes it relative to relative_base_dir.
533 """
534 relative_base_dir = file_path.get_native_path_case(relative_base_dir)
535 variables = variables.copy()
536 for i in PATH_VARIABLES:
537 if i not in variables:
538 continue
539 variable = variables[i].strip()
540 # Variables could contain / or \ on windows. Always normalize to
541 # os.path.sep.
542 variable = variable.replace('/', os.path.sep)
543 variable = os.path.join(cwd, variable)
544 variable = os.path.normpath(variable)
545 variable = file_path.get_native_path_case(variable)
546 if not os.path.isdir(variable):
547 raise ExecutionError('%s=%s is not a directory' % (i, variable))
548
549 # All variables are relative to the .isolate file.
550 variable = os.path.relpath(variable, relative_base_dir)
551 logging.debug(
552 'Translated variable %s from %s to %s', i, variables[i], variable)
553 variables[i] = variable
554 return variables
555
556
557 def eval_variables(item, variables):
558 """Replaces the .isolate variables in a string item.
559
560 Note that the .isolate format is a subset of the .gyp dialect.
561 """
562 return ''.join(
563 replace_variable(p, variables) for p in re.split(r'(<\([A-Z_]+\))', item))
564
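For example, substituting two variables (hypothetical values):

  print eval_variables('<(PRODUCT_DIR)/unit_tests<(EXECUTABLE_SUFFIX)',
                       {'PRODUCT_DIR': 'out/Release',
                        'EXECUTABLE_SUFFIX': '.exe'})
  # -> 'out/Release/unit_tests.exe'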
565
566 def classify_files(root_dir, tracked, untracked):
567 """Converts the list of files into a .isolate 'variables' dictionary.
568
569 Arguments:
570 - tracked: list of file names to generate a dictionary out of; these should
571 probably be tracked.
572 - untracked: list of file names that must not be tracked.
573 """
574 # These directories are not guaranteed to be always present on every builder.
575 OPTIONAL_DIRECTORIES = (
576 'test/data/plugin',
577 'third_party/WebKit/LayoutTests',
578 )
579
580 new_tracked = []
581 new_untracked = list(untracked)
582
583 def should_be_tracked(filepath):
584 """Returns True if it is a file without whitespace in a non-optional
585 directory that has no symlink in its path.
586 """
587 if filepath.endswith('/'):
588 return False
589 if ' ' in filepath:
590 return False
591 if any(i in filepath for i in OPTIONAL_DIRECTORIES):
592 return False
593 # Look if any element in the path is a symlink.
594 split = filepath.split('/')
595 for i in range(len(split)):
596 if os.path.islink(os.path.join(root_dir, '/'.join(split[:i+1]))):
597 return False
598 return True
599
600 for filepath in sorted(tracked):
601 if should_be_tracked(filepath):
602 new_tracked.append(filepath)
603 else:
604 # Anything else.
605 new_untracked.append(filepath)
606
607 variables = {}
608 if new_tracked:
609 variables[KEY_TRACKED] = sorted(new_tracked)
610 if new_untracked:
611 variables[KEY_UNTRACKED] = sorted(new_untracked)
612 return variables
613
614
615 def chromium_fix(f, variables):
616 """Fixes an isolate dependnecy with Chromium-specific fixes."""
617 # Skip log in PRODUCT_DIR. Note that these are applied on '/' style path
618 # separator.
619 LOG_FILE = re.compile(r'^\<\(PRODUCT_DIR\)\/[^\/]+\.log$')
620 # Ignored items.
621 IGNORED_ITEMS = (
622 # http://crbug.com/160539, on Windows, it's in chrome/.
623 'Media Cache/',
624 'chrome/Media Cache/',
625 # 'First Run' is not created by the compile, but by the test itself.
626 '<(PRODUCT_DIR)/First Run')
627
628 # Blacklist logs and other unimportant files.
629 if LOG_FILE.match(f) or f in IGNORED_ITEMS:
630 logging.debug('Ignoring %s', f)
631 return None
632
633 EXECUTABLE = re.compile(
634 r'^(\<\(PRODUCT_DIR\)\/[^\/\.]+)' +
635 re.escape(variables.get('EXECUTABLE_SUFFIX', '')) +
636 r'$')
637 match = EXECUTABLE.match(f)
638 if match:
639 return match.group(1) + '<(EXECUTABLE_SUFFIX)'
640
641 if sys.platform == 'darwin':
642 # On OSX, the name of the output depends on a gyp define; it can be
643 # 'Google Chrome.app' or 'Chromium.app', same for 'XXX
644 # Framework.framework'. Furthermore, they are versioned with a gyp
645 # variable. To lower the complexity of the .isolate file, remove all the
646 # individual entries that show up under any of the 4 entries and replace
647 # them with the directory itself. Overall, this results in a few more
648 # files than strictly necessary.
649 OSX_BUNDLES = (
650 '<(PRODUCT_DIR)/Chromium Framework.framework/',
651 '<(PRODUCT_DIR)/Chromium.app/',
652 '<(PRODUCT_DIR)/Google Chrome Framework.framework/',
653 '<(PRODUCT_DIR)/Google Chrome.app/',
654 )
655 for prefix in OSX_BUNDLES:
656 if f.startswith(prefix):
657 # Note this results in duplicate values, so a set() must be used to
658 # remove duplicates.
659 return prefix
660 return f
661
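Two sketches of the fixes above, using a hypothetical EXECUTABLE_SUFFIX:

  print chromium_fix('<(PRODUCT_DIR)/chrome.log', {})  # None -- log, blacklisted
  print chromium_fix('<(PRODUCT_DIR)/unit_tests.exe',
                     {'EXECUTABLE_SUFFIX': '.exe'})
  # -> '<(PRODUCT_DIR)/unit_tests<(EXECUTABLE_SUFFIX)'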
662
663 def generate_simplified(
664 tracked, untracked, touched, root_dir, variables, relative_cwd,
665 trace_blacklist):
666 """Generates a clean and complete .isolate 'variables' dictionary.
667
668 Cleans up and extracts only files from within root_dir then processes
669 variables and relative_cwd.
670 """
671 root_dir = os.path.realpath(root_dir)
672 logging.info(
673 'generate_simplified(%d files, %s, %s, %s)' %
674 (len(tracked) + len(untracked) + len(touched),
675 root_dir, variables, relative_cwd))
676
677 # Preparation work.
678 relative_cwd = cleanup_path(relative_cwd)
679 assert not os.path.isabs(relative_cwd), relative_cwd
680 # Creates the right set of variables here. We only care about PATH_VARIABLES.
681 path_variables = dict(
682 ('<(%s)' % k, variables[k].replace(os.path.sep, '/'))
683 for k in PATH_VARIABLES if k in variables)
684 variables = variables.copy()
685 variables.update(path_variables)
686
687 # Actual work: Process the files.
688 # TODO(maruel): if all the files in a directory are in part tracked and in
689 # part untracked, the directory will not be extracted. Tracked files should be
690 # 'promoted' to be untracked as needed.
691 tracked = trace_inputs.extract_directories(
692 root_dir, tracked, trace_blacklist)
693 untracked = trace_inputs.extract_directories(
694 root_dir, untracked, trace_blacklist)
695 # touched is not compressed, otherwise it would result in files being
696 # archived that we don't need.
697
698 root_dir_posix = root_dir.replace(os.path.sep, '/')
699 def fix(f):
700 """Bases the file on the most restrictive variable."""
701 # Important, GYP stores the files with / and not \.
702 f = f.replace(os.path.sep, '/')
703 logging.debug('fix(%s)' % f)
704 # If it's not already a variable.
705 if not f.startswith('<'):
706 # relative_cwd is usually the directory containing the gyp file. It may be
707 # empty if the whole directory containing the gyp file is needed.
708 # Use absolute paths in case cwd_dir is outside of root_dir.
709 # Convert the whole thing to / since it's isolate's speak.
710 f = posix_relpath(
711 posixpath.join(root_dir_posix, f),
712 posixpath.join(root_dir_posix, relative_cwd)) or './'
713
714 for variable, root_path in path_variables.iteritems():
715 if f.startswith(root_path):
716 f = variable + f[len(root_path):]
717 logging.debug('Converted to %s' % f)
718 break
719 return f
720
721 def fix_all(items):
722 """Reduces the items to convert variables, removes unneeded items, apply
723 chromium-specific fixes and only return unique items.
724 """
725 variables_converted = (fix(f.path) for f in items)
726 chromium_fixed = (chromium_fix(f, variables) for f in variables_converted)
727 return set(f for f in chromium_fixed if f)
728
729 tracked = fix_all(tracked)
730 untracked = fix_all(untracked)
731 touched = fix_all(touched)
732 out = classify_files(root_dir, tracked, untracked)
733 if touched:
734 out[KEY_TOUCHED] = sorted(touched)
735 return out
736
737
738 def chromium_filter_flags(variables):
739 """Filters out build flags used in Chromium that we don't want to treat as
740 configuration variables.
741 """
742 # TODO(benrg): Need a better way to determine this.
743 blacklist = set(PATH_VARIABLES + ('EXECUTABLE_SUFFIX', 'FLAG'))
744 return dict((k, v) for k, v in variables.iteritems() if k not in blacklist)
745
746
747 def generate_isolate(
748 tracked, untracked, touched, root_dir, variables, relative_cwd,
749 trace_blacklist):
750 """Generates a clean and complete .isolate file."""
751 dependencies = generate_simplified(
752 tracked, untracked, touched, root_dir, variables, relative_cwd,
753 trace_blacklist)
754 config_variables = chromium_filter_flags(variables)
755 config_variable_names, config_values = zip(
756 *sorted(config_variables.iteritems()))
757 out = Configs(None)
758 # The new dependencies apply to just one configuration, namely config_values.
759 out.merge_dependencies(dependencies, config_variable_names, [config_values])
760 return out.make_isolate_file()
761
762
763 def split_touched(files):
764 """Splits files that are touched vs files that are read."""
765 tracked = []
766 touched = []
767 for f in files:
768 if f.size:
769 tracked.append(f)
770 else:
771 touched.append(f)
772 return tracked, touched
773
774
775 def pretty_print(variables, stdout):
776 """Outputs a gyp compatible list from the decoded variables.
777
778 Similar to pprint.pprint() but with NIH syndrome.
779 """
780 # Order the dictionary keys by these keys in priority.
781 ORDER = (
782 'variables', 'condition', 'command', 'relative_cwd', 'read_only',
783 KEY_TRACKED, KEY_UNTRACKED)
784
785 def sorting_key(x):
786 """Gives priority to 'most important' keys before the others."""
787 if x in ORDER:
788 return str(ORDER.index(x))
789 return x
790
791 def loop_list(indent, items):
792 for item in items:
793 if isinstance(item, basestring):
794 stdout.write('%s\'%s\',\n' % (indent, item))
795 elif isinstance(item, dict):
796 stdout.write('%s{\n' % indent)
797 loop_dict(indent + ' ', item)
798 stdout.write('%s},\n' % indent)
799 elif isinstance(item, list):
800 # A list inside a list will write the first item embedded.
801 stdout.write('%s[' % indent)
802 for index, i in enumerate(item):
803 if isinstance(i, basestring):
804 stdout.write(
805 '\'%s\', ' % i.replace('\\', '\\\\').replace('\'', '\\\''))
806 elif isinstance(i, dict):
807 stdout.write('{\n')
808 loop_dict(indent + ' ', i)
809 if index != len(item) - 1:
810 x = ', '
811 else:
812 x = ''
813 stdout.write('%s}%s' % (indent, x))
814 else:
815 assert False
816 stdout.write('],\n')
817 else:
818 assert False
819
820 def loop_dict(indent, items):
821 for key in sorted(items, key=sorting_key):
822 item = items[key]
823 stdout.write("%s'%s': " % (indent, key))
824 if isinstance(item, dict):
825 stdout.write('{\n')
826 loop_dict(indent + ' ', item)
827 stdout.write(indent + '},\n')
828 elif isinstance(item, list):
829 stdout.write('[\n')
830 loop_list(indent + ' ', item)
831 stdout.write(indent + '],\n')
832 elif isinstance(item, basestring):
833 stdout.write(
834 '\'%s\',\n' % item.replace('\\', '\\\\').replace('\'', '\\\''))
835 elif item in (True, False, None):
836 stdout.write('%s\n' % item)
837 else:
838 assert False, item
839
840 stdout.write('{\n')
841 loop_dict(' ', variables)
842 stdout.write('}\n')
843
844
845 def union(lhs, rhs):
846 """Merges two compatible datastructures composed of dict/list/set."""
847 assert lhs is not None or rhs is not None
848 if lhs is None:
849 return copy.deepcopy(rhs)
850 if rhs is None:
851 return copy.deepcopy(lhs)
852 assert type(lhs) == type(rhs), (lhs, rhs)
853 if hasattr(lhs, 'union'):
854 # Includes set, ConfigSettings and Configs.
855 return lhs.union(rhs)
856 if isinstance(lhs, dict):
857 return dict((k, union(lhs.get(k), rhs.get(k))) for k in set(lhs).union(rhs))
858 elif isinstance(lhs, list):
859 # Do not go inside the list.
860 return lhs + rhs
861 assert False, type(lhs)
862
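A small sketch of the recursive merge: lists concatenate, sets union, and a key missing on one side is deep-copied from the other:

  print union({'a': [1], 'b': set([2])}, {'a': [3], 'c': [4]})
  # -> {'a': [1, 3], 'b': set([2]), 'c': [4]} (key order may vary)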
863
864 def extract_comment(content):
865 """Extracts file level comment."""
866 out = []
867 for line in content.splitlines(True):
868 if line.startswith('#'):
869 out.append(line)
870 else:
871 break
872 return ''.join(out)
873
874
875 def eval_content(content):
876 """Evaluates a python file and return the value defined in it.
877
878 Used in practice for .isolate files.
879 """
880 globs = {'__builtins__': None}
881 locs = {}
882 try:
883 value = eval(content, globs, locs)
884 except TypeError as e:
885 e.args = list(e.args) + [content]
886 raise
887 assert locs == {}, locs
888 assert globs == {'__builtins__': None}, globs
889 return value
890
891
892 def match_configs(expr, config_variables, all_configs):
893 """Returns the configs from |all_configs| that match the |expr|, where
894 the elements of |all_configs| are tuples of values for the |config_variables|.
895 Example:
896 >>> match_configs(expr = "(foo==1 or foo==2) and bar=='b'",
897 config_variables = ["foo", "bar"],
898 all_configs = [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')])
899 [(1, 'b'), (2, 'b')]
900 """
901 return [
902 config for config in all_configs
903 if eval(expr, dict(zip(config_variables, config)))
904 ]
905
906
907 def verify_variables(variables):
908 """Verifies the |variables| dictionary is in the expected format."""
909 VALID_VARIABLES = [
910 KEY_TOUCHED,
911 KEY_TRACKED,
912 KEY_UNTRACKED,
913 'command',
914 'read_only',
915 ]
916 assert isinstance(variables, dict), variables
917 assert set(VALID_VARIABLES).issuperset(set(variables)), variables.keys()
918 for name, value in variables.iteritems():
919 if name == 'read_only':
920 assert value in (True, False, None), value
921 else:
922 assert isinstance(value, list), value
923 assert all(isinstance(i, basestring) for i in value), value
924
925
926 def verify_ast(expr, variables_and_values):
927 """Verifies that |expr| is of the form
928 expr ::= expr ( "or" | "and" ) expr
929 | identifier "==" ( string | int )
930 Also collects the variable identifiers and string/int values in the dict
931 |variables_and_values|, in the form {'var': set([val1, val2, ...]), ...}.
932 """
933 assert isinstance(expr, (ast.BoolOp, ast.Compare))
934 if isinstance(expr, ast.BoolOp):
935 assert isinstance(expr.op, (ast.And, ast.Or))
936 for subexpr in expr.values:
937 verify_ast(subexpr, variables_and_values)
938 else:
939 assert isinstance(expr.left.ctx, ast.Load)
940 assert len(expr.ops) == 1
941 assert isinstance(expr.ops[0], ast.Eq)
942 var_values = variables_and_values.setdefault(expr.left.id, set())
943 rhs = expr.comparators[0]
944 assert isinstance(rhs, (ast.Str, ast.Num))
945 var_values.add(rhs.n if isinstance(rhs, ast.Num) else rhs.s)
946
947
948 def verify_condition(condition, variables_and_values):
949 """Verifies the |condition| dictionary is in the expected format.
950 See verify_ast() for the meaning of |variables_and_values|.
951 """
952 VALID_INSIDE_CONDITION = ['variables']
953 assert isinstance(condition, list), condition
954 assert len(condition) == 2, condition
955 expr, then = condition
956
957 test_ast = compile(expr, '<condition>', 'eval', ast.PyCF_ONLY_AST)
958 verify_ast(test_ast.body, variables_and_values)
959
960 assert isinstance(then, dict), then
961 assert set(VALID_INSIDE_CONDITION).issuperset(set(then)), then.keys()
962 verify_variables(then['variables'])
963
964
965 def verify_root(value, variables_and_values):
966 """Verifies that |value| is the parsed form of a valid .isolate file.
967 See verify_ast() for the meaning of |variables_and_values|.
968 """
969 VALID_ROOTS = ['includes', 'conditions']
970 assert isinstance(value, dict), value
971 assert set(VALID_ROOTS).issuperset(set(value)), value.keys()
972
973 includes = value.get('includes', [])
974 assert isinstance(includes, list), includes
975 for include in includes:
976 assert isinstance(include, basestring), include
977
978 conditions = value.get('conditions', [])
979 assert isinstance(conditions, list), conditions
980 for condition in conditions:
981 verify_condition(condition, variables_and_values)
982
983
984 def remove_weak_dependencies(values, key, item, item_configs):
985 """Removes any configs from this key if the item is already under a
986 strong key.
987 """
988 if key == KEY_TOUCHED:
989 item_configs = set(item_configs)
990 for stronger_key in (KEY_TRACKED, KEY_UNTRACKED):
991 try:
992 item_configs -= values[stronger_key][item]
993 except KeyError:
994 pass
995
996 return item_configs
997
998
999 def remove_repeated_dependencies(folders, key, item, item_configs):
1000 """Removes any configs from this key if the item is in a folder that is
1001 already included."""
1002
1003 if key in (KEY_UNTRACKED, KEY_TRACKED, KEY_TOUCHED):
1004 item_configs = set(item_configs)
1005 for (folder, configs) in folders.iteritems():
1006 if folder != item and item.startswith(folder):
1007 item_configs -= configs
1008
1009 return item_configs
1010
1011
1012 def get_folders(values_dict):
1013 """Returns a dict of all the folders in the given value_dict."""
1014 return dict(
1015 (item, configs) for (item, configs) in values_dict.iteritems()
1016 if item.endswith('/')
1017 )
1018
1019
1020 def invert_map(variables):
1021 """Converts {config: {deptype: list(depvals)}} to
1022 {deptype: {depval: set(configs)}}.
1023 """
1024 KEYS = (
1025 KEY_TOUCHED,
1026 KEY_TRACKED,
1027 KEY_UNTRACKED,
1028 'command',
1029 'read_only',
1030 )
1031 out = dict((key, {}) for key in KEYS)
1032 for config, values in variables.iteritems():
1033 for key in KEYS:
1034 if key == 'command':
1035 items = [tuple(values[key])] if key in values else []
1036 elif key == 'read_only':
1037 items = [values[key]] if key in values else []
1038 else:
1039 assert key in (KEY_TOUCHED, KEY_TRACKED, KEY_UNTRACKED)
1040 items = values.get(key, [])
1041 for item in items:
1042 out[key].setdefault(item, set()).add(config)
1043 return out
1044
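A worked sketch: a file tracked by two configs inverts to one entry that maps to the set of both configs:

  print invert_map({
      ('linux',): {KEY_TRACKED: ['a.txt'], 'command': ['./a']},
      ('win',): {KEY_TRACKED: ['a.txt']},
  })[KEY_TRACKED]
  # -> {'a.txt': set([('linux',), ('win',)])}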
1045
1046 def reduce_inputs(values):
1047 """Reduces the output of invert_map() to the strictest minimum list.
1048
1049 Looks at each individual file and directory, maps where they are used and
1050 reconstructs the inverse dictionary.
1051
1052 Returns the minimized dictionary.
1053 """
1054 KEYS = (
1055 KEY_TOUCHED,
1056 KEY_TRACKED,
1057 KEY_UNTRACKED,
1058 'command',
1059 'read_only',
1060 )
1061
1062 # Folders can only live in KEY_UNTRACKED.
1063 folders = get_folders(values.get(KEY_UNTRACKED, {}))
1064
1065 out = dict((key, {}) for key in KEYS)
1066 for key in KEYS:
1067 for item, item_configs in values.get(key, {}).iteritems():
1068 item_configs = remove_weak_dependencies(values, key, item, item_configs)
1069 item_configs = remove_repeated_dependencies(
1070 folders, key, item, item_configs)
1071 if item_configs:
1072 out[key][item] = item_configs
1073 return out
1074
1075
1076 def convert_map_to_isolate_dict(values, config_variables):
1077 """Regenerates back a .isolate configuration dict from files and dirs
1078 mappings generated from reduce_inputs().
1079 """
1080 # Gather a list of configurations for set inversion later.
1081 all_mentioned_configs = set()
1082 for configs_by_item in values.itervalues():
1083 for configs in configs_by_item.itervalues():
1084 all_mentioned_configs.update(configs)
1085
1086 # Invert the mapping to make it a dict first.
1087 conditions = {}
1088 for key in values:
1089 for item, configs in values[key].iteritems():
1090 then = conditions.setdefault(frozenset(configs), {})
1091 variables = then.setdefault('variables', {})
1092
1093 if item in (True, False):
1094 # One-off for read_only.
1095 variables[key] = item
1096 else:
1097 assert item
1098 if isinstance(item, tuple):
1099 # One-off for command.
1100 # Do not merge lists and do not sort!
1101 # Note that item is a tuple.
1102 assert key not in variables
1103 variables[key] = list(item)
1104 else:
1105 # The list of items (files or dirs). Append the new item and keep
1106 # the list sorted.
1107 l = variables.setdefault(key, [])
1108 l.append(item)
1109 l.sort()
1110
1111 if all_mentioned_configs:
1112 config_values = map(set, zip(*all_mentioned_configs))
1113 sef = short_expression_finder.ShortExpressionFinder(
1114 zip(config_variables, config_values))
1115
1116 conditions = sorted(
1117 [sef.get_expr(configs), then] for configs, then in conditions.iteritems())
1118 return {'conditions': conditions}
1119
1120
1121 ### Internal state files.
1122
1123
1124 class ConfigSettings(object):
1125 """Represents the dependency variables for a single build configuration.
1126 The structure is immutable.
1127 """
1128 def __init__(self, config, values):
1129 self.config = config
1130 verify_variables(values)
1131 self.touched = sorted(values.get(KEY_TOUCHED, []))
1132 self.tracked = sorted(values.get(KEY_TRACKED, []))
1133 self.untracked = sorted(values.get(KEY_UNTRACKED, []))
1134 self.command = values.get('command', [])[:]
1135 self.read_only = values.get('read_only')
1136
1137 def union(self, rhs):
1138 assert not (self.config and rhs.config) or (self.config == rhs.config)
1139 assert not (self.command and rhs.command) or (self.command == rhs.command)
1140 var = {
1141 KEY_TOUCHED: sorted(self.touched + rhs.touched),
1142 KEY_TRACKED: sorted(self.tracked + rhs.tracked),
1143 KEY_UNTRACKED: sorted(self.untracked + rhs.untracked),
1144 'command': self.command or rhs.command,
1145 'read_only': rhs.read_only if self.read_only is None else self.read_only,
1146 }
1147 return ConfigSettings(self.config or rhs.config, var)
1148
1149 def flatten(self):
1150 out = {}
1151 if self.command:
1152 out['command'] = self.command
1153 if self.touched:
1154 out[KEY_TOUCHED] = self.touched
1155 if self.tracked:
1156 out[KEY_TRACKED] = self.tracked
1157 if self.untracked:
1158 out[KEY_UNTRACKED] = self.untracked
1159 if self.read_only is not None:
1160 out['read_only'] = self.read_only
1161 return out
1162
1163
1164 class Configs(object):
1165 """Represents a processed .isolate file.
1166
1167 Stores the file in a processed way, split by configuration.
1168 """
1169 def __init__(self, file_comment):
1170 self.file_comment = file_comment
1171 # The keys of by_config are tuples of values for the configuration
1172 # variables. The names of the variables (which must be the same for
1173 # every by_config key) are kept in config_variables. Initially by_config
1174 # is empty and we don't know what configuration variables will be used,
1175 # so config_variables also starts out empty. It will be set by the first
1176 # call to union() or merge_dependencies().
1177 self.by_config = {}
1178 self.config_variables = ()
1179
1180 def union(self, rhs):
1181 """Adds variables from rhs (a Configs) to the existing variables.
1182 """
1183 config_variables = self.config_variables
1184 if not config_variables:
1185 config_variables = rhs.config_variables
1186 else:
1187 # We can't proceed if this isn't true since we don't know the correct
1188 # default values for extra variables. The variables are sorted so we
1189 # don't need to worry about permutations.
1190 if rhs.config_variables and rhs.config_variables != config_variables:
1191 raise ExecutionError(
1192 'Variables in merged .isolate files do not match: %r and %r' % (
1193 config_variables, rhs.config_variables))
1194
1195 # Takes the first file comment, preferring lhs.
1196 out = Configs(self.file_comment or rhs.file_comment)
1197 out.config_variables = config_variables
1198 for config in set(self.by_config) | set(rhs.by_config):
1199 out.by_config[config] = union(
1200 self.by_config.get(config), rhs.by_config.get(config))
1201 return out
1202
1203 def merge_dependencies(self, values, config_variables, configs):
1204 """Adds new dependencies to this object for the given configurations.
1205 Arguments:
1206 values: A variables dict as found in a .isolate file, e.g.,
1207 {KEY_TOUCHED: [...], 'command': ...}.
1208 config_variables: An ordered list of configuration variables, e.g.,
1209 ["OS", "chromeos"]. If this object already contains any dependencies,
1210 the configuration variables must match.
1211 configs: a list of tuples of values of the configuration variables,
1212 e.g., [("mac", 0), ("linux", 1)]. The dependencies in |values|
1213 are added to all of these configurations, and other configurations
1214 are unchanged.
1215 """
1216 if not values:
1217 return
1218
1219 if not self.config_variables:
1220 self.config_variables = config_variables
1221 else:
1222 # See comment in Configs.union().
1223 assert self.config_variables == config_variables
1224
1225 for config in configs:
1226 self.by_config[config] = union(
1227 self.by_config.get(config), ConfigSettings(config, values))
1228
1229 def flatten(self):
1230 """Returns a flat dictionary representation of the configuration.
1231 """
1232 return dict((k, v.flatten()) for k, v in self.by_config.iteritems())
1233
1234 def make_isolate_file(self):
1235 """Returns a dictionary suitable for writing to a .isolate file.
1236 """
1237 dependencies_by_config = self.flatten()
1238 configs_by_dependency = reduce_inputs(invert_map(dependencies_by_config))
1239 return convert_map_to_isolate_dict(configs_by_dependency,
1240 self.config_variables)
1241
1242
1243 # TODO(benrg): Remove this function when no old-format files are left.
1244 def convert_old_to_new_format(value):
1245 """Converts from the old .isolate format, which only has one variable (OS),
1246 always includes 'linux', 'mac' and 'win' in the set of valid values for OS,
1247 and allows conditions that depend on the set of all OSes, to the new format,
1248 which allows any set of variables, has no hardcoded values, and only allows
1249 explicit positive tests of variable values.
1250 """
1251 conditions = value.get('conditions', [])
1252 if 'variables' not in value and all(len(cond) == 2 for cond in conditions):
1253 return value # Nothing to change
1254
1255 def parse_condition(cond):
1256 return re.match(r'OS=="(\w+)"\Z', cond[0]).group(1)
1257
1258 oses = set(map(parse_condition, conditions))
1259 default_oses = set(['linux', 'mac', 'win'])
1260 oses = sorted(oses | default_oses)
1261
1262 def if_not_os(not_os, then):
1263 expr = ' or '.join('OS=="%s"' % os for os in oses if os != not_os)
1264 return [expr, then]
1265
1266 conditions = [
1267 cond[:2] for cond in conditions if cond[1]
1268 ] + [
1269 if_not_os(parse_condition(cond), cond[2])
1270 for cond in conditions if len(cond) == 3
1271 ]
1272
1273 if 'variables' in value:
1274 conditions.append(if_not_os(None, {'variables': value.pop('variables')}))
1275 conditions.sort()
1276
1277 value = value.copy()
1278 value['conditions'] = conditions
1279 return value
1280
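A sketch of the conversion: a bare top-level 'variables' block from the old format becomes a condition that holds for every default OS:

  print convert_old_to_new_format({'variables': {KEY_TRACKED: ['a.txt']}})
  # -> {'conditions': [['OS=="linux" or OS=="mac" or OS=="win"',
  #      {'variables': {'isolate_dependency_tracked': ['a.txt']}}]]}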
1281
1282 def load_isolate_as_config(isolate_dir, value, file_comment):
1283 """Parses one .isolate file and returns a Configs() instance.
1284
1285 |value| is the loaded dictionary that was defined in the gyp file.
1286
1287 The expected format is strict; anything diverging from the format below will
1288 throw an assert:
1289 {
1290 'includes': [
1291 'foo.isolate',
1292 ],
1293 'conditions': [
1294 ['OS=="vms" and foo=42', {
1295 'variables': {
1296 'command': [
1297 ...
1298 ],
1299 'isolate_dependency_tracked': [
1300 ...
1301 ],
1302 'isolate_dependency_untracked': [
1303 ...
1304 ],
1305 'read_only': False,
1306 },
1307 }],
1308 ...
1309 ],
1310 }
1311 """
1312 value = convert_old_to_new_format(value)
1313
1314 variables_and_values = {}
1315 verify_root(value, variables_and_values)
1316 if variables_and_values:
1317 config_variables, config_values = zip(
1318 *sorted(variables_and_values.iteritems()))
1319 all_configs = list(itertools.product(*config_values))
1320 else:
1321 config_variables = None
1322 all_configs = []
1323
1324 isolate = Configs(file_comment)
1325
1326 # Add configuration-specific variables.
1327 for expr, then in value.get('conditions', []):
1328 configs = match_configs(expr, config_variables, all_configs)
1329 isolate.merge_dependencies(then['variables'], config_variables, configs)
1330
1331 # Load the includes.
1332 for include in value.get('includes', []):
1333 if os.path.isabs(include):
1334 raise ExecutionError(
1335 'Failed to load configuration; absolute include path \'%s\'' %
1336 include)
1337 included_isolate = os.path.normpath(os.path.join(isolate_dir, include))
1338 with open(included_isolate, 'r') as f:
1339 included_isolate = load_isolate_as_config(
1340 os.path.dirname(included_isolate),
1341 eval_content(f.read()),
1342 None)
1343 isolate = union(isolate, included_isolate)
1344
1345 return isolate
1346
1347
1348 def load_isolate_for_config(isolate_dir, content, variables):
1349 """Loads the .isolate file and returns the information unprocessed but
1350 filtered for the specific OS.
1351
1352 Returns the command, dependencies and read_only flag. The dependencies are
1353 fixed to use os.path.sep.
1354 """
1355 # Load the .isolate file, process its conditions, retrieve the command and
1356 # dependencies.
1357 isolate = load_isolate_as_config(isolate_dir, eval_content(content), None)
1358 try:
1359 config_name = tuple(variables[var] for var in isolate.config_variables)
1360 except KeyError:
1361 raise ExecutionError(
1362 'These configuration variables were missing from the command line: %s' %
1363 ', '.join(sorted(set(isolate.config_variables) - set(variables))))
1364 config = isolate.by_config.get(config_name)
1365 if not config:
1366 raise ExecutionError(
1367 'Failed to load configuration for variable \'%s\' for config(s) \'%s\''
1368 '\nAvailable configs: %s' %
1369 (', '.join(isolate.config_variables),
1370 ', '.join(config_name),
1371 ', '.join(str(s) for s in isolate.by_config)))
1372 # Merge tracked and untracked variables; isolate.py doesn't care about the
1373 # trackability of the variables, only the build tool does.
1374 dependencies = [
1375 f.replace('/', os.path.sep) for f in config.tracked + config.untracked
1376 ]
1377 touched = [f.replace('/', os.path.sep) for f in config.touched]
1378 return config.command, dependencies, touched, config.read_only
1379
1380
1381 def save_isolated(isolated, data):
1382 """Writes one or multiple .isolated files.
1383
1384 Note: this reference implementation does not create child .isolated files,
1385 so it always returns an empty list.
1386
1387 Returns the list of child isolated files that are included by |isolated|.
1388 """
1389 trace_inputs.write_json(isolated, data, True)
1390 return []
1391
1392
1393 def chromium_save_isolated(isolated, data, variables, algo):
1394 """Writes one or many .isolated files.
1395
1396 This slightly increases the cold cache cost but greatly reduces the warm cache
1397 cost by splitting low-churn files off the master .isolated file. It also
1398 reduces overall isolateserver memcache consumption.
1399 """
1400 slaves = []
1401
1402 def extract_into_included_isolated(prefix):
1403 new_slave = {
1404 'algo': data['algo'],
1405 'files': {},
1406 'os': data['os'],
1407 'version': data['version'],
1408 }
1409 for f in data['files'].keys():
1410 if f.startswith(prefix):
1411 new_slave['files'][f] = data['files'].pop(f)
1412 if new_slave['files']:
1413 slaves.append(new_slave)
1414
1415 # Split test/data/ in its own .isolated file.
1416 extract_into_included_isolated(os.path.join('test', 'data', ''))
1417
1418 # Split everything out of PRODUCT_DIR in its own .isolated file.
1419 if variables.get('PRODUCT_DIR'):
1420 extract_into_included_isolated(variables['PRODUCT_DIR'])
1421
1422 files = []
1423 for index, f in enumerate(slaves):
1424 slavepath = isolated[:-len('.isolated')] + '.%d.isolated' % index
1425 trace_inputs.write_json(slavepath, f, True)
1426 data.setdefault('includes', []).append(
1427 isolateserver.hash_file(slavepath, algo))
1428 files.append(os.path.basename(slavepath))
1429
1430 files.extend(save_isolated(isolated, data))
1431 return files
1432
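A minimal sketch with hypothetical hashes (writes foo.isolated and foo.0.isolated into the current directory): the test/data/ entry is split into its own child file, whose hash is recorded in the master file's 'includes':

  import hashlib
  data = {
      'algo': 'sha-1', 'os': 'linux', 'version': '1.0',
      'files': {
          'unit_tests': {'h': 'aa' * 20, 's': 1},
          os.path.join('test', 'data', 'a.txt'): {'h': 'bb' * 20, 's': 2},
      },
  }
  print chromium_save_isolated('foo.isolated', data, {}, hashlib.sha1)
  # -> ['foo.0.isolated']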
1433
1434 class Flattenable(object):
1435 """Represents data that can be represented as a json file."""
1436 MEMBERS = ()
1437
1438 def flatten(self):
1439 """Returns a json-serializable version of itself.
1440
1441 Skips None entries.
1442 """
1443 items = ((member, getattr(self, member)) for member in self.MEMBERS)
1444 return dict((member, value) for member, value in items if value is not None)
1445
1446 @classmethod
1447 def load(cls, data, *args, **kwargs):
1448 """Loads a flattened version."""
1449 data = data.copy()
1450 out = cls(*args, **kwargs)
1451 for member in out.MEMBERS:
1452 if member in data:
1453 # Access to a protected member XXX of a client class
1454 # pylint: disable=W0212
1455 out._load_member(member, data.pop(member))
1456 if data:
1457 raise ValueError(
1458 'Found unexpected entry %s while constructing an object %s' %
1459 (data, cls.__name__), data, cls.__name__)
1460 return out
1461
1462 def _load_member(self, member, value):
1463 """Loads a member into self."""
1464 setattr(self, member, value)
1465
1466 @classmethod
1467 def load_file(cls, filename, *args, **kwargs):
1468 """Loads the data from a file or return an empty instance."""
1469 try:
1470 out = cls.load(trace_inputs.read_json(filename), *args, **kwargs)
1471 logging.debug('Loaded %s(%s)', cls.__name__, filename)
1472 except (IOError, ValueError) as e:
1473 # On failure, loads the default instance.
1474 out = cls(*args, **kwargs)
1475 logging.warn('Failed to load %s: %s', filename, e)
1476 return out
1477
1478
1479 class SavedState(Flattenable):
1480 """Describes the content of a .state file.
1481
1482 This file caches the items calculated by this script and is used to increase
1483 the performance of the script. This file is not loaded by run_isolated.py.
1484 This file can always be safely removed.
1485
1486 It is important to note that the 'files' dict keys use the native OS path
1487 separator instead of the '/' used in .isolate files.
1488 """
1489 MEMBERS = (
1490 # Algorithm used to generate the hash. At the time of writing, the only
1491 # supported value is 'sha-1'.
1492 'algo',
1493 # Cache of the processed command. This value is saved because .isolated
1494 # files are never loaded by isolate.py so it's the only way to load the
1495 # command safely.
1496 'command',
1497 # Cache of the files found so the next run can skip hash calculation.
1498 'files',
1499 # Path of the original .isolate file. Relative path to isolated_basedir.
1500 'isolate_file',
1501 # List of included .isolated files. Used to support/remember 'slave'
1502 # .isolated files. Relative path to isolated_basedir.
1503 'child_isolated_files',
1504 # If the generated directory tree should be read-only.
1505 'read_only',
1506 # Relative cwd to use to start the command.
1507 'relative_cwd',
1508 # GYP variables used to generate the .isolated file. Variables are saved so
1509 # that a user can run isolate.py after building and the GYP variables are
1510 # still defined.
1511 'variables',
1512 # Version of the file format in format 'major.minor'. Any non-breaking
1513 # change must update minor. Any breaking change must update major.
1514 'version',
1515 )
1516
1517 def __init__(self, isolated_basedir):
1518 """Creates an empty SavedState.
1519
1520 |isolated_basedir| is the directory where the .isolated and .isolated.state
1521 files are saved.
1522 """
1523 super(SavedState, self).__init__()
1524 assert os.path.isabs(isolated_basedir), isolated_basedir
1525 assert os.path.isdir(isolated_basedir), isolated_basedir
1526 self.isolated_basedir = isolated_basedir
1527
1528 # The default algorithm used.
1529 self.algo = isolateserver.SUPPORTED_ALGOS['sha-1']
1530 self.command = []
1531 self.files = {}
1532 self.isolate_file = None
1533 self.child_isolated_files = []
1534 self.read_only = None
1535 self.relative_cwd = None
1536 self.variables = {'OS': get_flavor()}
1537 # The current version.
1538 self.version = '1.0'
1539
1540 def update(self, isolate_file, variables):
1541 """Updates the saved state with new data to keep GYP variables and internal
1542 reference to the original .isolate file.
1543 """
1544 assert os.path.isabs(isolate_file)
1545 # Convert back to a relative path. On Windows, if the isolate and
1546 # isolated files are on different drives, isolate_file will stay an absolute
1547 # path.
1548 isolate_file = safe_relpath(isolate_file, self.isolated_basedir)
1549
1550 # The same .isolate file should always be used to generate the .isolated and
1551 # .isolated.state.
1552 assert isolate_file == self.isolate_file or not self.isolate_file, (
1553 isolate_file, self.isolate_file)
1554 self.isolate_file = isolate_file
1555 self.variables.update(variables)
1556
1557 def update_isolated(self, command, infiles, touched, read_only, relative_cwd):
1558 """Updates the saved state with data necessary to generate a .isolated file.
1559
1560 The new files in |infiles| are added to the self.files dict but their hash
1561 is not calculated here.
1562 """
1563 self.command = command
1564 # Add new files.
1565 for f in infiles:
1566 self.files.setdefault(f, {})
1567 for f in touched:
1568 self.files.setdefault(f, {})['T'] = True
1569 # Prune extraneous files that are not a dependency anymore.
1570 for f in set(self.files).difference(set(infiles).union(touched)):
1571 del self.files[f]
1572 if read_only is not None:
1573 self.read_only = read_only
1574 self.relative_cwd = relative_cwd
1575
1576 def to_isolated(self):
1577 """Creates a .isolated dictionary out of the saved state.
1578
1579 https://code.google.com/p/swarming/wiki/IsolatedDesign
1580 """
1581 def strip(data):
1582 """Returns a 'files' entry with only the whitelisted keys."""
1583 return dict((k, data[k]) for k in ('h', 'l', 'm', 's') if k in data)
1584
1585 out = {
1586 'algo': isolateserver.SUPPORTED_ALGOS_REVERSE[self.algo],
1587 'files': dict(
1588 (filepath, strip(data)) for filepath, data in self.files.iteritems()),
1589 'os': self.variables['OS'],
1590 'version': self.version,
1591 }
1592 if self.command:
1593 out['command'] = self.command
1594 if self.read_only is not None:
1595 out['read_only'] = self.read_only
1596 if self.relative_cwd:
1597 out['relative_cwd'] = self.relative_cwd
1598 return out
1599
1600 @property
1601 def isolate_filepath(self):
1602 """Returns the absolute path of self.isolate_file."""
1603 return os.path.normpath(
1604 os.path.join(self.isolated_basedir, self.isolate_file))
1605
1606 # Arguments number differs from overridden method
1607 @classmethod
1608 def load(cls, data, isolated_basedir): # pylint: disable=W0221
1609 """Special case loading to disallow different OS.
1610
1611 It is not possible to load a .isolated.state files from a different OS, this
1612 file is saved in OS-specific format.
1613 """
1614 out = super(SavedState, cls).load(data, isolated_basedir)
1615 if 'os' in data:
1616 out.variables['OS'] = data['os']
1617
1618 # Converts the human-readable form back into the proper class type.
1619 algo = data.get('algo', 'sha-1')
1620 if algo not in isolateserver.SUPPORTED_ALGOS:
1621 raise isolateserver.ConfigError('Unknown algo \'%s\'' % algo)
1622 out.algo = isolateserver.SUPPORTED_ALGOS[algo]
1623
1624 # For example, 1.1 is guaranteed to be backward compatible with 1.0 code.
1625 if not re.match(r'^(\d+)\.(\d+)$', out.version):
1626 raise isolateserver.ConfigError('Unknown version \'%s\'' % out.version)
1627 if out.version.split('.', 1)[0] != '1':
1628 raise isolateserver.ConfigError(
1629 'Unsupported version \'%s\'' % out.version)
1630
1631 # The .isolate file path must be valid. It can only be absolute on Windows,
1632 # when the drive containing the .isolate file and the drive containing the
1633 # .isolated file differ.
1634 assert not os.path.isabs(out.isolate_file) or sys.platform == 'win32'
1635 assert os.path.isfile(out.isolate_filepath), out.isolate_filepath
1636 return out
1637
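# Editor's note: a self-contained sketch of the version gate in load()
# above; only '1.x' formats are accepted, since a minor version bump is
# defined as backward compatible.
#
#   >>> import re
#   >>> re.match(r'^(\d+)\.(\d+)$', '1.2') is not None
#   True
#   >>> '1.2'.split('.', 1)[0] == '1'
#   True
#   >>> '2.0'.split('.', 1)[0] == '1'
#   False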
1638 def flatten(self):
1639 """Makes sure 'algo' is in human readable form."""
1640 out = super(SavedState, self).flatten()
1641 out['algo'] = isolateserver.SUPPORTED_ALGOS_REVERSE[out['algo']]
1642 return out
1643
1644 def __str__(self):
1645 out = '%s(\n' % self.__class__.__name__
1646 out += ' command: %s\n' % self.command
1647 out += ' files: %d\n' % len(self.files)
1648 out += ' isolate_file: %s\n' % self.isolate_file
1649 out += ' read_only: %s\n' % self.read_only
1650 out += ' relative_cwd: %s\n' % self.relative_cwd
1651 out += ' child_isolated_files: %s\n' % self.child_isolated_files
1652 out += ' variables: %s' % ''.join(
1653 '\n %s=%s' % (k, self.variables[k]) for k in sorted(self.variables))
1654 out += ')'
1655 return out
1656
1657
1658 class CompleteState(object):
1659 """Contains all the state to run the task at hand."""
1660 def __init__(self, isolated_filepath, saved_state):
1661 super(CompleteState, self).__init__()
1662 assert isolated_filepath is None or os.path.isabs(isolated_filepath)
1663 self.isolated_filepath = isolated_filepath
1664 # Contains data that eases the developer's use-case but is not strictly
1665 # necessary.
1666 self.saved_state = saved_state
1667
1668 @classmethod
1669 def load_files(cls, isolated_filepath):
1670 """Loads state from disk."""
1671 assert os.path.isabs(isolated_filepath), isolated_filepath
1672 isolated_basedir = os.path.dirname(isolated_filepath)
1673 return cls(
1674 isolated_filepath,
1675 SavedState.load_file(
1676 isolatedfile_to_state(isolated_filepath), isolated_basedir))
1677
1678 def load_isolate(self, cwd, isolate_file, variables, ignore_broken_items):
1679 """Updates self.isolated and self.saved_state with information loaded from a
1680 .isolate file.
1681
1682 Processes the loaded data, deduce root_dir, relative_cwd.
1683 """
1684 # Make sure to not depend on os.getcwd().
1685 assert os.path.isabs(isolate_file), isolate_file
1686 isolate_file = file_path.get_native_path_case(isolate_file)
1687 logging.info(
1688 'CompleteState.load_isolate(%s, %s, %s, %s)',
1689 cwd, isolate_file, variables, ignore_broken_items)
1690 relative_base_dir = os.path.dirname(isolate_file)
1691
1692 # Processes the variables and update the saved state.
1693 variables = process_variables(cwd, variables, relative_base_dir)
1694 self.saved_state.update(isolate_file, variables)
1695 variables = self.saved_state.variables
1696
1697 with open(isolate_file, 'r') as f:
1698 # At this point, variables are not yet replaced in command and infiles.
1699 # infiles may contain directory entries and uses posix-style paths.
1700 command, infiles, touched, read_only = load_isolate_for_config(
1701 os.path.dirname(isolate_file), f.read(), variables)
1702 command = [eval_variables(i, variables) for i in command]
1703 infiles = [eval_variables(f, variables) for f in infiles]
1704 touched = [eval_variables(f, variables) for f in touched]
1705 # root_dir is automatically determined as the deepest root reached by paths
1706 # of the form '../../foo/bar'. Note that path variables must be taken into
1707 # account too, so they are added as if they were input files.
1708 path_variables = [variables[v] for v in PATH_VARIABLES if v in variables]
1709 root_dir = determine_root_dir(
1710 relative_base_dir, infiles + touched + path_variables)
1711 # The relative directory is automatically determined as the relative path
1712 # between root_dir and the directory containing the .isolate file,
1713 # relative_base_dir.
1714 relative_cwd = os.path.relpath(relative_base_dir, root_dir)
1715 # Now that we know where the root is, check that the PATH_VARIABLES point
1716 # inside it.
1717 for i in PATH_VARIABLES:
1718 if i in variables:
1719 if not path_starts_with(
1720 root_dir, os.path.join(relative_base_dir, variables[i])):
1721 raise isolateserver.MappingError(
1722 'Path variable %s=%r points outside the inferred root directory'
1723 ' %s' % (i, variables[i], root_dir))
1724 # Normalize the file paths relative to root_dir. It is important to keep
1725 # any trailing os.path.sep at this step.
1726 infiles = [
1727 relpath(normpath(os.path.join(relative_base_dir, f)), root_dir)
1728 for f in infiles
1729 ]
1730 touched = [
1731 relpath(normpath(os.path.join(relative_base_dir, f)), root_dir)
1732 for f in touched
1733 ]
1734 follow_symlinks = variables['OS'] != 'win'
1735 # Expand the directories by listing each file inside them. Up to this point,
1736 # any trailing os.path.sep must be kept. Do not expand 'touched'.
1737 infiles = expand_directories_and_symlinks(
1738 root_dir,
1739 infiles,
1740 lambda x: re.match(r'.*\.(git|svn|pyc)$', x),
1741 follow_symlinks,
1742 ignore_broken_items)
1743
1744 # If we ignore broken items then remove any missing touched items.
1745 if ignore_broken_items:
1746 original_touched_count = len(touched)
1747 touched = [touch for touch in touched if os.path.exists(touch)]
1748
1749 if len(touched) != original_touched_count:
1750 logging.info('Removed %d invalid touched entries',
1751 original_touched_count - len(touched))
1752
1753 # Finally, store the new data needed to generate the foo.isolated file,
1754 # the file that is consumed by run_isolated.py.
1755 self.saved_state.update_isolated(
1756 command, infiles, touched, read_only, relative_cwd)
1757 logging.debug(self)
1758
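# Editor's note: a hypothetical walk-through of the root_dir deduction in
# load_isolate() above. With a .isolate file in /src/chrome/test and an
# input of '../../tools/data.txt', the deepest directory reached is /src:
#
#   >>> import posixpath
#   >>> posixpath.normpath('/src/chrome/test/../../tools/data.txt')
#   '/src/tools/data.txt'
#   >>> posixpath.relpath('/src/chrome/test', '/src')  # relative_cwd
#   'chrome/test'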
1759 def process_inputs(self, subdir):
1760 """Updates self.saved_state.files with the files' mode and hash.
1761
1762 If |subdir| is specified, filters to a subdirectory. The resulting .isolated
1763 file is tainted.
1764
1765 See process_input() for more information.
1766 """
1767 for infile in sorted(self.saved_state.files):
1768 if subdir and not infile.startswith(subdir):
1769 self.saved_state.files.pop(infile)
1770 else:
1771 filepath = os.path.join(self.root_dir, infile)
1772 self.saved_state.files[infile] = process_input(
1773 filepath,
1774 self.saved_state.files[infile],
1775 self.saved_state.read_only,
1776 self.saved_state.variables['OS'],
1777 self.saved_state.algo)
1778
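# Editor's note: a minimal sketch of the subdir filtering in
# process_inputs() above; entries outside the subdirectory are dropped,
# which is why the resulting .isolated file is considered tainted. The
# file names are hypothetical.
#
#   files = {'base/a.txt': {}, 'other/b.txt': {}}
#   for infile in sorted(files):
#     if not infile.startswith('base'):
#       files.pop(infile)
#   # files is now {'base/a.txt': {}}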
1779 def save_files(self):
1780 """Saves self.saved_state and creates a .isolated file."""
1781 logging.debug('Dumping to %s', self.isolated_filepath)
1782 self.saved_state.child_isolated_files = chromium_save_isolated(
1783 self.isolated_filepath,
1784 self.saved_state.to_isolated(),
1785 self.saved_state.variables,
1786 self.saved_state.algo)
1787 total_bytes = sum(
1788 i.get('s', 0) for i in self.saved_state.files.itervalues())
1789 if total_bytes:
1790 # TODO(maruel): Stats are missing the .isolated files.
1791 logging.debug('Total size: %d bytes', total_bytes)
1792 saved_state_file = isolatedfile_to_state(self.isolated_filepath)
1793 logging.debug('Dumping to %s', saved_state_file)
1794 trace_inputs.write_json(saved_state_file, self.saved_state.flatten(), True)
1795
1796 @property
1797 def root_dir(self):
1798 """Returns the absolute path of the root_dir to reference the .isolate file
1799 via relative_cwd.
1800
1801 So that join(root_dir, relative_cwd, basename(isolate_file)) is equivalent
1802 to isolate_filepath.
1803 """
1804 if not self.saved_state.isolate_file:
1805 raise ExecutionError('Please specify --isolate')
1806 isolate_dir = os.path.dirname(self.saved_state.isolate_filepath)
1807 # Special case '.'.
1808 if self.saved_state.relative_cwd == '.':
1809 root_dir = isolate_dir
1810 else:
1811 if not isolate_dir.endswith(self.saved_state.relative_cwd):
1812 raise ExecutionError(
1813 ('Make sure the .isolate file is in the directory that will be '
1814 'used as the relative directory. It is currently in %s and should '
1815 'be in %s') % (isolate_dir, self.saved_state.relative_cwd))
1816 # Walk back to the root directory.
1817 root_dir = isolate_dir[:-(len(self.saved_state.relative_cwd) + 1)]
1818 return file_path.get_native_path_case(root_dir)
1819
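# Editor's note: an illustration of the invariant documented in root_dir
# above, with hypothetical paths:
#
#   >>> import posixpath
#   >>> posixpath.join('/src', 'chrome/test', 'foo.isolate')
#   '/src/chrome/test/foo.isolate'
#
# i.e. join(root_dir, relative_cwd, basename(isolate_file)) gives back
# isolate_filepath.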
1820 @property
1821 def resultdir(self):
1822 """Returns the absolute path containing the .isolated file.
1823
1824 It is usually equivalent to the variable PRODUCT_DIR. Uses the .isolated
1825 path as the value.
1826 """
1827 return os.path.dirname(self.isolated_filepath)
1828
1829 def __str__(self):
1830 def indent(data, indent_length):
1831 """Indents text."""
1832 spacing = ' ' * indent_length
1833 return ''.join(spacing + l for l in str(data).splitlines(True))
1834
1835 out = '%s(\n' % self.__class__.__name__
1836 out += ' root_dir: %s\n' % self.root_dir
1837 out += ' saved_state: %s)' % indent(self.saved_state, 2)
1838 return out
1839
1840
1841 def load_complete_state(options, cwd, subdir, skip_update):
1842 """Loads a CompleteState.
1843
1844 This includes data from .isolate and .isolated.state files. Never reads the
1845 .isolated file.
1846
1847 Arguments:
1848 options: Options instance generated with OptionParserIsolate. For both
1849 options.isolate and options.isolated, if the value is set, it is an
1850 absolute path.
1851 cwd: base directory to be used when loading the .isolate file.
1852 subdir: optional argument to only process files in the subdirectory,
1853 relative to CompleteState.root_dir.
1854 skip_update: Skip trying to load the .isolate file and processing the
1855 dependencies. It is useful when this work is not needed, e.g. when tracing.
1856 """
1857 assert not options.isolate or os.path.isabs(options.isolate)
1858 assert not options.isolated or os.path.isabs(options.isolated)
1859 cwd = file_path.get_native_path_case(unicode(cwd))
1860 if options.isolated:
1861 # Load the previous state if it was present. Namely, "foo.isolated.state".
1862 # Note: this call doesn't load the .isolate file.
1863 complete_state = CompleteState.load_files(options.isolated)
1864 else:
1865 # Constructs a dummy object that cannot be saved. Useful for temporary
1866 # commands like 'run'. Pass cwd as the required base directory for it.
1867 complete_state = CompleteState(None, SavedState(cwd))
1868
1869 if not options.isolate:
1870 if not complete_state.saved_state.isolate_file:
1871 if not skip_update:
1872 raise ExecutionError('A .isolate file is required.')
1873 isolate = None
1874 else:
1875 isolate = complete_state.saved_state.isolate_filepath
1876 else:
1877 isolate = options.isolate
1878 if complete_state.saved_state.isolate_file:
1879 rel_isolate = safe_relpath(
1880 options.isolate, complete_state.saved_state.isolated_basedir)
1881 if rel_isolate != complete_state.saved_state.isolate_file:
1882 raise ExecutionError(
1883 '%s and %s do not match.' % (
1884 options.isolate, complete_state.saved_state.isolate_file))
1885
1886 if not skip_update:
1887 # Then load the .isolate file and expand directories.
1888 complete_state.load_isolate(
1889 cwd, isolate, options.variables, options.ignore_broken_items)
1890
1891 # Regenerate complete_state.saved_state.files.
1892 if subdir:
1893 subdir = unicode(subdir)
1894 subdir = eval_variables(subdir, complete_state.saved_state.variables)
1895 subdir = subdir.replace('/', os.path.sep)
1896
1897 if not skip_update:
1898 complete_state.process_inputs(subdir)
1899 return complete_state
1900
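# Editor's note: a hedged sketch of the flow the commands below share; the
# paths are hypothetical.
#
#   parser = OptionParserIsolate()
#   options, args = parser.parse_args(['--isolated', '/out/foo.isolated'])
#   complete_state = load_complete_state(options, os.getcwd(), None, False)
#   complete_state.save_files()  # writes foo.isolated and foo.isolated.state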
1901
1902 def read_trace_as_isolate_dict(complete_state, trace_blacklist):
1903 """Reads a trace and returns the .isolate dictionary.
1904
1905 Also returns exceptions raised during log parsing so they can be re-raised.
1906 """
1907 api = trace_inputs.get_api()
1908 logfile = complete_state.isolated_filepath + '.log'
1909 if not os.path.isfile(logfile):
1910 raise ExecutionError(
1911 'No log file \'%s\' to read; did you forget to \'trace\'?' % logfile)
1912 try:
1913 data = api.parse_log(logfile, trace_blacklist, None)
1914 exceptions = [i['exception'] for i in data if 'exception' in i]
1915 results = (i['results'] for i in data if 'results' in i)
1916 results_stripped = (i.strip_root(complete_state.root_dir) for i in results)
1917 files = set(sum((result.existent for result in results_stripped), []))
1918 tracked, touched = split_touched(files)
1919 value = generate_isolate(
1920 tracked,
1921 [],
1922 touched,
1923 complete_state.root_dir,
1924 complete_state.saved_state.variables,
1925 complete_state.saved_state.relative_cwd,
1926 trace_blacklist)
1927 return value, exceptions
1928 except trace_inputs.TracingFailure, e:
1929 raise ExecutionError(
1930 'Reading traces failed for: %s\n%s' %
1931 (' '.join(complete_state.saved_state.command), str(e)))
1932
1933
1934 def print_all(comment, data, stream):
1935 """Prints a complete .isolate file and its top-level file comment into a
1936 stream.
1937 """
1938 if comment:
1939 stream.write(comment)
1940 pretty_print(data, stream)
1941
1942
1943 def merge(complete_state, trace_blacklist):
1944 """Reads a trace and merges it back into the source .isolate file."""
1945 value, exceptions = read_trace_as_isolate_dict(
1946 complete_state, trace_blacklist)
1947
1948 # Now take that data and union it into the original .isolate file.
1949 with open(complete_state.saved_state.isolate_filepath, 'r') as f:
1950 prev_content = f.read()
1951 isolate_dir = os.path.dirname(complete_state.saved_state.isolate_filepath)
1952 prev_config = load_isolate_as_config(
1953 isolate_dir,
1954 eval_content(prev_content),
1955 extract_comment(prev_content))
1956 new_config = load_isolate_as_config(isolate_dir, value, '')
1957 config = union(prev_config, new_config)
1958 data = config.make_isolate_file()
1959 print('Updating %s' % complete_state.saved_state.isolate_file)
1960 with open(complete_state.saved_state.isolate_filepath, 'wb') as f:
1961 print_all(config.file_comment, data, f)
1962 if exceptions:
1963 # An exception was raised during parsing; re-raise the first one.
1964 raise \
1965 exceptions[0][0], \
1966 exceptions[0][1], \
1967 exceptions[0][2]
1968
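# Editor's note: a hedged sketch of the union step in merge() above, using
# hypothetical variable values. The previously saved .isolate content and
# the newly traced dependencies are combined before the file is rewritten:
#
#   prev_config = load_isolate_as_config(isolate_dir, old_value, comment)
#   new_config = load_isolate_as_config(isolate_dir, traced_value, '')
#   data = union(prev_config, new_config).make_isolate_file()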
1969
1970 ### Commands.
1971
1972
1973 def CMDarchive(parser, args):
1974 """Creates a .isolated file and uploads the tree to an isolate server.
1975
1976 All the files listed in the .isolated file are put in the isolate server
1977 cache via isolateserver.py.
1978 """
1979 parser.add_option('--subdir', help='Filters to a subdirectory')
1980 options, args = parser.parse_args(args)
1981 if args:
1982 parser.error('Unsupported argument: %s' % args)
1983
1984 with tools.Profiler('GenerateHashtable'):
1985 success = False
1986 try:
1987 complete_state = load_complete_state(
1988 options, os.getcwd(), options.subdir, False)
1989 if not options.outdir:
1990 options.outdir = os.path.join(
1991 os.path.dirname(complete_state.isolated_filepath), 'hashtable')
1992 # Make sure that complete_state isn't modified until save_files() is
1993 # called, because any changes made to it here will propagate to the files
1994 # created (which is probably not intended).
1995 complete_state.save_files()
1996
1997 infiles = complete_state.saved_state.files
1998 # Add all the .isolated files.
1999 isolated_hash = []
2000 isolated_files = [
2001 options.isolated,
2002 ] + complete_state.saved_state.child_isolated_files
2003 for item in isolated_files:
2004 item_path = os.path.join(
2005 os.path.dirname(complete_state.isolated_filepath), item)
2006 # Do not use isolateserver.hash_file() here because the file is
2007 # likely smallish (under 500kb) and its file size is needed.
2008 with open(item_path, 'rb') as f:
2009 content = f.read()
2010 isolated_hash.append(
2011 complete_state.saved_state.algo(content).hexdigest())
2012 isolated_metadata = {
2013 'h': isolated_hash[-1],
2014 's': len(content),
2015 'priority': '0'
2016 }
2017 infiles[item_path] = isolated_metadata
2018
2019 logging.info('Creating content-addressed object store with %d items',
2020 len(infiles))
2021
2022 if is_url(options.outdir):
2023 isolateserver.upload_tree(
2024 base_url=options.outdir,
2025 indir=complete_state.root_dir,
2026 infiles=infiles,
2027 namespace='default-gzip')
2028 else:
2029 recreate_tree(
2030 outdir=options.outdir,
2031 indir=complete_state.root_dir,
2032 infiles=infiles,
2033 action=run_isolated.HARDLINK_WITH_FALLBACK,
2034 as_hash=True)
2035 success = True
2036 print('%s %s' % (isolated_hash[0], os.path.basename(options.isolated)))
2037 finally:
2038 # If the command failed, delete the .isolated file if it exists. This is
2039 # important so no stale swarm job is executed.
2040 if not success and os.path.isfile(options.isolated):
2041 os.remove(options.isolated)
2042 return not success
2043
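# Editor's note: a self-contained sketch of how CMDarchive() above builds
# the metadata for each .isolated file; hashlib.sha1 stands in for
# saved_state.algo (the default) and item_path is hypothetical.
#
#   import hashlib
#   with open(item_path, 'rb') as f:
#     content = f.read()
#   metadata = {
#     'h': hashlib.sha1(content).hexdigest(),
#     's': len(content),
#     'priority': '0',
#   }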
2044
2045 def CMDcheck(parser, args):
2046 """Checks that all the inputs are present and generates .isolated."""
2047 parser.add_option('--subdir', help='Filters to a subdirectory')
2048 options, args = parser.parse_args(args)
2049 if args:
2050 parser.error('Unsupported argument: %s' % args)
2051
2052 complete_state = load_complete_state(
2053 options, os.getcwd(), options.subdir, False)
2054
2055 # Nothing is done specifically. Just store the result and state.
2056 complete_state.save_files()
2057 return 0
2058
2059
2060 CMDhashtable = CMDarchive
2061
2062
2063 def CMDmerge(parser, args):
2064 """Reads and merges the data from the trace back into the original .isolate.
2065
2066 Ignores --outdir.
2067 """
2068 parser.require_isolated = False
2069 add_trace_option(parser)
2070 options, args = parser.parse_args(args)
2071 if args:
2072 parser.error('Unsupported argument: %s' % args)
2073
2074 complete_state = load_complete_state(options, os.getcwd(), None, False)
2075 blacklist = trace_inputs.gen_blacklist(options.trace_blacklist)
2076 merge(complete_state, blacklist)
2077 return 0
2078
2079
2080 def CMDread(parser, args):
2081 """Reads the trace file generated with command 'trace'.
2082
2083 Ignores --outdir.
2084 """
2085 parser.require_isolated = False
2086 add_trace_option(parser)
2087 parser.add_option(
2088 '--skip-refresh', action='store_true',
2089 help='Skip reading .isolate file and do not refresh the hash of '
2090 'dependencies')
2091 parser.add_option(
2092 '-m', '--merge', action='store_true',
2093 help='merge the results back in the .isolate file instead of printing')
2094 options, args = parser.parse_args(args)
2095 if args:
2096 parser.error('Unsupported argument: %s' % args)
2097
2098 complete_state = load_complete_state(
2099 options, os.getcwd(), None, options.skip_refresh)
2100 blacklist = trace_inputs.gen_blacklist(options.trace_blacklist)
2101 value, exceptions = read_trace_as_isolate_dict(complete_state, blacklist)
2102 if options.merge:
2103 merge(complete_state, blacklist)
2104 else:
2105 pretty_print(value, sys.stdout)
2106
2107 if exceptions:
2108 # An exception was raised during parsing; re-raise the first one.
2109 raise \
2110 exceptions[0][0], \
2111 exceptions[0][1], \
2112 exceptions[0][2]
2113 return 0
2114
2115
2116 def CMDremap(parser, args):
2117 """Creates a directory with all the dependencies mapped into it.
2118
2119 Useful to test manually why a test is failing. The target executable is not
2120 run.
2121 """
2122 parser.require_isolated = False
2123 options, args = parser.parse_args(args)
2124 if args:
2125 parser.error('Unsupported argument: %s' % args)
2126 complete_state = load_complete_state(options, os.getcwd(), None, False)
2127
2128 if not options.outdir:
2129 options.outdir = run_isolated.make_temp_dir(
2130 'isolate', complete_state.root_dir)
2131 else:
2132 if is_url(options.outdir):
2133 parser.error('Can\'t use a URL for --outdir with mode remap.')
2134 if not os.path.isdir(options.outdir):
2135 os.makedirs(options.outdir)
2136 print('Remapping into %s' % options.outdir)
2137 if os.listdir(options.outdir):
2138 raise ExecutionError('Can\'t remap in a non-empty directory')
2139 recreate_tree(
2140 outdir=options.outdir,
2141 indir=complete_state.root_dir,
2142 infiles=complete_state.saved_state.files,
2143 action=run_isolated.HARDLINK_WITH_FALLBACK,
2144 as_hash=False)
2145 if complete_state.saved_state.read_only:
2146 run_isolated.make_writable(options.outdir, True)
2147
2148 if complete_state.isolated_filepath:
2149 complete_state.save_files()
2150 return 0
2151
2152
2153 def CMDrewrite(parser, args):
2154 """Rewrites a .isolate file into the canonical format."""
2155 parser.require_isolated = False
2156 options, args = parser.parse_args(args)
2157 if args:
2158 parser.error('Unsupported argument: %s' % args)
2159
2160 if options.isolated:
2161 # Load the previous state if it was present. Namely, "foo.isolated.state".
2162 complete_state = CompleteState.load_files(options.isolated)
2163 isolate = options.isolate or complete_state.saved_state.isolate_filepath
2164 else:
2165 isolate = options.isolate
2166 if not isolate:
2167 parser.error('--isolate is required.')
2168
2169 with open(isolate, 'r') as f:
2170 content = f.read()
2171 config = load_isolate_as_config(
2172 os.path.dirname(os.path.abspath(isolate)),
2173 eval_content(content),
2174 extract_comment(content))
2175 data = config.make_isolate_file()
2176 print('Updating %s' % isolate)
2177 with open(isolate, 'wb') as f:
2178 print_all(config.file_comment, data, f)
2179 return 0
2180
2181
2182 @subcommand.usage('-- [extra arguments]')
2183 def CMDrun(parser, args):
2184 """Runs the test executable in an isolated (temporary) directory.
2185
2186 All the dependencies are mapped into the temporary directory and the
2187 directory is cleaned up after the target exits. Warning: if --outdir is
2188 specified, it is deleted upon exit.
2189
2190 Argument processing stops at -- and these arguments are appended to the
2191 command line of the target to run. For example, use:
2192 isolate.py run --isolated foo.isolated -- --gtest_filter=Foo.Bar
2193 """
2194 parser.require_isolated = False
2195 parser.add_option(
2196 '--skip-refresh', action='store_true',
2197 help='Skip reading .isolate file and do not refresh the hash of '
2198 'dependencies')
2199 options, args = parser.parse_args(args)
2200 if options.outdir and is_url(options.outdir):
2201 parser.error('Can\'t use a URL for --outdir with mode run.')
2202
2203 complete_state = load_complete_state(
2204 options, os.getcwd(), None, options.skip_refresh)
2205 cmd = complete_state.saved_state.command + args
2206 if not cmd:
2207 raise ExecutionError('No command to run.')
2208
2209 cmd = tools.fix_python_path(cmd)
2210 try:
2211 root_dir = complete_state.root_dir
2212 if not options.outdir:
2213 if not os.path.isabs(root_dir):
2214 root_dir = os.path.join(os.path.dirname(options.isolated), root_dir)
2215 options.outdir = run_isolated.make_temp_dir('isolate', root_dir)
2216 else:
2217 if not os.path.isdir(options.outdir):
2218 os.makedirs(options.outdir)
2219 recreate_tree(
2220 outdir=options.outdir,
2221 indir=root_dir,
2222 infiles=complete_state.saved_state.files,
2223 action=run_isolated.HARDLINK_WITH_FALLBACK,
2224 as_hash=False)
2225 cwd = os.path.normpath(
2226 os.path.join(options.outdir, complete_state.saved_state.relative_cwd))
2227 if not os.path.isdir(cwd):
2228 # This can happen when no files are mapped from the directory containing
2229 # the .isolate file, but the directory must exist to be used as the current
2230 # working directory.
2231 os.makedirs(cwd)
2232 if complete_state.saved_state.read_only:
2233 run_isolated.make_writable(options.outdir, True)
2234 logging.info('Running %s, cwd=%s', cmd, cwd)
2235 result = subprocess.call(cmd, cwd=cwd)
2236 finally:
2237 if options.outdir:
2238 run_isolated.rmtree(options.outdir)
2239
2240 if complete_state.isolated_filepath:
2241 complete_state.save_files()
2242 return result
2243
2244
2245 @subcommand.usage('-- [extra arguments]')
2246 def CMDtrace(parser, args):
2247 """Traces the target using trace_inputs.py.
2248
2249 It runs the executable without remapping it, and traces all the files it and
2250 its child processes access. Then the 'merge' command can be used to generate
2251 an updated .isolate file out of it or the 'read' command to print it out to
2252 stdout.
2253
2254 Argument processing stops at -- and these arguments are appended to the
2255 command line of the target to run. For example, use:
2256 isolate.py trace --isolated foo.isolated -- --gtest_filter=Foo.Bar
2257 """
2258 add_trace_option(parser)
2259 parser.add_option(
2260 '-m', '--merge', action='store_true',
2261 help='After tracing, merge the results back in the .isolate file')
2262 parser.add_option(
2263 '--skip-refresh', action='store_true',
2264 help='Skip reading .isolate file and do not refresh the hash of '
2265 'dependencies')
2266 options, args = parser.parse_args(args)
2267
2268 complete_state = load_complete_state(
2269 options, os.getcwd(), None, options.skip_refresh)
2270 cmd = complete_state.saved_state.command + args
2271 if not cmd:
2272 raise ExecutionError('No command to run.')
2273 cmd = tools.fix_python_path(cmd)
2274 cwd = os.path.normpath(os.path.join(
2275 unicode(complete_state.root_dir),
2276 complete_state.saved_state.relative_cwd))
2277 cmd[0] = os.path.normpath(os.path.join(cwd, cmd[0]))
2278 if not os.path.isfile(cmd[0]):
2279 raise ExecutionError(
2280 'Tracing failed for: %s\nIt doesn\'t exist' % ' '.join(cmd))
2281 logging.info('Running %s, cwd=%s', cmd, cwd)
2282 api = trace_inputs.get_api()
2283 logfile = complete_state.isolated_filepath + '.log'
2284 api.clean_trace(logfile)
2285 out = None
2286 try:
2287 with api.get_tracer(logfile) as tracer:
2288 result, out = tracer.trace(
2289 cmd,
2290 cwd,
2291 'default',
2292 True)
2293 except trace_inputs.TracingFailure, e:
2294 raise ExecutionError('Tracing failed for: %s\n%s' % (' '.join(cmd), str(e)))
2295
2296 if result:
2297 logging.error(
2298 'Tracer exited with %d, which means the tests probably failed, so the '
2299 'trace is probably incomplete.', result)
2300 logging.info(out)
2301
2302 complete_state.save_files()
2303
2304 if options.merge:
2305 blacklist = trace_inputs.gen_blacklist(options.trace_blacklist)
2306 merge(complete_state, blacklist)
2307
2308 return result
2309
2310
2311 def _process_variable_arg(_option, _opt, _value, parser):
2312 if not parser.rargs:
2313 raise optparse.OptionValueError(
2314 'Please use --variable FOO=BAR or --variable FOO BAR')
2315 k = parser.rargs.pop(0)
2316 if '=' in k:
2317 parser.values.variables.append(tuple(k.split('=', 1)))
2318 else:
2319 if not parser.rargs:
2320 raise optparse.OptionValueError(
2321 'Please use --variable FOO=BAR or --variable FOO BAR')
2322 v = parser.rargs.pop(0)
2323 parser.values.variables.append((k, v))
2324
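# Editor's note: a minimal illustration of the two spellings accepted by
# _process_variable_arg() above; both forms yield the same tuple.
#
#   >>> tuple('FOO=BAR'.split('=', 1))
#   ('FOO', 'BAR')
#   # '--variable FOO BAR' pops 'FOO' then 'BAR' and appends ('FOO', 'BAR').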
2325
2326 def add_variable_option(parser):
2327 """Adds --isolated and --variable to an OptionParser."""
2328 parser.add_option(
2329 '-s', '--isolated',
2330 metavar='FILE',
2331 help='.isolated file to generate or read')
2332 # Keep for compatibility. TODO(maruel): Remove once not used anymore.
2333 parser.add_option(
2334 '-r', '--result',
2335 dest='isolated',
2336 help=optparse.SUPPRESS_HELP)
2337 default_variables = [('OS', get_flavor())]
2338 if sys.platform in ('win32', 'cygwin'):
2339 default_variables.append(('EXECUTABLE_SUFFIX', '.exe'))
2340 else:
2341 default_variables.append(('EXECUTABLE_SUFFIX', ''))
2342 parser.add_option(
2343 '-V', '--variable',
2344 action='callback',
2345 callback=_process_variable_arg,
2346 default=default_variables,
2347 dest='variables',
2348 metavar='FOO BAR',
2349 help='Variables to process in the .isolate file, default: %default. '
2350 'Variables are persistent across calls; they are saved inside '
2351 '<.isolated>.state')
2352
2353
2354 def add_trace_option(parser):
2355 """Adds --trace-blacklist to the parser."""
2356 parser.add_option(
2357 '--trace-blacklist',
2358 action='append', default=list(DEFAULT_BLACKLIST),
2359 help='List of regexps to use as a blacklist filter for files to consider '
2360 'important; not to be confused with --blacklist, which blacklists '
2361 'test cases.')
2362
2363
2364 def parse_isolated_option(parser, options, cwd, require_isolated):
2365 """Processes --isolated."""
2366 if options.isolated:
2367 options.isolated = os.path.normpath(
2368 os.path.join(cwd, options.isolated.replace('/', os.path.sep)))
2369 if require_isolated and not options.isolated:
2370 parser.error('--isolated is required.')
2371 if options.isolated and not options.isolated.endswith('.isolated'):
2372 parser.error('--isolated value must end with \'.isolated\'')
2373
2374
2375 def parse_variable_option(options):
2376 """Processes --variable."""
2377 # TODO(benrg): Maybe we should use a copy of gyp's NameValueListToDict here,
2378 # but it wouldn't be backward compatible.
2379 def try_make_int(s):
2380 """Converts a value to int if possible, converts to unicode otherwise."""
2381 try:
2382 return int(s)
2383 except ValueError:
2384 return s.decode('utf-8')
2385 options.variables = dict((k, try_make_int(v)) for k, v in options.variables)
2386
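# Editor's note: illustrative behavior of try_make_int() inside
# parse_variable_option() above; numeric strings become ints, anything
# else stays as unicode text (Python 2 semantics).
#
#   >>> int('42')
#   42
#   >>> 'linux'.decode('utf-8')
#   u'linux'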
2387
2388 class OptionParserIsolate(tools.OptionParserWithLogging):
2389 """Adds automatic --isolate, --isolated, --out and --variable handling."""
2390 # Set it to False if it is not required; the option can still be passed,
2391 # but parsing does not fail when it is missing.
2392 require_isolated = True
2393
2394 def __init__(self, **kwargs):
2395 tools.OptionParserWithLogging.__init__(
2396 self,
2397 verbose=int(os.environ.get('ISOLATE_DEBUG', 0)),
2398 **kwargs)
2399 group = optparse.OptionGroup(self, "Common options")
2400 group.add_option(
2401 '-i', '--isolate',
2402 metavar='FILE',
2403 help='.isolate file to load the dependency data from')
2404 add_variable_option(group)
2405 group.add_option(
2406 '-o', '--outdir', metavar='DIR',
2407 help='Directory used to recreate the tree or store the hash table. '
2408 'Defaults: run|remap: a /tmp subdirectory; others: '
2409 'the directory containing --isolated')
2410 group.add_option(
2411 '--ignore_broken_items', action='store_true',
2412 default=bool(os.environ.get('ISOLATE_IGNORE_BROKEN_ITEMS')),
2413 help='Indicates that invalid entries in the isolated file should '
2414 'only be logged and not stop processing. Defaults to True if '
2415 'env var ISOLATE_IGNORE_BROKEN_ITEMS is set')
2416 self.add_option_group(group)
2417
2418 def parse_args(self, *args, **kwargs):
2419 """Makes sure the paths make sense.
2420
2421 On Windows, / and \ are often mixed together in a path.
2422 """
2423 options, args = tools.OptionParserWithLogging.parse_args(
2424 self, *args, **kwargs)
2425 if not self.allow_interspersed_args and args:
2426 self.error('Unsupported argument: %s' % args)
2427
2428 cwd = file_path.get_native_path_case(unicode(os.getcwd()))
2429 parse_isolated_option(self, options, cwd, self.require_isolated)
2430 parse_variable_option(options)
2431
2432 if options.isolate:
2433 # TODO(maruel): Work with non-ASCII.
2434 # The path must be in native path case for tracing purposes.
2435 options.isolate = unicode(options.isolate).replace('/', os.path.sep)
2436 options.isolate = os.path.normpath(os.path.join(cwd, options.isolate))
2437 options.isolate = file_path.get_native_path_case(options.isolate)
2438
2439 if options.outdir and not is_url(options.outdir):
2440 options.outdir = unicode(options.outdir).replace('/', os.path.sep)
2441 # outdir doesn't need native path case since tracing is never done from
2442 # there.
2443 options.outdir = os.path.normpath(os.path.join(cwd, options.outdir))
2444
2445 return options, args
2446
2447
2448 def main(argv):
2449 dispatcher = subcommand.CommandDispatcher(__name__)
2450 try:
2451 return dispatcher.execute(OptionParserIsolate(version=__version__), argv)
2452 except Exception as e:
2453 tools.report_error(e)
2454 return 1
2455
2456
2457 if __name__ == '__main__':
2458 fix_encoding.fix_encoding()
2459 tools.disable_buffering()
2460 colorama.init()
2461 sys.exit(main(sys.argv[1:]))