Chromium Code Reviews

Side by Side Diff: isolate.py

Issue 11048019: Add everything from src/tools/isolate r159537. (Closed) Base URL: https://git.chromium.org/chromium/tools/swarm_client.git@master
Patch Set: Ensure --similarity is sticky Created 8 years, 2 months ago
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Front end tool to manage .isolate files and corresponding tests.
7
8 Run ./isolate.py --help for more detailed information.
9
10 See more information at
11 http://dev.chromium.org/developers/testing/isolated-testing
12 """
13
14 import binascii
15 import copy
16 import hashlib
17 import logging
18 import optparse
19 import os
20 import posixpath
21 import re
22 import stat
23 import subprocess
24 import sys
25 import time
26 import urllib
27 import urllib2
28
29 import run_swarm_step
30 import trace_inputs
31
32 # Import here directly so isolate is easier to use as a library.
33 from run_swarm_step import get_flavor
34
35
36 # Used by process_input().
37 NO_INFO, STATS_ONLY, WITH_HASH = range(3)
38 SHA_1_NULL = hashlib.sha1().hexdigest()
39
40 PATH_VARIABLES = ('DEPTH', 'PRODUCT_DIR')
41 DEFAULT_OSES = ('linux', 'mac', 'win')
42
43 # Files that should be 0-length when mapped.
44 KEY_TOUCHED = 'isolate_dependency_touched'
45 # Files that should be tracked by the build tool.
46 KEY_TRACKED = 'isolate_dependency_tracked'
47 # Files that should not be tracked by the build tool.
48 KEY_UNTRACKED = 'isolate_dependency_untracked'
49
50 _GIT_PATH = os.path.sep + '.git'
51 _SVN_PATH = os.path.sep + '.svn'
52
53 # The maximum number of upload attempts to try when uploading a single file.
54 MAX_UPLOAD_ATTEMPTS = 5
55
56 # The minimum size of files to upload directly to the blobstore.
57 MIN_SIZE_FOR_DIRECT_BLOBSTORE = 20 * 1024
58
59
60 class ExecutionError(Exception):
61 """A generic error occurred."""
62 def __str__(self):
63 return self.args[0]
64
65
66 ### Path handling code.
67
68
69 def relpath(path, root):
70 """os.path.relpath() that keeps trailing os.path.sep."""
71 out = os.path.relpath(path, root)
72 if path.endswith(os.path.sep):
73 out += os.path.sep
74 return out
75
76
77 def normpath(path):
78 """os.path.normpath() that keeps trailing os.path.sep."""
79 out = os.path.normpath(path)
80 if path.endswith(os.path.sep):
81 out += os.path.sep
82 return out
83
84
85 def posix_relpath(path, root):
86 """posix.relpath() that keeps trailing slash."""
87 out = posixpath.relpath(path, root)
88 if path.endswith('/'):
89 out += '/'
90 return out
91
92
93 def cleanup_path(x):
94 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows."""
95 if x:
96 x = x.rstrip(os.path.sep).replace(os.path.sep, '/')
97 if x == '.':
98 x = ''
99 if x:
100 x += '/'
101 return x
102
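For reference, a quick sketch of how these helpers differ from their stdlib counterparts, using hypothetical POSIX-style paths:

    relpath('/a/b/c/', '/a')    # -> 'b/c/' (os.path.relpath() would drop
                                #    the trailing separator)
    cleanup_path('.')           # -> ''
    cleanup_path('foo/bar')     # -> 'foo/bar/'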
103
104 def default_blacklist(f):
105 """Filters unimportant files normally ignored."""
106 return (
107 f.endswith(('.pyc', '.run_test_cases', 'testserver.log')) or
108 _GIT_PATH in f or
109 _SVN_PATH in f or
110 f in ('.git', '.svn'))
111
112
113 def expand_directory_and_symlink(indir, relfile, blacklist):
114 """Expands a single input. It can result in multiple outputs.
115
116 This function is recursive when relfile is a directory or a symlink.
117
118 Note: this code doesn't properly handle recursive symlinks like the one
119 created with:
120 ln -s .. foo
121 """
122 if os.path.isabs(relfile):
123 raise run_swarm_step.MappingError(
124 'Can\'t map absolute path %s' % relfile)
125
126 infile = normpath(os.path.join(indir, relfile))
127 if not infile.startswith(indir):
128 raise run_swarm_step.MappingError(
129 'Can\'t map file %s outside %s' % (infile, indir))
130
131 if sys.platform != 'win32':
132 # Look if any item in relfile is a symlink.
133 base, symlink, rest = trace_inputs.split_at_symlink(indir, relfile)
134 if symlink:
135 # Append everything pointed to by the symlink. If the symlink is
136 # recursive, this code blows up.
137 symlink_relfile = os.path.join(base, symlink)
138 symlink_path = os.path.join(indir, symlink_relfile)
139 pointed = os.readlink(symlink_path)
140 dest_infile = normpath(
141 os.path.join(os.path.dirname(symlink_path), pointed))
142 if rest:
143 dest_infile = trace_inputs.safe_join(dest_infile, rest)
144 if not dest_infile.startswith(indir):
145 raise run_swarm_step.MappingError(
146 'Can\'t map symlink reference %s (from %s) ->%s outside of %s' %
147 (symlink_relfile, relfile, dest_infile, indir))
148 if infile.startswith(dest_infile):
149 raise run_swarm_step.MappingError(
150 'Can\'t map recursive symlink reference %s->%s' %
151 (symlink_relfile, dest_infile))
152 dest_relfile = dest_infile[len(indir)+1:]
153 logging.info('Found symlink: %s -> %s' % (symlink_relfile, dest_relfile))
154 out = expand_directory_and_symlink(indir, dest_relfile, blacklist)
155 # Add the symlink itself.
156 out.append(symlink_relfile)
157 return out
158
159 if relfile.endswith(os.path.sep):
160 if not os.path.isdir(infile):
161 raise run_swarm_step.MappingError(
162 '%s is not a directory but ends with "%s"' % (infile, os.path.sep))
163
164 outfiles = []
165 for filename in os.listdir(infile):
166 inner_relfile = os.path.join(relfile, filename)
167 if blacklist(inner_relfile):
168 continue
169 if os.path.isdir(os.path.join(indir, inner_relfile)):
170 inner_relfile += os.path.sep
171 outfiles.extend(
172 expand_directory_and_symlink(indir, inner_relfile, blacklist))
173 return outfiles
174 else:
175 # Always add individual files even if they were blacklisted.
176 if os.path.isdir(infile):
177 raise run_swarm_step.MappingError(
178 'Input directory %s must have a trailing slash' % infile)
179
180 if not os.path.isfile(infile):
181 raise run_swarm_step.MappingError(
182 'Input file %s doesn\'t exist' % infile)
183
184 return [relfile]
185
186
187 def expand_directories_and_symlinks(indir, infiles, blacklist):
188 """Expands the directories and the symlinks, applies the blacklist and
189 verifies files exist.
190
191 Files are specified using the OS-native path separator.
192 """
193 outfiles = []
194 for relfile in infiles:
195 outfiles.extend(expand_directory_and_symlink(indir, relfile, blacklist))
196 return outfiles
197
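A minimal usage sketch, assuming a hypothetical checkout at /src whose data/ directory holds a.txt and b.txt:

    expand_directories_and_symlinks(
        '/src', ['runner.py', 'data' + os.path.sep], default_blacklist)
    # -> ['runner.py', 'data/a.txt', 'data/b.txt']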
198
199 def recreate_tree(outdir, indir, infiles, action, as_sha1):
200 """Creates a new tree with only the input files in it.
201
202 Arguments:
203 outdir: Output directory to create the files in.
204 indir: Root directory the infiles are based in.
205 infiles: dict of files to map from |indir| to |outdir|.
206 action: See assert below.
207 as_sha1: Output filename is the sha1 instead of relfile.
208 """
209 logging.info(
210 'recreate_tree(outdir=%s, indir=%s, files=%d, action=%s, as_sha1=%s)' %
211 (outdir, indir, len(infiles), action, as_sha1))
212
213 assert action in (
214 run_swarm_step.HARDLINK,
215 run_swarm_step.SYMLINK,
216 run_swarm_step.COPY)
217 outdir = os.path.normpath(outdir)
218 if not os.path.isdir(outdir):
219 logging.info('Creating %s' % outdir)
220 os.makedirs(outdir)
221 # Do not call abspath until the directory exists.
222 outdir = os.path.abspath(outdir)
223
224 for relfile, metadata in infiles.iteritems():
225 infile = os.path.join(indir, relfile)
226 if as_sha1:
227 # Do the hashtable specific checks.
228 if 'link' in metadata:
229 # Skip links when storing a hashtable.
230 continue
231 outfile = os.path.join(outdir, metadata['sha-1'])
232 if os.path.isfile(outfile):
233 # Just do a quick check that the file size matches. No need to stat()
234 # the input file again; grab the value from the dict.
235 if metadata['size'] == os.stat(outfile).st_size:
236 continue
237 else:
238 logging.warn('Overwriting %s' % metadata['sha-1'])
239 os.remove(outfile)
240 else:
241 outfile = os.path.join(outdir, relfile)
242 outsubdir = os.path.dirname(outfile)
243 if not os.path.isdir(outsubdir):
244 os.makedirs(outsubdir)
245
246 # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
247 # if metadata.get('touched_only') == True:
248 # open(outfile, 'ab').close()
249 if 'link' in metadata:
250 pointed = metadata['link']
251 logging.debug('Symlink: %s -> %s' % (outfile, pointed))
252 os.symlink(pointed, outfile)
253 else:
254 run_swarm_step.link_file(outfile, infile, action)
255
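For example, a hashtable-style output directory can be produced like this (hypothetical paths and metadata):

    recreate_tree(
        outdir='/tmp/hashtable',
        indir='/src',
        infiles={'data/a.txt': {'sha-1': '0' * 40, 'size': 3}},
        action=run_swarm_step.HARDLINK,
        as_sha1=True)
    # Creates /tmp/hashtable/<40 zeros> as a hardlink to /src/data/a.txt.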
256
257 def encode_multipart_formdata(fields, files,
258 mime_mapper=lambda _: 'application/octet-stream'):
259 """Encodes a Multipart form data object.
260
261 Args:
262 fields: a sequence of (name, value) elements for
263 regular form fields.
264 files: a sequence of (name, filename, value) elements for data to be
265 uploaded as files.
266 mime_mapper: function to return the mime type from the filename.
267 Returns:
268 content_type: for httplib.HTTP instance
269 body: for httplib.HTTP instance
270 """
271 boundary = hashlib.md5(str(time.time())).hexdigest()
272 body_list = []
273 for (key, value) in fields:
274 body_list.append('--' + boundary)
275 body_list.append('Content-Disposition: form-data; name="%s"' % key)
276 body_list.append('')
277 body_list.append(value)
278 body_list.append('--' + boundary)
279 body_list.append('')
280 for (key, filename, value) in files:
281 body_list.append('--' + boundary)
282 body_list.append('Content-Disposition: form-data; name="%s"; '
283 'filename="%s"' % (key, filename))
284 body_list.append('Content-Type: %s' % mime_mapper(filename))
285 body_list.append('')
286 body_list.append(value)
287 body_list.append('--' + boundary)
288 body_list.append('')
289 if body_list:
290 body_list[-2] += '--'
291 body = '\r\n'.join(body_list)
292 content_type = 'multipart/form-data; boundary=%s' % boundary
293 return content_type, body
294
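A sketch of the output shape, with hypothetical values:

    content_type, body = encode_multipart_formdata(
        [('hash_key', 'abc123')], [('hash_contents', 'contents', 'DATA')])
    # content_type -> 'multipart/form-data; boundary=<md5 hex digest>'
    # body         -> '--<boundary>\r\nContent-Disposition: form-data; ...'
    #                 with the final boundary terminated by '--'.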
295
296 def upload_hash_content(url, params=None, payload=None,
297 content_type='application/octet-stream'):
298 """Uploads the given hash contents.
299
300 Arguments:
301 url: The url to upload the hash contents to.
302 params: The params to include with the upload.
303 payload: The data to upload.
304 content_type: The content_type of the data being uploaded.
305 """
306 if params:
307 url = url + '?' + urllib.urlencode(params)
308 request = urllib2.Request(url, data=payload)
309 request.add_header('Content-Type', content_type)
310 request.add_header('Content-Length', len(payload or ''))
311
312 return urllib2.urlopen(request)
313
314
315 def upload_hash_content_to_blobstore(generate_upload_url, params,
316 hash_data):
317 """Uploads the given hash contents directly to the blobsotre via a generated
318 url.
319
320 Arguments:
321 generate_upload_url: The url to get the new upload url from.
322 params: The params to include with the upload.
323 hash_data: The contents to upload.
324 """
325 content_type, body = encode_multipart_formdata(
326 params.items(), [('hash_contents', 'hash_contents', hash_data)])
327
328 logging.debug('Generating url to directly upload file to blobstore')
329 response = urllib2.urlopen(generate_upload_url)
330 upload_url = response.read()
331
332 if not upload_url:
333 logging.error('Unable to generate upload url')
334 return
335
336 return upload_hash_content(upload_url, payload=body,
337 content_type=content_type)
338
339
340 class UploadRemote(run_swarm_step.Remote):
341 @staticmethod
342 def get_file_handler(base_url):
343 def upload_file(hash_data, hash_key):
344 params = {'hash_key': hash_key}
345 if len(hash_data) > MIN_SIZE_FOR_DIRECT_BLOBSTORE:
346 upload_hash_content_to_blobstore(
347 base_url.rstrip('/') + '/content/generate_blobstore_url',
348 params, hash_data)
349 else:
350 upload_hash_content(
351 base_url.rstrip('/') + '/content/store', params, hash_data)
352 return upload_file
353
354
355 def url_open(url, data=None, max_retries=MAX_UPLOAD_ATTEMPTS):
356 """Opens the given url with the given data, repeating up to max_retries
357 times if it encounters an error.
358
359 Arguments:
360 url: The url to open.
361 data: The data to send to the url.
362 max_retries: The maximum number of times to try connecting to the url.
363
364 Returns:
365 The response from the url, or it raises an exception if it failed to
366 get a response.
367 """
368 for _ in range(max_retries):
369 try:
370 # Return as soon as one attempt succeeds.
371 return urllib2.urlopen(url, data=data)
372 except urllib2.URLError as e:
373 logging.warning('Unable to connect to %s, error msg: %s', url, e)
374 time.sleep(1)
375
376 # No response from the server after max_retries attempts; assume it is
377 # down and raise an exception.
378 raise run_swarm_step.MappingError(
379 'Unable to connect to server, %s, to see which files are present' % url)
383
384
385 def update_files_to_upload(query_url, queries, files_to_upload):
386 """Queries the server to see which files from this batch already exist there.
387
388 Arguments:
389 queries: The files whose hashes may need to be uploaded to the server.
390 files_to_upload: Any new files that need to be uploaded are added to
391 this list.
392 """
393 body = ''.join(
394 (binascii.unhexlify(meta_data['sha-1']) for (_, meta_data) in queries))
395 response = url_open(query_url, data=body).read()
396 if len(queries) != len(response):
397 raise run_swarm_step.MappingError(
398 'Got an incorrect number of responses from the server. Expected %d, '
399 'but got %d' % (len(queries), len(response)))
400
401 for i in range(len(response)):
402 if response[i] == chr(0):
403 files_to_upload.append(queries[i])
404 else:
405 logging.debug('Hash for %s already exists on the server, no need '
406 'to upload again', queries[i][0])
407
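To make the wire format concrete, a sketch with a single query (the digest below is the sha-1 of the empty string):

    queries = [
        ('data/a.txt',
         {'sha-1': 'da39a3ee5e6b4b0d3255bfef95601890afd80709'}),
    ]
    body = binascii.unhexlify(queries[0][1]['sha-1'])  # 20 raw bytes
    # The server replies with one byte per query; chr(0) means 'missing',
    # in which case the ('data/a.txt', ...) tuple is appended to
    # files_to_upload.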
408
409 def upload_sha1_tree(base_url, indir, infiles):
410 """Uploads the given tree to the given url.
411
412 Arguments:
413 base_url: The base url; it is assumed that |base_url|/has/ can be used
414 to query if an element was already uploaded, and |base_url|/store/
415 can be used to upload a new element.
416 indir: Root directory the infiles are based in.
417 infiles: dict of files to map from |indir| to |outdir|.
418 """
419 logging.info('upload tree(base_url=%s, indir=%s, files=%d)' %
420 (base_url, indir, len(infiles)))
421
422 # Generate the list of files that need to be uploaded (since some may
423 # already be on the server).
424 base_url = base_url.rstrip('/')
425 contains_hash_url = base_url + '/content/contains'
426 to_upload = []
427 next_queries = []
428 for relfile, metadata in infiles.iteritems():
429 if 'link' in metadata:
430 # Skip links when uploading.
431 continue
432
433 next_queries.append((relfile, metadata))
434 if len(next_queries) == 1000:
435 update_files_to_upload(contains_hash_url, next_queries, to_upload)
436 next_queries = []
437
438 if next_queries:
439 update_files_to_upload(contains_hash_url, next_queries, to_upload)
440
441
442 # Upload the required files.
443 remote_uploader = UploadRemote(base_url)
444 for relfile, metadata in to_upload:
445 # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
446 # if metadata.get('touched_only') == True:
447 # hash_data = ''
448 infile = os.path.join(indir, relfile)
449 with open(infile, 'rb') as f:
450 hash_data = f.read()
451 remote_uploader.add_item(run_swarm_step.Remote.MED,
452 hash_data,
453 metadata['sha-1'])
454 remote_uploader.join()
455
456 exception = remote_uploader.next_exception()
457 if exception:
458 while exception:
459 logging.error('Error uploading file to server:\n%s', exception[1])
460 exception = remote_uploader.next_exception()
461 raise run_swarm_step.MappingError(
462 'Encountered errors uploading hash contents to server. See logs for '
463 'exact failures')
464
465
466 def process_input(filepath, prevdict, level, read_only):
467 """Processes an input file, a dependency, and return meta data about it.
468
469 Arguments:
470 - filepath: File to act on.
471 - prevdict: the previous dictionary. It is used to retrieve the cached sha-1
472 to skip recalculating the hash.
473 - level: determines the amount of information retrieved.
474 - read_only: If True, the file mode is manipulated. In practice, only
475 one of 4 modes is saved: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r).
476 On Windows, the mode is not set since all files are 'executable' by
477 default.
478
479 Behaviors:
480 - NO_INFO retrieves no information.
481 - STATS_ONLY retrieves the file mode, file size, file timestamp and, if
482 the file is a symlink, the link destination.
483 - WITH_HASH retrieves all of STATS_ONLY plus the sha-1 of the content of the
484 file.
485 """
486 assert level in (NO_INFO, STATS_ONLY, WITH_HASH)
487 out = {}
488 # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
489 # if prevdict.get('touched_only') == True:
490 # # The file's content is ignored. Skip the time and hard code mode.
491 # if get_flavor() != 'win':
492 # out['mode'] = stat.S_IRUSR | stat.S_IRGRP
493 # out['size'] = 0
494 # out['sha-1'] = SHA_1_NULL
495 # out['touched_only'] = True
496 # return out
497
498 if level >= STATS_ONLY:
499 try:
500 filestats = os.lstat(filepath)
501 except OSError:
502 # The file is not present.
503 raise run_swarm_step.MappingError('%s is missing' % filepath)
504 is_link = stat.S_ISLNK(filestats.st_mode)
505 if get_flavor() != 'win':
506 # Ignore file mode on Windows since it's not really useful there.
507 filemode = stat.S_IMODE(filestats.st_mode)
508 # Remove write access for group and all access to 'others'.
509 filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
510 if read_only:
511 filemode &= ~stat.S_IWUSR
512 if filemode & stat.S_IXUSR:
513 filemode |= stat.S_IXGRP
514 else:
515 filemode &= ~stat.S_IXGRP
516 out['mode'] = filemode
517 if not is_link:
518 out['size'] = filestats.st_size
519 # Used to skip recalculating the hash. Use the most recent update time.
520 out['timestamp'] = int(round(filestats.st_mtime))
521 # If the timestamp wasn't updated, carry on the sha-1.
522 if prevdict.get('timestamp') == out['timestamp']:
523 if 'sha-1' in prevdict:
524 # Reuse the previous hash.
525 out['sha-1'] = prevdict['sha-1']
526 if 'link' in prevdict:
527 # Reuse the previous link destination.
528 out['link'] = prevdict['link']
529 if is_link and not 'link' in out:
530 # A symlink, store the link destination.
531 out['link'] = os.readlink(filepath)
532
533 if level >= WITH_HASH and not out.get('sha-1') and not out.get('link'):
534 if not is_link:
535 with open(filepath, 'rb') as f:
536 out['sha-1'] = hashlib.sha1(f.read()).hexdigest()
537 return out
538
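The timestamp comparison above is what lets reruns skip hashing; a sketch with hypothetical values:

    prev = {'timestamp': 1349481600,
            'sha-1': 'da39a3ee5e6b4b0d3255bfef95601890afd80709'}
    out = process_input('/src/data/a.txt', prev, WITH_HASH, False)
    # If the file's mtime still rounds to 1349481600, out['sha-1'] is
    # copied from prev and the file content is never re-read.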
539
540 ### Variable stuff.
541
542
543 def result_to_state(filename):
544 """Replaces the file's extension."""
545 return filename.rsplit('.', 1)[0] + '.state'
546
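For example:

    result_to_state('out/Release/unit_tests.results')
    # -> 'out/Release/unit_tests.state'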
547
548 def determine_root_dir(relative_root, infiles):
549 """For a list of infiles, determines the deepest root directory that is
550 referenced indirectly.
551
552 All arguments must be using os.path.sep.
553 """
554 # The trick used to determine the root directory is to look at how far
555 # up each input path climbs with '../' prefixes.
556 deepest_root = relative_root
557 for i in infiles:
558 x = relative_root
559 while i.startswith('..' + os.path.sep):
560 i = i[3:]
561 assert not i.startswith(os.path.sep)
562 x = os.path.dirname(x)
563 if deepest_root.startswith(x):
564 deepest_root = x
565 logging.debug(
566 'determine_root_dir(%s, %d files) -> %s' % (
567 relative_root, len(infiles), deepest_root))
568 return deepest_root
569
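A worked example with a hypothetical layout; each '../' prefix climbs one directory above relative_root:

    determine_root_dir('/src/out/Release', ['../../base/base.isolate'])
    # The two '../' climb from /src/out/Release back to /src -> '/src'.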
570
571 def replace_variable(part, variables):
572 m = re.match(r'<\(([A-Z_]+)\)', part)
573 if m:
574 if m.group(1) not in variables:
575 raise ExecutionError(
576 'Variable "%s" was not found in %s.\nDid you forget to specify '
577 '--variable?' % (m.group(1), variables))
578 return variables[m.group(1)]
579 return part
580
581
582 def process_variables(variables, relative_base_dir):
583 """Processes path variables as a special case and returns a copy of the dict.
584
585 For each 'path' variable: first normalizes it, verifies it exists, converts it
586 to an absolute path, then sets it as relative to relative_base_dir.
587 """
588 variables = variables.copy()
589 for i in PATH_VARIABLES:
590 if i not in variables:
591 continue
592 variable = os.path.normpath(variables[i])
593 if not os.path.isdir(variable):
594 raise ExecutionError('%s=%s is not a directory' % (i, variable))
595 # Variables could contain / or \ on windows. Always normalize to
596 # os.path.sep.
597 variable = os.path.abspath(variable.replace('/', os.path.sep))
598 # All variables are relative to the .isolate file.
599 variables[i] = os.path.relpath(variable, relative_base_dir)
600 return variables
601
602
603 def eval_variables(item, variables):
604 """Replaces the .isolate variables in a string item.
605
606 Note that the .isolate format is a subset of the .gyp dialect.
607 """
608 return ''.join(
609 replace_variable(p, variables) for p in re.split(r'(<\([A-Z_]+\))', item))
610
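For example, with hypothetical variable values:

    eval_variables(
        '<(PRODUCT_DIR)/unit_tests<(EXECUTABLE_SUFFIX)',
        {'PRODUCT_DIR': 'out/Release', 'EXECUTABLE_SUFFIX': '.exe'})
    # -> 'out/Release/unit_tests.exe'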
611
612 def classify_files(root_dir, tracked, untracked):
613 """Converts the list of files into a .isolate 'variables' dictionary.
614
615 Arguments:
616 - tracked: list of file names to generate a dictionary out of; these
617 should probably be tracked.
618 - untracked: list of file names that must not be tracked.
619 """
620 # These directories are not guaranteed to be always present on every builder.
621 OPTIONAL_DIRECTORIES = (
622 'test/data/plugin',
623 'third_party/WebKit/LayoutTests',
624 )
625
626 new_tracked = []
627 new_untracked = list(untracked)
628
629 def should_be_tracked(filepath):
630 """Returns True if it is a file without whitespace in a non-optional
631 directory that has no symlink in its path.
632 """
633 if filepath.endswith('/'):
634 return False
635 if ' ' in filepath:
636 return False
637 if any(i in filepath for i in OPTIONAL_DIRECTORIES):
638 return False
639 # Look if any element in the path is a symlink.
640 split = filepath.split('/')
641 for i in range(len(split)):
642 if os.path.islink(os.path.join(root_dir, '/'.join(split[:i+1]))):
643 return False
644 return True
645
646 for filepath in sorted(tracked):
647 if should_be_tracked(filepath):
648 new_tracked.append(filepath)
649 else:
650 # Anything else.
651 new_untracked.append(filepath)
652
653 variables = {}
654 if new_tracked:
655 variables[KEY_TRACKED] = sorted(new_tracked)
656 if new_untracked:
657 variables[KEY_UNTRACKED] = sorted(new_untracked)
658 return variables
659
660
661 def generate_simplified(
662 tracked, untracked, touched, root_dir, variables, relative_cwd):
663 """Generates a clean and complete .isolate 'variables' dictionary.
664
665 Cleans up and extracts only files from within root_dir then processes
666 variables and relative_cwd.
667 """
668 logging.info(
669 'generate_simplified(%d files, %s, %s, %s)' %
670 (len(tracked) + len(untracked) + len(touched),
671 root_dir, variables, relative_cwd))
672 # Constants.
673 # Skip logs in PRODUCT_DIR. Note that these are applied on '/' style
674 # path separators.
675 LOG_FILE = re.compile(r'^\<\(PRODUCT_DIR\)\/[^\/]+\.log$')
676 EXECUTABLE = re.compile(
677 r'^(\<\(PRODUCT_DIR\)\/[^\/\.]+)' +
678 re.escape(variables.get('EXECUTABLE_SUFFIX', '')) +
679 r'$')
680
681 # Preparation work.
682 relative_cwd = cleanup_path(relative_cwd)
683 # Creates the right set of variables here. We only care about PATH_VARIABLES.
684 variables = dict(
685 ('<(%s)' % k, variables[k].replace(os.path.sep, '/'))
686 for k in PATH_VARIABLES if k in variables)
687
688 # Actual work: Process the files.
689 # TODO(maruel): if all the files in a directory are in part tracked and in
690 # part untracked, the directory will not be extracted. Tracked files should be
691 # 'promoted' to be untracked as needed.
692 tracked = trace_inputs.extract_directories(
693 root_dir, tracked, default_blacklist)
694 untracked = trace_inputs.extract_directories(
695 root_dir, untracked, default_blacklist)
696 # touched is not compressed, otherwise it would result in archiving
697 # files that we don't need.
698
699 def fix(f):
700 """Bases the file on the most restrictive variable."""
701 logging.debug('fix(%s)' % f)
702 # Important, GYP stores the files with / and not \.
703 f = f.replace(os.path.sep, '/')
704 # If it's not already a variable.
705 if not f.startswith('<'):
706 # relative_cwd is usually the directory containing the gyp file. It may be
707 # empty if the whole directory containing the gyp file is needed.
708 f = posix_relpath(f, relative_cwd) or './'
709
710 for variable, root_path in variables.iteritems():
711 if f.startswith(root_path):
712 f = variable + f[len(root_path):]
713 break
714
715 # Now strip off known files we want to ignore and apply any specific
716 # mangling as necessary. It's easier to do it here than via a blacklist.
717 match = EXECUTABLE.match(f)
718 if match:
719 return match.group(1) + '<(EXECUTABLE_SUFFIX)'
720
721 # Blacklist logs and 'First Run' in the PRODUCT_DIR. First Run is not
722 # created by the compile, but by the test itself.
723 if LOG_FILE.match(f) or f == '<(PRODUCT_DIR)/First Run':
724 return None
725
726 if sys.platform == 'darwin':
727 # On OSX, the name of the output depends on a gyp define; it can be
728 # 'Google Chrome.app' or 'Chromium.app', same for 'XXX
729 # Framework.framework'. Furthermore, they are versioned with a gyp
730 # variable. To lower the complexity of the .isolate file, remove all the
731 # individual entries that show up under any of the 4 entries and replace
732 # them with the directory itself. Overall, this results in a few more
733 # files than strictly necessary.
734 OSX_BUNDLES = (
735 '<(PRODUCT_DIR)/Chromium Framework.framework/',
736 '<(PRODUCT_DIR)/Chromium.app/',
737 '<(PRODUCT_DIR)/Google Chrome Framework.framework/',
738 '<(PRODUCT_DIR)/Google Chrome.app/',
739 )
740 for prefix in OSX_BUNDLES:
741 if f.startswith(prefix):
742 # Note this results in duplicate values, so a set() must be used to
743 # remove duplicates.
744 return prefix
745
746 return f
747
748 tracked = set(filter(None, (fix(f.path) for f in tracked)))
749 untracked = set(filter(None, (fix(f.path) for f in untracked)))
750 touched = set(filter(None, (fix(f.path) for f in touched)))
751 out = classify_files(root_dir, tracked, untracked)
752 if touched:
753 out[KEY_TOUCHED] = sorted(touched)
754 return out
755
756
757 def generate_isolate(
758 tracked, untracked, touched, root_dir, variables, relative_cwd):
759 """Generates a clean and complete .isolate file."""
760 result = generate_simplified(
761 tracked, untracked, touched, root_dir, variables, relative_cwd)
762 return {
763 'conditions': [
764 ['OS=="%s"' % get_flavor(), {
765 'variables': result,
766 }],
767 ],
768 }
769
770
771 def split_touched(files):
772 """Splits files that are touched vs files that are read."""
773 tracked = []
774 touched = []
775 for f in files:
776 if f.size:
777 tracked.append(f)
778 else:
779 touched.append(f)
780 return tracked, touched
781
782
783 def pretty_print(variables, stdout):
784 """Outputs a gyp compatible list from the decoded variables.
785
786 Similar to pprint.pprint() but with NIH syndrome.
787 """
788 # Order the dictionary keys by these keys in priority.
789 ORDER = (
790 'variables', 'condition', 'command', 'relative_cwd', 'read_only',
791 KEY_TRACKED, KEY_UNTRACKED)
792
793 def sorting_key(x):
794 """Gives priority to 'most important' keys before the others."""
795 if x in ORDER:
796 return str(ORDER.index(x))
797 return x
798
799 def loop_list(indent, items):
800 for item in items:
801 if isinstance(item, basestring):
802 stdout.write('%s\'%s\',\n' % (indent, item))
803 elif isinstance(item, dict):
804 stdout.write('%s{\n' % indent)
805 loop_dict(indent + ' ', item)
806 stdout.write('%s},\n' % indent)
807 elif isinstance(item, list):
808 # A list inside a list will write the first item embedded.
809 stdout.write('%s[' % indent)
810 for index, i in enumerate(item):
811 if isinstance(i, basestring):
812 stdout.write(
813 '\'%s\', ' % i.replace('\\', '\\\\').replace('\'', '\\\''))
814 elif isinstance(i, dict):
815 stdout.write('{\n')
816 loop_dict(indent + ' ', i)
817 if index != len(item) - 1:
818 x = ', '
819 else:
820 x = ''
821 stdout.write('%s}%s' % (indent, x))
822 else:
823 assert False
824 stdout.write('],\n')
825 else:
826 assert False
827
828 def loop_dict(indent, items):
829 for key in sorted(items, key=sorting_key):
830 item = items[key]
831 stdout.write("%s'%s': " % (indent, key))
832 if isinstance(item, dict):
833 stdout.write('{\n')
834 loop_dict(indent + ' ', item)
835 stdout.write(indent + '},\n')
836 elif isinstance(item, list):
837 stdout.write('[\n')
838 loop_list(indent + ' ', item)
839 stdout.write(indent + '],\n')
840 elif isinstance(item, basestring):
841 stdout.write(
842 '\'%s\',\n' % item.replace('\\', '\\\\').replace('\'', '\\\''))
843 elif item in (True, False, None):
844 stdout.write('%s\n' % item)
845 else:
846 assert False, item
847
848 stdout.write('{\n')
849 loop_dict(' ', variables)
850 stdout.write('}\n')
851
852
853 def union(lhs, rhs):
854 """Merges two compatible datastructures composed of dict/list/set."""
855 assert lhs is not None or rhs is not None
856 if lhs is None:
857 return copy.deepcopy(rhs)
858 if rhs is None:
859 return copy.deepcopy(lhs)
860 assert type(lhs) == type(rhs), (lhs, rhs)
861 if hasattr(lhs, 'union'):
862 # Includes set, OSSettings and Configs.
863 return lhs.union(rhs)
864 if isinstance(lhs, dict):
865 return dict((k, union(lhs.get(k), rhs.get(k))) for k in set(lhs).union(rhs))
866 elif isinstance(lhs, list):
867 # Do not go inside the list.
868 return lhs + rhs
869 assert False, type(lhs)
870
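A small sketch of the merge semantics: dicts recurse, lists concatenate:

    union({'a': [1], 'b': {'c': [2]}}, {'a': [3], 'b': {'d': [4]}})
    # -> {'a': [1, 3], 'b': {'c': [2], 'd': [4]}}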
871
872 def extract_comment(content):
873 """Extracts file level comment."""
874 out = []
875 for line in content.splitlines(True):
876 if line.startswith('#'):
877 out.append(line)
878 else:
879 break
880 return ''.join(out)
881
882
883 def eval_content(content):
884 """Evaluates a python file and return the value defined in it.
885
886 Used in practice for .isolate files.
887 """
888 globs = {'__builtins__': None}
889 locs = {}
890 value = eval(content, globs, locs)
891 assert locs == {}, locs
892 assert globs == {'__builtins__': None}, globs
893 return value
894
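Since __builtins__ is stripped, only plain literals evaluate cleanly; for example:

    eval_content("{'variables': {'command': ['./unit_tests']}}")
    # -> {'variables': {'command': ['./unit_tests']}}
    # eval_content("open('/etc/passwd')") would raise a NameError instead,
    # since 'open' is not available.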
895
896 def verify_variables(variables):
897 """Verifies the |variables| dictionary is in the expected format."""
898 VALID_VARIABLES = [
899 KEY_TOUCHED,
900 KEY_TRACKED,
901 KEY_UNTRACKED,
902 'command',
903 'read_only',
904 ]
905 assert isinstance(variables, dict), variables
906 assert set(VALID_VARIABLES).issuperset(set(variables)), variables.keys()
907 for name, value in variables.iteritems():
908 if name == 'read_only':
909 assert value in (True, False, None), value
910 else:
911 assert isinstance(value, list), value
912 assert all(isinstance(i, basestring) for i in value), value
913
914
915 def verify_condition(condition):
916 """Verifies the |condition| dictionary is in the expected format."""
917 VALID_INSIDE_CONDITION = ['variables']
918 assert isinstance(condition, list), condition
919 assert 2 <= len(condition) <= 3, condition
920 assert re.match(r'OS==\"([a-z]+)\"', condition[0]), condition[0]
921 for c in condition[1:]:
922 assert isinstance(c, dict), c
923 assert set(VALID_INSIDE_CONDITION).issuperset(set(c)), c.keys()
924 verify_variables(c.get('variables', {}))
925
926
927 def verify_root(value):
928 VALID_ROOTS = ['variables', 'conditions']
929 assert isinstance(value, dict), value
930 assert set(VALID_ROOTS).issuperset(set(value)), value.keys()
931 verify_variables(value.get('variables', {}))
932
933 conditions = value.get('conditions', [])
934 assert isinstance(conditions, list), conditions
935 for condition in conditions:
936 verify_condition(condition)
937
938
939 def remove_weak_dependencies(values, key, item, item_oses):
940 """Remove any oses from this key if the item is already under a strong key."""
941 if key == KEY_TOUCHED:
942 for stronger_key in (KEY_TRACKED, KEY_UNTRACKED):
943 oses = values.get(stronger_key, {}).get(item, None)
944 if oses:
945 item_oses -= oses
946
947 return item_oses
948
949
950 def invert_map(variables):
951 """Converts a dict(OS, dict(deptype, list(dependencies)) to a flattened view.
952
953 Returns a tuple of:
954 1. dict(deptype, dict(dependency, set(OSes)) for easier processing.
955 2. All the OSes found as a set.
956 """
957 KEYS = (
958 KEY_TOUCHED,
959 KEY_TRACKED,
960 KEY_UNTRACKED,
961 'command',
962 'read_only',
963 )
964 out = dict((key, {}) for key in KEYS)
965 for os_name, values in variables.iteritems():
966 for key in (KEY_TOUCHED, KEY_TRACKED, KEY_UNTRACKED):
967 for item in values.get(key, []):
968 out[key].setdefault(item, set()).add(os_name)
969
970 # command needs special handling.
971 command = tuple(values.get('command', []))
972 out['command'].setdefault(command, set()).add(os_name)
973
974 # read_only needs special handling.
975 out['read_only'].setdefault(values.get('read_only'), set()).add(os_name)
976 return out, set(variables)
977
978
979 def reduce_inputs(values, oses):
980 """Reduces the invert_map() output to the strictest minimum list.
981
982 1. Construct the inverse map first.
983 2. Look at each individual file and directory, map where they are used and
984 reconstruct the inverse dictionary.
985 3. Do not convert back to negative if only 2 OSes were merged.
986
987 Returns a tuple of:
988 1. the minimized dictionary
989 2. oses passed through as-is.
990 """
991 KEYS = (
992 KEY_TOUCHED,
993 KEY_TRACKED,
994 KEY_UNTRACKED,
995 'command',
996 'read_only',
997 )
998 out = dict((key, {}) for key in KEYS)
999 assert all(oses), oses
1000 if len(oses) > 2:
1001 for key in KEYS:
1002 for item, item_oses in values.get(key, {}).iteritems():
1003 item_oses = remove_weak_dependencies(values, key, item, item_oses)
1004 if not item_oses:
1005 continue
1006
1007 # Converts all oses.difference('foo') to '!foo'.
1008 assert all(item_oses), item_oses
1009 missing = oses.difference(item_oses)
1010 if len(missing) == 1:
1011 # Replace it with a negative.
1012 out[key][item] = set(['!' + tuple(missing)[0]])
1013 elif not missing:
1014 out[key][item] = set([None])
1015 else:
1016 out[key][item] = set(item_oses)
1017 else:
1018 for key in KEYS:
1019 for item, item_oses in values.get(key, {}).iteritems():
1020 item_oses = remove_weak_dependencies(values, key, item, item_oses)
1021 if not item_oses:
1022 continue
1023
1024 # Converts all oses.difference('foo') to '!foo'.
1025 assert None not in item_oses, item_oses
1026 out[key][item] = set(item_oses)
1027 return out, oses
1028
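A sketch of the two steps together, on one file shared by all three default OSes and one linux-only file (hypothetical names):

    flat = {
        'linux': {KEY_TRACKED: ['common.txt', 'linux_only.txt']},
        'mac': {KEY_TRACKED: ['common.txt']},
        'win': {KEY_TRACKED: ['common.txt']},
    }
    values, oses = invert_map(flat)
    # values[KEY_TRACKED]['common.txt'] == set(['linux', 'mac', 'win'])
    values, oses = reduce_inputs(values, oses)
    # 'common.txt' is on every OS -> set([None]) (unconditional);
    # 'linux_only.txt' stays set(['linux']).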
1029
1030 def convert_map_to_isolate_dict(values, oses):
1031 """Regenerates back a .isolate configuration dict from files and dirs
1032 mappings generated from reduce_inputs().
1033 """
1034 # First, invert the mapping so it is keyed by OS condition.
1035 config = {}
1036 for key in values:
1037 for item, oses in values[key].iteritems():
1038 if item is None:
1039 # For read_only default.
1040 continue
1041 for cond_os in oses:
1042 cond_key = None if cond_os is None else cond_os.lstrip('!')
1043 # Insert the if/else dicts.
1044 condition_values = config.setdefault(cond_key, [{}, {}])
1045 # If condition is negative, use index 1, else use index 0.
1046 cond_value = condition_values[int((cond_os or '').startswith('!'))]
1047 variables = cond_value.setdefault('variables', {})
1048
1049 if item in (True, False):
1050 # One-off for read_only.
1051 variables[key] = item
1052 else:
1053 if isinstance(item, tuple) and item:
1054 # One-off for command.
1055 # Do not merge lists and do not sort!
1056 # Note that item is a tuple.
1057 assert key not in variables
1058 variables[key] = list(item)
1059 elif item:
1060 # The list of items (files or dirs). Append the new item and keep
1061 # the list sorted.
1062 l = variables.setdefault(key, [])
1063 l.append(item)
1064 l.sort()
1065
1066 out = {}
1067 for o in sorted(config):
1068 d = config[o]
1069 if o is None:
1070 assert not d[1]
1071 out = union(out, d[0])
1072 else:
1073 c = out.setdefault('conditions', [])
1074 if d[1]:
1075 c.append(['OS=="%s"' % o] + d)
1076 else:
1077 c.append(['OS=="%s"' % o] + d[0:1])
1078 return out
1079
1080
1081 ### Internal state files.
1082
1083
1084 class OSSettings(object):
1085 """Represents the dependencies for an OS. The structure is immutable.
1086
1087 It's the .isolate settings for a specific OS.
1088 """
1089 def __init__(self, name, values):
1090 self.name = name
1091 verify_variables(values)
1092 self.touched = sorted(values.get(KEY_TOUCHED, []))
1093 self.tracked = sorted(values.get(KEY_TRACKED, []))
1094 self.untracked = sorted(values.get(KEY_UNTRACKED, []))
1095 self.command = values.get('command', [])[:]
1096 self.read_only = values.get('read_only')
1097
1098 def union(self, rhs):
1099 assert self.name == rhs.name
1100 assert not (self.command and rhs.command)
1101 var = {
1102 KEY_TOUCHED: sorted(self.touched + rhs.touched),
1103 KEY_TRACKED: sorted(self.tracked + rhs.tracked),
1104 KEY_UNTRACKED: sorted(self.untracked + rhs.untracked),
1105 'command': self.command or rhs.command,
1106 'read_only': rhs.read_only if self.read_only is None else self.read_only,
1107 }
1108 return OSSettings(self.name, var)
1109
1110 def flatten(self):
1111 out = {}
1112 if self.command:
1113 out['command'] = self.command
1114 if self.touched:
1115 out[KEY_TOUCHED] = self.touched
1116 if self.tracked:
1117 out[KEY_TRACKED] = self.tracked
1118 if self.untracked:
1119 out[KEY_UNTRACKED] = self.untracked
1120 if self.read_only is not None:
1121 out['read_only'] = self.read_only
1122 return out
1123
1124
1125 class Configs(object):
1126 """Represents a processed .isolate file.
1127
1128 Stores the file in a processed way, split by OS-specific
1129 configuration.
1130
1131 The self.per_os[None] member contains all the 'else' clauses plus the default
1132 values. It is not included in the flatten() result.
1133 """
1134 def __init__(self, oses, file_comment):
1135 self.file_comment = file_comment
1136 self.per_os = {
1137 None: OSSettings(None, {}),
1138 }
1139 self.per_os.update(dict((name, OSSettings(name, {})) for name in oses))
1140
1141 def union(self, rhs):
1142 items = list(set(self.per_os.keys() + rhs.per_os.keys()))
1143 # Takes the first file comment, preferring lhs.
1144 out = Configs(items, self.file_comment or rhs.file_comment)
1145 for key in items:
1146 out.per_os[key] = union(self.per_os.get(key), rhs.per_os.get(key))
1147 return out
1148
1149 def add_globals(self, values):
1150 for key in self.per_os:
1151 self.per_os[key] = self.per_os[key].union(OSSettings(key, values))
1152
1153 def add_values(self, for_os, values):
1154 self.per_os[for_os] = self.per_os[for_os].union(OSSettings(for_os, values))
1155
1156 def add_negative_values(self, for_os, values):
1157 """Includes the variables to all OSes except |for_os|.
1158
1159 This includes 'None' so unknown OSes get it too.
1160 """
1161 for key in self.per_os:
1162 if key != for_os:
1163 self.per_os[key] = self.per_os[key].union(OSSettings(key, values))
1164
1165 def flatten(self):
1166 """Returns a flat dictionary representation of the configuration.
1167
1168 Skips None pseudo-OS.
1169 """
1170 return dict(
1171 (k, v.flatten()) for k, v in self.per_os.iteritems() if k is not None)
1172
1173
1174 def load_isolate_as_config(value, file_comment, default_oses):
1175 """Parses one .isolate file and returns a Configs() instance.
1176
1177 |value| is the loaded dictionary that was defined in the gyp file.
1178
1179 The expected format is strict; anything deviating from the format below
1180 will throw an assert:
1181 {
1182 'variables': {
1183 'command': [
1184 ...
1185 ],
1186 'isolate_dependency_tracked': [
1187 ...
1188 ],
1189 'isolate_dependency_untracked': [
1190 ...
1191 ],
1192 'read_only': False,
1193 },
1194 'conditions': [
1195 ['OS=="<os>"', {
1196 'variables': {
1197 ...
1198 },
1199 }, { # else
1200 'variables': {
1201 ...
1202 },
1203 }],
1204 ...
1205 ],
1206 }
1207 """
1208 verify_root(value)
1209
1210 # Scan to get the list of OSes.
1211 conditions = value.get('conditions', [])
1212 oses = set(re.match(r'OS==\"([a-z]+)\"', c[0]).group(1) for c in conditions)
1213 oses = oses.union(default_oses)
1214 configs = Configs(oses, file_comment)
1215
1216 # Global level variables.
1217 configs.add_globals(value.get('variables', {}))
1218
1219 # OS specific variables.
1220 for condition in conditions:
1221 condition_os = re.match(r'OS==\"([a-z]+)\"', condition[0]).group(1)
1222 configs.add_values(condition_os, condition[1].get('variables', {}))
1223 if len(condition) > 2:
1224 configs.add_negative_values(
1225 condition_os, condition[2].get('variables', {}))
1226 return configs
1227
1228
1229 def load_isolate_for_flavor(content, flavor):
1230 """Loads the .isolate file and returns the information unprocessed.
1231
1232 Returns the command, dependencies and read_only flag. The dependencies are
1233 fixed to use os.path.sep.
1234 """
1235 # Load the .isolate file, process its conditions, retrieve the command and
1236 # dependencies.
1237 configs = load_isolate_as_config(eval_content(content), None, DEFAULT_OSES)
1238 config = configs.per_os.get(flavor) or configs.per_os.get(None)
1239 if not config:
1240 raise ExecutionError('Failed to load configuration for \'%s\'' % flavor)
1241 # Merge tracked and untracked dependencies, isolate.py doesn't care about the
1242 # trackability of the dependencies, only the build tool does.
1243 dependencies = [
1244 f.replace('/', os.path.sep) for f in config.tracked + config.untracked
1245 ]
1246 touched = [f.replace('/', os.path.sep) for f in config.touched]
1247 return config.command, dependencies, touched, config.read_only
1248
1249
1250 class Flattenable(object):
1251 """Represents data that can be represented as a json file."""
1252 MEMBERS = ()
1253
1254 def flatten(self):
1255 """Returns a json-serializable version of itself.
1256
1257 Skips None entries.
1258 """
1259 items = ((member, getattr(self, member)) for member in self.MEMBERS)
1260 return dict((member, value) for member, value in items if value is not None)
1261
1262 @classmethod
1263 def load(cls, data):
1264 """Loads a flattened version."""
1265 data = data.copy()
1266 out = cls()
1267 for member in out.MEMBERS:
1268 if member in data:
1269 # Access to a protected member XXX of a client class
1270 # pylint: disable=W0212
1271 out._load_member(member, data.pop(member))
1272 if data:
1273 raise ValueError(
1274 'Found unexpected entry %s while constructing an object %s' %
1275 (data, cls.__name__), data, cls.__name__)
1276 return out
1277
1278 def _load_member(self, member, value):
1279 """Loads a member into self."""
1280 setattr(self, member, value)
1281
1282 @classmethod
1283 def load_file(cls, filename):
1284 """Loads the data from a file or return an empty instance."""
1285 out = cls()
1286 try:
1287 out = cls.load(trace_inputs.read_json(filename))
1288 logging.debug('Loaded %s(%s)' % (cls.__name__, filename))
1289 except (IOError, ValueError):
1290 logging.warn('Failed to load %s' % filename)
1291 return out
1292
1293
1294 class Result(Flattenable):
1295 """Describes the content of a .result file.
1296
1297 This file is used by run_swarm_step.py so its content is strictly only
1298 what is necessary to run the test outside of a checkout.
1299
1300 It is important to note that the 'files' dict keys use the native OS
1301 path separator instead of the '/' used in the .isolate file.
1302 """
1303 MEMBERS = (
1304 'command',
1305 'files',
1306 'os',
1307 'read_only',
1308 'relative_cwd',
1309 )
1310
1311 os = get_flavor()
1312
1313 def __init__(self):
1314 super(Result, self).__init__()
1315 self.command = []
1316 self.files = {}
1317 self.read_only = None
1318 self.relative_cwd = None
1319
1320 def update(self, command, infiles, touched, read_only, relative_cwd):
1321 """Updates the result state with new information."""
1322 self.command = command
1323 # Add new files.
1324 for f in infiles:
1325 self.files.setdefault(f, {})
1326 for f in touched:
1327 self.files.setdefault(f, {})['touched_only'] = True
1328 # Prune extraneous files that are not a dependency anymore.
1329 for f in set(self.files).difference(set(infiles).union(touched)):
1330 del self.files[f]
1331 if read_only is not None:
1332 self.read_only = read_only
1333 self.relative_cwd = relative_cwd
1334
1335 def _load_member(self, member, value):
1336 if member == 'os':
1337 if value != self.os:
1338 raise run_swarm_step.ConfigError(
1339 'The .results file was created on another platform')
1340 else:
1341 super(Result, self)._load_member(member, value)
1342
1343 def __str__(self):
1344 out = '%s(\n' % self.__class__.__name__
1345 out += ' command: %s\n' % self.command
1346 out += ' files: %d\n' % len(self.files)
1347 out += ' read_only: %s\n' % self.read_only
1348 out += ' relative_cwd: %s)' % self.relative_cwd
1349 return out
1350
1351
1352 class SavedState(Flattenable):
1353 """Describes the content of a .state file.
1354
1355 The items in this file are simply to improve the developer's life and aren't
1356 used by run_swarm_step.py. This file can always be safely removed.
1357
1358 isolate_file makes it possible to recover root_dir; variables are used
1359 for stateful reruns.
1360 """
1361 MEMBERS = (
1362 'isolate_file',
1363 'variables',
1364 )
1365
1366 def __init__(self):
1367 super(SavedState, self).__init__()
1368 self.isolate_file = None
1369 self.variables = {}
1370
1371 def update(self, isolate_file, variables):
1372 """Updates the saved state with new information."""
1373 self.isolate_file = isolate_file
1374 self.variables.update(variables)
1375
1376 @classmethod
1377 def load(cls, data):
1378 out = super(SavedState, cls).load(data)
1379 if out.isolate_file:
1380 out.isolate_file = trace_inputs.get_native_path_case(out.isolate_file)
1381 return out
1382
1383 def __str__(self):
1384 out = '%s(\n' % self.__class__.__name__
1385 out += ' isolate_file: %s\n' % self.isolate_file
1386 out += ' variables: %s' % ''.join(
1387 '\n %s=%s' % (k, self.variables[k]) for k in sorted(self.variables))
1388 out += ')'
1389 return out
1390
1391
1392 class CompleteState(object):
1393 """Contains all the state to run the task at hand."""
1394 def __init__(self, result_file, result, saved_state):
1395 super(CompleteState, self).__init__()
1396 self.result_file = result_file
1397 # Contains the data that will be used by run_swarm_step.py
1398 self.result = result
1399 # Contains the data to ease developer's use-case but that is not strictly
1400 # necessary.
1401 self.saved_state = saved_state
1402
1403 @classmethod
1404 def load_files(cls, result_file):
1405 """Loads state from disk."""
1406 assert os.path.isabs(result_file), result_file
1407 return cls(
1408 result_file,
1409 Result.load_file(result_file),
1410 SavedState.load_file(result_to_state(result_file)))
1411
1412 def load_isolate(self, isolate_file, variables):
1413 """Updates self.result and self.saved_state with information loaded from a
1414 .isolate file.
1415
1416 Processes the loaded data, deducing root_dir and relative_cwd.
1417 """
1418 # Make sure to not depend on os.getcwd().
1419 assert os.path.isabs(isolate_file), isolate_file
1420 logging.info(
1421 'CompleteState.load_isolate(%s, %s)' % (isolate_file, variables))
1422 relative_base_dir = os.path.dirname(isolate_file)
1423
1424 # Processes the variables and update the saved state.
1425 variables = process_variables(variables, relative_base_dir)
1426 self.saved_state.update(isolate_file, variables)
1427
1428 with open(isolate_file, 'r') as f:
1429 # At that point, variables are not replaced yet in command and infiles.
1430 # infiles may contain directory entries and is in posix style.
1431 command, infiles, touched, read_only = load_isolate_for_flavor(
1432 f.read(), get_flavor())
1433 command = [eval_variables(i, self.saved_state.variables) for i in command]
1434 infiles = [eval_variables(f, self.saved_state.variables) for f in infiles]
1435 touched = [eval_variables(f, self.saved_state.variables) for f in touched]
1436 # root_dir is automatically determined by the deepest root accessed with the
1437 # form '../../foo/bar'.
1438 root_dir = determine_root_dir(relative_base_dir, infiles + touched)
1439 # The relative directory is automatically determined by the relative path
1440 # between root_dir and the directory containing the .isolate file,
1441 # isolate_base_dir.
1442 relative_cwd = os.path.relpath(relative_base_dir, root_dir)
1443 # Normalize the files relative to root_dir. It is important to keep the
1444 # trailing os.path.sep at that step.
1445 infiles = [
1446 relpath(normpath(os.path.join(relative_base_dir, f)), root_dir)
1447 for f in infiles
1448 ]
1449 touched = [
1450 relpath(normpath(os.path.join(relative_base_dir, f)), root_dir)
1451 for f in touched
1452 ]
1453 # Expand the directories by listing each file inside. Up to now, trailing
1454 # os.path.sep must be kept. Do not expand 'touched'.
1455 infiles = expand_directories_and_symlinks(
1456 root_dir,
1457 infiles,
1458 lambda x: re.match(r'.*\.(git|svn|pyc)$', x))
1459
1460 # Finally, update the new stuff in the foo.result file, the file that is
1461 # used by run_swarm_step.py.
1462 self.result.update(command, infiles, touched, read_only, relative_cwd)
1463 logging.debug(self)
1464
1465 def process_inputs(self, level):
1466 """Updates self.result.files with the files' mode and hash.
1467
1468 See process_input() for more information.
1469 """
1470 for infile in sorted(self.result.files):
1471 filepath = os.path.join(self.root_dir, infile)
1472 self.result.files[infile] = process_input(
1473 filepath, self.result.files[infile], level, self.result.read_only)
1474
1475 def save_files(self):
1476 """Saves both self.result and self.saved_state."""
1477 logging.debug('Dumping to %s' % self.result_file)
1478 trace_inputs.write_json(self.result_file, self.result.flatten(), True)
1479 total_bytes = sum(i.get('size', 0) for i in self.result.files.itervalues())
1480 if total_bytes:
1481 logging.debug('Total size: %d bytes' % total_bytes)
1482 saved_state_file = result_to_state(self.result_file)
1483 logging.debug('Dumping to %s' % saved_state_file)
1484 trace_inputs.write_json(saved_state_file, self.saved_state.flatten(), True)
1485
1486 @property
1487 def root_dir(self):
1488 """isolate_file is always inside relative_cwd relative to root_dir."""
1489 isolate_dir = os.path.dirname(self.saved_state.isolate_file)
1490 # Special case '.'.
1491 if self.result.relative_cwd == '.':
1492 return isolate_dir
1493 assert isolate_dir.endswith(self.result.relative_cwd), (
1494 isolate_dir, self.result.relative_cwd)
1495 return isolate_dir[:-(len(self.result.relative_cwd) + 1)]
1496
1497 @property
1498 def resultdir(self):
1499 """Directory containing the results, usually equivalent to the variable
1500 PRODUCT_DIR.
1501 """
1502 return os.path.dirname(self.result_file)
1503
1504 def __str__(self):
1505 def indent(data, indent_length):
1506 """Indents text."""
1507 spacing = ' ' * indent_length
1508 return ''.join(spacing + l for l in str(data).splitlines(True))
1509
1510 out = '%s(\n' % self.__class__.__name__
1511 out += ' root_dir: %s\n' % self.root_dir
1512 out += ' result: %s\n' % indent(self.result, 2)
1513 out += ' saved_state: %s)' % indent(self.saved_state, 2)
1514 return out
1515
1516
1517 def load_complete_state(options, level):
1518 """Loads a CompleteState.
1519
1520 This includes data from .isolate, .result and .state files.
1521
1522 Arguments:
1523 options: Options instance generated with OptionParserIsolate.
1524 level: Amount of data to fetch.
1525 """
1526 if options.result:
1527 # Load the previous state if it was present. Namely, "foo.result" and
1528 # "foo.state".
1529 complete_state = CompleteState.load_files(options.result)
1530 else:
1531 # Constructs a dummy object that cannot be saved. Useful for temporary
1532 # commands like 'run'.
1533 complete_state = CompleteState(None, Result(), SavedState())
1534 options.isolate = options.isolate or complete_state.saved_state.isolate_file
1535 if not options.isolate:
1536 raise ExecutionError('A .isolate file is required.')
1537 if (complete_state.saved_state.isolate_file and
1538 options.isolate != complete_state.saved_state.isolate_file):
1539 raise ExecutionError(
1540 '%s and %s do not match.' % (
1541 options.isolate, complete_state.saved_state.isolate_file))
1542
1543 # Then load the .isolate and expands directories.
1544 complete_state.load_isolate(options.isolate, options.variables)
1545
1546 # Regenerate complete_state.result.files.
1547 complete_state.process_inputs(level)
1548 return complete_state
1549
1550
1551 def read_trace_as_isolate_dict(complete_state):
1552 """Reads a trace and returns the .isolate dictionary."""
1553 api = trace_inputs.get_api()
1554 logfile = complete_state.result_file + '.log'
1555 if not os.path.isfile(logfile):
1556 raise ExecutionError(
1557 'No log file \'%s\' to read, did you forget to \'trace\'?' % logfile)
1558 try:
1559 results = trace_inputs.load_trace(
1560 logfile, complete_state.root_dir, api, default_blacklist)
1561 tracked, touched = split_touched(results.existent)
1562 value = generate_isolate(
1563 tracked,
1564 [],
1565 touched,
1566 complete_state.root_dir,
1567 complete_state.saved_state.variables,
1568 complete_state.result.relative_cwd)
1569 return value
1570 except trace_inputs.TracingFailure, e:
1571 raise ExecutionError(
1572 'Reading traces failed for: %s\n%s' %
1573 (' '.join(complete_state.result.command), str(e)))
1574
1575
1576 def print_all(comment, data, stream):
1577 """Prints a complete .isolate file and its top-level file comment into a
1578 stream.
1579 """
1580 if comment:
1581 stream.write(comment)
1582 pretty_print(data, stream)
1583
1584
1585 def merge(complete_state):
1586 """Reads a trace and merges it back into the source .isolate file."""
1587 value = read_trace_as_isolate_dict(complete_state)
1588
1589 # Now take that data and union it into the original .isolate file.
1590 with open(complete_state.saved_state.isolate_file, 'r') as f:
1591 prev_content = f.read()
1592 prev_config = load_isolate_as_config(
1593 eval_content(prev_content),
1594 extract_comment(prev_content),
1595 DEFAULT_OSES)
1596 new_config = load_isolate_as_config(value, '', DEFAULT_OSES)
1597 config = union(prev_config, new_config)
1598 # pylint: disable=E1103
1599 data = convert_map_to_isolate_dict(
1600 *reduce_inputs(*invert_map(config.flatten())))
1601 print 'Updating %s' % complete_state.saved_state.isolate_file
1602 with open(complete_state.saved_state.isolate_file, 'wb') as f:
1603 print_all(config.file_comment, data, f)
1604
1605
1606 def CMDcheck(args):
1607 """Checks that all the inputs are present and update .result."""
1608 parser = OptionParserIsolate(command='check')
1609 options, _ = parser.parse_args(args)
1610 complete_state = load_complete_state(options, NO_INFO)
1611
1612 # Nothing is done specifically. Just store the result and state.
1613 complete_state.save_files()
1614 return 0
1615
1616
1617 def CMDhashtable(args):
1618 """Creates a hash table content addressed object store.
1619
1620 All the files listed in the .result file are put in the output directory with
1621 the file name being the sha-1 of the file's content.
1622 """
1623 parser = OptionParserIsolate(command='hashtable')
1624 options, _ = parser.parse_args(args)
1625
1626 with run_swarm_step.Profiler('GenerateHashtable'):
1627 success = False
1628 try:
1629 complete_state = load_complete_state(options, WITH_HASH)
1630 options.outdir = (
1631 options.outdir or os.path.join(complete_state.resultdir, 'hashtable'))
1632 # Make sure that complete_state isn't modified until save_files() is
1633 # called, because any changes made to it here will propagate to the files
1634 # created (which is probably not intended).
1635 complete_state.save_files()
1636
1637 logging.info('Creating content addressed object store with %d items',
1638 len(complete_state.result.files))
1639
1640 with open(complete_state.result_file, 'rb') as f:
1641 manifest_hash = hashlib.sha1(f.read()).hexdigest()
1642 manifest_metadata = {'sha-1': manifest_hash}
1643
1644 infiles = complete_state.result.files
1645 infiles[complete_state.result_file] = manifest_metadata
1646
1647 if re.match(r'^https?://.+$', options.outdir):
1648 upload_sha1_tree(
1649 base_url=options.outdir,
1650 indir=complete_state.root_dir,
1651 infiles=infiles)
1652 else:
1653 recreate_tree(
1654 outdir=options.outdir,
1655 indir=complete_state.root_dir,
1656 infiles=infiles,
1657 action=run_swarm_step.HARDLINK,
1658 as_sha1=True)
1659 success = True
1660 finally:
1661 # If the command failed, delete the .results file if it exists. This is
1662 # important so no stale swarm job is executed.
1663 if not success and os.path.isfile(options.result):
1664 os.remove(options.result)
1665
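# Minimal sketch of the content-addressing scheme the hashtable mode relies
# on: a file's name in the store is the hex sha-1 of its content, exactly as
# computed for the manifest above. _example_store_name is a hypothetical
# helper, not part of this tool; hashlib is imported at the top of this
# module.
def _example_store_name(path):
  """Returns the name a file would get in the content addressed store."""
  with open(path, 'rb') as f:
    return hashlib.sha1(f.read()).hexdigest()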
1666
1667 def CMDnoop(args):
1668 """Touches --result but does nothing else.
1669
1670 This mode helps with the transition, since some builders do not yet have all
1671 the test data files checked out. Touches the result file and exits silently.
1672 """
1673 parser = OptionParserIsolate(command='noop')
1674 options, _ = parser.parse_args(args)
1675 # In particular, do not call load_complete_state().
1676 open(options.result, 'a').close()
1677 return 0
1678
1679
1680 def CMDmerge(args):
1681 """Reads and merges the data from the trace back into the original .isolate.
1682
1683 Ignores --outdir.
1684 """
1685 parser = OptionParserIsolate(command='merge', require_result=False)
1686 options, _ = parser.parse_args(args)
1687 complete_state = load_complete_state(options, NO_INFO)
1688 merge(complete_state)
1689 return 0
1690
1691
1692 def CMDread(args):
1693 """Reads the trace file generated with command 'trace'.
1694
1695 Ignores --outdir.
1696 """
1697 parser = OptionParserIsolate(command='read', require_result=False)
1698 options, _ = parser.parse_args(args)
1699 complete_state = load_complete_state(options, NO_INFO)
1700 value = read_trace_as_isolate_dict(complete_state)
1701 pretty_print(value, sys.stdout)
1702 return 0
1703
1704
1705 def CMDremap(args):
1706 """Creates a directory with all the dependencies mapped into it.
1707
1708 Useful for manually investigating why a test is failing. The target
1709 executable is not run.
1710 """
1711 parser = OptionParserIsolate(command='remap', require_result=False)
1712 options, _ = parser.parse_args(args)
1713 complete_state = load_complete_state(options, STATS_ONLY)
1714
1715 if not options.outdir:
1716 options.outdir = run_swarm_step.make_temp_dir(
1717 'isolate', complete_state.root_dir)
1718 else:
1719 if not os.path.isdir(options.outdir):
1720 os.makedirs(options.outdir)
1721 print 'Remapping into %s' % options.outdir
1722 if os.listdir(options.outdir):
1723 raise ExecutionError('Can\'t remap in a non-empty directory')
1724 recreate_tree(
1725 outdir=options.outdir,
1726 indir=complete_state.root_dir,
1727 infiles=complete_state.result.files,
1728 action=run_swarm_step.HARDLINK,
1729 as_sha1=False)
1730 if complete_state.result.read_only:
1731 run_swarm_step.make_writable(options.outdir, True)
1732
1733 if complete_state.result_file:
1734 complete_state.save_files()
1735 return 0
1736
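# Usage sketch (hypothetical paths; command dispatch via the CMD* convention
# is assumed): invoking the remap command with
#   -r foo.results -o /tmp/remap_scratch
# hardlinks every dependency into /tmp/remap_scratch for manual inspection,
# without running the target executable.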
1737
1738 def CMDrun(args):
1739 """Runs the test executable in an isolated (temporary) directory.
1740
1741 All the dependencies are mapped into the temporary directory and the
1742 directory is cleaned up after the target exits. Warning: if --outdir is
1743 specified, it is deleted upon exit.
1744
1745 Argument processing stops at the first unrecognized argument; it and all the
1746 following arguments are appended to the command line of the target. For
1747 example, use: isolate.py -r foo.results -- --gtest_filter=Foo.Bar
1748 """
1749 parser = OptionParserIsolate(command='run', require_result=False)
1750 parser.enable_interspersed_args()
1751 options, args = parser.parse_args(args)
1752 complete_state = load_complete_state(options, STATS_ONLY)
1753 cmd = complete_state.result.command + args
1754 if not cmd:
1755 raise ExecutionError('No command to run')
1756 cmd = trace_inputs.fix_python_path(cmd)
1757 try:
1758 if not options.outdir:
1759 options.outdir = run_swarm_step.make_temp_dir(
1760 'isolate', complete_state.root_dir)
1761 else:
1762 if not os.path.isdir(options.outdir):
1763 os.makedirs(options.outdir)
1764 recreate_tree(
1765 outdir=options.outdir,
1766 indir=complete_state.root_dir,
1767 infiles=complete_state.result.files,
1768 action=run_swarm_step.HARDLINK,
1769 as_sha1=False)
1770 cwd = os.path.normpath(
1771 os.path.join(options.outdir, complete_state.result.relative_cwd))
1772 if not os.path.isdir(cwd):
1773 # This can happen when no files are mapped from the directory containing the
1774 # .isolate file, but the directory must still exist to serve as the current
1775 # working directory.
1776 os.makedirs(cwd)
1777 if complete_state.result.read_only:
1778 run_swarm_step.make_writable(options.outdir, True)
1779 logging.info('Running %s, cwd=%s', cmd, cwd)
1780 result = subprocess.call(cmd, cwd=cwd)
1781 finally:
1782 if options.outdir:
1783 run_swarm_step.rmtree(options.outdir)
1784
1785 if complete_state.result_file:
1786 complete_state.save_files()
1787 return result
1788
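# Lifecycle note for the run command (descriptive summary of the code above):
# dependencies are hardlinked into the output directory (a fresh temporary
# directory unless --outdir is given), the command runs there with cwd set to
# relative_cwd, and the directory is removed in the finally clause even when
# the command fails.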
1789
1790 def CMDtrace(args):
1791 """Traces the target using trace_inputs.py.
1792
1793 It runs the executable without remapping it, and traces all the files it and
1794 its child processes access. The 'read' command can then be used to generate
1795 an updated .isolate file from the trace.
1796
1797 Argument processing stops at the first unrecognized argument; it and all the
1798 following arguments are appended to the command line of the target. For
1799 example, use: isolate.py -r foo.results -- --gtest_filter=Foo.Bar
1800 """
1801 parser = OptionParserIsolate(command='trace')
1802 parser.enable_interspersed_args()
1803 parser.add_option(
1804 '-m', '--merge', action='store_true',
1805 help='After tracing, merge the results back in the .isolate file')
1806 options, args = parser.parse_args(args)
1807 complete_state = load_complete_state(options, STATS_ONLY)
1808 cmd = complete_state.result.command + args
1809 if not cmd:
1810 raise ExecutionError('No command to run')
1811 cmd = trace_inputs.fix_python_path(cmd)
1812 cwd = os.path.normpath(os.path.join(
1813 complete_state.root_dir, complete_state.result.relative_cwd))
1814 logging.info('Running %s, cwd=%s', cmd, cwd)
1815 api = trace_inputs.get_api()
1816 logfile = complete_state.result_file + '.log'
1817 api.clean_trace(logfile)
1818 try:
1819 with api.get_tracer(logfile) as tracer:
1820 result, _ = tracer.trace(
1821 cmd,
1822 cwd,
1823 'default',
1824 True)
1825 except trace_inputs.TracingFailure as e:
1826 raise ExecutionError('Tracing failed for: %s\n%s' % (' '.join(cmd), str(e)))
1827
1828 complete_state.save_files()
1829
1830 if options.merge:
1831 merge(complete_state)
1832
1833 return result
1834
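# Workflow sketch (assumed command dispatch; flags as defined below in
# OptionParserIsolate): a typical trace-then-update cycle is to trace with
# -r foo.results to record file accesses, then use the read command to print
# the updated .isolate dict, or pass --merge to trace to fold the results
# straight back into the source .isolate file.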
1835
1836 class OptionParserIsolate(trace_inputs.OptionParserWithNiceDescription):
1837 """Adds automatic --isolate, --result, --out and --variables handling."""
1838 def __init__(self, require_result=True, **kwargs):
1839 trace_inputs.OptionParserWithNiceDescription.__init__(self, **kwargs)
1840 default_variables = [('OS', get_flavor())]
1841 if sys.platform in ('win32', 'cygwin'):
1842 default_variables.append(('EXECUTABLE_SUFFIX', '.exe'))
1843 else:
1844 default_variables.append(('EXECUTABLE_SUFFIX', ''))
1845 group = optparse.OptionGroup(self, "Common options")
1846 group.add_option(
1847 '-r', '--result',
1848 metavar='FILE',
1849 help='.result file to store the json manifest')
1850 group.add_option(
1851 '-i', '--isolate',
1852 metavar='FILE',
1853 help='.isolate file to load the dependency data from')
1854 group.add_option(
1855 '-V', '--variable',
1856 nargs=2,
1857 action='append',
1858 default=default_variables,
1859 dest='variables',
1860 metavar='FOO BAR',
1861 help='Variables to process in the .isolate file, default: %default. '
1862 'Variables are persistent across calls; they are saved inside '
1863 '<results>.state')
1864 group.add_option(
1865 '-o', '--outdir', metavar='DIR',
1866 help='Directory used to recreate the tree or store the hash table. '
1867 'If the environment variable ISOLATE_HASH_TABLE_DIR exists, it '
1868 'will be used. Otherwise, for run and remap, uses a /tmp '
1869 'subdirectory. For the other modes, defaults to the directory '
1870 'containing --result')
1871 self.add_option_group(group)
1872 self.require_result = require_result
1873
1874 def parse_args(self, *args, **kwargs):
1875 """Makes sure the paths make sense.
1876
1877 On Windows, / and \ are often mixed together in a path.
1878 """
1879 options, args = trace_inputs.OptionParserWithNiceDescription.parse_args(
1880 self, *args, **kwargs)
1881 if not self.allow_interspersed_args and args:
1882 self.error('Unsupported argument: %s' % args)
1883
1884 options.variables = dict(options.variables)
1885
1886 if self.require_result and not options.result:
1887 self.error('--result is required.')
1888 if options.result and not options.result.endswith('.results'):
1889 self.error('--result value must end with \'.results\'')
1890
1891 if options.result:
1892 options.result = os.path.abspath(options.result.replace('/', os.path.sep))
1893
1894 if options.isolate:
1895 options.isolate = trace_inputs.get_native_path_case(
1896 os.path.abspath(
1897 options.isolate.replace('/', os.path.sep)))
1898
1899 if options.outdir and not re.match(r'^https?://.+$', options.outdir):
1900 options.outdir = os.path.abspath(
1901 options.outdir.replace('/', os.path.sep))
1902
1903 return options, args
1904
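# Separator-normalization sketch mirroring parse_args() above (illustration
# only): on Windows a mixed value like 'out/Release\foo.results' becomes an
# absolute native path via
#   os.path.abspath('out/Release\foo.results'.replace('/', os.path.sep))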
1905
1906 ### Glue code to make all the commands work magically.
1907
1908
1909 CMDhelp = trace_inputs.CMDhelp
1910
1911
1912 def main(argv):
1913 try:
1914 return trace_inputs.main_impl(argv)
1915 except (
1916 ExecutionError,
1917 run_swarm_step.MappingError,
1918 run_swarm_step.ConfigError) as e:
1919 sys.stderr.write('\nError: ')
1920 sys.stderr.write(str(e))
1921 sys.stderr.write('\n')
1922 return 1
1923
1924
1925 if __name__ == '__main__':
1926 sys.exit(main(sys.argv[1:]))