tests/abi_corpus/corpus_utils.py - Issue 9950041: Adding a startup regression test.

Side by Side Diff: tests/abi_corpus/corpus_utils.py

Issue 9950041: Adding a startup regression test. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client

Patch Set: adding missing file Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 #!/usr/bin/python

	2 # Copyright (c) 2012 The Native Client Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 import codecs

	7 import hashlib

	8 import json

	9 import math

	10 import os

	11 import shutil

	12 import struct

	13 import subprocess

	14 import sys

	15 import threading

	16 import time

	17 import zipfile

	18

# Directory layout, derived from this file's own location
# (tests/abi_corpus/corpus_utils.py inside the native_client checkout).
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))  # .../tests/abi_corpus
TESTS_DIR = os.path.dirname(SCRIPT_DIR)                  # .../tests
NACL_DIR = os.path.dirname(TESTS_DIR)                    # checkout root

# Imports from the build directory.
# The build directory is placed at the front of sys.path so that the
# download_utils import below resolves; the insert must stay ahead of it.
sys.path.insert(0, os.path.join(NACL_DIR, 'build'))
from download_utils import RemoveDir

	26

	27

	28 class DownloadError(Exception):

	29 """Indicates a download failed."""

	30 pass

	31

	32

	33 class FailedTests(Exception):

	34 """Indicates a test run failed."""

	35 pass

	36

	37

	38 def GsutilCopySilent(src, dst):

	39 """Invoke gsutil cp, swallowing the output, with retry.

	40

	41 Args:

	42 src: src url.

	43 dst: dst path.

	44 """

	45 env = os.environ.copy()

	46 env['PATH'] = '/b/build/scripts/slave' + os.pathsep + env['PATH']

	47 # Retry to compensate for storage flake.

	48 for attempt in range(3):

	49 process = subprocess.Popen(

	50 ['gsutil', 'cp', src, dst],

	51 env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

	52 process_stdout, process_stderr = process.communicate()

	53 if process.returncode == 0:

	54 return

	55 time.sleep(math.pow(2, attempt + 1) * 5)

	56 raise DownloadError(

	57 'Unexpected return code: %s\n'

	58 '>>> STDOUT\n%s\n'

	59 '>>> STDERR\n%s\n' % (

	60 process.returncode, process_stdout, process_stderr))

	61

	62

	63 def DownloadCorpusTotalList(list_filename):
	Nick Bray 2012/04/03 20:34:29 Optional: DownloadCorpusCRXList? Optional: DownloadCorpusCRXList? bradn 2012/04/12 17:45:54 Done. Show quoted text On 2012/04/03 20:34:29, Nick Bray wrote: > Optional: DownloadCorpusCRXList? Done.
	64 """Download list of all archived files in test corpus.

	65

	66 Args:

	67 list_filename: destination filename (kept around for debugging).

	68 Returns:

	69 List of CRXs.

	70 """

	71 DownloadFileFromCorpus('naclapps.all', list_filename)

	72 fh = open(list_filename)

	73 filenames = fh.read().splitlines()

	74 fh.close()

	75 crx_filenames = [f for f in filenames if f.endswith('.crx')]

	76 return crx_filenames

	77

	78

	79 def DownloadNexeList(filename):

	80 """Download list of NEXEs.

	81

	82 Args:

	83 filename: destination filename.

	84 Returns:

	85 List of NEXEs.

	86 """

	87 GsutilCopySilent('gs://nativeclient-snaps/naclapps.list', filename)

	88 fh = open(filename)

	89 filenames = fh.read().splitlines()

	90 fh.close()

	91 return filenames

	92

	93

	94 def DownloadFileFromCorpus(src_path, dst_filename):

	95 """Download a file from our snapshot.

	96

	97 Args:

	98 src_path: datastore relative path to download from.

	99 dst_filename: destination filename.

	100 """

	101 GsutilCopySilent('gs://nativeclient-snaps/%s' % src_path, dst_filename)

	102

	103

	104 def Sha1Digest(path):

	105 """Determine the sha1 hash of a file's contents given its path."""

	106 m = hashlib.sha1()

	107 fh = open(path, 'rb')

	108 m.update(fh.read())

	109 fh.close()

	110 return m.hexdigest()

	111

	112

	113 def Hex2Alpha(ch):

	114 """Convert a hexadecimal digit from 0-9 / a-f to a-p.

	115

	116 Args:

	117 ch: a character in 0-9 / a-f.

	118 Returns:

	119 A character in a-p.

	120 """

	121 if ch >= '0' and ch <= '9':

	122 return chr(ord(ch) - ord('0') + ord('a'))

	123 else:

	124 return chr(ord(ch) + 10)

	125

	126

	127 def ChromeAppIdFromPath(path):

	128 """Converts a path to the corrisponding chrome app id.

	129

	130 A stable but semi-undocumented property of unpacked chrome extensions is

	131 that they are assigned an app-id based on the first 32 characters of the

	132 sha256 digest of the absolute symlink expanded path of the extension.

	133 Instead of hexadecimal digits, characters a-p.

	134 From discussion with webstore team + inspection of extensions code.

	135 Args:

	136 path: Path to an unpacked extension.

	137 Returns:

	138 A 32 character chrome extension app id.

	139 """

	140 hasher = hashlib.sha256()

	141 hasher.update(os.path.realpath(path))

	142 hexhash = hasher.hexdigest()[:32]

	143 return ''.join([Hex2Alpha(ch) for ch in hexhash])

	144

	145

	146 def RunWithTimeout(cmd, timeout):

	147 """Run a program, capture output, allowing to run up to a timeout.

	148

	149 Args:

	150 cmd: List of strings containing command to run.

	151 timeout: Duration to timeout.

	152 Returns:

	153 Tuple of stdout, stderr, returncode.

	154 """

	155 process = subprocess.Popen(cmd,

	156 stdout=subprocess.PIPE,

	157 stderr=subprocess.PIPE)

	158 def GatherOutput(fh, dst):

	159 dst.append(fh.read())
	Nick Bray 2012/04/03 20:34:29 Document why you're doing this. Document why you're doing this. bradn 2012/04/12 17:45:54 Done. Show quoted text On 2012/04/03 20:34:29, Nick Bray wrote: > Document why you're doing this. Done.
	160 # Gather stdout.

	161 stdout_output = []

	162 stdout_thread = threading.Thread(

	163 target=GatherOutput, args=(process.stdout, stdout_output))

	164 stdout_thread.setDaemon(True)
	Nick Bray 2012/04/03 20:34:29 Why a daemon thread? Comment. If you're worried Why a daemon thread? Comment. If you're worried about behavior on exception, a finally block might be the best bet. bradn 2012/04/12 17:45:54 Cargo cult from the innards of subprocess. Dropped Show quoted text On 2012/04/03 20:34:29, Nick Bray wrote: > Why a daemon thread? Comment. If you're worried about behavior on exception, a > finally block might be the best bet. Cargo cult from the innards of subprocess. Dropped it as its not likely needed in this case.
	165 stdout_thread.start()

	166 # Gather stderr.

	167 stderr_output = []

	168 stderr_thread = threading.Thread(

	169 target=GatherOutput, args=(process.stderr, stderr_output))

	170 stderr_thread.setDaemon(True)

	171 stderr_thread.start()

	172 # Wait for a small span for the app to load.

	173 time.sleep(timeout)

	174 process.kill()

	175 # Join up.

	176 process.wait()

	177 stdout_thread.join()

	178 stderr_thread.join()

	179 # Pick out result.

	180 return stdout_output[0], stderr_output[0], process.returncode

	181

	182

	183 def LoadManifest(app_path):

	184 manifest_data = codecs.open(os.path.join(app_path, 'manifest.json'),

	185 'r', encoding='utf-8').read()

	186 # Ignore CRs as they confuse json.loads.

	187 manifest_data = manifest_data.replace('\r', '')

	188 # Ignore unicode endian markers as they confuse json.loads.

	189 manifest_data = manifest_data.replace(u'\ufeff', '')

	190 manifest_data = manifest_data.replace(u'\uffee', '')

	191 return json.loads(manifest_data)

	192

	193

	194 def CachedPath(options, filename):
	Nick Bray 2012/04/03 20:34:29 Passing "options" through is a little sketchy, now Passing "options" through is a little sketchy, now that this function is used by two scripts with different sets of options. Expand options into explicit arguments wherever prudent? bradn 2012/04/12 17:45:54 Done. Show quoted text On 2012/04/03 20:34:29, Nick Bray wrote: > Passing "options" through is a little sketchy, now that this function is used by > two scripts with different sets of options. Expand options into explicit > arguments wherever prudent? Done.
	195 """Find the full path of a cached file, a cache root relative path.

	196

	197 Args:

	198 options: bags of options.

	199 filename: filename relative to the top of the download url / cache.

	200 Returns:

	201 Absolute path of where the file goes in the cache.

	202 """

	203 return os.path.join(options.cache_dir, 'nacl_abi_corpus_cache', filename)

	204

	205

	206 def Sha1FromFilename(filename):

	207 """Get the expected sha1 of a file path.

	208

	209 Throughout we use the convention that files are store to a name of the form:

	210 <path_to_file>/<sha1hex>[.<some_extention>]

	211 This function extracts the expected sha1.

	212

	213 Args:

	214 filename: filename to extract.

	215 Returns:

	216 Excepted sha1.

	217 """

	218 return os.path.splitext(os.path.basename(filename))[0]

	219

	220

	221 def PrimeCache(options, filename):

	222 """Attempt to add a file to the cache directory if its not already there.

	223

	224 Args:

	225 options: bag of options.

	226 filename: filename relative to the top of the download url / cache.

	227 """

	228 dpath = CachedPath(options, filename)

	229 if (not os.path.exists(dpath) or

	230 Sha1Digest(dpath) != Sha1FromFilename(filename)):

	231 # Try to make the directory, fail is ok, let the download fail instead.

	232 try:

	233 os.makedirs(os.path.basename(dpath))

	234 except OSError:

	235 pass

	236 DownloadFileFromCorpus(filename, dpath)

	237

	238

	239 def CopyFromCache(options, filename, dest_filename):

	240 """Copy an item from the cache.

	241

	242 Args:

	243 options: bag of options.

	244 filename: filename relative to the top of the download url / cache.

	245 dest_filename: location to copy the file to.

	246 """

	247 dpath = CachedPath(options, filename)

	248 shutil.copy(dpath, dest_filename)

	249 assert Sha1Digest(dest_filename) == Sha1FromFilename(filename)

	250

	251

	252 def ExtractFromCache(options, source, dest):

	253 """Extract a crx from the cache.

	254

	255 Args:

	256 options: bag of options.

	257 source: crx file to extract (cache relative).

	258 dest: location to extract to.

	259 """

	260 # We don't want to accidentally extract two extensions on top of each other.

	261 # Assert that the destination doesn't yet exist.

	262 assert not os.path.exists(dest)

	263 dpath = CachedPath(options, source)

	264 # The cached location must exist.

	265 assert os.path.exists(dpath)

	266 zf = zipfile.ZipFile(dpath, 'r')

	267 os.makedirs(dest)

	268 for info in zf.infolist():

	269 # Do not support absolute paths or paths containing ..

	270 assert not os.path.isabs(info.filename) and '..' not in info.filename
	Nick Bray 2012/04/03 20:34:29 Factor assert + join into a function. Change asser Factor assert + join into a function. Change assert into an if/raise. (Asserts can get compiled out.) bradn 2012/04/12 17:45:54 Did the suggestion below instead. Show quoted text On 2012/04/03 20:34:29, Nick Bray wrote: > Factor assert + join into a function. > Change assert into an if/raise. (Asserts can get compiled out.) Did the suggestion below instead.
	271 tpath = os.path.join(dest, info.filename)

	272 if info.filename.endswith('/'):

	273 os.makedirs(tpath)

	274 for info in zf.infolist():
	Nick Bray 2012/04/03 20:34:29 Optional: Instead of two loops, you could skip dir Optional: Instead of two loops, you could skip directories and then create the directory for each file, if it does not exist. In this case there'd be no common code to factor out so you could partially ignore the previous comment. bradn 2012/04/12 17:45:54 Done. Show quoted text On 2012/04/03 20:34:29, Nick Bray wrote: > Optional: Instead of two loops, you could skip directories and then create the > directory for each file, if it does not exist. In this case there'd be no > common code to factor out so you could partially ignore the previous comment. Done.
	275 # Do not support absolute paths or paths containing ..

	276 assert not os.path.isabs(info.filename) and '..' not in info.filename

	277 tpath = os.path.join(dest, info.filename)

	278 if not info.filename.endswith('/'):

	279 zf.extract(info, dest)

	280 zf.close()

	281

	282

	283 def DefaultCacheDirectory():

	284 """Decide a default cache directory.

	285

	286 Decide a default cache directory.

	287 Prefer /b (for the bots)

	288 Failing that, use scons-out.

	289 Failing that, use the current users's home dir.

	290 Returns:

	291 Default to use for a corpus cache directory.

	292 """

	293 default_cache_dir = '/b'

	294 if not os.path.isdir(default_cache_dir):

	295 default_cache_dir = os.path.join(NACL_DIR, 'scons-out')

	296 if not os.path.isdir(default_cache_dir):

	297 default_cache_dir = os.path.expanduser('~/')

	298 default_cache_dir = os.path.realpath(default_cache_dir)

	299 assert os.path.isdir(default_cache_dir)

	300 assert os.path.realpath('.') != default_cache_dir

	301 return default_cache_dir

	302

	303

	304 def NexeArchitecture(filename):

	305 """Decide the architecture of a nexe.

	306

	307 Args:

	308 filename: filename of the nexe.

	309 Returns:

	310 Architecture string (x86-32 / x86-64) or None.

	311 """

	312 fh = open(filename, 'rb')

	313 head = fh.read(20)

	314 # Must not be too short.

	315 if len(head) != 20:

	316 print 'ERROR - header too short'

	317 return None

	318 # Must have ELF header.

	319 if head[0:4] != '\x7fELF':

	320 print 'ERROR - no elf header'

	321 return None

	322 # Decode e_machine

	323 machine = struct.unpack('<H', head[18:])[0]

	324 return {

	325 3: 'x86-32',

	326 #40: 'arm', # TODO(bradnelson): handle arm.

	327 62: 'x86-64',

	328 }.get(machine)

	329

	330

	331 class Progress(object):

	332 def __init__(self, total):

	333 self.total = total

	334 self.count = 0

	335 self.successes = 0

	336 self.failures = 0

	337 self.start = time.time()

	338

	339 def Tally(self):

	340 if self.count > 0:

	341 tm = time.time()

	342 eta = (self.total - self.count) * (tm - self.start) / self.count

	343 eta_minutes = int(eta / 60)

	344 eta_seconds = int(eta - eta_minutes * 60)

	345 eta_str = ' (ETA %d:%02d)' % (eta_minutes, eta_seconds)

	346 else:

	347 eta_str = ''

	348 self.count += 1

	349 print 'Processing %d of %d%s...' % (self.count, self.total, eta_str)

	350

	351 def Result(self, success):

	352 if success:

	353 self.successes += 1

	354 else:

	355 self.failures += 1

	356

	357 def Summary(self, warn_only=False):

	358 print 'Ran tests on %d of %d items.' % (

	359 self.successes + self.failures, self.total)

	360 if self.failures:

	361 # Our alternate validators don't currently cover everything.

	362 # For now, don't fail just emit warning (and a tally of failures).

	363 print '@@@STEP_TEXT@FAILED %d times (%.1f%% are incorrect)@@@' % (

	364 self.failures, self.failures * 100 / (self.successes + self.failures))

	365 if warn_only:

	366 print '@@@STEP_WARNINGS@@@'

	367 else:

	368 raise FailedTests('FAILED %d tests' % self.failures)

	369 else:

	370 print 'SUCCESS'

	371

	372

	373 def SetupOptions(parser):
	Nick Bray 2012/04/03 20:34:29 If you eliminate the "options" arguments as I sugg If you eliminate the "options" arguments as I suggested, you should also inline this function into each file. (The code duplication would not be that bad because argument parsing is "scripty" and not core functionality, in my mind.) bradn 2012/04/12 17:45:54 Done. Show quoted text On 2012/04/03 20:34:29, Nick Bray wrote: > If you eliminate the "options" arguments as I suggested, you should also inline > this function into each file. (The code duplication would not be that bad > because argument parsing is "scripty" and not core functionality, in my mind.) Done.
	374 """Add corpus_utils related options to an optparse.OptionParser.

	375

	376 Args:

	377 parser: an optparse.OptionParser.

	378 """

	379 parser.add_option(

	380 '--cache-dir', dest='cache_dir',

	381 default=DefaultCacheDirectory(),

	382 help='directory to cache downloads in')

OLD	NEW

« no previous file with comments | « buildbot/buildbot_standard.py ('k') | tests/abi_corpus/startup_regression_test.py » ('j') | tests/abi_corpus/startup_regression_test.py » ('J')