bench/gen_bench_expectations.py - Issue 331683003: Added in framework to get more bench data

Unified Diff: bench/gen_bench_expectations.py

Issue 331683003: Added in framework to get more bench data (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: More readability fixes Created 6 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: bench/gen_bench_expectations.py

diff --git a/bench/gen_bench_expectations.py b/bench/gen_bench_expectations.py

index 6d44b6cda2a5d9d76a18cd72d2b4065708538677..4edc38c09d236954a37edaf165231d59e9eeb7c5 100644

--- a/bench/gen_bench_expectations.py

+++ b/bench/gen_bench_expectations.py

@@ -7,9 +7,11 @@

import argparse

import bench_util

+import json

import os

import re

import sys

+import urllib2

# Parameters for calculating bench ranges.

RANGE_RATIO_UPPER = 1.5 # Ratio of range for upper bounds.

@@ -36,12 +38,17 @@ CONFIGS_TO_INCLUDE = ['simple_viewport_1000x1000',

ENTRIES_TO_EXCLUDE = [

]

+_GS_CLOUD_FORMAT = 'http://storage.googleapis.com/chromium-skia-gm/perfdata/%s/%s'

-def compute_ranges(benches):

+def compute_ranges(benches, more_benches=None):

"""Given a list of bench numbers, calculate the alert range.

Args:

benches: a list of float bench values.

+ more_benches: a list of tuples of additional bench values.

+ The first value of each tuple is the number of commits before the current

+ one at which that set of values was recorded, and the second value is a

+ list of bench results.

Returns:

a list of float [lower_bound, upper_bound].

@@ -55,7 +62,7 @@ def compute_ranges(benches):

maximum + diff*RANGE_RATIO_UPPER + avg*ERR_RATIO + ERR_UB]

-def create_expectations_dict(revision_data_points, builder):

+def create_expectations_dict(revision_data_points, builder, extra_data=None):

"""Convert list of bench data points into a dictionary of expectations data.

Args:

@@ -81,13 +88,59 @@ def create_expectations_dict(revision_data_points, builder):

if to_skip:

continue

key = (point.config, point.bench)

+ extras = []

+ for idx, dataset in extra_data:

+ for data in dataset:

+ if (data.bench == point.bench and data.config == point.config and

+ data.time_type == point.time_type and data.per_iter_time):

+ extras.append((idx, data.per_iter_time))

if key in bench_dict:

raise Exception('Duplicate bench entry: ' + str(key))

- bench_dict[key] = [point.time] + compute_ranges(point.per_iter_time)

+ bench_dict[key] = [point.time] + compute_ranges(point.per_iter_time, extras)

return bench_dict

+def get_parent_commits(start_hash, num_back):

+ """Returns a list of commits that are the parent of the commit passed in."""

+ list_commits = urllib2.urlopen(

+ 'https://skia.googlesource.com/skia/+log/%s?format=json&n=%d' %

+ (start_hash, num_back))

+ # NOTE: Very brittle. Removes the four extraneous characters

+ # so json can be read successfully

+ trunc_list = list_commits.read()[4:]

+ json_data = json.loads(trunc_list)

+ return [revision['commit'] for revision in json_data['log']]

+def get_file_suffixes(commit_hash, directory):

+ """Gets all the suffixes available in the directory"""

+ possible_files = os.listdir(directory)

+ prefix = 'bench_' + commit_hash + '_data_'

+ return [name[len(prefix):] for name in possible_files

+ if name.startswith(prefix)]

+def download_bench_data(builder, commit_hash, suffixes, directory):

+ """Downloads data, returns the number successfully downloaded"""

+ cur_files = os.listdir(directory)

+ count = 0

+ for suffix in suffixes:

+ file_name = 'bench_'+commit_hash+'_data_'+suffix

+ if file_name in cur_files:

+ continue

+ try:

+ src = urllib2.urlopen(_GS_CLOUD_FORMAT % (builder, file_name))

+ with open(os.path.join(directory, file_name), 'w') as dest:

+ dest.writelines(src)

+ count += 1

+ except urllib2.HTTPError:

+ pass

+ return count

def main():

"""Reads bench data points, then calculate and export expectations.

"""

@@ -107,6 +160,13 @@ def main():

parser.add_argument(

'-r', '--git_revision', required=True,

help='the git hash to indicate the revision of input data to use.')

+ parser.add_argument(

+ '-t', '--back_track', required=False, default=10,

+ help='the number of commit hashes backwards to look to include' +

+ 'in the calculations.')

+ parser.add_argument(

+ '-m', '--max_commits', required=False, default=1,

+ help='the number of commit hashes to include in the calculations.')

args = parser.parse_args()

builder = args.builder

@@ -114,7 +174,31 @@ def main():

data_points = bench_util.parse_skp_bench_data(

args.input_dir, args.git_revision, args.representation_alg)

- expectations_dict = create_expectations_dict(data_points, builder)

+ parent_commits = get_parent_commits(args.git_revision, args.back_track)

+ print "Using commits: {}".format(parent_commits)

+ suffixes = get_file_suffixes(args.git_revision, args.input_dir)

+ print "Using suffixes: {}".format(suffixes)

+ # TODO(kelvinly): Find a better approach than directly copying from

+ # the GS server?

+ downloaded_commits = []

+ for idx, commit in enumerate(parent_commits):

+ num_downloaded = download_bench_data(

+ builder, commit, suffixes, args.input_dir)

+ if num_downloaded > 0:

+ downloaded_commits.append((num_downloaded, idx, commit))

+ if len(downloaded_commits) < args.max_commits:

+ print ('Less than desired number of commits found. Please increase'

+ '--back_track in later runs')

+ trunc_commits = sorted(downloaded_commits, reverse=True)[:args.max_commits]

+ extra_data = []

+ for _, idx, commit in trunc_commits:

+ extra_data.append((idx, bench_util.parse_skp_bench_data(

+ args.input_dir, commit, args.representation_alg)))

+ expectations_dict = create_expectations_dict(data_points, builder,

+ extra_data)

out_lines = []

keys = expectations_dict.keys()

« no previous file with comments | « no previous file | no next file » | no next file with comments »