Chromium Code Reviews

Unified Diff: perf/server/data.go

Issue 335833002: Start loading the BigQuery data and serving it to the UI. (Closed) Base URL: https://skia.googlesource.com/buildbot.git@master
Patch Set: y Created 6 years, 6 months ago
Index: perf/server/data.go
diff --git a/perf/server/data.go b/perf/server/data.go
new file mode 100644
index 0000000000000000000000000000000000000000..404332e0b0c5adee8bbc542a66d6f9d9f44ceb91
--- /dev/null
+++ b/perf/server/data.go
@@ -0,0 +1,505 @@
+// Copyright (c) 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be found
+// in the LICENSE file.
+
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "os/exec"
+ "reflect"
+ "strconv"
+ "strings"
+ "time"
+)
+
+import (
+ "code.google.com/p/goauth2/oauth"
+ "code.google.com/p/google-api-go-client/bigquery/v2"
+ "github.com/oxtoacart/webbrowser"
+)
+
+const (
+ // The missing data sentinel must be a valid float64 that survives JSON
+ // encoding; JSON has no representation for +Inf, -Inf, or NaN, so a large
+ // finite value is used instead.
+ MISSING_DATA_SENTINEL = 1e100
mtklein 2014/06/15 15:12:10 Cute. Go has +Inf, -Inf, and NaN too I think.
kelvinly 2014/06/15 15:49:26 Tested those, Go doesn't seem to marshal them correctly.
jcgregorio 2014/06/15 16:25:39 Go may have +-Inf and NaN, but JSON doesn't, thus the sentinel value.
jcgregorio 2014/06/16 02:46:25 Added a note explaining the requirements for the sentinel value.
+)
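The constraint discussed in the thread above is easy to verify: encoding/json refuses the IEEE-754 special values, which is what forces a large-but-finite sentinel. A minimal standalone sketch (not part of this CL):

    package main

    import (
        "encoding/json"
        "fmt"
        "math"
    )

    func main() {
        // JSON has no representation for +Inf, -Inf, or NaN, so Marshal
        // returns an UnsupportedValueError for them.
        if _, err := json.Marshal(math.Inf(1)); err != nil {
            fmt.Println(err) // json: unsupported value: +Inf
        }
        if _, err := json.Marshal(math.NaN()); err != nil {
            fmt.Println(err) // json: unsupported value: NaN
        }
        // A large finite sentinel round-trips cleanly.
        b, _ := json.Marshal(1e100)
        fmt.Println(string(b)) // 1e+100
    }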
+
+// Shouldn't need auth when running from GCE, but will need it for local dev.
+var config = &oauth.Config{
+ ClientId: "470362608618-nlbqngfl87f4b3mhqqe9ojgaoe11vrld.apps.googleusercontent.com",
+ ClientSecret: "J4YCkfMXFJISGyuBuVEiH60T",
mtklein 2014/06/15 15:12:12 Um... secret?
kelvinly 2014/06/15 15:49:26 Time to get new keys!
jcgregorio 2014/06/16 02:46:24 In a future CL I will move this to reading the client secret from a file that isn't checked in.
+ Scope: bigquery.BigqueryScope,
+ AuthURL: "https://accounts.google.com/o/oauth2/auth",
+ TokenURL: "https://accounts.google.com/o/oauth2/token",
+ RedirectURL: "urn:ietf:wg:oauth:2.0:oob",
+ TokenCache: oauth.CacheFile("bqtoken.data"),
+}
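Following up on the secret-in-source thread above, a sketch of how the client secret could instead be read from a file kept out of version control; the helper and file name are hypothetical, and "io/ioutil" would need to be added to the imports ("strings" is already imported):

    // clientSecretFromFile is a hypothetical helper, not part of this CL.
    func clientSecretFromFile(filename string) (string, error) {
        b, err := ioutil.ReadFile(filename)
        if err != nil {
            return "", err
        }
        return strings.TrimSpace(string(b)), nil
    }

    // Usage, e.g. at startup:
    //   config.ClientSecret, err = clientSecretFromFile("client_secret.txt")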
+
+// runFlow runs through a 3LO OAuth 2.0 flow to get credentials for BigQuery.
+func runFlow(config *oauth.Config) (*http.Client, error) {
+ transport := &oauth.Transport{Config: config}
+ if _, err := config.TokenCache.Token(); err != nil {
+ url := config.AuthCodeURL("")
+ fmt.Printf(`Your browser has been opened to visit:
+
+ %s
+
+Enter the verification code:`, url)
+ webbrowser.Open(url)
+ var code string
+ fmt.Scan(&code)
+ if _, err := transport.Exchange(code); err != nil {
+ return nil, err
+ }
+ }
+
+ return transport.Client(), nil
+}
+
+// Trace represents all the values of a single measurement over time.
+type Trace struct {
+ Key string `json:"key"`
+ Values []float64 `json:"values"`
+ Params map[string]string `json:"params"`
+ Trybot bool `json:"trybot"`
+}
+
+// NewTrace allocates a new Trace set up for the given number of samples.
+//
+// The Trace Values are pre-filled with the missing data sentinel since not
+// all tests will be run on all commits.
+func NewTrace(numSamples int) *Trace {
+ t := &Trace{
+ Values: make([]float64, numSamples),
+ Params: make(map[string]string),
+ Trybot: false,
+ }
+ for i := range t.Values {
+ t.Values[i] = MISSING_DATA_SENTINEL
+ }
+ return t
+}
+
+// Annotations for commits.
+//
+// ID and Type map to rows in the MySQL database where annotations are stored.
+type Annotation struct {
+ ID int `json:"id"`
mtklein 2014/06/15 15:12:10 Might help to explain ID and Type here a bit.
jcgregorio 2014/06/16 02:46:24 Added note about this mapping to the MySQL database.
+ Notes string `json:"notes"`
+ Author string `json:"author"`
mtklein 2014/06/15 15:12:10 This is author-of-annotation right? Not author-of-commit?
jcgregorio 2014/06/16 02:46:25 Same as above.
+ Type int `json:"type"`
+}
+
+// Commit is information about each Git commit.
+type Commit struct {
+ CommitTime time.Time `json:"commit_time"`
+ Hash string `json:"hash"`
+ GitNumber int `json:"git_number"`
+ CommitMessage string `json:"commit_msg"`
+ Annotations []Annotation `json:"annotations,omitempty"`
+}
+
+// Choices is the list of possible values for a single param, such as the
+// gpu a test was run against.
+type Choices []string
mtklein 2014/06/15 15:12:10 Explain this a bit?
jcgregorio 2014/06/16 02:46:25 Done.
+
+// AllData is the top level struct we return via JSON to the UI.
+//
+// The length of the Commits array is the same length as all of the Values
+// arrays in all of the Traces.
+type AllData struct {
+ Traces []Trace `json:"traces"`
+ ParamSet map[string]Choices `json:"param_set"`
+ Commits []Commit `json:"commits"`
+}
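To make the wire format concrete, a throwaway snippet (runnable inside this package, since fmt, encoding/json, and time are already imported) that marshals a minimal AllData; every key, param, and hash below is made up for illustration:

    all := AllData{
        Traces: []Trace{{
            Key:    "desk_amazon.skp:GTX660", // illustrative key only
            Values: []float64{MISSING_DATA_SENTINEL, 0.0123},
            Params: map[string]string{"benchName": "desk_amazon.skp"},
        }},
        ParamSet: map[string]Choices{"benchName": {"desk_amazon.skp"}},
        // Two commits, matching the two entries in each Values array.
        Commits: []Commit{
            {Hash: "aaaa1111", CommitTime: time.Unix(1402790973, 0)},
            {Hash: "bbbb2222", CommitTime: time.Unix(1402877373, 0)},
        },
    }
    b, _ := json.MarshalIndent(all, "", "  ")
    fmt.Println(string(b)) // prints the indented JSON payload the UI receives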
+
+// gitCommitsWithTestData returns the list of commits that have perf data
+// associated with them.
+//
+// Not all commits will have perf data; the builders don't necessarily run
+// for each commit.
+func gitCommitsWithTestData(service *bigquery.Service) (map[string]bool, error) {
+ query := `
+SELECT
+ gitHash
+FROM
+ (TABLE_DATE_RANGE(perf_skps_v2.skpbench,
+ DATE_ADD(CURRENT_TIMESTAMP(),
+ -2,
+ 'DAY'),
+ CURRENT_TIMESTAMP()))
+GROUP BY
+ gitHash;
+ `
+ iter, err := NewRowIter(service, query)
+ if err != nil {
+ return nil, fmt.Errorf("Failed to query for the Git hashes used: %s", err)
+ }
+
+ hashes := make(map[string]bool)
+ for iter.Next() {
+ h := &struct {
+ Hash string `bq:"gitHash"`
mtklein 2014/06/15 15:12:11 Neat.
+ }{}
+ err := iter.Decode(h)
+ if err != nil {
+ return nil, fmt.Errorf("Failed reading hashes from BigQuery: %s", err)
+ }
+ hashes[h.Hash] = true
+ }
+ return hashes, nil
+}
+
+// GitHash represents information on a single Git commit.
mtklein 2014/06/15 15:12:11 Seems like a subset of Commit above? TimeStamp == CommitTime?
jcgregorio 2014/06/16 02:46:24 Yeah, but this code may be changing soon, so I'm leaving the duplication for now.
+type GitHash struct {
+ Hash string
+ TimeStamp time.Time
+}
+
+// readCommitsFromGit reads the commit history from a Git repository.
+func readCommitsFromGit(dir string) ([]GitHash, error) {
+ cmd := exec.Command("git", strings.Split("log --format=%H%x20%ci", " ")...)
+ cmd.Dir = dir
+ b, err := cmd.Output()
+ if err != nil {
+ return nil, fmt.Errorf("Failed to run Git: %s", err)
+ }
+ lines := strings.Split(string(b), "\n")
+ hashes := make([]GitHash, 0, len(lines))
+ for _, line := range lines {
+ parts := strings.SplitN(line, " ", 2)
+ if len(parts) == 2 {
+ t, err := time.Parse("2006-01-02 15:04:05 -0700", parts[1])
+ if err != nil {
+ return nil, fmt.Errorf("Failed parsing Git log timestamp: %s", err)
+ }
+ hashes = append(hashes, GitHash{Hash: parts[0], TimeStamp: t})
+ }
+ }
+ return hashes, nil
+}
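Since --format=%H%x20%ci emits one `<hash> <committer date>` pair per line, the reference layout passed to time.Parse above has to mirror that date shape. A standalone sketch of the parsing step (hash and date are made up):

    package main

    import (
        "fmt"
        "strings"
        "time"
    )

    func main() {
        line := "da39a3ee5e6b4b0d3255bfef95601890afd80709 2014-06-14 18:09:33 -0400"
        parts := strings.SplitN(line, " ", 2)
        // Go layouts are written in terms of the reference time
        // Mon Jan 2 15:04:05 -0700 MST 2006.
        t, err := time.Parse("2006-01-02 15:04:05 -0700", parts[1])
        if err != nil {
            panic(err)
        }
        fmt.Println(parts[0], t.UTC()) // ... 2014-06-14 22:09:33 +0000 UTC
    }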
+
+// RowIter is a utility for reading data from a BigQuery query response.
+//
+// RowIter will iterate over all the results, even if they span more than one
+// page of results. It automatically uses page tokens to iterate over all the
+// pages to retrieve all results.
+type RowIter struct {
+ response *bigquery.GetQueryResultsResponse
+ jobId string
+ service *bigquery.Service
+ nextPageToken string
+ row int
+}
+
+// poll waits until the BigQuery job is complete and then loads the current
+// page of results.
+func (r *RowIter) poll() error {
mtklein 2014/06/15 15:12:10 Odd that bigquery doesn't provide this sort of thing.
kelvinly 2014/06/15 15:49:26 I think the Go library was autogenerated from some API description.
jcgregorio 2014/06/16 02:46:25 Yeah, that is true, but I was able to provide paging on top of what it generates.
+ var queryResponse *bigquery.GetQueryResultsResponse
+ for {
+ var err error
+ queryCall := r.service.Jobs.GetQueryResults("google.com:chrome-skia", r.jobId)
+ if r.nextPageToken != "" {
+ queryCall.PageToken(r.nextPageToken)
+ }
+ queryResponse, err = queryCall.Do()
+ if err != nil {
+ return err
+ }
+ if queryResponse.JobComplete {
+ break
+ }
+ time.Sleep(time.Second)
+ }
+ r.nextPageToken = queryResponse.PageToken
+ r.response = queryResponse
+ return nil
+}
+
+// NewRowIter starts a query and returns a RowIter for iterating through the
+// results.
+func NewRowIter(service *bigquery.Service, query string) (*RowIter, error) {
+ job := &bigquery.Job{
+ Configuration: &bigquery.JobConfiguration{
mtklein 2014/06/15 15:12:11 Can't you elide the nested type names in this sort of literal?
jcgregorio 2014/06/16 02:46:25 AFAIK only if I depend on the order and supply all the fields.
+ Query: &bigquery.JobConfigurationQuery{
+ Query: query,
+ },
+ },
+ }
+ jobResponse, err := service.Jobs.Insert("google.com:chrome-skia", job).Do()
+ if err != nil {
+ return nil, err
+ }
+
+ r := &RowIter{
+ jobId: jobResponse.JobReference.JobId,
+ service: service,
+ row: -1, // Start at -1 so the first call to Next() puts us at the 0th Row.
+ }
+ return r, r.poll()
+}
+
+// Next moves to the next row in the response and returns true as long as data
+// is available, returning false when the end of the results is reached.
+//
+// Calling Next() the first time actually points the iterator at the first row,
+// which makes it possible to use Next in a for loop:
+//
+// for iter.Next() { ... }
+//
+func (r *RowIter) Next() bool {
+ r.row++
+ if r.row >= len(r.response.Rows) {
+ if r.nextPageToken == "" {
+ return false
+ }
+ // Don't advance into a stale page if fetching the next one failed.
+ if err := r.poll(); err != nil {
+ return false
+ }
+ r.row = 0
+ return len(r.response.Rows) > 0
+ }
+ return true
+}
+
+// DecodeParams pulls all the values in the params record out as a map[string]string.
+//
+// The schema for each table has a nested record called 'params' that contains
+// various axes along which queries could be built, such as the gpu the test was
+// run against. Pull out the entire record as a generic map[string]string.
+func (r *RowIter) DecodeParams() map[string]string {
+ row := r.response.Rows[r.row]
+ schema := r.response.Schema
+ params := map[string]string{}
+ for i, cell := range row.F {
+ if cell.V != nil {
+ name := schema.Fields[i].Name
+ if strings.HasPrefix(name, "params_") {
+ params[strings.TrimPrefix(name, "params_")] = cell.V.(string)
+ }
+ }
+ }
+ return params
+}
+
+// Decode uses struct tags to decode a single row into a struct.
+//
+// For example, given a struct:
+//
+// type A struct {
+// Name string `bq:"name"`
+// Value float64 `bq:"measurement"`
+// }
+//
+// and a BigQuery table that contains two columns named "name" and
+// "measurement", calling Decode as follows parses the column values for
+// "name" and "measurement" and places them in the Name and Value fields
+// respectively:
+//
+// a = &A{}
+// iter.Decode(a)
+//
+// Implementation Details:
+//
+// If a tag names a column that doesn't exist, the field is merely ignored,
+// i.e. it is left unchanged from when it was passed into Decode.
+//
+// Not all columns need to be tagged in the struct.
+//
+// The decoder doesn't handle nested structs, only the top level fields are decoded.
+//
+// The decoder only handles struct fields of type string, int, int32, int64,
+// float, float32 and float64.
+func (r *RowIter) Decode(s interface{}) error {
+ row := r.response.Rows[r.row]
+ schema := r.response.Schema
+ // Collapse the data in the row into a map[string]string.
+ rowMap := map[string]string{}
+ for i, cell := range row.F {
+ if cell.V != nil {
+ rowMap[schema.Fields[i].Name] = cell.V.(string)
+ }
+ }
+
+ // Then iterate over the fields of 's' and set them from the row data.
+ sv := reflect.ValueOf(s).Elem()
+ st := sv.Type()
+ for i := 0; i < sv.NumField(); i++ {
+ columnName := st.Field(i).Tag.Get("bq")
+ if columnValue, ok := rowMap[columnName]; ok {
+ switch sv.Field(i).Kind() {
+ case reflect.String:
+ sv.Field(i).SetString(columnValue)
+ case reflect.Float32, reflect.Float64:
+ f, err := strconv.ParseFloat(columnValue, 64)
+ if err != nil {
+ return err
+ }
+ sv.Field(i).SetFloat(f)
+ case reflect.Int32, reflect.Int64:
+ parsedInt, err := strconv.ParseInt(columnValue, 10, 64)
+ if err != nil {
+ return err
+ }
+ sv.Field(i).SetInt(parsedInt)
+ default:
+ return fmt.Errorf("can't decode into field of type: %s", sv.Field(i).Kind())
+ }
+ }
+ }
+ return nil
+}
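The decoder above is ordinary reflect plumbing; a self-contained toy that exercises the same tag-driven pattern against a hand-built row map, with no BigQuery involved (the struct and values are invented for the example):

    package main

    import (
        "fmt"
        "reflect"
        "strconv"
    )

    type sample struct {
        Name  string  `bq:"name"`
        Value float64 `bq:"measurement"`
    }

    // decodeRow sets tagged fields of *s from column-name -> string-value pairs.
    func decodeRow(rowMap map[string]string, s interface{}) error {
        sv := reflect.ValueOf(s).Elem()
        st := sv.Type()
        for i := 0; i < sv.NumField(); i++ {
            v, ok := rowMap[st.Field(i).Tag.Get("bq")]
            if !ok {
                continue // untagged or missing columns are left unchanged
            }
            switch sv.Field(i).Kind() {
            case reflect.String:
                sv.Field(i).SetString(v)
            case reflect.Float32, reflect.Float64:
                f, err := strconv.ParseFloat(v, 64)
                if err != nil {
                    return err
                }
                sv.Field(i).SetFloat(f)
            }
        }
        return nil
    }

    func main() {
        s := &sample{}
        _ = decodeRow(map[string]string{"name": "desk_amazon.skp", "measurement": "0.0123"}, s)
        fmt.Printf("%+v\n", s) // &{Name:desk_amazon.skp Value:0.0123}
    }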
+
+// populateTraces reads the measurement data from BigQuery and populates the Traces.
+func populateTraces(service *bigquery.Service, all *AllData, hashToIndex map[string]int, numSamples int) error {
+ type Measurement struct {
+ Value float64 `bq:"value"`
+ Key string `bq:"key"`
+ Hash string `bq:"gitHash"`
+ }
+
+ // Now query the actual samples.
+ query := `
+ SELECT
+ *
+ FROM
+ (TABLE_DATE_RANGE(perf_skps_v2.skpbench,
+ DATE_ADD(CURRENT_TIMESTAMP(),
+ -2,
+ 'DAY'),
+ CURRENT_TIMESTAMP()))
+ WHERE
+ params.benchName="tabl_worldjournal.skp"
+ OR
+ params.benchName="desk_amazon.skp"
+ ORDER BY
+ key DESC,
+ timestamp DESC;
+ `
+ iter, err := NewRowIter(service, query)
+ if err != nil {
+ return fmt.Errorf("Failed to query data from BigQuery: %s", err)
+ }
+ var trace *Trace
+ currentKey := ""
+ for iter.Next() {
+ m := &Measurement{}
+ if err := iter.Decode(m); err != nil {
+ return fmt.Errorf("Failed to decode Measurement from BigQuery: %s", err)
+ }
+ if m.Key != currentKey {
+ if trace != nil {
+ all.Traces = append(all.Traces, *trace)
+ }
+ currentKey = m.Key
+ trace = NewTrace(numSamples)
+ trace.Params = iter.DecodeParams()
+ trace.Key = m.Key
+ }
+ if index, ok := hashToIndex[m.Hash]; ok {
+ trace.Values[index] = m.Value
+ }
+ }
+ // Append the last trace, guarding against the query returning no rows.
+ if trace != nil {
+ all.Traces = append(all.Traces, *trace)
+ }
+
+ return nil
+}
+
+// Data is the full set of traces for the last N days, all parsed into structs.
+type Data struct {
+ all *AllData
+}
+
+// AsJSON serializes the data as JSON.
+func (d *Data) AsJSON(w io.Writer) error {
+ // TODO(jcgregorio) Keep a cache of the gzipped JSON around and serve that as long as it's fresh.
mtklein 2014/06/15 15:12:11 How slow is the JSON encoding?
jcgregorio 2014/06/16 02:46:24 Not so much the JSON encoding as the gzip. The ungzipped and gzipped payloads are both sizable.
jcgregorio 2014/06/16 12:59:55 Sorry, wrong numbers; that should be 50MB and 5MB respectively.
+ return json.NewEncoder(w).Encode(d.all)
+}
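A sketch of what the TODO could look like: encode once into a gzip-compressed buffer and hand the cached bytes to a handler that serves them with Content-Encoding: gzip. The method name and caching policy are assumptions, and "bytes" and "compress/gzip" would need importing:

    // gzippedJSON is a hypothetical cache fill, not part of this CL.
    func (d *Data) gzippedJSON() ([]byte, error) {
        var buf bytes.Buffer
        zw := gzip.NewWriter(&buf)
        if err := json.NewEncoder(zw).Encode(d.all); err != nil {
            return nil, err
        }
        // Close flushes the gzip trailer; without it the stream is truncated.
        if err := zw.Close(); err != nil {
            return nil, err
        }
        return buf.Bytes(), nil
    }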
+
+// populateParamSet fills in all.ParamSet with the set of all possible values
+// for all the 'params' in AllData.
+func populateParamSet(all *AllData) {
+ // First pull the data out into a map of sets.
+ type ChoiceSet map[string]bool
+ c := make(map[string]ChoiceSet)
+ for _, t := range all.Traces {
+ for k, v := range t.Params {
+ if set, ok := c[k]; !ok {
+ c[k] = make(map[string]bool)
+ c[k][v] = true
+ } else {
+ set[v] = true
+ }
+ }
+ }
+ // Now flatten the sets into []string and populate all.ParamSet with that.
+ for k, v := range c {
+ allOptions := []string{}
+ for option := range v {
+ allOptions = append(allOptions, option)
+ }
+ all.ParamSet[k] = allOptions
+ }
+}
+
+// NewData loads the data the first time and then starts a go routine to
+// periodically refresh the data.
+//
+// TODO(jcgregorio) Actually do the bit where we start a go routine.
mtklein 2014/06/15 15:12:11 So Data is going to keep itself updated asynchronously?
jcgregorio 2014/06/16 02:46:25 Once I add the go routine for updating I will also add locking around the data.
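A sketch of the shape the refresh could take once the go routine exists, per the thread above; the lockedData type, the interval, and the loadAllData helper are all assumptions rather than code from this CL:

    // lockedData is a hypothetical future version of Data with a lock, since
    // the refresh goroutine and the HTTP handlers would share it.
    type lockedData struct {
        mu  sync.RWMutex
        all *AllData
    }

    func (d *lockedData) startRefresh(service *bigquery.Service, every time.Duration) {
        go func() {
            for range time.Tick(every) {
                fresh, err := loadAllData(service) // hypothetical re-load helper
                if err != nil {
                    continue // keep serving the previous copy on failure
                }
                d.mu.Lock()
                d.all = fresh
                d.mu.Unlock()
            }
        }()
    }

Readers such as AsJSON would then take mu.RLock() before encoding d.all.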
+func NewData(doOauth bool, gitRepoDir string) (*Data, error) {
+ var err error
+ var client *http.Client
+ if doOauth {
+ client, err = runFlow(config)
+ if err != nil {
+ return nil, fmt.Errorf("Failed to auth: %s", err)
+ }
+ } else {
+ client = http.DefaultClient
+ }
+ service, err := bigquery.New(client)
+ if err != nil {
+ return nil, fmt.Errorf("Failed to create a new BigQuery service object: %s", err)
+ }
+
+ // First query and get the list of hashes we are interested in and use that
+ // and the git log results to fill in the Commits.
+ allGitHashes, err := readCommitsFromGit(gitRepoDir)
+ if err != nil {
+ return nil, fmt.Errorf("Failed to read hashes from Git log: %s", err)
+ }
+
+ hashesTested, err := gitCommitsWithTestData(service)
+ if err != nil {
+ return nil, fmt.Errorf("Failed to read hashes from BigQuery: %s", err)
+ }
+
+ // Order the git hashes by commit log order.
+ commits := make([]Commit, 0, len(hashesTested))
+ for i := len(allGitHashes) - 1; i >= 0; i-- {
+ h := allGitHashes[i]
+ if _, ok := hashesTested[h.Hash]; ok {
+ commits = append(commits, Commit{Hash: h.Hash, CommitTime: h.TimeStamp})
+ }
+ }
+
+ // The number of samples that appear in each trace.
+ numSamples := len(commits)
+
+ // A mapping of Git hashes to where they appear in the Commits array, also the index
+ // at which a measurement gets stored in the Values array.
+ hashToIndex := make(map[string]int)
+ for i, commit := range commits {
+ hashToIndex[commit.Hash] = i
+ }
+
+ all := &AllData{
+ Traces: make([]Trace, 0),
+ ParamSet: make(map[string]Choices),
+ Commits: commits,
+ }
+
+ if err := populateTraces(service, all, hashToIndex, numSamples); err != nil {
+ panic(err)
mtklein 2014/06/15 15:12:10 Seems like we'd want something a bit more robust?
jcgregorio 2014/06/16 02:46:24 This is only done once, at startup, and I don't want the server to start serving without data.
+ }
+
+ populateParamSet(all)
+
+ return &Data{all: all}, nil
+}