Chromium Code Reviews

Side by Side Diff: perf/server/data.go

Issue 335833002: Start loading the BigQuery data and serving it to the UI. (Closed) Base URL: https://skia.googlesource.com/buildbot.git@master
Patch Set: y Created 6 years, 6 months ago
1 // Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be found
3 // in the LICENSE file.
4
5 package main
6
7 import (
8 "encoding/json"
9 "fmt"
10 "io"
11 "net/http"
12 "os/exec"
13 "reflect"
14 "strconv"
15 "strings"
16 "time"
17 )
18
19 import (
20 "code.google.com/p/goauth2/oauth"
21 "code.google.com/p/google-api-go-client/bigquery/v2"
22 "github.com/oxtoacart/webbrowser"
23 )
24
25 const (
26 MISSING_DATA_SENTINEL = 1e100
mtklein 2014/06/15 15:12:10 Cute. Go has +Inf, -Inf, and NaN too I think.
kelvinly 2014/06/15 15:49:26 Tested those, Go doesn't seem to marshal them correctly.
jcgregorio 2014/06/15 16:25:39 Go may have +/-Inf and NaN, but JSON doesn't, thus the sentinel.
jcgregorio 2014/06/16 02:46:25 Added a note explaining the requirements for the sentinel value.
27 )
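For background on the thread above: encoding/json returns an error for non-finite floats, which is why a large but finite sentinel is used. A minimal standalone sketch (not part of this CL) demonstrating the behavior:

    package main

    import (
        "encoding/json"
        "fmt"
        "math"
    )

    func main() {
        // encoding/json refuses +Inf, -Inf and NaN...
        _, err := json.Marshal(math.Inf(1))
        fmt.Println(err) // json: unsupported value: +Inf

        // ...but a large finite sentinel like 1e100 marshals fine.
        b, _ := json.Marshal(1e100)
        fmt.Println(string(b)) // 1e+100
    }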
28
29 // Shouldn't need auth when running from GCE, but will need it for local dev.
30 var config = &oauth.Config{
31 ClientId: "470362608618-nlbqngfl87f4b3mhqqe9ojgaoe11vrld.apps.googleusercontent.com",
32 ClientSecret: "J4YCkfMXFJISGyuBuVEiH60T",
mtklein 2014/06/15 15:12:12 Um... secret?
kelvinly 2014/06/15 15:49:26 Time to get new keys!
jcgregorio 2014/06/16 02:46:24 In a future CL I will move this to reading the client secret from a file.
33 Scope: bigquery.BigqueryScope,
34 AuthURL: "https://accounts.google.com/o/oauth2/auth",
35 TokenURL: "https://accounts.google.com/o/oauth2/token",
36 RedirectURL: "urn:ietf:wg:oauth:2.0:oob",
37 TokenCache: oauth.CacheFile("bqtoken.data"),
38 }
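The follow-up jcgregorio describes, reading the secret from disk rather than source, might look something like this hypothetical sketch (the helper name and the filename "client_secret.txt" are assumptions, not part of this CL):

    package main

    import (
        "fmt"
        "io/ioutil"
        "strings"
    )

    // clientSecretFromFile loads an OAuth client secret from a local file so
    // it never has to be checked into source control. Hypothetical helper.
    func clientSecretFromFile(path string) (string, error) {
        b, err := ioutil.ReadFile(path)
        if err != nil {
            return "", err
        }
        return strings.TrimSpace(string(b)), nil
    }

    func main() {
        secret, err := clientSecretFromFile("client_secret.txt")
        fmt.Println(len(secret) > 0, err)
    }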
39
40 // runFlow runs through a 3LO OAuth 2.0 flow to get credentials for BigQuery.
41 func runFlow(config *oauth.Config) (*http.Client, error) {
42 transport := &oauth.Transport{Config: config}
43 if _, err := config.TokenCache.Token(); err != nil {
44 url := config.AuthCodeURL("")
45 fmt.Printf(`Your browser has been opened to visit:
46
47 %s
48
49 Enter the verification code:`, url)
50 webbrowser.Open(url)
51 var code string
52 fmt.Scan(&code)
53 if _, err := transport.Exchange(code); err != nil {
54 return nil, err
55 }
56 }
57
58 return transport.Client(), nil
59 }
60
61 // Trace represents all the values of a single measurement over time.
62 type Trace struct {
63 Key string `json:"key"`
64 Values []float64 `json:"values"`
65 Params map[string]string `json:"params"`
66 Trybot bool `json:"trybot"`
67 }
68
69 // NewTrace allocates a new Trace set up for the given number of samples.
70 //
71 // The Trace Values are pre-filled in with the missing data sentinel since not
72 // all tests will be run on all commits.
73 func NewTrace(numSamples int) *Trace {
74 t := &Trace{
75 Values: make([]float64, numSamples, numSamples),
76 Params: make(map[string]string),
77 Trybot: false,
78 }
79 for i := range t.Values {
80 t.Values[i] = MISSING_DATA_SENTINEL
81 }
82 return t
83 }
84
85 // Annotations for commits.
86 type Annotation struct {
87 ID int `json:"id"`
mtklein 2014/06/15 15:12:10 Might help to explain ID and Type here a bit.
jcgregorio 2014/06/16 02:46:24 Added note about this mapping to the MySQL database.
88 Notes string `json:"notes"`
89 Author string `json:"author"`
mtklein 2014/06/15 15:12:10 This is author-of-annotation right? Not author-of-commit?
jcgregorio 2014/06/16 02:46:25 Same as above.
90 Type int `json:"type"`
91 }
92
93 // Commit is information about each Git commit.
94 type Commit struct {
95 CommitTime time.Time `json:"commit_time"`
96 Hash string `json:"hash"`
97 GitNumber int `json:"git_number"`
98 CommitMessage string `json:"commit_msg"`
99 Annotations []Annotation `json:"annotations,omitempty"`
100 }
101
102 type Choices []string
mtklein 2014/06/15 15:12:10 Explain this a bit?
jcgregorio 2014/06/16 02:46:25 Done.
103
104 // AllData is the top level struct we return via JSON to the UI.
105 //
106 // The length of the Commits array is the same length as all of the Values
107 // arrays in all of the Traces.
108 type AllData struct {
109 Traces []Trace `json:"traces"`
110 ParamSet map[string]Choices `json:"param_set"`
111 Commits []Commit `json:"commits"`
112 }
113
114 // gitCommitsWithTestData returns the list of commits that have perf data
115 // associated with them.
116 //
117 // Not all commits will have perf data; the builders don't necessarily run for
118 // each commit.
119 func gitCommitsWithTestData(service *bigquery.Service) (map[string]bool, error) {
120 query := `
121 SELECT
122 gitHash
123 FROM
124 (TABLE_DATE_RANGE(perf_skps_v2.skpbench,
125 DATE_ADD(CURRENT_TIMESTAMP(),
126 -2,
127 'DAY'),
128 CURRENT_TIMESTAMP()))
129 GROUP BY
130 gitHash;
131 `
132 iter, err := NewRowIter(service, query)
133 if err != nil {
134 return nil, fmt.Errorf("Failed to query for the Git hashes used: %s", err)
135 }
136
137 hashes := make(map[string]bool)
138 for iter.Next() {
139 h := &struct {
140 Hash string `bq:"gitHash"`
mtklein 2014/06/15 15:12:11 Neat.
141 }{}
142 err := iter.Decode(h)
143 if err != nil {
144 return nil, fmt.Errorf("Failed reading hashes from BigQu ery: %s", err)
145 }
146 hashes[h.Hash] = true
147 }
148 return hashes, nil
149 }
150
151 // GitHash represents information on a single Git commit.
mtklein 2014/06/15 15:12:11 Seems like a subset of Commit above? TimeStamp == CommitTime?
jcgregorio 2014/06/16 02:46:24 Yeah, but this code may be changing soon as we may
152 type GitHash struct {
153 Hash string
154 TimeStamp time.Time
155 }
156
157 // readCommitsFromGit reads the commit history from a Git repository.
158 func readCommitsFromGit(dir string) ([]GitHash, error) {
159 cmd := exec.Command("git", strings.Split("log --format=%H%x20%ci", " ")...)
160 cmd.Dir = dir
161 b, err := cmd.Output()
162 if err != nil {
163 return nil, fmt.Errorf("Failed to run Git: %s", err)
164 }
165 lines := strings.Split(string(b), "\n")
166 hashes := make([]GitHash, 0, len(lines))
167 for _, line := range lines {
168 parts := strings.SplitN(line, " ", 2)
169 if len(parts) == 2 {
170 t, err := time.Parse("2006-01-02 15:04:05 -0700", parts[1])
171 if err != nil {
172 return nil, fmt.Errorf("Failed parsing Git log timestamp: %s", err)
173 }
174 hashes = append(hashes, GitHash{Hash: parts[0], TimeStamp: t})
175 }
176 }
177 return hashes, nil
178 }
179
180 // RowIter is a utility for reading data from a BigQuery query response.
181 //
182 // RowIter will iterate over all the results, even if they span more than one
183 // page of results. It automatically uses page tokens to iterate over all the
184 // pages to retrieve all results.
185 type RowIter struct {
186 response *bigquery.GetQueryResultsResponse
187 jobId string
188 service *bigquery.Service
189 nextPageToken string
190 row int
191 }
192
193 // poll until the job is complete.
194 func (r *RowIter) poll() error {
mtklein 2014/06/15 15:12:10 Odd that bigquery doesn't provide this sort of thing.
kelvinly 2014/06/15 15:49:26 I think the Go library was autogenerated from some API description.
jcgregorio 2014/06/16 02:46:25 Yeah, that is true, but I was able to provide pagination on top of it.
195 var queryResponse *bigquery.GetQueryResultsResponse
196 for {
197 var err error
198 queryCall := r.service.Jobs.GetQueryResults("google.com:chrome-s kia", r.jobId)
199 if r.nextPageToken != "" {
200 queryCall.PageToken(r.nextPageToken)
201 }
202 queryResponse, err = queryCall.Do()
203 if err != nil {
204 return err
205 }
206 if queryResponse.JobComplete {
207 break
208 }
209 time.Sleep(time.Second)
210 }
211 r.nextPageToken = queryResponse.PageToken
212 r.response = queryResponse
213 return nil
214 }
215
216 // NewRowIter starts a query and returns a RowIter for iterating through the
217 // results.
218 func NewRowIter(service *bigquery.Service, query string) (*RowIter, error) {
219 job := &bigquery.Job{
220 Configuration: &bigquery.JobConfiguration{
mtklein 2014/06/15 15:12:11 Can't you elide the nested type names in this sort of literal?
jcgregorio 2014/06/16 02:46:25 AFAIK only if I depend on the order and supply all the values.
221 Query: &bigquery.JobConfigurationQuery{
222 Query: query,
223 },
224 },
225 }
226 jobResponse, err := service.Jobs.Insert("google.com:chrome-skia", job).Do()
227 if err != nil {
228 return nil, err
229 }
230
231 r := &RowIter{
232 jobId: jobResponse.JobReference.JobId,
233 service: service,
234 row: -1, // Start at -1 so the first call to Next() puts us at the 0th Row.
235 }
236 return r, r.poll()
237 }
238
239 // Next moves to the next row in the response and returns true as long as data
240 // is available, returning false when the end of the results is reached.
241 //
242 // Calling Next() the first time actually points the iterator at the first row,
243 // which makes it possible to use Next in a for loop:
244 //
245 // for iter.Next() { ... }
246 //
247 func (r *RowIter) Next() bool {
248 r.row++
249 if r.row >= len(r.response.Rows) {
250 if r.nextPageToken != "" {
251 r.poll()
252 r.row = 0
253 return len(r.response.Rows) > 0
254 } else {
255 return false
256 }
257 }
258 return true
259 }
260
261 // DecodeParams pulls all the values in the params record out as a map[string]string.
262 //
263 // The schema for each table has a nested record called 'params' that contains
264 // various axes along which queries could be built, such as the gpu the test was
265 // run against. Pull out the entire record as a generic map[string]string.
266 func (r *RowIter) DecodeParams() map[string]string {
267 row := r.response.Rows[r.row]
268 schema := r.response.Schema
269 params := map[string]string{}
270 for i, cell := range row.F {
271 if cell.V != nil {
272 name := schema.Fields[i].Name
273 if strings.HasPrefix(name, "params_") {
274 params[strings.TrimPrefix(name, "params_")] = cell.V.(string)
275 }
276 }
277 }
278 return params
279 }
280
281 // Decode uses struct tags to decode a single row into a struct.
282 //
283 // For example, given a struct:
284 //
285 // type A struct {
286 // Name string `bq:"name"`
287 // Value float64 `bq:"measurement"`
288 // }
289 //
290 // And a BigQuery table that contained two columns named "name" and
291 // "measurement". Then calling Decode as follows would parse the column values
292 // for "name" and "measurement" and place them in the Name and Value fields
293 // respectively.
294 //
295 // a = &A{}
296 // iter.Decode(a)
297 //
298 // Implementation Details:
299 //
300 // If a tag names a column that doesn't exist, the field is merely ignored,
301 // i.e. it is left unchanged from when it was passed into Decode.
302 //
303 // Not all columns need to be tagged in the struct.
304 //
305 // The decoder doesn't handle nested structs; only the top level fields are decoded.
306 //
307 // The decoder only handles struct fields of type string, int32, int64,
308 // float32 and float64.
309 func (r *RowIter) Decode(s interface{}) error {
310 row := r.response.Rows[r.row]
311 schema := r.response.Schema
312 // Collapse the data in the row into a map[string]string.
313 rowMap := map[string]string{}
314 for i, cell := range row.F {
315 if cell.V != nil {
316 rowMap[schema.Fields[i].Name] = cell.V.(string)
317 }
318 }
319
320 // Then iterate over the fields of 's' and set them from the row data.
321 sv := reflect.ValueOf(s).Elem()
322 st := sv.Type()
323 for i := 0; i < sv.NumField(); i++ {
324 columnName := st.Field(i).Tag.Get("bq")
325 if columnValue, ok := rowMap[columnName]; ok {
326 switch sv.Field(i).Kind() {
327 case reflect.String:
328 sv.Field(i).SetString(columnValue)
329 case reflect.Float32, reflect.Float64:
330 f, err := strconv.ParseFloat(columnValue, 64)
331 if err != nil {
332 return err
333 }
334 sv.Field(i).SetFloat(f)
335 case reflect.Int32, reflect.Int64:
336 parsedInt, err := strconv.ParseInt(columnValue, 10, 64)
337 if err != nil {
338 return err
339 }
340 sv.Field(i).SetInt(parsedInt)
341 default:
342 return fmt.Errorf("can't decode into field of ty pe: %s", sv.Field(i).Kind())
343 }
344 }
345 }
346 return nil
347 }
348
349 // populateTraces reads the measurement data from BigQuery and populates the Traces.
350 func populateTraces(service *bigquery.Service, all *AllData, hashToIndex map[string]int, numSamples int) error {
351 type Measurement struct {
352 Value float64 `bq:"value"`
353 Key string `bq:"key"`
354 Hash string `bq:"gitHash"`
355 }
356
357 // Now query the actual samples.
358 query := `
359 SELECT
360 *
361 FROM
362 (TABLE_DATE_RANGE(perf_skps_v2.skpbench,
363 DATE_ADD(CURRENT_TIMESTAMP(),
364 -2,
365 'DAY'),
366 CURRENT_TIMESTAMP()))
367 WHERE
368 params.benchName="tabl_worldjournal.skp"
369 OR
370 params.benchName="desk_amazon.skp"
371 ORDER BY
372 key DESC,
373 timestamp DESC;
374 `
375 iter, err := NewRowIter(service, query)
376 if err != nil {
377 return fmt.Errorf("Failed to query data from BigQuery: %s", err)
378 }
379 var trace *Trace
380 currentKey := ""
381 for iter.Next() {
382 m := &Measurement{}
383 if err := iter.Decode(m); err != nil {
384 return fmt.Errorf("Failed to decode Measurement from Big Query: %s", err)
385 }
386 if m.Key != currentKey {
387 if trace != nil {
388 all.Traces = append(all.Traces, *trace)
389 }
390 currentKey = m.Key
391 trace = NewTrace(numSamples)
392 trace.Params = iter.DecodeParams()
393 trace.Key = m.Key
394 }
395 if index, ok := hashToIndex[m.Hash]; ok {
396 trace.Values[index] = m.Value
397 }
398 }
399 if trace != nil { all.Traces = append(all.Traces, *trace) } // guard: no rows means trace is still nil
400
401 return nil
402 }
403
404 // Data is the full set of traces for the last N days all parsed into structs.
405 type Data struct {
406 all *AllData
407 }
408
409 // AsJSON serializes the data as JSON.
410 func (d *Data) AsJSON(w io.Writer) error {
411 // TODO(jcgregorio) Keep a cache of the gzipped JSON around and serve that as long as it's fresh.
mtklein 2014/06/15 15:12:11 How slow is the JSON encoding?
jcgregorio 2014/06/16 02:46:24 Not so much the JSON encoding as the gzip. The ungzipped data is much larger.
jcgregorio 2014/06/16 12:59:55 Sorry, wrong numbers; that should be 50MB ungzipped and 5MB gzipped.
412 return json.NewEncoder(w).Encode(d.all)
413 }
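One possible shape for the caching TODO above, as a hedged sketch (the type and field names are assumptions, not part of this CL): hold the gzipped JSON bytes and only re-encode once they go stale.

    package main

    import (
        "bytes"
        "compress/gzip"
        "encoding/json"
        "fmt"
        "sync"
        "time"
    )

    // gzCache keeps one gzipped JSON snapshot and rebuilds it when stale.
    type gzCache struct {
        mu      sync.Mutex
        data    []byte
        created time.Time
        maxAge  time.Duration
    }

    // get returns the cached gzipped JSON for v, re-encoding if it's stale.
    func (c *gzCache) get(v interface{}) ([]byte, error) {
        c.mu.Lock()
        defer c.mu.Unlock()
        if c.data != nil && time.Since(c.created) < c.maxAge {
            return c.data, nil
        }
        var buf bytes.Buffer
        zw := gzip.NewWriter(&buf)
        if err := json.NewEncoder(zw).Encode(v); err != nil {
            return nil, err
        }
        if err := zw.Close(); err != nil {
            return nil, err
        }
        c.data, c.created = buf.Bytes(), time.Now()
        return c.data, nil
    }

    func main() {
        c := &gzCache{maxAge: time.Minute}
        b, err := c.get(map[string]int{"x": 1})
        fmt.Println(len(b) > 0, err) // true <nil>
    }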
414
415 // populateParamSet returns the set of all possible values for all the 'params'
416 // in AllData.
417 func populateParamSet(all *AllData) {
418 // First pull the data out into a map of sets.
419 type ChoiceSet map[string]bool
420 c := make(map[string]ChoiceSet)
421 for _, t := range all.Traces {
422 for k, v := range t.Params {
423 if set, ok := c[k]; !ok {
424 c[k] = make(map[string]bool)
425 c[k][v] = true
426 } else {
427 set[v] = true
428 }
429 }
430 }
431 // Now flatten the sets into []string and populate all.ParamSet with that.
432 for k, v := range c {
433 allOptions := []string{}
434 for option := range v {
435 allOptions = append(allOptions, option)
436 }
437 all.ParamSet[k] = allOptions
438 }
439 }
440
441 // NewData loads the data the first time and then starts a goroutine to
442 // periodically refresh the data.
443 //
444 // TODO(jcgregorio) Actually do the bit where we start a goroutine.
mtklein 2014/06/15 15:12:11 So Data is going to keep itself updated asynchronously?
jcgregorio 2014/06/16 02:46:25 Once I add the goroutine for updating I will also add the locking to go with it.
445 func NewData(doOauth bool, gitRepoDir string) (*Data, error) {
446 var err error
447 var client *http.Client
448 if doOauth {
449 client, err = runFlow(config)
450 if err != nil {
451 return nil, fmt.Errorf("Failed to auth: %s", err)
452 }
453 } else {
454 client = http.DefaultClient
455 }
456 service, err := bigquery.New(client)
457 if err != nil {
458 return nil, fmt.Errorf("Failed to create a new BigQuery service object: %s", err)
459 }
460
461 // First query and get the list of hashes we are interested in and use that
462 // and the git log results to fill in the Commits.
463 allGitHashes, err := readCommitsFromGit(gitRepoDir)
464 if err != nil {
465 return nil, fmt.Errorf("Failed to read hashes from Git log: %s", err)
466 }
467
468 hashesTested, err := gitCommitsWithTestData(service)
469 if err != nil {
470 return nil, fmt.Errorf("Failed to read hashes from BigQuery: %s" , err)
471 }
472
473 // Order the git hashes by commit log order.
474 commits := make([]Commit, 0, len(hashesTested))
475 for i := len(allGitHashes) - 1; i >= 0; i-- {
476 h := allGitHashes[i]
477 if _, ok := hashesTested[h.Hash]; ok {
478 commits = append(commits, Commit{Hash: h.Hash, CommitTime: h.TimeStamp})
479 }
480 }
481
482 // The number of samples that appear in each trace.
483 numSamples := len(commits)
484
485 // A mapping of Git hashes to where they appear in the Commits array, also the index
486 // at which a measurement gets stored in the Values array.
487 hashToIndex := make(map[string]int)
488 for i, commit := range commits {
489 hashToIndex[commit.Hash] = i
490 }
491
492 all := &AllData{
493 Traces: make([]Trace, 0, 0),
494 ParamSet: make(map[string]Choices),
495 Commits: commits,
496 }
497
498 if err := populateTraces(service, all, hashToIndex, numSamples); err != nil {
499 panic(err)
mtklein 2014/06/15 15:12:10 Seems like we'd want something a bit more robust?
jcgregorio 2014/06/16 02:46:24 This is only done once and on startup and I don't think we need anything more robust.
500 }
501
502 populateParamSet(all)
503
504 return &Data{all: all}, nil
505 }
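For the update thread above, one plausible shape for the future refresh goroutine (a sketch under assumed names; the CL itself defers this work): rebuild the data on a timer and swap it in under a lock.

    package main

    import (
        "fmt"
        "sync"
        "time"
    )

    // refresher periodically rebuilds a value and hands out the latest copy
    // under a read lock. All names here are hypothetical stand-ins.
    type refresher struct {
        mu   sync.RWMutex
        data *int // stand-in for *AllData
    }

    // start launches the refresh goroutine: build, swap under the write lock, sleep.
    func (r *refresher) start(interval time.Duration, build func() *int) {
        go func() {
            for {
                d := build()
                r.mu.Lock()
                r.data = d
                r.mu.Unlock()
                time.Sleep(interval)
            }
        }()
    }

    // latest returns the most recently built value.
    func (r *refresher) latest() *int {
        r.mu.RLock()
        defer r.mu.RUnlock()
        return r.data
    }

    func main() {
        n := 0
        r := &refresher{}
        r.start(time.Millisecond, func() *int { n++; v := n; return &v })
        time.Sleep(5 * time.Millisecond)
        fmt.Println(r.latest() != nil) // true
    }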