OLD | NEW |
---|---|
1 // Application that captures webpage archives on a CT worker and uploads it to | 1 // Application that captures webpage archives on a CT worker and uploads it to |
2 // Google Storage. | 2 // Google Storage. |
3 package main | 3 package main |
4 | 4 |
5 import ( | 5 import ( |
6 "encoding/json" | |
6 "flag" | 7 "flag" |
7 "fmt" | 8 "fmt" |
8 "io/ioutil" | 9 "io/ioutil" |
10 "os" | |
9 "path/filepath" | 11 "path/filepath" |
10 | 12 |
11 "github.com/skia-dev/glog" | 13 "github.com/skia-dev/glog" |
12 | 14 |
13 "strings" | 15 "strings" |
14 "time" | 16 "time" |
15 | 17 |
16 "go.skia.org/infra/ct/go/util" | 18 "go.skia.org/infra/ct/go/util" |
17 "go.skia.org/infra/ct/go/worker_scripts/worker_common" | 19 "go.skia.org/infra/ct/go/worker_scripts/worker_common" |
18 "go.skia.org/infra/go/common" | 20 "go.skia.org/infra/go/common" |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
90 pagesetBaseName := filepath.Base(fileInfo.Name()) | 92 pagesetBaseName := filepath.Base(fileInfo.Name()) |
91 if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(p agesetBaseName) == ".pyc" { | 93 if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(p agesetBaseName) == ".pyc" { |
92 // Ignore timestamp files and .pyc files. | 94 // Ignore timestamp files and .pyc files. |
93 continue | 95 continue |
94 } | 96 } |
95 | 97 |
96 // Convert the filename into a format consumable by the record_wpr binary. | 98 // Convert the filename into a format consumable by the record_wpr binary. |
97 pagesetArchiveName := strings.TrimSuffix(pagesetBaseName, filepa th.Ext(pagesetBaseName)) | 99 pagesetArchiveName := strings.TrimSuffix(pagesetBaseName, filepa th.Ext(pagesetBaseName)) |
98 pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name()) | 100 pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name()) |
99 | 101 |
102 pagesetContent, err := os.Open(pagesetPath) | |
dogben
2015/10/14 18:25:50
Nit: maybe create a util function like:
func ReadP
rmistry
2015/10/15 12:23:21
Done.
| |
103 if err != nil { | |
104 glog.Errorf("Could not read %s: %s", pagesetPath, err) | |
105 return | |
106 } | |
107 decodedPageset := util.PagesetVars{} | |
108 if err := json.NewDecoder(pagesetContent).Decode(&decodedPageset ); err != nil { | |
109 glog.Errorf("Could not JSON decode %s: %s", pagesetPath, err) | |
110 return | |
111 } | |
112 | |
100 glog.Infof("===== Processing %s =====", pagesetPath) | 113 glog.Infof("===== Processing %s =====", pagesetPath) |
101 args := []string{ | 114 args := []string{ |
115 util.CAPTURE_ARCHIVES_DEFAULT_CT_BENCHMARK, | |
102 "--extra-browser-args=--disable-setuid-sandbox", | 116 "--extra-browser-args=--disable-setuid-sandbox", |
103 "--browser=exact", | 117 "--browser=exact", |
104 "--browser-executable=" + chromiumBinary, | 118 "--browser-executable=" + chromiumBinary, |
105 fmt.Sprintf("%s_page_set", pagesetArchiveName), | 119 fmt.Sprintf("%s_page_set", pagesetArchiveName), |
106 » » » "--page-set-base-dir=" + pathToPagesets, | 120 » » » "--user-agent=" + decodedPageset.UserAgent, |
121 » » » "--urls-list=" + decodedPageset.UrlsList, | |
122 » » » "--archive-data-file=" + decodedPageset.ArchiveDataFile, | |
107 } | 123 } |
108 env := []string{ | 124 env := []string{ |
109 fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets) , | 125 fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets) , |
110 "DISPLAY=:0", | 126 "DISPLAY=:0", |
111 } | 127 } |
112 skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.D uration(timeoutSecs)*time.Second, nil, nil)) | 128 skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.D uration(timeoutSecs)*time.Second, nil, nil)) |
113 } | 129 } |
114 | 130 |
115 // Write timestamp to the webpage archives dir. | 131 // Write timestamp to the webpage archives dir. |
116 skutil.LogErr(util.CreateTimestampFile(pathToArchives)) | 132 skutil.LogErr(util.CreateTimestampFile(pathToArchives)) |
117 | 133 |
118 // Upload webpage archives dir to Google Storage. | 134 // Upload webpage archives dir to Google Storage. |
119 if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetT ype, *workerNum); err != nil { | 135 if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetT ype, *workerNum); err != nil { |
120 glog.Error(err) | 136 glog.Error(err) |
121 return | 137 return |
122 } | 138 } |
123 } | 139 } |
OLD | NEW |