OLD | NEW |
1 // Application that captures webpage archives on a CT worker and uploads it to | 1 // Application that captures webpage archives on a CT worker and uploads it to |
2 // Google Storage. | 2 // Google Storage. |
3 package main | 3 package main |
4 | 4 |
5 import ( | 5 import ( |
6 "flag" | 6 "flag" |
7 "fmt" | 7 "fmt" |
8 "io/ioutil" | 8 "io/ioutil" |
9 "path/filepath" | 9 "path/filepath" |
| 10 "time" |
10 | 11 |
11 "github.com/skia-dev/glog" | 12 "github.com/skia-dev/glog" |
12 | 13 |
13 "strings" | |
14 "time" | |
15 | |
16 "go.skia.org/infra/ct/go/util" | 14 "go.skia.org/infra/ct/go/util" |
17 "go.skia.org/infra/ct/go/worker_scripts/worker_common" | 15 "go.skia.org/infra/ct/go/worker_scripts/worker_common" |
18 "go.skia.org/infra/go/common" | 16 "go.skia.org/infra/go/common" |
19 skutil "go.skia.org/infra/go/util" | 17 skutil "go.skia.org/infra/go/util" |
20 ) | 18 ) |
21 | 19 |
22 var ( | 20 var ( |
23 workerNum = flag.Int("worker_num", 1, "The number of this CT worker.
It will be in the {1..100} range.") | 21 workerNum = flag.Int("worker_num", 1, "The number of this CT worker.
It will be in the {1..100} range.") |
24 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_MOBILE_10k
, "The type of pagesets to create from the Alexa CSV list. Eg: 10k, Mobile10k, A
ll.") | 22 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_MOBILE_10k
, "The type of pagesets to create from the Alexa CSV list. Eg: 10k, Mobile10k, A
ll.") |
25 chromiumBuild = flag.String("chromium_build", "", "The chromium build to
use for this capture_archives run.") | 23 chromiumBuild = flag.String("chromium_build", "", "The chromium build to
use for this capture_archives run.") |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
86 return | 84 return |
87 } | 85 } |
88 glog.Infof("The %s fileInfos are: %s", len(fileInfos), fileInfos) | 86 glog.Infof("The %s fileInfos are: %s", len(fileInfos), fileInfos) |
89 for _, fileInfo := range fileInfos { | 87 for _, fileInfo := range fileInfos { |
90 pagesetBaseName := filepath.Base(fileInfo.Name()) | 88 pagesetBaseName := filepath.Base(fileInfo.Name()) |
91 if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(p
agesetBaseName) == ".pyc" { | 89 if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(p
agesetBaseName) == ".pyc" { |
92 // Ignore timestamp files and .pyc files. | 90 // Ignore timestamp files and .pyc files. |
93 continue | 91 continue |
94 } | 92 } |
95 | 93 |
96 » » // Convert the filename into a format consumable by the record_w
pr binary. | 94 » » // Read the pageset. |
97 » » pagesetArchiveName := strings.TrimSuffix(pagesetBaseName, filepa
th.Ext(pagesetBaseName)) | |
98 pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name()) | 95 pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name()) |
| 96 decodedPageset, err := util.ReadPageset(pagesetPath) |
| 97 if err != nil { |
| 98 glog.Errorf("Could not read %s: %s", pagesetPath, err) |
| 99 return |
| 100 } |
99 | 101 |
100 glog.Infof("===== Processing %s =====", pagesetPath) | 102 glog.Infof("===== Processing %s =====", pagesetPath) |
101 args := []string{ | 103 args := []string{ |
| 104 util.CAPTURE_ARCHIVES_DEFAULT_CT_BENCHMARK, |
102 "--extra-browser-args=--disable-setuid-sandbox", | 105 "--extra-browser-args=--disable-setuid-sandbox", |
103 "--browser=exact", | 106 "--browser=exact", |
104 "--browser-executable=" + chromiumBinary, | 107 "--browser-executable=" + chromiumBinary, |
105 » » » fmt.Sprintf("%s_page_set", pagesetArchiveName), | 108 » » » "--user-agent=" + decodedPageset.UserAgent, |
106 » » » "--page-set-base-dir=" + pathToPagesets, | 109 » » » "--urls-list=" + decodedPageset.UrlsList, |
| 110 » » » "--archive-data-file=" + decodedPageset.ArchiveDataFile, |
107 } | 111 } |
108 env := []string{ | 112 env := []string{ |
109 fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets)
, | 113 fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets)
, |
110 "DISPLAY=:0", | 114 "DISPLAY=:0", |
111 } | 115 } |
112 skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.D
uration(timeoutSecs)*time.Second, nil, nil)) | 116 skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.D
uration(timeoutSecs)*time.Second, nil, nil)) |
113 } | 117 } |
114 | 118 |
115 // Write timestamp to the webpage archives dir. | 119 // Write timestamp to the webpage archives dir. |
116 skutil.LogErr(util.CreateTimestampFile(pathToArchives)) | 120 skutil.LogErr(util.CreateTimestampFile(pathToArchives)) |
117 | 121 |
118 // Upload webpage archives dir to Google Storage. | 122 // Upload webpage archives dir to Google Storage. |
119 if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetT
ype, *workerNum); err != nil { | 123 if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetT
ype, *workerNum); err != nil { |
120 glog.Error(err) | 124 glog.Error(err) |
121 return | 125 return |
122 } | 126 } |
123 } | 127 } |
OLD | NEW |