| OLD | NEW |
| 1 // Application that captures webpage archives on a CT worker and uploads them to | 1 // Application that captures webpage archives on a CT worker and uploads them to |
| 2 // Google Storage. | 2 // Google Storage. |
| 3 package main | 3 package main |
| 4 | 4 |
| 5 import ( | 5 import ( |
| 6 "flag" | 6 "flag" |
| 7 "fmt" | 7 "fmt" |
| 8 "io/ioutil" | 8 "io/ioutil" |
| 9 "path/filepath" | 9 "path/filepath" |
| 10 "time" |
| 10 | 11 |
| 11 "github.com/skia-dev/glog" | 12 "github.com/skia-dev/glog" |
| 12 | 13 |
| 13 "strings" | |
| 14 "time" | |
| 15 | |
| 16 "go.skia.org/infra/ct/go/util" | 14 "go.skia.org/infra/ct/go/util" |
| 17 "go.skia.org/infra/ct/go/worker_scripts/worker_common" | 15 "go.skia.org/infra/ct/go/worker_scripts/worker_common" |
| 18 "go.skia.org/infra/go/common" | 16 "go.skia.org/infra/go/common" |
| 19 skutil "go.skia.org/infra/go/util" | 17 skutil "go.skia.org/infra/go/util" |
| 20 ) | 18 ) |
| 21 | 19 |
| 22 var ( | 20 var ( |
| 23 workerNum = flag.Int("worker_num", 1, "The number of this CT worker.
It will be in the {1..100} range.") | 21 workerNum = flag.Int("worker_num", 1, "The number of this CT worker.
It will be in the {1..100} range.") |
| 24 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_MOBILE_10k
, "The type of pagesets to create from the Alexa CSV list. Eg: 10k, Mobile10k, A
ll.") | 22 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_MOBILE_10k
, "The type of pagesets to create from the Alexa CSV list. Eg: 10k, Mobile10k, A
ll.") |
| 25 chromiumBuild = flag.String("chromium_build", "", "The chromium build to
use for this capture_archives run.") | 23 chromiumBuild = flag.String("chromium_build", "", "The chromium build to
use for this capture_archives run.") |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 86 return | 84 return |
| 87 } | 85 } |
| 88 glog.Infof("The %s fileInfos are: %s", len(fileInfos), fileInfos) | 86 glog.Infof("The %s fileInfos are: %s", len(fileInfos), fileInfos) |
| 89 for _, fileInfo := range fileInfos { | 87 for _, fileInfo := range fileInfos { |
| 90 pagesetBaseName := filepath.Base(fileInfo.Name()) | 88 pagesetBaseName := filepath.Base(fileInfo.Name()) |
| 91 if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(p
agesetBaseName) == ".pyc" { | 89 if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(p
agesetBaseName) == ".pyc" { |
| 92 // Ignore timestamp files and .pyc files. | 90 // Ignore timestamp files and .pyc files. |
| 93 continue | 91 continue |
| 94 } | 92 } |
| 95 | 93 |
| 96 » » // Convert the filename into a format consumable by the record_w
pr binary. | 94 » » // Read the pageset. |
| 97 » » pagesetArchiveName := strings.TrimSuffix(pagesetBaseName, filepa
th.Ext(pagesetBaseName)) | |
| 98 pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name()) | 95 pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name()) |
| 96 decodedPageset, err := util.ReadPageset(pagesetPath) |
| 97 if err != nil { |
| 98 glog.Errorf("Could not read %s: %s", pagesetPath, err) |
| 99 return |
| 100 } |
| 99 | 101 |
| 100 glog.Infof("===== Processing %s =====", pagesetPath) | 102 glog.Infof("===== Processing %s =====", pagesetPath) |
| 101 args := []string{ | 103 args := []string{ |
| 104 util.CAPTURE_ARCHIVES_DEFAULT_CT_BENCHMARK, |
| 102 "--extra-browser-args=--disable-setuid-sandbox", | 105 "--extra-browser-args=--disable-setuid-sandbox", |
| 103 "--browser=exact", | 106 "--browser=exact", |
| 104 "--browser-executable=" + chromiumBinary, | 107 "--browser-executable=" + chromiumBinary, |
| 105 » » » fmt.Sprintf("%s_page_set", pagesetArchiveName), | 108 » » » "--user-agent=" + decodedPageset.UserAgent, |
| 106 » » » "--page-set-base-dir=" + pathToPagesets, | 109 » » » "--urls-list=" + decodedPageset.UrlsList, |
| 110 » » » "--archive-data-file=" + decodedPageset.ArchiveDataFile, |
| 107 } | 111 } |
| 108 env := []string{ | 112 env := []string{ |
| 109 fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets)
, | 113 fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets)
, |
| 110 "DISPLAY=:0", | 114 "DISPLAY=:0", |
| 111 } | 115 } |
| 112 skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.D
uration(timeoutSecs)*time.Second, nil, nil)) | 116 skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.D
uration(timeoutSecs)*time.Second, nil, nil)) |
| 113 } | 117 } |
| 114 | 118 |
| 115 // Write timestamp to the webpage archives dir. | 119 // Write timestamp to the webpage archives dir. |
| 116 skutil.LogErr(util.CreateTimestampFile(pathToArchives)) | 120 skutil.LogErr(util.CreateTimestampFile(pathToArchives)) |
| 117 | 121 |
| 118 // Upload webpage archives dir to Google Storage. | 122 // Upload webpage archives dir to Google Storage. |
| 119 if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetT
ype, *workerNum); err != nil { | 123 if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetT
ype, *workerNum); err != nil { |
| 120 glog.Error(err) | 124 glog.Error(err) |
| 121 return | 125 return |
| 122 } | 126 } |
| 123 } | 127 } |
| OLD | NEW |