Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: ct/go/worker_scripts/capture_skps_from_pdfs/main.go

Issue 1994663002: Use swarming in capture_skps and capture_skps_from_pdfs CT tasks (Closed) Base URL: https://skia.googlesource.com/buildbot@ct-5-capture_archives
Patch Set: Address comments Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « ct/go/worker_scripts/capture_skps/main.go ('k') | ct/isolates/capture_skps.isolate » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Application that downloads PDFs and then captures SKPs from them. 1 // Application that downloads PDFs and then captures SKPs from them.
2 // TODO(rmistry): Capturing and uploading SKPs has been temporarily disabled due 2 // TODO(rmistry): Capturing and uploading SKPs has been temporarily disabled due
3 // to the comment in https://bugs.chromium.org/p/skia/issues/detail?id=5183#c34 3 // to the comment in https://bugs.chromium.org/p/skia/issues/detail?id=5183#c34
4 package main 4 package main
5 5
6 import ( 6 import (
7 "flag" 7 "flag"
8 "fmt" 8 "fmt"
9 "io" 9 "io"
10 "io/ioutil" 10 "io/ioutil"
11 "net/url" 11 "net/url"
12 "os" 12 "os"
13 "path"
13 "path/filepath" 14 "path/filepath"
15 "strconv"
14 "strings" 16 "strings"
15 "sync" 17 "sync"
16 "time" 18 "time"
17 19
18 "github.com/skia-dev/glog" 20 "github.com/skia-dev/glog"
19 21
20 "go.skia.org/infra/ct/go/util" 22 "go.skia.org/infra/ct/go/util"
21 "go.skia.org/infra/ct/go/worker_scripts/worker_common" 23 "go.skia.org/infra/ct/go/worker_scripts/worker_common"
22 "go.skia.org/infra/go/common" 24 "go.skia.org/infra/go/common"
23 "go.skia.org/infra/go/httputils" 25 "go.skia.org/infra/go/httputils"
24 skutil "go.skia.org/infra/go/util" 26 skutil "go.skia.org/infra/go/util"
25 ) 27 )
26 28
27 const ( 29 const (
28 	// The number of goroutines that will run in parallel to download PDFs and capture their SKPs. 30 	// The number of goroutines that will run in parallel to download PDFs and capture their SKPs.
29 WORKER_POOL_SIZE = 10 31 WORKER_POOL_SIZE = 10
30 ) 32 )
31 33
32 var ( 34 var (
33 » workerNum = flag.Int("worker_num", 1, "The number of this CT worker . It will be in the {1..100} range.") 35 » startRange = flag.Int("start_range", 1, "The number this worker will capture SKPs from.")
36 	num            = flag.Int("num", 100, "The total number of SKPs to capture starting from the start_range.")
34 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_PDF_1m, " The type of pagesets to use for this run. Eg: PDF1m.") 37 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_PDF_1m, " The type of pagesets to use for this run. Eg: PDF1m.")
35 chromiumBuild = flag.String("chromium_build", "", "The specified chromi um build. This value is used to find the pdfium_test binary from Google Storage and while uploading the PDFs and SKPs to Google Storage.") 38 chromiumBuild = flag.String("chromium_build", "", "The specified chromi um build. This value is used to find the pdfium_test binary from Google Storage and while uploading the PDFs and SKPs to Google Storage.")
36 runID = flag.String("run_id", "", "The unique run id (typically requester + timestamp).") 39 runID = flag.String("run_id", "", "The unique run id (typically requester + timestamp).")
37 targetPlatform = flag.String("target_platform", util.PLATFORM_LINUX, "Th e platform the benchmark will run on (Android / Linux).") 40 targetPlatform = flag.String("target_platform", util.PLATFORM_LINUX, "Th e platform the benchmark will run on (Android / Linux).")
38 ) 41 )
39 42
40 func main() { 43 func main() {
41 defer common.LogPanic() 44 defer common.LogPanic()
42 worker_common.Init() 45 worker_common.Init()
43 if !*worker_common.Local {
44 defer util.CleanTmpDir()
45 }
46 defer util.TimeTrack(time.Now(), "Capturing SKPs from PDFs") 46 defer util.TimeTrack(time.Now(), "Capturing SKPs from PDFs")
47 defer glog.Flush() 47 defer glog.Flush()
48 48
49 // Validate required arguments. 49 // Validate required arguments.
50 if *runID == "" { 50 if *runID == "" {
51 glog.Error("Must specify --run_id") 51 glog.Error("Must specify --run_id")
52 return 52 return
53 } 53 }
54 if *chromiumBuild == "" { 54 if *chromiumBuild == "" {
55 glog.Error("Must specify --chromium_build") 55 glog.Error("Must specify --chromium_build")
56 return 56 return
57 } 57 }
58 58
59 // Instantiate timeout client for downloading PDFs. 59 // Instantiate timeout client for downloading PDFs.
60 httpTimeoutClient := httputils.NewTimeoutClient() 60 httpTimeoutClient := httputils.NewTimeoutClient()
61 // Instantiate GsUtil object. 61 // Instantiate GsUtil object.
62 gs, err := util.NewGsUtil(nil) 62 gs, err := util.NewGsUtil(nil)
63 if err != nil { 63 if err != nil {
64 glog.Error(err) 64 glog.Error(err)
65 return 65 return
66 } 66 }
67 67
68 // Download PDF pagesets if they do not exist locally. 68 // Download PDF pagesets if they do not exist locally.
69 » if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetTyp e, *workerNum); err != nil { 69 » pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType)
70 » pagesetsToIndex, err := gs.DownloadSwarmingArtifacts(pathToPagesets, uti l.PAGESETS_DIR_NAME, *pagesetType, *startRange, *num)
71 » if err != nil {
70 glog.Error(err) 72 glog.Error(err)
71 return 73 return
72 } 74 }
73 » pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) 75 » defer skutil.RemoveAll(pathToPagesets)
74 76
75 // Create the dir that PDFs will be stored in. 77 // Create the dir that PDFs will be stored in.
76 pathToPdfs := filepath.Join(util.PdfsDir, *pagesetType, *chromiumBuild) 78 pathToPdfs := filepath.Join(util.PdfsDir, *pagesetType, *chromiumBuild)
77 // Delete and remake the local PDFs directory. 79 // Delete and remake the local PDFs directory.
78 skutil.RemoveAll(pathToPdfs) 80 skutil.RemoveAll(pathToPdfs)
79 skutil.MkdirAll(pathToPdfs, 0700) 81 skutil.MkdirAll(pathToPdfs, 0700)
80 // Cleanup the dir after the task is done. 82 // Cleanup the dir after the task is done.
81 defer skutil.RemoveAll(pathToPdfs) 83 defer skutil.RemoveAll(pathToPdfs)
82 84
83 // Create the dir that SKPs will be stored in. 85 // Create the dir that SKPs will be stored in.
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 for i := 0; i < WORKER_POOL_SIZE; i++ { 136 for i := 0; i < WORKER_POOL_SIZE; i++ {
135 // Increment the WaitGroup counter. 137 // Increment the WaitGroup counter.
136 wg.Add(1) 138 wg.Add(1)
137 139
138 // Create and run a goroutine closure that captures SKPs. 140 // Create and run a goroutine closure that captures SKPs.
139 go func() { 141 go func() {
140 // Decrement the WaitGroup counter when the goroutine co mpletes. 142 // Decrement the WaitGroup counter when the goroutine co mpletes.
141 defer wg.Done() 143 defer wg.Done()
142 144
143 for pagesetName := range pagesetRequests { 145 for pagesetName := range pagesetRequests {
146 index := strconv.Itoa(pagesetsToIndex[path.Join( pathToPagesets, pagesetName)])
144 147
145 // Read the pageset. 148 // Read the pageset.
146 pagesetPath := filepath.Join(pathToPagesets, pag esetName) 149 pagesetPath := filepath.Join(pathToPagesets, pag esetName)
147 decodedPageset, err := util.ReadPageset(pagesetP ath) 150 decodedPageset, err := util.ReadPageset(pagesetP ath)
148 if err != nil { 151 if err != nil {
149 glog.Errorf("Could not read %s: %s", pag esetPath, err) 152 glog.Errorf("Could not read %s: %s", pag esetPath, err)
150 continue 153 continue
151 } 154 }
152 155
153 glog.Infof("===== Processing %s =====", pagesetP ath) 156 glog.Infof("===== Processing %s =====", pagesetP ath)
(...skipping 10 matching lines...) Expand all
164 // Add protocol if it is missing from the URL. 167 // Add protocol if it is missing from the URL.
165 if !(strings.HasPrefix(pdfURL, "http://") || str ings.HasPrefix(pdfURL, "https://")) { 168 if !(strings.HasPrefix(pdfURL, "http://") || str ings.HasPrefix(pdfURL, "https://")) {
166 pdfURL = fmt.Sprintf("http://%s", pdfURL ) 169 pdfURL = fmt.Sprintf("http://%s", pdfURL )
167 } 170 }
168 pdfBase, err := getPdfFileName(pdfURL) 171 pdfBase, err := getPdfFileName(pdfURL)
169 if err != nil { 172 if err != nil {
170 glog.Errorf("Could not parse the URL %s to get a PDF file name: %s", pdfURL, err) 173 glog.Errorf("Could not parse the URL %s to get a PDF file name: %s", pdfURL, err)
171 erroredPDFs = append(erroredPDFs, pdfURL ) 174 erroredPDFs = append(erroredPDFs, pdfURL )
172 continue 175 continue
173 } 176 }
174 » » » » pdfPath := filepath.Join(pathToPdfs, pdfBase) 177 » » » » pdfDirWithIndex := filepath.Join(pathToPdfs, ind ex)
178 » » » » if err := os.MkdirAll(pdfDirWithIndex, 0700); er r != nil {
179 » » » » » glog.Errorf("Could not mkdir %s: %s", pd fDirWithIndex, err)
180 » » » » }
181 » » » » pdfPath := filepath.Join(pdfDirWithIndex, pdfBas e)
175 resp, err := httpTimeoutClient.Get(pdfURL) 182 resp, err := httpTimeoutClient.Get(pdfURL)
176 if err != nil { 183 if err != nil {
177 glog.Errorf("Could not GET %s: %s", pdfU RL, err) 184 glog.Errorf("Could not GET %s: %s", pdfU RL, err)
178 erroredPDFs = append(erroredPDFs, pdfURL ) 185 erroredPDFs = append(erroredPDFs, pdfURL )
179 continue 186 continue
180 } 187 }
181 defer skutil.Close(resp.Body) 188 defer skutil.Close(resp.Body)
182 out, err := os.Create(pdfPath) 189 out, err := os.Create(pdfPath)
183 if err != nil { 190 if err != nil {
184 glog.Errorf("Unable to create file %s: % s", pdfPath, err) 191 glog.Errorf("Unable to create file %s: % s", pdfPath, err)
(...skipping 16 matching lines...) Expand all
201 //pdfiumTestArgs := []string{ 208 //pdfiumTestArgs := []string{
202 // "--skp", pdfPath, 209 // "--skp", pdfPath,
203 //} 210 //}
204 //if err := util.ExecuteCmd(pdfiumLocalPath, pdf iumTestArgs, []string{}, time.Duration(timeoutSecs)*time.Second, nil, nil); err != nil { 211 //if err := util.ExecuteCmd(pdfiumLocalPath, pdf iumTestArgs, []string{}, time.Duration(timeoutSecs)*time.Second, nil, nil); err != nil {
205 // glog.Errorf("Could not run pdfium_test o n %s: %s", pdfPath, err) 212 // glog.Errorf("Could not run pdfium_test o n %s: %s", pdfPath, err)
206 // erroredSKPs = append(erroredSKPs, pdfBas e) 213 // erroredSKPs = append(erroredSKPs, pdfBas e)
207 // continue 214 // continue
208 //} 215 //}
209 // 216 //
210 //// Move generated SKPs into the pathToSKPs dir ectory. 217 //// Move generated SKPs into the pathToSKPs dir ectory.
211 » » » » //skps, err := filepath.Glob(path.Join(pathToPdf s, fmt.Sprintf("%s.*.skp", pdfBase))) 218 » » » » //skps, err := filepath.Glob(path.Join(pdfDirWit hIndex, fmt.Sprintf("%s.*.skp", pdfBase)))
212 //if err != nil { 219 //if err != nil {
213 // glog.Errorf("Found no SKPs for %s: %s", pdfBase, err) 220 // glog.Errorf("Found no SKPs for %s: %s", pdfBase, err)
214 // erroredSKPs = append(erroredSKPs, pdfBas e) 221 // erroredSKPs = append(erroredSKPs, pdfBas e)
215 // continue 222 // continue
216 //} 223 //}
217 //for _, skp := range skps { 224 //for _, skp := range skps {
218 // skpBasename := path.Base(skp) 225 // skpBasename := path.Base(skp)
219 » » » » //» dest := path.Join(pathToSkps, skpBasenam e) 226 » » » » //» destDir := path.Join(pathToSkps, index)
227 » » » » // if err := os.MkdirAll(destDir, 0700); err != nil {
228 » » » » //» » glog.Errorf("Could not mkdir %s: %s", destDir, err)
229 » » » » //» }
230 » » » » //» dest := path.Join(destDir, skpBasename)
220 // if err := os.Rename(skp, dest); err != n il { 231 // if err := os.Rename(skp, dest); err != n il {
221 // glog.Errorf("Could not move %s t o %s: %s", skp, dest, err) 232 // glog.Errorf("Could not move %s t o %s: %s", skp, dest, err)
222 // continue 233 // continue
223 // } 234 // }
224 //} 235 //}
225 } 236 }
226 }() 237 }()
227 } 238 }
228 239
229 // Wait for all spawned goroutines to complete. 240 // Wait for all spawned goroutines to complete.
(...skipping 19 matching lines...) Expand all
249 // glog.Errorf("Could not create any SKP in %s", pathToSkps) 260 // glog.Errorf("Could not create any SKP in %s", pathToSkps)
250 // return 261 // return
251 //} 262 //}
252 // 263 //
253 //// Move and validate all SKP files. 264 //// Move and validate all SKP files.
254 //if err := util.ValidateSKPs(pathToSkps); err != nil { 265 //if err := util.ValidateSKPs(pathToSkps); err != nil {
255 // glog.Error(err) 266 // glog.Error(err)
256 // return 267 // return
257 //} 268 //}
258 269
259 // Write timestamp to the PDFs dir.
260 skutil.LogErr(util.CreateTimestampFile(pathToPdfs))
261 // Write timestamp to the SKPs dir.
262 skutil.LogErr(util.CreateTimestampFile(pathToSkps))
263
264 // Upload PDFs dir to Google Storage. 270 // Upload PDFs dir to Google Storage.
265 » if err := gs.UploadWorkerArtifacts(util.PDFS_DIR_NAME, filepath.Join(*pa gesetType, *chromiumBuild), *workerNum); err != nil { 271 » if err := gs.UploadSwarmingArtifacts(util.PDFS_DIR_NAME, filepath.Join(* pagesetType, *chromiumBuild)); err != nil {
266 glog.Error(err) 272 glog.Error(err)
267 return 273 return
268 } 274 }
269 // Upload SKPs dir to Google Storage. 275 // Upload SKPs dir to Google Storage.
270 » if err := gs.UploadWorkerArtifacts(util.SKPS_DIR_NAME, filepath.Join(*pa gesetType, *chromiumBuild), *workerNum); err != nil { 276 » if err := gs.UploadSwarmingArtifacts(util.SKPS_DIR_NAME, filepath.Join(* pagesetType, *chromiumBuild)); err != nil {
271 glog.Error(err) 277 glog.Error(err)
272 return 278 return
273 } 279 }
274 280
275 // Summarize errors. 281 // Summarize errors.
276 if len(erroredPDFs) > 0 { 282 if len(erroredPDFs) > 0 {
277 glog.Error("The Following URLs could not be downloaded as PDFs:" ) 283 glog.Error("The Following URLs could not be downloaded as PDFs:" )
278 for _, erroredPDF := range erroredPDFs { 284 for _, erroredPDF := range erroredPDFs {
279 glog.Errorf("\t%s", erroredPDF) 285 glog.Errorf("\t%s", erroredPDF)
280 } 286 }
(...skipping 13 matching lines...) Expand all
// getPdfFileName builds a flat, filesystem-safe file name for a PDF URL by
// concatenating the URL's host with its path, substituting "__" for every
// "/" so the result contains no directory separators. For example,
// http://www.ada.gov/emerprepguideprt.pdf will become
// www.ada.gov__emerprepguideprt.pdf
// An error is returned only when the URL cannot be parsed.
func getPdfFileName(u string) (string, error) {
	parsed, err := url.Parse(u)
	if err != nil {
		return "", err
	}
	flattenedPath := strings.Replace(parsed.Path, "/", "__", -1)
	return parsed.Host + flattenedPath, nil
}
OLDNEW
« no previous file with comments | « ct/go/worker_scripts/capture_skps/main.go ('k') | ct/isolates/capture_skps.isolate » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698