| OLD | NEW |
| 1 // Application that downloads PDFs and then captures SKPs from them. | 1 // Application that downloads PDFs and then captures SKPs from them. |
| 2 // TODO(rmistry): Capturing and uploading SKPs has been temporarily disabled due | 2 // TODO(rmistry): Capturing and uploading SKPs has been temporarily disabled due |
| 3 // to the comment in https://bugs.chromium.org/p/skia/issues/detail?id=5183#c34 | 3 // to the comment in https://bugs.chromium.org/p/skia/issues/detail?id=5183#c34 |
| 4 package main | 4 package main |
| 5 | 5 |
| 6 import ( | 6 import ( |
| 7 "flag" | 7 "flag" |
| 8 "fmt" | 8 "fmt" |
| 9 "io" | 9 "io" |
| 10 "io/ioutil" | 10 "io/ioutil" |
| 11 "net/url" | 11 "net/url" |
| 12 "os" | 12 "os" |
| 13 "path" |
| 13 "path/filepath" | 14 "path/filepath" |
| 15 "strconv" |
| 14 "strings" | 16 "strings" |
| 15 "sync" | 17 "sync" |
| 16 "time" | 18 "time" |
| 17 | 19 |
| 18 "github.com/skia-dev/glog" | 20 "github.com/skia-dev/glog" |
| 19 | 21 |
| 20 "go.skia.org/infra/ct/go/util" | 22 "go.skia.org/infra/ct/go/util" |
| 21 "go.skia.org/infra/ct/go/worker_scripts/worker_common" | 23 "go.skia.org/infra/ct/go/worker_scripts/worker_common" |
| 22 "go.skia.org/infra/go/common" | 24 "go.skia.org/infra/go/common" |
| 23 "go.skia.org/infra/go/httputils" | 25 "go.skia.org/infra/go/httputils" |
| 24 skutil "go.skia.org/infra/go/util" | 26 skutil "go.skia.org/infra/go/util" |
| 25 ) | 27 ) |
| 26 | 28 |
| 27 const ( | 29 const ( |
| 28 // The number of goroutines that will run in parallel to download PDFs a
nd capture their SKPs. | 30 // The number of goroutines that will run in parallel to download PDFs a
nd capture their SKPs. |
| 29 WORKER_POOL_SIZE = 10 | 31 WORKER_POOL_SIZE = 10 |
| 30 ) | 32 ) |
| 31 | 33 |
| 32 var ( | 34 var ( |
| 33 » workerNum = flag.Int("worker_num", 1, "The number of this CT worker
. It will be in the {1..100} range.") | 35 » startRange = flag.Int("start_range", 1, "The number this worker will
capture SKPs from.") |
| 36 » num = flag.Int("num", 100, "The total number of SKPs to captu
re starting from the start_range.") |
| 34 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_PDF_1m, "
The type of pagesets to use for this run. Eg: PDF1m.") | 37 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_PDF_1m, "
The type of pagesets to use for this run. Eg: PDF1m.") |
| 35 chromiumBuild = flag.String("chromium_build", "", "The specified chromi
um build. This value is used to find the pdfium_test binary from Google Storage
and while uploading the PDFs and SKPs to Google Storage.") | 38 chromiumBuild = flag.String("chromium_build", "", "The specified chromi
um build. This value is used to find the pdfium_test binary from Google Storage
and while uploading the PDFs and SKPs to Google Storage.") |
| 36 runID = flag.String("run_id", "", "The unique run id (typically
requester + timestamp).") | 39 runID = flag.String("run_id", "", "The unique run id (typically
requester + timestamp).") |
| 37 targetPlatform = flag.String("target_platform", util.PLATFORM_LINUX, "Th
e platform the benchmark will run on (Android / Linux).") | 40 targetPlatform = flag.String("target_platform", util.PLATFORM_LINUX, "Th
e platform the benchmark will run on (Android / Linux).") |
| 38 ) | 41 ) |
| 39 | 42 |
| 40 func main() { | 43 func main() { |
| 41 defer common.LogPanic() | 44 defer common.LogPanic() |
| 42 worker_common.Init() | 45 worker_common.Init() |
| 43 if !*worker_common.Local { | |
| 44 defer util.CleanTmpDir() | |
| 45 } | |
| 46 defer util.TimeTrack(time.Now(), "Capturing SKPs from PDFs") | 46 defer util.TimeTrack(time.Now(), "Capturing SKPs from PDFs") |
| 47 defer glog.Flush() | 47 defer glog.Flush() |
| 48 | 48 |
| 49 // Validate required arguments. | 49 // Validate required arguments. |
| 50 if *runID == "" { | 50 if *runID == "" { |
| 51 glog.Error("Must specify --run_id") | 51 glog.Error("Must specify --run_id") |
| 52 return | 52 return |
| 53 } | 53 } |
| 54 if *chromiumBuild == "" { | 54 if *chromiumBuild == "" { |
| 55 glog.Error("Must specify --chromium_build") | 55 glog.Error("Must specify --chromium_build") |
| 56 return | 56 return |
| 57 } | 57 } |
| 58 | 58 |
| 59 // Instantiate timeout client for downloading PDFs. | 59 // Instantiate timeout client for downloading PDFs. |
| 60 httpTimeoutClient := httputils.NewTimeoutClient() | 60 httpTimeoutClient := httputils.NewTimeoutClient() |
| 61 // Instantiate GsUtil object. | 61 // Instantiate GsUtil object. |
| 62 gs, err := util.NewGsUtil(nil) | 62 gs, err := util.NewGsUtil(nil) |
| 63 if err != nil { | 63 if err != nil { |
| 64 glog.Error(err) | 64 glog.Error(err) |
| 65 return | 65 return |
| 66 } | 66 } |
| 67 | 67 |
| 68 // Download PDF pagesets if they do not exist locally. | 68 // Download PDF pagesets if they do not exist locally. |
| 69 » if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetTyp
e, *workerNum); err != nil { | 69 » pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) |
| 70 » pagesetsToIndex, err := gs.DownloadSwarmingArtifacts(pathToPagesets, uti
l.PAGESETS_DIR_NAME, *pagesetType, *startRange, *num) |
| 71 » if err != nil { |
| 70 glog.Error(err) | 72 glog.Error(err) |
| 71 return | 73 return |
| 72 } | 74 } |
| 73 » pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) | 75 » defer skutil.RemoveAll(pathToPagesets) |
| 74 | 76 |
| 75 // Create the dir that PDFs will be stored in. | 77 // Create the dir that PDFs will be stored in. |
| 76 pathToPdfs := filepath.Join(util.PdfsDir, *pagesetType, *chromiumBuild) | 78 pathToPdfs := filepath.Join(util.PdfsDir, *pagesetType, *chromiumBuild) |
| 77 // Delete and remake the local PDFs directory. | 79 // Delete and remake the local PDFs directory. |
| 78 skutil.RemoveAll(pathToPdfs) | 80 skutil.RemoveAll(pathToPdfs) |
| 79 skutil.MkdirAll(pathToPdfs, 0700) | 81 skutil.MkdirAll(pathToPdfs, 0700) |
| 80 // Cleanup the dir after the task is done. | 82 // Cleanup the dir after the task is done. |
| 81 defer skutil.RemoveAll(pathToPdfs) | 83 defer skutil.RemoveAll(pathToPdfs) |
| 82 | 84 |
| 83 // Create the dir that SKPs will be stored in. | 85 // Create the dir that SKPs will be stored in. |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 134 for i := 0; i < WORKER_POOL_SIZE; i++ { | 136 for i := 0; i < WORKER_POOL_SIZE; i++ { |
| 135 // Increment the WaitGroup counter. | 137 // Increment the WaitGroup counter. |
| 136 wg.Add(1) | 138 wg.Add(1) |
| 137 | 139 |
| 138 // Create and run a goroutine closure that captures SKPs. | 140 // Create and run a goroutine closure that captures SKPs. |
| 139 go func() { | 141 go func() { |
| 140 // Decrement the WaitGroup counter when the goroutine co
mpletes. | 142 // Decrement the WaitGroup counter when the goroutine co
mpletes. |
| 141 defer wg.Done() | 143 defer wg.Done() |
| 142 | 144 |
| 143 for pagesetName := range pagesetRequests { | 145 for pagesetName := range pagesetRequests { |
| 146 index := strconv.Itoa(pagesetsToIndex[path.Join(
pathToPagesets, pagesetName)]) |
| 144 | 147 |
| 145 // Read the pageset. | 148 // Read the pageset. |
| 146 pagesetPath := filepath.Join(pathToPagesets, pag
esetName) | 149 pagesetPath := filepath.Join(pathToPagesets, pag
esetName) |
| 147 decodedPageset, err := util.ReadPageset(pagesetP
ath) | 150 decodedPageset, err := util.ReadPageset(pagesetP
ath) |
| 148 if err != nil { | 151 if err != nil { |
| 149 glog.Errorf("Could not read %s: %s", pag
esetPath, err) | 152 glog.Errorf("Could not read %s: %s", pag
esetPath, err) |
| 150 continue | 153 continue |
| 151 } | 154 } |
| 152 | 155 |
| 153 glog.Infof("===== Processing %s =====", pagesetP
ath) | 156 glog.Infof("===== Processing %s =====", pagesetP
ath) |
| (...skipping 10 matching lines...) Expand all Loading... |
| 164 // Add protocol if it is missing from the URL. | 167 // Add protocol if it is missing from the URL. |
| 165 if !(strings.HasPrefix(pdfURL, "http://") || str
ings.HasPrefix(pdfURL, "https://")) { | 168 if !(strings.HasPrefix(pdfURL, "http://") || str
ings.HasPrefix(pdfURL, "https://")) { |
| 166 pdfURL = fmt.Sprintf("http://%s", pdfURL
) | 169 pdfURL = fmt.Sprintf("http://%s", pdfURL
) |
| 167 } | 170 } |
| 168 pdfBase, err := getPdfFileName(pdfURL) | 171 pdfBase, err := getPdfFileName(pdfURL) |
| 169 if err != nil { | 172 if err != nil { |
| 170 glog.Errorf("Could not parse the URL %s
to get a PDF file name: %s", pdfURL, err) | 173 glog.Errorf("Could not parse the URL %s
to get a PDF file name: %s", pdfURL, err) |
| 171 erroredPDFs = append(erroredPDFs, pdfURL
) | 174 erroredPDFs = append(erroredPDFs, pdfURL
) |
| 172 continue | 175 continue |
| 173 } | 176 } |
| 174 » » » » pdfPath := filepath.Join(pathToPdfs, pdfBase) | 177 » » » » pdfDirWithIndex := filepath.Join(pathToPdfs, ind
ex) |
| 178 » » » » if err := os.MkdirAll(pdfDirWithIndex, 0700); er
r != nil { |
| 179 » » » » » glog.Errorf("Could not mkdir %s: %s", pd
fDirWithIndex, err) |
| 180 » » » » } |
| 181 » » » » pdfPath := filepath.Join(pdfDirWithIndex, pdfBas
e) |
| 175 resp, err := httpTimeoutClient.Get(pdfURL) | 182 resp, err := httpTimeoutClient.Get(pdfURL) |
| 176 if err != nil { | 183 if err != nil { |
| 177 glog.Errorf("Could not GET %s: %s", pdfU
RL, err) | 184 glog.Errorf("Could not GET %s: %s", pdfU
RL, err) |
| 178 erroredPDFs = append(erroredPDFs, pdfURL
) | 185 erroredPDFs = append(erroredPDFs, pdfURL
) |
| 179 continue | 186 continue |
| 180 } | 187 } |
| 181 defer skutil.Close(resp.Body) | 188 defer skutil.Close(resp.Body) |
| 182 out, err := os.Create(pdfPath) | 189 out, err := os.Create(pdfPath) |
| 183 if err != nil { | 190 if err != nil { |
| 184 glog.Errorf("Unable to create file %s: %
s", pdfPath, err) | 191 glog.Errorf("Unable to create file %s: %
s", pdfPath, err) |
| (...skipping 16 matching lines...) Expand all Loading... |
| 201 //pdfiumTestArgs := []string{ | 208 //pdfiumTestArgs := []string{ |
| 202 // "--skp", pdfPath, | 209 // "--skp", pdfPath, |
| 203 //} | 210 //} |
| 204 //if err := util.ExecuteCmd(pdfiumLocalPath, pdf
iumTestArgs, []string{}, time.Duration(timeoutSecs)*time.Second, nil, nil); err
!= nil { | 211 //if err := util.ExecuteCmd(pdfiumLocalPath, pdf
iumTestArgs, []string{}, time.Duration(timeoutSecs)*time.Second, nil, nil); err
!= nil { |
| 205 // glog.Errorf("Could not run pdfium_test o
n %s: %s", pdfPath, err) | 212 // glog.Errorf("Could not run pdfium_test o
n %s: %s", pdfPath, err) |
| 206 // erroredSKPs = append(erroredSKPs, pdfBas
e) | 213 // erroredSKPs = append(erroredSKPs, pdfBas
e) |
| 207 // continue | 214 // continue |
| 208 //} | 215 //} |
| 209 // | 216 // |
| 210 //// Move generated SKPs into the pathToSKPs dir
ectory. | 217 //// Move generated SKPs into the pathToSKPs dir
ectory. |
| 211 » » » » //skps, err := filepath.Glob(path.Join(pathToPdf
s, fmt.Sprintf("%s.*.skp", pdfBase))) | 218 » » » » //skps, err := filepath.Glob(path.Join(pdfDirWit
hIndex, fmt.Sprintf("%s.*.skp", pdfBase))) |
| 212 //if err != nil { | 219 //if err != nil { |
| 213 // glog.Errorf("Found no SKPs for %s: %s",
pdfBase, err) | 220 // glog.Errorf("Found no SKPs for %s: %s",
pdfBase, err) |
| 214 // erroredSKPs = append(erroredSKPs, pdfBas
e) | 221 // erroredSKPs = append(erroredSKPs, pdfBas
e) |
| 215 // continue | 222 // continue |
| 216 //} | 223 //} |
| 217 //for _, skp := range skps { | 224 //for _, skp := range skps { |
| 218 // skpBasename := path.Base(skp) | 225 // skpBasename := path.Base(skp) |
| 219 » » » » //» dest := path.Join(pathToSkps, skpBasenam
e) | 226 » » » » //» destDir := path.Join(pathToSkps, index) |
| 227 » » » » // if err := os.MkdirAll(destDir, 0700); err !=
nil { |
| 228 » » » » //» » glog.Errorf("Could not mkdir %s:
%s", destDir, err) |
| 229 » » » » //» } |
| 230 » » » » //» dest := path.Join(destDir, skpBasename) |
| 220 // if err := os.Rename(skp, dest); err != n
il { | 231 // if err := os.Rename(skp, dest); err != n
il { |
| 221 // glog.Errorf("Could not move %s t
o %s: %s", skp, dest, err) | 232 // glog.Errorf("Could not move %s t
o %s: %s", skp, dest, err) |
| 222 // continue | 233 // continue |
| 223 // } | 234 // } |
| 224 //} | 235 //} |
| 225 } | 236 } |
| 226 }() | 237 }() |
| 227 } | 238 } |
| 228 | 239 |
| 229 // Wait for all spawned goroutines to complete. | 240 // Wait for all spawned goroutines to complete. |
| (...skipping 19 matching lines...) Expand all Loading... |
| 249 // glog.Errorf("Could not create any SKP in %s", pathToSkps) | 260 // glog.Errorf("Could not create any SKP in %s", pathToSkps) |
| 250 // return | 261 // return |
| 251 //} | 262 //} |
| 252 // | 263 // |
| 253 //// Move and validate all SKP files. | 264 //// Move and validate all SKP files. |
| 254 //if err := util.ValidateSKPs(pathToSkps); err != nil { | 265 //if err := util.ValidateSKPs(pathToSkps); err != nil { |
| 255 // glog.Error(err) | 266 // glog.Error(err) |
| 256 // return | 267 // return |
| 257 //} | 268 //} |
| 258 | 269 |
| 259 // Write timestamp to the PDFs dir. | |
| 260 skutil.LogErr(util.CreateTimestampFile(pathToPdfs)) | |
| 261 // Write timestamp to the SKPs dir. | |
| 262 skutil.LogErr(util.CreateTimestampFile(pathToSkps)) | |
| 263 | |
| 264 // Upload PDFs dir to Google Storage. | 270 // Upload PDFs dir to Google Storage. |
| 265 » if err := gs.UploadWorkerArtifacts(util.PDFS_DIR_NAME, filepath.Join(*pa
gesetType, *chromiumBuild), *workerNum); err != nil { | 271 » if err := gs.UploadSwarmingArtifacts(util.PDFS_DIR_NAME, filepath.Join(*
pagesetType, *chromiumBuild)); err != nil { |
| 266 glog.Error(err) | 272 glog.Error(err) |
| 267 return | 273 return |
| 268 } | 274 } |
| 269 // Upload SKPs dir to Google Storage. | 275 // Upload SKPs dir to Google Storage. |
| 270 » if err := gs.UploadWorkerArtifacts(util.SKPS_DIR_NAME, filepath.Join(*pa
gesetType, *chromiumBuild), *workerNum); err != nil { | 276 » if err := gs.UploadSwarmingArtifacts(util.SKPS_DIR_NAME, filepath.Join(*
pagesetType, *chromiumBuild)); err != nil { |
| 271 glog.Error(err) | 277 glog.Error(err) |
| 272 return | 278 return |
| 273 } | 279 } |
| 274 | 280 |
| 275 // Summarize errors. | 281 // Summarize errors. |
| 276 if len(erroredPDFs) > 0 { | 282 if len(erroredPDFs) > 0 { |
| 277 glog.Error("The Following URLs could not be downloaded as PDFs:"
) | 283 glog.Error("The Following URLs could not be downloaded as PDFs:"
) |
| 278 for _, erroredPDF := range erroredPDFs { | 284 for _, erroredPDF := range erroredPDFs { |
| 279 glog.Errorf("\t%s", erroredPDF) | 285 glog.Errorf("\t%s", erroredPDF) |
| 280 } | 286 } |
| (...skipping 13 matching lines...) Expand all Loading... |
| 294 // http://www.ada.gov/emerprepguideprt.pdf will become | 300 // http://www.ada.gov/emerprepguideprt.pdf will become |
| 295 // www.ada.gov__emerprepguideprt.pdf | 301 // www.ada.gov__emerprepguideprt.pdf |
func getPdfFileName(u string) (string, error) {
	// Parse the URL so that only the host and the (decoded) path contribute to
	// the file name; the scheme, query string and fragment are ignored.
	p, err := url.Parse(u)
	if err != nil {
		return "", err
	}
	// Flatten the path into a single file name component by replacing every
	// "/" with "__", then prefix it with the host.
	pdfFileName := p.Host + strings.Replace(p.Path, "/", "__", -1)
	// A URL with neither host nor path (e.g. "" or "http://") would produce an
	// empty name. Joining an empty name onto the destination directory yields
	// the directory itself, so report the problem here instead of letting the
	// caller fail later with a confusing file creation error.
	if pdfFileName == "" {
		return "", fmt.Errorf("could not derive a PDF file name from URL %q: it has no host or path", u)
	}
	return pdfFileName, nil
}
| OLD | NEW |