OLD | NEW |
1 // Application that downloads PDFs and then captures SKPs from them. | 1 // Application that downloads PDFs and then captures SKPs from them. |
2 // TODO(rmistry): Capturing and uploading SKPs has been temporarily disabled due | 2 // TODO(rmistry): Capturing and uploading SKPs has been temporarily disabled due |
3 // to the comment in https://bugs.chromium.org/p/skia/issues/detail?id=5183#c34 | 3 // to the comment in https://bugs.chromium.org/p/skia/issues/detail?id=5183#c34 |
4 package main | 4 package main |
5 | 5 |
6 import ( | 6 import ( |
7 "flag" | 7 "flag" |
8 "fmt" | 8 "fmt" |
9 "io" | 9 "io" |
10 "io/ioutil" | 10 "io/ioutil" |
11 "net/url" | 11 "net/url" |
12 "os" | 12 "os" |
| 13 "path" |
13 "path/filepath" | 14 "path/filepath" |
| 15 "strconv" |
14 "strings" | 16 "strings" |
15 "sync" | 17 "sync" |
16 "time" | 18 "time" |
17 | 19 |
18 "github.com/skia-dev/glog" | 20 "github.com/skia-dev/glog" |
19 | 21 |
20 "go.skia.org/infra/ct/go/util" | 22 "go.skia.org/infra/ct/go/util" |
21 "go.skia.org/infra/ct/go/worker_scripts/worker_common" | 23 "go.skia.org/infra/ct/go/worker_scripts/worker_common" |
22 "go.skia.org/infra/go/common" | 24 "go.skia.org/infra/go/common" |
23 "go.skia.org/infra/go/httputils" | 25 "go.skia.org/infra/go/httputils" |
24 skutil "go.skia.org/infra/go/util" | 26 skutil "go.skia.org/infra/go/util" |
25 ) | 27 ) |
26 | 28 |
27 const ( | 29 const ( |
28 	// The number of goroutines that will run in parallel to download PDFs and capture their SKPs. | 30 	// The number of goroutines that will run in parallel to download PDFs and capture their SKPs. |
29 WORKER_POOL_SIZE = 10 | 31 WORKER_POOL_SIZE = 10 |
30 ) | 32 ) |
31 | 33 |
32 var ( | 34 var ( |
33 » workerNum = flag.Int("worker_num", 1, "The number of this CT worker. It will be in the {1..100} range.") | 35 » startRange = flag.Int("start_range", 1, "The number this worker will capture SKPs from.") |
 | 36 » num = flag.Int("num", 100, "The total number of SKPs to capture starting from the start_range.") |
34 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_PDF_1m, "The type of pagesets to use for this run. Eg: PDF1m.") | 37 pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_PDF_1m, "The type of pagesets to use for this run. Eg: PDF1m.") |
35 chromiumBuild = flag.String("chromium_build", "", "The specified chromium build. This value is used to find the pdfium_test binary from Google Storage and while uploading the PDFs and SKPs to Google Storage.") | 38 chromiumBuild = flag.String("chromium_build", "", "The specified chromium build. This value is used to find the pdfium_test binary from Google Storage and while uploading the PDFs and SKPs to Google Storage.") |
36 runID = flag.String("run_id", "", "The unique run id (typically requester + timestamp).") | 39 runID = flag.String("run_id", "", "The unique run id (typically requester + timestamp).") |
37 targetPlatform = flag.String("target_platform", util.PLATFORM_LINUX, "The platform the benchmark will run on (Android / Linux).") | 40 targetPlatform = flag.String("target_platform", util.PLATFORM_LINUX, "The platform the benchmark will run on (Android / Linux).") |
38 ) | 41 ) |
39 | 42 |
40 func main() { | 43 func main() { |
41 defer common.LogPanic() | 44 defer common.LogPanic() |
42 worker_common.Init() | 45 worker_common.Init() |
43 if !*worker_common.Local { | |
44 defer util.CleanTmpDir() | |
45 } | |
46 defer util.TimeTrack(time.Now(), "Capturing SKPs from PDFs") | 46 defer util.TimeTrack(time.Now(), "Capturing SKPs from PDFs") |
47 defer glog.Flush() | 47 defer glog.Flush() |
48 | 48 |
49 // Validate required arguments. | 49 // Validate required arguments. |
50 if *runID == "" { | 50 if *runID == "" { |
51 glog.Error("Must specify --run_id") | 51 glog.Error("Must specify --run_id") |
52 return | 52 return |
53 } | 53 } |
54 if *chromiumBuild == "" { | 54 if *chromiumBuild == "" { |
55 glog.Error("Must specify --chromium_build") | 55 glog.Error("Must specify --chromium_build") |
56 return | 56 return |
57 } | 57 } |
58 | 58 |
59 // Instantiate timeout client for downloading PDFs. | 59 // Instantiate timeout client for downloading PDFs. |
60 httpTimeoutClient := httputils.NewTimeoutClient() | 60 httpTimeoutClient := httputils.NewTimeoutClient() |
61 // Instantiate GsUtil object. | 61 // Instantiate GsUtil object. |
62 gs, err := util.NewGsUtil(nil) | 62 gs, err := util.NewGsUtil(nil) |
63 if err != nil { | 63 if err != nil { |
64 glog.Error(err) | 64 glog.Error(err) |
65 return | 65 return |
66 } | 66 } |
67 | 67 |
68 // Download PDF pagesets if they do not exist locally. | 68 // Download PDF pagesets if they do not exist locally. |
69 » if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil { | 69 » pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) |
 | 70 » pagesetsToIndex, err := gs.DownloadSwarmingArtifacts(pathToPagesets, util.PAGESETS_DIR_NAME, *pagesetType, *startRange, *num) |
 | 71 » if err != nil { |
70 glog.Error(err) | 72 glog.Error(err) |
71 return | 73 return |
72 } | 74 } |
73 » pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) | 75 » defer skutil.RemoveAll(pathToPagesets) |
74 | 76 |
75 // Create the dir that PDFs will be stored in. | 77 // Create the dir that PDFs will be stored in. |
76 pathToPdfs := filepath.Join(util.PdfsDir, *pagesetType, *chromiumBuild) | 78 pathToPdfs := filepath.Join(util.PdfsDir, *pagesetType, *chromiumBuild) |
77 // Delete and remake the local PDFs directory. | 79 // Delete and remake the local PDFs directory. |
78 skutil.RemoveAll(pathToPdfs) | 80 skutil.RemoveAll(pathToPdfs) |
79 skutil.MkdirAll(pathToPdfs, 0700) | 81 skutil.MkdirAll(pathToPdfs, 0700) |
80 // Cleanup the dir after the task is done. | 82 // Cleanup the dir after the task is done. |
81 defer skutil.RemoveAll(pathToPdfs) | 83 defer skutil.RemoveAll(pathToPdfs) |
82 | 84 |
83 // Create the dir that SKPs will be stored in. | 85 // Create the dir that SKPs will be stored in. |
(...skipping 50 matching lines...)
134 for i := 0; i < WORKER_POOL_SIZE; i++ { | 136 for i := 0; i < WORKER_POOL_SIZE; i++ { |
135 // Increment the WaitGroup counter. | 137 // Increment the WaitGroup counter. |
136 wg.Add(1) | 138 wg.Add(1) |
137 | 139 |
138 // Create and run a goroutine closure that captures SKPs. | 140 // Create and run a goroutine closure that captures SKPs. |
139 go func() { | 141 go func() { |
140 // Decrement the WaitGroup counter when the goroutine completes. | 142 // Decrement the WaitGroup counter when the goroutine completes. |
141 defer wg.Done() | 143 defer wg.Done() |
142 | 144 |
143 for pagesetName := range pagesetRequests { | 145 for pagesetName := range pagesetRequests { |
 | 146 index := strconv.Itoa(pagesetsToIndex[path.Join(pathToPagesets, pagesetName)]) |
144 | 147 |
145 // Read the pageset. | 148 // Read the pageset. |
146 pagesetPath := filepath.Join(pathToPagesets, pagesetName) | 149 pagesetPath := filepath.Join(pathToPagesets, pagesetName) |
147 decodedPageset, err := util.ReadPageset(pagesetPath) | 150 decodedPageset, err := util.ReadPageset(pagesetPath) |
148 if err != nil { | 151 if err != nil { |
149 glog.Errorf("Could not read %s: %s", pagesetPath, err) | 152 glog.Errorf("Could not read %s: %s", pagesetPath, err) |
150 continue | 153 continue |
151 } | 154 } |
152 | 155 |
153 glog.Infof("===== Processing %s =====", pagesetPath) | 156 glog.Infof("===== Processing %s =====", pagesetPath) |
(...skipping 10 matching lines...)
164 // Add protocol if it is missing from the URL. | 167 // Add protocol if it is missing from the URL. |
165 if !(strings.HasPrefix(pdfURL, "http://") || strings.HasPrefix(pdfURL, "https://")) { | 168 if !(strings.HasPrefix(pdfURL, "http://") || strings.HasPrefix(pdfURL, "https://")) { |
166 pdfURL = fmt.Sprintf("http://%s", pdfURL) | 169 pdfURL = fmt.Sprintf("http://%s", pdfURL) |
167 } | 170 } |
168 pdfBase, err := getPdfFileName(pdfURL) | 171 pdfBase, err := getPdfFileName(pdfURL) |
169 if err != nil { | 172 if err != nil { |
170 glog.Errorf("Could not parse the URL %s to get a PDF file name: %s", pdfURL, err) | 173 glog.Errorf("Could not parse the URL %s to get a PDF file name: %s", pdfURL, err) |
171 erroredPDFs = append(erroredPDFs, pdfURL) | 174 erroredPDFs = append(erroredPDFs, pdfURL) |
172 continue | 175 continue |
173 } | 176 } |
174 » » » » pdfPath := filepath.Join(pathToPdfs, pdfBase) | 177 » » » » pdfDirWithIndex := filepath.Join(pathToPdfs, index) |
 | 178 » » » » if err := os.MkdirAll(pdfDirWithIndex, 0700); err != nil { |
 | 179 » » » » » glog.Errorf("Could not mkdir %s: %s", pdfDirWithIndex, err) |
 | 180 » » » » } |
 | 181 » » » » pdfPath := filepath.Join(pdfDirWithIndex, pdfBase) |
175 resp, err := httpTimeoutClient.Get(pdfURL) | 182 resp, err := httpTimeoutClient.Get(pdfURL) |
176 if err != nil { | 183 if err != nil { |
177 glog.Errorf("Could not GET %s: %s", pdfURL, err) | 184 glog.Errorf("Could not GET %s: %s", pdfURL, err) |
178 erroredPDFs = append(erroredPDFs, pdfURL) | 185 erroredPDFs = append(erroredPDFs, pdfURL) |
179 continue | 186 continue |
180 } | 187 } |
181 defer skutil.Close(resp.Body) | 188 defer skutil.Close(resp.Body) |
182 out, err := os.Create(pdfPath) | 189 out, err := os.Create(pdfPath) |
183 if err != nil { | 190 if err != nil { |
184 glog.Errorf("Unable to create file %s: %s", pdfPath, err) | 191 glog.Errorf("Unable to create file %s: %s", pdfPath, err) |
(...skipping 16 matching lines...)
201 //pdfiumTestArgs := []string{ | 208 //pdfiumTestArgs := []string{ |
202 // "--skp", pdfPath, | 209 // "--skp", pdfPath, |
203 //} | 210 //} |
204 //if err := util.ExecuteCmd(pdfiumLocalPath, pdfiumTestArgs, []string{}, time.Duration(timeoutSecs)*time.Second, nil, nil); err != nil { | 211 //if err := util.ExecuteCmd(pdfiumLocalPath, pdfiumTestArgs, []string{}, time.Duration(timeoutSecs)*time.Second, nil, nil); err != nil { |
205 // glog.Errorf("Could not run pdfium_test on %s: %s", pdfPath, err) | 212 // glog.Errorf("Could not run pdfium_test on %s: %s", pdfPath, err) |
206 // erroredSKPs = append(erroredSKPs, pdfBase) | 213 // erroredSKPs = append(erroredSKPs, pdfBase) |
207 // continue | 214 // continue |
208 //} | 215 //} |
209 // | 216 // |
210 //// Move generated SKPs into the pathToSKPs directory. | 217 //// Move generated SKPs into the pathToSKPs directory. |
211 » » » » //skps, err := filepath.Glob(path.Join(pathToPdfs, fmt.Sprintf("%s.*.skp", pdfBase))) | 218 » » » » //skps, err := filepath.Glob(path.Join(pdfDirWithIndex, fmt.Sprintf("%s.*.skp", pdfBase))) |
212 //if err != nil { | 219 //if err != nil { |
213 // glog.Errorf("Found no SKPs for %s: %s", pdfBase, err) | 220 // glog.Errorf("Found no SKPs for %s: %s", pdfBase, err) |
214 // erroredSKPs = append(erroredSKPs, pdfBase) | 221 // erroredSKPs = append(erroredSKPs, pdfBase) |
215 // continue | 222 // continue |
216 //} | 223 //} |
217 //for _, skp := range skps { | 224 //for _, skp := range skps { |
218 // skpBasename := path.Base(skp) | 225 // skpBasename := path.Base(skp) |
219 » » » » //» dest := path.Join(pathToSkps, skpBasename) | 226 » » » » //» destDir := path.Join(pathToSkps, index) |
 | 227 » » » » // if err := os.MkdirAll(destDir, 0700); err != nil { |
 | 228 » » » » //» » glog.Errorf("Could not mkdir %s: %s", destDir, err) |
 | 229 » » » » //» } |
 | 230 » » » » //» dest := path.Join(destDir, skpBasename) |
220 // if err := os.Rename(skp, dest); err != nil { | 231 // if err := os.Rename(skp, dest); err != nil { |
221 // glog.Errorf("Could not move %s to %s: %s", skp, dest, err) | 232 // glog.Errorf("Could not move %s to %s: %s", skp, dest, err) |
222 // continue | 233 // continue |
223 // } | 234 // } |
224 //} | 235 //} |
225 } | 236 } |
226 }() | 237 }() |
227 } | 238 } |
228 | 239 |
229 // Wait for all spawned goroutines to complete. | 240 // Wait for all spawned goroutines to complete. |
(...skipping 19 matching lines...)
249 // glog.Errorf("Could not create any SKP in %s", pathToSkps) | 260 // glog.Errorf("Could not create any SKP in %s", pathToSkps) |
250 // return | 261 // return |
251 //} | 262 //} |
252 // | 263 // |
253 //// Move and validate all SKP files. | 264 //// Move and validate all SKP files. |
254 //if err := util.ValidateSKPs(pathToSkps); err != nil { | 265 //if err := util.ValidateSKPs(pathToSkps); err != nil { |
255 // glog.Error(err) | 266 // glog.Error(err) |
256 // return | 267 // return |
257 //} | 268 //} |
258 | 269 |
259 // Write timestamp to the PDFs dir. | |
260 skutil.LogErr(util.CreateTimestampFile(pathToPdfs)) | |
261 // Write timestamp to the SKPs dir. | |
262 skutil.LogErr(util.CreateTimestampFile(pathToSkps)) | |
263 | |
264 // Upload PDFs dir to Google Storage. | 270 // Upload PDFs dir to Google Storage. |
265 » if err := gs.UploadWorkerArtifacts(util.PDFS_DIR_NAME, filepath.Join(*pagesetType, *chromiumBuild), *workerNum); err != nil { | 271 » if err := gs.UploadSwarmingArtifacts(util.PDFS_DIR_NAME, filepath.Join(*pagesetType, *chromiumBuild)); err != nil { |
266 glog.Error(err) | 272 glog.Error(err) |
267 return | 273 return |
268 } | 274 } |
269 // Upload SKPs dir to Google Storage. | 275 // Upload SKPs dir to Google Storage. |
270 » if err := gs.UploadWorkerArtifacts(util.SKPS_DIR_NAME, filepath.Join(*pagesetType, *chromiumBuild), *workerNum); err != nil { | 276 » if err := gs.UploadSwarmingArtifacts(util.SKPS_DIR_NAME, filepath.Join(*pagesetType, *chromiumBuild)); err != nil { |
271 glog.Error(err) | 277 glog.Error(err) |
272 return | 278 return |
273 } | 279 } |
274 | 280 |
275 // Summarize errors. | 281 // Summarize errors. |
276 if len(erroredPDFs) > 0 { | 282 if len(erroredPDFs) > 0 { |
277 glog.Error("The Following URLs could not be downloaded as PDFs:") | 283 glog.Error("The Following URLs could not be downloaded as PDFs:") |
278 for _, erroredPDF := range erroredPDFs { | 284 for _, erroredPDF := range erroredPDFs { |
279 glog.Errorf("\t%s", erroredPDF) | 285 glog.Errorf("\t%s", erroredPDF) |
280 } | 286 } |
(...skipping 13 matching lines...)
294 // http://www.ada.gov/emerprepguideprt.pdf will become | 300 // http://www.ada.gov/emerprepguideprt.pdf will become |
295 // www.ada.gov__emerprepguideprt.pdf | 301 // www.ada.gov__emerprepguideprt.pdf |
296 func getPdfFileName(u string) (string, error) { | 302 func getPdfFileName(u string) (string, error) { |
297 p, err := url.Parse(u) | 303 p, err := url.Parse(u) |
298 if err != nil { | 304 if err != nil { |
299 return "", err | 305 return "", err |
300 } | 306 } |
301 pdfFileName := fmt.Sprintf("%s%s", p.Host, strings.Replace(p.Path, "/", "__", -1)) | 307 pdfFileName := fmt.Sprintf("%s%s", p.Host, strings.Replace(p.Path, "/", "__", -1)) |
302 return pdfFileName, nil | 308 return pdfFileName, nil |
303 } | 309 } |
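
Note on the helper above: getPdfFileName flattens a PDF URL into a single file name by joining the URL's host with its path and replacing every "/" in the path with "__", as the doc comment's www.ada.gov example shows. A minimal, self-contained sketch of that derivation (the name pdfFileName is illustrative, not part of the CT codebase):

package main

import (
    "fmt"
    "net/url"
    "strings"
)

// pdfFileName mirrors the derivation done by getPdfFileName in the code above:
// host + path, with every "/" in the path replaced by "__".
func pdfFileName(u string) (string, error) {
    p, err := url.Parse(u)
    if err != nil {
        return "", err
    }
    return fmt.Sprintf("%s%s", p.Host, strings.Replace(p.Path, "/", "__", -1)), nil
}

func main() {
    // Example from the doc comment in the patch.
    name, err := pdfFileName("http://www.ada.gov/emerprepguideprt.pdf")
    if err != nil {
        panic(err)
    }
    fmt.Println(name) // prints: www.ada.gov__emerprepguideprt.pdf
}

Running it prints www.ada.gov__emerprepguideprt.pdf, matching the example in the comment.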
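
Note on the capture loop: the patched code drains pageset names from the pagesetRequests channel with WORKER_POOL_SIZE goroutines, guarded by a sync.WaitGroup so the task only proceeds once every worker has finished. The creation and closing of that channel fall in the elided lines, so the sketch below is a generic, self-contained version of the same bounded worker-pool pattern with illustrative names rather than CT's helpers:

package main

import (
    "fmt"
    "sync"
)

const workerPoolSize = 10 // analogous to WORKER_POOL_SIZE in the patch

func main() {
    // Channel of work items, analogous to pagesetRequests in the patch.
    requests := make(chan string)

    var wg sync.WaitGroup
    for i := 0; i < workerPoolSize; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for name := range requests {
                // Real workers would download the PDF and capture its SKP here.
                fmt.Println("processing", name)
            }
        }()
    }

    // Feed the pool, then close the channel so each worker's range loop ends.
    for i := 1; i <= 25; i++ {
        requests <- fmt.Sprintf("pageset_%d.json", i)
    }
    close(requests)

    // Wait for all spawned goroutines to complete.
    wg.Wait()
}

Closing the channel is what ends each worker's for-range loop; the deferred wg.Done calls then let wg.Wait return.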