Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // pdfxform is a server that rasterizes PDF documents into PNG | |
| 2 package main | |
| 3 | |
| 4 import ( | |
| 5 "bytes" | |
| 6 "crypto/md5" | |
| 7 "encoding/hex" | |
| 8 "encoding/json" | |
| 9 "flag" | |
| 10 "fmt" | |
| 11 "io" | |
| 12 "io/ioutil" | |
| 13 "net/http" | |
| 14 "os" | |
| 15 "os/user" | |
| 16 "path" | |
| 17 "path/filepath" | |
| 18 "runtime" | |
| 19 "strings" | |
| 20 "time" | |
| 21 | |
| 22 "github.com/skia-dev/glog" | |
| 23 "go.skia.org/infra/go/auth" | |
| 24 "go.skia.org/infra/go/gs" | |
| 25 "go.skia.org/infra/go/pdf" | |
| 26 "go.skia.org/infra/go/util" | |
| 27 "go.skia.org/infra/perf/go/goldingester" | |
| 28 "google.golang.org/api/storage/v1" | |
| 29 ) | |
| 30 | |
| 31 //////////////////////////////////////////////////////////////////////////////// | |
| 32 | |
| 33 const ( | |
| 34 pngExt = "png" | |
|
jcgregorio
2015/06/26 19:21:05
PNG_EXT
hal.canary
2015/06/26 22:10:52
Done.
| |
| 35 pdfExt = "pdf" | |
|
jcgregorio
2015/06/26 19:21:06
PDF_EXT
hal.canary
2015/06/26 22:10:52
Done.
| |
| 36 ) | |
| 37 | |
| 38 //////////////////////////////////////////////////////////////////////////////// | |
| 39 | |
| 40 // md5OfFile calculates the MD5 checksum of a file. | |
| 41 func md5OfFile(path string) (string, error) { | |
| 42 md5 := md5.New() | |
| 43 f, err := os.Open(path) | |
| 44 if err != nil { | |
| 45 return "", err | |
| 46 } | |
| 47 defer util.Close(f) | |
| 48 if _, err = io.Copy(md5, f); err != nil { | |
| 49 return "", err | |
| 50 } | |
| 51 return hex.EncodeToString(md5.Sum(nil)), nil | |
| 52 } | |
| 53 | |
| 54 //////////////////////////////////////////////////////////////////////////////// | |
| 55 | |
| 56 // removeIf is like util.Remove, but logs no error if the file does not exist. | |
| 57 func removeIf(path string) { | |
| 58 if err := os.Remove(path); err != nil { | |
| 59 if !os.IsNotExist(err) { | |
| 60 glog.Errorf("Failed to Remove(%s): %v", path, err) | |
| 61 } | |
| 62 } | |
| 63 } | |
| 64 | |
| 65 // isPDF returns true if the path appears to point to a PDF file. | |
| 66 func isPDF(path string) bool { | |
| 67 f, err := os.Open(path) | |
| 68 if err != nil { | |
| 69 return false | |
| 70 } | |
| 71 defer util.Close(f) | |
| 72 var buffer [4]byte | |
| 73 if n, err := f.Read(buffer[:]); n != 4 || err != nil { | |
| 74 return false | |
| 75 } | |
| 76 var magic = [4]byte{'%', 'P', 'D', 'F'} | |
| 77 return bytes.Equal(magic[:], buffer[:]) | |
| 78 } | |
| 79 | |
| 80 // writeTo opens a file and dumps the contents of the reader into it. | |
| 81 func writeTo(path string, reader *io.ReadCloser) error { | |
| 82 defer util.Close(*reader) | |
| 83 file, err := os.Create(path) | |
| 84 if err == nil { | |
| 85 _, err = io.Copy(file, *reader) | |
| 86 } | |
| 87 return err | |
| 88 } | |
| 89 | |
| 90 // assertNil logs the err and exits if it is not nil. | |
| 91 func assertNil(err error) { | |
| 92 if err != nil { | |
| 93 errMsg := "" | |
| 94 if _, fileName, line, ok := runtime.Caller(1); ok { | |
| 95 errMsg = fmt.Sprintf("-called from: %s:%d", fileName, line) | |
| 96 } | |
| 97 glog.Fatalf("Unexpected error %s: %s", errMsg, err) | |
| 98 } | |
| 99 } | |
| 100 | |
| 101 //////////////////////////////////////////////////////////////////////////////// | |
| 102 | |
| 103 // storageClient struct is used for uploading to cloud storage | |
| 104 type storageClient struct { | |
| 105 httpClient *http.Client | |
| 106 storageService *storage.Service | |
| 107 } | |
| 108 | |
| 109 // getClient returns an authorized storage.Service and the | |
| 110 // corresponding http.Client; if anything goes wrong, it logs a fatal | |
| 111 // error. | |
| 112 func getClient(cacheFilePath string) storageClient { | |
| 113 config := auth.OAuthConfig(cacheFilePath, auth.SCOPE_FULL_CONTROL) | |
|
jcgregorio
2015/06/26 19:21:05
Create two command line flags:
--local
--oaut
hal.canary
2015/06/26 22:38:24
Mostly done.
| |
| 114 client, err := auth.RunFlow(config) | |
| 115 assertNil(err) | |
| 116 gsService, err := storage.New(client) | |
| 117 assertNil(err) | |
| 118 return storageClient{httpClient: client, storageService: gsService} | |
| 119 } | |
| 120 | |
| 121 // gsFetch fetches the object's data from google storage | |
| 122 func gsFetch(object *storage.Object, sc storageClient) (io.ReadCloser, int64, error) { | |
| 123 request, err := gs.RequestForStorageURL(object.MediaLink) | |
| 124 if err != nil { | |
| 125 return nil, -1, err | |
| 126 } | |
| 127 resp, err := sc.httpClient.Do(request) | |
| 128 if err != nil { | |
| 129 return nil, -1, err | |
| 130 } | |
| 131 if resp.StatusCode != 200 { | |
| 132 resp.Body.Close() | |
| 133 return nil, -1, fmt.Errorf("Failed to retrieve: %s %d %s", object.MediaLink, resp.StatusCode, resp.Status) | |
| 134 } | |
| 135 return resp.Body, resp.ContentLength, nil | |
| 136 } | |
| 137 | |
| 138 // uploadFile uploads the specified file to the remote dir in Google | |
| 139 // Storage. It also sets the appropriate ACLs on the uploaded file. | |
| 140 // If the file already exists on the server, do nothing. | |
| 141 func uploadFile(sc storageClient, input io.Reader, storageBucket, storagePath, accessControlEntity string) (bool, error) { | |
| 142 obj, _ := sc.storageService.Objects.Get(storageBucket, storagePath).Do() | |
| 143 if obj != nil { | |
| 144 return false, nil // noclobber | |
| 145 } | |
| 146 fullPath := fmt.Sprintf("gs://%s/%s", storageBucket, storagePath) | |
| 147 object := &storage.Object{Name: storagePath} | |
| 148 if _, err := sc.storageService.Objects.Insert(storageBucket, object).Media(input).Do(); err != nil { | |
| 149 return false, fmt.Errorf("Objects.Insert(%s) failed: %s", fullPath, err) | |
| 150 } | |
| 151 objectAcl := &storage.ObjectAccessControl{ | |
| 152 Bucket: storageBucket, Entity: accessControlEntity, Object: storagePath, Role: "READER", | |
| 153 } | |
| 154 if _, err := sc.storageService.ObjectAccessControls.Insert(storageBucket, storagePath, objectAcl).Do(); err != nil { | |
| 155 return false, fmt.Errorf("Could not update ACL of %s: %s", fullPath, err) | |
| 156 } | |
| 157 return true, nil | |
| 158 } | |
| 159 | |
| 160 //////////////////////////////////////////////////////////////////////////////// | |
| 161 | |
| 162 // The pdfXformer struct holds state (results, counter) and constants (bucket, directories) | |
| 163 type pdfXformer struct { | |
| 164 client storageClient | |
| 165 storageBucket string | |
| 166 storageJsonDirectory string | |
| 167 storageImagesDirectory string | |
| 168 accessControlEntity string | |
| 169 rasterizers []pdf.Rasterizer | |
| 170 results map[string]map[int]string | |
| 171 counter int | |
| 172 identifier string | |
| 173 } | |
| 174 | |
| 175 const errorImageMd5 = "45aa8af265d16839402583df5756a7c6" | |
| 176 | |
| 177 // rasterizeOnce applies a single rasterizer to the given pdf file. | |
| 178 // If the rasterizer fails, use the errorImage. If everything | |
| 179 // succeeds, upload the PNG. | |
| 180 func (xformer *pdfXformer) rasterizeOnce(pdfPath string, rasterizerIndex int) (string, error) { | |
| 181 rasterizer := xformer.rasterizers[rasterizerIndex] | |
| 182 tempdir := filepath.Dir(pdfPath) | |
| 183 pngPath := path.Join(tempdir, fmt.Sprintf("%s.%s", rasterizer.String(), pngExt)) | |
| 184 defer removeIf(pngPath) | |
|
stephana
2015/06/26 20:02:59
We have a function for this already.
defer util.R
hal.canary
2015/06/26 22:10:52
I modified the name of the function to make it cle
| |
| 185 glog.Infof("> > > > rasterizing with %s", rasterizer) | |
| 186 err := rasterizer.Rasterize(pdfPath, pngPath) | |
| 187 if err != nil { | |
| 188 glog.Warningf("rasterizing %s with %s failed: %s", filepath.Base(pdfPath), rasterizer.String(), err) | |
| 189 return errorImageMd5, nil | |
| 190 } | |
| 191 md5, err := md5OfFile(pngPath) | |
| 192 if err != nil { | |
| 193 return "", err | |
| 194 } | |
| 195 f, err := os.Open(pngPath) | |
| 196 if err != nil { | |
| 197 return "", err | |
| 198 } | |
| 199 defer util.Close(f) | |
| 200 pngUploadPath := fmt.Sprintf("%s/%s.%s", xformer.storageImagesDirectory, md5, pngExt) | |
| 201 didUpload, err := uploadFile(xformer.client, f, xformer.storageBucket, pngUploadPath, xformer.accessControlEntity) | |
| 202 if err != nil { | |
| 203 return "", err | |
| 204 } | |
| 205 if didUpload { | |
| 206 glog.Infof("> > > > uploaded %s", pngUploadPath) | |
| 207 } | |
| 208 return md5, nil | |
| 209 } | |
| 210 | |
| 211 // makeTmpDir returns a nicely-named directory for temp files in $TMPDIR | |
| 212 func (xformer *pdfXformer) makeTmpDir() string { | |
| 213 if xformer.identifier == "" { | |
| 214 var host, userName string | |
| 215 if h, err := os.Hostname(); err == nil { | |
| 216 host = h | |
| 217 if i := strings.Index(host, "."); i >= 0 { | |
| 218 host = host[:i] | |
| 219 } | |
| 220 } | |
| 221 if currentUser, err := user.Current(); err == nil { | |
| 222 userName = currentUser.Username | |
| 223 } | |
| 224 userName = strings.Replace(userName, `\`, "_", -1) | |
| 225 xformer.identifier = fmt.Sprintf("%s.%s.%s.tmp.%d.", filepath.Base(os.Args[0]), host, userName, os.Getpid()) | |
|
jcgregorio
2015/06/26 19:21:06
When this is run in prod the exe will sit in /usr/
hal.canary
2015/06/26 22:10:52
Done.
| |
| 226 } | |
| 227 tempdir, err := ioutil.TempDir("", xformer.identifier) | |
| 228 assertNil(err) | |
| 229 return tempdir | |
| 230 } | |
| 231 | |
| 232 func newResult(key map[string]string, rasterizerName, digest string) goldingester.Result { | |
| 233 keyCopy := map[string]string{} | |
| 234 for k, v := range key { | |
| 235 keyCopy[k] = v | |
| 236 } | |
| 237 keyCopy["rasterizer"] = rasterizerName | |
| 238 options := map[string]string{"ext": pngExt} | |
| 239 return goldingester.Result{Key: keyCopy, Digest: digest, Options: options} | |
| 240 } | |
| 241 | |
| 242 // processResult rasterizes a single PDF result and returns a set of new results. | |
| 243 func (xformer *pdfXformer) processResult(res goldingester.Result) []goldingester.Result { | |
| 244 rasterizedResults := []goldingester.Result{} | |
| 245 resultMap, found := xformer.results[res.Digest] | |
| 246 if found { | |
| 247 // Skip rasterization steps: big win. | |
| 248 for index, rasterizer := range xformer.rasterizers { | |
| 249 digest, ok := resultMap[index] | |
| 250 if ok { | |
| 251 rasterizedResults = append(rasterizedResults, | |
| 252 newResult(res.Key, rasterizer.String(), digest)) | |
| 253 } else { | |
| 254 glog.Errorf("missing rasterizer %s on %s", rasterizer.String(), res.Digest) | |
| 255 } | |
| 256 } | |
| 257 return rasterizedResults | |
| 258 } | |
| 259 | |
| 260 tempdir := xformer.makeTmpDir() | |
| 261 defer util.RemoveAll(tempdir) | |
| 262 pdfPath := path.Join(tempdir, fmt.Sprintf("%s.pdf", res.Digest)) | |
| 263 objectName := fmt.Sprintf("%s/%s.pdf", xformer.storageImagesDirectory, res.Digest) | |
| 264 storageURL := fmt.Sprintf("gs://%s/%s", xformer.storageBucket, objectName) | |
| 265 object, err := xformer.client.storageService.Objects.Get(xformer.storageBucket, objectName).Do() | |
| 266 if err != nil { | |
| 267 glog.Errorf("unable to find %s: %s", storageURL, err) | |
| 268 return []goldingester.Result{} | |
| 269 } | |
| 270 pdfData, _, err := gsFetch(object, xformer.client) | |
| 271 if err != nil { | |
| 272 glog.Errorf("unable to retrieve %s: %s", storageURL, err) | |
| 273 return []goldingester.Result{} | |
| 274 } | |
| 275 writeTo(pdfPath, &pdfData) | |
| 276 if !isPDF(pdfPath) { | |
| 277 glog.Errorf("%s is not a PDF", objectName) | |
| 278 return []goldingester.Result{} | |
| 279 } | |
| 280 resultMap = map[int]string{} | |
| 281 for index, rasterizer := range xformer.rasterizers { | |
| 282 digest, err := xformer.rasterizeOnce(pdfPath, index) | |
| 283 if err != nil { | |
| 284 glog.Errorf("rasterizer %s failed on %s.pdf: %s", rasterizer, res.Digest, err) | |
| 285 continue | |
| 286 } | |
| 287 rasterizedResults = append(rasterizedResults, | |
| 288 newResult(res.Key, rasterizer.String(), digest)) | |
| 289 resultMap[index] = digest | |
| 290 } | |
| 291 xformer.results[res.Digest] = resultMap | |
| 292 return rasterizedResults | |
| 293 } | |
| 294 | |
| 295 // processJsonFile reads a json file and produces a new json file | |
| 296 // with rasterized results. | |
| 297 func (xformer *pdfXformer) processJsonFile(jsonFileObject *storage.Object) { | |
| 298 jsonURL := fmt.Sprintf("gs://%s/%s", xformer.storageBucket, jsonFileObject.Name) | |
| 299 if jsonFileObject.Metadata["rasterized"] == "true" { | |
| 300 glog.Infof("> > skipping %s (already processed) {%d}", jsonURL, xformer.counter) | |
| 301 return | |
| 302 } | |
| 303 body, length, err := gsFetch(jsonFileObject, xformer.client) | |
| 304 if err != nil { | |
| 305 glog.Errorf("Failed to fetch %s", jsonURL) | |
| 306 return | |
| 307 } | |
| 308 if 0 == length { | |
| 309 util.Close(body) | |
| 310 glog.Infof("> > skipping %s (empty file) {%d}", jsonURL, xformer.counter) | |
| 311 return | |
| 312 } | |
| 313 dmstruct := goldingester.DMResults{} | |
| 314 err = json.NewDecoder(body).Decode(&dmstruct) | |
| 315 util.Close(body) | |
| 316 if err != nil { | |
| 317 glog.Errorf("Failed to parse %s", jsonURL) | |
| 318 return | |
| 319 } | |
| 320 countPdfResults := 0 | |
| 321 for _, res := range dmstruct.Results { | |
| 322 if res.Options["ext"] == pdfExt { | |
| 323 countPdfResults++ | |
| 324 } | |
| 325 } | |
| 326 if 0 == countPdfResults { | |
| 327 glog.Infof("> > 0 PDFs found %s {%d}", jsonURL, xformer.counter) | |
| 328 xformer.setRasterized(jsonFileObject) | |
| 329 return | |
| 330 } | |
| 331 | |
| 332 glog.Infof("> > processing %d pdfs of %d results {%d}", countPdfResults, len(dmstruct.Results), xformer.counter) | |
| 333 rasterizedResults := []*goldingester.Result{} | |
| 334 i := 0 | |
| 335 for _, res := range dmstruct.Results { | |
| 336 if res.Options["ext"] == pdfExt { | |
| 337 i++ | |
| 338 glog.Infof("> > > processing %s.pdf [%d/%d] {%d}", res.Digest, i, countPdfResults, xformer.counter) | |
| 339 for _, rasterizedResult := range xformer.processResult(*res) { | |
| 340 rasterizedResults = append(rasterizedResults, &rasterizedResult) | |
| 341 } | |
| 342 } | |
| 343 } | |
| 344 newDMStruct := goldingester.DMResults{ | |
| 345 BuildNumber: dmstruct.BuildNumber, | |
| 346 GitHash: dmstruct.GitHash, | |
| 347 Key: dmstruct.Key, | |
| 348 Results: rasterizedResults, | |
| 349 } | |
| 350 newJson, err := json.Marshal(newDMStruct) | |
| 351 assertNil(err) | |
| 352 | |
| 353 now := time.Now() | |
| 354 // Change the date; leave most of the rest of the path components. | |
| 355 jsonPathComponents := strings.Split(jsonFileObject.Name, "/") // []string | |
| 356 if len(jsonPathComponents) < 4 { | |
| 357 fmt.Errorf("unexpected number of path components %q", jsonPathComponents) | |
| 358 return | |
| 359 } | |
| 360 jsonPathComponents = jsonPathComponents[len(jsonPathComponents)-4:] | |
| 361 jsonPathComponents[1] += "-pdfxformer" | |
| 362 jsonUploadPath := fmt.Sprintf("%s/%d/%02d/%02d/%02d/%s", | |
| 363 xformer.storageJsonDirectory, | |
| 364 now.Year(), | |
| 365 int(now.Month()), | |
| 366 now.Day(), | |
| 367 now.Hour(), | |
| 368 strings.Join(jsonPathComponents, "/")) | |
| 369 | |
| 370 _, err = uploadFile(xformer.client, bytes.NewReader(newJson), xformer.storageBucket, jsonUploadPath, xformer.accessControlEntity) | |
| 371 glog.Infof("> > wrote gs://%s/%s", xformer.storageBucket, jsonUploadPath) | |
| 372 newJsonFileObject, err := xformer.client.storageService.Objects.Get(xformer.storageBucket, jsonUploadPath).Do() | |
| 373 if err != nil { | |
| 374 glog.Errorf("Failed to find %s: %s", jsonUploadPath, err) | |
| 375 } else { | |
| 376 xformer.setRasterized(newJsonFileObject) | |
| 377 } | |
| 378 xformer.setRasterized(jsonFileObject) | |
| 379 } | |
| 380 | |
| 381 // setRasterized sets the rasterized metadata flag of the given storage.Object | |
| 382 func (xformer *pdfXformer) setRasterized(jsonFileObject *storage.Object) { | |
| 383 if nil == jsonFileObject.Metadata { | |
| 384 jsonFileObject.Metadata = map[string]string{} | |
| 385 } | |
| 386 jsonFileObject.Metadata["rasterized"] = "true" | |
| 387 _, err := xformer.client.storageService.Objects.Patch(xformer.storageBucket, jsonFileObject.Name, jsonFileObject).Do() | |
| 388 if err != nil { | |
| 389 glog.Errorf("Failed to update metadata of %s: %s", jsonFileObject.Name, err) | |
| 390 } else { | |
| 391 glog.Infof("> > Updated metadata of %s", jsonFileObject.Name) | |
| 392 } | |
| 393 } | |
| 394 | |
| 395 // processTimeRange calls gs.GetLatestGSDirs to get a list of directories for the time range, then processes every object listed under each one. | |
| 396 func (xformer *pdfXformer) processTimeRange(start time.Time, end time.Time) { | |
| 397 glog.Infof("Processing time range: (%s, %s)", start.Truncate(time.Second), end.Truncate(time.Second)) | |
| 398 for _, dir := range gs.GetLatestGSDirs(start.Unix(), end.Unix(), xformer.storageJsonDirectory) { | |
| 399 glog.Infof("> Reading gs://%s/%s\n", xformer.storageBucket, dir) | |
| 400 requestedObjects := xformer.client.storageService.Objects.List(xformer.storageBucket).Prefix(dir).Fields( | |
| 401 "nextPageToken", "items/updated", "items/md5Hash", "items/mediaLink", "items/name", "items/metadata") | |
| 402 for requestedObjects != nil { | |
| 403 responseObjects, err := requestedObjects.Do() | |
| 404 if err != nil { | |
| 405 glog.Errorf("request %#v failed: %s", requestedObjects, err) | |
| 406 continue | |
|
stephana
2015/06/26 20:02:59
wouldn't this repeat a failed request indefinitely
hal.canary
2015/06/26 22:10:52
Done. Good catch.
| |
| 407 } | |
| 408 for _, jsonObject := range responseObjects.Items { | |
| 409 xformer.counter++ | |
| 410 glog.Infof("> > Processing object: gs://%s/%s { %d}", xformer.storageBucket, jsonObject.Name, xformer.counter) | |
| 411 xformer.processJsonFile(jsonObject) | |
| 412 } | |
| 413 if len(responseObjects.NextPageToken) > 0 { | |
| 414 requestedObjects.PageToken(responseObjects.NextPageToken) | |
| 415 } else { | |
| 416 requestedObjects = nil | |
| 417 } | |
| 418 } | |
| 419 } | |
| 420 glog.Infof("finished time range.") | |
| 421 } | |
| 422 | |
| 423 const errorImageData = "\x89PNG\x0D\x0A\x1A\x0A\x00\x00\x00\x0DIHDR\x00\x00" + | |
| 424 "\x00\xC1\x00\x00\x00#\x08\x06\x00\x00\x00-\xCEn\x15\x00\x00\x01\xF8IDATx" + | |
| 425 "\xDA\xED\x9C\xDB\x8E\xC3 \x0CDK\xB4\xFF\xFF\xCB\xD9\xA7V\x11\x8Ac\x1B\xCC" + | |
| 426 "-\x9Cy\xDAf)x\x0C\x03\xB6\x89\x9A\xCE\xF3<?\x00l\x8C\x03\x17\x00D\x00\x00" + | |
| 427 "\"\x00\x00\x11\x00\x80\x08\x00@\x04\x00 \x02\x00\x10\x01\x00\x88\x00\x80]" + | |
| 428 "\xF1\xB7\x8A\xA1)\xA5O\xCB\xCB\xED\x94\xD2\xEF\xEF\x1E\x97\xE8w|z\xDB\xE0" + | |
| 429 "\xF1I\xA9]w\x9CZ\xF2,\xE9\xFBXE\x00o\xC2\x0A|\xDE\xE6\xF3\xA5O\x82^\x93" + | |
| 430 "\xD1k\xE7}\xE23\xEBk\\5v\xAD\xF0j\x1A9\x01 '(\x8D\xB5\xA4\xD8\xCB\x13Gjm" + | |
| 431 "\xF3\xFF\x7F??\xC5\xD2\xD2\x98\x9A\xFDZ\xECj\xD9\xE5j\xF9h\xF1l\xFE}\xADM" + | |
| 432 "\xCE\xCB\xBA3\xB7\xF4{d\x1ER\xC2-\xEC$\x90\x16\x87\xE7\xB9\xB7\x8F([Z\x85" + | |
| 433 "XQ|\xA4>$\x1FzyFqo\xC1\xB7v\xFD\x94\x8E]\x9C\x13H\x8A\x97\x9E_\xAB!\xDA)" + | |
| 434 "\xF2m\xFBt\xE2x\xFA)\x8DU-\xBB\xB1\x87\xBB\xC6\xC7\xE3w\xEB\x84\xDF\xB5" + | |
| 435 "\xD7*mV\xBF[\xE6\xBA\xB6\xAA\xE3\x99\xCB\xAE\"\xB83&\"\x01\xEA\x95D\x95" + | |
| 436 "\x8E#M\xD4\x88\xD2\xEDu\xA1J\x0BDj\xBF\xCA<\xD5\xF8dxu\xC8cHT\xCD\xB8u" + | |
| 437 "\x15\xC9j\xE7N\xA5\xC5\xDE\xA1\xE5\xF0\xC48\xD2xI\xB1\xD21\xBB\xC2D\xB3" + | |
| 438 "\xF87\xA8\x0Ey\x9D\xA2\xC5iZ\xD5\xC4+\x84\x91\xA1\x94\x97\xFB\xDB\x04\x10" + | |
| 439 "\xCD\xB7\xF7&x\xF4Z(Z\xFBY/U\xBC\xE1\xDAn\xBF[\xF0\x06\xBEG\xCF\x1D\xE3" + | |
| 440 "\xFA\\+\xEF\xD5\x8C\x15\xD5\x7FM\xBE\xD2bW\xCC9\xCE\xB4\x10{\xCE\xA5\xC5'" + | |
| 441 "\xD3'\xC6ye\xC3\xBA\xBB\xE49C\x9E[D]\x9EX9YK\xA8Z\x9F\x1E;g\x0B\xB1\"\xC5" + | |
| 442 "^:\x97\xB56\x84\x9D\x04R\x89NR\xAD\xB5~\xAF-\x90\x9A{\x80\x11\xDCK\xEF" + | |
| 443 "\x06,c\xCC\xC87\"\xB4\xB2\x96\xE4\x8BK\xDF\xFC\xF8\x16\xD8\x1D\xBC@\x07" + | |
| 444 "\x10\x01.\x00\x88\x00\x00D\x00\x00\"\x00\x00\x11\x00\x80\x08\x00@\x04\x00" + | |
| 445 " \x02\x00\xB6\xC5?\xE4^\x82\xA6\x8A\xB0\x7F'\x00\x00\x00\x00IEND\xAEB`\x82" | |
| 446 | |
| 447 // uploadErrorImage should be run once to verify that the image is there | |
|
stephana
2015/06/26 20:03:00
This is really not a good idea. The image should n
jcgregorio
2015/06/26 20:08:07
Upon further thought I agree, should be cmd line f
hal.canary
2015/06/26 22:10:52
Done.
| |
| 448 func (xformer *pdfXformer) uploadErrorImage() { | |
| 449 // Check to see that the data is correct. | |
| 450 errorImageDataReader := bytes.NewReader([]byte(errorImageData)) | |
| 451 md5sum := md5.New() | |
| 452 _, err := io.Copy(md5sum, errorImageDataReader) | |
| 453 assertNil(err) | |
| 454 if errorImageMd5 != hex.EncodeToString(md5sum.Sum(nil)) { | |
| 455 glog.Fatalf("errorImageData is corrupted") | |
| 456 } | |
| 457 _, err = errorImageDataReader.Seek(0, 0) | |
| 458 assertNil(err) | |
| 459 | |
| 460 errorImagePath := fmt.Sprintf("%s/%s.png", xformer.storageImagesDirectory, errorImageMd5) | |
| 461 _, err = uploadFile(xformer.client, errorImageDataReader, xformer.storageBucket, errorImagePath, xformer.accessControlEntity) | |
| 462 assertNil(err) // If we can't upload this, we can't upload anything. | |
| 463 } | |
| 464 | |
| 465 // Environment variables: we respect $TMPDIR | |
| 466 // Arguments: glog uses -logtostderr, -log_dir | |
| 467 func main() { | |
| 468 flag.Parse() | |
|
jcgregorio
2015/06/26 19:21:06
Use common.InitWithMetrics.
https://github.com/
| |
| 469 | |
| 470 // TODO(halcanary): where should this file exist? | |
| 471 configDir := path.Join(os.Getenv("HOME"), ".config") | |
| 472 assertNil(os.MkdirAll(configDir, 0700)) | |
|
stephana
2015/06/26 20:03:00
Making sure that directory exists is already imple
hal.canary
2015/06/26 22:38:24
Done.
| |
| 473 | |
| 474 xformer := pdfXformer{ | |
|
stephana
2015/06/26 20:02:59
These should not be hard coded, but instead be fla
| |
| 475 client: getClient(path.Join(configDir, "google_storage_token.data")), | |
| 476 storageBucket: "chromium-skia-gm", | |
| 477 storageJsonDirectory: "dm-json-v1", | |
| 478 storageImagesDirectory: "dm-images-v1", | |
| 479 accessControlEntity: "domain-google.com", | |
| 480 results: map[string]map[int]string{}, | |
| 481 } | |
| 482 | |
| 483 xformer.uploadErrorImage() | |
| 484 | |
| 485 for _, rasterizer := range []pdf.Rasterizer{pdf.Pdfium{}, pdf.Poppler{}} { | |
| 486 if rasterizer.Enabled() { | |
| 487 xformer.rasterizers = append(xformer.rasterizers, rasterizer) | |
| 488 } else { | |
| 489 glog.Infof("rasterizer %s is disabled", rasterizer.String()) | |
| 490 } | |
| 491 } | |
| 492 if len(xformer.rasterizers) == 0 { | |
| 493 glog.Fatalf("no rasterizers found") | |
| 494 } | |
| 495 | |
| 496 end := time.Now() | |
|
stephana
2015/06/26 20:02:59
This is not wrong, but the pattern that we use thr
hal.canary
2015/06/26 22:10:52
Done.
| |
| 497 start := end.Add(-72 * time.Hour) | |
| 498 timeTicker := time.Tick(time.Minute) | |
| 499 for { | |
| 500 xformer.processTimeRange(start, end) | |
| 501 glog.Flush() // Flush before waiting for next tick; it may be a while. | |
| 502 _ = <-timeTicker | |
| 503 start = end | |
| 504 end = time.Now() | |
| 505 } | |
| 506 } | |
| OLD | NEW |