Index: ct/go/worker_scripts/create_pagesets/main.go |
diff --git a/ct/go/worker_scripts/create_pagesets/main.go b/ct/go/worker_scripts/create_pagesets/main.go |
index aa2902cf53391c35e7f59af1cdf5c019aa90da6b..38dffab8dc16e3f3de5bb9413dde02bb62287857 100644 |
--- a/ct/go/worker_scripts/create_pagesets/main.go |
+++ b/ct/go/worker_scripts/create_pagesets/main.go |
@@ -6,8 +6,8 @@ import ( |
"flag" |
"io" |
"os" |
+ "path" |
"path/filepath" |
- "runtime" |
"time" |
"github.com/skia-dev/glog" |
@@ -21,8 +21,9 @@ import ( |
) |
var ( |
- workerNum = flag.Int("worker_num", 1, "The number of this CT worker. It will be in the {1..100} range.") |
- pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_MOBILE_10k, "The type of pagesets to create from the Alexa CSV list. Eg: 10k, Mobile10k, All.") |
+ startRange = flag.Int("start_range", 1, "The number this worker will start creating page sets from.") |
+ num = flag.Int("num", 100, "The total number of pagesets to process starting from the start_range.") |
+ pagesetType = flag.String("pageset_type", util.PAGESET_TYPE_MOBILE_10k, "The type of pagesets to create from the CSV list in util.PagesetTypeToInfo.") |
) |
func main() { |
@@ -30,14 +31,12 @@ func main() { |
worker_common.Init() |
defer util.TimeTrack(time.Now(), "Creating Pagesets") |
defer glog.Flush() |
- // Create the task file so that the master knows this worker is still busy. |
- skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_CREATING_PAGESETS)) |
- defer util.DeleteTaskFile(util.ACTIVITY_CREATING_PAGESETS) |
// Delete and remake the local pagesets directory. |
pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) |
skutil.RemoveAll(pathToPagesets) |
skutil.MkdirAll(pathToPagesets, 0700) |
+ defer skutil.RemoveAll(pathToPagesets) |
// Get info about the specified pageset type. |
pagesetTypeInfo := util.PagesetTypeToInfo[*pagesetType] |
@@ -70,43 +69,36 @@ func main() { |
return |
} |
- // Figure out which pagesets this worker should generate. |
- numPagesPerSlave := numPages / util.NumWorkers() |
- if *worker_common.Local { |
- // When running locally, just do 10 pagesets to make things fast. |
- numPagesPerSlave = 10 |
- } |
- startNum := (*workerNum-1)*numPagesPerSlave + 1 |
- endNum := *workerNum * numPagesPerSlave |
+ // Figure out the endRange of this worker. |
+ endRange := skutil.MinInt(*startRange+*num-1, numPages) |
// Construct path to the create_page_set.py python script. |
- _, currentFile, _, _ := runtime.Caller(0) |
- createPageSetScript := filepath.Join( |
- filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))), |
- "py", "create_page_set.py") |
+ pathToPyFiles := util.GetPathToPyFiles(!*worker_common.Local) |
+ createPageSetScript := filepath.Join(pathToPyFiles, "create_page_set.py") |
// Execute the create_page_set.py python script. |
timeoutSecs := util.PagesetTypeToInfo[*pagesetType].CreatePagesetsTimeoutSecs |
- for currNum := startNum; currNum <= endNum; currNum++ { |
+ for currNum := *startRange; currNum <= endRange; currNum++ { |
+ destDir := path.Join(pathToPagesets, strconv.Itoa(currNum)) |
+ if err := os.MkdirAll(destDir, 0700); err != nil { |
+ glog.Error(err) |
+ return |
+ } |
args := []string{ |
createPageSetScript, |
"-s", strconv.Itoa(currNum), |
- "-e", strconv.Itoa(currNum), |
"-c", csvFile, |
"-p", *pagesetType, |
"-u", userAgent, |
- "-o", pathToPagesets, |
+ "-o", destDir, |
} |
if err := util.ExecuteCmd("python", args, []string{}, time.Duration(timeoutSecs)*time.Second, nil, nil); err != nil { |
glog.Error(err) |
return |
} |
} |
- // Write timestamp to the pagesets dir. |
- skutil.LogErr(util.CreateTimestampFile(pathToPagesets)) |
- |
- // Upload pagesets dir to Google Storage. |
- if err := gs.UploadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil { |
+ // Upload all page sets to Google Storage. |
+ if err := gs.UploadSwarmingArtifacts(util.PAGESETS_DIR_NAME, *pagesetType); err != nil { |
glog.Error(err) |
return |
} |