Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(547)

Unified Diff: cluster_telemetry/csv_merger.py

Issue 231433003: Repeat Chromium Tryserver page_set runs to reduce variance. (Closed) Base URL: https://skia.googlesource.com/buildbot.git@master
Patch Set: Move to cluster_telemetry directory Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: cluster_telemetry/csv_merger.py
diff --git a/cluster_telemetry/csv_merger.py b/cluster_telemetry/csv_merger.py
index ee84bcfaa5489dce2bcddbd4910e15bfb920b91c..8afe29314a59c9962eb40679ab274e0629f0a210 100644
--- a/cluster_telemetry/csv_merger.py
+++ b/cluster_telemetry/csv_merger.py
@@ -3,7 +3,12 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-"""Python utility to merge many CSV files into a single file."""
+"""Python utility to merge many CSV files into a single file.
+
+If there are multiple CSV files with the same TELEMETRY_PAGE_NAME_KEY then the
+median of all values is stored in the resultant CSV file.
+"""
+
import csv
import glob
@@ -12,6 +17,9 @@ import os
import sys
+TELEMETRY_PAGE_NAME_KEY = 'page_name'
+
+
class CsvMerger(object):
"""Class that merges many CSV files into a single file."""
@@ -29,24 +37,92 @@ class CsvMerger(object):
field_names.update(csv.DictReader(open(csv_file, 'r')).fieldnames)
return field_names
+ def _GetMedian(self, l):
+ """Returns the median value from the specified list."""
+ l.sort()
+ length = len(l)
+ if not length % 2:
+ return (l[(length/2) - 1] + l[length/2]) / 2
+ else:
+ return l[length/2]
+
+ def _GetRowWithMedianValues(self, rows):
+ """Parses the specified rows and returns a single row with median values."""
+ fieldname_to_values = {}
+ for row in rows:
+ for fieldname in row:
+ if fieldname == TELEMETRY_PAGE_NAME_KEY:
+ fieldname_to_values[fieldname] = row[fieldname]
+ continue
+ try:
+ value = float(row[fieldname])
+ except ValueError:
+ # We expected only floats, cannot compare strings. Skip this field.
+ continue
+ if fieldname in fieldname_to_values:
+ fieldname_to_values[fieldname].append(value)
+ else:
+ fieldname_to_values[fieldname] = [value]
+
+ median_row = {}
+ for fieldname, values in fieldname_to_values.items():
+ if fieldname == TELEMETRY_PAGE_NAME_KEY:
+ median_row[fieldname] = values
+ continue
+ median_row[fieldname] = self._GetMedian(values)
+
+ print
+ print 'For rows: %s' % rows
+ print 'Median row is %s' % median_row
+ print
+ return median_row
+
def Merge(self):
"""Method that does the CSV merging."""
field_names = self._GetFieldNames()
print 'Merging %d csv files into %d columns' % (len(self._input_csv_files),
len(field_names))
- dict_writer = csv.DictWriter(open(self._output_csv_name, 'w'), field_names)
- dict_writer.writeheader()
+ # List that will contain all rows read from the CSV files. It will also
+ # combine all rows found with the same TELEMETRY_PAGE_NAME_KEY into one
+ # with median values.
+ csv_rows = []
- total_rows = 0
+ # Dictionary containing all the encountered page names. If a page name that
+ # is already in the dictionary is encountered then the median of its
+ # values is used.
+ page_names_to_rows = {}
for csv_file in self._input_csv_files:
- print 'Merging %s' % csv_file
-
dict_reader = csv.DictReader(open(csv_file, 'r'))
for row in dict_reader:
- dict_writer.writerow(row)
- total_rows += 1
+ if TELEMETRY_PAGE_NAME_KEY in row:
+ # Add rows found with 'page_name' to a different dictionary for
+ # processing.
+ if row[TELEMETRY_PAGE_NAME_KEY] in page_names_to_rows:
+ page_names_to_rows[row[TELEMETRY_PAGE_NAME_KEY]].append(row)
+ else:
+ page_names_to_rows[row[TELEMETRY_PAGE_NAME_KEY]] = [row]
+ else:
+ # Add rows found without TELEMETRY_PAGE_NAME_KEY to the final list of
+ # rows, they require no further processing.
+ csv_rows.append(row)
+
+ if page_names_to_rows:
+ for page_name in page_names_to_rows:
+ rows = page_names_to_rows[page_name]
+ median_row = self._GetRowWithMedianValues(rows)
+ # Add a single row that contains median values from all rows with the
+ # same TELEMETRY_PAGE_NAME_KEY.
+ csv_rows.append(median_row)
+
+ # Write all rows in csv_rows to the specified output CSV.
+ dict_writer = csv.DictWriter(open(self._output_csv_name, 'w'), field_names)
+ dict_writer.writeheader()
+ total_rows = 0
+ for row in csv_rows:
+ dict_writer.writerow(row)
+ total_rows += 1
print 'Successfully merged %d rows' % total_rows
« no previous file with comments | « no previous file | cluster_telemetry/telemetry_slave_scripts/vm_run_telemetry.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698