Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(470)

Unified Diff: tools/telemetry/telemetry/page_set.py

Issue 11881051: Telemetry: add a metadata layer between page set and .wpr. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: . Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/telemetry/telemetry/page_set.py
diff --git a/tools/telemetry/telemetry/page_set.py b/tools/telemetry/telemetry/page_set.py
index 7c085bb81ffc38b93c65b91809f2cfb72317b616..fc0cf51022d1a06250c422630b7438a40a6aaa79 100644
--- a/tools/telemetry/telemetry/page_set.py
+++ b/tools/telemetry/telemetry/page_set.py
@@ -3,18 +3,22 @@
# found in the LICENSE file.
import csv
import json
+import re
import os
import urlparse
from telemetry import page as page_module
class PageSet(object):
- def __init__(self, base_dir='', attributes=None):
+ def __init__(self, file_path='', attributes=None):
self.description = ''
- self.archive_path = ''
- self.base_dir = base_dir
+ self.archive_data_file = ''
dtu 2013/01/23 21:07:02 It's more useful to keep track of archive_data_dir
marja 2013/01/24 16:03:33 Done.
+ self.base_dir = os.path.dirname(file_path)
+ self.file_name = os.path.basename(file_path)
self.credentials_path = None
self.user_agent_type = None
+ self.wpr_data_per_urls = dict()
dtu 2013/01/23 21:07:02 url_to_wpr_file. Note that these wpr file paths are…
marja 2013/01/24 16:03:33 Done. Also, I changed these to be relative paths,
+ self.wpr_data_per_wpr_files = dict()
dtu 2013/01/23 21:07:02 wpr_file_to_url. Note that these wpr file paths are…
marja 2013/01/24 16:03:33 Done.
if attributes:
for k, v in attributes.iteritems():
@@ -22,12 +26,27 @@ class PageSet(object):
self.pages = []
+ if self.archive_data_file:
+ archive_data_path = os.path.join(self.base_dir, self.archive_data_file)
+ archive_data_dir = os.path.dirname(archive_data_path)
+ with open(archive_data_path, 'r') as f:
+ contents = f.read()
+ wpr_data = json.loads(contents)
dtu 2013/01/23 21:07:02 json.load() to read from the fp directly
marja 2013/01/24 16:03:33 Done.
+ self.wpr_data_per_wpr_files = wpr_data['archives']
+ # Find out the archive file names for each page.
+ for wpr_file in wpr_data['archives']:
+ page_urls = wpr_data['archives'][wpr_file]
dtu 2013/01/23 21:07:02 Are you sure this is right? A for loop in Python l
marja 2013/01/24 16:03:33 wpr_data['archives'] is a dictionary from wpr file
+ for url in page_urls:
+ self.wpr_data_per_urls[url] = (
+ os.path.abspath(os.path.join(
+ self.base_dir, archive_data_dir, wpr_file)))
dtu 2013/01/23 21:07:02 You already included base_dir in archive_data_dir.
marja 2013/01/24 16:03:33 This snippet is gone, because I made the map contain…
+
@classmethod
def FromFile(cls, file_path):
with open(file_path, 'r') as f:
contents = f.read()
data = json.loads(contents)
- return cls.FromDict(data, os.path.dirname(file_path))
+ return cls.FromDict(data, file_path)
@classmethod
def FromDict(cls, data, file_path=''):
@@ -35,7 +54,9 @@ class PageSet(object):
for page_attributes in data['pages']:
url = page_attributes.pop('url')
page = page_module.Page(url, attributes=page_attributes,
- base_dir=file_path)
+ base_dir=os.path.dirname(file_path))
+ if url in page_set.wpr_data_per_urls:
+ page.archive_path = page_set.wpr_data_per_urls[url]
page_set.pages.append(page)
return page_set
@@ -81,3 +102,11 @@ class PageSet(object):
def __setitem__(self, key, value):
self.pages[key] = value
+
+ def FilterPages(self, options):
dtu 2013/01/23 21:07:02 Prefer if this takes just the filter string instead…
marja 2013/01/24 16:03:33 This will change after my other CL ( https://coder
+ if options.page_filter:
+ try:
+ page_regex = re.compile(options.page_filter)
+ except re.error:
+ raise Exception('--page-filter: invalid regex')
+ self.pages = [page for page in self.pages if page_regex.search(page.url)]

Powered by Google App Engine
This is Rietveld 408576698