Index: tools/telemetry/telemetry/page_set.py |
diff --git a/tools/telemetry/telemetry/page_set.py b/tools/telemetry/telemetry/page_set.py |
index 7c085bb81ffc38b93c65b91809f2cfb72317b616..fc0cf51022d1a06250c422630b7438a40a6aaa79 100644 |
--- a/tools/telemetry/telemetry/page_set.py |
+++ b/tools/telemetry/telemetry/page_set.py |
@@ -3,18 +3,22 @@ |
# found in the LICENSE file. |
import csv |
import json |
+import re |
import os |
import urlparse |
from telemetry import page as page_module |
class PageSet(object): |
- def __init__(self, base_dir='', attributes=None): |
+ def __init__(self, file_path='', attributes=None): |
self.description = '' |
- self.archive_path = '' |
- self.base_dir = base_dir |
+ self.archive_data_file = '' |
dtu
2013/01/23 21:07:02
It's more useful to keep track of archive_data_dir
marja
2013/01/24 16:03:33
Done.
|
+ self.base_dir = os.path.dirname(file_path) |
+ self.file_name = os.path.basename(file_path) |
self.credentials_path = None |
self.user_agent_type = None |
+ self.wpr_data_per_urls = dict() |
dtu
2013/01/23 21:07:02
url_to_wpr_file. Note that these wpr file paths ar
marja
2013/01/24 16:03:33
Done.
Also, I changed these to be relative paths,
|
+ self.wpr_data_per_wpr_files = dict() |
dtu
2013/01/23 21:07:02
wpr_file_to_url. Note that these wpr file paths ar
marja
2013/01/24 16:03:33
Done.
|
if attributes: |
for k, v in attributes.iteritems(): |
@@ -22,12 +26,27 @@ class PageSet(object): |
self.pages = [] |
+ if self.archive_data_file: |
+ archive_data_path = os.path.join(self.base_dir, self.archive_data_file) |
+ archive_data_dir = os.path.dirname(archive_data_path) |
+ with open(archive_data_path, 'r') as f: |
+ contents = f.read() |
+ wpr_data = json.loads(contents) |
dtu
2013/01/23 21:07:02
json.load() to read from the fp directly
marja
2013/01/24 16:03:33
Done.
|
+ self.wpr_data_per_wpr_files = wpr_data['archives'] |
+ # Find out the archive file names for each page. |
+ for wpr_file in wpr_data['archives']: |
+ page_urls = wpr_data['archives'][wpr_file] |
dtu
2013/01/23 21:07:02
Are you sure this is right? A for loop in Python l
marja
2013/01/24 16:03:33
wpr_data['archives'] is a dictionary from wpr file
|
+ for url in page_urls: |
+ self.wpr_data_per_urls[url] = ( |
+ os.path.abspath(os.path.join( |
+ self.base_dir, archive_data_dir, wpr_file))) |
dtu
2013/01/23 21:07:02
You already included base_dir in archive_data_dir.
marja
2013/01/24 16:03:33
This snippet is gone, because I made the map conta
|
+ |
@classmethod
def FromFile(cls, file_path):
  """Builds a page set from the JSON description stored at |file_path|.

  Args:
    file_path: Path of the JSON file to parse. The full path (not just its
        directory) is forwarded to FromDict.

  Returns:
    The result of cls.FromDict for the parsed data.

  Errors raised by open() or json.load() are not caught here and propagate
  to the caller.
  """
  with open(file_path, 'r') as f:
    # Parse directly from the file object instead of buffering the text.
    data = json.load(f)
  # The file is closed before the page set is built.
  return cls.FromDict(data, file_path)
@classmethod |
def FromDict(cls, data, file_path=''): |
@@ -35,7 +54,9 @@ class PageSet(object): |
for page_attributes in data['pages']: |
url = page_attributes.pop('url') |
page = page_module.Page(url, attributes=page_attributes, |
- base_dir=file_path) |
+ base_dir=os.path.dirname(file_path)) |
+ if url in page_set.wpr_data_per_urls: |
+ page.archive_path = page_set.wpr_data_per_urls[url] |
page_set.pages.append(page) |
return page_set |
@@ -81,3 +102,11 @@ class PageSet(object): |
def __setitem__(self, key, value):
  """Stores |value| at position |key| of self.pages."""
  self.pages[key] = value
+ |
def FilterPages(self, options):
  """Keeps only the pages whose URL matches options.page_filter.

  Args:
    options: Object exposing a page_filter attribute that holds a regular
        expression string. When it is falsy, self.pages is left as is.

  Raises:
    Exception: If page_filter is not a valid regular expression.
  """
  # Review thread attached to this method's signature (text is cut off in
  # the source page):
  #   dtu, 2013/01/23 21:07:02:
  #     Prefer if this takes just the filter string instea
  #   marja, 2013/01/24 16:03:33:
  #     This will change after my other CL ( https://coder
  pattern = options.page_filter
  if not pattern:
    return
  try:
    matcher = re.compile(pattern)
  except re.error:
    raise Exception('--page-filter: invalid regex')
  # Pages are matched with search(), so the pattern may hit anywhere in the
  # URL; the original order of the surviving pages is kept.
  kept = []
  for page in self.pages:
    if matcher.search(page.url):
      kept.append(page)
  self.pages = kept