Chromium Code Reviews| Index: tools/telemetry/telemetry/page_set.py |
| diff --git a/tools/telemetry/telemetry/page_set.py b/tools/telemetry/telemetry/page_set.py |
| index 7c085bb81ffc38b93c65b91809f2cfb72317b616..fc0cf51022d1a06250c422630b7438a40a6aaa79 100644 |
| --- a/tools/telemetry/telemetry/page_set.py |
| +++ b/tools/telemetry/telemetry/page_set.py |
| @@ -3,18 +3,22 @@ |
| # found in the LICENSE file. |
| import csv |
| import json |
| +import re |
| import os |
| import urlparse |
| from telemetry import page as page_module |
| class PageSet(object): |
| - def __init__(self, base_dir='', attributes=None): |
| + def __init__(self, file_path='', attributes=None): |
| self.description = '' |
| - self.archive_path = '' |
| - self.base_dir = base_dir |
| + self.archive_data_file = '' |
|
dtu
2013/01/23 21:07:02
It's more useful to keep track of archive_data_dir
marja
2013/01/24 16:03:33
Done.
|
| + self.base_dir = os.path.dirname(file_path) |
| + self.file_name = os.path.basename(file_path) |
| self.credentials_path = None |
| self.user_agent_type = None |
| + self.wpr_data_per_urls = dict() |
|
dtu
2013/01/23 21:07:02
url_to_wpr_file. Note that these wpr file paths are…
marja
2013/01/24 16:03:33
Done.
Also, I changed these to be relative paths,
|
| + self.wpr_data_per_wpr_files = dict() |
|
dtu
2013/01/23 21:07:02
wpr_file_to_url. Note that these wpr file paths are…
marja
2013/01/24 16:03:33
Done.
|
| if attributes: |
| for k, v in attributes.iteritems(): |
| @@ -22,12 +26,27 @@ class PageSet(object): |
| self.pages = [] |
| + if self.archive_data_file: |
| + archive_data_path = os.path.join(self.base_dir, self.archive_data_file) |
| + archive_data_dir = os.path.dirname(archive_data_path) |
| + with open(archive_data_path, 'r') as f: |
| + contents = f.read() |
| + wpr_data = json.loads(contents) |
|
dtu
2013/01/23 21:07:02
Use json.load() to read from the file object directly.
marja
2013/01/24 16:03:33
Done.
|
| + self.wpr_data_per_wpr_files = wpr_data['archives'] |
| + # Find out the archive file names for each page. |
| + for wpr_file in wpr_data['archives']: |
| + page_urls = wpr_data['archives'][wpr_file] |
|
dtu
2013/01/23 21:07:02
Are you sure this is right? A for loop in Python l
marja
2013/01/24 16:03:33
wpr_data['archives'] is a dictionary from wpr file
|
| + for url in page_urls: |
| + self.wpr_data_per_urls[url] = ( |
| + os.path.abspath(os.path.join( |
| + self.base_dir, archive_data_dir, wpr_file))) |
|
dtu
2013/01/23 21:07:02
You already included base_dir in archive_data_dir.
marja
2013/01/24 16:03:33
This snippet is gone, because I made the map conta
|
| + |
| @classmethod |
| def FromFile(cls, file_path): |
| with open(file_path, 'r') as f: |
| contents = f.read() |
| data = json.loads(contents) |
| - return cls.FromDict(data, os.path.dirname(file_path)) |
| + return cls.FromDict(data, file_path) |
| @classmethod |
| def FromDict(cls, data, file_path=''): |
| @@ -35,7 +54,9 @@ class PageSet(object): |
| for page_attributes in data['pages']: |
| url = page_attributes.pop('url') |
| page = page_module.Page(url, attributes=page_attributes, |
| - base_dir=file_path) |
| + base_dir=os.path.dirname(file_path)) |
| + if url in page_set.wpr_data_per_urls: |
| + page.archive_path = page_set.wpr_data_per_urls[url] |
| page_set.pages.append(page) |
| return page_set |
| @@ -81,3 +102,11 @@ class PageSet(object): |
| def __setitem__(self, key, value): |
| self.pages[key] = value |
| + |
| + def FilterPages(self, options): |
|
dtu
2013/01/23 21:07:02
Prefer if this takes just the filter string instead…
marja
2013/01/24 16:03:33
This will change after my other CL ( https://coder
|
| + if options.page_filter: |
| + try: |
| + page_regex = re.compile(options.page_filter) |
| + except re.error: |
| + raise Exception('--page-filter: invalid regex') |
| + self.pages = [page for page in self.pages if page_regex.search(page.url)] |