tools/telemetry/telemetry/page_set.py - Issue 11881051: Telemetry: add a metadata layer between page set and .wpr.

Unified Diff: tools/telemetry/telemetry/page_set.py

Issue 11881051: Telemetry: add a metadata layer between page set and .wpr. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: . Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« tools/telemetry/telemetry/page_runner.py ('K') | « tools/telemetry/telemetry/page_runner.py ('k') | tools/telemetry/telemetry/page_set_unittest.py » ('j') | tools/telemetry/telemetry/page_set_unittest.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: tools/telemetry/telemetry/page_set.py

diff --git a/tools/telemetry/telemetry/page_set.py b/tools/telemetry/telemetry/page_set.py

index 7c085bb81ffc38b93c65b91809f2cfb72317b616..fc0cf51022d1a06250c422630b7438a40a6aaa79 100644

--- a/tools/telemetry/telemetry/page_set.py

+++ b/tools/telemetry/telemetry/page_set.py

@@ -3,18 +3,22 @@

# found in the LICENSE file.

import csv

import json

+import re

import os

import urlparse

from telemetry import page as page_module

class PageSet(object):

- def __init__(self, base_dir='', attributes=None):

+ def __init__(self, file_path='', attributes=None):

self.description = ''

- self.archive_path = ''

- self.base_dir = base_dir

+ self.archive_data_file = ''

dtu 2013/01/23 21:07:02 It's more useful to keep track of archive_data_dir

marja 2013/01/24 16:03:33 Done.

+ self.base_dir = os.path.dirname(file_path)

+ self.file_name = os.path.basename(file_path)

self.credentials_path = None

self.user_agent_type = None

+ self.wpr_data_per_urls = dict()

dtu 2013/01/23 21:07:02 url_to_wpr_file. Note that these wpr file paths are…

marja 2013/01/24 16:03:33 Done. Also, I changed these to be relative paths, …

+ self.wpr_data_per_wpr_files = dict()

dtu 2013/01/23 21:07:02 wpr_file_to_url. Note that these wpr file paths are…

marja 2013/01/24 16:03:33 Done.

if attributes:

for k, v in attributes.iteritems():

@@ -22,12 +26,27 @@ class PageSet(object):

self.pages = []

+ if self.archive_data_file:

+ archive_data_path = os.path.join(self.base_dir, self.archive_data_file)

+ archive_data_dir = os.path.dirname(archive_data_path)

+ with open(archive_data_path, 'r') as f:

+ contents = f.read()

+ wpr_data = json.loads(contents)

dtu 2013/01/23 21:07:02 json.load() to read from the fp directly

marja 2013/01/24 16:03:33 Done.

+ self.wpr_data_per_wpr_files = wpr_data['archives']

+ # Find out the archive file names for each page.

+ for wpr_file in wpr_data['archives']:

+ page_urls = wpr_data['archives'][wpr_file]

dtu 2013/01/23 21:07:02 Are you sure this is right? A for loop in Python l…

marja 2013/01/24 16:03:33 wpr_data['archives'] is a dictionary from wpr file…

+ for url in page_urls:

+ self.wpr_data_per_urls[url] = (

+ os.path.abspath(os.path.join(

+ self.base_dir, archive_data_dir, wpr_file)))

dtu 2013/01/23 21:07:02 You already included base_dir in archive_data_dir.

marja 2013/01/24 16:03:33 This snippet is gone, because I made the map conta…

@classmethod

def FromFile(cls, file_path):

with open(file_path, 'r') as f:

contents = f.read()

data = json.loads(contents)

- return cls.FromDict(data, os.path.dirname(file_path))

+ return cls.FromDict(data, file_path)

@classmethod

def FromDict(cls, data, file_path=''):

@@ -35,7 +54,9 @@ class PageSet(object):

for page_attributes in data['pages']:

url = page_attributes.pop('url')

page = page_module.Page(url, attributes=page_attributes,

- base_dir=file_path)

+ base_dir=os.path.dirname(file_path))

+ if url in page_set.wpr_data_per_urls:

+ page.archive_path = page_set.wpr_data_per_urls[url]

page_set.pages.append(page)

return page_set

@@ -81,3 +102,11 @@ class PageSet(object):

def __setitem__(self, key, value):

self.pages[key] = value

+ def FilterPages(self, options):

dtu 2013/01/23 21:07:02 Prefer if this takes just the filter string instead…

marja 2013/01/24 16:03:33 This will change after my other CL ( https://coder…

+ if options.page_filter:

+ try:

+ page_regex = re.compile(options.page_filter)

+ except re.error:

+ raise Exception('--page-filter: invalid regex')

+ self.pages = [page for page in self.pages if page_regex.search(page.url)]