tools/telemetry/telemetry/page_set_archive_info.py - Issue 11881051: Telemetry: add a metadata layer between page set and .wpr.

Unified Diff: tools/telemetry/telemetry/page_set_archive_info.py

Issue 11881051: Telemetry: add a metadata layer between page set and .wpr. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: . Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« tools/telemetry/telemetry/page_set.py ('K') | « tools/telemetry/telemetry/page_set.py ('k') | tools/telemetry/telemetry/page_set_archive_info_unittest.py » ('j') | tools/telemetry/telemetry/record_wpr.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: tools/telemetry/telemetry/page_set_archive_info.py

diff --git a/tools/telemetry/telemetry/page_set_archive_info.py b/tools/telemetry/telemetry/page_set_archive_info.py

new file mode 100644

index 0000000000000000000000000000000000000000..e77ed2fb17853344ec84594f56c0db3df2b4c239

--- /dev/null

+++ b/tools/telemetry/telemetry/page_set_archive_info.py

@@ -0,0 +1,118 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+import collections

+import json

+import logging

+import os

+import re

class PageSetArchiveInfo(object):
    """Metadata describing which .wpr archive serves each page of a page set.

    The metadata lives in a JSON file next to the archives. Its 'archives'
    entry maps a .wpr file name (relative to the metadata file's directory)
    to the list of page urls recorded in that archive. An archive whose url
    list has become empty is called "abandoned".
    """

    # Archive names have the form "<base>_<number>.wpr".
    _WPR_FILE_NAME_RE = re.compile(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr')

    def __init__(self, archive_data_file_path, page_set_file_path, data):
        """Builds the url <-> archive maps from already-parsed metadata.

        Args:
            archive_data_file_path: Path of the metadata file; WriteToFile()
                writes back to this path.
            page_set_file_path: Path of the page set file the metadata
                belongs to.
            data: Parsed metadata; data['archives'] maps a .wpr file name to
                a list of page urls.

        Raises:
            KeyError: If data has no 'archives' entry.
        """
        self._archive_data_file_path = archive_data_file_path
        self._archive_data_file_dir = os.path.dirname(archive_data_file_path)
        # Back pointer to the page set file.
        self._page_set_file_path = page_set_file_path

        archives = data['archives']
        # Map from the relative path (as it appears in the metadata file) of
        # the .wpr file to a list of urls it supports. The url lists are
        # copied so that later re-recording never mutates the caller's data.
        self._wpr_file_to_urls = collections.OrderedDict(
            (wpr_file, list(urls)) for wpr_file, urls in archives.items())

        # Map from the page url to a relative path (as it appears in the
        # metadata file) of the .wpr file. If a url is listed under several
        # archives, the last one iterated wins.
        self._url_to_wpr_file = dict()
        for wpr_file, urls in self._wpr_file_to_urls.items():
            for url in urls:
                self._url_to_wpr_file[url] = wpr_file

    @classmethod
    def FromFile(cls, file_path, page_set_file_path):
        """Loads the JSON metadata at file_path and wraps it in an instance."""
        with open(file_path, 'r') as f:
            data = json.load(f)
        return cls(file_path, page_set_file_path, data)

    def WprFileForPage(self, page):
        """Returns the relative .wpr file name for page.url, or None."""
        return self._url_to_wpr_file.get(page.url)

    def WprFilePathForPage(self, page):
        """Returns the absolute .wpr path for page.url, or None."""
        wpr_file = self.WprFileForPage(page)
        if wpr_file:
            return self._WprFileNameToPath(wpr_file)
        return None

    def AddNewRecording(self, pages):
        """Assigns every page in pages to a freshly named archive.

        Only the in-memory metadata is updated; no file is created here.

        Returns:
            The absolute path of the new archive file.
        """
        target_wpr_file, target_wpr_file_path = self._NextWprFileName()
        for page in pages:
            self._SetWprFileForPage(page, target_wpr_file)
        return target_wpr_file_path

    def DeleteAbandonedWprFiles(self):
        """Drops abandoned archives from the metadata and from disk."""
        # Update the metadata so that the abandoned wpr files don't have
        # empty url arrays.
        for wpr_file in self.AbandonedWprFiles():
            del self._wpr_file_to_urls[wpr_file]
            # Don't fail if we're unable to delete some of the files.
            wpr_file_path = self._WprFileNameToPath(wpr_file)
            try:
                os.remove(wpr_file_path)
            except Exception:
                logging.warning('Failed to delete file: %s', wpr_file_path)

    def WriteToFile(self):
        """Writes the metadata into the file passed as constructor parameter."""
        metadata = dict()
        metadata['description'] = (
            'Describes the Web Page Replay archives for a page set. Don\'t edit by '
            'hand! Use record_wpr for updating.')
        # Pointer from the metadata to the page set .json file.
        metadata['page_set'] = os.path.relpath(self._page_set_file_path,
                                               self._archive_data_file_dir)
        # Don't write data for abandoned archives (those with no urls left).
        metadata['archives'] = collections.OrderedDict(
            (wpr_file, urls)
            for wpr_file, urls in self._wpr_file_to_urls.items()
            if urls)

        with open(self._archive_data_file_path, 'w') as f:
            json.dump(metadata, f, indent=4)

    def AbandonedWprFiles(self):
        """Returns the names of archives that no longer serve any url."""
        # items() rather than iteritems() so this runs on Python 2 and 3.
        return [wpr_file
                for wpr_file, urls in self._wpr_file_to_urls.items()
                if not urls]

    def _WprFileNameToPath(self, wpr_file):
        """Resolves a metadata-relative archive name to an absolute path."""
        return os.path.abspath(
            os.path.join(self._archive_data_file_dir, wpr_file))

    def _NextWprFileName(self):
        """Creates a new file name for a wpr archive file.

        Returns:
            A (relative file name, absolute path) tuple.

        Raises:
            Exception: If an existing archive name is not of the form
                "<base>_<number>.wpr" or the archives disagree on <base>.
        """
        # The names are of the format "some_thing_number.wpr". Read the
        # numbers.
        highest_number = -1
        base = None
        for wpr_file in self._wpr_file_to_urls:
            match = self._WPR_FILE_NAME_RE.match(wpr_file)
            if not match:
                raise Exception('Illegal wpr file name ' + wpr_file)
            highest_number = max(int(match.group('NUMBER')), highest_number)
            if base and match.group('BASE') != base:
                raise Exception('Illegal wpr file name ' + wpr_file +
                                ', doesn\'t begin with ' + base)
            base = match.group('BASE')

        if not base:
            # No usable base from existing archives (e.g. brand new
            # metadata): derive it from the page set file name instead of
            # emitting "None_000.wpr".
            page_set_name = os.path.basename(self._page_set_file_path)
            base = os.path.splitext(page_set_name)[0]

        new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
        return new_filename, self._WprFileNameToPath(new_filename)

    def _SetWprFileForPage(self, page, wpr_file):
        """For modifying the metadata when we're going to record a new archive."""
        old_wpr_file = self.WprFileForPage(page)
        if old_wpr_file:
            # The page is being re-recorded: detach it from its old archive.
            self._wpr_file_to_urls[old_wpr_file].remove(page.url)
        self._url_to_wpr_file[page.url] = wpr_file
        self._wpr_file_to_urls.setdefault(wpr_file, []).append(page.url)