Index: tools/telemetry/telemetry/page_set_archive_info.py |
diff --git a/tools/telemetry/telemetry/page_set_archive_info.py b/tools/telemetry/telemetry/page_set_archive_info.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..e77ed2fb17853344ec84594f56c0db3df2b4c239 |
--- /dev/null |
+++ b/tools/telemetry/telemetry/page_set_archive_info.py |
@@ -0,0 +1,118 @@ |
+# Copyright (c) 2013 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+import collections |
+import json |
+import logging |
+import os |
+import re |
+ |
class PageSetArchiveInfo(object):
  """Metadata describing which .wpr archive file serves which page url.

  The metadata is a JSON-compatible mapping whose 'archives' entry maps the
  relative path of each .wpr file (relative to the metadata file) to the list
  of page urls recorded in it. Instances keep that mapping plus the reverse
  url -> .wpr file mapping, and can write the metadata back to disk.
  """

  # Archive names are of the format "some_thing_number.wpr". The pattern is
  # anchored at the end so names with trailing junk are rejected.
  _WPR_FILE_NAME_RE = re.compile(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr$')

  def __init__(self, archive_data_file_path, page_set_file_path, data):
    """Builds the lookup tables from already-parsed metadata.

    Args:
      archive_data_file_path: Path of the metadata file; WriteToFile() writes
          to it and .wpr file names are resolved relative to its directory.
      page_set_file_path: Path of the page set file this metadata belongs to.
      data: Parsed metadata; data['archives'] maps .wpr file names to lists
          of urls. It is copied, never modified.
    """
    self._archive_data_file_path = archive_data_file_path
    self._archive_data_file_dir = os.path.dirname(archive_data_file_path)
    # Back pointer to the page set file.
    self._page_set_file_path = page_set_file_path

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of urls it supports. The url lists are copied so
    # that later bookkeeping never mutates the caller's |data|.
    self._wpr_file_to_urls = collections.OrderedDict(
        (wpr_file, list(urls)) for wpr_file, urls in data['archives'].items())

    # Map from the page url to a relative path (as it appears in the metadata
    # file) of the .wpr file. If a url is listed under several archives, the
    # last one iterated wins.
    self._url_to_wpr_file = dict()
    for wpr_file, urls in self._wpr_file_to_urls.items():
      for url in urls:
        self._url_to_wpr_file[url] = wpr_file

  @classmethod
  def FromFile(cls, file_path, page_set_file_path):
    """Creates an instance from the JSON metadata file at |file_path|."""
    with open(file_path, 'r') as f:
      # Keep the archives in the order in which they appear in the file.
      data = json.load(f, object_pairs_hook=collections.OrderedDict)
    return cls(file_path, page_set_file_path, data)

  def WprFileForPage(self, page):
    """Returns the relative .wpr file name for |page.url|, or None."""
    return self._url_to_wpr_file.get(page.url, None)

  def WprFilePathForPage(self, page):
    """Returns the absolute .wpr file path for |page|, or None."""
    wpr_file = self.WprFileForPage(page)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewRecording(self, pages):
    """Assigns |pages| to a newly named archive.

    Only the in-memory metadata is updated; no file is created here.

    Returns:
      The absolute path of the new .wpr file.
    """
    (target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
    for page in pages:
      self._SetWprFileForPage(page, target_wpr_file)
    return target_wpr_file_path

  def DeleteAbandonedWprFiles(self):
    """Drops archives that serve no urls and removes their files from disk."""
    # Update the metadata so that the abandoned wpr files don't have empty url
    # arrays.
    for wpr_file in self.AbandonedWprFiles():
      del self._wpr_file_to_urls[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      except OSError:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = collections.OrderedDict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
        'hand! Use record_wpr for updating.')
    # Pointer from the metadata to the page set .json file.
    metadata['page_set'] = os.path.relpath(self._page_set_file_path,
                                           self._archive_data_file_dir)
    # Don't write data for abandoned archives (the ones with no urls).
    metadata['archives'] = collections.OrderedDict(
        (wpr_file, urls)
        for wpr_file, urls in self._wpr_file_to_urls.items() if urls)

    with open(self._archive_data_file_path, 'w') as f:
      json.dump(metadata, f, indent=4)

  def AbandonedWprFiles(self):
    """Returns the names of the archives that no longer serve any url."""
    return [wpr_file for wpr_file, urls in self._wpr_file_to_urls.items()
            if not urls]

  def _WprFileNameToPath(self, wpr_file):
    """Resolves a relative .wpr file name against the metadata directory."""
    return os.path.abspath(os.path.join(self._archive_data_file_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute path) tuple for the new archive.

    Raises:
      Exception: An existing archive name is not of the expected format or
          does not share its base with the other archives.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_urls:
      match = self._WPR_FILE_NAME_RE.match(wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if base is None:
      # There are no archives yet, so there is no name to continue from; use
      # the base name of the metadata file instead of producing "None_000.wpr".
      base = os.path.splitext(
          os.path.basename(self._archive_data_file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForPage(self, page, wpr_file):
    """For modifying the metadata when we're going to record a new archive."""
    old_wpr_file = self.WprFileForPage(page)
    if old_wpr_file:
      # The url moves: take it out of the archive that used to serve it.
      self._wpr_file_to_urls[old_wpr_file].remove(page.url)
    self._url_to_wpr_file[page.url] = wpr_file
    self._wpr_file_to_urls.setdefault(wpr_file, []).append(page.url)