Index: tools/telemetry/telemetry/record_wpr.py
diff --git a/tools/telemetry/telemetry/record_wpr.py b/tools/telemetry/telemetry/record_wpr.py
index 8580f7a9da1049e879f605d99dece8629ac3976e..5edbfffcd7c8b12a95dc2c1184cc840cb91a6062 100755
--- a/tools/telemetry/telemetry/record_wpr.py
+++ b/tools/telemetry/telemetry/record_wpr.py
@@ -2,7 +2,11 @@
 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+import collections
+import json
 import logging
+import os
+import re
 import sys
 import time
@@ -75,6 +79,67 @@ def Main(benchmark_dir):
     sys.exit(1)
   ps = page_set.PageSet.FromFile(args[0])
+  ps.FilterPages(options)
+
+  # Come up with a new name for the wpr file, possibly clobbering one of the
+  # existing wpr files.
+  urls_to_record = set(page.url for page in ps.pages)
+  wpr_files_not_needed = []
+  for wpr_file in ps.wpr_data_per_wpr_files:
+    page_urls = ps.wpr_data_per_wpr_files[wpr_file]
dtu 2013/01/23 21:07:02:
    for wpr_file, page_urls in ps.wpr_data_per_wpr_files ...
marja 2013/01/24 16:03:33:
    Done.
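For reference, a minimal standalone sketch of the iteration style suggested above; the archive names and URLs are made up. Unpacking key/value pairs directly is equivalent to indexing the dict inside the loop, minus the second lookup.

    import collections

    # Hypothetical wpr-file -> page-URLs mapping, shaped like
    # ps.wpr_data_per_wpr_files in the patch.
    wpr_data_per_wpr_files = collections.OrderedDict([
        ('page_set_000.wpr', ['http://example.com/a', 'http://example.com/b']),
        ('page_set_001.wpr', ['http://example.com/c']),
    ])

    for wpr_file, page_urls in wpr_data_per_wpr_files.items():
      assert isinstance(page_urls, list)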
+    if all(url in urls_to_record for url in page_urls):
+      wpr_files_not_needed.append(wpr_file)
+
+  if wpr_files_not_needed:
+    target_wpr_file = wpr_files_not_needed[0]
+    wpr_files_not_needed = wpr_files_not_needed[1:]
dtu 2013/01/23 21:07:02:
    I don't think it's useful to reuse the old filename ...
marja 2013/01/24 16:03:33:
    Done.
+  else:
+    # Need to come up with a new file name. The names are of the format
+    # "some_thing_number.wpr". Read the numbers.
+    highest_number = -1
+    base = None
+    for wpr_file in ps.wpr_data_per_wpr_files:
+      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
+      if not match:
+        raise Exception('Illegal wpr file name ' + wpr_file)
+      highest_number = max(int(match.groupdict()['NUMBER']), highest_number)
+      if base and match.groupdict()['BASE'] != base:
+        raise Exception('Illegal wpr file name ' + wpr_file +
+                        ', doesn\'t begin with ' + base)
+      base = match.groupdict()['BASE']
+    target_wpr_file = '%s_%03d.wpr' % (base, highest_number + 1)
+
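For reference, a standalone sketch of the naming scheme the regex above expects: archives are named "<base>_<number>.wpr", and a new archive gets the next free number. The file names here are made up.

    import re

    existing = ['page_set_000.wpr', 'page_set_001.wpr', 'page_set_002.wpr']
    highest_number = -1
    base = None
    for name in existing:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', name)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      base = match.group('BASE')
    assert '%s_%03d.wpr' % (base, highest_number + 1) == 'page_set_003.wpr'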
+  # Construct the new metadata.
+  new_wpr_data_per_urls = ps.wpr_data_per_urls.copy()
+  for url in urls_to_record:
+    new_wpr_data_per_urls[url] = target_wpr_file
+
+  new_wpr_data_per_wpr_files = collections.OrderedDict()
+  for url in new_wpr_data_per_urls:
+    wpr_file = os.path.basename(new_wpr_data_per_urls[url])
+    if wpr_file not in new_wpr_data_per_wpr_files:
+      new_wpr_data_per_wpr_files[wpr_file] = []
+    new_wpr_data_per_wpr_files[wpr_file].append(url)
+
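For reference, a standalone sketch of the grouping step just above: the url -> archive map is inverted into an archive -> [urls] map. The data here is made up, and sorted() stands in for whatever order the page set provides.

    import collections
    import os

    wpr_data_per_urls = {
        'http://example.com/a': 'data/page_set_000.wpr',
        'http://example.com/b': 'data/page_set_001.wpr',
        'http://example.com/c': 'data/page_set_000.wpr',
    }
    per_wpr_file = collections.OrderedDict()
    for url in sorted(wpr_data_per_urls):
      wpr_file = os.path.basename(wpr_data_per_urls[url])
      per_wpr_file.setdefault(wpr_file, []).append(url)
    assert per_wpr_file['page_set_000.wpr'] == ['http://example.com/a',
                                                'http://example.com/c']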
+  new_metadata = dict()
+  new_metadata['description'] = (
+      'Describes the Web Page Replay archives for a page set. Don\'t edit by '
+      'hand! Use record_wpr for updating.')
+  # Pointer from the metadata to the page set .json file.
+  page_set_abs_path = os.path.abspath(os.path.join(ps.base_dir, ps.file_name))
+  archive_data_file_abs_path = os.path.abspath(
+      os.path.join(ps.base_dir, ps.archive_data_file))
+  new_metadata['page_set'] = (
+      os.path.relpath(page_set_abs_path,
+                      os.path.dirname(archive_data_file_abs_path)))
+  new_metadata['archives'] = new_wpr_data_per_wpr_files
+
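For reference, a standalone sketch of how the 'page_set' entry above is computed: it is the page set's .json path expressed relative to the directory that holds the archive index file. The paths here are made up.

    import os.path

    page_set_abs_path = '/src/tools/perf/page_sets/top_25.json'
    archive_data_file_abs_path = '/src/tools/perf/page_sets/data/top_25.json'
    rel = os.path.relpath(page_set_abs_path,
                          os.path.dirname(archive_data_file_abs_path))
    assert rel == os.path.join('..', 'top_25.json')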
+  # Set the archive path to something temporary.
+  target_wpr_path = os.path.join(os.path.dirname(archive_data_file_abs_path),
+                                 target_wpr_file)
+  temp_target_wpr_path = target_wpr_path + '.temp'
dtu 2013/01/23 21:07:02:
    temp_target_wpr_path = tempfile.mkstemp()[1]
marja 2013/01/24 16:03:33:
    Done.
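For reference, a standalone sketch of the tempfile-based approach suggested above: tempfile.mkstemp() returns an (fd, path) pair, the second element being the path to hand to the recorder; the open descriptor should be closed so it is not leaked.

    import os
    import tempfile

    fd, temp_target_wpr_path = tempfile.mkstemp(suffix='.wpr')
    os.close(fd)
    # ... record into temp_target_wpr_path, then rename it into place ...
    os.remove(temp_target_wpr_path)  # Only so this sketch cleans up after itself.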
+  for page in ps.pages:
+    page.archive_path = temp_target_wpr_path
   options.wpr_mode = wpr_modes.WPR_RECORD
   recorder.CustomizeBrowserOptions(options)
@@ -87,11 +152,25 @@ Use --browser=list to figure out which are available.\n"""
   with page_runner.PageRunner(ps) as runner:
     runner.Run(options, possible_browser, recorder, results)
-  if len(results.page_failures):
+  if results.page_failures:
     logging.warning('Failed pages: %s', '\n'.join(
         [failure['page'].url for failure in results.page_failures]))
-  if len(results.skipped_pages):
+  if results.skipped_pages:
     logging.warning('Skipped pages: %s', '\n'.join(
         [skipped['page'].url for skipped in results.skipped_pages]))
+
+  # If success, copy the temporary wpr file over the target wpr_file. Write the
+  # index.json file. Delete all wpr files which are now unnecessary.
+  if not results.page_failures:
+    os.rename(temp_target_wpr_path, target_wpr_path)
+    with open(os.path.join(ps.base_dir, ps.archive_data_file), 'w') as f:
+      f.write(json.dumps(new_metadata, indent=4))
+      f.flush()
+    for wpr_file in wpr_files_not_needed:
+      os.remove(os.path.join(ps.base_dir, os.path.dirname(ps.archive_data_file),
+                             wpr_file))
+  else:
+    os.remove(temp_target_wpr_path)
+
   return min(255, len(results.page_failures))
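
For reference, a sketch of the shape of the archive index this patch writes. Only the keys ('description', 'page_set', 'archives') come from the code above; the page set name, archive names, and URLs are made up.

    import collections
    import json

    new_metadata = {
        'description': 'Describes the Web Page Replay archives for a page set.',
        'page_set': '../top_25.json',
        'archives': collections.OrderedDict([
            ('top_25_000.wpr', ['http://example.com/a']),
            ('top_25_001.wpr', ['http://example.com/b', 'http://example.com/c']),
        ]),
    }
    text = json.dumps(new_metadata, indent=4)
    assert '"archives"' in text

Recording into a temporary archive and renaming it over the target only when no pages failed keeps a broken run from clobbering a known-good archive; the index file and the deletion of unneeded archives are likewise deferred until success.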