Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(336)

Unified Diff: telemetry/telemetry/wpr/archive_info.py

Issue 2725323002: Revert of [Telemetry][Wpr] Remove old version of archive info. (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: telemetry/telemetry/wpr/archive_info.py
diff --git a/telemetry/telemetry/wpr/archive_info.py b/telemetry/telemetry/wpr/archive_info.py
index d38e650ffe53ddce042798f7c81312ad4017abc9..7018a41c278701bd63b22c4db3ad2374d0e3c762 100644
--- a/telemetry/telemetry/wpr/archive_info.py
+++ b/telemetry/telemetry/wpr/archive_info.py
@@ -9,13 +9,12 @@
import shutil
import tempfile
+from telemetry.wpr import archive_info2
+
from py_utils import cloud_storage # pylint: disable=import-error
-_DEFAULT_PLATFORM = 'DEFAULT'
-_ALL_PLATFORMS = ['mac', 'linux', 'android', 'win', _DEFAULT_PLATFORM]
-
-
+# TODO(rnephew): Remove this file when archive_info2 is the default.
def AssertValidCloudStorageBucket(bucket):
is_valid = bucket in (None,
cloud_storage.PUBLIC_BUCKET,
@@ -25,6 +24,10 @@
raise ValueError("Cloud storage privacy bucket %s is invalid" % bucket)
+class ArchiveError(Exception):
+ pass
+
+
class WprArchiveInfo(object):
def __init__(self, file_path, data, bucket):
AssertValidCloudStorageBucket(bucket)
@@ -32,27 +35,36 @@
self._base_dir = os.path.dirname(file_path)
self._data = data
self._bucket = bucket
- self.temp_target_wpr_file_path = None
+
# Ensure directory exists.
if not os.path.exists(self._base_dir):
os.makedirs(self._base_dir)
- assert data.get('platform_specific', False), (
- 'Detected old version of archive info json file. Please update to new '
- 'version.')
-
- self._story_name_to_wpr_file = data['archives']
+ # Map from the relative path (as it appears in the metadata file) of the
+ # .wpr file to a list of story names it supports.
+ self._wpr_file_to_story_names = data['archives']
+
+ # Map from the story name to a relative path (as it appears
+ # in the metadata file) of the .wpr file.
+ self._story_name_to_wpr_file = dict()
+ # Find out the wpr file names for each story.
+ for wpr_file in data['archives']:
+ story_names = data['archives'][wpr_file]
+ for story_name in story_names:
+ self._story_name_to_wpr_file[story_name] = wpr_file
+ self.temp_target_wpr_file_path = None
@classmethod
def FromFile(cls, file_path, bucket):
- """ Generates an archive_info instance with the given json file. """
if os.path.exists(file_path):
with open(file_path, 'r') as f:
data = json.load(f)
+ if data.get('platform_specific', False):
+ return archive_info2.WprArchiveInfo(file_path, data, bucket)
return cls(file_path, data, bucket)
- return cls(file_path, {'archives': {}, 'platform_specific': True}, bucket)
-
- def DownloadArchivesIfNeeded(self, target_platforms=None):
+ return cls(file_path, {'archives': {}}, bucket)
+
+ def DownloadArchivesIfNeeded(self):
"""Downloads archives iff the Archive has a bucket parameter and the user
has permission to access the bucket.
@@ -64,28 +76,23 @@
permission to access the archive's bucket but a local copy of the archive
exists.
"""
- # If no target platform is set, download all platforms.
- if target_platforms is None:
- target_platforms = _ALL_PLATFORMS
- else:
- assert isinstance(target_platforms, list), 'Must pass platforms as a list'
- target_platforms = target_platforms + [_DEFAULT_PLATFORM]
# Download all .wpr files.
if not self._bucket:
logging.warning('Story set in %s has no bucket specified, and '
'cannot be downloaded from cloud_storage.', )
return
- assert 'archives' in self._data, ("Invalid data format in %s. 'archives' "
- "field is needed" % self._file_path)
-
- def download_if_needed(path):
+ assert 'archives' in self._data, 'Invalid data format in %s. \'archives\'' \
+ ' field is needed' % self._file_path
+ for archive_path in self._data['archives']:
+ archive_path = self._WprFileNameToPath(archive_path)
try:
- cloud_storage.GetIfChanged(path, self._bucket)
+ cloud_storage.GetIfChanged(archive_path, self._bucket)
except (cloud_storage.CredentialsError, cloud_storage.PermissionError):
- if os.path.exists(path):
- # If the archive exists, assume the user recorded their own and warn
- # them that they do not have the proper credentials to download.
- logging.warning('Need credentials to update WPR archive: %s', path)
+ if os.path.exists(archive_path):
+ # If the archive exists, assume the user recorded their own and
+ # simply warn.
+ logging.warning('Need credentials to update WPR archive: %s',
+ archive_path)
else:
logging.error("You either aren't authenticated or don't have "
"permission to use the archives for this page set."
@@ -95,27 +102,17 @@
"upload_to_cloud_storage")
raise
- story_archives = self._data['archives']
- for story in story_archives:
- for target_platform in target_platforms:
- if story_archives[story].get(target_platform):
- archive_path = self._WprFileNameToPath(
- story_archives[story][target_platform])
- download_if_needed(archive_path)
-
- def WprFilePathForStory(self, story, target_platform=_DEFAULT_PLATFORM):
+ def WprFilePathForStory(self, story, target_platform=None):
+ del target_platform
if self.temp_target_wpr_file_path:
return self.temp_target_wpr_file_path
-
wpr_file = self._story_name_to_wpr_file.get(story.display_name, None)
if wpr_file is None and hasattr(story, 'url'):
# Some old pages always use the URL to identify a page rather than the
# display_name, so try to look for that.
wpr_file = self._story_name_to_wpr_file.get(story.url, None)
if wpr_file:
- if target_platform in wpr_file:
- return self._WprFileNameToPath(wpr_file[target_platform])
- return self._WprFileNameToPath(wpr_file[_DEFAULT_PLATFORM])
+ return self._WprFileNameToPath(wpr_file)
return None
def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
@@ -125,20 +122,15 @@
self.temp_target_wpr_file_path = temp_wpr_file_path
def AddRecordedStories(self, stories, upload_to_cloud_storage=False,
- target_platform=_DEFAULT_PLATFORM):
+ target_platform=None):
+ del target_platform # Used in archive_info2.py
if not stories:
os.remove(self.temp_target_wpr_file_path)
return
(target_wpr_file, target_wpr_file_path) = self._NextWprFileName()
for story in stories:
- # Check to see if the platform has been manually overrided.
- if not story.platform_specific:
- current_target_platform = _DEFAULT_PLATFORM
- else:
- current_target_platform = target_platform
- self._SetWprFileForStory(
- story.display_name, target_wpr_file, current_target_platform)
+ self._SetWprFileForStory(story.display_name, target_wpr_file)
shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)
# Update the hash file.
@@ -148,6 +140,7 @@
f.flush()
self._WriteToFile()
+ self._DeleteAbandonedWprFiles()
# Upload to cloud storage
if upload_to_cloud_storage:
@@ -162,14 +155,38 @@
logging.warning('Failed to upload wpr file %s to cloud storage. '
'Error:%s' % target_wpr_file_path, e)
+ def _DeleteAbandonedWprFiles(self):
+ # Update the metadata so that the abandoned wpr files don't have
+ # empty story name arrays.
+ abandoned_wpr_files = self._AbandonedWprFiles()
+ for wpr_file in abandoned_wpr_files:
+ del self._wpr_file_to_story_names[wpr_file]
+ # Don't fail if we're unable to delete some of the files.
+ wpr_file_path = self._WprFileNameToPath(wpr_file)
+ try:
+ os.remove(wpr_file_path)
+ except Exception:
+ logging.warning('Failed to delete file: %s' % wpr_file_path)
+
+ def _AbandonedWprFiles(self):
+ abandoned_wpr_files = []
+ for wpr_file, story_names in (
+ self._wpr_file_to_story_names.iteritems()):
+ if not story_names:
+ abandoned_wpr_files.append(wpr_file)
+ return abandoned_wpr_files
+
def _WriteToFile(self):
"""Writes the metadata into the file passed as constructor parameter."""
metadata = dict()
metadata['description'] = (
'Describes the Web Page Replay archives for a story set. '
'Don\'t edit by hand! Use record_wpr for updating.')
- metadata['archives'] = self._story_name_to_wpr_file.copy()
- metadata['platform_specific'] = True
+ metadata['archives'] = self._wpr_file_to_story_names.copy()
+ # Don't write data for abandoned archives.
+ abandoned_wpr_files = self._AbandonedWprFiles()
+ for wpr_file in abandoned_wpr_files:
+ del metadata['archives'][wpr_file]
with open(self._file_path, 'w') as f:
json.dump(metadata, f, indent=4, sort_keys=True, separators=(',', ': '))
@@ -183,12 +200,7 @@
# The names are of the format "some_thing_number.wpr". Read the numbers.
highest_number = -1
base = None
- wpr_files = []
- for story in self._data['archives']:
- for p in self._data['archives'][story]:
- wpr_files.append(self._data['archives'][story][p])
-
- for wpr_file in wpr_files:
+ for wpr_file in self._wpr_file_to_story_names:
match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
if not match:
raise Exception('Illegal wpr file name ' + wpr_file)
@@ -204,10 +216,12 @@
new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
return new_filename, self._WprFileNameToPath(new_filename)
- def _SetWprFileForStory(self, story_name, wpr_file, target_platform):
+ def _SetWprFileForStory(self, story_name, wpr_file):
"""For modifying the metadata when we're going to record a new archive."""
- if story_name not in self._data['archives']:
- # If there is no other recording we want the first to be the default
- # until a new default is recorded.
- self._data['archives'][story_name] = {_DEFAULT_PLATFORM: wpr_file}
- self._data['archives'][story_name][target_platform] = wpr_file
+ old_wpr_file = self._story_name_to_wpr_file.get(story_name, None)
+ if old_wpr_file:
+ self._wpr_file_to_story_names[old_wpr_file].remove(story_name)
+ self._story_name_to_wpr_file[story_name] = wpr_file
+ if wpr_file not in self._wpr_file_to_story_names:
+ self._wpr_file_to_story_names[wpr_file] = []
+ self._wpr_file_to_story_names[wpr_file].append(story_name)
« no previous file with comments | « telemetry/telemetry/internal/testing/test_page_sets/data/example_domain.json ('k') | telemetry/telemetry/wpr/archive_info2.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698