OLD | NEW |
| (Empty) |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import json | |
6 import logging | |
7 import os | |
8 import re | |
9 import shutil | |
10 import tempfile | |
11 | |
12 from telemetry import page as page_module | |
13 from telemetry.util import cloud_storage | |
14 | |
15 | |
def AssertValidCloudStorageBucket(bucket):
  """Checks that bucket is None or one of the known cloud storage buckets.

  Args:
    bucket: Candidate bucket. Accepted values are None and the
        PUBLIC_BUCKET, PARTNER_BUCKET and INTERNAL_BUCKET constants of the
        cloud_storage module.

  Raises:
    ValueError: If bucket is not one of the accepted values.
  """
  accepted_buckets = (
      None,
      cloud_storage.PUBLIC_BUCKET,
      cloud_storage.PARTNER_BUCKET,
      cloud_storage.INTERNAL_BUCKET,
  )
  if bucket in accepted_buckets:
    return
  raise ValueError("Cloud storage privacy bucket %s is invalid" % bucket)
23 | |
24 | |
25 # TODO(chrishenry): Rename this (and module) to wpr_archive_info.WprArchiveInfo | |
26 # and move to telemetry.user_story or telemetry.wpr or telemetry.core. | |
class PageSetArchiveInfo(object):
  """Bookkeeping for the Web Page Replay (.wpr) archives of a page set.

  The metadata is a JSON file whose 'archives' entry maps each .wpr file name
  (relative to the metadata file's directory) to the list of page names
  recorded in it. This class keeps that mapping and its inverse in memory,
  resolves archive paths for stories, and registers new recordings.
  """

  def __init__(self, file_path, data, bucket, ignore_archive=False):
    """Builds the lookup tables and optionally fetches the archives.

    Args:
      file_path: Path of the JSON metadata file. Its directory is created if
          it does not exist and is the base for all .wpr file names.
      data: Parsed metadata. Must contain an 'archives' dict mapping .wpr
          file names to lists of page names. The dict is kept by reference
          and is mutated when new recordings are added.
      bucket: Cloud storage bucket, or None. Checked with
          AssertValidCloudStorageBucket.
      ignore_archive: If True, do not call cloud_storage.GetIfChanged for
          the listed archives.

    Raises:
      ValueError: If bucket is not an accepted value.
    """
    AssertValidCloudStorageBucket(bucket)
    self._file_path = file_path
    self._base_dir = os.path.dirname(file_path)
    self._bucket = bucket

    # Ensure directory exists. A bare file name has an empty dirname, which
    # means the current directory; os.makedirs('') would raise.
    if self._base_dir and not os.path.exists(self._base_dir):
      os.makedirs(self._base_dir)

    # Download all .wpr files.
    if not ignore_archive:
      if not self._bucket:
        logging.warning(
            'page_set in %s has no bucket specified, and cannot be '
            'downloaded from cloud_storage.', file_path)
      else:
        for archive_name in data['archives']:
          archive_path = self._WprFileNameToPath(archive_name)
          try:
            cloud_storage.GetIfChanged(archive_path, bucket)
          except (cloud_storage.CredentialsError,
                  cloud_storage.PermissionError):
            if os.path.exists(archive_path):
              # If the archive exists, assume the user recorded their own and
              # simply warn.
              logging.warning('Need credentials to update WPR archive: %s',
                              archive_path)
            else:
              # Stay best-effort (no raise), but do not hide the fact that
              # the archive is unavailable.
              logging.warning(
                  'Need credentials to download WPR archive: %s',
                  archive_path)

    # Map from the relative path (as it appears in the metadata file) of the
    # .wpr file to a list of page names it supports.
    self._wpr_file_to_page_names = data['archives']

    # Map from the page name to a relative path (as it appears in the metadata
    # file) of the .wpr file.
    self._page_name_to_wpr_file = dict()
    for wpr_file, page_names in data['archives'].items():
      for page_name in page_names:
        self._page_name_to_wpr_file[page_name] = wpr_file

    # Path of the recording in progress, set by AddNewTemporaryRecording.
    self.temp_target_wpr_file_path = None

  @classmethod
  def FromFile(cls, file_path, bucket, ignore_archive=False):
    """Creates an instance from a metadata file, which may not exist yet.

    Args:
      file_path: Path of the JSON metadata file.
      bucket: Cloud storage bucket, or None.
      ignore_archive: Forwarded to the constructor.

    Returns:
      An instance loaded from file_path if it exists, otherwise an instance
      with an empty 'archives' mapping.
    """
    if os.path.exists(file_path):
      with open(file_path, 'r') as f:
        data = json.load(f)
      return cls(file_path, data, bucket, ignore_archive=ignore_archive)
    return cls(file_path, {'archives': {}}, bucket,
               ignore_archive=ignore_archive)

  def WprFilePathForUserStory(self, story):
    """Returns the .wpr path to use for story, or None if there is none.

    While a temporary recording is active, its path is returned for every
    story.

    Args:
      story: Object with a display_name attribute; page_module.Page
          instances are also looked up by their url.

    Returns:
      The temporary recording path if one is set, else the absolute path of
      the archive registered for the story, else None.
    """
    if self.temp_target_wpr_file_path:
      return self.temp_target_wpr_file_path
    wpr_file = self._page_name_to_wpr_file.get(story.display_name)
    if wpr_file is None and isinstance(story, page_module.Page):
      # Some old page sets always use the URL to identify a page rather than
      # the display_name, so try to look for that.
      wpr_file = self._page_name_to_wpr_file.get(story.url)
    if wpr_file:
      return self._WprFileNameToPath(wpr_file)
    return None

  def AddNewTemporaryRecording(self, temp_wpr_file_path=None):
    """Sets the path that the next recording is written to.

    Args:
      temp_wpr_file_path: Path to use. If None, a new temporary file is
          created with tempfile.mkstemp and its path is used.
    """
    if temp_wpr_file_path is None:
      temp_wpr_file_handle, temp_wpr_file_path = tempfile.mkstemp()
      # Only the path is needed; close the descriptor so it does not leak.
      os.close(temp_wpr_file_handle)
    self.temp_target_wpr_file_path = temp_wpr_file_path

  def AddRecordedPages(self, pages, upload_to_cloud_storage=False):
    """Turns the temporary recording into a numbered archive for pages.

    With no pages the temporary recording is deleted. Otherwise it is moved
    to the next archive name, every page is pointed at it, a '.sha1' file is
    written next to it, the metadata file is rewritten, and archives left
    without pages are deleted.

    Args:
      pages: Recorded pages; each must have a display_name attribute.
      upload_to_cloud_storage: If True, also insert the new archive into
          the instance's bucket. Upload failures are logged, not raised.
    """
    if not pages:
      os.remove(self.temp_target_wpr_file_path)
      return

    target_wpr_file, target_wpr_file_path = self._NextWprFileName()
    for page in pages:
      self._SetWprFileForPage(page.display_name, target_wpr_file)
    shutil.move(self.temp_target_wpr_file_path, target_wpr_file_path)

    # Update the hash file.
    with open(target_wpr_file_path + '.sha1', 'wb') as f:
      f.write(cloud_storage.CalculateHash(target_wpr_file_path))

    self._WriteToFile()
    self._DeleteAbandonedWprFiles()

    if not upload_to_cloud_storage:
      return
    if not self._bucket:
      logging.warning('PageSet must have bucket specified to upload pages to'
                      ' cloud storage.')
      return
    try:
      cloud_storage.Insert(self._bucket, target_wpr_file,
                           target_wpr_file_path)
    except cloud_storage.CloudStorageError as e:
      # Pass both values as logging args. Applying % to the path alone would
      # raise TypeError here and hide the upload error.
      logging.warning('Failed to upload wpr file %s to cloud storage. '
                      'Error:%s', target_wpr_file_path, e)

  def _DeleteAbandonedWprFiles(self):
    """Drops archives with no pages from the mapping and from disk."""
    # Update the metadata so that the abandoned wpr files don't have empty
    # page name arrays.
    for wpr_file in self._AbandonedWprFiles():
      del self._wpr_file_to_page_names[wpr_file]
      # Don't fail if we're unable to delete some of the files.
      wpr_file_path = self._WprFileNameToPath(wpr_file)
      try:
        os.remove(wpr_file_path)
      except OSError:
        logging.warning('Failed to delete file: %s', wpr_file_path)

  def _AbandonedWprFiles(self):
    """Returns the names of archives that no longer hold any page."""
    return [wpr_file
            for wpr_file, page_names in self._wpr_file_to_page_names.items()
            if not page_names]

  def _WriteToFile(self):
    """Writes the metadata into the file passed as constructor parameter."""
    metadata = dict()
    metadata['description'] = (
        'Describes the Web Page Replay archives for a page set. Don\'t edit by '
        'hand! Use record_wpr for updating.')
    # Don't write data for abandoned archives.
    metadata['archives'] = dict(
        (wpr_file, page_names)
        for wpr_file, page_names in self._wpr_file_to_page_names.items()
        if page_names)

    with open(self._file_path, 'w') as f:
      json.dump(metadata, f, indent=4)

  def _WprFileNameToPath(self, wpr_file):
    """Returns the absolute path of wpr_file inside the metadata directory."""
    return os.path.abspath(os.path.join(self._base_dir, wpr_file))

  def _NextWprFileName(self):
    """Creates a new file name for a wpr archive file.

    Returns:
      A (file name, absolute path) tuple whose number is one above the
      highest number already in use, or 0 when there are no archives.

    Raises:
      Exception: If a known archive name is not of the form
          "<base>_<number>.wpr", or the archives do not share one base.
    """
    # The names are of the format "some_thing_number.wpr". Read the numbers.
    highest_number = -1
    base = None
    for wpr_file in self._wpr_file_to_page_names:
      match = re.match(r'(?P<BASE>.*)_(?P<NUMBER>[0-9]+)\.wpr', wpr_file)
      if not match:
        raise Exception('Illegal wpr file name ' + wpr_file)
      highest_number = max(int(match.group('NUMBER')), highest_number)
      if base and match.group('BASE') != base:
        raise Exception('Illegal wpr file name ' + wpr_file +
                        ', doesn\'t begin with ' + base)
      base = match.group('BASE')
    if not base:
      # If we're creating a completely new info file, use the base name of the
      # page set file.
      base = os.path.splitext(os.path.basename(self._file_path))[0]
    new_filename = '%s_%03d.wpr' % (base, highest_number + 1)
    return new_filename, self._WprFileNameToPath(new_filename)

  def _SetWprFileForPage(self, page_name, wpr_file):
    """For modifying the metadata when we're going to record a new archive.

    Args:
      page_name: Name of the page being re-pointed.
      wpr_file: Relative name of the archive that now holds the page.
    """
    old_wpr_file = self._page_name_to_wpr_file.get(page_name)
    if old_wpr_file:
      # Detach the page from its previous archive. That archive may end up
      # with an empty page list, which marks it as abandoned.
      self._wpr_file_to_page_names[old_wpr_file].remove(page_name)
    self._page_name_to_wpr_file[page_name] = wpr_file
    self._wpr_file_to_page_names.setdefault(wpr_file, []).append(page_name)
OLD | NEW |