tools/android/loading/gce/main.py - Issue 1895033002: tools/android/loading Switch the GCE worker to pull queues

Side by Side Diff: tools/android/loading/gce/main.py

Issue 1895033002: tools/android/loading Switch the GCE worker to pull queues (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@appengine

Patch Set: Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.

4

5 import json

6 import os

7 import re

8 import threading

9 import time

10 import subprocess

11 import sys

12

13 # NOTE: The parent directory needs to be first in sys.path to avoid conflicts

14 # with catapult modules that have colliding names, as catapult inserts itself

15 # into the path as the second element. This is an ugly and fragile hack.

16 sys.path.insert(0,

17 os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))

18 import controller

19 from google_storage_accessor import GoogleStorageAccessor

20 import loading_trace

21 from loading_trace_database import LoadingTraceDatabase

22 import options

23

24

class ServerApp(object):
  """Simple web server application, collecting traces and writing them in
  Google Cloud Storage.

  Instances are WSGI callables (see __call__) answering three paths:
    /set_tasks: starts processing the list of URLs given in the request body.
    /test: replies with a fixed string.
    /status: reports progress of the task list currently being processed.

  Tasks are processed on a single background thread (_thread). The request
  handling code only reads _tasks and _failed_tasks, under _tasks_lock.
  """

  def __init__(self, configuration_file):
    """|configuration_file| is a path to a file containing JSON as described in
    README.md.
    """
    self._tasks = []  # List of remaining tasks, only modified by _thread.
    self._failed_tasks = []  # Failed tasks, only modified by _thread.
    self._thread = None
    self._tasks_lock = threading.Lock()  # Protects _tasks and _failed_tasks.
    self._initial_task_count = -1
    self._start_time = None
    print('Reading configuration')
    with open(configuration_file) as config_json:
      config = json.load(config_json)

    # Separate the cloud storage path into the bucket and the base path under
    # the bucket.
    self._bucket_name, self._base_path_in_bucket = (
        self._SplitCloudStoragePath(config['cloud_storage_path']))

    self._src_path = config['src_path']
    self._google_storage_accessor = GoogleStorageAccessor(
        project_name=config['project_name'], bucket_name=self._bucket_name)

    # Initialize the global options that will be used during trace generation.
    options.OPTIONS.ParseArgs([])
    options.OPTIONS.local_binary = config['chrome_path']

  @staticmethod
  def _SplitCloudStoragePath(cloud_storage_path):
    """Splits a 'bucket/some/path' string into its bucket and base path.

    Args:
      cloud_storage_path: Bucket name, optionally followed by '/' and a path
          under the bucket.

    Returns:
      A (bucket_name, base_path) tuple. base_path is either empty or ends with
      a '/', so that object names can be appended to it directly.
    """
    bucket_name, _, base_path = cloud_storage_path.partition('/')
    # Only terminate a non-empty base path: 'bucket/' must not yield '/',
    # which would put a leading slash in every object name.
    if base_path and not base_path.endswith('/'):
      base_path += '/'
    return bucket_name, base_path

  def _IsProcessingTasks(self):
    """Returns True if the application is currently processing tasks."""
    return self._thread is not None and self._thread.is_alive()

  def _GenerateTrace(self, url, emulate_device, emulate_network, filename,
                     log_filename):
    """Generates a trace on _thread.

    While the trace is being recorded, sys.stdout and sys.stderr are redirected
    to |log_filename|. They are always restored before returning or raising.

    Args:
      url: URL as a string. 'http://' is prepended when the URL starts with
          neither 'http' nor 'file'.
      emulate_device: Name of the device to emulate. Empty for no emulation.
      emulate_network: Type of network emulation. Empty for no emulation.
      filename: Name of the file where the trace is saved.
      log_filename: Name of the file where standard output and errors are
          logged.

    Returns:
      A dictionary of metadata about the trace, including a 'succeeded' field
      indicating whether the trace was successfully generated.
    """
    try:
      os.remove(filename)  # Remove any existing trace for this URL.
    except OSError:
      pass  # Nothing to remove.

    if not url.startswith('http') and not url.startswith('file'):
      url = 'http://' + url

    trace_metadata = {'succeeded': False, 'url': url}
    trace = None
    old_stdout = sys.stdout
    old_stderr = sys.stderr
    try:
      with open(log_filename, 'w') as log_file:
        sys.stdout = log_file
        sys.stderr = log_file
        try:
          # Set up the controller.
          chrome_ctl = controller.LocalChromeController()
          chrome_ctl.SetHeadless(True)
          if emulate_device:
            chrome_ctl.SetDeviceEmulation(emulate_device)
          if emulate_network:
            chrome_ctl.SetNetworkEmulation(emulate_network)

          # Record the trace.
          with chrome_ctl.OpenWithRedirection(log_file,
                                              log_file) as connection:
            connection.ClearCache()
            trace = loading_trace.LoadingTrace.RecordUrlNavigation(
                url, connection, chrome_ctl.ChromeMetadata())
            trace_metadata['succeeded'] = True
            trace_metadata.update(trace.ToJsonDict()[trace._METADATA_KEY])
        except Exception as e:
          # Deliberately broad: any failure is logged and reported to the
          # caller through the 'succeeded' field rather than raised.
          log_file.write(str(e))

        if trace:
          with open(filename, 'w') as f:
            json.dump(trace.ToJsonDict(), f, sort_keys=True, indent=2)
    finally:
      # Never leave the process streams pointing at the (closed) log file,
      # even if writing the trace above raised.
      sys.stdout = old_stdout
      sys.stderr = old_stderr

    return trace_metadata

  def _GetCurrentTaskCount(self):
    """Returns the number of remaining tasks. Thread safe."""
    with self._tasks_lock:
      return len(self._tasks)

  def _ProcessTasks(self, tasks, repeat_count, emulate_device, emulate_network):
    """Iterates over _tasks, generating a trace for each of them. Uploads the
    resulting traces to Google Cloud Storage. Runs on _thread.

    Args:
      tasks: The list of URLs to process. The list is consumed (emptied) as
          tasks complete.
      repeat_count: The number of traces generated for each URL.
      emulate_device: Name of the device to emulate. Empty for no emulation.
      emulate_network: Type of network emulation. Empty for no emulation.
    """
    # The main thread might be reading the task lists, take the lock to modify.
    with self._tasks_lock:
      self._tasks = tasks
      self._failed_tasks = []
    failures_dir = self._base_path_in_bucket + 'failures/'
    traces_dir = self._base_path_in_bucket + 'traces/'

    trace_database = LoadingTraceDatabase({})

    # TODO(blundell): Fix this up.
    logs_dir = self._base_path_in_bucket + 'analyze_logs/'
    log_filename = 'analyze.log'
    # Avoid special characters in storage object names
    pattern = re.compile(r"[#\?\[\]\*/]")
    while self._tasks:
      # Peek rather than pop: the task stays counted as remaining until all of
      # its repeats are done.
      url = self._tasks[-1]
      local_filename = pattern.sub('_', url)
      for repeat in range(repeat_count):
        print('Generating trace for URL: %s' % url)
        remote_filename = local_filename + '/' + str(repeat)
        trace_metadata = self._GenerateTrace(
            url, emulate_device, emulate_network, local_filename, log_filename)
        if trace_metadata['succeeded']:
          print('Uploading: %s' % remote_filename)
          remote_trace_location = traces_dir + remote_filename
          self._google_storage_accessor.UploadFile(local_filename,
                                                   remote_trace_location)
          full_cloud_storage_path = ('gs://' + self._bucket_name + '/' +
                                     remote_trace_location)
          trace_database.AddTrace(full_cloud_storage_path, trace_metadata)
        else:
          print('Trace generation failed for URL: %s' % url)
          with self._tasks_lock:
            self._failed_tasks.append({"url": url, "repeat": repeat})
          if os.path.isfile(local_filename):
            self._google_storage_accessor.UploadFile(
                local_filename, failures_dir + remote_filename)
        print('Uploading log')
        self._google_storage_accessor.UploadFile(log_filename,
                                                 logs_dir + remote_filename)
      # Pop once task is finished, for accurate status tracking.
      with self._tasks_lock:
        self._tasks.pop()

    self._google_storage_accessor.UploadString(
        json.dumps(trace_database.ToJsonDict(), indent=2),
        traces_dir + 'trace_database.json')

    if self._failed_tasks:
      print('Uploading failing URLs')
      self._google_storage_accessor.UploadString(
          json.dumps(self._failed_tasks, indent=2),
          failures_dir + 'failures.json')

  def _SetTaskList(self, http_body):
    """Sets the list of tasks and starts processing them

    Args:
      http_body: JSON dictionary. See README.md for a description of the format.

    Returns:
      A string to be sent back to the client, describing the success status of
      the request. Malformed input is reported through this string rather than
      by raising.
    """
    if self._IsProcessingTasks():
      return 'Error: Already running\n'

    try:
      load_parameters = json.loads(http_body)
    except ValueError:
      return 'Error: invalid JSON\n'
    if not isinstance(load_parameters, dict):
      return 'Error: invalid JSON\n'

    try:
      tasks = load_parameters['urls']
    except KeyError:
      return 'Error: invalid urls\n'
    if not isinstance(tasks, list):
      return 'Error: invalid urls\n'

    # Optional parameters.
    try:
      repeat_count = int(load_parameters.get('repeat_count', '1'))
    except (TypeError, ValueError):
      # TypeError covers JSON values int() cannot convert, such as null.
      return 'Error: invalid repeat_count\n'
    emulate_device = load_parameters.get('emulate_device', '')
    emulate_network = load_parameters.get('emulate_network', '')

    if not tasks:
      return 'Error: Empty task list\n'

    self._initial_task_count = len(tasks)
    self._start_time = time.time()
    self._thread = threading.Thread(
        target=self._ProcessTasks,
        args=(tasks, repeat_count, emulate_device, emulate_network))
    self._thread.start()
    return 'Starting generation of %s tasks\n' % str(self._initial_task_count)

  def _BuildStatusReport(self):
    """Returns the body of the /status response as a string."""
    if not self._IsProcessingTasks():
      return 'Idle\n'

    task_count = self._GetCurrentTaskCount()
    if task_count == 0:
      data = '%s tasks complete. Finalizing.\n' % self._initial_task_count
    else:
      data = 'Remaining tasks: %s / %s\n' % (
          task_count, self._initial_task_count)
    elapsed = time.time() - self._start_time
    data += 'Elapsed time: %s seconds\n' % str(elapsed)
    # Copy under the lock: _thread may append to the list while it is being
    # serialized below.
    with self._tasks_lock:
      failed_tasks = list(self._failed_tasks)
    data += '%s failed tasks:\n' % len(failed_tasks)
    data += json.dumps(failed_tasks, indent=2)
    return data

  def __call__(self, environ, start_response):
    """WSGI entry point. Dispatches on the request path.

    Args:
      environ: WSGI environment dictionary. 'PATH_INFO' selects the action;
          for /set_tasks, 'CONTENT_LENGTH' and 'wsgi.input' provide the body.
      start_response: WSGI callable taking the status line and the headers.

    Returns:
      An iterator over the response body: a single plain text string, empty
      for unknown paths (404).
    """
    path = environ['PATH_INFO']

    if path == '/set_tasks':
      # Get the tasks from the HTTP body.
      try:
        body_size = int(environ.get('CONTENT_LENGTH', 0))
      except ValueError:
        body_size = 0
      body = environ['wsgi.input'].read(body_size)
      data = self._SetTaskList(body)
    elif path == '/test':
      data = 'hello\n'
    elif path == '/status':
      data = self._BuildStatusReport()
    else:
      start_response('404 NOT FOUND', [('Content-Length', '0')])
      return iter([''])

    response_headers = [
        ('Content-type', 'text/plain'),
        ('Content-Length', str(len(data)))
    ]
    start_response('200 OK', response_headers)
    return iter([data])

277

278

def StartApp(configuration_file):
  """Creates the application object for the given configuration file.

  Args:
    configuration_file: Path of the configuration file, passed unchanged to
        the ServerApp constructor.

  Returns:
    The newly constructed ServerApp instance.
  """
  app = ServerApp(configuration_file)
  return app

OLD	NEW