appengine/findit/findit_api.py - Issue 2439553002: [Findit] Reduce redundant ndb reads by querying necessary entities ahead of time and share them amo…

Side by Side Diff: appengine/findit/findit_api.py

Issue 2439553002: [Findit] Reduce redundant ndb reads by querying necessary entities ahead of time and share them amo… (Closed)

Patch Set: rebase Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 """This module is to provide Findit service APIs through Cloud Endpoints:	5 """This module is to provide Findit service APIs through Cloud Endpoints:

6	6

7 Current APIs include:	7 Current APIs include:

8 1. Analysis of compile/test failures in Chromium waterfalls.	8 1. Analysis of compile/test failures in Chromium waterfalls.

9 Analyzes failures and detects suspected CLs.	9 Analyzes failures and detects suspected CLs.

10 2. Analysis of flakes on Commit Queue.	10 2. Analysis of flakes on Commit Queue.

11 """	11 """

12	12

	13 from collections import defaultdict

13 import json	14 import json

14 import logging	15 import logging

15 import pickle	16 import pickle

16	17

17 import endpoints	18 import endpoints

18 from google.appengine.api import taskqueue	19 from google.appengine.api import taskqueue

19 from protorpc import messages	20 from protorpc import messages

20 from protorpc import remote	21 from protorpc import remote

21	22

22 from common import appengine_util	23 from common import appengine_util

23 from common import auth_util	24 from common import auth_util

24 from common import constants	25 from common import constants

25 from common import time_util	26 from common import time_util

26 from common.waterfall import failure_type	27 from common.waterfall import failure_type

27 from model import analysis_approach_type	28 from model import analysis_approach_type

	29 from model import analysis_status

28 from model.flake.flake_analysis_request import FlakeAnalysisRequest	30 from model.flake.flake_analysis_request import FlakeAnalysisRequest

29 from model.suspected_cl_confidence import SuspectedCLConfidence	31 from model.suspected_cl_confidence import SuspectedCLConfidence

30 from model.wf_analysis import WfAnalysis	32 from model.wf_analysis import WfAnalysis

31 from model.wf_suspected_cl import WfSuspectedCL	33 from model.wf_suspected_cl import WfSuspectedCL

32 from model.wf_swarming_task import WfSwarmingTask	34 from model.wf_swarming_task import WfSwarmingTask

33 from model.wf_try_job import WfTryJob	35 from model.wf_try_job import WfTryJob

34 from waterfall import build_util	36 from waterfall import build_util

35 from waterfall import buildbot	37 from waterfall import buildbot

36 from waterfall import suspected_cl_util	38 from waterfall import suspected_cl_util

37 from waterfall import waterfall_config	39 from waterfall import waterfall_config

(...skipping 123 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
161 cl = WfSuspectedCL.Get(repo_name, revision)	163 cl = WfSuspectedCL.Get(repo_name, revision)

162 if not cl:	164 if not cl:

163 return None, None	165 return None, None

164	166

165 master_name = buildbot.GetMasterNameFromUrl(build.master_url)	167 master_name = buildbot.GetMasterNameFromUrl(build.master_url)

166 builder_name = build.builder_name	168 builder_name = build.builder_name

167 current_build = build.build_number	169 current_build = build.build_number

168	170

169 # If the CL is found by a try job, only the first failure will be recorded.	171 # If the CL is found by a try job, only the first failure will be recorded.

170 # So we might need to go to the first failure to get CL information.	172 # So we might need to go to the first failure to get CL information.

171 build_info = (cl.GetBuildInfo(master_name, builder_name, current_build) or	173 build_info = cl.GetBuildInfo(master_name, builder_name, current_build)

172 cl.GetBuildInfo(master_name, builder_name, first_failure))	174 first_build_info = cl.GetBuildInfo(master_name, builder_name, first_failure)

173	175 return suspected_cl_util.GetSuspectedCLConfidenceScoreAndApproach(

174 confidence = suspected_cl_util.GetSuspectedCLConfidenceScore(	176 confidences, build_info, first_build_info)

175 confidences, build_info)

176

177 cl_approach = (

178 _AnalysisApproach.TRY_JOB if analysis_approach_type.TRY_JOB in

179 build_info['approaches'] else _AnalysisApproach.HEURISTIC)

180

181 return confidence, cl_approach

182	177

183 def _GenerateBuildFailureAnalysisResult(	178 def _GenerateBuildFailureAnalysisResult(

184 self, build, suspected_cls_in_result, step_name, first_failure, test_name,	179 self, build, suspected_cls_in_result, step_name, first_failure, test_name,

185 analysis_approach, confidences, try_job_status, is_flaky_test):	180 analysis_approach, confidences, try_job_status, is_flaky_test):

186	181

187 suspected_cls = []	182 suspected_cls = []

188 for suspected_cl in suspected_cls_in_result:	183 for suspected_cl in suspected_cls_in_result:

189 repo_name = suspected_cl['repo_name']	184 repo_name = suspected_cl['repo_name']

190 revision = suspected_cl['revision']	185 revision = suspected_cl['revision']

191 commit_position = suspected_cl['commit_position']	186 commit_position = suspected_cl['commit_position']

192 confidence, cl_approach = self._GetConfidenceAndApproachForCL(	187 confidence, cl_approach = self._GetConfidenceAndApproachForCL(

193 repo_name, revision, confidences, build, first_failure)	188 repo_name, revision, confidences, build, first_failure)

194 cl_approach = cl_approach or analysis_approach	189 if cl_approach:

	190 cl_approach = (

	191 _AnalysisApproach.HEURISTIC if

	192 cl_approach == analysis_approach_type.HEURISTIC else

	193 _AnalysisApproach.TRY_JOB)

	194 else:

	195 cl_approach = analysis_approach

195	196

196 suspected_cls.append(_SuspectedCL(	197 suspected_cls.append(_SuspectedCL(

197 repo_name=repo_name, revision=revision,	198 repo_name=repo_name, revision=revision,

198 commit_position=commit_position, confidence=confidence,	199 commit_position=commit_position, confidence=confidence,

199 analysis_approach=cl_approach))	200 analysis_approach=cl_approach))

200	201

201 return _BuildFailureAnalysisResult(	202 return _BuildFailureAnalysisResult(

202 master_url=build.master_url,	203 master_url=build.master_url,

203 builder_name=build.builder_name,	204 builder_name=build.builder_name,

204 build_number=build.build_number,	205 build_number=build.build_number,

205 step_name=step_name,	206 step_name=step_name,

206 is_sub_test=test_name is not None,	207 is_sub_test=test_name is not None,

207 test_name=test_name,	208 test_name=test_name,

208 first_known_failed_build_number=first_failure,	209 first_known_failed_build_number=first_failure,

209 suspected_cls=suspected_cls,	210 suspected_cls=suspected_cls,

210 analysis_approach=analysis_approach,	211 analysis_approach=analysis_approach,

211 try_job_status=try_job_status,	212 try_job_status=try_job_status,

212 is_flaky_test=is_flaky_test)	213 is_flaky_test=is_flaky_test)

213	214

214 def _GetStatusAndCulpritFromTryJob(	215 def _GetStatusAndCulpritFromTryJob(

215 self, try_job_map, build_failure_type, step_name, test_name=None):	216 self, try_job, swarming_task, build_failure_type, step_name,

	217 test_name=None):

216 """Returns the culprit found by try-job for the given step or test."""	218 """Returns the culprit found by try-job for the given step or test."""

217 if not try_job_map:

218 return _TryJobStatus.FINISHED, None

219	219

220 if test_name is None:	220 if swarming_task and swarming_task.status in (

221 try_job_key = try_job_map.get(step_name)	221 analysis_status.PENDING, analysis_status.RUNNING):

222 else:	222 return _TryJobStatus.RUNNING, None

223 try_job_key = try_job_map.get(step_name, {}).get(test_name)

224	223

225 if not try_job_key:

226 return _TryJobStatus.FINISHED, None

227

228 try_job = WfTryJob.Get(*build_util.GetBuildInfoFromId(try_job_key))

229 if not try_job or try_job.failed:	224 if not try_job or try_job.failed:

230 return _TryJobStatus.FINISHED, None	225 return _TryJobStatus.FINISHED, None

231	226

232 if not try_job.completed:	227 if not try_job.completed:

233 return _TryJobStatus.RUNNING, None	228 return _TryJobStatus.RUNNING, None

234	229

235 if build_failure_type == failure_type.COMPILE:	230 if build_failure_type == failure_type.COMPILE:

236 if not try_job.compile_results: # pragma: no cover.	231 if not try_job.compile_results: # pragma: no cover.

237 return _TryJobStatus.FINISHED, None	232 return _TryJobStatus.FINISHED, None

238 return (	233 return (

239 _TryJobStatus.FINISHED,	234 _TryJobStatus.FINISHED,

240 try_job.compile_results[-1].get('culprit', {}).get(step_name))	235 try_job.compile_results[-1].get('culprit', {}).get(step_name))

241	236

242 if not try_job.test_results: # pragma: no cover.	237 if not try_job.test_results: # pragma: no cover.

243 return _TryJobStatus.FINISHED, None	238 return _TryJobStatus.FINISHED, None

244	239

245 if test_name is None:	240 if test_name is None:

246 step_info = try_job.test_results[-1].get('culprit', {}).get(step_name)	241 step_info = try_job.test_results[-1].get('culprit', {}).get(step_name)

247 if not step_info or step_info.get('tests'): # pragma: no cover.	242 if not step_info or step_info.get('tests'): # pragma: no cover.

248 # TODO(chanli): For some steps like checkperms/sizes/etc, the culprit	243 # TODO(chanli): For some steps like checkperms/sizes/etc, the culprit

249 # finding try-job might have test-level results.	244 # finding try-job might have test-level results.

250 return _TryJobStatus.FINISHED, None	245 return _TryJobStatus.FINISHED, None

251 return _TryJobStatus.FINISHED, step_info	246 return _TryJobStatus.FINISHED, step_info

252	247

253 task = WfSwarmingTask.Get(*build_util.GetBuildInfoFromId(try_job_key),	248 ref_name = (swarming_task.parameters.get('ref_name') if swarming_task and

254 step_name=step_name)	249 swarming_task.parameters else None)

255 ref_name = (task.parameters.get('ref_name') if task and task.parameters

256 else None)

257 return (	250 return (

258 _TryJobStatus.FINISHED, try_job.test_results[-1].get('culprit', {}).get(	251 _TryJobStatus.FINISHED, try_job.test_results[-1].get('culprit', {}).get(

259 ref_name or step_name, {}).get('tests', {}).get(test_name))	252 ref_name or step_name, {}).get('tests', {}).get(test_name))

260	253

261 def _CheckIsFlaky(self, try_job_map, step_name, test_name):	254 def _CheckIsFlaky(self, swarming_task, test_name):

262 """Checks if the test is flaky."""	255 """Checks if the test is flaky."""

263 if not try_job_map or not test_name:

264 return False

265

266 try_job_key = try_job_map.get(step_name, {}).get(test_name)

267 if not try_job_key:

268 return False

269

270 swarming_task = WfSwarmingTask.Get(

271 *build_util.GetBuildInfoFromId(try_job_key), step_name=step_name)

272 if not swarming_task or not swarming_task.classified_tests:	256 if not swarming_task or not swarming_task.classified_tests:

273 return False	257 return False

274	258

275 return test_name in swarming_task.classified_tests.get('flaky_tests', [])	259 return test_name in swarming_task.classified_tests.get('flaky_tests', [])

276	260

277 def _PopulateResult(	261 def _PopulateResult(

278 self, results, build, try_job_map, build_failure_type,	262 self, results, build, build_failure_type,heuristic_result, step_name,

279 heuristic_result, step_name, confidences, test_name=None):	263 confidences, swarming_task, try_job, test_name=None):

280 """Appends an analysis result for the given step or test.	264 """Appends an analysis result for the given step or test.

281	265

282 Try-job results are always given priority over heuristic results.	266 Try-job results are always given priority over heuristic results.

283 """	267 """

284 # Default to heuristic analysis.	268 # Default to heuristic analysis.

285 suspected_cls = heuristic_result['suspected_cls']	269 suspected_cls = heuristic_result['suspected_cls']

286 analysis_approach = _AnalysisApproach.HEURISTIC	270 analysis_approach = _AnalysisApproach.HEURISTIC

287	271

288 # Check if the test is flaky.	272 # Check if the test is flaky.

289 is_flaky_test = self._CheckIsFlaky(try_job_map, step_name, test_name)	273 is_flaky_test = self._CheckIsFlaky(swarming_task, test_name)

290	274

291 if is_flaky_test:	275 if is_flaky_test:

292 suspected_cls = []	276 suspected_cls = []

293 try_job_status = _TryJobStatus.FINISHED # There will be no try job.	277 try_job_status = _TryJobStatus.FINISHED # There will be no try job.

294 else:	278 else:

295 # Check analysis result from try-job.	279 # Check analysis result from try-job.

296 try_job_status, culprit = self._GetStatusAndCulpritFromTryJob(	280 try_job_status, culprit = self._GetStatusAndCulpritFromTryJob(

297 try_job_map, build_failure_type, step_name, test_name=test_name)	281 try_job, swarming_task, build_failure_type, step_name,

	282 test_name=test_name)

298 if culprit:	283 if culprit:

299 suspected_cls = [culprit]	284 suspected_cls = [culprit]

300 analysis_approach = _AnalysisApproach.TRY_JOB	285 analysis_approach = _AnalysisApproach.TRY_JOB

301	286 if (not is_flaky_test and not suspected_cls and

302 if not is_flaky_test and not suspected_cls:	287 not try_job_status == _TryJobStatus.RUNNING):

303 return	288 return

304	289

305 results.append(self._GenerateBuildFailureAnalysisResult(	290 results.append(self._GenerateBuildFailureAnalysisResult(

306 build, suspected_cls, step_name, heuristic_result['first_failure'],	291 build, suspected_cls, step_name, heuristic_result['first_failure'],

307 test_name, analysis_approach, confidences, try_job_status,	292 test_name, analysis_approach, confidences, try_job_status,

308 is_flaky_test))	293 is_flaky_test))

309	294

	295 def _GetAllSwarmingTasks(self, failure_result_map):

	296 """Returns all swarming tasks related to one build.

	297

	298 Args:

	299 failure_result_map (dict): Maps each step/test to the key of the build in

	300 which it failed the first time.

	301 {

	302 'step1': 'm/b/1',

	303 'step2': {

	304 'test1': 'm/b/1',

	305 'test2': 'm/b/2'

	306 }

	307 }

	308

	309 Returns:

	310 A dict of swarming tasks like below:

	311 {

	312 'step1': {

	313 'm/b/1': WfSwarmingTask(

	314 key=Key('WfBuild', 'm/b/1', 'WfSwarmingTask', 'step1'),...)

	315 },

	316 ...

	317 }

	318 """

	319 if not failure_result_map:

	320 return {}

	321

	322 swarming_tasks = defaultdict(dict)

	323 for step_name, step_map in failure_result_map.iteritems():

	324 if isinstance(step_map, basestring):

	325 swarming_tasks[step_name][step_map] = (

	326 WfSwarmingTask.Get(

	327 *build_util.GetBuildInfoFromId(step_map), step_name=step_name))

	328 else:

	329 for task_key in step_map.values():

	330 if not swarming_tasks[step_name].get(task_key):

	331 swarming_tasks[step_name][task_key] = (

	332 WfSwarmingTask.Get(*build_util.GetBuildInfoFromId(task_key),

	333 step_name=step_name))

	334

	335 return swarming_tasks

	336

	337 def _GetAllTryJobs(self, failure_result_map):

	338 """Returns all try jobs related to one build.

	339

	340 Args:

	341 failure_result_map (dict): Maps each step/test to the key of the build in

	342 which it failed the first time.

	343 {

	344 'step1': 'm/b/1',

	345 'step2': {

	346 'test1': 'm/b/1',

	347 'test2': 'm/b/2'

	348 }

	349 }

	350

	351 Returns:

	352 A dict of try jobs like below:

	353 {

	354 'm/b/1': WfTryJob(

	355 key=Key('WfBuild', 'm/b/1'),...)

	356 ...

	357 }

	358 """

	359 if not failure_result_map:

	360 return {}

	361

	362 try_jobs = {}

	363 for step_map in failure_result_map.values():

	364 if isinstance(step_map, basestring):

	365 try_jobs[step_map] = WfTryJob.Get(*step_map.split('/'))

	366 else:

	367 for task_key in step_map.values():

	368 if not try_jobs.get(task_key):

	369 try_jobs[task_key] = WfTryJob.Get(*task_key.split('/'))

	370

	371 return try_jobs

	372

	373 def _GetSwarmingTaskAndTryJobForFailure(

	374 self, step_name, test_name, failure_result_map, swarming_tasks, try_jobs):

	375 """Gets swarming task and try job for the specific step/test."""

	376 if not failure_result_map:

	377 return None, None

	378

	379 if test_name:

	380 try_job_key = failure_result_map.get(step_name, {}).get(test_name)

	381 else:

	382 try_job_key = failure_result_map.get(step_name)

	383

	384 # Get the swarming task for the step/test.

	385 swarming_task = swarming_tasks.get(step_name, {}).get(try_job_key)

	386

	387 # Get the try job for the step/test.

	388 try_job = try_jobs.get(try_job_key)

	389

	390 return swarming_task, try_job

	391

310 def _GenerateResultsForBuild(	392 def _GenerateResultsForBuild(

311 self, build, heuristic_analysis, results, confidences):	393 self, build, heuristic_analysis, results, confidences):

	394

	395 swarming_tasks = self._GetAllSwarmingTasks(

	396 heuristic_analysis.failure_result_map)

	397 try_jobs = self._GetAllTryJobs(heuristic_analysis.failure_result_map)

	398

312 for failure in heuristic_analysis.result['failures']:	399 for failure in heuristic_analysis.result['failures']:

	400 step_name = failure['step_name']

313 if failure.get('tests'): # Test-level analysis.	401 if failure.get('tests'): # Test-level analysis.

314 for test in failure['tests']:	402 for test in failure['tests']:

	403 test_name = test['test_name']

	404 swarming_task, try_job = self._GetSwarmingTaskAndTryJobForFailure(

	405 step_name, test_name, heuristic_analysis.failure_result_map,

	406 swarming_tasks, try_jobs)

	407

315 self._PopulateResult(	408 self._PopulateResult(

316 results, build, heuristic_analysis.failure_result_map,	409 results, build, heuristic_analysis.failure_type, test,

317 heuristic_analysis.failure_type, test,	410 step_name, confidences, swarming_task, try_job,

318 failure['step_name'], confidences, test_name=test['test_name'])	411 test_name=test_name)

319 else:	412 else:

	413 swarming_task, try_job = self._GetSwarmingTaskAndTryJobForFailure(

	414 step_name, None, heuristic_analysis.failure_result_map,

	415 swarming_tasks, try_jobs)

320 self._PopulateResult(	416 self._PopulateResult(

321 results, build, heuristic_analysis.failure_result_map,	417 results, build, heuristic_analysis.failure_type, failure,

322 heuristic_analysis.failure_type, failure, failure['step_name'],	418 step_name, confidences, swarming_task, try_job)

323 confidences)

324	419

325 @endpoints.method(	420 @endpoints.method(

326 _BuildFailureCollection, _BuildFailureAnalysisResultCollection,	421 _BuildFailureCollection, _BuildFailureAnalysisResultCollection,

327 path='buildfailure', name='buildfailure')	422 path='buildfailure', name='buildfailure')

328 def AnalyzeBuildFailures(self, request):	423 def AnalyzeBuildFailures(self, request):

329 """Returns analysis results for the given build failures in the request.	424 """Returns analysis results for the given build failures in the request.

330	425

331 Analysis of build failures will be triggered automatically on demand.	426 Analysis of build failures will be triggered automatically on demand.

332	427

333 Args:	428 Args:

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
407	502

408 try:	503 try:

409 _AsyncProcessFlakeReport(flake_analysis_request, user_email, is_admin)	504 _AsyncProcessFlakeReport(flake_analysis_request, user_email, is_admin)

410 queued = True	505 queued = True

411 except Exception:	506 except Exception:

412 # Ignore the report if queueing it for async processing fails.	507 # Ignore the report if queueing it for async processing fails.

413 queued = False	508 queued = False

414 logging.exception('Failed to queue flake report for async processing')	509 logging.exception('Failed to queue flake report for async processing')

415	510

416 return _FlakeAnalysis(queued=queued)	511 return _FlakeAnalysis(queued=queued)

OLD	NEW

« no previous file with comments | « no previous file | appengine/findit/test/findit_api_test.py » ('j') | no next file with comments »