appengine/findit/findit_api.py - Issue 2439553002: [Findit] Reduce redundant ndb reads by querying necessary entities ahead of time and share them amo…

Side by Side Diff: appengine/findit/findit_api.py

Issue 2439553002: [Findit] Reduce redundant ndb reads by querying necessary entities ahead of time and share them amo… (Closed)

Patch Set: fix nit. Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 """This module is to provide Findit service APIs through Cloud Endpoints:	5 """This module is to provide Findit service APIs through Cloud Endpoints:

6	6

7 Current APIs include:	7 Current APIs include:

8 1. Analysis of compile/test failures in Chromium waterfalls.	8 1. Analysis of compile/test failures in Chromium waterfalls.

9 Analyzes failures and detects suspected CLs.	9 Analyzes failures and detects suspected CLs.

10 2. Analysis of flakes on Commit Queue.	10 2. Analysis of flakes on Commit Queue.

11 """	11 """

12	12

	13 from collections import defaultdict

13 import json	14 import json

14 import logging	15 import logging

15	16

16 import endpoints	17 import endpoints

17 from google.appengine.api import taskqueue	18 from google.appengine.api import taskqueue

18 from protorpc import messages	19 from protorpc import messages

19 from protorpc import remote	20 from protorpc import remote

20	21

21 from common import appengine_util	22 from common import appengine_util

22 from common import auth_util	23 from common import auth_util

(...skipping 171 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
194 step_name=step_name,	195 step_name=step_name,

195 is_sub_test=test_name is not None,	196 is_sub_test=test_name is not None,

196 test_name=test_name,	197 test_name=test_name,

197 first_known_failed_build_number=first_failure,	198 first_known_failed_build_number=first_failure,

198 suspected_cls=suspected_cls,	199 suspected_cls=suspected_cls,

199 analysis_approach=analysis_approach,	200 analysis_approach=analysis_approach,

200 try_job_status=try_job_status,	201 try_job_status=try_job_status,

201 is_flaky_test=is_flaky_test)	202 is_flaky_test=is_flaky_test)

202	203

203 def _GetStatusAndCulpritFromTryJob(	204 def _GetStatusAndCulpritFromTryJob(

204 self, try_job_map, build_failure_type, step_name, test_name=None):	205 self, try_job, swarming_task, build_failure_type, step_name,

	206 test_name=None):

205 """Returns the culprit found by try-job for the given step or test."""	207 """Returns the culprit found by try-job for the given step or test."""

206 if not try_job_map:

207 return _TryJobStatus.FINISHED, None

208	208

209 if test_name is None:

210 try_job_key = try_job_map.get(step_name)

211 else:

212 try_job_key = try_job_map.get(step_name, {}).get(test_name)

213

214 if not try_job_key:

215 return _TryJobStatus.FINISHED, None

216

217 try_job = WfTryJob.Get(*try_job_key.split('/'))

218 if not try_job or try_job.failed:	209 if not try_job or try_job.failed:

219 return _TryJobStatus.FINISHED, None	210 return _TryJobStatus.FINISHED, None

220	211

221 if not try_job.completed:	212 if not try_job.completed:

222 return _TryJobStatus.RUNNING, None	213 return _TryJobStatus.RUNNING, None

223	214

224 if build_failure_type == failure_type.COMPILE:	215 if build_failure_type == failure_type.COMPILE:

225 if not try_job.compile_results: # pragma: no cover.	216 if not try_job.compile_results: # pragma: no cover.

226 return _TryJobStatus.FINISHED, None	217 return _TryJobStatus.FINISHED, None

227 return (	218 return (

228 _TryJobStatus.FINISHED,	219 _TryJobStatus.FINISHED,

229 try_job.compile_results[-1].get('culprit', {}).get(step_name))	220 try_job.compile_results[-1].get('culprit', {}).get(step_name))

230	221

231 if not try_job.test_results: # pragma: no cover.	222 if not try_job.test_results: # pragma: no cover.

232 return _TryJobStatus.FINISHED, None	223 return _TryJobStatus.FINISHED, None

233	224

234 if test_name is None:	225 if test_name is None:

235 step_info = try_job.test_results[-1].get('culprit', {}).get(step_name)	226 step_info = try_job.test_results[-1].get('culprit', {}).get(step_name)

236 if not step_info or step_info.get('tests'): # pragma: no cover.	227 if not step_info or step_info.get('tests'): # pragma: no cover.

237 # TODO(chanli): For some steps like checkperms/sizes/etc, the culprit	228 # TODO(chanli): For some steps like checkperms/sizes/etc, the culprit

238 # finding try-job might have test-level results.	229 # finding try-job might have test-level results.

239 return _TryJobStatus.FINISHED, None	230 return _TryJobStatus.FINISHED, None

240 return _TryJobStatus.FINISHED, step_info	231 return _TryJobStatus.FINISHED, step_info

241	232

242 task = WfSwarmingTask.Get(*try_job_key.split('/'), step_name=step_name)	233 ref_name = (swarming_task.parameters.get('ref_name') if swarming_task and

243 ref_name = (task.parameters.get('ref_name') if task and task.parameters	234 swarming_task.parameters else None)

244 else None)

245 return (	235 return (

246 _TryJobStatus.FINISHED, try_job.test_results[-1].get('culprit', {}).get(	236 _TryJobStatus.FINISHED, try_job.test_results[-1].get('culprit', {}).get(

247 ref_name or step_name, {}).get('tests', {}).get(test_name))	237 ref_name or step_name, {}).get('tests', {}).get(test_name))

248	238

249 def _CheckIsFlaky(self, try_job_map, step_name, test_name):	239 def _CheckIsFlaky(self, swarming_task, test_name):

250 """Checks if the test is flaky."""	240 """Checks if the test is flaky."""

251 if not try_job_map or not test_name:

252 return False

253

254 try_job_key = try_job_map.get(step_name, {}).get(test_name)

255 if not try_job_key:

256 return False

257

258 swarming_task = WfSwarmingTask.Get(*try_job_key.split('/'),

259 step_name=step_name)

260 if not swarming_task or not swarming_task.classified_tests:	241 if not swarming_task or not swarming_task.classified_tests:

261 return False	242 return False
	stgao 2016/10/22 00:29:03
	So for tests, there are three status:
	1. Reliable
	2. Flaky
	3. Unclassified
	It seems Reliable + Unclassified are treated as Reliable here.

	chanli 2016/10/24 18:42:34
	On 2016/10/22 00:29:03, stgao wrote:
	> So for tests, there are three status:
	> 1. Reliable
	> 2. Flaky
	> 3. Unclassified
	>
	> It seems Reliable + Unclassified are treated as Reliable here.
	We don't have swarming tasks and try jobs for unclassified tests, so we cannot say they are flaky; and if we have heuristic results for them, we have to treat them as reliable tests and display what we found on SoM. What makes it better is that we now have confidence score, so it should provide some level of guidance to sheriffs about how trustworthy the result is.

	stgao 2016/10/25 17:24:04
	On 2016/10/24 18:42:34, chanli wrote:
	> On 2016/10/22 00:29:03, stgao wrote:
	> > So for tests, there are three status:
	> > 1. Reliable
	> > 2. Flaky
	> > 3. Unclassified
	> >
	> > It seems Reliable + Unclassified are treated as Reliable here.
	>
	> We don't have swarming tasks and try jobs for unclassified tests, so we cannot
	> say they are flaky;
	Sorry for the confusion. My question here is: what "False" mean here? If it means reliable, why Unclassified ones are treated as reliable even we don't know for sure?
	> and if we have heuristic results for them, we have to treat
	> them as reliable tests
	As it is determined to be flaky by Swarm rerun, what's the consideration for a flaky test to be treated as reliable one here?
	> and display what we found on SoM. What makes it better is
	> that we now have confidence score, so it should provide some level of guidance
	> to sheriffs about how trustworthy the result is.
chanli 2016/10/26 03:37:00
On 2016/10/25 17:24:04, stgao wrote:
> On 2016/10/24 18:42:34, chanli wrote:
> > On 2016/10/22 00:29:03, stgao wrote:
> > > So for tests, there are three status:
> > > 1. Reliable
> > > 2. Flaky
> > > 3. Unclassified
> > >
> > > It seems Reliable + Unclassified are treated as Reliable here.
> >
> > We don't have swarming tasks and try jobs for unclassified tests, so we cannot
> > say they are flaky;
>
> Sorry for the confusion. My question here is: what "False" mean here? If it
> means reliable, why Unclassified ones are treated as reliable even we don't know
> for sure?
>
> > and if we have heuristic results for them, we have to treat
> > them as reliable tests
>
> As it is determined to be flaky by Swarm rerun, what's the consideration for a
> flaky test to be treated as reliable one here?
>
> > and display what we found on SoM. What makes it better is
> > that we now have confidence score, so it should provide some level of guidance
> > to sheriffs about how trustworthy the result is.
>
Basically true means Findit thinks the failure is flaky, and false means everything else. And for unclassified failures, the results are not necessarily to be wrong, so we should show the result with confidence score
262	243

263 return test_name in swarming_task.classified_tests.get('flaky_tests', [])	244 return test_name in swarming_task.classified_tests.get('flaky_tests', [])

264	245

265 def _PopulateResult(	246 def _PopulateResult(

266 self, results, build, try_job_map, build_failure_type,	247 self, results, build, build_failure_type,heuristic_result, step_name,

267 heuristic_result, step_name, confidences, test_name=None):	248 confidences, swarming_task, try_job, test_name=None):

268 """Appends an analysis result for the given step or test.	249 """Appends an analysis result for the given step or test.

269	250

270 Try-job results are always given priority over heuristic results.	251 Try-job results are always given priority over heuristic results.

271 """	252 """

272 # Default to heuristic analysis.	253 # Default to heuristic analysis.

273 suspected_cls = heuristic_result['suspected_cls']	254 suspected_cls = heuristic_result['suspected_cls']

274 analysis_approach = _AnalysisApproach.HEURISTIC	255 analysis_approach = _AnalysisApproach.HEURISTIC

275	256

276 # Check if the test is flaky.	257 # Check if the test is flaky.

277 is_flaky_test = self._CheckIsFlaky(try_job_map, step_name, test_name)	258 is_flaky_test = self._CheckIsFlaky(swarming_task, test_name)

278	259

279 if is_flaky_test:	260 if is_flaky_test:

280 suspected_cls = []	261 suspected_cls = []

281 try_job_status = _TryJobStatus.FINISHED # There will be no try job.	262 try_job_status = _TryJobStatus.FINISHED # There will be no try job.

282 else:	263 else:

283 # Check analysis result from try-job.	264 # Check analysis result from try-job.

284 try_job_status, culprit = self._GetStatusAndCulpritFromTryJob(	265 try_job_status, culprit = self._GetStatusAndCulpritFromTryJob(

285 try_job_map, build_failure_type, step_name, test_name=test_name)	266 try_job, swarming_task, build_failure_type, step_name,

	267 test_name=test_name)

286 if culprit:	268 if culprit:

287 suspected_cls = [culprit]	269 suspected_cls = [culprit]

288 analysis_approach = _AnalysisApproach.TRY_JOB	270 analysis_approach = _AnalysisApproach.TRY_JOB

289	271

290 if not is_flaky_test and not suspected_cls:	272 if not is_flaky_test and not suspected_cls:

291 return	273 return

292	274

293 results.append(self._GenerateBuildFailureAnalysisResult(	275 results.append(self._GenerateBuildFailureAnalysisResult(

294 build, suspected_cls, step_name, heuristic_result['first_failure'],	276 build, suspected_cls, step_name, heuristic_result['first_failure'],

295 test_name, analysis_approach, confidences, try_job_status,	277 test_name, analysis_approach, confidences, try_job_status,

296 is_flaky_test))	278 is_flaky_test))

297	279

	280 def _GetAllSwarmingTasks(self, failure_result_map):

	281 """Returns all swarming tasks related to one build.

	282

	283 Returns:

	284 A dict of swarming tasks like below:

	285 {

	286 'step1': {

	287 'm/b/1': WfSwarmingTask(

	288 key=Key('WfBuild', 'm/b/1', 'WfSwarmingTask', 'step1'),...)

	289 },

	290 ...

	291 }

	292 """

	293 if not failure_result_map:

	294 return {}

	295

	296 swarming_tasks = defaultdict(dict)

	297 for step_name, step_map in failure_result_map.iteritems():

	298 if isinstance(step_map, basestring):

	299 swarming_tasks[step_name][step_map] = (

	300 WfSwarmingTask.Get(*step_map.split('/'), step_name=step_name))
	stgao 2016/10/22 00:29:03
	again: encode & decode should have a function.

	chanli 2016/10/24 18:42:34
	On 2016/10/22 00:29:03, stgao wrote:
	> again: encode & decode should have a function.
	Done.
	301 else:
	lijeffrey 2016/10/19 20:35:25
	nit: add a comment here for the type of structure of step_map

	chanli 2016/10/24 18:42:34
	On 2016/10/19 20:35:25, lijeffrey wrote:
	> nit: add a comment here for the type of structure of step_map
	Added Args in docstring.
	302 for task_key in step_map.values():

	303 if not swarming_tasks[step_name].get(task_key):

	304 swarming_tasks[step_name][task_key] = (

	305 WfSwarmingTask.Get(*task_key.split('/'), step_name=step_name))

	306

	307 return swarming_tasks

	308

	309 def _GetAllTryJobs(self, failure_result_map):

	310 """Returns all try jobs related to one build.

	311

	312 Returns:

	313 A dict of try jobs like below:

	314 {

	315 'm/b/1': WfTryJob(

	316 key=Key('WfBuild', 'm/b/1'),...)

	317 ...

	318 }

	319 """

	320 if not failure_result_map:

	321 return {}

	322

	323 try_jobs = {}

	324 for step_map in failure_result_map.values():

	325 if isinstance(step_map, basestring):

	326 try_jobs[step_map] = WfTryJob.Get(*step_map.split('/'))

	327 else:
	lijeffrey 2016/10/19 20:35:24
	nit: same here

	chanli 2016/10/24 18:42:34
	On 2016/10/19 20:35:24, lijeffrey wrote:
	> nit: same here
	Done.
	328 for task_key in step_map.values():

	329 if not try_jobs.get(task_key):

	330 try_jobs[task_key] = WfTryJob.Get(*task_key.split('/'))

	331

	332 return try_jobs

	333

	334 def _GetSwarmingTaskAndTryJobForFailure(

	335 self, step_name, test_name, failure_result_map, swarming_tasks, try_jobs):
	lijeffrey 2016/10/19 20:35:25
	nit: please add a docstring for this function

	chanli 2016/10/24 18:42:34
	On 2016/10/19 20:35:25, lijeffrey wrote:
	> nit: please add a docstring for this function
	Done.
	336 if not failure_result_map:

	337 return None, None

	338

	339 if test_name:

	340 try_job_key = failure_result_map.get(step_name, {}).get(test_name)

	341 else:

	342 try_job_key = failure_result_map.get(step_name)

	343

	344 # Gets the swarming task for the test.

	345 swarming_task = swarming_tasks.get(step_name, {}).get(try_job_key)

	346 # Get the try job for the step/ test.
	lijeffrey 2016/10/19 20:35:24
	nit: remove extra space before 'test' and add 1 empty line before this comment

	chanli 2016/10/24 18:42:34
	On 2016/10/19 20:35:24, lijeffrey wrote:
	> nit: remove extra space before 'test' and add 1 empty line before this comment
	Done.
	347 try_job = try_jobs.get(try_job_key)

	348

	349 return swarming_task, try_job

	350

298 def _GenerateResultsForBuild(	351 def _GenerateResultsForBuild(

299 self, build, heuristic_analysis, results, confidences):	352 self, build, heuristic_analysis, results, confidences):

	353

	354 swarming_tasks = self._GetAllSwarmingTasks(

	355 heuristic_analysis.failure_result_map)

	356 try_jobs = self._GetAllTryJobs(heuristic_analysis.failure_result_map)

	357

300 for failure in heuristic_analysis.result['failures']:	358 for failure in heuristic_analysis.result['failures']:

	359 step_name = failure['step_name']

301 if failure.get('tests'): # Test-level analysis.	360 if failure.get('tests'): # Test-level analysis.

302 for test in failure['tests']:	361 for test in failure['tests']:

	362 test_name = test['test_name']

	363 swarming_task, try_job = self._GetSwarmingTaskAndTryJobForFailure(

	364 step_name, test_name, heuristic_analysis.failure_result_map,

	365 swarming_tasks, try_jobs)

	366

303 self._PopulateResult(	367 self._PopulateResult(

304 results, build, heuristic_analysis.failure_result_map,	368 results, build, heuristic_analysis.failure_type, test,

305 heuristic_analysis.failure_type, test,	369 step_name, confidences, swarming_task, try_job,

306 failure['step_name'], confidences, test_name=test['test_name'])	370 test_name=test_name)

307 else:	371 else:

	372 swarming_task, try_job = self._GetSwarmingTaskAndTryJobForFailure(

	373 step_name, None, heuristic_analysis.failure_result_map,

	374 swarming_tasks, try_jobs)

308 self._PopulateResult(	375 self._PopulateResult(

309 results, build, heuristic_analysis.failure_result_map,	376 results, build, heuristic_analysis.failure_type, failure,

310 heuristic_analysis.failure_type, failure, failure['step_name'],	377 step_name, confidences, swarming_task, try_job)

311 confidences)

312	378

313 @endpoints.method(	379 @endpoints.method(

314 _BuildFailureCollection, _BuildFailureAnalysisResultCollection,	380 _BuildFailureCollection, _BuildFailureAnalysisResultCollection,

315 path='buildfailure', name='buildfailure')	381 path='buildfailure', name='buildfailure')

316 def AnalyzeBuildFailures(self, request):	382 def AnalyzeBuildFailures(self, request):

317 """Returns analysis results for the given build failures in the request.	383 """Returns analysis results for the given build failures in the request.

318	384

319 Analysis of build failures will be triggered automatically on demand.	385 Analysis of build failures will be triggered automatically on demand.

320	386

321 Args:	387 Args:

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
388	454

389 logging.info('Flake: %s', CreateFlakeAnalysisRequest(request))	455 logging.info('Flake: %s', CreateFlakeAnalysisRequest(request))

390 analysis_triggered = flake_analysis_service.ScheduleAnalysisForFlake(	456 analysis_triggered = flake_analysis_service.ScheduleAnalysisForFlake(

391 CreateFlakeAnalysisRequest(request), user_email, is_admin,	457 CreateFlakeAnalysisRequest(request), user_email, is_admin,

392 triggering_sources.FINDIT_API)	458 triggering_sources.FINDIT_API)

393	459

394 if analysis_triggered is None:	460 if analysis_triggered is None:

395 raise endpoints.UnauthorizedException(	461 raise endpoints.UnauthorizedException(

396 'No permission for a new analysis! User is %s' % user_email)	462 'No permission for a new analysis! User is %s' % user_email)

397	463

398 return _FlakeAnalysis(analysis_triggered=analysis_triggered)	464 return _FlakeAnalysis(analysis_triggered=analysis_triggered)

OLD	NEW

« no previous file with comments | « no previous file | appengine/findit/test/findit_api_test.py » ('j') | no next file with comments »