appengine/findit/findit_api.py - Issue 2439553002: [Findit] Reduce redundant ndb reads by querying necessary entities ahead of time and share them amo…

Side by Side Diff: appengine/findit/findit_api.py

Issue 2439553002: [Findit] Reduce redundant ndb reads by querying necessary entities ahead of time and share them amo… (Closed)

Patch Set: fix a bug on confidence score. Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 """This module is to provide Findit service APIs through Cloud Endpoints:	5 """This module is to provide Findit service APIs through Cloud Endpoints:

6	6

7 Current APIs include:	7 Current APIs include:

8 1. Analysis of compile/test failures in Chromium waterfalls.	8 1. Analysis of compile/test failures in Chromium waterfalls.

9 Analyzes failures and detects suspected CLs.	9 Analyzes failures and detects suspected CLs.

10 2. Analysis of flakes on Commit Queue.	10 2. Analysis of flakes on Commit Queue.

11 """	11 """

12	12

	13 from collections import defaultdict

13 import json	14 import json

14 import logging	15 import logging

15 import pickle	16 import pickle

16	17

17 import endpoints	18 import endpoints

18 from google.appengine.api import taskqueue	19 from google.appengine.api import taskqueue

19 from protorpc import messages	20 from protorpc import messages

20 from protorpc import remote	21 from protorpc import remote

21	22

22 from common import appengine_util	23 from common import appengine_util

23 from common import auth_util	24 from common import auth_util

24 from common import constants	25 from common import constants

25 from common import time_util	26 from common import time_util

26 from common.waterfall import failure_type	27 from common.waterfall import failure_type

27 from model import analysis_approach_type	28 from model import analysis_approach_type

	29 from model import analysis_status

28 from model.flake.flake_analysis_request import FlakeAnalysisRequest	30 from model.flake.flake_analysis_request import FlakeAnalysisRequest

29 from model.suspected_cl_confidence import SuspectedCLConfidence	31 from model.suspected_cl_confidence import SuspectedCLConfidence

30 from model.wf_analysis import WfAnalysis	32 from model.wf_analysis import WfAnalysis

31 from model.wf_suspected_cl import WfSuspectedCL	33 from model.wf_suspected_cl import WfSuspectedCL

32 from model.wf_swarming_task import WfSwarmingTask	34 from model.wf_swarming_task import WfSwarmingTask

33 from model.wf_try_job import WfTryJob	35 from model.wf_try_job import WfTryJob

34 from waterfall import build_util	36 from waterfall import build_util

35 from waterfall import buildbot	37 from waterfall import buildbot

36 from waterfall import suspected_cl_util	38 from waterfall import suspected_cl_util

37 from waterfall import waterfall_config	39 from waterfall import waterfall_config

(...skipping 123 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
161 cl = WfSuspectedCL.Get(repo_name, revision)	163 cl = WfSuspectedCL.Get(repo_name, revision)

162 if not cl:	164 if not cl:

163 return None, None	165 return None, None

164	166

165 master_name = buildbot.GetMasterNameFromUrl(build.master_url)	167 master_name = buildbot.GetMasterNameFromUrl(build.master_url)

166 builder_name = build.builder_name	168 builder_name = build.builder_name

167 current_build = build.build_number	169 current_build = build.build_number

168	170

169 # If the CL is found by a try job, only the first failure will be recorded.	171 # If the CL is found by a try job, only the first failure will be recorded.

170 # So we might need to go to the first failure to get CL information.	172 # So we might need to go to the first failure to get CL information.

171 build_info = (cl.GetBuildInfo(master_name, builder_name, current_build) or	173 build_info = cl.GetBuildInfo(master_name, builder_name, current_build)

172 cl.GetBuildInfo(master_name, builder_name, first_failure))	174 first_build_info = cl.GetBuildInfo(master_name, builder_name, first_failure)

173	175 return suspected_cl_util.GetSuspectedCLConfidenceScoreAndApproach(

174 confidence = suspected_cl_util.GetSuspectedCLConfidenceScore(	176 confidences, build_info, first_build_info)

175 confidences, build_info)

176

177 cl_approach = (

178 _AnalysisApproach.TRY_JOB if analysis_approach_type.TRY_JOB in

179 build_info['approaches'] else _AnalysisApproach.HEURISTIC)

180

181 return confidence, cl_approach

182	177

183 def _GenerateBuildFailureAnalysisResult(	178 def _GenerateBuildFailureAnalysisResult(

184 self, build, suspected_cls_in_result, step_name, first_failure, test_name,	179 self, build, suspected_cls_in_result, step_name, first_failure, test_name,

185 analysis_approach, confidences, try_job_status, is_flaky_test):	180 analysis_approach, confidences, try_job_status, is_flaky_test):

186	181

187 suspected_cls = []	182 suspected_cls = []

188 for suspected_cl in suspected_cls_in_result:	183 for suspected_cl in suspected_cls_in_result:

189 repo_name = suspected_cl['repo_name']	184 repo_name = suspected_cl['repo_name']

190 revision = suspected_cl['revision']	185 revision = suspected_cl['revision']

191 commit_position = suspected_cl['commit_position']	186 commit_position = suspected_cl['commit_position']

192 confidence, cl_approach = self._GetConfidenceAndApproachForCL(	187 confidence, cl_approach = self._GetConfidenceAndApproachForCL(

193 repo_name, revision, confidences, build, first_failure)	188 repo_name, revision, confidences, build, first_failure)

194 cl_approach = cl_approach or analysis_approach	189 if cl_approach:

	190 cl_approach = (

	191 _AnalysisApproach.HEURISTIC if

	192 cl_approach == analysis_approach_type.HEURISTIC else

	193 _AnalysisApproach.TRY_JOB)

	194 else:

	195 cl_approach = analysis_approach

195	196

196 suspected_cls.append(_SuspectedCL(	197 suspected_cls.append(_SuspectedCL(

197 repo_name=repo_name, revision=revision,	198 repo_name=repo_name, revision=revision,

198 commit_position=commit_position, confidence=confidence,	199 commit_position=commit_position, confidence=confidence,

199 analysis_approach=cl_approach))	200 analysis_approach=cl_approach))

200 logging.info('build_number:%s', build.build_number)

201 logging.info('step_name:%s', step_name)

202 logging.info('is_flaky_test: %s', is_flaky_test)

203	201

204 return _BuildFailureAnalysisResult(	202 return _BuildFailureAnalysisResult(

205 master_url=build.master_url,	203 master_url=build.master_url,

206 builder_name=build.builder_name,	204 builder_name=build.builder_name,

207 build_number=build.build_number,	205 build_number=build.build_number,

208 step_name=step_name,	206 step_name=step_name,

209 is_sub_test=test_name is not None,	207 is_sub_test=test_name is not None,

210 test_name=test_name,	208 test_name=test_name,

211 first_known_failed_build_number=first_failure,	209 first_known_failed_build_number=first_failure,

212 suspected_cls=suspected_cls,	210 suspected_cls=suspected_cls,

213 analysis_approach=analysis_approach,	211 analysis_approach=analysis_approach,

214 try_job_status=try_job_status,	212 try_job_status=try_job_status,

215 is_flaky_test=is_flaky_test)	213 is_flaky_test=is_flaky_test)

216	214

217 def _GetStatusAndCulpritFromTryJob(	215 def _GetStatusAndCulpritFromTryJob(

218 self, try_job_map, build_failure_type, step_name, test_name=None):	216 self, try_job, swarming_task, build_failure_type, step_name,

	217 test_name=None):

219 """Returns the culprit found by try-job for the given step or test."""	218 """Returns the culprit found by try-job for the given step or test."""

220 if not try_job_map:

221 return _TryJobStatus.FINISHED, None

222	219

223 if test_name is None:	220 if swarming_task and swarming_task.status in (

224 try_job_key = try_job_map.get(step_name)	221 analysis_status.PENDING, analysis_status.RUNNING):

225 else:	222 return _TryJobStatus.RUNNING, None

226 try_job_key = try_job_map.get(step_name, {}).get(test_name)

227	223

228 if not try_job_key:

229 return _TryJobStatus.FINISHED, None

230

231 try_job = WfTryJob.Get(*build_util.GetBuildInfoFromId(try_job_key))

232 if not try_job or try_job.failed:	224 if not try_job or try_job.failed:

233 return _TryJobStatus.FINISHED, None	225 return _TryJobStatus.FINISHED, None

234	226

235 if not try_job.completed:	227 if not try_job.completed:

236 return _TryJobStatus.RUNNING, None	228 return _TryJobStatus.RUNNING, None

237	229

238 if build_failure_type == failure_type.COMPILE:	230 if build_failure_type == failure_type.COMPILE:

239 if not try_job.compile_results: # pragma: no cover.	231 if not try_job.compile_results: # pragma: no cover.

240 return _TryJobStatus.FINISHED, None	232 return _TryJobStatus.FINISHED, None

241 return (	233 return (

242 _TryJobStatus.FINISHED,	234 _TryJobStatus.FINISHED,

243 try_job.compile_results[-1].get('culprit', {}).get(step_name))	235 try_job.compile_results[-1].get('culprit', {}).get(step_name))

244	236

245 if not try_job.test_results: # pragma: no cover.	237 if not try_job.test_results: # pragma: no cover.

246 return _TryJobStatus.FINISHED, None	238 return _TryJobStatus.FINISHED, None

247	239

248 if test_name is None:	240 if test_name is None:

249 step_info = try_job.test_results[-1].get('culprit', {}).get(step_name)	241 step_info = try_job.test_results[-1].get('culprit', {}).get(step_name)

250 if not step_info or step_info.get('tests'): # pragma: no cover.	242 if not step_info or step_info.get('tests'): # pragma: no cover.

251 # TODO(chanli): For some steps like checkperms/sizes/etc, the culprit	243 # TODO(chanli): For some steps like checkperms/sizes/etc, the culprit

252 # finding try-job might have test-level results.	244 # finding try-job might have test-level results.

253 return _TryJobStatus.FINISHED, None	245 return _TryJobStatus.FINISHED, None

254 return _TryJobStatus.FINISHED, step_info	246 return _TryJobStatus.FINISHED, step_info

255	247

256 task = WfSwarmingTask.Get(*build_util.GetBuildInfoFromId(try_job_key),	248 ref_name = (swarming_task.parameters.get('ref_name') if swarming_task and

257 step_name=step_name)	249 swarming_task.parameters else None)

258 ref_name = (task.parameters.get('ref_name') if task and task.parameters

259 else None)

260 return (	250 return (

261 _TryJobStatus.FINISHED, try_job.test_results[-1].get('culprit', {}).get(	251 _TryJobStatus.FINISHED, try_job.test_results[-1].get('culprit', {}).get(

262 ref_name or step_name, {}).get('tests', {}).get(test_name))	252 ref_name or step_name, {}).get('tests', {}).get(test_name))

263	253

264 def _CheckIsFlaky(self, try_job_map, step_name, test_name):	254 def _CheckIsFlaky(self, swarming_task, test_name):

265 """Checks if the test is flaky."""	255 """Checks if the test is flaky."""

266 if not try_job_map or not test_name:

267 return False

268

269 try_job_key = try_job_map.get(step_name, {}).get(test_name)

270 if not try_job_key:

271 return False

272

273 swarming_task = WfSwarmingTask.Get(

274 *build_util.GetBuildInfoFromId(try_job_key), step_name=step_name)

275 if not swarming_task or not swarming_task.classified_tests:	256 if not swarming_task or not swarming_task.classified_tests:

276 return False	257 return False

277	258

278 return test_name in swarming_task.classified_tests.get('flaky_tests', [])	259 return test_name in swarming_task.classified_tests.get('flaky_tests', [])

279	260

280 def _PopulateResult(	261 def _PopulateResult(

281 self, results, build, try_job_map, build_failure_type,	262 self, results, build, build_failure_type,heuristic_result, step_name,

282 heuristic_result, step_name, confidences, test_name=None):	263 confidences, swarming_task, try_job, test_name=None):

283 """Appends an analysis result for the given step or test.	264 """Appends an analysis result for the given step or test.

284	265

285 Try-job results are always given priority over heuristic results.	266 Try-job results are always given priority over heuristic results.

286 """	267 """

287 # Default to heuristic analysis.	268 # Default to heuristic analysis.

288 suspected_cls = heuristic_result['suspected_cls']	269 suspected_cls = heuristic_result['suspected_cls']

289 analysis_approach = _AnalysisApproach.HEURISTIC	270 analysis_approach = _AnalysisApproach.HEURISTIC

290	271

291 # Check if the test is flaky.	272 # Check if the test is flaky.

292 is_flaky_test = self._CheckIsFlaky(try_job_map, step_name, test_name)	273 is_flaky_test = self._CheckIsFlaky(swarming_task, test_name)

293	274

294 if is_flaky_test:	275 if is_flaky_test:

295 suspected_cls = []	276 suspected_cls = []

296 try_job_status = _TryJobStatus.FINISHED # There will be no try job.	277 try_job_status = _TryJobStatus.FINISHED # There will be no try job.

297 else:	278 else:

298 # Check analysis result from try-job.	279 # Check analysis result from try-job.

299 try_job_status, culprit = self._GetStatusAndCulpritFromTryJob(	280 try_job_status, culprit = self._GetStatusAndCulpritFromTryJob(

300 try_job_map, build_failure_type, step_name, test_name=test_name)	281 try_job, swarming_task, build_failure_type, step_name,

	282 test_name=test_name)

301 if culprit:	283 if culprit:

302 suspected_cls = [culprit]	284 suspected_cls = [culprit]

303 analysis_approach = _AnalysisApproach.TRY_JOB	285 analysis_approach = _AnalysisApproach.TRY_JOB

304	286 if (not is_flaky_test and not suspected_cls and

305 if not is_flaky_test and not suspected_cls:	287 not try_job_status == _TryJobStatus.RUNNING):

306 return	288 return

307	289

308 results.append(self._GenerateBuildFailureAnalysisResult(	290 results.append(self._GenerateBuildFailureAnalysisResult(

309 build, suspected_cls, step_name, heuristic_result['first_failure'],	291 build, suspected_cls, step_name, heuristic_result['first_failure'],

310 test_name, analysis_approach, confidences, try_job_status,	292 test_name, analysis_approach, confidences, try_job_status,

311 is_flaky_test))	293 is_flaky_test))

312	294

	295 def _GetAllSwarmingTasks(self, failure_result_map):

	296 """Returns all swarming tasks related to one build.

	297

	298 Args:

	299 failure_result_map (dict): Maps each step/test to the key of the build in

	300 which it first failed, e.g.:

	301 {

	302 'step1': 'm/b/1',

	303 'step2': {

	304 'test1': 'm/b/1',

	305 'test2': 'm/b/2'

	306 }

	307 }

	308

	309 Returns:

	310 A dict of swarming tasks like below:

	311 {

	312 'step1': {

	313 'm/b/1': WfSwarmingTask(

	314 key=Key('WfBuild', 'm/b/1', 'WfSwarmingTask', 'step1'),...)

	315 },

	316 ...

	317 }

	318 """

	319 if not failure_result_map:

	320 return {}

	321

	322 swarming_tasks = defaultdict(dict)

	323 for step_name, step_map in failure_result_map.iteritems():

	324 if isinstance(step_map, basestring):

	325 swarming_tasks[step_name][step_map] = (

	326 WfSwarmingTask.Get(

	327 *build_util.GetBuildInfoFromId(step_map), step_name=step_name))

	328 else:

	329 for task_key in step_map.values():

	330 if not swarming_tasks[step_name].get(task_key):

	331 swarming_tasks[step_name][task_key] = (

	332 WfSwarmingTask.Get(*build_util.GetBuildInfoFromId(task_key),

	333 step_name=step_name))

	334

	335 return swarming_tasks

	336

	337 def _GetAllTryJobs(self, failure_result_map):

	338 """Returns all try jobs related to one build.

	339

	340 Args:

	341 failure_result_map (dict): Maps each step/test to the key of the build in

	342 which it first failed, e.g.:

	343 {

	344 'step1': 'm/b/1',

	345 'step2': {

	346 'test1': 'm/b/1',

	347 'test2': 'm/b/2'

	348 }

	349 }

	350

	351 Returns:

	352 A dict of try jobs like below:

	353 {

	354 'm/b/1': WfTryJob(

	355 key=Key('WfBuild', 'm/b/1'),...)

	356 ...

	357 }

	358 """

	359 if not failure_result_map:

	360 return {}

	361

	362 try_jobs = {}

	363 for step_map in failure_result_map.values():

	364 if isinstance(step_map, basestring):

	365 try_jobs[step_map] = WfTryJob.Get(*step_map.split('/'))

	366 else:

	367 for task_key in step_map.values():

	368 if not try_jobs.get(task_key):

	369 try_jobs[task_key] = WfTryJob.Get(*task_key.split('/'))

	370

	371 return try_jobs

	372

	373 def _GetSwarmingTaskAndTryJobForFailure(

	374 self, step_name, test_name, failure_result_map, swarming_tasks, try_jobs):

	375 """Gets swarming task and try job for the specific step/test."""

	376 if not failure_result_map:

	377 return None, None

	378

	379 if test_name:

	380 try_job_key = failure_result_map.get(step_name, {}).get(test_name)

	381 else:

	382 try_job_key = failure_result_map.get(step_name)

	383

	384 # Get the swarming task for the step/test.

	385 swarming_task = swarming_tasks.get(step_name, {}).get(try_job_key)

	386

	387 # Get the try job for the step/test.

	388 try_job = try_jobs.get(try_job_key)

	389

	390 return swarming_task, try_job

	391

313 def _GenerateResultsForBuild(	392 def _GenerateResultsForBuild(

314 self, build, heuristic_analysis, results, confidences):	393 self, build, heuristic_analysis, results, confidences):

	394

	395 swarming_tasks = self._GetAllSwarmingTasks(

	396 heuristic_analysis.failure_result_map)

	397 try_jobs = self._GetAllTryJobs(heuristic_analysis.failure_result_map)

	398

315 for failure in heuristic_analysis.result['failures']:	399 for failure in heuristic_analysis.result['failures']:

	400 step_name = failure['step_name']

316 if failure.get('tests'): # Test-level analysis.	401 if failure.get('tests'): # Test-level analysis.

317 for test in failure['tests']:	402 for test in failure['tests']:

	403 test_name = test['test_name']

	404 swarming_task, try_job = self._GetSwarmingTaskAndTryJobForFailure(

	405 step_name, test_name, heuristic_analysis.failure_result_map,

	406 swarming_tasks, try_jobs)

	407

318 self._PopulateResult(	408 self._PopulateResult(

319 results, build, heuristic_analysis.failure_result_map,	409 results, build, heuristic_analysis.failure_type, test,

320 heuristic_analysis.failure_type, test,	410 step_name, confidences, swarming_task, try_job,

321 failure['step_name'], confidences, test_name=test['test_name'])	411 test_name=test_name)

322 else:	412 else:

	413 swarming_task, try_job = self._GetSwarmingTaskAndTryJobForFailure(

	414 step_name, None, heuristic_analysis.failure_result_map,

	415 swarming_tasks, try_jobs)

323 self._PopulateResult(	416 self._PopulateResult(

324 results, build, heuristic_analysis.failure_result_map,	417 results, build, heuristic_analysis.failure_type, failure,

325 heuristic_analysis.failure_type, failure, failure['step_name'],	418 step_name, confidences, swarming_task, try_job)

326 confidences)

327	419

328 @endpoints.method(	420 @endpoints.method(

329 _BuildFailureCollection, _BuildFailureAnalysisResultCollection,	421 _BuildFailureCollection, _BuildFailureAnalysisResultCollection,

330 path='buildfailure', name='buildfailure')	422 path='buildfailure', name='buildfailure')

331 def AnalyzeBuildFailures(self, request):	423 def AnalyzeBuildFailures(self, request):

332 """Returns analysis results for the given build failures in the request.	424 """Returns analysis results for the given build failures in the request.

333	425

334 Analysis of build failures will be triggered automatically on demand.	426 Analysis of build failures will be triggered automatically on demand.

335	427

336 Args:	428 Args:

337 request (_BuildFailureCollection): A list of build failures.	429 request (_BuildFailureCollection): A list of build failures.

338	430

339 Returns:	431 Returns:

340 _BuildFailureAnalysisResultCollection	432 _BuildFailureAnalysisResultCollection

341 A list of analysis results for the given build failures.	433 A list of analysis results for the given build failures.

342 """	434 """

343 results = []	435 results = []

344 supported_builds = []	436 supported_builds = []

345 confidences = SuspectedCLConfidence.Get()	437 confidences = SuspectedCLConfidence.Get()

346	438

347 for build in request.builds:	439 for build in request.builds:

348 logging.info('%s/%s/%s', build.master_url, build.builder_name, build.build_number)

349 master_name = buildbot.GetMasterNameFromUrl(build.master_url)	440 master_name = buildbot.GetMasterNameFromUrl(build.master_url)

350 if not (master_name and waterfall_config.MasterIsSupported(master_name)):	441 if not (master_name and waterfall_config.MasterIsSupported(master_name)):

351 logging.info('%s/%s/%s is not supported',	442 logging.info('%s/%s/%s is not supported',

352 build.master_url, build.builder_name, build.build_number)	443 build.master_url, build.builder_name, build.build_number)

353 continue	444 continue

354	445

355 supported_builds.append({	446 supported_builds.append({

356 'master_name': master_name,	447 'master_name': master_name,

357 'builder_name': build.builder_name,	448 'builder_name': build.builder_name,

358 'build_number': build.build_number,	449 'build_number': build.build_number,

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
411	502

412 try:	503 try:

413 _AsyncProcessFlakeReport(flake_analysis_request, user_email, is_admin)	504 _AsyncProcessFlakeReport(flake_analysis_request, user_email, is_admin)

414 queued = True	505 queued = True

415 except Exception:	506 except Exception:

416 # Ignore the report if queuing it for async processing fails.	507 # Ignore the report if queuing it for async processing fails.

417 queued = False	508 queued = False

418 logging.exception('Failed to queue flake report for async processing')	509 logging.exception('Failed to queue flake report for async processing')

419	510

420 return _FlakeAnalysis(queued=queued)	511 return _FlakeAnalysis(queued=queued)

OLD	NEW

« no previous file with comments | « no previous file | appengine/findit/test/findit_api_test.py » ('j') | appengine/findit/waterfall/suspected_cl_util.py » ('J')