appengine/findit/crash/crash_pipeline.py - Issue 2455053004: Moving ScheduleNewAnalysis to break the cycle - Code Review

Chromium Code Reviews

chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out

(154)

My Issues | Starred Open | Closed | All

Side by Side Diff: appengine/findit/crash/crash_pipeline.py

Issue 2455053004: Moving ScheduleNewAnalysis to break the cycle (Closed)

Patch Set: rebase & adding some tests Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « no previous file | appengine/findit/crash/findit.py » ('j') | appengine/findit/handlers/crash/crash_handler.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import copy	5 import copy

6 import json	6 import json

7 import logging	7 import logging

8	8

9 from common import appengine_util	9 from common import appengine_util

10 from common import pubsub_util	10 from common import pubsub_util

11 from common import time_util	11 from common import time_util

12 from common.http_client_appengine import HttpClientAppengine	12 from common.http_client_appengine import HttpClientAppengine

13 from common.pipeline_wrapper import BasePipeline	13 from common.pipeline_wrapper import BasePipeline

14 from common.pipeline_wrapper import pipeline	14 from common.pipeline_wrapper import pipeline

15 from crash import findit_for_chromecrash	15 from crash import findit_for_chromecrash

16 from crash import findit_for_clusterfuzz	16 from crash import findit_for_clusterfuzz

17 from crash.type_enums import CrashClient	17 from crash.type_enums import CrashClient

18 from lib.gitiles import gitiles_repository	18 from lib.gitiles import gitiles_repository

19 from model import analysis_status	19 from model import analysis_status

20	20

21	21

22 # TODO(http://crbug.com/659346): write complete coverage tests for this.	22 # TODO(http://crbug.com/659346): this needs complete coverage tests.

23 def FinditForClientID(client_id): # pragma: no cover	23 def FinditForClientID(client_id):

24 """Construct a Findit object from a client id string specifying the class.	24 """Construct a Findit object from a client id string specifying the class.

25	25

26 We cannot pass Findit objects to the various methods in	26 We cannot pass Findit objects to the various methods in

27 ``crash.crash_pipeline``, because they are not JSON serializable. For	27 ``crash.crash_pipeline``, because they are not JSON serializable. For

28 now, we just serialize Findit objects as their ``client_id``, and then	28 now, we just serialize Findit objects as their ``client_id``, and then

29 use this function to reconstruct them. Alas, this means we will lose	29 use this function to reconstruct them. Alas, this means we will lose

30 various other information stored in the Findit object (i.e., stuff that	30 various other information stored in the Findit object (i.e., stuff that

31 comes from CrashConfig); which could lead to some hard-to-diagnose	31 comes from CrashConfig); which could lead to some hard-to-diagnose

32 coherency bugs, since the new Findit object will be based on the	32 coherency bugs, since the new Findit object will be based on the

33 CrashConfig at the time it's constructed, which may be different	33 CrashConfig at the time it's constructed, which may be different

34 than the CrashConfig at the time the previous Findit object was	34 than the CrashConfig at the time the previous Findit object was

35 constructed. In the future we should fix all this to serialize Findit	35 constructed. In the future we should fix all this to serialize Findit

36 objects in a more robust way.	36 objects in a more robust way.

37 """	37 """

38 assert isinstance(client_id, (str, unicode)), (	38 assert isinstance(client_id, (str, unicode)), (

39 'FinditForClientID: expected string or unicode, but got %s'	39 'FinditForClientID: expected string or unicode, but got %s'

40 % client_id.__class__.__name__)	40 % client_id.__class__.__name__)

41 # TODO(wrengr): it'd be nice to replace this with a single lookup in	41 # TODO(wrengr): it'd be nice to replace this with a single lookup in

42 # a dict; but that's buggy for some unknown reason.	42 # a dict; but that's buggy for some unknown reason.

43 if client_id == CrashClient.FRACAS:	43 if client_id == CrashClient.FRACAS:

44 cls = findit_for_chromecrash.FinditForFracas	44 cls = findit_for_chromecrash.FinditForFracas

45 elif client_id == CrashClient.CRACAS:	45 elif client_id == CrashClient.CRACAS: # pragma: no cover

46 cls = findit_for_chromecrash.FinditForCracas	46 cls = findit_for_chromecrash.FinditForCracas

47 elif client_id == CrashClient.CLUSTERFUZZ:	47 elif client_id == CrashClient.CLUSTERFUZZ: # pragma: no cover

48 cls = findit_for_clusterfuzz.FinditForClusterfuzz	48 cls = findit_for_clusterfuzz.FinditForClusterfuzz

49 else:	49 else: # pragma: no cover

50 raise ValueError('FinditForClientID: '	50 raise ValueError('FinditForClientID: '

51 'unknown or unsupported client %s' % client_id)	51 'unknown or unsupported client %s' % client_id)

52	52

53 return cls(	53 return cls(gitiles_repository.GitilesRepository(

54 gitiles_repository.GitilesRepository(http_client=HttpClientAppengine()),	54 http_client=HttpClientAppengine()))

55 CrashWrapperPipeline)

56	55

57	56

58 # Some notes about the classes below, for people who are not	57 # Some notes about the classes below, for people who are not

59 # familiar with AppEngine. The thing that really kicks everything off	58 # familiar with AppEngine. The thing that really kicks everything off

60 # is ``CrashWrapperPipeline.run``. However, an important thing to bear in	59 # is ``CrashWrapperPipeline.run``. However, an important thing to bear in

61 # mind is that whatever arguments are passed to that method will also	60 # mind is that whatever arguments are passed to that method will also

62 # be passed to the ``run`` method on whatever objects it yields. Thus,	61 # be passed to the ``run`` method on whatever objects it yields. Thus,

63 # all the ``run`` methods across these different classes must have	62 # all the ``run`` methods across these different classes must have

64 # the same type. In practice, we end up passing all the arguments to the	63 # the same type. In practice, we end up passing all the arguments to the

65 # constructors, because we need to have the fields around for logging	64 # constructors, because we need to have the fields around for logging

66 # (e.g., in ``finalized``); thus, there's nothing that needs to be passed	65 # (e.g., in ``finalized``); thus, there's nothing that needs to be passed

67 # to ``run``. Another thing to bear in mind is that whatever objects	66 # to ``run``. However, for some inscrutable reason, whatever arguments

	67 # are passed to the constructors will also be passed to ``run`` (it's

	68 # unclear what sort of terrible introspection AppEngine does in order to
	stgao 2016/11/02 01:28:27 It is not AppEngine, but the AppEngine Pipeline fr It is not AppEngine, but the AppEngine Pipeline framework https://github.com/GoogleCloudPlatform/appengine-pipelines/wiki/Python That's by design in the AppEngine Pipeline. One reason I could think of is that users of the framework don't have to create constructor unless other functions like ``finalized`` also need to access those parameters. wrengr 2016/11/02 23:52:26 Acknowledged. Show quoted text On 2016/11/02 01:28:27, stgao (slow on Monday) wrote: > It is not AppEngine, but the AppEngine Pipeline framework > https://github.com/GoogleCloudPlatform/appengine-pipelines/wiki/Python > > That's by design in the AppEngine Pipeline. One reason I could think of is that > users of the framework don't have to create constructor unless other functions > like ``finalized`` also need to access those parameters. Acknowledged.
	69 # figure this out, but no matter); thus, even though the ``run`` methods

	70 # don't need them (and don't want them) they must still accept a bunch

	71 # of arguments. Another thing to bear in mind is that whatever objects

68 # ``CrashWrapperPipeline.run`` yields must be JSON-serializable. The base	72 # ``CrashWrapperPipeline.run`` yields must be JSON-serializable. The base

69 # class handles most of that for us, so the force of this constraint is	73 # class handles most of that for us, so the force of this constraint is

70 # that all the arguments to the constructors for those classes must be	74 # that all the arguments to the constructors for those classes must be

71 # JSON-serializable. Thus, we cannot actually pass a Findit object to	75 # JSON-serializable. Thus, we cannot actually pass a Findit object to the

72 # the constructor, but rather must pass only the ``client_id`` (or whatever	76 # constructor, but rather must pass only the ``client_id`` (or whatever

73 # JSON dict) and then reconstruct the Findit object from that. Moreover,	77 # JSON dict) and then reconstruct the Findit object from that. Moreover,

74 # the ``run`` method and the ``finalized`` method will be run in different	78 # the ``run`` method and the ``finalized`` method will be run in different

75 # processes, so we will actually end up reconstructing the Findit object	79 # processes, so we will actually end up reconstructing the Findit object

76 # twice. Thus, we shouldn't store anything in the pipeline objects outside	80 # twice. Thus, we shouldn't store anything in the pipeline objects outside

77 # of what their constructors store.	81 # of what their constructors store.

78	82

79 class CrashBasePipeline(BasePipeline):	83 class CrashBasePipeline(BasePipeline):

80 def __init__(self, client_id, crash_identifiers):	84 def __init__(self, client_id, crash_identifiers):

81 super(CrashBasePipeline, self).__init__(client_id, crash_identifiers)	85 super(CrashBasePipeline, self).__init__(client_id, crash_identifiers)

82 self._crash_identifiers = crash_identifiers	86 self._crash_identifiers = crash_identifiers

83 self._findit = FinditForClientID(client_id)	87 self._findit = FinditForClientID(client_id)

84	88

85 @property	89 @property

86 def client_id(self): # pragma: no cover	90 def client_id(self): # pragma: no cover

87 return self._findit.client_id	91 return self._findit.client_id

88	92

89 def run(self, args, *kwargs):	93 def run(self, args, *kwargs):

90 raise NotImplementedError()	94 raise NotImplementedError()

91	95

92	96

93 class CrashAnalysisPipeline(CrashBasePipeline):	97 class CrashAnalysisPipeline(CrashBasePipeline):

94 def finalized(self): # pragma: no cover	98 def finalized(self):

95 if self.was_aborted:	99 if self.was_aborted: # pragma: no cover

96 self._PutAbortedError()	100 self._PutAbortedError()

97	101

98 # N.B., this method must be factored out for unittest reasons; since	102 # N.B., this method must be factored out for unittest reasons; since

99 # ``finalized`` takes no arguments (by AppEngine's spec) and	103 # ``finalized`` takes no arguments (by AppEngine's spec) and

100 # ``was_aborted`` can't be altered directly.	104 # ``was_aborted`` can't be altered directly.

101 def _PutAbortedError(self):	105 def _PutAbortedError(self):

102 """Update the ndb.Model to indicate that this pipeline was aborted."""	106 """Update the ndb.Model to indicate that this pipeline was aborted."""

103 logging.error('Aborted analysis for %s', repr(self._crash_identifiers))	107 logging.error('Aborted analysis for %s', repr(self._crash_identifiers))

104 analysis = self._findit.GetAnalysis(self._crash_identifiers)	108 analysis = self._findit.GetAnalysis(self._crash_identifiers)

105 analysis.status = analysis_status.ERROR	109 analysis.status = analysis_status.ERROR

106 analysis.put()	110 analysis.put()

107	111

108 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!	112 def run(self, _args, *_kwargs):

109 # Arguments number differs from overridden method - pylint: disable=W0221

110 def run(self):

111 # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis	113 # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis

112 # to guard against race conditions?	114 # to guard against race conditions?

113 analysis = self._findit.GetAnalysis(self._crash_identifiers)	115 analysis = self._findit.GetAnalysis(self._crash_identifiers)

114	116

115 # Update the model's status to say we're in the process of doing analysis.	117 # Update the model's status to say we're in the process of doing analysis.

116 analysis.pipeline_status_path = self.pipeline_status_path()	118 analysis.pipeline_status_path = self.pipeline_status_path()

117 analysis.status = analysis_status.RUNNING	119 analysis.status = analysis_status.RUNNING

118 analysis.started_time = time_util.GetUTCNow()	120 analysis.started_time = time_util.GetUTCNow()

119 analysis.findit_version = appengine_util.GetCurrentVersion()	121 analysis.findit_version = appengine_util.GetCurrentVersion()

120 analysis.put()	122 analysis.put()

(...skipping 10 matching lines...) Expand all Loading...
131 'found_components': False,	133 'found_components': False,

132 'has_regression_range': False,	134 'has_regression_range': False,

133 'solution': None,	135 'solution': None,

134 }	136 }

135	137

136 # Update model's status to say we're done, and save the results.	138 # Update model's status to say we're done, and save the results.

137 analysis.completed_time = time_util.GetUTCNow()	139 analysis.completed_time = time_util.GetUTCNow()

138 analysis.result = result	140 analysis.result = result

139 for tag_name, tag_value in tags.iteritems():	141 for tag_name, tag_value in tags.iteritems():

140 # TODO(http://crbug.com/602702): make it possible to add arbitrary tags.	142 # TODO(http://crbug.com/602702): make it possible to add arbitrary tags.

141 if hasattr(analysis, tag_name):	143 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!

	144 if hasattr(analysis, tag_name): # pragma: no cover

142 setattr(analysis, tag_name, tag_value)	145 setattr(analysis, tag_name, tag_value)

143 analysis.status = analysis_status.COMPLETED	146 analysis.status = analysis_status.COMPLETED

144 analysis.put()	147 analysis.put()

145	148

146	149

147 class PublishResultPipeline(CrashBasePipeline):	150 class PublishResultPipeline(CrashBasePipeline):

148 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!

149 def finalized(self):	151 def finalized(self):

150 if self.was_aborted: # pragma: no cover.	152 if self.was_aborted: # pragma: no cover.

151 logging.error('Failed to publish %s analysis result for %s',	153 logging.error('Failed to publish %s analysis result for %s',

152 repr(self._crash_identifiers), self.client_id)	154 repr(self._crash_identifiers), self.client_id)

153	155

154 # Arguments number differs from overridden method - pylint: disable=W0221	156 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!

155 def run(self):	157 def run(self, _args, *_kwargs): # pragma: no cover
	Sharu Jiang 2016/11/01 23:03:24 This is a little hacky, we should at lease add doc This is a little hacky, we should at lease add doc saying we will throw out parameters because of the yield statement. stgao 2016/11/02 01:28:27 I don't understand this. Sharu, do you mind elabor Show quoted text On 2016/11/01 23:03:24, sharu jiang wrote: > This is a little hacky, we should at lease add doc saying we will throw out > parameters because of the yield statement. I don't understand this. Sharu, do you mind elaborating a little bit? Which yield? Why the yield impacts here? Sharu Jiang 2016/11/02 17:01:35 like the `yield` in #190 and #193 What `yield` doe Show quoted text On 2016/11/02 01:28:27, stgao (slow on Monday) wrote: > On 2016/11/01 23:03:24, sharu jiang wrote: > > This is a little hacky, we should at lease add doc saying we will throw out > > parameters because of the yield statement. > > I don't understand this. > Sharu, do you mind elaborating a little bit? Which yield? Why the yield impacts > here? like the `yield` in #190 and #193 What `yield` does is to construct a pipeline and pass whatever parameters of the construct function to run(_args, _kwargs). This way we can force the _args, _kwargs to be the same as in construct function. However, if we do: pipe = PublishResultPipeline(client1, crash_identifiers1) pipe.run(client2, crash_identifiers2, ...) The pipe will run using (client1, crash_identifiers1) instead of using the (client2, crash_identifiers2) passed into `run`. I think this may cause confusion, so if we are to do something like #189-#193, we should at lease add doc to notify users to use `yield` instead of creating a pipeline and call its run separately. stgao** 2016/11/02 21:22:09 Would you mind pointing me to the doc for the usag Show quoted text On 2016/11/02 17:01:35, sharu jiang wrote: > On 2016/11/02 01:28:27, stgao (slow on Monday) wrote: > > On 2016/11/01 23:03:24, sharu jiang wrote: > > > This is a little hacky, we should at lease add doc saying we will throw out > > > parameters because of the yield statement. > > > > I don't understand this. > > Sharu, do you mind elaborating a little bit? Which yield? Why the yield > impacts > > here? > > like the `yield` in #190 and #193 > What `yield` does is to construct a pipeline and pass whatever parameters of the > construct function to run(_args, _kwargs). This way we can force the _args, > _kwargs to be the same as in construct function. > > However, if we do: > pipe = PublishResultPipeline(client1, crash_identifiers1) > pipe.run(client2, crash_identifiers2, ...) > > The pipe will run using (client1, crash_identifiers1) instead of using the > (client2, crash_identifiers2) passed into `run`. I think this may cause > confusion, so if we are to do something like #189-#193, we should at lease add > doc to notify users to use `yield` instead of creating a pipeline and call its > run separately. Would you mind pointing me to the doc for the usage of calling ``run`` directly? It's new to me and I'd like to learn more. wrengr 2016/11/02 23:52:26 I agree it's a terrible hack, but it's forced on u Show quoted text On 2016/11/01 23:03:24, sharu jiang wrote: > This is a little hacky, we should at lease add doc saying we will throw out > parameters because of the yield statement. I agree it's a terrible hack, but it's forced on us by the structure of AppEngine pipelines. I'll add docstrings to reiterate the discussion from lines 57--81. Sharu Jiang 2016/11/03 23:03:25 Sorry for the late response, was busy working on m Show quoted text On 2016/11/02 23:52:26, wrengr wrote: > On 2016/11/01 23:03:24, sharu jiang wrote: > > This is a little hacky, we should at lease add doc saying we will throw out > > parameters because of the yield statement. > > I agree it's a terrible hack, but it's forced on us by the structure of > AppEngine pipelines. I'll add docstrings to reiterate the discussion from lines > 57--81. Sorry for the late response, was busy working on my several pending cls, the doc is in here: https://sookocheff.com/post/appengine/pipelines/connecting-pipelines/ Saying: "The rule of thumb here is that anything you instantiate your pipeline with (and subsequently pass to the run method) is accessible within your pipeline. These are called immediate values and you can treat them as regular Python values." Sharu Jiang** 2016/11/04 00:19:00 Calling ``run`` directly is not a correct usage fo Show quoted text On 2016/11/03 23:03:25, Sharu Jiang wrote: > On 2016/11/02 23:52:26, wrengr wrote: > > On 2016/11/01 23:03:24, sharu jiang wrote: > > > This is a little hacky, we should at lease add doc saying we will throw out > > > parameters because of the yield statement. > > > > I agree it's a terrible hack, but it's forced on us by the structure of > > AppEngine pipelines. I'll add docstrings to reiterate the discussion from > lines > > 57--81. > > Sorry for the late response, was busy working on my several pending cls, the doc > is in here: > https://sookocheff.com/post/appengine/pipelines/connecting-pipelines/ > > Saying: "The rule of thumb here is that anything you instantiate your pipeline > with (and subsequently pass to the run method) is accessible within your > pipeline. These are called immediate values and you can treat them as regular > Python values." Calling ``run`` directly is not a correct usage for a pipeline ( thus, we cannot find an example in a doc), so that's why I was suggesting to add that note in the doc to avoid that. However, if that's very obvious to all pipeline users, then it's ok to not to bother adding that note in the doc :)
156 analysis = self._findit.GetAnalysis(self._crash_identifiers)	158 analysis = self._findit.GetAnalysis(self._crash_identifiers)

157 result = analysis.ToPublishableResult(self._crash_identifiers)	159 result = analysis.ToPublishableResult(self._crash_identifiers)

158 messages_data = [json.dumps(result, sort_keys=True)]	160 messages_data = [json.dumps(result, sort_keys=True)]

159	161

160 # TODO(http://crbug.com/659354): remove Findit's dependency on CrashConfig.	162 # TODO(http://crbug.com/659354): remove Findit's dependency on CrashConfig.

161 client_config = self._findit.config	163 client_config = self._findit.config

162 # TODO(katesonia): Clean string uses in config.	164 # TODO(katesonia): Clean string uses in config.

163 topic = client_config['analysis_result_pubsub_topic']	165 topic = client_config['analysis_result_pubsub_topic']

164 pubsub_util.PublishMessagesToTopic(messages_data, topic)	166 pubsub_util.PublishMessagesToTopic(messages_data, topic)

165 logging.info('Published %s analysis result for %s', self.client_id,	167 logging.info('Published %s analysis result for %s', self.client_id,

166 repr(self._crash_identifiers))	168 repr(self._crash_identifiers))

167	169

168	170

169 class CrashWrapperPipeline(BasePipeline):	171 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!

	172 class CrashWrapperPipeline(BasePipeline): # pragma: no cover

170 """Fire off pipelines to (1) do the analysis and (2) publish results.	173 """Fire off pipelines to (1) do the analysis and (2) publish results.

171	174

172 The reason we have analysis and publishing as separate pipelines is	175 The reason we have analysis and publishing as separate pipelines is

173 because each of them can fail for independent reasons. E.g., if we	176 because each of them can fail for independent reasons. E.g., if we

174 successfully finish the analysis, but then the publishing fails due to	177 successfully finish the analysis, but then the publishing fails due to

175 network errors, we don't want to have to redo the analysis in order	178 network errors, we don't want to have to redo the analysis in order

176 to redo the publishing. We could try to cache the fact that analysis	179 to redo the publishing. We could try to cache the fact that analysis

177 succeeded in the pipeline object itself, but we'd have to be careful	180 succeeded in the pipeline object itself, but we'd have to be careful

178 because the ``run`` and ``finalized`` methods are executed in different	181 because the ``run`` and ``finalized`` methods are executed in different

179 processes.	182 processes.

180 """	183 """

181 def __init__(self, client_id, crash_identifiers):	184 def __init__(self, client_id, crash_identifiers):

182 super(CrashWrapperPipeline, self).__init__(client_id, crash_identifiers)	185 super(CrashWrapperPipeline, self).__init__(client_id, crash_identifiers)

183 self._crash_identifiers = crash_identifiers	186 self._crash_identifiers = crash_identifiers

184 self._client_id = client_id	187 self._client_id = client_id

185	188

186 # TODO(http://crbug.com/659346): write coverage tests.	189 def run(self, _args, *_kwargs):

187 # Arguments number differs from overridden method - pylint: disable=W0221

188 def run(self): # pragma: no cover

189 run_analysis = yield CrashAnalysisPipeline(	190 run_analysis = yield CrashAnalysisPipeline(

190 self._client_id, self._crash_identifiers)	191 self._client_id, self._crash_identifiers)

191 with pipeline.After(run_analysis):	192 with pipeline.After(run_analysis):

192 yield PublishResultPipeline(self._client_id, self._crash_identifiers)	193 yield PublishResultPipeline(self._client_id, self._crash_identifiers)

OLD	NEW

« no previous file with comments | « no previous file | appengine/findit/crash/findit.py » ('j') | appengine/findit/handlers/crash/crash_handler.py » ('J')

Powered by Google App Engine

This is Rietveld 408576698