Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(258)

Side by Side Diff: appengine/findit/crash/crash_pipeline.py

Issue 2455053004: Moving ScheduleNewAnalysis to break the cycle (Closed)
Patch Set: rebase Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/findit/crash/changelist_classifier.py ('k') | appengine/findit/crash/findit.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import copy 5 import copy
6 import json 6 import json
7 import logging 7 import logging
8 8
9 from common import appengine_util 9 from common import appengine_util
10 from common import pubsub_util 10 from common import pubsub_util
11 from common import time_util 11 from common import time_util
12 from common.http_client_appengine import HttpClientAppengine 12 from common.http_client_appengine import HttpClientAppengine
13 from common.pipeline_wrapper import BasePipeline 13 from common.pipeline_wrapper import BasePipeline
14 from common.pipeline_wrapper import pipeline 14 from common.pipeline_wrapper import pipeline
15 from crash import findit_for_chromecrash 15 from crash import findit_for_chromecrash
16 from crash import findit_for_clusterfuzz 16 from crash import findit_for_clusterfuzz
17 from crash.type_enums import CrashClient 17 from crash.type_enums import CrashClient
18 from lib.gitiles import gitiles_repository 18 from lib.gitiles import gitiles_repository
19 from model import analysis_status 19 from model import analysis_status
20 20
21 21
22 # TODO(http://crbug.com/659346): write complete coverage tests for this. 22 # TODO(http://crbug.com/659346): this needs complete coverage tests.
23 def FinditForClientID(client_id): # pragma: no cover 23 def FinditForClientID(client_id):
24 """Construct a Findit object from a client id string specifying the class. 24 """Construct a Findit object from a client id string specifying the class.
25 25
26 We cannot pass Findit objects to the various methods in 26 We cannot pass Findit objects to the various methods in
27 ``crash.crash_pipeline``, because they are not JSON serializable. For 27 ``crash.crash_pipeline``, because they are not JSON serializable. For
28 now, we just serialize Findit objects as their ``client_id``, and then 28 now, we just serialize Findit objects as their ``client_id``, and then
29 use this function to reconstruct them. Alas, this means we will lose 29 use this function to reconstruct them. Alas, this means we will lose
30 various other information stored in the Findit object (i.e., stuff that 30 various other information stored in the Findit object (i.e., stuff that
31 comes from CrashConfig); which could lead to some hard-to-diagnose 31 comes from CrashConfig); which could lead to some hard-to-diagnose
32 coherency bugs, since the new Findit object will be based on the 32 coherency bugs, since the new Findit object will be based on the
33 CrashConfig at the time it's constructed, which may be different 33 CrashConfig at the time it's constructed, which may be different
34 than the CrashConfig at the time the previous Findit object was 34 than the CrashConfig at the time the previous Findit object was
35 constructed. In the future we should fix all this to serialize Findit 35 constructed. In the future we should fix all this to serialize Findit
36 objects in a more robust way. 36 objects in a more robust way.
37 """ 37 """
38 assert isinstance(client_id, (str, unicode)), ( 38 assert isinstance(client_id, (str, unicode)), (
39 'FinditForClientID: expected string or unicode, but got %s' 39 'FinditForClientID: expected string or unicode, but got %s'
40 % client_id.__class__.__name__) 40 % client_id.__class__.__name__)
41 # TODO(wrengr): it'd be nice to replace this with a single lookup in 41 # TODO(wrengr): it'd be nice to replace this with a single lookup in
42 # a dict; but that's buggy for some unknown reason. 42 # a dict; but that's buggy for some unknown reason.
43 if client_id == CrashClient.FRACAS: 43 if client_id == CrashClient.FRACAS:
44 cls = findit_for_chromecrash.FinditForFracas 44 cls = findit_for_chromecrash.FinditForFracas
45 elif client_id == CrashClient.CRACAS: 45 elif client_id == CrashClient.CRACAS: # pragma: no cover
46 cls = findit_for_chromecrash.FinditForCracas 46 cls = findit_for_chromecrash.FinditForCracas
47 elif client_id == CrashClient.CLUSTERFUZZ: 47 elif client_id == CrashClient.CLUSTERFUZZ: # pragma: no cover
48 cls = findit_for_clusterfuzz.FinditForClusterfuzz 48 cls = findit_for_clusterfuzz.FinditForClusterfuzz
49 else: 49 else: # pragma: no cover
50 raise ValueError('FinditForClientID: ' 50 raise ValueError('FinditForClientID: '
51 'unknown or unsupported client %s' % client_id) 51 'unknown or unsupported client %s' % client_id)
52 52
53 return cls( 53 return cls(gitiles_repository.GitilesRepository(
54 gitiles_repository.GitilesRepository(http_client=HttpClientAppengine()), 54 http_client=HttpClientAppengine()))
55 CrashWrapperPipeline)
56 55
57 56
58 # Some notes about the classes below, for people who are not 57 # Some notes about the classes below, for people who are not familiar
59 # familiar with AppEngine. The thing that really kicks everything off 58 # with AppEngine pipelines:
60 # is ``CrashWrapperPipeline.run``. However, an important thing to bear in 59 #
61 # mind is that whatever arguments are passed to that method will also 60 # The pipeline library is designed in a strange way which requires that
stgao 2016/11/03 04:48:10 It might be just me, but "strange" seems a little
wrengr 2016/11/03 17:40:34 I've done a lot of software engineering on a lot o
stgao 2016/11/03 18:48:30 I would say "special" instead. But it's fine as is
wrengr 2016/11/03 20:04:21 filed http://crbug.com/662145
62 # be passed to the ``run`` method on whatever objects it yields. Thus, 61 # all the ``__init__`` and ``run`` methods in this file take the exact
63 # all the ``run`` methods across these different classes must have 62 # same arguments. This arises from the interaction between a few
64 # the same type. In practice, we end up passing all the arguments to the 63 # different constraints. First, for any given pipeline, its ``__init__``
65 # constructors, because we need to have the fields around for logging 64 # and ``run`` must take the same arguments. Second, all the objects that
66 # (e.g., in ``finalized``); thus, there's nothing that needs to be passed 65 # ``CrashWrapperPipeline.run`` yields must take the same arguments as that
67 # to ``run``. Another thing to bear in mind is that whatever objects 66 # ``run`` method itself. For more about all this, see:
stgao 2016/11/03 04:48:10 I don't understand why the yielded objects must ta
wrengr 2016/11/03 17:40:34 I have no idea. This is what katesonia has said re
stgao 2016/11/03 18:17:37 Sharu, would you mind clarifying here? I'm confuse
Sharu Jiang 2016/11/04 00:19:00 The comment "``CrashWrapperPipeline.run`` yields m
68 # ``CrashWrapperPipeline.run`` yields must be JSON-serializable. The base 67 # https://github.com/GoogleCloudPlatform/appengine-pipelines/wiki/Python
69 # class handles most of that for us, so the force of this constraint is 68 # For our use case, we pass all the important data to ``__init__``,
70 # that all the arguments to the constructors for those classes must be 69 # since we need it to be available in ``finalized``; thus we ignore the
71 # JSON-serializable. Thus, we cannot actually pass a Findit object to 70 # arguments passed to ``run`` (which will be the same values that were
72 # the constructor, but rather must pass only the ``client_id`` (or whatever 71 # passed to ``__init__``).
73 # JSON dict) and then reconstruct the Findit object from that. Moreover, 72 #
74 # the ``run`` method and the ``finalized`` method will be run in different 73 # In addition, whatever objects ``CrashWrapperPipeline.run`` yields must
75 # processes, so we will actually end up reconstructing the Findit object 74 # be JSON-serializable. The pipeline class handles most of the details,
stgao 2016/11/03 04:48:10 For the yielded objects by ``CrashWrapperPipeline.
wrengr 2016/11/03 17:40:34 The bug I encountered previously about JSON-serial
stgao 2016/11/03 18:48:30 This is true. But my comment above is for "In add
wrengr 2016/11/03 20:04:21 The issue I encountered was that ``yield SomePipel
stgao 2016/11/03 21:07:07 The problem most likely is due to passing ``stuff`
76 # twice. Thus, we shouldn't store anything in the pipeline objects outside 75 # so the force of this constraint is that whatever arguments we pass
stgao 2016/11/03 04:48:10 It is true that parameters passed over to the cons
wrengr 2016/11/03 17:40:34 Which agrees with what I wrote...
stgao 2016/11/03 18:48:30 Sorry for the typo again. I meant to say __yielded
77 # of what their constructors store. 76 # to their ``__init__`` must themselves be JSON-serializable. Alas,
77 # in Python, JSON-serializability isn't a property of classes themselves,
78 # but rather a property of the JSON-encoder object used to do the
79 # serialization. Thus, we cannot pass a ``Findit`` object directly to
80 # any of the methods here, but rather must instead pass the ``client_id``
81 # (or whatever JSON dict), and then reconstruct the ``Findit`` object
82 # from that data.
83 #
84 # Moreover, the ``run`` and ``finalized`` methods are executed in separate
stgao 2016/11/03 04:48:10 Actually, this is one of the goodies from appengin
wrengr 2016/11/03 17:40:34 Does it not? How do changes from inside ``run`` ge
stgao 2016/11/03 18:17:37 Sorry for the typo. I meant to say it __does__ int
wrengr 2016/11/03 18:20:20 Cool, I understood it right :)
85 # processes, so we'll actually end up reconstructing the ``Findit`` object
86 # twice. This also means ``run`` can't store anything in the pipeline
87 # object and expect it to still be available in the ``finalized`` method.
78 88
79 class CrashBasePipeline(BasePipeline): 89 class CrashBasePipeline(BasePipeline):
80 def __init__(self, client_id, crash_identifiers): 90 def __init__(self, client_id, crash_identifiers):
81 super(CrashBasePipeline, self).__init__(client_id, crash_identifiers) 91 super(CrashBasePipeline, self).__init__(client_id, crash_identifiers)
82 self._crash_identifiers = crash_identifiers 92 self._crash_identifiers = crash_identifiers
83 self._findit = FinditForClientID(client_id) 93 self._findit = FinditForClientID(client_id)
84 94
85 @property 95 @property
86 def client_id(self): # pragma: no cover 96 def client_id(self): # pragma: no cover
87 return self._findit.client_id 97 return self._findit.client_id
88 98
89 def run(self, *args, **kwargs): 99 def run(self, *args, **kwargs):
90 raise NotImplementedError() 100 raise NotImplementedError()
91 101
92 102
93 class CrashAnalysisPipeline(CrashBasePipeline): 103 class CrashAnalysisPipeline(CrashBasePipeline):
94 def finalized(self): # pragma: no cover 104 def finalized(self):
95 if self.was_aborted: 105 if self.was_aborted: # pragma: no cover
96 self._PutAbortedError() 106 self._PutAbortedError()
97 107
98 # N.B., this method must be factored out for unittest reasons; since 108 # N.B., this method must be factored out for unittest reasons; since
99 # ``finalized`` takes no arguments (by AppEngine's spec) and 109 # ``finalized`` takes no arguments (by AppEngine's spec) and
100 # ``was_aborted`` can't be altered directly. 110 # ``was_aborted`` can't be altered directly.
101 def _PutAbortedError(self): 111 def _PutAbortedError(self):
102 """Update the ndb.Model to indicate that this pipeline was aborted.""" 112 """Update the ndb.Model to indicate that this pipeline was aborted."""
103 logging.error('Aborted analysis for %s', repr(self._crash_identifiers)) 113 logging.error('Aborted analysis for %s', repr(self._crash_identifiers))
104 analysis = self._findit.GetAnalysis(self._crash_identifiers) 114 analysis = self._findit.GetAnalysis(self._crash_identifiers)
105 analysis.status = analysis_status.ERROR 115 analysis.status = analysis_status.ERROR
106 analysis.put() 116 analysis.put()
107 117
108 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! 118 def run(self, *_args, **_kwargs):
109 # Arguments number differs from overridden method - pylint: disable=W0221 119 """Call predator to do the analysis of the given crash.
110 def run(self): 120
121 N.B., due to the structure of AppEngine pipelines, this method must
122 accept the same arguments as are passed to ``__init__``; however,
123 because they were already passed to ``__init__`` there's no use in
124 recieving them here. Thus, we discard all the arguments to this method
125 (except for ``self``, naturally).
126 """
111 # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis 127 # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis
112 # to guard against race conditions? 128 # to guard against race conditions?
113 analysis = self._findit.GetAnalysis(self._crash_identifiers) 129 analysis = self._findit.GetAnalysis(self._crash_identifiers)
114 130
115 # Update the model's status to say we're in the process of doing analysis. 131 # Update the model's status to say we're in the process of doing analysis.
116 analysis.pipeline_status_path = self.pipeline_status_path() 132 analysis.pipeline_status_path = self.pipeline_status_path()
117 analysis.status = analysis_status.RUNNING 133 analysis.status = analysis_status.RUNNING
118 analysis.started_time = time_util.GetUTCNow() 134 analysis.started_time = time_util.GetUTCNow()
119 analysis.findit_version = appengine_util.GetCurrentVersion() 135 analysis.findit_version = appengine_util.GetCurrentVersion()
120 analysis.put() 136 analysis.put()
(...skipping 10 matching lines...) Expand all
131 'found_components': False, 147 'found_components': False,
132 'has_regression_range': False, 148 'has_regression_range': False,
133 'solution': None, 149 'solution': None,
134 } 150 }
135 151
136 # Update model's status to say we're done, and save the results. 152 # Update model's status to say we're done, and save the results.
137 analysis.completed_time = time_util.GetUTCNow() 153 analysis.completed_time = time_util.GetUTCNow()
138 analysis.result = result 154 analysis.result = result
139 for tag_name, tag_value in tags.iteritems(): 155 for tag_name, tag_value in tags.iteritems():
140 # TODO(http://crbug.com/602702): make it possible to add arbitrary tags. 156 # TODO(http://crbug.com/602702): make it possible to add arbitrary tags.
141 if hasattr(analysis, tag_name): 157 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!
158 if hasattr(analysis, tag_name): # pragma: no cover
142 setattr(analysis, tag_name, tag_value) 159 setattr(analysis, tag_name, tag_value)
143 analysis.status = analysis_status.COMPLETED 160 analysis.status = analysis_status.COMPLETED
144 analysis.put() 161 analysis.put()
145 162
146 163
147 class PublishResultPipeline(CrashBasePipeline): 164 class PublishResultPipeline(CrashBasePipeline):
148 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!
149 def finalized(self): 165 def finalized(self):
150 if self.was_aborted: # pragma: no cover. 166 if self.was_aborted: # pragma: no cover.
151 logging.error('Failed to publish %s analysis result for %s', 167 logging.error('Failed to publish %s analysis result for %s',
152 repr(self._crash_identifiers), self.client_id) 168 repr(self._crash_identifiers), self.client_id)
153 169
154 # Arguments number differs from overridden method - pylint: disable=W0221 170 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!
155 def run(self): 171 def run(self, *_args, **_kwargs): # pragma: no cover
172 """Publish the results of our analysis back into the ndb.Model.
173
174 N.B., due to the structure of AppEngine pipelines, this method must
175 accept the same arguments as are passed to ``__init__``; however,
176 because they were already passed to ``__init__`` there's no use in
177 recieving them here. Thus, we discard all the arguments to this method
178 (except for ``self``, naturally).
179 """
156 analysis = self._findit.GetAnalysis(self._crash_identifiers) 180 analysis = self._findit.GetAnalysis(self._crash_identifiers)
157 result = analysis.ToPublishableResult(self._crash_identifiers) 181 result = analysis.ToPublishableResult(self._crash_identifiers)
158 messages_data = [json.dumps(result, sort_keys=True)] 182 messages_data = [json.dumps(result, sort_keys=True)]
159 183
160 # TODO(http://crbug.com/659354): remove Findit's dependency on CrashConfig. 184 # TODO(http://crbug.com/659354): remove Findit's dependency on CrashConfig.
161 client_config = self._findit.config 185 client_config = self._findit.config
162 # TODO(katesonia): Clean string uses in config. 186 # TODO(katesonia): Clean string uses in config.
163 topic = client_config['analysis_result_pubsub_topic'] 187 topic = client_config['analysis_result_pubsub_topic']
164 pubsub_util.PublishMessagesToTopic(messages_data, topic) 188 pubsub_util.PublishMessagesToTopic(messages_data, topic)
165 logging.info('Published %s analysis result for %s', self.client_id, 189 logging.info('Published %s analysis result for %s', self.client_id,
166 repr(self._crash_identifiers)) 190 repr(self._crash_identifiers))
167 191
168 192
169 class CrashWrapperPipeline(BasePipeline): 193 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!
194 class CrashWrapperPipeline(BasePipeline): # pragma: no cover
170 """Fire off pipelines to (1) do the analysis and (2) publish results. 195 """Fire off pipelines to (1) do the analysis and (2) publish results.
171 196
172 The reason we have analysis and publishing as separate pipelines is 197 The reason we have analysis and publishing as separate pipelines is
173 because each of them can fail for independent reasons. E.g., if we 198 because each of them can fail for independent reasons. E.g., if we
174 successfully finish the analysis, but then the publishing fails due to 199 successfully finish the analysis, but then the publishing fails due to
175 network errors, we don't want to have to redo the analysis in order 200 network errors, we don't want to have to redo the analysis in order
176 to redo the publishing. We could try to cache the fact that analysis 201 to redo the publishing. We could try to cache the fact that analysis
177 succeeded in the pipeline object itself, but we'd have to be careful 202 succeeded in the pipeline object itself, but we'd have to be careful
178 because the ``run`` and ``finalized`` methods are executed in different 203 because the ``run`` and ``finalized`` methods are executed in different
179 processes. 204 processes.
180 """ 205 """
181 def __init__(self, client_id, crash_identifiers): 206 def __init__(self, client_id, crash_identifiers):
182 super(CrashWrapperPipeline, self).__init__(client_id, crash_identifiers) 207 super(CrashWrapperPipeline, self).__init__(client_id, crash_identifiers)
183 self._crash_identifiers = crash_identifiers 208 self._crash_identifiers = crash_identifiers
184 self._client_id = client_id 209 self._client_id = client_id
185 210
186 # TODO(http://crbug.com/659346): write coverage tests. 211 def run(self, *_args, **_kwargs):
187 # Arguments number differs from overridden method - pylint: disable=W0221 212 """Fire off pipelines to run the analysis and publish its results.
188 def run(self): # pragma: no cover 213
214 N.B., due to the structure of AppEngine pipelines, this method must
215 accept the same arguments as are passed to ``__init__``; however,
216 because they were already passed to ``__init__`` there's no use in
217 recieving them here. Thus, we discard all the arguments to this method
218 (except for ``self``, naturally).
219 """
189 run_analysis = yield CrashAnalysisPipeline( 220 run_analysis = yield CrashAnalysisPipeline(
190 self._client_id, self._crash_identifiers) 221 self._client_id, self._crash_identifiers)
191 with pipeline.After(run_analysis): 222 with pipeline.After(run_analysis):
192 yield PublishResultPipeline(self._client_id, self._crash_identifiers) 223 yield PublishResultPipeline(self._client_id, self._crash_identifiers)
OLDNEW
« no previous file with comments | « appengine/findit/crash/changelist_classifier.py ('k') | appengine/findit/crash/findit.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698