Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(194)

Side by Side Diff: appengine/findit/crash/stacktrace.py

Issue 2562623004: Making CallStack immutable, so it can be hashable (Closed)
Patch Set: Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from collections import namedtuple 5 from collections import namedtuple
6 import copy
6 import logging 7 import logging
7 import re 8 import re
8 9
9 from crash import parse_util 10 from crash import parse_util
10 from crash.type_enums import CallStackFormatType 11 from crash.type_enums import CallStackFormatType
11 from crash.type_enums import CallStackLanguageType 12 from crash.type_enums import CallStackLanguageType
12 13
13 # Used to parse a line into StackFrame of a Callstack. 14 # Used to parse a line into StackFrame of a Callstack.
14 CALLSTACK_FORMAT_TO_PATTERN = { 15 CALLSTACK_FORMAT_TO_PATTERN = {
15 CallStackFormatType.JAVA: re.compile( 16 CallStackFormatType.JAVA: re.compile(
16 r'at ([A-Za-z0-9$._<>]+)\(\w+(\.java)?:(\d+)\)'), 17 r'at ([A-Za-z0-9$._<>]+)\(\w+(\.java)?:(\d+)\)'),
17 CallStackFormatType.SYZYASAN: re.compile( 18 CallStackFormatType.SYZYASAN: re.compile(
18 r'(CF: )?(.*?)( \(FPO: .*\) )?( \(CONV: .*\) )?\[(.*) @ (\d+)\]'), 19 r'(CF: )?(.*?)( \(FPO: .*\) )?( \(CONV: .*\) )?\[(.*) @ (\d+)\]'),
19 CallStackFormatType.DEFAULT: re.compile( 20 CallStackFormatType.DEFAULT: re.compile(
20 r'(.*?):(\d+)(:\d+)?$') 21 r'(.*?):(\d+)(:\d+)?$')
21 } 22 }
22 23
23 FRAME_INDEX_PATTERN = re.compile(r'\s*#(\d+)\s.*') 24 FRAME_INDEX_PATTERN = re.compile(r'\s*#(\d+)\s.*')
24 25
26 _DEFAULT_FORMAT_TYPE = CallStackFormatType.DEFAULT
27 _DEFAULT_LANGUAGE_TYPE = CallStackLanguageType.CPP
28
25 29
26 class StackFrame(namedtuple('StackFrame', 30 class StackFrame(namedtuple('StackFrame',
27 ['index', 'dep_path', 'function', 'file_path', 'raw_file_path', 31 ['index', 'dep_path', 'function', 'file_path', 'raw_file_path',
28 'crashed_line_numbers', 'repo_url'])): 32 'crashed_line_numbers', 'repo_url'])):
29 """Represents a frame in a stacktrace. 33 """Represents a frame in a stacktrace.
30 34
31 Attributes: 35 Attributes:
32 index (int): Index shown in the stacktrace if a stackframe line looks like 36 index (int): Index shown in the stacktrace if a stackframe line looks like
33 this - '#0 ...', else use the index in the callstack list. 37 this - '#0 ...', else use the index in the callstack list.
34 dep_path (str): Path of the dep this frame represents, for example, 38 dep_path (str): Path of the dep this frame represents, for example,
35 'src/', 'src/v8', 'src/skia'...etc. 39 'src/', 'src/v8', 'src/skia'...etc.
36 function (str): Function that caused the crash. 40 function (str): Function that caused the crash.
37 file_path (str): Normalized path of the crashed file, with parts dep_path 41 file_path (str): Normalized path of the crashed file, with parts dep_path
38 and parts before it stripped, for example, api.cc. 42 and parts before it stripped, for example, api.cc.
39 raw_file_path (str): Normalized original path of the crashed file, 43 raw_file_path (str): Normalized original path of the crashed file,
40 for example, /b/build/slave/mac64/build/src/v8/src/heap/ 44 for example, /b/build/slave/mac64/build/src/v8/src/heap/
41 incremental-marking-job.cc. 45 incremental-marking-job.cc.
42 crashed_line_numbers (list): Line numbers of the file that caused the crash. 46 crashed_line_numbers (list): Line numbers of the file that caused the crash.
43 repo_url (str): Repo url of this frame. 47 repo_url (str): Repo url of this frame.
44 """ 48 """
45 __slots__ = () 49 __slots__ = ()
46 50
47 def __new__(cls, index, dep_path, function, file_path, raw_file_path, 51 def __new__(cls, index, dep_path, function, file_path, raw_file_path,
48 crashed_line_numbers, repo_url=None): 52 crashed_line_numbers, repo_url=None):
53 assert index is not None, TypeError("The index must be an int")
Martin Barbella 2016/12/08 22:30:55 Nit: single quotes for consistency with other stri
wrengr 2016/12/08 23:42:37 Done.
49 return super(cls, StackFrame).__new__(cls, 54 return super(cls, StackFrame).__new__(cls,
50 index, dep_path, function, file_path, raw_file_path, 55 index, dep_path, function, file_path, raw_file_path,
51 crashed_line_numbers, repo_url) 56 crashed_line_numbers, repo_url)
52 57
53 def ToString(self): 58 def ToString(self):
54 frame_str = '#%d in %s @ %s' % (self.index, self.function, self.file_path) 59 frame_str = '#%d in %s @ %s' % (self.index, self.function, self.file_path)
55 if self.crashed_line_numbers: 60 if self.crashed_line_numbers:
56 frame_str += ':%d' % self.crashed_line_numbers[0] 61 frame_str += ':%d' % self.crashed_line_numbers[0]
57 62
58 # For example, if crashed_line_numbers is [61], returns '... f.cc:61', 63 # For example, if crashed_line_numbers is [61], returns '... f.cc:61',
(...skipping 10 matching lines...) Expand all
69 blame_url = '%s/+blame/%s/%s' % (self.repo_url, revision, self.file_path) 74 blame_url = '%s/+blame/%s/%s' % (self.repo_url, revision, self.file_path)
70 if self.crashed_line_numbers: 75 if self.crashed_line_numbers:
71 blame_url += '#%d' % self.crashed_line_numbers[0] 76 blame_url += '#%d' % self.crashed_line_numbers[0]
72 77
73 return blame_url 78 return blame_url
74 79
75 def __str__(self): 80 def __str__(self):
76 return self.ToString() 81 return self.ToString()
77 82
78 83
79 class CallStack(list): 84 # TODO(wrengr): this should be a method on ``format_type``, or a class
80 """Represents a call stack within a stacktrace. A list of StackFrame objects. 85 # method on ``StackFrame``.
Sharu Jiang 2016/12/08 22:23:44 I think in ``StackFrame`` and change the name to P
Martin Barbella 2016/12/08 22:30:55 Is there any reason not to just do this (my usual
stgao 2016/12/08 23:42:17 Let's make this a method into the class in this CL
wrengr 2016/12/08 23:42:37 Mainly just that I ran into issues getting it to w
86 def ParseStackFrame(language_type, format_type, line, deps,
87 default_stack_frame_index=None):
88 """Parse line into a StackFrame instance, if possible.
89
90 Args:
91 language_type (CallStackLanguageType): the language the line is in.
92 format_type (CallStackFormatType): the format the line is in.
93 line (str): The line to be parsed.
94 deps (dict): Map dependency path to its corresponding Dependency.
95
96 Returns:
97 A ``StackFrame`` or ``None``.
98 """
99 # TODO(wrengr): how can we avoid duplicating this logic from ``CallStack``?
100 if format_type is None: # pragma: no cover
101 format_type = _DEFAULT_FORMAT_TYPE
102
103 if language_type is None:
104 language_type = _DEFAULT_LANGUAGE_TYPE
105
106 if format_type == CallStackFormatType.JAVA:
107 language_type = CallStackLanguageType.JAVA
108
109 line = line.strip()
110 line_pattern = CALLSTACK_FORMAT_TO_PATTERN[format_type]
111
112 if format_type == CallStackFormatType.JAVA:
113 match = line_pattern.match(line)
114 if not match:
115 return None
116
117 function = match.group(1)
118 raw_file_path = parse_util.GetFullPathForJavaFrame(function)
119 crashed_line_numbers = [int(match.group(3))]
120
121 elif format_type == CallStackFormatType.SYZYASAN:
122 match = line_pattern.match(line)
123 if not match:
124 return None
125
126 function = match.group(2).strip()
127 raw_file_path = match.group(5)
128 crashed_line_numbers = [int(match.group(6))]
129
130 else:
131 line_parts = line.split()
132 if not line_parts or not line_parts[0].startswith('#'):
133 return None
134
135 match = line_pattern.match(line_parts[-1])
136 if not match: # pragma: no cover
137 return None
138
139 function = ' '.join(line_parts[3:-1])
140
141 raw_file_path = match.group(1)
142 # Fracas java stack has default format type.
143 if language_type == CallStackLanguageType.JAVA:
144 raw_file_path = parse_util.GetFullPathForJavaFrame(function)
145
146 crashed_line_numbers = parse_util.GetCrashedLineRange(
147 match.group(2) + (match.group(3) if match.group(3) else ''))
148 # Normalize the file path so that it can be compared to repository path.
149 dep_path, file_path, repo_url = parse_util.GetDepPathAndNormalizedFilePath(
150 raw_file_path, deps, language_type == CallStackLanguageType.JAVA)
151
152 # If we have the common stack frame index pattern, then use it
153 # since it is more reliable.
154 index_match = FRAME_INDEX_PATTERN.match(line)
155 if index_match:
156 stack_frame_index = int(index_match.group(1))
157 else:
158 stack_frame_index = int(default_stack_frame_index or 0)
159
160 return StackFrame(stack_frame_index, dep_path, function, file_path,
161 raw_file_path, crashed_line_numbers, repo_url)
162
163
164 # N.B., because ``list`` is mutable it isn't hashable, thus cannot be
165 # used as a key in a dict. Because we want to usecallstacks as keys (for
166 # memoization) we has-a tuple rather than is-a list.
Sharu Jiang 2016/12/08 22:23:44 I think it's not necessary to make every parameter
stgao 2016/12/08 23:42:17 With the current implementation of MemoizedFunctio
wrengr 2016/12/08 23:42:37 Even though the old code did do a lot of mutation,
Sharu Jiang 2016/12/09 00:13:58 We can do, that's one alternative, but this will n
Sharu Jiang 2016/12/09 00:13:58 For those frames we don't even look into, we shoul
Sharu Jiang 2016/12/09 00:29:20 *Only parse top_n frames after the signature frame
Sharu Jiang 2016/12/09 00:57:15 Follow up on "redesign the stacktrace to only pars
wrengr 2016/12/09 18:58:38 All tests pass with 100% coverage. The only modifi
wrengr 2016/12/09 18:58:38 The code using MemoizedFunction is: https://coder
167 class CallStack(namedtuple('CallStack',
168 ['priority', 'frames', 'format_type', 'language_type'])):
169 """A stack (sequence of ``StackFrame`` objects) in a ``Stacktrace``.
81 170
82 Attributes: 171 Attributes:
83 priority (int): The smaller the number, the higher the priority beginning 172 priority (int): The smaller the number, the higher the priority beginning
84 with 0. 173 with 0.
174 frames (tuple of StackFrame): the frames in order from bottom to top.
85 format_type (CallStackFormatType): Represents the type of line format 175 format_type (CallStackFormatType): Represents the type of line format
86 within a callstack. For example: 176 within a callstack. For example:
87 177
88 CallStackFormatType.JAVA - 178 CallStackFormatType.JAVA -
89 'at com.android.commands.am.Am.onRun(Am.java:353)' 179 'at com.android.commands.am.Am.onRun(Am.java:353)'
90 180
91 CallStackFormatType.SYZYASAN - 181 CallStackFormatType.SYZYASAN -
92 'chrome_child!v8::internal::ApplyTransition+0x93 [v8/src/lookup.cc @ 340]' 182 'chrome_child!v8::internal::ApplyTransition+0x93 [v8/src/lookup.cc @ 340]'
93 183
94 CallStackFormatType.DEFAULT - 184 CallStackFormatType.DEFAULT -
95 '#0 0x32b5982 in get third_party/WebKit/Source/wtf/RefPtr.h:61:43' 185 '#0 0x32b5982 in get third_party/WebKit/Source/wtf/RefPtr.h:61:43'
96 language_type (CallStackLanguageType): Either CPP or JAVA language. 186 language_type (CallStackLanguageType): Either CPP or JAVA language.
97 """ 187 """
98 def __init__(self, priority, format_type=CallStackFormatType.DEFAULT, 188 __slots__ = ()
99 language_type=CallStackLanguageType.CPP,
100 frame_list=None):
101 super(CallStack, self).__init__(frame_list or [])
102 189
103 self.priority = priority 190 def __new__(cls, priority, format_type=None, language_type=None,
104 self.format_type = format_type 191 frame_list=None):
105 self.language_type = ( 192 """Construct a new ``CallStack``.
106 CallStackLanguageType.JAVA if format_type == CallStackFormatType.JAVA
107 else language_type)
108 193
109 def ParseLine(self, line, deps): 194 N.B., we use ``None`` as the default value of the optional arguments
110 """Parse line into StackFrame instance and append it if successfully 195 so that if callers need to explicitly provide those arguments but
111 parsed.""" 196 don't have an explicit value, they can pass ``None`` to get at the
112 line = line.strip() 197 default without needing to be kept in sync with this constructor. For
113 line_pattern = CALLSTACK_FORMAT_TO_PATTERN[self.format_type] 198 example, the ``ChromeCrashParser.Parse`` constructs a stack and they
199 need to keep track of all the arguments to be passed to this function.
114 200
115 if self.format_type == CallStackFormatType.JAVA: 201 Args:
116 match = line_pattern.match(line) 202 priority (int): The priority of this stack in its ``Stacktrace``.
117 if not match: 203 format_type (CallStackFormatType): Optional. The stack's format.
118 return 204 language_type (CallStackLanguageType): Optional. The stack's language.
205 frame_list (iterable of StackFrame): Optional. The frames in the stack.
206 """
207 if format_type is None:
208 format_type = _DEFAULT_FORMAT_TYPE
119 209
120 function = match.group(1) 210 if language_type is None:
121 raw_file_path = parse_util.GetFullPathForJavaFrame(function) 211 language_type = _DEFAULT_LANGUAGE_TYPE
122 crashed_line_numbers = [int(match.group(3))]
123 212
124 elif self.format_type == CallStackFormatType.SYZYASAN: 213 if format_type == CallStackFormatType.JAVA: # pragma: no cover
Martin Barbella 2016/12/08 22:30:55 Not too important, but do we really need no cover
wrengr 2016/12/08 23:42:37 We don't *need* the pragma; rather, the old versio
125 match = line_pattern.match(line) 214 language_type = CallStackLanguageType.JAVA
126 if not match:
127 return
128 215
129 function = match.group(2).strip() 216 if frame_list is None:
130 raw_file_path = match.group(5) 217 frame_list = []
131 crashed_line_numbers = [int(match.group(6))]
132 218
133 else: 219 return super(cls, CallStack).__new__(cls,
134 line_parts = line.split() 220 priority, tuple(frame_list), format_type, language_type)
135 if not line_parts or not line_parts[0].startswith('#'):
136 return
137 221
138 match = line_pattern.match(line_parts[-1]) 222 def __len__(self):
139 if not match: 223 """Returns the number of frames in this stack."""
140 return 224 return len(self.frames)
141 225
142 function = ' '.join(line_parts[3:-1]) 226 # TODO: we do actually have unittests for this, but for some reason
Martin Barbella 2016/12/08 22:30:55 Strange. Is there a bug opened for this? might be
wrengr 2016/12/08 23:42:37 Done. crbug.com/672641
227 # coverage isn't seeing them.
228 def __bool__(self): # pragma: no cover
229 """Returns whether this stack is empty."""
230 return bool(self.frames)
143 231
144 raw_file_path = match.group(1) 232 def __iter__(self):
145 # Fracas java stack has default format type. 233 """Iterator over the frames in this stack."""
146 if self.language_type == CallStackLanguageType.JAVA: 234 return iter(self.frames)
147 raw_file_path = parse_util.GetFullPathForJavaFrame(function)
148 235
149 crashed_line_numbers = parse_util.GetCrashedLineRange( 236 def SliceFrames(self, low_index, high_index):
150 match.group(2) + (match.group(3) if match.group(3) else '')) 237 """Returns a new ``CallStack`` keeping only the specified frames.
151 # Normalize the file path so that it can be compared to repository path.
152 dep_path, file_path, repo_url = parse_util.GetDepPathAndNormalizedFilePath(
153 raw_file_path, deps, self.language_type == CallStackLanguageType.JAVA)
154 238
155 # If we have the common stack frame index pattern, then use it 239 Args:
156 # since it is more reliable. 240 low_index (int or None): the lowest index to keep. If ``None``
157 index_match = FRAME_INDEX_PATTERN.match(line) 241 then defaults to 0.
158 if index_match: 242 high_index (int or None): the index after the highest one to
159 stack_frame_index = int(index_match.group(1)) 243 keep. If ``None`` then defaults to one after the highest index.
160 else:
161 stack_frame_index = len(self)
162 244
163 self.append(StackFrame(stack_frame_index, dep_path, function, file_path, 245 Returns:
164 raw_file_path, crashed_line_numbers, repo_url)) 246 A new ``CallStack`` instance. If both arguments are ``None`` then
247 we return the original stack object, because they are equal and
248 due to immutability there's no reason to clone the instance.
249 """
250 if low_index is None and high_index is None:
251 return self
252
253 # TODO(wrengr): can we use ``_replace`` without running into TypeErrors?
Sharu Jiang 2016/12/08 22:23:44 what TypeErrors you encountered?
wrengr 2016/12/08 23:42:37 The TypeError complains about _make not being call
254 return CallStack(self.priority,
255 format_type=self.format_type,
256 language_type=self.language_type,
257 frame_list=self.frames[low_index:high_index])
165 258
166 259
260 # N.B., because ``list`` is mutable it isn't hashable, thus cannot be
261 # used as a key in a dict. Because we want to usecallstacks as keys (for
262 # memoization) we has-a tuple rather than is-a list.
167 # TODO(http://crbug.com/644476): this class needs a better name. 263 # TODO(http://crbug.com/644476): this class needs a better name.
168 class Stacktrace(list): 264 class Stacktrace(object):
169 """A collection of callstacks which together provide a trace of what happened. 265 """A collection of callstacks which together provide a trace of what happened.
170 266
171 For instance, when doing memory debugging we will have callstacks for 267 For instance, when doing memory debugging we will have callstacks for
172 (1) when the crash occurred, (2) when the object causing the crash 268 (1) when the crash occurred, (2) when the object causing the crash
173 was allocated, (3) when the object causing the crash was freed (for 269 was allocated, (3) when the object causing the crash was freed (for
174 use-after-free crashes), etc. What callstacks are included in the 270 use-after-free crashes), etc. What callstacks are included in the
175 trace is unspecified, since this differs for different tools.""" 271 trace is unspecified, since this differs for different tools."""
176 def __init__(self, stack_list=None, signature=None): 272 def __init__(self, stack_list=None, signature=None):
177 super(Stacktrace, self).__init__(stack_list or []) 273 self.stacks = stack_list or []
178
179 self._crash_stack = None 274 self._crash_stack = None
180 self._signature_parts = None 275 self._signature_parts = None
181 if signature: 276 if signature:
182 # Filter out the types of signature, for example [Out of Memory]. 277 # Filter out the types of signature, for example [Out of Memory].
183 signature = re.sub('[[][^]]*[]]\s*', '', signature) 278 signature = re.sub('[[][^]]*[]]\s*', '', signature)
184 # For clusterfuzz crash, the signature is crash state. It is 279 # For clusterfuzz crash, the signature is crash state. It is
185 # usually the top 3 important stack frames separated by '\n'. 280 # usually the top 3 important stack frames separated by '\n'.
186 self._signature_parts = signature.split('\n') 281 self._signature_parts = signature.split('\n')
187 282
283 def __getitem__(self, i): # pragma: no cover
284 return self.stacks[i]
285
286 def __len__(self):
287 return len(self.stacks)
288
289 def __bool__(self): # pragma: no cover
290 return bool(self.stacks)
291
292 def __iter__(self):
293 return iter(self.stacks)
188 294
189 @property 295 @property
190 def crash_stack(self): 296 def crash_stack(self):
191 """Get the callstack with the highest priority (i.e., whose priority 297 """Get the callstack with the highest priority (i.e., whose priority
192 field is numerically the smallest) in the stacktrace.""" 298 field is numerically the smallest) in the stacktrace."""
193 if not self: 299 if not self.stacks:
194 logging.warning('Cannot get crash stack for empty stacktrace: %s', self) 300 logging.warning('Cannot get crash stack for empty stacktrace: %s', self)
195 return None 301 return None
196 302
197 if self._crash_stack is None and self._signature_parts: 303 if self._crash_stack is None and self._signature_parts:
198 def _IsSignatureCallstack(callstack): 304 def _IsSignatureCallstack(callstack):
199 for index, frame in enumerate(callstack): 305 for index, frame in enumerate(callstack):
200 for signature_part in self._signature_parts: 306 for signature_part in self._signature_parts:
201 if signature_part in frame.function: 307 if signature_part in frame.function:
202 return True, index 308 return True, index
203 309
204 return False, 0 310 return False, 0
205 311
206 # Set the crash stack using signature callstack. 312 # Set the crash stack using signature callstack.
207 for callstack in self: 313 for callstack in self.stacks:
208 is_signature_callstack, index = _IsSignatureCallstack(callstack) 314 is_signature_callstack, index = _IsSignatureCallstack(callstack)
209 if is_signature_callstack: 315 if is_signature_callstack:
210 # Filter all the stack frames before signature. 316 # Filter all the stack frames before signature.
211 callstack[:] = callstack[index:] 317 self._crash_stack = callstack.SliceFrames(index, None)
212 self._crash_stack = callstack
213 break 318 break
214 319
215 # If there is no signature callstack, fall back to set crash stack using 320 # If there is no signature callstack, fall back to set crash stack using
216 # the first least priority callstack. 321 # the first least priority callstack.
217 if self._crash_stack is None: 322 if self._crash_stack is None:
218 self._crash_stack = sorted(self, key=lambda stack: stack.priority)[0] 323 self._crash_stack = sorted(self.stacks,
324 key=lambda stack: stack.priority)[0]
219 325
220 return self._crash_stack 326 return self._crash_stack
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698