Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(315)

Side by Side Diff: appengine/findit/crash/stacktrace.py

Issue 2562623004: Making CallStack immutable, so it can be hashable (Closed)
Patch Set: Addressing nits Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from collections import namedtuple 5 from collections import namedtuple
6 import copy
6 import logging 7 import logging
7 import re 8 import re
8 9
9 from crash import parse_util 10 from crash import parse_util
10 from crash.type_enums import CallStackFormatType 11 from crash.type_enums import CallStackFormatType
11 from crash.type_enums import CallStackLanguageType 12 from crash.type_enums import CallStackLanguageType
12 13
13 # Used to parse a line into StackFrame of a Callstack. 14 # Used to parse a line into StackFrame of a Callstack.
14 CALLSTACK_FORMAT_TO_PATTERN = { 15 CALLSTACK_FORMAT_TO_PATTERN = {
15 CallStackFormatType.JAVA: re.compile( 16 CallStackFormatType.JAVA: re.compile(
16 r'at ([A-Za-z0-9$._<>]+)\(\w+(\.java)?:(\d+)\)'), 17 r'at ([A-Za-z0-9$._<>]+)\(\w+(\.java)?:(\d+)\)'),
17 CallStackFormatType.SYZYASAN: re.compile( 18 CallStackFormatType.SYZYASAN: re.compile(
18 r'(CF: )?(.*?)( \(FPO: .*\) )?( \(CONV: .*\) )?\[(.*) @ (\d+)\]'), 19 r'(CF: )?(.*?)( \(FPO: .*\) )?( \(CONV: .*\) )?\[(.*) @ (\d+)\]'),
19 CallStackFormatType.DEFAULT: re.compile( 20 CallStackFormatType.DEFAULT: re.compile(
20 r'(.*?):(\d+)(:\d+)?$') 21 r'(.*?):(\d+)(:\d+)?$')
21 } 22 }
22 23
23 FRAME_INDEX_PATTERN = re.compile(r'\s*#(\d+)\s.*') 24 FRAME_INDEX_PATTERN = re.compile(r'\s*#(\d+)\s.*')
24 25
26 _DEFAULT_FORMAT_TYPE = CallStackFormatType.DEFAULT
27 _DEFAULT_LANGUAGE_TYPE = CallStackLanguageType.CPP
28
25 29
26 class StackFrame(namedtuple('StackFrame', 30 class StackFrame(namedtuple('StackFrame',
27 ['index', 'dep_path', 'function', 'file_path', 'raw_file_path', 31 ['index', 'dep_path', 'function', 'file_path', 'raw_file_path',
28 'crashed_line_numbers', 'repo_url'])): 32 'crashed_line_numbers', 'repo_url'])):
29 """Represents a frame in a stacktrace. 33 """Represents a frame in a stacktrace.
30 34
31 Attributes: 35 Attributes:
32 index (int): Index shown in the stacktrace if a stackframe line looks like 36 index (int): Index shown in the stacktrace if a stackframe line looks like
33 this - '#0 ...', else use the index in the callstack list. 37 this - '#0 ...', else use the index in the callstack list.
34 dep_path (str): Path of the dep this frame represents, for example, 38 dep_path (str): Path of the dep this frame represents, for example,
35 'src/', 'src/v8', 'src/skia'...etc. 39 'src/', 'src/v8', 'src/skia'...etc.
36 function (str): Function that caused the crash. 40 function (str): Function that caused the crash.
37 file_path (str): Normalized path of the crashed file, with parts dep_path 41 file_path (str): Normalized path of the crashed file, with parts dep_path
38 and parts before it stripped, for example, api.cc. 42 and parts before it stripped, for example, api.cc.
39 raw_file_path (str): Normalized original path of the crashed file, 43 raw_file_path (str): Normalized original path of the crashed file,
40 for example, /b/build/slave/mac64/build/src/v8/src/heap/ 44 for example, /b/build/slave/mac64/build/src/v8/src/heap/
41 incremental-marking-job.cc. 45 incremental-marking-job.cc.
42 crashed_line_numbers (list): Line numbers of the file that caused the crash. 46 crashed_line_numbers (list): Line numbers of the file that caused the crash.
43 repo_url (str): Repo url of this frame. 47 repo_url (str): Repo url of this frame.
44 """ 48 """
45 __slots__ = () 49 __slots__ = ()
46 50
47 def __new__(cls, index, dep_path, function, file_path, raw_file_path, 51 def __new__(cls, index, dep_path, function, file_path, raw_file_path,
48 crashed_line_numbers, repo_url=None): 52 crashed_line_numbers, repo_url=None):
53 assert index is not None, TypeError('The index must be an int')
49 return super(cls, StackFrame).__new__(cls, 54 return super(cls, StackFrame).__new__(cls,
50 index, dep_path, function, file_path, raw_file_path, 55 index, dep_path, function, file_path, raw_file_path,
51 crashed_line_numbers, repo_url) 56 crashed_line_numbers, repo_url)
52 57
53 def ToString(self): 58 def ToString(self):
54 frame_str = '#%d in %s @ %s' % (self.index, self.function, self.file_path) 59 frame_str = '#%d in %s @ %s' % (self.index, self.function, self.file_path)
55 if self.crashed_line_numbers: 60 if self.crashed_line_numbers:
56 frame_str += ':%d' % self.crashed_line_numbers[0] 61 frame_str += ':%d' % self.crashed_line_numbers[0]
57 62
58 # For example, if crashed_line_numbers is [61], returns '... f.cc:61', 63 # For example, if crashed_line_numbers is [61], returns '... f.cc:61',
59 # if is [61, 62], returns '... f.cc:61:1' 64 # if is [61, 62], returns '... f.cc:61:1'
60 if len(self.crashed_line_numbers) > 1: 65 if len(self.crashed_line_numbers) > 1:
61 frame_str += ':%d' % (len(self.crashed_line_numbers) - 1) 66 frame_str += ':%d' % (len(self.crashed_line_numbers) - 1)
62 67
63 return frame_str 68 return frame_str
64 69
65 def BlameUrl(self, revision): 70 def BlameUrl(self, revision):
66 if not self.repo_url or not self.dep_path: 71 if not self.repo_url or not self.dep_path:
67 return None 72 return None
68 73
69 blame_url = '%s/+blame/%s/%s' % (self.repo_url, revision, self.file_path) 74 blame_url = '%s/+blame/%s/%s' % (self.repo_url, revision, self.file_path)
70 if self.crashed_line_numbers: 75 if self.crashed_line_numbers:
71 blame_url += '#%d' % self.crashed_line_numbers[0] 76 blame_url += '#%d' % self.crashed_line_numbers[0]
72 77
73 return blame_url 78 return blame_url
74 79
75 def __str__(self): 80 def __str__(self):
76 return self.ToString() 81 return self.ToString()
77 82
83 @staticmethod
84 def Parse(language_type, format_type, line, deps,
85 default_stack_frame_index=None):
86 """Parse line into a StackFrame instance, if possible.
78 87
79 class CallStack(list): 88 Args:
80 """Represents a call stack within a stacktrace. A list of StackFrame objects. 89 language_type (CallStackLanguageType): the language the line is in.
90 format_type (CallStackFormatType): the format the line is in.
91 line (str): The line to be parsed.
92 deps (dict): Map dependency path to its corresponding Dependency.
81 93
82 Attributes: 94 Returns:
83 priority (int): The smaller the number, the higher the priority beginning 95 A ``StackFrame`` or ``None``.
84 with 0. 96 """
85 format_type (CallStackFormatType): Represents the type of line format 97 # TODO(wrengr): how can we avoid duplicating this logic from ``CallStack``?
86 within a callstack. For example: 98 if format_type is None: # pragma: no cover
99 format_type = _DEFAULT_FORMAT_TYPE
87 100
88 CallStackFormatType.JAVA - 101 if language_type is None:
89 'at com.android.commands.am.Am.onRun(Am.java:353)' 102 language_type = _DEFAULT_LANGUAGE_TYPE
90 103
91 CallStackFormatType.SYZYASAN - 104 if format_type == CallStackFormatType.JAVA:
92 'chrome_child!v8::internal::ApplyTransition+0x93 [v8/src/lookup.cc @ 340]' 105 language_type = CallStackLanguageType.JAVA
93 106
94 CallStackFormatType.DEFAULT - 107 line = line.strip()
95 '#0 0x32b5982 in get third_party/WebKit/Source/wtf/RefPtr.h:61:43' 108 line_pattern = CALLSTACK_FORMAT_TO_PATTERN[format_type]
96 language_type (CallStackLanguageType): Either CPP or JAVA language.
97 """
98 def __init__(self, priority, format_type=CallStackFormatType.DEFAULT,
99 language_type=CallStackLanguageType.CPP,
100 frame_list=None):
101 super(CallStack, self).__init__(frame_list or [])
102 109
103 self.priority = priority 110 if format_type == CallStackFormatType.JAVA:
104 self.format_type = format_type
105 self.language_type = (
106 CallStackLanguageType.JAVA if format_type == CallStackFormatType.JAVA
107 else language_type)
108
109 def ParseLine(self, line, deps):
110 """Parse line into StackFrame instance and append it if successfully
111 parsed."""
112 line = line.strip()
113 line_pattern = CALLSTACK_FORMAT_TO_PATTERN[self.format_type]
114
115 if self.format_type == CallStackFormatType.JAVA:
116 match = line_pattern.match(line) 111 match = line_pattern.match(line)
117 if not match: 112 if not match:
118 return 113 return None
119 114
120 function = match.group(1) 115 function = match.group(1)
121 raw_file_path = parse_util.GetFullPathForJavaFrame(function) 116 raw_file_path = parse_util.GetFullPathForJavaFrame(function)
122 crashed_line_numbers = [int(match.group(3))] 117 crashed_line_numbers = [int(match.group(3))]
123 118
124 elif self.format_type == CallStackFormatType.SYZYASAN: 119 elif format_type == CallStackFormatType.SYZYASAN:
125 match = line_pattern.match(line) 120 match = line_pattern.match(line)
126 if not match: 121 if not match:
127 return 122 return None
128 123
129 function = match.group(2).strip() 124 function = match.group(2).strip()
130 raw_file_path = match.group(5) 125 raw_file_path = match.group(5)
131 crashed_line_numbers = [int(match.group(6))] 126 crashed_line_numbers = [int(match.group(6))]
132 127
133 else: 128 else:
134 line_parts = line.split() 129 line_parts = line.split()
135 if not line_parts or not line_parts[0].startswith('#'): 130 if not line_parts or not line_parts[0].startswith('#'):
136 return 131 return None
137 132
138 match = line_pattern.match(line_parts[-1]) 133 match = line_pattern.match(line_parts[-1])
139 if not match: 134 if not match: # pragma: no cover
140 return 135 return None
141 136
142 function = ' '.join(line_parts[3:-1]) 137 function = ' '.join(line_parts[3:-1])
143 138
144 raw_file_path = match.group(1) 139 raw_file_path = match.group(1)
145 # Fracas java stack has default format type. 140 # Fracas java stack has default format type.
146 if self.language_type == CallStackLanguageType.JAVA: 141 if language_type == CallStackLanguageType.JAVA:
147 raw_file_path = parse_util.GetFullPathForJavaFrame(function) 142 raw_file_path = parse_util.GetFullPathForJavaFrame(function)
148 143
149 crashed_line_numbers = parse_util.GetCrashedLineRange( 144 crashed_line_numbers = parse_util.GetCrashedLineRange(
150 match.group(2) + (match.group(3) if match.group(3) else '')) 145 match.group(2) + (match.group(3) if match.group(3) else ''))
151 # Normalize the file path so that it can be compared to repository path. 146 # Normalize the file path so that it can be compared to repository path.
152 dep_path, file_path, repo_url = parse_util.GetDepPathAndNormalizedFilePath( 147 dep_path, file_path, repo_url = parse_util.GetDepPathAndNormalizedFilePath(
153 raw_file_path, deps, self.language_type == CallStackLanguageType.JAVA) 148 raw_file_path, deps, language_type == CallStackLanguageType.JAVA)
154 149
155 # If we have the common stack frame index pattern, then use it 150 # If we have the common stack frame index pattern, then use it
156 # since it is more reliable. 151 # since it is more reliable.
157 index_match = FRAME_INDEX_PATTERN.match(line) 152 index_match = FRAME_INDEX_PATTERN.match(line)
158 if index_match: 153 if index_match:
159 stack_frame_index = int(index_match.group(1)) 154 stack_frame_index = int(index_match.group(1))
160 else: 155 else:
161 stack_frame_index = len(self) 156 stack_frame_index = int(default_stack_frame_index or 0)
162 157
163 self.append(StackFrame(stack_frame_index, dep_path, function, file_path, 158 return StackFrame(stack_frame_index, dep_path, function, file_path,
164 raw_file_path, crashed_line_numbers, repo_url)) 159 raw_file_path, crashed_line_numbers, repo_url)
165 160
166 161
162 # N.B., because ``list`` is mutable it isn't hashable, thus cannot be
163 # used as a key in a dict. Because we want to use callstacks as keys (for
164 # memoization) we has-a tuple rather than is-a list.
165 class CallStack(namedtuple('CallStack',
166 ['priority', 'frames', 'format_type', 'language_type'])):
167 """A stack (sequence of ``StackFrame`` objects) in a ``Stacktrace``.
168
169 Attributes:
170 priority (int): The smaller the number, the higher the priority beginning
171 with 0.
172 frames (tuple of StackFrame): the frames in order from bottom to top.
173 format_type (CallStackFormatType): Represents the type of line format
174 within a callstack. For example:
175
176 CallStackFormatType.JAVA -
177 'at com.android.commands.am.Am.onRun(Am.java:353)'
178
179 CallStackFormatType.SYZYASAN -
180 'chrome_child!v8::internal::ApplyTransition+0x93 [v8/src/lookup.cc @ 340]'
181
182 CallStackFormatType.DEFAULT -
183 '#0 0x32b5982 in get third_party/WebKit/Source/wtf/RefPtr.h:61:43'
184 language_type (CallStackLanguageType): Either CPP or JAVA language.
185 """
186 __slots__ = ()
187
188 def __new__(cls, priority, format_type=None, language_type=None,
189 frame_list=None):
190 """Construct a new ``CallStack``.
191
192 N.B., we use ``None`` as the default value of the optional arguments
193 so that if callers need to explicitly provide those arguments but
194 don't have an explicit value, they can pass ``None`` to get at the
195 default without needing to be kept in sync with this constructor. For
196 example, ``ChromeCrashParser.Parse`` constructs a stack and
197 needs to keep track of all the arguments to be passed to this function.
198
199 Args:
200 priority (int): The priority of this stack in its ``Stacktrace``.
201 format_type (CallStackFormatType): Optional. The stack's format.
202 language_type (CallStackLanguageType): Optional. The stack's language.
203 frame_list (iterable of StackFrame): Optional. The frames in the stack.
204 """
205 if format_type is None:
206 format_type = _DEFAULT_FORMAT_TYPE
207
208 if language_type is None:
209 language_type = _DEFAULT_LANGUAGE_TYPE
210
211 if format_type == CallStackFormatType.JAVA:
212 language_type = CallStackLanguageType.JAVA
213
214 if frame_list is None:
215 frame_list = []
216
217 return super(cls, CallStack).__new__(cls,
218 priority, tuple(frame_list), format_type, language_type)
219
220 def __len__(self):
221 """Returns the number of frames in this stack."""
222 return len(self.frames)
223
224 # TODO(crbug.com/672641): do have unittests for this, but for some
225 # reason coverage isn't seeing them.
226 def __bool__(self): # pragma: no cover
227 """Returns whether this stack is non-empty."""
228 return bool(self.frames)
229
230 def __iter__(self):
231 """Iterator over the frames in this stack."""
232 return iter(self.frames)
233
234 def SliceFrames(self, low_index, high_index):
235 """Returns a new ``CallStack`` keeping only the specified frames.
236
237 Args:
238 low_index (int or None): the lowest index to keep. If ``None``
239 then defaults to 0.
240 high_index (int or None): the index after the highest one to
241 keep. If ``None`` then defaults to one after the highest index.
242
243 Returns:
244 A new ``CallStack`` instance. If both arguments are ``None`` then
245 we return the original stack object, because they are equal and
246 due to immutability there's no reason to clone the instance.
247 """
248 if low_index is None and high_index is None:
249 return self
250
251 # TODO(wrengr): can we use ``_replace`` without running into TypeErrors?
252 return CallStack(self.priority,
253 format_type=self.format_type,
254 language_type=self.language_type,
255 frame_list=self.frames[low_index:high_index])
256
257
258 # N.B., because ``list`` is mutable it isn't hashable, thus cannot be
259 # used as a key in a dict. Because we want to use callstacks as keys (for
260 # memoization) we has-a tuple rather than is-a list.
167 # TODO(http://crbug.com/644476): this class needs a better name. 261 # TODO(http://crbug.com/644476): this class needs a better name.
168 class Stacktrace(list): 262 class Stacktrace(object):
169 """A collection of callstacks which together provide a trace of what happened. 263 """A collection of callstacks which together provide a trace of what happened.
170 264
171 For instance, when doing memory debugging we will have callstacks for 265 For instance, when doing memory debugging we will have callstacks for
172 (1) when the crash occurred, (2) when the object causing the crash 266 (1) when the crash occurred, (2) when the object causing the crash
173 was allocated, (3) when the object causing the crash was freed (for 267 was allocated, (3) when the object causing the crash was freed (for
174 use-after-free crashes), etc. What callstacks are included in the 268 use-after-free crashes), etc. What callstacks are included in the
175 trace is unspecified, since this differs for different tools.""" 269 trace is unspecified, since this differs for different tools."""
176 def __init__(self, stack_list=None, signature=None): 270 def __init__(self, stack_list=None, signature=None):
177 super(Stacktrace, self).__init__(stack_list or []) 271 self.stacks = stack_list or []
178
179 self._crash_stack = None 272 self._crash_stack = None
180 self._signature_parts = None 273 self._signature_parts = None
181 if signature: 274 if signature:
182 # Filter out the types of signature, for example [Out of Memory]. 275 # Filter out the types of signature, for example [Out of Memory].
183 signature = re.sub('[[][^]]*[]]\s*', '', signature) 276 signature = re.sub('[[][^]]*[]]\s*', '', signature)
184 # For clusterfuzz crash, the signature is crash state. It is 277 # For clusterfuzz crash, the signature is crash state. It is
185 # usually the top 3 important stack frames separated by '\n'. 278 # usually the top 3 important stack frames separated by '\n'.
186 self._signature_parts = signature.split('\n') 279 self._signature_parts = signature.split('\n')
187 280
281 def __getitem__(self, i): # pragma: no cover
282 return self.stacks[i]
283
284 def __len__(self):
285 return len(self.stacks)
286
287 def __bool__(self): # pragma: no cover
288 return bool(self.stacks)
289
290 def __iter__(self):
291 return iter(self.stacks)
188 292
189 @property 293 @property
190 def crash_stack(self): 294 def crash_stack(self):
191 """Get the callstack with the highest priority (i.e., whose priority 295 """Get the callstack with the highest priority (i.e., whose priority
192 field is numerically the smallest) in the stacktrace.""" 296 field is numerically the smallest) in the stacktrace."""
193 if not self: 297 if not self.stacks:
194 logging.warning('Cannot get crash stack for empty stacktrace: %s', self) 298 logging.warning('Cannot get crash stack for empty stacktrace: %s', self)
195 return None 299 return None
196 300
197 if self._crash_stack is None and self._signature_parts: 301 if self._crash_stack is None and self._signature_parts:
198 def _IsSignatureCallstack(callstack): 302 def _IsSignatureCallstack(callstack):
199 for index, frame in enumerate(callstack): 303 for index, frame in enumerate(callstack):
200 for signature_part in self._signature_parts: 304 for signature_part in self._signature_parts:
201 if signature_part in frame.function: 305 if signature_part in frame.function:
202 return True, index 306 return True, index
203 307
204 return False, 0 308 return False, 0
205 309
206 # Set the crash stack using signature callstack. 310 # Set the crash stack using signature callstack.
207 for callstack in self: 311 for callstack in self.stacks:
208 is_signature_callstack, index = _IsSignatureCallstack(callstack) 312 is_signature_callstack, index = _IsSignatureCallstack(callstack)
209 if is_signature_callstack: 313 if is_signature_callstack:
210 # Filter all the stack frames before signature. 314 # Filter all the stack frames before signature.
211 callstack[:] = callstack[index:] 315 self._crash_stack = callstack.SliceFrames(index, None)
212 self._crash_stack = callstack
213 break 316 break
214 317
215 # If there is no signature callstack, fall back to set crash stack using 318 # If there is no signature callstack, fall back to set crash stack using
216 # the first least priority callstack. 319 # the first least priority callstack.
217 if self._crash_stack is None: 320 if self._crash_stack is None:
218 self._crash_stack = sorted(self, key=lambda stack: stack.priority)[0] 321 self._crash_stack = sorted(self.stacks,
322 key=lambda stack: stack.priority)[0]
219 323
220 return self._crash_stack 324 return self._crash_stack
OLDNEW
« no previous file with comments | « appengine/findit/crash/project_classifier.py ('k') | appengine/findit/crash/test/changelist_classifier_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698