Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(100)

Side by Side Diff: tools/findit/crash_utils.py

Issue 478763003: [Findit] Bug fixing and implemented some feature requests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed codereview and removed all references to logging Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/findit/config.ini ('k') | tools/findit/findit_for_clusterfuzz.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import cgi 5 import cgi
6 import ConfigParser 6 import ConfigParser
7 import json 7 import json
8 import logging
9 import os 8 import os
10 import time 9 import time
11 import urllib2
12 10
13 from common import utils 11 from common import utils
14 from result import Result 12 from result import Result
15 13
16 14
17 INFINITY = float('inf') 15 INFINITY = float('inf')
18 16
19 17
18 def GetRepositoryType(revision_number):
19 """Returns the repository type of this revision number.
20
21 Args:
22 revision_number: A revision number or git hash.
23
24 Returns:
25 'git' or 'svn', depending on the revision_number.
26 """
27 if utils.IsGitHash(revision_number):
28 return 'git'
29 else:
30 return 'svn'
31
32
20 def ParseURLsFromConfig(file_name): 33 def ParseURLsFromConfig(file_name):
21 """Parses URLS from the config file. 34 """Parses URLS from the config file.
22 35
23 The file should be in python config format, where svn section is in the 36 The file should be in python config format, where svn section is in the
24 format "svn:component_path". 37 format "svn:component_path".
25 Each of the sections for svn should contain changelog_url, revision_url, 38 Each of the sections for svn should contain changelog_url, revision_url,
26 diff_url and blame_url. 39 diff_url and blame_url.
27 40
28 Args: 41 Args:
29 file_name: The name of the file that contains URL information. 42 file_name: The name of the file that contains URL information.
30 43
31 Returns: 44 Returns:
32 A dictionary that maps repository type to list of URLs. For svn, it maps 45 A dictionary that maps repository type to list of URLs. For svn, it maps
33 key 'svn' to another dictionary, which maps component path to the URLs 46 key 'svn' to another dictionary, which maps component path to the URLs
34 as explained above. For git, it maps to the URLs as explained above. 47 as explained above. For git, it maps to the URLs as explained above.
35 """ 48 """
36 config = ConfigParser.ConfigParser() 49 config = ConfigParser.ConfigParser()
37 50
38 # Get the absolute path of the config file, and read the file. If it fails, 51 # Get the absolute path of the config file, and read the file. If it fails,
39 # return none. 52 # return none.
40 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 53 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
41 file_name) 54 file_name)
42 config.read(config_file_path) 55 config.read(config_file_path)
43 if not config: 56 if not config:
44 logging.error('Config file with URLs does not exist.')
45 return None 57 return None
46 58
47 # Iterate through the config file, check for sections. 59 # Iterate through the config file, check for sections.
48 parsed_config = {} 60 config_dict = {}
49 for section in config.sections(): 61 for section in config.sections():
50 # These two do not need another layer of dictionary, so add it and go 62 # These two do not need another layer of dictionary, so add it and go
51 # to next section. 63 # to next section.
52 if ':' not in section: 64 if ':' not in section:
53 for option in config.options(section): 65 for option in config.options(section):
54 if section not in parsed_config: 66 if section not in config_dict:
55 parsed_config[section] = {} 67 config_dict[section] = {}
56 68
57 url = config.get(section, option) 69 url = config.get(section, option)
58 parsed_config[section][option] = url 70 config_dict[section][option] = url
59 71
60 continue 72 continue
61 73
62 # Get repository type and component name from the section name. 74 # Get repository type and component name from the section name.
63 repository_type_and_component = section.split(':') 75 repository_type_and_component = section.split(':')
64 repository_type = repository_type_and_component[0] 76 repository_type = repository_type_and_component[0]
65 component_path = repository_type_and_component[1] 77 component_path = repository_type_and_component[1]
66 78
67 # Add 'svn' as the key, if it is not already there. 79 # Add 'svn' as the key, if it is not already there.
68 if repository_type not in parsed_config: 80 if repository_type not in config_dict:
69 parsed_config[repository_type] = {} 81 config_dict[repository_type] = {}
70 url_map_for_repository = parsed_config[repository_type] 82 url_map_for_repository = config_dict[repository_type]
71 83
72 # Add the path to the 'svn', if it is not already there. 84 # Add the path to the 'svn', if it is not already there.
73 if component_path not in url_map_for_repository: 85 if component_path not in url_map_for_repository:
74 url_map_for_repository[component_path] = {} 86 url_map_for_repository[component_path] = {}
75 type_to_url = url_map_for_repository[component_path] 87 type_to_url = url_map_for_repository[component_path]
76 88
77 # Add all URLs to this map. 89 # Add all URLs to this map.
78 for option in config.options(section): 90 for option in config.options(section):
79 url = config.get(section, option) 91 url = config.get(section, option)
80 type_to_url[option] = url 92 type_to_url[option] = url
81 93
82 return parsed_config 94 return config_dict
83 95
84 96
85 def NormalizePathLinux(path, parsed_deps): 97 def NormalizePath(path, parsed_deps):
86 """Normalizes linux path. 98 """Normalizes the path.
87 99
88 Args: 100 Args:
89 path: A string representing a path. 101 path: A string representing a path.
90 parsed_deps: A map from component path to its component name, repository, 102 parsed_deps: A map from component path to its component name, repository,
91 etc. 103 etc.
92 104
93 Returns: 105 Returns:
94 A tuple containing a component this path is in (e.g blink, skia, etc) 106 A tuple containing a component this path is in (e.g blink, skia, etc)
95 and a path in that component's repository. 107 and a path in that component's repository. Returns None if the component
108 repository is not supported, i.e from googlecode.
96 """ 109 """
97 # First normalize the path by retrieving the absolute path. 110 # First normalize the path by retrieving the normalized path.
98 normalized_path = os.path.abspath(path) 111 normalized_path = os.path.normpath(path.replace('\\', '/'))
99 112
100 # Iterate through all component paths in the parsed DEPS, in the decreasing 113 # Iterate through all component paths in the parsed DEPS, in the decreasing
101 # order of the length of the file path. 114 # order of the length of the file path.
102 for component_path in sorted(parsed_deps, 115 for component_path in sorted(parsed_deps,
103 key=(lambda path: -len(path))): 116 key=(lambda path: -len(path))):
104 # New_path is the component path with 'src/' removed. 117 # new_component_path is the component path with 'src/' removed.
105 new_path = component_path 118 new_component_path = component_path
106 if new_path.startswith('src/') and new_path != 'src/': 119 if new_component_path.startswith('src/') and new_component_path != 'src/':
107 new_path = new_path[len('src/'):] 120 new_component_path = new_component_path[len('src/'):]
121
122 # We need to consider when the lowercased component path is in the path,
123 # because syzyasan build returns lowercased file path.
124 lower_component_path = new_component_path.lower()
108 125
109 # If this path is the part of file path, this file must be from this 126 # If this path is the part of file path, this file must be from this
110 # component. 127 # component.
111 if new_path in normalized_path: 128 if new_component_path in normalized_path or \
129 lower_component_path in normalized_path:
112 130
113 # Currently does not support googlecode. 131 # Case when the retrieved path is in lowercase.
114 if 'googlecode' in parsed_deps[component_path]['repository']: 132 if lower_component_path in normalized_path:
115 return (None, '', '') 133 current_component_path = lower_component_path
134 else:
135 current_component_path = new_component_path
116 136
117 # Normalize the path by stripping everything off the component's relative 137 # Normalize the path by stripping everything off the component's relative
118 # path. 138 # path.
119 normalized_path = normalized_path.split(new_path,1)[1] 139 normalized_path = normalized_path.split(current_component_path, 1)[1]
140 lower_normalized_path = normalized_path.lower()
120 141
121 # Add 'src/' or 'Source/' at the front of the normalized path, depending 142 # Add 'src/' or 'Source/' at the front of the normalized path, depending
122 # on what prefix the component path uses. For example, blink uses 143 # on what prefix the component path uses. For example, blink uses
123 # 'Source' but chromium uses 'src/', and blink component path is 144 # 'Source' but chromium uses 'src/', and blink component path is
124 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the 145 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the
125 # normalized path. 146 # normalized path.
126 if not (normalized_path.startswith('src/') or 147 if not (lower_normalized_path.startswith('src/') or
127 normalized_path.startswith('Source/')): 148 lower_normalized_path.startswith('source/')):
128 149
129 if (new_path.lower().endswith('src/') or 150 if (lower_component_path.endswith('src/') or
130 new_path.lower().endswith('source/')): 151 lower_component_path.endswith('source/')):
131 normalized_path = new_path.split('/')[-2] + '/' + normalized_path 152 normalized_path = (current_component_path.split('/')[-2] + '/' +
153 normalized_path)
132 154
133 else: 155 else:
134 normalized_path = 'src/' + normalized_path 156 normalized_path = 'src/' + normalized_path
135 157
136 component_name = parsed_deps[component_path]['name'] 158 component_name = parsed_deps[component_path]['name']
137 159
138 return (component_path, component_name, normalized_path) 160 return (component_path, component_name, normalized_path)
139 161
140 # If the path does not match any component, default to chromium. 162 # If the path does not match any component, default to chromium.
141 return ('src/', 'chromium', normalized_path) 163 return ('src/', 'chromium', normalized_path)
(...skipping 11 matching lines...) Expand all
153 """ 175 """
154 if not regression: 176 if not regression:
155 return None 177 return None
156 178
157 revisions = regression.split(':') 179 revisions = regression.split(':')
158 180
159 # If regression information is not available, return none. 181 # If regression information is not available, return none.
160 if len(revisions) != 2: 182 if len(revisions) != 2:
161 return None 183 return None
162 184
163 # Strip 'r' from both start and end range. 185 range_start = revisions[0]
164 range_start = revisions[0].lstrip('r') 186 range_end = revisions[1]
165 range_end = revisions[1].lstrip('r') 187
188 # Strip 'r' off the range start/end. Not using lstrip to avoid the case when
189 # the range is in git hash and it starts with 'r'.
190 if range_start.startswith('r'):
191 range_start = range_start[1:]
192
193 if range_end.startswith('r'):
194 range_end = range_end[1:]
166 195
167 return [range_start, range_end] 196 return [range_start, range_end]
168 197
169 198
170 def LoadJSON(json_string): 199 def LoadJSON(json_string):
171 """Loads json object from string, or None. 200 """Loads json object from string, or None.
172 201
173 Args: 202 Args:
174 json_string: A string to get object from. 203 json_string: A string to get object from.
175 204
(...skipping 18 matching lines...) Expand all
194 timeout: Time in seconds to wait before time out. 223 timeout: Time in seconds to wait before time out.
195 224
196 Returns: 225 Returns:
197 None if the data retrieval fails, or the raw data. 226 None if the data retrieval fails, or the raw data.
198 """ 227 """
199 count = 0 228 count = 0
200 while True: 229 while True:
201 count += 1 230 count += 1
202 # Retrieves data from URL. 231 # Retrieves data from URL.
203 try: 232 try:
204 _, data = utils.GetHttpClient().Get(url) 233 _, data = utils.GetHttpClient().Get(url, timeout=timeout)
205 return data 234 return data
206 except IOError: 235 except IOError:
207 if count < retries: 236 if count < retries:
208 # If retrieval fails, try after sleep_time seconds. 237 # If retrieval fails, try after sleep_time seconds.
209 time.sleep(sleep_time) 238 time.sleep(sleep_time)
210 else: 239 else:
211 break 240 break
212 241
213 # Return None if it fails to read data from URL 'retries' times. 242 # Return None if it fails to read data from URL 'retries' times.
214 return None 243 return None
215 244
216 245
217 def FindMinLineDistance(crashed_line_list, changed_line_numbers): 246 def FindMinLineDistance(crashed_line_list, changed_line_numbers,
247 line_range=3):
218 """Calculates how far the changed line is from one of the crashes. 248 """Calculates how far the changed line is from one of the crashes.
219 249
220 Finds the minimum distance between the lines that the file crashed on 250 Finds the minimum distance between the lines that the file crashed on
221 and the lines that the file changed. For example, if the file crashed on 251 and the lines that the file changed. For example, if the file crashed on
222 line 200 and the CL changes line 203,204 and 205, the function returns 3. 252 line 200 and the CL changes line 203,204 and 205, the function returns 3.
223 253
224 Args: 254 Args:
225 crashed_line_list: A list of lines that the file crashed on. 255 crashed_line_list: A list of lines that the file crashed on.
226 changed_line_numbers: A list of lines that the file changed. 256 changed_line_numbers: A list of lines that the file changed.
257 line_range: Number of lines to look back for.
227 258
228 Returns: 259 Returns:
229 The minimum distance. If either of the input lists is empty, 260 The minimum distance. If either of the input lists is empty,
230 it returns inf. 261 it returns inf.
231 262
232 """ 263 """
233 min_distance = INFINITY 264 min_distance = INFINITY
265 crashed_line = -1
266 changed_line = -1
234 267
235 for line in crashed_line_list: 268 crashed_line_numbers = set()
269 for crashed_line_range in crashed_line_list:
270 for crashed_line in crashed_line_range:
271 for line in range(crashed_line - line_range, crashed_line + 1):
272 crashed_line_numbers.add(line)
273
274 for line in crashed_line_numbers:
236 for distance in changed_line_numbers: 275 for distance in changed_line_numbers:
237 # Find the current distance and update the min if current distance is 276 # Find the current distance and update the min if current distance is
238 # less than current min. 277 # less than current min.
239 current_distance = abs(line - distance) 278 current_distance = abs(line - distance)
240 if current_distance < min_distance: 279 if current_distance < min_distance:
241 min_distance = current_distance 280 min_distance = current_distance
281 crashed_line = line
282 changed_line = distance
242 283
243 return min_distance 284 return (min_distance, crashed_line, changed_line)
244 285
245 286
246 def GuessIfSameSubPath(path1, path2): 287 def GuessIfSameSubPath(path1, path2):
247 """Guesses if two paths represent same path. 288 """Guesses if two paths represent same path.
248 289
249 Compares the name of the folders in the path (by split('/')), and checks 290 Compares the name of the folders in the path (by split('/')), and checks
250 if they match either more than 3 or min of path lengths. 291 if they match either more than 3 or min of path lengths.
251 292
252 Args: 293 Args:
253 path1: First path. 294 path1: First path.
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
328 Returns: 369 Returns:
329 A string representation of file names with their urls. 370 A string representation of file names with their urls.
330 """ 371 """
331 ret = ['\n'] 372 ret = ['\n']
332 for file_name, file_url in file_list: 373 for file_name, file_url in file_list:
333 ret.append(' %s\n' % AddHyperlink(file_name, file_url)) 374 ret.append(' %s\n' % AddHyperlink(file_name, file_url))
334 return ''.join(ret) 375 return ''.join(ret)
335 376
336 377
337 def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers, 378 def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,
338 line_range=3): 379 function, line_range=3):
339 """Finds the overlap between changed lines and crashed lines. 380 """Finds the overlap between changed lines and crashed lines.
340 381
341 Finds the intersection of the lines that caused the crash and 382 Finds the intersection of the lines that caused the crash and
342 lines that the file changes. The intersection looks within 3 lines 383 lines that the file changes. The intersection looks within 3 lines
343 of the line that caused the crash. 384 of the line that caused the crash.
344 385
345 Args: 386 Args:
346 crashed_line_list: A list of lines that the file crashed on. 387 crashed_line_list: A list of lines that the file crashed on.
347 stack_frame_index: A list of positions in stack for each of the lines. 388 stack_frame_index: A list of positions in stack for each of the lines.
348 changed_line_numbers: A list of lines that the file changed. 389 changed_line_numbers: A list of lines that the file changed.
390 function: A list of functions that the file crashed on.
stgao 2014/08/23 00:44:13 function -> functions
349 line_range: Number of lines to look backwards from crashed lines. 391 line_range: Number of lines to look backwards from crashed lines.
350 392
351 Returns: 393 Returns:
352 line_intersection: Intersection between crashed_line_list and 394 line_number_intersection: Intersection between crashed_line_list and
353 changed_line_numbers. 395 changed_line_numbers.
354 stack_frame_index_intersection: Stack number for each of the intersections. 396 stack_frame_index_intersection: Stack number for each of the intersections.
355 """ 397 """
356 line_intersection = [] 398 line_number_intersection = []
357 stack_frame_index_intersection = [] 399 stack_frame_index_intersection = []
400 function_intersection = []
358 401
359 # Iterate through the crashed lines, and its occurrence in stack. 402 # Iterate through the crashed lines, and its occurrence in stack.
360 for (line, stack_frame_index) in zip(crashed_line_list, stack_frame_index): 403 for (lines, stack_frame_index, function_name) in zip(
361 # Also check previous 'line_range' lines. 404 crashed_line_list, stack_frame_index, function):
362 line_minus_n = range(line - line_range, line + 1) 405 # Also check previous 'line_range' lines. Create a set of all changed lines
406 # and lines within 3 lines range before the crashed line.
407 line_minus_n = set()
408 for line in lines:
409 for line_in_range in range(line - line_range, line + 1):
410 line_minus_n.add(line_in_range)
363 411
364 for changed_line in changed_line_numbers: 412 for changed_line in changed_line_numbers:
365 # If a CL does not change crashed line, check next line. 413 # If a CL does not change crashed line, check next line.
366 if changed_line not in line_minus_n: 414 if changed_line not in line_minus_n:
367 continue 415 continue
368 416
417 intersected_line = set()
369 # If the changed line is exactly the crashed line, add that line. 418 # If the changed line is exactly the crashed line, add that line.
370 if line in changed_line_numbers: 419 for line in lines:
371 intersected_line = line 420 if line in changed_line_numbers:
421 intersected_line.add(line)
372 422
373 # If the changed line is in 3 lines of the crashed line, add the line. 423 # If the changed line is in 3 lines of the crashed line, add the line.
374 else: 424 else:
375 intersected_line = changed_line 425 intersected_line.add(changed_line)
376 426
377 # Avoid adding the same line twice. 427 # Avoid adding the same line twice.
378 if intersected_line not in line_intersection: 428 if intersected_line not in line_number_intersection:
379 line_intersection.append(intersected_line) 429 line_number_intersection.append(list(intersected_line))
380 stack_frame_index_intersection.append(stack_frame_index) 430 stack_frame_index_intersection.append(stack_frame_index)
381 431 function_intersection.append(function_name)
382 break 432 break
383 433
384 return (line_intersection, stack_frame_index_intersection) 434 return (line_number_intersection, stack_frame_index_intersection,
435 function_intersection)
385 436
386 437
387 def MatchListToResultList(matches): 438 def MatchListToResultList(matches):
388 """Convert list of matches to the list of result objects. 439 """Convert list of matches to the list of result objects.
389 440
390 Args: 441 Args:
391 matches: A list of match objects along with its stack priority and revision 442 matches: A list of match objects along with its stack priority and revision
392 number/git hash 443 number/git hash
393 Returns: 444 Returns:
394 A list of result objects. 445 A list of result objects.
395 446
396 """ 447 """
397 result_list = [] 448 result_list = []
398 449
399 for _, cl, match in matches: 450 for _, cl, match in matches:
400 suspected_cl = cl 451 suspected_cl = cl
401 revision_url = match.revision_url 452 revision_url = match.revision_url
402 component_name = match.component_name 453 component_name = match.component_name
403 author = match.author 454 author = match.author
404 reason = match.reason 455 reason = match.reason
405 review_url = match.review_url 456 review_url = match.review_url
406 reviewers = match.reviewers 457 reviewers = match.reviewers
407 # For matches, line content does not exist. 458 # For matches, line content does not exist.
408 line_content = None 459 line_content = None
460 message = match.message
409 461
410 result = Result(suspected_cl, revision_url, component_name, author, reason, 462 result = Result(suspected_cl, revision_url, component_name, author, reason,
411 review_url, reviewers, line_content) 463 review_url, reviewers, line_content, message)
412 result_list.append(result) 464 result_list.append(result)
413 465
414 return result_list 466 return result_list
415 467
416 468
417 def BlameListToResultList(blame_list): 469 def BlameListToResultList(blame_list):
418 """Convert blame list to the list of result objects. 470 """Convert blame list to the list of result objects.
419 471
420 Args: 472 Args:
421 blame_list: A list of blame objects. 473 blame_list: A list of blame objects.
422 474
423 Returns: 475 Returns:
424 A list of result objects. 476 A list of result objects.
425 """ 477 """
426 result_list = [] 478 result_list = []
427 479
428 for blame in blame_list: 480 for blame in blame_list:
429 suspected_cl = blame.revision 481 suspected_cl = blame.revision
430 revision_url = blame.url 482 revision_url = blame.url
431 component_name = blame.component_name 483 component_name = blame.component_name
432 author = blame.author 484 author = blame.author
433 reason = ( 485 reason = (
434 'The CL changes line %s of file %s from stack %d.' % 486 'The CL last changed line %s of file %s, which is stack frame %d.' %
435 (blame.line_number, blame.file, blame.stack_frame_index)) 487 (blame.line_number, blame.file, blame.stack_frame_index))
436 # Blame object does not have review url and reviewers. 488 # Blame object does not have review url and reviewers.
437 review_url = None 489 review_url = None
438 reviewers = None 490 reviewers = None
439 line_content = blame.line_content 491 line_content = blame.line_content
492 message = blame.message
440 493
441 result = Result(suspected_cl, revision_url, component_name, author, reason, 494 result = Result(suspected_cl, revision_url, component_name, author, reason,
442 review_url, reviewers, line_content) 495 review_url, reviewers, line_content, message)
443 result_list.append(result) 496 result_list.append(result)
444 497
445 return result_list 498 return result_list
OLDNEW
« no previous file with comments | « tools/findit/config.ini ('k') | tools/findit/findit_for_clusterfuzz.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698