Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: tools/findit/crash_utils.py

Issue 478763003: [Findit] Bug fixing and implemented some feature requests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: fixed a bug in intersection Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import cgi 5 import cgi
6 import ConfigParser 6 import ConfigParser
7 import json 7 import json
8 import logging 8 import logging
9 import os 9 import os
10 import time 10 import time
11 import urllib2 11 import urllib2
12 12
13 from common import utils
14 from result import Result 13 from result import Result
15 14
16 15
17 INFINITY = float('inf') 16 INFINITY = float('inf')
18 17
19 18
20 def ParseURLsFromConfig(file_name): 19 def ParseURLsFromConfig(file_name):
21 """Parses URLS from the config file. 20 """Parses URLS from the config file.
22 21
23 The file should be in python config format, where svn section is in the 22 The file should be in python config format, where svn section is in the
(...skipping 14 matching lines...) Expand all
38 # Get the absolute path of the config file, and read the file. If it fails, 37 # Get the absolute path of the config file, and read the file. If it fails,
39 # return none. 38 # return none.
40 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 39 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
41 file_name) 40 file_name)
42 config.read(config_file_path) 41 config.read(config_file_path)
43 if not config: 42 if not config:
44 logging.error('Config file with URLs does not exist.') 43 logging.error('Config file with URLs does not exist.')
45 return None 44 return None
46 45
47 # Iterate through the config file, check for sections. 46 # Iterate through the config file, check for sections.
48 parsed_config = {} 47 config_dict = {}
49 for section in config.sections(): 48 for section in config.sections():
50 # These two do not need another layer of dictionary, so add it and go 49 # These two do not need another layer of dictionary, so add it and go
51 # to next section. 50 # to next section.
52 if ':' not in section: 51 if ':' not in section:
53 for option in config.options(section): 52 for option in config.options(section):
54 if section not in parsed_config: 53 if section not in config_dict:
55 parsed_config[section] = {} 54 config_dict[section] = {}
56 55
57 url = config.get(section, option) 56 url = config.get(section, option)
58 parsed_config[section][option] = url 57 config_dict[section][option] = url
59 58
60 continue 59 continue
61 60
62 # Get repository type and component name from the section name. 61 # Get repository type and component name from the section name.
63 repository_type_and_component = section.split(':') 62 repository_type_and_component = section.split(':')
64 repository_type = repository_type_and_component[0] 63 repository_type = repository_type_and_component[0]
65 component_path = repository_type_and_component[1] 64 component_path = repository_type_and_component[1]
66 65
67 # Add 'svn' as the key, if it is not already there. 66 # Add 'svn' as the key, if it is not already there.
68 if repository_type not in parsed_config: 67 if repository_type not in config_dict:
69 parsed_config[repository_type] = {} 68 config_dict[repository_type] = {}
70 url_map_for_repository = parsed_config[repository_type] 69 url_map_for_repository = config_dict[repository_type]
71 70
72 # Add the path to the 'svn', if it is not already there. 71 # Add the path to the 'svn', if it is not already there.
73 if component_path not in url_map_for_repository: 72 if component_path not in url_map_for_repository:
74 url_map_for_repository[component_path] = {} 73 url_map_for_repository[component_path] = {}
75 type_to_url = url_map_for_repository[component_path] 74 type_to_url = url_map_for_repository[component_path]
76 75
77 # Add all URLs to this map. 76 # Add all URLs to this map.
78 for option in config.options(section): 77 for option in config.options(section):
79 url = config.get(section, option) 78 url = config.get(section, option)
80 type_to_url[option] = url 79 type_to_url[option] = url
81 80
82 return parsed_config 81 return config_dict
83 82
84 83
85 def NormalizePathLinux(path, parsed_deps): 84 def NormalizePath(path, parsed_deps):
86 """Normalizes linux path. 85 """Normalizes the path.
87 86
88 Args: 87 Args:
89 path: A string representing a path. 88 path: A string representing a path.
90 parsed_deps: A map from component path to its component name, repository, 89 parsed_deps: A map from component path to its component name, repository,
91 etc. 90 etc.
92 91
93 Returns: 92 Returns:
 94 A tuple containing a component this path is in (e.g. blink, skia, etc.) 93 A tuple containing a component this path is in (e.g. blink, skia, etc.)
95 and a path in that component's repository. 94 and a path in that component's repository. Returns None if the component
stgao 2014/08/22 06:50:53 It seems googlecode is not checked.
jeun 2014/08/22 22:58:43 Even if we don't check it here, it will fail later
 95 repository is not supported, i.e. from googlecode.
96 """ 96 """
 97 # First normalize the path by retrieving the absolute path. 97 # First normalize the path by retrieving the absolute path.
98 normalized_path = os.path.abspath(path) 98 normalized_path = os.path.normpath(path.replace('\\','/'))
stgao 2014/08/22 06:50:53 Please also update the comment.
jeun 2014/08/22 22:58:43 Done.
99 99
100 # Iterate through all component paths in the parsed DEPS, in the decreasing 100 # Iterate through all component paths in the parsed DEPS, in the decreasing
101 # order of the length of the file path. 101 # order of the length of the file path.
102 for component_path in sorted(parsed_deps, 102 for component_path in sorted(parsed_deps,
103 key=(lambda path: -len(path))): 103 key=(lambda path: -len(path))):
104 # New_path is the component path with 'src/' removed. 104 # new_component_path is the component path with 'src/' removed.
105 new_path = component_path 105 new_component_path = component_path
106 if new_path.startswith('src/') and new_path != 'src/': 106 if new_component_path.startswith('src/') and new_component_path != 'src/':
107 new_path = new_path[len('src/'):] 107 new_component_path = new_component_path[len('src/'):]
108
109 # We need to consider when the lowercased component path is in the path,
110 # because syzyasan build returns lowercased file path.
111 lower_component_path = new_component_path.lower()
108 112
109 # If this path is the part of file path, this file must be from this 113 # If this path is the part of file path, this file must be from this
110 # component. 114 # component.
111 if new_path in normalized_path: 115 if new_component_path in normalized_path or \
116 lower_component_path in normalized_path:
112 117
 113 # Currently does not support googlecode. 118 # Case when the retrieved path is in lowercase.
114 if 'googlecode' in parsed_deps[component_path]['repository']: 119 if lower_component_path in normalized_path:
115 return (None, '', '') 120 current_component_path = lower_component_path
121 else:
122 current_component_path = new_component_path
116 123
117 # Normalize the path by stripping everything off the component's relative 124 # Normalize the path by stripping everything off the component's relative
118 # path. 125 # path.
119 normalized_path = normalized_path.split(new_path,1)[1] 126 normalized_path = normalized_path.split(current_component_path, 1)[1]
127 lower_normalized_path = normalized_path.lower()
120 128
121 # Add 'src/' or 'Source/' at the front of the normalized path, depending 129 # Add 'src/' or 'Source/' at the front of the normalized path, depending
122 # on what prefix the component path uses. For example, blink uses 130 # on what prefix the component path uses. For example, blink uses
123 # 'Source' but chromium uses 'src/', and blink component path is 131 # 'Source' but chromium uses 'src/', and blink component path is
124 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the 132 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the
125 # normalized path. 133 # normalized path.
126 if not (normalized_path.startswith('src/') or 134 if not (lower_normalized_path.startswith('src/') or
127 normalized_path.startswith('Source/')): 135 lower_normalized_path.startswith('source/')):
128 136
129 if (new_path.lower().endswith('src/') or 137 if (lower_component_path.endswith('src/') or
130 new_path.lower().endswith('source/')): 138 lower_component_path.endswith('source/')):
131 normalized_path = new_path.split('/')[-2] + '/' + normalized_path 139 normalized_path = (current_component_path.split('/')[-2] + '/' +
140 normalized_path)
132 141
133 else: 142 else:
134 normalized_path = 'src/' + normalized_path 143 normalized_path = 'src/' + normalized_path
135 144
136 component_name = parsed_deps[component_path]['name'] 145 component_name = parsed_deps[component_path]['name']
137 146
138 return (component_path, component_name, normalized_path) 147 return (component_path, component_name, normalized_path)
139 148
140 # If the path does not match any component, default to chromium. 149 # If the path does not match any component, default to chromium.
141 return ('src/', 'chromium', normalized_path) 150 return ('src/', 'chromium', normalized_path)
(...skipping 11 matching lines...) Expand all
153 """ 162 """
154 if not regression: 163 if not regression:
155 return None 164 return None
156 165
157 revisions = regression.split(':') 166 revisions = regression.split(':')
158 167
159 # If regression information is not available, return none. 168 # If regression information is not available, return none.
160 if len(revisions) != 2: 169 if len(revisions) != 2:
161 return None 170 return None
162 171
163 # Strip 'r' from both start and end range. 172 range_start = revisions[0]
164 range_start = revisions[0].lstrip('r') 173 range_end = revisions[1]
165 range_end = revisions[1].lstrip('r') 174
175 # Strip 'r' off the range start/end. Not using lstrip to avoid the case when
Martin Barbella 2014/08/22 02:24:14 Maybe it would be best for us not to include the '
jeun 2014/08/22 22:58:43 Done.
176 # the range is in git hash and it starts with 'r'.
177 if range_start.startswith('r'):
178 range_start = range_start[1:]
179
180 if range_end.startswith('r'):
181 range_end = range_end[1:]
166 182
167 return [range_start, range_end] 183 return [range_start, range_end]
168 184
169 185
170 def LoadJSON(json_string): 186 def LoadJSON(json_string):
171 """Loads json object from string, or None. 187 """Loads json object from string, or None.
172 188
173 Args: 189 Args:
174 json_string: A string to get object from. 190 json_string: A string to get object from.
175 191
(...skipping 13 matching lines...) Expand all
189 205
190 Args: 206 Args:
191 url: URL to get data from. 207 url: URL to get data from.
192 retries: Number of times to retry connection. 208 retries: Number of times to retry connection.
193 sleep_time: Time in seconds to wait before retrying connection. 209 sleep_time: Time in seconds to wait before retrying connection.
194 timeout: Time in seconds to wait before time out. 210 timeout: Time in seconds to wait before time out.
195 211
196 Returns: 212 Returns:
197 None if the data retrieval fails, or the raw data. 213 None if the data retrieval fails, or the raw data.
198 """ 214 """
199 count = 0 215 data = None
200 while True: 216 for i in range(retries):
201 count += 1
202 # Retrieves data from URL. 217 # Retrieves data from URL.
203 try: 218 try:
204 _, data = utils.GetHttpClient().Get(url) 219 data = urllib2.urlopen(url, timeout=timeout)
stgao 2014/08/22 06:50:53 Is there a specific reason to revert my change to
jeun 2014/08/22 22:58:43 Done.
205 return data 220
221 # If retrieval is successful, return the data.
222 if data:
223 return data.read()
224
225 # If retrieval fails, try after sleep_time second.
226 except urllib2.URLError:
227 time.sleep(sleep_time)
228 continue
206 except IOError: 229 except IOError:
207 if count < retries: 230 time.sleep(sleep_time)
208 # If retrieval fails, try after sleep_time second. 231 continue
209 time.sleep(sleep_time)
210 else:
211 break
212 232
213 # Return None if it fails to read data from URL 'retries' times. 233 # Return None if it fails to read data from URL 'retries' times.
214 return None 234 return None
215 235
216 236
217 def FindMinLineDistance(crashed_line_list, changed_line_numbers): 237 def FindMinLineDistance(crashed_line_list, changed_line_numbers):
218 """Calculates how far the changed line is from one of the crashes. 238 """Calculates how far the changed line is from one of the crashes.
219 239
220 Finds the minimum distance between the lines that the file crashed on 240 Finds the minimum distance between the lines that the file crashed on
221 and the lines that the file changed. For example, if the file crashed on 241 and the lines that the file changed. For example, if the file crashed on
222 line 200 and the CL changes line 203,204 and 205, the function returns 3. 242 line 200 and the CL changes line 203,204 and 205, the function returns 3.
223 243
224 Args: 244 Args:
225 crashed_line_list: A list of lines that the file crashed on. 245 crashed_line_list: A list of lines that the file crashed on.
226 changed_line_numbers: A list of lines that the file changed. 246 changed_line_numbers: A list of lines that the file changed.
227 247
228 Returns: 248 Returns:
229 The minimum distance. If either of the input lists is empty, 249 The minimum distance. If either of the input lists is empty,
230 it returns inf. 250 it returns inf.
231 251
232 """ 252 """
233 min_distance = INFINITY 253 min_distance = INFINITY
234 254
235 for line in crashed_line_list: 255 crashed_line_numbers = [crashed_line
256 for crashed_line_range in crashed_line_list
257 for crashed_line in crashed_line_range]
258 for line in crashed_line_numbers:
236 for distance in changed_line_numbers: 259 for distance in changed_line_numbers:
237 # Find the current distance and update the min if current distance is 260 # Find the current distance and update the min if current distance is
238 # less than current min. 261 # less than current min.
239 current_distance = abs(line - distance) 262 current_distance = abs(line - distance)
240 if current_distance < min_distance: 263 if current_distance < min_distance:
241 min_distance = current_distance 264 min_distance = current_distance
242 265
243 return min_distance 266 return min_distance
244 267
245 268
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
342 lines that the file changes. The intersection looks within 3 lines 365 lines that the file changes. The intersection looks within 3 lines
343 of the line that caused the crash. 366 of the line that caused the crash.
344 367
345 Args: 368 Args:
346 crashed_line_list: A list of lines that the file crashed on. 369 crashed_line_list: A list of lines that the file crashed on.
347 stack_frame_index: A list of positions in stack for each of the lines. 370 stack_frame_index: A list of positions in stack for each of the lines.
348 changed_line_numbers: A list of lines that the file changed. 371 changed_line_numbers: A list of lines that the file changed.
349 line_range: Number of lines to look backwards from crashed lines. 372 line_range: Number of lines to look backwards from crashed lines.
350 373
351 Returns: 374 Returns:
352 line_intersection: Intersection between crashed_line_list and 375 line_number_intersection: Intersection between crashed_line_list and
353 changed_line_numbers. 376 changed_line_numbers.
354 stack_frame_index_intersection: Stack number for each of the intersections. 377 stack_frame_index_intersection: Stack number for each of the intersections.
355 """ 378 """
356 line_intersection = [] 379 line_number_intersection = []
357 stack_frame_index_intersection = [] 380 stack_frame_index_intersection = []
358 381
 359 # Iterate through the crashed lines, and its occurrence in stack. 382 # Iterate through the crashed lines, and its occurrence in stack.
360 for (line, stack_frame_index) in zip(crashed_line_list, stack_frame_index): 383 for (lines, stack_frame_index) in zip(crashed_line_list, stack_frame_index):
361 # Also check previous 'line_range' lines. 384 # Also check previous 'line_range' lines. Create a set of all changed lines
362 line_minus_n = range(line - line_range, line + 1) 385 # and lines within 3 lines range before the crashed line.
386 line_minus_n = set()
387 for line in lines:
388 for line_in_range in range(line - line_range, line + 1):
389 line_minus_n.add(line_in_range)
363 390
364 for changed_line in changed_line_numbers: 391 for changed_line in changed_line_numbers:
 365 # If a CL does not change crashed line, check next line. 392 # If a CL does not change crashed line, check next line.
366 if changed_line not in line_minus_n: 393 if changed_line not in line_minus_n:
367 continue 394 continue
368 395
396 intersected_line = set()
369 # If the changed line is exactly the crashed line, add that line. 397 # If the changed line is exactly the crashed line, add that line.
370 if line in changed_line_numbers: 398 for line in lines:
371 intersected_line = line 399 if line in changed_line_numbers:
400 intersected_line.add(line)
372 401
373 # If the changed line is in 3 lines of the crashed line, add the line. 402 # If the changed line is in 3 lines of the crashed line, add the line.
374 else: 403 else:
375 intersected_line = changed_line 404 intersected_line.add(changed_line)
376 405
377 # Avoid adding the same line twice. 406 # Avoid adding the same line twice.
378 if intersected_line not in line_intersection: 407 if intersected_line not in line_number_intersection:
379 line_intersection.append(intersected_line) 408 line_number_intersection.append(list(intersected_line))
380 stack_frame_index_intersection.append(stack_frame_index) 409 stack_frame_index_intersection.append(stack_frame_index)
381 410
382 break 411 break
383 412
384 return (line_intersection, stack_frame_index_intersection) 413 return (line_number_intersection, stack_frame_index_intersection)
385 414
386 415
387 def MatchListToResultList(matches): 416 def MatchListToResultList(matches):
388 """Convert list of matches to the list of result objects. 417 """Convert list of matches to the list of result objects.
389 418
390 Args: 419 Args:
391 matches: A list of match objects along with its stack priority and revision 420 matches: A list of match objects along with its stack priority and revision
392 number/git hash 421 number/git hash
393 Returns: 422 Returns:
394 A list of result object. 423 A list of result object.
395 424
396 """ 425 """
397 result_list = [] 426 result_list = []
398 427
399 for _, cl, match in matches: 428 for _, cl, match in matches:
400 suspected_cl = cl 429 suspected_cl = cl
401 revision_url = match.revision_url 430 revision_url = match.revision_url
402 component_name = match.component_name 431 component_name = match.component_name
403 author = match.author 432 author = match.author
404 reason = match.reason 433 reason = match.reason
405 review_url = match.review_url 434 review_url = match.review_url
406 reviewers = match.reviewers 435 reviewers = match.reviewers
407 # For matches, line content do not exist. 436 # For matches, line content do not exist.
408 line_content = None 437 line_content = None
438 message = match.message
409 439
410 result = Result(suspected_cl, revision_url, component_name, author, reason, 440 result = Result(suspected_cl, revision_url, component_name, author, reason,
411 review_url, reviewers, line_content) 441 review_url, reviewers, line_content, message)
412 result_list.append(result) 442 result_list.append(result)
413 443
414 return result_list 444 return result_list
415 445
416 446
417 def BlameListToResultList(blame_list): 447 def BlameListToResultList(blame_list):
418 """Convert blame list to the list of result objects. 448 """Convert blame list to the list of result objects.
419 449
420 Args: 450 Args:
421 blame_list: A list of blame objects. 451 blame_list: A list of blame objects.
422 452
423 Returns: 453 Returns:
424 A list of result objects. 454 A list of result objects.
425 """ 455 """
426 result_list = [] 456 result_list = []
427 457
428 for blame in blame_list: 458 for blame in blame_list:
429 suspected_cl = blame.revision 459 suspected_cl = blame.revision
430 revision_url = blame.url 460 revision_url = blame.url
431 component_name = blame.component_name 461 component_name = blame.component_name
432 author = blame.author 462 author = blame.author
433 reason = ( 463 reason = (
434 'The CL changes line %s of file %s from stack %d.' % 464 'The CL changes line %s of file %s, which is stack frame index %d.' %
Martin Barbella 2014/08/22 02:24:14 Nit: remove "index".
jeun 2014/08/22 22:58:43 Done.
435 (blame.line_number, blame.file, blame.stack_frame_index)) 465 (blame.line_number, blame.file, blame.stack_frame_index))
436 # Blame object does not have review url and reviewers. 466 # Blame object does not have review url and reviewers.
437 review_url = None 467 review_url = None
438 reviewers = None 468 reviewers = None
439 line_content = blame.line_content 469 line_content = blame.line_content
470 message = blame.message
440 471
441 result = Result(suspected_cl, revision_url, component_name, author, reason, 472 result = Result(suspected_cl, revision_url, component_name, author, reason,
442 review_url, reviewers, line_content) 473 review_url, reviewers, line_content, message)
443 result_list.append(result) 474 result_list.append(result)
444 475
445 return result_list 476 return result_list
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698