Index: infra/bots/recipe_modules/skia/resources/elf_symbolizer.py |
diff --git a/infra/bots/recipe_modules/skia/resources/elf_symbolizer.py b/infra/bots/recipe_modules/skia/resources/elf_symbolizer.py |
deleted file mode 100644 |
index de9c1412193c3976d16f67316790b27e9353fb96..0000000000000000000000000000000000000000 |
--- a/infra/bots/recipe_modules/skia/resources/elf_symbolizer.py |
+++ /dev/null |
@@ -1,477 +0,0 @@ |
-# Copyright 2014 The Chromium Authors. All rights reserved. |
-# Use of this source code is governed by a BSD-style license that can be |
-# found in the LICENSE file. |
- |
-"""The ElfSymbolizer class for symbolizing Executable and Linkable Files. |
- |
-Adapted for Skia's use from |
-chromium/src/build/android/pylib/symbols/elf_symbolizer.py. |
- |
-Main changes: |
--- Added prefix_to_remove param to remove path prefix from tree data. |
-""" |
- |
-import collections |
-import datetime |
-import logging |
-import multiprocessing |
-import os |
-import posixpath |
-import Queue |
-import re |
-import subprocess |
-import sys |
-import threading |
- |
- |
-# addr2line builds a possibly infinite memory cache that can exhaust |
-# the computer's memory if allowed to grow for too long. This constant |
-# controls how many lookups we do before restarting the process. 4000 |
-# gives near peak performance without extreme memory usage. |
-ADDR2LINE_RECYCLE_LIMIT = 4000 |
- |
- |
-class ELFSymbolizer(object): |
- """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer. |
- |
- This class is a frontend for addr2line (part of GNU binutils), designed to |
- symbolize batches of large numbers of symbols for a given ELF file. It |
- supports sharding symbolization against many addr2line instances and |
- pipelining of multiple requests per each instance (in order to hide addr2line |
- internals and OS pipe latencies). |
- |
- The interface exhibited by this class is a very simple asynchronous interface, |
- which is based on the following three methods: |
- - SymbolizeAsync(): used to request (enqueue) resolution of a given address. |
- - The |callback| method: used to communicated back the symbol information. |
- - Join(): called to conclude the batch to gather the last outstanding results. |
- In essence, before the Join method returns, this class will have issued as |
- many callbacks as the number of SymbolizeAsync() calls. In this regard, note |
- that due to multiprocess sharding, callbacks can be delivered out of order. |
- |
- Some background about addr2line: |
- - it is invoked passing the elf path in the cmdline, piping the addresses in |
- its stdin and getting results on its stdout. |
- - it has pretty large response times for the first requests, but it |
- works very well in streaming mode once it has been warmed up. |
- - it doesn't scale by itself (on more cores). However, spawning multiple |
- instances at the same time on the same file is pretty efficient as they |
- keep hitting the pagecache and become mostly CPU bound. |
- - it might hang or crash, mostly for OOM. This class deals with both of these |
- problems. |
- |
- Despite the "scary" imports and the multi* words above, (almost) no multi- |
- threading/processing is involved from the python viewpoint. Concurrency |
- here is achieved by spawning several addr2line subprocesses and handling their |
- output pipes asynchronously. Therefore, all the code here (with the exception |
- of the Queue instance in Addr2Line) should be free from mind-blowing |
- thread-safety concerns. |
- |
- The multiprocess sharding works as follows: |
- The symbolizer tries to use the lowest number of addr2line instances as |
- possible (with respect of |max_concurrent_jobs|) and enqueue all the requests |
- in a single addr2line instance. For few symbols (i.e. dozens) sharding isn't |
- worth the startup cost. |
- The multiprocess logic kicks in as soon as the queues for the existing |
- instances grow. Specifically, once all the existing instances reach the |
- |max_queue_size| bound, a new addr2line instance is kicked in. |
- In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances |
- have a backlog of |max_queue_size|), back-pressure is applied on the caller by |
- blocking the SymbolizeAsync method. |
- |
- This module has been deliberately designed to be dependency free (w.r.t. of |
- other modules in this project), to allow easy reuse in external projects. |
- """ |
- |
- def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, |
- max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50, |
- source_root_path=None, strip_base_path=None, prefix_to_remove=None): |
- """Args: |
- elf_file_path: path of the elf file to be symbolized. |
- addr2line_path: path of the toolchain's addr2line binary. |
- callback: a callback which will be invoked for each resolved symbol with |
- the two args (sym_info, callback_arg). The former is an instance of |
- |ELFSymbolInfo| and contains the symbol information. The latter is an |
- embedder-provided argument which is passed to SymbolizeAsync(). |
- inlines: when True, the ELFSymbolInfo will contain also the details about |
- the outer inlining functions. When False, only the innermost function |
- will be provided. |
- max_concurrent_jobs: Max number of addr2line instances spawned. |
- Parallelize responsibly, addr2line is a memory and I/O monster. |
- max_queue_size: Max number of outstanding requests per addr2line instance. |
- addr2line_timeout: Max time (in seconds) to wait for a addr2line response. |
- After the timeout, the instance will be considered hung and respawned. |
- source_root_path: In some toolchains only the name of the source file is |
- is output, without any path information; disambiguation searches |
- through the source directory specified by |source_root_path| argument |
- for files whose name matches, adding the full path information to the |
- output. For example, if the toolchain outputs "unicode.cc" and there |
- is a file called "unicode.cc" located under |source_root_path|/foo, |
- the tool will replace "unicode.cc" with |
- "|source_root_path|/foo/unicode.cc". If there are multiple files with |
- the same name, disambiguation will fail because the tool cannot |
- determine which of the files was the source of the symbol. |
- strip_base_path: Rebases the symbols source paths onto |source_root_path| |
- (i.e replace |strip_base_path| with |source_root_path). |
- prefix_to_remove: Removes the prefix from ElfSymbolInfo output. Skia added |
- """ |
- assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path |
- self.elf_file_path = elf_file_path |
- self.addr2line_path = addr2line_path |
- self.callback = callback |
- self.inlines = inlines |
- self.max_concurrent_jobs = (max_concurrent_jobs or |
- min(multiprocessing.cpu_count(), 4)) |
- self.max_queue_size = max_queue_size |
- self.addr2line_timeout = addr2line_timeout |
- self.requests_counter = 0 # For generating monotonic request IDs. |
- self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst. |
- |
- # Skia addition: remove the given prefix from tree paths. |
- self.prefix_to_remove = prefix_to_remove |
- |
- # If necessary, create disambiguation lookup table |
- self.disambiguate = source_root_path is not None |
- self.disambiguation_table = {} |
- self.strip_base_path = strip_base_path |
- if(self.disambiguate): |
- self.source_root_path = os.path.abspath(source_root_path) |
- self._CreateDisambiguationTable() |
- |
- # Create one addr2line instance. More instances will be created on demand |
- # (up to |max_concurrent_jobs|) depending on the rate of the requests. |
- self._CreateNewA2LInstance() |
- |
- def SymbolizeAsync(self, addr, callback_arg=None): |
- """Requests symbolization of a given address. |
- |
- This method is not guaranteed to return immediately. It generally does, but |
- in some scenarios (e.g. all addr2line instances have full queues) it can |
- block to create back-pressure. |
- |
- Args: |
- addr: address to symbolize. |
- callback_arg: optional argument which will be passed to the |callback|.""" |
- assert(isinstance(addr, int)) |
- |
- # Process all the symbols that have been resolved in the meanwhile. |
- # Essentially, this drains all the addr2line(s) out queues. |
- for a2l_to_purge in self._a2l_instances: |
- a2l_to_purge.ProcessAllResolvedSymbolsInQueue() |
- a2l_to_purge.RecycleIfNecessary() |
- |
- # Find the best instance according to this logic: |
- # 1. Find an existing instance with the shortest queue. |
- # 2. If all of instances' queues are full, but there is room in the pool, |
- # (i.e. < |max_concurrent_jobs|) create a new instance. |
- # 3. If there were already |max_concurrent_jobs| instances and all of them |
- # had full queues, make back-pressure. |
- |
- # 1. |
- def _SortByQueueSizeAndReqID(a2l): |
- return (a2l.queue_size, a2l.first_request_id) |
- a2l = min(self._a2l_instances, key=_SortByQueueSizeAndReqID) |
- |
- # 2. |
- if (a2l.queue_size >= self.max_queue_size and |
- len(self._a2l_instances) < self.max_concurrent_jobs): |
- a2l = self._CreateNewA2LInstance() |
- |
- # 3. |
- if a2l.queue_size >= self.max_queue_size: |
- a2l.WaitForNextSymbolInQueue() |
- |
- a2l.EnqueueRequest(addr, callback_arg) |
- |
- def Join(self): |
- """Waits for all the outstanding requests to complete and terminates.""" |
- for a2l in self._a2l_instances: |
- a2l.WaitForIdle() |
- a2l.Terminate() |
- |
- def _CreateNewA2LInstance(self): |
- assert(len(self._a2l_instances) < self.max_concurrent_jobs) |
- a2l = ELFSymbolizer.Addr2Line(self) |
- self._a2l_instances.append(a2l) |
- return a2l |
- |
- def _CreateDisambiguationTable(self): |
- """ Non-unique file names will result in None entries""" |
- self.disambiguation_table = {} |
- |
- for root, _, filenames in os.walk(self.source_root_path): |
- for f in filenames: |
- self.disambiguation_table[f] = os.path.join(root, f) if (f not in |
- self.disambiguation_table) else None |
- |
- |
- class Addr2Line(object): |
- """A python wrapper around an addr2line instance. |
- |
- The communication with the addr2line process looks as follows: |
- [STDIN] [STDOUT] (from addr2line's viewpoint) |
- > f001111 |
- > f002222 |
- < Symbol::Name(foo, bar) for f001111 |
- < /path/to/source/file.c:line_number |
- > f003333 |
- < Symbol::Name2() for f002222 |
- < /path/to/source/file.c:line_number |
- < Symbol::Name3() for f003333 |
- < /path/to/source/file.c:line_number |
- """ |
- |
- SYM_ADDR_RE = re.compile(r'([^:]+):(\?|\d+).*') |
- |
- def __init__(self, symbolizer): |
- self._symbolizer = symbolizer |
- self._lib_file_name = posixpath.basename(symbolizer.elf_file_path) |
- |
- # The request queue (i.e. addresses pushed to addr2line's stdin and not |
- # yet retrieved on stdout) |
- self._request_queue = collections.deque() |
- |
- # This is essentially len(self._request_queue). It has been optimized to a |
- # separate field because turned out to be a perf hot-spot. |
- self.queue_size = 0 |
- |
- # Keep track of the number of symbols a process has processed to |
- # avoid a single process growing too big and using all the memory. |
- self._processed_symbols_count = 0 |
- |
- # Objects required to handle the addr2line subprocess. |
- self._proc = None # Subprocess.Popen(...) instance. |
- self._thread = None # Threading.thread instance. |
- self._out_queue = None # Queue.Queue instance (for buffering a2l stdout). |
- self._RestartAddr2LineProcess() |
- |
- def EnqueueRequest(self, addr, callback_arg): |
- """Pushes an address to addr2line's stdin (and keeps track of it).""" |
- self._symbolizer.requests_counter += 1 # For global "age" of requests. |
- req_idx = self._symbolizer.requests_counter |
- self._request_queue.append((addr, callback_arg, req_idx)) |
- self.queue_size += 1 |
- self._WriteToA2lStdin(addr) |
- |
- def WaitForIdle(self): |
- """Waits until all the pending requests have been symbolized.""" |
- while self.queue_size > 0: |
- self.WaitForNextSymbolInQueue() |
- |
- def WaitForNextSymbolInQueue(self): |
- """Waits for the next pending request to be symbolized.""" |
- if not self.queue_size: |
- return |
- |
- # This outer loop guards against a2l hanging (detecting stdout timeout). |
- while True: |
- start_time = datetime.datetime.now() |
- timeout = datetime.timedelta(seconds=self._symbolizer.addr2line_timeout) |
- |
- # The inner loop guards against a2l crashing (checking if it exited). |
- while (datetime.datetime.now() - start_time < timeout): |
- # poll() returns !None if the process exited. a2l should never exit. |
- if self._proc.poll(): |
- logging.warning('addr2line crashed, respawning (lib: %s).' % |
- self._lib_file_name) |
- self._RestartAddr2LineProcess() |
- # TODO(primiano): the best thing to do in this case would be |
- # shrinking the pool size as, very likely, addr2line is crashed |
- # due to low memory (and the respawned one will die again soon). |
- |
- try: |
- lines = self._out_queue.get(block=True, timeout=0.25) |
- except Queue.Empty: |
- # On timeout (1/4 s.) repeat the inner loop and check if either the |
- # addr2line process did crash or we waited its output for too long. |
- continue |
- |
- # In nominal conditions, we get straight to this point. |
- self._ProcessSymbolOutput(lines) |
- return |
- |
- # If this point is reached, we waited more than |addr2line_timeout|. |
- logging.warning('Hung addr2line process, respawning (lib: %s).' % |
- self._lib_file_name) |
- self._RestartAddr2LineProcess() |
- |
- def ProcessAllResolvedSymbolsInQueue(self): |
- """Consumes all the addr2line output lines produced (without blocking).""" |
- if not self.queue_size: |
- return |
- while True: |
- try: |
- lines = self._out_queue.get_nowait() |
- except Queue.Empty: |
- break |
- self._ProcessSymbolOutput(lines) |
- |
- def RecycleIfNecessary(self): |
- """Restarts the process if it has been used for too long. |
- |
- A long running addr2line process will consume excessive amounts |
- of memory without any gain in performance.""" |
- if self._processed_symbols_count >= ADDR2LINE_RECYCLE_LIMIT: |
- self._RestartAddr2LineProcess() |
- |
- |
- def Terminate(self): |
- """Kills the underlying addr2line process. |
- |
- The poller |_thread| will terminate as well due to the broken pipe.""" |
- try: |
- self._proc.kill() |
- self._proc.communicate() # Essentially wait() without risking deadlock. |
- except Exception: # An exception while terminating? How interesting. |
- pass |
- self._proc = None |
- |
- def _WriteToA2lStdin(self, addr): |
- self._proc.stdin.write('%s\n' % hex(addr)) |
- if self._symbolizer.inlines: |
- # In the case of inlines we output an extra blank line, which causes |
- # addr2line to emit a (??,??:0) tuple that we use as a boundary marker. |
- self._proc.stdin.write('\n') |
- self._proc.stdin.flush() |
- |
- def _ProcessSymbolOutput(self, lines): |
- """Parses an addr2line symbol output and triggers the client callback.""" |
- (_, callback_arg, _) = self._request_queue.popleft() |
- self.queue_size -= 1 |
- |
- innermost_sym_info = None |
- sym_info = None |
- for (line1, line2) in lines: |
- prev_sym_info = sym_info |
- name = line1 if not line1.startswith('?') else None |
- source_path = None |
- source_line = None |
- m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2) |
- if m: |
- if not m.group(1).startswith('?'): |
- source_path = m.group(1) |
- if not m.group(2).startswith('?'): |
- source_line = int(m.group(2)) |
- else: |
- logging.warning('Got invalid symbol path from addr2line: %s' % line2) |
- |
- # In case disambiguation is on, and needed |
- was_ambiguous = False |
- disambiguated = False |
- if self._symbolizer.disambiguate: |
- if source_path and not posixpath.isabs(source_path): |
- path = self._symbolizer.disambiguation_table.get(source_path) |
- was_ambiguous = True |
- disambiguated = path is not None |
- source_path = path if disambiguated else source_path |
- |
- # Use absolute paths (so that paths are consistent, as disambiguation |
- # uses absolute paths) |
- if source_path and not was_ambiguous: |
- source_path = os.path.abspath(source_path) |
- |
- if source_path and self._symbolizer.strip_base_path: |
- # Strip the base path |
- source_path = re.sub('^' + self._symbolizer.strip_base_path, |
- self._symbolizer.source_root_path or '', source_path) |
- |
- sym_info = ELFSymbolInfo(name, source_path, source_line, was_ambiguous, |
- disambiguated, |
- self._symbolizer.prefix_to_remove) |
- if prev_sym_info: |
- prev_sym_info.inlined_by = sym_info |
- if not innermost_sym_info: |
- innermost_sym_info = sym_info |
- |
- self._processed_symbols_count += 1 |
- self._symbolizer.callback(innermost_sym_info, callback_arg) |
- |
- def _RestartAddr2LineProcess(self): |
- if self._proc: |
- self.Terminate() |
- |
- # The only reason of existence of this Queue (and the corresponding |
- # Thread below) is the lack of a subprocess.stdout.poll_avail_lines(). |
- # Essentially this is a pipe able to extract a couple of lines atomically. |
- self._out_queue = Queue.Queue() |
- |
- # Start the underlying addr2line process in line buffered mode. |
- |
- cmd = [self._symbolizer.addr2line_path, '--functions', '--demangle', |
- '--exe=' + self._symbolizer.elf_file_path] |
- if self._symbolizer.inlines: |
- cmd += ['--inlines'] |
- self._proc = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE, |
- stdin=subprocess.PIPE, stderr=sys.stderr, close_fds=True) |
- |
- # Start the poller thread, which simply moves atomically the lines read |
- # from the addr2line's stdout to the |_out_queue|. |
- self._thread = threading.Thread( |
- target=ELFSymbolizer.Addr2Line.StdoutReaderThread, |
- args=(self._proc.stdout, self._out_queue, self._symbolizer.inlines)) |
- self._thread.daemon = True # Don't prevent early process exit. |
- self._thread.start() |
- |
- self._processed_symbols_count = 0 |
- |
- # Replay the pending requests on the new process (only for the case |
- # of a hung addr2line timing out during the game). |
- for (addr, _, _) in self._request_queue: |
- self._WriteToA2lStdin(addr) |
- |
- @staticmethod |
- def StdoutReaderThread(process_pipe, queue, inlines): |
- """The poller thread fn, which moves the addr2line stdout to the |queue|. |
- |
- This is the only piece of code not running on the main thread. It merely |
- writes to a Queue, which is thread-safe. In the case of inlines, it |
- detects the ??,??:0 marker and sends the lines atomically, such that the |
- main thread always receives all the lines corresponding to one symbol in |
- one shot.""" |
- try: |
- lines_for_one_symbol = [] |
- while True: |
- line1 = process_pipe.readline().rstrip('\r\n') |
- line2 = process_pipe.readline().rstrip('\r\n') |
- if not line1 or not line2: |
- break |
- inline_has_more_lines = inlines and (len(lines_for_one_symbol) == 0 or |
- (line1 != '??' and line2 != '??:0')) |
- if not inlines or inline_has_more_lines: |
- lines_for_one_symbol += [(line1, line2)] |
- if inline_has_more_lines: |
- continue |
- queue.put(lines_for_one_symbol) |
- lines_for_one_symbol = [] |
- process_pipe.close() |
- |
- # Every addr2line processes will die at some point, please die silently. |
- except (IOError, OSError): |
- pass |
- |
- @property |
- def first_request_id(self): |
- """Returns the request_id of the oldest pending request in the queue.""" |
- return self._request_queue[0][2] if self._request_queue else 0 |
- |
- |
-class ELFSymbolInfo(object): |
- """The result of the symbolization passed as first arg. of each callback.""" |
- |
- def __init__(self, name, source_path, source_line, was_ambiguous=False, |
- disambiguated=False, prefix_to_remove=None): |
- """All the fields here can be None (if addr2line replies with '??').""" |
- self.name = name |
- if source_path and source_path.startswith(prefix_to_remove): |
- source_path = source_path[len(prefix_to_remove) : ] |
- self.source_path = source_path |
- self.source_line = source_line |
- # In the case of |inlines|=True, the |inlined_by| points to the outer |
- # function inlining the current one (and so on, to form a chain). |
- self.inlined_by = None |
- self.disambiguated = disambiguated |
- self.was_ambiguous = was_ambiguous |
- |
- def __str__(self): |
- return '%s [%s:%d]' % ( |
- self.name or '??', self.source_path or '??', self.source_line or 0) |