| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Send system monitoring data to the timeseries monitoring API.""" | 6 """Send system monitoring data to the timeseries monitoring API.""" |
| 7 | 7 |
| 8 import random | 8 import random |
| 9 import time | 9 import time |
| 10 | 10 |
| 11 import psutil | 11 import psutil |
| 12 | 12 |
| 13 from infra.libs.service_utils import outer_loop | 13 from infra.libs.service_utils import outer_loop |
| 14 from infra.services.sysmon import android_device_metrics | 14 from infra.services.sysmon import android_device_metrics |
| 15 from infra.services.sysmon import cipd_metrics | 15 from infra.services.sysmon import cipd_metrics |
| 16 from infra.services.sysmon import puppet_metrics | 16 from infra.services.sysmon import puppet_metrics |
| 17 from infra.services.sysmon import root_setup | 17 from infra.services.sysmon import root_setup |
| 18 from infra.services.sysmon import system_metrics | 18 from infra.services.sysmon import system_metrics |
| 19 from infra_libs import ts_mon | 19 from infra_libs import ts_mon |
| 20 | 20 |
| 21 | 21 |
| 22 class SysMon(outer_loop.Application): | 22 class SysMon(outer_loop.Application): |
| 23 def __init__(self): |
| 24 # make sure we call our super's init |
| 25 super(SysMon, self).__init__() |
| 26 |
| 27 # SysMon.task is called every minute. We want to collect some metrics |
| 28 # (e.g. os_info) only once per hour, so here we count the minutes within |
| 29 # the hour (the counter itself wraps around once a day). |
| 30 # |
| 31 # NB: the guarantee that consecutive calls are one minute apart comes from |
| 32 # chrome_infra/manifests/sysmon.pp in the puppet repo. |
| 33 self._minute_count = 0 |
| 34 |
| 35 def count_minute(self): |
| 36 """Should be called at the end of each call to self.task.""" |
| 37 # mark that we were called |
| 38 self._minute_count += 1 |
| 39 |
| 40 # roll over each day-ish, 60 minutes * 24 hours |
| 41 self._minute_count %= 60 * 24 |
| 42 |
| 43 def is_hour(self): |
| 44 """Check if this call is on the hour.""" |
| 45 return self._minute_count % 60 == 0 |
| 46 |
| 23 def add_argparse_options(self, parser): | 47 def add_argparse_options(self, parser): |
| 24 super(SysMon, self).add_argparse_options(parser) | 48 super(SysMon, self).add_argparse_options(parser) |
| 25 | 49 |
| 26 parser.add_argument( | 50 parser.add_argument( |
| 27 '--interval', | 51 '--interval', |
| 28 default=10, type=int, | 52 default=10, type=int, |
| 29 help='time (in seconds) between sampling system metrics') | 53 help='time (in seconds) between sampling system metrics') |
| 30 parser.add_argument( | 54 parser.add_argument( |
| 31 '--root-setup', | 55 '--root-setup', |
| 32 action='store_true', | 56 action='store_true', |
| 33 help='if this is set sysmon will run once to initialise configs in ' | 57 help='if this is set sysmon will run once to initialise configs in ' |
| 34 '/etc and then exit immediately. Used on GCE bots to bootstrap ' | 58 '/etc and then exit immediately. Used on GCE bots to bootstrap ' |
| 35 'sysmon') | 59 'sysmon') |
| 36 | 60 |
| 37 parser.set_defaults( | 61 parser.set_defaults( |
| 38 ts_mon_flush='manual', | 62 ts_mon_flush='manual', |
| 39 ) | 63 ) |
| 40 | 64 |
| 41 def task(self): | 65 def task(self): |
| 42 try: | 66 try: |
| 43 system_metrics.get_uptime() | 67 system_metrics.get_uptime() |
| 44 system_metrics.get_cpu_info() | 68 system_metrics.get_cpu_info() |
| 45 system_metrics.get_disk_info() | 69 system_metrics.get_disk_info() |
| 46 system_metrics.get_mem_info() | 70 system_metrics.get_mem_info() |
| 47 system_metrics.get_net_info() | 71 system_metrics.get_net_info() |
| 48 system_metrics.get_proc_info() | 72 system_metrics.get_proc_info() |
| 73 if self.is_hour(): |
| 74 # collect once per hour |
| 75 system_metrics.get_os_info() |
| 76 else: |
| 77 # clear on all other minutes |
| 78 system_metrics.clear_os_info() |
| 49 puppet_metrics.get_puppet_summary() | 79 puppet_metrics.get_puppet_summary() |
| 50 cipd_metrics.get_cipd_summary() | 80 cipd_metrics.get_cipd_summary() |
| 51 android_device_metrics.get_device_statuses() | 81 android_device_metrics.get_device_statuses() |
| 52 system_metrics.get_unix_time() # must be the last in the list | 82 system_metrics.get_unix_time() # must be the last in the list |
| 83 |
| 53 finally: | 84 finally: |
| 54 ts_mon.flush() | 85 ts_mon.flush() |
| 86 self.count_minute() |
| 55 return True | 87 return True |
| 56 | 88 |
| 57 def sleep_timeout(self): | 89 def sleep_timeout(self): |
| 58 return self.opts.interval | 90 return self.opts.interval |
| 59 | 91 |
| 60 def main(self, opts): | 92 def main(self, opts): |
| 61 if opts.root_setup: | 93 if opts.root_setup: |
| 62 return root_setup.root_setup() | 94 return root_setup.root_setup() |
| 63 | 95 |
| 64 # This returns a 0 value the first time it's called. Call it now and | 96 # This returns a 0 value the first time it's called. Call it now and |
| 65 # discard the return value. | 97 # discard the return value. |
| 66 psutil.cpu_times_percent() | 98 psutil.cpu_times_percent() |
| 67 | 99 |
| 68 # Wait a random amount of time before starting the loop in case sysmon is | 100 # Wait a random amount of time before starting the loop in case sysmon is |
| 69 # started at exactly the same time on all machines. | 101 # started at exactly the same time on all machines. |
| 70 time.sleep(random.uniform(0, opts.interval)) | 102 time.sleep(random.uniform(0, opts.interval)) |
| 71 | 103 |
| 72 return super(SysMon, self).main(opts) | 104 return super(SysMon, self).main(opts) |
| 73 | 105 |
| 74 | 106 |
| 75 if __name__ == '__main__': | 107 if __name__ == '__main__': |
| 76 SysMon().run() | 108 SysMon().run() |
| OLD | NEW |