Chromium Code Reviews| Index: go/src/infra/tools/device_watchdog/main.go |
| diff --git a/go/src/infra/tools/device_watchdog/main.go b/go/src/infra/tools/device_watchdog/main.go |
| index b76425ba08aeba2a95d8b1af1d74efe470bb7270..b51f6ef2f6ae541b552b08887920e06f9adaa05e 100644 |
| --- a/go/src/infra/tools/device_watchdog/main.go |
| +++ b/go/src/infra/tools/device_watchdog/main.go |
| @@ -1,12 +1,136 @@ |
| -// +build !windows |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +// +build android |
| + |
| +/* |
| +Watchdog daemon for Android devices. It will attempt to reboot the device |
| +if its uptime exceeds a specified maximum. |
| +*/ |
| package main |
// Needed for logcat integration.
// (Kept apart from the block below by a blank line: cgo treats the whole
// comment group directly above `import "C"` as C source.)

/*
#cgo LDFLAGS: -landroid -llog

#include <android/log.h>
#include <stdlib.h>
#include <string.h>
*/
import "C"

import (
	"flag"
	"fmt"
	"io/ioutil"
	"os"
	"strconv"
	"strings"
	"syscall"
	"time"
	"unsafe"
)
| + |
| +var ( |
| + logHeader = C.CString("CIT_DeviceWatchdog") |
| ) |
| +func logcatInfo(msg string) { |
|
dnj
2016/08/15 19:43:29
Consider making this accept a format string:
func
bpastene
2016/08/15 22:42:02
Done.
|
| + C.__android_log_write(C.ANDROID_LOG_INFO, logHeader, C.CString(msg)) |
|
dnj
2016/08/15 19:43:30
You need to free the string that you create:
cmsg
bpastene
2016/08/15 22:42:02
Done.
|
| +} |
| + |
| +func logcatError(msg string) { |
| + C.__android_log_write(C.ANDROID_LOG_ERROR, logHeader, C.CString(msg)) |
| +} |
| + |
// daemonize detaches the program from whatever launched it: it forks, and in
// the child starts a new session, changes the working directory to "/" and
// points stdin, stdout and stderr at /dev/null.
//
// In the parent it returns the child's pid (> 0) and a nil error. In the
// child it returns 0 and a nil error once detaching has succeeded. On any
// failure it returns -1 and an error naming the step that failed; failures
// after the fork are reported in the child, the parent having already
// returned.
//
// NOTE(review): a raw fork(2) duplicates only the calling thread, while the
// Go runtime may be running others - confirm the child stays healthy on the
// target devices.
func daemonize() (int, error) {
	ret, _, errno := syscall.Syscall(syscall.SYS_FORK, 0, 0, 0)
	if errno != 0 {
		return -1, fmt.Errorf("fork: %v", errno)
	}
	pid := int(ret)
	if pid > 0 {
		// Parent: nothing left to do but report the child.
		return pid, nil
	}

	// Child from here on.
	if _, err := syscall.Setsid(); err != nil {
		return -1, fmt.Errorf("setsid: %v", err)
	}
	if err := os.Chdir("/"); err != nil {
		return -1, fmt.Errorf("chdir /: %v", err)
	}

	// Open read-write: the same descriptor backs stdin (reads) as well as
	// stdout and stderr (writes). A read-only descriptor would make every
	// later write to stdout/stderr fail with EBADF.
	devNull, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
	if err != nil {
		return -1, err
	}
	nullFd := int(devNull.Fd())

	// Use the literal descriptor numbers: os.Stdin/os.Stdout/os.Stderr are
	// ordinary variables and may have been reassigned.
	for _, stdFd := range []int{0, 1, 2} {
		if err := syscall.Dup2(nullFd, stdFd); err != nil {
			return -1, fmt.Errorf("dup2 %d -> %d: %v", nullFd, stdFd, err)
		}
	}

	// The duplicates keep /dev/null open; drop the extra descriptor unless
	// it already is one of the standard three.
	if nullFd > 2 {
		if err := devNull.Close(); err != nil {
			return -1, fmt.Errorf("close /dev/null: %v", err)
		}
	}

	return pid, nil
}
| + |
// getDeviceUptime returns the time since boot, in seconds, as reported by
// /proc/uptime. Expected file format:
// "uptime_in_seconds cpu_idle_time_in_seconds".
//
// On failure it returns -1 and an error: the read error as-is, or a parse
// error prefixed with the file path.
func getDeviceUptime() (float64, error) {
	const uptimePath = "/proc/uptime"

	raw, err := ioutil.ReadFile(uptimePath)
	if err != nil {
		return -1, err
	}
	uptime, err := parseUptime(string(raw))
	if err != nil {
		return -1, fmt.Errorf("%s: %v", uptimePath, err)
	}
	return uptime, nil
}

// parseUptime extracts the uptime in seconds (the first whitespace-separated
// field) from the contents of /proc/uptime. Any further fields, such as the
// cpu idle time, are ignored.
func parseUptime(contents string) (float64, error) {
	fields := strings.Fields(contents)
	// Guard the index below: an empty or whitespace-only file yields no
	// fields at all.
	if len(fields) == 0 {
		return -1, fmt.Errorf("no uptime field in %q", contents)
	}
	uptime, err := strconv.ParseFloat(fields[0], 64)
	if err != nil {
		return -1, fmt.Errorf("parsing uptime %q: %v", fields[0], err)
	}
	return uptime, nil
}
| + |
| +// Reboot device by writing to sysrq-trigger. See: |
| +// https://www.kernel.org/doc/Documentation/sysrq.txt |
| +func rebootDevice() { |
| + err := ioutil.WriteFile("/proc/sysrq-trigger", []byte("b"), 0644) |
|
dnj
2016/08/15 19:43:29
I think rather than WriteFile (which is generally
bpastene
2016/08/15 22:42:02
Done.
|
| + if err != nil { |
| + logcatError(err.Error()) |
| + os.Exit(1) |
| + } |
| + logcatError("I just rebooted. How am I still alive?!?\n") |
|
dnj
2016/08/15 19:43:30
Is the reboot that immediate? Or could this string
bpastene
2016/08/15 22:42:02
For all local testing, it's been instantaneous.
|
| + os.Exit(1) |
| +} |
| + |
| func main() { |
| - fmt.Println("Is this thing working?") |
| + |
| + maxUptimeFlag := flag.Int("max-uptime", 120, "Maximum uptime in minutes before a reboot is triggered.") |
| + flag.Parse() |
| + |
| + pid, err := daemonize() |
| + if err != nil { |
| + logcatError(err.Error()) |
| + os.Exit(1) |
| + } |
| + if pid > 0 { |
| + logcatInfo(fmt.Sprintf("Child spawned with pid %d, exiting parent\n", pid)) |
| + os.Exit(0) |
| + } |
| + |
| + maxUptime := float64(*maxUptimeFlag) |
| + for { |
| + uptime, err := getDeviceUptime() |
| + if err != nil { |
| + logcatError(err.Error()) |
| + os.Exit(1) |
| + } |
| + uptime = uptime / 60 |
| + |
| + if uptime > maxUptime { |
| + logcatInfo(fmt.Sprintf("Max uptime exceeded: (%.2f > %.0f)\n", uptime, maxUptime)) |
| + rebootDevice() |
| + } else { |
| + logcatInfo(fmt.Sprintf("No need to reboot, uptime < max_uptime: (%.2f < %.2f)\n", uptime, maxUptime)) |
| + } |
| + time.Sleep(60 * time.Second) |
|
dnj
2016/08/15 19:43:30
Any reason not to just sleep the difference? e.g.,
bpastene
2016/08/15 22:42:02
No reason currently; changed it to sleep the diffe
|
| + } |
| } |