Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // +build !windows | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 2 | 4 |
| 5 // +build android | |
| 6 | |
| 7 // Watchdog daemon for android devices. It will attempt to reboot the device | |
| 8 // if its uptime exceeds a specified maximum. | |
| 3 package main | 9 package main |
| 4 | 10 |
| 11 /* | |
| 12 #cgo LDFLAGS: -landroid -llog | |
| 13 | |
| 14 #include <android/log.h> | |
| 15 #include <string.h> | |
| 16 */ | |
| 17 import "C" | |
| 18 | |
| 5 import ( | 19 import ( |
| 6 » "C" | 20 » "flag" |
| 7 "fmt" | 21 "fmt" |
| 22 "io/ioutil" | |
| 23 "math" | |
| 24 "os" | |
| 25 "strconv" | |
| 26 "strings" | |
| 27 "syscall" | |
| 28 "time" | |
| 29 "unsafe" | |
| 30 | |
| 31 "github.com/luci/luci-go/common/runtime/paniccatcher" | |
| 8 ) | 32 ) |
| 9 | 33 |
| 34 var ( | |
| 35 logHeader = C.CString("CIT_DeviceWatchdog") | |
| 36 ) | |
| 37 | |
| 38 type logLevel int | |
| 39 | |
| 40 const ( | |
| 41 logInfo = iota | |
| 42 logWarning | |
| 43 logError | |
| 44 ) | |
| 45 | |
| 46 const ( | |
| 47 stdInFd = 0 | |
| 48 stdOutFd = 1 | |
| 49 stdErrFd = 2 | |
| 50 ) | |
| 51 | |
| 52 func (l logLevel) getLogLevel() C.int { | |
| 53 switch l { | |
| 54 case logInfo: | |
| 55 return C.ANDROID_LOG_INFO | |
| 56 case logWarning: | |
| 57 return C.ANDROID_LOG_WARN | |
| 58 case logError: | |
| 59 return C.ANDROID_LOG_ERROR | |
| 60 default: | |
| 61 panic("Unknown log level.") | |
| 62 } | |
| 63 } | |
| 64 | |
| 65 func logcatLog(level logLevel, format string, args ...interface{}) { | |
| 66 cmsg := C.CString(fmt.Sprintf(format, args...)) | |
| 67 defer C.free(unsafe.Pointer(cmsg)) | |
| 68 C.__android_log_write(level.getLogLevel(), logHeader, cmsg) | |
| 69 } | |
| 70 | |
| 71 // Spawn a child process via fork, create new process group, chdir and | |
| 72 // redirect std in and out to /dev/null. | |
| 73 func daemonize() (int, error) { | |
| 74 os.Chdir("/") | |
|
dnj
2016/08/15 23:02:14
(I meant move this to realMain)
bpastene
2016/08/15 23:39:28
Done.
| |
| 75 | |
| 76 ret, _, errno := syscall.Syscall(syscall.SYS_FORK, 0, 0, 0) | |
| 77 pid := int(ret) | |
| 78 if errno != 0 { | |
| 79 return 0, errno | |
| 80 } | |
| 81 if pid > 0 { | |
| 82 return pid, nil | |
| 83 } | |
| 84 | |
| 85 _, err := syscall.Setsid() | |
| 86 if err != nil { | |
| 87 return 0, err | |
| 88 } | |
| 89 | |
| 90 f, err := os.Open("/dev/null") | |
| 91 if err != nil { | |
| 92 return 0, err | |
| 93 } | |
| 94 fd := f.Fd() | |
| 95 syscall.Dup2(int(fd), stdInFd) | |
| 96 syscall.Dup2(int(fd), stdOutFd) | |
| 97 syscall.Dup2(int(fd), stdErrFd) | |
| 98 | |
| 99 return pid, nil | |
| 100 } | |
| 101 | |
| 102 // Read from /proc/uptime. Expected format: | |
| 103 // "uptime_in_seconds cpu_idle_time_in_seconds" | |
| 104 func getDeviceUptime() (time.Duration, error) { | |
| 105 bytes, err := ioutil.ReadFile("/proc/uptime") | |
| 106 if err != nil { | |
| 107 return 0, fmt.Errorf("unable to open /proc/uptime: %s", err.Error()) | |
|
dnj
2016/08/15 23:02:14
nit: two spaces
bpastene
2016/08/15 23:39:28
Done.
| |
| 108 } | |
| 109 // Split on the space to get uptime and drop cpu idle time. | |
| 110 uptimeFields := strings.Fields(string(bytes)) | |
| 111 if len(uptimeFields) == 0 { | |
| 112 return 0, fmt.Errorf("unable to parse /proc/uptime") | |
| 113 } | |
| 114 uptime, err := strconv.ParseFloat(uptimeFields[0], 64) | |
| 115 if err != nil { | |
| 116 return 0, fmt.Errorf("unable to parse uptime: %s", err.Error()) | |
| 117 } | |
| 118 return time.Duration(uptime * float64(time.Second)), nil | |
| 119 } | |
| 120 | |
| 121 // Reboot device by writing to sysrq-trigger. See: | |
| 122 // https://www.kernel.org/doc/Documentation/sysrq.txt | |
| 123 func rebootDevice() { | |
| 124 fd, err := os.OpenFile("/proc/sysrq-trigger", os.O_WRONLY, 0644) | |
|
dnj
2016/08/15 23:02:14
nit: get rid of 0644 since you're not creating a f
bpastene
2016/08/15 23:39:28
Needs it: https://golang.org/pkg/os/#OpenFile
dnj
2016/08/15 23:50:03
I was thinking pass 0 to be clear that you're not
bpastene
2016/08/16 19:47:26
Done.
| |
| 125 if err != nil { | |
| 126 logcatLog(logError, "Can't open /proc/sysrq-trigger: %s", err.Error()) | |
| 127 os.Exit(1) | |
| 128 } | |
| 129 defer fd.Close() | |
| 130 _, err = fd.Write([]byte("b")) | |
|
dnj
2016/08/15 23:02:14
nit, oneline:
if _, err := fd.Write(...); err != n
bpastene
2016/08/15 23:39:28
That makes it more readable? I'm not sure I agree
dnj
2016/08/15 23:50:02
It's a coding preference we've exhibited in LUCI p
| |
| 131 if err != nil { | |
| 132 logcatLog(logError, "Can't reboot: %s", err.Error()) | |
| 133 os.Exit(1) | |
|
dnj
2016/08/15 23:02:14
WDYT about having this function actually return an
bpastene
2016/08/15 23:39:28
Done.
| |
| 134 } | |
| 135 logcatLog(logError, "I just rebooted. How am I still alive?!?\n") | |
| 136 os.Exit(1) | |
| 137 } | |
| 138 | |
| 139 func realMain() { | |
|
dnj
2016/08/15 23:02:14
Just a thought, but if you made "realMain" return
bpastene
2016/08/15 23:39:28
Good idea; done.
| |
| 140 | |
| 141 maxUptimeFlag := flag.Int("max-uptime", 120, "Maximum uptime in minutes before a reboot is triggered.") | |
|
dnj
2016/08/15 23:02:14
If you wanted to, you could use a clockflag.Durati
bpastene
2016/08/15 23:39:28
I think that would add a lot more time.Duration-ty
| |
| 142 flag.Parse() | |
| 143 | |
| 144 pid, err := daemonize() | |
| 145 if err != nil { | |
| 146 logcatLog(logError, "Failed to daemonize: %s", err.Error()) | |
| 147 os.Exit(1) | |
| 148 } | |
| 149 if pid > 0 { | |
| 150 logcatLog(logInfo, "Child spawned with pid %d, exiting parent\n", pid) | |
| 151 os.Exit(0) | |
| 152 } | |
| 153 | |
| 154 maxUptime := time.Duration(int64(*maxUptimeFlag) * int64(time.Minute)) | |
|
dnj
2016/08/15 23:02:14
Make this:
maxUptime := time.Duration(*maxUptimeFl
bpastene
2016/08/15 23:39:28
Done.
| |
| 155 for { | |
| 156 uptime, err := getDeviceUptime() | |
| 157 if err != nil { | |
| 158 logcatLog(logError, "Failed to get uptime: %s", err.Error()) | |
| 159 os.Exit(1) | |
| 160 } | |
| 161 | |
| 162 if uptime > maxUptime { | |
| 163 logcatLog(logInfo, "Max uptime exceeded: (%.2f > %.2f)\n", float64(uptime)/float64(time.Minute), float64(maxUptime)/float64(time.Minute)) | |
| 164 rebootDevice() | |
|
dnj
2016/08/15 23:02:14
Suggestion: break this into two loops:
// Wait un
bpastene
2016/08/15 23:39:28
If that first reboot attempt doesn't work, why wou
dnj
2016/08/15 23:50:02
My thought was that this is a terminal point, so y
bpastene
2016/08/16 19:47:26
With 1), it doesn't silently disappear, its error
dnj
2016/08/16 19:51:47
Okay seems fair. Then I would recommend still brea
bpastene
2016/08/16 22:00:56
Done.
| |
| 165 } else { | |
| 166 logcatLog(logInfo, "No need to reboot, uptime < max_uptime: (%.2f < %.2f)\n", float64(uptime)/float64(time.Minute), float64(maxUptime)/float64(time.Minute)) | |
| 167 } | |
| 168 maxSleep := math.Max(float64(maxUptime-uptime), float64(time.Second)) | |
|
dnj
2016/08/15 23:02:14
Confused: you know "maxUptime" is >= "uptime", so
bpastene
2016/08/15 23:39:28
Thanks to obscure floating point precision issues,
dnj
2016/08/15 23:50:03
That's weird. Maybe just have it sleep for (maxUpt
bpastene
2016/08/16 19:47:26
Ahh, the + 1 second is good idea. I'll do that.
| |
| 169 time.Sleep(time.Duration(maxSleep)) | |
| 170 } | |
| 171 } | |
| 172 | |
| 10 func main() { | 173 func main() { |
| 11 » fmt.Println("Is this thing working?") | 174 » paniccatcher.Do(realMain, func(p *paniccatcher.Panic) { |
| 175 » » logcatLog(logError, "Panic: %s\n%s", p.Reason, p.Stack) | |
| 176 » » os.Exit(1) | |
| 177 » }) | |
| 12 } | 178 } |
| OLD | NEW |