Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(984)

Side by Side Diff: go/src/infra/tools/device_watchdog/main.go

Issue 2241963002: Implement device watchdog. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: commentsss Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // +build !windows 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
2 4
5 // +build android
6
7 // Watchdog daemon for android devices. It will attempt to reboot the device
8 // if its uptime exceeds a specified maximum.
3 package main 9 package main
4 10
11 /*
12 #cgo LDFLAGS: -landroid -llog
13
14 #include <android/log.h>
15 #include <string.h>
16 */
17 import "C"
18
5 import ( 19 import (
6 » "C" 20 » "flag"
7 "fmt" 21 "fmt"
22 "io/ioutil"
23 "os"
24 "strconv"
25 "strings"
26 "syscall"
27 "time"
28 "unsafe"
29
30 "github.com/luci/luci-go/common/runtime/paniccatcher"
8 ) 31 )
9 32
33 var (
34 logHeader = C.CString("CIT_DeviceWatchdog")
35 )
36
37 type logLevel int
38
39 const (
40 logInfo = iota
41 logWarning
42 logError
43 )
44
45 const (
46 stdInFd = 0
47 stdOutFd = 1
48 stdErrFd = 2
49 )
50
51 func (l logLevel) getLogLevel() C.int {
52 switch l {
53 case logInfo:
54 return C.ANDROID_LOG_INFO
55 case logWarning:
56 return C.ANDROID_LOG_WARN
57 case logError:
58 return C.ANDROID_LOG_ERROR
59 default:
60 panic("Unknown log level.")
61 }
62 }
63
64 func logcatLog(level logLevel, format string, args ...interface{}) {
65 cmsg := C.CString(fmt.Sprintf(format, args...))
66 defer C.free(unsafe.Pointer(cmsg))
67 C.__android_log_write(level.getLogLevel(), logHeader, cmsg)
68 }
69
70 // Spawn a child process via fork, create new process group, chdir and
71 // redirect std in and out to /dev/null.
72 func daemonize() (int, error) {
73 ret, _, errno := syscall.Syscall(syscall.SYS_FORK, 0, 0, 0)
74 pid := int(ret)
75 if errno != 0 {
76 return 0, errno
77 }
78 if pid > 0 {
79 return pid, nil
80 }
81
82 _, err := syscall.Setsid()
83 if err != nil {
84 return 0, err
85 }
86
87 f, err := os.Open("/dev/null")
88 if err != nil {
89 return 0, err
90 }
91 fd := f.Fd()
92 syscall.Dup2(int(fd), stdInFd)
93 syscall.Dup2(int(fd), stdOutFd)
94 syscall.Dup2(int(fd), stdErrFd)
95
96 return pid, nil
97 }
98
99 // Read from /proc/uptime. Expected format:
100 // "uptime_in_seconds cpu_idle_time_in_seconds"
101 func getDeviceUptime() (time.Duration, error) {
102 bytes, err := ioutil.ReadFile("/proc/uptime")
103 if err != nil {
104 return 0, fmt.Errorf("unable to open /proc/uptime: %s", err.Error())
105 }
106 // Split on the space to get uptime and drop cpu idle time.
107 uptimeFields := strings.Fields(string(bytes))
108 if len(uptimeFields) == 0 {
109 return 0, fmt.Errorf("unable to parse /proc/uptime")
110 }
111 uptime, err := strconv.ParseFloat(uptimeFields[0], 64)
112 if err != nil {
113 return 0, fmt.Errorf("unable to parse uptime: %s", err.Error())
114 }
115 return time.Duration(uptime * float64(time.Second)), nil
116 }
117
118 // Reboot device by writing to sysrq-trigger. See:
119 // https://www.kernel.org/doc/Documentation/sysrq.txt
120 func rebootDevice() error {
121 fd, err := os.OpenFile("/proc/sysrq-trigger", os.O_WRONLY, 0)
122 if err != nil {
123 return fmt.Errorf("Can't open /proc/sysrq-trigger: %s", err.Error())
124 }
125 defer fd.Close()
126 _, err = fd.Write([]byte("b"))
127 if err != nil {
128 return fmt.Errorf("Can't reboot: %s", err.Error())
129 }
130 return fmt.Errorf("I just rebooted. How am I still alive?!?\n")
131 }
132
133 func realMain() int {
134 maxUptimeFlag := flag.Int("max-uptime", 120, "Maximum uptime in minutes before a reboot is triggered.")
135 flag.Parse()
136
137 os.Chdir("/")
138 pid, err := daemonize()
139 if err != nil {
140 logcatLog(logError, "Failed to daemonize: %s", err.Error())
141 return 1
142 }
143 if pid > 0 {
144 logcatLog(logInfo, "Child spawned with pid %d, exiting parent\n", pid)
145 return 0
146 }
147
148 maxUptime := time.Duration(*maxUptimeFlag) * time.Minute
149 for {
150 uptime, err := getDeviceUptime()
151 if err != nil {
152 logcatLog(logError, "Failed to get uptime: %s", err.Error())
153 return 1
154 }
155
156 if uptime > maxUptime {
157 logcatLog(logInfo, "Max uptime exceeded: (%.2f > %.2f)\n", float64(uptime)/float64(time.Minute), float64(maxUptime)/float64(time.Minute))
dnj 2016/08/17 00:42:36 nit: render time as "%s" and pass the duration dir
bpastene 2016/08/17 02:30:33 Ahhhh, that's so much better. Done
158 break
159 } else {
dnj 2016/08/17 00:42:36 nit: You don't need this "else" clause, since you
bpastene 2016/08/17 02:30:33 Done.
160 logcatLog(logInfo, "No need to reboot, uptime < max_uptime: (%.2f < %.2f)\n", float64(uptime)/float64(time.Minute), float64(maxUptime)/float64(time.Minute))
dnj 2016/08/17 00:42:36 (same here)
bpastene 2016/08/17 02:30:33 Done.
161 }
162 time.Sleep(maxUptime - uptime + time.Second)
163 }
164 if err = rebootDevice(); err != nil {
165 logcatLog(logError, "Failed to reboot device: %s", err.Error())
166 return 1
167 }
168 return 0
169 }
170
10 func main() { 171 func main() {
11 » fmt.Println("Is this thing working?") 172 » paniccatcher.Do(func() {
173 » » os.Exit(realMain())
174 » }, func(p *paniccatcher.Panic) {
175 » » logcatLog(logError, "Panic: %s\n%s", p.Reason, p.Stack)
176 » » os.Exit(1)
177 » })
12 } 178 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698