Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/memory/memory_kills_monitor.h" | |
| 6 | |
| 7 #include <errno.h> | |
| 8 #include <fcntl.h> | |
| 9 #include <inttypes.h> | |
| 10 #include <stdio.h> | |
| 11 | |
| 12 #include <vector> | |
| 13 | |
| 14 #include "base/bind.h" | |
| 15 #include "base/command_line.h" | |
| 16 #include "base/debug/leak_annotations.h" | |
| 17 #include "base/files/file_util.h" | |
| 18 #include "base/files/scoped_file.h" | |
| 19 #include "base/lazy_instance.h" | |
| 20 #include "base/location.h" | |
| 21 #include "base/logging.h" | |
| 22 #include "base/memory/ptr_util.h" | |
| 23 #include "base/metrics/histogram_macros.h" | |
| 24 #include "base/posix/safe_strerror.h" | |
| 25 #include "base/sequenced_task_runner.h" | |
| 26 #include "base/strings/string_number_conversions.h" | |
| 27 #include "base/strings/string_piece.h" | |
| 28 #include "base/strings/string_split.h" | |
| 29 #include "base/synchronization/atomic_flag.h" | |
| 30 #include "base/time/time.h" | |
| 31 #include "chrome/browser/memory/memory_kills_histogram.h" | |
| 32 #include "third_party/re2/src/re2/re2.h" | |
| 33 #include "third_party/re2/src/re2/stringpiece.h" | |
| 34 | |
| 35 #if defined(OS_CHROMEOS) | |
| 36 #include "chromeos/chromeos_switches.h" | |
| 37 #endif | |
| 38 | |
| 39 namespace memory { | |
| 40 | |
| 41 using base::StringPiece; | |
| 42 | |
| 43 using base::SequencedWorkerPool; | |
| 44 using base::TimeDelta; | |
| 45 | |
| 46 namespace { | |
| 47 | |
| 48 base::LazyInstance<base::AtomicFlag>::Leaky g_instance_initialized = | |
| 49 LAZY_INSTANCE_INITIALIZER; | |
| 50 | |
| 51 MemoryKillsMonitor* g_instance = nullptr; | |
| 52 | |
| 53 int64_t GetTimestamp(const StringPiece& line) { | |
| 54 std::vector<StringPiece> fields = base::SplitStringPiece( | |
| 55 line, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); | |
| 56 | |
| 57 int64_t timestamp = -1; | |
| 58 // Timestamp is the third field in a line of /dev/kmsg. | |
| 59 if (fields.size() < 3 || !base::StringToInt64(fields[2], ×tamp)) | |
| 60 return -1; | |
| 61 return timestamp; | |
| 62 } | |
| 63 | |
| 64 void LogToFile(base::Time time_stamp, const char* event) { | |
| 65 if (!g_instance_initialized.Get().IsSet()) | |
| 66 return; | |
| 67 g_instance->WriteToFile(time_stamp, event); | |
| 68 } | |
| 69 | |
| 70 void LogLowMemoryKillInternal(int estimated_freed_kb) { | |
| 71 static base::Time last_kill_time; | |
| 72 static int low_memory_kills = 0; | |
| 73 | |
| 74 base::Time now = base::Time::Now(); | |
| 75 LogToFile(now, "LOW_MEMORY_KILL"); | |
| 76 | |
| 77 const TimeDelta time_delta = | |
| 78 last_kill_time.is_null() ? | |
| 79 TimeDelta::FromSeconds(kMaxMemoryKillTimeDeltaSecs) : | |
| 80 (now - last_kill_time); | |
| 81 UMA_HISTOGRAM_OOM_KILL_TIME_INTERVAL( | |
| 82 "Arc.LowMemoryKiller.TimeDelta", time_delta); | |
| 83 last_kill_time = now; | |
| 84 | |
| 85 ++low_memory_kills; | |
| 86 UMA_HISTOGRAM_CUSTOM_COUNTS( | |
| 87 "Arc.LowMemoryKiller.Count", low_memory_kills, 1, 1000, 1001); | |
| 88 | |
| 89 UMA_HISTOGRAM_MEMORY_KB("Arc.LowMemoryKiller.FreedSize", | |
| 90 estimated_freed_kb); | |
| 91 } | |
| 92 | |
| 93 void LogOOMKill(int64_t time_stamp, int oom_badness) { | |
| 94 static int64_t last_kill_time = -1; | |
| 95 static int oom_kills = 0; | |
| 96 | |
| 97 // Ideally the timestamp should be parsed from /dev/kmsg, but the timestamp | |
| 98 // there is the elapsed time since system boot. So the timestamp |now| used | |
| 99 // here is a bit delayed. | |
| 100 base::Time now = base::Time::Now(); | |
| 101 LogToFile(now, "OOM_KILL"); | |
| 102 | |
| 103 ++oom_kills; | |
| 104 // Report the cumulative count of killed process in one login session. | |
| 105 // For example if there are 3 processes killed, it would report 1 for the | |
| 106 // first kill, 2 for the second kill, then 3 for the final kill. | |
| 107 // It doesn't report a final count at the end of a user session because | |
| 108 // the code runs in a dedicated thread and never ends until browser shutdown | |
| 109 // (or logout on Chrome OS). And on browser shutdown the thread may be | |
| 110 // terminated brutally so there's no chance to execute a "final" block. | |
| 111 // More specifically, code outside the main loop of MemoryKillsMonitor::Run() | |
| 112 // are not guaranteed to be executed. | |
| 113 UMA_HISTOGRAM_CUSTOM_COUNTS("Arc.OOMKills.Count", oom_kills, 1, 1000, 1001); | |
| 114 | |
| 115 // In practice most process has oom_badness < 1000, but | |
| 116 // strictly speaking the number could be [1, 2000]. What it really | |
| 117 // means is the baseline, proportion of memory used (normalized to | |
| 118 // [0, 1000]), plus an adjustment score oom_score_adj [-1000, 1000], | |
| 119 // truncated to 1 if negative (0 means never kill). | |
| 120 // Ref: https://lwn.net/Articles/396552/ | |
| 121 UMA_HISTOGRAM_CUSTOM_COUNTS("Arc.OOMKills.Score", oom_badness, 1, 2000, 2001); | |
| 122 | |
| 123 if (time_stamp > 0) { | |
| 124 TimeDelta time_delta; | |
| 125 // Sets to |kMaxMemoryKillTimeSecs| for the first kill event. | |
| 126 if (last_kill_time < 0) | |
| 127 time_delta = TimeDelta::FromSeconds(kMaxMemoryKillTimeDeltaSecs); | |
| 128 else | |
| 129 time_delta = TimeDelta::FromMicroseconds(time_stamp - last_kill_time); | |
| 130 | |
| 131 last_kill_time = time_stamp; | |
| 132 | |
| 133 UMA_HISTOGRAM_OOM_KILL_TIME_INTERVAL("Arc.OOMKills.TimeDelta", time_delta); | |
| 134 } | |
| 135 } | |
| 136 | |
| 137 void ParseDmesg(const StringPiece& line) { | |
|
hidehiko
2016/11/25 10:13:21
This function seems doing more than parsing. (Actu
cylee1
2016/11/29 20:28:41
Done.
| |
| 138 // Sample OOM log line: | |
| 139 // 3,1362,97646497541,-;Out of memory: Kill process 29582 (android.vending) | |
| 140 // score 961 or sacrifice child. | |
| 141 int oom_badness; | |
| 142 TimeDelta time_delta; | |
| 143 if (RE2::PartialMatch(re2::StringPiece(line.data(), line.size()), | |
| 144 "Out of memory: Kill process .* score (\\d+)", | |
| 145 &oom_badness)) { | |
| 146 int64_t time_stamp = GetTimestamp(line); | |
| 147 LogOOMKill(time_stamp, oom_badness); | |
| 148 } | |
| 149 } | |
| 150 | |
| 151 } // namespace | |
| 152 | |
| 153 MemoryKillsMonitor::Handle::Handle(MemoryKillsMonitor* outer) : outer_(outer) { | |
| 154 DCHECK(outer_); | |
| 155 } | |
| 156 | |
| 157 MemoryKillsMonitor::Handle::~Handle() { | |
| 158 outer_->is_shutting_down_.Set(); | |
| 159 } | |
| 160 | |
| 161 MemoryKillsMonitor::MemoryKillsMonitor() { | |
| 162 base::SimpleThread::Options non_joinable_options; | |
| 163 non_joinable_options.joinable = false; | |
| 164 non_joinable_worker_thread_ = base::MakeUnique<base::DelegateSimpleThread>( | |
| 165 this, "oom_kills_monitor", non_joinable_options); | |
| 166 non_joinable_worker_thread_->Start(); | |
| 167 | |
| 168 #if defined(OS_CHROMEOS) | |
| 169 // Enable file logging on ChromeOS only for now. | |
| 170 base::FilePath log_file_name = base::CommandLine::ForCurrentProcess()-> | |
| 171 GetSwitchValuePath(chromeos::switches::kMemoryKillsLog); | |
| 172 if (!log_file_name.empty()) | |
| 173 log_file_ = base::ScopedFILE(base::OpenFile(log_file_name, "a")); | |
|
hidehiko
2016/11/25 10:13:21
Is this thread allowed to access files?
cylee1
2016/11/29 20:28:41
You're right. I decide to log to standard log file
| |
| 174 #endif | |
| 175 } | |
| 176 | |
| 177 MemoryKillsMonitor::~MemoryKillsMonitor() { | |
| 178 // The instance has to be leaked on shutdown as it is referred to by a | |
| 179 // non-joinable thread but ~MemoryKillsMonitor() can't be explicitly deleted | |
| 180 // as it overrides ~SimpleThread(), it should nevertheless never be invoked. | |
| 181 NOTREACHED(); | |
| 182 } | |
| 183 | |
| 184 // static | |
| 185 MemoryKillsMonitor::Handle MemoryKillsMonitor::StartMonitoring() { | |
| 186 #if DCHECK_IS_ON() | |
| 187 static volatile bool monitoring_active = false; | |
| 188 DCHECK(!monitoring_active); | |
| 189 monitoring_active = true; | |
| 190 #endif | |
| 191 | |
| 192 // Instantiate the MemoryKillsMonitor and its underlying thread. The | |
| 193 // MemoryKillsMonitor itself has to be leaked on shutdown per having a | |
| 194 // non-joinable thread associated to its state. The MemoryKillsMonitor::Handle | |
| 195 // will notify the MemoryKillsMonitor when it is destroyed so that the | |
| 196 // underlying thread can at a minimum not do extra work during shutdown. | |
| 197 g_instance = new MemoryKillsMonitor; | |
|
hidehiko
2016/11/25 10:13:21
Could you add trailing ()?
cf) 2) of https://www.c
cylee1
2016/11/29 20:28:41
Done.
| |
| 198 ANNOTATE_LEAKING_OBJECT_PTR(g_instance); | |
| 199 | |
| 200 g_instance_initialized.Get().Set(); | |
| 201 | |
| 202 return Handle(g_instance); | |
| 203 } | |
| 204 | |
| 205 // static | |
| 206 void MemoryKillsMonitor::LogLowMemoryKill(int estimated_freed_kb) { | |
| 207 LogLowMemoryKillInternal(estimated_freed_kb); | |
| 208 } | |
| 209 | |
| 210 void MemoryKillsMonitor::Run() { | |
| 211 base::ScopedFILE kmsg_handle( | |
| 212 base::OpenFile(base::FilePath("/dev/kmsg"), "r")); | |
| 213 if (!kmsg_handle) { | |
| 214 LOG(WARNING) << "Open /dev/kmsg failed: " << base::safe_strerror(errno); | |
| 215 return; | |
| 216 } | |
| 217 // Skip kernel messages prior to the instantiation of this object to avoid | |
| 218 // double reporting. | |
| 219 fseek(kmsg_handle.get(), 0, SEEK_END); | |
| 220 | |
| 221 static const int kMaxBufSize = 512; | |
| 222 char buf[kMaxBufSize]; | |
| 223 | |
| 224 while (fgets(buf, kMaxBufSize, kmsg_handle.get())) { | |
| 225 if (is_shutting_down_.IsSet()) { | |
| 226 DVLOG(1) << "Chrome is shutting down, exit now."; | |
| 227 break; | |
| 228 } | |
| 229 const StringPiece buf_string(buf); | |
| 230 ParseDmesg(buf_string); | |
| 231 } | |
| 232 } | |
| 233 | |
| 234 void MemoryKillsMonitor::WriteToFile(base::Time time_stamp, const char* event) { | |
| 235 if (!log_file_.get()) { | |
| 236 return; | |
| 237 } | |
| 238 base::AutoLock lock(log_file_lock_); | |
|
hidehiko
2016/11/25 10:13:21
If this is the guard for log_file_, then L235 need
cylee1
2016/11/29 20:28:40
The file lock is removed.
| |
| 239 fprintf(log_file_.get(), "%" PRId64 ", %s\n", time_stamp.ToJavaTime(), event); | |
| 240 fflush(log_file_.get()); | |
| 241 } | |
| 242 | |
| 243 } // namespace memory | |
| OLD | NEW |