Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2766)

Unified Diff: chrome/browser/memory/memory_kills_monitor.cc

Issue 2527973003: Consolidate code monitoring low memory kills and OOM kills to MemoryKillsMonitor on ChromeOS. (Closed)
Patch Set: restrict it to ChromeoS Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/memory/memory_kills_monitor.cc
diff --git a/chrome/browser/memory/memory_kills_monitor.cc b/chrome/browser/memory/memory_kills_monitor.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c9c1cc9e070cd219e79e6c867a6c2e067f3686ed
--- /dev/null
+++ b/chrome/browser/memory/memory_kills_monitor.cc
@@ -0,0 +1,243 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/memory/memory_kills_monitor.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <vector>
+
+#include "base/bind.h"
+#include "base/command_line.h"
+#include "base/debug/leak_annotations.h"
+#include "base/files/file_util.h"
+#include "base/files/scoped_file.h"
+#include "base/lazy_instance.h"
+#include "base/location.h"
+#include "base/logging.h"
+#include "base/memory/ptr_util.h"
+#include "base/metrics/histogram_macros.h"
+#include "base/posix/safe_strerror.h"
+#include "base/sequenced_task_runner.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_split.h"
+#include "base/synchronization/atomic_flag.h"
+#include "base/time/time.h"
+#include "chrome/browser/memory/memory_kills_histogram.h"
+#include "third_party/re2/src/re2/re2.h"
+#include "third_party/re2/src/re2/stringpiece.h"
+
+#if defined(OS_CHROMEOS)
+#include "chromeos/chromeos_switches.h"
+#endif
+
+namespace memory {
+
+using base::StringPiece;
+
+using base::SequencedWorkerPool;
+using base::TimeDelta;
+
+namespace {
+
+base::LazyInstance<base::AtomicFlag>::Leaky g_instance_initialized =
+ LAZY_INSTANCE_INITIALIZER;
+
+MemoryKillsMonitor* g_instance = nullptr;
+
+int64_t GetTimestamp(const StringPiece& line) {
+ std::vector<StringPiece> fields = base::SplitStringPiece(
+ line, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
+
+ int64_t timestamp = -1;
+ // Timestamp is the third field in a line of /dev/kmsg.
+ if (fields.size() < 3 || !base::StringToInt64(fields[2], &timestamp))
+ return -1;
+ return timestamp;
+}
+
+void LogToFile(base::Time time_stamp, const char* event) {
+ if (!g_instance_initialized.Get().IsSet())
+ return;
+ g_instance->WriteToFile(time_stamp, event);
+}
+
+void LogLowMemoryKillInternal(int estimated_freed_kb) {
+ static base::Time last_kill_time;
+ static int low_memory_kills = 0;
+
+ base::Time now = base::Time::Now();
+ LogToFile(now, "LOW_MEMORY_KILL");
+
+ const TimeDelta time_delta =
+ last_kill_time.is_null() ?
+ TimeDelta::FromSeconds(kMaxMemoryKillTimeDeltaSecs) :
+ (now - last_kill_time);
+ UMA_HISTOGRAM_OOM_KILL_TIME_INTERVAL(
+ "Arc.LowMemoryKiller.TimeDelta", time_delta);
+ last_kill_time = now;
+
+ ++low_memory_kills;
+ UMA_HISTOGRAM_CUSTOM_COUNTS(
+ "Arc.LowMemoryKiller.Count", low_memory_kills, 1, 1000, 1001);
+
+ UMA_HISTOGRAM_MEMORY_KB("Arc.LowMemoryKiller.FreedSize",
+ estimated_freed_kb);
+}
+
+void LogOOMKill(int64_t time_stamp, int oom_badness) {
+ static int64_t last_kill_time = -1;
+ static int oom_kills = 0;
+
+ // Ideally the timestamp should be parsed from /dev/kmsg, but the timestamp
+ // there is the elapsed time since system boot. So the timestamp |now| used
+ // here is a bit delayed.
+ base::Time now = base::Time::Now();
+ LogToFile(now, "OOM_KILL");
+
+ ++oom_kills;
+ // Report the cumulative count of killed process in one login session.
+ // For example if there are 3 processes killed, it would report 1 for the
+ // first kill, 2 for the second kill, then 3 for the final kill.
+ // It doesn't report a final count at the end of a user session because
+ // the code runs in a dedicated thread and never ends until browser shutdown
+ // (or logout on Chrome OS). And on browser shutdown the thread may be
+ // terminated brutally so there's no chance to execute a "final" block.
+ // More specifically, code outside the main loop of MemoryKillsMonitor::Run()
+ // are not guaranteed to be executed.
+ UMA_HISTOGRAM_CUSTOM_COUNTS("Arc.OOMKills.Count", oom_kills, 1, 1000, 1001);
+
+ // In practice most process has oom_badness < 1000, but
+ // strictly speaking the number could be [1, 2000]. What it really
+ // means is the baseline, proportion of memory used (normalized to
+ // [0, 1000]), plus an adjustment score oom_score_adj [-1000, 1000],
+ // truncated to 1 if negative (0 means never kill).
+ // Ref: https://lwn.net/Articles/396552/
+ UMA_HISTOGRAM_CUSTOM_COUNTS("Arc.OOMKills.Score", oom_badness, 1, 2000, 2001);
+
+ if (time_stamp > 0) {
+ TimeDelta time_delta;
+ // Sets to |kMaxMemoryKillTimeSecs| for the first kill event.
+ if (last_kill_time < 0)
+ time_delta = TimeDelta::FromSeconds(kMaxMemoryKillTimeDeltaSecs);
+ else
+ time_delta = TimeDelta::FromMicroseconds(time_stamp - last_kill_time);
+
+ last_kill_time = time_stamp;
+
+ UMA_HISTOGRAM_OOM_KILL_TIME_INTERVAL("Arc.OOMKills.TimeDelta", time_delta);
+ }
+}
+
+void ParseDmesg(const StringPiece& line) {
hidehiko 2016/11/25 10:13:21 This function seems doing more than parsing. (Actu
cylee1 2016/11/29 20:28:41 Done.
+ // Sample OOM log line:
+ // 3,1362,97646497541,-;Out of memory: Kill process 29582 (android.vending)
+ // score 961 or sacrifice child.
+ int oom_badness;
+ TimeDelta time_delta;
+ if (RE2::PartialMatch(re2::StringPiece(line.data(), line.size()),
+ "Out of memory: Kill process .* score (\\d+)",
+ &oom_badness)) {
+ int64_t time_stamp = GetTimestamp(line);
+ LogOOMKill(time_stamp, oom_badness);
+ }
+}
+
+} // namespace
+
+MemoryKillsMonitor::Handle::Handle(MemoryKillsMonitor* outer) : outer_(outer) {
+ DCHECK(outer_);
+}
+
+MemoryKillsMonitor::Handle::~Handle() {
+ outer_->is_shutting_down_.Set();
+}
+
+MemoryKillsMonitor::MemoryKillsMonitor() {
+ base::SimpleThread::Options non_joinable_options;
+ non_joinable_options.joinable = false;
+ non_joinable_worker_thread_ = base::MakeUnique<base::DelegateSimpleThread>(
+ this, "oom_kills_monitor", non_joinable_options);
+ non_joinable_worker_thread_->Start();
+
+#if defined(OS_CHROMEOS)
+ // Enable file logging on ChromeOS only for now.
+ base::FilePath log_file_name = base::CommandLine::ForCurrentProcess()->
+ GetSwitchValuePath(chromeos::switches::kMemoryKillsLog);
+ if (!log_file_name.empty())
+ log_file_ = base::ScopedFILE(base::OpenFile(log_file_name, "a"));
hidehiko 2016/11/25 10:13:21 Is this thread allowed to access files?
cylee1 2016/11/29 20:28:41 You're right. I decide to log to standard log file
+#endif
+}
+
+MemoryKillsMonitor::~MemoryKillsMonitor() {
+ // The instance has to be leaked on shutdown as it is referred to by a
+ // non-joinable thread but ~MemoryKillsMonitor() can't be explicitly deleted
+ // as it overrides ~SimpleThread(), it should nevertheless never be invoked.
+ NOTREACHED();
+}
+
+// static
+MemoryKillsMonitor::Handle MemoryKillsMonitor::StartMonitoring() {
+#if DCHECK_IS_ON()
+ static volatile bool monitoring_active = false;
+ DCHECK(!monitoring_active);
+ monitoring_active = true;
+#endif
+
+ // Instantiate the MemoryKillsMonitor and its underlying thread. The
+ // MemoryKillsMonitor itself has to be leaked on shutdown per having a
+ // non-joinable thread associated to its state. The MemoryKillsMonitor::Handle
+ // will notify the MemoryKillsMonitor when it is destroyed so that the
+ // underlying thread can at a minimum not do extra work during shutdown.
+ g_instance = new MemoryKillsMonitor;
hidehiko 2016/11/25 10:13:21 Could you add trailing ()? cf) 2) of https://www.c
cylee1 2016/11/29 20:28:41 Done.
+ ANNOTATE_LEAKING_OBJECT_PTR(g_instance);
+
+ g_instance_initialized.Get().Set();
+
+ return Handle(g_instance);
+}
+
+// static
+void MemoryKillsMonitor::LogLowMemoryKill(int estimated_freed_kb) {
+ LogLowMemoryKillInternal(estimated_freed_kb);
+}
+
+void MemoryKillsMonitor::Run() {
+ base::ScopedFILE kmsg_handle(
+ base::OpenFile(base::FilePath("/dev/kmsg"), "r"));
+ if (!kmsg_handle) {
+ LOG(WARNING) << "Open /dev/kmsg failed: " << base::safe_strerror(errno);
+ return;
+ }
+ // Skip kernel messages prior to the instantiation of this object to avoid
+ // double reporting.
+ fseek(kmsg_handle.get(), 0, SEEK_END);
+
+ static const int kMaxBufSize = 512;
+ char buf[kMaxBufSize];
+
+ while (fgets(buf, kMaxBufSize, kmsg_handle.get())) {
+ if (is_shutting_down_.IsSet()) {
+ DVLOG(1) << "Chrome is shutting down, exit now.";
+ break;
+ }
+ const StringPiece buf_string(buf);
+ ParseDmesg(buf_string);
+ }
+}
+
+void MemoryKillsMonitor::WriteToFile(base::Time time_stamp, const char* event) {
+ if (!log_file_.get()) {
+ return;
+ }
+ base::AutoLock lock(log_file_lock_);
hidehiko 2016/11/25 10:13:21 If this is the guard for log_file_, then L235 need
cylee1 2016/11/29 20:28:40 The file lock is removed.
+ fprintf(log_file_.get(), "%" PRId64 ", %s\n", time_stamp.ToJavaTime(), event);
+ fflush(log_file_.get());
+}
+
+} // namespace memory

Powered by Google App Engine
This is Rietveld 408576698