| OLD | NEW |
| (Empty) |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/base_switches.h" | |
| 6 #include "base/bind.h" | |
| 7 #include "base/command_line.h" | |
| 8 #include "base/location.h" | |
| 9 #include "base/memory/scoped_vector.h" | |
| 10 #include "base/single_thread_task_runner.h" | |
| 11 #include "base/strings/stringprintf.h" | |
| 12 #include "base/synchronization/condition_variable.h" | |
| 13 #include "base/synchronization/lock.h" | |
| 14 #include "base/synchronization/waitable_event.h" | |
| 15 #include "base/threading/thread.h" | |
| 16 #include "base/time/time.h" | |
| 17 #include "build/build_config.h" | |
| 18 #include "testing/gtest/include/gtest/gtest.h" | |
| 19 #include "testing/perf/perf_test.h" | |
| 20 | |
| 21 #if defined(OS_POSIX) | |
| 22 #include <pthread.h> | |
| 23 #endif | |
| 24 | |
| 25 namespace base { | |
| 26 | |
| 27 namespace { | |
| 28 | |
// Number of hops passed to PingPong() for every measurement. Reported
// per-hop times are the measured totals divided by this count.
constexpr int kNumRuns = 100000;
| 30 | |
| 31 // Base class for a threading perf-test. This sets up some threads for the | |
| 32 // test and measures the clock-time in addition to time spent on each thread. | |
| 33 class ThreadPerfTest : public testing::Test { | |
| 34 public: | |
| 35 ThreadPerfTest() | |
| 36 : done_(false, false) { | |
| 37 // Disable the task profiler as it adds significant cost! | |
| 38 CommandLine::Init(0, NULL); | |
| 39 CommandLine::ForCurrentProcess()->AppendSwitchASCII( | |
| 40 switches::kProfilerTiming, | |
| 41 switches::kProfilerTimingDisabledValue); | |
| 42 } | |
| 43 | |
| 44 // To be implemented by each test. Subclass must uses threads_ such that | |
| 45 // their cpu-time can be measured. Test must return from PingPong() _and_ | |
| 46 // call FinishMeasurement from any thread to complete the test. | |
| 47 virtual void Init() {} | |
| 48 virtual void PingPong(int hops) = 0; | |
| 49 virtual void Reset() {} | |
| 50 | |
| 51 void TimeOnThread(base::ThreadTicks* ticks, base::WaitableEvent* done) { | |
| 52 *ticks = base::ThreadTicks::Now(); | |
| 53 done->Signal(); | |
| 54 } | |
| 55 | |
| 56 base::ThreadTicks ThreadNow(base::Thread* thread) { | |
| 57 base::WaitableEvent done(false, false); | |
| 58 base::ThreadTicks ticks; | |
| 59 thread->task_runner()->PostTask( | |
| 60 FROM_HERE, base::Bind(&ThreadPerfTest::TimeOnThread, | |
| 61 base::Unretained(this), &ticks, &done)); | |
| 62 done.Wait(); | |
| 63 return ticks; | |
| 64 } | |
| 65 | |
| 66 void RunPingPongTest(const std::string& name, unsigned num_threads) { | |
| 67 // Create threads and collect starting cpu-time for each thread. | |
| 68 std::vector<base::ThreadTicks> thread_starts; | |
| 69 while (threads_.size() < num_threads) { | |
| 70 threads_.push_back(new base::Thread("PingPonger")); | |
| 71 threads_.back()->Start(); | |
| 72 if (base::ThreadTicks::IsSupported()) | |
| 73 thread_starts.push_back(ThreadNow(threads_.back())); | |
| 74 } | |
| 75 | |
| 76 Init(); | |
| 77 | |
| 78 base::TimeTicks start = base::TimeTicks::Now(); | |
| 79 PingPong(kNumRuns); | |
| 80 done_.Wait(); | |
| 81 base::TimeTicks end = base::TimeTicks::Now(); | |
| 82 | |
| 83 // Gather the cpu-time spent on each thread. This does one extra tasks, | |
| 84 // but that should be in the noise given enough runs. | |
| 85 base::TimeDelta thread_time; | |
| 86 while (threads_.size()) { | |
| 87 if (base::ThreadTicks::IsSupported()) { | |
| 88 thread_time += ThreadNow(threads_.back()) - thread_starts.back(); | |
| 89 thread_starts.pop_back(); | |
| 90 } | |
| 91 threads_.pop_back(); | |
| 92 } | |
| 93 | |
| 94 Reset(); | |
| 95 | |
| 96 double num_runs = static_cast<double>(kNumRuns); | |
| 97 double us_per_task_clock = (end - start).InMicroseconds() / num_runs; | |
| 98 double us_per_task_cpu = thread_time.InMicroseconds() / num_runs; | |
| 99 | |
| 100 // Clock time per task. | |
| 101 perf_test::PrintResult( | |
| 102 "task", "", name + "_time ", us_per_task_clock, "us/hop", true); | |
| 103 | |
| 104 // Total utilization across threads if available (likely higher). | |
| 105 if (base::ThreadTicks::IsSupported()) { | |
| 106 perf_test::PrintResult( | |
| 107 "task", "", name + "_cpu ", us_per_task_cpu, "us/hop", true); | |
| 108 } | |
| 109 } | |
| 110 | |
| 111 protected: | |
| 112 void FinishMeasurement() { done_.Signal(); } | |
| 113 ScopedVector<base::Thread> threads_; | |
| 114 | |
| 115 private: | |
| 116 base::WaitableEvent done_; | |
| 117 }; | |
| 118 | |
| 119 // Class to test task performance by posting empty tasks back and forth. | |
| 120 class TaskPerfTest : public ThreadPerfTest { | |
| 121 base::Thread* NextThread(int count) { | |
| 122 return threads_[count % threads_.size()]; | |
| 123 } | |
| 124 | |
| 125 void PingPong(int hops) override { | |
| 126 if (!hops) { | |
| 127 FinishMeasurement(); | |
| 128 return; | |
| 129 } | |
| 130 NextThread(hops)->task_runner()->PostTask( | |
| 131 FROM_HERE, base::Bind(&ThreadPerfTest::PingPong, base::Unretained(this), | |
| 132 hops - 1)); | |
| 133 } | |
| 134 }; | |
| 135 | |
| 136 // This tries to test the 'best-case' as well as the 'worst-case' task posting | |
| 137 // performance. The best-case keeps one thread alive such that it never yeilds, | |
| 138 // while the worse-case forces a context switch for every task. Four threads are | |
| 139 // used to ensure the threads do yeild (with just two it might be possible for | |
| 140 // both threads to stay awake if they can signal each other fast enough). | |
| 141 TEST_F(TaskPerfTest, TaskPingPong) { | |
| 142 RunPingPongTest("1_Task_Threads", 1); | |
| 143 RunPingPongTest("4_Task_Threads", 4); | |
| 144 } | |
| 145 | |
| 146 | |
| 147 // Same as above, but add observers to test their perf impact. | |
| 148 class MessageLoopObserver : public base::MessageLoop::TaskObserver { | |
| 149 public: | |
| 150 void WillProcessTask(const base::PendingTask& pending_task) override {} | |
| 151 void DidProcessTask(const base::PendingTask& pending_task) override {} | |
| 152 }; | |
| 153 MessageLoopObserver message_loop_observer; | |
| 154 | |
| 155 class TaskObserverPerfTest : public TaskPerfTest { | |
| 156 public: | |
| 157 void Init() override { | |
| 158 TaskPerfTest::Init(); | |
| 159 for (size_t i = 0; i < threads_.size(); i++) { | |
| 160 threads_[i]->message_loop()->AddTaskObserver(&message_loop_observer); | |
| 161 } | |
| 162 } | |
| 163 }; | |
| 164 | |
| 165 TEST_F(TaskObserverPerfTest, TaskPingPong) { | |
| 166 RunPingPongTest("1_Task_Threads_With_Observer", 1); | |
| 167 RunPingPongTest("4_Task_Threads_With_Observer", 4); | |
| 168 } | |
| 169 | |
| 170 // Class to test our WaitableEvent performance by signaling back and fort. | |
| 171 // WaitableEvent is templated so we can also compare with other versions. | |
| 172 template <typename WaitableEventType> | |
| 173 class EventPerfTest : public ThreadPerfTest { | |
| 174 public: | |
| 175 void Init() override { | |
| 176 for (size_t i = 0; i < threads_.size(); i++) | |
| 177 events_.push_back(new WaitableEventType(false, false)); | |
| 178 } | |
| 179 | |
| 180 void Reset() override { events_.clear(); } | |
| 181 | |
| 182 void WaitAndSignalOnThread(size_t event) { | |
| 183 size_t next_event = (event + 1) % events_.size(); | |
| 184 int my_hops = 0; | |
| 185 do { | |
| 186 events_[event]->Wait(); | |
| 187 my_hops = --remaining_hops_; // We own 'hops' between Wait and Signal. | |
| 188 events_[next_event]->Signal(); | |
| 189 } while (my_hops > 0); | |
| 190 // Once we are done, all threads will signal as hops passes zero. | |
| 191 // We only signal completion once, on the thread that reaches zero. | |
| 192 if (!my_hops) | |
| 193 FinishMeasurement(); | |
| 194 } | |
| 195 | |
| 196 void PingPong(int hops) override { | |
| 197 remaining_hops_ = hops; | |
| 198 for (size_t i = 0; i < threads_.size(); i++) { | |
| 199 threads_[i]->task_runner()->PostTask( | |
| 200 FROM_HERE, base::Bind(&EventPerfTest::WaitAndSignalOnThread, | |
| 201 base::Unretained(this), i)); | |
| 202 } | |
| 203 | |
| 204 // Kick off the Signal ping-ponging. | |
| 205 events_.front()->Signal(); | |
| 206 } | |
| 207 | |
| 208 int remaining_hops_; | |
| 209 ScopedVector<WaitableEventType> events_; | |
| 210 }; | |
| 211 | |
| 212 // Similar to the task posting test, this just tests similar functionality | |
| 213 // using WaitableEvents. We only test four threads (worst-case), but we | |
| 214 // might want to craft a way to test the best-case (where the thread doesn't | |
| 215 // end up blocking because the event is already signalled). | |
| 216 typedef EventPerfTest<base::WaitableEvent> WaitableEventPerfTest; | |
| 217 TEST_F(WaitableEventPerfTest, EventPingPong) { | |
| 218 RunPingPongTest("4_WaitableEvent_Threads", 4); | |
| 219 } | |
| 220 | |
| 221 // Build a minimal event using ConditionVariable. | |
| 222 class ConditionVariableEvent { | |
| 223 public: | |
| 224 ConditionVariableEvent(bool manual_reset, bool initially_signaled) | |
| 225 : cond_(&lock_), signaled_(false) { | |
| 226 DCHECK(!manual_reset); | |
| 227 DCHECK(!initially_signaled); | |
| 228 } | |
| 229 | |
| 230 void Signal() { | |
| 231 { | |
| 232 base::AutoLock scoped_lock(lock_); | |
| 233 signaled_ = true; | |
| 234 } | |
| 235 cond_.Signal(); | |
| 236 } | |
| 237 | |
| 238 void Wait() { | |
| 239 base::AutoLock scoped_lock(lock_); | |
| 240 while (!signaled_) | |
| 241 cond_.Wait(); | |
| 242 signaled_ = false; | |
| 243 } | |
| 244 | |
| 245 private: | |
| 246 base::Lock lock_; | |
| 247 base::ConditionVariable cond_; | |
| 248 bool signaled_; | |
| 249 }; | |
| 250 | |
| 251 // This is meant to test the absolute minimal context switching time | |
| 252 // using our own base synchronization code. | |
| 253 typedef EventPerfTest<ConditionVariableEvent> ConditionVariablePerfTest; | |
| 254 TEST_F(ConditionVariablePerfTest, EventPingPong) { | |
| 255 RunPingPongTest("4_ConditionVariable_Threads", 4); | |
| 256 } | |
| 257 #if defined(OS_POSIX) | |
| 258 | |
| 259 // Absolutely 100% minimal posix waitable event. If there is a better/faster | |
| 260 // way to force a context switch, we should use that instead. | |
| 261 class PthreadEvent { | |
| 262 public: | |
| 263 PthreadEvent(bool manual_reset, bool initially_signaled) { | |
| 264 DCHECK(!manual_reset); | |
| 265 DCHECK(!initially_signaled); | |
| 266 pthread_mutex_init(&mutex_, 0); | |
| 267 pthread_cond_init(&cond_, 0); | |
| 268 signaled_ = false; | |
| 269 } | |
| 270 | |
| 271 ~PthreadEvent() { | |
| 272 pthread_cond_destroy(&cond_); | |
| 273 pthread_mutex_destroy(&mutex_); | |
| 274 } | |
| 275 | |
| 276 void Signal() { | |
| 277 pthread_mutex_lock(&mutex_); | |
| 278 signaled_ = true; | |
| 279 pthread_mutex_unlock(&mutex_); | |
| 280 pthread_cond_signal(&cond_); | |
| 281 } | |
| 282 | |
| 283 void Wait() { | |
| 284 pthread_mutex_lock(&mutex_); | |
| 285 while (!signaled_) | |
| 286 pthread_cond_wait(&cond_, &mutex_); | |
| 287 signaled_ = false; | |
| 288 pthread_mutex_unlock(&mutex_); | |
| 289 } | |
| 290 | |
| 291 private: | |
| 292 bool signaled_; | |
| 293 pthread_mutex_t mutex_; | |
| 294 pthread_cond_t cond_; | |
| 295 }; | |
| 296 | |
| 297 // This is meant to test the absolute minimal context switching time. | |
| 298 // If there is any faster way to do this we should substitute it in. | |
| 299 typedef EventPerfTest<PthreadEvent> PthreadEventPerfTest; | |
| 300 TEST_F(PthreadEventPerfTest, EventPingPong) { | |
| 301 RunPingPongTest("4_PthreadCondVar_Threads", 4); | |
| 302 } | |
| 303 | |
| 304 #endif | |
| 305 | |
| 306 } // namespace | |
| 307 | |
| 308 } // namespace base | |
| OLD | NEW |