OLD | NEW |
1 // Copyright (c) 2008, Google Inc. | 1 // Copyright (c) 2008, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 14 matching lines...) Expand all Loading... |
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 |
30 // --- | 30 // --- |
31 // Author: Ken Ashcraft <opensource@google.com> | 31 // Author: Ken Ashcraft <opensource@google.com> |
32 | 32 |
33 #include <config.h> | 33 #include <config.h> |
34 #include "thread_cache.h" | 34 #include "thread_cache.h" |
| 35 #include <errno.h> |
35 #include <string.h> // for memcpy | 36 #include <string.h> // for memcpy |
36 #include <algorithm> // for max, min | 37 #include <algorithm> // for max, min |
37 #include "base/commandlineflags.h" // for SpinLockHolder | 38 #include "base/commandlineflags.h" // for SpinLockHolder |
38 #include "base/spinlock.h" // for SpinLockHolder | 39 #include "base/spinlock.h" // for SpinLockHolder |
39 #include "central_freelist.h" // for CentralFreeListPadded | 40 #include "central_freelist.h" // for CentralFreeListPadded |
40 #include "maybe_threads.h" | 41 #include "maybe_threads.h" |
41 | 42 |
42 using std::min; | 43 using std::min; |
43 using std::max; | 44 using std::max; |
44 | 45 |
45 DEFINE_int64(tcmalloc_max_total_thread_cache_bytes, | 46 DEFINE_int64(tcmalloc_max_total_thread_cache_bytes, |
46 EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", | 47 EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", |
47 kDefaultOverallThreadCacheSize), | 48 kDefaultOverallThreadCacheSize), |
48 "Bound on the total amount of bytes allocated to " | 49 "Bound on the total amount of bytes allocated to " |
49 "thread caches. This bound is not strict, so it is possible " | 50 "thread caches. This bound is not strict, so it is possible " |
50 "for the cache to go over this bound in certain circumstances. "); | 51 "for the cache to go over this bound in certain circumstances. " |
| 52 "Maximum value of this flag is capped to 1 GB."); |
51 | 53 |
52 namespace tcmalloc { | 54 namespace tcmalloc { |
53 | 55 |
54 static bool phinited = false; | 56 static bool phinited = false; |
55 | 57 |
56 volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; | 58 volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; |
57 size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; | 59 size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; |
58 ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize; | 60 ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize; |
59 PageHeapAllocator<ThreadCache> threadcache_allocator; | 61 PageHeapAllocator<ThreadCache> threadcache_allocator; |
60 ThreadCache* ThreadCache::thread_heaps_ = NULL; | 62 ThreadCache* ThreadCache::thread_heaps_ = NULL; |
61 int ThreadCache::thread_heap_count_ = 0; | 63 int ThreadCache::thread_heap_count_ = 0; |
62 ThreadCache* ThreadCache::next_memory_steal_ = NULL; | 64 ThreadCache* ThreadCache::next_memory_steal_ = NULL; |
63 #ifdef HAVE_TLS | 65 #ifdef HAVE_TLS |
64 __thread ThreadCache* ThreadCache::threadlocal_heap_ | 66 __thread ThreadCache* ThreadCache::threadlocal_heap_ |
65 # ifdef HAVE___ATTRIBUTE__ | 67 # ifdef HAVE___ATTRIBUTE__ |
66 __attribute__ ((tls_model ("initial-exec"))) | 68 __attribute__ ((tls_model ("initial-exec"))) |
67 # endif | 69 # endif |
68 ; | 70 ; |
69 #endif | 71 #endif |
70 bool ThreadCache::tsd_inited_ = false; | 72 bool ThreadCache::tsd_inited_ = false; |
71 pthread_key_t ThreadCache::heap_key_; | 73 pthread_key_t ThreadCache::heap_key_; |
72 | 74 |
#if defined(HAVE_TLS)
// Records whether thread-local storage should be used.  It starts out
// false (the conservative answer) and is assigned by
// CheckIfKernelSupportsTLS().
bool kernel_supports_tls = false;

# if defined(_WIN32)
// Windows build: TLS is assumed to be available (believed to have been
// supported since Windows NT).
void CheckIfKernelSupportsTLS() {
  kernel_supports_tls = true;
}
# elif !HAVE_DECL_UNAME
// No uname() declaration: a system that old is probably too old for TLS
// as well, so report it as unsupported.
void CheckIfKernelSupportsTLS() {
  kernel_supports_tls = false;
}
# else
#   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
// Decides kernel_supports_tls from the system name and release string
// reported by uname():
//   - uname() failure                 -> false (and the failure is logged)
//   - Linux 0.x, 1.x, 2.0 .. 2.5      -> false (2.6.0 was the first with TLS)
//   - any other Linux release         -> true
//   - sysname "CYGWIN_NT-6.1-WOW64"   -> false
//   - every other kernel              -> true
void CheckIfKernelSupportsTLS() {
  struct utsname info;
  if (uname(&info) < 0) {   // should be impossible
    Log(kLog, __FILE__, __LINE__,
        "uname failed assuming no TLS support (errno)", errno);
    kernel_supports_tls = false;
    return;
  }

  if (strcasecmp(info.sysname, "linux") == 0) {
    // Only the leading characters of the release string are inspected;
    // each test short-circuits so no character is read past a mismatch.
    const char* const rel = info.release;
    const bool is_0x_or_1x = rel[0] < '2' && rel[1] == '.';
    const bool is_20_to_25 = rel[0] == '2' && rel[1] == '.' &&
                             rel[2] >= '0' && rel[2] < '6' &&
                             rel[3] == '.';
    kernel_supports_tls = !(is_0x_or_1x || is_20_to_25);
    return;
  }

  if (strcasecmp(info.sysname, "CYGWIN_NT-6.1-WOW64") == 0) {
    // This particular cygwin version was observed (in the original
    // author's testing) to hang when using TLS.
    kernel_supports_tls = false;
    return;
  }

  // Some other kernel: be optimistic.
  kernel_supports_tls = true;
  // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
}
# endif  // HAVE_DECL_UNAME
#endif    // HAVE_TLS
103 | 114 |
104 void ThreadCache::Init(pthread_t tid) { | 115 void ThreadCache::Init(pthread_t tid) { |
105 size_ = 0; | 116 size_ = 0; |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
251 const int batch_size = Static::sizemap()->num_objects_to_move(cl); | 262 const int batch_size = Static::sizemap()->num_objects_to_move(cl); |
252 if (list->max_length() > batch_size) { | 263 if (list->max_length() > batch_size) { |
253 list->set_max_length( | 264 list->set_max_length( |
254 max<int>(list->max_length() - batch_size, batch_size)); | 265 max<int>(list->max_length() - batch_size, batch_size)); |
255 } | 266 } |
256 } | 267 } |
257 list->clear_lowwatermark(); | 268 list->clear_lowwatermark(); |
258 } | 269 } |
259 | 270 |
260 IncreaseCacheLimit(); | 271 IncreaseCacheLimit(); |
261 | |
262 // int64 finish = CycleClock::Now(); | |
263 // CycleTimer ct; | |
264 // MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); | |
265 } | 272 } |
266 | 273 |
267 void ThreadCache::IncreaseCacheLimit() { | 274 void ThreadCache::IncreaseCacheLimit() { |
268 SpinLockHolder h(Static::pageheap_lock()); | 275 SpinLockHolder h(Static::pageheap_lock()); |
269 IncreaseCacheLimitLocked(); | 276 IncreaseCacheLimitLocked(); |
270 } | 277 } |
271 | 278 |
272 void ThreadCache::IncreaseCacheLimitLocked() { | 279 void ThreadCache::IncreaseCacheLimitLocked() { |
273 if (unclaimed_cache_space_ > 0) { | 280 if (unclaimed_cache_space_ > 0) { |
274 // Possibly make unclaimed_cache_space_ negative. | 281 // Possibly make unclaimed_cache_space_ negative. |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
309 Static::InitStaticVars(); | 316 Static::InitStaticVars(); |
310 threadcache_allocator.Init(); | 317 threadcache_allocator.Init(); |
311 phinited = 1; | 318 phinited = 1; |
312 } | 319 } |
313 } | 320 } |
314 | 321 |
315 void ThreadCache::InitTSD() { | 322 void ThreadCache::InitTSD() { |
316 ASSERT(!tsd_inited_); | 323 ASSERT(!tsd_inited_); |
317 perftools_pthread_key_create(&heap_key_, DestroyThreadCache); | 324 perftools_pthread_key_create(&heap_key_, DestroyThreadCache); |
318 tsd_inited_ = true; | 325 tsd_inited_ = true; |
| 326 |
| 327 #ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY |
| 328 // We may have used a fake pthread_t for the main thread. Fix it. |
| 329 pthread_t zero; |
| 330 memset(&zero, 0, sizeof(zero)); |
| 331 SpinLockHolder h(Static::pageheap_lock()); |
| 332 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { |
| 333 if (h->tid_ == zero) { |
| 334 h->tid_ = pthread_self(); |
| 335 } |
| 336 } |
| 337 #endif |
319 } | 338 } |
320 | 339 |
321 ThreadCache* ThreadCache::CreateCacheIfNecessary() { | 340 ThreadCache* ThreadCache::CreateCacheIfNecessary() { |
322 // Initialize per-thread data if necessary | 341 // Initialize per-thread data if necessary |
323 ThreadCache* heap = NULL; | 342 ThreadCache* heap = NULL; |
324 { | 343 { |
325 SpinLockHolder h(Static::pageheap_lock()); | 344 SpinLockHolder h(Static::pageheap_lock()); |
326 // On very old libc's, this call may crash if it happens too | 345 // On some old glibc's, and on freebsd's libc (as of freebsd 8.1), |
327 // early. No libc using NPTL should be affected. If there | 346 // calling pthread routines (even pthread_self) too early could |
328 // is a crash here, we could use code (on linux, at least) | 347 // cause a segfault. Since we can call pthreads quite early, we |
329 // to detect NPTL vs LinuxThreads: | 348 // have to protect against that in such situations by making a |
330 // http://www.redhat.com/archives/phil-list/2003-April/msg00038.html | 349 // 'fake' pthread. This is not ideal since it doesn't work well |
331 // If we detect not-NPTL, we could execute the old code from | 350 // when linking tcmalloc statically with apps that create threads |
332 // http://google-perftools.googlecode.com/svn/tags/google-perftools-1.7/sr
c/thread_cache.cc | 351 // before main, so we only do it if we have to. |
333 // that avoids calling pthread_self too early. The problem with | 352 #ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY |
334 // that code is it caused a race condition when tcmalloc is linked | 353 pthread_t me; |
335 // in statically and other libraries spawn threads before main. | 354 if (!tsd_inited_) { |
| 355 memset(&me, 0, sizeof(me)); |
| 356 } else { |
| 357 me = pthread_self(); |
| 358 } |
| 359 #else |
336 const pthread_t me = pthread_self(); | 360 const pthread_t me = pthread_self(); |
| 361 #endif |
337 | 362 |
338 // This may be a recursive malloc call from pthread_setspecific() | 363 // This may be a recursive malloc call from pthread_setspecific() |
339 // In that case, the heap for this thread has already been created | 364 // In that case, the heap for this thread has already been created |
340 // and added to the linked list. So we search for that first. | 365 // and added to the linked list. So we search for that first. |
341 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { | 366 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { |
342 if (h->tid_ == me) { | 367 if (h->tid_ == me) { |
343 heap = h; | 368 heap = h; |
344 break; | 369 break; |
345 } | 370 } |
346 } | 371 } |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
449 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { | 474 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { |
450 // Increasing the total cache size should not circumvent the | 475 // Increasing the total cache size should not circumvent the |
451 // slow-start growth of max_size_. | 476 // slow-start growth of max_size_. |
452 if (ratio < 1.0) { | 477 if (ratio < 1.0) { |
453 h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); | 478 h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); |
454 } | 479 } |
455 claimed += h->max_size_; | 480 claimed += h->max_size_; |
456 } | 481 } |
457 unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; | 482 unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; |
458 per_thread_cache_size_ = space; | 483 per_thread_cache_size_ = space; |
459 // TCMalloc_MESSAGE(__FILE__, __LINE__, "Threads %d => cache size %8d\n", n,
int(space)); | |
460 } | |
461 | |
462 void ThreadCache::Print(TCMalloc_Printer* out) const { | |
463 for (int cl = 0; cl < kNumClasses; ++cl) { | |
464 out->printf(" %5" PRIuS " : %4" PRIuS " len; %4d lo; %4"PRIuS | |
465 " max; %4"PRIuS" overages;\n", | |
466 Static::sizemap()->ByteSizeForClass(cl), | |
467 list_[cl].length(), | |
468 list_[cl].lowwatermark(), | |
469 list_[cl].max_length(), | |
470 list_[cl].length_overages()); | |
471 } | |
472 } | |
473 | |
474 void ThreadCache::PrintThreads(TCMalloc_Printer* out) { | |
475 size_t actual_limit = 0; | |
476 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { | |
477 h->Print(out); | |
478 actual_limit += h->max_size_; | |
479 } | |
480 out->printf("ThreadCache overall: %"PRIuS ", unclaimed: %"PRIuS | |
481 ", actual: %"PRIuS"\n", | |
482 overall_thread_cache_size_, unclaimed_cache_space_, actual_limit); | |
483 } | 484 } |
484 | 485 |
485 void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { | 486 void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { |
486 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { | 487 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { |
487 *total_bytes += h->Size(); | 488 *total_bytes += h->Size(); |
488 if (class_count) { | 489 if (class_count) { |
489 for (int cl = 0; cl < kNumClasses; ++cl) { | 490 for (int cl = 0; cl < kNumClasses; ++cl) { |
490 class_count[cl] += h->freelist_length(cl); | 491 class_count[cl] += h->freelist_length(cl); |
491 } | 492 } |
492 } | 493 } |
493 } | 494 } |
494 } | 495 } |
495 | 496 |
496 void ThreadCache::set_overall_thread_cache_size(size_t new_size) { | 497 void ThreadCache::set_overall_thread_cache_size(size_t new_size) { |
497 // Clip the value to a reasonable range | 498 // Clip the value to a reasonable range |
498 if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; | 499 if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; |
499 if (new_size > (1<<30)) new_size = (1<<30); // Limit to 1GB | 500 if (new_size > (1<<30)) new_size = (1<<30); // Limit to 1GB |
500 overall_thread_cache_size_ = new_size; | 501 overall_thread_cache_size_ = new_size; |
501 | 502 |
502 RecomputePerThreadCacheSize(); | 503 RecomputePerThreadCacheSize(); |
503 } | 504 } |
504 | 505 |
505 } // namespace tcmalloc | 506 } // namespace tcmalloc |
OLD | NEW |