| Index: third_party/tcmalloc/chromium/src/tcmalloc.cc
|
| diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
|
| index 97c366c9f3ab419393a8df13fd046c13e186c7cf..51fe5b31ecd17639e6e58b0d1ec60a66c01cb333 100644
|
| --- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
|
| +++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
|
| @@ -122,8 +122,8 @@
|
| #include "base/spinlock.h" // for SpinLockHolder
|
| #include "central_freelist.h" // for CentralFreeListPadded
|
| #include "common.h" // for StackTrace, kPageShift, etc
|
| +#include "free_list.h" // for FL_Init
|
| #include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc
|
| -#include "linked_list.h" // for SLL_SetNext
|
| #include "malloc_hook-inl.h" // for MallocHook::InvokeNewHook, etc
|
| #include "page_heap.h" // for PageHeap, PageHeap::Stats
|
| #include "page_heap_allocator.h" // for PageHeapAllocator
|
| @@ -150,6 +150,13 @@
|
| # define WIN32_DO_PATCHING 1
|
| #endif
|
|
|
| +// GLibc 2.14+ requires the hook functions be declared volatile, based on the
|
| +// value of the define __MALLOC_HOOK_VOLATILE. For compatibility with
|
| +// older/non-GLibc implementations, provide an empty definition.
|
| +#if !defined(__MALLOC_HOOK_VOLATILE)
|
| +#define __MALLOC_HOOK_VOLATILE
|
| +#endif
|
| +
|
| using STL_NAMESPACE::max;
|
| using STL_NAMESPACE::numeric_limits;
|
| using STL_NAMESPACE::vector;
|
| @@ -169,6 +176,14 @@ using tcmalloc::ThreadCache;
|
| # define __THROW // __THROW is just an optimization, so ok to make it ""
|
| #endif
|
|
|
| +// ---- Double free debug declarations
|
| +static size_t ExcludeSpaceForMark(size_t size);
|
| +static void AddRoomForMark(size_t* size);
|
| +static void ExcludeMarkFromSize(size_t* new_size);
|
| +static void MarkAllocatedRegion(void* ptr);
|
| +static void ValidateAllocatedRegion(void* ptr, size_t cl);
|
| +// ---- End Double free debug declarations
|
| +
|
| DECLARE_int64(tcmalloc_sample_parameter);
|
| DECLARE_double(tcmalloc_release_rate);
|
|
|
| @@ -203,7 +218,6 @@ DEFINE_int64(tcmalloc_large_alloc_report_threshold,
|
| // put all callers of MallocHook::Invoke* in this module into
|
| // ATTRIBUTE_SECTION(google_malloc) section, so that
|
| // MallocHook::GetCallerStackTrace can function accurately.
|
| -#ifndef _WIN32 // windows doesn't have attribute_section, so don't bother
|
| extern "C" {
|
| void* tc_malloc(size_t size) __THROW
|
| ATTRIBUTE_SECTION(google_malloc);
|
| @@ -265,7 +279,6 @@ extern "C" {
|
| size_t tc_malloc_size(void* p) __THROW
|
| ATTRIBUTE_SECTION(google_malloc);
|
| } // extern "C"
|
| -#endif // #ifndef _WIN32
|
|
|
| // Override the libc functions to prefer our own instead. This comes
|
| // first so code in tcmalloc.cc can use the overridden versions. One
|
| @@ -274,6 +287,10 @@ extern "C" {
|
| // them. In that case, we don't want to do this overriding here.
|
| #if !defined(WIN32_DO_PATCHING)
|
|
|
| +// TODO(mbelshe): Turn off TCMalloc's symbols for libc. We do that
|
| +// elsewhere.
|
| +#ifndef _WIN32
|
| +
|
| #if defined(__GNUC__) && !defined(__MACH__)
|
| // Potentially faster variants that use the gcc alias extension.
|
| // FreeBSD does support aliases, but apparently not correctly. :-(
|
| @@ -384,6 +401,35 @@ extern "C" {
|
| } // extern "C"
|
| #endif // ifdef __GLIBC__
|
|
|
| +#if defined(__GLIBC__) && defined(HAVE_MALLOC_H)
|
| +// If we're using glibc, then override glibc malloc hooks to make sure that even
|
| +// if calls fall through to ptmalloc (due to dlopen() with RTLD_DEEPBIND or what
|
| +// not), ptmalloc will use TCMalloc.
|
| +
|
| +static void* tc_ptmalloc_malloc_hook(size_t size, const void* caller) {
|
| + return tc_malloc(size);
|
| +}
|
| +
|
| +void* (*__MALLOC_HOOK_VOLATILE __malloc_hook)(
|
| + size_t size, const void* caller) = tc_ptmalloc_malloc_hook;
|
| +
|
| +static void* tc_ptmalloc_realloc_hook(
|
| + void* ptr, size_t size, const void* caller) {
|
| + return tc_realloc(ptr, size);
|
| +}
|
| +
|
| +void* (*__MALLOC_HOOK_VOLATILE __realloc_hook)(
|
| + void* ptr, size_t size, const void* caller) = tc_ptmalloc_realloc_hook;
|
| +
|
| +static void tc_ptmalloc_free_hook(void* ptr, const void* caller) {
|
| + tc_free(ptr);
|
| +}
|
| +
|
| +void (*__MALLOC_HOOK_VOLATILE __free_hook)(void* ptr, const void* caller) = tc_ptmalloc_free_hook;
|
| +
|
| +#endif
|
| +
|
| +#endif // #ifndef _WIN32
|
| #undef ALIAS
|
|
|
| #endif // #ifndef(WIN32_DO_PATCHING)
|
| @@ -466,6 +512,15 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
|
| - stats.transfer_bytes
|
| - stats.thread_bytes);
|
|
|
| + out->printf(
|
| + "WASTE: %7.1f MiB committed but not used\n"
|
| + "WASTE: %7.1f MiB bytes committed, %7.1f MiB bytes in use\n"
|
| + "WASTE: committed/used ratio of %f\n",
|
| + (stats.pageheap.committed_bytes - bytes_in_use_by_app) / MiB,
|
| + stats.pageheap.committed_bytes / MiB,
|
| + bytes_in_use_by_app / MiB,
|
| + stats.pageheap.committed_bytes / static_cast<double>(bytes_in_use_by_app)
|
| + );
|
| #ifdef TCMALLOC_SMALL_BUT_SLOW
|
| out->printf(
|
| "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n");
|
| @@ -473,6 +528,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
|
| out->printf(
|
| "------------------------------------------------\n"
|
| "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n"
|
| + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes committed\n"
|
| "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n"
|
| "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n"
|
| "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n"
|
| @@ -493,6 +549,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
|
| "Bytes released to the OS take up virtual address space"
|
| " but no physical memory.\n",
|
| bytes_in_use_by_app, bytes_in_use_by_app / MiB,
|
| + stats.pageheap.committed_bytes, stats.pageheap.committed_bytes / MiB,
|
| stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB,
|
| stats.central_bytes, stats.central_bytes / MiB,
|
| stats.transfer_bytes, stats.transfer_bytes / MiB,
|
| @@ -972,6 +1029,7 @@ static inline bool CheckCachedSizeClass(void *ptr) {
|
|
|
| static inline void* CheckedMallocResult(void *result) {
|
| ASSERT(result == NULL || CheckCachedSizeClass(result));
|
| + MarkAllocatedRegion(result);
|
| return result;
|
| }
|
|
|
| @@ -1036,7 +1094,7 @@ static void ReportLargeAlloc(Length num_pages, void* result) {
|
| inline void* cpp_alloc(size_t size, bool nothrow);
|
| inline void* do_malloc(size_t size);
|
|
|
| -// TODO(willchan): Investigate whether or not lining this much is harmful to
|
| +// TODO(willchan): Investigate whether or not inlining this much is harmful to
|
| // performance.
|
| // This is equivalent to do_malloc() except when tc_new_mode is set to true.
|
| // Otherwise, it will run the std::new_handler if set.
|
| @@ -1091,6 +1149,8 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
|
| }
|
|
|
| inline void* do_malloc(size_t size) {
|
| + AddRoomForMark(&size);
|
| +
|
| void* ret = NULL;
|
|
|
| // The following call forces module initialization
|
| @@ -1101,13 +1161,15 @@ inline void* do_malloc(size_t size) {
|
|
|
| if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
|
| ret = DoSampledAllocation(size);
|
| + MarkAllocatedRegion(ret);
|
| } else {
|
| - // The common case, and also the simplest. This just pops the
|
| + // The common case, and also the simplest. This just pops the
|
| // size-appropriate freelist, after replenishing it if it's empty.
|
| ret = CheckedMallocResult(heap->Allocate(size, cl));
|
| }
|
| } else {
|
| ret = do_malloc_pages(heap, size);
|
| + MarkAllocatedRegion(ret);
|
| }
|
| if (ret == NULL) errno = ENOMEM;
|
| return ret;
|
| @@ -1155,6 +1217,9 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
|
| cl = span->sizeclass;
|
| Static::pageheap()->CacheSizeClass(p, cl);
|
| }
|
| +
|
| + ValidateAllocatedRegion(ptr, cl);
|
| +
|
| if (cl != 0) {
|
| ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
|
| ThreadCache* heap = GetCacheIfPresent();
|
| @@ -1162,7 +1227,7 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
|
| heap->Deallocate(ptr, cl);
|
| } else {
|
| // Delete directly into central cache
|
| - tcmalloc::SLL_SetNext(ptr, NULL);
|
| + tcmalloc::FL_Init(ptr);
|
| Static::central_cache()[cl].InsertRange(ptr, ptr, 1);
|
| }
|
| } else {
|
| @@ -1211,6 +1276,7 @@ inline void* do_realloc_with_callback(
|
| void* old_ptr, size_t new_size,
|
| void (*invalid_free_fn)(void*),
|
| size_t (*invalid_get_size_fn)(void*)) {
|
| + AddRoomForMark(&new_size);
|
| // Get the size of the old entry
|
| const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
|
|
|
| @@ -1229,6 +1295,7 @@ inline void* do_realloc_with_callback(
|
| if (new_size > old_size && new_size < lower_bound_to_grow) {
|
| new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow);
|
| }
|
| + ExcludeMarkFromSize(&new_size); // do_malloc will add space if needed.
|
| if (new_ptr == NULL) {
|
| // Either new_size is not a tiny increment, or last do_malloc failed.
|
| new_ptr = do_malloc_or_cpp_alloc(new_size);
|
| @@ -1247,6 +1314,7 @@ inline void* do_realloc_with_callback(
|
| } else {
|
| // We still need to call hooks to report the updated size:
|
| MallocHook::InvokeDeleteHook(old_ptr);
|
| + ExcludeMarkFromSize(&new_size);
|
| MallocHook::InvokeNewHook(old_ptr, new_size);
|
| return old_ptr;
|
| }
|
| @@ -1267,6 +1335,8 @@ inline void* do_realloc(void* old_ptr, size_t new_size) {
|
| void* do_memalign(size_t align, size_t size) {
|
| ASSERT((align & (align - 1)) == 0);
|
| ASSERT(align > 0);
|
| + // Marked in CheckedMallocResult(), which is also inside SpanToMallocResult().
|
| + AddRoomForMark(&size);
|
| if (size + align < size) return NULL; // Overflow
|
|
|
| // Fall back to malloc if we would already align this memory access properly.
|
| @@ -1476,7 +1546,8 @@ void* cpp_memalign(size_t align, size_t size) {
|
|
|
| // As promised, the definition of this function, declared above.
|
| size_t TCMallocImplementation::GetAllocatedSize(void* ptr) {
|
| - return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
|
| + return ExcludeSpaceForMark(
|
| + GetSizeWithCallback(ptr, &InvalidGetAllocatedSize));
|
| }
|
|
|
| void TCMallocImplementation::MarkThreadBusy() {
|
| @@ -1693,6 +1764,174 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller)
|
| MallocHook::InvokeNewHook(result, size);
|
| return result;
|
| }
|
| -void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
|
| -
|
| +void *(*__MALLOC_HOOK_VOLATILE __memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
|
| #endif // TCMALLOC_USING_DEBUGALLOCATION
|
| +
|
| +// ---Double free() debugging implementation -----------------------------------
|
| +// We will put a mark at the extreme end of each allocation block. We make
|
| +// sure that we always allocate enough "extra memory" that we can fit in the
|
| +// mark, and still provide the requested usable region. If ever that mark is
|
| +// not as expected, then we know that the user is corrupting memory beyond their
|
| +// request size, or that they have called free a second time without having
|
| +// the memory allocated (again). This allows us to spot most double free()s,
|
| +// but some can "slip by" or confuse our logic if the caller reallocates memory
|
| +// (for a second use) before performing an evil double-free of a first
|
| +// allocation.
|
| +
|
| +// This code can be optimized, but for now, it is written to be most easily
|
| +// understood, and flexible (since it is evolving a bit). Potential
|
| +// optimizations include using other calculated data, such as class size, or
|
| +// allocation size, which is known in the code above, but then is recalculated
|
| +// below. Another potential optimization would be careful manual inlining of
|
| +// code, but I *think* that the compiler will probably do this for me, and I've
|
| +// been careful to avoid aliasing issues that might make a compiler back-off.
|
| +
|
| +// Evolution includes experimenting with different marks, to minimize the chance
|
| +// that a mark would be misunderstood (missed corruption). The marks are meant
|
| +// to be a hashed encoding of the location, so that they can't be copied over a
|
| +// different region (by accident) without being detected (most of the time).
|
| +
|
| +// Enable the following define to turn on all the TCMalloc checking.
|
| +// It will cost about 2% in performance, but it will catch double frees (most of
|
| +// the time), and will often catch allocated-buffer overrun errors. This
|
| +// validation is only active when TCMalloc is used as the allocator.
|
| +#ifndef NDEBUG
|
| +#define TCMALLOC_VALIDATION
|
| +#endif
|
| +
|
| +#if !defined(TCMALLOC_VALIDATION)
|
| +
|
| +static size_t ExcludeSpaceForMark(size_t size) { return size; }
|
| +static void AddRoomForMark(size_t* size) {}
|
| +static void ExcludeMarkFromSize(size_t* new_size) {}
|
| +static void MarkAllocatedRegion(void* ptr) {}
|
| +static void ValidateAllocatedRegion(void* ptr, size_t cl) {}
|
| +
|
| +#else // TCMALLOC_VALIDATION
|
| +
|
| +static void DieFromDoubleFree() {
|
| + char* p = NULL;
|
| + p++;
|
| + *p += 1; // Segv.
|
| +}
|
| +
|
| +static size_t DieFromBadFreePointer(void* unused) {
|
| + char* p = NULL;
|
| + p += 2;
|
| + *p += 2; // Segv.
|
| + return 0;
|
| +}
|
| +
|
| +static void DieFromMemoryCorruption() {
|
| + char* p = NULL;
|
| + p += 3;
|
| + *p += 3; // Segv.
|
| +}
|
| +
|
| +// We can either do byte marking, or whole word marking based on the following
|
| +// define. char is as small as we can get, and word marking probably provides
|
| +// more than enough bits that we won't miss a corruption. Any sized integral
|
| +// type can be used, but we just define two examples.
|
| +
|
| +// #define TCMALLOC_SMALL_VALIDATION
|
| +#if defined (TCMALLOC_SMALL_VALIDATION)
|
| +
|
| +typedef char MarkType; // char saves memory... int is more complete.
|
| +static const MarkType kAllocationMarkMask = static_cast<MarkType>(0x36);
|
| +
|
| +#else
|
| +
|
| +typedef int MarkType; // char saves memory... int is more complete.
|
| +static const MarkType kAllocationMarkMask = static_cast<MarkType>(0xE1AB9536);
|
| +
|
| +#endif
|
| +
|
| +// TODO(jar): See if use of reference rather than pointer gets better inlining,
|
| +// or if a macro is needed. My fear is that taking the address may preclude register
|
| +// allocation :-(.
|
| +inline static void AddRoomForMark(size_t* size) {
|
| + *size += sizeof(kAllocationMarkMask);
|
| +}
|
| +
|
| +inline static void ExcludeMarkFromSize(size_t* new_size) {
|
| + *new_size -= sizeof(kAllocationMarkMask);
|
| +}
|
| +
|
| +inline static size_t ExcludeSpaceForMark(size_t size) {
|
| + return size - sizeof(kAllocationMarkMask); // Lie about size when asked.
|
| +}
|
| +
|
| +inline static MarkType* GetMarkLocation(void* ptr) {
|
| + size_t class_size = GetSizeWithCallback(ptr, DieFromBadFreePointer);
|
| + ASSERT(class_size % sizeof(kAllocationMarkMask) == 0);
|
| + size_t last_index = (class_size / sizeof(kAllocationMarkMask)) - 1;
|
| + return static_cast<MarkType*>(ptr) + last_index;
|
| +}
|
| +
|
| +// We hash in the mark location plus the pointer so that we effectively mix in
|
| +// the size of the block. This means that if a span is used for different sizes
|
| +// then the mark will be different. It would be good to hash in the size (which
|
| +// we effectively get by using both mark location and pointer), but even better
|
| +// would be to also include the class, as it concisely contains the entropy
|
| +// found in the size (when we don't have a large allocation), and there is less
|
| +// risk of losing those bits to truncation. It would probably be good to combine
|
| +// the high bits of size (capturing info about large blocks) with the class
|
| +// (which is a 6 bit number).
|
| +inline static MarkType GetMarkValue(void* ptr, MarkType* mark) {
|
| + void* ptr2 = static_cast<void*>(mark);
|
| + size_t offset1 = static_cast<char*>(ptr) - static_cast<char*>(NULL);
|
| + size_t offset2 = static_cast<char*>(ptr2) - static_cast<char*>(NULL);
|
| + static const int kInvariantBits = 2;
|
| + ASSERT((offset1 >> kInvariantBits) << kInvariantBits == offset1);
|
| + // Note: low bits of both offsets are invariants due to alignment. High bits
|
| + // of both offsets are the same (unless we have a large allocation). Avoid
|
| + // XORing high bits together, as they will cancel for most small allocations.
|
| +
|
| + MarkType ret = kAllocationMarkMask;
|
| + // Using a little shift, we can safely XOR together both offsets.
|
| + ret ^= static_cast<MarkType>(offset1 >> kInvariantBits) ^
|
| + static_cast<MarkType>(offset2);
|
| + if (sizeof(ret) == 1) {
|
| + // Try to bring some high level bits into the mix.
|
| + ret += static_cast<MarkType>(offset1 >> 8) ^
|
| + static_cast<MarkType>(offset1 >> 16) ^
|
| + static_cast<MarkType>(offset1 >> 24) ;
|
| + }
|
| + // Hash in high bits on a 64 bit architecture.
|
| + if (sizeof(size_t) == 8 && sizeof(ret) == 4)
|
| + ret += offset1 >> 16;
|
| + if (ret == 0)
|
| + ret = kAllocationMarkMask; // Avoid common pattern of all zeros.
|
| + return ret;
|
| +}
|
| +
|
| +// TODO(jar): Use the passed-in TCMalloc class index to calculate mark location
|
| +// faster. The current implementation calls general functions, which have to
|
| +// recalculate this in order to get the Class Size. This is a slow and wasteful
|
| +// recomputation... but it is much more readable this way (for now).
|
| +static void ValidateAllocatedRegion(void* ptr, size_t cl) {
|
| + if (ptr == NULL) return;
|
| + MarkType* mark = GetMarkLocation(ptr);
|
| + MarkType allocated_mark = GetMarkValue(ptr, mark);
|
| + MarkType current_mark = *mark;
|
| +
|
| + if (current_mark == ~allocated_mark)
|
| + DieFromDoubleFree();
|
| + if (current_mark != allocated_mark)
|
| + DieFromMemoryCorruption();
|
| +#ifndef NDEBUG
|
| + // In debug mode, fill the freed region (everything before the mark) with 0x36.
|
| + size_t class_size = static_cast<size_t>(reinterpret_cast<char*>(mark) -
|
| + reinterpret_cast<char*>(ptr));
|
| + memset(ptr, static_cast<char>(0x36), class_size);
|
| +#endif
|
| + *mark = ~allocated_mark; // Distinctively not allocated.
|
| +}
|
| +
|
| +static void MarkAllocatedRegion(void* ptr) {
|
| + if (ptr == NULL) return;
|
| + MarkType* mark = GetMarkLocation(ptr);
|
| + *mark = GetMarkValue(ptr, mark);
|
| +}
|
| +
|
| +#endif // TCMALLOC_VALIDATION
|
|
|