| Index: third_party/tcmalloc/chromium/src/tcmalloc.cc
|
| diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
|
| index 425e90b9b240eec8c42c1677d228d67ff7e10931..89220e1b2fa8ae4c42bee82f19ea6211964f596d 100644
|
| --- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
|
| +++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
|
| @@ -119,8 +119,8 @@
|
| #include "base/spinlock.h" // for SpinLockHolder
|
| #include "central_freelist.h" // for CentralFreeListPadded
|
| #include "common.h" // for StackTrace, kPageShift, etc
|
| +#include "free_list.h" // for FL_Init
|
| #include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc
|
| -#include "linked_list.h" // for SLL_SetNext
|
| #include "malloc_hook-inl.h" // for MallocHook::InvokeNewHook, etc
|
| #include "page_heap.h" // for PageHeap, PageHeap::Stats
|
| #include "page_heap_allocator.h" // for PageHeapAllocator
|
| @@ -150,6 +150,13 @@
|
| // Some windows file somewhere (at least on cygwin) #define's small (!)
|
| #undef small
|
|
|
| +// GLibc 2.14+ requires the hook functions be declared volatile, based on the
|
| +// value of the define __MALLOC_HOOK_VOLATILE. For compatibility with
|
| +// older/non-GLibc implementations, provide an empty definition.
|
| +#if !defined(__MALLOC_HOOK_VOLATILE)
|
| +#define __MALLOC_HOOK_VOLATILE
|
| +#endif
|
| +
|
| using STL_NAMESPACE::max;
|
| using STL_NAMESPACE::numeric_limits;
|
| using STL_NAMESPACE::vector;
|
| @@ -177,6 +184,14 @@ using tcmalloc::StackTrace;
|
| using tcmalloc::Static;
|
| using tcmalloc::ThreadCache;
|
|
|
| +// ---- Double free debug declarations
|
| +static size_t ExcludeSpaceForMark(size_t size);
|
| +static void AddRoomForMark(size_t* size);
|
| +static void ExcludeMarkFromSize(size_t* new_size);
|
| +static void MarkAllocatedRegion(void* ptr);
|
| +static void ValidateAllocatedRegion(void* ptr, size_t cl);
|
| +// ---- End Double free debug declarations
|
| +
|
| DECLARE_int64(tcmalloc_sample_parameter);
|
| DECLARE_double(tcmalloc_release_rate);
|
|
|
| @@ -211,7 +226,6 @@ DEFINE_int64(tcmalloc_large_alloc_report_threshold,
|
| // put all callers of MallocHook::Invoke* in this module into
|
| // ATTRIBUTE_SECTION(google_malloc) section, so that
|
| // MallocHook::GetCallerStackTrace can function accurately.
|
| -#ifndef _WIN32 // windows doesn't have attribute_section, so don't bother
|
| extern "C" {
|
| void* tc_malloc(size_t size) __THROW
|
| ATTRIBUTE_SECTION(google_malloc);
|
| @@ -273,7 +287,6 @@ extern "C" {
|
| size_t tc_malloc_size(void* p) __THROW
|
| ATTRIBUTE_SECTION(google_malloc);
|
| } // extern "C"
|
| -#endif // #ifndef _WIN32
|
|
|
| // ----------------------- IMPLEMENTATION -------------------------------
|
|
|
| @@ -373,6 +386,15 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
|
| - stats.transfer_bytes
|
| - stats.thread_bytes);
|
|
|
| + out->printf(
|
| + "WASTE: %7.1f MiB committed but not used\n"
|
| + "WASTE: %7.1f MiB bytes committed, %7.1f MiB bytes in use\n"
|
| + "WASTE: committed/used ratio of %f\n",
|
| + (stats.pageheap.committed_bytes - bytes_in_use_by_app) / MiB,
|
| + stats.pageheap.committed_bytes / MiB,
|
| + bytes_in_use_by_app / MiB,
|
| + stats.pageheap.committed_bytes / static_cast<double>(bytes_in_use_by_app)
|
| + );
|
| #ifdef TCMALLOC_SMALL_BUT_SLOW
|
| out->printf(
|
| "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n");
|
| @@ -380,6 +402,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
|
| out->printf(
|
| "------------------------------------------------\n"
|
| "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n"
|
| + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes committed\n"
|
| "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n"
|
| "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n"
|
| "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n"
|
| @@ -400,6 +423,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
|
| "Bytes released to the OS take up virtual address space"
|
| " but no physical memory.\n",
|
| bytes_in_use_by_app, bytes_in_use_by_app / MiB,
|
| + stats.pageheap.committed_bytes, stats.pageheap.committed_bytes / MiB,
|
| stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB,
|
| stats.central_bytes, stats.central_bytes / MiB,
|
| stats.transfer_bytes, stats.transfer_bytes / MiB,
|
| @@ -933,6 +957,7 @@ static inline bool CheckCachedSizeClass(void *ptr) {
|
|
|
| static inline void* CheckedMallocResult(void *result) {
|
| ASSERT(result == NULL || CheckCachedSizeClass(result));
|
| + MarkAllocatedRegion(result);
|
| return result;
|
| }
|
|
|
| @@ -997,7 +1022,7 @@ static void ReportLargeAlloc(Length num_pages, void* result) {
|
| inline void* cpp_alloc(size_t size, bool nothrow);
|
| inline void* do_malloc(size_t size);
|
|
|
| -// TODO(willchan): Investigate whether or not lining this much is harmful to
|
| +// TODO(willchan): Investigate whether or not inlining this much is harmful to
|
| // performance.
|
| // This is equivalent to do_malloc() except when tc_new_mode is set to true.
|
| // Otherwise, it will run the std::new_handler if set.
|
| @@ -1052,6 +1077,8 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
|
| }
|
|
|
| inline void* do_malloc(size_t size) {
|
| + AddRoomForMark(&size);
|
| +
|
| void* ret = NULL;
|
|
|
| // The following call forces module initialization
|
| @@ -1062,13 +1089,15 @@ inline void* do_malloc(size_t size) {
|
|
|
| if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
|
| ret = DoSampledAllocation(size);
|
| + MarkAllocatedRegion(ret);
|
| } else {
|
| - // The common case, and also the simplest. This just pops the
|
| + // The common case, and also the simplest. This just pops the
|
| // size-appropriate freelist, after replenishing it if it's empty.
|
| ret = CheckedMallocResult(heap->Allocate(size, cl));
|
| }
|
| } else {
|
| ret = do_malloc_pages(heap, size);
|
| + MarkAllocatedRegion(ret);
|
| }
|
| if (ret == NULL) errno = ENOMEM;
|
| return ret;
|
| @@ -1124,6 +1153,9 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
|
| cl = span->sizeclass;
|
| Static::pageheap()->CacheSizeClass(p, cl);
|
| }
|
| +
|
| + ValidateAllocatedRegion(ptr, cl);
|
| +
|
| if (cl != 0) {
|
| ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
|
| ThreadCache* heap = GetCacheIfPresent();
|
| @@ -1131,7 +1163,7 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
|
| heap->Deallocate(ptr, cl);
|
| } else {
|
| // Delete directly into central cache
|
| - tcmalloc::SLL_SetNext(ptr, NULL);
|
| + tcmalloc::FL_Init(ptr);
|
| Static::central_cache()[cl].InsertRange(ptr, ptr, 1);
|
| }
|
| } else {
|
| @@ -1182,6 +1214,7 @@ inline void* do_realloc_with_callback(
|
| void* old_ptr, size_t new_size,
|
| void (*invalid_free_fn)(void*),
|
| size_t (*invalid_get_size_fn)(const void*)) {
|
| + AddRoomForMark(&new_size);
|
| // Get the size of the old entry
|
| const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
|
|
|
| @@ -1200,6 +1233,7 @@ inline void* do_realloc_with_callback(
|
| if (new_size > old_size && new_size < lower_bound_to_grow) {
|
| new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow);
|
| }
|
| + ExcludeMarkFromSize(&new_size); // do_malloc will add space if needed.
|
| if (new_ptr == NULL) {
|
| // Either new_size is not a tiny increment, or last do_malloc failed.
|
| new_ptr = do_malloc_or_cpp_alloc(new_size);
|
| @@ -1218,6 +1252,7 @@ inline void* do_realloc_with_callback(
|
| } else {
|
| // We still need to call hooks to report the updated size:
|
| MallocHook::InvokeDeleteHook(old_ptr);
|
| + ExcludeMarkFromSize(&new_size);
|
| MallocHook::InvokeNewHook(old_ptr, new_size);
|
| return old_ptr;
|
| }
|
| @@ -1238,6 +1273,8 @@ inline void* do_realloc(void* old_ptr, size_t new_size) {
|
| void* do_memalign(size_t align, size_t size) {
|
| ASSERT((align & (align - 1)) == 0);
|
| ASSERT(align > 0);
|
| + // Marked in CheckedMallocResult(), which is also inside SpanToMallocResult().
|
| + AddRoomForMark(&size);
|
| if (size + align < size) return NULL; // Overflow
|
|
|
| // Fall back to malloc if we would already align this memory access properly.
|
| @@ -1449,7 +1486,8 @@ void* cpp_memalign(size_t align, size_t size) {
|
| size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) {
|
| ASSERT(TCMallocImplementation::GetOwnership(ptr)
|
| != TCMallocImplementation::kNotOwned);
|
| - return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
|
| + return ExcludeSpaceForMark(
|
| + GetSizeWithCallback(ptr, &InvalidGetAllocatedSize));
|
| }
|
|
|
| void TCMallocImplementation::MarkThreadBusy() {
|
| @@ -1650,3 +1688,172 @@ extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW {
|
| }
|
|
|
| #endif // TCMALLOC_USING_DEBUGALLOCATION
|
| +
|
| +// ---Double free() debugging implementation -----------------------------------
|
| +// We will put a mark at the extreme end of each allocation block. We make
|
| +// sure that we always allocate enough "extra memory" that we can fit in the
|
| +// mark, and still provide the requested usable region. If ever that mark is
|
| +// not as expected, then we know that the user is corrupting memory beyond their
|
| +// request size, or that they have called free a second time without having
|
| +// the memory allocated (again). This allows us to spot most double free()s,
|
| +// but some can "slip by" or confuse our logic if the caller reallocates memory
|
| +// (for a second use) before performing an evil double-free of a first
|
| +// allocation
|
| +
|
| +// This code can be optimized, but for now, it is written to be most easily
|
| +// understood, and flexible (since it is evolving a bit). Potential
|
| +// optimizations include using other calculated data, such as class size, or
|
| +// allocation size, which is known in the code above, but then is recalculated
|
| +// below. Another potential optimization would be careful manual inlining of
|
| +// code, but I *think* that the compiler will probably do this for me, and I've
|
| +// been careful to avoid aliasing issues that might make a compiler back-off.
|
| +
|
| +// Evolution includes experimenting with different marks, to minimize the chance
|
| +// that a mark would be misunderstood (missed corruption). The marks are meant
|
| +// to be hashed encoding of the location, so that they can't be copied over a
|
| +// different region (by accident) without being detected (most of the time).
|
| +
|
| +// Enable the following define to turn on all the TCMalloc checking.
|
| +// It will cost about 2% in performance, but it will catch double frees (most of
|
| +// the time), and will often catch allocated-buffer overrun errors. This
|
| +// validation is only active when TCMalloc is used as the allocator.
|
| +#ifndef NDEBUG
|
| +#define TCMALLOC_VALIDATION
|
| +#endif
|
| +
|
| +#if !defined(TCMALLOC_VALIDATION)
|
| +
|
| +static size_t ExcludeSpaceForMark(size_t size) { return size; }
|
| +static void AddRoomForMark(size_t* size) {}
|
| +static void ExcludeMarkFromSize(size_t* new_size) {}
|
| +static void MarkAllocatedRegion(void* ptr) {}
|
| +static void ValidateAllocatedRegion(void* ptr, size_t cl) {}
|
| +
|
| +#else // TCMALLOC_VALIDATION
|
| +
|
| +static void DieFromDoubleFree() {
|
| + char* p = NULL;
|
| + p++;
|
| + *p += 1; // Segv.
|
| +}
|
| +
|
| +static size_t DieFromBadFreePointer(const void* unused) {
|
| + char* p = NULL;
|
| + p += 2;
|
| + *p += 2; // Segv.
|
| + return 0;
|
| +}
|
| +
|
| +static void DieFromMemoryCorruption() {
|
| + char* p = NULL;
|
| + p += 3;
|
| + *p += 3; // Segv.
|
| +}
|
| +
|
| +// We can either do byte marking, or whole word marking based on the following
|
| +// define. char is as small as we can get, and word marking probably provides
|
| +// more than enough bits that we won't miss a corruption. Any sized integral
|
| +// type can be used, but we just define two examples.
|
| +
|
| +// #define TCMALLOC_SMALL_VALIDATION
|
| +#if defined (TCMALLOC_SMALL_VALIDATION)
|
| +
|
| +typedef char MarkType; // char saves memory... int is more complete.
|
| +static const MarkType kAllocationMarkMask = static_cast<MarkType>(0x36);
|
| +
|
| +#else
|
| +
|
| +typedef int MarkType; // char saves memory... int is more complete.
|
| +static const MarkType kAllocationMarkMask = static_cast<MarkType>(0xE1AB9536);
|
| +
|
| +#endif
|
| +
|
| +// TODO(jar): See if use of reference rather than pointer gets better inlining,
|
| +// or if macro is needed. My fear is that taking address may preclude register
|
| +// allocation :-(.
|
| +inline static void AddRoomForMark(size_t* size) {
|
| + *size += sizeof(kAllocationMarkMask);
|
| +}
|
| +
|
| +inline static void ExcludeMarkFromSize(size_t* new_size) {
|
| + *new_size -= sizeof(kAllocationMarkMask);
|
| +}
|
| +
|
| +inline static size_t ExcludeSpaceForMark(size_t size) {
|
| + return size - sizeof(kAllocationMarkMask); // Lie about size when asked.
|
| +}
|
| +
|
| +inline static MarkType* GetMarkLocation(void* ptr) {
|
| + size_t class_size = GetSizeWithCallback(ptr, DieFromBadFreePointer);
|
| + ASSERT(class_size % sizeof(kAllocationMarkMask) == 0);
|
| + size_t last_index = (class_size / sizeof(kAllocationMarkMask)) - 1;
|
| + return static_cast<MarkType*>(ptr) + last_index;
|
| +}
|
| +
|
| +// We hash in the mark location plus the pointer so that we effectively mix in
|
| +// the size of the block. This means that if a span is used for different sizes
|
| +// then the mark will be different. It would be good to hash in the size (which
|
| +// we effectively get by using both mark location and pointer), but even better
|
| +// would be to also include the class, as it concisely contains the entropy
|
| +// found in the size (when we don't have large allocation), and there is less
|
| +// risk of losing those bits to truncation. It would probably be good to combine
|
| +// the high bits of size (capturing info about large blocks) with the class
|
| +// (which is a 6 bit number).
|
| +inline static MarkType GetMarkValue(void* ptr, MarkType* mark) {
|
| + void* ptr2 = static_cast<void*>(mark);
|
| + size_t offset1 = static_cast<char*>(ptr) - static_cast<char*>(NULL);
|
| + size_t offset2 = static_cast<char*>(ptr2) - static_cast<char*>(NULL);
|
| + static const int kInvariantBits = 2;
|
| + ASSERT((offset1 >> kInvariantBits) << kInvariantBits == offset1);
|
| + // Note: low bits of both offsets are invariants due to alignment. High bits
|
| + // of both offsets are the same (unless we have a large allocation). Avoid
|
| + // XORing high bits together, as they will cancel for most small allocations.
|
| +
|
| + MarkType ret = kAllocationMarkMask;
|
| + // Using a little shift, we can safely XOR together both offsets.
|
| + ret ^= static_cast<MarkType>(offset1 >> kInvariantBits) ^
|
| + static_cast<MarkType>(offset2);
|
| + if (sizeof(ret) == 1) {
|
| + // Try to bring some high level bits into the mix.
|
| + ret += static_cast<MarkType>(offset1 >> 8) ^
|
| + static_cast<MarkType>(offset1 >> 16) ^
|
| + static_cast<MarkType>(offset1 >> 24) ;
|
| + }
|
| + // Hash in high bits on a 64 bit architecture.
|
| + if (sizeof(size_t) == 8 && sizeof(ret) == 4)
|
| + ret += offset1 >> 16;
|
| + if (ret == 0)
|
| + ret = kAllocationMarkMask; // Avoid common pattern of all zeros.
|
| + return ret;
|
| +}
|
| +
|
| +// TODO(jar): Use the passed in TCmalloc Class Index to calculate mark location
|
| +// faster. The current implementation calls general functions, which have to
|
| +// recalculate this in order to get the Class Size. This is a slow and wasteful
|
| +// recomputation... but it is much more readable this way (for now).
|
| +static void ValidateAllocatedRegion(void* ptr, size_t cl) {
|
| + if (ptr == NULL) return;
|
| + MarkType* mark = GetMarkLocation(ptr);
|
| + MarkType allocated_mark = GetMarkValue(ptr, mark);
|
| + MarkType current_mark = *mark;
|
| +
|
| + if (current_mark == ~allocated_mark)
|
| + DieFromDoubleFree();
|
| + if (current_mark != allocated_mark)
|
| + DieFromMemoryCorruption();
|
| +#ifndef NDEBUG
|
| + // In debug mode, fill the freed region (up to the mark) with 0x36 bytes.
|
| + size_t class_size = static_cast<size_t>(reinterpret_cast<char*>(mark) -
|
| + reinterpret_cast<char*>(ptr));
|
| + memset(ptr, static_cast<char>(0x36), class_size);
|
| +#endif
|
| + *mark = ~allocated_mark; // Distinctively not allocated.
|
| +}
|
| +
|
| +static void MarkAllocatedRegion(void* ptr) {
|
| + if (ptr == NULL) return;
|
| + MarkType* mark = GetMarkLocation(ptr);
|
| + *mark = GetMarkValue(ptr, mark);
|
| +}
|
| +
|
| +#endif // TCMALLOC_VALIDATION
|
|
|