Index: third_party/tcmalloc/chromium/src/tcmalloc.cc
diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
index 97c366c9f3ab419393a8df13fd046c13e186c7cf..51fe5b31ecd17639e6e58b0d1ec60a66c01cb333 100644
--- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
+++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
@@ -122,8 +122,8 @@
 #include "base/spinlock.h"              // for SpinLockHolder
 #include "central_freelist.h"           // for CentralFreeListPadded
 #include "common.h"                     // for StackTrace, kPageShift, etc
+#include "free_list.h"                  // for FL_Init
 #include "internal_logging.h"           // for ASSERT, TCMalloc_Printer, etc
-#include "linked_list.h"                // for SLL_SetNext
 #include "malloc_hook-inl.h"            // for MallocHook::InvokeNewHook, etc
 #include "page_heap.h"                  // for PageHeap, PageHeap::Stats
 #include "page_heap_allocator.h"        // for PageHeapAllocator
@@ -150,6 +150,13 @@
 # define WIN32_DO_PATCHING 1
 #endif
+// GLibc 2.14+ requires that the hook functions be declared volatile, which it
+// signals via the __MALLOC_HOOK_VOLATILE define. For compatibility with
+// older/non-GLibc implementations, provide an empty definition.
+#if !defined(__MALLOC_HOOK_VOLATILE)
+#define __MALLOC_HOOK_VOLATILE
+#endif
+
 using STL_NAMESPACE::max;
 using STL_NAMESPACE::numeric_limits;
 using STL_NAMESPACE::vector;
@@ -169,6 +176,14 @@ using tcmalloc::ThreadCache;
 # define __THROW   // __THROW is just an optimization, so ok to make it ""
 #endif
+// ---- Double free debug declarations
+static size_t ExcludeSpaceForMark(size_t size);
+static void AddRoomForMark(size_t* size);
+static void ExcludeMarkFromSize(size_t* new_size);
+static void MarkAllocatedRegion(void* ptr);
+static void ValidateAllocatedRegion(void* ptr, size_t cl);
+// ---- End Double free debug declarations
+
 DECLARE_int64(tcmalloc_sample_parameter);
 DECLARE_double(tcmalloc_release_rate);
@@ -203,7 +218,6 @@ DEFINE_int64(tcmalloc_large_alloc_report_threshold,
 // put all callers of MallocHook::Invoke* in this module into
 // ATTRIBUTE_SECTION(google_malloc) section, so that
 // MallocHook::GetCallerStackTrace can function accurately.
-#ifndef _WIN32   // windows doesn't have attribute_section, so don't bother
 extern "C" {
   void* tc_malloc(size_t size) __THROW
       ATTRIBUTE_SECTION(google_malloc);
@@ -265,7 +279,6 @@ extern "C" {
   size_t tc_malloc_size(void* p) __THROW
       ATTRIBUTE_SECTION(google_malloc);
 }  // extern "C"
-#endif  // #ifndef _WIN32
 // Override the libc functions to prefer our own instead. This comes
 // first so code in tcmalloc.cc can use the overridden versions. One
@@ -274,6 +287,10 @@ extern "C" {
 // them. In that case, we don't want to do this overriding here.
 #if !defined(WIN32_DO_PATCHING)
+// TODO(mbelshe): Turn off TCMalloc's symbols for libc. We do that
+// elsewhere.
+#ifndef _WIN32
+
 #if defined(__GNUC__) && !defined(__MACH__)
   // Potentially faster variants that use the gcc alias extension.
   // FreeBSD does support aliases, but apparently not correctly. :-(
@@ -384,6 +401,35 @@ extern "C" {
 }  // extern "C"
 #endif  // ifdef __GLIBC__
+#if defined(__GLIBC__) && defined(HAVE_MALLOC_H)
+// If we're using glibc, then override glibc malloc hooks to make sure that
+// even if calls fall through to ptmalloc (due to dlopen() with RTLD_DEEPBIND
+// or what not), ptmalloc will use TCMalloc.
+
+static void* tc_ptmalloc_malloc_hook(size_t size, const void* caller) {
+  return tc_malloc(size);
+}
+
+void* (*__MALLOC_HOOK_VOLATILE __malloc_hook)(
+    size_t size, const void* caller) = tc_ptmalloc_malloc_hook;
+
+static void* tc_ptmalloc_realloc_hook(
+    void* ptr, size_t size, const void* caller) {
+  return tc_realloc(ptr, size);
+}
+
+void* (*__MALLOC_HOOK_VOLATILE __realloc_hook)(
+    void* ptr, size_t size, const void* caller) = tc_ptmalloc_realloc_hook;
+
+static void tc_ptmalloc_free_hook(void* ptr, const void* caller) {
+  tc_free(ptr);
+}
+
+void (*__MALLOC_HOOK_VOLATILE __free_hook)(
+    void* ptr, const void* caller) = tc_ptmalloc_free_hook;
+
+#endif
+
+#endif  // #ifndef _WIN32
 #undef ALIAS
 #endif  // #ifndef(WIN32_DO_PATCHING)
@@ -466,6 +512,15 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
       - stats.transfer_bytes
       - stats.thread_bytes);
+  out->printf(
+      "WASTE: %7.1f MiB committed but not used\n"
+      "WASTE: %7.1f MiB committed, %7.1f MiB in use\n"
+      "WASTE: committed/used ratio of %f\n",
+      (stats.pageheap.committed_bytes - bytes_in_use_by_app) / MiB,
+      stats.pageheap.committed_bytes / MiB,
+      bytes_in_use_by_app / MiB,
+      stats.pageheap.committed_bytes / static_cast<double>(bytes_in_use_by_app)
+  );
 #ifdef TCMALLOC_SMALL_BUT_SLOW
   out->printf(
       "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n");
@@ -473,6 +528,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
   out->printf(
       "------------------------------------------------\n"
       "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n"
+      "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes committed\n"
       "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n"
       "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n"
      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n"
@@ -493,6 +549,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
       "Bytes released to the OS take up virtual address space"
       " but no physical memory.\n",
       bytes_in_use_by_app, bytes_in_use_by_app / MiB,
+      stats.pageheap.committed_bytes, stats.pageheap.committed_bytes / MiB,
       stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB,
       stats.central_bytes, stats.central_bytes / MiB,
       stats.transfer_bytes, stats.transfer_bytes / MiB,
@@ -972,6 +1029,7 @@ static inline bool CheckCachedSizeClass(void *ptr) {
 static inline void* CheckedMallocResult(void *result) {
   ASSERT(result == NULL || CheckCachedSizeClass(result));
+  MarkAllocatedRegion(result);
   return result;
 }
@@ -1036,7 +1094,7 @@ static void ReportLargeAlloc(Length num_pages, void* result) {
 inline void* cpp_alloc(size_t size, bool nothrow);
 inline void* do_malloc(size_t size);
-// TODO(willchan): Investigate whether or not lining this much is harmful to
+// TODO(willchan): Investigate whether or not inlining this much is harmful to
 // performance.
 // This is equivalent to do_malloc() except when tc_new_mode is set to true.
 // Otherwise, it will run the std::new_handler if set.
@@ -1091,6 +1149,8 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
 }
 inline void* do_malloc(size_t size) {
+  AddRoomForMark(&size);
+
   void* ret = NULL;
   // The following call forces module initialization
@@ -1101,13 +1161,15 @@ inline void* do_malloc(size_t size) {
     if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
       ret = DoSampledAllocation(size);
+      MarkAllocatedRegion(ret);
     } else {
-      // The common case, and also the simplest.  This just pops the
+      // The common case, and also the simplest. This just pops the
       // size-appropriate freelist, after replenishing it if it's empty.
       ret = CheckedMallocResult(heap->Allocate(size, cl));
     }
   } else {
     ret = do_malloc_pages(heap, size);
+    MarkAllocatedRegion(ret);
   }
   if (ret == NULL) errno = ENOMEM;
   return ret;
@@ -1155,6 +1217,9 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
     cl = span->sizeclass;
     Static::pageheap()->CacheSizeClass(p, cl);
   }
+
+  ValidateAllocatedRegion(ptr, cl);
+
   if (cl != 0) {
     ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
     ThreadCache* heap = GetCacheIfPresent();
@@ -1162,7 +1227,7 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
       heap->Deallocate(ptr, cl);
     } else {
       // Delete directly into central cache
-      tcmalloc::SLL_SetNext(ptr, NULL);
+      tcmalloc::FL_Init(ptr);
       Static::central_cache()[cl].InsertRange(ptr, ptr, 1);
     }
   } else {
@@ -1211,6 +1276,7 @@ inline void* do_realloc_with_callback(
     void* old_ptr, size_t new_size,
     void (*invalid_free_fn)(void*),
     size_t (*invalid_get_size_fn)(void*)) {
+  AddRoomForMark(&new_size);
   // Get the size of the old entry
   const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
@@ -1229,6 +1295,7 @@ inline void* do_realloc_with_callback(
   if (new_size > old_size && new_size < lower_bound_to_grow) {
     new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow);
   }
+  ExcludeMarkFromSize(&new_size);  // do_malloc will add space if needed.
   if (new_ptr == NULL) {
     // Either new_size is not a tiny increment, or last do_malloc failed.
     new_ptr = do_malloc_or_cpp_alloc(new_size);
@@ -1247,6 +1314,7 @@ inline void* do_realloc_with_callback(
   } else {
     // We still need to call hooks to report the updated size:
     MallocHook::InvokeDeleteHook(old_ptr);
+    ExcludeMarkFromSize(&new_size);
     MallocHook::InvokeNewHook(old_ptr, new_size);
     return old_ptr;
   }
@@ -1267,6 +1335,8 @@ inline void* do_realloc(void* old_ptr, size_t new_size) {
 void* do_memalign(size_t align, size_t size) {
   ASSERT((align & (align - 1)) == 0);
   ASSERT(align > 0);
+  // Marked in CheckedMallocResult(), which SpanToMallocResult() also calls.
+  AddRoomForMark(&size);
   if (size + align < size) return NULL;  // Overflow
   // Fall back to malloc if we would already align this memory access properly.
@@ -1476,7 +1546,8 @@ void* cpp_memalign(size_t align, size_t size) {
 // As promised, the definition of this function, declared above.
 size_t TCMallocImplementation::GetAllocatedSize(void* ptr) {
-  return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
+  return ExcludeSpaceForMark(
+      GetSizeWithCallback(ptr, &InvalidGetAllocatedSize));
 }
 void TCMallocImplementation::MarkThreadBusy() {
@@ -1693,6 +1764,174 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller)
   MallocHook::InvokeNewHook(result, size);
   return result;
 }
-void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
-
+void *(*__MALLOC_HOOK_VOLATILE __memalign_hook)(
+    size_t, size_t, const void *) = MemalignOverride;
 #endif  // TCMALLOC_USING_DEBUGALLOCATION
+
+// --- Double free() debugging implementation ---------------------------------
+// We will put a mark at the extreme end of each allocation block. We make
+// sure that we always allocate enough "extra memory" that we can fit the
+// mark, and still provide the requested usable region. If ever that mark is
+// not as expected, then we know that the user is corrupting memory beyond
+// their request size, or that they have called free a second time without
+// having the memory allocated (again). This allows us to spot most double
+// free()s, but some can "slip by" or confuse our logic if the caller
+// reallocates memory (for a second use) before performing an evil double-free
+// of a first allocation.
+
+// This code can be optimized, but for now, it is written to be most easily
+// understood, and flexible (since it is evolving a bit). Potential
+// optimizations include using other calculated data, such as class size, or
+// allocation size, which is known in the code above, but then is recalculated
+// below. Another potential optimization would be careful manual inlining of
+// code, but I *think* that the compiler will probably do this for me, and
+// I've been careful to avoid aliasing issues that might make a compiler
+// back off.
+
+// Evolution includes experimenting with different marks, to minimize the
+// chance that a mark would be misunderstood (missed corruption). The marks
+// are meant to be a hashed encoding of the location, so that they can't be
+// copied over a different region (by accident) without being detected (most
+// of the time).
+
+// Enable the following define to turn on all the TCMalloc checking.
+// It will cost about 2% in performance, but it will catch double frees (most
+// of the time), and will often catch allocated-buffer overrun errors. This
+// validation is only active when TCMalloc is used as the allocator.
+#ifndef NDEBUG
+#define TCMALLOC_VALIDATION
+#endif
+
+#if !defined(TCMALLOC_VALIDATION)
+
+static size_t ExcludeSpaceForMark(size_t size) { return size; }
+static void AddRoomForMark(size_t* size) {}
+static void ExcludeMarkFromSize(size_t* new_size) {}
+static void MarkAllocatedRegion(void* ptr) {}
+static void ValidateAllocatedRegion(void* ptr, size_t cl) {}
+
+#else  // TCMALLOC_VALIDATION
+
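+// Each of the following Die* routines crashes by writing through a distinct
+// near-NULL pointer (address 1, 2, or 3), so the faulting address in a crash
+// dump identifies which check failed.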
+static void DieFromDoubleFree() {
+  char* p = NULL;
+  p++;
+  *p += 1;  // Segv.
+}
+
+static size_t DieFromBadFreePointer(void* unused) {
+  char* p = NULL;
+  p += 2;
+  *p += 2;  // Segv.
+  return 0;
+}
+
+static void DieFromMemoryCorruption() {
+  char* p = NULL;
+  p += 3;
+  *p += 3;  // Segv.
+}
+
+// We can either do byte marking or whole-word marking, based on the following
+// define. char is as small as we can get, and word marking probably provides
+// more than enough bits that we won't miss a corruption. Any sized integral
+// type can be used, but we just define two examples.
+
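+// In either case, kAllocationMarkMask seeds the hash computed by
+// GetMarkValue(), and serves as the fallback mark when that hash comes out
+// to zero.
+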
+// #define TCMALLOC_SMALL_VALIDATION
+#if defined(TCMALLOC_SMALL_VALIDATION)
+
+typedef char MarkType;  // char saves memory... int is more complete.
+static const MarkType kAllocationMarkMask = static_cast<MarkType>(0x36);
+
+#else
+
+typedef int MarkType;  // char saves memory... int is more complete.
+static const MarkType kAllocationMarkMask = static_cast<MarkType>(0xE1AB9536);
+
+#endif
+
+// TODO(jar): See if use of a reference rather than a pointer gets better
+// inlining, or if a macro is needed. My fear is that taking the address may
+// preclude register allocation :-(.
+inline static void AddRoomForMark(size_t* size) {
+  *size += sizeof(kAllocationMarkMask);
+}
+
+inline static void ExcludeMarkFromSize(size_t* new_size) {
+  *new_size -= sizeof(kAllocationMarkMask);
+}
+
+inline static size_t ExcludeSpaceForMark(size_t size) {
+  return size - sizeof(kAllocationMarkMask);  // Lie about size when asked.
+}
+
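+// The mark lives in the last sizeof(MarkType) bytes of the block, computed
+// from the full size-class size rather than the (smaller) requested size.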
+inline static MarkType* GetMarkLocation(void* ptr) {
+  size_t class_size = GetSizeWithCallback(ptr, DieFromBadFreePointer);
+  ASSERT(class_size % sizeof(kAllocationMarkMask) == 0);
+  size_t last_index = (class_size / sizeof(kAllocationMarkMask)) - 1;
+  return static_cast<MarkType*>(ptr) + last_index;
+}
+
+// We hash in the mark location plus the pointer so that we effectively mix in
+// the size of the block. This means that if a span is used for different
+// sizes, the mark will be different. It would be good to hash in the size
+// (which we effectively get by using both mark location and pointer), but
+// even better would be to also include the class, as it concisely contains
+// the entropy found in the size (when we don't have a large allocation), and
+// there is less risk of losing those bits to truncation. It would probably
+// be good to combine the high bits of size (capturing info about large
+// blocks) with the class (which is a 6 bit number).
+inline static MarkType GetMarkValue(void* ptr, MarkType* mark) {
+  void* ptr2 = static_cast<void*>(mark);
+  size_t offset1 = static_cast<char*>(ptr) - static_cast<char*>(NULL);
+  size_t offset2 = static_cast<char*>(ptr2) - static_cast<char*>(NULL);
+  static const int kInvariantBits = 2;
+  ASSERT((offset1 >> kInvariantBits) << kInvariantBits == offset1);
+  // Note: low bits of both offsets are invariants due to alignment. High
+  // bits of both offsets are the same (unless we have a large allocation).
+  // Avoid XORing high bits together, as they will cancel for most small
+  // allocations.
+
+  MarkType ret = kAllocationMarkMask;
+  // Using a little shift, we can safely XOR together both offsets.
+  ret ^= static_cast<MarkType>(offset1 >> kInvariantBits) ^
+         static_cast<MarkType>(offset2);
+  if (sizeof(ret) == 1) {
+    // Try to bring some high level bits into the mix.
+    ret += static_cast<MarkType>(offset1 >> 8) ^
+           static_cast<MarkType>(offset1 >> 16) ^
+           static_cast<MarkType>(offset1 >> 24);
+  }
+  // Hash in high bits on a 64 bit architecture.
+  if (sizeof(size_t) == 8 && sizeof(ret) == 4)
+    ret += offset1 >> 16;
+  if (ret == 0)
+    ret = kAllocationMarkMask;  // Avoid common pattern of all zeros.
+  return ret;
+}
+
+// TODO(jar): Use the passed-in TCMalloc class index to calculate the mark
+// location faster. The current implementation calls general functions, which
+// have to recalculate this in order to get the class size. This is a slow and
+// wasteful recomputation... but it is much more readable this way (for now).
+static void ValidateAllocatedRegion(void* ptr, size_t cl) {
+  if (ptr == NULL) return;
+  MarkType* mark = GetMarkLocation(ptr);
+  MarkType allocated_mark = GetMarkValue(ptr, mark);
+  MarkType current_mark = *mark;
+
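+  // A mark equal to the complement of the expected value means this block
+  // was already validated and freed once (see the inversion at the end of
+  // this function).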
+  if (current_mark == ~allocated_mark)
+    DieFromDoubleFree();
+  if (current_mark != allocated_mark)
+    DieFromMemoryCorruption();
+#ifndef NDEBUG
+  // In debug mode, fill the entire freed region with a recognizable
+  // pattern (0x36).
+  size_t class_size = static_cast<size_t>(reinterpret_cast<char*>(mark) -
+                                          reinterpret_cast<char*>(ptr));
+  memset(ptr, static_cast<char>(0x36), class_size);
+#endif
+  *mark = ~allocated_mark;  // Distinctively not allocated.
+}
+
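+// Stamp the expected mark into the block's last slot so that a later
+// ValidateAllocatedRegion() can detect buffer overruns and double frees.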
+static void MarkAllocatedRegion(void* ptr) {
+  if (ptr == NULL) return;
+  MarkType* mark = GetMarkLocation(ptr);
+  *mark = GetMarkValue(ptr, mark);
+}
+
+#endif  // TCMALLOC_VALIDATION