Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Unified Diff: third_party/tcmalloc/chromium/src/tcmalloc.cc

Issue 9323026: [NOT TO COMMIT!] r109: Diff of the current tcmalloc from the original google-perftools r109. (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/tcmalloc/chromium/src/tcmalloc.cc
diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
index 97c366c9f3ab419393a8df13fd046c13e186c7cf..51fe5b31ecd17639e6e58b0d1ec60a66c01cb333 100644
--- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
+++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
@@ -122,8 +122,8 @@
#include "base/spinlock.h" // for SpinLockHolder
#include "central_freelist.h" // for CentralFreeListPadded
#include "common.h" // for StackTrace, kPageShift, etc
+#include "free_list.h" // for FL_Init
#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc
-#include "linked_list.h" // for SLL_SetNext
#include "malloc_hook-inl.h" // for MallocHook::InvokeNewHook, etc
#include "page_heap.h" // for PageHeap, PageHeap::Stats
#include "page_heap_allocator.h" // for PageHeapAllocator
@@ -150,6 +150,13 @@
# define WIN32_DO_PATCHING 1
#endif
+// GLibc 2.14+ requires the hook functions be declared volatile, based on the
+// value of the define __MALLOC_HOOK_VOLATILE. For compatibility with
+// older/non-GLibc implementations, provide an empty definition.
+#if !defined(__MALLOC_HOOK_VOLATILE)
+#define __MALLOC_HOOK_VOLATILE
+#endif
+
using STL_NAMESPACE::max;
using STL_NAMESPACE::numeric_limits;
using STL_NAMESPACE::vector;
@@ -169,6 +176,14 @@ using tcmalloc::ThreadCache;
# define __THROW // __THROW is just an optimization, so ok to make it ""
#endif
+// ---- Double free debug declarations
+static size_t ExcludeSpaceForMark(size_t size);
+static void AddRoomForMark(size_t* size);
+static void ExcludeMarkFromSize(size_t* new_size);
+static void MarkAllocatedRegion(void* ptr);
+static void ValidateAllocatedRegion(void* ptr, size_t cl);
+// ---- End Double free debug declarations
+
DECLARE_int64(tcmalloc_sample_parameter);
DECLARE_double(tcmalloc_release_rate);
@@ -203,7 +218,6 @@ DEFINE_int64(tcmalloc_large_alloc_report_threshold,
// put all callers of MallocHook::Invoke* in this module into
// ATTRIBUTE_SECTION(google_malloc) section, so that
// MallocHook::GetCallerStackTrace can function accurately.
-#ifndef _WIN32 // windows doesn't have attribute_section, so don't bother
extern "C" {
void* tc_malloc(size_t size) __THROW
ATTRIBUTE_SECTION(google_malloc);
@@ -265,7 +279,6 @@ extern "C" {
size_t tc_malloc_size(void* p) __THROW
ATTRIBUTE_SECTION(google_malloc);
} // extern "C"
-#endif // #ifndef _WIN32
// Override the libc functions to prefer our own instead. This comes
// first so code in tcmalloc.cc can use the overridden versions. One
@@ -274,6 +287,10 @@ extern "C" {
// them. In that case, we don't want to do this overriding here.
#if !defined(WIN32_DO_PATCHING)
+// TODO(mbelshe): Turn off TCMalloc's symbols for libc. We do that
+// elsewhere.
+#ifndef _WIN32
+
#if defined(__GNUC__) && !defined(__MACH__)
// Potentially faster variants that use the gcc alias extension.
// FreeBSD does support aliases, but apparently not correctly. :-(
@@ -384,6 +401,35 @@ extern "C" {
} // extern "C"
#endif // ifdef __GLIBC__
+#if defined(__GLIBC__) && defined(HAVE_MALLOC_H)
+// If we're using glibc, then override glibc malloc hooks to make sure that even
+// if calls fall through to ptmalloc (due to dlopen() with RTLD_DEEPBIND or what
+// not), ptmalloc will use TCMalloc.
+
+static void* tc_ptmalloc_malloc_hook(size_t size, const void* caller) {
+ return tc_malloc(size);
+}
+
+void* (*__MALLOC_HOOK_VOLATILE __malloc_hook)(
+ size_t size, const void* caller) = tc_ptmalloc_malloc_hook;
+
+static void* tc_ptmalloc_realloc_hook(
+ void* ptr, size_t size, const void* caller) {
+ return tc_realloc(ptr, size);
+}
+
+void* (*__MALLOC_HOOK_VOLATILE __realloc_hook)(
+ void* ptr, size_t size, const void* caller) = tc_ptmalloc_realloc_hook;
+
+static void tc_ptmalloc_free_hook(void* ptr, const void* caller) {
+ tc_free(ptr);
+}
+
+void (*__MALLOC_HOOK_VOLATILE __free_hook)(void* ptr, const void* caller) = tc_ptmalloc_free_hook;
+
+#endif
+
+#endif // #ifndef _WIN32
#undef ALIAS
#endif // #ifndef(WIN32_DO_PATCHING)
@@ -466,6 +512,15 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
- stats.transfer_bytes
- stats.thread_bytes);
+ out->printf(
+ "WASTE: %7.1f MiB committed but not used\n"
+ "WASTE: %7.1f MiB bytes committed, %7.1f MiB bytes in use\n"
+ "WASTE: committed/used ratio of %f\n",
+ (stats.pageheap.committed_bytes - bytes_in_use_by_app) / MiB,
+ stats.pageheap.committed_bytes / MiB,
+ bytes_in_use_by_app / MiB,
+ stats.pageheap.committed_bytes / static_cast<double>(bytes_in_use_by_app)
+ );
#ifdef TCMALLOC_SMALL_BUT_SLOW
out->printf(
"NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n");
@@ -473,6 +528,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
out->printf(
"------------------------------------------------\n"
"MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes committed\n"
"MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n"
"MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n"
"MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n"
@@ -493,6 +549,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
"Bytes released to the OS take up virtual address space"
" but no physical memory.\n",
bytes_in_use_by_app, bytes_in_use_by_app / MiB,
+ stats.pageheap.committed_bytes, stats.pageheap.committed_bytes / MiB,
stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB,
stats.central_bytes, stats.central_bytes / MiB,
stats.transfer_bytes, stats.transfer_bytes / MiB,
@@ -972,6 +1029,7 @@ static inline bool CheckCachedSizeClass(void *ptr) {
static inline void* CheckedMallocResult(void *result) {
ASSERT(result == NULL || CheckCachedSizeClass(result));
+ MarkAllocatedRegion(result);
return result;
}
@@ -1036,7 +1094,7 @@ static void ReportLargeAlloc(Length num_pages, void* result) {
inline void* cpp_alloc(size_t size, bool nothrow);
inline void* do_malloc(size_t size);
-// TODO(willchan): Investigate whether or not lining this much is harmful to
+// TODO(willchan): Investigate whether or not inlining this much is harmful to
// performance.
// This is equivalent to do_malloc() except when tc_new_mode is set to true.
// Otherwise, it will run the std::new_handler if set.
@@ -1091,6 +1149,8 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
}
inline void* do_malloc(size_t size) {
+ AddRoomForMark(&size);
+
void* ret = NULL;
// The following call forces module initialization
@@ -1101,13 +1161,15 @@ inline void* do_malloc(size_t size) {
if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
ret = DoSampledAllocation(size);
+ MarkAllocatedRegion(ret);
} else {
- // The common case, and also the simplest. This just pops the
+ // The common case, and also the simplest. This just pops the
// size-appropriate freelist, after replenishing it if it's empty.
ret = CheckedMallocResult(heap->Allocate(size, cl));
}
} else {
ret = do_malloc_pages(heap, size);
+ MarkAllocatedRegion(ret);
}
if (ret == NULL) errno = ENOMEM;
return ret;
@@ -1155,6 +1217,9 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
cl = span->sizeclass;
Static::pageheap()->CacheSizeClass(p, cl);
}
+
+ ValidateAllocatedRegion(ptr, cl);
+
if (cl != 0) {
ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
ThreadCache* heap = GetCacheIfPresent();
@@ -1162,7 +1227,7 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
heap->Deallocate(ptr, cl);
} else {
// Delete directly into central cache
- tcmalloc::SLL_SetNext(ptr, NULL);
+ tcmalloc::FL_Init(ptr);
Static::central_cache()[cl].InsertRange(ptr, ptr, 1);
}
} else {
@@ -1211,6 +1276,7 @@ inline void* do_realloc_with_callback(
void* old_ptr, size_t new_size,
void (*invalid_free_fn)(void*),
size_t (*invalid_get_size_fn)(void*)) {
+ AddRoomForMark(&new_size);
// Get the size of the old entry
const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
@@ -1229,6 +1295,7 @@ inline void* do_realloc_with_callback(
if (new_size > old_size && new_size < lower_bound_to_grow) {
new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow);
}
+ ExcludeMarkFromSize(&new_size); // do_malloc will add space if needed.
if (new_ptr == NULL) {
// Either new_size is not a tiny increment, or last do_malloc failed.
new_ptr = do_malloc_or_cpp_alloc(new_size);
@@ -1247,6 +1314,7 @@ inline void* do_realloc_with_callback(
} else {
// We still need to call hooks to report the updated size:
MallocHook::InvokeDeleteHook(old_ptr);
+ ExcludeMarkFromSize(&new_size);
MallocHook::InvokeNewHook(old_ptr, new_size);
return old_ptr;
}
@@ -1267,6 +1335,8 @@ inline void* do_realloc(void* old_ptr, size_t new_size) {
void* do_memalign(size_t align, size_t size) {
ASSERT((align & (align - 1)) == 0);
ASSERT(align > 0);
+  // Marked in CheckedMallocResult(), which is also inside SpanToMallocResult().
+ AddRoomForMark(&size);
if (size + align < size) return NULL; // Overflow
// Fall back to malloc if we would already align this memory access properly.
@@ -1476,7 +1546,8 @@ void* cpp_memalign(size_t align, size_t size) {
// As promised, the definition of this function, declared above.
size_t TCMallocImplementation::GetAllocatedSize(void* ptr) {
- return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
+ return ExcludeSpaceForMark(
+ GetSizeWithCallback(ptr, &InvalidGetAllocatedSize));
}
void TCMallocImplementation::MarkThreadBusy() {
@@ -1693,6 +1764,174 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller)
MallocHook::InvokeNewHook(result, size);
return result;
}
-void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
-
+void *(*__MALLOC_HOOK_VOLATILE __memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
#endif // TCMALLOC_USING_DEBUGALLOCATION
+
+// ---Double free() debugging implementation -----------------------------------
+// We will put a mark at the extreme end of each allocation block. We make
+// sure that we always allocate enough "extra memory" that we can fit in the
+// mark, and still provide the requested usable region. If ever that mark is
+// not as expected, then we know that the user is corrupting memory beyond their
+// request size, or that they have called free a second time without having
+// the memory allocated (again). This allows us to spot most double free()s,
+// but some can "slip by" or confuse our logic if the caller reallocates memory
+// (for a second use) before performing an evil double-free of a first
+// allocation.
+
+// This code can be optimized, but for now, it is written to be most easily
+// understood, and flexible (since it is evolving a bit). Potential
+// optimizations include using other calculated data, such as class size, or
+// allocation size, which is known in the code above, but then is recalculated
+// below. Another potential optimization would be careful manual inlining of
+// code, but I *think* that the compile will probably do this for me, and I've
+// been careful to avoid aliasing issues that might make a compiler back-off.
+
+// Evolution includes experimenting with different marks, to minimize the chance
+// that a mark would be misunderstood (missed corruption). The marks are meant
+// to be hashed encoding of the location, so that they can't be copied over a
+// different region (by accident) without being detected (most of the time).
+
+// Enable the following define to turn on all the TCMalloc checking.
+// It will cost about 2% in performance, but it will catch double frees (most of
+// the time), and will often catch allocated-buffer overrun errors. This
+// validation is only active when TCMalloc is used as the allocator.
+#ifndef NDEBUG
+#define TCMALLOC_VALIDATION
+#endif
+
+#if !defined(TCMALLOC_VALIDATION)
+
+static size_t ExcludeSpaceForMark(size_t size) { return size; }
+static void AddRoomForMark(size_t* size) {}
+static void ExcludeMarkFromSize(size_t* new_size) {}
+static void MarkAllocatedRegion(void* ptr) {}
+static void ValidateAllocatedRegion(void* ptr, size_t cl) {}
+
+#else // TCMALLOC_VALIDATION
+
+static void DieFromDoubleFree() {
+ char* p = NULL;
+ p++;
+ *p += 1; // Segv.
+}
+
+static size_t DieFromBadFreePointer(void* unused) {
+ char* p = NULL;
+ p += 2;
+ *p += 2; // Segv.
+ return 0;
+}
+
+static void DieFromMemoryCorruption() {
+ char* p = NULL;
+ p += 3;
+ *p += 3; // Segv.
+}
+
+// We can either do byte marking, or whole word marking based on the following
+// define. char is as small as we can get, and word marking probably provides
+// more than enough bits that we won't miss a corruption. Any sized integral
+// type can be used, but we just define two examples.
+
+// #define TCMALLOC_SMALL_VALIDATION
+#if defined (TCMALLOC_SMALL_VALIDATION)
+
+typedef char MarkType; // char saves memory... int is more complete.
+static const MarkType kAllocationMarkMask = static_cast<MarkType>(0x36);
+
+#else
+
+typedef int MarkType; // char saves memory... int is more complete.
+static const MarkType kAllocationMarkMask = static_cast<MarkType>(0xE1AB9536);
+
+#endif
+
+// TODO(jar): See if use of reference rather than pointer gets better inlining,
+// or if macro is needed. My fear is that taking an address may preclude register
+// allocation :-(.
+inline static void AddRoomForMark(size_t* size) {
+ *size += sizeof(kAllocationMarkMask);
+}
+
+inline static void ExcludeMarkFromSize(size_t* new_size) {
+ *new_size -= sizeof(kAllocationMarkMask);
+}
+
+inline static size_t ExcludeSpaceForMark(size_t size) {
+ return size - sizeof(kAllocationMarkMask); // Lie about size when asked.
+}
+
+inline static MarkType* GetMarkLocation(void* ptr) {
+ size_t class_size = GetSizeWithCallback(ptr, DieFromBadFreePointer);
+ ASSERT(class_size % sizeof(kAllocationMarkMask) == 0);
+ size_t last_index = (class_size / sizeof(kAllocationMarkMask)) - 1;
+ return static_cast<MarkType*>(ptr) + last_index;
+}
+
+// We hash in the mark location plus the pointer so that we effectively mix in
+// the size of the block. This means that if a span is used for different sizes
+// that the mark will be different. It would be good to hash in the size (which
+// we effectively get by using both mark location and pointer), but even better
+// would be to also include the class, as it concisely contains the entropy
+// found in the size (when we don't have large allocation), and there is less
+// risk of losing those bits to truncation. It would probably be good to combine
+// the high bits of size (capturing info about large blocks) with the class
+// (which is a 6 bit number).
+inline static MarkType GetMarkValue(void* ptr, MarkType* mark) {
+ void* ptr2 = static_cast<void*>(mark);
+ size_t offset1 = static_cast<char*>(ptr) - static_cast<char*>(NULL);
+ size_t offset2 = static_cast<char*>(ptr2) - static_cast<char*>(NULL);
+ static const int kInvariantBits = 2;
+ ASSERT((offset1 >> kInvariantBits) << kInvariantBits == offset1);
+ // Note: low bits of both offsets are invariants due to alignment. High bits
+ // of both offsets are the same (unless we have a large allocation). Avoid
+ // XORing high bits together, as they will cancel for most small allocations.
+
+ MarkType ret = kAllocationMarkMask;
+ // Using a little shift, we can safely XOR together both offsets.
+ ret ^= static_cast<MarkType>(offset1 >> kInvariantBits) ^
+ static_cast<MarkType>(offset2);
+ if (sizeof(ret) == 1) {
+ // Try to bring some high level bits into the mix.
+ ret += static_cast<MarkType>(offset1 >> 8) ^
+ static_cast<MarkType>(offset1 >> 16) ^
+ static_cast<MarkType>(offset1 >> 24) ;
+ }
+ // Hash in high bits on a 64 bit architecture.
+ if (sizeof(size_t) == 8 && sizeof(ret) == 4)
+ ret += offset1 >> 16;
+ if (ret == 0)
+ ret = kAllocationMarkMask; // Avoid common pattern of all zeros.
+ return ret;
+}
+
+// TODO(jar): Use the passed in TCmalloc Class Index to calculate mark location
+// faster. The current implementation calls general functions, which have to
+// recalculate this in order to get the Class Size. This is a slow and wasteful
+// recomputation... but it is much more readable this way (for now).
+static void ValidateAllocatedRegion(void* ptr, size_t cl) {
+ if (ptr == NULL) return;
+ MarkType* mark = GetMarkLocation(ptr);
+ MarkType allocated_mark = GetMarkValue(ptr, mark);
+ MarkType current_mark = *mark;
+
+ if (current_mark == ~allocated_mark)
+ DieFromDoubleFree();
+ if (current_mark != allocated_mark)
+ DieFromMemoryCorruption();
+#ifndef NDEBUG
+ // In debug mode, copy the mark into all the free'd region.
+ size_t class_size = static_cast<size_t>(reinterpret_cast<char*>(mark) -
+ reinterpret_cast<char*>(ptr));
+ memset(ptr, static_cast<char>(0x36), class_size);
+#endif
+ *mark = ~allocated_mark; // Distinctively not allocated.
+}
+
+static void MarkAllocatedRegion(void* ptr) {
+ if (ptr == NULL) return;
+ MarkType* mark = GetMarkLocation(ptr);
+ *mark = GetMarkValue(ptr, mark);
+}
+
+#endif // TCMALLOC_VALIDATION
« no previous file with comments | « third_party/tcmalloc/chromium/src/system-alloc.cc ('k') | third_party/tcmalloc/chromium/src/tests/debugallocation_test.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698