From aad2bbdb6fb7c8217d7e75480b38e45f00cb3abd Mon Sep 17 00:00:00 2001
From: reduz
Date: Thu, 8 Oct 2015 15:00:40 -0300
Subject: newline fixes

---
 drivers/nedmalloc/nedmalloc.cpp | 2934 +++++++++++++++++++--------------------
 1 file changed, 1467 insertions(+), 1467 deletions(-)

diff --git a/drivers/nedmalloc/nedmalloc.cpp b/drivers/nedmalloc/nedmalloc.cpp
index 8845d96549..9aac277a2a 100644
--- a/drivers/nedmalloc/nedmalloc.cpp
+++ b/drivers/nedmalloc/nedmalloc.cpp
@@ -1,1467 +1,1467 @@
-#ifdef NEDMALLOC_ENABLED
-/* Alternative malloc implementation for multiple threads without
-lock contention based on dlmalloc. (C) 2005-2009 Niall Douglas
-
-Boost Software License - Version 1.0 - August 17th, 2003
-
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-#ifdef _MSC_VER
-/* Enable full aliasing on MSVC */
-/*#pragma optimize("a", on)*/
-#pragma warning(push)
-#pragma warning(disable:4100) /* unreferenced formal parameter */
-#pragma warning(disable:4127) /* conditional expression is constant */
-#pragma warning(disable:4706) /* assignment within conditional expression */
-#endif
-
-/*#define ENABLE_TOLERANT_NEDMALLOC 1*/
-/*#define ENABLE_FAST_HEAP_DETECTION 1*/
-/*#define NEDMALLOC_DEBUG 1*/
-
-/*#define FULLSANITYCHECKS*/
-/* If link time code generation is on, don't force or prevent inlining */
-#if defined(_MSC_VER) && defined(NEDMALLOC_DLL_EXPORTS)
-#define FORCEINLINE
-#define NOINLINE
-#endif
-
-
-#include "nedmalloc.h"
-#ifdef WIN32
- #include <malloc.h>
- #include <stddef.h>
-#endif
-#if USE_ALLOCATOR==1
- #define MSPACES 1
- #define ONLY_MSPACES 1
-#endif
-#define USE_DL_PREFIX 1
-#ifndef USE_LOCKS
- #define USE_LOCKS 1
-#endif
-#define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */
-#ifndef NEDMALLOC_DEBUG
- #if defined(DEBUG) || defined(_DEBUG)
-  #define NEDMALLOC_DEBUG 1
- #else
-  #define NEDMALLOC_DEBUG 0
- #endif
-#endif
-/* We need to consistently define DEBUG=0|1, _DEBUG and NDEBUG for dlmalloc */
-#undef DEBUG
-#undef _DEBUG
-#if NEDMALLOC_DEBUG
- #define _DEBUG
- #define DEBUG 1
-#else
- #define DEBUG 0
-#endif
-#ifdef NDEBUG /* Disable assert checking on release builds */
- #undef DEBUG
- #undef _DEBUG
-#endif
-/* The default of 64Kb means we spend too much time kernel-side */
-#ifndef DEFAULT_GRANULARITY
-#define DEFAULT_GRANULARITY (1*1024*1024)
-#if DEBUG
-#define DEFAULT_GRANULARITY_ALIGNED
-#endif
-#endif
-/*#define USE_SPIN_LOCKS 0*/
-
-
-#include "malloc.c.h"
-#ifdef NDEBUG /* Disable assert checking on release builds */
- #undef DEBUG
-#elif !NEDMALLOC_DEBUG
- #ifdef __GNUC__
-  #warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.
- #elif defined(_MSC_VER)
-  #pragma message(__FILE__ ": WARNING: DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.")
- #endif
-#endif
-
-/* The maximum concurrent threads in a pool possible */
-#ifndef MAXTHREADSINPOOL
-#define MAXTHREADSINPOOL 16
-#endif
-/* The maximum number of threadcaches which can be allocated */
-#ifndef THREADCACHEMAXCACHES
-#define THREADCACHEMAXCACHES 256
-#endif
-/* The maximum size to be allocated from the thread cache */
-#ifndef THREADCACHEMAX
-#define THREADCACHEMAX 8192
-#endif
-#if 0
-/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
-#define THREADCACHEMAXBINS ((13-4)*2)
-#else
-/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
-#define THREADCACHEMAXBINS (13-4)
-#endif
-/* Point at which the free space in a thread cache is garbage collected */
-#ifndef THREADCACHEMAXFREESPACE
-#define THREADCACHEMAXFREESPACE (512*1024)
-#endif
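A quick check of the arithmetic in these comments: THREADCACHEMAX of 8192 has its top bit at position 13, so the coarse-grained setting yields 13-4 = 9 for THREADCACHEMAXBINS, and the cache actually spans sizes from 16 bytes up to 8KB across THREADCACHEMAXBINS+1 power-of-two bins (the +1 shows up in the bins[] array declared later). A standalone sketch; topbitpos() exists only in the comments above, so it is written out here as a hypothetical helper:

    #include <assert.h>

    /* Hypothetical helper: position of the highest set bit, as the
       comments above use the name. */
    static int topbitpos(unsigned int v) { int p=0; while(v>>=1) p++; return p; }

    int main(void)
    {
      assert(topbitpos(8192)==13);  /* THREADCACHEMAX = 8192 = 1<<13 */
      assert(13-4==9);              /* THREADCACHEMAXBINS */
      /* bin idx serves blocks of 1<<(idx+4) bytes for idx=0..9, i.e. 16B..8KB,
         hence the (THREADCACHEMAXBINS+1)*2 sizing of the bins array below */
      return 0;
    }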
-
-
-#ifdef WIN32
- #define TLSVAR DWORD
- #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
- #define TLSFREE(k) (!TlsFree(k))
- #define TLSGET(k) TlsGetValue(k)
- #define TLSSET(k, a) (!TlsSetValue(k, a))
- #ifdef DEBUG
-static LPVOID ChkedTlsGetValue(DWORD idx)
-{
-  LPVOID ret=TlsGetValue(idx);
-  assert(S_OK==GetLastError());
-  return ret;
-}
- #undef TLSGET
- #define TLSGET(k) ChkedTlsGetValue(k)
- #endif
-#else
- #define TLSVAR pthread_key_t
- #define TLSALLOC(k) pthread_key_create(k, 0)
- #define TLSFREE(k) pthread_key_delete(k)
- #define TLSGET(k) pthread_getspecific(k)
- #define TLSSET(k, a) pthread_setspecific(k, a)
-#endif
-
-#if defined(__cplusplus)
-#if !defined(NO_NED_NAMESPACE)
-namespace nedalloc {
-#else
-extern "C" {
-#endif
-#endif
-
-#if USE_ALLOCATOR==0
-static void *unsupported_operation(const char *opname) THROWSPEC
-{
-  fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", opname);
-  abort();
-  return 0;
-}
-static size_t mspacecounter=(size_t) 0xdeadbeef;
-#endif
-#ifndef ENABLE_FAST_HEAP_DETECTION
-static void *RESTRICT leastusedaddress;
-static size_t largestusedblock;
-#endif
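These macros present a single surface over Win32 TLS slots and pthreads thread-specific keys, arranged so that a non-zero result always means failure; that is why the pool code below wraps them in if(...) abort(). A minimal sketch of the intended calling pattern (nothing here beyond the macros just defined):

    static void tls_example(void)
    { /* Sketch: the alloc/set/get/free cycle InitPool()/AllocCache() use. */
      TLSVAR key;
      if(TLSALLOC(&key)) abort();                  /* non-zero return = failure */
      if(TLSSET(key, (void *)(size_t) 42)) abort();
      void *v=TLSGET(key);                         /* 42 on this thread only */
      if(TLSFREE(key)) abort();
      (void) v;
    }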
-
-static FORCEINLINE void *CallMalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
-{
-  void *RESTRICT ret=0;
-  size_t _alignment=alignment;
-#if USE_MAGIC_HEADERS
-  size_t *_ret=0;
-  size+=alignment+3*sizeof(size_t);
-  _alignment=0;
-#endif
-#if USE_ALLOCATOR==0
-  ret=_alignment ?
-#ifdef _MSC_VER
-    /* This is the MSVCRT equivalent */
-    _aligned_malloc(size, _alignment)
-#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
-    /* This is the glibc/ptmalloc2/dlmalloc/BSD libc equivalent. */
-    memalign(_alignment, size)
-#else
-#error Cannot aligned allocate with the memory allocator of an unknown system!
-#endif
-    : malloc(size);
-#elif USE_ALLOCATOR==1
-  ret=_alignment ? mspace_memalign((mstate) mspace, _alignment, size) : mspace_malloc((mstate) mspace, size);
-#ifndef ENABLE_FAST_HEAP_DETECTION
-  if(ret)
-  {
-    size_t truesize=chunksize(mem2chunk(ret));
-    if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
-    if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
-  }
-#endif
-#endif
-  if(!ret) return 0;
-#if USE_MAGIC_HEADERS
-  _ret=(size_t *) ret;
-  ret=(void *)(_ret+3);
-  if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
-  for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *)"NEDMALOC";
-  _ret[0]=(size_t) mspace;
-  _ret[1]=size-3*sizeof(size_t);
-#endif
-  return ret;
-}
-
-static FORCEINLINE void *CallCalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
-{
-  void *RESTRICT ret=0;
-#if USE_MAGIC_HEADERS
-  size_t *_ret=0;
-  size+=alignment+3*sizeof(size_t);
-#endif
-#if USE_ALLOCATOR==0
-  ret=calloc(1, size);
-#elif USE_ALLOCATOR==1
-  ret=mspace_calloc((mstate) mspace, 1, size);
-#ifndef ENABLE_FAST_HEAP_DETECTION
-  if(ret)
-  {
-    size_t truesize=chunksize(mem2chunk(ret));
-    if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
-    if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
-  }
-#endif
-#endif
-  if(!ret) return 0;
-#if USE_MAGIC_HEADERS
-  _ret=(size_t *) ret;
-  ret=(void *)(_ret+3);
-  if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
-  for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
-  _ret[0]=(size_t) mspace;
-  _ret[1]=size-3*sizeof(size_t);
-#endif
-  return ret;
-}
-
-static FORCEINLINE void *CallRealloc(void *RESTRICT mspace, void *RESTRICT mem, int isforeign, size_t oldsize, size_t newsize) THROWSPEC
-{
-  void *RESTRICT ret=0;
-#if USE_MAGIC_HEADERS
-  mstate oldmspace=0;
-  size_t *_ret=0, *_mem=(size_t *) mem-3;
-#endif
-  if(isforeign)
-  { /* Transfer */
-#if USE_MAGIC_HEADERS
-    assert(_mem[0]!=*(size_t *) "NEDMALOC");
-#endif
-    if((ret=CallMalloc(mspace, newsize, 0)))
-    {
-#if defined(DEBUG)
-      printf("*** nedmalloc frees system allocated block %p\n", mem);
-#endif
-      memcpy(ret, mem, oldsize<newsize ? oldsize : newsize);
-      free(mem);
-    }
-    return ret;
-  }
-#if USE_MAGIC_HEADERS
-  assert(_mem[0]==*(size_t *) "NEDMALOC");
-  oldmspace=(mstate) _mem[1];
-  oldsize=_mem[2];
-  for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
-  mem=(void *)(++_mem);
-#endif
-#if USE_ALLOCATOR==0
-  ret=realloc(mem, newsize);
-#elif USE_ALLOCATOR==1
-  ret=mspace_realloc((mstate) mspace, mem, newsize);
-#ifndef ENABLE_FAST_HEAP_DETECTION
-  if(ret)
-  {
-    size_t truesize=chunksize(mem2chunk(ret));
-    if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
-  }
-#endif
-#endif
-  if(!ret)
-  { /* Put it back the way it was */
-#if USE_MAGIC_HEADERS
-    for(; *_mem==0; *_mem++=*(size_t *) "NEDMALOC");
-#endif
-    return 0;
-  }
-#if USE_MAGIC_HEADERS
-  _ret=(size_t *) ret;
-  ret=(void *)(_ret+3);
-  for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
-  _ret[0]=(size_t) mspace;
-  _ret[1]=newsize-3*sizeof(size_t);
-#endif
-  return ret;
-}
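Under USE_MAGIC_HEADERS, all three Call* routines above reserve three size_t words ahead of the pointer they hand out. Reading the code, the word order immediately below the user pointer is: one or more "NEDMALOC" magic words, then the owning mspace, then the usable size. A sketch of the layout for the unaligned case (my reading of the code above, not a separate spec):

    /* CallMalloc(mspace, size, 0) with USE_MAGIC_HEADERS, no alignment:
     *
     *   base[0]      base[1]          base[2]       base+3 == user pointer
     *   "NEDMALOC"   (size_t) mspace  usable size   user data...
     *
     * With alignment, the run of magic words grows so that the mspace and
     * size words always sit immediately below the user pointer.  This is
     * what nedblkmstate()/nedblksize() below decode via (size_t *) mem - 3. */
    size_t *hdr=(size_t *) mem-3;
    int ours=(hdr[0]==*(size_t *) "NEDMALOC");  /* same test CallFree() asserts */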
"NEDMALOC"); - oldmspace=(mstate) _mem[1]; - oldsize=_mem[2]; - for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc"); - mem=(void *)(++_mem); -#endif -#if USE_ALLOCATOR==0 - free(mem); -#elif USE_ALLOCATOR==1 - mspace_free((mstate) mspace, mem); -#endif -} - -static NEDMALLOCNOALIASATTR mstate nedblkmstate(void *RESTRICT mem) THROWSPEC -{ - if(mem) - { -#if USE_MAGIC_HEADERS - size_t *_mem=(size_t *) mem-3; - if(_mem[0]==*(size_t *) "NEDMALOC") - { - return (mstate) _mem[1]; - } - else return 0; -#else -#if USE_ALLOCATOR==0 - /* Fail everything */ - return 0; -#elif USE_ALLOCATOR==1 -#ifdef ENABLE_FAST_HEAP_DETECTION -#ifdef WIN32 - /* On Windows for RELEASE both x86 and x64 the NT heap precedes each block with an eight byte header - which looks like: - normal: 4 bytes of size, 4 bytes of [char < 64, char < 64, char < 64 bit 0 always set, char random ] - mmaped: 4 bytes of size 4 bytes of [zero, zero, 0xb, zero ] - - On Windows for DEBUG both x86 and x64 the preceding four bytes is always 0xfdfdfdfd (no man's land). - */ -#pragma pack(push, 1) - struct _HEAP_ENTRY - { - USHORT Size; - USHORT PreviousSize; - UCHAR Cookie; /* SegmentIndex */ - UCHAR Flags; /* always bit 0 (HEAP_ENTRY_BUSY). bit 1=(HEAP_ENTRY_EXTRA_PRESENT), bit 2=normal block (HEAP_ENTRY_FILL_PATTERN), bit 3=mmap block (HEAP_ENTRY_VIRTUAL_ALLOC). Bit 4 (HEAP_ENTRY_LAST_ENTRY) could be set */ - UCHAR UnusedBytes; - UCHAR SmallTagIndex; /* fastbin index. Always one of 0x02, 0x03, 0x04 < 0x80 */ - } *RESTRICT he=((struct _HEAP_ENTRY *) mem)-1; -#pragma pack(pop) - unsigned int header=((unsigned int *)mem)[-1], mask1=0x8080E100, result1, mask2=0xFFFFFF06, result2; - result1=header & mask1; /* Positive testing for NT heap */ - result2=header & mask2; /* Positive testing for dlmalloc */ - if(result1==0x00000100 && result2!=0x00000102) - { /* This is likely a NT heap block */ - return 0; - } -#endif -#ifdef __linux__ - /* On Linux glibc uses ptmalloc2 (really dlmalloc) just as we do, but prev_foot contains rubbish - when the preceding block is allocated because ptmalloc2 finds the local mstate by rounding the ptr - down to the nearest megabyte. It's like dlmalloc with FOOTERS disabled. */ - mchunkptr p=mem2chunk(mem); - mstate fm=get_mstate_for(p); - /* If it's a ptmalloc2 block, fm is likely to be some crazy value */ - if(!is_aligned(fm)) return 0; - if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0; - if(ok_magic(fm)) - return fm; - else - return 0; - if(1) { } -#endif - else - { - mchunkptr p=mem2chunk(mem); - mstate fm=get_mstate_for(p); - assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */ - if(ok_magic(fm)) - return fm; - } -#else -//#ifdef WIN32 -// __try -//#endif - { - /* We try to return zero here if it isn't one of our own blocks, however - the current block annotation scheme used by dlmalloc makes it impossible - to be absolutely sure of avoiding a segfault. - - mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block; - mchunkptr->head = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS - FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently - in use unless mmap), bit 2 is UNUSED and currently is always zero. 
-//#ifdef WIN32
-//    __try
-//#endif
-    {
-      /* We try to return zero here if it isn't one of our own blocks, however
-      the current block annotation scheme used by dlmalloc makes it impossible
-      to be absolutely sure of avoiding a segfault.
-
-      mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block;
-      mchunkptr->head = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS
-      FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently
-      in use unless mmap), bit 2 is UNUSED and currently is always zero.
-      */
-      register void *RESTRICT leastusedaddress_=leastusedaddress; /* Cache these to avoid register reloading */
-      register size_t largestusedblock_=largestusedblock;
-      if(!is_aligned(mem)) return 0; /* Would fail very rarely as all allocators return aligned blocks */
-      if(mem<leastusedaddress_) return 0;
-      {
-        mchunkptr p=mem2chunk(mem);
-        mstate fm=0;
-        int ismmapped=is_mmapped(p);
-        if((!ismmapped && !is_inuse(p)) || (p->head & FLAG4_BIT)) return 0;
-        /* Reduced uncertainty by 0.5^2 = 25.0% */
-        /* size should never exceed largestusedblock */
-        if(chunksize(p)>largestusedblock_) return 0;
-        /* Reduced uncertainty by a minimum of 0.5^3 = 12.5%, maximum 0.5^16 = 0.0015% */
-        /* Having sanity checked prev_foot and head, check next block */
-        if(!ismmapped && (!next_pinuse(p) || (next_chunk(p)->head & FLAG4_BIT))) return 0;
-        /* Reduced uncertainty by 0.5^5 = 3.13% or 0.5^18 = 0.00038% */
- #if 0
-        /* If previous block is free, check that its next block pointer equals us */
-        if(!ismmapped && !pinuse(p))
-          if(next_chunk(prev_chunk(p))!=p) return 0;
-        /* We could start comparing prev_foot's for similarity but it starts getting slow. */
- #endif
-        fm = get_mstate_for(p);
-        if(!is_aligned(fm) || (void *)fm<leastusedaddress_) return 0;
-        if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
-        assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */
-        if(ok_magic(fm))
-          return fm;
-      }
-    }
-//#ifdef WIN32
-//    __except(1) { }
-//#endif
-#endif
-#endif
-#endif
-  }
-  return 0;
-}
-NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC
-{
-  if(mem)
-  {
-    if(isforeign) *isforeign=1;
-#if USE_MAGIC_HEADERS
-    {
-      size_t *_mem=(size_t *) mem-3;
-      if(_mem[0]==*(size_t *) "NEDMALOC")
-      {
-        mstate mspace=(mstate) _mem[1];
-        size_t size=_mem[2];
-        if(isforeign) *isforeign=0;
-        return size;
-      }
-    }
-#elif USE_ALLOCATOR==1
-    if(nedblkmstate(mem))
-    {
-      mchunkptr p=mem2chunk(mem);
-      if(isforeign) *isforeign=0;
-      return chunksize(p)-overhead_for(p);
-    }
-#ifdef DEBUG
-    else
-    {
-      int a=1; /* Set breakpoints here if needed */
-    }
-#endif
-#endif
-#if defined(ENABLE_TOLERANT_NEDMALLOC) || USE_ALLOCATOR==0
-#ifdef _MSC_VER
-    /* This is the MSVCRT equivalent */
-    return _msize(mem);
-#elif defined(__linux__)
-    /* This is the glibc/ptmalloc2/dlmalloc equivalent. */
-    return malloc_usable_size(mem);
-#elif defined(__FreeBSD__) || defined(__APPLE__)
-    /* This is the BSD libc equivalent. */
-    return malloc_size(mem);
-#else
-#error Cannot tolerate the memory allocator of an unknown system!
-#endif
-#endif
-  }
-  return 0;
-}
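nedblkmstate()'s fallback probe above stacks cheap structural tests, with each comment tracking how much uncertainty remains, and nedblksize() turns a recognised chunk into its usable size. The primitives are dlmalloc's, from the malloc.c.h included earlier; in isolation the core decode looks like this (sketch, assuming the same inclusion context as this file):

    mchunkptr c=mem2chunk(mem);      /* chunk header sits just below the block */
    if(c->head & FLAG4_BIT)
    { /* bit 2 set: this dlmalloc never sets it, so the block is foreign */ }
    size_t payload=chunksize(c)-overhead_for(c);  /* what nedblksize() reports */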
-
-NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC { nedpsetvalue((nedpool *) 0, v); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc((nedpool *) 0, size); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc((nedpool *) 0, no, size); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc((nedpool *) 0, mem, size); }
-NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC { nedpfree((nedpool *) 0, mem); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign((nedpool *) 0, alignment, bytes); }
-NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo((nedpool *) 0); }
-NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt((nedpool *) 0, parno, value); }
-NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim((nedpool *) 0, pad); }
-void nedmalloc_stats() THROWSPEC { nedpmalloc_stats((nedpool *) 0); }
-NEDMALLOCNOALIASATTR size_t nedmalloc_footprint() THROWSPEC { return nedpmalloc_footprint((nedpool *) 0); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks); }
-
-struct threadcacheblk_t;
-typedef struct threadcacheblk_t threadcacheblk;
-struct threadcacheblk_t
-{ /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
-#ifdef FULLSANITYCHECKS
-  unsigned int magic;
-#endif
-  unsigned int lastUsed, size;
-  threadcacheblk *next, *prev;
-};
-typedef struct threadcache_t
-{
-#ifdef FULLSANITYCHECKS
-  unsigned int magic1;
-#endif
-  int mymspace; /* Last mspace entry this thread used */
-  long threadid;
-  unsigned int mallocs, frees, successes;
-  size_t freeInCache; /* How much free space is stored in this cache */
-  threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
-#ifdef FULLSANITYCHECKS
-  unsigned int magic2;
-#endif
-} threadcache;
-struct nedpool_t
-{
-  MLOCK_T mutex;
-  void *uservalue;
-  int threads; /* Max entries in m to use */
-  threadcache *caches[THREADCACHEMAXCACHES];
-  TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
-  mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */
-};
-static nedpool syspool;
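For orientation, the one-line forwarders above are the drop-in surface: every nedmalloc()-family call is the corresponding nedp*() call against pool 0, which means the lazily-initialised system pool syspool. Typical use, as a sketch with error handling elided:

    #include "nedmalloc.h"

    void example(void)
    {
      void *a=nedmalloc(100);         /* small enough to come from the thread cache */
      a=nedrealloc(a, 200);
      nedfree(a);
      void *b=nedmemalign(64, 1024);  /* 64-byte aligned block */
      nedfree(b);
    }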
-
-static FORCEINLINE NEDMALLOCNOALIASATTR unsigned int size2binidx(size_t _size) THROWSPEC
-{ /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */
-  unsigned int topbit, size=(unsigned int)(_size>>4);
-  /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */
-
-#if defined(__GNUC__)
-  topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
-#elif defined(_MSC_VER) && _MSC_VER>=1300
-  {
-    unsigned long bsrTopBit;
-
-    _BitScanReverse(&bsrTopBit, size);
-
-    topbit = bsrTopBit;
-  }
-#else
-#if 0
-  union {
-    unsigned asInt[2];
-    double asDouble;
-  };
-  int n;
-
-  asDouble = (double)size + 0.5;
-  topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
-#else
-  {
-    unsigned int x=size;
-    x = x | (x >> 1);
-    x = x | (x >> 2);
-    x = x | (x >> 4);
-    x = x | (x >> 8);
-    x = x | (x >>16);
-    x = ~x;
-    x = x - ((x >> 1) & 0x55555555);
-    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
-    x = (x + (x >> 4)) & 0x0F0F0F0F;
-    x = x + (x << 8);
-    x = x + (x << 16);
-    topbit=31 - (x >> 24);
-  }
-#endif
-#endif
-  return topbit;
-}
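size2binidx() returns the position of the top set bit of size>>4, i.e. the power-of-two bucket of the request; the callers below turn that back into a byte size with 1<<(idx+4) and bump one bin upward if the request overshoots. Worked through for a 100-byte request (my arithmetic, not from the patch):

    /* size = 100: 100>>4 = 6 = 0b110, whose top bit is at position 2, so idx = 2.
       bestsize = 1<<(2+4) = 64 < 100, so threadcache_malloc() below bumps to
       idx = 3, bestsize = 128: a 100-byte request is served from the 128B bin. */
    assert(size2binidx(100)==2);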
-
-
-#ifdef FULLSANITYCHECKS
-static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
-{
-  assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
-  if(ptr[0] && ptr[1])
-  {
-    assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
-    assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
-    assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
-    assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
-    assert(!ptr[0]->prev);
-    assert(!ptr[1]->next);
-    if(ptr[0]==ptr[1])
-    {
-      assert(!ptr[0]->next);
-      assert(!ptr[1]->prev);
-    }
-  }
-}
-static void tcfullsanitycheck(threadcache *tc) THROWSPEC
-{
-  threadcacheblk **tcbptr=tc->bins;
-  int n;
-  for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
-  {
-    threadcacheblk *b, *ob=0;
-    tcsanitycheck(tcbptr);
-    for(b=tcbptr[0]; b; ob=b, b=b->next)
-    {
-      assert(*(unsigned int *) "NEDN"==b->magic);
-      assert(!ob || ob->next==b);
-      assert(!ob || b->prev==ob);
-    }
-  }
-}
-#endif
-
-static NOINLINE void RemoveCacheEntries(nedpool *RESTRICT p, threadcache *RESTRICT tc, unsigned int age) THROWSPEC
-{
-#ifdef FULLSANITYCHECKS
-  tcfullsanitycheck(tc);
-#endif
-  if(tc->freeInCache)
-  {
-    threadcacheblk **tcbptr=tc->bins;
-    int n;
-    for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
-    {
-      threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */
-      /*tcsanitycheck(tcbptr);*/
-      for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
-      {
-        threadcacheblk *f=*tcb;
-        size_t blksize=f->size; /*nedblksize(f);*/
-        assert(blksize<=nedblksize(0, f));
-        assert(blksize);
-#ifdef FULLSANITYCHECKS
-        assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
-#endif
-        *tcb=(*tcb)->prev;
-        if(*tcb)
-          (*tcb)->next=0;
-        else
-          *tcbptr=0;
-        tc->freeInCache-=blksize;
-        assert((long) tc->freeInCache>=0);
-        CallFree(0, f, 0);
-        /*tcsanitycheck(tcbptr);*/
-      }
-    }
-  }
-#ifdef FULLSANITYCHECKS
-  tcfullsanitycheck(tc);
-#endif
-}
-static void DestroyCaches(nedpool *RESTRICT p) THROWSPEC
-{
-  if(p->caches)
-  {
-    threadcache *tc;
-    int n;
-    for(n=0; n<THREADCACHEMAXCACHES; n++)
-    {
-      if((tc=p->caches[n]))
-      {
-        tc->frees++;
-        RemoveCacheEntries(p, tc, 0);
-        assert(!tc->freeInCache);
-        tc->mymspace=-1;
-        tc->threadid=0;
-        CallFree(0, tc, 0);
-        p->caches[n]=0;
-      }
-    }
-  }
-}
-
-static NOINLINE threadcache *AllocCache(nedpool *RESTRICT p) THROWSPEC
-{
-  threadcache *tc=0;
-  int n, end;
-  ACQUIRE_LOCK(&p->mutex);
-  for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
-  if(THREADCACHEMAXCACHES==n)
-  { /* List exhausted, so disable for this thread */
-    RELEASE_LOCK(&p->mutex);
-    return 0;
-  }
-  tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], sizeof(threadcache), 0);
-  if(!tc)
-  {
-    RELEASE_LOCK(&p->mutex);
-    return 0;
-  }
-#ifdef FULLSANITYCHECKS
-  tc->magic1=*(unsigned int *)"NEDMALC1";
-  tc->magic2=*(unsigned int *)"NEDMALC2";
-#endif
-  tc->threadid=(long)(size_t)CURRENT_THREAD;
-  for(end=0; p->m[end]; end++);
-  tc->mymspace=abs(tc->threadid) % end;
-  RELEASE_LOCK(&p->mutex);
-  if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
-  return tc;
-}
-
-static void *threadcache_malloc(nedpool *RESTRICT p, threadcache *RESTRICT tc, size_t *RESTRICT _size) THROWSPEC
-{
-  void *RESTRICT ret=0;
-  size_t size=*_size, blksize=0;
-  unsigned int bestsize;
-  unsigned int idx=size2binidx(size);
-  threadcacheblk *RESTRICT blk, **RESTRICT binsptr;
-#ifdef FULLSANITYCHECKS
-  tcfullsanitycheck(tc);
-#endif
-  /* Calculate best fit bin size */
-  bestsize=1<<(idx+4);
-#if 0
-  /* Finer grained bin fit */
-  idx<<=1;
-  if(size>bestsize)
-  {
-    idx++;
-    bestsize+=bestsize>>1;
-  }
-  if(size>bestsize)
-  {
-    idx++;
-    bestsize=1<<(4+(idx>>1));
-  }
-#else
-  if(size>bestsize)
-  {
-    idx++;
-    bestsize<<=1;
-  }
-#endif
-  assert(bestsize>=size);
-  if(size<bestsize) size=bestsize;
-  binsptr=&tc->bins[idx*2];
-  /* Try to match close, but move up a bin if necessary */
-  blk=*binsptr;
-  if(!blk || blk->size<size)
-  {
-    if(idx<THREADCACHEMAXBINS)
-    {
-      idx++;
-      binsptr+=2;
-      blk=*binsptr;
-    }
-  }
-  if(blk)
-  {
-    blksize=blk->size; /*nedblksize(blk);*/
-    assert(nedblksize(0, blk)>=blksize);
-    assert(blksize>=size);
-    if(blk->next)
-      blk->next->prev=0;
-    *binsptr=blk->next;
-    if(!*binsptr)
-      binsptr[1]=0;
-#ifdef FULLSANITYCHECKS
-    blk->magic=0;
-#endif
-    assert(binsptr[0]!=blk && binsptr[1]!=blk);
-    assert(nedblksize(0, blk)>=sizeof(threadcacheblk) && nedblksize(0, blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
-    /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) _size);*/
-    ret=(void *) blk;
-  }
-  ++tc->mallocs;
-  if(ret)
-  {
-    assert(blksize>=size);
-    ++tc->successes;
-    tc->freeInCache-=blksize;
-    assert((long) tc->freeInCache>=0);
-  }
-#if defined(DEBUG) && 0
-  if(!(tc->mallocs & 0xfff))
-  {
-    printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
-      (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
-  }
-#endif
-#ifdef FULLSANITYCHECKS
-  tcfullsanitycheck(tc);
-#endif
-  *_size=size;
-  return ret;
-}
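Each bin is a doubly-linked list with two anchor slots: bins[idx*2] is the head, where threadcache_free() below pushes and threadcache_malloc() above pops (LIFO, so the most recently freed, cache-warm block is reused first), while bins[idx*2+1] tracks the tail, which RemoveCacheEntries() drains because entries there carry the oldest lastUsed stamps. In sketch form, using the structures defined above:

    threadcacheblk **binsptr=&tc->bins[idx*2];
    threadcacheblk *mru=binsptr[0];  /* popped by threadcache_malloc() */
    threadcacheblk *lru=binsptr[1];  /* evicted first by RemoveCacheEntries() */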
-static NOINLINE void ReleaseFreeInCache(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace) THROWSPEC
-{
-  unsigned int age=THREADCACHEMAXFREESPACE/8192;
-  /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
-  while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
-  {
-    RemoveCacheEntries(p, tc, age);
-    /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
-    age>>=1;
-  }
-  /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
-}
-static void threadcache_free(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, void *RESTRICT mem, size_t size) THROWSPEC
-{
-  unsigned int bestsize;
-  unsigned int idx=size2binidx(size);
-  threadcacheblk **RESTRICT binsptr, *RESTRICT tck=(threadcacheblk *) mem;
-  assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
-#ifdef DEBUG
-  /* Make sure this is a valid memory block */
-  assert(nedblksize(0, mem));
-#endif
-#ifdef FULLSANITYCHECKS
-  tcfullsanitycheck(tc);
-#endif
-  /* Calculate best fit bin size */
-  bestsize=1<<(idx+4);
-#if 0
-  /* Finer grained bin fit */
-  idx<<=1;
-  if(size>bestsize)
-  {
-    unsigned int biggerbestsize=bestsize+bestsize<<1;
-    if(size>=biggerbestsize)
-    {
-      idx++;
-      bestsize=biggerbestsize;
-    }
-  }
-#endif
-  if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */
-    size=bestsize;
-  binsptr=&tc->bins[idx*2];
-  assert(idx<=THREADCACHEMAXBINS);
-  if(tck==*binsptr)
-  {
-    fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", tck);
-    abort();
-  }
-#ifdef FULLSANITYCHECKS
-  tck->magic=*(unsigned int *) "NEDN";
-#endif
-  tck->lastUsed=++tc->frees;
-  tck->size=(unsigned int) size;
-  tck->next=*binsptr;
-  tck->prev=0;
-  if(tck->next)
-    tck->next->prev=tck;
-  else
-    binsptr[1]=tck;
-  assert(!*binsptr || (*binsptr)->size==tck->size);
-  *binsptr=tck;
-  assert(tck==tc->bins[idx*2]);
-  assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
-  /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
-  tc->freeInCache+=size;
-#ifdef FULLSANITYCHECKS
-  tcfullsanitycheck(tc);
-#endif
-#if 1
-  if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
-    ReleaseFreeInCache(p, tc, mymspace);
-#endif
-}
-
-
-
-
-static NOINLINE int InitPool(nedpool *RESTRICT p, size_t capacity, int threads) THROWSPEC
-{ /* threads is -1 for system pool */
-  ensure_initialization();
-  ACQUIRE_MALLOC_GLOBAL_LOCK();
-  if(p->threads) goto done;
-  if(INITIAL_LOCK(&p->mutex)) goto err;
-  if(TLSALLOC(&p->mycache)) goto err;
-#if USE_ALLOCATOR==0
-  p->m[0]=(mstate) mspacecounter++;
-#elif USE_ALLOCATOR==1
-  if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
-  p->m[0]->extp=p;
-#endif
-  p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
-done:
-  RELEASE_MALLOC_GLOBAL_LOCK();
-  return 1;
-err:
-  if(threads<0)
-    abort(); /* If you can't allocate for system pool, we're screwed */
-  DestroyCaches(p);
-  if(p->m[0])
-  {
-#if USE_ALLOCATOR==1
-    destroy_mspace(p->m[0]);
-#endif
-    p->m[0]=0;
-  }
-  if(p->mycache)
-  {
-    if(TLSFREE(p->mycache)) abort();
-    p->mycache=0;
-  }
-  RELEASE_MALLOC_GLOBAL_LOCK();
-  return 0;
-}
-static NOINLINE mstate FindMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int *RESTRICT lastUsed, size_t size) THROWSPEC
-{ /* Gets called when thread's last used mspace is in use. The strategy
-  is to run through the list of all available mspaces looking for an
-  unlocked one and if we fail, we create a new one so long as we don't
-  exceed p->threads */
-  int n, end;
-  for(n=end=*lastUsed+1; p->m[n]; end=++n)
-  {
-    if(TRY_LOCK(&p->m[n]->mutex)) goto found;
-  }
-  for(n=0; n<*lastUsed && p->m[n]; n++)
-  {
-    if(TRY_LOCK(&p->m[n]->mutex)) goto found;
-  }
-  if(end<p->threads)
-  {
-    mstate temp;
-#if USE_ALLOCATOR==0
-    temp=(mstate) mspacecounter++;
-#elif USE_ALLOCATOR==1
-    if(!(temp=(mstate) create_mspace(size, 1)))
-      goto badexit;
-#endif
-    /* Now we're ready to modify the lists, we lock */
-    ACQUIRE_LOCK(&p->mutex);
-    while(p->m[end] && end<p->threads)
-      end++;
-    if(end>=p->threads)
-    { /* Drat, must destroy it now */
-      RELEASE_LOCK(&p->mutex);
-#if USE_ALLOCATOR==1
-      destroy_mspace((mstate) temp);
-#endif
-      goto badexit;
-    }
-    /* We really want to make sure this goes into memory now but we
-    have to be careful of breaking aliasing rules, so write it twice */
-    *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp;
-    ACQUIRE_LOCK(&p->m[end]->mutex);
-    /*printf("Created mspace idx %d\n", end);*/
-    RELEASE_LOCK(&p->mutex);
-    n=end;
-    goto found;
-  }
-  /* Let it lock on the last one it used */
-badexit:
-  ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
-  return p->m[*lastUsed];
-found:
-  *lastUsed=n;
-  if(tc)
-    tc->mymspace=n;
-  else
-  {
-    if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
-  }
-  return p->m[n];
-}
-
-typedef struct PoolList_t
-{
-  size_t size; /* Size of list */
-  size_t length; /* Actual entries in list */
-#ifdef DEBUG
-  nedpool *list[1]; /* Force testing of list expansion */
-#else
-  nedpool *list[16];
-#endif
-} PoolList;
-static MLOCK_T poollistlock;
-static PoolList *poollist;
-NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
-{
-  nedpool *ret=0;
-  if(!poollist)
-  {
-    PoolList *newpoollist=0;
-    if(!(newpoollist=(PoolList *) nedpcalloc(0, 1, sizeof(PoolList)+sizeof(nedpool *)))) return 0;
-    INITIAL_LOCK(&poollistlock);
-    ACQUIRE_LOCK(&poollistlock);
-    poollist=newpoollist;
-    poollist->size=sizeof(poollist->list)/sizeof(nedpool *);
-  }
-  else
-    ACQUIRE_LOCK(&poollistlock);
-  if(poollist->length==poollist->size)
-  {
-    PoolList *newpoollist=0;
-    size_t newsize=0;
-    newsize=sizeof(PoolList)+(poollist->size+1)*sizeof(nedpool *);
-    if(!(newpoollist=(PoolList *) nedprealloc(0, poollist, newsize))) goto badexit;
-    poollist=newpoollist;
-    memset(&poollist->list[poollist->size], 0, newsize-((size_t)&poollist->list[poollist->size]-(size_t)&poollist->list[0]));
-    poollist->size=((newsize-((char *)&poollist->list[0]-(char *)poollist))/sizeof(nedpool *))-1;
-    assert(poollist->size>poollist->length);
-  }
-  if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) goto badexit;
-  if(!InitPool(ret, capacity, threads))
-  {
-    nedpfree(0, ret);
-    goto badexit;
-  }
-  poollist->list[poollist->length++]=ret;
-badexit:
-  RELEASE_LOCK(&poollistlock);
-  return ret;
-}
-void neddestroypool(nedpool *p) THROWSPEC
-{
-  unsigned int n;
-  ACQUIRE_LOCK(&p->mutex);
-  DestroyCaches(p);
-  for(n=0; p->m[n]; n++)
-  {
-#if USE_ALLOCATOR==1
-    destroy_mspace(p->m[n]);
-#endif
-    p->m[n]=0;
-  }
-  RELEASE_LOCK(&p->mutex);
-  if(TLSFREE(p->mycache)) abort();
-  nedpfree(0, p);
-  ACQUIRE_LOCK(&poollistlock);
-  assert(poollist);
-  for(n=0; n<poollist->length && poollist->list[n]!=p; n++);
-  assert(n!=poollist->length);
-  memmove(&poollist->list[n], &poollist->list[n+1], (size_t)&poollist->list[poollist->length]-(size_t)&poollist->list[n]);
-  if(!--poollist->length)
-  {
-    assert(!poollist->list[0]);
-    nedpfree(0, poollist);
-    poollist=0;
-  }
-  RELEASE_LOCK(&poollistlock);
-}
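nedcreatepool()/neddestroypool() above also maintain the global registry that nedpoollist() exposes; a dedicated pool confines lock contention to the threads that share it. A minimal usage sketch (per InitPool() above, capacity 0 means the default mspace capacity and threads below 1 means MAXTHREADSINPOOL):

    nedpool *pool=nedcreatepool(0, 4);      /* at most 4 mspaces in this pool */
    void *mem=nedpmalloc(pool, 256);
    nedpsetvalue(pool, (void *) "my tag");  /* retrievable via nedgetvalue() */
    nedpfree(pool, mem);
    neddestroypool(pool);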
-void neddestroysyspool() THROWSPEC
-{
-  nedpool *p=&syspool;
-  int n;
-  ACQUIRE_LOCK(&p->mutex);
-  DestroyCaches(p);
-  for(n=0; p->m[n]; n++)
-  {
-#if USE_ALLOCATOR==1
-    destroy_mspace(p->m[n]);
-#endif
-    p->m[n]=0;
-  }
-  /* Render syspool unusable */
-  for(n=0; n<THREADCACHEMAXCACHES; n++)
-    p->caches[n]=(threadcache *)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
-  for(n=0; n<MAXTHREADSINPOOL+1; n++)
-    p->m[n]=(mstate)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
-  if(TLSFREE(p->mycache)) abort();
-  RELEASE_LOCK(&p->mutex);
-}
-nedpool **nedpoollist() THROWSPEC
-{
-  nedpool **ret=0;
-  if(poollist)
-  {
-    ACQUIRE_LOCK(&poollistlock);
-    if(!(ret=(nedpool **) nedmalloc((poollist->length+1)*sizeof(nedpool *)))) goto badexit;
-    memcpy(ret, poollist->list, (poollist->length+1)*sizeof(nedpool *));
-badexit:
-    RELEASE_LOCK(&poollistlock);
-  }
-  return ret;
-}
-
-void nedpsetvalue(nedpool *p, void *v) THROWSPEC
-{
-  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-  p->uservalue=v;
-}
-void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
-{
-  nedpool *np=0;
-  mstate fm=nedblkmstate(mem);
-  if(!fm || !fm->extp) return 0;
-  np=(nedpool *) fm->extp;
-  if(p) *p=np;
-  return np->uservalue;
-}
-
-void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC
-{
-  int mycache;
-  if(!p)
-  {
-    p=&syspool;
-    if(!syspool.threads) InitPool(&syspool, 0, -1);
-  }
-  mycache=(int)(size_t) TLSGET(p->mycache);
-  if(!mycache)
-  { /* Set to mspace 0 */
-    if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort();
-  }
-  else if(mycache>0)
-  { /* Set to last used mspace */
-    threadcache *tc=p->caches[mycache-1];
-#if defined(DEBUG)
-    printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
-      100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
-#endif
-    if(disable && TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
-    tc->frees++;
-    RemoveCacheEntries(p, tc, 0);
-    assert(!tc->freeInCache);
-    if(disable)
-    {
-      tc->mymspace=-1;
-      tc->threadid=0;
-      CallFree(0, p->caches[mycache-1], 0);
-      p->caches[mycache-1]=0;
-    }
-  }
-}
-void neddisablethreadcache(nedpool *p) THROWSPEC
-{
-  nedtrimthreadcache(p, 1);
-}
-
-#define GETMSPACE(m,p,tc,ms,s,action) \
-  do \
-  { \
-    mstate m = GetMSpace((p),(tc),(ms),(s)); \
-    action; \
-    if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); } \
-  } while (0)
-
-static FORCEINLINE mstate GetMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, size_t size) THROWSPEC
-{ /* Returns a locked and ready for use mspace */
-  mstate m=p->m[mymspace];
-  assert(m);
-#if USE_ALLOCATOR==1
-  if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);
-  /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
-#endif
-  return m;
-}
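GETMSPACE wraps the lock-run-unlock cycle in the usual do { } while(0) macro idiom so the injected action runs with m locked and the lock is always released afterwards; GetMSpace() itself falls back to FindMSpace() when the preferred mspace is contended. Expanded by hand for the nedpmalloc() call below, roughly what the preprocessor emits:

    do {
      mstate m = GetMSpace(p, tc, mymspace, size);  /* locked on return */
      ret=CallMalloc(m, size, 0);                   /* the injected action */
      if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); }
    } while (0);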
-static NOINLINE void GetThreadCache_cold1(nedpool *RESTRICT *RESTRICT p) THROWSPEC
-{
-  *p=&syspool;
-  if(!syspool.threads) InitPool(&syspool, 0, -1);
-}
-static NOINLINE void GetThreadCache_cold2(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, int mycache) THROWSPEC
-{
-  if(!mycache)
-  { /* Need to allocate a new cache */
-    *tc=AllocCache(*p);
-    if(!*tc)
-    { /* Disable */
-      if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort();
-      *mymspace=0;
-    }
-    else
-      *mymspace=(*tc)->mymspace;
-  }
-  else
-  { /* Cache disabled, but we do have an assigned thread pool */
-    *tc=0;
-    *mymspace=-mycache-1;
-  }
-}
-static FORCEINLINE void GetThreadCache(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, size_t *RESTRICT size) THROWSPEC
-{
-  int mycache;
-  if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
-  if(!*p)
-    GetThreadCache_cold1(p);
-  assert(*p);
-  mycache=(int)(size_t) TLSGET((*p)->mycache);
-  if(mycache>0)
-  { /* Already have a cache */
-    *tc=(*p)->caches[mycache-1];
-    *mymspace=(*tc)->mymspace;
-  }
-  else GetThreadCache_cold2(p, tc, mymspace, mycache);
-  assert(*mymspace>=0);
-  assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid);
-#ifdef FULLSANITYCHECKS
-  if(*tc)
-  {
-    if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
-    {
-      abort();
-    }
-  }
-#endif
-}
-
-NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
-{
-  void *ret=0;
-  threadcache *tc;
-  int mymspace;
-  GetThreadCache(&p, &tc, &mymspace, &size);
-#if THREADCACHEMAX
-  if(tc && size<=THREADCACHEMAX)
-  { /* Use the thread cache */
-    ret=threadcache_malloc(p, tc, &size);
-  }
-#endif
-  if(!ret)
-  { /* Use this thread's mspace */
-    GETMSPACE(m, p, tc, mymspace, size,
-      ret=CallMalloc(m, size, 0));
-  }
-  return ret;
-}
-NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
-{
-  size_t rsize=size*no;
-  void *ret=0;
-  threadcache *tc;
-  int mymspace;
-  GetThreadCache(&p, &tc, &mymspace, &rsize);
-#if THREADCACHEMAX
-  if(tc && rsize<=THREADCACHEMAX)
-  { /* Use the thread cache */
-    if((ret=threadcache_malloc(p, tc, &rsize)))
-      memset(ret, 0, rsize);
-  }
-#endif
-  if(!ret)
-  { /* Use this thread's mspace */
-    GETMSPACE(m, p, tc, mymspace, rsize,
-      ret=CallCalloc(m, rsize, 0));
-  }
-  return ret;
-}
-NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
-{
-  void *ret=0;
-  threadcache *tc;
-  int mymspace, isforeign=1;
-  size_t memsize;
-  if(!mem) return nedpmalloc(p, size);
-  memsize=nedblksize(&isforeign, mem);
-  assert(memsize);
-  if(!memsize)
-  {
-    fprintf(stderr, "nedmalloc: nedprealloc() called with a block not created by nedmalloc!\n");
-    abort();
-  }
-  else if(size<=memsize && memsize-size<
-#ifdef DEBUG
-    32
-#else
-    1024
-#endif
-    ) /* If realloc size is within 1Kb smaller than existing, noop it */
-    return mem;
-  GetThreadCache(&p, &tc, &mymspace, &size);
-#if THREADCACHEMAX
-  if(tc && size && size<=THREADCACHEMAX)
-  { /* Use the thread cache */
-    if((ret=threadcache_malloc(p, tc, &size)))
-    {
-      memcpy(ret, mem, memsize<size ? memsize : size);
-      if(memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
-        threadcache_free(p, tc, mymspace, mem, memsize);
-      else
-        CallFree(0, mem, isforeign);
-    }
-  }
-#endif
-  if(!ret)
-  { /* Reallocs always happen in the mspace they happened in, so skip
-    locking the preferred mspace for this thread */
-    ret=CallRealloc(p->m[mymspace], mem, isforeign, memsize, size);
-  }
-  return ret;
-}
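So in a release build, shrinking a block by less than 1024 bytes is a no-op that returns the same pointer; only growth, or a shrink of 1KB or more, reaches the thread cache or CallRealloc() path. For example, with hypothetical sizes (behaviour implied by the test above; memsize is the usable size reported by nedblksize()):

    /* 'blk' assumed to have come from nedmalloc(4096) */
    void *same=nedprealloc(0, blk, 3500);  /* 4096-3500 = 596 < 1024: no-op, same==blk */
    void *big=nedprealloc(0, same, 8192);  /* growth always takes the realloc path */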
-void nedpfree(nedpool *p, void *mem) THROWSPEC
-{ /* Frees always happen in the mspace they happened in, so skip
-  locking the preferred mspace for this thread */
-  threadcache *tc;
-  int mymspace, isforeign=1;
-  size_t memsize;
-  if(!mem)
-  { /* If you tried this on FreeBSD you'd be sorry! */
-#ifdef DEBUG
-    fprintf(stderr, "nedmalloc: WARNING nedpfree() called with zero. This is not portable behaviour!\n");
-#endif
-    return;
-  }
-  memsize=nedblksize(&isforeign, mem);
-  assert(memsize);
-  if(!memsize)
-  {
-    fprintf(stderr, "nedmalloc: nedpfree() called with a block not created by nedmalloc!\n");
-    abort();
-  }
-  GetThreadCache(&p, &tc, &mymspace, 0);
-#if THREADCACHEMAX
-  if(mem && tc && memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
-    threadcache_free(p, tc, mymspace, mem, memsize);
-  else
-#endif
-    CallFree(0, mem, isforeign);
-}
-NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
-{
-  void *ret;
-  threadcache *tc;
-  int mymspace;
-  GetThreadCache(&p, &tc, &mymspace, &bytes);
-  { /* Use this thread's mspace */
-    GETMSPACE(m, p, tc, mymspace, bytes,
-      ret=CallMalloc(m, bytes, alignment));
-  }
-  return ret;
-}
-struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC
-{
-  int n;
-  struct nedmallinfo ret={0};
-  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-  for(n=0; p->m[n]; n++)
-  {
-#if USE_ALLOCATOR==1 && !NO_MALLINFO
-    struct mallinfo t=mspace_mallinfo(p->m[n]);
-    ret.arena+=t.arena;
-    ret.ordblks+=t.ordblks;
-    ret.hblkhd+=t.hblkhd;
-    ret.usmblks+=t.usmblks;
-    ret.uordblks+=t.uordblks;
-    ret.fordblks+=t.fordblks;
-    ret.keepcost+=t.keepcost;
-#endif
-  }
-  return ret;
-}
-int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
-{
-#if USE_ALLOCATOR==1
-  return mspace_mallopt(parno, value);
-#else
-  return 0;
-#endif
-}
-NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC
-{
-#if USE_ALLOCATOR==1
-  if(granularity) *granularity=mparams.granularity;
-  if(magic) *magic=mparams.magic;
-  return (void *) &syspool;
-#else
-  if(granularity) *granularity=0;
-  if(magic) *magic=0;
-  return 0;
-#endif
-}
-int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
-{
-  int n, ret=0;
-  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-  for(n=0; p->m[n]; n++)
-  {
-#if USE_ALLOCATOR==1
-    ret+=mspace_trim(p->m[n], pad);
-#endif
-  }
-  return ret;
-}
-void nedpmalloc_stats(nedpool *p) THROWSPEC
-{
-  int n;
-  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-  for(n=0; p->m[n]; n++)
-  {
-#if USE_ALLOCATOR==1
-    mspace_malloc_stats(p->m[n]);
-#endif
-  }
-}
-size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
-{
-  size_t ret=0;
-  int n;
-  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-  for(n=0; p->m[n]; n++)
-  {
-#if USE_ALLOCATOR==1
-    ret+=mspace_footprint(p->m[n]);
-#endif
-  }
-  return ret;
-}
-NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
-{
-  void **ret;
-  threadcache *tc;
-  int mymspace;
-  GetThreadCache(&p, &tc, &mymspace, &elemsize);
-#if USE_ALLOCATOR==0
-  GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
-    ret=unsupported_operation("independent_calloc"));
-#elif USE_ALLOCATOR==1
-  GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
-    ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
-#endif
-  return ret;
-}
-NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
-{
-  void **ret;
-  threadcache *tc;
-  int mymspace;
-  size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
-  if(!adjustedsizes) return 0;
-  for(i=0; i<elems; i++)
-    adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
-  GetThreadCache(&p, &tc, &mymspace, 0);
-#if USE_ALLOCATOR==0
-  GETMSPACE(m, p, tc, mymspace, 0,
-    ret=unsupported_operation("independent_comalloc"));
-#elif USE_ALLOCATOR==1
-  GETMSPACE(m, p, tc, mymspace, 0,
-    ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
-#endif
-  return ret;
-}
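nedpindependent_comalloc() rounds every element size up to sizeof(threadcacheblk), presumably so each block stays eligible for the thread cache when it is later freed individually. Usage sketch via the pool-0 forwarder declared earlier (individual freeing of elements follows dlmalloc's documented independent_comalloc contract, stated here as an assumption):

    size_t sizes[3]={24, 56, 120};  /* each bumped to >= sizeof(threadcacheblk) */
    void *chunks[3];
    if(nedindependent_comalloc(3, sizes, chunks))
    { /* one contiguous underlying allocation, yet elements free separately */
      nedfree(chunks[1]);
    }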
lock the right mspace */ +#ifndef NEDMALLOC_DEBUG + #if defined(DEBUG) || defined(_DEBUG) + #define NEDMALLOC_DEBUG 1 + #else + #define NEDMALLOC_DEBUG 0 + #endif +#endif +/* We need to consistently define DEBUG=0|1, _DEBUG and NDEBUG for dlmalloc */ +#undef DEBUG +#undef _DEBUG +#if NEDMALLOC_DEBUG + #define _DEBUG + #define DEBUG 1 +#else + #define DEBUG 0 +#endif +#ifdef NDEBUG /* Disable assert checking on release builds */ + #undef DEBUG + #undef _DEBUG +#endif +/* The default of 64Kb means we spend too much time kernel-side */ +#ifndef DEFAULT_GRANULARITY +#define DEFAULT_GRANULARITY (1*1024*1024) +#if DEBUG +#define DEFAULT_GRANULARITY_ALIGNED +#endif +#endif +/*#define USE_SPIN_LOCKS 0*/ + + +#include "malloc.c.h" +#ifdef NDEBUG /* Disable assert checking on release builds */ + #undef DEBUG +#elif !NEDMALLOC_DEBUG + #ifdef __GNUC__ + #warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed. + #elif defined(_MSC_VER) + #pragma message(__FILE__ ": WARNING: DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.") + #endif +#endif + +/* The maximum concurrent threads in a pool possible */ +#ifndef MAXTHREADSINPOOL +#define MAXTHREADSINPOOL 16 +#endif +/* The maximum number of threadcaches which can be allocated */ +#ifndef THREADCACHEMAXCACHES +#define THREADCACHEMAXCACHES 256 +#endif +/* The maximum size to be allocated from the thread cache */ +#ifndef THREADCACHEMAX +#define THREADCACHEMAX 8192 +#endif +#if 0 +/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */ +#define THREADCACHEMAXBINS ((13-4)*2) +#else +/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */ +#define THREADCACHEMAXBINS (13-4) +#endif +/* Point at which the free space in a thread cache is garbage collected */ +#ifndef THREADCACHEMAXFREESPACE +#define THREADCACHEMAXFREESPACE (512*1024) +#endif + + +#ifdef WIN32 + #define TLSVAR DWORD + #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k)) + #define TLSFREE(k) (!TlsFree(k)) + #define TLSGET(k) TlsGetValue(k) + #define TLSSET(k, a) (!TlsSetValue(k, a)) + #ifdef DEBUG +static LPVOID ChkedTlsGetValue(DWORD idx) +{ + LPVOID ret=TlsGetValue(idx); + assert(S_OK==GetLastError()); + return ret; +} + #undef TLSGET + #define TLSGET(k) ChkedTlsGetValue(k) + #endif +#else + #define TLSVAR pthread_key_t + #define TLSALLOC(k) pthread_key_create(k, 0) + #define TLSFREE(k) pthread_key_delete(k) + #define TLSGET(k) pthread_getspecific(k) + #define TLSSET(k, a) pthread_setspecific(k, a) +#endif + +#if defined(__cplusplus) +#if !defined(NO_NED_NAMESPACE) +namespace nedalloc { +#else +extern "C" { +#endif +#endif + +#if USE_ALLOCATOR==0 +static void *unsupported_operation(const char *opname) THROWSPEC +{ + fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", opname); + abort(); + return 0; +} +static size_t mspacecounter=(size_t) 0xdeadbeef; +#endif +#ifndef ENABLE_FAST_HEAP_DETECTION +static void *RESTRICT leastusedaddress; +static size_t largestusedblock; +#endif + +static FORCEINLINE void *CallMalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC +{ + void *RESTRICT ret=0; + size_t _alignment=alignment; +#if USE_MAGIC_HEADERS + size_t *_ret=0; + size+=alignment+3*sizeof(size_t); + _alignment=0; +#endif +#if USE_ALLOCATOR==0 + ret=_alignment ? 
+#ifdef _MSC_VER + /* This is the MSVCRT equivalent */ + _aligned_malloc(size, _alignment) +#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) + /* This is the glibc/ptmalloc2/dlmalloc/BSD libc equivalent. */ + memalign(_alignment, size) +#else +#error Cannot aligned allocate with the memory allocator of an unknown system! +#endif + : malloc(size); +#elif USE_ALLOCATOR==1 + ret=_alignment ? mspace_memalign((mstate) mspace, _alignment, size) : mspace_malloc((mstate) mspace, size); +#ifndef ENABLE_FAST_HEAP_DETECTION + if(ret) + { + size_t truesize=chunksize(mem2chunk(ret)); + if(!leastusedaddress || (void *)((mstate) mspace)->least_addrleast_addr; + if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1); + } +#endif +#endif + if(!ret) return 0; +#if USE_MAGIC_HEADERS + _ret=(size_t *) ret; + ret=(void *)(_ret+3); + if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1)); + for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *)"NEDMALOC"; + _ret[0]=(size_t) mspace; + _ret[1]=size-3*sizeof(size_t); +#endif + return ret; +} + +static FORCEINLINE void *CallCalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC +{ + void *RESTRICT ret=0; +#if USE_MAGIC_HEADERS + size_t *_ret=0; + size+=alignment+3*sizeof(size_t); +#endif +#if USE_ALLOCATOR==0 + ret=calloc(1, size); +#elif USE_ALLOCATOR==1 + ret=mspace_calloc((mstate) mspace, 1, size); +#ifndef ENABLE_FAST_HEAP_DETECTION + if(ret) + { + size_t truesize=chunksize(mem2chunk(ret)); + if(!leastusedaddress || (void *)((mstate) mspace)->least_addrleast_addr; + if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1); + } +#endif +#endif + if(!ret) return 0; +#if USE_MAGIC_HEADERS + _ret=(size_t *) ret; + ret=(void *)(_ret+3); + if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1)); + for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC"; + _ret[0]=(size_t) mspace; + _ret[1]=size-3*sizeof(size_t); +#endif + return ret; +} + +static FORCEINLINE void *CallRealloc(void *RESTRICT mspace, void *RESTRICT mem, int isforeign, size_t oldsize, size_t newsize) THROWSPEC +{ + void *RESTRICT ret=0; +#if USE_MAGIC_HEADERS + mstate oldmspace=0; + size_t *_ret=0, *_mem=(size_t *) mem-3; +#endif + if(isforeign) + { /* Transfer */ +#if USE_MAGIC_HEADERS + assert(_mem[0]!=*(size_t *) "NEDMALOC"); +#endif + if((ret=CallMalloc(mspace, newsize, 0))) + { +#if defined(DEBUG) + printf("*** nedmalloc frees system allocated block %p\n", mem); +#endif + memcpy(ret, mem, oldsize=_mem[2]); + for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc"); + mem=(void *)(++_mem); +#endif +#if USE_ALLOCATOR==0 + ret=realloc(mem, newsize); +#elif USE_ALLOCATOR==1 + ret=mspace_realloc((mstate) mspace, mem, newsize); +#ifndef ENABLE_FAST_HEAP_DETECTION + if(ret) + { + size_t truesize=chunksize(mem2chunk(ret)); + if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1); + } +#endif +#endif + if(!ret) + { /* Put it back the way it was */ +#if USE_MAGIC_HEADERS + for(; *_mem==0; *_mem++=*(size_t *) "NEDMALOC"); +#endif + return 0; + } +#if USE_MAGIC_HEADERS + _ret=(size_t *) ret; + ret=(void *)(_ret+3); + for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC"; + _ret[0]=(size_t) mspace; + _ret[1]=newsize-3*sizeof(size_t); +#endif + return ret; +} + +static FORCEINLINE void CallFree(void *RESTRICT mspace, 
void *RESTRICT mem, int isforeign) THROWSPEC +{ +#if USE_MAGIC_HEADERS + mstate oldmspace=0; + size_t *_mem=(size_t *) mem-3, oldsize=0; +#endif + if(isforeign) + { +#if USE_MAGIC_HEADERS + assert(_mem[0]!=*(size_t *) "NEDMALOC"); +#endif +#if defined(DEBUG) + printf("*** nedmalloc frees system allocated block %p\n", mem); +#endif + free(mem); + return; + } +#if USE_MAGIC_HEADERS + assert(_mem[0]==*(size_t *) "NEDMALOC"); + oldmspace=(mstate) _mem[1]; + oldsize=_mem[2]; + for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc"); + mem=(void *)(++_mem); +#endif +#if USE_ALLOCATOR==0 + free(mem); +#elif USE_ALLOCATOR==1 + mspace_free((mstate) mspace, mem); +#endif +} + +static NEDMALLOCNOALIASATTR mstate nedblkmstate(void *RESTRICT mem) THROWSPEC +{ + if(mem) + { +#if USE_MAGIC_HEADERS + size_t *_mem=(size_t *) mem-3; + if(_mem[0]==*(size_t *) "NEDMALOC") + { + return (mstate) _mem[1]; + } + else return 0; +#else +#if USE_ALLOCATOR==0 + /* Fail everything */ + return 0; +#elif USE_ALLOCATOR==1 +#ifdef ENABLE_FAST_HEAP_DETECTION +#ifdef WIN32 + /* On Windows for RELEASE both x86 and x64 the NT heap precedes each block with an eight byte header + which looks like: + normal: 4 bytes of size, 4 bytes of [char < 64, char < 64, char < 64 bit 0 always set, char random ] + mmaped: 4 bytes of size 4 bytes of [zero, zero, 0xb, zero ] + + On Windows for DEBUG both x86 and x64 the preceding four bytes is always 0xfdfdfdfd (no man's land). + */ +#pragma pack(push, 1) + struct _HEAP_ENTRY + { + USHORT Size; + USHORT PreviousSize; + UCHAR Cookie; /* SegmentIndex */ + UCHAR Flags; /* always bit 0 (HEAP_ENTRY_BUSY). bit 1=(HEAP_ENTRY_EXTRA_PRESENT), bit 2=normal block (HEAP_ENTRY_FILL_PATTERN), bit 3=mmap block (HEAP_ENTRY_VIRTUAL_ALLOC). Bit 4 (HEAP_ENTRY_LAST_ENTRY) could be set */ + UCHAR UnusedBytes; + UCHAR SmallTagIndex; /* fastbin index. Always one of 0x02, 0x03, 0x04 < 0x80 */ + } *RESTRICT he=((struct _HEAP_ENTRY *) mem)-1; +#pragma pack(pop) + unsigned int header=((unsigned int *)mem)[-1], mask1=0x8080E100, result1, mask2=0xFFFFFF06, result2; + result1=header & mask1; /* Positive testing for NT heap */ + result2=header & mask2; /* Positive testing for dlmalloc */ + if(result1==0x00000100 && result2!=0x00000102) + { /* This is likely a NT heap block */ + return 0; + } +#endif +#ifdef __linux__ + /* On Linux glibc uses ptmalloc2 (really dlmalloc) just as we do, but prev_foot contains rubbish + when the preceding block is allocated because ptmalloc2 finds the local mstate by rounding the ptr + down to the nearest megabyte. It's like dlmalloc with FOOTERS disabled. */ + mchunkptr p=mem2chunk(mem); + mstate fm=get_mstate_for(p); + /* If it's a ptmalloc2 block, fm is likely to be some crazy value */ + if(!is_aligned(fm)) return 0; + if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0; + if(ok_magic(fm)) + return fm; + else + return 0; + if(1) { } +#endif + else + { + mchunkptr p=mem2chunk(mem); + mstate fm=get_mstate_for(p); + assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */ + if(ok_magic(fm)) + return fm; + } +#else +//#ifdef WIN32 +// __try +//#endif + { + /* We try to return zero here if it isn't one of our own blocks, however + the current block annotation scheme used by dlmalloc makes it impossible + to be absolutely sure of avoiding a segfault. 
+ + mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block; + mchunkptr->head = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS + FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently + in use unless mmap), bit 2 is UNUSED and currently is always zero. + */ + register void *RESTRICT leastusedaddress_=leastusedaddress; /* Cache these to avoid register reloading */ + register size_t largestusedblock_=largestusedblock; + if(!is_aligned(mem)) return 0; /* Would fail very rarely as all allocators return aligned blocks */ + if(memhead & FLAG4_BIT)) return 0; + /* Reduced uncertainty by 0.5^2 = 25.0% */ + /* size should never exceed largestusedblock */ + if(chunksize(p)>largestusedblock_) return 0; + /* Reduced uncertainty by a minimum of 0.5^3 = 12.5%, maximum 0.5^16 = 0.0015% */ + /* Having sanity checked prev_foot and head, check next block */ + if(!ismmapped && (!next_pinuse(p) || (next_chunk(p)->head & FLAG4_BIT))) return 0; + /* Reduced uncertainty by 0.5^5 = 3.13% or 0.5^18 = 0.00038% */ + #if 0 + /* If previous block is free, check that its next block pointer equals us */ + if(!ismmapped && !pinuse(p)) + if(next_chunk(prev_chunk(p))!=p) return 0; + /* We could start comparing prev_foot's for similarity but it starts getting slow. */ + #endif + fm = get_mstate_for(p); + if(!is_aligned(fm) || (void *)fm=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0; + assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */ + if(ok_magic(fm)) + return fm; + } + } +//#ifdef WIN32 +// __except(1) { } +//#endif +#endif +#endif +#endif + } + return 0; +} +NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC +{ + if(mem) + { + if(isforeign) *isforeign=1; +#if USE_MAGIC_HEADERS + { + size_t *_mem=(size_t *) mem-3; + if(_mem[0]==*(size_t *) "NEDMALOC") + { + mstate mspace=(mstate) _mem[1]; + size_t size=_mem[2]; + if(isforeign) *isforeign=0; + return size; + } + } +#elif USE_ALLOCATOR==1 + if(nedblkmstate(mem)) + { + mchunkptr p=mem2chunk(mem); + if(isforeign) *isforeign=0; + return chunksize(p)-overhead_for(p); + } +#ifdef DEBUG + else + { + int a=1; /* Set breakpoints here if needed */ + } +#endif +#endif +#if defined(ENABLE_TOLERANT_NEDMALLOC) || USE_ALLOCATOR==0 +#ifdef _MSC_VER + /* This is the MSVCRT equivalent */ + return _msize(mem); +#elif defined(__linux__) + /* This is the glibc/ptmalloc2/dlmalloc equivalent. */ + return malloc_usable_size(mem); +#elif defined(__FreeBSD__) || defined(__APPLE__) + /* This is the BSD libc equivalent. */ + return malloc_size(mem); +#else +#error Cannot tolerate the memory allocator of an unknown system! 
+#endif +#endif + } + return 0; +} + +NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC { nedpsetvalue((nedpool *) 0, v); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc((nedpool *) 0, size); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc((nedpool *) 0, no, size); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc((nedpool *) 0, mem, size); } +NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC { nedpfree((nedpool *) 0, mem); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign((nedpool *) 0, alignment, bytes); } +NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo((nedpool *) 0); } +NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt((nedpool *) 0, parno, value); } +NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim((nedpool *) 0, pad); } +void nedmalloc_stats() THROWSPEC { nedpmalloc_stats((nedpool *) 0); } +NEDMALLOCNOALIASATTR size_t nedmalloc_footprint() THROWSPEC { return nedpmalloc_footprint((nedpool *) 0); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks); } + +struct threadcacheblk_t; +typedef struct threadcacheblk_t threadcacheblk; +struct threadcacheblk_t +{ /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */ +#ifdef FULLSANITYCHECKS + unsigned int magic; +#endif + unsigned int lastUsed, size; + threadcacheblk *next, *prev; +}; +typedef struct threadcache_t +{ +#ifdef FULLSANITYCHECKS + unsigned int magic1; +#endif + int mymspace; /* Last mspace entry this thread used */ + long threadid; + unsigned int mallocs, frees, successes; + size_t freeInCache; /* How much free space is stored in this cache */ + threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2]; +#ifdef FULLSANITYCHECKS + unsigned int magic2; +#endif +} threadcache; +struct nedpool_t +{ + MLOCK_T mutex; + void *uservalue; + int threads; /* Max entries in m to use */ + threadcache *caches[THREADCACHEMAXCACHES]; + TLSVAR mycache; /* Thread cache for this thread. 
0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
+ mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */
+};
+static nedpool syspool;
+
+static FORCEINLINE NEDMALLOCNOALIASATTR unsigned int size2binidx(size_t _size) THROWSPEC
+{ /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */
+ unsigned int topbit, size=(unsigned int)(_size>>4);
+ /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */
+
+#if defined(__GNUC__)
+ topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+ {
+ unsigned long bsrTopBit;
+
+ _BitScanReverse(&bsrTopBit, size);
+
+ topbit = bsrTopBit;
+ }
+#else
+#if 0
+ union {
+ unsigned asInt[2];
+ double asDouble;
+ };
+ int n;
+
+ asDouble = (double)size + 0.5;
+ topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
+#else
+ {
+ unsigned int x=size;
+ x = x | (x >> 1);
+ x = x | (x >> 2);
+ x = x | (x >> 4);
+ x = x | (x >> 8);
+ x = x | (x >>16);
+ x = ~x;
+ x = x - ((x >> 1) & 0x55555555);
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x + (x >> 4)) & 0x0F0F0F0F;
+ x = x + (x << 8);
+ x = x + (x << 16);
+ topbit=31 - (x >> 24);
+ }
+#endif
+#endif
+ return topbit;
+}
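+/* Editor's worked example (not used by the allocator): size2binidx() maps an
+ allocation size to the position of the top set bit of size>>4, so each bin
+ covers one power-of-two size band. */
+#if 0
+ assert(size2binidx(16)==0); /* 16>>4 == 1, top bit at position 0 */
+ assert(size2binidx(31)==0); /* 31>>4 == 1 */
+ assert(size2binidx(32)==1); /* 32>>4 == 2, top bit at position 1 */
+ assert(size2binidx(4096)==8); /* 4096>>4 == 256 == 1<<8 */
+#endif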
+
+#ifdef FULLSANITYCHECKS
+static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
+{
+ assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
+ if(ptr[0] && ptr[1])
+ {
+ assert(nedblksize(0, ptr[0])>=sizeof(threadcacheblk));
+ assert(nedblksize(0, ptr[1])>=sizeof(threadcacheblk));
+ assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
+ assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
+ assert(!ptr[0]->prev);
+ assert(!ptr[1]->next);
+ if(ptr[0]==ptr[1])
+ {
+ assert(!ptr[0]->next);
+ assert(!ptr[1]->prev);
+ }
+ }
+}
+static void tcfullsanitycheck(threadcache *tc) THROWSPEC
+{
+ threadcacheblk **tcbptr=tc->bins;
+ int n;
+ for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
+ {
+ threadcacheblk *b, *ob=0;
+ tcsanitycheck(tcbptr);
+ for(b=tcbptr[0]; b; ob=b, b=b->next)
+ {
+ assert(*(unsigned int *) "NEDN"==b->magic);
+ assert(!ob || ob->next==b);
+ assert(!ob || b->prev==ob);
+ }
+ }
+}
+#endif
+
+static NOINLINE void RemoveCacheEntries(nedpool *RESTRICT p, threadcache *RESTRICT tc, unsigned int age) THROWSPEC
+{
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+ if(tc->freeInCache)
+ {
+ threadcacheblk **tcbptr=tc->bins;
+ int n;
+ for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
+ {
+ threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */
+ /*tcsanitycheck(tcbptr);*/
+ for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
+ {
+ threadcacheblk *f=*tcb;
+ size_t blksize=f->size; /*nedblksize(f);*/
+ assert(blksize<=nedblksize(0, f));
+ assert(blksize);
+#ifdef FULLSANITYCHECKS
+ assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
+#endif
+ *tcb=(*tcb)->prev;
+ if(*tcb)
+ (*tcb)->next=0;
+ else
+ *tcbptr=0;
+ tc->freeInCache-=blksize;
+ assert((long) tc->freeInCache>=0);
+ CallFree(0, f, 0);
+ /*tcsanitycheck(tcbptr);*/
+ }
+ }
+ }
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+}
+static void DestroyCaches(nedpool *RESTRICT p) THROWSPEC
+{
+ if(p->caches)
+ {
+ threadcache *tc;
+ int n;
+ for(n=0; n<THREADCACHEMAXCACHES; n++)
+ {
+ if((tc=p->caches[n]))
+ {
+ tc->frees++;
+ RemoveCacheEntries(p, tc, 0);
+ assert(!tc->freeInCache);
+ tc->mymspace=-1;
+ tc->threadid=0;
+ CallFree(0, tc, 0);
+ p->caches[n]=0;
+ }
+ }
+ }
+}
+
+static NOINLINE threadcache *AllocCache(nedpool *RESTRICT p) THROWSPEC
+{
+ threadcache *tc=0;
+ int n, end;
+ ACQUIRE_LOCK(&p->mutex);
+ for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
+ if(THREADCACHEMAXCACHES==n)
+ { /* List exhausted, so disable for this thread */
+ RELEASE_LOCK(&p->mutex);
+ return 0;
+ }
+ tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], sizeof(threadcache), 0);
+ if(!tc)
+ {
+ RELEASE_LOCK(&p->mutex);
+ return 0;
+ }
+#ifdef FULLSANITYCHECKS
+ tc->magic1=*(unsigned int *)"NEDMALC1";
+ tc->magic2=*(unsigned int *)"NEDMALC2";
+#endif
+ tc->threadid=(long)(size_t)CURRENT_THREAD;
+ for(end=0; p->m[end]; end++);
+ tc->mymspace=abs(tc->threadid) % end;
+ RELEASE_LOCK(&p->mutex);
+ if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
+ return tc;
+}
+
+static void *threadcache_malloc(nedpool *RESTRICT p, threadcache *RESTRICT tc, size_t *RESTRICT _size) THROWSPEC
+{
+ void *RESTRICT ret=0;
+ size_t size=*_size, blksize=0;
+ unsigned int bestsize;
+ unsigned int idx=size2binidx(size);
+ threadcacheblk *RESTRICT blk, **RESTRICT binsptr;
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+ /* Calculate best fit bin size */
+ bestsize=1<<(idx+4);
+#if 0
+ /* Finer grained bin fit */
+ idx<<=1;
+ if(size>bestsize)
+ {
+ idx++;
+ bestsize+=bestsize>>1;
+ }
+ if(size>bestsize)
+ {
+ idx++;
+ bestsize=1<<(4+(idx>>1));
+ }
+#else
+ if(size>bestsize)
+ {
+ idx++;
+ bestsize<<=1;
+ }
+#endif
+ assert(bestsize>=size);
+ if(size<bestsize) size=bestsize;
+ binsptr=&tc->bins[idx*2];
+ /* Try to match close, but move up a bin if necessary */
+ blk=*binsptr;
+ if(!blk || blk->size<size)
+ { /* Bump it up a bin */
+ if(idx<THREADCACHEMAXBINS)
+ {
+ idx++;
+ binsptr+=2;
+ blk=*binsptr;
+ }
+ }
+ if(blk)
+ {
+ blksize=blk->size; /*nedblksize(blk);*/
+ assert(nedblksize(0, blk)>=blksize);
+ assert(blksize>=size);
+ if(blk->next)
+ blk->next->prev=0;
+ *binsptr=blk->next;
+ if(!*binsptr)
+ binsptr[1]=0;
+#ifdef FULLSANITYCHECKS
+ blk->magic=0;
+#endif
+ assert(binsptr[0]!=blk && binsptr[1]!=blk);
+ assert(nedblksize(0, blk)>=sizeof(threadcacheblk) && nedblksize(0, blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
+ /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) _size);*/
+ ret=(void *) blk;
+ }
+ ++tc->mallocs;
+ if(ret)
+ {
+ assert(blksize>=size);
+ ++tc->successes;
+ tc->freeInCache-=blksize;
+ assert((long) tc->freeInCache>=0);
+ }
+#if defined(DEBUG) && 0
+ if(!(tc->mallocs & 0xfff))
+ {
+ printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
+ (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
+ }
+#endif
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+ *_size=size;
+ return ret;
+}
+static NOINLINE void ReleaseFreeInCache(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace) THROWSPEC
+{
+ unsigned int age=THREADCACHEMAXFREESPACE/8192;
+ /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
+ while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
+ {
+ RemoveCacheEntries(p, tc, age);
+ /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
+ age>>=1;
+ }
+ /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
+}
+static void threadcache_free(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, void *RESTRICT mem, size_t size) THROWSPEC
+{
+ unsigned int bestsize;
+ unsigned int idx=size2binidx(size);
+ threadcacheblk **RESTRICT binsptr, *RESTRICT tck=(threadcacheblk *) mem;
+ assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
+#ifdef DEBUG
+ /* Make sure this is a valid memory block */
+ assert(nedblksize(0, mem));
+#endif
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+ /* Calculate best fit bin size */
+ bestsize=1<<(idx+4);
+#if 0
+ /* Finer grained bin fit */
+ idx<<=1;
+ if(size>bestsize)
+ {
+ unsigned int 
biggerbestsize=bestsize+bestsize<<1; + if(size>=biggerbestsize) + { + idx++; + bestsize=biggerbestsize; + } + } +#endif + if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */ + size=bestsize; + binsptr=&tc->bins[idx*2]; + assert(idx<=THREADCACHEMAXBINS); + if(tck==*binsptr) + { + fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", tck); + abort(); + } +#ifdef FULLSANITYCHECKS + tck->magic=*(unsigned int *) "NEDN"; +#endif + tck->lastUsed=++tc->frees; + tck->size=(unsigned int) size; + tck->next=*binsptr; + tck->prev=0; + if(tck->next) + tck->next->prev=tck; + else + binsptr[1]=tck; + assert(!*binsptr || (*binsptr)->size==tck->size); + *binsptr=tck; + assert(tck==tc->bins[idx*2]); + assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck); + /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/ + tc->freeInCache+=size; +#ifdef FULLSANITYCHECKS + tcfullsanitycheck(tc); +#endif +#if 1 + if(tc->freeInCache>=THREADCACHEMAXFREESPACE) + ReleaseFreeInCache(p, tc, mymspace); +#endif +} + + + + +static NOINLINE int InitPool(nedpool *RESTRICT p, size_t capacity, int threads) THROWSPEC +{ /* threads is -1 for system pool */ + ensure_initialization(); + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if(p->threads) goto done; + if(INITIAL_LOCK(&p->mutex)) goto err; + if(TLSALLOC(&p->mycache)) goto err; +#if USE_ALLOCATOR==0 + p->m[0]=(mstate) mspacecounter++; +#elif USE_ALLOCATOR==1 + if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err; + p->m[0]->extp=p; +#endif + p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads; +done: + RELEASE_MALLOC_GLOBAL_LOCK(); + return 1; +err: + if(threads<0) + abort(); /* If you can't allocate for system pool, we're screwed */ + DestroyCaches(p); + if(p->m[0]) + { +#if USE_ALLOCATOR==1 + destroy_mspace(p->m[0]); +#endif + p->m[0]=0; + } + if(p->mycache) + { + if(TLSFREE(p->mycache)) abort(); + p->mycache=0; + } + RELEASE_MALLOC_GLOBAL_LOCK(); + return 0; +} +static NOINLINE mstate FindMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int *RESTRICT lastUsed, size_t size) THROWSPEC +{ /* Gets called when thread's last used mspace is in use. 
The strategy
+ is to run through the list of all available mspaces looking for an
+ unlocked one and if we fail, we create a new one so long as we don't
+ exceed p->threads */
+ int n, end;
+ for(n=end=*lastUsed+1; p->m[n]; end=++n)
+ {
+ if(TRY_LOCK(&p->m[n]->mutex)) goto found;
+ }
+ for(n=0; n<*lastUsed && p->m[n]; n++)
+ {
+ if(TRY_LOCK(&p->m[n]->mutex)) goto found;
+ }
+ if(end<p->threads)
+ {
+ mstate temp;
+#if USE_ALLOCATOR==0
+ temp=(mstate) mspacecounter++;
+#elif USE_ALLOCATOR==1
+ if(!(temp=(mstate) create_mspace(size, 1)))
+ goto badexit;
+#endif
+ /* Now we're ready to modify the lists, we lock */
+ ACQUIRE_LOCK(&p->mutex);
+ while(p->m[end] && end<p->threads)
+ end++;
+ if(end>=p->threads)
+ { /* Drat, must destroy it now */
+ RELEASE_LOCK(&p->mutex);
+#if USE_ALLOCATOR==1
+ destroy_mspace((mstate) temp);
+#endif
+ goto badexit;
+ }
+ /* We really want to make sure this goes into memory now but we
+ have to be careful of breaking aliasing rules, so write it twice */
+ *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp;
+ ACQUIRE_LOCK(&p->m[end]->mutex);
+ /*printf("Created mspace idx %d\n", end);*/
+ RELEASE_LOCK(&p->mutex);
+ n=end;
+ goto found;
+ }
+ /* Let it lock on the last one it used */
+badexit:
+ ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
+ return p->m[*lastUsed];
+found:
+ *lastUsed=n;
+ if(tc)
+ tc->mymspace=n;
+ else
+ {
+ if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
+ }
+ return p->m[n];
+}
+
+typedef struct PoolList_t
+{
+ size_t size; /* Size of list */
+ size_t length; /* Actual entries in list */
+#ifdef DEBUG
+ nedpool *list[1]; /* Force testing of list expansion */
+#else
+ nedpool *list[16];
+#endif
+} PoolList;
+static MLOCK_T poollistlock;
+static PoolList *poollist;
+NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
+{
+ nedpool *ret=0;
+ if(!poollist)
+ {
+ PoolList *newpoollist=0;
+ if(!(newpoollist=(PoolList *) nedpcalloc(0, 1, sizeof(PoolList)+sizeof(nedpool *)))) return 0;
+ INITIAL_LOCK(&poollistlock);
+ ACQUIRE_LOCK(&poollistlock);
+ poollist=newpoollist;
+ poollist->size=sizeof(poollist->list)/sizeof(nedpool *);
+ }
+ else
+ ACQUIRE_LOCK(&poollistlock);
+ if(poollist->length==poollist->size)
+ {
+ PoolList *newpoollist=0;
+ size_t newsize=0;
+ newsize=sizeof(PoolList)+(poollist->size+1)*sizeof(nedpool *);
+ if(!(newpoollist=(PoolList *) nedprealloc(0, poollist, newsize))) goto badexit;
+ poollist=newpoollist;
+ memset(&poollist->list[poollist->size], 0, newsize-((size_t)&poollist->list[poollist->size]-(size_t)&poollist->list[0]));
+ poollist->size=((newsize-((char *)&poollist->list[0]-(char *)poollist))/sizeof(nedpool *))-1;
+ assert(poollist->size>poollist->length);
+ }
+ if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) goto badexit;
+ if(!InitPool(ret, capacity, threads))
+ {
+ nedpfree(0, ret);
+ goto badexit;
+ }
+ poollist->list[poollist->length++]=ret;
+badexit:
+ RELEASE_LOCK(&poollistlock);
+ return ret;
+}
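+/* Editor's usage sketch (hypothetical caller): create a private pool sized for
+ roughly 1Mb of data and up to four concurrent threads, allocate from it, then
+ tear it down. neddestroypool() below releases everything still allocated in it. */
+#if 0
+ nedpool *pool=nedcreatepool(1024*1024, 4);
+ void *blk=nedpmalloc(pool, 256);
+ nedpfree(pool, blk);
+ neddestroypool(pool);
+#endif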
+void neddestroypool(nedpool *p) THROWSPEC
+{
+ unsigned int n;
+ ACQUIRE_LOCK(&p->mutex);
+ DestroyCaches(p);
+ for(n=0; p->m[n]; n++)
+ {
+#if USE_ALLOCATOR==1
+ destroy_mspace(p->m[n]);
+#endif
+ p->m[n]=0;
+ }
+ RELEASE_LOCK(&p->mutex);
+ if(TLSFREE(p->mycache)) abort();
+ nedpfree(0, p);
+ ACQUIRE_LOCK(&poollistlock);
+ assert(poollist);
+ for(n=0; n<poollist->length && poollist->list[n]!=p; n++);
+ assert(n!=poollist->length);
+ memmove(&poollist->list[n], &poollist->list[n+1], (size_t)&poollist->list[poollist->length]-(size_t)&poollist->list[n]);
+ if(!--poollist->length)
+ {
+ assert(!poollist->list[0]);
+ nedpfree(0, poollist);
+ poollist=0;
+ }
+ RELEASE_LOCK(&poollistlock);
+}
+void neddestroysyspool() THROWSPEC
+{
+ nedpool *p=&syspool;
+ int n;
+ ACQUIRE_LOCK(&p->mutex);
+ DestroyCaches(p);
+ for(n=0; p->m[n]; n++)
+ {
+#if USE_ALLOCATOR==1
+ destroy_mspace(p->m[n]);
+#endif
+ p->m[n]=0;
+ }
+ /* Render syspool unusable */
+ for(n=0; n<THREADCACHEMAXCACHES; n++)
+ p->caches[n]=(threadcache *)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
+ for(n=0; n<MAXTHREADSINPOOL+1; n++)
+ p->m[n]=(mstate)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
+ if(TLSFREE(p->mycache)) abort();
+ RELEASE_LOCK(&p->mutex);
+}
+nedpool **nedpoollist() THROWSPEC
+{
+ nedpool **ret=0;
+ if(poollist)
+ {
+ ACQUIRE_LOCK(&poollistlock);
+ if(!(ret=(nedpool **) nedmalloc((poollist->length+1)*sizeof(nedpool *)))) goto badexit;
+ memcpy(ret, poollist->list, (poollist->length+1)*sizeof(nedpool *));
+badexit:
+ RELEASE_LOCK(&poollistlock);
+ }
+ return ret;
+}
+
+void nedpsetvalue(nedpool *p, void *v) THROWSPEC
+{
+ if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+ p->uservalue=v;
+}
+void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
+{
+ nedpool *np=0;
+ mstate fm=nedblkmstate(mem);
+ if(!fm || !fm->extp) return 0;
+ np=(nedpool *) fm->extp;
+ if(p) *p=np;
+ return np->uservalue;
+}
+
+void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC
+{
+ int mycache;
+ if(!p)
+ {
+ p=&syspool;
+ if(!syspool.threads) InitPool(&syspool, 0, -1);
+ }
+ mycache=(int)(size_t) TLSGET(p->mycache);
+ if(!mycache)
+ { /* Set to mspace 0 */
+ if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort();
+ }
+ else if(mycache>0)
+ { /* Set to last used mspace */
+ threadcache *tc=p->caches[mycache-1];
+#if defined(DEBUG)
+ printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
+ 100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
+#endif
+ if(disable && TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
+ tc->frees++;
+ RemoveCacheEntries(p, tc, 0);
+ assert(!tc->freeInCache);
+ if(disable)
+ {
+ tc->mymspace=-1;
+ tc->threadid=0;
+ CallFree(0, p->caches[mycache-1], 0);
+ p->caches[mycache-1]=0;
+ }
+ }
+}
+void neddisablethreadcache(nedpool *p) THROWSPEC
+{
+ nedtrimthreadcache(p, 1);
+}
+
+#define GETMSPACE(m,p,tc,ms,s,action) \
+ do \
+ { \
+ mstate m = GetMSpace((p),(tc),(ms),(s)); \
+ action; \
+ if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); } \
+ } while (0)
+
+static FORCEINLINE mstate GetMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, size_t size) THROWSPEC
+{ /* Returns a locked and ready for use mspace */
+ mstate m=p->m[mymspace];
+ assert(m);
+#if USE_ALLOCATOR==1
+ if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);
+ /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
+#endif
+ return m;
+}
+static NOINLINE void GetThreadCache_cold1(nedpool *RESTRICT *RESTRICT p) THROWSPEC
+{
+ *p=&syspool;
+ if(!syspool.threads) InitPool(&syspool, 0, -1);
+}
+static NOINLINE void GetThreadCache_cold2(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, int mycache) THROWSPEC
+{
+ if(!mycache)
+ { /* Need to allocate a new cache */
+ *tc=AllocCache(*p);
+ if(!*tc)
+ { /* Disable */
+ if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort();
+ *mymspace=0;
+ }
+ else
+ *mymspace=(*tc)->mymspace;
+ }
+ else
+ { /* Cache disabled, but we do have an assigned thread pool */
+ *tc=0;
+ *mymspace=-mycache-1;
+ }
+}
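+/* Editor's note: p->mycache is a TLS variable with a three-way encoding (see
+ the nedpool_t comment above); the decode below mirrors GetThreadCache() and
+ GetThreadCache_cold2(). */
+#if 0
+ int mycache=(int)(size_t) TLSGET(p->mycache);
+ if(mycache>0) { /* threadcache p->caches[mycache-1] is assigned to this thread */ }
+ else if(mycache<0) { /* cache disabled; use mspace p->m[-mycache-1] directly */ }
+ else { /* unset: a cache is allocated on first use */ }
+#endif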
+static FORCEINLINE void GetThreadCache(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, size_t *RESTRICT size) THROWSPEC
+{
+ int mycache;
+ if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
+ if(!*p)
+ GetThreadCache_cold1(p);
+ assert(*p);
+ mycache=(int)(size_t) TLSGET((*p)->mycache);
+ if(mycache>0)
+ { /* Already have a cache */
+ *tc=(*p)->caches[mycache-1];
+ *mymspace=(*tc)->mymspace;
+ }
+ else GetThreadCache_cold2(p, tc, mymspace, mycache);
+ assert(*mymspace>=0);
+ assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid);
+#ifdef FULLSANITYCHECKS
+ if(*tc)
+ {
+ if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
+ {
+ abort();
+ }
+ }
+#endif
+}
+
+NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
+{
+ void *ret=0;
+ threadcache *tc;
+ int mymspace;
+ GetThreadCache(&p, &tc, &mymspace, &size);
+#if THREADCACHEMAX
+ if(tc && size<=THREADCACHEMAX)
+ { /* Use the thread cache */
+ ret=threadcache_malloc(p, tc, &size);
+ }
+#endif
+ if(!ret)
+ { /* Use this thread's mspace */
+ GETMSPACE(m, p, tc, mymspace, size,
+ ret=CallMalloc(m, size, 0));
+ }
+ return ret;
+}
+NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
+{
+ size_t rsize=size*no;
+ void *ret=0;
+ threadcache *tc;
+ int mymspace;
+ GetThreadCache(&p, &tc, &mymspace, &rsize);
+#if THREADCACHEMAX
+ if(tc && rsize<=THREADCACHEMAX)
+ { /* Use the thread cache */
+ if((ret=threadcache_malloc(p, tc, &rsize)))
+ memset(ret, 0, rsize);
+ }
+#endif
+ if(!ret)
+ { /* Use this thread's mspace */
+ GETMSPACE(m, p, tc, mymspace, rsize,
+ ret=CallCalloc(m, rsize, 0));
+ }
+ return ret;
+}
+NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
+{
+ void *ret=0;
+ threadcache *tc;
+ int mymspace, isforeign=1;
+ size_t memsize;
+ if(!mem) return nedpmalloc(p, size);
+ memsize=nedblksize(&isforeign, mem);
+ assert(memsize);
+ if(!memsize)
+ {
+ fprintf(stderr, "nedmalloc: nedprealloc() called with a block not created by nedmalloc!\n");
+ abort();
+ }
+ else if(size<=memsize && memsize-size<
+#ifdef DEBUG
+ 32
+#else
+ 1024
+#endif
+ ) /* If realloc size is within 1Kb smaller than existing, noop it */
+ return mem;
+ GetThreadCache(&p, &tc, &mymspace, &size);
+#if THREADCACHEMAX
+ if(tc && size && size<=THREADCACHEMAX)
+ { /* Use the thread cache */
+ if((ret=threadcache_malloc(p, tc, &size)))
+ {
+ memcpy(ret, mem, memsize<size ? memsize : size);
+ if(memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
+ threadcache_free(p, tc, mymspace, mem, memsize);
+ else
+ CallFree(0, mem, isforeign);
+ }
+ }
+#endif
+ if(!ret)
+ { /* Reallocs always happen in the mspace they happened in, so skip
+ locking the preferred mspace for this thread */
+ ret=CallRealloc(p->m[mymspace], mem, isforeign, memsize, size);
+ }
+ return ret;
+}
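+/* Editor's sketch of the shrink short-circuit in nedprealloc() above: shrinking
+ by less than 1Kb (32 bytes in DEBUG builds) returns the original block without
+ copying. Sizes here are illustrative. */
+#if 0
+ void *blk=nedmalloc(4096);
+ void *same=nedprealloc(0, blk, 3500); /* within 1Kb smaller: same==blk */
+ void *grown=nedprealloc(0, same, 8192); /* growing may move the block */
+ nedfree(grown);
+#endif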
+void nedpfree(nedpool *p, void *mem) THROWSPEC
+{ /* Frees always happen in the mspace they happened in, so skip
+ locking the preferred mspace for this thread */
+ threadcache *tc;
+ int mymspace, isforeign=1;
+ size_t memsize;
+ if(!mem)
+ { /* If you tried this on FreeBSD you'd be sorry! */
+#ifdef DEBUG
+ fprintf(stderr, "nedmalloc: WARNING nedpfree() called with zero. This is not portable behaviour!\n");
+#endif
+ return;
+ }
+ memsize=nedblksize(&isforeign, mem);
+ assert(memsize);
+ if(!memsize)
+ {
+ fprintf(stderr, "nedmalloc: nedpfree() called with a block not created by nedmalloc!\n");
+ abort();
+ }
+ GetThreadCache(&p, &tc, &mymspace, 0);
+#if THREADCACHEMAX
+ if(mem && tc && memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
+ threadcache_free(p, tc, mymspace, mem, memsize);
+ else
+#endif
+ CallFree(0, mem, isforeign);
+}
+NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
+{
+ void *ret;
+ threadcache *tc;
+ int mymspace;
+ GetThreadCache(&p, &tc, &mymspace, &bytes);
+ { /* Use this thread's mspace */
+ GETMSPACE(m, p, tc, mymspace, bytes,
+ ret=CallMalloc(m, bytes, alignment));
+ }
+ return ret;
+}
+struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC
+{
+ int n;
+ struct nedmallinfo ret={0};
+ if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+ for(n=0; p->m[n]; n++)
+ {
+#if USE_ALLOCATOR==1 && !NO_MALLINFO
+ struct mallinfo t=mspace_mallinfo(p->m[n]);
+ ret.arena+=t.arena;
+ ret.ordblks+=t.ordblks;
+ ret.hblkhd+=t.hblkhd;
+ ret.usmblks+=t.usmblks;
+ ret.uordblks+=t.uordblks;
+ ret.fordblks+=t.fordblks;
+ ret.keepcost+=t.keepcost;
+#endif
+ }
+ return ret;
+}
+int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
+{
+#if USE_ALLOCATOR==1
+ return mspace_mallopt(parno, value);
+#else
+ return 0;
+#endif
+}
+NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC
+{
+#if USE_ALLOCATOR==1
+ if(granularity) *granularity=mparams.granularity;
+ if(magic) *magic=mparams.magic;
+ return (void *) &syspool;
+#else
+ if(granularity) *granularity=0;
+ if(magic) *magic=0;
+ return 0;
+#endif
+}
+int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
+{
+ int n, ret=0;
+ if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+ for(n=0; p->m[n]; n++)
+ {
+#if USE_ALLOCATOR==1
+ ret+=mspace_trim(p->m[n], pad);
+#endif
+ }
+ return ret;
+}
+void nedpmalloc_stats(nedpool *p) THROWSPEC
+{
+ int n;
+ if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+ for(n=0; p->m[n]; n++)
+ {
+#if USE_ALLOCATOR==1
+ mspace_malloc_stats(p->m[n]);
+#endif
+ }
+}
+size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
+{
+ size_t ret=0;
+ int n;
+ if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+ for(n=0; p->m[n]; n++)
+ {
+#if USE_ALLOCATOR==1
+ ret+=mspace_footprint(p->m[n]);
+#endif
+ }
+ return ret;
+}
+NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
+{
+ void **ret;
+ threadcache *tc;
+ int mymspace;
+ GetThreadCache(&p, &tc, &mymspace, &elemsize);
+#if USE_ALLOCATOR==0
+ GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
+ ret=unsupported_operation("independent_calloc"));
+#elif USE_ALLOCATOR==1
+ GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
+ ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
+#endif
+ return ret;
+}
+NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
+{
+ void **ret;
+ threadcache *tc;
+ int mymspace;
+ size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
+ if(!adjustedsizes) return 0;
+ for(i=0; i<elems; i++)
+ adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
+ GetThreadCache(&p, &tc, &mymspace, 0);
+#if USE_ALLOCATOR==0
+ GETMSPACE(m, p, tc, mymspace, 0,
+ ret=unsupported_operation("independent_comalloc"));
+#elif USE_ALLOCATOR==1
+ GETMSPACE(m, p, tc, mymspace, 0,
+ ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
+#endif
+ return ret;
+}
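+/* Editor's usage sketch (hypothetical caller): allocate three differently
+ sized blocks in one call; each element is then individually freeable. */
+#if 0
+ size_t sizes[3]={24, 56, 120};
+ void *chunks[3];
+ if(nedpindependent_comalloc(0, 3, sizes, chunks))
+ {
+ nedfree(chunks[0]); nedfree(chunks[1]); nedfree(chunks[2]);
+ }
+#endif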