11 #ifndef TBCI_MALLOC_CACHE_H 12 #define TBCI_MALLOC_CACHE_H 14 #define MALLOC_CACHE 1 16 #ifndef HAVE_NO_NEW_HEADERS_BUG 22 #ifdef TBCI_MALLOC_STATS 26 #ifdef HAVE_MALLOC_ATTR 27 # define MALLOC __attribute__((malloc)) 32 #ifdef HAVE_MALLOC_H // memalign 36 #if defined(SMP) && !defined(TBCI_MALLOC_NOLOCK) 37 # define TBCI_MALLOC_LOCK 1 38 # include "tbci/smp.h" 41 #if defined(__GNUC__) && defined(SMP) && defined(TBCI_MALLOC_NOLOCK) 42 # warning "SMP support but no locking in memalloc_cache. Don't do memory\ 43 de/allocations from threads; otherwise you face corruption!" 49 # define DEBUG_PRINTF1(arg1) CSTD__ fprintf(stderr, arg1) 50 # define DEBUG_PRINTF2(a1,a2) CSTD__ fprintf(stderr, a1, a2) 51 # define DEBUG_PRINTF3(a1,a2,a3) CSTD__ fprintf(stderr, a1, a2, a3) 52 # define DEBUG_PRINTF4(a1,a2,a3,a4) CSTD__ fprintf(stderr, a1, a2, a3, a4) 54 # define DEBUG_PRINTF1(arg1) do {} while (0) 55 # define DEBUG_PRINTF2(a1,a2) do {} while (0) 56 # define DEBUG_PRINTF3(a1,a2,a3) do {} while (0) 57 # define DEBUG_PRINTF4(a1,a2,a3,a4) do {} while (0) 61 #ifndef TBCI_MALLOC_POOLSZ 62 # define TBCI_MALLOC_POOLSZ 8 64 #ifndef TBCI_MALLOC_LIMIT 65 # define TBCI_MALLOC_LIMIT 8192 68 #if !defined(__GNUC__) && !defined(__BORLANDC__) 69 # define DUMMY1 const T* const dummy 70 # define DUMMY2(TYPE) const TYPE * const dummy 71 # define DUMMY3(t) (t*)0 73 #else // GCC || Borland 80 # define MINLINE inline 84 #if defined(__INTEL_COMPILER) || defined(_MSC_VER) || defined(__BORLANDC__) || (defined(_SGI_SOURCE) && defined(_COMPILER_VERSION) && !defined(__GNUC__)) 86 # define NO_SINGLETON_PUBLIC public: 89 # define NO_SINGLETON_PUBLIC 100 template <
typename T>
110 {
return new T[sz]; }
111 static void dealloc(
const T*
const ptr,
const unsigned long sz)
112 {
delete [] (
T*)ptr; }
116 #ifndef TBCI_SIMD_ALIGN 118 # define TBCI_SIMD_ALIGN 64 119 # elif defined(__AVX__) 120 # define TBCI_SIMD_ALIGN 32 122 # define TBCI_SIMD_ALIGN 16 125 #ifndef TBCI_MALLOC_ALIGN_FACT 126 # define TBCI_MALLOC_ALIGN_FACT 1 129 #if !defined(__SSE2__) || defined(__x86_64__) || !defined(HAVE_MALLOC_H) 130 # define _MC_NEW(T,SZ) new T[SZ] 131 # define _MC_DELETE(T,PTR) delete[] (T*)PTR 133 # define SSE_VARS_MAY_BE_UNALIGNED 136 # define _MC_NEW(T,SZ) (T*)memalign(TBCI_SIMD_ALIGN*TBCI_MALLOC_ALIGN_FACT, SZ*sizeof(T)) 137 # define _MC_DELETE(T,PTR) free((T*)PTR) 140 #if defined(__x86_64__) || defined(__i386__) 141 # define INC(x) asm volatile ("lock incl %0" : "=m" (x): "m" (x)); 174 #if defined(HAVE_TLS) && !defined(TBCI_MALLOC_FORCESPINLOCK) 175 # define TBCI_MALLOC_NOSPINLOCK 178 #if !defined(TBCI_MALLOC_NOSPINLOCK) && defined(HAVE_PTHREAD_SPINLOCK) && defined(TBCI_MALLOC_LOCK) 180 #define TBCI_MALLOC_SPINLOCK 183 #if defined(__i386__) || defined(__x86_64__) 184 # define _cpu_relax() asm ("rep; nop") 186 # define _cpu_relax() 189 #ifndef TBCI_MAXLOCKTRIES 190 # define TBCI_MAXLOCKTRIES 4 193 #ifdef TBCI_MALLOC_STATS 194 static volatile unsigned long locks_taken = 0;
195 static volatile unsigned long locks_tried = 0;
196 static volatile unsigned long locks_direct = 0;
197 static volatile unsigned long locks_yield = 0;
/* NOTE(review): fragmentary view — original source lines are missing between
 * the embedded line-number markers, so only the visible logic is documented.
 * Spin until pthread_spin_trylock() succeeds.  A nonzero result reaches the
 * stderr error print below; the elided lines 211-212 presumably filter out
 * the expected EBUSY ("lock busy, keep spinning") case first — TODO confirm
 * against the full source.  Every TBCI_MAXLOCKTRIES failed attempts the
 * (elided) branch at line 217 apparently backs off / yields, and under
 * TBCI_MALLOC_STATS the attempt count is accumulated into locks_tried. */
206 static inline void _spin_lock(pthread_spinlock_t* lock)
208 unsigned int tries = 0;
210 while (0 != (
err = pthread_spin_trylock(lock))) {
213 fprintf(stderr,
"LOCKING ERROR: %s\n", strerror(
err));
217 if (0 == ++tries % TBCI_MAXLOCKTRIES) {
218 #ifdef TBCI_MALLOC_STATS 225 #ifdef TBCI_MALLOC_STATS 227 locks_tried += tries;
/* Release the allocator spinlock taken by _spin_lock(); thin wrapper around
 * pthread_spin_unlock(), whose return value is deliberately ignored.
 * (The surrounding braces of the function body lie in elided lines.) */
233 static inline void _spin_unlock(pthread_spinlock_t* lock)
235 pthread_spin_unlock(lock);
238 # define SMP_LOCK _spin_lock(&this->lock) 239 # define SMP_UNLOCK _spin_unlock(&this->lock) 247 template <
typename T>
257 template <
typename T>
278 template <
typename T>
282 #ifdef TBCI_MALLOC_SPINLOCK 283 pthread_spinlock_t lock;
290 void enter(
const T*
const ptr,
const unsigned ln);
292 void rmv(
const unsigned idx);
295 void dealloc(
const T*
const,
const unsigned long);
308 free(new_malloc_tls);
314 void deinit(
const int thr);
323 #ifdef TBCI_MALLOC_SPINLOCK 324 pthread_spin_init(&lock, 0);
333 for (
int t = 1; t <=
nthr; ++t)
339 #ifdef TBCI_MALLOC_STATS 347 for (
int t =
nthr; t > 0; --t) {
348 #ifdef TBCI_MALLOC_STATS 357 #ifdef TBCI_MALLOC_STATS 360 CSTD__ fprintf(stderr,
"malloc_cache<%s>: %i hits, %i misses, %i searches, %i allocs\n",
362 #ifdef TBCI_MALLOC_SPINLOCK 363 CSTD__ fprintf(stderr,
"malloc_cache<%s>: %lu locks, %lu tries, %lu direct, %lu yields\n",
364 typeid(
T).name(), locks_taken, locks_tried, locks_direct, locks_yield);
384 (cbackfn*) &
_deinit, (
void*)
this);
391 (cbackfn*) &
_deinit, (
void*)
this);
401 template <
typename T>
410 const unsigned fidx = pool->
free_idx;
415 #ifdef TBCI_MALLOC_STATS 420 #ifdef TBCI_MALLOC_STATS 425 #ifdef TBCI_MALLOC_STATS 432 template <
typename T>
436 const unsigned fidx = pool->
free_idx;
443 template <
typename T>
451 template <
typename T>
456 const int idx = find_by_sz(sz);
466 template <
typename T>
470 #ifdef TBCI_MALLOC_STATS 473 #if defined(TBCI_MALLOC_LOCK) && !defined(TBCI_MALLOC_SPINLOCK) 481 const T* ptr = find_and_rmv(sz);
489 template <
typename T>
496 const unsigned fidx = pool->
free_idx;
507 template <
typename T>
510 BCHK (!ptr||!sz,
NumErr, dealloc null ptr or 0 bytes, (
long)ptr, );
513 CSTD__ memset((
void*)ptr, 0xa5, sz*
sizeof(
T));
515 #if defined(TBCI_MALLOC_LOCK) && !defined(TBCI_MALLOC_SPINLOCK) 524 free_and_enter(ptr, sz);
527 template <
typename T>
542 #ifdef TBCI_MALLOC_SPINLOCK 543 pthread_spin_destroy(&lock);
549 #define SPECIALIZE_MEMALLOC_CLASS(TYPE) \ 551 class tbci_memalloc<TYPE > { \ 554 tbci_memalloc_cache<T > m_cache; \ 555 NO_SINGLETON_PUBLIC \ 557 ~tbci_memalloc() {} \ 559 friend tbci_memalloc<T >& tbci_s_allocator FGD2 (DUMMY1); \ 560 T* alloc(const unsigned long sz) { return m_cache.alloc(sz); } \ 561 void dealloc(const T* const ptr, const unsigned long sz) \ 562 { m_cache.dealloc(ptr,sz); } \ 566 template <
typename T> \
570 return _tbci_s_alloc_ANON;
573 #if defined(__MINGW32__) 574 #define SPECIALIZE_MEMALLOC(TYPE) 575 #define SPECIALIZE_MEMALLOC2(TYPE,SHTP) 578 #define SPECIALIZE_MEMALLOC(TYPE) \ 579 SPECIALIZE_MEMALLOC_CLASS(TYPE) \ 581 MINLINE tbci_memalloc<TYPE >& tbci_s_allocator<TYPE > (DUMMY2(TYPE)) \ 583 static tbci_memalloc<TYPE > _tbci_s_alloc_##TYPE; \ 584 return _tbci_s_alloc_##TYPE; \ 587 #define SPECIALIZE_MEMALLOC2(TYPE,SHTP) \ 588 SPECIALIZE_MEMALLOC_CLASS(TYPE) \ 590 MINLINE tbci_memalloc<TYPE >& tbci_s_allocator<TYPE > (DUMMY2(TYPE)) \ 592 static tbci_memalloc<TYPE > _tbci_s_alloc_##SHTP; \ 593 return _tbci_s_alloc_##SHTP; \ 599 #if 0 // defined(TBCI_SELECTIVE_INST) && !defined(TBCI_INSTANTIATE) && !defined(AUTO_DECL) 600 # include "malloc_cache_gd.h" 631 #endif // TBCI_SELECTIVE_INST 633 #define NEW(t, s) tbci_s_allocator<t>(DUMMY3(t)).alloc(s) 634 #define TBCIDELETE(t, v, sz) tbci_s_allocator<t>(DUMMY3(t)).dealloc(v, sz) 635 #define TBCIDELETE_RO(t, v, sz) tbci_s_allocator<t>(DUMMY3(t)).dealloc(v, sz) 636 #define REALLOC(v, os, t, s) do { \ 637 t *ptr = NEW(t, s); \ 639 TBCICOPY(ptr, v, t, MIN(os, s));\ 640 TBCIDELETE(t, v, os); \ #define DEBUG_PRINTF3(a1, a2, a3)
volatile unsigned int srch
#define SPECIALIZE_MEMALLOC(TYPE)
void thread_dereg_callback(cbackfn ctor, cbackfn dtor, void *parm)
volatile unsigned int hit
void enter(const T *const ptr, const unsigned ln)
Enter into list of FREE slots.
volatile unsigned int miss
void rmv(const unsigned idx)
Remove from list of FREE slots.
static void _init(tbci_memalloc_cache< T > *th, const int thr)
static void _deinit(tbci_memalloc_cache< T > *th, const int thr)
volatile unsigned int srch
void free_and_enter(const T *const ptr, const unsigned ln)
exception base class for the TBCI NumLib
tbci_memalloc< T > & tbci_s_allocator()
volatile unsigned int all
#define DEBUG_PRINTF4(a1, a2, a3, a4)
void deinit(const int thr)
#define BCHK(cond, exc, txt, ind, rtval)
#define TBCI_MALLOC_LIMIT
NAMESPACE_TBCI int num_threads
volatile unsigned int miss
#define SPECIALIZE_MEMALLOC2(TYPE, SHTP)
For specializations of the memory allocator:
#define DEBUG_PRINTF1(arg1)
unsigned short free_sz[8]
#define _MC_DELETE(T, PTR)
const T * find_and_rmv(const unsigned sz)
#define SMP_LOCK
SMP locking: The TBCI worker threads don't do memory allocations so the memalloc_cache code does not ...
tbci_memalloc_cache_tls< T > * malloc_tls
#define TBCI_MALLOC_POOLSZ
struct tbci_memalloc_cache ALIGN
void thread_reg_callback(cbackfn ctor, cbackfn dtor, void *parm)
void SWAP(T &a, T &b)
SWAP function Note: We could implement a swap function without temporaries: a -= b b += a a -= b a = ...
static void dealloc(const T *const ptr, const unsigned long sz)
void dealloc(const T *const, const unsigned long)
int find_by_sz(const unsigned ln) const
Find free slot with right len, starting point for search is free_idx.
#define DEBUG_PRINTF2(a1, a2)
THREAD__ int ismainthread
volatile unsigned int hit
#define NO_SINGLETON_PUBLIC
static T * alloc(const unsigned long sz)
const Vector< T > const Vector< T > const Vector< T > int T T & err
volatile unsigned int all
T * alloc(const unsigned long)
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...