11 #ifndef TBCI_MALLOC_CACHE_H
12 #define TBCI_MALLOC_CACHE_H
14 #define MALLOC_CACHE 1
16 #ifndef HAVE_NO_NEW_HEADERS_BUG
22 #ifdef TBCI_MALLOC_STATS
26 #ifdef HAVE_MALLOC_ATTR
27 # define MALLOC __attribute__((malloc))
32 #ifdef HAVE_MALLOC_H // memalign
36 #if defined(SMP) && !defined(TBCI_MALLOC_NOLOCK)
37 # define TBCI_MALLOC_LOCK 1
38 # include "tbci/smp.h"
41 #if defined(__GNUC__) && defined(SMP) && defined(TBCI_MALLOC_NOLOCK)
42 # warning "SMP support but no locking in memalloc_cache. Don't do memory\
43 de/allocations from threads; otherwise you face corruption!"
49 # define DEBUG_PRINTF1(arg1) CSTD__ fprintf(stderr, arg1)
50 # define DEBUG_PRINTF2(a1,a2) CSTD__ fprintf(stderr, a1, a2)
51 # define DEBUG_PRINTF3(a1,a2,a3) CSTD__ fprintf(stderr, a1, a2, a3)
52 # define DEBUG_PRINTF4(a1,a2,a3,a4) CSTD__ fprintf(stderr, a1, a2, a3, a4)
54 # define DEBUG_PRINTF1(arg1) do {} while (0)
55 # define DEBUG_PRINTF2(a1,a2) do {} while (0)
56 # define DEBUG_PRINTF3(a1,a2,a3) do {} while (0)
57 # define DEBUG_PRINTF4(a1,a2,a3,a4) do {} while (0)
61 #ifndef TBCI_MALLOC_POOLSZ
62 # define TBCI_MALLOC_POOLSZ 8
64 #ifndef TBCI_MALLOC_LIMIT
65 # define TBCI_MALLOC_LIMIT 8192
68 #if !defined(__GNUC__) && !defined(__BORLANDC__)
69 # define DUMMY1 const T* const dummy
70 # define DUMMY2(TYPE) const TYPE * const dummy
71 # define DUMMY3(t) (t*)0
73 #else // GCC || Borland
80 # define MINLINE inline
84 #if defined(__INTEL_COMPILER) || defined(_MSC_VER) || defined(__BORLANDC__) || (defined(_SGI_SOURCE) && defined(_COMPILER_VERSION) && !defined(__GNUC__))
86 # define NO_SINGLETON_PUBLIC public:
89 # define NO_SINGLETON_PUBLIC
100 template <
typename T>
110 {
return new T[sz]; }
111 static void dealloc(
const T*
const ptr,
const unsigned long sz)
112 {
delete [] (
T*)ptr; }
116 #ifndef TBCI_SIMD_ALIGN
118 # define TBCI_SIMD_ALIGN 64
119 # elif defined(__AVX__)
120 # define TBCI_SIMD_ALIGN 32
122 # define TBCI_SIMD_ALIGN 16
125 #ifndef TBCI_MALLOC_ALIGN_FACT
126 # define TBCI_MALLOC_ALIGN_FACT 1
129 #if !defined(__SSE2__) || defined(__x86_64__) || !defined(HAVE_MALLOC_H)
130 # define _MC_NEW(T,SZ) new T[SZ]
131 # define _MC_DELETE(T,PTR) delete[] (T*)PTR
133 # define SSE_VARS_MAY_BE_UNALIGNED
136 # define _MC_NEW(T,SZ) (T*)memalign(TBCI_SIMD_ALIGN*TBCI_MALLOC_ALIGN_FACT, SZ*sizeof(T))
137 # define _MC_DELETE(T,PTR) free((T*)PTR)
140 #if defined(__x86_64__) || defined(__i386__)
141 # define INC(x) asm volatile ("lock incl %0" : "=m" (x): "m" (x));
174 #if defined(HAVE_TLS) && !defined(TBCI_MALLOC_FORCESPINLOCK)
175 # define TBCI_MALLOC_NOSPINLOCK
178 #if !defined(TBCI_MALLOC_NOSPINLOCK) && defined(HAVE_PTHREAD_SPINLOCK) && defined(TBCI_MALLOC_LOCK)
180 #define TBCI_MALLOC_SPINLOCK
183 #if defined(__i386__) || defined(__x86_64__)
184 # define _cpu_relax() asm ("rep; nop")
186 # define _cpu_relax()
189 #ifndef TBCI_MAXLOCKTRIES
190 # define TBCI_MAXLOCKTRIES 4
193 #ifdef TBCI_MALLOC_STATS
194 static volatile unsigned long locks_taken = 0;
195 static volatile unsigned long locks_tried = 0;
196 static volatile unsigned long locks_direct = 0;
197 static volatile unsigned long locks_yield = 0;
206 static inline void _spin_lock(pthread_spinlock_t* lock)
208 unsigned int tries = 0;
210 while (0 != (err = pthread_spin_trylock(lock))) {
213 fprintf(stderr,
"LOCKING ERROR: %s\n", strerror(err));
217 if (0 == ++tries % TBCI_MAXLOCKTRIES) {
218 #ifdef TBCI_MALLOC_STATS
225 #ifdef TBCI_MALLOC_STATS
227 locks_tried += tries;
233 static inline void _spin_unlock(pthread_spinlock_t* lock)
235 pthread_spin_unlock(lock);
238 # define SMP_LOCK _spin_lock(&this->lock)
239 # define SMP_UNLOCK _spin_unlock(&this->lock)
247 template <
typename T>
257 template <
typename T>
278 template <
typename T>
282 #ifdef TBCI_MALLOC_SPINLOCK
283 pthread_spinlock_t lock;
290 void enter(
const T*
const ptr,
const unsigned ln);
292 void rmv(
const unsigned idx);
295 void dealloc(
const T*
const,
const unsigned long);
308 free(new_malloc_tls);
314 void deinit(
const int thr);
323 #ifdef TBCI_MALLOC_SPINLOCK
324 pthread_spin_init(&lock, 0);
333 for (
int t = 1; t <=
nthr; ++t)
339 #ifdef TBCI_MALLOC_STATS
341 hit = srch = all = miss = 0;
347 for (
int t =
nthr; t > 0; --t) {
348 #ifdef TBCI_MALLOC_STATS
357 #ifdef TBCI_MALLOC_STATS
360 CSTD__ fprintf(stderr,
"malloc_cache<%s>: %i hits, %i misses, %i searches, %i allocs\n",
361 typeid(
T).name(), hit, miss, srch, all);
362 #ifdef TBCI_MALLOC_SPINLOCK
363 CSTD__ fprintf(stderr,
"malloc_cache<%s>: %lu locks, %lu tries, %lu direct, %lu yields\n",
364 typeid(
T).name(), locks_taken, locks_tried, locks_direct, locks_yield);
384 (cbackfn*) &
_deinit, (
void*)
this);
391 (cbackfn*) &
_deinit, (
void*)
this);
401 template <
typename T>
410 const unsigned fidx = pool->
free_idx;
415 #ifdef TBCI_MALLOC_STATS
420 #ifdef TBCI_MALLOC_STATS
425 #ifdef TBCI_MALLOC_STATS
432 template <
typename T>
436 const unsigned fidx = pool->
free_idx;
443 template <
typename T>
451 template <
typename T>
456 const int idx = find_by_sz(sz);
459 ptr = malloc_tls[
thrno].free_pt[idx];
466 template <
typename T>
470 #ifdef TBCI_MALLOC_STATS
471 ++malloc_tls[
thrno].all;
473 #if defined(TBCI_MALLOC_LOCK) && !defined(TBCI_MALLOC_SPINLOCK)
481 const T* ptr = find_and_rmv(sz);
489 template <
typename T>
496 const unsigned fidx = pool->
free_idx;
507 template <
typename T>
510 BCHK (!ptr||!sz,
NumErr, dealloc null ptr or 0 bytes, (
long)ptr, );
513 CSTD__ memset((
void*)ptr, 0xa5, sz*
sizeof(
T));
515 #if defined(TBCI_MALLOC_LOCK) && !defined(TBCI_MALLOC_SPINLOCK)
524 free_and_enter(ptr, sz);
527 template <
typename T>
542 #ifdef TBCI_MALLOC_SPINLOCK
543 pthread_spin_destroy(&lock);
549 #define SPECIALIZE_MEMALLOC_CLASS(TYPE) \
551 class tbci_memalloc<TYPE > { \
554 tbci_memalloc_cache<T > m_cache; \
555 NO_SINGLETON_PUBLIC \
557 ~tbci_memalloc() {} \
559 friend tbci_memalloc<T >& tbci_s_allocator FGD2 (DUMMY1); \
560 T* alloc(const unsigned long sz) { return m_cache.alloc(sz); } \
561 void dealloc(const T* const ptr, const unsigned long sz) \
562 { m_cache.dealloc(ptr,sz); } \
566 template <
typename T>
\
570 return _tbci_s_alloc_ANON;
573 #if defined(__MINGW32__)
574 #define SPECIALIZE_MEMALLOC(TYPE)
575 #define SPECIALIZE_MEMALLOC2(TYPE,SHTP)
578 #define SPECIALIZE_MEMALLOC(TYPE) \
579 SPECIALIZE_MEMALLOC_CLASS(TYPE) \
581 MINLINE tbci_memalloc<TYPE >& tbci_s_allocator<TYPE > (DUMMY2(TYPE)) \
583 static tbci_memalloc<TYPE > _tbci_s_alloc_##TYPE; \
584 return _tbci_s_alloc_##TYPE; \
587 #define SPECIALIZE_MEMALLOC2(TYPE,SHTP) \
588 SPECIALIZE_MEMALLOC_CLASS(TYPE) \
590 MINLINE tbci_memalloc<TYPE >& tbci_s_allocator<TYPE > (DUMMY2(TYPE)) \
592 static tbci_memalloc<TYPE > _tbci_s_alloc_##SHTP; \
593 return _tbci_s_alloc_##SHTP; \
599 #if 0 // defined(TBCI_SELECTIVE_INST) && !defined(TBCI_INSTANTIATE) && !defined(AUTO_DECL)
600 # include "malloc_cache_gd.h"
631 #endif // TBCI_SELECTIVE_INST
633 #define NEW(t, s) tbci_s_allocator<t>(DUMMY3(t)).alloc(s)
634 #define TBCIDELETE(t, v, sz) tbci_s_allocator<t>(DUMMY3(t)).dealloc(v, sz)
635 #define TBCIDELETE_RO(t, v, sz) tbci_s_allocator<t>(DUMMY3(t)).dealloc(v, sz)
636 #define REALLOC(v, os, t, s) do { \
637 t *ptr = NEW(t, s); \
639 TBCICOPY(ptr, v, t, MIN(os, s));\
640 TBCIDELETE(t, v, os); \
#define DEBUG_PRINTF3(a1, a2, a3)
volatile unsigned int srch
#define SPECIALIZE_MEMALLOC(TYPE)
void thread_dereg_callback(cbackfn ctor, cbackfn dtor, void *parm)
volatile unsigned int hit
void enter(const T *const ptr, const unsigned ln)
Enter a pointer into the list of FREE slots.
volatile unsigned int miss
int find_by_sz(const unsigned ln) const
Find a free slot with the right length; the starting point for the search is free_idx.
void rmv(const unsigned idx)
Remove an entry from the list of FREE slots.
static void _init(tbci_memalloc_cache< T > *th, const int thr)
static void _deinit(tbci_memalloc_cache< T > *th, const int thr)
volatile unsigned int srch
void free_and_enter(const T *const ptr, const unsigned ln)
Exception base class for the TBCI NumLib.
tbci_memalloc< T > & tbci_s_allocator()
volatile unsigned int all
#define DEBUG_PRINTF4(a1, a2, a3, a4)
thread_reg_callback((cbackfn *)&_init,(cbackfn *)&_deinit,(void *) this)
void deinit(const int thr)
#define BCHK(cond, exc, txt, ind, rtval)
#define TBCI_MALLOC_LIMIT
NAMESPACE_TBCI int num_threads
volatile unsigned int miss
#define SPECIALIZE_MEMALLOC2(TYPE, SHTP)
For specializations of the memory allocator:
#define DEBUG_PRINTF1(arg1)
unsigned short free_sz[8]
#define _MC_DELETE(T, PTR)
const T * find_and_rmv(const unsigned sz)
#define SMP_LOCK
SMP locking: the TBCI worker threads don't do memory allocations, so the memalloc_cache code does not ...
tbci_memalloc_cache_tls< T > * malloc_tls
#define TBCI_MALLOC_POOLSZ
void SWAP(T &a, T &b)
SWAP function. Note: we could implement a swap function without temporaries (a -= b; b += a; a -= b; a = ...
const Vector< T > const Vector< T > const Vector< T > int T T & err
static void dealloc(const T *const ptr, const unsigned long sz)
void dealloc(const T *const, const unsigned long)
#define DEBUG_PRINTF2(a1, a2)
THREAD__ int ismainthread
volatile unsigned int hit
#define NO_SINGLETON_PUBLIC
static T * alloc(const unsigned long sz)
volatile unsigned int all
T * alloc(const unsigned long)
#define LIKELY(expr)
Branch prediction: note that we sometimes, on purpose, mark the unlikely possibility as likely and vice ver...