TBCI Numerical high perf. C++ Library 2.8.0
smp.h
Go to the documentation of this file.
11
51
53#include "tbci/basics.h"
54
55#ifndef TBCI_SMP_H
56#define TBCI_SMP_H
57
58#ifdef NO_SMP
59# undef SMP
60#endif
61
62// libc5 hack
63//#define _POSIX_THREAD_PRIO_INHERIT
64
65// Threading support needs both SMP and HAVE_PTHREADS; otherwise the non-threaded fallbacks at the end of this header apply
66#if defined(SMP) && defined(HAVE_PTHREADS)
67
68//#warning SMP
69
70#include <signal.h>
71#ifndef HAVE_NO_NEW_HEADERS_BUG
72# include <cstdlib>
73# include <cstdarg>
74#else
75# include <stdlib.h>
76# include <stdarg.h>
77#endif
78#include <sys/types.h>
79#include <sys/mman.h>
80#include <limits>
81#ifndef PAGESIZE
82# define PAGESIZE EXEC_PAGESIZE
83#endif
84//#include <unistd.h>
85//#define __need_sig_atomic_t
86#define __need_sigset_t
87#ifdef BUGGY_PTHREAD
88# include <sigset.h>
89 typedef __sigset_t sigset_t;
90#endif
91// We need pthread spinlocks (XOPEN2K/XPG6 stuff)
92#undef _POSIX_C_SOURCE
93#define _POSIX_C_SOURCE 200112L
94#include <pthread.h>
95
/* Fallback definition of CACHELINE_SZ for builds that did not set it:
 * use __WORDSIZE when that macro exists, otherwise emit a warning and use 32.
 * CACHELINE_SZ is the default value of SLICE_ALIGN further down in this header.
 * NOTE(review): __WORDSIZE is presumably the machine word width in bits rather
 * than a cache line size in bytes -- confirm this substitution is intended. */
96#ifndef CACHELINE_SZ
97# ifdef __WORDSIZE
98# define CACHELINE_SZ __WORDSIZE
99# else
100# warning Setting cache line size manually to 32
101# define CACHELINE_SZ 32
102# endif
103#endif
104
106
/* smp_barrier(): with __GNUC__ compilers this is an empty asm statement with a
 * "memory" clobber; with any other compiler it expands to nothing at all.
 * NOTE(review): the asm body is empty, so this presumably only keeps the
 * compiler from reordering memory accesses and is not a hardware fence --
 * confirm that callers do not rely on more than that. */
107#ifdef __GNUC__
108# define smp_barrier() asm ("": : :"memory")
109#else
110// Oops! No barrier?
111# define smp_barrier()
112#endif
113
114extern pid_t main_thread_pid;
115#define MAIN_PID (main_thread_pid)
116extern bool bound_main;
117#endif
118
119#if defined(NEED_SMP_DECLS) || defined(SMP)
120#ifdef HAVE_PTHREADS
121#include <pthread.h>
122#else
123#error NEED pthread.h if NEED_SMP_DECLS is set
124#endif
125
126typedef void (*thr_job_t) (struct thr_ctrl*);
127typedef void* (*useful_job_t) (void*);
128
129#define THREAD_MAX_ARGS 6
130#define THREAD_MAX_RES_LN 16
131
132struct job_input {
133 unsigned long t_job_no;
134 thr_job_t t_job; /* Function pointer */
135 unsigned long t_size; /* size */
136 unsigned long t_off; /* offset */
137 void* t_par[THREAD_MAX_ARGS]; /* Space for 6 more pointer args */
138};
139// 80 bytes on 64bit, 40 on 32bit
140
142 unsigned long t_job_output_no;
143 long t_retval; /* return value */
144 union {
145 volatile char t_res_dummy[16]; /* 16 byte space for results */
147 double t_res_d;
150 };
151};
152// 32 bytes on 64bit, 24 on 32bit
153
158 int t_no; /* thread_no */
159 pid_t t_pid; /* thread_pid */
160 pthread_t t_id; /* thread_id */
162 unsigned int t_done_var;
164} ALIGN(64);
165
166/* For compatibility; this is allocated on the local stack when starting
167 * a thread; input data is copied into it */
168struct thr_ctrl {
169 unsigned long t_job_no;
170 thr_job_t t_job; /* Function pointer */
171 unsigned long t_size; /* size */
172 unsigned long t_off; /* offset */
173 void* t_par[THREAD_MAX_ARGS]; /* Space for 6 more pointer args */
174#if 1
175 union {
176 volatile char t_res_dummy[16]; /* 16 byte space for results */
178 double t_res_d;
181 };
182#endif
183 int t_no;
185#endif
186
187#if defined(SMP) && defined(HAVE_PTHREADS)
188//extern thr_ctrl master_thr;
189extern int num_threads;
190extern int threads_busy;
191extern int numa_avail;
192extern struct thr_struct *threads;
193extern THREAD__ int ismainthread;
194extern THREAD__ int thrno;
195extern THREAD__ struct thr_struct *this_thread;
196extern unsigned int curr_n_thr, last_n_thr, prev_n_thr;
197#ifdef HAVE_LIBNUMA
198extern unsigned page_size;
199#endif
200
211int init_threads (const int thr = 0, const bool load = false);
213void free_threads ();
215inline int threads_avail (const int wanted = 0)
216{
217 if (threads_busy)
218 return 0; /* avoid deadlock */
219 if (wanted > num_threads)
220 return num_threads;
221 else
222 return wanted;
223};
224
226void disable_threads ();
228void reenable_threads ();
243void bind_threads (bool bind_main = true, bool enable_numa = true, bool add_sibl = false);
244
245//
249void thread_start (const int thr_no, thr_job_t job, const unsigned long sz, ...);
250void thread_start_off (const int thr_no , thr_job_t job,
251 const unsigned long offset, const unsigned long sz, ...);
252void thread_wait (const int, job_output* out = 0);
253/* wait for a thread to finish and return the double t_res */
254double thread_wait_result (const int);
255
256#ifndef SLICE_ALIGN
257# define SLICE_ALIGN CACHELINE_SZ
258#endif
259#ifndef SLICE_DEF_ALIGN
260# define SLICE_DEF_ALIGN 8
261#endif
262/* Helper to slice things into cache aligned tiles */
263/* TODO: On NUMA systems, we should better return page size aligned tiles */
264template <typename T>
265static inline unsigned long slice_offset(int thr, int no_thr, unsigned long dim, T* ptr)
266{
267#ifdef HAVE_LIBNUMA
268 const unsigned int align = numa_avail? page_size: SLICE_ALIGN;
269#else
270 const unsigned int align = SLICE_ALIGN;
271#endif
272 if (thr == no_thr)
273 return dim;
274 else if (thr == 0)
275 return 0;
276 BCHK(thr < 0 || thr > no_thr, NumErr, Illegal thread in slice_offset, thr, 0);
277 const unsigned step = dim/no_thr;
278 unsigned long offs = thr*dim/no_thr;
279 /* If we don't get a valid pointer, don't try to optimize for cache lines */
280 if (!ptr || 2*sizeof(T) > align) {
281 offs -= 1;
282 return offs + (SLICE_DEF_ALIGN - offs%SLICE_DEF_ALIGN);
283 }
284 offs += MIN((unsigned int)(align/sizeof(T)), step-1);
285 unsigned long misalign = (unsigned long)(ptr+offs)%align / sizeof(T);
286 //fprintf(stderr, "Slice %i (base %p): %li(%li) (@%p)\n",
287 // thr, ptr, offs-misalign, offs, ptr+(offs-misalign));
288 return offs - misalign;
289}
290
291
/* Stores n_thr in the global curr_n_thr.
 * NOTE(review): the embedded listing numbers jump from 293 to 295 inside this
 * body, so a source line may be missing from this view -- check the original
 * header before relying on this being the complete function. */
292inline void update_n_thr(const unsigned int n_thr)
293{
295 curr_n_thr = n_thr;
296}
297
298/* callbacks */
299typedef void cbackfn(void *ptr, const int thr);
300void thread_reg_callback(cbackfn ctor, cbackfn dtor, void *parm);
301void thread_dereg_callback(cbackfn ctor, cbackfn dtor, void *parm);
302
303#ifdef HAVE_LIBNUMA
304extern int main_numa_node;
305int do_numa_move_pages(int node, int fault_in,
306 unsigned long firstaddr, unsigned long lastaddr);
307void numa_move_pages_job(struct thr_ctrl *tc);
308#endif
309
311
312#if defined(SMP) && (!defined(_REENTRANT) && !defined(_THREAD_SAFE))
313# warning "Define _REENTRANT and/or _THREAD_SAFE for multithreaded (SMP) compilation!"
314#endif
315
316#else /* _POSIX_THREADS */
317# define MAIN_PID (getpid())
318# undef SMP
319# define num_threads (0)
320# define thrno (0)
321# define ismainthread (1)
322# define threads_avail(x) (0)
/* Stub for builds without thread support: ignores both arguments and
 * always returns 0. */
static inline int init_threads(const int=0, const bool=false) { return 0; }
/* Stub for builds without thread support: accepts the same arguments as the
 * threaded version and does nothing. */
static inline void bind_threads(bool=true, bool=true, bool=false) {}
/* Stub for builds without thread support: does nothing. */
static inline void free_threads () {}
/* Variant for builds without pthread-based SMP: with TBCI_OMP defined it sets
 * the OpenMP thread count to 1, otherwise it does nothing. */
static inline void disable_threads ()
{
#ifdef TBCI_OMP
	omp_set_num_threads(1);
#endif
}
/* Variant for builds without pthread-based SMP: with TBCI_OMP defined it sets
 * the OpenMP thread count to omp_get_num_procs(), otherwise it does nothing. */
static inline void reenable_threads ()
{
#ifdef TBCI_OMP
	omp_set_num_threads(omp_get_num_procs());
#endif
}
338
339
340//# define numa_optimize(x,y) do {} while(0)
341#endif /* _POSIX_THREADS */
342
343#endif /* TBCI_SMP_H */
#define num_threads
Definition basics.h:782
#define BCHK(cond, exc, txt, ind, rtval)
Definition basics.h:575
#define ismainthread
Definition basics.h:784
#define MIN(a, b)
Definition basics.h:655
#define NAMESPACE_END
Definition basics.h:323
#define thrno
Definition basics.h:783
#define NAMESPACE_TBCI
Definition basics.h:317
#define THREAD__
Definition basics.h:774
#define LONG_DOUBLE
Definition basics.h:219
#define T
Definition bdmatlib.cc:20
exception base class for the TBCI NumLib
Definition except.h:59
void thread_dereg_callback(cbackfn ctor, cbackfn dtor, void *parm)
Definition smp.cc:340
THREAD__ struct thr_struct * this_thread
Definition smp.cc:113
void thread_start_off(const int thr_no, thr_job_t job, const unsigned long off, const unsigned long sz,...)
Definition smp.cc:979
bool bound_main
Definition smp.cc:109
void thread_wait(const int thr_no, struct job_output *out)
Definition smp.cc:997
unsigned int prev_n_thr
Definition smp.cc:1056
int main_numa_node
Definition smp.cc:110
double thread_wait_result(const int thr_no)
Definition smp.cc:1017
int threads_busy
Definition smp.cc:104
pid_t main_thread_pid
Definition smp.cc:107
void thread_start(const int thr_no, thr_job_t job, const unsigned long sz,...)
Definition smp.cc:988
void thread_reg_callback(cbackfn ctor, cbackfn dtor, void *parm)
Definition smp.cc:333
unsigned int last_n_thr
Definition smp.cc:1056
unsigned int curr_n_thr
Definition smp.cc:1056
struct thr_struct * threads
Definition smp.cc:106
int numa_avail
Definition smp.cc:105
static void bind_threads(bool=true, bool=true, bool=false)
Definition smp.h:324
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
Definition smp.h:126
static void disable_threads()
Definition smp.h:326
struct thr_ctrl ALIGN
static void reenable_threads()
Definition smp.h:332
#define threads_avail(x)
Definition smp.h:322
#define THREAD_MAX_ARGS
Definition smp.h:129
static int init_threads(const int=0, const bool=false)
Definition smp.h:323
static void free_threads()
Definition smp.h:325
unsigned long t_size
Definition smp.h:135
unsigned long t_off
Definition smp.h:136
unsigned long t_job_no
Definition smp.h:133
thr_job_t t_job
Definition smp.h:134
void * t_par[6]
Definition smp.h:137
volatile char t_res_dummy[16]
Definition smp.h:145
LONG_DOUBLE t_res_ld
Definition smp.h:146
void * t_res_ptr
Definition smp.h:149
long t_res_l
Definition smp.h:148
long t_retval
Definition smp.h:143
unsigned long t_job_output_no
Definition smp.h:142
double t_res_d
Definition smp.h:147
double t_res_d
Definition smp.h:178
int t_no
Definition smp.h:183
long t_res_l
Definition smp.h:179
unsigned long t_off
Definition smp.h:172
thr_job_t t_job
Definition smp.h:170
void * t_res_ptr
Definition smp.h:180
LONG_DOUBLE t_res_ld
Definition smp.h:177
unsigned long t_job_no
Definition smp.h:169
unsigned long t_size
Definition smp.h:171
void * t_par[6]
Definition smp.h:173
volatile char t_res_dummy[16]
Definition smp.h:176
int t_no
Definition smp.h:158
int t_pipe_from_thread[2]
Definition smp.h:161
pid_t t_pid
Definition smp.h:159
int t_pipe_to_thread[2]
Definition smp.h:161
int numa_node
Definition smp.h:163
unsigned int t_done_var
Definition smp.h:162
pthread_t t_id
Definition smp.h:160