TBCI Numerical high perf. C++ Library  2.8.0
smp.h
Go to the documentation of this file.
1 
52 #include "tbci/basics.h"
54 
55 #ifndef TBCI_SMP_H
56 #define TBCI_SMP_H
57 
58 #ifdef NO_SMP
59 # undef SMP
60 #endif
61 
62 // libc5 hack
63 //#define _POSIX_THREAD_PRIO_INHERIT
64 
65 // Threading needs both SMP and HAVE_PTHREADS; otherwise the stubs at the end of this file are used
66 #if defined(SMP) && defined(HAVE_PTHREADS)
67 
68 //#warning SMP
69 
70 #include <signal.h>
71 #ifndef HAVE_NO_NEW_HEADERS_BUG
72 # include <cstdlib>
73 # include <cstdarg>
74 #else
75 # include <stdlib.h>
76 # include <stdarg.h>
77 #endif
78 #include <sys/types.h>
79 #include <sys/mman.h>
80 #include <limits>
81 #ifndef PAGESIZE
82 # define PAGESIZE EXEC_PAGESIZE
83 #endif
84 //#include <unistd.h>
85 //#define __need_sig_atomic_t
86 #define __need_sigset_t
87 #ifdef BUGGY_PTHREAD
88 # include <sigset.h>
89  typedef __sigset_t sigset_t;
90 #endif
91 // We need pthread spinlocks (XOPEN2K/XPG6 stuff)
92 #undef _POSIX_C_SOURCE
93 #define _POSIX_C_SOURCE 200112L
94 #include <pthread.h>
95 
/* Fallback for CACHELINE_SZ when the build has not already defined it:
 * use __WORDSIZE if that macro exists, else warn and use 32.
 * NOTE(review): __WORDSIZE is presumably the machine word width, not a
 * measured cache line length -- confirm that this default is intended,
 * since SLICE_ALIGN below defaults to CACHELINE_SZ. */
96 #ifndef CACHELINE_SZ
97 # ifdef __WORDSIZE
98 # define CACHELINE_SZ __WORDSIZE
99 # else
100 # warning Setting cache line size manually to 32
101 # define CACHELINE_SZ 32
102 # endif
103 #endif
104 
106 
/* smp_barrier(): with GCC this is an asm statement with an empty template
 * and a "memory" clobber, so no instruction is emitted by the template
 * itself. For any other compiler the macro expands to nothing at all.
 * NOTE(review): callers that need a hardware fence should confirm this is
 * sufficient on their target. */
107 #ifdef __GNUC__
108 # define smp_barrier() asm ("": : :"memory")
109 #else
110 // Oops! No barrier?
111 # define smp_barrier()
112 #endif
113 
/* Declarations only; the objects are defined outside this header.
 * MAIN_PID reads main_thread_pid in the threaded build (the non-threaded
 * branch at the end of this file defines MAIN_PID as getpid() instead). */
114 extern pid_t main_thread_pid;
115 #define MAIN_PID (main_thread_pid)
/* presumably records whether the main thread was bound by bind_threads() -- TODO confirm */
116 extern bool bound_main;
117 #endif
118 
119 #if defined(NEED_SMP_DECLS) || defined(SMP)
120 #ifdef HAVE_PTHREADS
121 #include <pthread.h>
122 #else
123 #error NEED pthread.h if NEED_SMP_DECLS is set
124 #endif
125 
/* Job entry point: receives a pointer to the thr_ctrl block, returns nothing. */
126 typedef void (*thr_job_t) (struct thr_ctrl*);
/* Generic void* -> void* function pointer type (not used elsewhere in this header). */
127 typedef void* (*useful_job_t) (void*);
/* Number of void* slots in the t_par[] arrays of job_input and thr_ctrl. */
129 #define THREAD_MAX_ARGS 6
/* NOTE(review): the t_res_dummy[] arrays below hard-code 16 instead of
 * using this macro -- presumably they are meant to agree; confirm. */
130 #define THREAD_MAX_RES_LN 16
/* Input side of a job: a job number, the function to run, a size/offset
 * pair and THREAD_MAX_ARGS untyped pointer parameters. The same five
 * members, in the same order, open struct thr_ctrl. */
132 struct job_input {
133  unsigned long t_job_no;
134  thr_job_t t_job; /* Function pointer */
135  unsigned long t_size; /* size */
136  unsigned long t_off; /* offset */
137  void* t_par[THREAD_MAX_ARGS]; /* Space for 6 more pointer args */
138 };
139 // 80 bytes on 64bit, 40 on 32bit
140 
/* Output side of a job: a job number, an integer return value and an
 * anonymous union holding one result viewed as double, long or pointer,
 * overlaid on a 16 byte scratch array.
 * NOTE(review): listing line 146 is absent from this view; the Doxygen
 * index names a further union member `LONG_DOUBLE t_res_ld` there. */
141 struct job_output {
142  unsigned long t_job_output_no;
143  long t_retval; /* return value */
144  union {
145  volatile char t_res_dummy[16]; /* 16 byte space for results */
147  double t_res_d;
148  long t_res_l;
149  void* t_res_ptr;
150  };
151 };
152 // 32bytes on 64bit, 24 on 32bit
153 
/* Per-thread bookkeeping record; the global `threads` pointer and the
 * thread-local `this_thread` pointer declared below refer to objects of
 * this type. The type is decorated with ALIGN(64).
 * NOTE(review): listing lines 161 and 163 are absent from this view; the
 * Doxygen index names `t_pipe_to_thread[2]`, `t_pipe_from_thread[2]` and
 * `numa_node` members there. */
157 struct thr_struct {
158  int t_no; /* thread_no */
159  pid_t t_pid; /* thread_pid */
160  pthread_t t_id; /* thread_id */
162  unsigned int t_done_var; /* presumably a completion flag/counter -- TODO confirm */
164 } ALIGN(64);
165 
166 /* For compatibility; this is allocated on the local stack when starting
167  * a thread; input data is copied into it */
/* This is the structure a thr_job_t function receives. Its first five
 * members repeat struct job_input, the anonymous union repeats the result
 * union of struct job_output, and t_no is a thread number.
 * NOTE(review): listing line 177 is absent from this view; the Doxygen
 * index names a further union member `LONG_DOUBLE t_res_ld` there. */
168 struct thr_ctrl {
169  unsigned long t_job_no;
170  thr_job_t t_job; /* Function pointer */
171  unsigned long t_size; /* size */
172  unsigned long t_off; /* offset */
173  void* t_par[THREAD_MAX_ARGS]; /* Space for 6 more pointer args */
/* The result union is unconditionally compiled in (#if 1). */
174 #if 1
175  union {
176  volatile char t_res_dummy[16]; /* 16 byte space for results */
178  double t_res_d;
179  long t_res_l;
180  void* t_res_ptr;
181  };
182 #endif
183  int t_no;
184 };
185 #endif
186 
187 #if defined(SMP) && defined(HAVE_PTHREADS)
/* Global state of the threaded build. These are declarations only; the
 * Doxygen index places the definitions in smp.cc. */
188 //extern thr_ctrl master_thr;
/* Upper bound that threads_avail() applies to the requested thread count. */
189 extern int num_threads;
/* When non-zero, threads_avail() reports 0 ("avoid deadlock"). */
190 extern int threads_busy;
/* When non-zero (and HAVE_LIBNUMA is set), slice_offset() aligns to page_size. */
191 extern int numa_avail;
192 extern struct thr_struct *threads;
/* The following three use the THREAD__ storage qualifier from basics.h
 * (presumably thread-local storage -- TODO confirm). */
193 extern THREAD__ int ismainthread;
194 extern THREAD__ int thrno;
195 extern THREAD__ struct thr_struct *this_thread;
/* update_n_thr() stores into curr_n_thr; the visible code does not touch
 * last_n_thr or prev_n_thr. */
196 extern unsigned int curr_n_thr, last_n_thr, prev_n_thr;
197 #ifdef HAVE_LIBNUMA
198 extern unsigned page_size;
199 #endif
200 
/* Thread pool set-up and tear-down; implemented outside this header.
 * NOTE(review): meaning of `thr` == 0 and of `load` is not visible here --
 * see the implementation before relying on either. */
211 int init_threads (const int thr = 0, const bool load = false);
213 void free_threads ();
215 inline int threads_avail (const int wanted = 0)
216 {
217  if (threads_busy)
218  return 0; /* avoid deadlock */
219  if (wanted > num_threads)
220  return num_threads;
221  else
222  return wanted;
223 };
224 
/* Prototypes only -- the implementations are outside this header (the
 * Doxygen index points at smp.cc), so the notes below are limited to what
 * the signatures show. */
226 void disable_threads ();
228 void reenable_threads ();
/* All three flags are optional; defaults are bind_main=true,
 * enable_numa=true, add_sibl=false. */
243 void bind_threads (bool bind_main = true, bool enable_numa = true, bool add_sibl = false);
244 
245 //
/* Start `job` on thread `thr_no` with size `sz`; thread_start_off()
 * additionally takes an offset. The variadic tail presumably carries the
 * pointer arguments that end up in t_par[] -- TODO confirm in smp.cc. */
249 void thread_start (const int thr_no, thr_job_t job, const unsigned long sz, ...);
250 void thread_start_off (const int thr_no , thr_job_t job,
251  const unsigned long offset, const unsigned long sz, ...);
/* `out` is optional (defaults to a null pointer). */
252 void thread_wait (const int, job_output* out = 0);
253 /* wait for a thread to finish and return the double t_res */
254 double thread_wait_result (const int);
255 
/* Alignment (in bytes) that slice_offset() aims for when it is given a
 * usable pointer; overridable by the build, defaults to CACHELINE_SZ. */
256 #ifndef SLICE_ALIGN
257 # define SLICE_ALIGN CACHELINE_SZ
258 #endif
/* Granularity (in elements) that slice_offset() rounds up to when it cannot
 * align by address; overridable by the build, defaults to 8. */
259 #ifndef SLICE_DEF_ALIGN
260 # define SLICE_DEF_ALIGN 8
261 #endif
262 /* Helper to slice things into cache aligned tiles */
263 /* TODO: On NUMA systems, we should better return page size aligned tiles */
264 template <typename T>
265 static inline unsigned long slice_offset(int thr, int no_thr, unsigned long dim, T* ptr)
266 {
267 #ifdef HAVE_LIBNUMA
268  const unsigned int align = numa_avail? page_size: SLICE_ALIGN;
269 #else
270  const unsigned int align = SLICE_ALIGN;
271 #endif
272  if (thr == no_thr)
273  return dim;
274  else if (thr == 0)
275  return 0;
276  BCHK(thr < 0 || thr > no_thr, NumErr, Illegal thread in slice_offset, thr, 0);
277  const unsigned step = dim/no_thr;
278  unsigned long offs = thr*dim/no_thr;
279  /* If we don't get a valid pointer, don't try to optimize for cache lines */
280  if (!ptr || 2*sizeof(T) > align) {
281  offs -= 1;
282  return offs + (SLICE_DEF_ALIGN - offs%SLICE_DEF_ALIGN);
283  }
284  offs += MIN((unsigned int)(align/sizeof(T)), step-1);
285  unsigned long misalign = (unsigned long)(ptr+offs)%align / sizeof(T);
286  //fprintf(stderr, "Slice %i (base %p): %li(%li) (@%p)\n",
287  // thr, ptr, offs-misalign, offs, ptr+(offs-misalign));
288  return offs - misalign;
289 }
290 
291 
/* Store n_thr into the global curr_n_thr.
 * NOTE(review): listing line 294 is absent from this view, so the body may
 * contain one more statement or comment than is shown here. */
292 inline void update_n_thr(const unsigned int n_thr)
293 {
295  curr_n_thr = n_thr;
296 }
297 
298 /* callbacks */
/* A callback receives an opaque pointer and a thread number. */
299 typedef void cbackfn(void *ptr, const int thr);
/* Register / deregister a ctor/dtor callback pair together with the
 * pointer `parm`. Implemented outside this header; presumably `parm` is
 * what the callbacks later receive as `ptr` -- TODO confirm. */
300 void thread_reg_callback(cbackfn ctor, cbackfn dtor, void *parm);
301 void thread_dereg_callback(cbackfn ctor, cbackfn dtor, void *parm);
302 
/* NUMA helpers, declared only when HAVE_LIBNUMA is defined; implemented
 * outside this header. */
303 #ifdef HAVE_LIBNUMA
304 extern int main_numa_node;
/* Takes a node number, a fault_in flag and an address range given as
 * unsigned long; return value semantics are not visible here. */
305 int do_numa_move_pages(int node, int fault_in,
306  unsigned long firstaddr, unsigned long lastaddr);
/* Has the thr_job_t signature, so it can be passed to thread_start(). */
307 void numa_move_pages_job(struct thr_ctrl *tc);
308 #endif
309 
311 
/* Build-time reminder: emit a warning when SMP is enabled but neither
 * _REENTRANT nor _THREAD_SAFE has been defined. */
312 #if defined(SMP) && (!defined(_REENTRANT) && !defined(_THREAD_SAFE))
313 # warning "Define _REENTRANT and/or _THREAD_SAFE for multithreaded (SMP) compilation!"
314 #endif
315 
316 #else /* !(SMP && HAVE_PTHREADS): non-threaded fallback */
/* Without thread support the thread-related names become constants:
 * no worker threads, thread number 0, always the main thread, and
 * threads_avail() always 0. SMP is undefined so later code sees a
 * non-SMP build. */
317 # define MAIN_PID (getpid())
318 # undef SMP
319 # define num_threads (0)
320 # define thrno (0)
321 # define ismainthread (1)
322 # define threads_avail(x) (0)
/* Stand-ins for the thread API in a non-threaded build. They are
 * `static inline` so that translation units which include this header
 * without calling them do not get unused-function warnings. */

/* Nothing to start; always reports 0. */
static inline int init_threads(const int=0, const bool=false) { return 0; }
/* No threads to bind. */
static inline void bind_threads(bool=true, bool=true, bool=false) {}
/* Nothing to free. */
static inline void free_threads () {}

/* With TBCI_OMP, restrict OpenMP to a single thread; otherwise a no-op. */
static inline void disable_threads ()
{
#ifdef TBCI_OMP
 omp_set_num_threads(1);
#endif
}

/* With TBCI_OMP, let OpenMP use one thread per processor again;
 * otherwise a no-op. */
static inline void reenable_threads ()
{
#ifdef TBCI_OMP
 omp_set_num_threads(omp_get_num_procs());
#endif
}
338 
339 
340 //# define numa_optimize(x,y) do {} while(0)
341 #endif /* SMP && HAVE_PTHREADS */
342 
343 #endif /* TBCI_SMP_H */
void * t_par[6]
Definition: smp.h:173
int t_no
Definition: smp.h:158
void thread_dereg_callback(cbackfn ctor, cbackfn dtor, void *parm)
Definition: smp.cc:340
#define ALIGN(x)
Definition: basics.h:444
static void free_threads()
Definition: smp.h:325
volatile char t_res_dummy[16]
Definition: smp.h:176
long t_res_l
Definition: smp.h:179
int numa_avail
Definition: smp.cc:105
unsigned long t_job_output_no
Definition: smp.h:142
#define MIN(a, b)
Definition: basics.h:655
#define ismainthread
Definition: basics.h:784
#define NAMESPACE_TBCI
Definition: basics.h:317
int t_pipe_from_thread[2]
Definition: smp.h:161
void thread_start(const int thr_no, thr_job_t job, const unsigned long sz,...)
Definition: smp.cc:988
exception base class for the TBCI NumLib
Definition: except.h:58
bool bound_main
Definition: smp.cc:109
void * t_res_ptr
Definition: smp.h:180
void * t_par[6]
Definition: smp.h:137
int threads_busy
Definition: smp.cc:104
unsigned int curr_n_thr
Definition: smp.cc:1056
#define BCHK(cond, exc, txt, ind, rtval)
Definition: basics.h:575
long t_res_l
Definition: smp.h:148
LONG_DOUBLE t_res_ld
Definition: smp.h:146
double t_res_d
Definition: smp.h:147
Definition: smp.h:168
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
Definition: smp.h:126
Definition: smp.h:132
unsigned long t_job_no
Definition: smp.h:169
unsigned long t_off
Definition: smp.h:136
unsigned long t_job_no
Definition: smp.h:133
#define num_threads
Definition: basics.h:782
LONG_DOUBLE t_res_ld
Definition: smp.h:177
struct thr_struct * threads
Definition: smp.cc:106
unsigned long t_size
Definition: smp.h:171
long t_retval
Definition: smp.h:143
unsigned int last_n_thr
Definition: smp.cc:1056
#define THREAD__
Definition: basics.h:774
static int init_threads(const int=0, const bool=false)
Definition: smp.h:323
void thread_reg_callback(cbackfn ctor, cbackfn dtor, void *parm)
Definition: smp.cc:333
volatile char t_res_dummy[16]
Definition: smp.h:145
int numa_node
Definition: smp.h:163
#define THREAD_MAX_ARGS
Definition: smp.h:129
static void bind_threads(bool=true, bool=true, bool=false)
Definition: smp.h:324
THREAD__ struct thr_struct * this_thread
Definition: smp.cc:113
pthread_t t_id
Definition: smp.h:160
static void reenable_threads()
Definition: smp.h:332
#define threads_avail(x)
Definition: smp.h:322
unsigned long t_off
Definition: smp.h:172
void thread_wait(const int thr_no, struct job_output *out)
Definition: smp.cc:997
double thread_wait_result(const int thr_no)
Definition: smp.cc:1017
#define NAMESPACE_END
Definition: basics.h:323
void * t_res_ptr
Definition: smp.h:149
static void disable_threads()
Definition: smp.h:326
#define T
Definition: bdmatlib.cc:20
unsigned int prev_n_thr
Definition: smp.cc:1056
int main_numa_node
Definition: smp.cc:110
int t_no
Definition: smp.h:183
int t_pipe_to_thread[2]
Definition: smp.h:161
#define LONG_DOUBLE
Definition: basics.h:219
thr_job_t t_job
Definition: smp.h:170
void thread_start_off(const int thr_no, thr_job_t job, const unsigned long off, const unsigned long sz,...)
Definition: smp.cc:979
unsigned int t_done_var
Definition: smp.h:162
thr_job_t t_job
Definition: smp.h:134
double t_res_d
Definition: smp.h:178
unsigned long t_size
Definition: smp.h:135
pid_t main_thread_pid
Definition: smp.cc:107
pid_t t_pid
Definition: smp.h:159
#define thrno
Definition: basics.h:783