summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/gcc/gcc-4.3.3/fedora/gcc43-libgomp-speedup.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc-4.3.3/fedora/gcc43-libgomp-speedup.patch')
-rw-r--r--meta/recipes-devtools/gcc/gcc-4.3.3/fedora/gcc43-libgomp-speedup.patch2797
1 files changed, 2797 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-4.3.3/fedora/gcc43-libgomp-speedup.patch b/meta/recipes-devtools/gcc/gcc-4.3.3/fedora/gcc43-libgomp-speedup.patch
new file mode 100644
index 0000000000..da85e556ec
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-4.3.3/fedora/gcc43-libgomp-speedup.patch
@@ -0,0 +1,2797 @@
12008-03-28 Jakub Jelinek <jakub@redhat.com>
2
3 * config/linux/sparc/futex.h (atomic_write_barrier): Fix membar
4 argument.
5
62008-03-27 Jakub Jelinek <jakub@redhat.com>
7
8 * libgomp.h (struct gomp_team_state): Remove single_count field
9 ifndef HAVE_SYNC_BUILTINS.
10 (struct gomp_team): Likewise. Add work_share_list_free_lock
11 ifndef HAVE_SYNC_BUILTINS.
12 * team.c (gomp_new_team): If HAVE_SYNC_BUILTINS is not defined,
13 don't initialize single_count, but instead initialize
14 work_share_list_free_lock.
15 (free_team): Destroy work_share_list_free_lock ifndef
16 HAVE_SYNC_BUILTINS.
17 (gomp_team_start): Don't initialize ts.single_count ifndef
18 HAVE_SYNC_BUILTINS.
19 * work.c (alloc_work_share, free_work_share): Use
20 work_share_list_free_lock instead of atomic chaining ifndef
21 HAVE_SYNC_BUILTINS.
22
232008-03-26 Jakub Jelinek <jakub@redhat.com>
24
25 * loop.c (gomp_loop_init): Fix GFS_DYNAMIC ws->mode setting.
26 * testsuite/libgomp.c/loop-4.c: New test.
27
28 * libgomp.h (struct gomp_team_state): Add single_count field.
29 (struct gomp_team): Likewise.
30 * team.c (gomp_new_team): Clear single_count.
31 (gomp_team_start): Likewise.
32 * single.c (GOMP_single_start): Rewritten if HAVE_SYNC_BUILTINS.
33
342008-03-25 Jakub Jelinek <jakub@redhat.com>
35
36 * team.c (gomp_thread_start): Don't clear ts.static_trip here.
37 * loop.c (gomp_loop_static_start, gomp_loop_dynamic_start): Clear
38 ts.static_trip here.
39 * work.c (gomp_work_share_start): Don't clear ts.static_trip here.
40
412008-03-21 Jakub Jelinek <jakub@redhat.com>
42
43 * libgomp.h: Include ptrlock.h.
44 (struct gomp_work_share): Reshuffle fields. Add next_alloc,
45 next_ws, next_free and inline_ordered_team_ids fields, change
46 ordered_team_ids into pointer from flexible array member.
47 (struct gomp_team_state): Add last_work_share field, remove
48 work_share_generation.
49 (struct gomp_team): Remove work_share_lock, generation_mask,
50 oldest_live_gen, num_live_gen and init_work_shares fields, add
51 work work_share_list_alloc, work_share_list_free and work_share_chunk
52 fields. Change work_shares from pointer to pointers into an array.
53 (gomp_new_team): New prototype.
54 (gomp_team_start): Change type of last argument.
55 (gomp_new_work_share): Removed.
56 (gomp_init_work_share, gomp_fini_work_share): New prototypes.
57 (gomp_work_share_init_done): New static inline.
58 * team.c (gomp_thread_start): Clear ts.last_work_share, don't clear
59 ts.work_share_generation.
60 (new_team): Removed.
61 (gomp_new_team): New function.
62 (free_team): Free gomp_work_share blocks chained through next_alloc,
63 instead of freeing work_shares and destroying work_share_lock.
64 (gomp_team_start): Change last argument from ws to team, don't create
65 new team, set ts.work_share to &team->work_shares[0] and clear
66 ts.last_work_share. Don't clear ts.work_share_generation.
67 (gomp_team_end): Call gomp_fini_work_share.
68 * work.c (gomp_new_work_share): Removed.
69 (alloc_work_share, gomp_init_work_share, gomp_fini_work_share): New
70 functions.
71 (free_work_share): Add team argument. Call gomp_fini_work_share
72 and then either free ws if orphaned, or put it into
73 work_share_list_free list of the current team.
74 (gomp_work_share_start, gomp_work_share_end,
75 gomp_work_share_end_nowait): Rewritten.
76 * sections.c (GOMP_sections_start): Call gomp_work_share_init_done
77 after gomp_sections_init. If HAVE_SYNC_BUILTINS, call
78 gomp_iter_dynamic_next instead of the _locked variant and don't take
79 lock around it, otherwise acquire it before calling
80 gomp_iter_dynamic_next_locked.
81 (GOMP_sections_next): If HAVE_SYNC_BUILTINS, call
82 gomp_iter_dynamic_next instead of the _locked variant and don't take
83 lock around it.
84 (GOMP_parallel_sections_start): Call gomp_new_team instead of
85 gomp_new_work_share. Call gomp_sections_init on &team->work_shares[0].
86 Adjust gomp_team_start caller.
87 * loop.c (gomp_loop_static_start, gomp_loop_ordered_static_start): Call
88 gomp_work_share_init_done after gomp_loop_init. Don't unlock ws->lock.
89 (gomp_loop_dynamic_start, gomp_loop_guided_start): Call
90 gomp_work_share_init_done after gomp_loop_init. If HAVE_SYNC_BUILTINS,
91 don't unlock ws->lock, otherwise lock it.
92 (gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start): Call
93 gomp_work_share_init_done after gomp_loop_init. Lock ws->lock.
94 (gomp_parallel_loop_start): Call gomp_new_team instead of
95 gomp_new_work_share. Call gomp_loop_init on &team->work_shares[0].
96 Adjust gomp_team_start caller.
97 * single.c (GOMP_single_start, GOMP_single_copy_start): Call
98 gomp_work_share_init_done if gomp_work_share_start returned true.
99 Don't unlock ws->lock.
100 * parallel.c (GOMP_parallel_start): Call gomp_new_team and pass that
101 as last argument to gomp_team_start.
102 * config/linux/ptrlock.c: New file.
103 * config/linux/ptrlock.h: New file.
104 * config/posix/ptrlock.c: New file.
105 * config/posix/ptrlock.h: New file.
106 * Makefile.am (libgomp_la_SOURCES): Add ptrlock.c.
107 * Makefile.in: Regenerated.
108 * testsuite/Makefile.in: Regenerated.
109
1102008-03-19 Jakub Jelinek <jakub@redhat.com>
111
112 * libgomp.h (gomp_active_wait_policy): Remove decl.
113 (gomp_throttled_spin_count_var, gomp_available_cpus,
114 gomp_managed_threads): New extern decls.
115 * team.c (gomp_team_start, gomp_team_end): If number of threads
116 changed, adjust atomically gomp_managed_threads.
117 * env.c (gomp_active_wait_policy, gomp_block_time_var): Remove.
118 (gomp_throttled_spin_count_var, gomp_available_cpus,
119 gomp_managed_threads): New variables.
120 (parse_millis): Removed.
121 (parse_spincount): New function.
122 (parse_wait_policy): Return -1/0/1 instead of setting
123 gomp_active_wait_policy.
124 (initialize_env): Call gomp_init_num_threads unconditionally.
125 Initialize gomp_available_cpus. Call parse_spincount instead
126 of parse_millis, initialize gomp_{,throttled_}spin_count_var
127 depending on presence and value of OMP_WAIT_POLICY and
128 GOMP_SPINCOUNT env vars.
129 * config/linux/wait.h (do_wait): Use gomp_throttled_spin_count_var
130 instead of gomp_spin_count_var if gomp_managed_threads >
131 gomp_available_cpus.
132
133 * config/linux/wait.h: Include errno.h.
134 (FUTEX_WAIT, FUTEX_WAKE, FUTEX_PRIVATE_FLAG): Define.
135 (gomp_futex_wake, gomp_futex_wait): New extern decls.
136 * config/linux/mutex.c (gomp_futex_wake, gomp_futex_wait): New
137 variables.
138 * config/linux/powerpc/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
139 (sys_futex0): Return error code.
140 (futex_wake, futex_wait): If ENOSYS was returned, clear
141 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
142 * config/linux/alpha/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
143 (futex_wake, futex_wait): If ENOSYS was returned, clear
144 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
145 * config/linux/x86/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
146 (sys_futex0): Return error code.
147 (futex_wake, futex_wait): If ENOSYS was returned, clear
148 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
149 * config/linux/s390/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
150 (sys_futex0): Return error code.
151 (futex_wake, futex_wait): If ENOSYS was returned, clear
152 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
153 * config/linux/ia64/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
154 (sys_futex0): Return error code.
155 (futex_wake, futex_wait): If ENOSYS was returned, clear
156 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
157 * config/linux/sparc/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
158 (sys_futex0): Return error code.
159 (futex_wake, futex_wait): If ENOSYS was returned, clear
160 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
161
1622008-03-18 Jakub Jelinek <jakub@redhat.com>
163
164 * libgomp.h (struct gomp_work_share): Add mode field. Put lock and
165 next into a different cache line from most of the write-once fields.
166 * loop.c: Include limits.h.
167 (gomp_loop_init): For GFS_DYNAMIC, multiply ws->chunk_size by incr.
168 If adding ws->chunk_size nthreads + 1 times after end won't
169 overflow, set ws->mode to 1.
170 * iter.c (gomp_iter_dynamic_next_locked): Don't multiply
171 ws->chunk_size by incr.
172 (gomp_iter_dynamic_next): Likewise. If ws->mode, use more efficient
173 code.
174 * work.c: Include stddef.h.
175 (gomp_new_work_share): Use offsetof rather than sizeof.
176
1772008-03-17 Jakub Jelinek <jakub@redhat.com>
178
179 * libgomp.h (struct gomp_team): Change ordered_release field
180 into gomp_sem_t ** from flexible array member. Add implicit_task
181 and initial_work_shares fields.
182 (gomp_new_task): Removed.
183 (gomp_init_task): New prototype.
184 * team.c (new_team): Allocate implicit_task for each thread
185 and initial work_shares together with gomp_team allocation.
186 (free_team): Only free work_shares if it is not init_work_shares.
187 (gomp_team_start): Use gomp_init_task instead of gomp_new_task,
188 set thr->task to the corresponding implicit_task array entry.
189 * task.c (gomp_new_task): Removed.
190 (gomp_init_task): New function.
191 (gomp_end_task): Don't free the task.
192 (GOMP_task): Allocate struct gomp_task on the stack, call
193 gomp_init_task rather than gomp_new_task.
194 * work.c (gomp_work_share_start): If work_shares ==
195 init_work_shares, gomp_malloc + memcpy rather than gomp_realloc.
196
1972008-03-15 Jakub Jelinek <jakub@redhat.com>
198 Ulrich Drepper <drepper@redhat.com>
199
200 * config/linux/bar.h (gomp_barrier_state_t): Rewritten.
201 (gomp_barrier_state_t): Change to unsigned int.
202 (gomp_barrier_init, gomp_barrier_reinit, gomp_barrier_destroy,
203 gomp_barrier_wait_start, gomp_barrier_last_thread): Rewritten.
204 (gomp_barrier_wait_last): Prototype rather than inline.
205 * config/linux/bar.c (gomp_barrier_wait_end): Rewritten.
206 (gomp_barrier_wait_last): New function.
207
2082008-03-15 Jakub Jelinek <jakub@redhat.com>
209
210 * team.c (gomp_thread_start): Use gomp_barrier_wait_last instead
211 of gomp_barrier_wait.
212 * env.c (gomp_block_time_var, gomp_spin_count_var): New variables.
213 (parse_millis): New function.
214 (initialize_env): Handle GOMP_BLOCKTIME env var.
215 * libgomp.h (struct gomp_team): Move close to the end of the struct.
216 (gomp_spin_count_var): New extern var decl.
217 * work.c (gomp_work_share_end): Use gomp_barrier_state_t bstate
218 var instead of bool last, call gomp_barrier_last_thread to check
219 for last thread, pass bstate to gomp_barrier_wait_end.
220 * config/linux/wait.h: New file.
221 * config/linux/mutex.c: Include wait.h instead of libgomp.h and
222 futex.h.
223 (gomp_mutex_lock_slow): Call do_wait instead of futex_wait.
224 * config/linux/bar.c: Include wait.h instead of libgomp.h and
225 futex.h.
226 (gomp_barrier_wait_end): Change second argument to
227 gomp_barrier_state_t. Call do_wait instead of futex_wait.
228 * config/linux/sem.c: Include wait.h instead of libgomp.h and
229 futex.h.
230 (gomp_sem_wait_slow): Call do_wait instead of futex_wait.
231 * config/linux/lock.c: Include wait.h instead of libgomp.h and
232 futex.h.
233 (gomp_set_nest_lock_25): Call do_wait instead of futex_wait.
234 * config/linux/affinity.c: Assume HAVE_SYNC_BUILTINS.
235 * config/linux/bar.h (gomp_barrier_state_t): New typedef.
236 (gomp_barrier_wait_end): Change second argument to
237 gomp_barrier_state_t.
238 (gomp_barrier_wait_start): Return gomp_barrier_state_t.
239 (gomp_barrier_last_thread, gomp_barrier_wait_last): New static
240 inlines.
241 * config/linux/powerpc/futex.h (cpu_relax, atomic_write_barrier): New
242 static inlines.
243 * config/linux/alpha/futex.h (cpu_relax, atomic_write_barrier):
244 Likewise.
245 * config/linux/x86/futex.h (cpu_relax, atomic_write_barrier):
246 Likewise.
247 * config/linux/s390/futex.h (cpu_relax, atomic_write_barrier):
248 Likewise.
249 * config/linux/ia64/futex.h (cpu_relax, atomic_write_barrier):
250 Likewise.
251 * config/linux/sparc/futex.h (cpu_relax, atomic_write_barrier):
252 Likewise.
253 * config/posix/bar.c (gomp_barrier_wait_end): Change second argument
254 to gomp_barrier_state_t.
255 * config/posix/bar.h (gomp_barrier_state_t): New typedef.
256 (gomp_barrier_wait_end): Change second argument to
257 gomp_barrier_state_t.
258 (gomp_barrier_wait_start): Return gomp_barrier_state_t.
259 (gomp_barrier_last_thread, gomp_barrier_wait_last): New static
260 inlines.
261
262--- libgomp/parallel.c.jj 2007-12-07 14:41:01.000000000 +0100
263+++ libgomp/parallel.c 2008-03-26 15:32:06.000000000 +0100
264@@ -68,7 +68,7 @@ void
265 GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
266 {
267 num_threads = gomp_resolve_num_threads (num_threads);
268- gomp_team_start (fn, data, num_threads, NULL);
269+ gomp_team_start (fn, data, num_threads, gomp_new_team (num_threads));
270 }
271
272 void
273--- libgomp/sections.c.jj 2007-12-07 14:41:01.000000000 +0100
274+++ libgomp/sections.c 2008-03-26 15:33:06.000000000 +0100
275@@ -59,14 +59,24 @@ GOMP_sections_start (unsigned count)
276 long s, e, ret;
277
278 if (gomp_work_share_start (false))
279- gomp_sections_init (thr->ts.work_share, count);
280+ {
281+ gomp_sections_init (thr->ts.work_share, count);
282+ gomp_work_share_init_done ();
283+ }
284
285+#ifdef HAVE_SYNC_BUILTINS
286+ if (gomp_iter_dynamic_next (&s, &e))
287+ ret = s;
288+ else
289+ ret = 0;
290+#else
291+ gomp_mutex_lock (&thr->ts.work_share->lock);
292 if (gomp_iter_dynamic_next_locked (&s, &e))
293 ret = s;
294 else
295 ret = 0;
296-
297 gomp_mutex_unlock (&thr->ts.work_share->lock);
298+#endif
299
300 return ret;
301 }
302@@ -83,15 +93,23 @@ GOMP_sections_start (unsigned count)
303 unsigned
304 GOMP_sections_next (void)
305 {
306- struct gomp_thread *thr = gomp_thread ();
307 long s, e, ret;
308
309+#ifdef HAVE_SYNC_BUILTINS
310+ if (gomp_iter_dynamic_next (&s, &e))
311+ ret = s;
312+ else
313+ ret = 0;
314+#else
315+ struct gomp_thread *thr = gomp_thread ();
316+
317 gomp_mutex_lock (&thr->ts.work_share->lock);
318 if (gomp_iter_dynamic_next_locked (&s, &e))
319 ret = s;
320 else
321 ret = 0;
322 gomp_mutex_unlock (&thr->ts.work_share->lock);
323+#endif
324
325 return ret;
326 }
327@@ -103,15 +121,15 @@ void
328 GOMP_parallel_sections_start (void (*fn) (void *), void *data,
329 unsigned num_threads, unsigned count)
330 {
331- struct gomp_work_share *ws;
332+ struct gomp_team *team;
333
334 num_threads = gomp_resolve_num_threads (num_threads);
335 if (gomp_dyn_var && num_threads > count)
336 num_threads = count;
337
338- ws = gomp_new_work_share (false, num_threads);
339- gomp_sections_init (ws, count);
340- gomp_team_start (fn, data, num_threads, ws);
341+ team = gomp_new_team (num_threads);
342+ gomp_sections_init (&team->work_shares[0], count);
343+ gomp_team_start (fn, data, num_threads, team);
344 }
345
346 /* The GOMP_section_end* routines are called after the thread is told
347--- libgomp/env.c.jj 2007-12-07 14:41:01.000000000 +0100
348+++ libgomp/env.c 2008-03-26 16:40:26.000000000 +0100
349@@ -44,6 +44,11 @@ enum gomp_schedule_type gomp_run_sched_v
350 unsigned long gomp_run_sched_chunk = 1;
351 unsigned short *gomp_cpu_affinity;
352 size_t gomp_cpu_affinity_len;
353+#ifndef HAVE_SYNC_BUILTINS
354+gomp_mutex_t gomp_remaining_threads_lock;
355+#endif
356+unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
357+unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
358
359 /* Parse the OMP_SCHEDULE environment variable. */
360
361@@ -147,6 +152,79 @@ parse_unsigned_long (const char *name, u
362 return false;
363 }
364
365+/* Parse the GOMP_SPINCOUNT environment varible. Return true if one was
366+ present and it was successfully parsed. */
367+
368+static bool
369+parse_spincount (const char *name, unsigned long long *pvalue)
370+{
371+ char *env, *end;
372+ unsigned long long value, mult = 1;
373+
374+ env = getenv (name);
375+ if (env == NULL)
376+ return false;
377+
378+ while (isspace ((unsigned char) *env))
379+ ++env;
380+ if (*env == '\0')
381+ goto invalid;
382+
383+ if (strncasecmp (env, "infinite", 8) == 0
384+ || strncasecmp (env, "infinity", 8) == 0)
385+ {
386+ value = ~0ULL;
387+ end = env + 8;
388+ goto check_tail;
389+ }
390+
391+ errno = 0;
392+ value = strtoull (env, &end, 10);
393+ if (errno)
394+ goto invalid;
395+
396+ while (isspace ((unsigned char) *end))
397+ ++end;
398+ if (*end != '\0')
399+ {
400+ switch (tolower (*end))
401+ {
402+ case 'k':
403+ mult = 1000LL;
404+ break;
405+ case 'm':
406+ mult = 1000LL * 1000LL;
407+ break;
408+ case 'g':
409+ mult = 1000LL * 1000LL * 1000LL;
410+ break;
411+ case 't':
412+ mult = 1000LL * 1000LL * 1000LL * 1000LL;
413+ break;
414+ default:
415+ goto invalid;
416+ }
417+ ++end;
418+ check_tail:
419+ while (isspace ((unsigned char) *end))
420+ ++end;
421+ if (*end != '\0')
422+ goto invalid;
423+ }
424+
425+ if (value > ~0ULL / mult)
426+ value = ~0ULL;
427+ else
428+ value *= mult;
429+
430+ *pvalue = value;
431+ return true;
432+
433+ invalid:
434+ gomp_error ("Invalid value for environment variable %s", name);
435+ return false;
436+}
437+
438 /* Parse a boolean value for environment variable NAME and store the
439 result in VALUE. */
440
441@@ -281,10 +359,25 @@ initialize_env (void)
442 parse_schedule ();
443 parse_boolean ("OMP_DYNAMIC", &gomp_dyn_var);
444 parse_boolean ("OMP_NESTED", &gomp_nest_var);
445+ gomp_init_num_threads ();
446+ gomp_available_cpus = gomp_nthreads_var;
447 if (!parse_unsigned_long ("OMP_NUM_THREADS", &gomp_nthreads_var))
448- gomp_init_num_threads ();
449+ gomp_nthreads_var = gomp_available_cpus;
450 if (parse_affinity ())
451 gomp_init_affinity ();
452+ if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var))
453+ {
454+ /* Using a rough estimation of 100000 spins per msec,
455+ use 200 msec blocking.
456+ Depending on the CPU speed, this can be e.g. 5 times longer
457+ or 5 times shorter. */
458+ gomp_spin_count_var = 20000000LL;
459+ }
460+ /* gomp_throttled_spin_count_var is used when there are more libgomp
461+ managed threads than available CPUs. Use very short spinning. */
462+ gomp_throttled_spin_count_var = 100LL;
463+ if (gomp_throttled_spin_count_var > gomp_spin_count_var)
464+ gomp_throttled_spin_count_var = gomp_spin_count_var;
465
466 /* Not strictly environment related, but ordering constructors is tricky. */
467 pthread_attr_init (&gomp_thread_attr);
468--- libgomp/libgomp.h.jj 2007-12-07 14:41:01.000000000 +0100
469+++ libgomp/libgomp.h 2008-03-27 12:21:51.000000000 +0100
470@@ -50,6 +50,7 @@
471 #include "sem.h"
472 #include "mutex.h"
473 #include "bar.h"
474+#include "ptrlock.h"
475
476
477 /* This structure contains the data to control one work-sharing construct,
478@@ -70,6 +71,8 @@ struct gomp_work_share
479 If this is a SECTIONS construct, this value will always be DYNAMIC. */
480 enum gomp_schedule_type sched;
481
482+ int mode;
483+
484 /* This is the chunk_size argument to the SCHEDULE clause. */
485 long chunk_size;
486
487@@ -81,17 +84,38 @@ struct gomp_work_share
488 is always 1. */
489 long incr;
490
491- /* This lock protects the update of the following members. */
492- gomp_mutex_t lock;
493+ /* This is a circular queue that details which threads will be allowed
494+ into the ordered region and in which order. When a thread allocates
495+ iterations on which it is going to work, it also registers itself at
496+ the end of the array. When a thread reaches the ordered region, it
497+ checks to see if it is the one at the head of the queue. If not, it
498+ blocks on its RELEASE semaphore. */
499+ unsigned *ordered_team_ids;
500
501- union {
502- /* This is the next iteration value to be allocated. In the case of
503- GFS_STATIC loops, this the iteration start point and never changes. */
504- long next;
505+ /* This is the number of threads that have registered themselves in
506+ the circular queue ordered_team_ids. */
507+ unsigned ordered_num_used;
508
509- /* This is the returned data structure for SINGLE COPYPRIVATE. */
510- void *copyprivate;
511- };
512+ /* This is the team_id of the currently acknowledged owner of the ordered
513+ section, or -1u if the ordered section has not been acknowledged by
514+ any thread. This is distinguished from the thread that is *allowed*
515+ to take the section next. */
516+ unsigned ordered_owner;
517+
518+ /* This is the index into the circular queue ordered_team_ids of the
519+ current thread that's allowed into the ordered reason. */
520+ unsigned ordered_cur;
521+
522+ /* This is a chain of allocated gomp_work_share blocks, valid only
523+ in the first gomp_work_share struct in the block. */
524+ struct gomp_work_share *next_alloc;
525+
526+ /* The above fields are written once during workshare initialization,
527+ or related to ordered worksharing. Make sure the following fields
528+ are in a different cache line. */
529+
530+ /* This lock protects the update of the following members. */
531+ gomp_mutex_t lock __attribute__((aligned (64)));
532
533 /* This is the count of the number of threads that have exited the work
534 share construct. If the construct was marked nowait, they have moved on
535@@ -99,27 +123,28 @@ struct gomp_work_share
536 of the team to exit the work share construct must deallocate it. */
537 unsigned threads_completed;
538
539- /* This is the index into the circular queue ordered_team_ids of the
540- current thread that's allowed into the ordered reason. */
541- unsigned ordered_cur;
542+ union {
543+ /* This is the next iteration value to be allocated. In the case of
544+ GFS_STATIC loops, this the iteration start point and never changes. */
545+ long next;
546
547- /* This is the number of threads that have registered themselves in
548- the circular queue ordered_team_ids. */
549- unsigned ordered_num_used;
550+ /* This is the returned data structure for SINGLE COPYPRIVATE. */
551+ void *copyprivate;
552+ };
553
554- /* This is the team_id of the currently acknoledged owner of the ordered
555- section, or -1u if the ordered section has not been acknowledged by
556- any thread. This is distinguished from the thread that is *allowed*
557- to take the section next. */
558- unsigned ordered_owner;
559+ union {
560+ /* Link to gomp_work_share struct for next work sharing construct
561+ encountered after this one. */
562+ gomp_ptrlock_t next_ws;
563+
564+ /* gomp_work_share structs are chained in the free work share cache
565+ through this. */
566+ struct gomp_work_share *next_free;
567+ };
568
569- /* This is a circular queue that details which threads will be allowed
570- into the ordered region and in which order. When a thread allocates
571- iterations on which it is going to work, it also registers itself at
572- the end of the array. When a thread reaches the ordered region, it
573- checks to see if it is the one at the head of the queue. If not, it
574- blocks on its RELEASE semaphore. */
575- unsigned ordered_team_ids[];
576+ /* If only few threads are in the team, ordered_team_ids can point
577+ to this array which fills the padding at the end of this struct. */
578+ unsigned inline_ordered_team_ids[0];
579 };
580
581 /* This structure contains all of the thread-local data associated with
582@@ -133,21 +158,24 @@ struct gomp_team_state
583
584 /* This is the work share construct which this thread is currently
585 processing. Recall that with NOWAIT, not all threads may be
586- processing the same construct. This value is NULL when there
587- is no construct being processed. */
588+ processing the same construct. */
589 struct gomp_work_share *work_share;
590
591+ /* This is the previous work share construct or NULL if there wasn't any.
592+ When all threads are done with the current work sharing construct,
593+ the previous one can be freed. The current one can't, as its
594+ next_ws field is used. */
595+ struct gomp_work_share *last_work_share;
596+
597 /* This is the ID of this thread within the team. This value is
598 guaranteed to be between 0 and N-1, where N is the number of
599 threads in the team. */
600 unsigned team_id;
601
602- /* The work share "generation" is a number that increases by one for
603- each work share construct encountered in the dynamic flow of the
604- program. It is used to find the control data for the work share
605- when encountering it for the first time. This particular number
606- reflects the generation of the work_share member of this struct. */
607- unsigned work_share_generation;
608+#ifdef HAVE_SYNC_BUILTINS
609+ /* Number of single stmts encountered. */
610+ unsigned long single_count;
611+#endif
612
613 /* For GFS_RUNTIME loops that resolved to GFS_STATIC, this is the
614 trip number through the loop. So first time a particular loop
615@@ -163,41 +191,53 @@ struct gomp_team_state
616
617 struct gomp_team
618 {
619- /* This lock protects access to the following work shares data structures. */
620- gomp_mutex_t work_share_lock;
621-
622- /* This is a dynamically sized array containing pointers to the control
623- structs for all "live" work share constructs. Here "live" means that
624- the construct has been encountered by at least one thread, and not
625- completed by all threads. */
626- struct gomp_work_share **work_shares;
627-
628- /* The work_shares array is indexed by "generation & generation_mask".
629- The mask will be 2**N - 1, where 2**N is the size of the array. */
630- unsigned generation_mask;
631-
632- /* These two values define the bounds of the elements of the work_shares
633- array that are currently in use. */
634- unsigned oldest_live_gen;
635- unsigned num_live_gen;
636-
637 /* This is the number of threads in the current team. */
638 unsigned nthreads;
639
640+ /* This is number of gomp_work_share structs that have been allocated
641+ as a block last time. */
642+ unsigned work_share_chunk;
643+
644 /* This is the saved team state that applied to a master thread before
645 the current thread was created. */
646 struct gomp_team_state prev_ts;
647
648- /* This barrier is used for most synchronization of the team. */
649- gomp_barrier_t barrier;
650-
651 /* This semaphore should be used by the master thread instead of its
652 "native" semaphore in the thread structure. Required for nested
653 parallels, as the master is a member of two teams. */
654 gomp_sem_t master_release;
655
656- /* This array contains pointers to the release semaphore of the threads
657- in the team. */
658+ /* List of gomp_work_share structs chained through next_free fields.
659+ This is populated and taken off only by the first thread in the
660+ team encountering a new work sharing construct, in a critical
661+ section. */
662+ struct gomp_work_share *work_share_list_alloc;
663+
664+ /* List of gomp_work_share structs freed by free_work_share. New
665+ entries are atomically added to the start of the list, and
666+ alloc_work_share can safely only move all but the first entry
667+ to work_share_list alloc, as free_work_share can happen concurrently
668+ with alloc_work_share. */
669+ struct gomp_work_share *work_share_list_free;
670+
671+#ifdef HAVE_SYNC_BUILTINS
672+ /* Number of simple single regions encountered by threads in this
673+ team. */
674+ unsigned long single_count;
675+#else
676+ /* Mutex protecting addition of workshares to work_share_list_free. */
677+ gomp_mutex_t work_share_list_free_lock;
678+#endif
679+
680+ /* This barrier is used for most synchronization of the team. */
681+ gomp_barrier_t barrier;
682+
683+ /* Initial work shares, to avoid allocating any gomp_work_share
684+ structs in the common case. */
685+ struct gomp_work_share work_shares[8];
686+
687+ /* This is an array with pointers to the release semaphore
688+ of the threads in the team. */
689 gomp_sem_t *ordered_release[];
690 };
691
692@@ -242,6 +282,11 @@ extern bool gomp_dyn_var;
693 extern bool gomp_nest_var;
694 extern enum gomp_schedule_type gomp_run_sched_var;
695 extern unsigned long gomp_run_sched_chunk;
696+#ifndef HAVE_SYNC_BUILTINS
697+extern gomp_mutex_t gomp_remaining_threads_lock;
698+#endif
699+extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
700+extern unsigned long gomp_available_cpus, gomp_managed_threads;
701
702 /* The attributes to be used during thread creation. */
703 extern pthread_attr_t gomp_thread_attr;
704@@ -306,17 +351,27 @@ extern unsigned gomp_dynamic_max_threads
705
706 /* team.c */
707
708+extern struct gomp_team *gomp_new_team (unsigned);
709 extern void gomp_team_start (void (*) (void *), void *, unsigned,
710- struct gomp_work_share *);
711+ struct gomp_team *);
712 extern void gomp_team_end (void);
713
714 /* work.c */
715
716-extern struct gomp_work_share * gomp_new_work_share (bool, unsigned);
717+extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
718+extern void gomp_fini_work_share (struct gomp_work_share *);
719 extern bool gomp_work_share_start (bool);
720 extern void gomp_work_share_end (void);
721 extern void gomp_work_share_end_nowait (void);
722
723+static inline void
724+gomp_work_share_init_done (void)
725+{
726+ struct gomp_thread *thr = gomp_thread ();
727+ if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
728+ gomp_ptrlock_set (&thr->ts.last_work_share->next_ws, thr->ts.work_share);
729+}
730+
731 #ifdef HAVE_ATTRIBUTE_VISIBILITY
732 # pragma GCC visibility pop
733 #endif
734--- libgomp/iter.c.jj 2008-03-26 14:48:34.000000000 +0100
735+++ libgomp/iter.c 2008-03-26 15:11:23.000000000 +0100
736@@ -1,4 +1,4 @@
737-/* Copyright (C) 2005 Free Software Foundation, Inc.
738+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
739 Contributed by Richard Henderson <rth@redhat.com>.
740
741 This file is part of the GNU OpenMP Library (libgomp).
742@@ -154,7 +154,7 @@ gomp_iter_dynamic_next_locked (long *pst
743 if (start == ws->end)
744 return false;
745
746- chunk = ws->chunk_size * ws->incr;
747+ chunk = ws->chunk_size;
748 left = ws->end - start;
749 if (ws->incr < 0)
750 {
751@@ -186,11 +186,38 @@ gomp_iter_dynamic_next (long *pstart, lo
752 struct gomp_work_share *ws = thr->ts.work_share;
753 long start, end, nend, chunk, incr;
754
755- start = ws->next;
756 end = ws->end;
757 incr = ws->incr;
758- chunk = ws->chunk_size * incr;
759+ chunk = ws->chunk_size;
760+
761+ if (__builtin_expect (ws->mode, 1))
762+ {
763+ long tmp = __sync_fetch_and_add (&ws->next, chunk);
764+ if (incr > 0)
765+ {
766+ if (tmp >= end)
767+ return false;
768+ nend = tmp + chunk;
769+ if (nend > end)
770+ nend = end;
771+ *pstart = tmp;
772+ *pend = nend;
773+ return true;
774+ }
775+ else
776+ {
777+ if (tmp <= end)
778+ return false;
779+ nend = tmp + chunk;
780+ if (nend < end)
781+ nend = end;
782+ *pstart = tmp;
783+ *pend = nend;
784+ return true;
785+ }
786+ }
787
788+ start = ws->next;
789 while (1)
790 {
791 long left = end - start;
792--- libgomp/work.c.jj 2007-12-07 14:41:01.000000000 +0100
793+++ libgomp/work.c 2008-03-27 12:21:51.000000000 +0100
794@@ -1,4 +1,4 @@
795-/* Copyright (C) 2005 Free Software Foundation, Inc.
796+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
797 Contributed by Richard Henderson <rth@redhat.com>.
798
799 This file is part of the GNU OpenMP Library (libgomp).
800@@ -29,39 +29,138 @@
801 of threads. */
802
803 #include "libgomp.h"
804+#include <stddef.h>
805 #include <stdlib.h>
806 #include <string.h>
807
808
809-/* Create a new work share structure. */
810+/* Allocate a new work share structure, preferably from current team's
811+ free gomp_work_share cache. */
812
813-struct gomp_work_share *
814-gomp_new_work_share (bool ordered, unsigned nthreads)
815+static struct gomp_work_share *
816+alloc_work_share (struct gomp_team *team)
817 {
818 struct gomp_work_share *ws;
819- size_t size;
820+ unsigned int i;
821
822- size = sizeof (*ws);
823- if (ordered)
824- size += nthreads * sizeof (ws->ordered_team_ids[0]);
825+ /* This is called in a critical section. */
826+ if (team->work_share_list_alloc != NULL)
827+ {
828+ ws = team->work_share_list_alloc;
829+ team->work_share_list_alloc = ws->next_free;
830+ return ws;
831+ }
832
833- ws = gomp_malloc_cleared (size);
834- gomp_mutex_init (&ws->lock);
835- ws->ordered_owner = -1;
836+#ifdef HAVE_SYNC_BUILTINS
837+ ws = team->work_share_list_free;
838+ /* We need atomic read from work_share_list_free,
839+ as free_work_share can be called concurrently. */
840+ __asm ("" : "+r" (ws));
841+
842+ if (ws && ws->next_free)
843+ {
844+ struct gomp_work_share *next = ws->next_free;
845+ ws->next_free = NULL;
846+ team->work_share_list_alloc = next->next_free;
847+ return next;
848+ }
849+#else
850+ gomp_mutex_lock (&team->work_share_list_free_lock);
851+ ws = team->work_share_list_free;
852+ if (ws)
853+ {
854+ team->work_share_list_alloc = ws->next_free;
855+ team->work_share_list_free = NULL;
856+ gomp_mutex_unlock (&team->work_share_list_free_lock);
857+ return ws;
858+ }
859+ gomp_mutex_unlock (&team->work_share_list_free_lock);
860+#endif
861
862+ team->work_share_chunk *= 2;
863+ ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
864+ ws->next_alloc = team->work_shares[0].next_alloc;
865+ team->work_shares[0].next_alloc = ws;
866+ team->work_share_list_alloc = &ws[1];
867+ for (i = 1; i < team->work_share_chunk - 1; i++)
868+ ws[i].next_free = &ws[i + 1];
869+ ws[i].next_free = NULL;
870 return ws;
871 }
872
873+/* Initialize an already allocated struct gomp_work_share.
874+ This shouldn't touch the next_alloc field. */
875+
876+void
877+gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
878+ unsigned nthreads)
879+{
880+ gomp_mutex_init (&ws->lock);
881+ if (__builtin_expect (ordered, 0))
882+ {
883+#define INLINE_ORDERED_TEAM_IDS_CNT \
884+ ((sizeof (struct gomp_work_share) \
885+ - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
886+ / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
887+
888+ if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
889+ ws->ordered_team_ids
890+ = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
891+ else
892+ ws->ordered_team_ids = ws->inline_ordered_team_ids;
893+ memset (ws->ordered_team_ids, '\0',
894+ nthreads * sizeof (*ws->ordered_team_ids));
895+ ws->ordered_num_used = 0;
896+ ws->ordered_owner = -1;
897+ ws->ordered_cur = 0;
898+ }
899+ else
900+ ws->ordered_team_ids = NULL;
901+ gomp_ptrlock_init (&ws->next_ws, NULL);
902+ ws->threads_completed = 0;
903+}
904
905-/* Free a work share structure. */
906+/* Do any needed destruction of gomp_work_share fields before it
907+ is put back into free gomp_work_share cache or freed. */
908
909-static void
910-free_work_share (struct gomp_work_share *ws)
911+void
912+gomp_fini_work_share (struct gomp_work_share *ws)
913 {
914 gomp_mutex_destroy (&ws->lock);
915- free (ws);
916+ if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
917+ free (ws->ordered_team_ids);
918+ gomp_ptrlock_destroy (&ws->next_ws);
919 }
920
921+/* Free a work share struct, if not orphaned, put it into current
922+ team's free gomp_work_share cache. */
923+
924+static inline void
925+free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
926+{
927+ gomp_fini_work_share (ws);
928+ if (__builtin_expect (team == NULL, 0))
929+ free (ws);
930+ else
931+ {
932+ struct gomp_work_share *next_ws;
933+#ifdef HAVE_SYNC_BUILTINS
934+ do
935+ {
936+ next_ws = team->work_share_list_free;
937+ ws->next_free = next_ws;
938+ }
939+ while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
940+ next_ws, ws));
941+#else
942+ gomp_mutex_lock (&team->work_share_list_free_lock);
943+ next_ws = team->work_share_list_free;
944+ ws->next_free = next_ws;
945+ team->work_share_list_free = ws;
946+ gomp_mutex_unlock (&team->work_share_list_free_lock);
947+#endif
948+ }
949+}
950
951 /* The current thread is ready to begin the next work sharing construct.
952 In all cases, thr->ts.work_share is updated to point to the new
953@@ -74,71 +173,34 @@ gomp_work_share_start (bool ordered)
954 struct gomp_thread *thr = gomp_thread ();
955 struct gomp_team *team = thr->ts.team;
956 struct gomp_work_share *ws;
957- unsigned ws_index, ws_gen;
958
959 /* Work sharing constructs can be orphaned. */
960 if (team == NULL)
961 {
962- ws = gomp_new_work_share (ordered, 1);
963+ ws = gomp_malloc (sizeof (*ws));
964+ gomp_init_work_share (ws, ordered, 1);
965 thr->ts.work_share = ws;
966- thr->ts.static_trip = 0;
967- gomp_mutex_lock (&ws->lock);
968- return true;
969+ return ws;
970 }
971
972- gomp_mutex_lock (&team->work_share_lock);
973-
974- /* This thread is beginning its next generation. */
975- ws_gen = ++thr->ts.work_share_generation;
976-
977- /* If this next generation is not newer than any other generation in
978- the team, then simply reference the existing construct. */
979- if (ws_gen - team->oldest_live_gen < team->num_live_gen)
980+ ws = thr->ts.work_share;
981+ thr->ts.last_work_share = ws;
982+ ws = gomp_ptrlock_get (&ws->next_ws);
983+ if (ws == NULL)
984 {
985- ws_index = ws_gen & team->generation_mask;
986- ws = team->work_shares[ws_index];
987+ /* This thread encountered a new ws first. */
988+ struct gomp_work_share *ws = alloc_work_share (team);
989+ gomp_init_work_share (ws, ordered, team->nthreads);
990 thr->ts.work_share = ws;
991- thr->ts.static_trip = 0;
992-
993- gomp_mutex_lock (&ws->lock);
994- gomp_mutex_unlock (&team->work_share_lock);
995-
996- return false;
997+ return true;
998 }
999-
1000- /* Resize the work shares queue if we've run out of space. */
1001- if (team->num_live_gen++ == team->generation_mask)
1002+ else
1003 {
1004- team->work_shares = gomp_realloc (team->work_shares,
1005- 2 * team->num_live_gen
1006- * sizeof (*team->work_shares));
1007-
1008- /* Unless oldest_live_gen is zero, the sequence of live elements
1009- wraps around the end of the array. If we do nothing, we break
1010- lookup of the existing elements. Fix that by unwrapping the
1011- data from the front to the end. */
1012- if (team->oldest_live_gen > 0)
1013- memcpy (team->work_shares + team->num_live_gen,
1014- team->work_shares,
1015- (team->oldest_live_gen & team->generation_mask)
1016- * sizeof (*team->work_shares));
1017-
1018- team->generation_mask = team->generation_mask * 2 + 1;
1019- }
1020-
1021- ws_index = ws_gen & team->generation_mask;
1022- ws = gomp_new_work_share (ordered, team->nthreads);
1023- thr->ts.work_share = ws;
1024- thr->ts.static_trip = 0;
1025- team->work_shares[ws_index] = ws;
1026-
1027- gomp_mutex_lock (&ws->lock);
1028- gomp_mutex_unlock (&team->work_share_lock);
1029-
1030- return true;
1031+ thr->ts.work_share = ws;
1032+ return false;
1033+ }
1034 }
1035
1036-
1037 /* The current thread is done with its current work sharing construct.
1038 This version does imply a barrier at the end of the work-share. */
1039
1040@@ -147,36 +209,28 @@ gomp_work_share_end (void)
1041 {
1042 struct gomp_thread *thr = gomp_thread ();
1043 struct gomp_team *team = thr->ts.team;
1044- struct gomp_work_share *ws = thr->ts.work_share;
1045- bool last;
1046-
1047- thr->ts.work_share = NULL;
1048+ gomp_barrier_state_t bstate;
1049
1050 /* Work sharing constructs can be orphaned. */
1051 if (team == NULL)
1052 {
1053- free_work_share (ws);
1054+ free_work_share (NULL, thr->ts.work_share);
1055+ thr->ts.work_share = NULL;
1056 return;
1057 }
1058
1059- last = gomp_barrier_wait_start (&team->barrier);
1060+ bstate = gomp_barrier_wait_start (&team->barrier);
1061
1062- if (last)
1063+ if (gomp_barrier_last_thread (bstate))
1064 {
1065- unsigned ws_index;
1066-
1067- ws_index = thr->ts.work_share_generation & team->generation_mask;
1068- team->work_shares[ws_index] = NULL;
1069- team->oldest_live_gen++;
1070- team->num_live_gen = 0;
1071-
1072- free_work_share (ws);
1073+ if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
1074+ free_work_share (team, thr->ts.last_work_share);
1075 }
1076
1077- gomp_barrier_wait_end (&team->barrier, last);
1078+ gomp_barrier_wait_end (&team->barrier, bstate);
1079+ thr->ts.last_work_share = NULL;
1080 }
1081
1082-
1083 /* The current thread is done with its current work sharing construct.
1084 This version does NOT imply a barrier at the end of the work-share. */
1085
1086@@ -188,15 +242,17 @@ gomp_work_share_end_nowait (void)
1087 struct gomp_work_share *ws = thr->ts.work_share;
1088 unsigned completed;
1089
1090- thr->ts.work_share = NULL;
1091-
1092 /* Work sharing constructs can be orphaned. */
1093 if (team == NULL)
1094 {
1095- free_work_share (ws);
1096+ free_work_share (NULL, ws);
1097+ thr->ts.work_share = NULL;
1098 return;
1099 }
1100
1101+ if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
1102+ return;
1103+
1104 #ifdef HAVE_SYNC_BUILTINS
1105 completed = __sync_add_and_fetch (&ws->threads_completed, 1);
1106 #else
1107@@ -206,18 +262,6 @@ gomp_work_share_end_nowait (void)
1108 #endif
1109
1110 if (completed == team->nthreads)
1111- {
1112- unsigned ws_index;
1113-
1114- gomp_mutex_lock (&team->work_share_lock);
1115-
1116- ws_index = thr->ts.work_share_generation & team->generation_mask;
1117- team->work_shares[ws_index] = NULL;
1118- team->oldest_live_gen++;
1119- team->num_live_gen--;
1120-
1121- gomp_mutex_unlock (&team->work_share_lock);
1122-
1123- free_work_share (ws);
1124- }
1125+ free_work_share (team, thr->ts.last_work_share);
1126+ thr->ts.last_work_share = NULL;
1127 }
1128--- libgomp/single.c.jj 2007-12-07 14:41:01.000000000 +0100
1129+++ libgomp/single.c 2008-03-26 15:11:32.000000000 +0100
1130@@ -1,4 +1,4 @@
1131-/* Copyright (C) 2005 Free Software Foundation, Inc.
1132+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1133 Contributed by Richard Henderson <rth@redhat.com>.
1134
1135 This file is part of the GNU OpenMP Library (libgomp).
1136@@ -37,10 +37,24 @@
1137 bool
1138 GOMP_single_start (void)
1139 {
1140+#ifdef HAVE_SYNC_BUILTINS
1141+ struct gomp_thread *thr = gomp_thread ();
1142+ struct gomp_team *team = thr->ts.team;
1143+ unsigned long single_count;
1144+
1145+ if (__builtin_expect (team == NULL, 0))
1146+ return true;
1147+
1148+ single_count = thr->ts.single_count++;
1149+ return __sync_bool_compare_and_swap (&team->single_count, single_count,
1150+ single_count + 1L);
1151+#else
1152 bool ret = gomp_work_share_start (false);
1153- gomp_mutex_unlock (&gomp_thread ()->ts.work_share->lock);
1154+ if (ret)
1155+ gomp_work_share_init_done ();
1156 gomp_work_share_end_nowait ();
1157 return ret;
1158+#endif
1159 }
1160
1161 /* This routine is called when first encountering a SINGLE construct that
1162@@ -57,10 +71,12 @@ GOMP_single_copy_start (void)
1163 void *ret;
1164
1165 first = gomp_work_share_start (false);
1166- gomp_mutex_unlock (&thr->ts.work_share->lock);
1167
1168 if (first)
1169- ret = NULL;
1170+ {
1171+ gomp_work_share_init_done ();
1172+ ret = NULL;
1173+ }
1174 else
1175 {
1176 gomp_barrier_wait (&thr->ts.team->barrier);
1177--- libgomp/loop.c.jj 2007-12-07 14:41:01.000000000 +0100
1178+++ libgomp/loop.c 2008-03-26 18:47:04.000000000 +0100
1179@@ -27,8 +27,9 @@
1180
1181 /* This file handles the LOOP (FOR/DO) construct. */
1182
1183-#include "libgomp.h"
1184+#include <limits.h>
1185 #include <stdlib.h>
1186+#include "libgomp.h"
1187
1188
1189 /* Initialize the given work share construct from the given arguments. */
1190@@ -44,6 +45,39 @@ gomp_loop_init (struct gomp_work_share *
1191 ? start : end;
1192 ws->incr = incr;
1193 ws->next = start;
1194+ if (sched == GFS_DYNAMIC)
1195+ {
1196+ ws->chunk_size *= incr;
1197+
1198+#ifdef HAVE_SYNC_BUILTINS
1199+ {
1200+ /* For dynamic scheduling prepare things to make each iteration
1201+ faster. */
1202+ struct gomp_thread *thr = gomp_thread ();
1203+ struct gomp_team *team = thr->ts.team;
1204+ long nthreads = team ? team->nthreads : 1;
1205+
1206+ if (__builtin_expect (incr > 0, 1))
1207+ {
1208+ /* Cheap overflow protection. */
1209+ if (__builtin_expect ((nthreads | ws->chunk_size)
1210+ >= 1UL << (sizeof (long)
1211+ * __CHAR_BIT__ / 2 - 1), 0))
1212+ ws->mode = 0;
1213+ else
1214+ ws->mode = ws->end < (LONG_MAX
1215+ - (nthreads + 1) * ws->chunk_size);
1216+ }
1217+ /* Cheap overflow protection. */
1218+ else if (__builtin_expect ((nthreads | -ws->chunk_size)
1219+ >= 1UL << (sizeof (long)
1220+ * __CHAR_BIT__ / 2 - 1), 0))
1221+ ws->mode = 0;
1222+ else
1223+ ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
1224+ }
1225+#endif
1226+ }
1227 }
1228
1229 /* The *_start routines are called when first encountering a loop construct
1230@@ -68,10 +102,13 @@ gomp_loop_static_start (long start, long
1231 {
1232 struct gomp_thread *thr = gomp_thread ();
1233
1234+ thr->ts.static_trip = 0;
1235 if (gomp_work_share_start (false))
1236- gomp_loop_init (thr->ts.work_share, start, end, incr,
1237- GFS_STATIC, chunk_size);
1238- gomp_mutex_unlock (&thr->ts.work_share->lock);
1239+ {
1240+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1241+ GFS_STATIC, chunk_size);
1242+ gomp_work_share_init_done ();
1243+ }
1244
1245 return !gomp_iter_static_next (istart, iend);
1246 }
1247@@ -84,13 +121,16 @@ gomp_loop_dynamic_start (long start, lon
1248 bool ret;
1249
1250 if (gomp_work_share_start (false))
1251- gomp_loop_init (thr->ts.work_share, start, end, incr,
1252- GFS_DYNAMIC, chunk_size);
1253+ {
1254+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1255+ GFS_DYNAMIC, chunk_size);
1256+ gomp_work_share_init_done ();
1257+ }
1258
1259 #ifdef HAVE_SYNC_BUILTINS
1260- gomp_mutex_unlock (&thr->ts.work_share->lock);
1261 ret = gomp_iter_dynamic_next (istart, iend);
1262 #else
1263+ gomp_mutex_lock (&thr->ts.work_share->lock);
1264 ret = gomp_iter_dynamic_next_locked (istart, iend);
1265 gomp_mutex_unlock (&thr->ts.work_share->lock);
1266 #endif
1267@@ -106,13 +146,16 @@ gomp_loop_guided_start (long start, long
1268 bool ret;
1269
1270 if (gomp_work_share_start (false))
1271- gomp_loop_init (thr->ts.work_share, start, end, incr,
1272- GFS_GUIDED, chunk_size);
1273+ {
1274+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1275+ GFS_GUIDED, chunk_size);
1276+ gomp_work_share_init_done ();
1277+ }
1278
1279 #ifdef HAVE_SYNC_BUILTINS
1280- gomp_mutex_unlock (&thr->ts.work_share->lock);
1281 ret = gomp_iter_guided_next (istart, iend);
1282 #else
1283+ gomp_mutex_lock (&thr->ts.work_share->lock);
1284 ret = gomp_iter_guided_next_locked (istart, iend);
1285 gomp_mutex_unlock (&thr->ts.work_share->lock);
1286 #endif
1287@@ -149,13 +192,14 @@ gomp_loop_ordered_static_start (long sta
1288 {
1289 struct gomp_thread *thr = gomp_thread ();
1290
1291+ thr->ts.static_trip = 0;
1292 if (gomp_work_share_start (true))
1293 {
1294 gomp_loop_init (thr->ts.work_share, start, end, incr,
1295 GFS_STATIC, chunk_size);
1296 gomp_ordered_static_init ();
1297+ gomp_work_share_init_done ();
1298 }
1299- gomp_mutex_unlock (&thr->ts.work_share->lock);
1300
1301 return !gomp_iter_static_next (istart, iend);
1302 }
1303@@ -168,8 +212,14 @@ gomp_loop_ordered_dynamic_start (long st
1304 bool ret;
1305
1306 if (gomp_work_share_start (true))
1307- gomp_loop_init (thr->ts.work_share, start, end, incr,
1308- GFS_DYNAMIC, chunk_size);
1309+ {
1310+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1311+ GFS_DYNAMIC, chunk_size);
1312+ gomp_mutex_lock (&thr->ts.work_share->lock);
1313+ gomp_work_share_init_done ();
1314+ }
1315+ else
1316+ gomp_mutex_lock (&thr->ts.work_share->lock);
1317
1318 ret = gomp_iter_dynamic_next_locked (istart, iend);
1319 if (ret)
1320@@ -187,8 +237,14 @@ gomp_loop_ordered_guided_start (long sta
1321 bool ret;
1322
1323 if (gomp_work_share_start (true))
1324- gomp_loop_init (thr->ts.work_share, start, end, incr,
1325- GFS_GUIDED, chunk_size);
1326+ {
1327+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1328+ GFS_GUIDED, chunk_size);
1329+ gomp_mutex_lock (&thr->ts.work_share->lock);
1330+ gomp_work_share_init_done ();
1331+ }
1332+ else
1333+ gomp_mutex_lock (&thr->ts.work_share->lock);
1334
1335 ret = gomp_iter_guided_next_locked (istart, iend);
1336 if (ret)
1337@@ -375,12 +431,12 @@ gomp_parallel_loop_start (void (*fn) (vo
1338 long incr, enum gomp_schedule_type sched,
1339 long chunk_size)
1340 {
1341- struct gomp_work_share *ws;
1342+ struct gomp_team *team;
1343
1344 num_threads = gomp_resolve_num_threads (num_threads);
1345- ws = gomp_new_work_share (false, num_threads);
1346- gomp_loop_init (ws, start, end, incr, sched, chunk_size);
1347- gomp_team_start (fn, data, num_threads, ws);
1348+ team = gomp_new_team (num_threads);
1349+ gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
1350+ gomp_team_start (fn, data, num_threads, team);
1351 }
1352
1353 void
1354--- libgomp/Makefile.in.jj 2008-01-10 20:53:47.000000000 +0100
1355+++ libgomp/Makefile.in 2008-03-26 18:51:01.000000000 +0100
1356@@ -83,7 +83,7 @@ libgomp_la_LIBADD =
1357 am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
1358 error.lo iter.lo loop.lo ordered.lo parallel.lo sections.lo \
1359 single.lo team.lo work.lo lock.lo mutex.lo proc.lo sem.lo \
1360- bar.lo time.lo fortran.lo affinity.lo
1361+ bar.lo ptrlock.lo time.lo fortran.lo affinity.lo
1362 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
1363 DEFAULT_INCLUDES = -I. -I$(srcdir) -I.
1364 depcomp = $(SHELL) $(top_srcdir)/../depcomp
1365@@ -292,7 +292,7 @@ libgomp_version_info = -version-info $(l
1366 libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script)
1367 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
1368 loop.c ordered.c parallel.c sections.c single.c team.c work.c \
1369- lock.c mutex.c proc.c sem.c bar.c time.c fortran.c affinity.c
1370+ lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c
1371
1372 nodist_noinst_HEADERS = libgomp_f.h
1373 nodist_libsubinclude_HEADERS = omp.h
1374@@ -434,6 +434,7 @@ distclean-compile:
1375 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
1376 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
1377 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@
1378+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@
1379 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
1380 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
1381 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
1382--- libgomp/testsuite/libgomp.c/loop-4.c.jj 2008-03-26 18:47:04.000000000 +0100
1383+++ libgomp/testsuite/libgomp.c/loop-4.c 2008-03-26 18:47:04.000000000 +0100
1384@@ -0,0 +1,28 @@
1385+/* { dg-do run } */
1386+
1387+extern void abort (void);
1388+
1389+int
1390+main (void)
1391+{
1392+ int e = 0;
1393+#pragma omp parallel num_threads (4) reduction(+:e)
1394+ {
1395+ long i;
1396+ #pragma omp for schedule(dynamic,1)
1397+ for (i = __LONG_MAX__ - 30001; i <= __LONG_MAX__ - 10001; i += 10000)
1398+ if (i != __LONG_MAX__ - 30001
1399+ && i != __LONG_MAX__ - 20001
1400+ && i != __LONG_MAX__ - 10001)
1401+ e = 1;
1402+ #pragma omp for schedule(dynamic,1)
1403+ for (i = -__LONG_MAX__ + 30000; i >= -__LONG_MAX__ + 10000; i -= 10000)
1404+ if (i != -__LONG_MAX__ + 30000
1405+ && i != -__LONG_MAX__ + 20000
1406+ && i != -__LONG_MAX__ + 10000)
1407+ e = 1;
1408+ }
1409+ if (e)
1410+ abort ();
1411+ return 0;
1412+}
1413--- libgomp/Makefile.am.jj 2007-12-07 14:41:01.000000000 +0100
1414+++ libgomp/Makefile.am 2008-03-26 15:15:19.000000000 +0100
1415@@ -31,7 +31,7 @@ libgomp_la_LDFLAGS = $(libgomp_version_i
1416
1417 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
1418 loop.c ordered.c parallel.c sections.c single.c team.c work.c \
1419- lock.c mutex.c proc.c sem.c bar.c time.c fortran.c affinity.c
1420+ lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c
1421
1422 nodist_noinst_HEADERS = libgomp_f.h
1423 nodist_libsubinclude_HEADERS = omp.h
1424--- libgomp/team.c.jj 2007-12-07 14:41:01.000000000 +0100
1425+++ libgomp/team.c 2008-03-27 12:22:26.000000000 +0100
1426@@ -94,7 +94,7 @@ gomp_thread_start (void *xdata)
1427 {
1428 gomp_barrier_wait (&thr->ts.team->barrier);
1429 local_fn (local_data);
1430- gomp_barrier_wait (&thr->ts.team->barrier);
1431+ gomp_barrier_wait_last (&thr->ts.team->barrier);
1432 }
1433 else
1434 {
1435@@ -114,11 +114,10 @@ gomp_thread_start (void *xdata)
1436 thr->data = NULL;
1437 thr->ts.team = NULL;
1438 thr->ts.work_share = NULL;
1439+ thr->ts.last_work_share = NULL;
1440 thr->ts.team_id = 0;
1441- thr->ts.work_share_generation = 0;
1442- thr->ts.static_trip = 0;
1443
1444- gomp_barrier_wait (&team->barrier);
1445+ gomp_barrier_wait_last (&team->barrier);
1446 gomp_barrier_wait (&gomp_threads_dock);
1447
1448 local_fn = thr->fn;
1449@@ -133,21 +132,29 @@ gomp_thread_start (void *xdata)
1450
1451 /* Create a new team data structure. */
1452
1453-static struct gomp_team *
1454-new_team (unsigned nthreads, struct gomp_work_share *work_share)
1455+struct gomp_team *
1456+gomp_new_team (unsigned nthreads)
1457 {
1458 struct gomp_team *team;
1459 size_t size;
1460+ int i;
1461
1462 size = sizeof (*team) + nthreads * sizeof (team->ordered_release[0]);
1463 team = gomp_malloc (size);
1464- gomp_mutex_init (&team->work_share_lock);
1465
1466- team->work_shares = gomp_malloc (4 * sizeof (struct gomp_work_share *));
1467- team->generation_mask = 3;
1468- team->oldest_live_gen = work_share == NULL;
1469- team->num_live_gen = work_share != NULL;
1470- team->work_shares[0] = work_share;
1471+ team->work_share_chunk = 8;
1472+#ifdef HAVE_SYNC_BUILTINS
1473+ team->single_count = 0;
1474+#else
1475+ gomp_mutex_init (&team->work_share_list_free_lock);
1476+#endif
1477+ gomp_init_work_share (&team->work_shares[0], false, nthreads);
1478+ team->work_shares[0].next_alloc = NULL;
1479+ team->work_share_list_free = NULL;
1480+ team->work_share_list_alloc = &team->work_shares[1];
1481+ for (i = 1; i < 7; i++)
1482+ team->work_shares[i].next_free = &team->work_shares[i + 1];
1483+ team->work_shares[i].next_free = NULL;
1484
1485 team->nthreads = nthreads;
1486 gomp_barrier_init (&team->barrier, nthreads);
1487@@ -164,10 +171,22 @@ new_team (unsigned nthreads, struct gomp
1488 static void
1489 free_team (struct gomp_team *team)
1490 {
1491- free (team->work_shares);
1492- gomp_mutex_destroy (&team->work_share_lock);
1493+ if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
1494+ {
1495+ struct gomp_work_share *ws = team->work_shares[0].next_alloc;
1496+ do
1497+ {
1498+ struct gomp_work_share *next_ws = ws->next_alloc;
1499+ free (ws);
1500+ ws = next_ws;
1501+ }
1502+ while (ws != NULL);
1503+ }
1504 gomp_barrier_destroy (&team->barrier);
1505 gomp_sem_destroy (&team->master_release);
1506+#ifndef HAVE_SYNC_BUILTINS
1507+ gomp_mutex_destroy (&team->work_share_list_free_lock);
1508+#endif
1509 free (team);
1510 }
1511
1512@@ -176,11 +195,10 @@ free_team (struct gomp_team *team)
1513
1514 void
1515 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
1516- struct gomp_work_share *work_share)
1517+ struct gomp_team *team)
1518 {
1519 struct gomp_thread_start_data *start_data;
1520 struct gomp_thread *thr, *nthr;
1521- struct gomp_team *team;
1522 bool nested;
1523 unsigned i, n, old_threads_used = 0;
1524 pthread_attr_t thread_attr, *attr;
1525@@ -188,17 +206,18 @@ gomp_team_start (void (*fn) (void *), vo
1526 thr = gomp_thread ();
1527 nested = thr->ts.team != NULL;
1528
1529- team = new_team (nthreads, work_share);
1530-
1531 /* Always save the previous state, even if this isn't a nested team.
1532 In particular, we should save any work share state from an outer
1533 orphaned work share construct. */
1534 team->prev_ts = thr->ts;
1535
1536 thr->ts.team = team;
1537- thr->ts.work_share = work_share;
1538 thr->ts.team_id = 0;
1539- thr->ts.work_share_generation = 0;
1540+ thr->ts.work_share = &team->work_shares[0];
1541+ thr->ts.last_work_share = NULL;
1542+#ifdef HAVE_SYNC_BUILTINS
1543+ thr->ts.single_count = 0;
1544+#endif
1545 thr->ts.static_trip = 0;
1546
1547 if (nthreads == 1)
1548@@ -241,9 +260,12 @@ gomp_team_start (void (*fn) (void *), vo
1549 {
1550 nthr = gomp_threads[i];
1551 nthr->ts.team = team;
1552- nthr->ts.work_share = work_share;
1553+ nthr->ts.work_share = &team->work_shares[0];
1554+ nthr->ts.last_work_share = NULL;
1555 nthr->ts.team_id = i;
1556- nthr->ts.work_share_generation = 0;
1557+#ifdef HAVE_SYNC_BUILTINS
1558+ nthr->ts.single_count = 0;
1559+#endif
1560 nthr->ts.static_trip = 0;
1561 nthr->fn = fn;
1562 nthr->data = data;
1563@@ -266,8 +288,24 @@ gomp_team_start (void (*fn) (void *), vo
1564 }
1565 }
1566
1567+ if (__builtin_expect (nthreads > old_threads_used, 0))
1568+ {
1569+ long diff = (long) nthreads - (long) old_threads_used;
1570+
1571+ if (old_threads_used == 0)
1572+ --diff;
1573+
1574+#ifdef HAVE_SYNC_BUILTINS
1575+ __sync_fetch_and_add (&gomp_managed_threads, diff);
1576+#else
1577+ gomp_mutex_lock (&gomp_remaining_threads_lock);
1578+ gomp_managed_threads += diff;
1579+ gomp_mutex_unlock (&gomp_remaining_threads_lock);
1580+#endif
1581+ }
1582+
1583 attr = &gomp_thread_attr;
1584- if (gomp_cpu_affinity != NULL)
1585+ if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
1586 {
1587 size_t stacksize;
1588 pthread_attr_init (&thread_attr);
1589@@ -287,9 +325,12 @@ gomp_team_start (void (*fn) (void *), vo
1590 int err;
1591
1592 start_data->ts.team = team;
1593- start_data->ts.work_share = work_share;
1594+ start_data->ts.work_share = &team->work_shares[0];
1595+ start_data->ts.last_work_share = NULL;
1596 start_data->ts.team_id = i;
1597- start_data->ts.work_share_generation = 0;
1598+#ifdef HAVE_SYNC_BUILTINS
1599+ start_data->ts.single_count = 0;
1600+#endif
1601 start_data->ts.static_trip = 0;
1602 start_data->fn = fn;
1603 start_data->fn_data = data;
1604@@ -303,7 +344,7 @@ gomp_team_start (void (*fn) (void *), vo
1605 gomp_fatal ("Thread creation failed: %s", strerror (err));
1606 }
1607
1608- if (gomp_cpu_affinity != NULL)
1609+ if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
1610 pthread_attr_destroy (&thread_attr);
1611
1612 do_release:
1613@@ -313,8 +354,20 @@ gomp_team_start (void (*fn) (void *), vo
1614 that should arrive back at the end of this team. The extra
1615 threads should be exiting. Note that we arrange for this test
1616 to never be true for nested teams. */
1617- if (nthreads < old_threads_used)
1618- gomp_barrier_reinit (&gomp_threads_dock, nthreads);
1619+ if (__builtin_expect (nthreads < old_threads_used, 0))
1620+ {
1621+ long diff = (long) nthreads - (long) old_threads_used;
1622+
1623+ gomp_barrier_reinit (&gomp_threads_dock, nthreads);
1624+
1625+#ifdef HAVE_SYNC_BUILTINS
1626+ __sync_fetch_and_add (&gomp_managed_threads, diff);
1627+#else
1628+ gomp_mutex_lock (&gomp_remaining_threads_lock);
1629+ gomp_managed_threads += diff;
1630+ gomp_mutex_unlock (&gomp_remaining_threads_lock);
1631+#endif
1632+ }
1633 }
1634
1635
1636@@ -329,8 +382,21 @@ gomp_team_end (void)
1637
1638 gomp_barrier_wait (&team->barrier);
1639
1640+ gomp_fini_work_share (thr->ts.work_share);
1641+
1642 thr->ts = team->prev_ts;
1643
1644+ if (__builtin_expect (thr->ts.team != NULL, 0))
1645+ {
1646+#ifdef HAVE_SYNC_BUILTINS
1647+ __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
1648+#else
1649+ gomp_mutex_lock (&gomp_remaining_threads_lock);
1650+ gomp_managed_threads -= team->nthreads - 1L;
1651+ gomp_mutex_unlock (&gomp_remaining_threads_lock);
1652+#endif
1653+ }
1654+
1655 free_team (team);
1656 }
1657
1658--- libgomp/config/posix/bar.h.jj 2007-12-07 14:41:01.000000000 +0100
1659+++ libgomp/config/posix/bar.h 2008-03-26 15:11:32.000000000 +0100
1660@@ -1,4 +1,4 @@
1661-/* Copyright (C) 2005 Free Software Foundation, Inc.
1662+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1663 Contributed by Richard Henderson <rth@redhat.com>.
1664
1665 This file is part of the GNU OpenMP Library (libgomp).
1666@@ -46,18 +46,32 @@ typedef struct
1667 unsigned total;
1668 unsigned arrived;
1669 } gomp_barrier_t;
1670+typedef bool gomp_barrier_state_t;
1671
1672 extern void gomp_barrier_init (gomp_barrier_t *, unsigned);
1673 extern void gomp_barrier_reinit (gomp_barrier_t *, unsigned);
1674 extern void gomp_barrier_destroy (gomp_barrier_t *);
1675
1676 extern void gomp_barrier_wait (gomp_barrier_t *);
1677-extern void gomp_barrier_wait_end (gomp_barrier_t *, bool);
1678+extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
1679
1680-static inline bool gomp_barrier_wait_start (gomp_barrier_t *bar)
1681+static inline gomp_barrier_state_t
1682+gomp_barrier_wait_start (gomp_barrier_t *bar)
1683 {
1684 gomp_mutex_lock (&bar->mutex1);
1685 return ++bar->arrived == bar->total;
1686 }
1687
1688+static inline bool
1689+gomp_barrier_last_thread (gomp_barrier_state_t state)
1690+{
1691+ return state;
1692+}
1693+
1694+static inline void
1695+gomp_barrier_wait_last (gomp_barrier_t *bar)
1696+{
1697+ gomp_barrier_wait (bar);
1698+}
1699+
1700 #endif /* GOMP_BARRIER_H */
1701--- libgomp/config/posix/ptrlock.h.jj 2008-03-26 15:11:32.000000000 +0100
1702+++ libgomp/config/posix/ptrlock.h 2008-03-26 15:11:32.000000000 +0100
1703@@ -0,0 +1,69 @@
1704+/* Copyright (C) 2008 Free Software Foundation, Inc.
1705+ Contributed by Jakub Jelinek <jakub@redhat.com>.
1706+
1707+ This file is part of the GNU OpenMP Library (libgomp).
1708+
1709+ Libgomp is free software; you can redistribute it and/or modify it
1710+ under the terms of the GNU Lesser General Public License as published by
1711+ the Free Software Foundation; either version 2.1 of the License, or
1712+ (at your option) any later version.
1713+
1714+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
1715+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1716+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
1717+ more details.
1718+
1719+ You should have received a copy of the GNU Lesser General Public License
1720+ along with libgomp; see the file COPYING.LIB. If not, write to the
1721+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
1722+ MA 02110-1301, USA. */
1723+
1724+/* As a special exception, if you link this library with other files, some
1725+ of which are compiled with GCC, to produce an executable, this library
1726+ does not by itself cause the resulting executable to be covered by the
1727+ GNU General Public License. This exception does not however invalidate
1728+ any other reasons why the executable file might be covered by the GNU
1729+ General Public License. */
1730+
1731+/* This is a Linux specific implementation of a mutex synchronization
1732+ mechanism for libgomp. This type is private to the library. This
1733+ implementation uses atomic instructions and the futex syscall. */
1734+
1735+#ifndef GOMP_PTRLOCK_H
1736+#define GOMP_PTRLOCK_H 1
1737+
1738+typedef struct { void *ptr; gomp_mutex_t lock; } gomp_ptrlock_t;
1739+
1740+static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr)
1741+{
1742+ ptrlock->ptr = ptr;
1743+ gomp_mutex_init (&ptrlock->lock);
1744+}
1745+
1746+static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock)
1747+{
1748+ if (ptrlock->ptr != NULL)
1749+ return ptrlock->ptr;
1750+
1751+ gomp_mutex_lock (&ptrlock->lock);
1752+ if (ptrlock->ptr != NULL)
1753+ {
1754+ gomp_mutex_unlock (&ptrlock->lock);
1755+ return ptrlock->ptr;
1756+ }
1757+
1758+ return NULL;
1759+}
1760+
1761+static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr)
1762+{
1763+ ptrlock->ptr = ptr;
1764+ gomp_mutex_unlock (&ptrlock->lock);
1765+}
1766+
1767+static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock)
1768+{
1769+ gomp_mutex_destroy (&ptrlock->lock);
1770+}
1771+
1772+#endif /* GOMP_PTRLOCK_H */
1773--- libgomp/config/posix/ptrlock.c.jj 2008-03-26 15:11:32.000000000 +0100
1774+++ libgomp/config/posix/ptrlock.c 2008-03-26 15:11:32.000000000 +0100
1775@@ -0,0 +1 @@
1776+/* Everything is in the header. */
1777--- libgomp/config/posix/bar.c.jj 2007-12-07 14:41:01.000000000 +0100
1778+++ libgomp/config/posix/bar.c 2008-03-26 15:11:32.000000000 +0100
1779@@ -1,4 +1,4 @@
1780-/* Copyright (C) 2005 Free Software Foundation, Inc.
1781+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1782 Contributed by Richard Henderson <rth@redhat.com>.
1783
1784 This file is part of the GNU OpenMP Library (libgomp).
1785@@ -70,7 +70,7 @@ gomp_barrier_reinit (gomp_barrier_t *bar
1786 }
1787
1788 void
1789-gomp_barrier_wait_end (gomp_barrier_t *bar, bool last)
1790+gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t last)
1791 {
1792 unsigned int n;
1793
1794--- libgomp/config/linux/alpha/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
1795+++ libgomp/config/linux/alpha/futex.h 2008-03-26 15:11:32.000000000 +0100
1796@@ -1,4 +1,4 @@
1797-/* Copyright (C) 2005 Free Software Foundation, Inc.
1798+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1799 Contributed by Richard Henderson <rth@redhat.com>.
1800
1801 This file is part of the GNU OpenMP Library (libgomp).
1802@@ -30,8 +30,6 @@
1803 #ifndef SYS_futex
1804 #define SYS_futex 394
1805 #endif
1806-#define FUTEX_WAIT 0
1807-#define FUTEX_WAKE 1
1808
1809
1810 static inline void
1811@@ -45,7 +43,7 @@ futex_wait (int *addr, int val)
1812
1813 sc_0 = SYS_futex;
1814 sc_16 = (long) addr;
1815- sc_17 = FUTEX_WAIT;
1816+ sc_17 = gomp_futex_wait;
1817 sc_18 = val;
1818 sc_19 = 0;
1819 __asm volatile ("callsys"
1820@@ -53,6 +51,20 @@ futex_wait (int *addr, int val)
1821 : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19)
1822 : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8",
1823 "$22", "$23", "$24", "$25", "$27", "$28", "memory");
1824+ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS)
1825+ {
1826+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
1827+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
1828+ sc_0 = SYS_futex;
1829+ sc_17 &= ~FUTEX_PRIVATE_FLAG;
1830+ sc_19 = 0;
1831+ __asm volatile ("callsys"
1832+ : "=r" (sc_0), "=r"(sc_19)
1833+ : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18),
1834+ "1"(sc_19)
1835+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8",
1836+ "$22", "$23", "$24", "$25", "$27", "$28", "memory");
1837+ }
1838 }
1839
1840 static inline void
1841@@ -66,11 +78,35 @@ futex_wake (int *addr, int count)
1842
1843 sc_0 = SYS_futex;
1844 sc_16 = (long) addr;
1845- sc_17 = FUTEX_WAKE;
1846+ sc_17 = gomp_futex_wake;
1847 sc_18 = count;
1848 __asm volatile ("callsys"
1849 : "=r" (sc_0), "=r"(sc_19)
1850 : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18)
1851 : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8",
1852 "$22", "$23", "$24", "$25", "$27", "$28", "memory");
1853+ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS)
1854+ {
1855+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
1856+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
1857+ sc_0 = SYS_futex;
1858+ sc_17 &= ~FUTEX_PRIVATE_FLAG;
1859+ __asm volatile ("callsys"
1860+ : "=r" (sc_0), "=r"(sc_19)
1861+ : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18)
1862+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8",
1863+ "$22", "$23", "$24", "$25", "$27", "$28", "memory");
1864+ }
1865+}
1866+
1867+static inline void
1868+cpu_relax (void)
1869+{
1870+ __asm volatile ("" : : : "memory");
1871+}
1872+
1873+static inline void
1874+atomic_write_barrier (void)
1875+{
1876+ __asm volatile ("wmb" : : : "memory");
1877 }
1878--- libgomp/config/linux/affinity.c.jj 2007-12-07 14:41:00.000000000 +0100
1879+++ libgomp/config/linux/affinity.c 2008-03-26 15:11:32.000000000 +0100
1880@@ -1,4 +1,4 @@
1881-/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
1882+/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
1883 Contributed by Jakub Jelinek <jakub@redhat.com>.
1884
1885 This file is part of the GNU OpenMP Library (libgomp).
1886@@ -38,9 +38,6 @@
1887 #ifdef HAVE_PTHREAD_AFFINITY_NP
1888
1889 static unsigned int affinity_counter;
1890-#ifndef HAVE_SYNC_BUILTINS
1891-static gomp_mutex_t affinity_lock;
1892-#endif
1893
1894 void
1895 gomp_init_affinity (void)
1896@@ -76,9 +73,6 @@ gomp_init_affinity (void)
1897 CPU_SET (gomp_cpu_affinity[0], &cpuset);
1898 pthread_setaffinity_np (pthread_self (), sizeof (cpuset), &cpuset);
1899 affinity_counter = 1;
1900-#ifndef HAVE_SYNC_BUILTINS
1901- gomp_mutex_init (&affinity_lock);
1902-#endif
1903 }
1904
1905 void
1906@@ -87,13 +81,7 @@ gomp_init_thread_affinity (pthread_attr_
1907 unsigned int cpu;
1908 cpu_set_t cpuset;
1909
1910-#ifdef HAVE_SYNC_BUILTINS
1911 cpu = __sync_fetch_and_add (&affinity_counter, 1);
1912-#else
1913- gomp_mutex_lock (&affinity_lock);
1914- cpu = affinity_counter++;
1915- gomp_mutex_unlock (&affinity_lock);
1916-#endif
1917 cpu %= gomp_cpu_affinity_len;
1918 CPU_ZERO (&cpuset);
1919 CPU_SET (gomp_cpu_affinity[cpu], &cpuset);
1920--- libgomp/config/linux/bar.h.jj 2007-12-07 14:41:00.000000000 +0100
1921+++ libgomp/config/linux/bar.h 2008-03-26 15:11:32.000000000 +0100
1922@@ -1,4 +1,4 @@
1923-/* Copyright (C) 2005 Free Software Foundation, Inc.
1924+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1925 Contributed by Richard Henderson <rth@redhat.com>.
1926
1927 This file is part of the GNU OpenMP Library (libgomp).
1928@@ -36,40 +36,49 @@
1929
1930 typedef struct
1931 {
1932- gomp_mutex_t mutex;
1933- unsigned total;
1934- unsigned arrived;
1935- int generation;
1936+ /* Make sure total/generation is in a mostly read cacheline, while
1937+ awaited in a separate cacheline. */
1938+ unsigned total __attribute__((aligned (64)));
1939+ unsigned generation;
1940+ unsigned awaited __attribute__((aligned (64)));
1941 } gomp_barrier_t;
1942+typedef unsigned int gomp_barrier_state_t;
1943
1944 static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
1945 {
1946- gomp_mutex_init (&bar->mutex);
1947 bar->total = count;
1948- bar->arrived = 0;
1949+ bar->awaited = count;
1950 bar->generation = 0;
1951 }
1952
1953 static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
1954 {
1955- gomp_mutex_lock (&bar->mutex);
1956+ __sync_fetch_and_add (&bar->awaited, count - bar->total);
1957 bar->total = count;
1958- gomp_mutex_unlock (&bar->mutex);
1959 }
1960
1961 static inline void gomp_barrier_destroy (gomp_barrier_t *bar)
1962 {
1963- /* Before destroying, make sure all threads have left the barrier. */
1964- gomp_mutex_lock (&bar->mutex);
1965 }
1966
1967 extern void gomp_barrier_wait (gomp_barrier_t *);
1968-extern void gomp_barrier_wait_end (gomp_barrier_t *, bool);
1969+extern void gomp_barrier_wait_last (gomp_barrier_t *);
1970+extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
1971
1972-static inline bool gomp_barrier_wait_start (gomp_barrier_t *bar)
1973+static inline gomp_barrier_state_t
1974+gomp_barrier_wait_start (gomp_barrier_t *bar)
1975 {
1976- gomp_mutex_lock (&bar->mutex);
1977- return ++bar->arrived == bar->total;
1978+ unsigned int ret = bar->generation;
1979+ /* Do we need any barrier here or is __sync_add_and_fetch acting
1980+ as the needed LoadLoad barrier already? */
1981+ ret += __sync_add_and_fetch (&bar->awaited, -1) == 0;
1982+ return ret;
1983+}
1984+
1985+static inline bool
1986+gomp_barrier_last_thread (gomp_barrier_state_t state)
1987+{
1988+ return state & 1;
1989 }
1990
1991 #endif /* GOMP_BARRIER_H */
1992--- libgomp/config/linux/ptrlock.h.jj 2008-03-26 15:11:32.000000000 +0100
1993+++ libgomp/config/linux/ptrlock.h 2008-03-26 15:11:32.000000000 +0100
1994@@ -0,0 +1,65 @@
1995+/* Copyright (C) 2008 Free Software Foundation, Inc.
1996+ Contributed by Jakub Jelinek <jakub@redhat.com>.
1997+
1998+ This file is part of the GNU OpenMP Library (libgomp).
1999+
2000+ Libgomp is free software; you can redistribute it and/or modify it
2001+ under the terms of the GNU Lesser General Public License as published by
2002+ the Free Software Foundation; either version 2.1 of the License, or
2003+ (at your option) any later version.
2004+
2005+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
2006+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2007+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
2008+ more details.
2009+
2010+ You should have received a copy of the GNU Lesser General Public License
2011+ along with libgomp; see the file COPYING.LIB. If not, write to the
2012+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
2013+ MA 02110-1301, USA. */
2014+
2015+/* As a special exception, if you link this library with other files, some
2016+ of which are compiled with GCC, to produce an executable, this library
2017+ does not by itself cause the resulting executable to be covered by the
2018+ GNU General Public License. This exception does not however invalidate
2019+ any other reasons why the executable file might be covered by the GNU
2020+ General Public License. */
2021+
2022+/* This is a Linux specific implementation of a mutex synchronization
2023+ mechanism for libgomp. This type is private to the library. This
2024+ implementation uses atomic instructions and the futex syscall. */
2025+
2026+#ifndef GOMP_PTRLOCK_H
2027+#define GOMP_PTRLOCK_H 1
2028+
2029+typedef void *gomp_ptrlock_t;
2030+
2031+static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr)
2032+{
2033+ *ptrlock = ptr;
2034+}
2035+
2036+extern void *gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock);
2037+static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock)
2038+{
2039+ if ((uintptr_t) *ptrlock > 2)
2040+ return *ptrlock;
2041+
2042+ if (__sync_bool_compare_and_swap (ptrlock, NULL, (uintptr_t) 1))
2043+ return NULL;
2044+
2045+ return gomp_ptrlock_get_slow (ptrlock);
2046+}
2047+
2048+extern void gomp_ptrlock_set_slow (gomp_ptrlock_t *ptrlock, void *ptr);
2049+static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr)
2050+{
2051+ if (!__sync_bool_compare_and_swap (ptrlock, (uintptr_t) 1, ptr))
2052+ gomp_ptrlock_set_slow (ptrlock, ptr);
2053+}
2054+
2055+static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock)
2056+{
2057+}
2058+
2059+#endif /* GOMP_PTRLOCK_H */
2060--- libgomp/config/linux/lock.c.jj 2007-12-07 14:41:00.000000000 +0100
2061+++ libgomp/config/linux/lock.c 2008-03-26 15:11:32.000000000 +0100
2062@@ -29,11 +29,10 @@
2063 primitives. This implementation uses atomic instructions and the futex
2064 syscall. */
2065
2066-#include "libgomp.h"
2067 #include <string.h>
2068 #include <unistd.h>
2069 #include <sys/syscall.h>
2070-#include "futex.h"
2071+#include "wait.h"
2072
2073
2074 /* The internal gomp_mutex_t and the external non-recursive omp_lock_t
2075@@ -137,7 +136,7 @@ omp_set_nest_lock (omp_nest_lock_t *lock
2076 return;
2077 }
2078
2079- futex_wait (&lock->owner, otid);
2080+ do_wait (&lock->owner, otid);
2081 }
2082 }
2083
2084--- libgomp/config/linux/ptrlock.c.jj 2008-03-26 15:11:32.000000000 +0100
2085+++ libgomp/config/linux/ptrlock.c 2008-03-26 15:11:32.000000000 +0100
2086@@ -0,0 +1,70 @@
2087+/* Copyright (C) 2008 Free Software Foundation, Inc.
2088+ Contributed by Jakub Jelinek <jakub@redhat.com>.
2089+
2090+ This file is part of the GNU OpenMP Library (libgomp).
2091+
2092+ Libgomp is free software; you can redistribute it and/or modify it
2093+ under the terms of the GNU Lesser General Public License as published by
2094+ the Free Software Foundation; either version 2.1 of the License, or
2095+ (at your option) any later version.
2096+
2097+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
2098+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2099+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
2100+ more details.
2101+
2102+ You should have received a copy of the GNU Lesser General Public License
2103+ along with libgomp; see the file COPYING.LIB. If not, write to the
2104+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
2105+ MA 02110-1301, USA. */
2106+
2107+/* As a special exception, if you link this library with other files, some
2108+ of which are compiled with GCC, to produce an executable, this library
2109+ does not by itself cause the resulting executable to be covered by the
2110+ GNU General Public License. This exception does not however invalidate
2111+ any other reasons why the executable file might be covered by the GNU
2112+ General Public License. */
2113+
2114+/* This is a Linux specific implementation of a mutex synchronization
2115+ mechanism for libgomp. This type is private to the library. This
2116+ implementation uses atomic instructions and the futex syscall. */
2117+
2118+#include <endian.h>
2119+#include <limits.h>
2120+#include "wait.h"
2121+
2122+void *
2123+gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock)
2124+{
2125+ int *intptr;
2126+ __sync_bool_compare_and_swap (ptrlock, 1, 2);
2127+
2128+ /* futex works on ints, not pointers.
2129+ But a valid work share pointer will be at least
2130+ 8 byte aligned, so it is safe to assume the low
2131+ 32-bits of the pointer won't contain values 1 or 2. */
2132+ __asm volatile ("" : "=r" (intptr) : "0" (ptrlock));
2133+#if __BYTE_ORDER == __BIG_ENDIAN
2134+ if (sizeof (*ptrlock) > sizeof (int))
2135+ intptr += (sizeof (*ptrlock) / sizeof (int)) - 1;
2136+#endif
2137+ do
2138+ do_wait (intptr, 2);
2139+ while (*intptr == 2);
2140+ __asm volatile ("" : : : "memory");
2141+ return *ptrlock;
2142+}
2143+
2144+void
2145+gomp_ptrlock_set_slow (gomp_ptrlock_t *ptrlock, void *ptr)
2146+{
2147+ int *intptr;
2148+
2149+ *ptrlock = ptr;
2150+ __asm volatile ("" : "=r" (intptr) : "0" (ptrlock));
2151+#if __BYTE_ORDER == __BIG_ENDIAN
2152+ if (sizeof (*ptrlock) > sizeof (int))
2153+ intptr += (sizeof (*ptrlock) / sizeof (int)) - 1;
2154+#endif
2155+ futex_wake (intptr, INT_MAX);
2156+}
2157--- libgomp/config/linux/x86/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2158+++ libgomp/config/linux/x86/futex.h 2008-03-26 15:11:32.000000000 +0100
2159@@ -1,4 +1,4 @@
2160-/* Copyright (C) 2005 Free Software Foundation, Inc.
2161+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2162 Contributed by Richard Henderson <rth@redhat.com>.
2163
2164 This file is part of the GNU OpenMP Library (libgomp).
2165@@ -27,9 +27,6 @@
2166
2167 /* Provide target-specific access to the futex system call. */
2168
2169-#define FUTEX_WAIT 0
2170-#define FUTEX_WAKE 1
2171-
2172 #ifdef __LP64__
2173 # ifndef SYS_futex
2174 # define SYS_futex 202
2175@@ -38,14 +35,26 @@
2176 static inline void
2177 futex_wait (int *addr, int val)
2178 {
2179- register long r10 __asm__("%r10") = 0;
2180+ register long r10 __asm__("%r10");
2181 long res;
2182
2183+ r10 = 0;
2184 __asm volatile ("syscall"
2185 : "=a" (res)
2186- : "0"(SYS_futex), "D" (addr), "S"(FUTEX_WAIT),
2187- "d"(val), "r"(r10)
2188+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wait),
2189+ "d" (val), "r" (r10)
2190 : "r11", "rcx", "memory");
2191+ if (__builtin_expect (res == -ENOSYS, 0))
2192+ {
2193+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2194+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2195+ r10 = 0;
2196+ __asm volatile ("syscall"
2197+ : "=a" (res)
2198+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wait),
2199+ "d" (val), "r" (r10)
2200+ : "r11", "rcx", "memory");
2201+ }
2202 }
2203
2204 static inline void
2205@@ -55,8 +64,19 @@ futex_wake (int *addr, int count)
2206
2207 __asm volatile ("syscall"
2208 : "=a" (res)
2209- : "0"(SYS_futex), "D" (addr), "S"(FUTEX_WAKE), "d"(count)
2210+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wake),
2211+ "d" (count)
2212 : "r11", "rcx", "memory");
2213+ if (__builtin_expect (res == -ENOSYS, 0))
2214+ {
2215+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2216+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2217+ __asm volatile ("syscall"
2218+ : "=a" (res)
2219+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wake),
2220+ "d" (count)
2221+ : "r11", "rcx", "memory");
2222+ }
2223 }
2224 #else
2225 # ifndef SYS_futex
2226@@ -65,7 +85,7 @@ futex_wake (int *addr, int count)
2227
2228 # ifdef __PIC__
2229
2230-static inline void
2231+static inline long
2232 sys_futex0 (int *addr, int op, int val)
2233 {
2234 long res;
2235@@ -77,11 +97,12 @@ sys_futex0 (int *addr, int op, int val)
2236 : "0"(SYS_futex), "r" (addr), "c"(op),
2237 "d"(val), "S"(0)
2238 : "memory");
2239+ return res;
2240 }
2241
2242 # else
2243
2244-static inline void
2245+static inline long
2246 sys_futex0 (int *addr, int op, int val)
2247 {
2248 long res;
2249@@ -91,6 +112,7 @@ sys_futex0 (int *addr, int op, int val)
2250 : "0"(SYS_futex), "b" (addr), "c"(op),
2251 "d"(val), "S"(0)
2252 : "memory");
2253+ return res;
2254 }
2255
2256 # endif /* __PIC__ */
2257@@ -98,13 +120,37 @@ sys_futex0 (int *addr, int op, int val)
2258 static inline void
2259 futex_wait (int *addr, int val)
2260 {
2261- sys_futex0 (addr, FUTEX_WAIT, val);
2262+ long res = sys_futex0 (addr, gomp_futex_wait, val);
2263+ if (__builtin_expect (res == -ENOSYS, 0))
2264+ {
2265+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2266+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2267+ sys_futex0 (addr, gomp_futex_wait, val);
2268+ }
2269 }
2270
2271 static inline void
2272 futex_wake (int *addr, int count)
2273 {
2274- sys_futex0 (addr, FUTEX_WAKE, count);
2275+ long res = sys_futex0 (addr, gomp_futex_wake, count);
2276+ if (__builtin_expect (res == -ENOSYS, 0))
2277+ {
2278+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2279+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2280+ sys_futex0 (addr, gomp_futex_wake, count);
2281+ }
2282 }
2283
2284 #endif /* __LP64__ */
2285+
2286+static inline void
2287+cpu_relax (void)
2288+{
2289+ __asm volatile ("rep; nop" : : : "memory");
2290+}
2291+
2292+static inline void
2293+atomic_write_barrier (void)
2294+{
2295+ __sync_synchronize ();
2296+}
2297--- libgomp/config/linux/wait.h.jj 2008-03-26 15:11:32.000000000 +0100
2298+++ libgomp/config/linux/wait.h 2008-03-26 15:11:32.000000000 +0100
2299@@ -0,0 +1,68 @@
2300+/* Copyright (C) 2008 Free Software Foundation, Inc.
2301+ Contributed by Jakub Jelinek <jakub@redhat.com>.
2302+
2303+ This file is part of the GNU OpenMP Library (libgomp).
2304+
2305+ Libgomp is free software; you can redistribute it and/or modify it
2306+ under the terms of the GNU Lesser General Public License as published by
2307+ the Free Software Foundation; either version 2.1 of the License, or
2308+ (at your option) any later version.
2309+
2310+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
2311+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2312+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
2313+ more details.
2314+
2315+ You should have received a copy of the GNU Lesser General Public License
2316+ along with libgomp; see the file COPYING.LIB. If not, write to the
2317+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
2318+ MA 02110-1301, USA. */
2319+
2320+/* As a special exception, if you link this library with other files, some
2321+ of which are compiled with GCC, to produce an executable, this library
2322+ does not by itself cause the resulting executable to be covered by the
2323+ GNU General Public License. This exception does not however invalidate
2324+ any other reasons why the executable file might be covered by the GNU
2325+ General Public License. */
2326+
2327+/* This is a Linux specific implementation of a mutex synchronization
2328+ mechanism for libgomp. This type is private to the library. This
2329+ implementation uses atomic instructions and the futex syscall. */
2330+
2331+#ifndef GOMP_WAIT_H
2332+#define GOMP_WAIT_H 1
2333+
2334+#include "libgomp.h"
2335+#include <errno.h>
2336+
2337+#define FUTEX_WAIT 0
2338+#define FUTEX_WAKE 1
2339+#define FUTEX_PRIVATE_FLAG 128L
2340+
2341+#ifdef HAVE_ATTRIBUTE_VISIBILITY
2342+# pragma GCC visibility push(hidden)
2343+#endif
2344+
2345+extern long int gomp_futex_wait, gomp_futex_wake;
2346+
2347+#include "futex.h"
2348+
2349+static inline void do_wait (int *addr, int val)
2350+{
2351+ unsigned long long i, count = gomp_spin_count_var;
2352+
2353+ if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0))
2354+ count = gomp_throttled_spin_count_var;
2355+ for (i = 0; i < count; i++)
2356+ if (__builtin_expect (*addr != val, 0))
2357+ return;
2358+ else
2359+ cpu_relax ();
2360+ futex_wait (addr, val);
2361+}
2362+
2363+#ifdef HAVE_ATTRIBUTE_VISIBILITY
2364+# pragma GCC visibility pop
2365+#endif
2366+
2367+#endif /* GOMP_WAIT_H */
2368--- libgomp/config/linux/sparc/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2369+++ libgomp/config/linux/sparc/futex.h 2008-03-26 15:11:32.000000000 +0100
2370@@ -1,4 +1,4 @@
2371-/* Copyright (C) 2005 Free Software Foundation, Inc.
2372+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2373 Contributed by Jakub Jelinek <jakub@redhat.com>.
2374
2375 This file is part of the GNU OpenMP Library (libgomp).
2376@@ -28,10 +28,8 @@
2377 /* Provide target-specific access to the futex system call. */
2378
2379 #include <sys/syscall.h>
2380-#define FUTEX_WAIT 0
2381-#define FUTEX_WAKE 1
2382
2383-static inline void
2384+static inline long
2385 sys_futex0 (int *addr, int op, int val)
2386 {
2387 register long int g1 __asm__ ("g1");
2388@@ -47,9 +45,9 @@ sys_futex0 (int *addr, int op, int val)
2389 o3 = 0;
2390
2391 #ifdef __arch64__
2392-# define SYSCALL_STRING "ta\t0x6d"
2393+# define SYSCALL_STRING "ta\t0x6d; bcs,a,pt %%xcc, 1f; sub %%g0, %%o0, %%o0; 1:"
2394 #else
2395-# define SYSCALL_STRING "ta\t0x10"
2396+# define SYSCALL_STRING "ta\t0x10; bcs,a 1f; sub %%g0, %%o0, %%o0; 1:"
2397 #endif
2398
2399 __asm volatile (SYSCALL_STRING
2400@@ -65,16 +63,49 @@ sys_futex0 (int *addr, int op, int val)
2401 "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62",
2402 #endif
2403 "cc", "memory");
2404+ return o0;
2405 }
2406
2407 static inline void
2408 futex_wait (int *addr, int val)
2409 {
2410- sys_futex0 (addr, FUTEX_WAIT, val);
2411+ long err = sys_futex0 (addr, gomp_futex_wait, val);
2412+ if (__builtin_expect (err == ENOSYS, 0))
2413+ {
2414+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2415+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2416+ sys_futex0 (addr, gomp_futex_wait, val);
2417+ }
2418 }
2419
2420 static inline void
2421 futex_wake (int *addr, int count)
2422 {
2423- sys_futex0 (addr, FUTEX_WAKE, count);
2424+ long err = sys_futex0 (addr, gomp_futex_wake, count);
2425+ if (__builtin_expect (err == ENOSYS, 0))
2426+ {
2427+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2428+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2429+ sys_futex0 (addr, gomp_futex_wake, count);
2430+ }
2431+}
2432+
2433+static inline void
2434+cpu_relax (void)
2435+{
2436+#if defined __arch64__ || defined __sparc_v9__
2437+ __asm volatile ("membar #LoadLoad" : : : "memory");
2438+#else
2439+ __asm volatile ("" : : : "memory");
2440+#endif
2441+}
2442+
2443+static inline void
2444+atomic_write_barrier (void)
2445+{
2446+#if defined __arch64__ || defined __sparc_v9__
2447+ __asm volatile ("membar #StoreStore" : : : "memory");
2448+#else
2449+ __sync_synchronize ();
2450+#endif
2451 }
2452--- libgomp/config/linux/ia64/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2453+++ libgomp/config/linux/ia64/futex.h 2008-03-26 15:11:32.000000000 +0100
2454@@ -1,4 +1,4 @@
2455-/* Copyright (C) 2005 Free Software Foundation, Inc.
2456+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2457 Contributed by Richard Henderson <rth@redhat.com>.
2458
2459 This file is part of the GNU OpenMP Library (libgomp).
2460@@ -29,23 +29,24 @@
2461
2462 #include <sys/syscall.h>
2463
2464-#define FUTEX_WAIT 0
2465-#define FUTEX_WAKE 1
2466
2467
2468-static inline void
2469-sys_futex0(int *addr, int op, int val)
2470+static inline long
2471+sys_futex0(int *addr, long op, int val)
2472 {
2473 register long out0 asm ("out0") = (long) addr;
2474 register long out1 asm ("out1") = op;
2475 register long out2 asm ("out2") = val;
2476 register long out3 asm ("out3") = 0;
2477+ register long r8 asm ("r8");
2478+ register long r10 asm ("r10");
2479 register long r15 asm ("r15") = SYS_futex;
2480
2481 __asm __volatile ("break 0x100000"
2482- : "=r"(r15), "=r"(out0), "=r"(out1), "=r"(out2), "=r"(out3)
2483+ : "=r"(r15), "=r"(out0), "=r"(out1), "=r"(out2), "=r"(out3),
2484+ "=r"(r8), "=r"(r10)
2485 : "r"(r15), "r"(out0), "r"(out1), "r"(out2), "r"(out3)
2486- : "memory", "r8", "r10", "out4", "out5", "out6", "out7",
2487+ : "memory", "out4", "out5", "out6", "out7",
2488 /* Non-stacked integer registers, minus r8, r10, r15. */
2489 "r2", "r3", "r9", "r11", "r12", "r13", "r14", "r16", "r17", "r18",
2490 "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27",
2491@@ -56,16 +57,41 @@ sys_futex0(int *addr, int op, int val)
2492 "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
2493 /* Branch registers. */
2494 "b6");
2495+ return r8 & r10;
2496 }
2497
2498 static inline void
2499 futex_wait (int *addr, int val)
2500 {
2501- sys_futex0 (addr, FUTEX_WAIT, val);
2502+ long err = sys_futex0 (addr, gomp_futex_wait, val);
2503+ if (__builtin_expect (err == ENOSYS, 0))
2504+ {
2505+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2506+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2507+ sys_futex0 (addr, gomp_futex_wait, val);
2508+ }
2509 }
2510
2511 static inline void
2512 futex_wake (int *addr, int count)
2513 {
2514- sys_futex0 (addr, FUTEX_WAKE, count);
2515+ long err = sys_futex0 (addr, gomp_futex_wake, count);
2516+ if (__builtin_expect (err == ENOSYS, 0))
2517+ {
2518+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2519+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2520+ sys_futex0 (addr, gomp_futex_wake, count);
2521+ }
2522+}
2523+
2524+static inline void
2525+cpu_relax (void)
2526+{
2527+ __asm volatile ("hint @pause" : : : "memory");
2528+}
2529+
2530+static inline void
2531+atomic_write_barrier (void)
2532+{
2533+ __sync_synchronize ();
2534 }
2535--- libgomp/config/linux/s390/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2536+++ libgomp/config/linux/s390/futex.h 2008-03-26 15:11:32.000000000 +0100
2537@@ -1,4 +1,4 @@
2538-/* Copyright (C) 2005 Free Software Foundation, Inc.
2539+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2540 Contributed by Jakub Jelinek <jakub@redhat.com>.
2541
2542 This file is part of the GNU OpenMP Library (libgomp).
2543@@ -28,10 +28,8 @@
2544 /* Provide target-specific access to the futex system call. */
2545
2546 #include <sys/syscall.h>
2547-#define FUTEX_WAIT 0
2548-#define FUTEX_WAKE 1
2549
2550-static inline void
2551+static inline long
2552 sys_futex0 (int *addr, int op, int val)
2553 {
2554 register long int gpr2 __asm__ ("2");
2555@@ -49,16 +47,41 @@ sys_futex0 (int *addr, int op, int val)
2556 : "i" (SYS_futex),
2557 "0" (gpr2), "d" (gpr3), "d" (gpr4), "d" (gpr5)
2558 : "memory");
2559+ return gpr2;
2560 }
2561
2562 static inline void
2563 futex_wait (int *addr, int val)
2564 {
2565- sys_futex0 (addr, FUTEX_WAIT, val);
2566+ long err = sys_futex0 (addr, gomp_futex_wait, val);
2567+ if (__builtin_expect (err == -ENOSYS, 0))
2568+ {
2569+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2570+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2571+ sys_futex0 (addr, gomp_futex_wait, val);
2572+ }
2573 }
2574
2575 static inline void
2576 futex_wake (int *addr, int count)
2577 {
2578- sys_futex0 (addr, FUTEX_WAKE, count);
2579+ long err = sys_futex0 (addr, gomp_futex_wake, count);
2580+ if (__builtin_expect (err == -ENOSYS, 0))
2581+ {
2582+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2583+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2584+ sys_futex0 (addr, gomp_futex_wake, count);
2585+ }
2586+}
2587+
2588+static inline void
2589+cpu_relax (void)
2590+{
2591+ __asm volatile ("" : : : "memory");
2592+}
2593+
2594+static inline void
2595+atomic_write_barrier (void)
2596+{
2597+ __sync_synchronize ();
2598 }
2599--- libgomp/config/linux/mutex.c.jj 2007-12-07 14:41:00.000000000 +0100
2600+++ libgomp/config/linux/mutex.c 2008-03-26 15:11:32.000000000 +0100
2601@@ -1,4 +1,4 @@
2602-/* Copyright (C) 2005 Free Software Foundation, Inc.
2603+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2604 Contributed by Richard Henderson <rth@redhat.com>.
2605
2606 This file is part of the GNU OpenMP Library (libgomp).
2607@@ -29,9 +29,10 @@
2608 mechanism for libgomp. This type is private to the library. This
2609 implementation uses atomic instructions and the futex syscall. */
2610
2611-#include "libgomp.h"
2612-#include "futex.h"
2613+#include "wait.h"
2614
2615+long int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
2616+long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
2617
2618 void
2619 gomp_mutex_lock_slow (gomp_mutex_t *mutex)
2620@@ -40,7 +41,7 @@ gomp_mutex_lock_slow (gomp_mutex_t *mute
2621 {
2622 int oldval = __sync_val_compare_and_swap (mutex, 1, 2);
2623 if (oldval != 0)
2624- futex_wait (mutex, 2);
2625+ do_wait (mutex, 2);
2626 }
2627 while (!__sync_bool_compare_and_swap (mutex, 0, 2));
2628 }
2629--- libgomp/config/linux/sem.c.jj 2007-12-07 14:41:00.000000000 +0100
2630+++ libgomp/config/linux/sem.c 2008-03-26 15:11:32.000000000 +0100
2631@@ -1,4 +1,4 @@
2632-/* Copyright (C) 2005 Free Software Foundation, Inc.
2633+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2634 Contributed by Richard Henderson <rth@redhat.com>.
2635
2636 This file is part of the GNU OpenMP Library (libgomp).
2637@@ -29,8 +29,7 @@
2638 mechanism for libgomp. This type is private to the library. This
2639 implementation uses atomic instructions and the futex syscall. */
2640
2641-#include "libgomp.h"
2642-#include "futex.h"
2643+#include "wait.h"
2644
2645
2646 void
2647@@ -44,7 +43,7 @@ gomp_sem_wait_slow (gomp_sem_t *sem)
2648 if (__sync_bool_compare_and_swap (sem, val, val - 1))
2649 return;
2650 }
2651- futex_wait (sem, -1);
2652+ do_wait (sem, -1);
2653 }
2654 }
2655
2656--- libgomp/config/linux/powerpc/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2657+++ libgomp/config/linux/powerpc/futex.h 2008-03-26 15:11:32.000000000 +0100
2658@@ -1,4 +1,4 @@
2659-/* Copyright (C) 2005 Free Software Foundation, Inc.
2660+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2661 Contributed by Richard Henderson <rth@redhat.com>.
2662
2663 This file is part of the GNU OpenMP Library (libgomp).
2664@@ -28,10 +28,8 @@
2665 /* Provide target-specific access to the futex system call. */
2666
2667 #include <sys/syscall.h>
2668-#define FUTEX_WAIT 0
2669-#define FUTEX_WAKE 1
2670
2671-static inline void
2672+static inline long
2673 sys_futex0 (int *addr, int op, int val)
2674 {
2675 register long int r0 __asm__ ("r0");
2676@@ -50,21 +48,48 @@ sys_futex0 (int *addr, int op, int val)
2677 doesn't. It doesn't much matter for us. In the interest of unity,
2678 go ahead and clobber it always. */
2679
2680- __asm volatile ("sc"
2681+ __asm volatile ("sc; mfcr %0"
2682 : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6)
2683 : "r"(r0), "r"(r3), "r"(r4), "r"(r5), "r"(r6)
2684 : "r7", "r8", "r9", "r10", "r11", "r12",
2685 "cr0", "ctr", "memory");
2686+ if (__builtin_expect (r0 & (1 << 28), 0))
2687+ return r3;
2688+ return 0;
2689 }
2690
2691 static inline void
2692 futex_wait (int *addr, int val)
2693 {
2694- sys_futex0 (addr, FUTEX_WAIT, val);
2695+ long err = sys_futex0 (addr, gomp_futex_wait, val);
2696+ if (__builtin_expect (err == ENOSYS, 0))
2697+ {
2698+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2699+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2700+ sys_futex0 (addr, gomp_futex_wait, val);
2701+ }
2702 }
2703
2704 static inline void
2705 futex_wake (int *addr, int count)
2706 {
2707- sys_futex0 (addr, FUTEX_WAKE, count);
2708+ long err = sys_futex0 (addr, gomp_futex_wake, count);
2709+ if (__builtin_expect (err == ENOSYS, 0))
2710+ {
2711+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2712+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2713+ sys_futex0 (addr, gomp_futex_wake, count);
2714+ }
2715+}
2716+
2717+static inline void
2718+cpu_relax (void)
2719+{
2720+ __asm volatile ("" : : : "memory");
2721+}
2722+
2723+static inline void
2724+atomic_write_barrier (void)
2725+{
2726+ __asm volatile ("eieio" : : : "memory");
2727 }
2728--- libgomp/config/linux/bar.c.jj 2007-12-07 14:41:00.000000000 +0100
2729+++ libgomp/config/linux/bar.c 2008-03-26 15:11:32.000000000 +0100
2730@@ -1,4 +1,4 @@
2731-/* Copyright (C) 2005 Free Software Foundation, Inc.
2732+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2733 Contributed by Richard Henderson <rth@redhat.com>.
2734
2735 This file is part of the GNU OpenMP Library (libgomp).
2736@@ -29,32 +29,29 @@
2737 mechanism for libgomp. This type is private to the library. This
2738 implementation uses atomic instructions and the futex syscall. */
2739
2740-#include "libgomp.h"
2741-#include "futex.h"
2742 #include <limits.h>
2743+#include "wait.h"
2744
2745
2746 void
2747-gomp_barrier_wait_end (gomp_barrier_t *bar, bool last)
2748+gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
2749 {
2750- if (last)
2751+ if (__builtin_expect ((state & 1) != 0, 0))
2752 {
2753- bar->generation++;
2754- futex_wake (&bar->generation, INT_MAX);
2755+ /* Next time we'll be awaiting TOTAL threads again. */
2756+ bar->awaited = bar->total;
2757+ atomic_write_barrier ();
2758+ bar->generation += 2;
2759+ futex_wake ((int *) &bar->generation, INT_MAX);
2760 }
2761 else
2762 {
2763- unsigned int generation = bar->generation;
2764-
2765- gomp_mutex_unlock (&bar->mutex);
2766+ unsigned int generation = state;
2767
2768 do
2769- futex_wait (&bar->generation, generation);
2770+ do_wait ((int *) &bar->generation, generation);
2771 while (bar->generation == generation);
2772 }
2773-
2774- if (__sync_add_and_fetch (&bar->arrived, -1) == 0)
2775- gomp_mutex_unlock (&bar->mutex);
2776 }
2777
2778 void
2779@@ -62,3 +59,18 @@ gomp_barrier_wait (gomp_barrier_t *barri
2780 {
2781 gomp_barrier_wait_end (barrier, gomp_barrier_wait_start (barrier));
2782 }
2783+
2784+/* Like gomp_barrier_wait, except that if the encountering thread
2785+ is not the last one to hit the barrier, it returns immediately.
2786+ The intended usage is that a thread which intends to gomp_barrier_destroy
2787+ this barrier calls gomp_barrier_wait, while all other threads
2788+ call gomp_barrier_wait_last. When gomp_barrier_wait returns,
2789+ the barrier can be safely destroyed. */
2790+
2791+void
2792+gomp_barrier_wait_last (gomp_barrier_t *barrier)
2793+{
2794+ gomp_barrier_state_t state = gomp_barrier_wait_start (barrier);
2795+ if (state & 1)
2796+ gomp_barrier_wait_end (barrier, state);
2797+}