summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch')
-rw-r--r--meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch2798
1 files changed, 0 insertions, 2798 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch b/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch
deleted file mode 100644
index 54c855fb34..0000000000
--- a/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch
+++ /dev/null
@@ -1,2798 +0,0 @@
1Upstream-Status: Inappropriate [distribution: fedora]
22008-03-28 Jakub Jelinek <jakub@redhat.com>
3
4 * config/linux/sparc/futex.h (atomic_write_barrier): Fix membar
5 argument.
6
72008-03-27 Jakub Jelinek <jakub@redhat.com>
8
9 * libgomp.h (struct gomp_team_state): Remove single_count field
10 ifndef HAVE_SYNC_BUILTINS.
11 (struct gomp_team): Likewise. Add work_share_list_free_lock
12 ifndef HAVE_SYNC_BUILTINS.
13 * team.c (gomp_new_team): If HAVE_SYNC_BUILTINS is not defined,
14 don't initialize single_count, but instead initialize
15 work_share_list_free_lock.
16 (free_team): Destroy work_share_list_free_lock ifndef
17 HAVE_SYNC_BUILTINS.
18 (gomp_team_start): Don't initialize ts.single_count ifndef
19 HAVE_SYNC_BUILTINS.
20 * work.c (alloc_work_share, free_work_share): Use
21 work_share_list_free_lock instead of atomic chaining ifndef
22 HAVE_SYNC_BUILTINS.
23
242008-03-26 Jakub Jelinek <jakub@redhat.com>
25
26 * loop.c (gomp_loop_init): Fix GFS_DYNAMIC ws->mode setting.
27 * testsuite/libgomp.c/loop-4.c: New test.
28
29 * libgomp.h (struct gomp_team_state): Add single_count field.
30 (struct gomp_team): Likewise.
31 * team.c (gomp_new_team): Clear single_count.
32 (gomp_team_start): Likewise.
33 * single.c (GOMP_single_start): Rewritten if HAVE_SYNC_BUILTINS.
34
352008-03-25 Jakub Jelinek <jakub@redhat.com>
36
37 * team.c (gomp_thread_start): Don't clear ts.static_trip here.
38 * loop.c (gomp_loop_static_start, gomp_loop_dynamic_start): Clear
39 ts.static_trip here.
40 * work.c (gomp_work_share_start): Don't clear ts.static_trip here.
41
422008-03-21 Jakub Jelinek <jakub@redhat.com>
43
44 * libgomp.h: Include ptrlock.h.
45 (struct gomp_work_share): Reshuffle fields. Add next_alloc,
46 next_ws, next_free and inline_ordered_team_ids fields, change
47 ordered_team_ids into pointer from flexible array member.
48 (struct gomp_team_state): Add last_work_share field, remove
49 work_share_generation.
50 (struct gomp_team): Remove work_share_lock, generation_mask,
51 oldest_live_gen, num_live_gen and init_work_shares fields, add
52 work work_share_list_alloc, work_share_list_free and work_share_chunk
53 fields. Change work_shares from pointer to pointers into an array.
54 (gomp_new_team): New prototype.
55 (gomp_team_start): Change type of last argument.
56 (gomp_new_work_share): Removed.
57 (gomp_init_work_share, gomp_fini_work_share): New prototypes.
58 (gomp_work_share_init_done): New static inline.
59 * team.c (gomp_thread_start): Clear ts.last_work_share, don't clear
60 ts.work_share_generation.
61 (new_team): Removed.
62 (gomp_new_team): New function.
63 (free_team): Free gomp_work_share blocks chained through next_alloc,
64 instead of freeing work_shares and destroying work_share_lock.
65 (gomp_team_start): Change last argument from ws to team, don't create
66 new team, set ts.work_share to &team->work_shares[0] and clear
67 ts.last_work_share. Don't clear ts.work_share_generation.
68 (gomp_team_end): Call gomp_fini_work_share.
69 * work.c (gomp_new_work_share): Removed.
70 (alloc_work_share, gomp_init_work_share, gomp_fini_work_share): New
71 functions.
72 (free_work_share): Add team argument. Call gomp_fini_work_share
73 and then either free ws if orphaned, or put it into
74 work_share_list_free list of the current team.
75 (gomp_work_share_start, gomp_work_share_end,
76 gomp_work_share_end_nowait): Rewritten.
77 * sections.c (GOMP_sections_start): Call gomp_work_share_init_done
78 after gomp_sections_init. If HAVE_SYNC_BUILTINS, call
79 gomp_iter_dynamic_next instead of the _locked variant and don't take
80 lock around it, otherwise acquire it before calling
81 gomp_iter_dynamic_next_locked.
82 (GOMP_sections_next): If HAVE_SYNC_BUILTINS, call
83 gomp_iter_dynamic_next instead of the _locked variant and don't take
84 lock around it.
85 (GOMP_parallel_sections_start): Call gomp_new_team instead of
86 gomp_new_work_share. Call gomp_sections_init on &team->work_shares[0].
87 Adjust gomp_team_start caller.
88 * loop.c (gomp_loop_static_start, gomp_loop_ordered_static_start): Call
89 gomp_work_share_init_done after gomp_loop_init. Don't unlock ws->lock.
90 (gomp_loop_dynamic_start, gomp_loop_guided_start): Call
91 gomp_work_share_init_done after gomp_loop_init. If HAVE_SYNC_BUILTINS,
92 don't unlock ws->lock, otherwise lock it.
93 (gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start): Call
94 gomp_work_share_init_done after gomp_loop_init. Lock ws->lock.
95 (gomp_parallel_loop_start): Call gomp_new_team instead of
96 gomp_new_work_share. Call gomp_loop_init on &team->work_shares[0].
97 Adjust gomp_team_start caller.
98 * single.c (GOMP_single_start, GOMP_single_copy_start): Call
99 gomp_work_share_init_done if gomp_work_share_start returned true.
100 Don't unlock ws->lock.
101 * parallel.c (GOMP_parallel_start): Call gomp_new_team and pass that
102 as last argument to gomp_team_start.
103 * config/linux/ptrlock.c: New file.
104 * config/linux/ptrlock.h: New file.
105 * config/posix/ptrlock.c: New file.
106 * config/posix/ptrlock.h: New file.
107 * Makefile.am (libgomp_la_SOURCES): Add ptrlock.c.
108 * Makefile.in: Regenerated.
109 * testsuite/Makefile.in: Regenerated.
110
1112008-03-19 Jakub Jelinek <jakub@redhat.com>
112
113 * libgomp.h (gomp_active_wait_policy): Remove decl.
114 (gomp_throttled_spin_count_var, gomp_available_cpus,
115 gomp_managed_threads): New extern decls.
116 * team.c (gomp_team_start, gomp_team_end): If number of threads
117 changed, adjust atomically gomp_managed_threads.
118 * env.c (gomp_active_wait_policy, gomp_block_time_var): Remove.
119 (gomp_throttled_spin_count_var, gomp_available_cpus,
120 gomp_managed_threads): New variables.
121 (parse_millis): Removed.
122 (parse_spincount): New function.
123 (parse_wait_policy): Return -1/0/1 instead of setting
124 gomp_active_wait_policy.
125 (initialize_env): Call gomp_init_num_threads unconditionally.
126 Initialize gomp_available_cpus. Call parse_spincount instead
127 of parse_millis, initialize gomp_{,throttled_}spin_count_var
128 depending on presence and value of OMP_WAIT_POLICY and
129 GOMP_SPINCOUNT env vars.
130 * config/linux/wait.h (do_wait): Use gomp_throttled_spin_count_var
131 instead of gomp_spin_count_var if gomp_managed_threads >
132 gomp_available_cpus.
133
134 * config/linux/wait.h: Include errno.h.
135 (FUTEX_WAIT, FUTEX_WAKE, FUTEX_PRIVATE_FLAG): Define.
136 (gomp_futex_wake, gomp_futex_wait): New extern decls.
137 * config/linux/mutex.c (gomp_futex_wake, gomp_futex_wait): New
138 variables.
139 * config/linux/powerpc/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
140 (sys_futex0): Return error code.
141 (futex_wake, futex_wait): If ENOSYS was returned, clear
142 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
143 * config/linux/alpha/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
144 (futex_wake, futex_wait): If ENOSYS was returned, clear
145 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
146 * config/linux/x86/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
147 (sys_futex0): Return error code.
148 (futex_wake, futex_wait): If ENOSYS was returned, clear
149 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
150 * config/linux/s390/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
151 (sys_futex0): Return error code.
152 (futex_wake, futex_wait): If ENOSYS was returned, clear
153 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
154 * config/linux/ia64/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
155 (sys_futex0): Return error code.
156 (futex_wake, futex_wait): If ENOSYS was returned, clear
157 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
158 * config/linux/sparc/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove.
159 (sys_futex0): Return error code.
160 (futex_wake, futex_wait): If ENOSYS was returned, clear
161 FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry.
162
1632008-03-18 Jakub Jelinek <jakub@redhat.com>
164
165 * libgomp.h (struct gomp_work_share): Add mode field. Put lock and
166 next into a different cache line from most of the write-once fields.
167 * loop.c: Include limits.h.
168 (gomp_loop_init): For GFS_DYNAMIC, multiply ws->chunk_size by incr.
169 If adding ws->chunk_size nthreads + 1 times after end won't
170 overflow, set ws->mode to 1.
171 * iter.c (gomp_iter_dynamic_next_locked): Don't multiply
172 ws->chunk_size by incr.
173 (gomp_iter_dynamic_next): Likewise. If ws->mode, use more efficient
174 code.
175 * work.c: Include stddef.h.
176 (gomp_new_work_share): Use offsetof rather than sizeof.
177
1782008-03-17 Jakub Jelinek <jakub@redhat.com>
179
180 * libgomp.h (struct gomp_team): Change ordered_release field
181 into gomp_sem_t ** from flexible array member. Add implicit_task
182 and initial_work_shares fields.
183 (gomp_new_task): Removed.
184 (gomp_init_task): New prototype.
185 * team.c (new_team): Allocate implicit_task for each thread
186 and initial work_shares together with gomp_team allocation.
187 (free_team): Only free work_shares if it is not init_work_shares.
188 (gomp_team_start): Use gomp_init_task instead of gomp_new_task,
189 set thr->task to the corresponding implicit_task array entry.
190 * task.c (gomp_new_task): Removed.
191 (gomp_init_task): New function.
192 (gomp_end_task): Don't free the task.
193 (GOMP_task): Allocate struct gomp_task on the stack, call
194 gomp_init_task rather than gomp_new_task.
195 * work.c (gomp_work_share_start): If work_shares ==
196 init_work_shares, gomp_malloc + memcpy rather than gomp_realloc.
197
1982008-03-15 Jakub Jelinek <jakub@redhat.com>
199 Ulrich Drepper <drepper@redhat.com>
200
201 * config/linux/bar.h (gomp_barrier_state_t): Rewritten.
202 (gomp_barrier_state_t): Change to unsigned int.
203 (gomp_barrier_init, gomp_barrier_reinit, gomp_barrier_destroy,
204 gomp_barrier_wait_start, gomp_barrier_last_thread): Rewritten.
205 (gomp_barrier_wait_last): Prototype rather than inline.
206 * config/linux/bar.c (gomp_barrier_wait_end): Rewritten.
207 (gomp_barrier_wait_last): New function.
208
2092008-03-15 Jakub Jelinek <jakub@redhat.com>
210
211 * team.c (gomp_thread_start): Use gomp_barrier_wait_last instead
212 of gomp_barrier_wait.
213 * env.c (gomp_block_time_var, gomp_spin_count_var): New variables.
214 (parse_millis): New function.
215 (initialize_env): Handle GOMP_BLOCKTIME env var.
216 * libgomp.h (struct gomp_team): Move close to the end of the struct.
217 (gomp_spin_count_var): New extern var decl.
218 * work.c (gomp_work_share_end): Use gomp_barrier_state_t bstate
219 var instead of bool last, call gomp_barrier_last_thread to check
220 for last thread, pass bstate to gomp_barrier_wait_end.
221 * config/linux/wait.h: New file.
222 * config/linux/mutex.c: Include wait.h instead of libgomp.h and
223 futex.h.
224 (gomp_mutex_lock_slow): Call do_wait instead of futex_wait.
225 * config/linux/bar.c: Include wait.h instead of libgomp.h and
226 futex.h.
227 (gomp_barrier_wait_end): Change second argument to
228 gomp_barrier_state_t. Call do_wait instead of futex_wait.
229 * config/linux/sem.c: Include wait.h instead of libgomp.h and
230 futex.h.
231 (gomp_sem_wait_slow): Call do_wait instead of futex_wait.
232 * config/linux/lock.c: Include wait.h instead of libgomp.h and
233 futex.h.
234 (gomp_set_nest_lock_25): Call do_wait instead of futex_wait.
235 * config/linux/affinity.c: Assume HAVE_SYNC_BUILTINS.
236 * config/linux/bar.h (gomp_barrier_state_t): New typedef.
237 (gomp_barrier_wait_end): Change second argument to
238 gomp_barrier_state_t.
239 (gomp_barrier_wait_start): Return gomp_barrier_state_t.
240 (gomp_barrier_last_thread, gomp_barrier_wait_last): New static
241 inlines.
242 * config/linux/powerpc/futex.h (cpu_relax, atomic_write_barrier): New
243 static inlines.
244 * config/linux/alpha/futex.h (cpu_relax, atomic_write_barrier):
245 Likewise.
246 * config/linux/x86/futex.h (cpu_relax, atomic_write_barrier):
247 Likewise.
248 * config/linux/s390/futex.h (cpu_relax, atomic_write_barrier):
249 Likewise.
250 * config/linux/ia64/futex.h (cpu_relax, atomic_write_barrier):
251 Likewise.
252 * config/linux/sparc/futex.h (cpu_relax, atomic_write_barrier):
253 Likewise.
254 * config/posix/bar.c (gomp_barrier_wait_end): Change second argument
255 to gomp_barrier_state_t.
256 * config/posix/bar.h (gomp_barrier_state_t): New typedef.
257 (gomp_barrier_wait_end): Change second argument to
258 gomp_barrier_state_t.
259 (gomp_barrier_wait_start): Return gomp_barrier_state_t.
260 (gomp_barrier_last_thread, gomp_barrier_wait_last): New static
261 inlines.
262
263--- libgomp/parallel.c.jj 2007-12-07 14:41:01.000000000 +0100
264+++ libgomp/parallel.c 2008-03-26 15:32:06.000000000 +0100
265@@ -68,7 +68,7 @@ void
266 GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
267 {
268 num_threads = gomp_resolve_num_threads (num_threads);
269- gomp_team_start (fn, data, num_threads, NULL);
270+ gomp_team_start (fn, data, num_threads, gomp_new_team (num_threads));
271 }
272
273 void
274--- libgomp/sections.c.jj 2007-12-07 14:41:01.000000000 +0100
275+++ libgomp/sections.c 2008-03-26 15:33:06.000000000 +0100
276@@ -59,14 +59,24 @@ GOMP_sections_start (unsigned count)
277 long s, e, ret;
278
279 if (gomp_work_share_start (false))
280- gomp_sections_init (thr->ts.work_share, count);
281+ {
282+ gomp_sections_init (thr->ts.work_share, count);
283+ gomp_work_share_init_done ();
284+ }
285
286+#ifdef HAVE_SYNC_BUILTINS
287+ if (gomp_iter_dynamic_next (&s, &e))
288+ ret = s;
289+ else
290+ ret = 0;
291+#else
292+ gomp_mutex_lock (&thr->ts.work_share->lock);
293 if (gomp_iter_dynamic_next_locked (&s, &e))
294 ret = s;
295 else
296 ret = 0;
297-
298 gomp_mutex_unlock (&thr->ts.work_share->lock);
299+#endif
300
301 return ret;
302 }
303@@ -83,15 +93,23 @@ GOMP_sections_start (unsigned count)
304 unsigned
305 GOMP_sections_next (void)
306 {
307- struct gomp_thread *thr = gomp_thread ();
308 long s, e, ret;
309
310+#ifdef HAVE_SYNC_BUILTINS
311+ if (gomp_iter_dynamic_next (&s, &e))
312+ ret = s;
313+ else
314+ ret = 0;
315+#else
316+ struct gomp_thread *thr = gomp_thread ();
317+
318 gomp_mutex_lock (&thr->ts.work_share->lock);
319 if (gomp_iter_dynamic_next_locked (&s, &e))
320 ret = s;
321 else
322 ret = 0;
323 gomp_mutex_unlock (&thr->ts.work_share->lock);
324+#endif
325
326 return ret;
327 }
328@@ -103,15 +121,15 @@ void
329 GOMP_parallel_sections_start (void (*fn) (void *), void *data,
330 unsigned num_threads, unsigned count)
331 {
332- struct gomp_work_share *ws;
333+ struct gomp_team *team;
334
335 num_threads = gomp_resolve_num_threads (num_threads);
336 if (gomp_dyn_var && num_threads > count)
337 num_threads = count;
338
339- ws = gomp_new_work_share (false, num_threads);
340- gomp_sections_init (ws, count);
341- gomp_team_start (fn, data, num_threads, ws);
342+ team = gomp_new_team (num_threads);
343+ gomp_sections_init (&team->work_shares[0], count);
344+ gomp_team_start (fn, data, num_threads, team);
345 }
346
347 /* The GOMP_section_end* routines are called after the thread is told
348--- libgomp/env.c.jj 2007-12-07 14:41:01.000000000 +0100
349+++ libgomp/env.c 2008-03-26 16:40:26.000000000 +0100
350@@ -44,6 +44,11 @@ enum gomp_schedule_type gomp_run_sched_v
351 unsigned long gomp_run_sched_chunk = 1;
352 unsigned short *gomp_cpu_affinity;
353 size_t gomp_cpu_affinity_len;
354+#ifndef HAVE_SYNC_BUILTINS
355+gomp_mutex_t gomp_remaining_threads_lock;
356+#endif
357+unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
358+unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
359
360 /* Parse the OMP_SCHEDULE environment variable. */
361
362@@ -147,6 +152,79 @@ parse_unsigned_long (const char *name, u
363 return false;
364 }
365
366+/* Parse the GOMP_SPINCOUNT environment varible. Return true if one was
367+ present and it was successfully parsed. */
368+
369+static bool
370+parse_spincount (const char *name, unsigned long long *pvalue)
371+{
372+ char *env, *end;
373+ unsigned long long value, mult = 1;
374+
375+ env = getenv (name);
376+ if (env == NULL)
377+ return false;
378+
379+ while (isspace ((unsigned char) *env))
380+ ++env;
381+ if (*env == '\0')
382+ goto invalid;
383+
384+ if (strncasecmp (env, "infinite", 8) == 0
385+ || strncasecmp (env, "infinity", 8) == 0)
386+ {
387+ value = ~0ULL;
388+ end = env + 8;
389+ goto check_tail;
390+ }
391+
392+ errno = 0;
393+ value = strtoull (env, &end, 10);
394+ if (errno)
395+ goto invalid;
396+
397+ while (isspace ((unsigned char) *end))
398+ ++end;
399+ if (*end != '\0')
400+ {
401+ switch (tolower (*end))
402+ {
403+ case 'k':
404+ mult = 1000LL;
405+ break;
406+ case 'm':
407+ mult = 1000LL * 1000LL;
408+ break;
409+ case 'g':
410+ mult = 1000LL * 1000LL * 1000LL;
411+ break;
412+ case 't':
413+ mult = 1000LL * 1000LL * 1000LL * 1000LL;
414+ break;
415+ default:
416+ goto invalid;
417+ }
418+ ++end;
419+ check_tail:
420+ while (isspace ((unsigned char) *end))
421+ ++end;
422+ if (*end != '\0')
423+ goto invalid;
424+ }
425+
426+ if (value > ~0ULL / mult)
427+ value = ~0ULL;
428+ else
429+ value *= mult;
430+
431+ *pvalue = value;
432+ return true;
433+
434+ invalid:
435+ gomp_error ("Invalid value for environment variable %s", name);
436+ return false;
437+}
438+
439 /* Parse a boolean value for environment variable NAME and store the
440 result in VALUE. */
441
442@@ -281,10 +359,25 @@ initialize_env (void)
443 parse_schedule ();
444 parse_boolean ("OMP_DYNAMIC", &gomp_dyn_var);
445 parse_boolean ("OMP_NESTED", &gomp_nest_var);
446+ gomp_init_num_threads ();
447+ gomp_available_cpus = gomp_nthreads_var;
448 if (!parse_unsigned_long ("OMP_NUM_THREADS", &gomp_nthreads_var))
449- gomp_init_num_threads ();
450+ gomp_nthreads_var = gomp_available_cpus;
451 if (parse_affinity ())
452 gomp_init_affinity ();
453+ if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var))
454+ {
455+ /* Using a rough estimation of 100000 spins per msec,
456+ use 200 msec blocking.
457+ Depending on the CPU speed, this can be e.g. 5 times longer
458+ or 5 times shorter. */
459+ gomp_spin_count_var = 20000000LL;
460+ }
461+ /* gomp_throttled_spin_count_var is used when there are more libgomp
462+ managed threads than available CPUs. Use very short spinning. */
463+ gomp_throttled_spin_count_var = 100LL;
464+ if (gomp_throttled_spin_count_var > gomp_spin_count_var)
465+ gomp_throttled_spin_count_var = gomp_spin_count_var;
466
467 /* Not strictly environment related, but ordering constructors is tricky. */
468 pthread_attr_init (&gomp_thread_attr);
469--- libgomp/libgomp.h.jj 2007-12-07 14:41:01.000000000 +0100
470+++ libgomp/libgomp.h 2008-03-27 12:21:51.000000000 +0100
471@@ -50,6 +50,7 @@
472 #include "sem.h"
473 #include "mutex.h"
474 #include "bar.h"
475+#include "ptrlock.h"
476
477
478 /* This structure contains the data to control one work-sharing construct,
479@@ -70,6 +71,8 @@ struct gomp_work_share
480 If this is a SECTIONS construct, this value will always be DYNAMIC. */
481 enum gomp_schedule_type sched;
482
483+ int mode;
484+
485 /* This is the chunk_size argument to the SCHEDULE clause. */
486 long chunk_size;
487
488@@ -81,17 +84,38 @@ struct gomp_work_share
489 is always 1. */
490 long incr;
491
492- /* This lock protects the update of the following members. */
493- gomp_mutex_t lock;
494+ /* This is a circular queue that details which threads will be allowed
495+ into the ordered region and in which order. When a thread allocates
496+ iterations on which it is going to work, it also registers itself at
497+ the end of the array. When a thread reaches the ordered region, it
498+ checks to see if it is the one at the head of the queue. If not, it
499+ blocks on its RELEASE semaphore. */
500+ unsigned *ordered_team_ids;
501
502- union {
503- /* This is the next iteration value to be allocated. In the case of
504- GFS_STATIC loops, this the iteration start point and never changes. */
505- long next;
506+ /* This is the number of threads that have registered themselves in
507+ the circular queue ordered_team_ids. */
508+ unsigned ordered_num_used;
509
510- /* This is the returned data structure for SINGLE COPYPRIVATE. */
511- void *copyprivate;
512- };
513+ /* This is the team_id of the currently acknowledged owner of the ordered
514+ section, or -1u if the ordered section has not been acknowledged by
515+ any thread. This is distinguished from the thread that is *allowed*
516+ to take the section next. */
517+ unsigned ordered_owner;
518+
519+ /* This is the index into the circular queue ordered_team_ids of the
520+ current thread that's allowed into the ordered reason. */
521+ unsigned ordered_cur;
522+
523+ /* This is a chain of allocated gomp_work_share blocks, valid only
524+ in the first gomp_work_share struct in the block. */
525+ struct gomp_work_share *next_alloc;
526+
527+ /* The above fields are written once during workshare initialization,
528+ or related to ordered worksharing. Make sure the following fields
529+ are in a different cache line. */
530+
531+ /* This lock protects the update of the following members. */
532+ gomp_mutex_t lock __attribute__((aligned (64)));
533
534 /* This is the count of the number of threads that have exited the work
535 share construct. If the construct was marked nowait, they have moved on
536@@ -99,27 +123,28 @@ struct gomp_work_share
537 of the team to exit the work share construct must deallocate it. */
538 unsigned threads_completed;
539
540- /* This is the index into the circular queue ordered_team_ids of the
541- current thread that's allowed into the ordered reason. */
542- unsigned ordered_cur;
543+ union {
544+ /* This is the next iteration value to be allocated. In the case of
545+ GFS_STATIC loops, this the iteration start point and never changes. */
546+ long next;
547
548- /* This is the number of threads that have registered themselves in
549- the circular queue ordered_team_ids. */
550- unsigned ordered_num_used;
551+ /* This is the returned data structure for SINGLE COPYPRIVATE. */
552+ void *copyprivate;
553+ };
554
555- /* This is the team_id of the currently acknoledged owner of the ordered
556- section, or -1u if the ordered section has not been acknowledged by
557- any thread. This is distinguished from the thread that is *allowed*
558- to take the section next. */
559- unsigned ordered_owner;
560+ union {
561+ /* Link to gomp_work_share struct for next work sharing construct
562+ encountered after this one. */
563+ gomp_ptrlock_t next_ws;
564+
565+ /* gomp_work_share structs are chained in the free work share cache
566+ through this. */
567+ struct gomp_work_share *next_free;
568+ };
569
570- /* This is a circular queue that details which threads will be allowed
571- into the ordered region and in which order. When a thread allocates
572- iterations on which it is going to work, it also registers itself at
573- the end of the array. When a thread reaches the ordered region, it
574- checks to see if it is the one at the head of the queue. If not, it
575- blocks on its RELEASE semaphore. */
576- unsigned ordered_team_ids[];
577+ /* If only few threads are in the team, ordered_team_ids can point
578+ to this array which fills the padding at the end of this struct. */
579+ unsigned inline_ordered_team_ids[0];
580 };
581
582 /* This structure contains all of the thread-local data associated with
583@@ -133,21 +158,24 @@ struct gomp_team_state
584
585 /* This is the work share construct which this thread is currently
586 processing. Recall that with NOWAIT, not all threads may be
587- processing the same construct. This value is NULL when there
588- is no construct being processed. */
589+ processing the same construct. */
590 struct gomp_work_share *work_share;
591
592+ /* This is the previous work share construct or NULL if there wasn't any.
593+ When all threads are done with the current work sharing construct,
594+ the previous one can be freed. The current one can't, as its
595+ next_ws field is used. */
596+ struct gomp_work_share *last_work_share;
597+
598 /* This is the ID of this thread within the team. This value is
599 guaranteed to be between 0 and N-1, where N is the number of
600 threads in the team. */
601 unsigned team_id;
602
603- /* The work share "generation" is a number that increases by one for
604- each work share construct encountered in the dynamic flow of the
605- program. It is used to find the control data for the work share
606- when encountering it for the first time. This particular number
607- reflects the generation of the work_share member of this struct. */
608- unsigned work_share_generation;
609+#ifdef HAVE_SYNC_BUILTINS
610+ /* Number of single stmts encountered. */
611+ unsigned long single_count;
612+#endif
613
614 /* For GFS_RUNTIME loops that resolved to GFS_STATIC, this is the
615 trip number through the loop. So first time a particular loop
616@@ -163,41 +191,53 @@ struct gomp_team_state
617
618 struct gomp_team
619 {
620- /* This lock protects access to the following work shares data structures. */
621- gomp_mutex_t work_share_lock;
622-
623- /* This is a dynamically sized array containing pointers to the control
624- structs for all "live" work share constructs. Here "live" means that
625- the construct has been encountered by at least one thread, and not
626- completed by all threads. */
627- struct gomp_work_share **work_shares;
628-
629- /* The work_shares array is indexed by "generation & generation_mask".
630- The mask will be 2**N - 1, where 2**N is the size of the array. */
631- unsigned generation_mask;
632-
633- /* These two values define the bounds of the elements of the work_shares
634- array that are currently in use. */
635- unsigned oldest_live_gen;
636- unsigned num_live_gen;
637-
638 /* This is the number of threads in the current team. */
639 unsigned nthreads;
640
641+ /* This is number of gomp_work_share structs that have been allocated
642+ as a block last time. */
643+ unsigned work_share_chunk;
644+
645 /* This is the saved team state that applied to a master thread before
646 the current thread was created. */
647 struct gomp_team_state prev_ts;
648
649- /* This barrier is used for most synchronization of the team. */
650- gomp_barrier_t barrier;
651-
652 /* This semaphore should be used by the master thread instead of its
653 "native" semaphore in the thread structure. Required for nested
654 parallels, as the master is a member of two teams. */
655 gomp_sem_t master_release;
656
657- /* This array contains pointers to the release semaphore of the threads
658- in the team. */
659+ /* List of gomp_work_share structs chained through next_free fields.
660+ This is populated and taken off only by the first thread in the
661+ team encountering a new work sharing construct, in a critical
662+ section. */
663+ struct gomp_work_share *work_share_list_alloc;
664+
665+ /* List of gomp_work_share structs freed by free_work_share. New
666+ entries are atomically added to the start of the list, and
667+ alloc_work_share can safely only move all but the first entry
668+ to work_share_list alloc, as free_work_share can happen concurrently
669+ with alloc_work_share. */
670+ struct gomp_work_share *work_share_list_free;
671+
672+#ifdef HAVE_SYNC_BUILTINS
673+ /* Number of simple single regions encountered by threads in this
674+ team. */
675+ unsigned long single_count;
676+#else
677+ /* Mutex protecting addition of workshares to work_share_list_free. */
678+ gomp_mutex_t work_share_list_free_lock;
679+#endif
680+
681+ /* This barrier is used for most synchronization of the team. */
682+ gomp_barrier_t barrier;
683+
684+ /* Initial work shares, to avoid allocating any gomp_work_share
685+ structs in the common case. */
686+ struct gomp_work_share work_shares[8];
687+
688+ /* This is an array with pointers to the release semaphore
689+ of the threads in the team. */
690 gomp_sem_t *ordered_release[];
691 };
692
693@@ -242,6 +282,11 @@ extern bool gomp_dyn_var;
694 extern bool gomp_nest_var;
695 extern enum gomp_schedule_type gomp_run_sched_var;
696 extern unsigned long gomp_run_sched_chunk;
697+#ifndef HAVE_SYNC_BUILTINS
698+extern gomp_mutex_t gomp_remaining_threads_lock;
699+#endif
700+extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
701+extern unsigned long gomp_available_cpus, gomp_managed_threads;
702
703 /* The attributes to be used during thread creation. */
704 extern pthread_attr_t gomp_thread_attr;
705@@ -306,17 +351,27 @@ extern unsigned gomp_dynamic_max_threads
706
707 /* team.c */
708
709+extern struct gomp_team *gomp_new_team (unsigned);
710 extern void gomp_team_start (void (*) (void *), void *, unsigned,
711- struct gomp_work_share *);
712+ struct gomp_team *);
713 extern void gomp_team_end (void);
714
715 /* work.c */
716
717-extern struct gomp_work_share * gomp_new_work_share (bool, unsigned);
718+extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
719+extern void gomp_fini_work_share (struct gomp_work_share *);
720 extern bool gomp_work_share_start (bool);
721 extern void gomp_work_share_end (void);
722 extern void gomp_work_share_end_nowait (void);
723
724+static inline void
725+gomp_work_share_init_done (void)
726+{
727+ struct gomp_thread *thr = gomp_thread ();
728+ if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
729+ gomp_ptrlock_set (&thr->ts.last_work_share->next_ws, thr->ts.work_share);
730+}
731+
732 #ifdef HAVE_ATTRIBUTE_VISIBILITY
733 # pragma GCC visibility pop
734 #endif
735--- libgomp/iter.c.jj 2008-03-26 14:48:34.000000000 +0100
736+++ libgomp/iter.c 2008-03-26 15:11:23.000000000 +0100
737@@ -1,4 +1,4 @@
738-/* Copyright (C) 2005 Free Software Foundation, Inc.
739+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
740 Contributed by Richard Henderson <rth@redhat.com>.
741
742 This file is part of the GNU OpenMP Library (libgomp).
743@@ -154,7 +154,7 @@ gomp_iter_dynamic_next_locked (long *pst
744 if (start == ws->end)
745 return false;
746
747- chunk = ws->chunk_size * ws->incr;
748+ chunk = ws->chunk_size;
749 left = ws->end - start;
750 if (ws->incr < 0)
751 {
752@@ -186,11 +186,38 @@ gomp_iter_dynamic_next (long *pstart, lo
753 struct gomp_work_share *ws = thr->ts.work_share;
754 long start, end, nend, chunk, incr;
755
756- start = ws->next;
757 end = ws->end;
758 incr = ws->incr;
759- chunk = ws->chunk_size * incr;
760+ chunk = ws->chunk_size;
761+
762+ if (__builtin_expect (ws->mode, 1))
763+ {
764+ long tmp = __sync_fetch_and_add (&ws->next, chunk);
765+ if (incr > 0)
766+ {
767+ if (tmp >= end)
768+ return false;
769+ nend = tmp + chunk;
770+ if (nend > end)
771+ nend = end;
772+ *pstart = tmp;
773+ *pend = nend;
774+ return true;
775+ }
776+ else
777+ {
778+ if (tmp <= end)
779+ return false;
780+ nend = tmp + chunk;
781+ if (nend < end)
782+ nend = end;
783+ *pstart = tmp;
784+ *pend = nend;
785+ return true;
786+ }
787+ }
788
789+ start = ws->next;
790 while (1)
791 {
792 long left = end - start;
793--- libgomp/work.c.jj 2007-12-07 14:41:01.000000000 +0100
794+++ libgomp/work.c 2008-03-27 12:21:51.000000000 +0100
795@@ -1,4 +1,4 @@
796-/* Copyright (C) 2005 Free Software Foundation, Inc.
797+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
798 Contributed by Richard Henderson <rth@redhat.com>.
799
800 This file is part of the GNU OpenMP Library (libgomp).
801@@ -29,39 +29,138 @@
802 of threads. */
803
804 #include "libgomp.h"
805+#include <stddef.h>
806 #include <stdlib.h>
807 #include <string.h>
808
809
810-/* Create a new work share structure. */
811+/* Allocate a new work share structure, preferably from current team's
812+ free gomp_work_share cache. */
813
814-struct gomp_work_share *
815-gomp_new_work_share (bool ordered, unsigned nthreads)
816+static struct gomp_work_share *
817+alloc_work_share (struct gomp_team *team)
818 {
819 struct gomp_work_share *ws;
820- size_t size;
821+ unsigned int i;
822
823- size = sizeof (*ws);
824- if (ordered)
825- size += nthreads * sizeof (ws->ordered_team_ids[0]);
826+ /* This is called in a critical section. */
827+ if (team->work_share_list_alloc != NULL)
828+ {
829+ ws = team->work_share_list_alloc;
830+ team->work_share_list_alloc = ws->next_free;
831+ return ws;
832+ }
833
834- ws = gomp_malloc_cleared (size);
835- gomp_mutex_init (&ws->lock);
836- ws->ordered_owner = -1;
837+#ifdef HAVE_SYNC_BUILTINS
838+ ws = team->work_share_list_free;
839+ /* We need atomic read from work_share_list_free,
840+ as free_work_share can be called concurrently. */
841+ __asm ("" : "+r" (ws));
842+
843+ if (ws && ws->next_free)
844+ {
845+ struct gomp_work_share *next = ws->next_free;
846+ ws->next_free = NULL;
847+ team->work_share_list_alloc = next->next_free;
848+ return next;
849+ }
850+#else
851+ gomp_mutex_lock (&team->work_share_list_free_lock);
852+ ws = team->work_share_list_free;
853+ if (ws)
854+ {
855+ team->work_share_list_alloc = ws->next_free;
856+ team->work_share_list_free = NULL;
857+ gomp_mutex_unlock (&team->work_share_list_free_lock);
858+ return ws;
859+ }
860+ gomp_mutex_unlock (&team->work_share_list_free_lock);
861+#endif
862
863+ team->work_share_chunk *= 2;
864+ ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
865+ ws->next_alloc = team->work_shares[0].next_alloc;
866+ team->work_shares[0].next_alloc = ws;
867+ team->work_share_list_alloc = &ws[1];
868+ for (i = 1; i < team->work_share_chunk - 1; i++)
869+ ws[i].next_free = &ws[i + 1];
870+ ws[i].next_free = NULL;
871 return ws;
872 }
873
874+/* Initialize an already allocated struct gomp_work_share.
875+ This shouldn't touch the next_alloc field. */
876+
877+void
878+gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
879+ unsigned nthreads)
880+{
881+ gomp_mutex_init (&ws->lock);
882+ if (__builtin_expect (ordered, 0))
883+ {
884+#define INLINE_ORDERED_TEAM_IDS_CNT \
885+ ((sizeof (struct gomp_work_share) \
886+ - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
887+ / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
888+
889+ if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
890+ ws->ordered_team_ids
891+ = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
892+ else
893+ ws->ordered_team_ids = ws->inline_ordered_team_ids;
894+ memset (ws->ordered_team_ids, '\0',
895+ nthreads * sizeof (*ws->ordered_team_ids));
896+ ws->ordered_num_used = 0;
897+ ws->ordered_owner = -1;
898+ ws->ordered_cur = 0;
899+ }
900+ else
901+ ws->ordered_team_ids = NULL;
902+ gomp_ptrlock_init (&ws->next_ws, NULL);
903+ ws->threads_completed = 0;
904+}
905
906-/* Free a work share structure. */
907+/* Do any needed destruction of gomp_work_share fields before it
908+ is put back into free gomp_work_share cache or freed. */
909
910-static void
911-free_work_share (struct gomp_work_share *ws)
912+void
913+gomp_fini_work_share (struct gomp_work_share *ws)
914 {
915 gomp_mutex_destroy (&ws->lock);
916- free (ws);
917+ if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
918+ free (ws->ordered_team_ids);
919+ gomp_ptrlock_destroy (&ws->next_ws);
920 }
921
922+/* Free a work share struct, if not orphaned, put it into current
923+ team's free gomp_work_share cache. */
924+
925+static inline void
926+free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
927+{
928+ gomp_fini_work_share (ws);
929+ if (__builtin_expect (team == NULL, 0))
930+ free (ws);
931+ else
932+ {
933+ struct gomp_work_share *next_ws;
934+#ifdef HAVE_SYNC_BUILTINS
935+ do
936+ {
937+ next_ws = team->work_share_list_free;
938+ ws->next_free = next_ws;
939+ }
940+ while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
941+ next_ws, ws));
942+#else
943+ gomp_mutex_lock (&team->work_share_list_free_lock);
944+ next_ws = team->work_share_list_free;
945+ ws->next_free = next_ws;
946+ team->work_share_list_free = ws;
947+ gomp_mutex_unlock (&team->work_share_list_free_lock);
948+#endif
949+ }
950+}
951
952 /* The current thread is ready to begin the next work sharing construct.
953 In all cases, thr->ts.work_share is updated to point to the new
954@@ -74,71 +173,34 @@ gomp_work_share_start (bool ordered)
955 struct gomp_thread *thr = gomp_thread ();
956 struct gomp_team *team = thr->ts.team;
957 struct gomp_work_share *ws;
958- unsigned ws_index, ws_gen;
959
960 /* Work sharing constructs can be orphaned. */
961 if (team == NULL)
962 {
963- ws = gomp_new_work_share (ordered, 1);
964+ ws = gomp_malloc (sizeof (*ws));
965+ gomp_init_work_share (ws, ordered, 1);
966 thr->ts.work_share = ws;
967- thr->ts.static_trip = 0;
968- gomp_mutex_lock (&ws->lock);
969- return true;
970+ return ws;
971 }
972
973- gomp_mutex_lock (&team->work_share_lock);
974-
975- /* This thread is beginning its next generation. */
976- ws_gen = ++thr->ts.work_share_generation;
977-
978- /* If this next generation is not newer than any other generation in
979- the team, then simply reference the existing construct. */
980- if (ws_gen - team->oldest_live_gen < team->num_live_gen)
981+ ws = thr->ts.work_share;
982+ thr->ts.last_work_share = ws;
983+ ws = gomp_ptrlock_get (&ws->next_ws);
984+ if (ws == NULL)
985 {
986- ws_index = ws_gen & team->generation_mask;
987- ws = team->work_shares[ws_index];
988+ /* This thread encountered a new ws first. */
989+ struct gomp_work_share *ws = alloc_work_share (team);
990+ gomp_init_work_share (ws, ordered, team->nthreads);
991 thr->ts.work_share = ws;
992- thr->ts.static_trip = 0;
993-
994- gomp_mutex_lock (&ws->lock);
995- gomp_mutex_unlock (&team->work_share_lock);
996-
997- return false;
998+ return true;
999 }
1000-
1001- /* Resize the work shares queue if we've run out of space. */
1002- if (team->num_live_gen++ == team->generation_mask)
1003+ else
1004 {
1005- team->work_shares = gomp_realloc (team->work_shares,
1006- 2 * team->num_live_gen
1007- * sizeof (*team->work_shares));
1008-
1009- /* Unless oldest_live_gen is zero, the sequence of live elements
1010- wraps around the end of the array. If we do nothing, we break
1011- lookup of the existing elements. Fix that by unwrapping the
1012- data from the front to the end. */
1013- if (team->oldest_live_gen > 0)
1014- memcpy (team->work_shares + team->num_live_gen,
1015- team->work_shares,
1016- (team->oldest_live_gen & team->generation_mask)
1017- * sizeof (*team->work_shares));
1018-
1019- team->generation_mask = team->generation_mask * 2 + 1;
1020- }
1021-
1022- ws_index = ws_gen & team->generation_mask;
1023- ws = gomp_new_work_share (ordered, team->nthreads);
1024- thr->ts.work_share = ws;
1025- thr->ts.static_trip = 0;
1026- team->work_shares[ws_index] = ws;
1027-
1028- gomp_mutex_lock (&ws->lock);
1029- gomp_mutex_unlock (&team->work_share_lock);
1030-
1031- return true;
1032+ thr->ts.work_share = ws;
1033+ return false;
1034+ }
1035 }
1036
1037-
1038 /* The current thread is done with its current work sharing construct.
1039 This version does imply a barrier at the end of the work-share. */
1040
1041@@ -147,36 +209,28 @@ gomp_work_share_end (void)
1042 {
1043 struct gomp_thread *thr = gomp_thread ();
1044 struct gomp_team *team = thr->ts.team;
1045- struct gomp_work_share *ws = thr->ts.work_share;
1046- bool last;
1047-
1048- thr->ts.work_share = NULL;
1049+ gomp_barrier_state_t bstate;
1050
1051 /* Work sharing constructs can be orphaned. */
1052 if (team == NULL)
1053 {
1054- free_work_share (ws);
1055+ free_work_share (NULL, thr->ts.work_share);
1056+ thr->ts.work_share = NULL;
1057 return;
1058 }
1059
1060- last = gomp_barrier_wait_start (&team->barrier);
1061+ bstate = gomp_barrier_wait_start (&team->barrier);
1062
1063- if (last)
1064+ if (gomp_barrier_last_thread (bstate))
1065 {
1066- unsigned ws_index;
1067-
1068- ws_index = thr->ts.work_share_generation & team->generation_mask;
1069- team->work_shares[ws_index] = NULL;
1070- team->oldest_live_gen++;
1071- team->num_live_gen = 0;
1072-
1073- free_work_share (ws);
1074+ if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
1075+ free_work_share (team, thr->ts.last_work_share);
1076 }
1077
1078- gomp_barrier_wait_end (&team->barrier, last);
1079+ gomp_barrier_wait_end (&team->barrier, bstate);
1080+ thr->ts.last_work_share = NULL;
1081 }
1082
1083-
1084 /* The current thread is done with its current work sharing construct.
1085 This version does NOT imply a barrier at the end of the work-share. */
1086
1087@@ -188,15 +242,17 @@ gomp_work_share_end_nowait (void)
1088 struct gomp_work_share *ws = thr->ts.work_share;
1089 unsigned completed;
1090
1091- thr->ts.work_share = NULL;
1092-
1093 /* Work sharing constructs can be orphaned. */
1094 if (team == NULL)
1095 {
1096- free_work_share (ws);
1097+ free_work_share (NULL, ws);
1098+ thr->ts.work_share = NULL;
1099 return;
1100 }
1101
1102+ if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
1103+ return;
1104+
1105 #ifdef HAVE_SYNC_BUILTINS
1106 completed = __sync_add_and_fetch (&ws->threads_completed, 1);
1107 #else
1108@@ -206,18 +262,6 @@ gomp_work_share_end_nowait (void)
1109 #endif
1110
1111 if (completed == team->nthreads)
1112- {
1113- unsigned ws_index;
1114-
1115- gomp_mutex_lock (&team->work_share_lock);
1116-
1117- ws_index = thr->ts.work_share_generation & team->generation_mask;
1118- team->work_shares[ws_index] = NULL;
1119- team->oldest_live_gen++;
1120- team->num_live_gen--;
1121-
1122- gomp_mutex_unlock (&team->work_share_lock);
1123-
1124- free_work_share (ws);
1125- }
1126+ free_work_share (team, thr->ts.last_work_share);
1127+ thr->ts.last_work_share = NULL;
1128 }
1129--- libgomp/single.c.jj 2007-12-07 14:41:01.000000000 +0100
1130+++ libgomp/single.c 2008-03-26 15:11:32.000000000 +0100
1131@@ -1,4 +1,4 @@
1132-/* Copyright (C) 2005 Free Software Foundation, Inc.
1133+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1134 Contributed by Richard Henderson <rth@redhat.com>.
1135
1136 This file is part of the GNU OpenMP Library (libgomp).
1137@@ -37,10 +37,24 @@
1138 bool
1139 GOMP_single_start (void)
1140 {
1141+#ifdef HAVE_SYNC_BUILTINS
1142+ struct gomp_thread *thr = gomp_thread ();
1143+ struct gomp_team *team = thr->ts.team;
1144+ unsigned long single_count;
1145+
1146+ if (__builtin_expect (team == NULL, 0))
1147+ return true;
1148+
1149+ single_count = thr->ts.single_count++;
1150+ return __sync_bool_compare_and_swap (&team->single_count, single_count,
1151+ single_count + 1L);
1152+#else
1153 bool ret = gomp_work_share_start (false);
1154- gomp_mutex_unlock (&gomp_thread ()->ts.work_share->lock);
1155+ if (ret)
1156+ gomp_work_share_init_done ();
1157 gomp_work_share_end_nowait ();
1158 return ret;
1159+#endif
1160 }
1161
1162 /* This routine is called when first encountering a SINGLE construct that
1163@@ -57,10 +71,12 @@ GOMP_single_copy_start (void)
1164 void *ret;
1165
1166 first = gomp_work_share_start (false);
1167- gomp_mutex_unlock (&thr->ts.work_share->lock);
1168
1169 if (first)
1170- ret = NULL;
1171+ {
1172+ gomp_work_share_init_done ();
1173+ ret = NULL;
1174+ }
1175 else
1176 {
1177 gomp_barrier_wait (&thr->ts.team->barrier);
1178--- libgomp/loop.c.jj 2007-12-07 14:41:01.000000000 +0100
1179+++ libgomp/loop.c 2008-03-26 18:47:04.000000000 +0100
1180@@ -27,8 +27,9 @@
1181
1182 /* This file handles the LOOP (FOR/DO) construct. */
1183
1184-#include "libgomp.h"
1185+#include <limits.h>
1186 #include <stdlib.h>
1187+#include "libgomp.h"
1188
1189
1190 /* Initialize the given work share construct from the given arguments. */
1191@@ -44,6 +45,39 @@ gomp_loop_init (struct gomp_work_share *
1192 ? start : end;
1193 ws->incr = incr;
1194 ws->next = start;
1195+ if (sched == GFS_DYNAMIC)
1196+ {
1197+ ws->chunk_size *= incr;
1198+
1199+#ifdef HAVE_SYNC_BUILTINS
1200+ {
1201+ /* For dynamic scheduling prepare things to make each iteration
1202+ faster. */
1203+ struct gomp_thread *thr = gomp_thread ();
1204+ struct gomp_team *team = thr->ts.team;
1205+ long nthreads = team ? team->nthreads : 1;
1206+
1207+ if (__builtin_expect (incr > 0, 1))
1208+ {
1209+ /* Cheap overflow protection. */
1210+ if (__builtin_expect ((nthreads | ws->chunk_size)
1211+ >= 1UL << (sizeof (long)
1212+ * __CHAR_BIT__ / 2 - 1), 0))
1213+ ws->mode = 0;
1214+ else
1215+ ws->mode = ws->end < (LONG_MAX
1216+ - (nthreads + 1) * ws->chunk_size);
1217+ }
1218+ /* Cheap overflow protection. */
1219+ else if (__builtin_expect ((nthreads | -ws->chunk_size)
1220+ >= 1UL << (sizeof (long)
1221+ * __CHAR_BIT__ / 2 - 1), 0))
1222+ ws->mode = 0;
1223+ else
1224+ ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
1225+ }
1226+#endif
1227+ }
1228 }
1229
1230 /* The *_start routines are called when first encountering a loop construct
1231@@ -68,10 +102,13 @@ gomp_loop_static_start (long start, long
1232 {
1233 struct gomp_thread *thr = gomp_thread ();
1234
1235+ thr->ts.static_trip = 0;
1236 if (gomp_work_share_start (false))
1237- gomp_loop_init (thr->ts.work_share, start, end, incr,
1238- GFS_STATIC, chunk_size);
1239- gomp_mutex_unlock (&thr->ts.work_share->lock);
1240+ {
1241+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1242+ GFS_STATIC, chunk_size);
1243+ gomp_work_share_init_done ();
1244+ }
1245
1246 return !gomp_iter_static_next (istart, iend);
1247 }
1248@@ -84,13 +121,16 @@ gomp_loop_dynamic_start (long start, lon
1249 bool ret;
1250
1251 if (gomp_work_share_start (false))
1252- gomp_loop_init (thr->ts.work_share, start, end, incr,
1253- GFS_DYNAMIC, chunk_size);
1254+ {
1255+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1256+ GFS_DYNAMIC, chunk_size);
1257+ gomp_work_share_init_done ();
1258+ }
1259
1260 #ifdef HAVE_SYNC_BUILTINS
1261- gomp_mutex_unlock (&thr->ts.work_share->lock);
1262 ret = gomp_iter_dynamic_next (istart, iend);
1263 #else
1264+ gomp_mutex_lock (&thr->ts.work_share->lock);
1265 ret = gomp_iter_dynamic_next_locked (istart, iend);
1266 gomp_mutex_unlock (&thr->ts.work_share->lock);
1267 #endif
1268@@ -106,13 +146,16 @@ gomp_loop_guided_start (long start, long
1269 bool ret;
1270
1271 if (gomp_work_share_start (false))
1272- gomp_loop_init (thr->ts.work_share, start, end, incr,
1273- GFS_GUIDED, chunk_size);
1274+ {
1275+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1276+ GFS_GUIDED, chunk_size);
1277+ gomp_work_share_init_done ();
1278+ }
1279
1280 #ifdef HAVE_SYNC_BUILTINS
1281- gomp_mutex_unlock (&thr->ts.work_share->lock);
1282 ret = gomp_iter_guided_next (istart, iend);
1283 #else
1284+ gomp_mutex_lock (&thr->ts.work_share->lock);
1285 ret = gomp_iter_guided_next_locked (istart, iend);
1286 gomp_mutex_unlock (&thr->ts.work_share->lock);
1287 #endif
1288@@ -149,13 +192,14 @@ gomp_loop_ordered_static_start (long sta
1289 {
1290 struct gomp_thread *thr = gomp_thread ();
1291
1292+ thr->ts.static_trip = 0;
1293 if (gomp_work_share_start (true))
1294 {
1295 gomp_loop_init (thr->ts.work_share, start, end, incr,
1296 GFS_STATIC, chunk_size);
1297 gomp_ordered_static_init ();
1298+ gomp_work_share_init_done ();
1299 }
1300- gomp_mutex_unlock (&thr->ts.work_share->lock);
1301
1302 return !gomp_iter_static_next (istart, iend);
1303 }
1304@@ -168,8 +212,14 @@ gomp_loop_ordered_dynamic_start (long st
1305 bool ret;
1306
1307 if (gomp_work_share_start (true))
1308- gomp_loop_init (thr->ts.work_share, start, end, incr,
1309- GFS_DYNAMIC, chunk_size);
1310+ {
1311+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1312+ GFS_DYNAMIC, chunk_size);
1313+ gomp_mutex_lock (&thr->ts.work_share->lock);
1314+ gomp_work_share_init_done ();
1315+ }
1316+ else
1317+ gomp_mutex_lock (&thr->ts.work_share->lock);
1318
1319 ret = gomp_iter_dynamic_next_locked (istart, iend);
1320 if (ret)
1321@@ -187,8 +237,14 @@ gomp_loop_ordered_guided_start (long sta
1322 bool ret;
1323
1324 if (gomp_work_share_start (true))
1325- gomp_loop_init (thr->ts.work_share, start, end, incr,
1326- GFS_GUIDED, chunk_size);
1327+ {
1328+ gomp_loop_init (thr->ts.work_share, start, end, incr,
1329+ GFS_GUIDED, chunk_size);
1330+ gomp_mutex_lock (&thr->ts.work_share->lock);
1331+ gomp_work_share_init_done ();
1332+ }
1333+ else
1334+ gomp_mutex_lock (&thr->ts.work_share->lock);
1335
1336 ret = gomp_iter_guided_next_locked (istart, iend);
1337 if (ret)
1338@@ -375,12 +431,12 @@ gomp_parallel_loop_start (void (*fn) (vo
1339 long incr, enum gomp_schedule_type sched,
1340 long chunk_size)
1341 {
1342- struct gomp_work_share *ws;
1343+ struct gomp_team *team;
1344
1345 num_threads = gomp_resolve_num_threads (num_threads);
1346- ws = gomp_new_work_share (false, num_threads);
1347- gomp_loop_init (ws, start, end, incr, sched, chunk_size);
1348- gomp_team_start (fn, data, num_threads, ws);
1349+ team = gomp_new_team (num_threads);
1350+ gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
1351+ gomp_team_start (fn, data, num_threads, team);
1352 }
1353
1354 void
1355--- libgomp/Makefile.in.jj 2008-01-10 20:53:47.000000000 +0100
1356+++ libgomp/Makefile.in 2008-03-26 18:51:01.000000000 +0100
1357@@ -83,7 +83,7 @@ libgomp_la_LIBADD =
1358 am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
1359 error.lo iter.lo loop.lo ordered.lo parallel.lo sections.lo \
1360 single.lo team.lo work.lo lock.lo mutex.lo proc.lo sem.lo \
1361- bar.lo time.lo fortran.lo affinity.lo
1362+ bar.lo ptrlock.lo time.lo fortran.lo affinity.lo
1363 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
1364 DEFAULT_INCLUDES = -I. -I$(srcdir) -I.
1365 depcomp = $(SHELL) $(top_srcdir)/../depcomp
1366@@ -292,7 +292,7 @@ libgomp_version_info = -version-info $(l
1367 libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script)
1368 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
1369 loop.c ordered.c parallel.c sections.c single.c team.c work.c \
1370- lock.c mutex.c proc.c sem.c bar.c time.c fortran.c affinity.c
1371+ lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c
1372
1373 nodist_noinst_HEADERS = libgomp_f.h
1374 nodist_libsubinclude_HEADERS = omp.h
1375@@ -434,6 +434,7 @@ distclean-compile:
1376 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
1377 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
1378 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@
1379+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@
1380 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
1381 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
1382 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
1383--- libgomp/testsuite/libgomp.c/loop-4.c.jj 2008-03-26 18:47:04.000000000 +0100
1384+++ libgomp/testsuite/libgomp.c/loop-4.c 2008-03-26 18:47:04.000000000 +0100
1385@@ -0,0 +1,28 @@
1386+/* { dg-do run } */
1387+
1388+extern void abort (void);
1389+
1390+int
1391+main (void)
1392+{
1393+ int e = 0;
1394+#pragma omp parallel num_threads (4) reduction(+:e)
1395+ {
1396+ long i;
1397+ #pragma omp for schedule(dynamic,1)
1398+ for (i = __LONG_MAX__ - 30001; i <= __LONG_MAX__ - 10001; i += 10000)
1399+ if (i != __LONG_MAX__ - 30001
1400+ && i != __LONG_MAX__ - 20001
1401+ && i != __LONG_MAX__ - 10001)
1402+ e = 1;
1403+ #pragma omp for schedule(dynamic,1)
1404+ for (i = -__LONG_MAX__ + 30000; i >= -__LONG_MAX__ + 10000; i -= 10000)
1405+ if (i != -__LONG_MAX__ + 30000
1406+ && i != -__LONG_MAX__ + 20000
1407+ && i != -__LONG_MAX__ + 10000)
1408+ e = 1;
1409+ }
1410+ if (e)
1411+ abort ();
1412+ return 0;
1413+}
1414--- libgomp/Makefile.am.jj 2007-12-07 14:41:01.000000000 +0100
1415+++ libgomp/Makefile.am 2008-03-26 15:15:19.000000000 +0100
1416@@ -31,7 +31,7 @@ libgomp_la_LDFLAGS = $(libgomp_version_i
1417
1418 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
1419 loop.c ordered.c parallel.c sections.c single.c team.c work.c \
1420- lock.c mutex.c proc.c sem.c bar.c time.c fortran.c affinity.c
1421+ lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c
1422
1423 nodist_noinst_HEADERS = libgomp_f.h
1424 nodist_libsubinclude_HEADERS = omp.h
1425--- libgomp/team.c.jj 2007-12-07 14:41:01.000000000 +0100
1426+++ libgomp/team.c 2008-03-27 12:22:26.000000000 +0100
1427@@ -94,7 +94,7 @@ gomp_thread_start (void *xdata)
1428 {
1429 gomp_barrier_wait (&thr->ts.team->barrier);
1430 local_fn (local_data);
1431- gomp_barrier_wait (&thr->ts.team->barrier);
1432+ gomp_barrier_wait_last (&thr->ts.team->barrier);
1433 }
1434 else
1435 {
1436@@ -114,11 +114,10 @@ gomp_thread_start (void *xdata)
1437 thr->data = NULL;
1438 thr->ts.team = NULL;
1439 thr->ts.work_share = NULL;
1440+ thr->ts.last_work_share = NULL;
1441 thr->ts.team_id = 0;
1442- thr->ts.work_share_generation = 0;
1443- thr->ts.static_trip = 0;
1444
1445- gomp_barrier_wait (&team->barrier);
1446+ gomp_barrier_wait_last (&team->barrier);
1447 gomp_barrier_wait (&gomp_threads_dock);
1448
1449 local_fn = thr->fn;
1450@@ -133,21 +132,29 @@ gomp_thread_start (void *xdata)
1451
1452 /* Create a new team data structure. */
1453
1454-static struct gomp_team *
1455-new_team (unsigned nthreads, struct gomp_work_share *work_share)
1456+struct gomp_team *
1457+gomp_new_team (unsigned nthreads)
1458 {
1459 struct gomp_team *team;
1460 size_t size;
1461+ int i;
1462
1463 size = sizeof (*team) + nthreads * sizeof (team->ordered_release[0]);
1464 team = gomp_malloc (size);
1465- gomp_mutex_init (&team->work_share_lock);
1466
1467- team->work_shares = gomp_malloc (4 * sizeof (struct gomp_work_share *));
1468- team->generation_mask = 3;
1469- team->oldest_live_gen = work_share == NULL;
1470- team->num_live_gen = work_share != NULL;
1471- team->work_shares[0] = work_share;
1472+ team->work_share_chunk = 8;
1473+#ifdef HAVE_SYNC_BUILTINS
1474+ team->single_count = 0;
1475+#else
1476+ gomp_mutex_init (&team->work_share_list_free_lock);
1477+#endif
1478+ gomp_init_work_share (&team->work_shares[0], false, nthreads);
1479+ team->work_shares[0].next_alloc = NULL;
1480+ team->work_share_list_free = NULL;
1481+ team->work_share_list_alloc = &team->work_shares[1];
1482+ for (i = 1; i < 7; i++)
1483+ team->work_shares[i].next_free = &team->work_shares[i + 1];
1484+ team->work_shares[i].next_free = NULL;
1485
1486 team->nthreads = nthreads;
1487 gomp_barrier_init (&team->barrier, nthreads);
1488@@ -164,10 +171,22 @@ new_team (unsigned nthreads, struct gomp
1489 static void
1490 free_team (struct gomp_team *team)
1491 {
1492- free (team->work_shares);
1493- gomp_mutex_destroy (&team->work_share_lock);
1494+ if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
1495+ {
1496+ struct gomp_work_share *ws = team->work_shares[0].next_alloc;
1497+ do
1498+ {
1499+ struct gomp_work_share *next_ws = ws->next_alloc;
1500+ free (ws);
1501+ ws = next_ws;
1502+ }
1503+ while (ws != NULL);
1504+ }
1505 gomp_barrier_destroy (&team->barrier);
1506 gomp_sem_destroy (&team->master_release);
1507+#ifndef HAVE_SYNC_BUILTINS
1508+ gomp_mutex_destroy (&team->work_share_list_free_lock);
1509+#endif
1510 free (team);
1511 }
1512
1513@@ -176,11 +195,10 @@ free_team (struct gomp_team *team)
1514
1515 void
1516 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
1517- struct gomp_work_share *work_share)
1518+ struct gomp_team *team)
1519 {
1520 struct gomp_thread_start_data *start_data;
1521 struct gomp_thread *thr, *nthr;
1522- struct gomp_team *team;
1523 bool nested;
1524 unsigned i, n, old_threads_used = 0;
1525 pthread_attr_t thread_attr, *attr;
1526@@ -188,17 +206,18 @@ gomp_team_start (void (*fn) (void *), vo
1527 thr = gomp_thread ();
1528 nested = thr->ts.team != NULL;
1529
1530- team = new_team (nthreads, work_share);
1531-
1532 /* Always save the previous state, even if this isn't a nested team.
1533 In particular, we should save any work share state from an outer
1534 orphaned work share construct. */
1535 team->prev_ts = thr->ts;
1536
1537 thr->ts.team = team;
1538- thr->ts.work_share = work_share;
1539 thr->ts.team_id = 0;
1540- thr->ts.work_share_generation = 0;
1541+ thr->ts.work_share = &team->work_shares[0];
1542+ thr->ts.last_work_share = NULL;
1543+#ifdef HAVE_SYNC_BUILTINS
1544+ thr->ts.single_count = 0;
1545+#endif
1546 thr->ts.static_trip = 0;
1547
1548 if (nthreads == 1)
1549@@ -241,9 +260,12 @@ gomp_team_start (void (*fn) (void *), vo
1550 {
1551 nthr = gomp_threads[i];
1552 nthr->ts.team = team;
1553- nthr->ts.work_share = work_share;
1554+ nthr->ts.work_share = &team->work_shares[0];
1555+ nthr->ts.last_work_share = NULL;
1556 nthr->ts.team_id = i;
1557- nthr->ts.work_share_generation = 0;
1558+#ifdef HAVE_SYNC_BUILTINS
1559+ nthr->ts.single_count = 0;
1560+#endif
1561 nthr->ts.static_trip = 0;
1562 nthr->fn = fn;
1563 nthr->data = data;
1564@@ -266,8 +288,24 @@ gomp_team_start (void (*fn) (void *), vo
1565 }
1566 }
1567
1568+ if (__builtin_expect (nthreads > old_threads_used, 0))
1569+ {
1570+ long diff = (long) nthreads - (long) old_threads_used;
1571+
1572+ if (old_threads_used == 0)
1573+ --diff;
1574+
1575+#ifdef HAVE_SYNC_BUILTINS
1576+ __sync_fetch_and_add (&gomp_managed_threads, diff);
1577+#else
1578+ gomp_mutex_lock (&gomp_remaining_threads_lock);
1579+ gomp_managed_threads += diff;
1580+ gomp_mutex_unlock (&gomp_remaining_threads_lock);
1581+#endif
1582+ }
1583+
1584 attr = &gomp_thread_attr;
1585- if (gomp_cpu_affinity != NULL)
1586+ if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
1587 {
1588 size_t stacksize;
1589 pthread_attr_init (&thread_attr);
1590@@ -287,9 +325,12 @@ gomp_team_start (void (*fn) (void *), vo
1591 int err;
1592
1593 start_data->ts.team = team;
1594- start_data->ts.work_share = work_share;
1595+ start_data->ts.work_share = &team->work_shares[0];
1596+ start_data->ts.last_work_share = NULL;
1597 start_data->ts.team_id = i;
1598- start_data->ts.work_share_generation = 0;
1599+#ifdef HAVE_SYNC_BUILTINS
1600+ start_data->ts.single_count = 0;
1601+#endif
1602 start_data->ts.static_trip = 0;
1603 start_data->fn = fn;
1604 start_data->fn_data = data;
1605@@ -303,7 +344,7 @@ gomp_team_start (void (*fn) (void *), vo
1606 gomp_fatal ("Thread creation failed: %s", strerror (err));
1607 }
1608
1609- if (gomp_cpu_affinity != NULL)
1610+ if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
1611 pthread_attr_destroy (&thread_attr);
1612
1613 do_release:
1614@@ -313,8 +354,20 @@ gomp_team_start (void (*fn) (void *), vo
1615 that should arrive back at the end of this team. The extra
1616 threads should be exiting. Note that we arrange for this test
1617 to never be true for nested teams. */
1618- if (nthreads < old_threads_used)
1619- gomp_barrier_reinit (&gomp_threads_dock, nthreads);
1620+ if (__builtin_expect (nthreads < old_threads_used, 0))
1621+ {
1622+ long diff = (long) nthreads - (long) old_threads_used;
1623+
1624+ gomp_barrier_reinit (&gomp_threads_dock, nthreads);
1625+
1626+#ifdef HAVE_SYNC_BUILTINS
1627+ __sync_fetch_and_add (&gomp_managed_threads, diff);
1628+#else
1629+ gomp_mutex_lock (&gomp_remaining_threads_lock);
1630+ gomp_managed_threads += diff;
1631+ gomp_mutex_unlock (&gomp_remaining_threads_lock);
1632+#endif
1633+ }
1634 }
1635
1636
1637@@ -329,8 +382,21 @@ gomp_team_end (void)
1638
1639 gomp_barrier_wait (&team->barrier);
1640
1641+ gomp_fini_work_share (thr->ts.work_share);
1642+
1643 thr->ts = team->prev_ts;
1644
1645+ if (__builtin_expect (thr->ts.team != NULL, 0))
1646+ {
1647+#ifdef HAVE_SYNC_BUILTINS
1648+ __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
1649+#else
1650+ gomp_mutex_lock (&gomp_remaining_threads_lock);
1651+ gomp_managed_threads -= team->nthreads - 1L;
1652+ gomp_mutex_unlock (&gomp_remaining_threads_lock);
1653+#endif
1654+ }
1655+
1656 free_team (team);
1657 }
1658
1659--- libgomp/config/posix/bar.h.jj 2007-12-07 14:41:01.000000000 +0100
1660+++ libgomp/config/posix/bar.h 2008-03-26 15:11:32.000000000 +0100
1661@@ -1,4 +1,4 @@
1662-/* Copyright (C) 2005 Free Software Foundation, Inc.
1663+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1664 Contributed by Richard Henderson <rth@redhat.com>.
1665
1666 This file is part of the GNU OpenMP Library (libgomp).
1667@@ -46,18 +46,32 @@ typedef struct
1668 unsigned total;
1669 unsigned arrived;
1670 } gomp_barrier_t;
1671+typedef bool gomp_barrier_state_t;
1672
1673 extern void gomp_barrier_init (gomp_barrier_t *, unsigned);
1674 extern void gomp_barrier_reinit (gomp_barrier_t *, unsigned);
1675 extern void gomp_barrier_destroy (gomp_barrier_t *);
1676
1677 extern void gomp_barrier_wait (gomp_barrier_t *);
1678-extern void gomp_barrier_wait_end (gomp_barrier_t *, bool);
1679+extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
1680
1681-static inline bool gomp_barrier_wait_start (gomp_barrier_t *bar)
1682+static inline gomp_barrier_state_t
1683+gomp_barrier_wait_start (gomp_barrier_t *bar)
1684 {
1685 gomp_mutex_lock (&bar->mutex1);
1686 return ++bar->arrived == bar->total;
1687 }
1688
1689+static inline bool
1690+gomp_barrier_last_thread (gomp_barrier_state_t state)
1691+{
1692+ return state;
1693+}
1694+
1695+static inline void
1696+gomp_barrier_wait_last (gomp_barrier_t *bar)
1697+{
1698+ gomp_barrier_wait (bar);
1699+}
1700+
1701 #endif /* GOMP_BARRIER_H */
1702--- libgomp/config/posix/ptrlock.h.jj 2008-03-26 15:11:32.000000000 +0100
1703+++ libgomp/config/posix/ptrlock.h 2008-03-26 15:11:32.000000000 +0100
1704@@ -0,0 +1,69 @@
1705+/* Copyright (C) 2008 Free Software Foundation, Inc.
1706+ Contributed by Jakub Jelinek <jakub@redhat.com>.
1707+
1708+ This file is part of the GNU OpenMP Library (libgomp).
1709+
1710+ Libgomp is free software; you can redistribute it and/or modify it
1711+ under the terms of the GNU Lesser General Public License as published by
1712+ the Free Software Foundation; either version 2.1 of the License, or
1713+ (at your option) any later version.
1714+
1715+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
1716+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1717+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
1718+ more details.
1719+
1720+ You should have received a copy of the GNU Lesser General Public License
1721+ along with libgomp; see the file COPYING.LIB. If not, write to the
1722+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
1723+ MA 02110-1301, USA. */
1724+
1725+/* As a special exception, if you link this library with other files, some
1726+ of which are compiled with GCC, to produce an executable, this library
1727+ does not by itself cause the resulting executable to be covered by the
1728+ GNU General Public License. This exception does not however invalidate
1729+ any other reasons why the executable file might be covered by the GNU
1730+ General Public License. */
1731+
1732+/* This is a Linux specific implementation of a mutex synchronization
1733+ mechanism for libgomp. This type is private to the library. This
1734+ implementation uses atomic instructions and the futex syscall. */
1735+
1736+#ifndef GOMP_PTRLOCK_H
1737+#define GOMP_PTRLOCK_H 1
1738+
1739+typedef struct { void *ptr; gomp_mutex_t lock; } gomp_ptrlock_t;
1740+
1741+static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr)
1742+{
1743+ ptrlock->ptr = ptr;
1744+ gomp_mutex_init (&ptrlock->lock);
1745+}
1746+
1747+static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock)
1748+{
1749+ if (ptrlock->ptr != NULL)
1750+ return ptrlock->ptr;
1751+
1752+ gomp_mutex_lock (&ptrlock->lock);
1753+ if (ptrlock->ptr != NULL)
1754+ {
1755+ gomp_mutex_unlock (&ptrlock->lock);
1756+ return ptrlock->ptr;
1757+ }
1758+
1759+ return NULL;
1760+}
1761+
1762+static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr)
1763+{
1764+ ptrlock->ptr = ptr;
1765+ gomp_mutex_unlock (&ptrlock->lock);
1766+}
1767+
1768+static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock)
1769+{
1770+ gomp_mutex_destroy (&ptrlock->lock);
1771+}
1772+
1773+#endif /* GOMP_PTRLOCK_H */
1774--- libgomp/config/posix/ptrlock.c.jj 2008-03-26 15:11:32.000000000 +0100
1775+++ libgomp/config/posix/ptrlock.c 2008-03-26 15:11:32.000000000 +0100
1776@@ -0,0 +1 @@
1777+/* Everything is in the header. */
1778--- libgomp/config/posix/bar.c.jj 2007-12-07 14:41:01.000000000 +0100
1779+++ libgomp/config/posix/bar.c 2008-03-26 15:11:32.000000000 +0100
1780@@ -1,4 +1,4 @@
1781-/* Copyright (C) 2005 Free Software Foundation, Inc.
1782+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1783 Contributed by Richard Henderson <rth@redhat.com>.
1784
1785 This file is part of the GNU OpenMP Library (libgomp).
1786@@ -70,7 +70,7 @@ gomp_barrier_reinit (gomp_barrier_t *bar
1787 }
1788
1789 void
1790-gomp_barrier_wait_end (gomp_barrier_t *bar, bool last)
1791+gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t last)
1792 {
1793 unsigned int n;
1794
1795--- libgomp/config/linux/alpha/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
1796+++ libgomp/config/linux/alpha/futex.h 2008-03-26 15:11:32.000000000 +0100
1797@@ -1,4 +1,4 @@
1798-/* Copyright (C) 2005 Free Software Foundation, Inc.
1799+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1800 Contributed by Richard Henderson <rth@redhat.com>.
1801
1802 This file is part of the GNU OpenMP Library (libgomp).
1803@@ -30,8 +30,6 @@
1804 #ifndef SYS_futex
1805 #define SYS_futex 394
1806 #endif
1807-#define FUTEX_WAIT 0
1808-#define FUTEX_WAKE 1
1809
1810
1811 static inline void
1812@@ -45,7 +43,7 @@ futex_wait (int *addr, int val)
1813
1814 sc_0 = SYS_futex;
1815 sc_16 = (long) addr;
1816- sc_17 = FUTEX_WAIT;
1817+ sc_17 = gomp_futex_wait;
1818 sc_18 = val;
1819 sc_19 = 0;
1820 __asm volatile ("callsys"
1821@@ -53,6 +51,20 @@ futex_wait (int *addr, int val)
1822 : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19)
1823 : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8",
1824 "$22", "$23", "$24", "$25", "$27", "$28", "memory");
1825+ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS)
1826+ {
1827+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
1828+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
1829+ sc_0 = SYS_futex;
1830+ sc_17 &= ~FUTEX_PRIVATE_FLAG;
1831+ sc_19 = 0;
1832+ __asm volatile ("callsys"
1833+ : "=r" (sc_0), "=r"(sc_19)
1834+ : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18),
1835+ "1"(sc_19)
1836+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8",
1837+ "$22", "$23", "$24", "$25", "$27", "$28", "memory");
1838+ }
1839 }
1840
1841 static inline void
1842@@ -66,11 +78,35 @@ futex_wake (int *addr, int count)
1843
1844 sc_0 = SYS_futex;
1845 sc_16 = (long) addr;
1846- sc_17 = FUTEX_WAKE;
1847+ sc_17 = gomp_futex_wake;
1848 sc_18 = count;
1849 __asm volatile ("callsys"
1850 : "=r" (sc_0), "=r"(sc_19)
1851 : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18)
1852 : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8",
1853 "$22", "$23", "$24", "$25", "$27", "$28", "memory");
1854+ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS)
1855+ {
1856+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
1857+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
1858+ sc_0 = SYS_futex;
1859+ sc_17 &= ~FUTEX_PRIVATE_FLAG;
1860+ __asm volatile ("callsys"
1861+ : "=r" (sc_0), "=r"(sc_19)
1862+ : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18)
1863+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8",
1864+ "$22", "$23", "$24", "$25", "$27", "$28", "memory");
1865+ }
1866+}
1867+
1868+static inline void
1869+cpu_relax (void)
1870+{
1871+ __asm volatile ("" : : : "memory");
1872+}
1873+
1874+static inline void
1875+atomic_write_barrier (void)
1876+{
1877+ __asm volatile ("wmb" : : : "memory");
1878 }
1879--- libgomp/config/linux/affinity.c.jj 2007-12-07 14:41:00.000000000 +0100
1880+++ libgomp/config/linux/affinity.c 2008-03-26 15:11:32.000000000 +0100
1881@@ -1,4 +1,4 @@
1882-/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
1883+/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
1884 Contributed by Jakub Jelinek <jakub@redhat.com>.
1885
1886 This file is part of the GNU OpenMP Library (libgomp).
1887@@ -38,9 +38,6 @@
1888 #ifdef HAVE_PTHREAD_AFFINITY_NP
1889
1890 static unsigned int affinity_counter;
1891-#ifndef HAVE_SYNC_BUILTINS
1892-static gomp_mutex_t affinity_lock;
1893-#endif
1894
1895 void
1896 gomp_init_affinity (void)
1897@@ -76,9 +73,6 @@ gomp_init_affinity (void)
1898 CPU_SET (gomp_cpu_affinity[0], &cpuset);
1899 pthread_setaffinity_np (pthread_self (), sizeof (cpuset), &cpuset);
1900 affinity_counter = 1;
1901-#ifndef HAVE_SYNC_BUILTINS
1902- gomp_mutex_init (&affinity_lock);
1903-#endif
1904 }
1905
1906 void
1907@@ -87,13 +81,7 @@ gomp_init_thread_affinity (pthread_attr_
1908 unsigned int cpu;
1909 cpu_set_t cpuset;
1910
1911-#ifdef HAVE_SYNC_BUILTINS
1912 cpu = __sync_fetch_and_add (&affinity_counter, 1);
1913-#else
1914- gomp_mutex_lock (&affinity_lock);
1915- cpu = affinity_counter++;
1916- gomp_mutex_unlock (&affinity_lock);
1917-#endif
1918 cpu %= gomp_cpu_affinity_len;
1919 CPU_ZERO (&cpuset);
1920 CPU_SET (gomp_cpu_affinity[cpu], &cpuset);
1921--- libgomp/config/linux/bar.h.jj 2007-12-07 14:41:00.000000000 +0100
1922+++ libgomp/config/linux/bar.h 2008-03-26 15:11:32.000000000 +0100
1923@@ -1,4 +1,4 @@
1924-/* Copyright (C) 2005 Free Software Foundation, Inc.
1925+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
1926 Contributed by Richard Henderson <rth@redhat.com>.
1927
1928 This file is part of the GNU OpenMP Library (libgomp).
1929@@ -36,40 +36,49 @@
1930
1931 typedef struct
1932 {
1933- gomp_mutex_t mutex;
1934- unsigned total;
1935- unsigned arrived;
1936- int generation;
1937+ /* Make sure total/generation is in a mostly read cacheline, while
1938+ awaited in a separate cacheline. */
1939+ unsigned total __attribute__((aligned (64)));
1940+ unsigned generation;
1941+ unsigned awaited __attribute__((aligned (64)));
1942 } gomp_barrier_t;
1943+typedef unsigned int gomp_barrier_state_t;
1944
1945 static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
1946 {
1947- gomp_mutex_init (&bar->mutex);
1948 bar->total = count;
1949- bar->arrived = 0;
1950+ bar->awaited = count;
1951 bar->generation = 0;
1952 }
1953
1954 static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
1955 {
1956- gomp_mutex_lock (&bar->mutex);
1957+ __sync_fetch_and_add (&bar->awaited, count - bar->total);
1958 bar->total = count;
1959- gomp_mutex_unlock (&bar->mutex);
1960 }
1961
1962 static inline void gomp_barrier_destroy (gomp_barrier_t *bar)
1963 {
1964- /* Before destroying, make sure all threads have left the barrier. */
1965- gomp_mutex_lock (&bar->mutex);
1966 }
1967
1968 extern void gomp_barrier_wait (gomp_barrier_t *);
1969-extern void gomp_barrier_wait_end (gomp_barrier_t *, bool);
1970+extern void gomp_barrier_wait_last (gomp_barrier_t *);
1971+extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
1972
1973-static inline bool gomp_barrier_wait_start (gomp_barrier_t *bar)
1974+static inline gomp_barrier_state_t
1975+gomp_barrier_wait_start (gomp_barrier_t *bar)
1976 {
1977- gomp_mutex_lock (&bar->mutex);
1978- return ++bar->arrived == bar->total;
1979+ unsigned int ret = bar->generation;
1980+ /* Do we need any barrier here or is __sync_add_and_fetch acting
1981+ as the needed LoadLoad barrier already? */
1982+ ret += __sync_add_and_fetch (&bar->awaited, -1) == 0;
1983+ return ret;
1984+}
1985+
1986+static inline bool
1987+gomp_barrier_last_thread (gomp_barrier_state_t state)
1988+{
1989+ return state & 1;
1990 }
1991
1992 #endif /* GOMP_BARRIER_H */
1993--- libgomp/config/linux/ptrlock.h.jj 2008-03-26 15:11:32.000000000 +0100
1994+++ libgomp/config/linux/ptrlock.h 2008-03-26 15:11:32.000000000 +0100
1995@@ -0,0 +1,65 @@
1996+/* Copyright (C) 2008 Free Software Foundation, Inc.
1997+ Contributed by Jakub Jelinek <jakub@redhat.com>.
1998+
1999+ This file is part of the GNU OpenMP Library (libgomp).
2000+
2001+ Libgomp is free software; you can redistribute it and/or modify it
2002+ under the terms of the GNU Lesser General Public License as published by
2003+ the Free Software Foundation; either version 2.1 of the License, or
2004+ (at your option) any later version.
2005+
2006+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
2007+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2008+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
2009+ more details.
2010+
2011+ You should have received a copy of the GNU Lesser General Public License
2012+ along with libgomp; see the file COPYING.LIB. If not, write to the
2013+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
2014+ MA 02110-1301, USA. */
2015+
2016+/* As a special exception, if you link this library with other files, some
2017+ of which are compiled with GCC, to produce an executable, this library
2018+ does not by itself cause the resulting executable to be covered by the
2019+ GNU General Public License. This exception does not however invalidate
2020+ any other reasons why the executable file might be covered by the GNU
2021+ General Public License. */
2022+
2023+/* This is a Linux specific implementation of a mutex synchronization
2024+ mechanism for libgomp. This type is private to the library. This
2025+ implementation uses atomic instructions and the futex syscall. */
2026+
2027+#ifndef GOMP_PTRLOCK_H
2028+#define GOMP_PTRLOCK_H 1
2029+
2030+typedef void *gomp_ptrlock_t;
2031+
2032+static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr)
2033+{
2034+ *ptrlock = ptr;
2035+}
2036+
2037+extern void *gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock);
2038+static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock)
2039+{
2040+ if ((uintptr_t) *ptrlock > 2)
2041+ return *ptrlock;
2042+
2043+ if (__sync_bool_compare_and_swap (ptrlock, NULL, (uintptr_t) 1))
2044+ return NULL;
2045+
2046+ return gomp_ptrlock_get_slow (ptrlock);
2047+}
2048+
2049+extern void gomp_ptrlock_set_slow (gomp_ptrlock_t *ptrlock, void *ptr);
2050+static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr)
2051+{
2052+ if (!__sync_bool_compare_and_swap (ptrlock, (uintptr_t) 1, ptr))
2053+ gomp_ptrlock_set_slow (ptrlock, ptr);
2054+}
2055+
2056+static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock)
2057+{
2058+}
2059+
2060+#endif /* GOMP_PTRLOCK_H */
2061--- libgomp/config/linux/lock.c.jj 2007-12-07 14:41:00.000000000 +0100
2062+++ libgomp/config/linux/lock.c 2008-03-26 15:11:32.000000000 +0100
2063@@ -29,11 +29,10 @@
2064 primitives. This implementation uses atomic instructions and the futex
2065 syscall. */
2066
2067-#include "libgomp.h"
2068 #include <string.h>
2069 #include <unistd.h>
2070 #include <sys/syscall.h>
2071-#include "futex.h"
2072+#include "wait.h"
2073
2074
2075 /* The internal gomp_mutex_t and the external non-recursive omp_lock_t
2076@@ -137,7 +136,7 @@ omp_set_nest_lock (omp_nest_lock_t *lock
2077 return;
2078 }
2079
2080- futex_wait (&lock->owner, otid);
2081+ do_wait (&lock->owner, otid);
2082 }
2083 }
2084
2085--- libgomp/config/linux/ptrlock.c.jj 2008-03-26 15:11:32.000000000 +0100
2086+++ libgomp/config/linux/ptrlock.c 2008-03-26 15:11:32.000000000 +0100
2087@@ -0,0 +1,70 @@
2088+/* Copyright (C) 2008 Free Software Foundation, Inc.
2089+ Contributed by Jakub Jelinek <jakub@redhat.com>.
2090+
2091+ This file is part of the GNU OpenMP Library (libgomp).
2092+
2093+ Libgomp is free software; you can redistribute it and/or modify it
2094+ under the terms of the GNU Lesser General Public License as published by
2095+ the Free Software Foundation; either version 2.1 of the License, or
2096+ (at your option) any later version.
2097+
2098+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
2099+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2100+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
2101+ more details.
2102+
2103+ You should have received a copy of the GNU Lesser General Public License
2104+ along with libgomp; see the file COPYING.LIB. If not, write to the
2105+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
2106+ MA 02110-1301, USA. */
2107+
2108+/* As a special exception, if you link this library with other files, some
2109+ of which are compiled with GCC, to produce an executable, this library
2110+ does not by itself cause the resulting executable to be covered by the
2111+ GNU General Public License. This exception does not however invalidate
2112+ any other reasons why the executable file might be covered by the GNU
2113+ General Public License. */
2114+
2115+/* This is a Linux specific implementation of a mutex synchronization
2116+ mechanism for libgomp. This type is private to the library. This
2117+ implementation uses atomic instructions and the futex syscall. */
2118+
2119+#include <endian.h>
2120+#include <limits.h>
2121+#include "wait.h"
2122+
2123+void *
2124+gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock)
2125+{
2126+ int *intptr;
2127+ __sync_bool_compare_and_swap (ptrlock, 1, 2);
2128+
2129+ /* futex works on ints, not pointers.
2130+ But a valid work share pointer will be at least
2131+ 8 byte aligned, so it is safe to assume the low
2132+ 32-bits of the pointer won't contain values 1 or 2. */
2133+ __asm volatile ("" : "=r" (intptr) : "0" (ptrlock));
2134+#if __BYTE_ORDER == __BIG_ENDIAN
2135+ if (sizeof (*ptrlock) > sizeof (int))
2136+ intptr += (sizeof (*ptrlock) / sizeof (int)) - 1;
2137+#endif
2138+ do
2139+ do_wait (intptr, 2);
2140+ while (*intptr == 2);
2141+ __asm volatile ("" : : : "memory");
2142+ return *ptrlock;
2143+}
2144+
2145+void
2146+gomp_ptrlock_set_slow (gomp_ptrlock_t *ptrlock, void *ptr)
2147+{
2148+ int *intptr;
2149+
2150+ *ptrlock = ptr;
2151+ __asm volatile ("" : "=r" (intptr) : "0" (ptrlock));
2152+#if __BYTE_ORDER == __BIG_ENDIAN
2153+ if (sizeof (*ptrlock) > sizeof (int))
2154+ intptr += (sizeof (*ptrlock) / sizeof (int)) - 1;
2155+#endif
2156+ futex_wake (intptr, INT_MAX);
2157+}
2158--- libgomp/config/linux/x86/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2159+++ libgomp/config/linux/x86/futex.h 2008-03-26 15:11:32.000000000 +0100
2160@@ -1,4 +1,4 @@
2161-/* Copyright (C) 2005 Free Software Foundation, Inc.
2162+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2163 Contributed by Richard Henderson <rth@redhat.com>.
2164
2165 This file is part of the GNU OpenMP Library (libgomp).
2166@@ -27,9 +27,6 @@
2167
2168 /* Provide target-specific access to the futex system call. */
2169
2170-#define FUTEX_WAIT 0
2171-#define FUTEX_WAKE 1
2172-
2173 #ifdef __LP64__
2174 # ifndef SYS_futex
2175 # define SYS_futex 202
2176@@ -38,14 +35,26 @@
2177 static inline void
2178 futex_wait (int *addr, int val)
2179 {
2180- register long r10 __asm__("%r10") = 0;
2181+ register long r10 __asm__("%r10");
2182 long res;
2183
2184+ r10 = 0;
2185 __asm volatile ("syscall"
2186 : "=a" (res)
2187- : "0"(SYS_futex), "D" (addr), "S"(FUTEX_WAIT),
2188- "d"(val), "r"(r10)
2189+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wait),
2190+ "d" (val), "r" (r10)
2191 : "r11", "rcx", "memory");
2192+ if (__builtin_expect (res == -ENOSYS, 0))
2193+ {
2194+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2195+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2196+ r10 = 0;
2197+ __asm volatile ("syscall"
2198+ : "=a" (res)
2199+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wait),
2200+ "d" (val), "r" (r10)
2201+ : "r11", "rcx", "memory");
2202+ }
2203 }
2204
2205 static inline void
2206@@ -55,8 +64,19 @@ futex_wake (int *addr, int count)
2207
2208 __asm volatile ("syscall"
2209 : "=a" (res)
2210- : "0"(SYS_futex), "D" (addr), "S"(FUTEX_WAKE), "d"(count)
2211+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wake),
2212+ "d" (count)
2213 : "r11", "rcx", "memory");
2214+ if (__builtin_expect (res == -ENOSYS, 0))
2215+ {
2216+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2217+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2218+ __asm volatile ("syscall"
2219+ : "=a" (res)
2220+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wake),
2221+ "d" (count)
2222+ : "r11", "rcx", "memory");
2223+ }
2224 }
2225 #else
2226 # ifndef SYS_futex
2227@@ -65,7 +85,7 @@ futex_wake (int *addr, int count)
2228
2229 # ifdef __PIC__
2230
2231-static inline void
2232+static inline long
2233 sys_futex0 (int *addr, int op, int val)
2234 {
2235 long res;
2236@@ -77,11 +97,12 @@ sys_futex0 (int *addr, int op, int val)
2237 : "0"(SYS_futex), "r" (addr), "c"(op),
2238 "d"(val), "S"(0)
2239 : "memory");
2240+ return res;
2241 }
2242
2243 # else
2244
2245-static inline void
2246+static inline long
2247 sys_futex0 (int *addr, int op, int val)
2248 {
2249 long res;
2250@@ -91,6 +112,7 @@ sys_futex0 (int *addr, int op, int val)
2251 : "0"(SYS_futex), "b" (addr), "c"(op),
2252 "d"(val), "S"(0)
2253 : "memory");
2254+ return res;
2255 }
2256
2257 # endif /* __PIC__ */
2258@@ -98,13 +120,37 @@ sys_futex0 (int *addr, int op, int val)
2259 static inline void
2260 futex_wait (int *addr, int val)
2261 {
2262- sys_futex0 (addr, FUTEX_WAIT, val);
2263+ long res = sys_futex0 (addr, gomp_futex_wait, val);
2264+ if (__builtin_expect (res == -ENOSYS, 0))
2265+ {
2266+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2267+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2268+ sys_futex0 (addr, gomp_futex_wait, val);
2269+ }
2270 }
2271
2272 static inline void
2273 futex_wake (int *addr, int count)
2274 {
2275- sys_futex0 (addr, FUTEX_WAKE, count);
2276+ long res = sys_futex0 (addr, gomp_futex_wake, count);
2277+ if (__builtin_expect (res == -ENOSYS, 0))
2278+ {
2279+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2280+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2281+ sys_futex0 (addr, gomp_futex_wake, count);
2282+ }
2283 }
2284
2285 #endif /* __LP64__ */
2286+
2287+static inline void
2288+cpu_relax (void)
2289+{
2290+ __asm volatile ("rep; nop" : : : "memory");
2291+}
2292+
2293+static inline void
2294+atomic_write_barrier (void)
2295+{
2296+ __sync_synchronize ();
2297+}
2298--- libgomp/config/linux/wait.h.jj 2008-03-26 15:11:32.000000000 +0100
2299+++ libgomp/config/linux/wait.h 2008-03-26 15:11:32.000000000 +0100
2300@@ -0,0 +1,68 @@
2301+/* Copyright (C) 2008 Free Software Foundation, Inc.
2302+ Contributed by Jakub Jelinek <jakub@redhat.com>.
2303+
2304+ This file is part of the GNU OpenMP Library (libgomp).
2305+
2306+ Libgomp is free software; you can redistribute it and/or modify it
2307+ under the terms of the GNU Lesser General Public License as published by
2308+ the Free Software Foundation; either version 2.1 of the License, or
2309+ (at your option) any later version.
2310+
2311+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
2312+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2313+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
2314+ more details.
2315+
2316+ You should have received a copy of the GNU Lesser General Public License
2317+ along with libgomp; see the file COPYING.LIB. If not, write to the
2318+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
2319+ MA 02110-1301, USA. */
2320+
2321+/* As a special exception, if you link this library with other files, some
2322+ of which are compiled with GCC, to produce an executable, this library
2323+ does not by itself cause the resulting executable to be covered by the
2324+ GNU General Public License. This exception does not however invalidate
2325+ any other reasons why the executable file might be covered by the GNU
2326+ General Public License. */
2327+
2328+/* This is a Linux specific implementation of a mutex synchronization
2329+ mechanism for libgomp. This type is private to the library. This
2330+ implementation uses atomic instructions and the futex syscall. */
2331+
2332+#ifndef GOMP_WAIT_H
2333+#define GOMP_WAIT_H 1
2334+
2335+#include "libgomp.h"
2336+#include <errno.h>
2337+
2338+#define FUTEX_WAIT 0
2339+#define FUTEX_WAKE 1
2340+#define FUTEX_PRIVATE_FLAG 128L
2341+
2342+#ifdef HAVE_ATTRIBUTE_VISIBILITY
2343+# pragma GCC visibility push(hidden)
2344+#endif
2345+
2346+extern long int gomp_futex_wait, gomp_futex_wake;
2347+
2348+#include "futex.h"
2349+
2350+static inline void do_wait (int *addr, int val)
2351+{
2352+ unsigned long long i, count = gomp_spin_count_var;
2353+
2354+ if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0))
2355+ count = gomp_throttled_spin_count_var;
2356+ for (i = 0; i < count; i++)
2357+ if (__builtin_expect (*addr != val, 0))
2358+ return;
2359+ else
2360+ cpu_relax ();
2361+ futex_wait (addr, val);
2362+}
2363+
2364+#ifdef HAVE_ATTRIBUTE_VISIBILITY
2365+# pragma GCC visibility pop
2366+#endif
2367+
2368+#endif /* GOMP_WAIT_H */
2369--- libgomp/config/linux/sparc/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2370+++ libgomp/config/linux/sparc/futex.h 2008-03-26 15:11:32.000000000 +0100
2371@@ -1,4 +1,4 @@
2372-/* Copyright (C) 2005 Free Software Foundation, Inc.
2373+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2374 Contributed by Jakub Jelinek <jakub@redhat.com>.
2375
2376 This file is part of the GNU OpenMP Library (libgomp).
2377@@ -28,10 +28,8 @@
2378 /* Provide target-specific access to the futex system call. */
2379
2380 #include <sys/syscall.h>
2381-#define FUTEX_WAIT 0
2382-#define FUTEX_WAKE 1
2383
2384-static inline void
2385+static inline long
2386 sys_futex0 (int *addr, int op, int val)
2387 {
2388 register long int g1 __asm__ ("g1");
2389@@ -47,9 +45,9 @@ sys_futex0 (int *addr, int op, int val)
2390 o3 = 0;
2391
2392 #ifdef __arch64__
2393-# define SYSCALL_STRING "ta\t0x6d"
2394+# define SYSCALL_STRING "ta\t0x6d; bcs,a,pt %%xcc, 1f; sub %%g0, %%o0, %%o0; 1:"
2395 #else
2396-# define SYSCALL_STRING "ta\t0x10"
2397+# define SYSCALL_STRING "ta\t0x10; bcs,a 1f; sub %%g0, %%o0, %%o0; 1:"
2398 #endif
2399
2400 __asm volatile (SYSCALL_STRING
2401@@ -65,16 +63,49 @@ sys_futex0 (int *addr, int op, int val)
2402 "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62",
2403 #endif
2404 "cc", "memory");
2405+ return o0;
2406 }
2407
2408 static inline void
2409 futex_wait (int *addr, int val)
2410 {
2411- sys_futex0 (addr, FUTEX_WAIT, val);
2412+ long err = sys_futex0 (addr, gomp_futex_wait, val);
2413+ if (__builtin_expect (err == ENOSYS, 0))
2414+ {
2415+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2416+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2417+ sys_futex0 (addr, gomp_futex_wait, val);
2418+ }
2419 }
2420
2421 static inline void
2422 futex_wake (int *addr, int count)
2423 {
2424- sys_futex0 (addr, FUTEX_WAKE, count);
2425+ long err = sys_futex0 (addr, gomp_futex_wake, count);
2426+ if (__builtin_expect (err == ENOSYS, 0))
2427+ {
2428+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2429+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2430+ sys_futex0 (addr, gomp_futex_wake, count);
2431+ }
2432+}
2433+
2434+static inline void
2435+cpu_relax (void)
2436+{
2437+#if defined __arch64__ || defined __sparc_v9__
2438+ __asm volatile ("membar #LoadLoad" : : : "memory");
2439+#else
2440+ __asm volatile ("" : : : "memory");
2441+#endif
2442+}
2443+
2444+static inline void
2445+atomic_write_barrier (void)
2446+{
2447+#if defined __arch64__ || defined __sparc_v9__
2448+ __asm volatile ("membar #StoreStore" : : : "memory");
2449+#else
2450+ __sync_synchronize ();
2451+#endif
2452 }
2453--- libgomp/config/linux/ia64/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2454+++ libgomp/config/linux/ia64/futex.h 2008-03-26 15:11:32.000000000 +0100
2455@@ -1,4 +1,4 @@
2456-/* Copyright (C) 2005 Free Software Foundation, Inc.
2457+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2458 Contributed by Richard Henderson <rth@redhat.com>.
2459
2460 This file is part of the GNU OpenMP Library (libgomp).
2461@@ -29,23 +29,24 @@
2462
2463 #include <sys/syscall.h>
2464
2465-#define FUTEX_WAIT 0
2466-#define FUTEX_WAKE 1
2467
2468
2469-static inline void
2470-sys_futex0(int *addr, int op, int val)
2471+static inline long
2472+sys_futex0(int *addr, long op, int val)
2473 {
2474 register long out0 asm ("out0") = (long) addr;
2475 register long out1 asm ("out1") = op;
2476 register long out2 asm ("out2") = val;
2477 register long out3 asm ("out3") = 0;
2478+ register long r8 asm ("r8");
2479+ register long r10 asm ("r10");
2480 register long r15 asm ("r15") = SYS_futex;
2481
2482 __asm __volatile ("break 0x100000"
2483- : "=r"(r15), "=r"(out0), "=r"(out1), "=r"(out2), "=r"(out3)
2484+ : "=r"(r15), "=r"(out0), "=r"(out1), "=r"(out2), "=r"(out3),
2485+ "=r"(r8), "=r"(r10)
2486 : "r"(r15), "r"(out0), "r"(out1), "r"(out2), "r"(out3)
2487- : "memory", "r8", "r10", "out4", "out5", "out6", "out7",
2488+ : "memory", "out4", "out5", "out6", "out7",
2489 /* Non-stacked integer registers, minus r8, r10, r15. */
2490 "r2", "r3", "r9", "r11", "r12", "r13", "r14", "r16", "r17", "r18",
2491 "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27",
2492@@ -56,16 +57,41 @@ sys_futex0(int *addr, int op, int val)
2493 "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
2494 /* Branch registers. */
2495 "b6");
2496+ return r8 & r10;
2497 }
2498
2499 static inline void
2500 futex_wait (int *addr, int val)
2501 {
2502- sys_futex0 (addr, FUTEX_WAIT, val);
2503+ long err = sys_futex0 (addr, gomp_futex_wait, val);
2504+ if (__builtin_expect (err == ENOSYS, 0))
2505+ {
2506+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2507+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2508+ sys_futex0 (addr, gomp_futex_wait, val);
2509+ }
2510 }
2511
2512 static inline void
2513 futex_wake (int *addr, int count)
2514 {
2515- sys_futex0 (addr, FUTEX_WAKE, count);
2516+ long err = sys_futex0 (addr, gomp_futex_wake, count);
2517+ if (__builtin_expect (err == ENOSYS, 0))
2518+ {
2519+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2520+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2521+ sys_futex0 (addr, gomp_futex_wake, count);
2522+ }
2523+}
2524+
2525+static inline void
2526+cpu_relax (void)
2527+{
2528+ __asm volatile ("hint @pause" : : : "memory");
2529+}
2530+
2531+static inline void
2532+atomic_write_barrier (void)
2533+{
2534+ __sync_synchronize ();
2535 }
2536--- libgomp/config/linux/s390/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2537+++ libgomp/config/linux/s390/futex.h 2008-03-26 15:11:32.000000000 +0100
2538@@ -1,4 +1,4 @@
2539-/* Copyright (C) 2005 Free Software Foundation, Inc.
2540+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2541 Contributed by Jakub Jelinek <jakub@redhat.com>.
2542
2543 This file is part of the GNU OpenMP Library (libgomp).
2544@@ -28,10 +28,8 @@
2545 /* Provide target-specific access to the futex system call. */
2546
2547 #include <sys/syscall.h>
2548-#define FUTEX_WAIT 0
2549-#define FUTEX_WAKE 1
2550
2551-static inline void
2552+static inline long
2553 sys_futex0 (int *addr, int op, int val)
2554 {
2555 register long int gpr2 __asm__ ("2");
2556@@ -49,16 +47,41 @@ sys_futex0 (int *addr, int op, int val)
2557 : "i" (SYS_futex),
2558 "0" (gpr2), "d" (gpr3), "d" (gpr4), "d" (gpr5)
2559 : "memory");
2560+ return gpr2;
2561 }
2562
2563 static inline void
2564 futex_wait (int *addr, int val)
2565 {
2566- sys_futex0 (addr, FUTEX_WAIT, val);
2567+ long err = sys_futex0 (addr, gomp_futex_wait, val);
2568+ if (__builtin_expect (err == -ENOSYS, 0))
2569+ {
2570+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2571+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2572+ sys_futex0 (addr, gomp_futex_wait, val);
2573+ }
2574 }
2575
2576 static inline void
2577 futex_wake (int *addr, int count)
2578 {
2579- sys_futex0 (addr, FUTEX_WAKE, count);
2580+ long err = sys_futex0 (addr, gomp_futex_wake, count);
2581+ if (__builtin_expect (err == -ENOSYS, 0))
2582+ {
2583+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2584+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2585+ sys_futex0 (addr, gomp_futex_wake, count);
2586+ }
2587+}
2588+
2589+static inline void
2590+cpu_relax (void)
2591+{
2592+ __asm volatile ("" : : : "memory");
2593+}
2594+
2595+static inline void
2596+atomic_write_barrier (void)
2597+{
2598+ __sync_synchronize ();
2599 }
2600--- libgomp/config/linux/mutex.c.jj 2007-12-07 14:41:00.000000000 +0100
2601+++ libgomp/config/linux/mutex.c 2008-03-26 15:11:32.000000000 +0100
2602@@ -1,4 +1,4 @@
2603-/* Copyright (C) 2005 Free Software Foundation, Inc.
2604+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2605 Contributed by Richard Henderson <rth@redhat.com>.
2606
2607 This file is part of the GNU OpenMP Library (libgomp).
2608@@ -29,9 +29,10 @@
2609 mechanism for libgomp. This type is private to the library. This
2610 implementation uses atomic instructions and the futex syscall. */
2611
2612-#include "libgomp.h"
2613-#include "futex.h"
2614+#include "wait.h"
2615
2616+long int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
2617+long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
2618
2619 void
2620 gomp_mutex_lock_slow (gomp_mutex_t *mutex)
2621@@ -40,7 +41,7 @@ gomp_mutex_lock_slow (gomp_mutex_t *mute
2622 {
2623 int oldval = __sync_val_compare_and_swap (mutex, 1, 2);
2624 if (oldval != 0)
2625- futex_wait (mutex, 2);
2626+ do_wait (mutex, 2);
2627 }
2628 while (!__sync_bool_compare_and_swap (mutex, 0, 2));
2629 }
2630--- libgomp/config/linux/sem.c.jj 2007-12-07 14:41:00.000000000 +0100
2631+++ libgomp/config/linux/sem.c 2008-03-26 15:11:32.000000000 +0100
2632@@ -1,4 +1,4 @@
2633-/* Copyright (C) 2005 Free Software Foundation, Inc.
2634+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2635 Contributed by Richard Henderson <rth@redhat.com>.
2636
2637 This file is part of the GNU OpenMP Library (libgomp).
2638@@ -29,8 +29,7 @@
2639 mechanism for libgomp. This type is private to the library. This
2640 implementation uses atomic instructions and the futex syscall. */
2641
2642-#include "libgomp.h"
2643-#include "futex.h"
2644+#include "wait.h"
2645
2646
2647 void
2648@@ -44,7 +43,7 @@ gomp_sem_wait_slow (gomp_sem_t *sem)
2649 if (__sync_bool_compare_and_swap (sem, val, val - 1))
2650 return;
2651 }
2652- futex_wait (sem, -1);
2653+ do_wait (sem, -1);
2654 }
2655 }
2656
2657--- libgomp/config/linux/powerpc/futex.h.jj 2007-12-07 14:41:00.000000000 +0100
2658+++ libgomp/config/linux/powerpc/futex.h 2008-03-26 15:11:32.000000000 +0100
2659@@ -1,4 +1,4 @@
2660-/* Copyright (C) 2005 Free Software Foundation, Inc.
2661+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2662 Contributed by Richard Henderson <rth@redhat.com>.
2663
2664 This file is part of the GNU OpenMP Library (libgomp).
2665@@ -28,10 +28,8 @@
2666 /* Provide target-specific access to the futex system call. */
2667
2668 #include <sys/syscall.h>
2669-#define FUTEX_WAIT 0
2670-#define FUTEX_WAKE 1
2671
2672-static inline void
2673+static inline long
2674 sys_futex0 (int *addr, int op, int val)
2675 {
2676 register long int r0 __asm__ ("r0");
2677@@ -50,21 +48,48 @@ sys_futex0 (int *addr, int op, int val)
2678 doesn't. It doesn't much matter for us. In the interest of unity,
2679 go ahead and clobber it always. */
2680
2681- __asm volatile ("sc"
2682+ __asm volatile ("sc; mfcr %0"
2683 : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6)
2684 : "r"(r0), "r"(r3), "r"(r4), "r"(r5), "r"(r6)
2685 : "r7", "r8", "r9", "r10", "r11", "r12",
2686 "cr0", "ctr", "memory");
2687+ if (__builtin_expect (r0 & (1 << 28), 0))
2688+ return r3;
2689+ return 0;
2690 }
2691
2692 static inline void
2693 futex_wait (int *addr, int val)
2694 {
2695- sys_futex0 (addr, FUTEX_WAIT, val);
2696+ long err = sys_futex0 (addr, gomp_futex_wait, val);
2697+ if (__builtin_expect (err == ENOSYS, 0))
2698+ {
2699+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2700+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2701+ sys_futex0 (addr, gomp_futex_wait, val);
2702+ }
2703 }
2704
2705 static inline void
2706 futex_wake (int *addr, int count)
2707 {
2708- sys_futex0 (addr, FUTEX_WAKE, count);
2709+ long err = sys_futex0 (addr, gomp_futex_wake, count);
2710+ if (__builtin_expect (err == ENOSYS, 0))
2711+ {
2712+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
2713+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
2714+ sys_futex0 (addr, gomp_futex_wake, count);
2715+ }
2716+}
2717+
2718+static inline void
2719+cpu_relax (void)
2720+{
2721+ __asm volatile ("" : : : "memory");
2722+}
2723+
2724+static inline void
2725+atomic_write_barrier (void)
2726+{
2727+ __asm volatile ("eieio" : : : "memory");
2728 }
2729--- libgomp/config/linux/bar.c.jj 2007-12-07 14:41:00.000000000 +0100
2730+++ libgomp/config/linux/bar.c 2008-03-26 15:11:32.000000000 +0100
2731@@ -1,4 +1,4 @@
2732-/* Copyright (C) 2005 Free Software Foundation, Inc.
2733+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
2734 Contributed by Richard Henderson <rth@redhat.com>.
2735
2736 This file is part of the GNU OpenMP Library (libgomp).
2737@@ -29,32 +29,29 @@
2738 mechanism for libgomp. This type is private to the library. This
2739 implementation uses atomic instructions and the futex syscall. */
2740
2741-#include "libgomp.h"
2742-#include "futex.h"
2743 #include <limits.h>
2744+#include "wait.h"
2745
2746
2747 void
2748-gomp_barrier_wait_end (gomp_barrier_t *bar, bool last)
2749+gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
2750 {
2751- if (last)
2752+ if (__builtin_expect ((state & 1) != 0, 0))
2753 {
2754- bar->generation++;
2755- futex_wake (&bar->generation, INT_MAX);
2756+ /* Next time we'll be awaiting TOTAL threads again. */
2757+ bar->awaited = bar->total;
2758+ atomic_write_barrier ();
2759+ bar->generation += 2;
2760+ futex_wake ((int *) &bar->generation, INT_MAX);
2761 }
2762 else
2763 {
2764- unsigned int generation = bar->generation;
2765-
2766- gomp_mutex_unlock (&bar->mutex);
2767+ unsigned int generation = state;
2768
2769 do
2770- futex_wait (&bar->generation, generation);
2771+ do_wait ((int *) &bar->generation, generation);
2772 while (bar->generation == generation);
2773 }
2774-
2775- if (__sync_add_and_fetch (&bar->arrived, -1) == 0)
2776- gomp_mutex_unlock (&bar->mutex);
2777 }
2778
2779 void
2780@@ -62,3 +59,18 @@ gomp_barrier_wait (gomp_barrier_t *barri
2781 {
2782 gomp_barrier_wait_end (barrier, gomp_barrier_wait_start (barrier));
2783 }
2784+
2785+/* Like gomp_barrier_wait, except that if the encountering thread
2786+ is not the last one to hit the barrier, it returns immediately.
2787+ The intended usage is that a thread which intends to gomp_barrier_destroy
2788+ this barrier calls gomp_barrier_wait, while all other threads
2789+ call gomp_barrier_wait_last. When gomp_barrier_wait returns,
2790+ the barrier can be safely destroyed. */
2791+
2792+void
2793+gomp_barrier_wait_last (gomp_barrier_t *barrier)
2794+{
2795+ gomp_barrier_state_t state = gomp_barrier_wait_start (barrier);
2796+ if (state & 1)
2797+ gomp_barrier_wait_end (barrier, state);
2798+}