diff options
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch')
-rw-r--r-- | meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch | 2798 |
1 files changed, 0 insertions, 2798 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch b/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch deleted file mode 100644 index 54c855fb34..0000000000 --- a/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch +++ /dev/null | |||
@@ -1,2798 +0,0 @@ | |||
1 | Upstream-Status: Inappropriate [distribution: fedora] | ||
2 | 2008-03-28 Jakub Jelinek <jakub@redhat.com> | ||
3 | |||
4 | * config/linux/sparc/futex.h (atomic_write_barrier): Fix membar | ||
5 | argument. | ||
6 | |||
7 | 2008-03-27 Jakub Jelinek <jakub@redhat.com> | ||
8 | |||
9 | * libgomp.h (struct gomp_team_state): Remove single_count field | ||
10 | ifndef HAVE_SYNC_BUILTINS. | ||
11 | (struct gomp_team): Likewise. Add work_share_list_free_lock | ||
12 | ifndef HAVE_SYNC_BUILTINS. | ||
13 | * team.c (gomp_new_team): If HAVE_SYNC_BUILTINS is not defined, | ||
14 | don't initialize single_count, but instead initialize | ||
15 | work_share_list_free_lock. | ||
16 | (free_team): Destroy work_share_list_free_lock ifndef | ||
17 | HAVE_SYNC_BUILTINS. | ||
18 | (gomp_team_start): Don't initialize ts.single_count ifndef | ||
19 | HAVE_SYNC_BUILTINS. | ||
20 | * work.c (alloc_work_share, free_work_share): Use | ||
21 | work_share_list_free_lock instead of atomic chaining ifndef | ||
22 | HAVE_SYNC_BUILTINS. | ||
23 | |||
24 | 2008-03-26 Jakub Jelinek <jakub@redhat.com> | ||
25 | |||
26 | * loop.c (gomp_loop_init): Fix GFS_DYNAMIC ws->mode setting. | ||
27 | * testsuite/libgomp.c/loop-4.c: New test. | ||
28 | |||
29 | * libgomp.h (struct gomp_team_state): Add single_count field. | ||
30 | (struct gomp_team): Likewise. | ||
31 | * team.c (gomp_new_team): Clear single_count. | ||
32 | (gomp_team_start): Likewise. | ||
33 | * single.c (GOMP_single_start): Rewritten if HAVE_SYNC_BUILTINS. | ||
34 | |||
35 | 2008-03-25 Jakub Jelinek <jakub@redhat.com> | ||
36 | |||
37 | * team.c (gomp_thread_start): Don't clear ts.static_trip here. | ||
38 | * loop.c (gomp_loop_static_start, gomp_loop_dynamic_start): Clear | ||
39 | ts.static_trip here. | ||
40 | * work.c (gomp_work_share_start): Don't clear ts.static_trip here. | ||
41 | |||
42 | 2008-03-21 Jakub Jelinek <jakub@redhat.com> | ||
43 | |||
44 | * libgomp.h: Include ptrlock.h. | ||
45 | (struct gomp_work_share): Reshuffle fields. Add next_alloc, | ||
46 | next_ws, next_free and inline_ordered_team_ids fields, change | ||
47 | ordered_team_ids into pointer from flexible array member. | ||
48 | (struct gomp_team_state): Add last_work_share field, remove | ||
49 | work_share_generation. | ||
50 | (struct gomp_team): Remove work_share_lock, generation_mask, | ||
51 | oldest_live_gen, num_live_gen and init_work_shares fields, add | ||
52 | work work_share_list_alloc, work_share_list_free and work_share_chunk | ||
53 | fields. Change work_shares from pointer to pointers into an array. | ||
54 | (gomp_new_team): New prototype. | ||
55 | (gomp_team_start): Change type of last argument. | ||
56 | (gomp_new_work_share): Removed. | ||
57 | (gomp_init_work_share, gomp_fini_work_share): New prototypes. | ||
58 | (gomp_work_share_init_done): New static inline. | ||
59 | * team.c (gomp_thread_start): Clear ts.last_work_share, don't clear | ||
60 | ts.work_share_generation. | ||
61 | (new_team): Removed. | ||
62 | (gomp_new_team): New function. | ||
63 | (free_team): Free gomp_work_share blocks chained through next_alloc, | ||
64 | instead of freeing work_shares and destroying work_share_lock. | ||
65 | (gomp_team_start): Change last argument from ws to team, don't create | ||
66 | new team, set ts.work_share to &team->work_shares[0] and clear | ||
67 | ts.last_work_share. Don't clear ts.work_share_generation. | ||
68 | (gomp_team_end): Call gomp_fini_work_share. | ||
69 | * work.c (gomp_new_work_share): Removed. | ||
70 | (alloc_work_share, gomp_init_work_share, gomp_fini_work_share): New | ||
71 | functions. | ||
72 | (free_work_share): Add team argument. Call gomp_fini_work_share | ||
73 | and then either free ws if orphaned, or put it into | ||
74 | work_share_list_free list of the current team. | ||
75 | (gomp_work_share_start, gomp_work_share_end, | ||
76 | gomp_work_share_end_nowait): Rewritten. | ||
77 | * sections.c (GOMP_sections_start): Call gomp_work_share_init_done | ||
78 | after gomp_sections_init. If HAVE_SYNC_BUILTINS, call | ||
79 | gomp_iter_dynamic_next instead of the _locked variant and don't take | ||
80 | lock around it, otherwise acquire it before calling | ||
81 | gomp_iter_dynamic_next_locked. | ||
82 | (GOMP_sections_next): If HAVE_SYNC_BUILTINS, call | ||
83 | gomp_iter_dynamic_next instead of the _locked variant and don't take | ||
84 | lock around it. | ||
85 | (GOMP_parallel_sections_start): Call gomp_new_team instead of | ||
86 | gomp_new_work_share. Call gomp_sections_init on &team->work_shares[0]. | ||
87 | Adjust gomp_team_start caller. | ||
88 | * loop.c (gomp_loop_static_start, gomp_loop_ordered_static_start): Call | ||
89 | gomp_work_share_init_done after gomp_loop_init. Don't unlock ws->lock. | ||
90 | (gomp_loop_dynamic_start, gomp_loop_guided_start): Call | ||
91 | gomp_work_share_init_done after gomp_loop_init. If HAVE_SYNC_BUILTINS, | ||
92 | don't unlock ws->lock, otherwise lock it. | ||
93 | (gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start): Call | ||
94 | gomp_work_share_init_done after gomp_loop_init. Lock ws->lock. | ||
95 | (gomp_parallel_loop_start): Call gomp_new_team instead of | ||
96 | gomp_new_work_share. Call gomp_loop_init on &team->work_shares[0]. | ||
97 | Adjust gomp_team_start caller. | ||
98 | * single.c (GOMP_single_start, GOMP_single_copy_start): Call | ||
99 | gomp_work_share_init_done if gomp_work_share_start returned true. | ||
100 | Don't unlock ws->lock. | ||
101 | * parallel.c (GOMP_parallel_start): Call gomp_new_team and pass that | ||
102 | as last argument to gomp_team_start. | ||
103 | * config/linux/ptrlock.c: New file. | ||
104 | * config/linux/ptrlock.h: New file. | ||
105 | * config/posix/ptrlock.c: New file. | ||
106 | * config/posix/ptrlock.h: New file. | ||
107 | * Makefile.am (libgomp_la_SOURCES): Add ptrlock.c. | ||
108 | * Makefile.in: Regenerated. | ||
109 | * testsuite/Makefile.in: Regenerated. | ||
110 | |||
111 | 2008-03-19 Jakub Jelinek <jakub@redhat.com> | ||
112 | |||
113 | * libgomp.h (gomp_active_wait_policy): Remove decl. | ||
114 | (gomp_throttled_spin_count_var, gomp_available_cpus, | ||
115 | gomp_managed_threads): New extern decls. | ||
116 | * team.c (gomp_team_start, gomp_team_end): If number of threads | ||
117 | changed, adjust atomically gomp_managed_threads. | ||
118 | * env.c (gomp_active_wait_policy, gomp_block_time_var): Remove. | ||
119 | (gomp_throttled_spin_count_var, gomp_available_cpus, | ||
120 | gomp_managed_threads): New variables. | ||
121 | (parse_millis): Removed. | ||
122 | (parse_spincount): New function. | ||
123 | (parse_wait_policy): Return -1/0/1 instead of setting | ||
124 | gomp_active_wait_policy. | ||
125 | (initialize_env): Call gomp_init_num_threads unconditionally. | ||
126 | Initialize gomp_available_cpus. Call parse_spincount instead | ||
127 | of parse_millis, initialize gomp_{,throttled_}spin_count_var | ||
128 | depending on presence and value of OMP_WAIT_POLICY and | ||
129 | GOMP_SPINCOUNT env vars. | ||
130 | * config/linux/wait.h (do_wait): Use gomp_throttled_spin_count_var | ||
131 | instead of gomp_spin_count_var if gomp_managed_threads > | ||
132 | gomp_available_cpus. | ||
133 | |||
134 | * config/linux/wait.h: Include errno.h. | ||
135 | (FUTEX_WAIT, FUTEX_WAKE, FUTEX_PRIVATE_FLAG): Define. | ||
136 | (gomp_futex_wake, gomp_futex_wait): New extern decls. | ||
137 | * config/linux/mutex.c (gomp_futex_wake, gomp_futex_wait): New | ||
138 | variables. | ||
139 | * config/linux/powerpc/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. | ||
140 | (sys_futex0): Return error code. | ||
141 | (futex_wake, futex_wait): If ENOSYS was returned, clear | ||
142 | FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. | ||
143 | * config/linux/alpha/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. | ||
144 | (futex_wake, futex_wait): If ENOSYS was returned, clear | ||
145 | FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. | ||
146 | * config/linux/x86/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. | ||
147 | (sys_futex0): Return error code. | ||
148 | (futex_wake, futex_wait): If ENOSYS was returned, clear | ||
149 | FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. | ||
150 | * config/linux/s390/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. | ||
151 | (sys_futex0): Return error code. | ||
152 | (futex_wake, futex_wait): If ENOSYS was returned, clear | ||
153 | FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. | ||
154 | * config/linux/ia64/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. | ||
155 | (sys_futex0): Return error code. | ||
156 | (futex_wake, futex_wait): If ENOSYS was returned, clear | ||
157 | FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. | ||
158 | * config/linux/sparc/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. | ||
159 | (sys_futex0): Return error code. | ||
160 | (futex_wake, futex_wait): If ENOSYS was returned, clear | ||
161 | FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. | ||
162 | |||
163 | 2008-03-18 Jakub Jelinek <jakub@redhat.com> | ||
164 | |||
165 | * libgomp.h (struct gomp_work_share): Add mode field. Put lock and | ||
166 | next into a different cache line from most of the write-once fields. | ||
167 | * loop.c: Include limits.h. | ||
168 | (gomp_loop_init): For GFS_DYNAMIC, multiply ws->chunk_size by incr. | ||
169 | If adding ws->chunk_size nthreads + 1 times after end won't | ||
170 | overflow, set ws->mode to 1. | ||
171 | * iter.c (gomp_iter_dynamic_next_locked): Don't multiply | ||
172 | ws->chunk_size by incr. | ||
173 | (gomp_iter_dynamic_next): Likewise. If ws->mode, use more efficient | ||
174 | code. | ||
175 | * work.c: Include stddef.h. | ||
176 | (gomp_new_work_share): Use offsetof rather than sizeof. | ||
177 | |||
178 | 2008-03-17 Jakub Jelinek <jakub@redhat.com> | ||
179 | |||
180 | * libgomp.h (struct gomp_team): Change ordered_release field | ||
181 | into gomp_sem_t ** from flexible array member. Add implicit_task | ||
182 | and initial_work_shares fields. | ||
183 | (gomp_new_task): Removed. | ||
184 | (gomp_init_task): New prototype. | ||
185 | * team.c (new_team): Allocate implicit_task for each thread | ||
186 | and initial work_shares together with gomp_team allocation. | ||
187 | (free_team): Only free work_shares if it is not init_work_shares. | ||
188 | (gomp_team_start): Use gomp_init_task instead of gomp_new_task, | ||
189 | set thr->task to the corresponding implicit_task array entry. | ||
190 | * task.c (gomp_new_task): Removed. | ||
191 | (gomp_init_task): New function. | ||
192 | (gomp_end_task): Don't free the task. | ||
193 | (GOMP_task): Allocate struct gomp_task on the stack, call | ||
194 | gomp_init_task rather than gomp_new_task. | ||
195 | * work.c (gomp_work_share_start): If work_shares == | ||
196 | init_work_shares, gomp_malloc + memcpy rather than gomp_realloc. | ||
197 | |||
198 | 2008-03-15 Jakub Jelinek <jakub@redhat.com> | ||
199 | Ulrich Drepper <drepper@redhat.com> | ||
200 | |||
201 | * config/linux/bar.h (gomp_barrier_state_t): Rewritten. | ||
202 | (gomp_barrier_state_t): Change to unsigned int. | ||
203 | (gomp_barrier_init, gomp_barrier_reinit, gomp_barrier_destroy, | ||
204 | gomp_barrier_wait_start, gomp_barrier_last_thread): Rewritten. | ||
205 | (gomp_barrier_wait_last): Prototype rather than inline. | ||
206 | * config/linux/bar.c (gomp_barrier_wait_end): Rewritten. | ||
207 | (gomp_barrier_wait_last): New function. | ||
208 | |||
209 | 2008-03-15 Jakub Jelinek <jakub@redhat.com> | ||
210 | |||
211 | * team.c (gomp_thread_start): Use gomp_barrier_wait_last instead | ||
212 | of gomp_barrier_wait. | ||
213 | * env.c (gomp_block_time_var, gomp_spin_count_var): New variables. | ||
214 | (parse_millis): New function. | ||
215 | (initialize_env): Handle GOMP_BLOCKTIME env var. | ||
216 | * libgomp.h (struct gomp_team): Move close to the end of the struct. | ||
217 | (gomp_spin_count_var): New extern var decl. | ||
218 | * work.c (gomp_work_share_end): Use gomp_barrier_state_t bstate | ||
219 | var instead of bool last, call gomp_barrier_last_thread to check | ||
220 | for last thread, pass bstate to gomp_barrier_wait_end. | ||
221 | * config/linux/wait.h: New file. | ||
222 | * config/linux/mutex.c: Include wait.h instead of libgomp.h and | ||
223 | futex.h. | ||
224 | (gomp_mutex_lock_slow): Call do_wait instead of futex_wait. | ||
225 | * config/linux/bar.c: Include wait.h instead of libgomp.h and | ||
226 | futex.h. | ||
227 | (gomp_barrier_wait_end): Change second argument to | ||
228 | gomp_barrier_state_t. Call do_wait instead of futex_wait. | ||
229 | * config/linux/sem.c: Include wait.h instead of libgomp.h and | ||
230 | futex.h. | ||
231 | (gomp_sem_wait_slow): Call do_wait instead of futex_wait. | ||
232 | * config/linux/lock.c: Include wait.h instead of libgomp.h and | ||
233 | futex.h. | ||
234 | (gomp_set_nest_lock_25): Call do_wait instead of futex_wait. | ||
235 | * config/linux/affinity.c: Assume HAVE_SYNC_BUILTINS. | ||
236 | * config/linux/bar.h (gomp_barrier_state_t): New typedef. | ||
237 | (gomp_barrier_wait_end): Change second argument to | ||
238 | gomp_barrier_state_t. | ||
239 | (gomp_barrier_wait_start): Return gomp_barrier_state_t. | ||
240 | (gomp_barrier_last_thread, gomp_barrier_wait_last): New static | ||
241 | inlines. | ||
242 | * config/linux/powerpc/futex.h (cpu_relax, atomic_write_barrier): New | ||
243 | static inlines. | ||
244 | * config/linux/alpha/futex.h (cpu_relax, atomic_write_barrier): | ||
245 | Likewise. | ||
246 | * config/linux/x86/futex.h (cpu_relax, atomic_write_barrier): | ||
247 | Likewise. | ||
248 | * config/linux/s390/futex.h (cpu_relax, atomic_write_barrier): | ||
249 | Likewise. | ||
250 | * config/linux/ia64/futex.h (cpu_relax, atomic_write_barrier): | ||
251 | Likewise. | ||
252 | * config/linux/sparc/futex.h (cpu_relax, atomic_write_barrier): | ||
253 | Likewise. | ||
254 | * config/posix/bar.c (gomp_barrier_wait_end): Change second argument | ||
255 | to gomp_barrier_state_t. | ||
256 | * config/posix/bar.h (gomp_barrier_state_t): New typedef. | ||
257 | (gomp_barrier_wait_end): Change second argument to | ||
258 | gomp_barrier_state_t. | ||
259 | (gomp_barrier_wait_start): Return gomp_barrier_state_t. | ||
260 | (gomp_barrier_last_thread, gomp_barrier_wait_last): New static | ||
261 | inlines. | ||
262 | |||
263 | --- libgomp/parallel.c.jj 2007-12-07 14:41:01.000000000 +0100 | ||
264 | +++ libgomp/parallel.c 2008-03-26 15:32:06.000000000 +0100 | ||
265 | @@ -68,7 +68,7 @@ void | ||
266 | GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads) | ||
267 | { | ||
268 | num_threads = gomp_resolve_num_threads (num_threads); | ||
269 | - gomp_team_start (fn, data, num_threads, NULL); | ||
270 | + gomp_team_start (fn, data, num_threads, gomp_new_team (num_threads)); | ||
271 | } | ||
272 | |||
273 | void | ||
274 | --- libgomp/sections.c.jj 2007-12-07 14:41:01.000000000 +0100 | ||
275 | +++ libgomp/sections.c 2008-03-26 15:33:06.000000000 +0100 | ||
276 | @@ -59,14 +59,24 @@ GOMP_sections_start (unsigned count) | ||
277 | long s, e, ret; | ||
278 | |||
279 | if (gomp_work_share_start (false)) | ||
280 | - gomp_sections_init (thr->ts.work_share, count); | ||
281 | + { | ||
282 | + gomp_sections_init (thr->ts.work_share, count); | ||
283 | + gomp_work_share_init_done (); | ||
284 | + } | ||
285 | |||
286 | +#ifdef HAVE_SYNC_BUILTINS | ||
287 | + if (gomp_iter_dynamic_next (&s, &e)) | ||
288 | + ret = s; | ||
289 | + else | ||
290 | + ret = 0; | ||
291 | +#else | ||
292 | + gomp_mutex_lock (&thr->ts.work_share->lock); | ||
293 | if (gomp_iter_dynamic_next_locked (&s, &e)) | ||
294 | ret = s; | ||
295 | else | ||
296 | ret = 0; | ||
297 | - | ||
298 | gomp_mutex_unlock (&thr->ts.work_share->lock); | ||
299 | +#endif | ||
300 | |||
301 | return ret; | ||
302 | } | ||
303 | @@ -83,15 +93,23 @@ GOMP_sections_start (unsigned count) | ||
304 | unsigned | ||
305 | GOMP_sections_next (void) | ||
306 | { | ||
307 | - struct gomp_thread *thr = gomp_thread (); | ||
308 | long s, e, ret; | ||
309 | |||
310 | +#ifdef HAVE_SYNC_BUILTINS | ||
311 | + if (gomp_iter_dynamic_next (&s, &e)) | ||
312 | + ret = s; | ||
313 | + else | ||
314 | + ret = 0; | ||
315 | +#else | ||
316 | + struct gomp_thread *thr = gomp_thread (); | ||
317 | + | ||
318 | gomp_mutex_lock (&thr->ts.work_share->lock); | ||
319 | if (gomp_iter_dynamic_next_locked (&s, &e)) | ||
320 | ret = s; | ||
321 | else | ||
322 | ret = 0; | ||
323 | gomp_mutex_unlock (&thr->ts.work_share->lock); | ||
324 | +#endif | ||
325 | |||
326 | return ret; | ||
327 | } | ||
328 | @@ -103,15 +121,15 @@ void | ||
329 | GOMP_parallel_sections_start (void (*fn) (void *), void *data, | ||
330 | unsigned num_threads, unsigned count) | ||
331 | { | ||
332 | - struct gomp_work_share *ws; | ||
333 | + struct gomp_team *team; | ||
334 | |||
335 | num_threads = gomp_resolve_num_threads (num_threads); | ||
336 | if (gomp_dyn_var && num_threads > count) | ||
337 | num_threads = count; | ||
338 | |||
339 | - ws = gomp_new_work_share (false, num_threads); | ||
340 | - gomp_sections_init (ws, count); | ||
341 | - gomp_team_start (fn, data, num_threads, ws); | ||
342 | + team = gomp_new_team (num_threads); | ||
343 | + gomp_sections_init (&team->work_shares[0], count); | ||
344 | + gomp_team_start (fn, data, num_threads, team); | ||
345 | } | ||
346 | |||
347 | /* The GOMP_section_end* routines are called after the thread is told | ||
348 | --- libgomp/env.c.jj 2007-12-07 14:41:01.000000000 +0100 | ||
349 | +++ libgomp/env.c 2008-03-26 16:40:26.000000000 +0100 | ||
350 | @@ -44,6 +44,11 @@ enum gomp_schedule_type gomp_run_sched_v | ||
351 | unsigned long gomp_run_sched_chunk = 1; | ||
352 | unsigned short *gomp_cpu_affinity; | ||
353 | size_t gomp_cpu_affinity_len; | ||
354 | +#ifndef HAVE_SYNC_BUILTINS | ||
355 | +gomp_mutex_t gomp_remaining_threads_lock; | ||
356 | +#endif | ||
357 | +unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1; | ||
358 | +unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; | ||
359 | |||
360 | /* Parse the OMP_SCHEDULE environment variable. */ | ||
361 | |||
362 | @@ -147,6 +152,79 @@ parse_unsigned_long (const char *name, u | ||
363 | return false; | ||
364 | } | ||
365 | |||
366 | +/* Parse the GOMP_SPINCOUNT environment varible. Return true if one was | ||
367 | + present and it was successfully parsed. */ | ||
368 | + | ||
369 | +static bool | ||
370 | +parse_spincount (const char *name, unsigned long long *pvalue) | ||
371 | +{ | ||
372 | + char *env, *end; | ||
373 | + unsigned long long value, mult = 1; | ||
374 | + | ||
375 | + env = getenv (name); | ||
376 | + if (env == NULL) | ||
377 | + return false; | ||
378 | + | ||
379 | + while (isspace ((unsigned char) *env)) | ||
380 | + ++env; | ||
381 | + if (*env == '\0') | ||
382 | + goto invalid; | ||
383 | + | ||
384 | + if (strncasecmp (env, "infinite", 8) == 0 | ||
385 | + || strncasecmp (env, "infinity", 8) == 0) | ||
386 | + { | ||
387 | + value = ~0ULL; | ||
388 | + end = env + 8; | ||
389 | + goto check_tail; | ||
390 | + } | ||
391 | + | ||
392 | + errno = 0; | ||
393 | + value = strtoull (env, &end, 10); | ||
394 | + if (errno) | ||
395 | + goto invalid; | ||
396 | + | ||
397 | + while (isspace ((unsigned char) *end)) | ||
398 | + ++end; | ||
399 | + if (*end != '\0') | ||
400 | + { | ||
401 | + switch (tolower (*end)) | ||
402 | + { | ||
403 | + case 'k': | ||
404 | + mult = 1000LL; | ||
405 | + break; | ||
406 | + case 'm': | ||
407 | + mult = 1000LL * 1000LL; | ||
408 | + break; | ||
409 | + case 'g': | ||
410 | + mult = 1000LL * 1000LL * 1000LL; | ||
411 | + break; | ||
412 | + case 't': | ||
413 | + mult = 1000LL * 1000LL * 1000LL * 1000LL; | ||
414 | + break; | ||
415 | + default: | ||
416 | + goto invalid; | ||
417 | + } | ||
418 | + ++end; | ||
419 | + check_tail: | ||
420 | + while (isspace ((unsigned char) *end)) | ||
421 | + ++end; | ||
422 | + if (*end != '\0') | ||
423 | + goto invalid; | ||
424 | + } | ||
425 | + | ||
426 | + if (value > ~0ULL / mult) | ||
427 | + value = ~0ULL; | ||
428 | + else | ||
429 | + value *= mult; | ||
430 | + | ||
431 | + *pvalue = value; | ||
432 | + return true; | ||
433 | + | ||
434 | + invalid: | ||
435 | + gomp_error ("Invalid value for environment variable %s", name); | ||
436 | + return false; | ||
437 | +} | ||
438 | + | ||
439 | /* Parse a boolean value for environment variable NAME and store the | ||
440 | result in VALUE. */ | ||
441 | |||
442 | @@ -281,10 +359,25 @@ initialize_env (void) | ||
443 | parse_schedule (); | ||
444 | parse_boolean ("OMP_DYNAMIC", &gomp_dyn_var); | ||
445 | parse_boolean ("OMP_NESTED", &gomp_nest_var); | ||
446 | + gomp_init_num_threads (); | ||
447 | + gomp_available_cpus = gomp_nthreads_var; | ||
448 | if (!parse_unsigned_long ("OMP_NUM_THREADS", &gomp_nthreads_var)) | ||
449 | - gomp_init_num_threads (); | ||
450 | + gomp_nthreads_var = gomp_available_cpus; | ||
451 | if (parse_affinity ()) | ||
452 | gomp_init_affinity (); | ||
453 | + if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var)) | ||
454 | + { | ||
455 | + /* Using a rough estimation of 100000 spins per msec, | ||
456 | + use 200 msec blocking. | ||
457 | + Depending on the CPU speed, this can be e.g. 5 times longer | ||
458 | + or 5 times shorter. */ | ||
459 | + gomp_spin_count_var = 20000000LL; | ||
460 | + } | ||
461 | + /* gomp_throttled_spin_count_var is used when there are more libgomp | ||
462 | + managed threads than available CPUs. Use very short spinning. */ | ||
463 | + gomp_throttled_spin_count_var = 100LL; | ||
464 | + if (gomp_throttled_spin_count_var > gomp_spin_count_var) | ||
465 | + gomp_throttled_spin_count_var = gomp_spin_count_var; | ||
466 | |||
467 | /* Not strictly environment related, but ordering constructors is tricky. */ | ||
468 | pthread_attr_init (&gomp_thread_attr); | ||
469 | --- libgomp/libgomp.h.jj 2007-12-07 14:41:01.000000000 +0100 | ||
470 | +++ libgomp/libgomp.h 2008-03-27 12:21:51.000000000 +0100 | ||
471 | @@ -50,6 +50,7 @@ | ||
472 | #include "sem.h" | ||
473 | #include "mutex.h" | ||
474 | #include "bar.h" | ||
475 | +#include "ptrlock.h" | ||
476 | |||
477 | |||
478 | /* This structure contains the data to control one work-sharing construct, | ||
479 | @@ -70,6 +71,8 @@ struct gomp_work_share | ||
480 | If this is a SECTIONS construct, this value will always be DYNAMIC. */ | ||
481 | enum gomp_schedule_type sched; | ||
482 | |||
483 | + int mode; | ||
484 | + | ||
485 | /* This is the chunk_size argument to the SCHEDULE clause. */ | ||
486 | long chunk_size; | ||
487 | |||
488 | @@ -81,17 +84,38 @@ struct gomp_work_share | ||
489 | is always 1. */ | ||
490 | long incr; | ||
491 | |||
492 | - /* This lock protects the update of the following members. */ | ||
493 | - gomp_mutex_t lock; | ||
494 | + /* This is a circular queue that details which threads will be allowed | ||
495 | + into the ordered region and in which order. When a thread allocates | ||
496 | + iterations on which it is going to work, it also registers itself at | ||
497 | + the end of the array. When a thread reaches the ordered region, it | ||
498 | + checks to see if it is the one at the head of the queue. If not, it | ||
499 | + blocks on its RELEASE semaphore. */ | ||
500 | + unsigned *ordered_team_ids; | ||
501 | |||
502 | - union { | ||
503 | - /* This is the next iteration value to be allocated. In the case of | ||
504 | - GFS_STATIC loops, this the iteration start point and never changes. */ | ||
505 | - long next; | ||
506 | + /* This is the number of threads that have registered themselves in | ||
507 | + the circular queue ordered_team_ids. */ | ||
508 | + unsigned ordered_num_used; | ||
509 | |||
510 | - /* This is the returned data structure for SINGLE COPYPRIVATE. */ | ||
511 | - void *copyprivate; | ||
512 | - }; | ||
513 | + /* This is the team_id of the currently acknowledged owner of the ordered | ||
514 | + section, or -1u if the ordered section has not been acknowledged by | ||
515 | + any thread. This is distinguished from the thread that is *allowed* | ||
516 | + to take the section next. */ | ||
517 | + unsigned ordered_owner; | ||
518 | + | ||
519 | + /* This is the index into the circular queue ordered_team_ids of the | ||
520 | + current thread that's allowed into the ordered reason. */ | ||
521 | + unsigned ordered_cur; | ||
522 | + | ||
523 | + /* This is a chain of allocated gomp_work_share blocks, valid only | ||
524 | + in the first gomp_work_share struct in the block. */ | ||
525 | + struct gomp_work_share *next_alloc; | ||
526 | + | ||
527 | + /* The above fields are written once during workshare initialization, | ||
528 | + or related to ordered worksharing. Make sure the following fields | ||
529 | + are in a different cache line. */ | ||
530 | + | ||
531 | + /* This lock protects the update of the following members. */ | ||
532 | + gomp_mutex_t lock __attribute__((aligned (64))); | ||
533 | |||
534 | /* This is the count of the number of threads that have exited the work | ||
535 | share construct. If the construct was marked nowait, they have moved on | ||
536 | @@ -99,27 +123,28 @@ struct gomp_work_share | ||
537 | of the team to exit the work share construct must deallocate it. */ | ||
538 | unsigned threads_completed; | ||
539 | |||
540 | - /* This is the index into the circular queue ordered_team_ids of the | ||
541 | - current thread that's allowed into the ordered reason. */ | ||
542 | - unsigned ordered_cur; | ||
543 | + union { | ||
544 | + /* This is the next iteration value to be allocated. In the case of | ||
545 | + GFS_STATIC loops, this the iteration start point and never changes. */ | ||
546 | + long next; | ||
547 | |||
548 | - /* This is the number of threads that have registered themselves in | ||
549 | - the circular queue ordered_team_ids. */ | ||
550 | - unsigned ordered_num_used; | ||
551 | + /* This is the returned data structure for SINGLE COPYPRIVATE. */ | ||
552 | + void *copyprivate; | ||
553 | + }; | ||
554 | |||
555 | - /* This is the team_id of the currently acknoledged owner of the ordered | ||
556 | - section, or -1u if the ordered section has not been acknowledged by | ||
557 | - any thread. This is distinguished from the thread that is *allowed* | ||
558 | - to take the section next. */ | ||
559 | - unsigned ordered_owner; | ||
560 | + union { | ||
561 | + /* Link to gomp_work_share struct for next work sharing construct | ||
562 | + encountered after this one. */ | ||
563 | + gomp_ptrlock_t next_ws; | ||
564 | + | ||
565 | + /* gomp_work_share structs are chained in the free work share cache | ||
566 | + through this. */ | ||
567 | + struct gomp_work_share *next_free; | ||
568 | + }; | ||
569 | |||
570 | - /* This is a circular queue that details which threads will be allowed | ||
571 | - into the ordered region and in which order. When a thread allocates | ||
572 | - iterations on which it is going to work, it also registers itself at | ||
573 | - the end of the array. When a thread reaches the ordered region, it | ||
574 | - checks to see if it is the one at the head of the queue. If not, it | ||
575 | - blocks on its RELEASE semaphore. */ | ||
576 | - unsigned ordered_team_ids[]; | ||
577 | + /* If only few threads are in the team, ordered_team_ids can point | ||
578 | + to this array which fills the padding at the end of this struct. */ | ||
579 | + unsigned inline_ordered_team_ids[0]; | ||
580 | }; | ||
581 | |||
582 | /* This structure contains all of the thread-local data associated with | ||
583 | @@ -133,21 +158,24 @@ struct gomp_team_state | ||
584 | |||
585 | /* This is the work share construct which this thread is currently | ||
586 | processing. Recall that with NOWAIT, not all threads may be | ||
587 | - processing the same construct. This value is NULL when there | ||
588 | - is no construct being processed. */ | ||
589 | + processing the same construct. */ | ||
590 | struct gomp_work_share *work_share; | ||
591 | |||
592 | + /* This is the previous work share construct or NULL if there wasn't any. | ||
593 | + When all threads are done with the current work sharing construct, | ||
594 | + the previous one can be freed. The current one can't, as its | ||
595 | + next_ws field is used. */ | ||
596 | + struct gomp_work_share *last_work_share; | ||
597 | + | ||
598 | /* This is the ID of this thread within the team. This value is | ||
599 | guaranteed to be between 0 and N-1, where N is the number of | ||
600 | threads in the team. */ | ||
601 | unsigned team_id; | ||
602 | |||
603 | - /* The work share "generation" is a number that increases by one for | ||
604 | - each work share construct encountered in the dynamic flow of the | ||
605 | - program. It is used to find the control data for the work share | ||
606 | - when encountering it for the first time. This particular number | ||
607 | - reflects the generation of the work_share member of this struct. */ | ||
608 | - unsigned work_share_generation; | ||
609 | +#ifdef HAVE_SYNC_BUILTINS | ||
610 | + /* Number of single stmts encountered. */ | ||
611 | + unsigned long single_count; | ||
612 | +#endif | ||
613 | |||
614 | /* For GFS_RUNTIME loops that resolved to GFS_STATIC, this is the | ||
615 | trip number through the loop. So first time a particular loop | ||
616 | @@ -163,41 +191,53 @@ struct gomp_team_state | ||
617 | |||
618 | struct gomp_team | ||
619 | { | ||
620 | - /* This lock protects access to the following work shares data structures. */ | ||
621 | - gomp_mutex_t work_share_lock; | ||
622 | - | ||
623 | - /* This is a dynamically sized array containing pointers to the control | ||
624 | - structs for all "live" work share constructs. Here "live" means that | ||
625 | - the construct has been encountered by at least one thread, and not | ||
626 | - completed by all threads. */ | ||
627 | - struct gomp_work_share **work_shares; | ||
628 | - | ||
629 | - /* The work_shares array is indexed by "generation & generation_mask". | ||
630 | - The mask will be 2**N - 1, where 2**N is the size of the array. */ | ||
631 | - unsigned generation_mask; | ||
632 | - | ||
633 | - /* These two values define the bounds of the elements of the work_shares | ||
634 | - array that are currently in use. */ | ||
635 | - unsigned oldest_live_gen; | ||
636 | - unsigned num_live_gen; | ||
637 | - | ||
638 | /* This is the number of threads in the current team. */ | ||
639 | unsigned nthreads; | ||
640 | |||
641 | + /* This is number of gomp_work_share structs that have been allocated | ||
642 | + as a block last time. */ | ||
643 | + unsigned work_share_chunk; | ||
644 | + | ||
645 | /* This is the saved team state that applied to a master thread before | ||
646 | the current thread was created. */ | ||
647 | struct gomp_team_state prev_ts; | ||
648 | |||
649 | - /* This barrier is used for most synchronization of the team. */ | ||
650 | - gomp_barrier_t barrier; | ||
651 | - | ||
652 | /* This semaphore should be used by the master thread instead of its | ||
653 | "native" semaphore in the thread structure. Required for nested | ||
654 | parallels, as the master is a member of two teams. */ | ||
655 | gomp_sem_t master_release; | ||
656 | |||
657 | - /* This array contains pointers to the release semaphore of the threads | ||
658 | - in the team. */ | ||
659 | + /* List of gomp_work_share structs chained through next_free fields. | ||
660 | + This is populated and taken off only by the first thread in the | ||
661 | + team encountering a new work sharing construct, in a critical | ||
662 | + section. */ | ||
663 | + struct gomp_work_share *work_share_list_alloc; | ||
664 | + | ||
665 | + /* List of gomp_work_share structs freed by free_work_share. New | ||
666 | + entries are atomically added to the start of the list, and | ||
667 | + alloc_work_share can safely only move all but the first entry | ||
668 | + to work_share_list alloc, as free_work_share can happen concurrently | ||
669 | + with alloc_work_share. */ | ||
670 | + struct gomp_work_share *work_share_list_free; | ||
671 | + | ||
672 | +#ifdef HAVE_SYNC_BUILTINS | ||
673 | + /* Number of simple single regions encountered by threads in this | ||
674 | + team. */ | ||
675 | + unsigned long single_count; | ||
676 | +#else | ||
677 | + /* Mutex protecting addition of workshares to work_share_list_free. */ | ||
678 | + gomp_mutex_t work_share_list_free_lock; | ||
679 | +#endif | ||
680 | + | ||
681 | + /* This barrier is used for most synchronization of the team. */ | ||
682 | + gomp_barrier_t barrier; | ||
683 | + | ||
684 | + /* Initial work shares, to avoid allocating any gomp_work_share | ||
685 | + structs in the common case. */ | ||
686 | + struct gomp_work_share work_shares[8]; | ||
687 | + | ||
688 | + /* This is an array with pointers to the release semaphore | ||
689 | + of the threads in the team. */ | ||
690 | gomp_sem_t *ordered_release[]; | ||
691 | }; | ||
692 | |||
693 | @@ -242,6 +282,11 @@ extern bool gomp_dyn_var; | ||
694 | extern bool gomp_nest_var; | ||
695 | extern enum gomp_schedule_type gomp_run_sched_var; | ||
696 | extern unsigned long gomp_run_sched_chunk; | ||
697 | +#ifndef HAVE_SYNC_BUILTINS | ||
698 | +extern gomp_mutex_t gomp_remaining_threads_lock; | ||
699 | +#endif | ||
700 | +extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; | ||
701 | +extern unsigned long gomp_available_cpus, gomp_managed_threads; | ||
702 | |||
703 | /* The attributes to be used during thread creation. */ | ||
704 | extern pthread_attr_t gomp_thread_attr; | ||
705 | @@ -306,17 +351,27 @@ extern unsigned gomp_dynamic_max_threads | ||
706 | |||
707 | /* team.c */ | ||
708 | |||
709 | +extern struct gomp_team *gomp_new_team (unsigned); | ||
710 | extern void gomp_team_start (void (*) (void *), void *, unsigned, | ||
711 | - struct gomp_work_share *); | ||
712 | + struct gomp_team *); | ||
713 | extern void gomp_team_end (void); | ||
714 | |||
715 | /* work.c */ | ||
716 | |||
717 | -extern struct gomp_work_share * gomp_new_work_share (bool, unsigned); | ||
718 | +extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned); | ||
719 | +extern void gomp_fini_work_share (struct gomp_work_share *); | ||
720 | extern bool gomp_work_share_start (bool); | ||
721 | extern void gomp_work_share_end (void); | ||
722 | extern void gomp_work_share_end_nowait (void); | ||
723 | |||
724 | +static inline void | ||
725 | +gomp_work_share_init_done (void) | ||
726 | +{ | ||
727 | + struct gomp_thread *thr = gomp_thread (); | ||
728 | + if (__builtin_expect (thr->ts.last_work_share != NULL, 1)) | ||
729 | + gomp_ptrlock_set (&thr->ts.last_work_share->next_ws, thr->ts.work_share); | ||
730 | +} | ||
731 | + | ||
732 | #ifdef HAVE_ATTRIBUTE_VISIBILITY | ||
733 | # pragma GCC visibility pop | ||
734 | #endif | ||
735 | --- libgomp/iter.c.jj 2008-03-26 14:48:34.000000000 +0100 | ||
736 | +++ libgomp/iter.c 2008-03-26 15:11:23.000000000 +0100 | ||
737 | @@ -1,4 +1,4 @@ | ||
738 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
739 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
740 | Contributed by Richard Henderson <rth@redhat.com>. | ||
741 | |||
742 | This file is part of the GNU OpenMP Library (libgomp). | ||
743 | @@ -154,7 +154,7 @@ gomp_iter_dynamic_next_locked (long *pst | ||
744 | if (start == ws->end) | ||
745 | return false; | ||
746 | |||
747 | - chunk = ws->chunk_size * ws->incr; | ||
748 | + chunk = ws->chunk_size; | ||
749 | left = ws->end - start; | ||
750 | if (ws->incr < 0) | ||
751 | { | ||
752 | @@ -186,11 +186,38 @@ gomp_iter_dynamic_next (long *pstart, lo | ||
753 | struct gomp_work_share *ws = thr->ts.work_share; | ||
754 | long start, end, nend, chunk, incr; | ||
755 | |||
756 | - start = ws->next; | ||
757 | end = ws->end; | ||
758 | incr = ws->incr; | ||
759 | - chunk = ws->chunk_size * incr; | ||
760 | + chunk = ws->chunk_size; | ||
761 | + | ||
762 | + if (__builtin_expect (ws->mode, 1)) | ||
763 | + { | ||
764 | + long tmp = __sync_fetch_and_add (&ws->next, chunk); | ||
765 | + if (incr > 0) | ||
766 | + { | ||
767 | + if (tmp >= end) | ||
768 | + return false; | ||
769 | + nend = tmp + chunk; | ||
770 | + if (nend > end) | ||
771 | + nend = end; | ||
772 | + *pstart = tmp; | ||
773 | + *pend = nend; | ||
774 | + return true; | ||
775 | + } | ||
776 | + else | ||
777 | + { | ||
778 | + if (tmp <= end) | ||
779 | + return false; | ||
780 | + nend = tmp + chunk; | ||
781 | + if (nend < end) | ||
782 | + nend = end; | ||
783 | + *pstart = tmp; | ||
784 | + *pend = nend; | ||
785 | + return true; | ||
786 | + } | ||
787 | + } | ||
788 | |||
789 | + start = ws->next; | ||
790 | while (1) | ||
791 | { | ||
792 | long left = end - start; | ||
793 | --- libgomp/work.c.jj 2007-12-07 14:41:01.000000000 +0100 | ||
794 | +++ libgomp/work.c 2008-03-27 12:21:51.000000000 +0100 | ||
795 | @@ -1,4 +1,4 @@ | ||
796 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
797 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
798 | Contributed by Richard Henderson <rth@redhat.com>. | ||
799 | |||
800 | This file is part of the GNU OpenMP Library (libgomp). | ||
801 | @@ -29,39 +29,138 @@ | ||
802 | of threads. */ | ||
803 | |||
804 | #include "libgomp.h" | ||
805 | +#include <stddef.h> | ||
806 | #include <stdlib.h> | ||
807 | #include <string.h> | ||
808 | |||
809 | |||
810 | -/* Create a new work share structure. */ | ||
811 | +/* Allocate a new work share structure, preferably from current team's | ||
812 | + free gomp_work_share cache. */ | ||
813 | |||
814 | -struct gomp_work_share * | ||
815 | -gomp_new_work_share (bool ordered, unsigned nthreads) | ||
816 | +static struct gomp_work_share * | ||
817 | +alloc_work_share (struct gomp_team *team) | ||
818 | { | ||
819 | struct gomp_work_share *ws; | ||
820 | - size_t size; | ||
821 | + unsigned int i; | ||
822 | |||
823 | - size = sizeof (*ws); | ||
824 | - if (ordered) | ||
825 | - size += nthreads * sizeof (ws->ordered_team_ids[0]); | ||
826 | + /* This is called in a critical section. */ | ||
827 | + if (team->work_share_list_alloc != NULL) | ||
828 | + { | ||
829 | + ws = team->work_share_list_alloc; | ||
830 | + team->work_share_list_alloc = ws->next_free; | ||
831 | + return ws; | ||
832 | + } | ||
833 | |||
834 | - ws = gomp_malloc_cleared (size); | ||
835 | - gomp_mutex_init (&ws->lock); | ||
836 | - ws->ordered_owner = -1; | ||
837 | +#ifdef HAVE_SYNC_BUILTINS | ||
838 | + ws = team->work_share_list_free; | ||
839 | + /* We need atomic read from work_share_list_free, | ||
840 | + as free_work_share can be called concurrently. */ | ||
841 | + __asm ("" : "+r" (ws)); | ||
842 | + | ||
843 | + if (ws && ws->next_free) | ||
844 | + { | ||
845 | + struct gomp_work_share *next = ws->next_free; | ||
846 | + ws->next_free = NULL; | ||
847 | + team->work_share_list_alloc = next->next_free; | ||
848 | + return next; | ||
849 | + } | ||
850 | +#else | ||
851 | + gomp_mutex_lock (&team->work_share_list_free_lock); | ||
852 | + ws = team->work_share_list_free; | ||
853 | + if (ws) | ||
854 | + { | ||
855 | + team->work_share_list_alloc = ws->next_free; | ||
856 | + team->work_share_list_free = NULL; | ||
857 | + gomp_mutex_unlock (&team->work_share_list_free_lock); | ||
858 | + return ws; | ||
859 | + } | ||
860 | + gomp_mutex_unlock (&team->work_share_list_free_lock); | ||
861 | +#endif | ||
862 | |||
863 | + team->work_share_chunk *= 2; | ||
864 | + ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share)); | ||
865 | + ws->next_alloc = team->work_shares[0].next_alloc; | ||
866 | + team->work_shares[0].next_alloc = ws; | ||
867 | + team->work_share_list_alloc = &ws[1]; | ||
868 | + for (i = 1; i < team->work_share_chunk - 1; i++) | ||
869 | + ws[i].next_free = &ws[i + 1]; | ||
870 | + ws[i].next_free = NULL; | ||
871 | return ws; | ||
872 | } | ||
873 | |||
874 | +/* Initialize an already allocated struct gomp_work_share. | ||
875 | + This shouldn't touch the next_alloc field. */ | ||
876 | + | ||
877 | +void | ||
878 | +gomp_init_work_share (struct gomp_work_share *ws, bool ordered, | ||
879 | + unsigned nthreads) | ||
880 | +{ | ||
881 | + gomp_mutex_init (&ws->lock); | ||
882 | + if (__builtin_expect (ordered, 0)) | ||
883 | + { | ||
884 | +#define INLINE_ORDERED_TEAM_IDS_CNT \ | ||
885 | + ((sizeof (struct gomp_work_share) \ | ||
886 | + - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \ | ||
887 | + / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0])) | ||
888 | + | ||
889 | + if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT) | ||
890 | + ws->ordered_team_ids | ||
891 | + = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids)); | ||
892 | + else | ||
893 | + ws->ordered_team_ids = ws->inline_ordered_team_ids; | ||
894 | + memset (ws->ordered_team_ids, '\0', | ||
895 | + nthreads * sizeof (*ws->ordered_team_ids)); | ||
896 | + ws->ordered_num_used = 0; | ||
897 | + ws->ordered_owner = -1; | ||
898 | + ws->ordered_cur = 0; | ||
899 | + } | ||
900 | + else | ||
901 | + ws->ordered_team_ids = NULL; | ||
902 | + gomp_ptrlock_init (&ws->next_ws, NULL); | ||
903 | + ws->threads_completed = 0; | ||
904 | +} | ||
905 | |||
906 | -/* Free a work share structure. */ | ||
907 | +/* Do any needed destruction of gomp_work_share fields before it | ||
908 | + is put back into free gomp_work_share cache or freed. */ | ||
909 | |||
910 | -static void | ||
911 | -free_work_share (struct gomp_work_share *ws) | ||
912 | +void | ||
913 | +gomp_fini_work_share (struct gomp_work_share *ws) | ||
914 | { | ||
915 | gomp_mutex_destroy (&ws->lock); | ||
916 | - free (ws); | ||
917 | + if (ws->ordered_team_ids != ws->inline_ordered_team_ids) | ||
918 | + free (ws->ordered_team_ids); | ||
919 | + gomp_ptrlock_destroy (&ws->next_ws); | ||
920 | } | ||
921 | |||
922 | +/* Free a work share struct, if not orphaned, put it into current | ||
923 | + team's free gomp_work_share cache. */ | ||
924 | + | ||
925 | +static inline void | ||
926 | +free_work_share (struct gomp_team *team, struct gomp_work_share *ws) | ||
927 | +{ | ||
928 | + gomp_fini_work_share (ws); | ||
929 | + if (__builtin_expect (team == NULL, 0)) | ||
930 | + free (ws); | ||
931 | + else | ||
932 | + { | ||
933 | + struct gomp_work_share *next_ws; | ||
934 | +#ifdef HAVE_SYNC_BUILTINS | ||
935 | + do | ||
936 | + { | ||
937 | + next_ws = team->work_share_list_free; | ||
938 | + ws->next_free = next_ws; | ||
939 | + } | ||
940 | + while (!__sync_bool_compare_and_swap (&team->work_share_list_free, | ||
941 | + next_ws, ws)); | ||
942 | +#else | ||
943 | + gomp_mutex_lock (&team->work_share_list_free_lock); | ||
944 | + next_ws = team->work_share_list_free; | ||
945 | + ws->next_free = next_ws; | ||
946 | + team->work_share_list_free = ws; | ||
947 | + gomp_mutex_unlock (&team->work_share_list_free_lock); | ||
948 | +#endif | ||
949 | + } | ||
950 | +} | ||
951 | |||
952 | /* The current thread is ready to begin the next work sharing construct. | ||
953 | In all cases, thr->ts.work_share is updated to point to the new | ||
954 | @@ -74,71 +173,34 @@ gomp_work_share_start (bool ordered) | ||
955 | struct gomp_thread *thr = gomp_thread (); | ||
956 | struct gomp_team *team = thr->ts.team; | ||
957 | struct gomp_work_share *ws; | ||
958 | - unsigned ws_index, ws_gen; | ||
959 | |||
960 | /* Work sharing constructs can be orphaned. */ | ||
961 | if (team == NULL) | ||
962 | { | ||
963 | - ws = gomp_new_work_share (ordered, 1); | ||
964 | + ws = gomp_malloc (sizeof (*ws)); | ||
965 | + gomp_init_work_share (ws, ordered, 1); | ||
966 | thr->ts.work_share = ws; | ||
967 | - thr->ts.static_trip = 0; | ||
968 | - gomp_mutex_lock (&ws->lock); | ||
969 | - return true; | ||
970 | + return ws; | ||
971 | } | ||
972 | |||
973 | - gomp_mutex_lock (&team->work_share_lock); | ||
974 | - | ||
975 | - /* This thread is beginning its next generation. */ | ||
976 | - ws_gen = ++thr->ts.work_share_generation; | ||
977 | - | ||
978 | - /* If this next generation is not newer than any other generation in | ||
979 | - the team, then simply reference the existing construct. */ | ||
980 | - if (ws_gen - team->oldest_live_gen < team->num_live_gen) | ||
981 | + ws = thr->ts.work_share; | ||
982 | + thr->ts.last_work_share = ws; | ||
983 | + ws = gomp_ptrlock_get (&ws->next_ws); | ||
984 | + if (ws == NULL) | ||
985 | { | ||
986 | - ws_index = ws_gen & team->generation_mask; | ||
987 | - ws = team->work_shares[ws_index]; | ||
988 | + /* This thread encountered a new ws first. */ | ||
989 | + struct gomp_work_share *ws = alloc_work_share (team); | ||
990 | + gomp_init_work_share (ws, ordered, team->nthreads); | ||
991 | thr->ts.work_share = ws; | ||
992 | - thr->ts.static_trip = 0; | ||
993 | - | ||
994 | - gomp_mutex_lock (&ws->lock); | ||
995 | - gomp_mutex_unlock (&team->work_share_lock); | ||
996 | - | ||
997 | - return false; | ||
998 | + return true; | ||
999 | } | ||
1000 | - | ||
1001 | - /* Resize the work shares queue if we've run out of space. */ | ||
1002 | - if (team->num_live_gen++ == team->generation_mask) | ||
1003 | + else | ||
1004 | { | ||
1005 | - team->work_shares = gomp_realloc (team->work_shares, | ||
1006 | - 2 * team->num_live_gen | ||
1007 | - * sizeof (*team->work_shares)); | ||
1008 | - | ||
1009 | - /* Unless oldest_live_gen is zero, the sequence of live elements | ||
1010 | - wraps around the end of the array. If we do nothing, we break | ||
1011 | - lookup of the existing elements. Fix that by unwrapping the | ||
1012 | - data from the front to the end. */ | ||
1013 | - if (team->oldest_live_gen > 0) | ||
1014 | - memcpy (team->work_shares + team->num_live_gen, | ||
1015 | - team->work_shares, | ||
1016 | - (team->oldest_live_gen & team->generation_mask) | ||
1017 | - * sizeof (*team->work_shares)); | ||
1018 | - | ||
1019 | - team->generation_mask = team->generation_mask * 2 + 1; | ||
1020 | - } | ||
1021 | - | ||
1022 | - ws_index = ws_gen & team->generation_mask; | ||
1023 | - ws = gomp_new_work_share (ordered, team->nthreads); | ||
1024 | - thr->ts.work_share = ws; | ||
1025 | - thr->ts.static_trip = 0; | ||
1026 | - team->work_shares[ws_index] = ws; | ||
1027 | - | ||
1028 | - gomp_mutex_lock (&ws->lock); | ||
1029 | - gomp_mutex_unlock (&team->work_share_lock); | ||
1030 | - | ||
1031 | - return true; | ||
1032 | + thr->ts.work_share = ws; | ||
1033 | + return false; | ||
1034 | + } | ||
1035 | } | ||
1036 | |||
1037 | - | ||
1038 | /* The current thread is done with its current work sharing construct. | ||
1039 | This version does imply a barrier at the end of the work-share. */ | ||
1040 | |||
1041 | @@ -147,36 +209,28 @@ gomp_work_share_end (void) | ||
1042 | { | ||
1043 | struct gomp_thread *thr = gomp_thread (); | ||
1044 | struct gomp_team *team = thr->ts.team; | ||
1045 | - struct gomp_work_share *ws = thr->ts.work_share; | ||
1046 | - bool last; | ||
1047 | - | ||
1048 | - thr->ts.work_share = NULL; | ||
1049 | + gomp_barrier_state_t bstate; | ||
1050 | |||
1051 | /* Work sharing constructs can be orphaned. */ | ||
1052 | if (team == NULL) | ||
1053 | { | ||
1054 | - free_work_share (ws); | ||
1055 | + free_work_share (NULL, thr->ts.work_share); | ||
1056 | + thr->ts.work_share = NULL; | ||
1057 | return; | ||
1058 | } | ||
1059 | |||
1060 | - last = gomp_barrier_wait_start (&team->barrier); | ||
1061 | + bstate = gomp_barrier_wait_start (&team->barrier); | ||
1062 | |||
1063 | - if (last) | ||
1064 | + if (gomp_barrier_last_thread (bstate)) | ||
1065 | { | ||
1066 | - unsigned ws_index; | ||
1067 | - | ||
1068 | - ws_index = thr->ts.work_share_generation & team->generation_mask; | ||
1069 | - team->work_shares[ws_index] = NULL; | ||
1070 | - team->oldest_live_gen++; | ||
1071 | - team->num_live_gen = 0; | ||
1072 | - | ||
1073 | - free_work_share (ws); | ||
1074 | + if (__builtin_expect (thr->ts.last_work_share != NULL, 1)) | ||
1075 | + free_work_share (team, thr->ts.last_work_share); | ||
1076 | } | ||
1077 | |||
1078 | - gomp_barrier_wait_end (&team->barrier, last); | ||
1079 | + gomp_barrier_wait_end (&team->barrier, bstate); | ||
1080 | + thr->ts.last_work_share = NULL; | ||
1081 | } | ||
1082 | |||
1083 | - | ||
1084 | /* The current thread is done with its current work sharing construct. | ||
1085 | This version does NOT imply a barrier at the end of the work-share. */ | ||
1086 | |||
1087 | @@ -188,15 +242,17 @@ gomp_work_share_end_nowait (void) | ||
1088 | struct gomp_work_share *ws = thr->ts.work_share; | ||
1089 | unsigned completed; | ||
1090 | |||
1091 | - thr->ts.work_share = NULL; | ||
1092 | - | ||
1093 | /* Work sharing constructs can be orphaned. */ | ||
1094 | if (team == NULL) | ||
1095 | { | ||
1096 | - free_work_share (ws); | ||
1097 | + free_work_share (NULL, ws); | ||
1098 | + thr->ts.work_share = NULL; | ||
1099 | return; | ||
1100 | } | ||
1101 | |||
1102 | + if (__builtin_expect (thr->ts.last_work_share == NULL, 0)) | ||
1103 | + return; | ||
1104 | + | ||
1105 | #ifdef HAVE_SYNC_BUILTINS | ||
1106 | completed = __sync_add_and_fetch (&ws->threads_completed, 1); | ||
1107 | #else | ||
1108 | @@ -206,18 +262,6 @@ gomp_work_share_end_nowait (void) | ||
1109 | #endif | ||
1110 | |||
1111 | if (completed == team->nthreads) | ||
1112 | - { | ||
1113 | - unsigned ws_index; | ||
1114 | - | ||
1115 | - gomp_mutex_lock (&team->work_share_lock); | ||
1116 | - | ||
1117 | - ws_index = thr->ts.work_share_generation & team->generation_mask; | ||
1118 | - team->work_shares[ws_index] = NULL; | ||
1119 | - team->oldest_live_gen++; | ||
1120 | - team->num_live_gen--; | ||
1121 | - | ||
1122 | - gomp_mutex_unlock (&team->work_share_lock); | ||
1123 | - | ||
1124 | - free_work_share (ws); | ||
1125 | - } | ||
1126 | + free_work_share (team, thr->ts.last_work_share); | ||
1127 | + thr->ts.last_work_share = NULL; | ||
1128 | } | ||
1129 | --- libgomp/single.c.jj 2007-12-07 14:41:01.000000000 +0100 | ||
1130 | +++ libgomp/single.c 2008-03-26 15:11:32.000000000 +0100 | ||
1131 | @@ -1,4 +1,4 @@ | ||
1132 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
1133 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
1134 | Contributed by Richard Henderson <rth@redhat.com>. | ||
1135 | |||
1136 | This file is part of the GNU OpenMP Library (libgomp). | ||
1137 | @@ -37,10 +37,24 @@ | ||
1138 | bool | ||
1139 | GOMP_single_start (void) | ||
1140 | { | ||
1141 | +#ifdef HAVE_SYNC_BUILTINS | ||
1142 | + struct gomp_thread *thr = gomp_thread (); | ||
1143 | + struct gomp_team *team = thr->ts.team; | ||
1144 | + unsigned long single_count; | ||
1145 | + | ||
1146 | + if (__builtin_expect (team == NULL, 0)) | ||
1147 | + return true; | ||
1148 | + | ||
1149 | + single_count = thr->ts.single_count++; | ||
1150 | + return __sync_bool_compare_and_swap (&team->single_count, single_count, | ||
1151 | + single_count + 1L); | ||
1152 | +#else | ||
1153 | bool ret = gomp_work_share_start (false); | ||
1154 | - gomp_mutex_unlock (&gomp_thread ()->ts.work_share->lock); | ||
1155 | + if (ret) | ||
1156 | + gomp_work_share_init_done (); | ||
1157 | gomp_work_share_end_nowait (); | ||
1158 | return ret; | ||
1159 | +#endif | ||
1160 | } | ||
1161 | |||
1162 | /* This routine is called when first encountering a SINGLE construct that | ||
1163 | @@ -57,10 +71,12 @@ GOMP_single_copy_start (void) | ||
1164 | void *ret; | ||
1165 | |||
1166 | first = gomp_work_share_start (false); | ||
1167 | - gomp_mutex_unlock (&thr->ts.work_share->lock); | ||
1168 | |||
1169 | if (first) | ||
1170 | - ret = NULL; | ||
1171 | + { | ||
1172 | + gomp_work_share_init_done (); | ||
1173 | + ret = NULL; | ||
1174 | + } | ||
1175 | else | ||
1176 | { | ||
1177 | gomp_barrier_wait (&thr->ts.team->barrier); | ||
1178 | --- libgomp/loop.c.jj 2007-12-07 14:41:01.000000000 +0100 | ||
1179 | +++ libgomp/loop.c 2008-03-26 18:47:04.000000000 +0100 | ||
1180 | @@ -27,8 +27,9 @@ | ||
1181 | |||
1182 | /* This file handles the LOOP (FOR/DO) construct. */ | ||
1183 | |||
1184 | -#include "libgomp.h" | ||
1185 | +#include <limits.h> | ||
1186 | #include <stdlib.h> | ||
1187 | +#include "libgomp.h" | ||
1188 | |||
1189 | |||
1190 | /* Initialize the given work share construct from the given arguments. */ | ||
1191 | @@ -44,6 +45,39 @@ gomp_loop_init (struct gomp_work_share * | ||
1192 | ? start : end; | ||
1193 | ws->incr = incr; | ||
1194 | ws->next = start; | ||
1195 | + if (sched == GFS_DYNAMIC) | ||
1196 | + { | ||
1197 | + ws->chunk_size *= incr; | ||
1198 | + | ||
1199 | +#ifdef HAVE_SYNC_BUILTINS | ||
1200 | + { | ||
1201 | + /* For dynamic scheduling prepare things to make each iteration | ||
1202 | + faster. */ | ||
1203 | + struct gomp_thread *thr = gomp_thread (); | ||
1204 | + struct gomp_team *team = thr->ts.team; | ||
1205 | + long nthreads = team ? team->nthreads : 1; | ||
1206 | + | ||
1207 | + if (__builtin_expect (incr > 0, 1)) | ||
1208 | + { | ||
1209 | + /* Cheap overflow protection. */ | ||
1210 | + if (__builtin_expect ((nthreads | ws->chunk_size) | ||
1211 | + >= 1UL << (sizeof (long) | ||
1212 | + * __CHAR_BIT__ / 2 - 1), 0)) | ||
1213 | + ws->mode = 0; | ||
1214 | + else | ||
1215 | + ws->mode = ws->end < (LONG_MAX | ||
1216 | + - (nthreads + 1) * ws->chunk_size); | ||
1217 | + } | ||
1218 | + /* Cheap overflow protection. */ | ||
1219 | + else if (__builtin_expect ((nthreads | -ws->chunk_size) | ||
1220 | + >= 1UL << (sizeof (long) | ||
1221 | + * __CHAR_BIT__ / 2 - 1), 0)) | ||
1222 | + ws->mode = 0; | ||
1223 | + else | ||
1224 | + ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX; | ||
1225 | + } | ||
1226 | +#endif | ||
1227 | + } | ||
1228 | } | ||
1229 | |||
1230 | /* The *_start routines are called when first encountering a loop construct | ||
1231 | @@ -68,10 +102,13 @@ gomp_loop_static_start (long start, long | ||
1232 | { | ||
1233 | struct gomp_thread *thr = gomp_thread (); | ||
1234 | |||
1235 | + thr->ts.static_trip = 0; | ||
1236 | if (gomp_work_share_start (false)) | ||
1237 | - gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1238 | - GFS_STATIC, chunk_size); | ||
1239 | - gomp_mutex_unlock (&thr->ts.work_share->lock); | ||
1240 | + { | ||
1241 | + gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1242 | + GFS_STATIC, chunk_size); | ||
1243 | + gomp_work_share_init_done (); | ||
1244 | + } | ||
1245 | |||
1246 | return !gomp_iter_static_next (istart, iend); | ||
1247 | } | ||
1248 | @@ -84,13 +121,16 @@ gomp_loop_dynamic_start (long start, lon | ||
1249 | bool ret; | ||
1250 | |||
1251 | if (gomp_work_share_start (false)) | ||
1252 | - gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1253 | - GFS_DYNAMIC, chunk_size); | ||
1254 | + { | ||
1255 | + gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1256 | + GFS_DYNAMIC, chunk_size); | ||
1257 | + gomp_work_share_init_done (); | ||
1258 | + } | ||
1259 | |||
1260 | #ifdef HAVE_SYNC_BUILTINS | ||
1261 | - gomp_mutex_unlock (&thr->ts.work_share->lock); | ||
1262 | ret = gomp_iter_dynamic_next (istart, iend); | ||
1263 | #else | ||
1264 | + gomp_mutex_lock (&thr->ts.work_share->lock); | ||
1265 | ret = gomp_iter_dynamic_next_locked (istart, iend); | ||
1266 | gomp_mutex_unlock (&thr->ts.work_share->lock); | ||
1267 | #endif | ||
1268 | @@ -106,13 +146,16 @@ gomp_loop_guided_start (long start, long | ||
1269 | bool ret; | ||
1270 | |||
1271 | if (gomp_work_share_start (false)) | ||
1272 | - gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1273 | - GFS_GUIDED, chunk_size); | ||
1274 | + { | ||
1275 | + gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1276 | + GFS_GUIDED, chunk_size); | ||
1277 | + gomp_work_share_init_done (); | ||
1278 | + } | ||
1279 | |||
1280 | #ifdef HAVE_SYNC_BUILTINS | ||
1281 | - gomp_mutex_unlock (&thr->ts.work_share->lock); | ||
1282 | ret = gomp_iter_guided_next (istart, iend); | ||
1283 | #else | ||
1284 | + gomp_mutex_lock (&thr->ts.work_share->lock); | ||
1285 | ret = gomp_iter_guided_next_locked (istart, iend); | ||
1286 | gomp_mutex_unlock (&thr->ts.work_share->lock); | ||
1287 | #endif | ||
1288 | @@ -149,13 +192,14 @@ gomp_loop_ordered_static_start (long sta | ||
1289 | { | ||
1290 | struct gomp_thread *thr = gomp_thread (); | ||
1291 | |||
1292 | + thr->ts.static_trip = 0; | ||
1293 | if (gomp_work_share_start (true)) | ||
1294 | { | ||
1295 | gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1296 | GFS_STATIC, chunk_size); | ||
1297 | gomp_ordered_static_init (); | ||
1298 | + gomp_work_share_init_done (); | ||
1299 | } | ||
1300 | - gomp_mutex_unlock (&thr->ts.work_share->lock); | ||
1301 | |||
1302 | return !gomp_iter_static_next (istart, iend); | ||
1303 | } | ||
1304 | @@ -168,8 +212,14 @@ gomp_loop_ordered_dynamic_start (long st | ||
1305 | bool ret; | ||
1306 | |||
1307 | if (gomp_work_share_start (true)) | ||
1308 | - gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1309 | - GFS_DYNAMIC, chunk_size); | ||
1310 | + { | ||
1311 | + gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1312 | + GFS_DYNAMIC, chunk_size); | ||
1313 | + gomp_mutex_lock (&thr->ts.work_share->lock); | ||
1314 | + gomp_work_share_init_done (); | ||
1315 | + } | ||
1316 | + else | ||
1317 | + gomp_mutex_lock (&thr->ts.work_share->lock); | ||
1318 | |||
1319 | ret = gomp_iter_dynamic_next_locked (istart, iend); | ||
1320 | if (ret) | ||
1321 | @@ -187,8 +237,14 @@ gomp_loop_ordered_guided_start (long sta | ||
1322 | bool ret; | ||
1323 | |||
1324 | if (gomp_work_share_start (true)) | ||
1325 | - gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1326 | - GFS_GUIDED, chunk_size); | ||
1327 | + { | ||
1328 | + gomp_loop_init (thr->ts.work_share, start, end, incr, | ||
1329 | + GFS_GUIDED, chunk_size); | ||
1330 | + gomp_mutex_lock (&thr->ts.work_share->lock); | ||
1331 | + gomp_work_share_init_done (); | ||
1332 | + } | ||
1333 | + else | ||
1334 | + gomp_mutex_lock (&thr->ts.work_share->lock); | ||
1335 | |||
1336 | ret = gomp_iter_guided_next_locked (istart, iend); | ||
1337 | if (ret) | ||
1338 | @@ -375,12 +431,12 @@ gomp_parallel_loop_start (void (*fn) (vo | ||
1339 | long incr, enum gomp_schedule_type sched, | ||
1340 | long chunk_size) | ||
1341 | { | ||
1342 | - struct gomp_work_share *ws; | ||
1343 | + struct gomp_team *team; | ||
1344 | |||
1345 | num_threads = gomp_resolve_num_threads (num_threads); | ||
1346 | - ws = gomp_new_work_share (false, num_threads); | ||
1347 | - gomp_loop_init (ws, start, end, incr, sched, chunk_size); | ||
1348 | - gomp_team_start (fn, data, num_threads, ws); | ||
1349 | + team = gomp_new_team (num_threads); | ||
1350 | + gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size); | ||
1351 | + gomp_team_start (fn, data, num_threads, team); | ||
1352 | } | ||
1353 | |||
1354 | void | ||
1355 | --- libgomp/Makefile.in.jj 2008-01-10 20:53:47.000000000 +0100 | ||
1356 | +++ libgomp/Makefile.in 2008-03-26 18:51:01.000000000 +0100 | ||
1357 | @@ -83,7 +83,7 @@ libgomp_la_LIBADD = | ||
1358 | am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ | ||
1359 | error.lo iter.lo loop.lo ordered.lo parallel.lo sections.lo \ | ||
1360 | single.lo team.lo work.lo lock.lo mutex.lo proc.lo sem.lo \ | ||
1361 | - bar.lo time.lo fortran.lo affinity.lo | ||
1362 | + bar.lo ptrlock.lo time.lo fortran.lo affinity.lo | ||
1363 | libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) | ||
1364 | DEFAULT_INCLUDES = -I. -I$(srcdir) -I. | ||
1365 | depcomp = $(SHELL) $(top_srcdir)/../depcomp | ||
1366 | @@ -292,7 +292,7 @@ libgomp_version_info = -version-info $(l | ||
1367 | libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script) | ||
1368 | libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ | ||
1369 | loop.c ordered.c parallel.c sections.c single.c team.c work.c \ | ||
1370 | - lock.c mutex.c proc.c sem.c bar.c time.c fortran.c affinity.c | ||
1371 | + lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c | ||
1372 | |||
1373 | nodist_noinst_HEADERS = libgomp_f.h | ||
1374 | nodist_libsubinclude_HEADERS = omp.h | ||
1375 | @@ -434,6 +434,7 @@ distclean-compile: | ||
1376 | @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ | ||
1377 | @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ | ||
1378 | @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ | ||
1379 | +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ | ||
1380 | @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ | ||
1381 | @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ | ||
1382 | @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ | ||
1383 | --- libgomp/testsuite/libgomp.c/loop-4.c.jj 2008-03-26 18:47:04.000000000 +0100 | ||
1384 | +++ libgomp/testsuite/libgomp.c/loop-4.c 2008-03-26 18:47:04.000000000 +0100 | ||
1385 | @@ -0,0 +1,28 @@ | ||
1386 | +/* { dg-do run } */ | ||
1387 | + | ||
1388 | +extern void abort (void); | ||
1389 | + | ||
1390 | +int | ||
1391 | +main (void) | ||
1392 | +{ | ||
1393 | + int e = 0; | ||
1394 | +#pragma omp parallel num_threads (4) reduction(+:e) | ||
1395 | + { | ||
1396 | + long i; | ||
1397 | + #pragma omp for schedule(dynamic,1) | ||
1398 | + for (i = __LONG_MAX__ - 30001; i <= __LONG_MAX__ - 10001; i += 10000) | ||
1399 | + if (i != __LONG_MAX__ - 30001 | ||
1400 | + && i != __LONG_MAX__ - 20001 | ||
1401 | + && i != __LONG_MAX__ - 10001) | ||
1402 | + e = 1; | ||
1403 | + #pragma omp for schedule(dynamic,1) | ||
1404 | + for (i = -__LONG_MAX__ + 30000; i >= -__LONG_MAX__ + 10000; i -= 10000) | ||
1405 | + if (i != -__LONG_MAX__ + 30000 | ||
1406 | + && i != -__LONG_MAX__ + 20000 | ||
1407 | + && i != -__LONG_MAX__ + 10000) | ||
1408 | + e = 1; | ||
1409 | + } | ||
1410 | + if (e) | ||
1411 | + abort (); | ||
1412 | + return 0; | ||
1413 | +} | ||
1414 | --- libgomp/Makefile.am.jj 2007-12-07 14:41:01.000000000 +0100 | ||
1415 | +++ libgomp/Makefile.am 2008-03-26 15:15:19.000000000 +0100 | ||
1416 | @@ -31,7 +31,7 @@ libgomp_la_LDFLAGS = $(libgomp_version_i | ||
1417 | |||
1418 | libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ | ||
1419 | loop.c ordered.c parallel.c sections.c single.c team.c work.c \ | ||
1420 | - lock.c mutex.c proc.c sem.c bar.c time.c fortran.c affinity.c | ||
1421 | + lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c | ||
1422 | |||
1423 | nodist_noinst_HEADERS = libgomp_f.h | ||
1424 | nodist_libsubinclude_HEADERS = omp.h | ||
1425 | --- libgomp/team.c.jj 2007-12-07 14:41:01.000000000 +0100 | ||
1426 | +++ libgomp/team.c 2008-03-27 12:22:26.000000000 +0100 | ||
1427 | @@ -94,7 +94,7 @@ gomp_thread_start (void *xdata) | ||
1428 | { | ||
1429 | gomp_barrier_wait (&thr->ts.team->barrier); | ||
1430 | local_fn (local_data); | ||
1431 | - gomp_barrier_wait (&thr->ts.team->barrier); | ||
1432 | + gomp_barrier_wait_last (&thr->ts.team->barrier); | ||
1433 | } | ||
1434 | else | ||
1435 | { | ||
1436 | @@ -114,11 +114,10 @@ gomp_thread_start (void *xdata) | ||
1437 | thr->data = NULL; | ||
1438 | thr->ts.team = NULL; | ||
1439 | thr->ts.work_share = NULL; | ||
1440 | + thr->ts.last_work_share = NULL; | ||
1441 | thr->ts.team_id = 0; | ||
1442 | - thr->ts.work_share_generation = 0; | ||
1443 | - thr->ts.static_trip = 0; | ||
1444 | |||
1445 | - gomp_barrier_wait (&team->barrier); | ||
1446 | + gomp_barrier_wait_last (&team->barrier); | ||
1447 | gomp_barrier_wait (&gomp_threads_dock); | ||
1448 | |||
1449 | local_fn = thr->fn; | ||
1450 | @@ -133,21 +132,29 @@ gomp_thread_start (void *xdata) | ||
1451 | |||
1452 | /* Create a new team data structure. */ | ||
1453 | |||
1454 | -static struct gomp_team * | ||
1455 | -new_team (unsigned nthreads, struct gomp_work_share *work_share) | ||
1456 | +struct gomp_team * | ||
1457 | +gomp_new_team (unsigned nthreads) | ||
1458 | { | ||
1459 | struct gomp_team *team; | ||
1460 | size_t size; | ||
1461 | + int i; | ||
1462 | |||
1463 | size = sizeof (*team) + nthreads * sizeof (team->ordered_release[0]); | ||
1464 | team = gomp_malloc (size); | ||
1465 | - gomp_mutex_init (&team->work_share_lock); | ||
1466 | |||
1467 | - team->work_shares = gomp_malloc (4 * sizeof (struct gomp_work_share *)); | ||
1468 | - team->generation_mask = 3; | ||
1469 | - team->oldest_live_gen = work_share == NULL; | ||
1470 | - team->num_live_gen = work_share != NULL; | ||
1471 | - team->work_shares[0] = work_share; | ||
1472 | + team->work_share_chunk = 8; | ||
1473 | +#ifdef HAVE_SYNC_BUILTINS | ||
1474 | + team->single_count = 0; | ||
1475 | +#else | ||
1476 | + gomp_mutex_init (&team->work_share_list_free_lock); | ||
1477 | +#endif | ||
1478 | + gomp_init_work_share (&team->work_shares[0], false, nthreads); | ||
1479 | + team->work_shares[0].next_alloc = NULL; | ||
1480 | + team->work_share_list_free = NULL; | ||
1481 | + team->work_share_list_alloc = &team->work_shares[1]; | ||
1482 | + for (i = 1; i < 7; i++) | ||
1483 | + team->work_shares[i].next_free = &team->work_shares[i + 1]; | ||
1484 | + team->work_shares[i].next_free = NULL; | ||
1485 | |||
1486 | team->nthreads = nthreads; | ||
1487 | gomp_barrier_init (&team->barrier, nthreads); | ||
1488 | @@ -164,10 +171,22 @@ new_team (unsigned nthreads, struct gomp | ||
1489 | static void | ||
1490 | free_team (struct gomp_team *team) | ||
1491 | { | ||
1492 | - free (team->work_shares); | ||
1493 | - gomp_mutex_destroy (&team->work_share_lock); | ||
1494 | + if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0)) | ||
1495 | + { | ||
1496 | + struct gomp_work_share *ws = team->work_shares[0].next_alloc; | ||
1497 | + do | ||
1498 | + { | ||
1499 | + struct gomp_work_share *next_ws = ws->next_alloc; | ||
1500 | + free (ws); | ||
1501 | + ws = next_ws; | ||
1502 | + } | ||
1503 | + while (ws != NULL); | ||
1504 | + } | ||
1505 | gomp_barrier_destroy (&team->barrier); | ||
1506 | gomp_sem_destroy (&team->master_release); | ||
1507 | +#ifndef HAVE_SYNC_BUILTINS | ||
1508 | + gomp_mutex_destroy (&team->work_share_list_free_lock); | ||
1509 | +#endif | ||
1510 | free (team); | ||
1511 | } | ||
1512 | |||
1513 | @@ -176,11 +195,10 @@ free_team (struct gomp_team *team) | ||
1514 | |||
1515 | void | ||
1516 | gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, | ||
1517 | - struct gomp_work_share *work_share) | ||
1518 | + struct gomp_team *team) | ||
1519 | { | ||
1520 | struct gomp_thread_start_data *start_data; | ||
1521 | struct gomp_thread *thr, *nthr; | ||
1522 | - struct gomp_team *team; | ||
1523 | bool nested; | ||
1524 | unsigned i, n, old_threads_used = 0; | ||
1525 | pthread_attr_t thread_attr, *attr; | ||
1526 | @@ -188,17 +206,18 @@ gomp_team_start (void (*fn) (void *), vo | ||
1527 | thr = gomp_thread (); | ||
1528 | nested = thr->ts.team != NULL; | ||
1529 | |||
1530 | - team = new_team (nthreads, work_share); | ||
1531 | - | ||
1532 | /* Always save the previous state, even if this isn't a nested team. | ||
1533 | In particular, we should save any work share state from an outer | ||
1534 | orphaned work share construct. */ | ||
1535 | team->prev_ts = thr->ts; | ||
1536 | |||
1537 | thr->ts.team = team; | ||
1538 | - thr->ts.work_share = work_share; | ||
1539 | thr->ts.team_id = 0; | ||
1540 | - thr->ts.work_share_generation = 0; | ||
1541 | + thr->ts.work_share = &team->work_shares[0]; | ||
1542 | + thr->ts.last_work_share = NULL; | ||
1543 | +#ifdef HAVE_SYNC_BUILTINS | ||
1544 | + thr->ts.single_count = 0; | ||
1545 | +#endif | ||
1546 | thr->ts.static_trip = 0; | ||
1547 | |||
1548 | if (nthreads == 1) | ||
1549 | @@ -241,9 +260,12 @@ gomp_team_start (void (*fn) (void *), vo | ||
1550 | { | ||
1551 | nthr = gomp_threads[i]; | ||
1552 | nthr->ts.team = team; | ||
1553 | - nthr->ts.work_share = work_share; | ||
1554 | + nthr->ts.work_share = &team->work_shares[0]; | ||
1555 | + nthr->ts.last_work_share = NULL; | ||
1556 | nthr->ts.team_id = i; | ||
1557 | - nthr->ts.work_share_generation = 0; | ||
1558 | +#ifdef HAVE_SYNC_BUILTINS | ||
1559 | + nthr->ts.single_count = 0; | ||
1560 | +#endif | ||
1561 | nthr->ts.static_trip = 0; | ||
1562 | nthr->fn = fn; | ||
1563 | nthr->data = data; | ||
1564 | @@ -266,8 +288,24 @@ gomp_team_start (void (*fn) (void *), vo | ||
1565 | } | ||
1566 | } | ||
1567 | |||
1568 | + if (__builtin_expect (nthreads > old_threads_used, 0)) | ||
1569 | + { | ||
1570 | + long diff = (long) nthreads - (long) old_threads_used; | ||
1571 | + | ||
1572 | + if (old_threads_used == 0) | ||
1573 | + --diff; | ||
1574 | + | ||
1575 | +#ifdef HAVE_SYNC_BUILTINS | ||
1576 | + __sync_fetch_and_add (&gomp_managed_threads, diff); | ||
1577 | +#else | ||
1578 | + gomp_mutex_lock (&gomp_remaining_threads_lock); | ||
1579 | + gomp_managed_threads += diff; | ||
1580 | + gomp_mutex_unlock (&gomp_remaining_threads_lock); | ||
1581 | +#endif | ||
1582 | + } | ||
1583 | + | ||
1584 | attr = &gomp_thread_attr; | ||
1585 | - if (gomp_cpu_affinity != NULL) | ||
1586 | + if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) | ||
1587 | { | ||
1588 | size_t stacksize; | ||
1589 | pthread_attr_init (&thread_attr); | ||
1590 | @@ -287,9 +325,12 @@ gomp_team_start (void (*fn) (void *), vo | ||
1591 | int err; | ||
1592 | |||
1593 | start_data->ts.team = team; | ||
1594 | - start_data->ts.work_share = work_share; | ||
1595 | + start_data->ts.work_share = &team->work_shares[0]; | ||
1596 | + start_data->ts.last_work_share = NULL; | ||
1597 | start_data->ts.team_id = i; | ||
1598 | - start_data->ts.work_share_generation = 0; | ||
1599 | +#ifdef HAVE_SYNC_BUILTINS | ||
1600 | + start_data->ts.single_count = 0; | ||
1601 | +#endif | ||
1602 | start_data->ts.static_trip = 0; | ||
1603 | start_data->fn = fn; | ||
1604 | start_data->fn_data = data; | ||
1605 | @@ -303,7 +344,7 @@ gomp_team_start (void (*fn) (void *), vo | ||
1606 | gomp_fatal ("Thread creation failed: %s", strerror (err)); | ||
1607 | } | ||
1608 | |||
1609 | - if (gomp_cpu_affinity != NULL) | ||
1610 | + if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) | ||
1611 | pthread_attr_destroy (&thread_attr); | ||
1612 | |||
1613 | do_release: | ||
1614 | @@ -313,8 +354,20 @@ gomp_team_start (void (*fn) (void *), vo | ||
1615 | that should arrive back at the end of this team. The extra | ||
1616 | threads should be exiting. Note that we arrange for this test | ||
1617 | to never be true for nested teams. */ | ||
1618 | - if (nthreads < old_threads_used) | ||
1619 | - gomp_barrier_reinit (&gomp_threads_dock, nthreads); | ||
1620 | + if (__builtin_expect (nthreads < old_threads_used, 0)) | ||
1621 | + { | ||
1622 | + long diff = (long) nthreads - (long) old_threads_used; | ||
1623 | + | ||
1624 | + gomp_barrier_reinit (&gomp_threads_dock, nthreads); | ||
1625 | + | ||
1626 | +#ifdef HAVE_SYNC_BUILTINS | ||
1627 | + __sync_fetch_and_add (&gomp_managed_threads, diff); | ||
1628 | +#else | ||
1629 | + gomp_mutex_lock (&gomp_remaining_threads_lock); | ||
1630 | + gomp_managed_threads += diff; | ||
1631 | + gomp_mutex_unlock (&gomp_remaining_threads_lock); | ||
1632 | +#endif | ||
1633 | + } | ||
1634 | } | ||
1635 | |||
1636 | |||
1637 | @@ -329,8 +382,21 @@ gomp_team_end (void) | ||
1638 | |||
1639 | gomp_barrier_wait (&team->barrier); | ||
1640 | |||
1641 | + gomp_fini_work_share (thr->ts.work_share); | ||
1642 | + | ||
1643 | thr->ts = team->prev_ts; | ||
1644 | |||
1645 | + if (__builtin_expect (thr->ts.team != NULL, 0)) | ||
1646 | + { | ||
1647 | +#ifdef HAVE_SYNC_BUILTINS | ||
1648 | + __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads); | ||
1649 | +#else | ||
1650 | + gomp_mutex_lock (&gomp_remaining_threads_lock); | ||
1651 | + gomp_managed_threads -= team->nthreads - 1L; | ||
1652 | + gomp_mutex_unlock (&gomp_remaining_threads_lock); | ||
1653 | +#endif | ||
1654 | + } | ||
1655 | + | ||
1656 | free_team (team); | ||
1657 | } | ||
1658 | |||
1659 | --- libgomp/config/posix/bar.h.jj 2007-12-07 14:41:01.000000000 +0100 | ||
1660 | +++ libgomp/config/posix/bar.h 2008-03-26 15:11:32.000000000 +0100 | ||
1661 | @@ -1,4 +1,4 @@ | ||
1662 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
1663 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
1664 | Contributed by Richard Henderson <rth@redhat.com>. | ||
1665 | |||
1666 | This file is part of the GNU OpenMP Library (libgomp). | ||
1667 | @@ -46,18 +46,32 @@ typedef struct | ||
1668 | unsigned total; | ||
1669 | unsigned arrived; | ||
1670 | } gomp_barrier_t; | ||
1671 | +typedef bool gomp_barrier_state_t; | ||
1672 | |||
1673 | extern void gomp_barrier_init (gomp_barrier_t *, unsigned); | ||
1674 | extern void gomp_barrier_reinit (gomp_barrier_t *, unsigned); | ||
1675 | extern void gomp_barrier_destroy (gomp_barrier_t *); | ||
1676 | |||
1677 | extern void gomp_barrier_wait (gomp_barrier_t *); | ||
1678 | -extern void gomp_barrier_wait_end (gomp_barrier_t *, bool); | ||
1679 | +extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t); | ||
1680 | |||
1681 | -static inline bool gomp_barrier_wait_start (gomp_barrier_t *bar) | ||
1682 | +static inline gomp_barrier_state_t | ||
1683 | +gomp_barrier_wait_start (gomp_barrier_t *bar) | ||
1684 | { | ||
1685 | gomp_mutex_lock (&bar->mutex1); | ||
1686 | return ++bar->arrived == bar->total; | ||
1687 | } | ||
1688 | |||
1689 | +static inline bool | ||
1690 | +gomp_barrier_last_thread (gomp_barrier_state_t state) | ||
1691 | +{ | ||
1692 | + return state; | ||
1693 | +} | ||
1694 | + | ||
1695 | +static inline void | ||
1696 | +gomp_barrier_wait_last (gomp_barrier_t *bar) | ||
1697 | +{ | ||
1698 | + gomp_barrier_wait (bar); | ||
1699 | +} | ||
1700 | + | ||
1701 | #endif /* GOMP_BARRIER_H */ | ||
1702 | --- libgomp/config/posix/ptrlock.h.jj 2008-03-26 15:11:32.000000000 +0100 | ||
1703 | +++ libgomp/config/posix/ptrlock.h 2008-03-26 15:11:32.000000000 +0100 | ||
1704 | @@ -0,0 +1,69 @@ | ||
1705 | +/* Copyright (C) 2008 Free Software Foundation, Inc. | ||
1706 | + Contributed by Jakub Jelinek <jakub@redhat.com>. | ||
1707 | + | ||
1708 | + This file is part of the GNU OpenMP Library (libgomp). | ||
1709 | + | ||
1710 | + Libgomp is free software; you can redistribute it and/or modify it | ||
1711 | + under the terms of the GNU Lesser General Public License as published by | ||
1712 | + the Free Software Foundation; either version 2.1 of the License, or | ||
1713 | + (at your option) any later version. | ||
1714 | + | ||
1715 | + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | ||
1716 | + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
1717 | + FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for | ||
1718 | + more details. | ||
1719 | + | ||
1720 | + You should have received a copy of the GNU Lesser General Public License | ||
1721 | + along with libgomp; see the file COPYING.LIB. If not, write to the | ||
1722 | + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | ||
1723 | + MA 02110-1301, USA. */ | ||
1724 | + | ||
1725 | +/* As a special exception, if you link this library with other files, some | ||
1726 | + of which are compiled with GCC, to produce an executable, this library | ||
1727 | + does not by itself cause the resulting executable to be covered by the | ||
1728 | + GNU General Public License. This exception does not however invalidate | ||
1729 | + any other reasons why the executable file might be covered by the GNU | ||
1730 | + General Public License. */ | ||
1731 | + | ||
1732 | +/* This is a Linux specific implementation of a mutex synchronization | ||
1733 | + mechanism for libgomp. This type is private to the library. This | ||
1734 | + implementation uses atomic instructions and the futex syscall. */ | ||
1735 | + | ||
1736 | +#ifndef GOMP_PTRLOCK_H | ||
1737 | +#define GOMP_PTRLOCK_H 1 | ||
1738 | + | ||
1739 | +typedef struct { void *ptr; gomp_mutex_t lock; } gomp_ptrlock_t; | ||
1740 | + | ||
1741 | +static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr) | ||
1742 | +{ | ||
1743 | + ptrlock->ptr = ptr; | ||
1744 | + gomp_mutex_init (&ptrlock->lock); | ||
1745 | +} | ||
1746 | + | ||
1747 | +static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock) | ||
1748 | +{ | ||
1749 | + if (ptrlock->ptr != NULL) | ||
1750 | + return ptrlock->ptr; | ||
1751 | + | ||
1752 | + gomp_mutex_lock (&ptrlock->lock); | ||
1753 | + if (ptrlock->ptr != NULL) | ||
1754 | + { | ||
1755 | + gomp_mutex_unlock (&ptrlock->lock); | ||
1756 | + return ptrlock->ptr; | ||
1757 | + } | ||
1758 | + | ||
1759 | + return NULL; | ||
1760 | +} | ||
1761 | + | ||
1762 | +static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr) | ||
1763 | +{ | ||
1764 | + ptrlock->ptr = ptr; | ||
1765 | + gomp_mutex_unlock (&ptrlock->lock); | ||
1766 | +} | ||
1767 | + | ||
1768 | +static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock) | ||
1769 | +{ | ||
1770 | + gomp_mutex_destroy (&ptrlock->lock); | ||
1771 | +} | ||
1772 | + | ||
1773 | +#endif /* GOMP_PTRLOCK_H */ | ||
1774 | --- libgomp/config/posix/ptrlock.c.jj 2008-03-26 15:11:32.000000000 +0100 | ||
1775 | +++ libgomp/config/posix/ptrlock.c 2008-03-26 15:11:32.000000000 +0100 | ||
1776 | @@ -0,0 +1 @@ | ||
1777 | +/* Everything is in the header. */ | ||
1778 | --- libgomp/config/posix/bar.c.jj 2007-12-07 14:41:01.000000000 +0100 | ||
1779 | +++ libgomp/config/posix/bar.c 2008-03-26 15:11:32.000000000 +0100 | ||
1780 | @@ -1,4 +1,4 @@ | ||
1781 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
1782 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
1783 | Contributed by Richard Henderson <rth@redhat.com>. | ||
1784 | |||
1785 | This file is part of the GNU OpenMP Library (libgomp). | ||
1786 | @@ -70,7 +70,7 @@ gomp_barrier_reinit (gomp_barrier_t *bar | ||
1787 | } | ||
1788 | |||
1789 | void | ||
1790 | -gomp_barrier_wait_end (gomp_barrier_t *bar, bool last) | ||
1791 | +gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t last) | ||
1792 | { | ||
1793 | unsigned int n; | ||
1794 | |||
1795 | --- libgomp/config/linux/alpha/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 | ||
1796 | +++ libgomp/config/linux/alpha/futex.h 2008-03-26 15:11:32.000000000 +0100 | ||
1797 | @@ -1,4 +1,4 @@ | ||
1798 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
1799 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
1800 | Contributed by Richard Henderson <rth@redhat.com>. | ||
1801 | |||
1802 | This file is part of the GNU OpenMP Library (libgomp). | ||
1803 | @@ -30,8 +30,6 @@ | ||
1804 | #ifndef SYS_futex | ||
1805 | #define SYS_futex 394 | ||
1806 | #endif | ||
1807 | -#define FUTEX_WAIT 0 | ||
1808 | -#define FUTEX_WAKE 1 | ||
1809 | |||
1810 | |||
1811 | static inline void | ||
1812 | @@ -45,7 +43,7 @@ futex_wait (int *addr, int val) | ||
1813 | |||
1814 | sc_0 = SYS_futex; | ||
1815 | sc_16 = (long) addr; | ||
1816 | - sc_17 = FUTEX_WAIT; | ||
1817 | + sc_17 = gomp_futex_wait; | ||
1818 | sc_18 = val; | ||
1819 | sc_19 = 0; | ||
1820 | __asm volatile ("callsys" | ||
1821 | @@ -53,6 +51,20 @@ futex_wait (int *addr, int val) | ||
1822 | : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) | ||
1823 | : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", | ||
1824 | "$22", "$23", "$24", "$25", "$27", "$28", "memory"); | ||
1825 | + if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) | ||
1826 | + { | ||
1827 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
1828 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
1829 | + sc_0 = SYS_futex; | ||
1830 | + sc_17 &= ~FUTEX_PRIVATE_FLAG; | ||
1831 | + sc_19 = 0; | ||
1832 | + __asm volatile ("callsys" | ||
1833 | + : "=r" (sc_0), "=r"(sc_19) | ||
1834 | + : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18), | ||
1835 | + "1"(sc_19) | ||
1836 | + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", | ||
1837 | + "$22", "$23", "$24", "$25", "$27", "$28", "memory"); | ||
1838 | + } | ||
1839 | } | ||
1840 | |||
1841 | static inline void | ||
1842 | @@ -66,11 +78,35 @@ futex_wake (int *addr, int count) | ||
1843 | |||
1844 | sc_0 = SYS_futex; | ||
1845 | sc_16 = (long) addr; | ||
1846 | - sc_17 = FUTEX_WAKE; | ||
1847 | + sc_17 = gomp_futex_wake; | ||
1848 | sc_18 = count; | ||
1849 | __asm volatile ("callsys" | ||
1850 | : "=r" (sc_0), "=r"(sc_19) | ||
1851 | : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18) | ||
1852 | : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", | ||
1853 | "$22", "$23", "$24", "$25", "$27", "$28", "memory"); | ||
1854 | + if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) | ||
1855 | + { | ||
1856 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
1857 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
1858 | + sc_0 = SYS_futex; | ||
1859 | + sc_17 &= ~FUTEX_PRIVATE_FLAG; | ||
1860 | + __asm volatile ("callsys" | ||
1861 | + : "=r" (sc_0), "=r"(sc_19) | ||
1862 | + : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18) | ||
1863 | + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", | ||
1864 | + "$22", "$23", "$24", "$25", "$27", "$28", "memory"); | ||
1865 | + } | ||
1866 | +} | ||
1867 | + | ||
1868 | +static inline void | ||
1869 | +cpu_relax (void) | ||
1870 | +{ | ||
1871 | + __asm volatile ("" : : : "memory"); | ||
1872 | +} | ||
1873 | + | ||
1874 | +static inline void | ||
1875 | +atomic_write_barrier (void) | ||
1876 | +{ | ||
1877 | + __asm volatile ("wmb" : : : "memory"); | ||
1878 | } | ||
1879 | --- libgomp/config/linux/affinity.c.jj 2007-12-07 14:41:00.000000000 +0100 | ||
1880 | +++ libgomp/config/linux/affinity.c 2008-03-26 15:11:32.000000000 +0100 | ||
1881 | @@ -1,4 +1,4 @@ | ||
1882 | -/* Copyright (C) 2006, 2007 Free Software Foundation, Inc. | ||
1883 | +/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. | ||
1884 | Contributed by Jakub Jelinek <jakub@redhat.com>. | ||
1885 | |||
1886 | This file is part of the GNU OpenMP Library (libgomp). | ||
1887 | @@ -38,9 +38,6 @@ | ||
1888 | #ifdef HAVE_PTHREAD_AFFINITY_NP | ||
1889 | |||
1890 | static unsigned int affinity_counter; | ||
1891 | -#ifndef HAVE_SYNC_BUILTINS | ||
1892 | -static gomp_mutex_t affinity_lock; | ||
1893 | -#endif | ||
1894 | |||
1895 | void | ||
1896 | gomp_init_affinity (void) | ||
1897 | @@ -76,9 +73,6 @@ gomp_init_affinity (void) | ||
1898 | CPU_SET (gomp_cpu_affinity[0], &cpuset); | ||
1899 | pthread_setaffinity_np (pthread_self (), sizeof (cpuset), &cpuset); | ||
1900 | affinity_counter = 1; | ||
1901 | -#ifndef HAVE_SYNC_BUILTINS | ||
1902 | - gomp_mutex_init (&affinity_lock); | ||
1903 | -#endif | ||
1904 | } | ||
1905 | |||
1906 | void | ||
1907 | @@ -87,13 +81,7 @@ gomp_init_thread_affinity (pthread_attr_ | ||
1908 | unsigned int cpu; | ||
1909 | cpu_set_t cpuset; | ||
1910 | |||
1911 | -#ifdef HAVE_SYNC_BUILTINS | ||
1912 | cpu = __sync_fetch_and_add (&affinity_counter, 1); | ||
1913 | -#else | ||
1914 | - gomp_mutex_lock (&affinity_lock); | ||
1915 | - cpu = affinity_counter++; | ||
1916 | - gomp_mutex_unlock (&affinity_lock); | ||
1917 | -#endif | ||
1918 | cpu %= gomp_cpu_affinity_len; | ||
1919 | CPU_ZERO (&cpuset); | ||
1920 | CPU_SET (gomp_cpu_affinity[cpu], &cpuset); | ||
1921 | --- libgomp/config/linux/bar.h.jj 2007-12-07 14:41:00.000000000 +0100 | ||
1922 | +++ libgomp/config/linux/bar.h 2008-03-26 15:11:32.000000000 +0100 | ||
1923 | @@ -1,4 +1,4 @@ | ||
1924 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
1925 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
1926 | Contributed by Richard Henderson <rth@redhat.com>. | ||
1927 | |||
1928 | This file is part of the GNU OpenMP Library (libgomp). | ||
1929 | @@ -36,40 +36,49 @@ | ||
1930 | |||
1931 | typedef struct | ||
1932 | { | ||
1933 | - gomp_mutex_t mutex; | ||
1934 | - unsigned total; | ||
1935 | - unsigned arrived; | ||
1936 | - int generation; | ||
1937 | + /* Make sure total/generation is in a mostly read cacheline, while | ||
1938 | + awaited in a separate cacheline. */ | ||
1939 | + unsigned total __attribute__((aligned (64))); | ||
1940 | + unsigned generation; | ||
1941 | + unsigned awaited __attribute__((aligned (64))); | ||
1942 | } gomp_barrier_t; | ||
1943 | +typedef unsigned int gomp_barrier_state_t; | ||
1944 | |||
1945 | static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count) | ||
1946 | { | ||
1947 | - gomp_mutex_init (&bar->mutex); | ||
1948 | bar->total = count; | ||
1949 | - bar->arrived = 0; | ||
1950 | + bar->awaited = count; | ||
1951 | bar->generation = 0; | ||
1952 | } | ||
1953 | |||
1954 | static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count) | ||
1955 | { | ||
1956 | - gomp_mutex_lock (&bar->mutex); | ||
1957 | + __sync_fetch_and_add (&bar->awaited, count - bar->total); | ||
1958 | bar->total = count; | ||
1959 | - gomp_mutex_unlock (&bar->mutex); | ||
1960 | } | ||
1961 | |||
1962 | static inline void gomp_barrier_destroy (gomp_barrier_t *bar) | ||
1963 | { | ||
1964 | - /* Before destroying, make sure all threads have left the barrier. */ | ||
1965 | - gomp_mutex_lock (&bar->mutex); | ||
1966 | } | ||
1967 | |||
1968 | extern void gomp_barrier_wait (gomp_barrier_t *); | ||
1969 | -extern void gomp_barrier_wait_end (gomp_barrier_t *, bool); | ||
1970 | +extern void gomp_barrier_wait_last (gomp_barrier_t *); | ||
1971 | +extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t); | ||
1972 | |||
1973 | -static inline bool gomp_barrier_wait_start (gomp_barrier_t *bar) | ||
1974 | +static inline gomp_barrier_state_t | ||
1975 | +gomp_barrier_wait_start (gomp_barrier_t *bar) | ||
1976 | { | ||
1977 | - gomp_mutex_lock (&bar->mutex); | ||
1978 | - return ++bar->arrived == bar->total; | ||
1979 | + unsigned int ret = bar->generation; | ||
1980 | + /* Do we need any barrier here or is __sync_add_and_fetch acting | ||
1981 | + as the needed LoadLoad barrier already? */ | ||
1982 | + ret += __sync_add_and_fetch (&bar->awaited, -1) == 0; | ||
1983 | + return ret; | ||
1984 | +} | ||
1985 | + | ||
1986 | +static inline bool | ||
1987 | +gomp_barrier_last_thread (gomp_barrier_state_t state) | ||
1988 | +{ | ||
1989 | + return state & 1; | ||
1990 | } | ||
1991 | |||
1992 | #endif /* GOMP_BARRIER_H */ | ||
1993 | --- libgomp/config/linux/ptrlock.h.jj 2008-03-26 15:11:32.000000000 +0100 | ||
1994 | +++ libgomp/config/linux/ptrlock.h 2008-03-26 15:11:32.000000000 +0100 | ||
1995 | @@ -0,0 +1,65 @@ | ||
1996 | +/* Copyright (C) 2008 Free Software Foundation, Inc. | ||
1997 | + Contributed by Jakub Jelinek <jakub@redhat.com>. | ||
1998 | + | ||
1999 | + This file is part of the GNU OpenMP Library (libgomp). | ||
2000 | + | ||
2001 | + Libgomp is free software; you can redistribute it and/or modify it | ||
2002 | + under the terms of the GNU Lesser General Public License as published by | ||
2003 | + the Free Software Foundation; either version 2.1 of the License, or | ||
2004 | + (at your option) any later version. | ||
2005 | + | ||
2006 | + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | ||
2007 | + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
2008 | + FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for | ||
2009 | + more details. | ||
2010 | + | ||
2011 | + You should have received a copy of the GNU Lesser General Public License | ||
2012 | + along with libgomp; see the file COPYING.LIB. If not, write to the | ||
2013 | + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | ||
2014 | + MA 02110-1301, USA. */ | ||
2015 | + | ||
2016 | +/* As a special exception, if you link this library with other files, some | ||
2017 | + of which are compiled with GCC, to produce an executable, this library | ||
2018 | + does not by itself cause the resulting executable to be covered by the | ||
2019 | + GNU General Public License. This exception does not however invalidate | ||
2020 | + any other reasons why the executable file might be covered by the GNU | ||
2021 | + General Public License. */ | ||
2022 | + | ||
2023 | +/* This is a Linux specific implementation of a mutex synchronization | ||
2024 | + mechanism for libgomp. This type is private to the library. This | ||
2025 | + implementation uses atomic instructions and the futex syscall. */ | ||
2026 | + | ||
2027 | +#ifndef GOMP_PTRLOCK_H | ||
2028 | +#define GOMP_PTRLOCK_H 1 | ||
2029 | + | ||
2030 | +typedef void *gomp_ptrlock_t; | ||
2031 | + | ||
2032 | +static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr) | ||
2033 | +{ | ||
2034 | + *ptrlock = ptr; | ||
2035 | +} | ||
2036 | + | ||
2037 | +extern void *gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock); | ||
2038 | +static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock) | ||
2039 | +{ | ||
2040 | + if ((uintptr_t) *ptrlock > 2) | ||
2041 | + return *ptrlock; | ||
2042 | + | ||
2043 | + if (__sync_bool_compare_and_swap (ptrlock, NULL, (uintptr_t) 1)) | ||
2044 | + return NULL; | ||
2045 | + | ||
2046 | + return gomp_ptrlock_get_slow (ptrlock); | ||
2047 | +} | ||
2048 | + | ||
2049 | +extern void gomp_ptrlock_set_slow (gomp_ptrlock_t *ptrlock, void *ptr); | ||
2050 | +static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr) | ||
2051 | +{ | ||
2052 | + if (!__sync_bool_compare_and_swap (ptrlock, (uintptr_t) 1, ptr)) | ||
2053 | + gomp_ptrlock_set_slow (ptrlock, ptr); | ||
2054 | +} | ||
2055 | + | ||
2056 | +static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock) | ||
2057 | +{ | ||
2058 | +} | ||
2059 | + | ||
2060 | +#endif /* GOMP_PTRLOCK_H */ | ||
2061 | --- libgomp/config/linux/lock.c.jj 2007-12-07 14:41:00.000000000 +0100 | ||
2062 | +++ libgomp/config/linux/lock.c 2008-03-26 15:11:32.000000000 +0100 | ||
2063 | @@ -29,11 +29,10 @@ | ||
2064 | primitives. This implementation uses atomic instructions and the futex | ||
2065 | syscall. */ | ||
2066 | |||
2067 | -#include "libgomp.h" | ||
2068 | #include <string.h> | ||
2069 | #include <unistd.h> | ||
2070 | #include <sys/syscall.h> | ||
2071 | -#include "futex.h" | ||
2072 | +#include "wait.h" | ||
2073 | |||
2074 | |||
2075 | /* The internal gomp_mutex_t and the external non-recursive omp_lock_t | ||
2076 | @@ -137,7 +136,7 @@ omp_set_nest_lock (omp_nest_lock_t *lock | ||
2077 | return; | ||
2078 | } | ||
2079 | |||
2080 | - futex_wait (&lock->owner, otid); | ||
2081 | + do_wait (&lock->owner, otid); | ||
2082 | } | ||
2083 | } | ||
2084 | |||
2085 | --- libgomp/config/linux/ptrlock.c.jj 2008-03-26 15:11:32.000000000 +0100 | ||
2086 | +++ libgomp/config/linux/ptrlock.c 2008-03-26 15:11:32.000000000 +0100 | ||
2087 | @@ -0,0 +1,70 @@ | ||
2088 | +/* Copyright (C) 2008 Free Software Foundation, Inc. | ||
2089 | + Contributed by Jakub Jelinek <jakub@redhat.com>. | ||
2090 | + | ||
2091 | + This file is part of the GNU OpenMP Library (libgomp). | ||
2092 | + | ||
2093 | + Libgomp is free software; you can redistribute it and/or modify it | ||
2094 | + under the terms of the GNU Lesser General Public License as published by | ||
2095 | + the Free Software Foundation; either version 2.1 of the License, or | ||
2096 | + (at your option) any later version. | ||
2097 | + | ||
2098 | + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | ||
2099 | + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
2100 | + FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for | ||
2101 | + more details. | ||
2102 | + | ||
2103 | + You should have received a copy of the GNU Lesser General Public License | ||
2104 | + along with libgomp; see the file COPYING.LIB. If not, write to the | ||
2105 | + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | ||
2106 | + MA 02110-1301, USA. */ | ||
2107 | + | ||
2108 | +/* As a special exception, if you link this library with other files, some | ||
2109 | + of which are compiled with GCC, to produce an executable, this library | ||
2110 | + does not by itself cause the resulting executable to be covered by the | ||
2111 | + GNU General Public License. This exception does not however invalidate | ||
2112 | + any other reasons why the executable file might be covered by the GNU | ||
2113 | + General Public License. */ | ||
2114 | + | ||
2115 | +/* This is a Linux specific implementation of a mutex synchronization | ||
2116 | + mechanism for libgomp. This type is private to the library. This | ||
2117 | + implementation uses atomic instructions and the futex syscall. */ | ||
2118 | + | ||
2119 | +#include <endian.h> | ||
2120 | +#include <limits.h> | ||
2121 | +#include "wait.h" | ||
2122 | + | ||
2123 | +void * | ||
2124 | +gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock) | ||
2125 | +{ | ||
2126 | + int *intptr; | ||
2127 | + __sync_bool_compare_and_swap (ptrlock, 1, 2); | ||
2128 | + | ||
2129 | + /* futex works on ints, not pointers. | ||
2130 | + But a valid work share pointer will be at least | ||
2131 | + 8 byte aligned, so it is safe to assume the low | ||
2132 | + 32-bits of the pointer won't contain values 1 or 2. */ | ||
2133 | + __asm volatile ("" : "=r" (intptr) : "0" (ptrlock)); | ||
2134 | +#if __BYTE_ORDER == __BIG_ENDIAN | ||
2135 | + if (sizeof (*ptrlock) > sizeof (int)) | ||
2136 | + intptr += (sizeof (*ptrlock) / sizeof (int)) - 1; | ||
2137 | +#endif | ||
2138 | + do | ||
2139 | + do_wait (intptr, 2); | ||
2140 | + while (*intptr == 2); | ||
2141 | + __asm volatile ("" : : : "memory"); | ||
2142 | + return *ptrlock; | ||
2143 | +} | ||
2144 | + | ||
2145 | +void | ||
2146 | +gomp_ptrlock_set_slow (gomp_ptrlock_t *ptrlock, void *ptr) | ||
2147 | +{ | ||
2148 | + int *intptr; | ||
2149 | + | ||
2150 | + *ptrlock = ptr; | ||
2151 | + __asm volatile ("" : "=r" (intptr) : "0" (ptrlock)); | ||
2152 | +#if __BYTE_ORDER == __BIG_ENDIAN | ||
2153 | + if (sizeof (*ptrlock) > sizeof (int)) | ||
2154 | + intptr += (sizeof (*ptrlock) / sizeof (int)) - 1; | ||
2155 | +#endif | ||
2156 | + futex_wake (intptr, INT_MAX); | ||
2157 | +} | ||
2158 | --- libgomp/config/linux/x86/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 | ||
2159 | +++ libgomp/config/linux/x86/futex.h 2008-03-26 15:11:32.000000000 +0100 | ||
2160 | @@ -1,4 +1,4 @@ | ||
2161 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
2162 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
2163 | Contributed by Richard Henderson <rth@redhat.com>. | ||
2164 | |||
2165 | This file is part of the GNU OpenMP Library (libgomp). | ||
2166 | @@ -27,9 +27,6 @@ | ||
2167 | |||
2168 | /* Provide target-specific access to the futex system call. */ | ||
2169 | |||
2170 | -#define FUTEX_WAIT 0 | ||
2171 | -#define FUTEX_WAKE 1 | ||
2172 | - | ||
2173 | #ifdef __LP64__ | ||
2174 | # ifndef SYS_futex | ||
2175 | # define SYS_futex 202 | ||
2176 | @@ -38,14 +35,26 @@ | ||
2177 | static inline void | ||
2178 | futex_wait (int *addr, int val) | ||
2179 | { | ||
2180 | - register long r10 __asm__("%r10") = 0; | ||
2181 | + register long r10 __asm__("%r10"); | ||
2182 | long res; | ||
2183 | |||
2184 | + r10 = 0; | ||
2185 | __asm volatile ("syscall" | ||
2186 | : "=a" (res) | ||
2187 | - : "0"(SYS_futex), "D" (addr), "S"(FUTEX_WAIT), | ||
2188 | - "d"(val), "r"(r10) | ||
2189 | + : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wait), | ||
2190 | + "d" (val), "r" (r10) | ||
2191 | : "r11", "rcx", "memory"); | ||
2192 | + if (__builtin_expect (res == -ENOSYS, 0)) | ||
2193 | + { | ||
2194 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2195 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2196 | + r10 = 0; | ||
2197 | + __asm volatile ("syscall" | ||
2198 | + : "=a" (res) | ||
2199 | + : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wait), | ||
2200 | + "d" (val), "r" (r10) | ||
2201 | + : "r11", "rcx", "memory"); | ||
2202 | + } | ||
2203 | } | ||
2204 | |||
2205 | static inline void | ||
2206 | @@ -55,8 +64,19 @@ futex_wake (int *addr, int count) | ||
2207 | |||
2208 | __asm volatile ("syscall" | ||
2209 | : "=a" (res) | ||
2210 | - : "0"(SYS_futex), "D" (addr), "S"(FUTEX_WAKE), "d"(count) | ||
2211 | + : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wake), | ||
2212 | + "d" (count) | ||
2213 | : "r11", "rcx", "memory"); | ||
2214 | + if (__builtin_expect (res == -ENOSYS, 0)) | ||
2215 | + { | ||
2216 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2217 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2218 | + __asm volatile ("syscall" | ||
2219 | + : "=a" (res) | ||
2220 | + : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wake), | ||
2221 | + "d" (count) | ||
2222 | + : "r11", "rcx", "memory"); | ||
2223 | + } | ||
2224 | } | ||
2225 | #else | ||
2226 | # ifndef SYS_futex | ||
2227 | @@ -65,7 +85,7 @@ futex_wake (int *addr, int count) | ||
2228 | |||
2229 | # ifdef __PIC__ | ||
2230 | |||
2231 | -static inline void | ||
2232 | +static inline long | ||
2233 | sys_futex0 (int *addr, int op, int val) | ||
2234 | { | ||
2235 | long res; | ||
2236 | @@ -77,11 +97,12 @@ sys_futex0 (int *addr, int op, int val) | ||
2237 | : "0"(SYS_futex), "r" (addr), "c"(op), | ||
2238 | "d"(val), "S"(0) | ||
2239 | : "memory"); | ||
2240 | + return res; | ||
2241 | } | ||
2242 | |||
2243 | # else | ||
2244 | |||
2245 | -static inline void | ||
2246 | +static inline long | ||
2247 | sys_futex0 (int *addr, int op, int val) | ||
2248 | { | ||
2249 | long res; | ||
2250 | @@ -91,6 +112,7 @@ sys_futex0 (int *addr, int op, int val) | ||
2251 | : "0"(SYS_futex), "b" (addr), "c"(op), | ||
2252 | "d"(val), "S"(0) | ||
2253 | : "memory"); | ||
2254 | + return res; | ||
2255 | } | ||
2256 | |||
2257 | # endif /* __PIC__ */ | ||
2258 | @@ -98,13 +120,37 @@ sys_futex0 (int *addr, int op, int val) | ||
2259 | static inline void | ||
2260 | futex_wait (int *addr, int val) | ||
2261 | { | ||
2262 | - sys_futex0 (addr, FUTEX_WAIT, val); | ||
2263 | + long res = sys_futex0 (addr, gomp_futex_wait, val); | ||
2264 | + if (__builtin_expect (res == -ENOSYS, 0)) | ||
2265 | + { | ||
2266 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2267 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2268 | + sys_futex0 (addr, gomp_futex_wait, val); | ||
2269 | + } | ||
2270 | } | ||
2271 | |||
2272 | static inline void | ||
2273 | futex_wake (int *addr, int count) | ||
2274 | { | ||
2275 | - sys_futex0 (addr, FUTEX_WAKE, count); | ||
2276 | + long res = sys_futex0 (addr, gomp_futex_wake, count); | ||
2277 | + if (__builtin_expect (res == -ENOSYS, 0)) | ||
2278 | + { | ||
2279 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2280 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2281 | + sys_futex0 (addr, gomp_futex_wake, count); | ||
2282 | + } | ||
2283 | } | ||
2284 | |||
2285 | #endif /* __LP64__ */ | ||
2286 | + | ||
2287 | +static inline void | ||
2288 | +cpu_relax (void) | ||
2289 | +{ | ||
2290 | + __asm volatile ("rep; nop" : : : "memory"); | ||
2291 | +} | ||
2292 | + | ||
2293 | +static inline void | ||
2294 | +atomic_write_barrier (void) | ||
2295 | +{ | ||
2296 | + __sync_synchronize (); | ||
2297 | +} | ||
2298 | --- libgomp/config/linux/wait.h.jj 2008-03-26 15:11:32.000000000 +0100 | ||
2299 | +++ libgomp/config/linux/wait.h 2008-03-26 15:11:32.000000000 +0100 | ||
2300 | @@ -0,0 +1,68 @@ | ||
2301 | +/* Copyright (C) 2008 Free Software Foundation, Inc. | ||
2302 | + Contributed by Jakub Jelinek <jakub@redhat.com>. | ||
2303 | + | ||
2304 | + This file is part of the GNU OpenMP Library (libgomp). | ||
2305 | + | ||
2306 | + Libgomp is free software; you can redistribute it and/or modify it | ||
2307 | + under the terms of the GNU Lesser General Public License as published by | ||
2308 | + the Free Software Foundation; either version 2.1 of the License, or | ||
2309 | + (at your option) any later version. | ||
2310 | + | ||
2311 | + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | ||
2312 | + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
2313 | + FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for | ||
2314 | + more details. | ||
2315 | + | ||
2316 | + You should have received a copy of the GNU Lesser General Public License | ||
2317 | + along with libgomp; see the file COPYING.LIB. If not, write to the | ||
2318 | + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | ||
2319 | + MA 02110-1301, USA. */ | ||
2320 | + | ||
2321 | +/* As a special exception, if you link this library with other files, some | ||
2322 | + of which are compiled with GCC, to produce an executable, this library | ||
2323 | + does not by itself cause the resulting executable to be covered by the | ||
2324 | + GNU General Public License. This exception does not however invalidate | ||
2325 | + any other reasons why the executable file might be covered by the GNU | ||
2326 | + General Public License. */ | ||
2327 | + | ||
2328 | +/* This is a Linux specific implementation of a mutex synchronization | ||
2329 | + mechanism for libgomp. This type is private to the library. This | ||
2330 | + implementation uses atomic instructions and the futex syscall. */ | ||
2331 | + | ||
2332 | +#ifndef GOMP_WAIT_H | ||
2333 | +#define GOMP_WAIT_H 1 | ||
2334 | + | ||
2335 | +#include "libgomp.h" | ||
2336 | +#include <errno.h> | ||
2337 | + | ||
2338 | +#define FUTEX_WAIT 0 | ||
2339 | +#define FUTEX_WAKE 1 | ||
2340 | +#define FUTEX_PRIVATE_FLAG 128L | ||
2341 | + | ||
2342 | +#ifdef HAVE_ATTRIBUTE_VISIBILITY | ||
2343 | +# pragma GCC visibility push(hidden) | ||
2344 | +#endif | ||
2345 | + | ||
2346 | +extern long int gomp_futex_wait, gomp_futex_wake; | ||
2347 | + | ||
2348 | +#include "futex.h" | ||
2349 | + | ||
2350 | +static inline void do_wait (int *addr, int val) | ||
2351 | +{ | ||
2352 | + unsigned long long i, count = gomp_spin_count_var; | ||
2353 | + | ||
2354 | + if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0)) | ||
2355 | + count = gomp_throttled_spin_count_var; | ||
2356 | + for (i = 0; i < count; i++) | ||
2357 | + if (__builtin_expect (*addr != val, 0)) | ||
2358 | + return; | ||
2359 | + else | ||
2360 | + cpu_relax (); | ||
2361 | + futex_wait (addr, val); | ||
2362 | +} | ||
2363 | + | ||
2364 | +#ifdef HAVE_ATTRIBUTE_VISIBILITY | ||
2365 | +# pragma GCC visibility pop | ||
2366 | +#endif | ||
2367 | + | ||
2368 | +#endif /* GOMP_WAIT_H */ | ||
2369 | --- libgomp/config/linux/sparc/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 | ||
2370 | +++ libgomp/config/linux/sparc/futex.h 2008-03-26 15:11:32.000000000 +0100 | ||
2371 | @@ -1,4 +1,4 @@ | ||
2372 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
2373 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
2374 | Contributed by Jakub Jelinek <jakub@redhat.com>. | ||
2375 | |||
2376 | This file is part of the GNU OpenMP Library (libgomp). | ||
2377 | @@ -28,10 +28,8 @@ | ||
2378 | /* Provide target-specific access to the futex system call. */ | ||
2379 | |||
2380 | #include <sys/syscall.h> | ||
2381 | -#define FUTEX_WAIT 0 | ||
2382 | -#define FUTEX_WAKE 1 | ||
2383 | |||
2384 | -static inline void | ||
2385 | +static inline long | ||
2386 | sys_futex0 (int *addr, int op, int val) | ||
2387 | { | ||
2388 | register long int g1 __asm__ ("g1"); | ||
2389 | @@ -47,9 +45,9 @@ sys_futex0 (int *addr, int op, int val) | ||
2390 | o3 = 0; | ||
2391 | |||
2392 | #ifdef __arch64__ | ||
2393 | -# define SYSCALL_STRING "ta\t0x6d" | ||
2394 | +# define SYSCALL_STRING "ta\t0x6d; bcs,a,pt %%xcc, 1f; sub %%g0, %%o0, %%o0; 1:" | ||
2395 | #else | ||
2396 | -# define SYSCALL_STRING "ta\t0x10" | ||
2397 | +# define SYSCALL_STRING "ta\t0x10; bcs,a 1f; sub %%g0, %%o0, %%o0; 1:" | ||
2398 | #endif | ||
2399 | |||
2400 | __asm volatile (SYSCALL_STRING | ||
2401 | @@ -65,16 +63,49 @@ sys_futex0 (int *addr, int op, int val) | ||
2402 | "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", | ||
2403 | #endif | ||
2404 | "cc", "memory"); | ||
2405 | + return o0; | ||
2406 | } | ||
2407 | |||
2408 | static inline void | ||
2409 | futex_wait (int *addr, int val) | ||
2410 | { | ||
2411 | - sys_futex0 (addr, FUTEX_WAIT, val); | ||
2412 | + long err = sys_futex0 (addr, gomp_futex_wait, val); | ||
2413 | + if (__builtin_expect (err == ENOSYS, 0)) | ||
2414 | + { | ||
2415 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2416 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2417 | + sys_futex0 (addr, gomp_futex_wait, val); | ||
2418 | + } | ||
2419 | } | ||
2420 | |||
2421 | static inline void | ||
2422 | futex_wake (int *addr, int count) | ||
2423 | { | ||
2424 | - sys_futex0 (addr, FUTEX_WAKE, count); | ||
2425 | + long err = sys_futex0 (addr, gomp_futex_wake, count); | ||
2426 | + if (__builtin_expect (err == ENOSYS, 0)) | ||
2427 | + { | ||
2428 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2429 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2430 | + sys_futex0 (addr, gomp_futex_wake, count); | ||
2431 | + } | ||
2432 | +} | ||
2433 | + | ||
2434 | +static inline void | ||
2435 | +cpu_relax (void) | ||
2436 | +{ | ||
2437 | +#if defined __arch64__ || defined __sparc_v9__ | ||
2438 | + __asm volatile ("membar #LoadLoad" : : : "memory"); | ||
2439 | +#else | ||
2440 | + __asm volatile ("" : : : "memory"); | ||
2441 | +#endif | ||
2442 | +} | ||
2443 | + | ||
2444 | +static inline void | ||
2445 | +atomic_write_barrier (void) | ||
2446 | +{ | ||
2447 | +#if defined __arch64__ || defined __sparc_v9__ | ||
2448 | + __asm volatile ("membar #StoreStore" : : : "memory"); | ||
2449 | +#else | ||
2450 | + __sync_synchronize (); | ||
2451 | +#endif | ||
2452 | } | ||
2453 | --- libgomp/config/linux/ia64/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 | ||
2454 | +++ libgomp/config/linux/ia64/futex.h 2008-03-26 15:11:32.000000000 +0100 | ||
2455 | @@ -1,4 +1,4 @@ | ||
2456 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
2457 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
2458 | Contributed by Richard Henderson <rth@redhat.com>. | ||
2459 | |||
2460 | This file is part of the GNU OpenMP Library (libgomp). | ||
2461 | @@ -29,23 +29,24 @@ | ||
2462 | |||
2463 | #include <sys/syscall.h> | ||
2464 | |||
2465 | -#define FUTEX_WAIT 0 | ||
2466 | -#define FUTEX_WAKE 1 | ||
2467 | |||
2468 | |||
2469 | -static inline void | ||
2470 | -sys_futex0(int *addr, int op, int val) | ||
2471 | +static inline long | ||
2472 | +sys_futex0(int *addr, long op, int val) | ||
2473 | { | ||
2474 | register long out0 asm ("out0") = (long) addr; | ||
2475 | register long out1 asm ("out1") = op; | ||
2476 | register long out2 asm ("out2") = val; | ||
2477 | register long out3 asm ("out3") = 0; | ||
2478 | + register long r8 asm ("r8"); | ||
2479 | + register long r10 asm ("r10"); | ||
2480 | register long r15 asm ("r15") = SYS_futex; | ||
2481 | |||
2482 | __asm __volatile ("break 0x100000" | ||
2483 | - : "=r"(r15), "=r"(out0), "=r"(out1), "=r"(out2), "=r"(out3) | ||
2484 | + : "=r"(r15), "=r"(out0), "=r"(out1), "=r"(out2), "=r"(out3), | ||
2485 | + "=r"(r8), "=r"(r10) | ||
2486 | : "r"(r15), "r"(out0), "r"(out1), "r"(out2), "r"(out3) | ||
2487 | - : "memory", "r8", "r10", "out4", "out5", "out6", "out7", | ||
2488 | + : "memory", "out4", "out5", "out6", "out7", | ||
2489 | /* Non-stacked integer registers, minus r8, r10, r15. */ | ||
2490 | "r2", "r3", "r9", "r11", "r12", "r13", "r14", "r16", "r17", "r18", | ||
2491 | "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", | ||
2492 | @@ -56,16 +57,41 @@ sys_futex0(int *addr, int op, int val) | ||
2493 | "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", | ||
2494 | /* Branch registers. */ | ||
2495 | "b6"); | ||
2496 | + return r8 & r10; | ||
2497 | } | ||
2498 | |||
2499 | static inline void | ||
2500 | futex_wait (int *addr, int val) | ||
2501 | { | ||
2502 | - sys_futex0 (addr, FUTEX_WAIT, val); | ||
2503 | + long err = sys_futex0 (addr, gomp_futex_wait, val); | ||
2504 | + if (__builtin_expect (err == ENOSYS, 0)) | ||
2505 | + { | ||
2506 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2507 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2508 | + sys_futex0 (addr, gomp_futex_wait, val); | ||
2509 | + } | ||
2510 | } | ||
2511 | |||
2512 | static inline void | ||
2513 | futex_wake (int *addr, int count) | ||
2514 | { | ||
2515 | - sys_futex0 (addr, FUTEX_WAKE, count); | ||
2516 | + long err = sys_futex0 (addr, gomp_futex_wake, count); | ||
2517 | + if (__builtin_expect (err == ENOSYS, 0)) | ||
2518 | + { | ||
2519 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2520 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2521 | + sys_futex0 (addr, gomp_futex_wake, count); | ||
2522 | + } | ||
2523 | +} | ||
2524 | + | ||
2525 | +static inline void | ||
2526 | +cpu_relax (void) | ||
2527 | +{ | ||
2528 | + __asm volatile ("hint @pause" : : : "memory"); | ||
2529 | +} | ||
2530 | + | ||
2531 | +static inline void | ||
2532 | +atomic_write_barrier (void) | ||
2533 | +{ | ||
2534 | + __sync_synchronize (); | ||
2535 | } | ||
2536 | --- libgomp/config/linux/s390/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 | ||
2537 | +++ libgomp/config/linux/s390/futex.h 2008-03-26 15:11:32.000000000 +0100 | ||
2538 | @@ -1,4 +1,4 @@ | ||
2539 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
2540 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
2541 | Contributed by Jakub Jelinek <jakub@redhat.com>. | ||
2542 | |||
2543 | This file is part of the GNU OpenMP Library (libgomp). | ||
2544 | @@ -28,10 +28,8 @@ | ||
2545 | /* Provide target-specific access to the futex system call. */ | ||
2546 | |||
2547 | #include <sys/syscall.h> | ||
2548 | -#define FUTEX_WAIT 0 | ||
2549 | -#define FUTEX_WAKE 1 | ||
2550 | |||
2551 | -static inline void | ||
2552 | +static inline long | ||
2553 | sys_futex0 (int *addr, int op, int val) | ||
2554 | { | ||
2555 | register long int gpr2 __asm__ ("2"); | ||
2556 | @@ -49,16 +47,41 @@ sys_futex0 (int *addr, int op, int val) | ||
2557 | : "i" (SYS_futex), | ||
2558 | "0" (gpr2), "d" (gpr3), "d" (gpr4), "d" (gpr5) | ||
2559 | : "memory"); | ||
2560 | + return gpr2; | ||
2561 | } | ||
2562 | |||
2563 | static inline void | ||
2564 | futex_wait (int *addr, int val) | ||
2565 | { | ||
2566 | - sys_futex0 (addr, FUTEX_WAIT, val); | ||
2567 | + long err = sys_futex0 (addr, gomp_futex_wait, val); | ||
2568 | + if (__builtin_expect (err == -ENOSYS, 0)) | ||
2569 | + { | ||
2570 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2571 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2572 | + sys_futex0 (addr, gomp_futex_wait, val); | ||
2573 | + } | ||
2574 | } | ||
2575 | |||
2576 | static inline void | ||
2577 | futex_wake (int *addr, int count) | ||
2578 | { | ||
2579 | - sys_futex0 (addr, FUTEX_WAKE, count); | ||
2580 | + long err = sys_futex0 (addr, gomp_futex_wake, count); | ||
2581 | + if (__builtin_expect (err == -ENOSYS, 0)) | ||
2582 | + { | ||
2583 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2584 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2585 | + sys_futex0 (addr, gomp_futex_wake, count); | ||
2586 | + } | ||
2587 | +} | ||
2588 | + | ||
2589 | +static inline void | ||
2590 | +cpu_relax (void) | ||
2591 | +{ | ||
2592 | + __asm volatile ("" : : : "memory"); | ||
2593 | +} | ||
2594 | + | ||
2595 | +static inline void | ||
2596 | +atomic_write_barrier (void) | ||
2597 | +{ | ||
2598 | + __sync_synchronize (); | ||
2599 | } | ||
2600 | --- libgomp/config/linux/mutex.c.jj 2007-12-07 14:41:00.000000000 +0100 | ||
2601 | +++ libgomp/config/linux/mutex.c 2008-03-26 15:11:32.000000000 +0100 | ||
2602 | @@ -1,4 +1,4 @@ | ||
2603 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
2604 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
2605 | Contributed by Richard Henderson <rth@redhat.com>. | ||
2606 | |||
2607 | This file is part of the GNU OpenMP Library (libgomp). | ||
2608 | @@ -29,9 +29,10 @@ | ||
2609 | mechanism for libgomp. This type is private to the library. This | ||
2610 | implementation uses atomic instructions and the futex syscall. */ | ||
2611 | |||
2612 | -#include "libgomp.h" | ||
2613 | -#include "futex.h" | ||
2614 | +#include "wait.h" | ||
2615 | |||
2616 | +long int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; | ||
2617 | +long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; | ||
2618 | |||
2619 | void | ||
2620 | gomp_mutex_lock_slow (gomp_mutex_t *mutex) | ||
2621 | @@ -40,7 +41,7 @@ gomp_mutex_lock_slow (gomp_mutex_t *mute | ||
2622 | { | ||
2623 | int oldval = __sync_val_compare_and_swap (mutex, 1, 2); | ||
2624 | if (oldval != 0) | ||
2625 | - futex_wait (mutex, 2); | ||
2626 | + do_wait (mutex, 2); | ||
2627 | } | ||
2628 | while (!__sync_bool_compare_and_swap (mutex, 0, 2)); | ||
2629 | } | ||
2630 | --- libgomp/config/linux/sem.c.jj 2007-12-07 14:41:00.000000000 +0100 | ||
2631 | +++ libgomp/config/linux/sem.c 2008-03-26 15:11:32.000000000 +0100 | ||
2632 | @@ -1,4 +1,4 @@ | ||
2633 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
2634 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
2635 | Contributed by Richard Henderson <rth@redhat.com>. | ||
2636 | |||
2637 | This file is part of the GNU OpenMP Library (libgomp). | ||
2638 | @@ -29,8 +29,7 @@ | ||
2639 | mechanism for libgomp. This type is private to the library. This | ||
2640 | implementation uses atomic instructions and the futex syscall. */ | ||
2641 | |||
2642 | -#include "libgomp.h" | ||
2643 | -#include "futex.h" | ||
2644 | +#include "wait.h" | ||
2645 | |||
2646 | |||
2647 | void | ||
2648 | @@ -44,7 +43,7 @@ gomp_sem_wait_slow (gomp_sem_t *sem) | ||
2649 | if (__sync_bool_compare_and_swap (sem, val, val - 1)) | ||
2650 | return; | ||
2651 | } | ||
2652 | - futex_wait (sem, -1); | ||
2653 | + do_wait (sem, -1); | ||
2654 | } | ||
2655 | } | ||
2656 | |||
2657 | --- libgomp/config/linux/powerpc/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 | ||
2658 | +++ libgomp/config/linux/powerpc/futex.h 2008-03-26 15:11:32.000000000 +0100 | ||
2659 | @@ -1,4 +1,4 @@ | ||
2660 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
2661 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
2662 | Contributed by Richard Henderson <rth@redhat.com>. | ||
2663 | |||
2664 | This file is part of the GNU OpenMP Library (libgomp). | ||
2665 | @@ -28,10 +28,8 @@ | ||
2666 | /* Provide target-specific access to the futex system call. */ | ||
2667 | |||
2668 | #include <sys/syscall.h> | ||
2669 | -#define FUTEX_WAIT 0 | ||
2670 | -#define FUTEX_WAKE 1 | ||
2671 | |||
2672 | -static inline void | ||
2673 | +static inline long | ||
2674 | sys_futex0 (int *addr, int op, int val) | ||
2675 | { | ||
2676 | register long int r0 __asm__ ("r0"); | ||
2677 | @@ -50,21 +48,48 @@ sys_futex0 (int *addr, int op, int val) | ||
2678 | doesn't. It doesn't much matter for us. In the interest of unity, | ||
2679 | go ahead and clobber it always. */ | ||
2680 | |||
2681 | - __asm volatile ("sc" | ||
2682 | + __asm volatile ("sc; mfcr %0" | ||
2683 | : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6) | ||
2684 | : "r"(r0), "r"(r3), "r"(r4), "r"(r5), "r"(r6) | ||
2685 | : "r7", "r8", "r9", "r10", "r11", "r12", | ||
2686 | "cr0", "ctr", "memory"); | ||
2687 | + if (__builtin_expect (r0 & (1 << 28), 0)) | ||
2688 | + return r3; | ||
2689 | + return 0; | ||
2690 | } | ||
2691 | |||
2692 | static inline void | ||
2693 | futex_wait (int *addr, int val) | ||
2694 | { | ||
2695 | - sys_futex0 (addr, FUTEX_WAIT, val); | ||
2696 | + long err = sys_futex0 (addr, gomp_futex_wait, val); | ||
2697 | + if (__builtin_expect (err == ENOSYS, 0)) | ||
2698 | + { | ||
2699 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2700 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2701 | + sys_futex0 (addr, gomp_futex_wait, val); | ||
2702 | + } | ||
2703 | } | ||
2704 | |||
2705 | static inline void | ||
2706 | futex_wake (int *addr, int count) | ||
2707 | { | ||
2708 | - sys_futex0 (addr, FUTEX_WAKE, count); | ||
2709 | + long err = sys_futex0 (addr, gomp_futex_wake, count); | ||
2710 | + if (__builtin_expect (err == ENOSYS, 0)) | ||
2711 | + { | ||
2712 | + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; | ||
2713 | + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; | ||
2714 | + sys_futex0 (addr, gomp_futex_wake, count); | ||
2715 | + } | ||
2716 | +} | ||
2717 | + | ||
2718 | +static inline void | ||
2719 | +cpu_relax (void) | ||
2720 | +{ | ||
2721 | + __asm volatile ("" : : : "memory"); | ||
2722 | +} | ||
2723 | + | ||
2724 | +static inline void | ||
2725 | +atomic_write_barrier (void) | ||
2726 | +{ | ||
2727 | + __asm volatile ("eieio" : : : "memory"); | ||
2728 | } | ||
2729 | --- libgomp/config/linux/bar.c.jj 2007-12-07 14:41:00.000000000 +0100 | ||
2730 | +++ libgomp/config/linux/bar.c 2008-03-26 15:11:32.000000000 +0100 | ||
2731 | @@ -1,4 +1,4 @@ | ||
2732 | -/* Copyright (C) 2005 Free Software Foundation, Inc. | ||
2733 | +/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. | ||
2734 | Contributed by Richard Henderson <rth@redhat.com>. | ||
2735 | |||
2736 | This file is part of the GNU OpenMP Library (libgomp). | ||
2737 | @@ -29,32 +29,29 @@ | ||
2738 | mechanism for libgomp. This type is private to the library. This | ||
2739 | implementation uses atomic instructions and the futex syscall. */ | ||
2740 | |||
2741 | -#include "libgomp.h" | ||
2742 | -#include "futex.h" | ||
2743 | #include <limits.h> | ||
2744 | +#include "wait.h" | ||
2745 | |||
2746 | |||
2747 | void | ||
2748 | -gomp_barrier_wait_end (gomp_barrier_t *bar, bool last) | ||
2749 | +gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) | ||
2750 | { | ||
2751 | - if (last) | ||
2752 | + if (__builtin_expect ((state & 1) != 0, 0)) | ||
2753 | { | ||
2754 | - bar->generation++; | ||
2755 | - futex_wake (&bar->generation, INT_MAX); | ||
2756 | + /* Next time we'll be awaiting TOTAL threads again. */ | ||
2757 | + bar->awaited = bar->total; | ||
2758 | + atomic_write_barrier (); | ||
2759 | + bar->generation += 2; | ||
2760 | + futex_wake ((int *) &bar->generation, INT_MAX); | ||
2761 | } | ||
2762 | else | ||
2763 | { | ||
2764 | - unsigned int generation = bar->generation; | ||
2765 | - | ||
2766 | - gomp_mutex_unlock (&bar->mutex); | ||
2767 | + unsigned int generation = state; | ||
2768 | |||
2769 | do | ||
2770 | - futex_wait (&bar->generation, generation); | ||
2771 | + do_wait ((int *) &bar->generation, generation); | ||
2772 | while (bar->generation == generation); | ||
2773 | } | ||
2774 | - | ||
2775 | - if (__sync_add_and_fetch (&bar->arrived, -1) == 0) | ||
2776 | - gomp_mutex_unlock (&bar->mutex); | ||
2777 | } | ||
2778 | |||
2779 | void | ||
2780 | @@ -62,3 +59,18 @@ gomp_barrier_wait (gomp_barrier_t *barri | ||
2781 | { | ||
2782 | gomp_barrier_wait_end (barrier, gomp_barrier_wait_start (barrier)); | ||
2783 | } | ||
2784 | + | ||
2785 | +/* Like gomp_barrier_wait, except that if the encountering thread | ||
2786 | + is not the last one to hit the barrier, it returns immediately. | ||
2787 | + The intended usage is that a thread which intends to gomp_barrier_destroy | ||
2788 | + this barrier calls gomp_barrier_wait, while all other threads | ||
2789 | + call gomp_barrier_wait_last. When gomp_barrier_wait returns, | ||
2790 | + the barrier can be safely destroyed. */ | ||
2791 | + | ||
2792 | +void | ||
2793 | +gomp_barrier_wait_last (gomp_barrier_t *barrier) | ||
2794 | +{ | ||
2795 | + gomp_barrier_state_t state = gomp_barrier_wait_start (barrier); | ||
2796 | + if (state & 1) | ||
2797 | + gomp_barrier_wait_end (barrier, state); | ||
2798 | +} | ||