* Makefile.am (libgomp_la_SOURCES): Add atomic.c, icv.c, icv-device.c.
* Makefile.in: Regenerate.
* configure.ac [nvptx*-*-*] (libgomp_use_pthreads): Set and use it...
(LIBGOMP_USE_PTHREADS): ...here; new define.
* configure: Regenerate.
* config.h.in: Likewise.
* config/posix/affinity.c: Move to...
* affinity.c: ...here (new file). Guard use of Pthreads-specific
interface by LIBGOMP_USE_PTHREADS.
* critical.c: Split out GOMP_atomic_{start,end} into...
* atomic.c: ...here (new file).
* env.c: Split out ICV definitions into...
* icv.c: ...here (new file) and...
* icv-device.c: ...here. New file.
* config/linux/lock.c (gomp_init_lock_30): Move to generic lock.c.
(gomp_destroy_lock_30): Ditto.
(gomp_set_lock_30): Ditto.
(gomp_unset_lock_30): Ditto.
(gomp_test_lock_30): Ditto.
(gomp_init_nest_lock_30): Ditto.
(gomp_destroy_nest_lock_30): Ditto.
(gomp_set_nest_lock_30): Ditto.
(gomp_unset_nest_lock_30): Ditto.
(gomp_test_nest_lock_30): Ditto.
* lock.c: New.
* config/nvptx/lock.c: New.
* config/nvptx/bar.c: New.
* config/nvptx/bar.h: New.
* config/nvptx/doacross.h: New.
* config/nvptx/error.c: New.
* config/nvptx/icv-device.c: New.
* config/nvptx/mutex.h: New.
* config/nvptx/pool.h: New.
* config/nvptx/proc.c: New.
* config/nvptx/ptrlock.h: New.
* config/nvptx/sem.h: New.
* config/nvptx/simple-bar.h: New.
* config/nvptx/target.c: New.
* config/nvptx/task.c: New.
* config/nvptx/team.c: New.
* config/nvptx/time.c: New.
* config/posix/simple-bar.h: New.
* libgomp.h: Guard pthread.h inclusion. Include simple-bar.h.
(gomp_num_teams_var): Declare.
(struct gomp_thread_pool): Change threads_dock member to
gomp_simple_barrier_t.
[__nvptx__] (gomp_thread): New implementation.
(gomp_thread_attr): Guard by LIBGOMP_USE_PTHREADS.
(gomp_thread_destructor): Ditto.
(gomp_init_thread_affinity): Ditto.
* team.c: Guard uses of Pthreads-specific interfaces by
LIBGOMP_USE_PTHREADS. Adjust all uses of threads_dock.
(gomp_free_thread) [__nvptx__]: Do not call 'free'.
* config/nvptx/alloc.c: Delete.
* config/nvptx/barrier.c: Ditto.
* config/nvptx/fortran.c: Ditto.
* config/nvptx/iter.c: Ditto.
* config/nvptx/iter_ull.c: Ditto.
* config/nvptx/loop.c: Ditto.
* config/nvptx/loop_ull.c: Ditto.
* config/nvptx/ordered.c: Ditto.
* config/nvptx/parallel.c: Ditto.
* config/nvptx/priority_queue.c: Ditto.
* config/nvptx/sections.c: Ditto.
* config/nvptx/single.c: Ditto.
* config/nvptx/splay-tree.c: Ditto.
* config/nvptx/work.c: Ditto.
* testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Pass
-foffload=-lgfortran in addition to -lgfortran.
* testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags): Ditto.
* plugin/plugin-nvptx.c: Include <limits.h>.
(struct targ_fn_descriptor): Add new fields.
(struct ptx_device): Ditto. Set them...
(nvptx_open_device): ...here.
(nvptx_adjust_launch_bounds): New.
(nvptx_host2dev): Allow NULL 'nvthd'.
(nvptx_dev2host): Ditto.
(GOMP_OFFLOAD_get_caps): Add GOMP_OFFLOAD_CAP_OPENMP_400.
(link_ptx): Adjust log sizes.
(nvptx_host2dev): Allow NULL 'nvthd'.
(nvptx_dev2host): Ditto.
(nvptx_set_clocktick): New. Use it...
(GOMP_OFFLOAD_load_image): ...here. Set new targ_fn_descriptor
fields.
(GOMP_OFFLOAD_dev2dev): New.
(nvptx_adjust_launch_bounds): New.
(nvptx_stacks_size): New.
(nvptx_stacks_alloc): New.
(nvptx_stacks_free): New.
(GOMP_OFFLOAD_run): New.
(GOMP_OFFLOAD_async_run): New (stub).
Co-Authored-By: Dmitry Melnik <dm@ispras.ru>
Co-Authored-By: Jakub Jelinek <jakub@redhat.com>
From-SVN: r242789
+2016-11-23 Alexander Monakov <amonakov@ispras.ru>
+ Jakub Jelinek <jakub@redhat.com>
+ Dmitry Melnik <dm@ispras.ru>
+
+ * Makefile.am (libgomp_la_SOURCES): Add atomic.c, icv.c, icv-device.c.
+ * Makefile.in: Regenerate.
+ * configure.ac [nvptx*-*-*] (libgomp_use_pthreads): Set and use it...
+ (LIBGOMP_USE_PTHREADS): ...here; new define.
+ * configure: Regenerate.
+ * config.h.in: Likewise.
+ * config/posix/affinity.c: Move to...
+ * affinity.c: ...here (new file). Guard use of Pthreads-specific
+ interface by LIBGOMP_USE_PTHREADS.
+ * critical.c: Split out GOMP_atomic_{start,end} into...
+ * atomic.c: ...here (new file).
+ * env.c: Split out ICV definitions into...
+ * icv.c: ...here (new file) and...
+ * icv-device.c: ...here. New file.
+ * config/linux/lock.c (gomp_init_lock_30): Move to generic lock.c.
+ (gomp_destroy_lock_30): Ditto.
+ (gomp_set_lock_30): Ditto.
+ (gomp_unset_lock_30): Ditto.
+ (gomp_test_lock_30): Ditto.
+ (gomp_init_nest_lock_30): Ditto.
+ (gomp_destroy_nest_lock_30): Ditto.
+ (gomp_set_nest_lock_30): Ditto.
+ (gomp_unset_nest_lock_30): Ditto.
+ (gomp_test_nest_lock_30): Ditto.
+ * lock.c: New.
+ * config/nvptx/lock.c: New.
+ * config/nvptx/bar.c: New.
+ * config/nvptx/bar.h: New.
+ * config/nvptx/doacross.h: New.
+ * config/nvptx/error.c: New.
+ * config/nvptx/icv-device.c: New.
+ * config/nvptx/mutex.h: New.
+ * config/nvptx/pool.h: New.
+ * config/nvptx/proc.c: New.
+ * config/nvptx/ptrlock.h: New.
+ * config/nvptx/sem.h: New.
+ * config/nvptx/simple-bar.h: New.
+ * config/nvptx/target.c: New.
+ * config/nvptx/task.c: New.
+ * config/nvptx/team.c: New.
+ * config/nvptx/time.c: New.
+ * config/posix/simple-bar.h: New.
+ * libgomp.h: Guard pthread.h inclusion. Include simple-bar.h.
+ (gomp_num_teams_var): Declare.
+ (struct gomp_thread_pool): Change threads_dock member to
+ gomp_simple_barrier_t.
+ [__nvptx__] (gomp_thread): New implementation.
+ (gomp_thread_attr): Guard by LIBGOMP_USE_PTHREADS.
+ (gomp_thread_destructor): Ditto.
+ (gomp_init_thread_affinity): Ditto.
+ * team.c: Guard uses of Pthreads-specific interfaces by
+ LIBGOMP_USE_PTHREADS. Adjust all uses of threads_dock.
+ (gomp_free_thread) [__nvptx__]: Do not call 'free'.
+ * config/nvptx/alloc.c: Delete.
+ * config/nvptx/barrier.c: Ditto.
+ * config/nvptx/fortran.c: Ditto.
+ * config/nvptx/iter.c: Ditto.
+ * config/nvptx/iter_ull.c: Ditto.
+ * config/nvptx/loop.c: Ditto.
+ * config/nvptx/loop_ull.c: Ditto.
+ * config/nvptx/ordered.c: Ditto.
+ * config/nvptx/parallel.c: Ditto.
+ * config/nvptx/priority_queue.c: Ditto.
+ * config/nvptx/sections.c: Ditto.
+ * config/nvptx/single.c: Ditto.
+ * config/nvptx/splay-tree.c: Ditto.
+ * config/nvptx/work.c: Ditto.
+ * testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Pass
+ -foffload=-lgfortran in addition to -lgfortran.
+ * testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags): Ditto.
+ * plugin/plugin-nvptx.c: Include <limits.h>.
+ (struct targ_fn_descriptor): Add new fields.
+ (struct ptx_device): Ditto. Set them...
+ (nvptx_open_device): ...here.
+ (nvptx_adjust_launch_bounds): New.
+ (nvptx_host2dev): Allow NULL 'nvthd'.
+ (nvptx_dev2host): Ditto.
+ (GOMP_OFFLOAD_get_caps): Add GOMP_OFFLOAD_CAP_OPENMP_400.
+ (link_ptx): Adjust log sizes.
+ (nvptx_host2dev): Allow NULL 'nvthd'.
+ (nvptx_dev2host): Ditto.
+ (nvptx_set_clocktick): New. Use it...
+ (GOMP_OFFLOAD_load_image): ...here. Set new targ_fn_descriptor
+ fields.
+ (GOMP_OFFLOAD_dev2dev): New.
+ (nvptx_adjust_launch_bounds): New.
+ (nvptx_stacks_size): New.
+ (nvptx_stacks_alloc): New.
+ (nvptx_stacks_free): New.
+ (GOMP_OFFLOAD_run): New.
+ (GOMP_OFFLOAD_async_run): New (stub).
+
2016-11-23 Martin Jambor <mjambor@suse.cz>
* testsuite/libgomp.hsa.c/bits-insns.c: New test.
libgomp_la_DEPENDENCIES = $(libgomp_version_dep)
libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
-libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
- iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
- task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
- time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \
- oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \
- oacc-plugin.c oacc-cuda.c priority_queue.c
+libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
+ icv.c icv-device.c iter.c iter_ull.c loop.c loop_ull.c ordered.c \
+ parallel.c sections.c single.c task.c team.c work.c lock.c mutex.c \
+ proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
+ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
+ oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c
include $(top_srcdir)/plugin/Makefrag.am
@PLUGIN_NVPTX_TRUE@ $(toolexeclibdir)
libgomp_la_LIBADD =
@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo
-am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
- error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
- parallel.lo sections.lo single.lo task.lo team.lo work.lo \
- lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
- fortran.lo affinity.lo target.lo splay-tree.lo \
- libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \
- oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \
- priority_queue.lo $(am__objects_1)
+am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
+ env.lo error.lo icv.lo icv-device.lo iter.lo iter_ull.lo \
+ loop.lo loop_ull.lo ordered.lo parallel.lo sections.lo \
+ single.lo task.lo team.lo work.lo lock.lo mutex.lo proc.lo \
+ sem.lo bar.lo ptrlock.lo time.lo fortran.lo affinity.lo \
+ target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
+ oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
+ oacc-plugin.lo oacc-cuda.lo priority_queue.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/../depcomp
libgomp_la_DEPENDENCIES = $(libgomp_version_dep)
libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
-libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
- iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \
- single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \
- bar.c ptrlock.c time.c fortran.c affinity.c target.c \
- splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
- oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
- priority_queue.c $(am__append_3)
+libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
+ error.c icv.c icv-device.c iter.c iter_ull.c loop.c loop_ull.c \
+ ordered.c parallel.c sections.c single.c task.c team.c work.c \
+ lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c \
+ affinity.c target.c splay-tree.c libgomp-plugin.c \
+ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
+ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
+ $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bar.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/critical.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icv-device.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@
--- /dev/null
+/* Copyright (C) 2006-2016 Free Software Foundation, Inc.
+ Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is a generic stub implementation of a CPU affinity setting. */
+
+#include "libgomp.h"
+
+void
+gomp_init_affinity (void)
+{
+}
+
+#ifdef LIBGOMP_USE_PTHREADS
+void
+gomp_init_thread_affinity (pthread_attr_t *attr, unsigned int place)
+{
+ (void) attr;
+ (void) place;
+}
+#endif
+
+/* Stub allocator for affinity places. COUNT is ignored. Unless QUIET
+   is set, reports through gomp_error that affinity is not supported.
+   Always returns NULL. */
+void **
+gomp_affinity_alloc (unsigned long count, bool quiet)
+{
+ (void) count;
+ if (!quiet)
+ gomp_error ("Affinity not supported on this configuration");
+ return NULL;
+}
+
+void
+gomp_affinity_init_place (void *p)
+{
+ (void) p;
+}
+
+bool
+gomp_affinity_add_cpus (void *p, unsigned long num,
+ unsigned long len, long stride, bool quiet)
+{
+ (void) p;
+ (void) num;
+ (void) len;
+ (void) stride;
+ (void) quiet;
+ return false;
+}
+
+bool
+gomp_affinity_remove_cpu (void *p, unsigned long num)
+{
+ (void) p;
+ (void) num;
+ return false;
+}
+
+bool
+gomp_affinity_copy_place (void *p, void *q, long stride)
+{
+ (void) p;
+ (void) q;
+ (void) stride;
+ return false;
+}
+
+bool
+gomp_affinity_same_place (void *p, void *q)
+{
+ (void) p;
+ (void) q;
+ return false;
+}
+
+bool
+gomp_affinity_finalize_place_list (bool quiet)
+{
+ (void) quiet;
+ return false;
+}
+
+/* Stub for configurations without CPU affinity support.  LEVEL and COUNT
+   are ignored.  Unless QUIET is set, reports through gomp_error that
+   affinity is not supported.  Always returns false.  */
+bool
+gomp_affinity_init_level (int level, unsigned long count, bool quiet)
+{
+  (void) level;
+  (void) count;
+  if (!quiet)
+    gomp_error ("Affinity not supported on this configuration");
+  /* The return type is bool, so return false rather than the pointer
+     constant NULL; the value seen by callers is unchanged.  */
+  return false;
+}
+
+void
+gomp_affinity_print_place (void *p)
+{
+ (void) p;
+}
+
+int
+omp_get_place_num_procs (int place_num)
+{
+ (void) place_num;
+ return 0;
+}
+
+void
+omp_get_place_proc_ids (int place_num, int *ids)
+{
+ (void) place_num;
+ (void) ids;
+}
+
+void
+gomp_get_place_proc_ids_8 (int place_num, int64_t *ids)
+{
+ (void) place_num;
+ (void) ids;
+}
+
+ialias(omp_get_place_num_procs)
+ialias(omp_get_place_proc_ids)
--- /dev/null
+/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains helpers for the ATOMIC construct. */
+
+#include "libgomp.h"
+
+/* This mutex is used when atomic operations don't exist for the target
+ in the mode requested. The result is not globally atomic, but works so
+ long as all parallel references are within #pragma omp atomic directives.
+ According to responses received from omp@openmp.org, appears to be within
+ spec. Which makes sense, since that's how several other compilers
+ handle this situation as well. */
+
+static gomp_mutex_t atomic_lock;
+
+/* Enter the fallback atomic region by acquiring the file-scope
+   atomic_lock mutex. */
+void
+GOMP_atomic_start (void)
+{
+ gomp_mutex_lock (&atomic_lock);
+}
+
+/* Leave the fallback atomic region by releasing the file-scope
+   atomic_lock mutex. */
+void
+GOMP_atomic_end (void)
+{
+ gomp_mutex_unlock (&atomic_lock);
+}
+
+#if !GOMP_MUTEX_INIT_0
+static void __attribute__((constructor))
+initialize_atomic (void)
+{
+ gomp_mutex_init (&atomic_lock);
+}
+#endif
/* Define to 1 if GNU symbol versioning is used for libgomp. */
#undef LIBGOMP_GNU_SYMBOL_VERSIONING
+/* Define to 1 if libgomp should use POSIX threads. */
+#undef LIBGOMP_USE_PTHREADS
+
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
#include <sys/syscall.h>
#include "wait.h"
-
-/* The internal gomp_mutex_t and the external non-recursive omp_lock_t
- have the same form. Re-use it. */
-
-void
-gomp_init_lock_30 (omp_lock_t *lock)
-{
- gomp_mutex_init (lock);
-}
-
-void
-gomp_destroy_lock_30 (omp_lock_t *lock)
-{
- gomp_mutex_destroy (lock);
-}
-
-void
-gomp_set_lock_30 (omp_lock_t *lock)
-{
- gomp_mutex_lock (lock);
-}
-
-void
-gomp_unset_lock_30 (omp_lock_t *lock)
-{
- gomp_mutex_unlock (lock);
-}
-
-int
-gomp_test_lock_30 (omp_lock_t *lock)
-{
- int oldval = 0;
-
- return __atomic_compare_exchange_n (lock, &oldval, 1, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);
-}
-
-void
-gomp_init_nest_lock_30 (omp_nest_lock_t *lock)
-{
- memset (lock, '\0', sizeof (*lock));
-}
-
-void
-gomp_destroy_nest_lock_30 (omp_nest_lock_t *lock)
-{
-}
-
-void
-gomp_set_nest_lock_30 (omp_nest_lock_t *lock)
-{
- void *me = gomp_icv (true);
-
- if (lock->owner != me)
- {
- gomp_mutex_lock (&lock->lock);
- lock->owner = me;
- }
-
- lock->count++;
-}
-
-void
-gomp_unset_nest_lock_30 (omp_nest_lock_t *lock)
-{
- if (--lock->count == 0)
- {
- lock->owner = NULL;
- gomp_mutex_unlock (&lock->lock);
- }
-}
-
-int
-gomp_test_nest_lock_30 (omp_nest_lock_t *lock)
-{
- void *me = gomp_icv (true);
- int oldval;
-
- if (lock->owner == me)
- return ++lock->count;
-
- oldval = 0;
- if (__atomic_compare_exchange_n (&lock->lock, &oldval, 1, false,
- MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
- {
- lock->owner = me;
- lock->count = 1;
- return 1;
- }
-
- return 0;
-}
+/* Reuse the generic implementation in terms of gomp_mutex_t. */
+#include "../../lock.c"
#ifdef LIBGOMP_GNU_SYMBOL_VERSIONING
/* gomp_mutex_* can be safely locked in one thread and
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is an NVPTX specific implementation of a barrier synchronization
+ mechanism for libgomp. This type is private to the library. This
+ implementation uses atomic instructions and bar.sync instruction. */
+
+#include <limits.h>
+#include "libgomp.h"
+
+
+/* Finish a simple barrier wait on BAR. STATE is the value the caller
+   obtained from gomp_barrier_wait_start. If STATE has BAR_WAS_LAST set,
+   this thread re-arms bar->awaited and publishes the next generation
+   with a release store. Every caller, last or not, then executes the
+   PTX bar.sync instruction on barrier 1 with a count of 32 * bar->total.
+   NOTE(review): the factor 32 is presumably the warp size, i.e. one warp
+   per libgomp thread -- confirm against the nvptx team setup. */
+void
+gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
+{
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
+ {
+ /* Next time we'll be awaiting TOTAL threads again. */
+ bar->awaited = bar->total;
+ __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
+ MEMMODEL_RELEASE);
+ }
+ asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+}
+
+/* Register arrival at BAR with gomp_barrier_wait_start and complete the
+   wait with gomp_barrier_wait_end. */
+void
+gomp_barrier_wait (gomp_barrier_t *bar)
+{
+ gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
+}
+
+/* Like gomp_barrier_wait, except that if the encountering thread
+ is not the last one to hit the barrier, it returns immediately.
+ The intended usage is that a thread which intends to gomp_barrier_destroy
+ this barrier calls gomp_barrier_wait, while all other threads
+ call gomp_barrier_wait_last. When gomp_barrier_wait returns,
+ the barrier can be safely destroyed. */
+
+void
+gomp_barrier_wait_last (gomp_barrier_t *bar)
+{
+ /* Deferring to gomp_barrier_wait does not use the optimization opportunity
+ allowed by the interface contract for all-but-last participants. The
+ original implementation in config/linux/bar.c handles this better. */
+ gomp_barrier_wait (bar);
+}
+
+/* Wake operation for the team barrier. COUNT is not used here: the body
+   unconditionally executes bar.sync on barrier 1 with a count of
+   32 * bar->total, the same instruction the waiters execute. */
+void
+gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
+{
+ asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+}
+
+/* Complete a team barrier wait on BAR.  STATE is the value returned by
+   gomp_barrier_wait_start or gomp_barrier_wait_final_start for this
+   thread.
+
+   If STATE has BAR_WAS_LAST set, the arrival counter is re-armed and the
+   team's work_share_cancelled flag is cleared.  With no tasks outstanding
+   (team->task_count == 0) the next generation is published, bar.sync is
+   executed once and the function returns.  Otherwise the thread runs
+   tasks and then joins the waiters below.
+
+   Waiters repeatedly execute bar.sync and reload bar->generation, running
+   pending tasks when BAR_TASK_PENDING is set, until the generation equals
+   STATE (with BAR_CANCELLED masked out) plus BAR_INCR.  */
+void
+gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
+{
+  unsigned int gen;
+
+  if (__builtin_expect (state & BAR_WAS_LAST, 0))
+    {
+      /* Next time we'll be awaiting TOTAL threads again.  */
+      struct gomp_thread *thr = gomp_thread ();
+      struct gomp_team *team = thr->ts.team;
+
+      bar->awaited = bar->total;
+      team->work_share_cancelled = 0;
+      if (__builtin_expect (team->task_count, 0))
+        {
+          gomp_barrier_handle_tasks (state);
+          state &= ~BAR_WAS_LAST;
+        }
+      else
+        {
+          state &= ~BAR_CANCELLED;
+          state += BAR_INCR - BAR_WAS_LAST;
+          __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
+          asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+          return;
+        }
+    }
+
+  /* The loop exit test compares against the generation without the
+     cancellation flag.  */
+  state &= ~BAR_CANCELLED;
+  do
+    {
+      asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+        {
+          gomp_barrier_handle_tasks (state);
+          /* Running tasks may have advanced the barrier; reload.  */
+          gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+        }
+    }
+  while (gen != state + BAR_INCR);
+}
+
+/* Register arrival at BAR with gomp_barrier_wait_start and complete the
+   wait with gomp_team_barrier_wait_end. */
+void
+gomp_team_barrier_wait (gomp_barrier_t *bar)
+{
+ gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
+}
+
+/* Team barrier variant that registers arrival through
+   gomp_barrier_wait_final_start. The thread whose state has
+   BAR_WAS_LAST set resets bar->awaited_final to bar->total; every
+   thread then finishes in gomp_team_barrier_wait_end. */
+void
+gomp_team_barrier_wait_final (gomp_barrier_t *bar)
+{
+ gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
+ bar->awaited_final = bar->total;
+ gomp_team_barrier_wait_end (bar, state);
+}
+
+/* Cancellable counterpart of gomp_team_barrier_wait_end.  STATE is the
+   value returned by gomp_barrier_wait_start for this thread.
+
+   Returns true if cancellation is observed, either in STATE on entry or
+   in bar->generation while waiting.  Returns false once the barrier
+   generation has advanced to STATE + BAR_INCR.
+
+   If STATE has BAR_WAS_LAST set, the arrival counter is re-armed and the
+   team's work_share_cancelled flag is cleared.  With no tasks outstanding
+   the next generation is published, bar.sync is executed once and false
+   is returned.  Otherwise the thread runs tasks and then joins the
+   waiters below.  */
+bool
+gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
+                                   gomp_barrier_state_t state)
+{
+  unsigned int gen;
+
+  if (__builtin_expect (state & BAR_WAS_LAST, 0))
+    {
+      /* Next time we'll be awaiting TOTAL threads again.  */
+      /* BAR_CANCELLED should never be set in state here, because
+         cancellation means that at least one of the threads has been
+         cancelled, thus on a cancellable barrier we should never see
+         all threads to arrive.  */
+      struct gomp_thread *thr = gomp_thread ();
+      struct gomp_team *team = thr->ts.team;
+
+      bar->awaited = bar->total;
+      team->work_share_cancelled = 0;
+      if (__builtin_expect (team->task_count, 0))
+        {
+          gomp_barrier_handle_tasks (state);
+          state &= ~BAR_WAS_LAST;
+        }
+      else
+        {
+          state += BAR_INCR - BAR_WAS_LAST;
+          __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
+          asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+          return false;
+        }
+    }
+
+  /* Cancellation was already recorded when this thread arrived.  */
+  if (__builtin_expect (state & BAR_CANCELLED, 0))
+    return true;
+
+  do
+    {
+      asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
+      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+      if (__builtin_expect (gen & BAR_CANCELLED, 0))
+        return true;
+      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+        {
+          gomp_barrier_handle_tasks (state);
+          /* Running tasks may have advanced the barrier; reload.  */
+          gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+        }
+    }
+  while (gen != state + BAR_INCR);
+
+  return false;
+}
+
+/* Register arrival at BAR with gomp_barrier_wait_start and complete the
+   wait with gomp_team_barrier_wait_cancel_end, returning its result. */
+bool
+gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
+{
+ return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
+}
+
+/* Mark TEAM's barrier as cancelled.  The BAR_CANCELLED flag is tested and
+   set while holding team->task_lock.  Only the call that actually sets
+   the flag goes on to invoke gomp_team_barrier_wake; if the flag was
+   already set, nothing further is done.  */
+void
+gomp_team_barrier_cancel (struct gomp_team *team)
+{
+  bool newly_cancelled;
+
+  gomp_mutex_lock (&team->task_lock);
+  newly_cancelled = (team->barrier.generation & BAR_CANCELLED) == 0;
+  if (newly_cancelled)
+    team->barrier.generation |= BAR_CANCELLED;
+  gomp_mutex_unlock (&team->task_lock);
+
+  /* The wake-up is issued after the lock has been dropped.  */
+  if (newly_cancelled)
+    gomp_team_barrier_wake (&team->barrier, INT_MAX);
+}
--- /dev/null
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is an NVPTX specific implementation of a barrier synchronization
+ mechanism for libgomp. This type is private to the library. This
+ implementation uses atomic instructions and bar.sync instruction. */
+
+#ifndef GOMP_BARRIER_H
+#define GOMP_BARRIER_H 1
+
+#include "mutex.h"
+
+typedef struct
+{
+ unsigned total;
+ unsigned generation;
+ unsigned awaited;
+ unsigned awaited_final;
+} gomp_barrier_t;
+
+typedef unsigned int gomp_barrier_state_t;
+
+/* The generation field contains a counter in the high bits, with a few
+ low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can
+ share space because WAS_LAST is never stored back to generation. */
+#define BAR_TASK_PENDING 1
+#define BAR_WAS_LAST 1
+#define BAR_WAITING_FOR_TASK 2
+#define BAR_CANCELLED 4
+#define BAR_INCR 8
+
+/* Initialise BAR for COUNT participants: total, awaited and
+   awaited_final all start at COUNT, and generation starts at 0. */
+static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
+{
+ bar->total = count;
+ bar->awaited = count;
+ bar->awaited_final = count;
+ bar->generation = 0;
+}
+
+/* Change the number of participants of BAR to COUNT. The difference
+   between COUNT and the previous total is added atomically to
+   bar->awaited, so arrivals already counted are kept. awaited_final
+   and generation are not modified here. */
+static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
+{
+ __atomic_add_fetch (&bar->awaited, count - bar->total, MEMMODEL_ACQ_REL);
+ bar->total = count;
+}
+
+static inline void gomp_barrier_destroy (gomp_barrier_t *bar)
+{
+}
+
+extern void gomp_barrier_wait (gomp_barrier_t *);
+extern void gomp_barrier_wait_last (gomp_barrier_t *);
+extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
+extern void gomp_team_barrier_wait (gomp_barrier_t *);
+extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
+extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
+ gomp_barrier_state_t);
+extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
+extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
+ gomp_barrier_state_t);
+extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
+struct gomp_team;
+extern void gomp_team_barrier_cancel (struct gomp_team *);
+
+static inline gomp_barrier_state_t
+gomp_barrier_wait_start (gomp_barrier_t *bar)
+{
+ unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ ret &= -BAR_INCR | BAR_CANCELLED;
+ /* A memory barrier is needed before exiting from the various forms
+ of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section
+ 2.8.6 flush Construct, which says there is an implicit flush during
+ a barrier region. This is a convenient place to add the barrier,
+ so we use MEMMODEL_ACQ_REL here rather than MEMMODEL_ACQUIRE. */
+ if (__atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL) == 0)
+ ret |= BAR_WAS_LAST;
+ return ret;
+}
+
+static inline gomp_barrier_state_t
+gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
+{
+ return gomp_barrier_wait_start (bar);
+}
+
+/* This is like gomp_barrier_wait_start, except it decrements
+ bar->awaited_final rather than bar->awaited and should be used
+ for the gomp_team_end barrier only. */
+static inline gomp_barrier_state_t
+gomp_barrier_wait_final_start (gomp_barrier_t *bar)
+{
+ unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ ret &= -BAR_INCR | BAR_CANCELLED;
+ /* See above gomp_barrier_wait_start comment. */
+ if (__atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_ACQ_REL) == 0)
+ ret |= BAR_WAS_LAST;
+ return ret;
+}
+
+/* Return true when STATE carries the BAR_WAS_LAST flag.  */
+static inline bool
+gomp_barrier_last_thread (gomp_barrier_state_t state)
+{
+  return (state & BAR_WAS_LAST) != 0;
+}
+
+/* All the inlines below must be called with team->task_lock
+ held. */
+
+static inline void
+gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
+{
+ bar->generation |= BAR_TASK_PENDING;
+}
+
+static inline void
+gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
+{
+ bar->generation &= ~BAR_TASK_PENDING;
+}
+
+static inline void
+gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
+{
+ bar->generation |= BAR_WAITING_FOR_TASK;
+}
+
+static inline bool
+gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
+{
+ return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
+}
+
+static inline bool
+gomp_team_barrier_cancelled (gomp_barrier_t *bar)
+{
+ return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
+}
+
+/* Store into bar->generation the generation that follows STATE: the bits
+   of STATE below BAR_INCR (the flag bits) are cleared and BAR_INCR is
+   added. */
+static inline void
+gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
+{
+ bar->generation = (state & -BAR_INCR) + BAR_INCR;
+}
+
+#endif /* GOMP_BARRIER_H */
--- /dev/null
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is the NVPTX implementation of doacross spinning. */
+
+#ifndef GOMP_DOACROSS_H
+#define GOMP_DOACROSS_H 1
+
+#include "libgomp.h"
+
+static int zero;
+
+static inline int
+cpu_relax (void)
+{
+ int r;
+ /* Here we need a long-latency operation to make the current warp yield.
+ We could use ld.cv, uncached load from system (host) memory, but that
+ would require allocating locked memory in the plugin. Alternatively,
+ we can use ld.cg, which evicts from L1 and caches in L2. */
+ asm volatile ("ld.cg.s32 %0, [%1];" : "=r" (r) : "i" (&zero) : "memory");
+ return r;
+}
+
+/* Spin until the value read through ADDR exceeds EXPECTED. The incoming
+   CUR is overwritten on the first iteration before it is read. Each
+   iteration calls cpu_relax and uses its return value as the index into
+   ADDR; cpu_relax loads the file-scope variable 'zero', which nothing in
+   this header stores to, so the index is expected to be 0.
+   NOTE(review): confirm no including file writes to 'zero'. */
+static inline void doacross_spin (unsigned long *addr, unsigned long expected,
+ unsigned long cur)
+{
+ /* Prevent compiler from optimizing based on bounds of containing object. */
+ asm ("" : "+r" (addr));
+ do
+ {
+ int i = cpu_relax ();
+ cur = addr[i];
+ }
+ while (cur <= expected);
+}
+
+#endif /* GOMP_DOACROSS_H */
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains routines used to signal errors. On NVPTX, we have
+ one default output stream (stdout), so redirect everything there. */
+
+#include "libgomp.h"
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#undef vfprintf
+#undef fputs
+#undef fputc
+
+#define vfprintf(stream, fmt, list) vprintf (fmt, list)
+#define fputs(s, stream) printf ("%s", s)
+#define fputc(c, stream) printf ("%c", c)
+
+#include "../../error.c"
+++ /dev/null
-/* OpenACC Runtime Fortran wrapper routines
-
- Copyright (C) 2014-2016 Free Software Foundation, Inc.
-
- Contributed by Mentor Embedded.
-
- This file is part of the GNU Offloading and Multi Processing Library
- (libgomp).
-
- Libgomp is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-/* Temporary hack; this will be provided by libgfortran. */
-
-extern void _gfortran_abort (void);
-
-__asm__ ("// BEGIN GLOBAL FUNCTION DECL: _gfortran_abort\n"
- ".visible .func _gfortran_abort;\n"
- "// BEGIN GLOBAL FUNCTION DEF: _gfortran_abort\n"
- ".visible .func _gfortran_abort\n"
- "{\n"
- "trap;\n"
- "ret;\n"
- "}\n");
--- /dev/null
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file defines OpenMP API entry points that accelerator targets are
+ expected to replace. */
+
+#include "libgomp.h"
+
+void
+omp_set_default_device (int device_num __attribute__((unused)))
+{
+}
+
+int
+omp_get_default_device (void)
+{
+ return 0;
+}
+
+int
+omp_get_num_devices (void)
+{
+ return 0;
+}
+
+int
+omp_get_num_teams (void)
+{
+ return gomp_num_teams_var + 1;
+}
+
+int
+omp_get_team_num (void)
+{
+ int ctaid;
+ asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid));
+ return ctaid;
+}
+
+int
+omp_is_initial_device (void)
+{
+ /* NVPTX is an accelerator-only target. */
+ return 0;
+}
+
+ialias (omp_set_default_device)
+ialias (omp_get_default_device)
+ialias (omp_get_num_devices)
+ialias (omp_get_num_teams)
+ialias (omp_get_team_num)
+ialias (omp_is_initial_device)
+/* Copyright (C) 2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is an NVPTX specific implementation of the public OpenMP locking
+ primitives. */
+
+/* Reuse the generic implementation in terms of gomp_mutex_t. */
+#include "../../lock.c"
+
+ialias (omp_init_lock)
+ialias (omp_init_nest_lock)
+ialias (omp_destroy_lock)
+ialias (omp_destroy_nest_lock)
+ialias (omp_set_lock)
+ialias (omp_set_nest_lock)
+ialias (omp_unset_lock)
+ialias (omp_unset_nest_lock)
+ialias (omp_test_lock)
+ialias (omp_test_nest_lock)
--- /dev/null
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is an NVPTX specific implementation of a mutex synchronization
+ mechanism for libgomp. This type is private to the library. This
+ implementation uses atomic instructions and busy waiting. */
+
+#ifndef GOMP_MUTEX_H
+#define GOMP_MUTEX_H 1
+
+typedef int gomp_mutex_t;
+
+#define GOMP_MUTEX_INIT_0 1
+
+static inline void
+gomp_mutex_init (gomp_mutex_t *mutex)
+{
+ *mutex = 0;
+}
+
+static inline void
+gomp_mutex_destroy (gomp_mutex_t *mutex)
+{
+}
+
+static inline void
+gomp_mutex_lock (gomp_mutex_t *mutex)
+{
+ while (__sync_lock_test_and_set (mutex, 1))
+ /* spin */ ;
+}
+
+static inline void
+gomp_mutex_unlock (gomp_mutex_t *mutex)
+{
+ __sync_lock_release (mutex);
+}
+#endif /* GOMP_MUTEX_H */
--- /dev/null
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is the NVPTX implementation of the thread pool management
+ for libgomp. This type is private to the library. */
+
+#ifndef GOMP_POOL_H
+#define GOMP_POOL_H 1
+
+#include "libgomp.h"
+
+/* Get the thread pool. */
+
+static inline struct gomp_thread_pool *
+gomp_get_thread_pool (struct gomp_thread *thr, unsigned nthreads)
+{
+ /* NVPTX is running with a fixed pool of pre-started threads. */
+ return thr->thread_pool;
+}
+
+static inline void
+gomp_release_thread_pool (struct gomp_thread_pool *pool)
+{
+ /* Do nothing. */
+}
+
+#endif /* GOMP_POOL_H */
+++ /dev/null
-/* Empty stub for omp task priority support. */
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains system specific routines related to counting
+ online processors and dynamic load balancing. */
+
+#include "libgomp.h"
+
+unsigned
+gomp_dynamic_max_threads (void)
+{
+ return gomp_icv (false)->nthreads_var;
+}
+
+int
+omp_get_num_procs (void)
+{
+ return gomp_icv (false)->nthreads_var;
+}
--- /dev/null
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is an NVPTX specific implementation of a pointer-lock synchronization
+ mechanism for libgomp. This type is private to the library. This
+ implementation uses atomic instructions and busy waiting.
+
+ A ptrlock has four states:
+ 0/NULL Initial
+ 1 Owned by me, I get to write a pointer to ptrlock.
+ 2 Some thread is waiting on the ptrlock.
+ >2 Ptrlock contains a valid pointer.
+ It is not valid to gain the ptrlock and then write a NULL to it. */
+
+#ifndef GOMP_PTRLOCK_H
+#define GOMP_PTRLOCK_H 1
+
+typedef void *gomp_ptrlock_t;
+
+static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr)
+{
+ *ptrlock = ptr;
+}
+
+static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock)
+{
+ uintptr_t v = (uintptr_t) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
+ if (v > 2)
+ return (void *) v;
+
+ if (v == 0
+ && __atomic_compare_exchange_n (ptrlock, &v, 1, false,
+ MEMMODEL_ACQUIRE, MEMMODEL_ACQUIRE))
+ return NULL;
+
+ while (v == 1)
+ v = (uintptr_t) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
+
+ return (void *) v;
+}
+
+static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr)
+{
+ __atomic_store_n (ptrlock, ptr, MEMMODEL_RELEASE);
+}
+
+static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock)
+{
+}
+
+#endif /* GOMP_PTRLOCK_H */
--- /dev/null
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is an NVPTX specific implementation of a semaphore synchronization
+ mechanism for libgomp. This type is private to the library. This
+ semaphore implementation uses atomic instructions and busy waiting. */
+
+#ifndef GOMP_SEM_H
+#define GOMP_SEM_H 1
+
+typedef int gomp_sem_t;
+
+static inline void
+gomp_sem_init (gomp_sem_t *sem, int value)
+{
+ *sem = value;
+}
+
+static inline void
+gomp_sem_destroy (gomp_sem_t *sem)
+{
+}
+
+static inline void
+gomp_sem_wait (gomp_sem_t *sem)
+{
+ int count = __atomic_load_n (sem, MEMMODEL_ACQUIRE);
+ for (;;)
+ {
+ while (count == 0)
+ count = __atomic_load_n (sem, MEMMODEL_ACQUIRE);
+ if (__atomic_compare_exchange_n (sem, &count, count - 1, false,
+ MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ return;
+ }
+}
+
+static inline void
+gomp_sem_post (gomp_sem_t *sem)
+{
+ (void) __atomic_add_fetch (sem, 1, MEMMODEL_RELEASE);
+}
+#endif /* GOMP_SEM_H */
--- /dev/null
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is a simplified barrier that is suitable for thread pool
+ synchronization. Only a subset of the full barrier API (bar.h) is exposed.
+ Here in the NVPTX-specific implementation, we expect that thread pool
+ corresponds to a PTX CTA (thread block). */
+
+#ifndef GOMP_SIMPLE_BARRIER_H
+#define GOMP_SIMPLE_BARRIER_H 1
+
+typedef struct
+{
+ unsigned count;
+} gomp_simple_barrier_t;
+
+static inline void
+gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count)
+{
+ bar->count = count * 32;
+}
+
+/* Unused on NVPTX.
+static inline void
+gomp_simple_barrier_reinit (gomp_simple_barrier_t *bar, unsigned count)
+{
+ bar->count = count * 32;
+}
+*/
+
+static inline void
+gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar)
+{
+}
+
+static inline void
+gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
+{
+ asm volatile ("bar.sync 0, %0;" : : "r" (bar->count) : "memory");
+}
+
+static inline void
+gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar)
+{
+ asm volatile ("bar.arrive 0, %0;" : : "r" (bar->count) : "memory");
+}
+
+#endif /* GOMP_SIMPLE_BARRIER_H */
+/* Copyright (C) 2013-2016 Free Software Foundation, Inc.
+ Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "libgomp.h"
+#include <limits.h>
+
+void
+GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
+{
+ if (thread_limit)
+ {
+ struct gomp_task_icv *icv = gomp_icv (true);
+ icv->thread_limit_var
+ = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
+ }
+ unsigned int num_blocks, block_id;
+ asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks));
+ asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
+ if (!num_teams || num_teams >= num_blocks)
+ num_teams = num_blocks;
+ else if (block_id >= num_teams)
+ {
+ gomp_free_thread (nvptx_thrs);
+ asm ("exit;");
+ }
+ gomp_num_teams_var = num_teams - 1;
+}
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file handles the maintenance of tasks in response to task
+ creation and termination. */
+
+#ifdef __nvptx_softstack__
+
+#include "libgomp.h"
+
+/* NVPTX is an accelerator-only target, so this should never be called. */
+
+bool
+gomp_target_task_fn (void *data)
+{
+ __builtin_unreachable ();
+}
+
+#include "../../task.c"
+
+#endif
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file handles the maintenance of threads on NVPTX. */
+
+#if defined __nvptx_softstack__ && defined __nvptx_unisimt__
+
+#include "libgomp.h"
+#include <stdlib.h>
+#include <string.h>
+
+struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
+
+static void gomp_thread_start (struct gomp_thread_pool *);
+
+
+/* This externally visible function handles target region entry. It
+ sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
+ in the master thread or gomp_thread_start in other threads.
+
+ The name of this function is part of the interface with the compiler: for
+ each target region, GCC emits a PTX .kernel function that sets up soft-stack
+ and uniform-simt state and calls this function, passing in FN the original
+ function outlined for the target region. */
+
+void
+gomp_nvptx_main (void (*fn) (void *), void *fn_data)
+{
+ int tid, ntids;
+ asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
+ asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
+ if (tid == 0)
+ {
+ gomp_global_icv.nthreads_var = ntids;
+ /* Starting additional threads is not supported. */
+ gomp_global_icv.dyn_var = true;
+
+ nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
+ memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
+
+ struct gomp_thread_pool *pool = alloca (sizeof (*pool));
+ pool->threads = alloca (ntids * sizeof (*pool->threads));
+ for (tid = 0; tid < ntids; tid++)
+ pool->threads[tid] = nvptx_thrs + tid;
+ pool->threads_size = ntids;
+ pool->threads_used = ntids;
+ pool->threads_busy = 1;
+ pool->last_team = NULL;
+ gomp_simple_barrier_init (&pool->threads_dock, ntids);
+
+ nvptx_thrs[0].thread_pool = pool;
+ asm ("bar.sync 0;");
+ fn (fn_data);
+
+ gomp_free_thread (nvptx_thrs);
+ }
+ else
+ {
+ asm ("bar.sync 0;");
+ gomp_thread_start (nvptx_thrs[0].thread_pool);
+ }
+}
+
+/* This function contains the idle loop in which a thread waits
+ to be called up to become part of a team. */
+
+static void
+gomp_thread_start (struct gomp_thread_pool *pool)
+{
+ struct gomp_thread *thr = gomp_thread ();
+
+ gomp_sem_init (&thr->release, 0);
+ thr->thread_pool = pool;
+
+ do
+ {
+ gomp_simple_barrier_wait (&pool->threads_dock);
+ if (!thr->fn)
+ continue;
+ thr->fn (thr->data);
+ thr->fn = NULL;
+
+ struct gomp_task *task = thr->task;
+ gomp_team_barrier_wait_final (&thr->ts.team->barrier);
+ gomp_finish_task (task);
+ }
+ /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
+ it can trash stack pointer R1 in loops lacking exit edges. Add a cheap
+ artificial exit that the driver would not be able to optimize out. */
+ while (nvptx_thrs);
+}
+
+/* Launch a team. */
+
+void
+gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
+ unsigned flags, struct gomp_team *team)
+{
+ struct gomp_thread *thr, *nthr;
+ struct gomp_task *task;
+ struct gomp_task_icv *icv;
+ struct gomp_thread_pool *pool;
+ unsigned long nthreads_var;
+
+ thr = gomp_thread ();
+ pool = thr->thread_pool;
+ task = thr->task;
+ icv = task ? &task->icv : &gomp_global_icv;
+
+ /* Always save the previous state, even if this isn't a nested team.
+ In particular, we should save any work share state from an outer
+ orphaned work share construct. */
+ team->prev_ts = thr->ts;
+
+ thr->ts.team = team;
+ thr->ts.team_id = 0;
+ ++thr->ts.level;
+ if (nthreads > 1)
+ ++thr->ts.active_level;
+ thr->ts.work_share = &team->work_shares[0];
+ thr->ts.last_work_share = NULL;
+ thr->ts.single_count = 0;
+ thr->ts.static_trip = 0;
+ thr->task = &team->implicit_task[0];
+ nthreads_var = icv->nthreads_var;
+ gomp_init_task (thr->task, task, icv);
+ team->implicit_task[0].icv.nthreads_var = nthreads_var;
+
+ if (nthreads == 1)
+ return;
+
+ /* Release existing idle threads. */
+ for (unsigned i = 1; i < nthreads; ++i)
+ {
+ nthr = pool->threads[i];
+ nthr->ts.team = team;
+ nthr->ts.work_share = &team->work_shares[0];
+ nthr->ts.last_work_share = NULL;
+ nthr->ts.team_id = i;
+ nthr->ts.level = team->prev_ts.level + 1;
+ nthr->ts.active_level = thr->ts.active_level;
+ nthr->ts.single_count = 0;
+ nthr->ts.static_trip = 0;
+ nthr->task = &team->implicit_task[i];
+ gomp_init_task (nthr->task, task, icv);
+ team->implicit_task[i].icv.nthreads_var = nthreads_var;
+ nthr->fn = fn;
+ nthr->data = data;
+ team->ordered_release[i] = &nthr->release;
+ }
+
+ gomp_simple_barrier_wait (&pool->threads_dock);
+}
+
+#include "../../team.c"
+#endif
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Dmitry Melnik <dm@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file implements timer routines for NVPTX. It uses the %clock64 cycle
+ counter. */
+
+#include "libgomp.h"
+
+/* This is set from host in plugin-nvptx.c. */
+double __nvptx_clocktick = 0;
+
+double
+omp_get_wtime (void)
+{
+ uint64_t clock;
+ asm ("mov.u64 %0, %%clock64;" : "=r" (clock));
+ return clock * __nvptx_clocktick;
+}
+
+double
+omp_get_wtick (void)
+{
+ return __nvptx_clocktick;
+}
+
+ialias (omp_get_wtime)
+ialias (omp_get_wtick)
+++ /dev/null
-/* Copyright (C) 2006-2016 Free Software Foundation, Inc.
- Contributed by Jakub Jelinek <jakub@redhat.com>.
-
- This file is part of the GNU Offloading and Multi Processing Library
- (libgomp).
-
- Libgomp is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-/* This is a generic stub implementation of a CPU affinity setting. */
-
-#include "libgomp.h"
-
-void
-gomp_init_affinity (void)
-{
-}
-
-void
-gomp_init_thread_affinity (pthread_attr_t *attr, unsigned int place)
-{
- (void) attr;
- (void) place;
-}
-
-void **
-gomp_affinity_alloc (unsigned long count, bool quiet)
-{
- (void) count;
- if (!quiet)
- gomp_error ("Affinity not supported on this configuration");
- return NULL;
-}
-
-void
-gomp_affinity_init_place (void *p)
-{
- (void) p;
-}
-
-bool
-gomp_affinity_add_cpus (void *p, unsigned long num,
- unsigned long len, long stride, bool quiet)
-{
- (void) p;
- (void) num;
- (void) len;
- (void) stride;
- (void) quiet;
- return false;
-}
-
-bool
-gomp_affinity_remove_cpu (void *p, unsigned long num)
-{
- (void) p;
- (void) num;
- return false;
-}
-
-bool
-gomp_affinity_copy_place (void *p, void *q, long stride)
-{
- (void) p;
- (void) q;
- (void) stride;
- return false;
-}
-
-bool
-gomp_affinity_same_place (void *p, void *q)
-{
- (void) p;
- (void) q;
- return false;
-}
-
-bool
-gomp_affinity_finalize_place_list (bool quiet)
-{
- (void) quiet;
- return false;
-}
-
-bool
-gomp_affinity_init_level (int level, unsigned long count, bool quiet)
-{
- (void) level;
- (void) count;
- (void) quiet;
- if (!quiet)
- gomp_error ("Affinity not supported on this configuration");
- return NULL;
-}
-
-void
-gomp_affinity_print_place (void *p)
-{
- (void) p;
-}
-
-int
-omp_get_place_num_procs (int place_num)
-{
- (void) place_num;
- return 0;
-}
-
-void
-omp_get_place_proc_ids (int place_num, int *ids)
-{
- (void) place_num;
- (void) ids;
-}
-
-void
-gomp_get_place_proc_ids_8 (int place_num, int64_t *ids)
-{
- (void) place_num;
- (void) ids;
-}
-
-ialias(omp_get_place_num_procs)
-ialias(omp_get_place_proc_ids)
--- /dev/null
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is a simplified barrier that is suitable for thread pool
+ synchronization. Only a subset of the full barrier API (bar.h) is exposed. */
+
+#ifndef GOMP_SIMPLE_BARRIER_H
+#define GOMP_SIMPLE_BARRIER_H 1
+
+#include "bar.h"
+
+typedef struct
+{
+ gomp_barrier_t bar;
+} gomp_simple_barrier_t;
+
+static inline void
+gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count)
+{
+ gomp_barrier_init (&bar->bar, count);
+}
+
+static inline void
+gomp_simple_barrier_reinit (gomp_simple_barrier_t *bar, unsigned count)
+{
+ gomp_barrier_reinit (&bar->bar, count);
+}
+
+static inline void
+gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar)
+{
+ gomp_barrier_destroy (&bar->bar);
+}
+
+static inline void
+gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
+{
+ gomp_barrier_wait (&bar->bar);
+}
+
+static inline void
+gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar)
+{
+ gomp_barrier_wait_last (&bar->bar);
+}
+
+#endif /* GOMP_SIMPLE_BARRIER_H */
;;
nvptx*-*-*)
# NVPTX does not support Pthreads, has its own code replacement.
+ libgomp_use_pthreads=no
;;
*)
# Check to see if -pthread or -lpthread is needed. Prefer the former.
conftest$ac_exeext conftest.$ac_ext
esac
+if test x$libgomp_use_pthreads != xno; then
+
+$as_echo "#define LIBGOMP_USE_PTHREADS 1" >>confdefs.h
+
+fi
+
# Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*-
#
# Copyright (C) 2014-2016 Free Software Foundation, Inc.
;;
nvptx*-*-*)
# NVPTX does not support Pthreads, has its own code replacement.
+ libgomp_use_pthreads=no
;;
*)
# Check to see if -pthread or -lpthread is needed. Prefer the former.
[AC_MSG_ERROR([Pthreads are required to build libgomp])])])
esac
+if test x$libgomp_use_pthreads != xno; then
+ AC_DEFINE(LIBGOMP_USE_PTHREADS, 1,
+ [Define to 1 if libgomp should use POSIX threads.])
+fi
+
m4_include([plugin/configfrag.ac])
# Check for functions needed.
gomp_mutex_unlock (plock);
}
-/* This mutex is used when atomic operations don't exist for the target
- in the mode requested. The result is not globally atomic, but works so
- long as all parallel references are within #pragma omp atomic directives.
- According to responses received from omp@openmp.org, appears to be within
- spec. Which makes sense, since that's how several other compilers
- handle this situation as well. */
-
-static gomp_mutex_t atomic_lock;
-
-void
-GOMP_atomic_start (void)
-{
- gomp_mutex_lock (&atomic_lock);
-}
-
-void
-GOMP_atomic_end (void)
-{
- gomp_mutex_unlock (&atomic_lock);
-}
-
#if !GOMP_MUTEX_INIT_0
static void __attribute__((constructor))
initialize_critical (void)
{
gomp_mutex_init (&default_lock);
- gomp_mutex_init (&atomic_lock);
#ifndef HAVE_SYNC_BUILTINS
gomp_mutex_init (&create_lock_lock);
#endif
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-/* This file defines the OpenMP internal control variables, and arranges
- for them to be initialized from environment variables at startup. */
+/* This file arranges for OpenMP internal control variables to be initialized
+ from environment variables at startup. */
#include "libgomp.h"
#include "libgomp_f.h"
# define strtoull(ptr, eptr, base) strtoul (ptr, eptr, base)
#endif
-struct gomp_task_icv gomp_global_icv = {
- .nthreads_var = 1,
- .thread_limit_var = UINT_MAX,
- .run_sched_var = GFS_DYNAMIC,
- .run_sched_chunk_size = 1,
- .default_device_var = 0,
- .dyn_var = false,
- .nest_var = false,
- .bind_var = omp_proc_bind_false,
- .target_data = NULL
-};
-
-unsigned long gomp_max_active_levels_var = INT_MAX;
-bool gomp_cancel_var = false;
-int gomp_max_task_priority_var = 0;
-#ifndef HAVE_SYNC_BUILTINS
-gomp_mutex_t gomp_managed_threads_lock;
-#endif
-unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
-unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
-unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
-char *gomp_bind_var_list;
-unsigned long gomp_bind_var_list_len;
-void **gomp_places_list;
-unsigned long gomp_places_list_len;
-int gomp_debug_var;
-char *goacc_device_type;
-int goacc_device_num;
-
/* Parse the OMP_SCHEDULE environment variable. */
static void
goacc_runtime_initialize ();
}
-
-\f
-/* The public OpenMP API routines that access these variables. */
-
-void
-omp_set_num_threads (int n)
-{
- struct gomp_task_icv *icv = gomp_icv (true);
- icv->nthreads_var = (n > 0 ? n : 1);
-}
-
-void
-omp_set_dynamic (int val)
-{
- struct gomp_task_icv *icv = gomp_icv (true);
- icv->dyn_var = val;
-}
-
-int
-omp_get_dynamic (void)
-{
- struct gomp_task_icv *icv = gomp_icv (false);
- return icv->dyn_var;
-}
-
-void
-omp_set_nested (int val)
-{
- struct gomp_task_icv *icv = gomp_icv (true);
- icv->nest_var = val;
-}
-
-int
-omp_get_nested (void)
-{
- struct gomp_task_icv *icv = gomp_icv (false);
- return icv->nest_var;
-}
-
-void
-omp_set_schedule (omp_sched_t kind, int chunk_size)
-{
- struct gomp_task_icv *icv = gomp_icv (true);
- switch (kind)
- {
- case omp_sched_static:
- if (chunk_size < 1)
- chunk_size = 0;
- icv->run_sched_chunk_size = chunk_size;
- break;
- case omp_sched_dynamic:
- case omp_sched_guided:
- if (chunk_size < 1)
- chunk_size = 1;
- icv->run_sched_chunk_size = chunk_size;
- break;
- case omp_sched_auto:
- break;
- default:
- return;
- }
- icv->run_sched_var = kind;
-}
-
-void
-omp_get_schedule (omp_sched_t *kind, int *chunk_size)
-{
- struct gomp_task_icv *icv = gomp_icv (false);
- *kind = icv->run_sched_var;
- *chunk_size = icv->run_sched_chunk_size;
-}
-
-int
-omp_get_max_threads (void)
-{
- struct gomp_task_icv *icv = gomp_icv (false);
- return icv->nthreads_var;
-}
-
-int
-omp_get_thread_limit (void)
-{
- struct gomp_task_icv *icv = gomp_icv (false);
- return icv->thread_limit_var > INT_MAX ? INT_MAX : icv->thread_limit_var;
-}
-
-void
-omp_set_max_active_levels (int max_levels)
-{
- if (max_levels >= 0)
- gomp_max_active_levels_var = max_levels;
-}
-
-int
-omp_get_max_active_levels (void)
-{
- return gomp_max_active_levels_var;
-}
-
-int
-omp_get_cancellation (void)
-{
- return gomp_cancel_var;
-}
-
-int
-omp_get_max_task_priority (void)
-{
- return gomp_max_task_priority_var;
-}
-
-omp_proc_bind_t
-omp_get_proc_bind (void)
-{
- struct gomp_task_icv *icv = gomp_icv (false);
- return icv->bind_var;
-}
-
-void
-omp_set_default_device (int device_num)
-{
- struct gomp_task_icv *icv = gomp_icv (true);
- icv->default_device_var = device_num >= 0 ? device_num : 0;
-}
-
-int
-omp_get_default_device (void)
-{
- struct gomp_task_icv *icv = gomp_icv (false);
- return icv->default_device_var;
-}
-
-int
-omp_get_num_devices (void)
-{
- return gomp_get_num_devices ();
-}
-
-int
-omp_get_num_teams (void)
-{
- /* Hardcoded to 1 on host, MIC, HSAIL? Maybe variable on PTX. */
- return 1;
-}
-
-int
-omp_get_team_num (void)
-{
- /* Hardcoded to 0 on host, MIC, HSAIL? Maybe variable on PTX. */
- return 0;
-}
-
-int
-omp_is_initial_device (void)
-{
- /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */
- return 1;
-}
-
-int
-omp_get_initial_device (void)
-{
- return GOMP_DEVICE_HOST_FALLBACK;
-}
-
-int
-omp_get_num_places (void)
-{
- return gomp_places_list_len;
-}
-
-int
-omp_get_place_num (void)
-{
- if (gomp_places_list == NULL)
- return -1;
-
- struct gomp_thread *thr = gomp_thread ();
- if (thr->place == 0)
- gomp_init_affinity ();
-
- return (int) thr->place - 1;
-}
-
-int
-omp_get_partition_num_places (void)
-{
- if (gomp_places_list == NULL)
- return 0;
-
- struct gomp_thread *thr = gomp_thread ();
- if (thr->place == 0)
- gomp_init_affinity ();
-
- return thr->ts.place_partition_len;
-}
-
-void
-omp_get_partition_place_nums (int *place_nums)
-{
- if (gomp_places_list == NULL)
- return;
-
- struct gomp_thread *thr = gomp_thread ();
- if (thr->place == 0)
- gomp_init_affinity ();
-
- unsigned int i;
- for (i = 0; i < thr->ts.place_partition_len; i++)
- *place_nums++ = thr->ts.place_partition_off + i;
-}
-
-ialias (omp_set_dynamic)
-ialias (omp_set_nested)
-ialias (omp_set_num_threads)
-ialias (omp_get_dynamic)
-ialias (omp_get_nested)
-ialias (omp_set_schedule)
-ialias (omp_get_schedule)
-ialias (omp_get_max_threads)
-ialias (omp_get_thread_limit)
-ialias (omp_set_max_active_levels)
-ialias (omp_get_max_active_levels)
-ialias (omp_get_cancellation)
-ialias (omp_get_proc_bind)
-ialias (omp_set_default_device)
-ialias (omp_get_default_device)
-ialias (omp_get_num_devices)
-ialias (omp_get_num_teams)
-ialias (omp_get_team_num)
-ialias (omp_is_initial_device)
-ialias (omp_get_initial_device)
-ialias (omp_get_max_task_priority)
-ialias (omp_get_num_places)
-ialias (omp_get_place_num)
-ialias (omp_get_partition_num_places)
-ialias (omp_get_partition_place_nums)
--- /dev/null
+/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file defines OpenMP API entry points that accelerator targets are
+ expected to replace. */
+
+#include "libgomp.h"
+
+/* Store DEVICE_NUM as the default-device ICV in the ICV block returned by
+   gomp_icv (true).  Negative values are stored as 0.  */
+
+void
+omp_set_default_device (int device_num)
+{
+  struct gomp_task_icv *task_icv = gomp_icv (true);
+
+  if (device_num < 0)
+    device_num = 0;
+  task_icv->default_device_var = device_num;
+}
+
+/* Return the default-device ICV from the ICV block returned by
+   gomp_icv (false).  */
+
+int
+omp_get_default_device (void)
+{
+  return gomp_icv (false)->default_device_var;
+}
+
+/* Public entry point; returns whatever gomp_get_num_devices reports.  */
+
+int
+omp_get_num_devices (void)
+{
+ return gomp_get_num_devices ();
+}
+
+/* Generic version: always reports a single team.  Per this file's header
+   comment, accelerator targets are expected to replace this entry point.  */
+
+int
+omp_get_num_teams (void)
+{
+ /* Hardcoded to 1 on host, MIC, HSAIL? Maybe variable on PTX. */
+ return 1;
+}
+
+/* Generic version: always reports team number 0.  Per this file's header
+   comment, accelerator targets are expected to replace this entry point.  */
+
+int
+omp_get_team_num (void)
+{
+ /* Hardcoded to 0 on host, MIC, HSAIL? Maybe variable on PTX. */
+ return 0;
+}
+
+/* Generic version: always returns 1.  Per this file's header comment,
+   accelerator targets are expected to replace this entry point.  */
+
+int
+omp_is_initial_device (void)
+{
+ /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */
+ return 1;
+}
+
+/* Apply ialias to every entry point defined in this file.
+   NOTE(review): ialias is defined elsewhere; presumably it creates the
+   library-internal aliases for these symbols -- confirm.  */
+ialias (omp_set_default_device)
+ialias (omp_get_default_device)
+ialias (omp_get_num_devices)
+ialias (omp_get_num_teams)
+ialias (omp_get_team_num)
+ialias (omp_is_initial_device)
--- /dev/null
+/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file defines the OpenMP internal control variables and associated
+ OpenMP API entry points. */
+
+#include "libgomp.h"
+#include "gomp-constants.h"
+#include <limits.h>
+
+/* The global ICV block and its static initial values.
+   NOTE(review): env.c presumably overrides these from the environment at
+   startup, per its file comment -- confirm.  */
+struct gomp_task_icv gomp_global_icv = {
+ .nthreads_var = 1,
+ .thread_limit_var = UINT_MAX,
+ .run_sched_var = GFS_DYNAMIC,
+ .run_sched_chunk_size = 1,
+ .default_device_var = 0,
+ .dyn_var = false,
+ .nest_var = false,
+ .bind_var = omp_proc_bind_false,
+ .target_data = NULL
+};
+
+/* Definitions of the remaining global control variables.  Objects without
+   an explicit initializer have static storage and therefore start out zero
+   (or NULL).  gomp_managed_threads_lock only exists when the __sync
+   builtins are unavailable.  */
+unsigned long gomp_max_active_levels_var = INT_MAX;
+bool gomp_cancel_var = false;
+int gomp_max_task_priority_var = 0;
+#ifndef HAVE_SYNC_BUILTINS
+gomp_mutex_t gomp_managed_threads_lock;
+#endif
+unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
+unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
+unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
+char *gomp_bind_var_list;
+unsigned long gomp_bind_var_list_len;
+void **gomp_places_list;
+unsigned long gomp_places_list_len;
+int gomp_debug_var;
+unsigned int gomp_num_teams_var;
+char *goacc_device_type;
+int goacc_device_num;
+
+/* Set the nthreads ICV in the ICV block returned by gomp_icv (true).
+   Values of N below 1 are stored as 1.  */
+
+void
+omp_set_num_threads (int n)
+{
+  struct gomp_task_icv *task_icv = gomp_icv (true);
+
+  if (n <= 0)
+    n = 1;
+  task_icv->nthreads_var = n;
+}
+
+/* Store VAL in the dyn_var field of the ICV block returned by
+   gomp_icv (true).  */
+
+void
+omp_set_dynamic (int val)
+{
+  gomp_icv (true)->dyn_var = val;
+}
+
+/* Return the dyn_var field of the ICV block returned by gomp_icv (false).  */
+
+int
+omp_get_dynamic (void)
+{
+  return gomp_icv (false)->dyn_var;
+}
+
+/* Store VAL in the nest_var field of the ICV block returned by
+   gomp_icv (true).  */
+
+void
+omp_set_nested (int val)
+{
+  gomp_icv (true)->nest_var = val;
+}
+
+/* Return the nest_var field of the ICV block returned by gomp_icv (false).  */
+
+int
+omp_get_nested (void)
+{
+  return gomp_icv (false)->nest_var;
+}
+
+/* Set the run-time schedule kind and chunk size.
+
+   For omp_sched_static a CHUNK_SIZE below 1 is stored as 0; for
+   omp_sched_dynamic and omp_sched_guided it is stored as 1.
+   omp_sched_auto leaves the stored chunk size untouched.  Any other KIND
+   is ignored and nothing is modified.  */
+
+void
+omp_set_schedule (omp_sched_t kind, int chunk_size)
+{
+  struct gomp_task_icv *task_icv = gomp_icv (true);
+
+  if (kind == omp_sched_static)
+    task_icv->run_sched_chunk_size = chunk_size < 1 ? 0 : chunk_size;
+  else if (kind == omp_sched_dynamic || kind == omp_sched_guided)
+    task_icv->run_sched_chunk_size = chunk_size < 1 ? 1 : chunk_size;
+  else if (kind != omp_sched_auto)
+    return;
+
+  task_icv->run_sched_var = kind;
+}
+
+/* Copy the run-time schedule kind to *KIND and its chunk size to
+   *CHUNK_SIZE, both read from the ICV block returned by gomp_icv (false).  */
+
+void
+omp_get_schedule (omp_sched_t *kind, int *chunk_size)
+{
+  struct gomp_task_icv *task_icv = gomp_icv (false);
+
+  *kind = task_icv->run_sched_var;
+  *chunk_size = task_icv->run_sched_chunk_size;
+}
+
+/* Return the nthreads ICV from the ICV block returned by gomp_icv (false).  */
+
+int
+omp_get_max_threads (void)
+{
+  return gomp_icv (false)->nthreads_var;
+}
+
+/* Return the thread-limit ICV, saturated to INT_MAX so that it fits the
+   int return type.  */
+
+int
+omp_get_thread_limit (void)
+{
+  struct gomp_task_icv *task_icv = gomp_icv (false);
+
+  if (task_icv->thread_limit_var > INT_MAX)
+    return INT_MAX;
+  return task_icv->thread_limit_var;
+}
+
+/* Set gomp_max_active_levels_var to MAX_LEVELS.  Negative values are
+   ignored.  */
+
+void
+omp_set_max_active_levels (int max_levels)
+{
+  if (max_levels < 0)
+    return;
+
+  gomp_max_active_levels_var = max_levels;
+}
+
+/* Return gomp_max_active_levels_var (an unsigned long) converted to int.  */
+
+int
+omp_get_max_active_levels (void)
+{
+ return gomp_max_active_levels_var;
+}
+
+/* Return the value of gomp_cancel_var (a bool) as an int.  */
+
+int
+omp_get_cancellation (void)
+{
+ return gomp_cancel_var;
+}
+
+/* Return the value of gomp_max_task_priority_var.  */
+
+int
+omp_get_max_task_priority (void)
+{
+ return gomp_max_task_priority_var;
+}
+
+/* Return the bind_var field of the ICV block returned by gomp_icv (false).  */
+
+omp_proc_bind_t
+omp_get_proc_bind (void)
+{
+  return gomp_icv (false)->bind_var;
+}
+
+/* Return the constant GOMP_DEVICE_HOST_FALLBACK.  */
+
+int
+omp_get_initial_device (void)
+{
+ return GOMP_DEVICE_HOST_FALLBACK;
+}
+
+/* Return gomp_places_list_len (an unsigned long) converted to int.  */
+
+int
+omp_get_num_places (void)
+{
+ return gomp_places_list_len;
+}
+
+/* Return the calling thread's `place' value minus one, or -1 when no
+   places list exists.  If the thread's place is still 0,
+   gomp_init_affinity is called before the value is read.  */
+
+int
+omp_get_place_num (void)
+{
+  struct gomp_thread *self;
+
+  if (gomp_places_list == NULL)
+    return -1;
+
+  self = gomp_thread ();
+  if (self->place == 0)
+    gomp_init_affinity ();
+
+  return (int) self->place - 1;
+}
+
+/* Return the length of the calling thread's place partition, or 0 when no
+   places list exists.  If the thread's place is still 0,
+   gomp_init_affinity is called before the length is read.  */
+
+int
+omp_get_partition_num_places (void)
+{
+  struct gomp_thread *self;
+
+  if (gomp_places_list == NULL)
+    return 0;
+
+  self = gomp_thread ();
+  if (self->place == 0)
+    gomp_init_affinity ();
+
+  return self->ts.place_partition_len;
+}
+
+/* Fill PLACE_NUMS with the consecutive place numbers of the calling
+   thread's partition: place_partition_off, place_partition_off + 1, ...,
+   place_partition_len entries in total.  Nothing is written when no places
+   list exists.  If the thread's place is still 0, gomp_init_affinity is
+   called first.  */
+
+void
+omp_get_partition_place_nums (int *place_nums)
+{
+  struct gomp_thread *self;
+  unsigned int idx;
+
+  if (gomp_places_list == NULL)
+    return;
+
+  self = gomp_thread ();
+  if (self->place == 0)
+    gomp_init_affinity ();
+
+  for (idx = 0; idx < self->ts.place_partition_len; idx++)
+    place_nums[idx] = self->ts.place_partition_off + idx;
+}
+
+/* Apply ialias to every entry point defined in this file.
+   NOTE(review): ialias is defined elsewhere; presumably it creates the
+   library-internal aliases for these symbols -- confirm.  */
+ialias (omp_set_dynamic)
+ialias (omp_set_nested)
+ialias (omp_set_num_threads)
+ialias (omp_get_dynamic)
+ialias (omp_get_nested)
+ialias (omp_set_schedule)
+ialias (omp_get_schedule)
+ialias (omp_get_max_threads)
+ialias (omp_get_thread_limit)
+ialias (omp_set_max_active_levels)
+ialias (omp_get_max_active_levels)
+ialias (omp_get_cancellation)
+ialias (omp_get_proc_bind)
+ialias (omp_get_initial_device)
+ialias (omp_get_max_task_priority)
+ialias (omp_get_num_places)
+ialias (omp_get_place_num)
+ialias (omp_get_partition_num_places)
+ialias (omp_get_partition_place_nums)
#include "gstdint.h"
#include "libgomp-plugin.h"
+#ifdef HAVE_PTHREAD_H
#include <pthread.h>
+#endif
#include <stdbool.h>
#include <stdlib.h>
#include <stdarg.h>
#include "sem.h"
#include "mutex.h"
#include "bar.h"
+#include "simple-bar.h"
#include "ptrlock.h"
extern unsigned long gomp_bind_var_list_len;
extern void **gomp_places_list;
extern unsigned long gomp_places_list_len;
+extern unsigned int gomp_num_teams_var;
extern int gomp_debug_var;
extern int goacc_device_num;
extern char *goacc_device_type;
/* Number of threads running in this contention group. */
unsigned long threads_busy;
- /* This barrier holds and releases threads waiting in threads. */
- gomp_barrier_t threads_dock;
+ /* This barrier holds and releases threads waiting in thread pools. */
+ gomp_simple_barrier_t threads_dock;
};
enum gomp_cancel_kind
/* ... and here is that TLS data. */
-#if defined HAVE_TLS || defined USE_EMUTLS
+#if defined __nvptx__
+/* Base of the per-thread gomp_thread array, declared with the `shared'
+   attribute and defined elsewhere.  */
+extern struct gomp_thread *nvptx_thrs __attribute__((shared));
+/* Return the calling thread's gomp_thread: the element of NVPTX_THRS
+   indexed by the PTX special register %tid.y.
+   NOTE(review): the plugin launches blocks of 32 x THREADS, so %tid.y is
+   presumably the warp index within the block -- confirm.  */
+static inline struct gomp_thread *gomp_thread (void)
+{
+ int tid;
+ /* Read the y component of the thread index.  */
+ asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
+ return nvptx_thrs + tid;
+}
+#elif defined HAVE_TLS || defined USE_EMUTLS
extern __thread struct gomp_thread gomp_tls_data;
static inline struct gomp_thread *gomp_thread (void)
{
return &gomp_global_icv;
}
+#ifdef LIBGOMP_USE_PTHREADS
/* The attributes to be used during thread creation. */
extern pthread_attr_t gomp_thread_attr;
extern pthread_key_t gomp_thread_destructor;
+#endif
/* Function prototypes. */
/* affinity.c */
extern void gomp_init_affinity (void);
+#ifdef LIBGOMP_USE_PTHREADS
extern void gomp_init_thread_affinity (pthread_attr_t *, unsigned int);
+#endif
extern void **gomp_affinity_alloc (unsigned long, bool);
extern void gomp_affinity_init_place (void *);
extern bool gomp_affinity_add_cpus (void *, unsigned long, unsigned long,
--- /dev/null
+/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is a generic implementation of the public OpenMP locking primitives in
+ terms of internal gomp_mutex_t. It is not meant to be compiled on its own.
+ It is #include'd from config/{linux,nvptx}/lock.c. */
+
+#include <string.h>
+#include "libgomp.h"
+
+/* The internal gomp_mutex_t and the external non-recursive omp_lock_t
+ have the same form. Re-use it. */
+
+/* Initialize LOCK by treating it directly as a gomp_mutex_t.  */
+
+void
+gomp_init_lock_30 (omp_lock_t *lock)
+{
+ gomp_mutex_init (lock);
+}
+
+/* Destroy LOCK via gomp_mutex_destroy.  */
+
+void
+gomp_destroy_lock_30 (omp_lock_t *lock)
+{
+ gomp_mutex_destroy (lock);
+}
+
+/* Acquire LOCK via gomp_mutex_lock.  */
+
+void
+gomp_set_lock_30 (omp_lock_t *lock)
+{
+ gomp_mutex_lock (lock);
+}
+
+/* Release LOCK via gomp_mutex_unlock.  */
+
+void
+gomp_unset_lock_30 (omp_lock_t *lock)
+{
+ gomp_mutex_unlock (lock);
+}
+
+/* Try to acquire LOCK without blocking: atomically replace 0 with 1 using
+   a strong compare-and-exchange (acquire ordering on success, relaxed on
+   failure).  Return nonzero if the lock was taken, 0 otherwise.  */
+
+int
+gomp_test_lock_30 (omp_lock_t *lock)
+{
+  int expected = 0;
+  int acquired
+    = __atomic_compare_exchange_n (lock, &expected, 1, false,
+                                   MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);
+
+  return acquired;
+}
+
+/* Initialize nestable LOCK by clearing every byte of it.  */
+
+void
+gomp_init_nest_lock_30 (omp_nest_lock_t *lock)
+{
+  memset (lock, 0, sizeof *lock);
+}
+
+/* Destroy nestable LOCK.  This implementation has nothing to release.  */
+
+void
+gomp_destroy_nest_lock_30 (omp_nest_lock_t *lock)
+{
+}
+
+/* Acquire nestable LOCK.  The pointer returned by gomp_icv (true) serves
+   as the identity of the caller: if LOCK is not already owned by that
+   identity, block on the underlying mutex and record ownership.  In every
+   case the nesting count is incremented.  */
+
+void
+gomp_set_nest_lock_30 (omp_nest_lock_t *lock)
+{
+ void *me = gomp_icv (true);
+
+ if (lock->owner != me)
+ {
+ gomp_mutex_lock (&lock->lock);
+ /* Ownership is recorded only after the mutex has been obtained.  */
+ lock->owner = me;
+ }
+
+ lock->count++;
+}
+
+/* Drop one nesting level of LOCK.  When the count reaches zero the owner
+   is cleared and the underlying mutex is released.  No check is made that
+   the caller is the owner.  */
+
+void
+gomp_unset_nest_lock_30 (omp_nest_lock_t *lock)
+{
+ if (--lock->count == 0)
+ {
+ /* Clear the owner before unlocking the mutex.  */
+ lock->owner = NULL;
+ gomp_mutex_unlock (&lock->lock);
+ }
+}
+
+/* Try to acquire nestable LOCK without blocking.  Returns the new nesting
+   count (>= 1) on success and 0 if the lock is held by someone else.  The
+   pointer returned by gomp_icv (true) serves as the caller's identity.  */
+
+int
+gomp_test_nest_lock_30 (omp_nest_lock_t *lock)
+{
+ void *me = gomp_icv (true);
+ int oldval;
+
+ /* Already ours: just go one level deeper.  */
+ if (lock->owner == me)
+ return ++lock->count;
+
+ /* Otherwise try to flip the underlying lock word from 0 to 1.  */
+ oldval = 0;
+ if (__atomic_compare_exchange_n (&lock->lock, &oldval, 1, false,
+ MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+ {
+ lock->owner = me;
+ lock->count = 1;
+ return 1;
+ }
+
+ return 0;
+}
#include <cuda.h>
#include <stdbool.h>
#include <stdint.h>
+#include <limits.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
{
CUfunction fn;
const struct targ_fn_launch *launch;
+ int regs_per_thread;
+ int max_threads_per_block;
};
/* A loaded PTX image. */
bool overlap;
bool map;
bool concur;
- int mode;
bool mkern;
+ int mode;
+ int clock_khz;
+ int num_sms;
+ int regs_per_block;
+ int regs_per_sm;
struct ptx_image_data *images; /* Images loaded on device. */
pthread_mutex_t image_lock; /* Lock for above list. */
&pi, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev);
ptx_dev->mkern = pi;
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute,
+ &pi, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ ptx_dev->clock_khz = pi;
+
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute,
+ &pi, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
+ ptx_dev->num_sms = pi;
+
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute,
+ &pi, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, dev);
+ ptx_dev->regs_per_block = pi;
+
+ /* CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82 is defined only
+ in CUDA 6.0 and newer. */
+ r = cuDeviceGetAttribute (&pi, 82, dev);
+ /* Fallback: use limit of registers per block, which is usually equal. */
+ if (r == CUDA_ERROR_INVALID_VALUE)
+ pi = ptx_dev->regs_per_block;
+ else if (r != CUDA_SUCCESS)
+ {
+ GOMP_PLUGIN_error ("cuDeviceGetAttribute error: %s", cuda_error (r));
+ return NULL;
+ }
+ ptx_dev->regs_per_sm = pi;
+
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute,
+ &pi, CU_DEVICE_ATTRIBUTE_WARP_SIZE, dev);
+ if (pi != 32)
+ {
+ GOMP_PLUGIN_error ("Only warp size 32 is supported");
+ return NULL;
+ }
+
r = cuDeviceGetAttribute (&async_engines,
CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, dev);
if (r != CUDA_SUCCESS)
CUjit_option opts[6];
void *optvals[6];
float elapsed = 0.0;
-#define LOGSIZE 8192
- char elog[LOGSIZE];
- char ilog[LOGSIZE];
- unsigned long logsize = LOGSIZE;
+ char elog[1024];
+ char ilog[16384];
CUlinkState linkstate;
CUresult r;
void *linkout;
optvals[1] = &ilog[0];
opts[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
- optvals[2] = (void *) logsize;
+ optvals[2] = (void *) sizeof ilog;
opts[3] = CU_JIT_ERROR_LOG_BUFFER;
optvals[3] = &elog[0];
opts[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
- optvals[4] = (void *) logsize;
+ optvals[4] = (void *) sizeof elog;
opts[5] = CU_JIT_LOG_VERBOSE;
optvals[5] = (void *) 1;
}
#ifndef DISABLE_ASYNC
- if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+ if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
{
CUevent *e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent));
CUDA_CALL (cuEventCreate, e, CU_EVENT_DISABLE_TIMING);
}
#ifndef DISABLE_ASYNC
- if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+ if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
{
CUevent *e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
CUDA_CALL (cuEventCreate, e, CU_EVENT_DISABLE_TIMING);
unsigned int
GOMP_OFFLOAD_get_caps (void)
{
- return GOMP_OFFLOAD_CAP_OPENACC_200;
+ return GOMP_OFFLOAD_CAP_OPENACC_200 | GOMP_OFFLOAD_CAP_OPENMP_400;
}
int
return GOMP_VERSION;
}
+/* Initialize the device global __nvptx_clocktick in MODULE, if the module
+   defines it, to 1e-3 / DEV->clock_khz.  A module without that symbol is
+   left untouched; any other CUDA failure is fatal.  */
+
+static void
+nvptx_set_clocktick (CUmodule module, struct ptx_device *dev)
+{
+  CUdeviceptr symbol_addr;
+  CUresult status
+    = cuModuleGetGlobal (&symbol_addr, NULL, module, "__nvptx_clocktick");
+
+  if (status == CUDA_ERROR_NOT_FOUND)
+    return;
+  else if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (status));
+
+  double tick = 1e-3 / dev->clock_khz;
+
+  status = cuMemcpyHtoD (symbol_addr, &tick, sizeof tick);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (status));
+}
+
/* Load the (partial) program described by TARGET_DATA to device
number ORD. Allocate and return TARGET_TABLE. */
for (i = 0; i < fn_entries; i++, targ_fns++, targ_tbl++)
{
CUfunction function;
+ int nregs, mthrs;
CUDA_CALL_ERET (-1, cuModuleGetFunction, &function, module,
fn_descs[i].fn);
+ CUDA_CALL_ERET (-1, cuFuncGetAttribute, &nregs,
+ CU_FUNC_ATTRIBUTE_NUM_REGS, function);
+ CUDA_CALL_ERET (-1, cuFuncGetAttribute, &mthrs,
+ CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function);
targ_fns->fn = function;
targ_fns->launch = &fn_descs[i];
+ targ_fns->regs_per_thread = nregs;
+ targ_fns->max_threads_per_block = mthrs;
targ_tbl->start = (uintptr_t) targ_fns;
targ_tbl->end = targ_tbl->start + 1;
targ_tbl->end = targ_tbl->start + bytes;
}
+ nvptx_set_clocktick (module, dev);
+
return fn_entries + var_entries;
}
&& nvptx_host2dev (dst, src, n));
}
+/* Issue an asynchronous device-to-device copy of N bytes from SRC to DST
+   on the null stream of device ORD and return true.
+   NOTE(review): CUDA_CALL is defined elsewhere; presumably it makes this
+   function return false when the CUDA call fails -- confirm.  */
+
+bool
+GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
+{
+  CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n,
+             ptx_devices[ord]->null_stream->stream);
+  return true;
+}
+
void (*device_run) (int n, void *fn_ptr, void *vars) = NULL;
void
{
return nvptx_set_cuda_stream (async, stream);
}
+
+/* Adjust launch dimensions: pick good values for number of blocks and warps
+   and ensure that number of warps does not exceed CUDA limits as well as GCC's
+   own limits.
+
+   On entry *TEAMS_P and *THREADS_P hold the requested number of blocks and
+   of warps per block; a value <= 0 means "no request".  On return both are
+   at least 1:
+
+   - *THREADS_P defaults to 8 and is capped by FN's max-threads-per-block
+     attribute and by the 32-warp implementation limit;
+   - *TEAMS_P defaults to, and is capped by, a register-based estimate of
+     how many blocks PTX_DEV can host at once.  */
+
+static void
+nvptx_adjust_launch_bounds (struct targ_fn_descriptor *fn,
+                            struct ptx_device *ptx_dev,
+                            int *teams_p, int *threads_p)
+{
+  int max_warps_block = fn->max_threads_per_block / 32;
+  /* Maximum 32 warps per block is an implementation limit in NVPTX backend
+     and libgcc, which matches documented limit of all GPUs as of 2015.  */
+  if (max_warps_block > 32)
+    max_warps_block = 32;
+  /* A kernel limited to fewer than 32 threads per block would otherwise
+     yield zero warps, i.e. a zero block dimension and a division by zero
+     below.  Request one warp and let the launch itself report the error.  */
+  if (max_warps_block < 1)
+    max_warps_block = 1;
+  if (*threads_p <= 0)
+    *threads_p = 8;
+  if (*threads_p > max_warps_block)
+    *threads_p = max_warps_block;
+
+  int regs_per_block = fn->regs_per_thread * 32 * *threads_p;
+  /* This is an estimate of how many blocks the device can host simultaneously.
+     Actual limit, which may be lower, can be queried with "occupancy control"
+     driver interface (since CUDA 6.0).  If the kernel reports no register
+     usage, fall back to one block per multiprocessor instead of dividing
+     by zero.  */
+  int max_blocks = ptx_dev->num_sms;
+  if (regs_per_block > 0)
+    max_blocks *= ptx_dev->regs_per_sm / regs_per_block;
+  /* Never produce an empty grid: a zero grid dimension is rejected by
+     cuLaunchKernel as an invalid value.  */
+  if (max_blocks < 1)
+    max_blocks = 1;
+  if (*teams_p <= 0 || *teams_p > max_blocks)
+    *teams_p = max_blocks;
+}
+
+/* Return the size in bytes of per-warp stacks (see gcc -msoft-stack) to use
+   for OpenMP target regions.  Currently a fixed 128 KiB.  */
+
+static size_t
+nvptx_stacks_size (void)
+{
+  return 128 * 1024;
+}
+
+/* Allocate one contiguous device buffer of SIZE * NUM bytes, i.e. room for
+   NUM stacks of SIZE bytes each, and return its device address as a
+   pointer.  Allocation failure is fatal.  */
+
+static void *
+nvptx_stacks_alloc (size_t size, int num)
+{
+  CUdeviceptr base;
+  CUresult status = cuMemAlloc (&base, size * num);
+
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuMemAlloc error: %s", cuda_error (status));
+  return (void *) base;
+}
+
+/* Release storage P previously obtained from nvptx_stacks_alloc.  NUM is
+   not used.  A failing cuMemFree is fatal.  */
+
+static void
+nvptx_stacks_free (void *p, int num)
+{
+  CUresult status = cuMemFree ((CUdeviceptr) p);
+
+  if (status == CUDA_SUCCESS)
+    return;
+  GOMP_PLUGIN_fatal ("cuMemFree error: %s", cuda_error (status));
+}
+
+/* Run the OpenMP target region TGT_FN (a struct targ_fn_descriptor) on
+   device ORD and wait for it to finish.
+
+   TGT_VARS is passed through as the first kernel argument.  ARGS is a
+   NULL-terminated array of encoded launch parameters from which the
+   requested number of teams and the thread limit are extracted; it must
+   not be NULL.  All CUDA failures are fatal.  */
+
+void
+GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
+{
+ CUfunction function = ((struct targ_fn_descriptor *) tgt_fn)->fn;
+ CUresult r;
+ struct ptx_device *ptx_dev = ptx_devices[ord];
+ const char *maybe_abort_msg = "(perhaps abort was called)";
+ /* Zero means "not requested"; nvptx_adjust_launch_bounds picks values.  */
+ int teams = 0, threads = 0;
+
+ if (!args)
+ GOMP_PLUGIN_fatal ("No target arguments provided");
+ while (*args)
+ {
+ /* Each entry is an identifier word.  Its value is either stored in the
+ following array element (GOMP_TARGET_ARG_SUBSEQUENT_PARAM set) or
+ packed into the identifier's bits above GOMP_TARGET_ARG_VALUE_SHIFT.  */
+ intptr_t id = (intptr_t) *args++, val;
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ val = (intptr_t) *args++;
+ else
+ val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
+ /* Only entries whose device field is GOMP_TARGET_ARG_DEVICE_ALL are
+ honored; the rest are skipped.  */
+ if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
+ continue;
+ /* Saturate so the value fits the int variables below.  */
+ val = val > INT_MAX ? INT_MAX : val;
+ id &= GOMP_TARGET_ARG_ID_MASK;
+ if (id == GOMP_TARGET_ARG_NUM_TEAMS)
+ teams = val;
+ else if (id == GOMP_TARGET_ARG_THREAD_LIMIT)
+ threads = val;
+ }
+ nvptx_adjust_launch_bounds (tgt_fn, ptx_dev, &teams, &threads);
+
+ /* One stack per (team, warp) pair.  */
+ size_t stack_size = nvptx_stacks_size ();
+ void *stacks = nvptx_stacks_alloc (stack_size, teams * threads);
+ /* Kernel arguments are handed over as a packed buffer rather than as an
+ array of per-parameter pointers.  */
+ void *fn_args[] = {tgt_vars, stacks, (void *) stack_size};
+ size_t fn_args_size = sizeof fn_args;
+ void *config[] = {
+ CU_LAUNCH_PARAM_BUFFER_POINTER, fn_args,
+ CU_LAUNCH_PARAM_BUFFER_SIZE, &fn_args_size,
+ CU_LAUNCH_PARAM_END
+ };
+ /* Grid: TEAMS x 1 x 1 blocks.  Block: 32 x THREADS x 1 threads, no
+ dynamic shared memory, on the device's null stream.  */
+ r = cuLaunchKernel (function,
+ teams, 1, 1,
+ 32, threads, 1,
+ 0, ptx_dev->null_stream->stream, NULL, config);
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r));
+
+ /* Block until the kernel has completed so the stacks can be freed.  */
+ r = cuCtxSynchronize ();
+ if (r == CUDA_ERROR_LAUNCH_FAILED)
+ GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r),
+ maybe_abort_msg);
+ else if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r));
+ nvptx_stacks_free (stacks, teams * threads);
+}
+
+/* Asynchronous variant of GOMP_OFFLOAD_run.  Not implemented: every call
+   reports a fatal error and none of the arguments is used.  */
+
+void
+GOMP_OFFLOAD_async_run (int ord, void *tgt_fn, void *tgt_vars, void **args,
+ void *async_data)
+{
+ GOMP_PLUGIN_fatal ("GOMP_OFFLOAD_async_run unimplemented");
+}
#include <stdlib.h>
#include <string.h>
+#ifdef LIBGOMP_USE_PTHREADS
/* This attribute contains PTHREAD_CREATE_DETACHED. */
pthread_attr_t gomp_thread_attr;
{
pool->threads[thr->ts.team_id] = thr;
- gomp_barrier_wait (&pool->threads_dock);
+ gomp_simple_barrier_wait (&pool->threads_dock);
do
{
struct gomp_team *team = thr->ts.team;
gomp_team_barrier_wait_final (&team->barrier);
gomp_finish_task (task);
- gomp_barrier_wait (&pool->threads_dock);
+ gomp_simple_barrier_wait (&pool->threads_dock);
local_fn = thr->fn;
local_data = thr->data;
thr->task = NULL;
return NULL;
}
+#endif
static inline struct gomp_team *
get_last_team (unsigned nthreads)
struct gomp_thread *thr = gomp_thread ();
struct gomp_thread_pool *pool
= (struct gomp_thread_pool *) thread_pool;
- gomp_barrier_wait_last (&pool->threads_dock);
+ gomp_simple_barrier_wait_last (&pool->threads_dock);
gomp_sem_destroy (&thr->release);
thr->thread_pool = NULL;
thr->task = NULL;
+#ifdef LIBGOMP_USE_PTHREADS
pthread_exit (NULL);
+#elif defined(__nvptx__)
+ asm ("exit;");
+#else
+#error gomp_free_pool_helper must terminate the thread
+#endif
}
/* Free a thread pool and release its threads. */
nthr->data = pool;
}
/* This barrier undocks threads docked on pool->threads_dock. */
- gomp_barrier_wait (&pool->threads_dock);
+ gomp_simple_barrier_wait (&pool->threads_dock);
/* And this waits till all threads have called gomp_barrier_wait_last
in gomp_free_pool_helper. */
- gomp_barrier_wait (&pool->threads_dock);
+ gomp_simple_barrier_wait (&pool->threads_dock);
/* Now it is safe to destroy the barrier and free the pool. */
- gomp_barrier_destroy (&pool->threads_dock);
+ gomp_simple_barrier_destroy (&pool->threads_dock);
#ifdef HAVE_SYNC_BUILTINS
__sync_fetch_and_add (&gomp_managed_threads,
gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
}
- free (pool->threads);
if (pool->last_team)
free_team (pool->last_team);
+#ifndef __nvptx__
+ free (pool->threads);
free (pool);
+#endif
thr->thread_pool = NULL;
}
if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
/* Launch a team. */
+#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
unsigned flags, struct gomp_team *team)
else if (old_threads_used == 0)
{
n = 0;
- gomp_barrier_init (&pool->threads_dock, nthreads);
+ gomp_simple_barrier_init (&pool->threads_dock, nthreads);
}
else
{
/* Increase the barrier threshold to make sure all new
threads arrive before the team is released. */
- gomp_barrier_reinit (&pool->threads_dock, nthreads);
+ gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
}
/* Not true yet, but soon will be. We're going to release all
threads and all the threads we're going to let die
arrive before the team is released. */
if (affinity_count)
- gomp_barrier_reinit (&pool->threads_dock,
- nthreads + affinity_count);
+ gomp_simple_barrier_reinit (&pool->threads_dock,
+ nthreads + affinity_count);
}
}
pthread_attr_destroy (&thread_attr);
do_release:
- gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);
+ if (nested)
+ gomp_barrier_wait (&team->barrier);
+ else
+ gomp_simple_barrier_wait (&pool->threads_dock);
/* Decrease the barrier threshold to match the number of threads
that should arrive back at the end of this team. The extra
if (affinity_count)
diff = -affinity_count;
- gomp_barrier_reinit (&pool->threads_dock, nthreads);
+ gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
#ifdef HAVE_SYNC_BUILTINS
__sync_fetch_and_add (&gomp_managed_threads, diff);
&& team->prev_ts.place_partition_len > 64)
free (affinity_thr);
}
+#endif
/* Terminate the current team. This is only to be called by the master
}
}
+#ifdef LIBGOMP_USE_PTHREADS
/* Constructors for this file. */
crashes. */
pthread_key_delete (gomp_thread_destructor);
}
+#endif
struct gomp_task_icv *
gomp_new_icv (void)
struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
gomp_init_task (task, NULL, &gomp_global_icv);
thr->task = task;
+#ifdef LIBGOMP_USE_PTHREADS
pthread_setspecific (gomp_thread_destructor, thr);
+#endif
return &task->icv;
}
set shlib_ext [get_shlib_extension]
set lang_library_path "../libgfortran/.libs"
-set lang_link_flags "-lgfortran"
+set lang_link_flags "-lgfortran -foffload=-lgfortran"
if [info exists lang_include_flags] then {
unset lang_include_flags
}
set shlib_ext [get_shlib_extension]
set lang_library_path "../libgfortran/.libs"
-set lang_link_flags "-lgfortran"
+set lang_link_flags "-lgfortran -foffload=-lgfortran"
if [info exists lang_include_flags] then {
unset lang_include_flags
}