+2019-11-13 Andrew Stubbs <ams@codesourcery.com>
+ Kwok Cheung Yeung <kcy@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+ Tom de Vries <tom@codesourcery.com>
+
+ * gomp-constants.h (GOMP_DEVICE_GCN): Define.
+ (GOMP_VERSION_GCN): Define.
+
2019-08-08 Martin Liska <mliska@suse.cz>
PR bootstrap/91352
#define GOMP_DEVICE_NVIDIA_PTX 5
#define GOMP_DEVICE_INTEL_MIC 6
#define GOMP_DEVICE_HSA 7
+#define GOMP_DEVICE_GCN 8
#define GOMP_DEVICE_ICV -1
#define GOMP_DEVICE_HOST_FALLBACK -2
#define GOMP_VERSION_NVIDIA_PTX 1
#define GOMP_VERSION_INTEL_MIC 0
#define GOMP_VERSION_HSA 0
+#define GOMP_VERSION_GCN 1
#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
+2019-11-13 Andrew Stubbs <ams@codesourcery.com>
+ Kwok Cheung Yeung <kcy@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+ Tom de Vries <tom@codesourcery.com>
+
+ * Makefile.am (libgomp_la_SOURCES): Add oacc-target.c.
+ * Makefile.in: Regenerate.
+ * config.h.in (PLUGIN_GCN): Add new undef.
+ * config/accel/openacc.f90 (acc_device_gcn): New parameter.
+ * config/gcn/affinity-fmt.c: New file.
+ * config/gcn/bar.c: New file.
+ * config/gcn/bar.h: New file.
+ * config/gcn/doacross.h: New file.
+ * config/gcn/icv-device.c: New file.
+ * config/gcn/oacc-target.c: New file.
+ * config/gcn/simple-bar.h: New file.
+ * config/gcn/target.c: New file.
+ * config/gcn/task.c: New file.
+ * config/gcn/team.c: New file.
+ * config/gcn/time.c: New file.
+ * configure.ac: Add amdgcn*-*-*.
+ * configure: Regenerate.
+ * configure.tgt: Add amdgcn*-*-*.
+ * libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN.
+ * libgomp.h (gcn_thrs): Add amdgcn variant.
+ (set_gcn_thrs): Likewise.
+ (gomp_thread): Likewise.
+ * oacc-int.h (goacc_thread): Likewise.
+ * oacc-target.c: New file.
+ * openacc.f90 (acc_device_gcn): New parameter.
+ * openacc.h (acc_device_t): Add acc_device_gcn.
+ * team.c (gomp_free_pool_helper): Add amdgcn support.
+
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
Julian Brown <julian@codesourcery.com>
proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
- affinity-fmt.c teams.c oacc-profiling.c
+ affinity-fmt.c teams.c oacc-profiling.c oacc-target.c
include $(top_srcdir)/plugin/Makefrag.am
target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
- teams.lo oacc-profiling.lo $(am__objects_1)
+ teams.lo oacc-profiling.lo oacc-target.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
affinity.c target.c splay-tree.c libgomp-plugin.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
- affinity-fmt.c teams.c oacc-profiling.c $(am__append_3)
+ affinity-fmt.c teams.c oacc-profiling.c oacc-target.c \
+ $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
/* Define to the version of this package. */
#undef PACKAGE_VERSION
+/* Define to 1 if the GCN plugin is built, 0 if not. */
+#undef PLUGIN_GCN
+
/* Define to 1 if the HSA plugin is built, 0 if not. */
#undef PLUGIN_HSA
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
+ integer (acc_device_kind), parameter :: acc_device_gcn = 8
end module
--- /dev/null
+/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "libgomp.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h> /* For PRIx64. */
+#endif
+#ifdef HAVE_UNAME
+#include <sys/utsname.h>
+#endif
+
+/* The HAVE_GETPID and HAVE_GETHOSTNAME configure tests are passing for GCN,
+ while the GCN newlib implementation does not support those functions.
+ Override the configure test results here. */
+#undef HAVE_GETPID
+#undef HAVE_GETHOSTNAME
+
+/* The GCN newlib implementation does not support fwrite, but it does support
+ write. Map fwrite to write. */
+#undef fwrite
+#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size))
+
+#include "../../affinity-fmt.c"
+
--- /dev/null
+/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is an AMD GCN specific implementation of a barrier synchronization
+ mechanism for libgomp. This type is private to the library. This
+ implementation uses atomic instructions and s_barrier instruction. It
+ uses MEMMODEL_RELAXED here because barriers are within workgroups and
+ therefore don't need to flush caches. */
+
+#include <limits.h>
+#include "libgomp.h"
+
+
+/* Finish a plain barrier wait on BAR. STATE is the value produced by
+ gomp_barrier_wait_start (see gomp_barrier_wait). The thread whose STATE
+ has BAR_WAS_LAST set re-arms BAR->awaited and advances BAR->generation by
+ BAR_INCR; every caller then executes s_barrier. */
+void
+gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
+{
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
+ {
+ /* Next time we'll be awaiting TOTAL threads again. */
+ bar->awaited = bar->total;
+ __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
+ MEMMODEL_RELAXED);
+ }
+ asm ("s_barrier" ::: "memory");
+}
+
+/* Wait on BAR: register this thread's arrival with gomp_barrier_wait_start
+ and hand the resulting state to gomp_barrier_wait_end. */
+void
+gomp_barrier_wait (gomp_barrier_t *bar)
+{
+ gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
+}
+
+/* Like gomp_barrier_wait, except that if the encountering thread
+ is not the last one to hit the barrier, it returns immediately.
+ The intended usage is that a thread which intends to gomp_barrier_destroy
+ this barrier calls gomp_barrier_wait, while all other threads
+ call gomp_barrier_wait_last. When gomp_barrier_wait returns,
+ the barrier can be safely destroyed.
+
+ The above describes the interface contract. This implementation simply
+ calls gomp_barrier_wait, so every caller goes through the full wait
+ (see the comment in the body). */
+
+void
+gomp_barrier_wait_last (gomp_barrier_t *bar)
+{
+ /* Deferring to gomp_barrier_wait does not use the optimization opportunity
+ allowed by the interface contract for all-but-last participants. The
+ original implementation in config/linux/bar.c handles this better. */
+ gomp_barrier_wait (bar);
+}
+
+/* Execute s_barrier. Neither BAR nor COUNT is used by this
+ implementation. */
+void
+gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
+{
+ asm ("s_barrier" ::: "memory");
+}
+
+/* Finish a team barrier wait on BAR. STATE is the value produced by
+ gomp_barrier_wait_start or gomp_barrier_wait_final_start.
+
+ The thread whose STATE has BAR_WAS_LAST set re-arms BAR->awaited and
+ clears the team's work_share_cancelled flag. If the team has no tasks
+ it publishes the next generation, executes one s_barrier and returns.
+ If there are tasks it runs gomp_barrier_handle_tasks and then joins the
+ loop below like every other thread.
+
+ The loop executes s_barrier and reloads BAR->generation until it equals
+ STATE (without BAR_CANCELLED) plus BAR_INCR, calling
+ gomp_barrier_handle_tasks whenever BAR_TASK_PENDING is observed. The
+ function writes a message to file descriptor 2 and aborts if that has
+ not happened after 100 iterations. */
+void
+gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
+{
+ unsigned int generation, gen;
+
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
+ {
+ /* Next time we'll be awaiting TOTAL threads again. */
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+
+ bar->awaited = bar->total;
+ team->work_share_cancelled = 0;
+ if (__builtin_expect (team->task_count, 0))
+ {
+ gomp_barrier_handle_tasks (state);
+ state &= ~BAR_WAS_LAST;
+ }
+ else
+ {
+ /* No tasks: STATE becomes the next generation value with the
+ BAR_CANCELLED and BAR_WAS_LAST bits removed. */
+ state &= ~BAR_CANCELLED;
+ state += BAR_INCR - BAR_WAS_LAST;
+ __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
+ asm ("s_barrier" ::: "memory");
+ return;
+ }
+ }
+
+ /* NOTE(review): GENERATION is only ever written in this function; its
+ value is not read afterwards. */
+ generation = state;
+ state &= ~BAR_CANCELLED;
+ int retry = 100;
+ do
+ {
+ if (retry-- == 0)
+ {
+ /* It really shouldn't happen that barriers get out of sync, but
+ if they do then this will loop until they realign, so we need
+ to avoid an infinite loop where the thread just isn't there. */
+ const char msg[] = ("Barrier sync failed (another thread died?);"
+ " aborting.");
+ write (2, msg, sizeof (msg)-1);
+ abort();
+ }
+
+ asm ("s_barrier" ::: "memory");
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+ {
+ /* Run tasks, then reload because the generation may have changed
+ in the meantime. */
+ gomp_barrier_handle_tasks (state);
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ }
+ generation |= gen & BAR_WAITING_FOR_TASK;
+ }
+ while (gen != state + BAR_INCR);
+}
+
+/* Team barrier wait on BAR: register this thread's arrival with
+ gomp_barrier_wait_start and finish with gomp_team_barrier_wait_end. */
+void
+gomp_team_barrier_wait (gomp_barrier_t *bar)
+{
+ gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
+}
+
+/* Like gomp_team_barrier_wait, but the arrival is counted through
+ BAR->awaited_final (gomp_barrier_wait_final_start). The last thread to
+ arrive re-arms BAR->awaited_final before the wait is finished by
+ gomp_team_barrier_wait_end. */
+void
+gomp_team_barrier_wait_final (gomp_barrier_t *bar)
+{
+ gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
+ bar->awaited_final = bar->total;
+ gomp_team_barrier_wait_end (bar, state);
+}
+
+/* Cancellable counterpart of gomp_team_barrier_wait_end. STATE is the
+ value produced by gomp_barrier_wait_start.
+
+ Returns true if BAR_CANCELLED is set in STATE (for a thread that is not
+ publishing the next generation) or is observed in BAR->generation while
+ waiting. Returns false once the barrier has completed normally, i.e.
+ the last thread published the next generation, or a waiting thread saw
+ BAR->generation reach STATE plus BAR_INCR.
+
+ As in gomp_team_barrier_wait_end, the wait loop gives up with a message
+ on file descriptor 2 and abort after 100 iterations. */
+bool
+gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
+ gomp_barrier_state_t state)
+{
+ unsigned int generation, gen;
+
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
+ {
+ /* Next time we'll be awaiting TOTAL threads again. */
+ /* BAR_CANCELLED should never be set in state here, because
+ cancellation means that at least one of the threads has been
+ cancelled, thus on a cancellable barrier we should never see
+ all threads to arrive. */
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+
+ bar->awaited = bar->total;
+ team->work_share_cancelled = 0;
+ if (__builtin_expect (team->task_count, 0))
+ {
+ gomp_barrier_handle_tasks (state);
+ state &= ~BAR_WAS_LAST;
+ }
+ else
+ {
+ /* No tasks: publish the next generation and release the team. */
+ state += BAR_INCR - BAR_WAS_LAST;
+ __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
+ asm ("s_barrier" ::: "memory");
+ return false;
+ }
+ }
+
+ /* Already cancelled when this thread arrived: do not wait at all. */
+ if (__builtin_expect (state & BAR_CANCELLED, 0))
+ return true;
+
+ /* NOTE(review): GENERATION is only ever written in this function; its
+ value is not read afterwards. */
+ generation = state;
+ int retry = 100;
+ do
+ {
+ if (retry-- == 0)
+ {
+ /* It really shouldn't happen that barriers get out of sync, but
+ if they do then this will loop until they realign, so we need
+ to avoid an infinite loop where the thread just isn't there. */
+ const char msg[] = ("Barrier sync failed (another thread died?);"
+ " aborting.");
+ write (2, msg, sizeof (msg)-1);
+ abort();
+ }
+
+ asm ("s_barrier" ::: "memory");
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
+ if (__builtin_expect (gen & BAR_CANCELLED, 0))
+ return true;
+ if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+ {
+ /* Run tasks, then reload because the generation may have changed
+ in the meantime. */
+ gomp_barrier_handle_tasks (state);
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
+ }
+ generation |= gen & BAR_WAITING_FOR_TASK;
+ }
+ while (gen != state + BAR_INCR);
+
+ return false;
+}
+
+/* Cancellable team barrier wait on BAR. Returns the result of
+ gomp_team_barrier_wait_cancel_end: true if cancellation was observed,
+ false if the barrier completed normally. */
+bool
+gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
+{
+ return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
+}
+
+/* Mark TEAM's barrier as cancelled. While holding TEAM->task_lock, set
+ BAR_CANCELLED in the barrier's generation; if the flag was already set,
+ return without doing anything else. Otherwise, after dropping the lock,
+ call gomp_team_barrier_wake on the barrier. */
+void
+gomp_team_barrier_cancel (struct gomp_team *team)
+{
+ gomp_mutex_lock (&team->task_lock);
+ if (team->barrier.generation & BAR_CANCELLED)
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ return;
+ }
+ team->barrier.generation |= BAR_CANCELLED;
+ gomp_mutex_unlock (&team->task_lock);
+ gomp_team_barrier_wake (&team->barrier, INT_MAX);
+}
--- /dev/null
+/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is an AMD GCN specific implementation of a barrier synchronization
+ mechanism for libgomp. This type is private to the library. This
+ implementation uses atomic instructions and s_barrier instruction. It
+ uses MEMMODEL_RELAXED here because barriers are within workgroups and
+ therefore don't need to flush caches. */
+
+#ifndef GOMP_BARRIER_H
+#define GOMP_BARRIER_H 1
+
+#include "mutex.h"
+
+typedef struct
+{
+ unsigned total; /* Thread count set by gomp_barrier_init/reinit. */
+ unsigned generation; /* Counter in the high bits, BAR_* flags below. */
+ unsigned awaited; /* Arrivals outstanding; see gomp_barrier_wait_start. */
+ unsigned awaited_final; /* Likewise for gomp_barrier_wait_final_start. */
+} gomp_barrier_t;
+
+typedef unsigned int gomp_barrier_state_t; /* Generation snapshot plus BAR_WAS_LAST. */
+
+/* The generation field contains a counter in the high bits, with a few
+ low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can
+ share space because WAS_LAST is never stored back to generation. */
+#define BAR_TASK_PENDING 1 /* Set by gomp_team_barrier_set_task_pending. */
+#define BAR_WAS_LAST 1 /* State only: this thread arrived last. */
+#define BAR_WAITING_FOR_TASK 2 /* Set by gomp_team_barrier_set_waiting_for_tasks. */
+#define BAR_CANCELLED 4 /* Set by gomp_team_barrier_cancel. */
+#define BAR_INCR 8 /* Step of the generation counter (first bit above the flags). */
+
+/* Initialize BAR for COUNT threads: total and both awaited counters are
+ set to COUNT and the generation starts at 0. */
+static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
+{
+ bar->total = count;
+ bar->awaited = count;
+ bar->awaited_final = count;
+ bar->generation = 0;
+}
+
+/* Change BAR's thread count to COUNT. BAR->awaited is adjusted atomically
+ by the difference between COUNT and the previous total, then the new
+ total is stored. BAR->awaited_final and BAR->generation are not
+ touched. */
+static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
+{
+ __atomic_add_fetch (&bar->awaited, count - bar->total, MEMMODEL_RELAXED);
+ bar->total = count;
+}
+
+/* Nothing to release: this implementation's gomp_barrier_destroy is an
+ empty function. */
+static inline void gomp_barrier_destroy (gomp_barrier_t *bar)
+{
+}
+
+/* Barrier operations that are defined out of line rather than inline in
+ this header. */
+extern void gomp_barrier_wait (gomp_barrier_t *);
+extern void gomp_barrier_wait_last (gomp_barrier_t *);
+extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
+extern void gomp_team_barrier_wait (gomp_barrier_t *);
+extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
+extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
+ gomp_barrier_state_t);
+extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
+extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
+ gomp_barrier_state_t);
+extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
+struct gomp_team; /* Forward declaration; only a pointer is used here. */
+extern void gomp_team_barrier_cancel (struct gomp_team *);
+
+/* Register the calling thread's arrival at BAR. Returns the current
+ generation with all flag bits except BAR_CANCELLED masked off, and with
+ BAR_WAS_LAST set if this arrival brought BAR->awaited down to zero. */
+static inline gomp_barrier_state_t
+gomp_barrier_wait_start (gomp_barrier_t *bar)
+{
+ unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
+ ret &= -BAR_INCR | BAR_CANCELLED;
+ /* A memory barrier is needed before exiting from the various forms
+ of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section
+ 2.8.6 flush Construct, which says there is an implicit flush during
+ a barrier region. The decrement below uses MEMMODEL_RELAXED, in line
+ with the note at the top of this file that barriers are within
+ workgroups and therefore don't need to flush caches.
+ NOTE(review): this comment used to say MEMMODEL_ACQ_REL is used here,
+ which does not match the code; confirm that the s_barrier executed by
+ the wait_end functions provides the required ordering. */
+ if (__atomic_add_fetch (&bar->awaited, -1, MEMMODEL_RELAXED) == 0)
+ ret |= BAR_WAS_LAST;
+ return ret;
+}
+
+/* Arrival at a cancellable barrier. In this implementation it is the
+ same operation as gomp_barrier_wait_start. */
+static inline gomp_barrier_state_t
+gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
+{
+ return gomp_barrier_wait_start (bar);
+}
+
+/* This is like gomp_barrier_wait_start, except it decrements
+ bar->awaited_final rather than bar->awaited and should be used
+ for the gomp_team_end barrier only. The return value has the same
+ layout: generation counter, BAR_CANCELLED if set, and BAR_WAS_LAST if
+ this arrival brought bar->awaited_final down to zero. */
+static inline gomp_barrier_state_t
+gomp_barrier_wait_final_start (gomp_barrier_t *bar)
+{
+ unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
+ ret &= -BAR_INCR | BAR_CANCELLED;
+ /* See above gomp_barrier_wait_start comment. */
+ if (__atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_RELAXED) == 0)
+ ret |= BAR_WAS_LAST;
+ return ret;
+}
+
+/* Return true if STATE has BAR_WAS_LAST set, i.e. the thread that obtained
+ STATE was the last one to arrive at the barrier. */
+static inline bool
+gomp_barrier_last_thread (gomp_barrier_state_t state)
+{
+ return state & BAR_WAS_LAST;
+}
+
+/* All the inlines below must be called with team->task_lock
+ held. */
+
+/* Set BAR_TASK_PENDING in BAR->generation (plain, non-atomic update). */
+static inline void
+gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
+{
+ bar->generation |= BAR_TASK_PENDING;
+}
+
+/* Clear BAR_TASK_PENDING in BAR->generation (plain, non-atomic update). */
+static inline void
+gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
+{
+ bar->generation &= ~BAR_TASK_PENDING;
+}
+
+/* Set BAR_WAITING_FOR_TASK in BAR->generation (plain, non-atomic
+ update). */
+static inline void
+gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
+{
+ bar->generation |= BAR_WAITING_FOR_TASK;
+}
+
+/* Return true if BAR_WAITING_FOR_TASK is set in BAR->generation. */
+static inline bool
+gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
+{
+ return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
+}
+
+/* Return true if BAR_CANCELLED is set in BAR->generation. The result is
+ hinted to the compiler as unlikely. */
+static inline bool
+gomp_team_barrier_cancelled (gomp_barrier_t *bar)
+{
+ return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
+}
+
+/* Store into BAR->generation the counter part of STATE advanced by
+ BAR_INCR; all flag bits end up clear. */
+static inline void
+gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
+{
+ bar->generation = (state & -BAR_INCR) + BAR_INCR;
+}
+
+#endif /* GOMP_BARRIER_H */
--- /dev/null
+/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is the AMD GCN implementation of doacross spinning. */
+
+#ifndef GOMP_DOACROSS_H
+#define GOMP_DOACROSS_H 1
+
+#include "libgomp.h"
+
+/* Pause briefly inside a spin loop by executing "s_sleep 1". The asm has
+ a "memory" clobber. Always returns 0; doacross_spin uses the return
+ value as an array index. */
+static inline int
+cpu_relax (void)
+{
+ /* This can be implemented as just a memory barrier, but a sleep seems
+ like it should allow the wavefront to yield (maybe?)
+ Use the shortest possible sleep time of 1*64 cycles. */
+ asm volatile ("s_sleep\t1" ::: "memory");
+ return 0;
+}
+
+/* Spin until the value at ADDR is greater than EXPECTED, calling cpu_relax
+ before each load. The incoming value of CUR is never read: it is
+ overwritten by the first load. */
+static inline void doacross_spin (unsigned long *addr, unsigned long expected,
+ unsigned long cur)
+{
+ /* Prevent compiler from optimizing based on bounds of containing object. */
+ asm ("" : "+r" (addr));
+ do
+ {
+ /* An alternative implementation might use s_setprio to lower the
+ priority temporarily, and then restore it after. */
+ int i = cpu_relax ();
+ /* I is always 0 (see cpu_relax), so this reloads *ADDR. */
+ cur = addr[i];
+ }
+ while (cur <= expected);
+}
+
+#endif /* GOMP_DOACROSS_H */
--- /dev/null
+/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file defines OpenMP API entry points that accelerator targets are
+ expected to replace. */
+
+#include "libgomp.h"
+
+/* No-op in this implementation: DEVICE_NUM is ignored. */
+void
+omp_set_default_device (int device_num __attribute__((unused)))
+{
+}
+
+/* Always returns 0 in this implementation. */
+int
+omp_get_default_device (void)
+{
+ return 0;
+}
+
+/* Always returns 0 in this implementation. */
+int
+omp_get_num_devices (void)
+{
+ return 0;
+}
+
+/* Return gomp_num_teams_var plus one. NOTE(review): the variable appears
+ to hold the number of teams minus one (GOMP_teams in the GCN target.c
+ stores num_teams - 1) -- confirm. */
+int
+omp_get_num_teams (void)
+{
+ return gomp_num_teams_var + 1;
+}
+
+/* Return the team number, taken directly from __builtin_gcn_dim_pos (0).
+ NOTE(review): the reason for forcing the "O2" optimize attribute is not
+ evident from this file. */
+int __attribute__ ((__optimize__ ("O2")))
+omp_get_team_num (void)
+{
+ return __builtin_gcn_dim_pos (0);
+}
+
+/* Always returns 0 (false): code built from this file never runs on the
+ initial device. */
+int
+omp_is_initial_device (void)
+{
+ /* AMD GCN is an accelerator-only target. */
+ return 0;
+}
+
+/* NOTE(review): ialias is defined outside this file; presumably it creates
+ library-internal aliases for the entry points defined above -- confirm
+ against libgomp.h. */
+ialias (omp_set_default_device)
+ialias (omp_get_default_device)
+ialias (omp_get_num_devices)
+ialias (omp_get_num_teams)
+ialias (omp_get_team_num)
+ialias (omp_is_initial_device)
--- /dev/null
+/* Oversized reductions lock variable
+ Copyright (C) 2017-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Graphics.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* We use a global lock variable for reductions on objects larger than
+ 64 bits. Until and unless proven that lock contention for
+ different reductions is a problem, a single lock will suffice.
+ The variable is volatile and starts out as 0; nothing in this file
+ reads or writes it. */
+
+unsigned volatile __reduction_lock = 0;
--- /dev/null
+/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is a simplified barrier that is suitable for thread pool
+ synchronization. Only a subset of the full barrier API (bar.h) is exposed.
+ Here in the AMD GCN-specific implementation, we expect that the thread pool
+ corresponds to the wavefronts within a work group. */
+
+#ifndef GOMP_SIMPLE_BARRIER_H
+#define GOMP_SIMPLE_BARRIER_H 1
+
+/* AMD GCN has no use for this type. None of the functions below reads or
+ writes the object; it exists so that callers have something to pass. */
+typedef int gomp_simple_barrier_t;
+
+/* GCN barriers block all wavefronts, so the count is not interesting.
+ This function does nothing; BAR and COUNT are ignored. */
+static inline void
+gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count)
+{
+}
+
+/* Does nothing; BAR is ignored. */
+static inline void
+gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar)
+{
+}
+
+/* Wait at the barrier by executing s_barrier. BAR is ignored. */
+static inline void
+gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
+{
+ asm volatile ("s_barrier" ::: "memory");
+}
+
+/* Same operation as gomp_simple_barrier_wait: execute s_barrier. BAR is
+ ignored. */
+static inline void
+gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar)
+{
+ /* GCN has no way to signal a barrier without waiting. */
+ asm volatile ("s_barrier" ::: "memory");
+}
+
+#endif /* GOMP_SIMPLE_BARRIER_H */
--- /dev/null
+/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "libgomp.h"
+#include <limits.h>
+
+/* Set up the teams construct for the calling workgroup.
+
+ If THREAD_LIMIT is nonzero it is stored in thread_limit_var of the ICV
+ returned by gomp_icv (true), with UINT_MAX substituted when it exceeds
+ INT_MAX.
+
+ NUM_TEAMS is limited to __builtin_gcn_dim_size (0), which the code
+ treats as the number of workgroups; zero means "use all of them". A
+ workgroup whose id (__builtin_gcn_dim_pos (0)) is not below NUM_TEAMS
+ frees its thread state and calls exit (0). Otherwise NUM_TEAMS - 1 is
+ stored in gomp_num_teams_var. */
+void
+GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
+{
+ if (thread_limit)
+ {
+ struct gomp_task_icv *icv = gomp_icv (true);
+ icv->thread_limit_var
+ = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
+ }
+ unsigned int num_workgroups, workgroup_id;
+ num_workgroups = __builtin_gcn_dim_size (0);
+ workgroup_id = __builtin_gcn_dim_pos (0);
+ if (!num_teams || num_teams >= num_workgroups)
+ num_teams = num_workgroups;
+ else if (workgroup_id >= num_teams)
+ {
+ /* This workgroup is surplus to the requested number of teams. */
+ gomp_free_thread (gcn_thrs ());
+ exit (0);
+ }
+ gomp_num_teams_var = num_teams - 1;
+}
+
+/* Always returns -1 in this implementation; KIND and DEVICE_NUM are
+ ignored. */
+int
+omp_pause_resource (omp_pause_resource_t kind, int device_num)
+{
+ (void) kind;
+ (void) device_num;
+ return -1;
+}
+
+/* Always returns -1 in this implementation; KIND is ignored. */
+int
+omp_pause_resource_all (omp_pause_resource_t kind)
+{
+ (void) kind;
+ return -1;
+}
+
+/* NOTE(review): ialias is defined outside this file; presumably it creates
+ library-internal aliases for the two functions above -- confirm. */
+ialias (omp_pause_resource)
+ialias (omp_pause_resource_all)
--- /dev/null
+/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file handles the maintenance of tasks in response to task
+ creation and termination. */
+
+#include "libgomp.h"
+
+/* AMD GCN is an accelerator-only target, so this should never be called.
+ The body is just __builtin_unreachable (); DATA is ignored and no value
+ is returned. */
+
+bool
+gomp_target_task_fn (void *data)
+{
+ __builtin_unreachable ();
+}
+
+#include "../../task.c"
--- /dev/null
+/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file handles maintenance of threads on AMD GCN. */
+
+#include "libgomp.h"
+#include <stdlib.h>
+#include <string.h>
+
+static void gomp_thread_start (struct gomp_thread_pool *);
+
+/* This externally visible function handles target region entry. It
+ sets up a per-team thread pool and transfers control by returning to
+ the kernel in the master thread or gomp_thread_start in other threads.
+
+ The name of this function is part of the interface with the compiler: for
+ each OpenMP kernel the compiler configures the stack, then calls here.
+
+ Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue.
+
+ The thread for which __builtin_gcn_dim_pos (1) is 0 does the set-up and
+ then executes s_barrier; all other threads execute s_barrier first and
+ only then read the pool pointer the first thread stored. */
+
+void
+gomp_gcn_enter_kernel (void)
+{
+ int threadid = __builtin_gcn_dim_pos (1);
+
+ if (threadid == 0)
+ {
+ int numthreads = __builtin_gcn_dim_size (1);
+ int teamid = __builtin_gcn_dim_pos(0);
+
+ /* Set up the global state.
+ Every team will do this, but that should be harmless. */
+ gomp_global_icv.nthreads_var = 16;
+ gomp_global_icv.thread_limit_var = numthreads;
+ /* Starting additional threads is not supported. */
+ gomp_global_icv.dyn_var = true;
+
+ /* Allocate and initialize the team-local-storage data. */
+ struct gomp_thread *thrs = gomp_malloc_cleared (sizeof (*thrs)
+ * numthreads);
+ set_gcn_thrs (thrs);
+
+ /* Allocate and initialize a pool of threads in the team.
+ The threads are already running, of course, we just need to manage
+ the communication between them. */
+ struct gomp_thread_pool *pool = gomp_malloc (sizeof (*pool));
+ pool->threads = gomp_malloc (sizeof (void *) * numthreads);
+ for (int tid = 0; tid < numthreads; tid++)
+ pool->threads[tid] = &thrs[tid];
+ pool->threads_size = numthreads;
+ pool->threads_used = numthreads;
+ pool->threads_busy = 1;
+ pool->last_team = NULL;
+ gomp_simple_barrier_init (&pool->threads_dock, numthreads);
+ /* Only the first thread's entry gets the pool pointer here; the other
+ threads store it themselves in gomp_thread_start. */
+ thrs->thread_pool = pool;
+
+ asm ("s_barrier" ::: "memory");
+ return; /* Return to kernel. */
+ }
+ else
+ {
+ asm ("s_barrier" ::: "memory");
+ gomp_thread_start (gcn_thrs ()[0].thread_pool);
+ /* gomp_thread_start does not return. */
+ }
+}
+
+void
+gomp_gcn_exit_kernel (void)
+{
+  /* Tear down in two steps: pass the thread array to gomp_free_thread,
+     then release the array itself.  The array pointer is looked up
+     through gcn_thrs for each call.  */
+  gomp_free_thread (gcn_thrs ());
+  free (gcn_thrs ());
+}
+
+/* This function contains the idle loop in which a thread waits
+ to be called up to become part of a team. */
+
+static void
+gomp_thread_start (struct gomp_thread_pool *pool)
+{
+  struct gomp_thread *self = gomp_thread ();
+  /* Number of consecutive-or-not empty wake-ups tolerated before the
+     thread gives up; the budget is never replenished.  */
+  int idle_budget = 99;
+
+  gomp_sem_init (&self->release, 0);
+  self->thread_pool = pool;
+
+  /* Leave this loop only by running "gomp_free_pool_helper" as "fn"
+     (it contains "s_endpgm"), or by aborting once too many wake-ups
+     arrived with no work, which is what a crashed team master looks
+     like from here.  */
+  for (;;)
+    {
+      /* Sleep in the dock until the master releases the team.  */
+      gomp_simple_barrier_wait (&pool->threads_dock);
+
+      if (self->fn == NULL)
+        {
+          if (idle_budget-- > 0)
+            continue;
+
+          const char msg[] = ("team master not responding;"
+                              " slave thread aborting");
+          write (2, msg, sizeof (msg) - 1);
+          abort ();
+        }
+
+      /* Run the assigned work once, then clear it so that the next
+         wake-up without a fresh assignment is counted as idle.  */
+      self->fn (self->data);
+      self->fn = NULL;
+
+      /* Capture the task before the final team barrier and finish it
+         afterwards.  */
+      struct gomp_task *finished = self->task;
+      gomp_team_barrier_wait_final (&self->ts.team->barrier);
+      gomp_finish_task (finished);
+    }
+}
+
+/* Launch a team. */
+
+void
+gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
+                 unsigned flags, struct gomp_team *team,
+                 struct gomp_taskgroup *taskgroup)
+{
+  struct gomp_thread *master = gomp_thread ();
+  struct gomp_thread_pool *pool = master->thread_pool;
+  struct gomp_task *parent_task = master->task;
+  /* Inherit ICVs from the enclosing task when there is one, otherwise
+     from the global defaults.  */
+  struct gomp_task_icv *icv
+    = parent_task ? &parent_task->icv : &gomp_global_icv;
+
+  /* The previous team state is saved unconditionally, nested team or
+     not, so that work share state belonging to an outer orphaned work
+     share construct is not lost.  */
+  team->prev_ts = master->ts;
+
+  /* Turn the calling thread into member 0 of the new team.  */
+  master->ts.team = team;
+  master->ts.team_id = 0;
+  ++master->ts.level;
+  if (nthreads > 1)
+    ++master->ts.active_level;
+  master->ts.work_share = &team->work_shares[0];
+  master->ts.last_work_share = NULL;
+  master->ts.single_count = 0;
+  master->ts.static_trip = 0;
+  master->task = &team->implicit_task[0];
+  /* Read nthreads_var before gomp_init_task runs and write it back into
+     the new implicit task afterwards.  */
+  unsigned long nthreads_var = icv->nthreads_var;
+  gomp_init_task (master->task, parent_task, icv);
+  team->implicit_task[0].icv.nthreads_var = nthreads_var;
+  team->implicit_task[0].taskgroup = taskgroup;
+
+  /* A team of one needs nobody else woken up.  */
+  if (nthreads == 1)
+    return;
+
+  /* Prepare every other member, taken from the pool by index.  */
+  for (unsigned member = 1; member < nthreads; ++member)
+    {
+      struct gomp_thread *worker = pool->threads[member];
+      struct gomp_task *implicit = &team->implicit_task[member];
+
+      worker->ts.team = team;
+      worker->ts.work_share = &team->work_shares[0];
+      worker->ts.last_work_share = NULL;
+      worker->ts.team_id = member;
+      worker->ts.level = team->prev_ts.level + 1;
+      worker->ts.active_level = master->ts.active_level;
+      worker->ts.single_count = 0;
+      worker->ts.static_trip = 0;
+      worker->task = implicit;
+      gomp_init_task (implicit, parent_task, icv);
+      implicit->icv.nthreads_var = nthreads_var;
+      implicit->taskgroup = taskgroup;
+      worker->fn = fn;
+      worker->data = data;
+      team->ordered_release[member] = &worker->release;
+    }
+
+  /* Meet the idle threads at the dock barrier they wait on in
+     gomp_thread_start, which lets them go.  */
+  gomp_simple_barrier_wait (&pool->threads_dock);
+}
+
+#include "../../team.c"
--- /dev/null
+/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file implements timer routines for AMD GCN. */
+
+#include "libgomp.h"
+
+/* According to AMD:
+ dGPU RTC is 27MHz
+ AGPU RTC is 100MHz
+ FIXME: DTRT on an APU. */
+#define RTC_TICKS (1.0 / 27000000.0) /* 27MHz */
+
+double
+omp_get_wtime (void)
+{
+  /* Returns the s_memrealtime counter scaled by RTC_TICKS, i.e. the
+     counter value divided by the assumed 27MHz clock rate.  */
+  uint64_t clock;
+
+  /* The asm has an output but no inputs, so without "volatile" the
+     compiler is free to assume that two reads yield the same value and
+     to merge or hoist them.  A timer read must not be treated that way.
+     The trailing s_waitcnt 0 is presumably there so the result has
+     arrived before it is used.  */
+  asm volatile ("s_memrealtime %0\n\t"
+                "s_waitcnt 0" : "=r" (clock));
+  return clock * RTC_TICKS;
+}
+
+double
+omp_get_wtick (void)
+{
+  /* The timer resolution is the same per-tick factor that omp_get_wtime
+     multiplies the raw counter by.  */
+  const double tick = RTC_TICKS;
+  return tick;
+}
+
+ialias (omp_get_wtime)
+ialias (omp_get_wtick)
*-*-rtems*)
# RTEMS supports Pthreads, but the library is not available at GCC build time.
;;
- nvptx*-*-*)
+ nvptx*-*-* | amdgcn*-*-*)
# NVPTX does not support Pthreads, has its own code replacement.
libgomp_use_pthreads=no
# NVPTX is an accelerator-only target
*-*-rtems*)
# RTEMS supports Pthreads, but the library is not available at GCC build time.
;;
- nvptx*-*-*)
+ nvptx*-*-* | amdgcn*-*-*)
# NVPTX does not support Pthreads, has its own code replacement.
libgomp_use_pthreads=no
# NVPTX is an accelerator-only target
fi
;;
+ amdgcn*-*-*)
+ config_path="gcn accel"
+ ;;
+
*)
;;
/* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */
OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
- OFFLOAD_TARGET_TYPE_HSA = 7
+ OFFLOAD_TARGET_TYPE_HSA = 7,
+ OFFLOAD_TARGET_TYPE_GCN = 8
};
/* Opaque type to represent plugin-dependent implementation of an
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
return nvptx_thrs + tid;
}
+#elif defined __AMDGCN__
+static inline struct gomp_thread *gcn_thrs (void)
+{
+  /* The pointer to the thread array is kept at the bottom of LDS
+     (address 4); load it straight from there.  */
+  return *(struct gomp_thread * __lds *)4;
+}
+static inline void set_gcn_thrs (struct gomp_thread *val)
+{
+  /* Store VAL at the bottom of LDS (address 4), the same location that
+     gcn_thrs loads from.  */
+  *(struct gomp_thread * __lds *)4 = val;
+}
+static inline struct gomp_thread *gomp_thread (void)
+{
+  /* The calling thread's entry is selected from the array returned by
+     gcn_thrs using its position along dimension 1.  */
+  const int slot = __builtin_gcn_dim_pos (1);
+  return &gcn_thrs ()[slot];
+}
#elif defined HAVE_TLS || defined USE_EMUTLS
extern __thread struct gomp_thread gomp_tls_data;
static inline struct gomp_thread *gomp_thread (void)
void *target_tls;
};
-#if defined HAVE_TLS || defined USE_EMUTLS
+#ifdef __AMDGCN__
+static inline struct goacc_thread *
+goacc_thread (void)
+{
+  /* The offload build of libgomp has no use for this with OpenACC, so
+     callers simply get a null placeholder.  */
+  struct goacc_thread *const placeholder = 0;
+  return placeholder;
+}
+#elif defined HAVE_TLS || defined USE_EMUTLS
extern __thread struct goacc_thread *goacc_tls_data;
static inline struct goacc_thread *
goacc_thread (void)
--- /dev/null
+/* Nothing needed here. */
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
+ integer (acc_device_kind), parameter :: acc_device_gcn = 8
public :: acc_handle_kind
/* acc_device_host_nonshm = 3 removed. */
acc_device_not_host = 4,
acc_device_nvidia = 5,
+ acc_device_gcn = 8,
_ACC_device_hwm,
/* Ensure enumeration is layout compatible with int. */
_ACC_highest = __INT_MAX__,
pthread_exit (NULL);
#elif defined(__nvptx__)
asm ("exit;");
+#elif defined(__AMDGCN__)
+ asm ("s_dcache_wb\n\t"
+ "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif