From fa4999953db61cf94c0e57a9ab8b006d950e54ca Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Wed, 13 Nov 2019 12:38:04 +0000 Subject: [PATCH] GCN libgomp port 2019-11-13 Andrew Stubbs Kwok Cheung Yeung Julian Brown Tom de Vries include/ * gomp-constants.h (GOMP_DEVICE_GCN): Define. (GOMP_VERSION_GCN): Define. libgomp/ * Makefile.am (libgomp_la_SOURCES): Add oacc-target.c. * Makefile.in: Regenerate. * config.h.in (PLUGIN_GCN): Add new undef. * config/accel/openacc.f90 (acc_device_gcn): New parameter. * config/gcn/affinity-fmt.c: New file. * config/gcn/bar.c: New file. * config/gcn/bar.h: New file. * config/gcn/doacross.h: New file. * config/gcn/icv-device.c: New file. * config/gcn/oacc-target.c: New file. * config/gcn/simple-bar.h: New file. * config/gcn/target.c: New file. * config/gcn/task.c: New file. * config/gcn/team.c: New file. * config/gcn/time.c: New file. * configure.ac: Add amdgcn*-*-*. * configure: Regenerate. * configure.tgt: Add amdgcn*-*-*. * libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN. * libgomp.h (gcn_thrs): Add amdgcn variant. (set_gcn_thrs): Likewise. (gomp_thread): Likewise. * oacc-int.h (goacc_thread): Likewise. * oacc-target.c: New file. * openacc.f90 (acc_device_gcn): New parameter. * openacc.h (acc_device_t): Add acc_device_gcn. * team.c (gomp_free_pool_helper): Add amdgcn support. 
Co-Authored-By: Julian Brown Co-Authored-By: Kwok Cheung Yeung Co-Authored-By: Tom de Vries From-SVN: r278135 --- include/ChangeLog | 8 ++ include/gomp-constants.h | 2 + libgomp/ChangeLog | 33 +++++ libgomp/Makefile.am | 2 +- libgomp/Makefile.in | 6 +- libgomp/config.h.in | 3 + libgomp/config/accel/openacc.f90 | 1 + libgomp/config/gcn/affinity-fmt.c | 51 +++++++ libgomp/config/gcn/bar.c | 232 ++++++++++++++++++++++++++++++ libgomp/config/gcn/bar.h | 168 ++++++++++++++++++++++ libgomp/config/gcn/doacross.h | 58 ++++++++ libgomp/config/gcn/icv-device.c | 72 ++++++++++ libgomp/config/gcn/oacc-target.c | 31 ++++ libgomp/config/gcn/simple-bar.h | 61 ++++++++ libgomp/config/gcn/target.c | 67 +++++++++ libgomp/config/gcn/task.c | 39 +++++ libgomp/config/gcn/team.c | 202 ++++++++++++++++++++++++++ libgomp/config/gcn/time.c | 52 +++++++ libgomp/configure | 2 +- libgomp/configure.ac | 2 +- libgomp/configure.tgt | 4 + libgomp/libgomp-plugin.h | 3 +- libgomp/libgomp.h | 18 +++ libgomp/oacc-int.h | 9 +- libgomp/oacc-target.c | 1 + libgomp/openacc.f90 | 1 + libgomp/openacc.h | 1 + libgomp/team.c | 3 + 28 files changed, 1125 insertions(+), 7 deletions(-) create mode 100644 libgomp/config/gcn/affinity-fmt.c create mode 100644 libgomp/config/gcn/bar.c create mode 100644 libgomp/config/gcn/bar.h create mode 100644 libgomp/config/gcn/doacross.h create mode 100644 libgomp/config/gcn/icv-device.c create mode 100644 libgomp/config/gcn/oacc-target.c create mode 100644 libgomp/config/gcn/simple-bar.h create mode 100644 libgomp/config/gcn/target.c create mode 100644 libgomp/config/gcn/task.c create mode 100644 libgomp/config/gcn/team.c create mode 100644 libgomp/config/gcn/time.c create mode 100644 libgomp/oacc-target.c diff --git a/include/ChangeLog b/include/ChangeLog index 83bd789dd2e..364fa72d457 100644 --- a/include/ChangeLog +++ b/include/ChangeLog @@ -1,3 +1,11 @@ +2019-11-13 Andrew Stubbs + Kwok Cheung Yeung + Julian Brown + Tom de Vries + + * gomp-constants.h (GOMP_DEVICE_GCN): 
Define. + (GOMP_VERSION_GCN): Define. + 2019-08-08 Martin Liska PR bootstrap/91352 diff --git a/include/gomp-constants.h b/include/gomp-constants.h index 82e9094c934..9e356cdfeec 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -174,6 +174,7 @@ enum gomp_map_kind #define GOMP_DEVICE_NVIDIA_PTX 5 #define GOMP_DEVICE_INTEL_MIC 6 #define GOMP_DEVICE_HSA 7 +#define GOMP_DEVICE_GCN 8 #define GOMP_DEVICE_ICV -1 #define GOMP_DEVICE_HOST_FALLBACK -2 @@ -215,6 +216,7 @@ enum gomp_map_kind #define GOMP_VERSION_NVIDIA_PTX 1 #define GOMP_VERSION_INTEL_MIC 0 #define GOMP_VERSION_HSA 0 +#define GOMP_VERSION_GCN 1 #define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV)) #define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff) diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index c8977988060..a7ce28f6dab 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,36 @@ +2019-11-13 Andrew Stubbs + Kwok Cheung Yeung + Julian Brown + Tom de Vries + + * Makefile.am (libgomp_la_SOURCES): Add oacc-target.c. + * Makefile.in: Regenerate. + * config.h.in (PLUGIN_GCN): Add new undef. + * config/accel/openacc.f90 (acc_device_gcn): New parameter. + * config/gcn/affinity-fmt.c: New file. + * config/gcn/bar.c: New file. + * config/gcn/bar.h: New file. + * config/gcn/doacross.h: New file. + * config/gcn/icv-device.c: New file. + * config/gcn/oacc-target.c: New file. + * config/gcn/simple-bar.h: New file. + * config/gcn/target.c: New file. + * config/gcn/task.c: New file. + * config/gcn/team.c: New file. + * config/gcn/time.c: New file. + * configure.ac: Add amdgcn*-*-*. + * configure: Regenerate. + * configure.tgt: Add amdgcn*-*-*. + * libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN. + * libgomp.h (gcn_thrs): Add amdgcn variant. + (set_gcn_thrs): Likewise. + (gomp_thread): Likewise. + * oacc-int.h (goacc_thread): Likewise. + * oacc-target.c: New file. + * openacc.f90 (acc_device_gcn): New parameter. 
+ * openacc.h (acc_device_t): Add acc_device_gcn. + * team.c (gomp_free_pool_helper): Add amdgcn support. + 2019-11-13 Andrew Stubbs Julian Brown diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 7d36343a4be..669b9e4defd 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -65,7 +65,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \ oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ - affinity-fmt.c teams.c oacc-profiling.c + affinity-fmt.c teams.c oacc-profiling.c oacc-target.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 3bd84dc9e13..d4185b35b50 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -217,7 +217,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \ oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \ oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \ - teams.lo oacc-profiling.lo $(am__objects_1) + teams.lo oacc-profiling.lo oacc-target.lo $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -552,7 +552,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ affinity.c target.c splay-tree.c libgomp-plugin.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ - affinity-fmt.c teams.c oacc-profiling.c $(am__append_3) + affinity-fmt.c teams.c oacc-profiling.c oacc-target.c \ + $(am__append_3) # Nvidia PTX OpenACC plugin. 
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -755,6 +756,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@ diff --git a/libgomp/config.h.in b/libgomp/config.h.in index ceb062fcb4c..2d50fcd5c1a 100644 --- a/libgomp/config.h.in +++ b/libgomp/config.h.in @@ -170,6 +170,9 @@ /* Define to the version of this package. */ #undef PACKAGE_VERSION +/* Define to 1 if the GCN plugin is built, 0 if not. */ +#undef PLUGIN_GCN + /* Define to 1 if the HSA plugin is built, 0 if not. */ #undef PLUGIN_HSA diff --git a/libgomp/config/accel/openacc.f90 b/libgomp/config/accel/openacc.f90 index a7f690e1572..6a8c5e9cb3d 100644 --- a/libgomp/config/accel/openacc.f90 +++ b/libgomp/config/accel/openacc.f90 @@ -51,6 +51,7 @@ module openacc_kinds ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. integer (acc_device_kind), parameter :: acc_device_not_host = 4 integer (acc_device_kind), parameter :: acc_device_nvidia = 5 + integer (acc_device_kind), parameter :: acc_device_gcn = 8 end module diff --git a/libgomp/config/gcn/affinity-fmt.c b/libgomp/config/gcn/affinity-fmt.c new file mode 100644 index 00000000000..3585f414460 --- /dev/null +++ b/libgomp/config/gcn/affinity-fmt.c @@ -0,0 +1,51 @@ +/* Copyright (C) 2018-2019 Free Software Foundation, Inc. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). 
+ + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libgomp.h" +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_INTTYPES_H +# include <inttypes.h> /* For PRIx64. */ +#endif +#ifdef HAVE_UNAME +#include <sys/utsname.h> +#endif + +/* The HAVE_GETPID and HAVE_GETHOSTNAME configure tests are passing for nvptx, + while the nvptx newlib implementation does not support those functions. + Override the configure test results here. */ +#undef HAVE_GETPID +#undef HAVE_GETHOSTNAME + +/* The GCN newlib implementation does not support fwrite, but it does support + write. Map fwrite to write. */ +#undef fwrite +#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size)) + +#include "../../affinity-fmt.c" + diff --git a/libgomp/config/gcn/bar.c b/libgomp/config/gcn/bar.c new file mode 100644 index 00000000000..fb709be26ce --- /dev/null +++ b/libgomp/config/gcn/bar.c @@ -0,0 +1,232 @@ +/* Copyright (C) 2015-2019 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). 
+ + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This is an AMD GCN specific implementation of a barrier synchronization + mechanism for libgomp. This type is private to the library. This + implementation uses atomic instructions and s_barrier instruction. It + uses MEMMODEL_RELAXED here because barriers are within workgroups and + therefore don't need to flush caches. */ + +#include <limits.h> +#include "libgomp.h" + + +void +gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) +{ + if (__builtin_expect (state & BAR_WAS_LAST, 0)) + { + /* Next time we'll be awaiting TOTAL threads again. */ + bar->awaited = bar->total; + __atomic_store_n (&bar->generation, bar->generation + BAR_INCR, + MEMMODEL_RELAXED); + } + asm ("s_barrier" ::: "memory"); +} + +void +gomp_barrier_wait (gomp_barrier_t *bar) +{ + gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar)); +} + +/* Like gomp_barrier_wait, except that if the encountering thread + is not the last one to hit the barrier, it returns immediately. 
+ The intended usage is that a thread which intends to gomp_barrier_destroy + this barrier calls gomp_barrier_wait, while all other threads + call gomp_barrier_wait_last. When gomp_barrier_wait returns, + the barrier can be safely destroyed. */ + +void +gomp_barrier_wait_last (gomp_barrier_t *bar) +{ + /* Deferring to gomp_barrier_wait does not use the optimization opportunity + allowed by the interface contract for all-but-last participants. The + original implementation in config/linux/bar.c handles this better. */ + gomp_barrier_wait (bar); +} + +void +gomp_team_barrier_wake (gomp_barrier_t *bar, int count) +{ + asm ("s_barrier" ::: "memory"); +} + +void +gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) +{ + unsigned int generation, gen; + + if (__builtin_expect (state & BAR_WAS_LAST, 0)) + { + /* Next time we'll be awaiting TOTAL threads again. */ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + + bar->awaited = bar->total; + team->work_share_cancelled = 0; + if (__builtin_expect (team->task_count, 0)) + { + gomp_barrier_handle_tasks (state); + state &= ~BAR_WAS_LAST; + } + else + { + state &= ~BAR_CANCELLED; + state += BAR_INCR - BAR_WAS_LAST; + __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED); + asm ("s_barrier" ::: "memory"); + return; + } + } + + generation = state; + state &= ~BAR_CANCELLED; + int retry = 100; + do + { + if (retry-- == 0) + { + /* It really shouldn't happen that barriers get out of sync, but + if they do then this will loop until they realign, so we need + to avoid an infinite loop where the thread just isn't there. 
*/ + const char msg[] = ("Barrier sync failed (another thread died?);" + " aborting."); + write (2, msg, sizeof (msg)-1); + abort(); + } + + asm ("s_barrier" ::: "memory"); + gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + if (__builtin_expect (gen & BAR_TASK_PENDING, 0)) + { + gomp_barrier_handle_tasks (state); + gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + } + generation |= gen & BAR_WAITING_FOR_TASK; + } + while (gen != state + BAR_INCR); +} + +void +gomp_team_barrier_wait (gomp_barrier_t *bar) +{ + gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar)); +} + +void +gomp_team_barrier_wait_final (gomp_barrier_t *bar) +{ + gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar); + if (__builtin_expect (state & BAR_WAS_LAST, 0)) + bar->awaited_final = bar->total; + gomp_team_barrier_wait_end (bar, state); +} + +bool +gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar, + gomp_barrier_state_t state) +{ + unsigned int generation, gen; + + if (__builtin_expect (state & BAR_WAS_LAST, 0)) + { + /* Next time we'll be awaiting TOTAL threads again. */ + /* BAR_CANCELLED should never be set in state here, because + cancellation means that at least one of the threads has been + cancelled, thus on a cancellable barrier we should never see + all threads to arrive. 
*/ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + + bar->awaited = bar->total; + team->work_share_cancelled = 0; + if (__builtin_expect (team->task_count, 0)) + { + gomp_barrier_handle_tasks (state); + state &= ~BAR_WAS_LAST; + } + else + { + state += BAR_INCR - BAR_WAS_LAST; + __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED); + asm ("s_barrier" ::: "memory"); + return false; + } + } + + if (__builtin_expect (state & BAR_CANCELLED, 0)) + return true; + + generation = state; + int retry = 100; + do + { + if (retry-- == 0) + { + /* It really shouldn't happen that barriers get out of sync, but + if they do then this will loop until they realign, so we need + to avoid an infinite loop where the thread just isn't there. */ + const char msg[] = ("Barrier sync failed (another thread died?);" + " aborting."); + write (2, msg, sizeof (msg)-1); + abort(); + } + + asm ("s_barrier" ::: "memory"); + gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED); + if (__builtin_expect (gen & BAR_CANCELLED, 0)) + return true; + if (__builtin_expect (gen & BAR_TASK_PENDING, 0)) + { + gomp_barrier_handle_tasks (state); + gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED); + } + generation |= gen & BAR_WAITING_FOR_TASK; + } + while (gen != state + BAR_INCR); + + return false; +} + +bool +gomp_team_barrier_wait_cancel (gomp_barrier_t *bar) +{ + return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar)); +} + +void +gomp_team_barrier_cancel (struct gomp_team *team) +{ + gomp_mutex_lock (&team->task_lock); + if (team->barrier.generation & BAR_CANCELLED) + { + gomp_mutex_unlock (&team->task_lock); + return; + } + team->barrier.generation |= BAR_CANCELLED; + gomp_mutex_unlock (&team->task_lock); + gomp_team_barrier_wake (&team->barrier, INT_MAX); +} diff --git a/libgomp/config/gcn/bar.h b/libgomp/config/gcn/bar.h new file mode 100644 index 00000000000..ec8851ba078 --- /dev/null +++ b/libgomp/config/gcn/bar.h @@ 
-0,0 +1,168 @@ +/* Copyright (C) 2015-2019 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* This is an AMD GCN specific implementation of a barrier synchronization + mechanism for libgomp. This type is private to the library. This + implementation uses atomic instructions and s_barrier instruction. It + uses MEMMODEL_RELAXED here because barriers are within workgroups and + therefore don't need to flush caches. */ + +#ifndef GOMP_BARRIER_H +#define GOMP_BARRIER_H 1 + +#include "mutex.h" + +typedef struct +{ + unsigned total; + unsigned generation; + unsigned awaited; + unsigned awaited_final; +} gomp_barrier_t; + +typedef unsigned int gomp_barrier_state_t; + +/* The generation field contains a counter in the high bits, with a few + low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can + share space because WAS_LAST is never stored back to generation. 
*/ +#define BAR_TASK_PENDING 1 +#define BAR_WAS_LAST 1 +#define BAR_WAITING_FOR_TASK 2 +#define BAR_CANCELLED 4 +#define BAR_INCR 8 + +static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count) +{ + bar->total = count; + bar->awaited = count; + bar->awaited_final = count; + bar->generation = 0; +} + +static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count) +{ + __atomic_add_fetch (&bar->awaited, count - bar->total, MEMMODEL_RELAXED); + bar->total = count; +} + +static inline void gomp_barrier_destroy (gomp_barrier_t *bar) +{ +} + +extern void gomp_barrier_wait (gomp_barrier_t *); +extern void gomp_barrier_wait_last (gomp_barrier_t *); +extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t); +extern void gomp_team_barrier_wait (gomp_barrier_t *); +extern void gomp_team_barrier_wait_final (gomp_barrier_t *); +extern void gomp_team_barrier_wait_end (gomp_barrier_t *, + gomp_barrier_state_t); +extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *); +extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *, + gomp_barrier_state_t); +extern void gomp_team_barrier_wake (gomp_barrier_t *, int); +struct gomp_team; +extern void gomp_team_barrier_cancel (struct gomp_team *); + +static inline gomp_barrier_state_t +gomp_barrier_wait_start (gomp_barrier_t *bar) +{ + unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED); + ret &= -BAR_INCR | BAR_CANCELLED; + /* A memory barrier is needed before exiting from the various forms + of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section + 2.8.6 flush Construct, which says there is an implicit flush during + a barrier region. The decrement below is nevertheless MEMMODEL_RELAXED: + see the file header (barriers are within workgroups, no cache flush). 
*/ + if (__atomic_add_fetch (&bar->awaited, -1, MEMMODEL_RELAXED) == 0) + ret |= BAR_WAS_LAST; + return ret; +} + +static inline gomp_barrier_state_t +gomp_barrier_wait_cancel_start (gomp_barrier_t *bar) +{ + return gomp_barrier_wait_start (bar); +} + +/* This is like gomp_barrier_wait_start, except it decrements + bar->awaited_final rather than bar->awaited and should be used + for the gomp_team_end barrier only. */ +static inline gomp_barrier_state_t +gomp_barrier_wait_final_start (gomp_barrier_t *bar) +{ + unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED); + ret &= -BAR_INCR | BAR_CANCELLED; + /* See above gomp_barrier_wait_start comment. */ + if (__atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_RELAXED) == 0) + ret |= BAR_WAS_LAST; + return ret; +} + +static inline bool +gomp_barrier_last_thread (gomp_barrier_state_t state) +{ + return state & BAR_WAS_LAST; +} + +/* All the inlines below must be called with team->task_lock + held. */ + +static inline void +gomp_team_barrier_set_task_pending (gomp_barrier_t *bar) +{ + bar->generation |= BAR_TASK_PENDING; +} + +static inline void +gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar) +{ + bar->generation &= ~BAR_TASK_PENDING; +} + +static inline void +gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar) +{ + bar->generation |= BAR_WAITING_FOR_TASK; +} + +static inline bool +gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar) +{ + return (bar->generation & BAR_WAITING_FOR_TASK) != 0; +} + +static inline bool +gomp_team_barrier_cancelled (gomp_barrier_t *bar) +{ + return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0); +} + +static inline void +gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state) +{ + bar->generation = (state & -BAR_INCR) + BAR_INCR; +} + +#endif /* GOMP_BARRIER_H */ diff --git a/libgomp/config/gcn/doacross.h b/libgomp/config/gcn/doacross.h new file mode 100644 index 00000000000..2bff18ae1a8 --- /dev/null +++ 
b/libgomp/config/gcn/doacross.h @@ -0,0 +1,58 @@ +/* Copyright (C) 2015-2019 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* This is the AMD GCN implementation of doacross spinning. */ + +#ifndef GOMP_DOACROSS_H +#define GOMP_DOACROSS_H 1 + +#include "libgomp.h" + +static inline int +cpu_relax (void) +{ + /* This can be implemented as just a memory barrier, but a sleep seems + like it should allow the wavefront to yield (maybe?) + Use the shortest possible sleep time of 1*64 cycles. */ + asm volatile ("s_sleep\t1" ::: "memory"); + return 0; +} + +static inline void doacross_spin (unsigned long *addr, unsigned long expected, + unsigned long cur) +{ + /* Prevent compiler from optimizing based on bounds of containing object. */ + asm ("" : "+r" (addr)); + do + { + /* An alternative implementation might use s_setprio to lower the + priority temporarily, and then restore it after. 
*/ + int i = cpu_relax (); + cur = addr[i]; + } + while (cur <= expected); +} + +#endif /* GOMP_DOACROSS_H */ diff --git a/libgomp/config/gcn/icv-device.c b/libgomp/config/gcn/icv-device.c new file mode 100644 index 00000000000..cbb9dfa1133 --- /dev/null +++ b/libgomp/config/gcn/icv-device.c @@ -0,0 +1,72 @@ +/* Copyright (C) 2015-2019 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* This file defines OpenMP API entry points that accelerator targets are + expected to replace. */ + +#include "libgomp.h" + +void +omp_set_default_device (int device_num __attribute__((unused))) +{ +} + +int +omp_get_default_device (void) +{ + return 0; +} + +int +omp_get_num_devices (void) +{ + return 0; +} + +int +omp_get_num_teams (void) +{ + return gomp_num_teams_var + 1; +} + +int __attribute__ ((__optimize__ ("O2"))) +omp_get_team_num (void) +{ + return __builtin_gcn_dim_pos (0); +} + +int +omp_is_initial_device (void) +{ + /* AMD GCN is an accelerator-only target. 
*/ + return 0; +} + +ialias (omp_set_default_device) +ialias (omp_get_default_device) +ialias (omp_get_num_devices) +ialias (omp_get_num_teams) +ialias (omp_get_team_num) +ialias (omp_is_initial_device) diff --git a/libgomp/config/gcn/oacc-target.c b/libgomp/config/gcn/oacc-target.c new file mode 100644 index 00000000000..bdcc9153d96 --- /dev/null +++ b/libgomp/config/gcn/oacc-target.c @@ -0,0 +1,31 @@ +/* Oversized reductions lock variable + Copyright (C) 2017-2019 Free Software Foundation, Inc. + Contributed by Mentor Graphics. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* We use a global lock variable for reductions on objects larger than + 64 bits. Until and unless proven that lock contention for + different reductions is a problem, a single lock will suffice. 
*/ + +unsigned volatile __reduction_lock = 0; diff --git a/libgomp/config/gcn/simple-bar.h b/libgomp/config/gcn/simple-bar.h new file mode 100644 index 00000000000..802e0f5c301 --- /dev/null +++ b/libgomp/config/gcn/simple-bar.h @@ -0,0 +1,61 @@ +/* Copyright (C) 2015-2019 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This is a simplified barrier that is suitable for thread pool + synchronization. Only a subset of the full barrier API (bar.h) is exposed. + Here in the AMD GCN-specific implementation, we expect that the thread pool + corresponds to the wavefronts within a work group. */ + +#ifndef GOMP_SIMPLE_BARRIER_H +#define GOMP_SIMPLE_BARRIER_H 1 + +/* AMD GCN has no use for this type. */ +typedef int gomp_simple_barrier_t; + +/* GCN barriers block all wavefronts, so the count is not interesting. 
*/ +static inline void +gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count) +{ +} + +static inline void +gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar) +{ +} + +static inline void +gomp_simple_barrier_wait (gomp_simple_barrier_t *bar) +{ + asm volatile ("s_barrier" ::: "memory"); +} + +static inline void +gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar) +{ + /* GCN has no way to signal a barrier without waiting. */ + asm volatile ("s_barrier" ::: "memory"); +} + +#endif /* GOMP_SIMPLE_BARRIER_H */ diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c new file mode 100644 index 00000000000..db00551e695 --- /dev/null +++ b/libgomp/config/gcn/target.c @@ -0,0 +1,67 @@ +/* Copyright (C) 2017-2019 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#include "libgomp.h" +#include <limits.h> + +void +GOMP_teams (unsigned int num_teams, unsigned int thread_limit) +{ /* Apply THREAD_LIMIT, then keep only the first NUM_TEAMS work groups. */ + if (thread_limit) + { + struct gomp_task_icv *icv = gomp_icv (true); + icv->thread_limit_var + = thread_limit > INT_MAX ? UINT_MAX : thread_limit; /* Requests above INT_MAX are stored as UINT_MAX. */ + } + unsigned int num_workgroups, workgroup_id; + num_workgroups = __builtin_gcn_dim_size (0); + workgroup_id = __builtin_gcn_dim_pos (0); + if (!num_teams || num_teams >= num_workgroups) + num_teams = num_workgroups; /* Zero or an over-large request means one team per work group. */ + else if (workgroup_id >= num_teams) + { + gomp_free_thread (gcn_thrs ()); + exit (0); /* Work groups beyond the requested team count stop here. */ + } + gomp_num_teams_var = num_teams - 1; /* Stored as the team count minus one. */ +} + +int +omp_pause_resource (omp_pause_resource_t kind, int device_num) +{ + (void) kind; + (void) device_num; + return -1; /* Both arguments are ignored; the result is always -1. */ +} + +int +omp_pause_resource_all (omp_pause_resource_t kind) +{ + (void) kind; + return -1; /* The argument is ignored; the result is always -1. */ +} + +ialias (omp_pause_resource) +ialias (omp_pause_resource_all) diff --git a/libgomp/config/gcn/task.c b/libgomp/config/gcn/task.c new file mode 100644 index 00000000000..a13565034b6 --- /dev/null +++ b/libgomp/config/gcn/task.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2017-2019 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation.
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file handles the maintenance of tasks in response to task + creation and termination. */ + +#include "libgomp.h" + +/* AMD GCN is an accelerator-only target, so this should never be called. */ + +bool +gomp_target_task_fn (void *data) +{ + __builtin_unreachable (); +} + +#include "../../task.c" diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c new file mode 100644 index 00000000000..c566482bda2 --- /dev/null +++ b/libgomp/config/gcn/team.c @@ -0,0 +1,202 @@ +/* Copyright (C) 2017-2019 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file handles maintenance of threads on AMD GCN.
*/ + +#include "libgomp.h" +#include +#include + +static void gomp_thread_start (struct gomp_thread_pool *); + +/* This externally visible function handles target region entry. It + sets up a per-team thread pool and transfers control by returning to + the kernel in the master thread or gomp_thread_start in other threads. + + The name of this function is part of the interface with the compiler: for + each OpenMP kernel the compiler configures the stack, then calls here. + + Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue. */ + +void +gomp_gcn_enter_kernel (void) +{ + int threadid = __builtin_gcn_dim_pos (1); + + if (threadid == 0) + { + int numthreads = __builtin_gcn_dim_size (1); + int teamid = __builtin_gcn_dim_pos(0); + + /* Set up the global state. + Every team will do this, but that should be harmless. */ + gomp_global_icv.nthreads_var = 16; + gomp_global_icv.thread_limit_var = numthreads; + /* Starting additional threads is not supported. */ + gomp_global_icv.dyn_var = true; + + /* Allocate and initialize the team-local-storage data. */ + struct gomp_thread *thrs = gomp_malloc_cleared (sizeof (*thrs) + * numthreads); + set_gcn_thrs (thrs); + + /* Allocate and initialize a pool of threads in the team. + The threads are already running, of course, we just need to manage + the communication between them. */ + struct gomp_thread_pool *pool = gomp_malloc (sizeof (*pool)); + pool->threads = gomp_malloc (sizeof (void *) * numthreads); + for (int tid = 0; tid < numthreads; tid++) + pool->threads[tid] = &thrs[tid]; + pool->threads_size = numthreads; + pool->threads_used = numthreads; + pool->threads_busy = 1; + pool->last_team = NULL; + gomp_simple_barrier_init (&pool->threads_dock, numthreads); + thrs->thread_pool = pool; + + asm ("s_barrier" ::: "memory"); + return; /* Return to kernel. */ + } + else + { + asm ("s_barrier" ::: "memory"); + gomp_thread_start (gcn_thrs ()[0].thread_pool); + /* gomp_thread_start does not return.
*/ + } +} + +void +gomp_gcn_exit_kernel (void) +{ + gomp_free_thread (gcn_thrs ()); + free (gcn_thrs ()); +} + +/* This function contains the idle loop in which a thread waits + to be called up to become part of a team. */ + +static void +gomp_thread_start (struct gomp_thread_pool *pool) +{ + struct gomp_thread *thr = gomp_thread (); + + gomp_sem_init (&thr->release, 0); + thr->thread_pool = pool; + + /* The loop exits only when "fn" is assigned "gomp_free_pool_helper", + which contains "s_endpgm", or an infinite no-op loop is + suspected (this happens when the thread master crashes). */ + int nul_limit = 99; + do + { + gomp_simple_barrier_wait (&pool->threads_dock); + if (!thr->fn) + { + if (nul_limit-- > 0) + continue; + else + { + const char msg[] = ("team master not responding;" + " slave thread aborting"); + write (2, msg, sizeof (msg)-1); + abort(); + } + } + thr->fn (thr->data); + thr->fn = NULL; + + struct gomp_task *task = thr->task; + gomp_team_barrier_wait_final (&thr->ts.team->barrier); + gomp_finish_task (task); + } + while (1); +} + +/* Launch a team. */ + +void +gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, + unsigned flags, struct gomp_team *team, + struct gomp_taskgroup *taskgroup) +{ + struct gomp_thread *thr, *nthr; + struct gomp_task *task; + struct gomp_task_icv *icv; + struct gomp_thread_pool *pool; + unsigned long nthreads_var; + + thr = gomp_thread (); + pool = thr->thread_pool; + task = thr->task; + icv = task ? &task->icv : &gomp_global_icv; + + /* Always save the previous state, even if this isn't a nested team. + In particular, we should save any work share state from an outer + orphaned work share construct. 
*/ + team->prev_ts = thr->ts; + + thr->ts.team = team; + thr->ts.team_id = 0; + ++thr->ts.level; + if (nthreads > 1) + ++thr->ts.active_level; + thr->ts.work_share = &team->work_shares[0]; + thr->ts.last_work_share = NULL; + thr->ts.single_count = 0; + thr->ts.static_trip = 0; + thr->task = &team->implicit_task[0]; + nthreads_var = icv->nthreads_var; + gomp_init_task (thr->task, task, icv); + team->implicit_task[0].icv.nthreads_var = nthreads_var; + team->implicit_task[0].taskgroup = taskgroup; + + if (nthreads == 1) + return; + + /* Release existing idle threads. */ + for (unsigned i = 1; i < nthreads; ++i) + { + nthr = pool->threads[i]; + nthr->ts.team = team; + nthr->ts.work_share = &team->work_shares[0]; + nthr->ts.last_work_share = NULL; + nthr->ts.team_id = i; + nthr->ts.level = team->prev_ts.level + 1; + nthr->ts.active_level = thr->ts.active_level; + nthr->ts.single_count = 0; + nthr->ts.static_trip = 0; + nthr->task = &team->implicit_task[i]; + gomp_init_task (nthr->task, task, icv); + team->implicit_task[i].icv.nthreads_var = nthreads_var; + team->implicit_task[i].taskgroup = taskgroup; + nthr->fn = fn; + nthr->data = data; + team->ordered_release[i] = &nthr->release; + } + + gomp_simple_barrier_wait (&pool->threads_dock); +} + +#include "../../team.c" diff --git a/libgomp/config/gcn/time.c b/libgomp/config/gcn/time.c new file mode 100644 index 00000000000..f189e55889c --- /dev/null +++ b/libgomp/config/gcn/time.c @@ -0,0 +1,52 @@ +/* Copyright (C) 2015-2019 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file implements timer routines for AMD GCN. */ + +#include "libgomp.h" + +/* According to AMD: + dGPU RTC is 27MHz + AGPU RTC is 100MHz + FIXME: DTRT on an APU. */ +#define RTC_TICKS (1.0 / 27000000.0) /* 27MHz */ + +double +omp_get_wtime (void) +{ + uint64_t clock; /* Raw tick count written by the asm below. */ + asm volatile ("s_memrealtime %0\n\t" + "s_waitcnt 0" : "=r" (clock)); /* volatile: every call must issue a fresh read; without it the compiler may merge, hoist or drop the asm. */ + return clock * RTC_TICKS; /* Ticks times seconds per tick. */ +} + +double +omp_get_wtick (void) +{ + return RTC_TICKS; /* Seconds per tick at the assumed 27MHz rate. */ +} + +ialias (omp_get_wtime) +ialias (omp_get_wtick) diff --git a/libgomp/configure b/libgomp/configure index 1221c44c64b..6a9ccc1f173 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -14921,7 +14921,7 @@ case "$host" in *-*-rtems*) # RTEMS supports Pthreads, but the library is not available at GCC build time. ;; - nvptx*-*-*) + nvptx*-*-* | amdgcn*-*-*) # NVPTX does not support Pthreads, has its own code replacement. libgomp_use_pthreads=no # NVPTX is an accelerator-only target diff --git a/libgomp/configure.ac b/libgomp/configure.ac index 0f9e821c007..725f3bfd285 100644 --- a/libgomp/configure.ac +++ b/libgomp/configure.ac @@ -176,7 +176,7 @@ case "$host" in *-*-rtems*) # RTEMS supports Pthreads, but the library is not available at GCC build time. ;; - nvptx*-*-*) + nvptx*-*-* | amdgcn*-*-*) # NVPTX does not support Pthreads, has its own code replacement.
libgomp_use_pthreads=no # NVPTX is an accelerator-only target diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt index c5ae9a9e39a..06ee115ece9 100644 --- a/libgomp/configure.tgt +++ b/libgomp/configure.tgt @@ -164,6 +164,10 @@ case "${target}" in fi ;; + amdgcn*-*-*) + config_path="gcn accel" + ;; + *) ;; diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index de969e1ba45..037558c43f5 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -50,7 +50,8 @@ enum offload_target_type /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */ OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, OFFLOAD_TARGET_TYPE_INTEL_MIC = 6, - OFFLOAD_TARGET_TYPE_HSA = 7 + OFFLOAD_TARGET_TYPE_HSA = 7, + OFFLOAD_TARGET_TYPE_GCN = 8 }; /* Opaque type to represent plugin-dependent implementation of an diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 178eb600ccd..19e1241ee4c 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -692,6 +692,24 @@ static inline struct gomp_thread *gomp_thread (void) asm ("mov.u32 %0, %%tid.y;" : "=r" (tid)); return nvptx_thrs + tid; } +#elif defined __AMDGCN__ +static inline struct gomp_thread *gcn_thrs (void) +{ + /* The value is at the bottom of LDS. */ + struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4; + return *thrs; +} +static inline void set_gcn_thrs (struct gomp_thread *val) +{ + /* The value is at the bottom of LDS. 
*/ + struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4; + *thrs = val; +} +static inline struct gomp_thread *gomp_thread (void) +{ + int tid = __builtin_gcn_dim_pos(1); + return gcn_thrs () + tid; +} #elif defined HAVE_TLS || defined USE_EMUTLS extern __thread struct gomp_thread gomp_tls_data; static inline struct gomp_thread *gomp_thread (void) diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h index 5ca9944601e..9dc6c8a5713 100644 --- a/libgomp/oacc-int.h +++ b/libgomp/oacc-int.h @@ -82,7 +82,14 @@ struct goacc_thread void *target_tls; }; -#if defined HAVE_TLS || defined USE_EMUTLS +#ifdef __AMDGCN__ +static inline struct goacc_thread * +goacc_thread (void) +{ + /* Unused in the offload libgomp for OpenACC: return a dummy value. */ + return 0; +} +#elif defined HAVE_TLS || defined USE_EMUTLS extern __thread struct goacc_thread *goacc_tls_data; static inline struct goacc_thread * goacc_thread (void) diff --git a/libgomp/oacc-target.c b/libgomp/oacc-target.c new file mode 100644 index 00000000000..f2e79899030 --- /dev/null +++ b/libgomp/oacc-target.c @@ -0,0 +1 @@ +/* Nothing needed here. */ diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index bc205453f82..831a157e703 100644 --- a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -46,6 +46,7 @@ module openacc_kinds ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. integer (acc_device_kind), parameter :: acc_device_not_host = 4 integer (acc_device_kind), parameter :: acc_device_nvidia = 5 + integer (acc_device_kind), parameter :: acc_device_gcn = 8 public :: acc_handle_kind diff --git a/libgomp/openacc.h b/libgomp/openacc.h index 1bbe6c90e7f..42c861caabf 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -55,6 +55,7 @@ typedef enum acc_device_t { /* acc_device_host_nonshm = 3 removed. */ acc_device_not_host = 4, acc_device_nvidia = 5, + acc_device_gcn = 8, _ACC_device_hwm, /* Ensure enumeration is layout compatible with int. 
*/ _ACC_highest = __INT_MAX__, diff --git a/libgomp/team.c b/libgomp/team.c index c422da3701d..b26caaaaec6 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -239,6 +239,9 @@ gomp_free_pool_helper (void *thread_pool) pthread_exit (NULL); #elif defined(__nvptx__) asm ("exit;"); +#elif defined(__AMDGCN__) + asm ("s_dcache_wb\n\t" + "s_endpgm"); #else #error gomp_free_pool_helper must terminate the thread #endif -- 2.30.2