From 57963e3934de387ade6b314a5d72330c28f30806 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 11 Dec 2019 17:49:17 +0100 Subject: [PATCH] [OpenACC] Consolidate 'GOACC_enter_exit_data' and its helper functions in 'libgomp/oacc-mem.c' libgomp/ * oacc-parallel.c (find_pointer, GOACC_enter_exit_data): Move... * oacc-mem.c: ... here. (gomp_acc_insert_pointer, gomp_acc_remove_pointer): Rename to 'goacc_insert_pointer', 'goacc_remove_pointer', and make 'static'. * libgomp.h (gomp_acc_insert_pointer, gomp_acc_remove_pointer): Remove. * libgomp_g.h: Update. From-SVN: r279233 --- libgomp/ChangeLog | 8 ++ libgomp/libgomp.h | 2 - libgomp/libgomp_g.h | 7 +- libgomp/oacc-mem.c | 274 +++++++++++++++++++++++++++++++++++++++- libgomp/oacc-parallel.c | 253 ------------------------------------- 5 files changed, 281 insertions(+), 263 deletions(-) diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index f7d9ae98616..0a5650ed438 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,5 +1,13 @@ 2019-12-11 Thomas Schwinge + * oacc-parallel.c (find_pointer, GOACC_enter_exit_data): Move... + * oacc-mem.c: ... here. + (gomp_acc_insert_pointer, gomp_acc_remove_pointer): Rename to + 'goacc_insert_pointer', 'goacc_remove_pointer', and make 'static'. + * libgomp.h (gomp_acc_insert_pointer, gomp_acc_remove_pointer): + Remove. + * libgomp_g.h: Update. + * oacc-parallel.c (GOACC_wait, goacc_wait): Move... * oacc-async.c: ... here. * oacc-int.h (goacc_wait): Declare. 
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index a35aa07c80b..9f4d0428871 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1138,8 +1138,6 @@ enum gomp_map_vars_kind GOMP_MAP_VARS_ENTER_DATA }; -extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int); -extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int); extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, unsigned short *); struct gomp_coalesce_buf; diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index beb1689180d..c6e1c94caf8 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -361,6 +361,11 @@ extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned, extern void GOACC_wait (int, int, ...); +/* oacc-mem.c */ + +extern void GOACC_enter_exit_data (int, size_t, void **, size_t *, + unsigned short *, int, int, ...); + /* oacc-parallel.c */ extern void GOACC_parallel_keyed (int, void (*) (void *), size_t, @@ -370,8 +375,6 @@ extern void GOACC_parallel (int, void (*) (void *), size_t, void **, size_t *, extern void GOACC_data_start (int, size_t, void **, size_t *, unsigned short *); extern void GOACC_data_end (void); -extern void GOACC_enter_exit_data (int, size_t, void **, - size_t *, unsigned short *, int, int, ...); extern void GOACC_update (int, size_t, void **, size_t *, unsigned short *, int, int, ...); extern int GOACC_get_num_threads (void); diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 369a11696da..571e0606ac8 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -870,9 +870,18 @@ acc_update_self_async (void *h, size_t s, int async) update_dev_host (0, h, s, async); } -void -gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, - void *kinds, int async) + +/* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper + functions. */ + +/* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'. 
+ + Only the first mapping is considered in reference counting; the following + ones implicitly follow suit. */ + +static void +goacc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, + void *kinds, int async) { struct target_mem_desc *tgt; struct goacc_thread *thr = goacc_thread (); @@ -914,9 +923,9 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, tgt->list[0].key->dynamic_refcount = 1; } -void -gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, - int finalize, int mapnum) +static void +goacc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, + int finalize, int mapnum) { struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; @@ -986,3 +995,256 @@ gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); } + +/* Return the number of mappings associated with 'GOMP_MAP_TO_PSET' or + 'GOMP_MAP_POINTER'. */ + +static int +find_pointer (int pos, size_t mapnum, unsigned short *kinds) +{ + if (pos + 1 >= mapnum) + return 0; + + unsigned char kind = kinds[pos+1] & 0xff; + + if (kind == GOMP_MAP_TO_PSET) + return 3; + else if (kind == GOMP_MAP_POINTER) + return 2; + + return 0; +} + +void +GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, int async, + int num_waits, ...) +{ + int flags = GOACC_FLAGS_UNMARSHAL (flags_m); + + struct goacc_thread *thr; + struct gomp_device_descr *acc_dev; + bool data_enter = false; + size_t i; + + goacc_lazy_initialize (); + + thr = goacc_thread (); + acc_dev = thr->dev; + + /* Determine whether "finalize" semantics apply to all mappings of this + OpenACC directive. */ + bool finalize = false; + if (mapnum > 0) + { + unsigned char kind = kinds[0] & 0xff; + if (kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_FORCE_FROM) + finalize = true; + } + + /* Determine if this is an "acc enter data". 
*/ + for (i = 0; i < mapnum; ++i) + { + unsigned char kind = kinds[i] & 0xff; + + if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) + continue; + + if (kind == GOMP_MAP_FORCE_ALLOC + || kind == GOMP_MAP_FORCE_PRESENT + || kind == GOMP_MAP_FORCE_TO + || kind == GOMP_MAP_TO + || kind == GOMP_MAP_ALLOC) + { + data_enter = true; + break; + } + + if (kind == GOMP_MAP_RELEASE + || kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_FROM + || kind == GOMP_MAP_FORCE_FROM) + break; + + gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", + kind); + } + + bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); + + acc_prof_info prof_info; + if (profiling_p) + { + thr->prof_info = &prof_info; + + prof_info.event_type + = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start; + prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; + prof_info.version = _ACC_PROF_INFO_VERSION; + prof_info.device_type = acc_device_type (acc_dev->type); + prof_info.device_number = acc_dev->target_id; + prof_info.thread_id = -1; + prof_info.async = async; + prof_info.async_queue = prof_info.async; + prof_info.src_file = NULL; + prof_info.func_name = NULL; + prof_info.line_no = -1; + prof_info.end_line_no = -1; + prof_info.func_line_no = -1; + prof_info.func_end_line_no = -1; + } + acc_event_info enter_exit_data_event_info; + if (profiling_p) + { + enter_exit_data_event_info.other_event.event_type + = prof_info.event_type; + enter_exit_data_event_info.other_event.valid_bytes + = _ACC_OTHER_EVENT_INFO_VALID_BYTES; + enter_exit_data_event_info.other_event.parent_construct + = data_enter ? 
acc_construct_enter_data : acc_construct_exit_data; + enter_exit_data_event_info.other_event.implicit = 0; + enter_exit_data_event_info.other_event.tool_info = NULL; + } + acc_api_info api_info; + if (profiling_p) + { + thr->api_info = &api_info; + + api_info.device_api = acc_device_api_none; + api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; + api_info.device_type = prof_info.device_type; + api_info.vendor = -1; + api_info.device_handle = NULL; + api_info.context_handle = NULL; + api_info.async_handle = NULL; + } + + if (profiling_p) + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); + + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + || (flags & GOACC_FLAG_HOST_FALLBACK)) + { + prof_info.device_type = acc_device_host; + api_info.device_type = prof_info.device_type; + + goto out_prof; + } + + if (num_waits) + { + va_list ap; + + va_start (ap, num_waits); + goacc_wait (async, num_waits, &ap); + va_end (ap); + } + + /* In C, non-pointers and arrays are represented by a single data clause. + Dynamically allocated arrays and subarrays are represented by a data + clause followed by an internal GOMP_MAP_POINTER. + + In Fortran, scalars and non-allocated arrays are represented by a + single data clause. Allocated arrays and subarrays have three mappings: + 1) the original data clause, 2) a PSET, 3) a pointer to the array data. + */ + + if (data_enter) + { + for (i = 0; i < mapnum; i++) + { + unsigned char kind = kinds[i] & 0xff; + + /* Scan for pointers and PSETs. 
*/ + int pointer = find_pointer (i, mapnum, kinds); + + if (!pointer) + { + switch (kind) + { + case GOMP_MAP_ALLOC: + case GOMP_MAP_FORCE_ALLOC: + acc_create_async (hostaddrs[i], sizes[i], async); + break; + case GOMP_MAP_TO: + case GOMP_MAP_FORCE_TO: + acc_copyin_async (hostaddrs[i], sizes[i], async); + break; + default: + gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", + kind); + break; + } + } + else + { + goacc_insert_pointer (pointer, &hostaddrs[i], &sizes[i], &kinds[i], + async); + /* Increment 'i' by 'pointer - 1' because OpenACC requires Fortran + arrays to be contiguous, so each PSET is associated with + one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and + one MAP_POINTER. */ + i += pointer - 1; + } + } + } + else + for (i = 0; i < mapnum; ++i) + { + unsigned char kind = kinds[i] & 0xff; + + int pointer = find_pointer (i, mapnum, kinds); + + if (!pointer) + { + switch (kind) + { + case GOMP_MAP_RELEASE: + case GOMP_MAP_DELETE: + if (acc_is_present (hostaddrs[i], sizes[i])) + { + if (finalize) + acc_delete_finalize_async (hostaddrs[i], sizes[i], async); + else + acc_delete_async (hostaddrs[i], sizes[i], async); + } + break; + case GOMP_MAP_FROM: + case GOMP_MAP_FORCE_FROM: + if (finalize) + acc_copyout_finalize_async (hostaddrs[i], sizes[i], async); + else + acc_copyout_async (hostaddrs[i], sizes[i], async); + break; + default: + gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", + kind); + break; + } + } + else + { + bool copyfrom = (kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_FROM); + goacc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, + finalize, pointer); + /* See the above comment. */ + i += pointer - 1; + } + } + + out_prof: + if (profiling_p) + { + prof_info.event_type + = data_enter ? 
acc_ev_enter_data_end : acc_ev_exit_data_end; + enter_exit_data_event_info.other_event.event_type = prof_info.event_type; + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); + + thr->prof_info = NULL; + thr->api_info = NULL; + } +} diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 1faca5d562f..5c13a7e4348 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -47,25 +47,6 @@ _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK) "legacy GOMP_DEVICE_HOST_FALLBACK broken"); -/* Returns the number of mappings associated with the pointer or pset. PSET - have three mappings, whereas pointer have two. */ - -static int -find_pointer (int pos, size_t mapnum, unsigned short *kinds) -{ - if (pos + 1 >= mapnum) - return 0; - - unsigned char kind = kinds[pos+1] & 0xff; - - if (kind == GOMP_MAP_TO_PSET) - return 3; - else if (kind == GOMP_MAP_POINTER) - return 2; - - return 0; -} - /* Handle the mapping pair that are presented when a deviceptr clause is used with Fortran. */ @@ -578,240 +559,6 @@ GOACC_data_end (void) } } -void -GOACC_enter_exit_data (int flags_m, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned short *kinds, - int async, int num_waits, ...) -{ - int flags = GOACC_FLAGS_UNMARSHAL (flags_m); - - struct goacc_thread *thr; - struct gomp_device_descr *acc_dev; - bool data_enter = false; - size_t i; - - goacc_lazy_initialize (); - - thr = goacc_thread (); - acc_dev = thr->dev; - - /* Determine whether "finalize" semantics apply to all mappings of this - OpenACC directive. */ - bool finalize = false; - if (mapnum > 0) - { - unsigned char kind = kinds[0] & 0xff; - if (kind == GOMP_MAP_DELETE - || kind == GOMP_MAP_FORCE_FROM) - finalize = true; - } - - /* Determine if this is an "acc enter data". 
*/ - for (i = 0; i < mapnum; ++i) - { - unsigned char kind = kinds[i] & 0xff; - - if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) - continue; - - if (kind == GOMP_MAP_FORCE_ALLOC - || kind == GOMP_MAP_FORCE_PRESENT - || kind == GOMP_MAP_FORCE_TO - || kind == GOMP_MAP_TO - || kind == GOMP_MAP_ALLOC) - { - data_enter = true; - break; - } - - if (kind == GOMP_MAP_RELEASE - || kind == GOMP_MAP_DELETE - || kind == GOMP_MAP_FROM - || kind == GOMP_MAP_FORCE_FROM) - break; - - gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", - kind); - } - - bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); - - acc_prof_info prof_info; - if (profiling_p) - { - thr->prof_info = &prof_info; - - prof_info.event_type - = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start; - prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; - prof_info.version = _ACC_PROF_INFO_VERSION; - prof_info.device_type = acc_device_type (acc_dev->type); - prof_info.device_number = acc_dev->target_id; - prof_info.thread_id = -1; - prof_info.async = async; - prof_info.async_queue = prof_info.async; - prof_info.src_file = NULL; - prof_info.func_name = NULL; - prof_info.line_no = -1; - prof_info.end_line_no = -1; - prof_info.func_line_no = -1; - prof_info.func_end_line_no = -1; - } - acc_event_info enter_exit_data_event_info; - if (profiling_p) - { - enter_exit_data_event_info.other_event.event_type - = prof_info.event_type; - enter_exit_data_event_info.other_event.valid_bytes - = _ACC_OTHER_EVENT_INFO_VALID_BYTES; - enter_exit_data_event_info.other_event.parent_construct - = data_enter ? 
acc_construct_enter_data : acc_construct_exit_data; - enter_exit_data_event_info.other_event.implicit = 0; - enter_exit_data_event_info.other_event.tool_info = NULL; - } - acc_api_info api_info; - if (profiling_p) - { - thr->api_info = &api_info; - - api_info.device_api = acc_device_api_none; - api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; - api_info.device_type = prof_info.device_type; - api_info.vendor = -1; - api_info.device_handle = NULL; - api_info.context_handle = NULL; - api_info.async_handle = NULL; - } - - if (profiling_p) - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); - - if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) - || (flags & GOACC_FLAG_HOST_FALLBACK)) - { - prof_info.device_type = acc_device_host; - api_info.device_type = prof_info.device_type; - - goto out_prof; - } - - if (num_waits) - { - va_list ap; - - va_start (ap, num_waits); - goacc_wait (async, num_waits, &ap); - va_end (ap); - } - - /* In c, non-pointers and arrays are represented by a single data clause. - Dynamically allocated arrays and subarrays are represented by a data - clause followed by an internal GOMP_MAP_POINTER. - - In fortran, scalars and not allocated arrays are represented by a - single data clause. Allocated arrays and subarrays have three mappings: - 1) the original data clause, 2) a PSET 3) a pointer to the array data. - */ - - if (data_enter) - { - for (i = 0; i < mapnum; i++) - { - unsigned char kind = kinds[i] & 0xff; - - /* Scan for pointers and PSETs. 
*/ - int pointer = find_pointer (i, mapnum, kinds); - - if (!pointer) - { - switch (kind) - { - case GOMP_MAP_ALLOC: - case GOMP_MAP_FORCE_ALLOC: - acc_create_async (hostaddrs[i], sizes[i], async); - break; - case GOMP_MAP_TO: - case GOMP_MAP_FORCE_TO: - acc_copyin_async (hostaddrs[i], sizes[i], async); - break; - default: - gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", - kind); - break; - } - } - else - { - gomp_acc_insert_pointer (pointer, &hostaddrs[i], - &sizes[i], &kinds[i], async); - /* Increment 'i' by two because OpenACC requires fortran - arrays to be contiguous, so each PSET is associated with - one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and - one MAP_POINTER. */ - i += pointer - 1; - } - } - } - else - for (i = 0; i < mapnum; ++i) - { - unsigned char kind = kinds[i] & 0xff; - - int pointer = find_pointer (i, mapnum, kinds); - - if (!pointer) - { - switch (kind) - { - case GOMP_MAP_RELEASE: - case GOMP_MAP_DELETE: - if (acc_is_present (hostaddrs[i], sizes[i])) - { - if (finalize) - acc_delete_finalize_async (hostaddrs[i], sizes[i], async); - else - acc_delete_async (hostaddrs[i], sizes[i], async); - } - break; - case GOMP_MAP_FROM: - case GOMP_MAP_FORCE_FROM: - if (finalize) - acc_copyout_finalize_async (hostaddrs[i], sizes[i], async); - else - acc_copyout_async (hostaddrs[i], sizes[i], async); - break; - default: - gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", - kind); - break; - } - } - else - { - bool copyfrom = (kind == GOMP_MAP_FORCE_FROM - || kind == GOMP_MAP_FROM); - gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, - finalize, pointer); - /* See the above comment. */ - i += pointer - 1; - } - } - - out_prof: - if (profiling_p) - { - prof_info.event_type - = data_enter ? 
acc_ev_enter_data_end : acc_ev_exit_data_end; - enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); - - thr->prof_info = NULL; - thr->api_info = NULL; - } -} - void GOACC_update (int flags_m, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, -- 2.30.2