From e3d0ee4acf2bc84a3e081442ba022ab148958a7b Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Wed, 13 Nov 2019 12:37:44 +0000 Subject: [PATCH] Move gcn-run heap into GPU memory. 2019-11-13 Andrew Stubbs gcc/ * config/gcn/gcn-run.c (heap_region): New global variable. (struct hsa_runtime_fn_info): Add hsa_memory_assign_agent_fn. (init_hsa_runtime_functions): Initialize hsa_memory_assign_agent. (get_kernarg_region): Move contents to .... (get_memory_region): .... here. (get_heap_region): New function. (init_device): Initialize the heap_region. (device_malloc): Add region parameter. (struct kernargs): Move heap .... (heap): ... to global scope. (main): Allocate heap separate to kernargs. From-SVN: r278131 --- gcc/ChangeLog | 14 +++++++++ gcc/config/gcn/gcn-run.c | 64 ++++++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c7066ab662f..c2daf406826 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2019-11-13 Andrew Stubbs + + * config/gcn/gcn-run.c (heap_region): New global variable. + (struct hsa_runtime_fn_info): Add hsa_memory_assign_agent_fn. + (init_hsa_runtime_functions): Initialize hsa_memory_assign_agent. + (get_kernarg_region): Move contents to .... + (get_memory_region): .... here. + (get_heap_region): New function. + (init_device): Initialize the heap_region. + (device_malloc): Add region parameter. + (struct kernargs): Move heap .... + (heap): ... to global scope. + (main): Allocate heap separate to kernargs. + 2019-11-13 Jan Hubicka * ipa-prop.c (ipa_print_node_jump_functions, diff --git a/gcc/config/gcn/gcn-run.c b/gcc/config/gcn/gcn-run.c index 21852d78bc5..cf4870ff4be 100644 --- a/gcc/config/gcn/gcn-run.c +++ b/gcc/config/gcn/gcn-run.c @@ -72,6 +72,7 @@ uint64_t main_kernel = 0; hsa_executable_t executable = { 0 }; hsa_region_t kernargs_region = { 0 }; +hsa_region_t heap_region = { 0 }; uint32_t kernarg_segment_size = 0; uint32_t group_segment_size = 0; uint32_t private_segment_size = 0; @@ -135,6 +136,8 @@ struct hsa_runtime_fn_info hsa_signal_t *signal); hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size, void **ptr); + hsa_status_t (*hsa_memory_assign_agent_fn) (void *ptr, hsa_agent_t agent, + hsa_access_permission_t access); hsa_status_t (*hsa_memory_copy_fn) (void *dst, const void *src, size_t size); hsa_status_t (*hsa_memory_free_fn) (void *ptr); @@ -204,6 +207,7 @@ init_hsa_runtime_functions (void) DLSYM_FN (hsa_executable_freeze) DLSYM_FN (hsa_signal_create) DLSYM_FN (hsa_memory_allocate) + DLSYM_FN (hsa_memory_assign_agent) DLSYM_FN (hsa_memory_copy) DLSYM_FN (hsa_memory_free) DLSYM_FN (hsa_signal_destroy) @@ -282,7 +286,8 @@ get_gpu_agent (hsa_agent_t agent, void *data __attribute__ ((unused))) suitable one has been found. */ static hsa_status_t -get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused))) +get_memory_region (hsa_region_t region, hsa_region_t *retval, + hsa_region_global_flag_t kind) { /* Reject non-global regions. */ hsa_region_segment_t segment; @@ -294,9 +299,9 @@ get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused))) hsa_region_global_flag_t flags; hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); - if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) + if (flags & kind) { - kernargs_region = region; + *retval = region; return HSA_STATUS_INFO_BREAK; } @@ -304,6 +309,20 @@ get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused))) return HSA_STATUS_SUCCESS; } +static hsa_status_t +get_kernarg_region (hsa_region_t region, void *data __attribute__((unused))) +{ + return get_memory_region (region, &kernargs_region, + HSA_REGION_GLOBAL_FLAG_KERNARG); +} + +static hsa_status_t +get_heap_region (hsa_region_t region, void *data __attribute__((unused))) +{ + return get_memory_region (region, &heap_region, + HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED); +} + /* Initialize the HSA Runtime library and GPU device. */ static void @@ -338,6 +357,13 @@ init_device () NULL), status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK, "Locate kernargs memory"); + + /* Select a memory region for the kernel heap. + The call-back function, get_heap_region, does the selection. */ + XHSA_CMP (hsa_fns.hsa_agent_iterate_regions_fn (device, get_heap_region, + NULL), + status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK, + "Locate device memory"); } @@ -593,10 +619,10 @@ found_main:; __flat_scalar GCN address space). */ static void * -device_malloc (size_t size) +device_malloc (size_t size, hsa_region_t region) { void *result; - XHSA (hsa_fns.hsa_memory_allocate_fn (kernargs_region, size, &result), + XHSA (hsa_fns.hsa_memory_allocate_fn (region, size, &result), "Allocate device memory"); return result; } @@ -634,14 +660,14 @@ struct kernargs } queue[1024]; unsigned int consumed; } output_data; - - struct heap - { - int64_t size; - char data[0]; - } heap; }; +struct heap +{ + int64_t size; + char data[0]; +} heap; + /* Print any console output from the kernel. We print all entries from "consumed" to the next entry without a "written" flag, or "next_output" is reached. The buffer is circular, but the @@ -811,13 +837,19 @@ main (int argc, char *argv[]) /* Allocate device memory for both function parameters and the argv data. */ - size_t heap_size = 10 * 1024 * 1024; /* 10MB. */ - struct kernargs *kernargs = device_malloc (sizeof (*kernargs) + heap_size); + struct kernargs *kernargs = device_malloc (sizeof (*kernargs), + kernargs_region); struct argdata { int64_t argv_data[kernel_argc]; char strings[args_size]; - } *args = device_malloc (sizeof (struct argdata)); + } *args = device_malloc (sizeof (struct argdata), kernargs_region); + + size_t heap_size = 10 * 1024 * 1024; /* 10MB. */ + struct heap *heap = device_malloc (heap_size, heap_region); + XHSA (hsa_fns.hsa_memory_assign_agent_fn (heap, device, + HSA_ACCESS_PERMISSION_RW), + "Assign heap to device agent"); /* Write the data to the target. */ kernargs->argc = kernel_argc; @@ -837,8 +869,8 @@ main (int argc, char *argv[]) memcpy (&args->strings[offset], kernel_argv[i], arg_len + 1); offset += arg_len; } - kernargs->heap_ptr = (int64_t) &kernargs->heap; - kernargs->heap.size = heap_size; + kernargs->heap_ptr = (int64_t) heap; + hsa_fns.hsa_memory_copy_fn (&heap->size, &heap_size, sizeof (heap_size)); /* Run constructors on the GPU. */ run (init_array_kernel, kernargs); -- 2.30.2