From 249a9df7fce0a6bebc70852ab583c5324208bf06 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Thu, 28 May 2015 12:40:29 +0200 Subject: [PATCH] gallium: add PIPE_COMPUTE_CAP_SUBGROUP_SIZE We need this to implement OpenCL's CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE. Reviewed-by: Francisco Jerez --- src/gallium/docs/source/screen.rst | 2 ++ src/gallium/drivers/ilo/ilo_screen.c | 8 ++++++++ .../drivers/nouveau/nvc0/nvc0_screen.c | 4 ++++ src/gallium/drivers/radeon/r600_pipe_common.c | 6 ++++++ src/gallium/drivers/radeon/r600_pipe_common.h | 20 +++++++++++++++++++ src/gallium/include/pipe/p_defines.h | 3 ++- 6 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 8f64817fe5f..74636207d06 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -384,6 +384,8 @@ pipe_screen::get_compute_param. Value type: ``uint32_t`` * ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported non-zero means yes, zero means no. Value type: ``uint32_t`` +* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZE``: The size of a basic execution unit in + threads. Also known as wavefront size, warp size or SIMD width. Value type: ``uint32_t`` .. 
_pipe_bind: diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 94105559b80..faebb9279b3 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -193,6 +193,7 @@ ilo_get_compute_param(struct pipe_screen *screen, uint32_t max_clock_frequency; uint32_t max_compute_units; uint32_t images_supported; + uint32_t subgroup_size; } val; const void *ptr; int size; @@ -284,6 +285,13 @@ ilo_get_compute_param(struct pipe_screen *screen, ptr = &val.images_supported; size = sizeof(val.images_supported); break; + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + /* best case is actually SIMD32 */ + val.subgroup_size = 16; + + ptr = &val.subgroup_size; + size = sizeof(val.subgroup_size); + break; default: ptr = NULL; size = 0; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 56c230e42fc..4c53106289c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -341,6 +341,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen, enum pipe_compute_cap param, void *data) { uint64_t *data64 = (uint64_t *)data; + uint32_t *data32 = (uint32_t *)data; const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; switch (param) { @@ -372,6 +373,9 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen, case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ data64[0] = 4096; return 8; + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + data32[0] = 32; + return 4; default: return 0; } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 3def4446882..775cf53ba88 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -636,6 +636,12 @@ static int r600_get_compute_param(struct pipe_screen *screen, return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: break; /* 
unused */ + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + if (ret) { + uint32_t *subgroup_size = ret; + *subgroup_size = r600_wavefront_size(rscreen->family); + } + return sizeof(uint32_t); } fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 6ce81d33ddd..51fd016229c 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -570,6 +570,26 @@ static inline unsigned r600_tex_aniso_filter(unsigned filter) /* else */ return 4; } +static inline unsigned r600_wavefront_size(enum radeon_family family) +{ + switch (family) { + case CHIP_RV610: + case CHIP_RS780: + case CHIP_RV620: + case CHIP_RS880: + return 16; + case CHIP_RV630: + case CHIP_RV635: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_PALM: + case CHIP_CEDAR: + return 32; + default: + return 64; + } +} + #define COMPUTE_DBG(rscreen, fmt, args...) \ do { \ if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 88b7b7699c1..153897af754 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -700,7 +700,8 @@ enum pipe_compute_cap PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY, PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS, - PIPE_COMPUTE_CAP_IMAGES_SUPPORTED + PIPE_COMPUTE_CAP_IMAGES_SUPPORTED, + PIPE_COMPUTE_CAP_SUBGROUP_SIZE }; /** -- 2.30.2