From dbd38f2a92633e565fe35c8d9c66f864d6ff4877 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 16 Feb 2017 01:17:48 +0100 Subject: [PATCH] radeonsi: add a workaround for clamping unaligned RGB 8 & 16-bit vertex loads MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 38 +++++++++++++++++-- src/gallium/drivers/radeonsi/si_shader.h | 4 ++ .../drivers/radeonsi/si_shader_internal.h | 3 ++ .../drivers/radeonsi/si_shader_tgsi_setup.c | 3 ++ src/gallium/drivers/radeonsi/si_state.c | 18 ++++++++- 5 files changed, 60 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8b9fed9fb8c..1829e3ec118 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -368,14 +368,27 @@ static void declare_input_vs( fix_fetch = ctx->shader->key.mono.vs.fix_fetch[input_index]; - /* Do multiple loads for double formats. */ - if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) { + /* Do multiple loads for special formats. */ + switch (fix_fetch) { + case SI_FIX_FETCH_RGB_64_FLOAT: num_fetches = 3; /* 3 2-dword loads */ fetch_stride = 8; - } else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) { + break; + case SI_FIX_FETCH_RGBA_64_FLOAT: num_fetches = 2; /* 2 4-dword loads */ fetch_stride = 16; - } else { + break; + case SI_FIX_FETCH_RGB_8: + case SI_FIX_FETCH_RGB_8_INT: + num_fetches = 3; + fetch_stride = 1; + break; + case SI_FIX_FETCH_RGB_16: + case SI_FIX_FETCH_RGB_16_INT: + num_fetches = 3; + fetch_stride = 2; + break; + default: num_fetches = 1; fetch_stride = 0; } @@ -512,6 +525,23 @@ static void declare_input_vs( chan % 2); } break; + case SI_FIX_FETCH_RGB_8: + case SI_FIX_FETCH_RGB_8_INT: + case SI_FIX_FETCH_RGB_16: + case SI_FIX_FETCH_RGB_16_INT: + for (chan = 0; chan < 3; chan++) { + out[chan] = LLVMBuildExtractElement(gallivm->builder, + input[chan], + ctx->i32_0, ""); + } + if (fix_fetch == SI_FIX_FETCH_RGB_8 || + fix_fetch == SI_FIX_FETCH_RGB_16) { + out[3] = LLVMConstReal(ctx->f32, 1); + } else { + out[3] = LLVMBuildBitCast(gallivm->builder, ctx->i32_1, + ctx->f32, ""); + } + break; } } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 46161907a67..da88df041ca 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -250,6 +250,10 @@ enum { SI_FIX_FETCH_RG_64_FLOAT, SI_FIX_FETCH_RGB_64_FLOAT, SI_FIX_FETCH_RGBA_64_FLOAT, + SI_FIX_FETCH_RGB_8, /* A = 1.0 */ + SI_FIX_FETCH_RGB_8_INT, /* A = 1 */ + SI_FIX_FETCH_RGB_16, + SI_FIX_FETCH_RGB_16_INT, }; struct si_shader; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 26cc28d8079..8fde6c25684 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -149,6 +149,9 @@ struct si_shader_context { LLVMTypeRef v4f32; LLVMTypeRef v8i32; + LLVMValueRef i32_0; + LLVMValueRef i32_1; + LLVMValueRef shared_memory; }; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index c7445e037a3..c7019c1b8b7 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -1366,6 +1366,9 @@ void si_llvm_context_init(struct si_shader_context *ctx, ctx->v4i32 = LLVMVectorType(ctx->i32, 4); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); + + ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0); + ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0); } void si_llvm_create_func(struct si_shader_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index d9b9f838763..024de8b035e 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1732,10 +1732,10 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, case 8: switch (desc->nr_channels) { case 1: + case 3: /* 3 loads */ return V_008F0C_BUF_DATA_FORMAT_8; case 2: return V_008F0C_BUF_DATA_FORMAT_8_8; - case 3: case 4: return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; } @@ -1743,10 +1743,10 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, case 16: switch (desc->nr_channels) { case 1: + case 3: /* 3 loads */ return V_008F0C_BUF_DATA_FORMAT_16; case 2: return V_008F0C_BUF_DATA_FORMAT_16_16; - case 3: case 4: return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; } @@ -3459,6 +3459,20 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, default: assert(0); } + } else if (channel && desc->nr_channels == 3) { + assert(desc->swizzle[0] == PIPE_SWIZZLE_X); + + if (channel->size == 8) { + if (channel->pure_integer) + v->fix_fetch[i] = SI_FIX_FETCH_RGB_8_INT; + else + v->fix_fetch[i] = SI_FIX_FETCH_RGB_8; + } else if (channel->size == 16) { + if (channel->pure_integer) + v->fix_fetch[i] = SI_FIX_FETCH_RGB_16_INT; + else + v->fix_fetch[i] = SI_FIX_FETCH_RGB_16; + } } v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) | -- 2.30.2