From 8cf552b1823e6c0ea8ce0465937920b4cab83465 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Tue, 18 Dec 2012 17:39:19 +0400 Subject: [PATCH] radeon/llvm: improve cube map handling MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add support for TEX2, TXB2, TXL2, fix SHADOWCUBE Signed-off-by: Vadim Girlin Reviewed-by: Michel Dänzer Tested-by: Michel Dänzer --- src/gallium/drivers/r600/r600_llvm.c | 3 + src/gallium/drivers/radeon/radeon_llvm.h | 3 +- .../drivers/radeon/radeon_setup_tgsi_llvm.c | 86 +++++++++++++++---- .../drivers/radeonsi/radeonsi_shader.c | 14 ++- 4 files changed, 84 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 8f1ed26b4c6..17d362c0aaf 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -421,9 +421,12 @@ LLVMModuleRef r600_tgsi_llvm( bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex; diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 61f37722f5d..b8dc771226a 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -153,7 +153,8 @@ static inline LLVMValueRef bitcast( void radeon_llvm_emit_prepare_cube_coords(struct 
lp_build_tgsi_context * bld_base, - LLVMValueRef *arg, unsigned target); + struct lp_build_emit_data * emit_data, + unsigned coord_arg); void radeon_llvm_context_init(struct radeon_llvm_context * ctx); diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 647b66566bd..d390f927a62 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -513,29 +513,29 @@ static void kil_emit( } } - +/* coord_arg - index of the source coord vector in the emit_data->args array */ void radeon_llvm_emit_prepare_cube_coords( struct lp_build_tgsi_context * bld_base, - LLVMValueRef *arg, - unsigned target) + struct lp_build_emit_data * emit_data, + unsigned coord_arg) { - boolean shadowcube = (target == TGSI_TEXTURE_SHADOWCUBE); + + unsigned target = emit_data->inst->Texture.Texture; + unsigned opcode = emit_data->inst->Instruction.Opcode; struct gallivm_state * gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef type = bld_base->base.elem_type; LLVMValueRef coords[4]; LLVMValueRef mad_args[3]; - unsigned i, cnt; + LLVMValueRef idx; + unsigned i; LLVMValueRef v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4), - arg, 1, LLVMReadNoneAttribute); - - /* save src.w for shadow cube */ - cnt = shadowcube ? 
3 : 4; + &emit_data->args[coord_arg], 1, LLVMReadNoneAttribute); - for (i = 0; i < cnt; ++i) { - LLVMValueRef idx = lp_build_const_int32(gallivm, i); + for (i = 0; i < 4; ++i) { + idx = lp_build_const_int32(gallivm, i); coords[i] = LLVMBuildExtractElement(builder, v, idx, ""); } @@ -554,13 +554,42 @@ void radeon_llvm_emit_prepare_cube_coords( coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD, mad_args[0], mad_args[1], mad_args[2]); - /* apply yxwy swizzle to cooords */ + /* apply xyz = yxw swizzle to coords */ coords[2] = coords[3]; coords[3] = coords[1]; coords[1] = coords[0]; coords[0] = coords[3]; - *arg = lp_build_gather_values(bld_base->base.gallivm, coords, 4); + /* all cases except simple cube map sampling require special handling + * for coord vector */ + if (target != TGSI_TEXTURE_CUBE || + opcode != TGSI_OPCODE_TEX) { + + /* load source coord.w component - array_index for cube arrays or + * compare value for SHADOWCUBE */ + idx = lp_build_const_int32(gallivm, 3); + coords[3] = LLVMBuildExtractElement(builder, + emit_data->args[coord_arg], idx, ""); + + /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ + if (target == TGSI_TEXTURE_CUBE_ARRAY || + target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { + + coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD, + coords[3], lp_build_const_float(gallivm, 8.0), coords[2]); + } + + /* for instructions that need additional src (compare/lod/bias), + * put it in coord.w */ + if (opcode == TGSI_OPCODE_TEX2 || + opcode == TGSI_OPCODE_TXB2 || + opcode == TGSI_OPCODE_TXL2) { + coords[3] = emit_data->args[coord_arg + 1]; + } + } + + emit_data->args[coord_arg] = + lp_build_gather_values(bld_base->base.gallivm, coords, 4); } static void txd_fetch_args( @@ -607,11 +636,12 @@ static void txp_fetch_args( emit_data->arg_count = 1; if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || - inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && + inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + 
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && inst->Instruction.Opcode != TGSI_OPCODE_TXQ && inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { - radeon_llvm_emit_prepare_cube_coords(bld_base, &emit_data->args[0], - inst->Texture.Texture); + radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 0); } } @@ -640,12 +670,24 @@ static void tex_fetch_args( coords, 4); emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); + if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { + /* These instructions have additional operand that should be packed + * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords. + * That operand should be passed as a float value in the args array + * right after the coord vector. After packing it's not used anymore, + * that's why arg_count is not increased */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, inst, 1, 0); + } + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || - inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && + inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && inst->Instruction.Opcode != TGSI_OPCODE_TXQ && inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { - radeon_llvm_emit_prepare_cube_coords(bld_base, &emit_data->args[0], - inst->Texture.Texture); + radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 0); } } @@ -1143,14 +1185,20 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg; bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex"; + bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = 
"llvm.AMDGPU.tex"; bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb"; + bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb"; bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd"; bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf"; bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl"; + bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl"; bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex"; bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index a452d4bbe9b..5dab46e4d6d 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -813,11 +813,21 @@ static void tex_fetch_args( emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, LP_CHAN_ALL); + if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { + /* These instructions have additional operand that should be packed + * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords. + * That operand should be passed as a float value in the args array + * right after the coord vector. 
After packing it's not used anymore, + * that's why arg_count is not increased */ + emit_data->args[2] = lp_build_emit_fetch(bld_base, inst, 1, 0); + } + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { - radeon_llvm_emit_prepare_cube_coords(bld_base, &emit_data->args[1], - inst->Texture.Texture); + radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 1); } /* Resource */ -- 2.30.2