From a2bb7b26a1c4ed1c00c9d81bcd0318ff0acb141a Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 6 Aug 2019 21:59:44 -0400 Subject: [PATCH] gallium: redefine ATOMINC_WRAP to be more hardware-friendly MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Both AMD and NVIDIA hardware define it this way. Instead of replicating the logic everywhere, just fix it up in one place. Signed-off-by: Ilia Mirkin Reviewed-by: Marek Olšák --- src/gallium/docs/source/tgsi.rst | 2 +- src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 12 ------------ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 11 ++++++++++- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 17ad097e85e..e72b047dbd5 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -2846,7 +2846,7 @@ These atomic operations may only be used with 32-bit integer image formats. dst_x = resource[offset] + 1 - resource[offset] = dst_x < src_x ? dst_x : 0 + resource[offset] = dst_x <= src_x ? dst_x : 0 .. opcode:: ATOMDEC_WRAP - Atomic decrement + wrap around diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 4a4ba43780a..f79ed2c57e1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -828,18 +828,6 @@ static void atomic_emit( args.data[num_data++] = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0)); - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMINC_WRAP) { - /* ATOMIC_INC instruction does: - * value = (value + 1) % (data + 1) - * but we want: - * value = (value + 1) % data - * So replace 'data' by 'data - 1'. - */ - args.data[0] = LLVMBuildSub(ctx->ac.builder, - args.data[0], - ctx->ac.i32_1, ""); - } - args.cache_policy = get_cache_policy(ctx, inst, true, false, false); if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ff2ec0726e8..9b982569490 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3938,9 +3938,18 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) case ir_intrinsic_image_atomic_comp_swap: opcode = TGSI_OPCODE_ATOMCAS; break; - case ir_intrinsic_image_atomic_inc_wrap: + case ir_intrinsic_image_atomic_inc_wrap: { + /* There's a bit of disagreement between GLSL and the hardware. The + * hardware wants to wrap after the given wrap value, while GLSL + * wants to wrap at the value. Subtract 1 to make up the difference. + */ + st_src_reg wrap = get_temp(glsl_type::uint_type); + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(wrap), + arg1, st_src_reg_for_int(-1)); + arg1 = wrap; opcode = TGSI_OPCODE_ATOMINC_WRAP; break; + } case ir_intrinsic_image_atomic_dec_wrap: opcode = TGSI_OPCODE_ATOMDEC_WRAP; break; -- 2.30.2