From 7ceec21b763ee307689519820034e1b48c1bb58e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 13 Jul 2019 18:35:20 -0500 Subject: [PATCH] intel/fs: Use a strided MOV instead of a conversion for load_* destinations In many cases, the compiler can just copy-prop the strided MOV whereas the conversion is a bit trickier. This cuts 5% of the instructions off of one particular Vulkan CTS test which does lots of load_ssbo. Reviewed-by: Matt Turner --- src/intel/compiler/brw_fs_nir.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index aeebaaeb62c..e5a927431c4 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4018,7 +4018,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, fs_reg read_result = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.emit(SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, read_result, srcs, SURFACE_LOGICAL_NUM_SRCS); - bld.MOV(dest, read_result); + bld.MOV(dest, subscript(read_result, dest.type, 0)); } break; } @@ -4644,15 +4644,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr const unsigned bit_size = nir_dest_bit_size(instr->dest); assert(bit_size <= 32); assert(nir_dest_num_components(instr->dest) == 1); - brw_reg_type data_type = - brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.emit(SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL, tmp, get_nir_src(instr->src[0]), /* Address */ fs_reg(), /* No source data */ brw_imm_ud(bit_size)); - bld.MOV(retype(dest, data_type), tmp); + bld.MOV(dest, subscript(tmp, dest.type, 0)); } break; } @@ -4755,7 +4753,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg read_result = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.emit(SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, read_result, srcs, SURFACE_LOGICAL_NUM_SRCS); - bld.MOV(dest, read_result); + bld.MOV(dest, subscript(read_result, dest.type, 0)); } break; } -- 2.30.2