From 6197a6b7ac6ff03e87a939311329fa0cb4af7f4c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 26 Aug 2017 11:26:40 -0700 Subject: [PATCH] i965/fs/nir: Simplify 64-bit store_output The swizzles weren't doing any good because swiz is just XYZW. Also, we were emitting an extra set of MOVs because shuffle_64bit_data_for_32bit_write already does a MOV for us. Finally, the temporary was only ever used inside the inner loop, so there's no need for it to actually be an array. Reviewed-by: Iago Toral Quiroga --- src/intel/compiler/brw_fs_nir.cpp | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index f433e3b6c3d..d0625c8bd3f 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2568,7 +2568,6 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size) == 64; fs_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; - unsigned swiz = BRW_SWIZZLE_XYZW; unsigned mask = instr->const_index[1]; unsigned header_regs = 0; fs_reg srcs[7]; @@ -2598,13 +2597,6 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, } } - /* 64-bit data needs to me shuffled before we can write it to the URB. - * We will use this temporary to shuffle the components in each - * iteration. - */ - fs_reg tmp = - fs_reg(VGRF, alloc.allocate(2 * iter_components), value.type); - mask = mask << first_component; for (unsigned iter = 0; iter < num_iterations; iter++) { @@ -2648,26 +2640,21 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, continue; if (!is_64bit) { - srcs[header_regs + i + first_component] = - offset(value, bld, BRW_GET_SWZ(swiz, i)); + srcs[header_regs + i + first_component] = offset(value, bld, i); } else { /* We need to shuffle the 64-bit data to match the layout * expected by our 32-bit URB write messages. 
We use a temporary * for that. */ - unsigned channel = BRW_GET_SWZ(swiz, iter * 2 + i); + fs_reg dest = fs_reg(VGRF, alloc.allocate(2), value.type); + unsigned channel = iter * 2 + i; shuffle_64bit_data_for_32bit_write(bld, - retype(offset(tmp, bld, 2 * i), BRW_REGISTER_TYPE_F), + retype(dest, BRW_REGISTER_TYPE_F), retype(offset(value, bld, 2 * channel), BRW_REGISTER_TYPE_DF), 1); - /* Now copy the data to the destination */ - fs_reg dest = fs_reg(VGRF, alloc.allocate(2), value.type); - unsigned idx = 2 * i; - bld.MOV(dest, offset(tmp, bld, idx)); - bld.MOV(offset(dest, bld, 1), offset(tmp, bld, idx + 1)); - srcs[header_regs + idx + first_component * 2] = dest; - srcs[header_regs + idx + 1 + first_component * 2] = + srcs[header_regs + (i + first_component) * 2] = dest; + srcs[header_regs + (i + first_component) * 2 + 1] = offset(dest, bld, 1); } } -- 2.30.2