From 935e0e305dd7a4f67557e969513a30357d308efb Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 19 Apr 2016 13:29:55 +0200 Subject: [PATCH] i965/fs: optimize unpack double When we are actually unpacking from a double that we have previously packed from its 32-bit components we can bypass the pack operation and source from its arguments directly. v2 (Sam): - Fix line overflow (Topi) - Bail if the parent instruction's source is not SSA (Connor) Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 30 ++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index afa3308135e..f07b38afeb3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1132,12 +1132,34 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_unpack_double_2x32_split_x: - bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 0)); - break; + case nir_op_unpack_double_2x32_split_y: { + /* Optimize the common case where we are unpacking from a double we have + * previously packed. In this case we can just bypass the pack operation + * and source directly from its arguments. + */ + unsigned index = (instr->op == nir_op_unpack_double_2x32_split_x) ? 0 : 1; + if (instr->src[0].src.is_ssa) { + nir_instr *parent_instr = instr->src[0].src.ssa->parent_instr; + if (parent_instr->type == nir_instr_type_alu) { + nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr); + if (alu_parent->op == nir_op_pack_double_2x32_split && + alu_parent->src[index].src.is_ssa) { + op[0] = retype(get_nir_src(alu_parent->src[index].src), + BRW_REGISTER_TYPE_UD); + op[0] = + offset(op[0], bld, alu_parent->src[index].swizzle[channel]); + bld.MOV(result, op[0]); + break; + } + } + } - case nir_op_unpack_double_2x32_split_y: - bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 1)); + if (instr->op == nir_op_unpack_double_2x32_split_x) + bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 0)); + else + bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 1)); break; + } case nir_op_fpow: inst = bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]); -- 2.30.2