From 70fcd565388354da5a3c96d8a265e4d0b5ad7292 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 10 Mar 2014 14:11:05 -0700 Subject: [PATCH] i965/vec4: Optimize packSnorm4x8(). Reduces the number of instructions needed to implement packSnorm4x8() from 13 -> 7. --- src/mesa/drivers/dri/i965/brw_shader.cpp | 6 ++--- src/mesa/drivers/dri/i965/brw_vec4.h | 1 + .../drivers/dri/i965/brw_vec4_visitor.cpp | 26 ++++++++++++++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index d7a2a916387..8e4f7795d82 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -82,13 +82,13 @@ brw_lower_packing_builtins(struct brw_context *brw, int ops = LOWER_PACK_SNORM_2x16 | LOWER_UNPACK_SNORM_2x16 | LOWER_PACK_UNORM_2x16 - | LOWER_UNPACK_UNORM_2x16 - | LOWER_PACK_SNORM_4x8; + | LOWER_UNPACK_UNORM_2x16; if (shader_type == MESA_SHADER_FRAGMENT) { ops |= LOWER_UNPACK_UNORM_4x8 | LOWER_UNPACK_SNORM_4x8 - | LOWER_PACK_UNORM_4x8; + | LOWER_PACK_UNORM_4x8 + | LOWER_PACK_SNORM_4x8; } if (brw->gen >= 7) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 150e20a0387..7d814ca65bd 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -511,6 +511,7 @@ public: void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0); void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0); void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0); + void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0); uint32_t gather_channel(ir_texture *ir, uint32_t sampler); src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 34f96070250..c1c24ac0b49 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -535,6 +535,28 @@ vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0) emit(VEC4_OPCODE_PACK_BYTES, dst, bytes); } +void +vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0) +{ /* Emits the instruction sequence used for ir_unop_pack_snorm_4x8: bound src0 on both sides, multiply by 127, RNDE, move into an integer vector, then pack bytes into dst. First step: emit_minmax against -1.0 with BRW_CONDITIONAL_G (the lower bound, going by the temporary's name). */ + dst_reg max(this, glsl_type::vec4_type); + emit_minmax(BRW_CONDITIONAL_G, max, src0, src_reg(-1.0f)); + /* Second bound: emit_minmax of the previous result against 1.0 with BRW_CONDITIONAL_L (the upper bound, going by the temporary's name). */ + dst_reg min(this, glsl_type::vec4_type); + emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f)); + /* Scale the bounded value by 127.0. */ + dst_reg scaled(this, glsl_type::vec4_type); + emit(MUL(scaled, src_reg(min), src_reg(127.0f))); + /* RNDE the scaled value; presumably round-to-nearest-even, TODO confirm against the hardware docs. */ + dst_reg rounded(this, glsl_type::vec4_type); + emit(RNDE(rounded, src_reg(scaled))); + /* MOV from the vec4-typed temporary into an ivec4-typed one; presumably this is where the float-to-integer conversion happens, TODO confirm. */ + dst_reg i(this, glsl_type::ivec4_type); + emit(MOV(i, src_reg(rounded))); + /* Feed the integer vector to VEC4_OPCODE_PACK_BYTES with dst as the destination, mirroring the tail of emit_pack_unorm_4x8 shown in the hunk context. */ + src_reg bytes(i); + emit(VEC4_OPCODE_PACK_BYTES, dst, bytes); +} + void vec4_visitor::visit_instructions(const exec_list *list) { @@ -1825,8 +1847,10 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_pack_unorm_4x8: emit_pack_unorm_4x8(result_dst, op[0]); break; - case ir_unop_pack_snorm_2x16: case ir_unop_pack_snorm_4x8: + emit_pack_snorm_4x8(result_dst, op[0]); + break; + case ir_unop_pack_snorm_2x16: case ir_unop_pack_unorm_2x16: case ir_unop_unpack_snorm_2x16: case ir_unop_unpack_unorm_2x16: -- 2.30.2