i965/fs: Use the LRP instruction for ir_triop_lrp when possible.

author Kenneth Graunke <kenneth@whitecape.org>

Sun, 2 Dec 2012 08:08:15 +0000 (00:08 -0800)

committer Matt Turner <mattst88@gmail.com>

Thu, 28 Feb 2013 21:19:00 +0000 (13:19 -0800)
author Kenneth Graunke <kenneth@whitecape.org>
Sun, 2 Dec 2012 08:08:15 +0000 (00:08 -0800)
committer Matt Turner <mattst88@gmail.com>
Thu, 28 Feb 2013 21:19:00 +0000 (13:19 -0800)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index ff4248bd61a52a7153c4452df63cbead19fa684d..fe3470184723d4f8017051f699ff06bc1890b5e4 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -146,6 +146,13 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst,
        return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);    \
     }
  
+#define ALU3(op)                                                        \
+   fs_inst *                                                            \
+   fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)    \
+   {                                                                    \
+      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
+   }
+
  ALU1(NOT)
  ALU1(MOV)
  ALU1(FRC)
@@ -161,6 +168,7 @@ ALU2(XOR)
  ALU2(SHL)
  ALU2(SHR)
  ALU2(ASR)
+ALU3(LRP)
  
  /** Gen4 predicated IF. */
  fs_inst *
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index d1bb111bf5fddcf8f34a674ae813b37dcc30c702..17ef046533bc5183d06b69c06136282adbe0b88b 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -285,6 +285,7 @@ public:
     fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition);
     fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
                  uint32_t condition);
+   fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x);
     fs_inst *DEP_RESOLVE_MOV(int grf);
  
     int type_size(const struct glsl_type *type);
@@ -361,6 +362,7 @@ public:
     fs_reg fix_math_operand(fs_reg src);
     fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
     fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
+   void emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a);
     void emit_minmax(uint32_t conditionalmod, fs_reg dst,
                      fs_reg src0, fs_reg src1);
     bool try_emit_saturate(ir_expression *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp

index ea0622576fa8e806ada894d5cf491f59f96bc045..30d8d9bf527c01122f04dc14099706be8305f7cb 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -135,7 +135,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
     ir_expression *expr = ir->rhs->as_expression();
     bool found_vector = false;
     unsigned int i, vector_elements = 1;
-   ir_variable *op_var[2];
+   ir_variable *op_var[3];
  
     if (!expr)
        return visit_continue;
@@ -342,6 +342,20 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
        assert(!"not yet supported");
        break;
  
+   case ir_triop_lrp:
+      for (i = 0; i < vector_elements; i++) {
+        ir_rvalue *op0 = get_element(op_var[0], i);
+        ir_rvalue *op1 = get_element(op_var[1], i);
+        ir_rvalue *op2 = get_element(op_var[2], i);
+
+        assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
+                                                 element_type,
+                                                 op0,
+                                                 op1,
+                                                 op2));
+      }
+      break;
+
     case ir_unop_pack_snorm_2x16:
     case ir_unop_pack_snorm_4x8:
     case ir_unop_pack_unorm_2x16:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp

index 44479d8e9ffef2286450ccb757aee6df246b556f..e0f824c5e3e6a3d8379caa8bde075c94f0eef3d2 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -66,6 +66,7 @@ is_expression(const fs_inst *const inst)
     case BRW_OPCODE_LINE:
     case BRW_OPCODE_PLN:
     case BRW_OPCODE_MAD:
+   case BRW_OPCODE_LRP:
     case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
     case FS_OPCODE_CINTERP:
     case FS_OPCODE_LINTERP:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp

index 3d1f3b356a8595ff76a5db4d59bb86d88eb9d6e7..a25f594d9d88cf01193e21096d6c99d2dbbd9914 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -1095,6 +1095,20 @@ fs_generator::generate_code(exec_list *instructions)
          brw_set_access_mode(p, BRW_ALIGN_1);
          break;
  
+      case BRW_OPCODE_LRP:
+        brw_set_access_mode(p, BRW_ALIGN_16);
+        if (dispatch_width == 16) {
+           brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+           brw_LRP(p, dst, src[0], src[1], src[2]);
+           brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+           brw_LRP(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
+           brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+        } else {
+           brw_LRP(p, dst, src[0], src[1], src[2]);
+        }
+        brw_set_access_mode(p, BRW_ALIGN_1);
+        break;
+
        case BRW_OPCODE_FRC:
          brw_FRC(p, dst, src[0]);
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 573921cf8cc671c0ae30266b9c4a47becbbee0c6..e6daf2f62e5a779c312c4127dfa1a34f1391ec02 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -198,6 +198,30 @@ fs_visitor::visit(ir_dereference_array *ir)
     this->result = src;
  }
  
+void
+fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a)
+{
+   if (intel->gen < 6 || x.file != GRF || y.file != GRF || a.file != GRF) {
+      /* Pre-gen6, or an operand not in the GRF: skip LRP, emit x*(1-a) + y*a. */
+      fs_reg y_times_a           = fs_reg(this, glsl_type::float_type);
+      fs_reg one_minus_a         = fs_reg(this, glsl_type::float_type);
+      fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type);
+
+      emit(MUL(y_times_a, y, a));
+
+      a.negate = !a.negate;   /* a is a by-value copy; -a feeds the ADD below */
+      emit(ADD(one_minus_a, fs_reg(1.0f), a));
+      emit(MUL(x_times_one_minus_a, x, one_minus_a));
+
+      emit(ADD(dst, x_times_one_minus_a, y_times_a));
+   } else {
+      /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
+       * pass the operands as (a, y, x) to obtain y*a + x*(1 - a).
+       */
+      emit(LRP(dst, a, y, x));
+   }
+}
+
  void
  fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst,
                          fs_reg src0, fs_reg src1)
@@ -291,10 +315,10 @@ void
  fs_visitor::visit(ir_expression *ir)
  {
     unsigned int operand;
-   fs_reg op[2], temp;
+   fs_reg op[3], temp;
     fs_inst *inst;
  
-   assert(ir->get_num_operands() <= 2);
+   assert(ir->get_num_operands() <= 3);
  
     if (try_emit_saturate(ir))
        return;
@@ -586,7 +610,7 @@ fs_visitor::visit(ir_expression *ir)
     case ir_binop_pack_half_2x16_split:
        emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
        break;
-   case ir_binop_ubo_load:
+   case ir_binop_ubo_load: {
        /* This IR node takes a constant uniform block and a constant or
         * variable byte offset within the block and loads a vector from that.
         */
@@ -640,6 +664,11 @@ fs_visitor::visit(ir_expression *ir)
        result.reg_offset = 0;
        break;
     }
+
+   case ir_triop_lrp:
+      emit_lrp(this->result, op[0], op[1], op[2]);
+      break;
+   }
  }
  
  void
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp

index 9ab18cc7d6309ddc49dc2fe3b081513f881e1db8..2da5ed5c6cc88514a480d52de88e79c8ef7d8cc4 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -150,13 +150,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
         */
        brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir);
        do_mat_op_to_vec(shader->ir);
+      const int lrp_to_arith = (intel->gen < 6 || stage != MESA_SHADER_FRAGMENT)
+                                ? LRP_TO_ARITH : 0;
        lower_instructions(shader->ir,
                          MOD_TO_FRACT |
                          DIV_TO_MUL_RCP |
                          SUB_TO_ADD_NEG |
                          EXP_TO_EXP2 |
                          LOG_TO_LOG2 |
-                        LRP_TO_ARITH);
+                         lrp_to_arith);
  
        /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
         * if-statements need to be flattened.
author	Kenneth Graunke <kenneth@whitecape.org>
	Sun, 2 Dec 2012 08:08:15 +0000 (00:08 -0800)
committer	Matt Turner <mattst88@gmail.com>
	Thu, 28 Feb 2013 21:19:00 +0000 (13:19 -0800)
src/mesa/drivers/dri/i965/brw_fs.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_cse.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_emit.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_shader.cpp		patch \| blob \| history