glsl_to_tgsi: implement ir_unop_any using DP4 w/saturate or DP4 w/SLT

[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index d7a1ba80e1da9703e9549a4fe14b0d8cc85f03ac..f7d79e9f50cd6b320d01d4f85b49a25e21359a17 100644 (file)
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -295,6 +295,7 @@ public:
     bool indirect_addr_consts;
     
     int glsl_version;
+   bool native_integers;
  
     variable_storage *find_variable_storage(ir_variable *var);
  
@@ -372,11 +373,11 @@ public:
     /**
      * Emit the correct dot-product instruction for the type of arguments
      */
-   void emit_dp(ir_instruction *ir,
-                st_dst_reg dst,
-                st_src_reg src0,
-                st_src_reg src1,
-                unsigned elements);
+   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
+                                     st_dst_reg dst,
+                                     st_src_reg src0,
+                                     st_src_reg src1,
+                                     unsigned elements);
  
     void emit_scalar(ir_instruction *ir, unsigned op,
                     st_dst_reg dst, st_src_reg src0);
@@ -600,7 +601,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
     
     if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
        type = GLSL_TYPE_FLOAT;
-   else if (glsl_version >= 130)
+   else if (native_integers)
        type = src0.type;
  
  #define case4(c, f, i, u) \
@@ -641,7 +642,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
     return op;
  }
  
-void
+glsl_to_tgsi_instruction *
  glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
                             st_dst_reg dst, st_src_reg src0, st_src_reg src1,
                             unsigned elements)
@@ -650,7 +651,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
        TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
     };
  
-   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
  }
  
  /**
@@ -881,7 +882,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
     st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
     union gl_constant_value uval;
     
-   assert(glsl_version >= 130);
+   assert(native_integers);
  
     uval.i = val;
     src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
@@ -892,7 +893,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
  struct st_src_reg
  glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
  {
-   if (glsl_version >= 130)
+   if (native_integers)
        return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 
                                         st_src_reg_for_int(val);
     else
@@ -950,7 +951,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
  {
     st_src_reg src;
  
-   src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
+   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
     src.file = PROGRAM_TEMPORARY;
     src.index = next_temp;
     src.reladdr = NULL;
@@ -1053,7 +1054,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
           this->next_temp += type_size(ir->type);
  
           dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
-               glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
+               native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
        }
  
  
@@ -1069,7 +1070,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
              }
           } else {
              st_src_reg src(PROGRAM_STATE_VAR, index,
-                  glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
+                  native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
              src.swizzle = slots[i].swizzle;
              emit(ir, TGSI_OPCODE_MOV, dst, src);
              /* even a float takes up a whole vec4 reg in a struct/array. */
@@ -1335,7 +1336,17 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
  
     switch (ir->operation) {
     case ir_unop_logic_not:
-      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      if (result_dst.type != GLSL_TYPE_FLOAT)
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      else {
+         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+          * older GPUs implement SEQ using multiple instructions (i915 uses two
+          * SGE instructions and a MUL instruction).  Since our logic values are
+          * 0.0 and 1.0, 1-x also implements !x.
+          */
+         op[0].negate = ~op[0].negate;
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+      }
        break;
     case ir_unop_neg:
        assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
@@ -1444,7 +1455,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        /* "==" operator producing a scalar boolean. */
        if (ir->operands[0]->type->is_vector() ||
            ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+         st_src_reg temp = get_temp(native_integers ?
                 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                 glsl_type::vec4_type);
           assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
@@ -1459,7 +1470,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        /* "!=" operator producing a scalar boolean. */
        if (ir->operands[0]->type->is_vector() ||
            ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+         st_src_reg temp = get_temp(native_integers ?
                 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                 glsl_type::vec4_type);
           assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
@@ -1471,22 +1482,69 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        }
        break;
  
-   case ir_unop_any:
+   case ir_unop_any: {
        assert(ir->operands[0]->type->is_vector());
-      emit_dp(ir, result_dst, op[0], op[0],
-              ir->operands[0]->type->vector_elements);
-      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+      /* After the dot-product, the value will be an integer in the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *const dp =
+         emit_dp(ir, result_dst, op[0], op[0],
+                 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+             /* The clamping to [0,1] can be done for free in the fragment
+              * shader with a saturate.
+              */
+             dp->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+             /* Negating the result of the dot-product gives values in the range
+              * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+              * is achieved using SLT.
+              */
+             st_src_reg slt_src = result_src;
+             slt_src.negate = ~slt_src.negate;
+             emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      }
+      else {
+         /* Use SNE 0 if integers are being used as boolean values. */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
        break;
+   }
  
     case ir_binop_logic_xor:
        emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
        break;
  
-   case ir_binop_logic_or:
-      /* This could be a saturated add and skip the SNE. */
-      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+   case ir_binop_logic_or: {
+      /* After the addition, the value will be an integer in the
+       * range [0,2].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *add =
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+         /* The clamping to [0,1] can be done for free in the fragment
+          * shader with a saturate if floats are being used as boolean values.
+          */
+         add->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+         /* Negating the result of the addition gives values in the range
+          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+          * is achieved using SLT.
+          */
+         st_src_reg slt_src = result_src;
+         slt_src.negate = ~slt_src.negate;
+         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      } else {
+         /* Use an SNE on the result of the addition.  Zero stays zero,
+          * 1 stays 1, and 2 becomes 1.
+          */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
        break;
+   }
  
     case ir_binop_logic_and:
        /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
@@ -1514,7 +1572,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        break;
     case ir_unop_i2f:
     case ir_unop_b2f:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
           break;
        }
@@ -1526,7 +1584,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        result_src = op[0];
        break;
     case ir_unop_f2i:
-      if (glsl_version >= 130)
+      if (native_integers)
           emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
        else
           emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
@@ -1567,7 +1625,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
           break;
        }
     case ir_unop_u2f:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
           break;
        }
@@ -1719,7 +1777,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
     }
  
     this->result = st_src_reg(entry->file, entry->index, var->type);
-   if (glsl_version <= 120)
+   if (!native_integers)
        this->result.type = GLSL_TYPE_FLOAT;
  }
  
@@ -1994,15 +2052,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
     } else if (ir->rhs->as_expression() &&
                this->instructions.get_tail() &&
                ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
-              type_size(ir->lhs->type) == 1) {
+              type_size(ir->lhs->type) == 1 &&
+              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
        /* To avoid emitting an extra MOV when assigning an expression to a 
         * variable, emit the last instruction of the expression again, but
         * replace the destination register with the target of the assignment.
         * Dead code elimination will remove the original instruction.
         */
-      glsl_to_tgsi_instruction *inst;
+      glsl_to_tgsi_instruction *inst, *new_inst;
        inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst->saturate = inst->saturate;
     } else {
        for (i = 0; i < type_size(ir->lhs->type); i++) {
           emit(ir, TGSI_OPCODE_MOV, l, r);
@@ -2107,27 +2167,27 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
        }
        break;
     case GLSL_TYPE_UINT:
-      gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
+      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
        for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
              values[i].u = ir->value.u[i];
           else
              values[i].f = ir->value.u[i];
        }
        break;
     case GLSL_TYPE_INT:
-      gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
+      gl_type = native_integers ? GL_INT : GL_FLOAT;
        for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
              values[i].i = ir->value.i[i];
           else
              values[i].f = ir->value.i[i];
        }
        break;
     case GLSL_TYPE_BOOL:
-      gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
+      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
        for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
              values[i].b = ir->value.b[i];
           else
              values[i].f = ir->value.b[i];
@@ -3441,7 +3501,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
           /* Continuing the block, clear any channels from the write array that
            * are read by this instruction.
            */
-         for (int i = 0; i < 4; i++) {
+         for (unsigned i = 0; i < Elements(inst->src); i++) {
              if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
                 /* Any temporary might be read, so no dead code elimination 
                  * across this instruction.
@@ -3609,6 +3669,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
     v->ctx = original->ctx;
     v->prog = prog;
     v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
     v->options = original->options;
     v->next_temp = original->next_temp;
     v->num_address_regs = original->num_address_regs;
@@ -3737,6 +3798,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
     v->ctx = original->ctx;
     v->prog = prog;
     v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
     v->options = original->options;
     v->next_temp = original->next_temp;
     v->num_address_regs = original->num_address_regs;
@@ -4672,6 +4734,7 @@ get_mesa_program(struct gl_context *ctx,
     v->shader_program = shader_program;
     v->options = options;
     v->glsl_version = ctx->Const.GLSLVersion;
+   v->native_integers = ctx->Const.NativeIntegers;
  
     add_uniforms_to_parameters_list(shader_program, shader, prog);