i965/fs: Implement integer quotient and remainder math operations.
author Kenneth Graunke <kenneth@whitecape.org>
Thu, 29 Sep 2011 00:37:54 +0000 (17:37 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Mon, 3 Oct 2011 00:01:09 +0000 (17:01 -0700)
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Tested-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_emit.cpp
src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp

index 05a13375fe0c05e2daae230dd5e1ee28fb7b5d04..a11163093a38014a809eae1d673dc578f6baf5f2 100644 (file)
@@ -622,6 +622,8 @@ enum opcode {
    SHADER_OPCODE_EXP2,
    SHADER_OPCODE_LOG2,
    SHADER_OPCODE_POW,
+   SHADER_OPCODE_INT_QUOTIENT,
+   SHADER_OPCODE_INT_REMAINDER,
    SHADER_OPCODE_SIN,
    SHADER_OPCODE_COS,
    FS_OPCODE_DDX,
index 9a89f88b61fe8aef1100cbcae246a92628f8efa3..1d93a51a8d10124dcc2869140c3fde5164e20e03 100644 (file)
@@ -152,6 +152,8 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case SHADER_OPCODE_COS:
       return 1 * c->dispatch_width / 8;
    case SHADER_OPCODE_POW:
+   case SHADER_OPCODE_INT_QUOTIENT:
+   case SHADER_OPCODE_INT_REMAINDER:
       return 2 * c->dispatch_width / 8;
    case FS_OPCODE_TEX:
    case FS_OPCODE_TXB:
@@ -576,7 +578,15 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    int base_mrf = 2;
    fs_inst *inst;
 
-   assert(opcode == SHADER_OPCODE_POW);
+   switch (opcode) {
+   case SHADER_OPCODE_POW:
+   case SHADER_OPCODE_INT_QUOTIENT:
+   case SHADER_OPCODE_INT_REMAINDER:
+      break;
+   default:
+      assert(!"not reached: unsupported binary math opcode.");
+      return NULL;
+   }
 
    if (intel->gen >= 6) {
       /* Can't do hstride == 0 args to gen6 math, so expand it out.
@@ -586,19 +596,21 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
        */
       if (src0.file == UNIFORM || src0.abs || src0.negate) {
         fs_reg expanded = fs_reg(this, glsl_type::float_type);
+        expanded.type = src0.type;
         emit(BRW_OPCODE_MOV, expanded, src0);
         src0 = expanded;
       }
 
       if (src1.file == UNIFORM || src1.abs || src1.negate) {
         fs_reg expanded = fs_reg(this, glsl_type::float_type);
+        expanded.type = src1.type;
         emit(BRW_OPCODE_MOV, expanded, src1);
         src1 = expanded;
       }
 
       inst = emit(opcode, dst, src0, src1);
    } else {
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1, src1.type), src1);
       inst = emit(opcode, dst, src0, reg_null_f);
 
       inst->base_mrf = base_mrf;
index f6a57bacb33ccc94a32916fabf76e9313e3e74c5..56181a37c9f20b82566e7a77e8cb7c73821b4ddc 100644 (file)
@@ -305,6 +305,8 @@ public:
              opcode == SHADER_OPCODE_LOG2 ||
              opcode == SHADER_OPCODE_SIN ||
              opcode == SHADER_OPCODE_COS ||
+             opcode == SHADER_OPCODE_INT_QUOTIENT ||
+             opcode == SHADER_OPCODE_INT_REMAINDER ||
              opcode == SHADER_OPCODE_POW);
    }
 
index 8176a76a85e491dc1eb623c6b528eee31409eef0..4c158fed11cc9ba860fba48437cf4c0d6bfe0070 100644 (file)
@@ -794,6 +794,8 @@ fs_visitor::generate_code()
            generate_math_gen4(inst, dst, src[0]);
         }
         break;
+      case SHADER_OPCODE_INT_QUOTIENT:
+      case SHADER_OPCODE_INT_REMAINDER:
       case SHADER_OPCODE_POW:
         if (intel->gen >= 6) {
            generate_math2_gen6(inst, dst, src[0], src[1]);
index 1f83ee278b2b68b160921c481df4f3077fa7cd78..910f3297d27d14a34f16280356478319cc2e9858 100644 (file)
@@ -75,11 +75,13 @@ public:
       case SHADER_OPCODE_RSQ:
         this->latency = 2 * chans * math_latency;
         break;
+      case SHADER_OPCODE_INT_QUOTIENT:
       case SHADER_OPCODE_SQRT:
       case SHADER_OPCODE_LOG2:
         /* full precision log.  partial is 2. */
         this->latency = 3 * chans * math_latency;
         break;
+      case SHADER_OPCODE_INT_REMAINDER:
       case SHADER_OPCODE_EXP2:
         /* full precision.  partial is 3, same throughput. */
         this->latency = 4 * chans * math_latency;
index 3af57807eff545100e75d607056a1cf0968e8869..07ea84fe4c1f338fc3de241112d18c892cb2cde5 100644 (file)
@@ -294,10 +294,14 @@ fs_visitor::visit(ir_expression *ir)
       }
       break;
    case ir_binop_div:
-      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
+      assert(ir->type->is_integer());
+      emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
       break;
    case ir_binop_mod:
-      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+      assert(ir->type->is_integer());
+      emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
       break;
 
    case ir_binop_less:
index a6ed810a16e17a88f99aa4f72f3543d9fcdd737a..c938c750dec07bef9f1e0704e474095d977d50b1 100644 (file)
@@ -227,6 +227,10 @@ brw_math_function(enum opcode op)
       return BRW_MATH_FUNCTION_SIN;
    case SHADER_OPCODE_COS:
       return BRW_MATH_FUNCTION_COS;
+   case SHADER_OPCODE_INT_QUOTIENT:
+      return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
+   case SHADER_OPCODE_INT_REMAINDER:
+      return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
    default:
       assert(!"not reached: unknown math function");
       return 0;