i965: Implement nir_op_uadd_carry and _usub_borrow without accumulator.
author: Francisco Jerez <currojerez@riseup.net>
Thu, 9 Jul 2015 18:42:28 +0000 (21:42 +0300)
committer: Francisco Jerez <currojerez@riseup.net>
Thu, 16 Jul 2015 15:29:32 +0000 (18:29 +0300)
This gets rid of two no16() fall-backs and should allow better
scheduling of the generated IR.  There are no uses of usubBorrow() or
uaddCarry() in shader-db so no changes are expected.  However, the
"arb_gpu_shader5/execution/built-in-functions/fs-usubBorrow" and
"arb_gpu_shader5/execution/built-in-functions/fs-uaddCarry" piglit
tests go from 40 to 28 instructions.  The reason is that the plain ADD
instruction can easily be CSE'ed with the original addition, and the
b2i negation can easily be propagated into the source modifier of
another instruction, so effectively both operations are performed with
just one instruction.

v2: Rely on carry_to_arith() and borrow_to_arith() to lower these
    (Ilia Mirkin).

Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index 198703281e6bf840b337a7bb0bfe6e162b20fa57..3099dc447ec40b8148763ff0f6e1e40b4cc22eac 100644 (file)
@@ -835,29 +835,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
       break;
 
-   case nir_op_uadd_carry: {
-      if (devinfo->gen >= 7)
-         no16("SIMD16 explicit accumulator operands unsupported\n");
-
-      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-                                  BRW_REGISTER_TYPE_UD);
-
-      bld.ADDC(bld.null_reg_ud(), op[0], op[1]);
-      bld.MOV(result, fs_reg(acc));
-      break;
-   }
+   case nir_op_uadd_carry:
+      unreachable("Should have been lowered by carry_to_arith().");
 
-   case nir_op_usub_borrow: {
-      if (devinfo->gen >= 7)
-         no16("SIMD16 explicit accumulator operands unsupported\n");
-
-      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-                                  BRW_REGISTER_TYPE_UD);
-
-      bld.SUBB(bld.null_reg_ud(), op[0], op[1]);
-      bld.MOV(result, fs_reg(acc));
-      break;
-   }
+   case nir_op_usub_borrow:
+      unreachable("Should have been lowered by borrow_to_arith().");
 
    case nir_op_umod:
       bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
index 3e3d78b9ad713e46fc60b341dcceb0097dd9fd16..d66baf34b389750c4849aad8939072593ebb1371 100644 (file)
@@ -259,7 +259,9 @@ process_glsl_ir(struct brw_context *brw,
                       EXP_TO_EXP2 |
                       LOG_TO_LOG2 |
                       bitfield_insert |
-                      LDEXP_TO_ARITH);
+                      LDEXP_TO_ARITH |
+                      CARRY_TO_ARITH |
+                      BORROW_TO_ARITH);
 
    /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
     * if-statements need to be flattened.
index 8a352d33e2fe3b7947089d0c1d3b6c22d661902a..f351bf4075b695b629c55c9e8a32f1b7561aa74c 100644 (file)
@@ -1601,20 +1601,13 @@ vec4_visitor::visit(ir_expression *ir)
       assert(ir->type->is_integer());
       emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
       break;
-   case ir_binop_carry: {
-      struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
 
-      emit(ADDC(dst_null_ud(), op[0], op[1]));
-      emit(MOV(result_dst, src_reg(acc)));
-      break;
-   }
-   case ir_binop_borrow: {
-      struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
+   case ir_binop_carry:
+      unreachable("Should have been lowered by carry_to_arith().");
+
+   case ir_binop_borrow:
+      unreachable("Should have been lowered by borrow_to_arith().");
 
-      emit(SUBB(dst_null_ud(), op[0], op[1]));
-      emit(MOV(result_dst, src_reg(acc)));
-      break;
-   }
    case ir_binop_mod:
       /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
       assert(ir->type->is_integer());