i965/fs: Handle derivative quality decisions in the front-end.
author Kenneth Graunke <kenneth@whitecape.org>
Sat, 8 Nov 2014 09:39:14 +0000 (01:39 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Fri, 28 Nov 2014 04:25:14 +0000 (20:25 -0800)
Kristian noted that there's very little use of brw_wm_prog_key in the
generator, and that it basically just generates what it's told, without
caring about what stage it's handling.

One exception to this is derivative handling.  When handling dFdxCoarse
and dFdxFine, we packed an enum value in a second source register,
explicitly telling the generator what to do.  For dFdx, we specified an
enum value of "please use the hint", then checked the program key in the
generator level code.

A natural method is to define separate FS_OPCODE_DD[XY]_{COARSE,FINE}
opcodes, and have the front-end (which already decides what IR to
generate based on the program key) decide which of them a plain
dFdx/dFdy should correspond to.  This consolidates the decision making
in one place.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp

index fa473d08ef6fac80e75afd2612dbbe6043c6379c..e7f6a2bad8e4a2a2f3f1265110a7c70699e50ec9 100644 (file)
@@ -910,8 +910,10 @@ enum opcode {
 
    VEC4_OPCODE_PACK_BYTES,
 
-   FS_OPCODE_DDX,
-   FS_OPCODE_DDY,
+   FS_OPCODE_DDX_COARSE,
+   FS_OPCODE_DDX_FINE,
+   FS_OPCODE_DDY_COARSE,
+   FS_OPCODE_DDY_FINE,
    FS_OPCODE_PIXEL_X,
    FS_OPCODE_PIXEL_Y,
    FS_OPCODE_CINTERP,
@@ -1093,12 +1095,6 @@ enum opcode {
    GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
 };
 
-enum brw_derivative_quality {
-   BRW_DERIVATIVE_BY_HINT = 0,
-   BRW_DERIVATIVE_FINE = 1,
-   BRW_DERIVATIVE_COARSE = 2,
-};
-
 enum brw_urb_write_flags {
    BRW_URB_WRITE_NO_FLAGS = 0,
 
index 4421eeda2a6a935e7dd52b2bfeade9c03ce33f62..1deb7743cea9cb9490692fa01a5f136f4c5c313a 100644 (file)
@@ -710,9 +710,9 @@ private:
    void generate_math_g45(fs_inst *inst,
                          struct brw_reg dst,
                          struct brw_reg src);
-   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src, struct brw_reg quality);
-   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
-                     struct brw_reg quality, bool negate_value);
+   void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg src);
+   void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src,
+                     bool negate_value);
    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
    void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
index 2a35fa98945e7bf024adc4b6b879a666bb0acbcf..27ba0bbadb89963c7ebfd341a6d0857dd4337279 100644 (file)
@@ -705,25 +705,16 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
  * appropriate swizzling.
  */
 void
-fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
-                           struct brw_reg quality)
+fs_generator::generate_ddx(enum opcode opcode,
+                           struct brw_reg dst, struct brw_reg src)
 {
    unsigned vstride, width;
-   assert(quality.file == BRW_IMMEDIATE_VALUE);
-   assert(quality.type == BRW_REGISTER_TYPE_D);
 
-   assert(stage == MESA_SHADER_FRAGMENT);
-   const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key;
-
-   int quality_value = quality.dw1.d;
-
-   if (quality_value == BRW_DERIVATIVE_FINE ||
-      (key->high_quality_derivatives && quality_value != BRW_DERIVATIVE_COARSE)) {
+   if (opcode == FS_OPCODE_DDX_FINE) {
       /* produce accurate derivatives */
       vstride = BRW_VERTICAL_STRIDE_2;
       width = BRW_WIDTH_2;
-   }
-   else {
+   } else {
       /* replicate the derivative at the top-left pixel to other pixels */
       vstride = BRW_VERTICAL_STRIDE_4;
       width = BRW_WIDTH_4;
@@ -749,19 +740,11 @@ fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src
  * left.
  */
 void
-fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
-                         struct brw_reg quality, bool negate_value)
+fs_generator::generate_ddy(enum opcode opcode,
+                           struct brw_reg dst, struct brw_reg src,
+                           bool negate_value)
 {
-   assert(quality.file == BRW_IMMEDIATE_VALUE);
-   assert(quality.type == BRW_REGISTER_TYPE_D);
-
-   assert(stage == MESA_SHADER_FRAGMENT);
-   const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key;
-
-   int quality_value = quality.dw1.d;
-
-   if (quality_value == BRW_DERIVATIVE_FINE ||
-      (key->high_quality_derivatives && quality_value != BRW_DERIVATIVE_COARSE)) {
+   if (opcode == FS_OPCODE_DDY_FINE) {
       /* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register
        * Region Restrictions):
        *
@@ -1871,16 +1854,18 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       case SHADER_OPCODE_TG4_OFFSET:
         generate_tex(inst, dst, src[0], src[1]);
         break;
-      case FS_OPCODE_DDX:
-        generate_ddx(inst, dst, src[0], src[1]);
-        break;
-      case FS_OPCODE_DDY:
+      case FS_OPCODE_DDX_COARSE:
+      case FS_OPCODE_DDX_FINE:
+         generate_ddx(inst->opcode, dst, src[0]);
+         break;
+      case FS_OPCODE_DDY_COARSE:
+      case FS_OPCODE_DDY_FINE:
          /* Make sure fp->UsesDFdy flag got set (otherwise there's no
           * guarantee that key->render_to_fbo is set).
           */
          assert(stage == MESA_SHADER_FRAGMENT &&
                 ((gl_fragment_program *) prog)->UsesDFdy);
-         generate_ddy(inst, dst, src[0], src[1],
+         generate_ddy(inst->opcode, dst, src[0],
                       ((brw_wm_prog_key * const) this->key)->render_to_fbo);
         break;
 
index b46a8fd709e063f72a2e6dd8803f5c1555c0af59..1b0edaf3cdbd437b98d760d74e1bd4471cf3b8ef 100644 (file)
@@ -471,6 +471,7 @@ fs_visitor::visit(ir_expression *ir)
    unsigned int operand;
    fs_reg op[3], temp;
    fs_inst *inst;
+   struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
 
    assert(ir->get_num_operands() <= 3);
 
@@ -601,22 +602,35 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_dFdx:
-      emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT));
+      /* Select one of the two opcodes based on the glHint value. */
+      if (fs_key->high_quality_derivatives)
+         emit(FS_OPCODE_DDX_FINE, this->result, op[0]);
+      else
+         emit(FS_OPCODE_DDX_COARSE, this->result, op[0]);
       break;
+
    case ir_unop_dFdx_coarse:
-      emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE));
+      emit(FS_OPCODE_DDX_COARSE, this->result, op[0]);
       break;
+
    case ir_unop_dFdx_fine:
-      emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE));
+      emit(FS_OPCODE_DDX_FINE, this->result, op[0]);
       break;
+
    case ir_unop_dFdy:
-      emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT));
+      /* Select one of the two opcodes based on the glHint value. */
+      if (fs_key->high_quality_derivatives)
+         emit(FS_OPCODE_DDY_FINE, this->result, op[0]);
+      else
+         emit(FS_OPCODE_DDY_COARSE, this->result, op[0]);
       break;
+
    case ir_unop_dFdy_coarse:
-      emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE));
+      emit(FS_OPCODE_DDY_COARSE, this->result, op[0]);
       break;
+
    case ir_unop_dFdy_fine:
-      emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE));
+      emit(FS_OPCODE_DDY_FINE, this->result, op[0]);
       break;
 
    case ir_binop_add:
index 8e4f7795d828343a3330efcb1a857263ad63fb46..8528d3ef727c6f082cf076807446850f4a331282 100644 (file)
@@ -450,10 +450,14 @@ brw_instruction_name(enum opcode op)
    case VEC4_OPCODE_PACK_BYTES:
       return "pack_bytes";
 
-   case FS_OPCODE_DDX:
-      return "ddx";
-   case FS_OPCODE_DDY:
-      return "ddy";
+   case FS_OPCODE_DDX_COARSE:
+      return "ddx_coarse";
+   case FS_OPCODE_DDX_FINE:
+      return "ddx_fine";
+   case FS_OPCODE_DDY_COARSE:
+      return "ddy_coarse";
+   case FS_OPCODE_DDY_FINE:
+      return "ddy_fine";
 
    case FS_OPCODE_PIXEL_X:
       return "pixel_x";
@@ -724,7 +728,7 @@ backend_instruction::writes_accumulator_implicitly(struct brw_context *brw) cons
    return writes_accumulator ||
           (brw->gen < 6 &&
            ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
-            (opcode >= FS_OPCODE_DDX && opcode <= FS_OPCODE_LINTERP &&
+            (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP &&
              opcode != FS_OPCODE_CINTERP)));
 }