From cea37f0911cf2d88f11a7a2afe4ab2351601571a Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 8 Nov 2014 01:39:14 -0800 Subject: [PATCH] i965/fs: Handle derivative quality decisions in the front-end. Kristian noted that there's very little use of brw_wm_prog_key in the generator, and that it basically just generates what it's told, without caring about what stage it's handling. One exception to this is derivative handling. When handling dFdxCoarse and dFdxFine, we packed an enum value in a second source register, explicitly telling the generator what to do. For dFdx, we specified an enum value of "please use the hint", then checked the program key in the generator level code. A natural method is to define separate FS_OPCODE_DD[XY]_{COARSE,FINE} opcodes, and have the front-end (which already decides what IR to generate based on the program key) decide which dPdx/dPdy should correspond to. This consolidates the decision making in one place. Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_defines.h | 12 ++--- src/mesa/drivers/dri/i965/brw_fs.h | 6 +-- .../drivers/dri/i965/brw_fs_generator.cpp | 45 +++++++------------ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 26 ++++++++--- src/mesa/drivers/dri/i965/brw_shader.cpp | 14 +++--- 5 files changed, 51 insertions(+), 52 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index fa473d08ef6..e7f6a2bad8e 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -910,8 +910,10 @@ enum opcode { VEC4_OPCODE_PACK_BYTES, - FS_OPCODE_DDX, - FS_OPCODE_DDY, + FS_OPCODE_DDX_COARSE, + FS_OPCODE_DDX_FINE, + FS_OPCODE_DDY_COARSE, + FS_OPCODE_DDY_FINE, FS_OPCODE_PIXEL_X, FS_OPCODE_PIXEL_Y, FS_OPCODE_CINTERP, @@ -1093,12 +1095,6 @@ enum opcode { GS_OPCODE_FF_SYNC_SET_PRIMITIVES, }; -enum brw_derivative_quality { - BRW_DERIVATIVE_BY_HINT = 0, - BRW_DERIVATIVE_FINE = 1, - BRW_DERIVATIVE_COARSE = 2, -}; - enum brw_urb_write_flags { BRW_URB_WRITE_NO_FLAGS = 0, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4421eeda2a6..1deb7743cea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -710,9 +710,9 @@ private: void generate_math_g45(fs_inst *inst, struct brw_reg dst, struct brw_reg src); - void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src, struct brw_reg quality); - void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, - struct brw_reg quality, bool negate_value); + void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg src); + void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src, + bool negate_value); void generate_scratch_write(fs_inst *inst, struct brw_reg src); void generate_scratch_read(fs_inst *inst, struct brw_reg dst); void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 2a35fa98945..27ba0bbadb8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -705,25 +705,16 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src * appropriate swizzling. */ void -fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src, - struct brw_reg quality) +fs_generator::generate_ddx(enum opcode opcode, + struct brw_reg dst, struct brw_reg src) { unsigned vstride, width; - assert(quality.file == BRW_IMMEDIATE_VALUE); - assert(quality.type == BRW_REGISTER_TYPE_D); - assert(stage == MESA_SHADER_FRAGMENT); - const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key; - - int quality_value = quality.dw1.d; - - if (quality_value == BRW_DERIVATIVE_FINE || - (key->high_quality_derivatives && quality_value != BRW_DERIVATIVE_COARSE)) { + if (opcode == FS_OPCODE_DDX_FINE) { /* produce accurate derivatives */ vstride = BRW_VERTICAL_STRIDE_2; width = BRW_WIDTH_2; - } - else { + } else { /* replicate the derivative at the top-left pixel to other pixels */ vstride = BRW_VERTICAL_STRIDE_4; width = BRW_WIDTH_4; @@ -749,19 +740,11 @@ fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src * left. */ void -fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, - struct brw_reg quality, bool negate_value) +fs_generator::generate_ddy(enum opcode opcode, + struct brw_reg dst, struct brw_reg src, + bool negate_value) { - assert(quality.file == BRW_IMMEDIATE_VALUE); - assert(quality.type == BRW_REGISTER_TYPE_D); - - assert(stage == MESA_SHADER_FRAGMENT); - const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key; - - int quality_value = quality.dw1.d; - - if (quality_value == BRW_DERIVATIVE_FINE || - (key->high_quality_derivatives && quality_value != BRW_DERIVATIVE_COARSE)) { + if (opcode == FS_OPCODE_DDY_FINE) { /* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register * Region Restrictions): * @@ -1871,16 +1854,18 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_TG4_OFFSET: generate_tex(inst, dst, src[0], src[1]); break; - case FS_OPCODE_DDX: - generate_ddx(inst, dst, src[0], src[1]); - break; - case FS_OPCODE_DDY: + case FS_OPCODE_DDX_COARSE: + case FS_OPCODE_DDX_FINE: + generate_ddx(inst->opcode, dst, src[0]); + break; + case FS_OPCODE_DDY_COARSE: + case FS_OPCODE_DDY_FINE: /* Make sure fp->UsesDFdy flag got set (otherwise there's no * guarantee that key->render_to_fbo is set). */ assert(stage == MESA_SHADER_FRAGMENT && ((gl_fragment_program *) prog)->UsesDFdy); - generate_ddy(inst, dst, src[0], src[1], + generate_ddy(inst->opcode, dst, src[0], ((brw_wm_prog_key * const) this->key)->render_to_fbo); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index b46a8fd709e..1b0edaf3cdb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -471,6 +471,7 @@ fs_visitor::visit(ir_expression *ir) unsigned int operand; fs_reg op[3], temp; fs_inst *inst; + struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key; assert(ir->get_num_operands() <= 3); @@ -601,22 +602,35 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_dFdx: - emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT)); + /* Select one of the two opcodes based on the glHint value. */ + if (fs_key->high_quality_derivatives) + emit(FS_OPCODE_DDX_FINE, this->result, op[0]); + else + emit(FS_OPCODE_DDX_COARSE, this->result, op[0]); break; + case ir_unop_dFdx_coarse: - emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE)); + emit(FS_OPCODE_DDX_COARSE, this->result, op[0]); break; + case ir_unop_dFdx_fine: - emit(FS_OPCODE_DDX, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE)); + emit(FS_OPCODE_DDX_FINE, this->result, op[0]); break; + case ir_unop_dFdy: - emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_BY_HINT)); + /* Select one of the two opcodes based on the glHint value. */ + if (fs_key->high_quality_derivatives) + emit(FS_OPCODE_DDY_FINE, this->result, op[0]); + else + emit(FS_OPCODE_DDY_COARSE, this->result, op[0]); break; + case ir_unop_dFdy_coarse: - emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_COARSE)); + emit(FS_OPCODE_DDY_COARSE, this->result, op[0]); break; + case ir_unop_dFdy_fine: - emit(FS_OPCODE_DDY, this->result, op[0], fs_reg(BRW_DERIVATIVE_FINE)); + emit(FS_OPCODE_DDY_FINE, this->result, op[0]); break; case ir_binop_add: diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 8e4f7795d82..8528d3ef727 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -450,10 +450,14 @@ brw_instruction_name(enum opcode op) case VEC4_OPCODE_PACK_BYTES: return "pack_bytes"; - case FS_OPCODE_DDX: - return "ddx"; - case FS_OPCODE_DDY: - return "ddy"; + case FS_OPCODE_DDX_COARSE: + return "ddx_coarse"; + case FS_OPCODE_DDX_FINE: + return "ddx_fine"; + case FS_OPCODE_DDY_COARSE: + return "ddy_coarse"; + case FS_OPCODE_DDY_FINE: + return "ddy_fine"; case FS_OPCODE_PIXEL_X: return "pixel_x"; @@ -724,7 +728,7 @@ backend_instruction::writes_accumulator_implicitly(struct brw_context *brw) cons return writes_accumulator || (brw->gen < 6 && ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || - (opcode >= FS_OPCODE_DDX && opcode <= FS_OPCODE_LINTERP && + (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP && opcode != FS_OPCODE_CINTERP))); } -- 2.30.2