From dac10e8a1390711f1f36f224644c4a33586cebe3 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 17 May 2016 01:52:16 -0700 Subject: [PATCH] i965, anv: Use NIR FragCoord re-center and y-transform passes. This handles gl_FragCoord transformations and other window system vs. user FBO coordinate system flipping by multiplying/adding uniform values, rather than recompiles. This is much better because we have no decent way to guess whether the application is going to use a shader with the window system FBO or a user FBO, much less the drawable height. This led to a lot of recompiles in many applications. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/intel/vulkan/anv_pipeline.c | 5 ++++ src/mesa/drivers/dri/i965/brw_defines.h | 1 - src/mesa/drivers/dri/i965/brw_fs.cpp | 25 +++---------------- src/mesa/drivers/dri/i965/brw_fs.h | 6 ++--- .../drivers/dri/i965/brw_fs_generator.cpp | 8 +++--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 22 ++++++---------- src/mesa/drivers/dri/i965/brw_nir.c | 13 ++++++++++ .../drivers/dri/i965/brw_nir_uniforms.cpp | 16 ++++-------- 8 files changed, 39 insertions(+), 57 deletions(-) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index faa0adbc971..a9cecd69eb5 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -142,6 +142,11 @@ anv_shader_compile_to_nir(struct anv_device *device, free(spec_entries); + if (stage == MESA_SHADER_FRAGMENT) { + nir_lower_wpos_center(nir); + nir_validate_shader(nir); + } + nir_lower_returns(nir); nir_validate_shader(nir); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 089eded7402..31b33367727 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1105,7 +1105,6 @@ enum opcode { FS_OPCODE_DDX_FINE, /** * Compute dFdy(), dFdyCoarse(), or dFdyFine(). - * src1 is an immediate storing the key->render_to_fbo boolean. */ FS_OPCODE_DDY_COARSE, FS_OPCODE_DDY_FINE, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f5e8d6be21f..847a6d36566 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1060,37 +1060,18 @@ fs_visitor::import_uniforms(fs_visitor *v) } fs_reg * -fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, - bool origin_upper_left) +fs_visitor::emit_fragcoord_interpolation() { assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::vec4_type)); fs_reg wpos = *reg; - bool flip = !origin_upper_left ^ key->render_to_fbo; /* gl_FragCoord.x */ - if (pixel_center_integer) { - bld.MOV(wpos, this->pixel_x); - } else { - bld.ADD(wpos, this->pixel_x, brw_imm_f(0.5f)); - } + bld.MOV(wpos, this->pixel_x); wpos = offset(wpos, bld, 1); /* gl_FragCoord.y */ - if (!flip && pixel_center_integer) { - bld.MOV(wpos, this->pixel_y); - } else { - fs_reg pixel_y = this->pixel_y; - float offset = (pixel_center_integer ? 0.0f : 0.5f); - - if (flip) { - pixel_y.negate = true; - offset += key->drawable_height - 1.0f; - } - - bld.ADD(wpos, pixel_y, brw_imm_f(offset)); - } + bld.MOV(wpos, this->pixel_y); wpos = offset(wpos, bld, 1); /* gl_FragCoord.z */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f7052172d2d..de583e3990c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -183,8 +183,7 @@ public: void emit_dummy_fs(); void emit_repclear_shader(); - fs_reg *emit_fragcoord_interpolation(bool pixel_center_integer, - bool origin_upper_left); + fs_reg *emit_fragcoord_interpolation(); fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp, glsl_interp_qualifier interpolation_mode, bool is_centroid, bool is_sample); @@ -457,8 +456,7 @@ private: struct brw_reg dst, struct brw_reg src); void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg src); - void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src, - bool negate_value); + void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src); void generate_scratch_write(fs_inst *inst, struct brw_reg src); void generate_scratch_read(fs_inst *inst, struct brw_reg dst); void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 368861725c9..599287e1025 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1111,9 +1111,10 @@ fs_generator::generate_ddx(enum opcode opcode, */ void fs_generator::generate_ddy(enum opcode opcode, - struct brw_reg dst, struct brw_reg src, - bool negate_value) + struct brw_reg dst, struct brw_reg src) { + bool negate_value = true; + if (opcode == FS_OPCODE_DDY_FINE) { /* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register * Region Restrictions): @@ -2154,8 +2155,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case FS_OPCODE_DDY_COARSE: case FS_OPCODE_DDY_FINE: - assert(src[1].file == BRW_IMMEDIATE_VALUE); - generate_ddy(inst->opcode, dst, src[0], src[1].ud); + generate_ddy(inst->opcode, dst, src[0]); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 0bf0e095792..d5ef569536a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -62,8 +62,7 @@ fs_visitor::nir_setup_inputs() fs_reg reg; if (var->data.location == VARYING_SLOT_POS) { - reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, - var->data.origin_upper_left); + reg = *emit_fragcoord_interpolation(); emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), input, reg), 0xF); } else if (var->data.location == VARYING_SLOT_LAYER) { @@ -879,22 +878,18 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_fddy: if (fs_key->high_quality_derivatives) { - inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], - brw_imm_d(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0]); } else { - inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], - brw_imm_d(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0]); } inst->saturate = instr->dest.saturate; break; case nir_op_fddy_fine: - inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], - brw_imm_d(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fddy_coarse: - inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], - brw_imm_d(fs_key->render_to_fbo)); + inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0]); inst->saturate = instr->dest.saturate; break; @@ -3074,12 +3069,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, case nir_intrinsic_interp_var_at_offset: { nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - const bool flip = !wm_key->render_to_fbo; - if (const_offset) { unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf; - unsigned off_y = MIN2((int)(const_offset->f32[1] * 16 * - (flip ? -1 : 1)), 7) & 0xf; + unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf; emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, @@ -3096,7 +3088,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f)); fs_reg itemp = vgrf(glsl_type::int_type); /* float to int */ - bld.MOV(itemp, (i == 1 && flip) ? negate(temp) : temp); + bld.MOV(itemp, temp); /* Clamp the upper end of the range to +7/16. * ARB_gpu_shader5 requires that we support a maximum offset diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 9afd036d67b..372b746453e 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -27,6 +27,7 @@ #include "compiler/nir/glsl_to_nir.h" #include "compiler/nir/nir_builder.h" #include "program/prog_to_nir.h" +#include "program/prog_parameter.h" static bool is_input(nir_intrinsic_instr *intrin) @@ -573,6 +574,18 @@ brw_create_nir(struct brw_context *brw, nir = brw_preprocess_nir(brw->intelScreen->compiler, nir); + if (stage == MESA_SHADER_FRAGMENT) { + static const struct nir_lower_wpos_ytransform_options wpos_options = { + .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0}, + .fs_coord_pixel_center_integer = 1, + .fs_coord_origin_upper_left = 1, + }; + _mesa_add_state_reference(prog->Parameters, + (gl_state_index *) wpos_options.state_tokens); + + OPT(nir_lower_wpos_ytransform, &wpos_options); + } + OPT(nir_lower_system_values); OPT_V(brw_nir_lower_uniforms, is_scalar); diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index 15d99fa2738..b752ad5610b 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -157,17 +157,11 @@ brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog, { struct gl_program_parameter_list *plist = prog->Parameters; -#ifndef NDEBUG - if (!shader->uniforms.is_empty()) { - /* For ARB programs, only a single "parameters" variable is generated to - * support uniform data. - */ - assert(shader->uniforms.length() == 1); - nir_variable *var = (nir_variable *) shader->uniforms.get_head(); - assert(strcmp(var->name, "parameters") == 0); - assert(var->type->array_size() == (int)plist->NumParameters); - } -#endif + /* For ARB programs, prog_to_nir generates a single "parameters" variable + * for all uniform data. nir_lower_wpos_ytransform may also create an + * additional variable. + */ + assert(shader->uniforms.length() <= 2); for (unsigned p = 0; p < plist->NumParameters; p++) { /* Parameters should be either vec4 uniforms or single component -- 2.30.2