From e12bbb503f71b60b9f212e82fdd3ed9aaf3ab318 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Thu, 24 Oct 2013 16:17:08 -0700 Subject: [PATCH] i965: Add FS backend for builtin gl_SampleID V2: - Update comments - Add compute_sample_id variables in brw_wm_prog_key - Add a special backend instruction to compute sample_id. V3: - Make changes to support simd16 mode. Signed-off-by: Anuj Phogat Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 48 +++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 7 +++ .../drivers/dri/i965/brw_fs_generator.cpp | 29 +++++++++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 + src/mesa/drivers/dri/i965/brw_wm.c | 6 +++ src/mesa/drivers/dri/i965/brw_wm.h | 1 + 7 files changed, 94 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index bad6d40fca8..e03ac583bfe 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -794,6 +794,7 @@ enum opcode { FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, + FS_OPCODE_SET_SAMPLE_ID, FS_OPCODE_SET_SIMD4X2_OFFSET, FS_OPCODE_PACK_HALF_2x16_SPLIT, FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index dfb0d15c031..7caa52ded91 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1217,6 +1217,54 @@ fs_visitor::emit_samplepos_setup(ir_variable *ir) return reg; } +fs_reg * +fs_visitor::emit_sampleid_setup(ir_variable *ir) +{ + assert(brw->gen >= 6); + + this->current_annotation = "compute sample id"; + fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); + + if (c->key.compute_sample_id) { + fs_reg t1 = fs_reg(this, glsl_type::int_type); + fs_reg t2 = fs_reg(this, glsl_type::int_type); + t2.type = BRW_REGISTER_TYPE_UW; + + /* The PS will be run in MSDISPMODE_PERSAMPLE. For example with + * 8x multisampling, subspan 0 will represent sample N (where N + * is 0, 2, 4 or 6), subspan 1 will represent sample 1, 3, 5 or + * 7. We can find the value of N by looking at R0.0 bits 7:6 + * ("Starting Sample Pair Index (SSPI)") and multiplying by two + * (since samples are always delivered in pairs). That is, we + * compute 2*((R0.0 & 0xc0) >> 6) == (R0.0 & 0xc0) >> 5. Then + * we need to add N to the sequence (0, 0, 0, 0, 1, 1, 1, 1) in + * case of SIMD8 and sequence (0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, + * 2, 3, 3, 3, 3) in case of SIMD16. We compute this sequence by + * populating a temporary variable with the sequence (0, 1, 2, 3), + * and then reading from it using vstride=1, width=4, hstride=0. + * These computations hold good for 4x multisampling as well. + */ + emit(BRW_OPCODE_AND, t1, + fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), + fs_reg(brw_imm_d(0xc0))); + emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5)); + /* This works for both SIMD8 and SIMD16 */ + emit(MOV(t2, brw_imm_v(0x3210))); + /* This special instruction takes care of setting vstride=1, + * width=4, hstride=0 of t2 during an ADD instruction. + */ + emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2); + } else { + /* As per GL_ARB_sample_shading specification: + * "When rendering to a non-multisample buffer, or if multisample + * rasterization is disabled, gl_SampleID will always be zero." + */ + emit(BRW_OPCODE_MOV, *reg, fs_reg(0)); + } + + return reg; +} + fs_reg fs_visitor::fix_math_operand(fs_reg src) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7eb82fc5a3c..0ed8c053634 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -341,6 +341,7 @@ public: bool is_centroid); fs_reg *emit_frontfacing_interpolation(ir_variable *ir); fs_reg *emit_samplepos_setup(ir_variable *ir); + fs_reg *emit_sampleid_setup(ir_variable *ir); fs_reg *emit_general_interpolation(ir_variable *ir); void emit_interpolation_setup_gen4(); void emit_interpolation_setup_gen6(); @@ -547,6 +548,12 @@ private: struct brw_reg index, struct brw_reg offset); void generate_mov_dispatch_to_flags(fs_inst *inst); + + void generate_set_sample_id(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_set_simd4x2_offset(fs_inst *inst, struct brw_reg dst, struct brw_reg offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 672e8968658..25530983f66 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1068,6 +1068,31 @@ fs_generator::generate_set_simd4x2_offset(fs_inst *inst, brw_pop_insn_state(p); } +/* Sets vstride=1, width=4, hstride=0 of register src1 during + * the ADD instruction. + */ +void +fs_generator::generate_set_sample_id(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + assert(dst.type == BRW_REGISTER_TYPE_D || + dst.type == BRW_REGISTER_TYPE_UD); + assert(src0.type == BRW_REGISTER_TYPE_D || + src0.type == BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + struct brw_reg reg = stride(retype(brw_vec1_reg(src1.file, src1.nr, 0), + BRW_REGISTER_TYPE_UW), 1, 4, 0); + brw_ADD(p, dst, src0, reg); + if (dispatch_width == 16) + brw_ADD(p, offset(dst, 1), offset(src0, 1), suboffset(reg, 2)); + brw_pop_insn_state(p); +} + /** * Change the register's data type from UD to W, doubling the strides in order * to compensate for halving the data type width. @@ -1641,6 +1666,10 @@ fs_generator::generate_code(exec_list *instructions) generate_set_simd4x2_offset(inst, dst, src[0]); break; + case FS_OPCODE_SET_SAMPLE_ID: + generate_set_sample_id(inst, dst, src[0], src[1]); + break; + case FS_OPCODE_PACK_HALF_2x16_SPLIT: generate_pack_half_2x16_split(inst, dst, src[0], src[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 0404fffe9b8..09936374fd2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -129,6 +129,8 @@ fs_visitor::visit(ir_variable *ir) } else if (ir->mode == ir_var_system_value) { if (ir->location == SYSTEM_VALUE_SAMPLE_POS) { reg = emit_samplepos_setup(ir); + } else if (ir->location == SYSTEM_VALUE_SAMPLE_ID) { + reg = emit_sampleid_setup(ir); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index d2a5a9fb3be..e404d996076 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -367,6 +367,7 @@ static void brw_wm_populate_key( struct brw_context *brw, GLuint lookup = 0; GLuint line_aa; bool program_uses_dfdy = fp->program.UsesDFdy; + bool multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; memset(key, 0, sizeof(*key)); @@ -489,6 +490,11 @@ static void brw_wm_populate_key( struct brw_context *brw, _mesa_get_min_invocations_per_fragment(ctx, &fp->program) > 1 && fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; + key->compute_sample_id = + multisample_fbo && + ctx->Multisample.Enabled && + (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); + /* BRW_NEW_VUE_MAP_GEOM_OUT */ if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & BRW_FS_VARYING_INPUT_MASK) > 16) diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index eb740adffe6..f5823f43258 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -66,6 +66,7 @@ struct brw_wm_prog_key { GLuint render_to_fbo:1; GLuint clamp_fragment_color:1; GLuint compute_pos_offset:1; + GLuint compute_sample_id:1; GLuint line_aa:2; GLuint high_quality_derivatives:1; -- 2.30.2