FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
+ FS_OPCODE_SET_SAMPLE_ID,
FS_OPCODE_SET_SIMD4X2_OFFSET,
FS_OPCODE_PACK_HALF_2x16_SPLIT,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
return reg;
}
+fs_reg *
+fs_visitor::emit_sampleid_setup(ir_variable *ir)
+{
+ assert(brw->gen >= 6);
+
+ this->current_annotation = "compute sample id";
+ fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
+
+ if (c->key.compute_sample_id) {
+ fs_reg t1 = fs_reg(this, glsl_type::int_type);
+ fs_reg t2 = fs_reg(this, glsl_type::int_type);
+ t2.type = BRW_REGISTER_TYPE_UW;
+
+ /* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
+ * 8x multisampling, subspan 0 will represent sample N (where N
+ * is 0, 2, 4 or 6), subspan 1 will represent sample 1, 3, 5 or
+ * 7. We can find the value of N by looking at R0.0 bits 7:6
+ * ("Starting Sample Pair Index (SSPI)") and multiplying by two
+ * (since samples are always delivered in pairs). That is, we
+ * compute 2*((R0.0 & 0xc0) >> 6) == (R0.0 & 0xc0) >> 5. Then
+ * we need to add N to the sequence (0, 0, 0, 0, 1, 1, 1, 1) in
+ * case of SIMD8 and sequence (0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+ * 2, 3, 3, 3, 3) in case of SIMD16. We compute this sequence by
+ * populating a temporary variable with the sequence (0, 1, 2, 3),
+ * and then reading from it using vstride=1, width=4, hstride=0.
+ * These computations hold good for 4x multisampling as well.
+ */
+ emit(BRW_OPCODE_AND, t1,
+ fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
+ fs_reg(brw_imm_d(0xc0)));
+ emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
+ /* This works for both SIMD8 and SIMD16 */
+ emit(MOV(t2, brw_imm_v(0x3210)));
+ /* This special instruction takes care of setting vstride=1,
+ * width=4, hstride=0 of t2 during an ADD instruction.
+ */
+ emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
+ } else {
+ /* As per GL_ARB_sample_shading specification:
+ * "When rendering to a non-multisample buffer, or if multisample
+ * rasterization is disabled, gl_SampleID will always be zero."
+ */
+ emit(BRW_OPCODE_MOV, *reg, fs_reg(0));
+ }
+
+ return reg;
+}
+
fs_reg
fs_visitor::fix_math_operand(fs_reg src)
{
bool is_centroid);
fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
fs_reg *emit_samplepos_setup(ir_variable *ir);
+ fs_reg *emit_sampleid_setup(ir_variable *ir);
fs_reg *emit_general_interpolation(ir_variable *ir);
void emit_interpolation_setup_gen4();
void emit_interpolation_setup_gen6();
struct brw_reg index,
struct brw_reg offset);
void generate_mov_dispatch_to_flags(fs_inst *inst);
+
+ void generate_set_sample_id(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
void generate_set_simd4x2_offset(fs_inst *inst,
struct brw_reg dst,
struct brw_reg offset);
brw_pop_insn_state(p);
}
+/* Sets vstride=1, width=4, hstride=0 of register src1 during
+ * the ADD instruction.
+ */
+void
+fs_generator::generate_set_sample_id(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ assert(dst.type == BRW_REGISTER_TYPE_D ||
+ dst.type == BRW_REGISTER_TYPE_UD);
+ assert(src0.type == BRW_REGISTER_TYPE_D ||
+ src0.type == BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ struct brw_reg reg = stride(retype(brw_vec1_reg(src1.file, src1.nr, 0),
+ BRW_REGISTER_TYPE_UW), 1, 4, 0);
+ brw_ADD(p, dst, src0, reg);
+ if (dispatch_width == 16)
+ brw_ADD(p, offset(dst, 1), offset(src0, 1), suboffset(reg, 2));
+ brw_pop_insn_state(p);
+}
+
/**
* Change the register's data type from UD to W, doubling the strides in order
* to compensate for halving the data type width.
generate_set_simd4x2_offset(inst, dst, src[0]);
break;
+ case FS_OPCODE_SET_SAMPLE_ID:
+ generate_set_sample_id(inst, dst, src[0], src[1]);
+ break;
+
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
break;
} else if (ir->mode == ir_var_system_value) {
if (ir->location == SYSTEM_VALUE_SAMPLE_POS) {
reg = emit_samplepos_setup(ir);
+ } else if (ir->location == SYSTEM_VALUE_SAMPLE_ID) {
+ reg = emit_sampleid_setup(ir);
}
}
GLuint lookup = 0;
GLuint line_aa;
bool program_uses_dfdy = fp->program.UsesDFdy;
+ bool multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
memset(key, 0, sizeof(*key));
_mesa_get_min_invocations_per_fragment(ctx, &fp->program) > 1 &&
fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
+ key->compute_sample_id =
+ multisample_fbo &&
+ ctx->Multisample.Enabled &&
+ (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
+
/* BRW_NEW_VUE_MAP_GEOM_OUT */
if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
BRW_FS_VARYING_INPUT_MASK) > 16)
GLuint render_to_fbo:1;
GLuint clamp_fragment_color:1;
GLuint compute_pos_offset:1;
+ GLuint compute_sample_id:1;
GLuint line_aa:2;
GLuint high_quality_derivatives:1;