} binding_table;
uint8_t computed_depth_mode;
+ bool computed_stencil;
bool early_fragment_tests;
bool no_8;
FS_OPCODE_BLORP_FB_WRITE,
FS_OPCODE_REP_FB_WRITE,
+ FS_OPCODE_PACK_STENCIL_REF,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
FB_WRITE_LOGICAL_SRC_SRC0_ALPHA,
FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */
FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */
+ FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */
FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */
FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */
};
const fs_reg &src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
+ const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
const unsigned components =
inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
length++;
}
+ if (src_stencil.file != BAD_FILE) {
+ assert(devinfo->gen >= 9);
+ assert(bld.dispatch_width() != 16);
+
+ sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.exec_all().annotate("FB write OS")
+ .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length],
+ retype(src_stencil, BRW_REGISTER_TYPE_UB));
+ length++;
+ }
+
fs_inst *load;
if (devinfo->gen >= 7) {
/* Send from the GRF */
prog_data->uses_omask =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data->computed_depth_mode = computed_depth_mode(shader);
+ prog_data->computed_stencil =
+ shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
int *push_constant_loc;
fs_reg frag_depth;
+ fs_reg frag_stencil;
fs_reg sample_mask;
fs_reg outputs[VARYING_SLOT_MAX];
unsigned output_components[VARYING_SLOT_MAX];
void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
+ void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg src);
void generate_barrier(fs_inst *inst, struct brw_reg src);
void generate_blorp_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
brw_imm_ud(inst->target));
}
+ /* Set computes stencil to render target */
+ if (prog_data->computed_stencil) {
+ brw_OR(p,
+ vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
+ vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ brw_imm_ud(0x1 << 14));
+ }
+
implied_header = brw_null_reg();
} else {
implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
+void
+fs_generator::generate_stencil_ref_packing(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ assert(dispatch_width == 8);
+ assert(devinfo->gen >= 9);
+
+ /* Stencil value updates are provided in 8 slots of 1 byte per slot.
+ * Presumably, in order to save memory bandwidth, the stencil reference
+ * values written from the FS need to be packed into 2 dwords (this makes
+ * sense because the stencil values are limited to 1 byte each and a SIMD8
+ * send, so stencil slots 0-3 in dw0, and 4-7 in dw1.)
+ *
+ * The spec is confusing here because in the payload definition of MDP_RTW_S8
+ * (Message Data Payload for Render Target Writes with Stencil 8b) the
+ * stencil value seems to be dw4.0-dw4.7. However, if you look at the type of
+ * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the
+ * packed values specified above and diagrammed below:
+ *
+ * 31 0
+ * --------------------------------
+ * DW | |
+ * 2-7 | IGNORED |
+ * | |
+ * --------------------------------
+ * DW1 | STC | STC | STC | STC |
+ * | slot7 | slot6 | slot5 | slot4|
+ * --------------------------------
+ * DW0 | STC | STC | STC | STC |
+ * | slot3 | slot2 | slot1 | slot0|
+ * --------------------------------
+ */
+
+ src.vstride = BRW_VERTICAL_STRIDE_4;
+ src.width = BRW_WIDTH_1;
+ src.hstride = BRW_HORIZONTAL_STRIDE_0;
+ assert(src.type == BRW_REGISTER_TYPE_UB);
+ brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), src);
+}
+
void
fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
{
generate_barrier(inst, src[0]);
break;
+ case FS_OPCODE_PACK_STENCIL_REF:
+ generate_stencil_ref_packing(inst, dst, src[0]);
+ break;
+
default:
unreachable("Unsupported opcode");
}
} else if (var->data.location == FRAG_RESULT_DEPTH) {
this->frag_depth = reg;
+ } else if (var->data.location == FRAG_RESULT_STENCIL) {
+ this->frag_stencil = reg;
} else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
this->sample_mask = reg;
} else {
const fs_reg dst_depth = (payload.dest_depth_reg ?
fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) :
fs_reg());
- fs_reg src_depth;
+ fs_reg src_depth, src_stencil;
if (source_depth_to_render_target) {
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
}
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
+ src_stencil = frag_stencil;
+
const fs_reg sources[] = {
- color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
- fs_reg(components)
+ color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
+ sample_mask, fs_reg(components)
};
assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
no16("Missing support for simd16 depth writes on gen6\n");
}
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+ /* From the 'Render Target Write message' section of the docs:
+ * "Output Stencil is not supported with SIMD16 Render Target Write
+ * Messages."
+ *
+ * FINISHME: split 16 into 2 8s
+ */
+ no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n");
+ }
+
if (do_dual_src) {
const fs_builder abld = bld.annotate("FB dual-source write");
return "fb_write";
case FS_OPCODE_FB_WRITE_LOGICAL:
return "fb_write_logical";
+ case FS_OPCODE_PACK_STENCIL_REF:
+ return "pack_stencil_ref";
case FS_OPCODE_BLORP_FB_WRITE:
return "blorp_fb_write";
case FS_OPCODE_REP_FB_WRITE:
!brw_color_buffer_write_enabled(brw))
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
+ if (prog_data->computed_stencil) {
+ assert(brw->gen >= 9);
+ dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL;
+ }
+
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
OUT_BATCH(dw1);