brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
-void
-fs_generator::generate_stencil_ref_packing(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg src)
-{
- assert(dispatch_width == 8);
- assert(devinfo->gen >= 9);
-
- /* Pack the stencil reference values held in src into consecutive bytes
- * of dst with a single byte-typed MOV. Only SIMD8 on gen9+ is handled,
- * as enforced by the asserts above.
- *
- * Stencil value updates are provided in 8 slots of 1 byte per slot.
- * Presumably, in order to save memory bandwidth, the stencil reference
- * values written from the FS need to be packed into 2 dwords. This makes
- * sense because each stencil value is limited to 1 byte and a SIMD8 send
- * carries 8 of them: stencil slots 0-3 go in dw0 and slots 4-7 in dw1.
- *
- * The spec is confusing here because in the payload definition of MDP_RTW_S8
- * (Message Data Payload for Render Target Writes with Stencil 8b) the
- * stencil value seems to be dw4.0-dw4.7. However, if you look at the type of
- * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the
- * packed values specified above and diagrammed below:
- *
- * 31 0
- * --------------------------------
- * DW | |
- * 2-7 | IGNORED |
- * | |
- * --------------------------------
- * DW1 | STC | STC | STC | STC |
- * | slot7 | slot6 | slot5 | slot4|
- * --------------------------------
- * DW0 | STC | STC | STC | STC |
- * | slot3 | slot2 | slot1 | slot0|
- * --------------------------------
- *
- * The source region set up below is <4;1,0> on a UB-typed register, i.e.
- * one byte is read out of every four. Presumably that is the low byte of
- * each channel's dword; confirm against the code that emits
- * FS_OPCODE_PACK_STENCIL_REF.
- */
-
- src.vstride = BRW_VERTICAL_STRIDE_4;
- src.width = BRW_WIDTH_1;
- src.hstride = BRW_HORIZONTAL_STRIDE_0;
- assert(src.type == BRW_REGISTER_TYPE_UB);
- brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), src);
-}
-
void
fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
{
struct brw_reg dst, struct brw_reg src)
{
if (opcode == FS_OPCODE_DDY_FINE) {
- /* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register
- * Region Restrictions):
- *
- * In Align16 access mode, SIMD16 is not allowed for DW operations
- * and SIMD8 is not allowed for DF operations.
- *
- * In this context, "DW operations" means "operations acting on 32-bit
- * values", so it includes operations on floats.
- *
- * Gen4 has a similar restriction. From the i965 PRM, section 11.5.3
- * (Instruction Compression -> Rules and Restrictions):
- *
- * A compressed instruction must be in Align1 access mode. Align16
- * mode instructions cannot be compressed.
- *
- * Similar text exists in the g45 PRM.
- *
- * Empirically, compressed align16 instructions using odd register
- * numbers don't appear to work on Sandybridge either.
- *
- * On these platforms, if we're building a SIMD16 shader, we need to
- * manually unroll to a pair of SIMD8 instructions.
- */
- bool unroll_to_simd8 =
- (dispatch_width == 16 &&
- (devinfo->gen == 4 || devinfo->gen == 6 ||
- (devinfo->gen == 7 && !devinfo->is_haswell)));
-
/* produce accurate derivatives */
struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
src.negate, src.abs,
BRW_SWIZZLE_ZWZW, WRITEMASK_XYZW);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_16);
- if (unroll_to_simd8) {
- brw_set_default_exec_size(p, BRW_EXECUTE_8);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- brw_ADD(p, firsthalf(dst), negate(firsthalf(src0)), firsthalf(src1));
- brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_ADD(p, sechalf(dst), negate(sechalf(src0)), sechalf(src1));
- } else {
- brw_ADD(p, dst, negate(src0), src1);
- }
+ brw_ADD(p, dst, negate(src0), src1);
brw_pop_insn_state(p);
} else {
/* replicate the derivative at the top-left pixel to other pixels */
gen6_resolve_implied_move(p, &header, inst->base_mrf);
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
- brw_inst_set_qtr_control(p->devinfo, send, BRW_COMPRESSION_NONE);
+ brw_inst_set_compression(devinfo, send, false);
brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
brw_set_src0(p, send, header);
if (devinfo->gen < 6)
brw_NOP(p);
this->dispatch_width = dispatch_width;
- if (dispatch_width == 16)
- brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
int start_offset = p->next_insn_offset;
int spill_count = 0, fill_count = 0;
case BRW_OPCODE_BFI1:
assert(devinfo->gen >= 7);
- /* The Haswell WaForceSIMD8ForBFIInstruction workaround says that we
- * should
- *
- * "Force BFI instructions to be executed always in SIMD8."
- */
- if (dispatch_width == 16 && devinfo->is_haswell) {
- brw_set_default_exec_size(p, BRW_EXECUTE_8);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- brw_BFI1(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]));
- brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_BFI1(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]));
- brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- } else {
- brw_BFI1(p, dst, src[0], src[1]);
- }
+ brw_BFI1(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_BFI2:
assert(devinfo->gen >= 7);
brw_set_default_access_mode(p, BRW_ALIGN_16);
- /* The Haswell WaForceSIMD8ForBFIInstruction workaround says that we
- * should
- *
- * "Force BFI instructions to be executed always in SIMD8."
- *
- * Otherwise we would be able to emit compressed instructions like we
- * do for the other three-source instructions.
- */
- if (dispatch_width == 16 && devinfo->is_haswell) {
- brw_set_default_exec_size(p, BRW_EXECUTE_8);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- brw_BFI2(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
- brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
- brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- } else {
- brw_BFI2(p, dst, src[0], src[1], src[2]);
- }
- brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_BFI2(p, dst, src[0], src[1], src[2]);
break;
case BRW_OPCODE_IF:
brw_broadcast(p, dst, src[0], src[1]);
break;
- case SHADER_OPCODE_EXTRACT_BYTE: {
- assert(src[0].type == BRW_REGISTER_TYPE_D ||
- src[0].type == BRW_REGISTER_TYPE_UD);
-
- enum brw_reg_type type =
- src[0].type == BRW_REGISTER_TYPE_D ? BRW_REGISTER_TYPE_B
- : BRW_REGISTER_TYPE_UB;
- brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 4));
- break;
- }
-
- case SHADER_OPCODE_EXTRACT_WORD: {
- assert(src[0].type == BRW_REGISTER_TYPE_D ||
- src[0].type == BRW_REGISTER_TYPE_UD);
-
- enum brw_reg_type type =
- src[0].type == BRW_REGISTER_TYPE_D ? BRW_REGISTER_TYPE_W
- : BRW_REGISTER_TYPE_UW;
- brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 2));
- break;
- }
-
case FS_OPCODE_SET_SAMPLE_ID:
generate_set_sample_id(inst, dst, src[0], src[1]);
break;
generate_barrier(inst, src[0]);
break;
- case FS_OPCODE_PACK_STENCIL_REF:
- generate_stencil_ref_packing(inst, dst, src[0]);
- break;
-
default:
unreachable("Unsupported opcode");