* it.
*/
bool
-fs_inst::is_partial_write() const
+fs_inst::is_partial_reg_write() const
{
return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
- (this->exec_size * type_sz(this->dst.type)) < 32 ||
!this->dst.is_contiguous() ||
+ (this->exec_size * type_sz(this->dst.type)) < REG_SIZE ||
this->dst.offset % REG_SIZE != 0);
}
+/**
+ * Returns true if the instruction has a flag that means it won't
+ * update an entire variable for the given dispatch width.
+ *
+ * This is only different from is_partial_reg_write() for SIMD8
+ * dispatches of 16-bit (or smaller) instructions.
+ */
+bool
+fs_inst::is_partial_var_write(uint32_t dispatch_width) const
+{
+ const uint32_t type_size = type_sz(this->dst.type);
+ uint32_t var_size = MIN2(REG_SIZE, dispatch_width * type_size);
+
+ return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
+ !this->dst.is_contiguous() ||
+ (this->exec_size * type_sz(this->dst.type)) < var_size ||
+ this->dst.offset % var_size != 0);
+}
+
unsigned
fs_inst::components_read(unsigned i) const
{
break;
}
- /* a * 0.0 = 0.0 */
- if (inst->src[1].is_zero()) {
- inst->opcode = BRW_OPCODE_MOV;
- inst->src[0] = inst->src[1];
- inst->src[1] = reg_undef;
- progress = true;
- break;
- }
-
if (inst->src[0].file == IMM) {
assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
inst->opcode = BRW_OPCODE_MOV;
if (inst->src[1].file != IMM)
continue;
- /* a + 0.0 = a */
- if (inst->src[1].is_zero()) {
- inst->opcode = BRW_OPCODE_MOV;
- inst->src[1] = reg_undef;
- progress = true;
- break;
- }
-
if (inst->src[0].file == IMM) {
assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
inst->opcode = BRW_OPCODE_MOV;
break;
}
break;
- case BRW_OPCODE_LRP:
- if (inst->src[1].equals(inst->src[2])) {
- inst->opcode = BRW_OPCODE_MOV;
- inst->src[0] = inst->src[1];
- inst->src[1] = reg_undef;
- inst->src[2] = reg_undef;
- progress = true;
- break;
- }
- break;
case BRW_OPCODE_CMP:
if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
}
break;
case BRW_OPCODE_MAD:
- if (inst->src[1].is_zero() || inst->src[2].is_zero()) {
- inst->opcode = BRW_OPCODE_MOV;
- inst->src[1] = reg_undef;
- inst->src[2] = reg_undef;
- progress = true;
- } else if (inst->src[0].is_zero()) {
- inst->opcode = BRW_OPCODE_MUL;
- inst->src[0] = inst->src[2];
- inst->src[2] = reg_undef;
- progress = true;
- } else if (inst->src[1].is_one()) {
+ if (inst->src[0].type != BRW_REGISTER_TYPE_F ||
+ inst->src[1].type != BRW_REGISTER_TYPE_F ||
+ inst->src[2].type != BRW_REGISTER_TYPE_F)
+ break;
+ if (inst->src[1].is_one()) {
inst->opcode = BRW_OPCODE_ADD;
inst->src[1] = inst->src[2];
inst->src[2] = reg_undef;
inst->opcode = BRW_OPCODE_ADD;
inst->src[2] = reg_undef;
progress = true;
- } else if (inst->src[1].file == IMM && inst->src[2].file == IMM) {
- inst->opcode = BRW_OPCODE_ADD;
- inst->src[1].f *= inst->src[2].f;
- inst->src[2] = reg_undef;
- progress = true;
}
break;
case SHADER_OPCODE_BROADCAST:
if (depth == 0 &&
inst->dst.file == VGRF &&
alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
- !inst->is_partial_write()) {
+ !inst->is_partial_reg_write()) {
if (remap[dst] == ~0u) {
remap[dst] = dst;
} else {
next_ip++;
if (inst->opcode != BRW_OPCODE_MOV ||
- inst->is_partial_write() ||
+ inst->is_partial_reg_write() ||
inst->dst.file != MRF || inst->src[0].file != VGRF ||
inst->dst.type != inst->src[0].type ||
inst->src[0].abs || inst->src[0].negate ||
* that writes that reg, but it would require smarter
* tracking.
*/
- if (scan_inst->is_partial_write())
+ if (scan_inst->is_partial_reg_write())
break;
/* Handling things not fully contained in the source of the copy
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF &&
inst->src[0].file != ARF &&
- !inst->is_partial_write()) {
+ !inst->is_partial_reg_write()) {
last_mrf_move[inst->dst.nr] = inst;
}
}
return progress;
}
+static bool
+is_mixed_float_with_fp32_dst(const fs_inst *inst)
+{
+ /* This opcode sometimes uses :W type on the source even if the operand is
+ * a :HF, because in gen7 there is no support for :HF, and thus it uses :W.
+ */
+ if (inst->opcode == BRW_OPCODE_F16TO32)
+ return true;
+
+ if (inst->dst.type != BRW_REGISTER_TYPE_F)
+ return false;
+
+ for (int i = 0; i < inst->sources; i++) {
+ if (inst->src[i].type == BRW_REGISTER_TYPE_HF)
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+is_mixed_float_with_packed_fp16_dst(const fs_inst *inst)
+{
+ /* This opcode sometimes uses :W type on the destination even if the
+ * destination is a :HF, because in gen7 there is no support for :HF, and
+ * thus it uses :W.
+ */
+ if (inst->opcode == BRW_OPCODE_F32TO16 &&
+ inst->dst.stride == 1)
+ return true;
+
+ if (inst->dst.type != BRW_REGISTER_TYPE_HF ||
+ inst->dst.stride != 1)
+ return false;
+
+ for (int i = 0; i < inst->sources; i++) {
+ if (inst->src[i].type == BRW_REGISTER_TYPE_F)
+ return true;
+ }
+
+ return false;
+}
+
/**
* Get the closest allowed SIMD width for instruction \p inst accounting for
* some common regioning and execution control restrictions that apply to FPU
max_width = MIN2(max_width, 4);
}
+ /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
+ * Float Operations:
+ *
+ * "No SIMD16 in mixed mode when destination is f32. Instruction
+ * execution size must be no more than 8."
+ *
+ * FIXME: the simulator doesn't seem to complain if we don't do this and
+ * empirical testing with existing CTS tests show that they pass just fine
+ * without implementing this, however, since our interpretation of the PRM
+ * is that conversion MOVs between HF and F are still mixed-float
+ * instructions (and therefore subject to this restriction) we decided to
+ * split them to be safe. Might be useful to do additional investigation to
+ * lift the restriction if we can ensure that it is safe though, since these
+ * conversions are common when half-float types are involved since many
+ * instructions do not support HF types and conversions from/to F are
+ * required.
+ */
+ if (is_mixed_float_with_fp32_dst(inst))
+ max_width = MIN2(max_width, 8);
+
+ /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
+ * Float Operations:
+ *
+ * "No SIMD16 in mixed mode when destination is packed f16 for both
+ * Align1 and Align16."
+ */
+ if (is_mixed_float_with_packed_fp16_dst(inst))
+ max_width = MIN2(max_width, 8);
+
/* Only power-of-two execution sizes are representable in the instruction
* control fields.
*/
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
- case SHADER_OPCODE_COS:
+ case SHADER_OPCODE_COS: {
/* Unary extended math instructions are limited to SIMD8 on Gen4 and
- * Gen6.
+ * Gen6. Extended Math Function is limited to SIMD8 with half-float.
*/
- return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
- devinfo->gen == 5 || devinfo->is_g4x ? MIN2(16, inst->exec_size) :
- MIN2(8, inst->exec_size));
+ if (devinfo->gen == 6 || (devinfo->gen == 4 && !devinfo->is_g4x))
+ return MIN2(8, inst->exec_size);
+ if (inst->dst.type == BRW_REGISTER_TYPE_HF)
+ return MIN2(8, inst->exec_size);
+ return MIN2(16, inst->exec_size);
+ }
- case SHADER_OPCODE_POW:
- /* SIMD16 is only allowed on Gen7+. */
- return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
- MIN2(8, inst->exec_size));
+ case SHADER_OPCODE_POW: {
+ /* SIMD16 is only allowed on Gen7+. Extended Math Function is limited
+ * to SIMD8 with half-float
+ */
+ if (devinfo->gen < 7)
+ return MIN2(8, inst->exec_size);
+ if (inst->dst.type == BRW_REGISTER_TYPE_HF)
+ return MIN2(8, inst->exec_size);
+ return MIN2(16, inst->exec_size);
+ }
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER: