reg_size);
}
+/**
+ * Work out the execution type of a vec4 instruction from its sources.
+ *
+ * Each of the three source slots whose file is not BAD_FILE contributes the
+ * execution type of its register type.  A candidate replaces the current
+ * pick when its type_sz() is larger, or when the sizes tie and the candidate
+ * is a floating-point type.  If the pick is still BRW_REGISTER_TYPE_B after
+ * all sources have been examined, the destination type is used instead.
+ */
+static inline enum brw_reg_type
+get_exec_type(const vec4_instruction *inst)
+{
+   /* Initial pick; also serves as the "nothing selected" marker below. */
+   enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
+
+   for (int s = 0; s < 3; s++) {
+      /* Unused source slots do not take part in the selection. */
+      if (inst->src[s].file == BAD_FILE)
+         continue;
+
+      const brw_reg_type candidate =
+         get_exec_type(brw_reg_type(inst->src[s].type));
+
+      /* Wider always wins; at equal width only a floating-point type
+       * displaces the current pick.
+       */
+      if (type_sz(candidate) > type_sz(exec_type) ||
+          (type_sz(candidate) == type_sz(exec_type) &&
+           brw_reg_type_is_floating_point(candidate)))
+         exec_type = candidate;
+   }
+
+   /* No source displaced the initial value: take the destination type. */
+   if (exec_type == BRW_REGISTER_TYPE_B)
+      exec_type = inst->dst.type;
+
+   /* TODO: We need to handle half-float conversions. */
+   assert(exec_type != BRW_REGISTER_TYPE_HF ||
+          inst->dst.type == BRW_REGISTER_TYPE_HF);
+   assert(exec_type != BRW_REGISTER_TYPE_B);
+
+   return exec_type;
+}
+
+/**
+ * Return type_sz() of the execution type that get_exec_type() selects for
+ * \p inst.
+ */
+static inline unsigned
+get_exec_type_size(const vec4_instruction *inst)
+{
+   const enum brw_reg_type exec_type = get_exec_type(inst);
+
+   return type_sz(exec_type);
+}
+
} /* namespace brw */
#endif
}
}
+ /* IvyBridge can manage a maximum of 4 DFs per SIMD4x2 instruction, since
+ * it doesn't support compression in Align16 mode, no matter if it has
+ * force_writemask_all enabled or disabled (the latter is affected by the
+ * compressed instruction bug in gen7, which is another reason to enforce
+ * this limit).
+ */
+ if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ (get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8))
+ lowered_width = MIN2(lowered_width, 4);
+
return lowered_width;
}
brw_set_default_saturate(p, inst->saturate);
brw_set_default_mask_control(p, inst->force_writemask_all);
brw_set_default_acc_write_control(p, inst->writes_accumulator);
- brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
assert(inst->group % inst->exec_size == 0);
assert(inst->group % 8 == 0 ||
inst->src[0].type == BRW_REGISTER_TYPE_DF ||
inst->src[1].type == BRW_REGISTER_TYPE_DF ||
inst->src[2].type == BRW_REGISTER_TYPE_DF);
+
+ unsigned exec_size = inst->exec_size;
+ if (devinfo->gen == 7 &&
+ !devinfo->is_haswell &&
+ (get_exec_type_size(inst) == 8 ||
+ inst->dst.type == BRW_REGISTER_TYPE_DF))
+ exec_size *= 2;
+
+ brw_set_default_exec_size(p, cvt(exec_size) - 1);
+
if (!inst->force_writemask_all)
brw_set_default_group(p, inst->group);