}
}
+/**
+ * Return true if splitting out the group of channels of instruction \p inst
+ * given by lbld.group() requires allocating a temporary for the i-th source
+ * of the lowered instruction.
+ */
+static inline bool
+needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
+{
+ return !(is_periodic(inst->src[i], lbld.dispatch_width()) ||
+ (inst->components_read(i) == 1 &&
+ lbld.dispatch_width() <= inst->exec_size));
+}
+
/**
* Extract the data that would be consumed by the channel group given by
* lbld.group() from the i-th source region of instruction \p inst and return
/* Specified channel group from the source region. */
const fs_reg src = horiz_offset(inst->src[i], lbld.group());
- if (!is_periodic(inst->src[i], lbld.dispatch_width())) {
+ if (needs_src_copy(lbld, inst, i)) {
/* Builder of the right width to perform the copy avoiding uninitialized
* data if the lowered execution size is greater than the original
* execution size of the instruction.
return tmp;
- } else {
+ } else if (is_periodic(inst->src[i], lbld.dispatch_width())) {
/* The source is invariant for all dispatch_width-wide groups of the
* original region.
*/
return inst->src[i];
+
+ } else {
+ /* We can just point the lowered instruction at the right channel group
+ * from the original region.
+ */
+ return src;
}
}