opcode == BRW_OPCODE_NOT);
}
+/**
+ * Get the origin of a copy as a single register if all components present in
+ * the given readmask originate from the same register and have compatible
+ * regions, otherwise return a BAD_FILE register.
+ */
+static src_reg
+get_copy_value(const copy_entry &entry, unsigned readmask)
+{
+ unsigned swz[4] = {};
+ src_reg value;
+
+ for (unsigned i = 0; i < 4; i++) {
+ if (readmask & (1 << i)) {
+ if (entry.value[i]) {
+ src_reg src = *entry.value[i];
+
+ if (src.file == IMM) {
+ swz[i] = i;
+ } else {
+ swz[i] = BRW_GET_SWZ(src.swizzle, i);
+ /* Overwrite the original swizzle so the src_reg::equals call
+ * below doesn't care about it, the correct swizzle will be
+ * calculated once the swizzles of all components are known.
+ */
+ src.swizzle = BRW_SWIZZLE_XYZW;
+ }
+
+ if (value.file == BAD_FILE) {
+ value = src;
+ } else if (!value.equals(src)) {
+ return src_reg();
+ }
+ } else {
+ return src_reg();
+ }
+ }
+ }
+
+ return swizzle(value,
+ brw_compose_swizzle(brw_swizzle_for_mask(readmask),
+ BRW_SWIZZLE4(swz[0], swz[1],
+ swz[2], swz[3])));
+}
+
static bool
try_constant_propagate(const struct brw_device_info *devinfo,
vec4_instruction *inst,
- int arg, struct copy_entry *entry)
+ int arg, const copy_entry *entry)
{
/* For constant propagation, we only handle the same constant
* across all 4 channels. Some day, we should handle the 8-bit
* float vector format, which would let us constant propagate
* vectors better.
+ * We could be more aggressive here -- some channels might not get used
+ * based on the destination writemask.
*/
- src_reg value = *entry->value[0];
- for (int i = 1; i < 4; i++) {
- if (!value.equals(*entry->value[i]))
- return false;
- }
+ src_reg value =
+ get_copy_value(*entry,
+ brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
+ WRITEMASK_XYZW));
if (value.file != IMM)
return false;
static bool
try_copy_propagate(const struct brw_device_info *devinfo,
vec4_instruction *inst, int arg,
- struct copy_entry *entry, int attributes_per_reg)
+ const copy_entry *entry, int attributes_per_reg)
{
/* Build up the value we are propagating as if it were the source of a
* single MOV
*/
- /* For constant propagation, we only handle the same constant
- * across all 4 channels. Some day, we should handle the 8-bit
- * float vector format, which would let us constant propagate
- * vectors better.
- */
- src_reg value = *entry->value[0];
- for (int i = 1; i < 4; i++) {
- /* This is equals() except we don't care about the swizzle. */
- if (value.file != entry->value[i]->file ||
- value.nr != entry->value[i]->nr ||
- value.reg_offset != entry->value[i]->reg_offset ||
- value.type != entry->value[i]->type ||
- value.negate != entry->value[i]->negate ||
- value.abs != entry->value[i]->abs) {
- return false;
- }
- }
-
- /* Compute the swizzle of the original register by swizzling the
- * component loaded from each value according to the swizzle of
- * operand we're going to change.
- */
- int s[4];
- for (int i = 0; i < 4; i++) {
- s[i] = BRW_GET_SWZ(entry->value[i]->swizzle, i);
- }
- value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
+ src_reg value =
+ get_copy_value(*entry,
+ brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
+ WRITEMASK_XYZW));
/* Check that we can propagate that value */
if (value.file != UNIFORM &&
if (inst->regs_read(i) != 1)
continue;
- int reg = (alloc.offsets[inst->src[i].nr] +
- inst->src[i].reg_offset);
-
- /* Find the regs that each swizzle component came from.
- */
- struct copy_entry entry;
- memset(&entry, 0, sizeof(copy_entry));
- int c;
- for (c = 0; c < 4; c++) {
- int channel = BRW_GET_SWZ(inst->src[i].swizzle, c);
- entry.value[c] = entries[reg].value[channel];
-
- /* If there's no available copy for this channel, bail.
- * We could be more aggressive here -- some channels might
- * not get used based on the destination writemask.
- */
- if (!entry.value[c])
- break;
-
- entry.saturatemask |=
- (entries[reg].saturatemask & (1 << channel) ? 1 : 0) << c;
-
- /* We'll only be able to copy propagate if the sources are
- * all from the same file -- there's no ability to swizzle
- * 0 or 1 constants in with source registers like in i915.
- */
- if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file)
- break;
- }
-
- if (c != 4)
- continue;
+ const unsigned reg = (alloc.offsets[inst->src[i].nr] +
+ inst->src[i].reg_offset);
+ const copy_entry &entry = entries[reg];
if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
progress = true;