intel/peephole_ffma: Fix swizzle propagation
author Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 17 Oct 2018 16:34:32 +0000 (11:34 -0500)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 11 Jan 2019 16:44:08 +0000 (10:44 -0600)
The num_components value passed into get_mul_for_src is used to compose
only the parts of the swizzle that we know will be used, so that we don't
compose invalid swizzle components.  However, we had a bug where we
passed the number of components of the add all the way through the
recursion.  For any given source, we need the number of components that
are actually read from that source.  In the case where we have a narrow
add, say 2 components, that is sourced from a chain of wider
instructions, we may fail to compose all of the swizzle components that
are needed.  All we really need to do is pass through the right number
of components at each level.

Fixes: 2231cf0ba3a "nir: Fix output swizzle in get_mul_for_src"
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
src/intel/compiler/brw_nir_opt_peephole_ffma.c

index cc225e1847b289a3551f674855290701eb6b180b..7271bdbca4363b7765d0c5d87f36d093f86f9378 100644 (file)
@@ -68,7 +68,7 @@ are_all_uses_fadd(nir_ssa_def *def)
 }
 
 static nir_alu_instr *
-get_mul_for_src(nir_alu_src *src, int num_components,
+get_mul_for_src(nir_alu_src *src, unsigned num_components,
                 uint8_t swizzle[4], bool *negate, bool *abs)
 {
    uint8_t swizzle_tmp[4];
@@ -93,16 +93,19 @@ get_mul_for_src(nir_alu_src *src, int num_components,
    switch (alu->op) {
    case nir_op_imov:
    case nir_op_fmov:
-      alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+      alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components,
+                            swizzle, negate, abs);
       break;
 
    case nir_op_fneg:
-      alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+      alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components,
+                            swizzle, negate, abs);
       *negate = !*negate;
       break;
 
    case nir_op_fabs:
-      alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+      alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components,
+                            swizzle, negate, abs);
       *negate = false;
       *abs = true;
       break;