i965/fs: optimize pack double
authorIago Toral Quiroga <itoral@igalia.com>
Tue, 19 Apr 2016 10:59:47 +0000 (12:59 +0200)
committerSamuel Iglesias Gonsálvez <siglesias@igalia.com>
Tue, 10 May 2016 09:25:06 +0000 (11:25 +0200)
When we are actually creating a double using values obtained from a
previous unpack operation we can bypass the unpack and source from
the original double value directly.

v2:
- Style changes (Topi)
- Bail is parent instruction's src is not SSA (Connor)

v3: Use subscript() instead of stride() (Curro)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 5b5a7667bbb2f74cb9b3b180496951fb885f2251..afa3308135ee5884309b3e024b75dacf6f101b64 100644 (file)
@@ -1099,6 +1099,35 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       break;
 
    case nir_op_pack_double_2x32_split:
+      /* Optimize the common case where we are re-packing a double with
+       * the result of a previous double unpack. In this case we can take the
+       * 32-bit value to use in the re-pack from the original double and bypass
+       * the unpack operation.
+       */
+      for (int i = 0; i < 2; i++) {
+         if (instr->src[i].src.is_ssa)
+            continue;
+
+         const nir_instr *parent_instr = instr->src[i].src.ssa->parent_instr;
+         if (parent_instr->type == nir_instr_type_alu)
+            continue;
+
+         const nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr);
+         if (alu_parent->op == nir_op_unpack_double_2x32_split_x ||
+             alu_parent->op == nir_op_unpack_double_2x32_split_y)
+            continue;
+
+         if (!alu_parent->src[0].src.is_ssa)
+            continue;
+
+         op[i] = get_nir_src(alu_parent->src[0].src);
+         op[i] = offset(retype(op[i], BRW_REGISTER_TYPE_DF), bld,
+                        alu_parent->src[0].swizzle[channel]);
+         if (alu_parent->op == nir_op_unpack_double_2x32_split_y)
+            op[i] = subscript(op[i], BRW_REGISTER_TYPE_UD, 1);
+         else
+            op[i] = subscript(op[i], BRW_REGISTER_TYPE_UD, 0);
+      }
       bld.emit(FS_OPCODE_PACK, result, op[0], op[1]);
       break;