i965/fs_nir: Get rid of get_alu_src

author Jason Ekstrand <jason.ekstrand@intel.com>

Thu, 22 Jan 2015 00:00:55 +0000 (16:00 -0800)

committer Jason Ekstrand <jason.ekstrand@intel.com>

Tue, 3 Feb 2015 20:33:11 +0000 (12:33 -0800)
author Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 22 Jan 2015 00:00:55 +0000 (16:00 -0800)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 3 Feb 2015 20:33:11 +0000 (12:33 -0800)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index 25197cd6ba718f09376903f10ea2d344a83d4888..b95e2c03e137a0305ce0f70c6da1f2637d23fff0 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -589,7 +589,6 @@ public:
     void nir_emit_texture(nir_tex_instr *instr);
     void nir_emit_jump(nir_jump_instr *instr);
     fs_reg get_nir_src(nir_src src);
-   fs_reg get_nir_alu_src(nir_alu_instr *instr, unsigned src);
     fs_reg get_nir_dest(nir_dest dest);
     void emit_percomp(fs_inst *inst, unsigned wr_mask);
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index e88c71bc0e87cc3bf8002880b7fc6325707c96b4..90980c78800ae9021f9fc0d8d5f6b15ef17200a4 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -139,8 +139,6 @@ fs_visitor::emit_nir_code()
  
     nir_convert_from_ssa(nir);
     nir_validate_shader(nir);
-   nir_lower_vec_to_movs(nir);
-   nir_validate_shader(nir);
  
     /* emit the arrays used for inputs and outputs - load/store intrinsics will
      * be converted to reads/writes of these arrays
@@ -418,6 +416,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
  void
  fs_visitor::nir_emit_cf_list(exec_list *list)
  {
+   exec_list_validate(list);
     foreach_list_typed(nir_cf_node, node, node, list) {
        switch (node->type) {
        case nir_cf_node_if:
@@ -540,34 +539,97 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
     struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
     fs_inst *inst;
  
-   fs_reg op[3];
     fs_reg result = get_nir_dest(instr->dest.dest);
     result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
  
-   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
-      op[i] = get_nir_alu_src(instr, i);
+   fs_reg op[4];
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      op[i] = get_nir_src(instr->src[i].src);
+      op[i].type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
+      op[i].abs = instr->src[i].abs;
+      op[i].negate = instr->src[i].negate;
+   }
+
+   /* We get a bunch of mov's out of the from_ssa pass and they may still
+    * be vectorized.  We'll handle them as a special-case.  We'll also
+    * handle vecN here because it's basically the same thing.
+    */
+   switch (instr->op) {
+   case nir_op_imov:
+   case nir_op_fmov:
+   case nir_op_vec2:
+   case nir_op_vec3:
+   case nir_op_vec4: {
+      fs_reg temp = result;
+      bool need_extra_copy = false;
+      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+         if (!instr->src[i].src.is_ssa &&
+             instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
+            need_extra_copy = true;
+            temp = retype(vgrf(4), result.type);
+            break;
+         }
+      }
+
+      for (unsigned i = 0; i < 4; i++) {
+         if (!(instr->dest.write_mask & (1 << i)))
+            continue;
+
+         if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
+            inst = emit(MOV(offset(temp, i),
+                        offset(op[0], instr->src[0].swizzle[i])));
+         } else {
+            inst = emit(MOV(offset(temp, i),
+                        offset(op[i], instr->src[i].swizzle[0])));
+         }
+         inst->saturate = instr->dest.saturate;
+      }
+
+      /* In this case the source and destination registers were the same,
+       * so we need to insert an extra set of moves in order to deal with
+       * any swizzling.
+       */
+      if (need_extra_copy) {
+         for (unsigned i = 0; i < 4; i++) {
+            if (!(instr->dest.write_mask & (1 << i)))
+               continue;
+
+            emit(MOV(offset(result, i), offset(temp, i)));
+         }
+      }
+      return;
+   }
+   default:
+      break;
+   }
  
+   /* At this point, we have dealt with any instruction that operates on
+    * more than a single channel.  Therefore, we can just adjust the source
+    * and destination registers for that channel and emit the instruction.
+    */
+   unsigned channel = 0;
     if (nir_op_infos[instr->op].output_size == 0) {
-      /* We've already scalarized, so we know that we only have one
-       * channel.  The only question is which channel.
+      /* Since NIR is doing the scalarizing for us, we should only ever see
+       * vectorized operations with a single channel.
         */
        assert(_mesa_bitcount(instr->dest.write_mask) == 1);
-      unsigned off = ffs(instr->dest.write_mask) - 1;
-      result = offset(result, off);
+      channel = ffs(instr->dest.write_mask) - 1;
+
+      result = offset(result, channel);
+   }
  
-      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
-         op[i] = offset(op[i], off);
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      assert(nir_op_infos[instr->op].input_sizes[i] < 2);
+      op[i] = offset(op[i], instr->src[i].swizzle[channel]);
     }
  
     switch (instr->op) {
-   case nir_op_fmov:
     case nir_op_i2f:
     case nir_op_u2f:
        inst = emit(MOV(result, op[0]));
        inst->saturate = instr->dest.saturate;
        break;
  
-   case nir_op_imov:
     case nir_op_f2i:
     case nir_op_f2u:
        emit(MOV(result, op[0]));
@@ -820,11 +882,6 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
     case nir_op_fnoise4_4:
        unreachable("not reached: should be handled by lower_noise");
  
-   case nir_op_vec2:
-   case nir_op_vec3:
-   case nir_op_vec4:
-      unreachable("not reached: should be handled by lower_quadop_vector");
-
     case nir_op_ldexp:
        unreachable("not reached: should be handled by ldexp_to_arith()");
  
@@ -1044,46 +1101,6 @@ fs_visitor::get_nir_src(nir_src src)
     }
  }
  
-fs_reg
-fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
-{
-   fs_reg reg = get_nir_src(instr->src[src].src);
-
-   reg.type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[src]);
-   reg.abs = instr->src[src].abs;
-   reg.negate = instr->src[src].negate;
-
-   bool needs_swizzle = false;
-   unsigned num_components = 0;
-   for (unsigned i = 0; i < 4; i++) {
-      if (!nir_alu_instr_channel_used(instr, src, i))
-         continue;
-
-      if (instr->src[src].swizzle[i] != i)
-         needs_swizzle = true;
-
-      num_components = i + 1;
-   }
-
-   if (needs_swizzle) {
-      /* resolve the swizzle through MOV's */
-      fs_reg new_reg = vgrf(num_components);
-      new_reg.type = reg.type;
-
-      for (unsigned i = 0; i < 4; i++) {
-         if (!nir_alu_instr_channel_used(instr, src, i))
-            continue;
-
-         emit(MOV(offset(new_reg, i),
-                  offset(reg, instr->src[src].swizzle[i])));
-      }
-
-      return new_reg;
-   }
-
-   return reg;
-}
-
  fs_reg
  fs_visitor::get_nir_dest(nir_dest dest)
  {
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Thu, 22 Jan 2015 00:00:55 +0000 (16:00 -0800)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Tue, 3 Feb 2015 20:33:11 +0000 (12:33 -0800)
src/mesa/drivers/dri/i965/brw_fs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_nir.cpp		patch \| blob \| history