nir: Vectorize intrinsics
author Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 4 Dec 2014 01:03:19 +0000 (17:03 -0800)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 15 Jan 2015 15:19:03 +0000 (07:19 -0800)
We used to have the number of components built into the intrinsic.  This
meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4
variants.  This led to piles of switch statements to generate the correct
intrinsic names, and introspection to figure out the number of components.
We can make things much nicer by allowing "vectorized" intrinsics.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
src/glsl/nir/glsl_to_nir.cpp
src/glsl/nir/nir.h
src/glsl/nir/nir_intrinsics.h
src/glsl/nir/nir_lower_io.c
src/glsl/nir/nir_lower_locals_to_regs.c
src/glsl/nir/nir_lower_system_values.c
src/glsl/nir/nir_lower_variables.c
src/glsl/nir/nir_validate.c
src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 06f50ac63026cbb174641cda4590b647f3270b8d..25873d90e02be593d1e85b3586122d3b213e58df 100644 (file)
@@ -625,7 +625,8 @@ nir_visitor::visit(ir_call *ir)
       nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
 
       nir_intrinsic_instr *store_instr =
-         nir_intrinsic_instr_create(shader, nir_intrinsic_store_var_vec1);
+         nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+      store_instr->num_components = 1;
 
       ir->return_deref->accept(this);
       store_instr->variables[0] = this->deref_head;
@@ -699,17 +700,9 @@ nir_visitor::visit(ir_assignment *ir)
        * back into the LHS. Copy propagation should get rid of the mess.
        */
 
-      nir_intrinsic_op load_op;
-      switch (ir->lhs->type->vector_elements) {
-         case 1: load_op = nir_intrinsic_load_var_vec1; break;
-         case 2: load_op = nir_intrinsic_load_var_vec2; break;
-         case 3: load_op = nir_intrinsic_load_var_vec3; break;
-         case 4: load_op = nir_intrinsic_load_var_vec4; break;
-         default: unreachable("Invalid number of components"); break;
-      }
-
-      nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader,
-                                                             load_op);
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+      load->num_components = ir->lhs->type->vector_elements;
       load->dest.is_ssa = true;
       nir_ssa_def_init(&load->instr, &load->dest.ssa,
                        num_components, NULL);
@@ -754,17 +747,9 @@ nir_visitor::visit(ir_assignment *ir)
       src.ssa = &vec->dest.dest.ssa;
    }
 
-   nir_intrinsic_op store_op;
-   switch (ir->lhs->type->vector_elements) {
-      case 1: store_op = nir_intrinsic_store_var_vec1; break;
-      case 2: store_op = nir_intrinsic_store_var_vec2; break;
-      case 3: store_op = nir_intrinsic_store_var_vec3; break;
-      case 4: store_op = nir_intrinsic_store_var_vec4; break;
-      default: unreachable("Invalid number of components"); break;
-   }
-
-   nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader,
-                                                           store_op);
+   nir_intrinsic_instr *store =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
+   store->num_components = ir->lhs->type->vector_elements;
    nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);
    store->variables[0] = nir_deref_as_var(store_deref);
    store->src[0] = src;
@@ -843,17 +828,9 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
        * must emit a variable load.
        */
 
-      nir_intrinsic_op load_op;
-      switch (ir->type->vector_elements) {
-      case 1: load_op = nir_intrinsic_load_var_vec1; break;
-      case 2: load_op = nir_intrinsic_load_var_vec2; break;
-      case 3: load_op = nir_intrinsic_load_var_vec3; break;
-      case 4: load_op = nir_intrinsic_load_var_vec4; break;
-      default: unreachable("Invalid number of components");
-      }
-
       nir_intrinsic_instr *load_instr =
-         nir_intrinsic_instr_create(this->shader, load_op);
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+      load_instr->num_components = ir->type->vector_elements;
       load_instr->variables[0] = this->deref_head;
       add_instr(&load_instr->instr, ir->type->vector_elements);
    }
@@ -912,23 +889,12 @@ nir_visitor::visit(ir_expression *ir)
 
       nir_intrinsic_op op;
       if (const_index) {
-         switch (ir->type->vector_elements) {
-            case 1: op = nir_intrinsic_load_ubo_vec1; break;
-            case 2: op = nir_intrinsic_load_ubo_vec2; break;
-            case 3: op = nir_intrinsic_load_ubo_vec3; break;
-            case 4: op = nir_intrinsic_load_ubo_vec4; break;
-            default: assert(0); break;
-         }
+         op = nir_intrinsic_load_ubo;
       } else {
-         switch (ir->type->vector_elements) {
-            case 1: op = nir_intrinsic_load_ubo_vec1_indirect; break;
-            case 2: op = nir_intrinsic_load_ubo_vec2_indirect; break;
-            case 3: op = nir_intrinsic_load_ubo_vec3_indirect; break;
-            case 4: op = nir_intrinsic_load_ubo_vec4_indirect; break;
-            default: assert(0); break;
-         }
+         op = nir_intrinsic_load_ubo_indirect;
       }
       nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
+      load->num_components = ir->type->vector_elements;
       load->const_index[0] = ir->operands[0]->as_constant()->value.u[0];
       load->const_index[1] = const_index ? const_index->value.u[0] : 0; /* base offset */
       load->const_index[2] = 1; /* number of vec4's */
index a0a16c1d3a81f6bd202c5fa480cf2117baaebea8..ff1f7964fce75cf70f55549ecbcab652376093b0 100644 (file)
@@ -693,6 +693,9 @@ typedef struct {
 
    nir_dest dest;
 
+   /** number of components if this is a vectorized intrinsic */
+   uint8_t num_components;
+
    int const_index[3];
 
    nir_deref_var *variables[2];
@@ -732,12 +735,20 @@ typedef struct {
 
    unsigned num_srcs; /** < number of register/SSA inputs */
 
-   /** number of components of each input register */
+   /** number of components of each input register
+    *
+    * If this value is 0, the number of components is given by the
+    * num_components field of nir_intrinsic_instr.
+    */
    unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];
 
    bool has_dest;
 
-   /** number of components of each output register */
+   /** number of components of the output register
+    *
+    * If this value is 0, the number of components is given by the
+    * num_components field of nir_intrinsic_instr.
+    */
    unsigned dest_components;
 
    /** the number of inputs/outputs that are variables */
index e4ad8cdc02a6d60ed3a65d9b9e00714d44b0abd4..75bd12f6a0a130f66f88d921c84e4ca5c93b6195 100644 (file)
 #define ARR(...) { __VA_ARGS__ }
 
 
-INTRINSIC(load_var_vec1,   0, ARR(), true, 1, 1, 0,
-          NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(load_var_vec2,   0, ARR(), true, 2, 1, 0,
-          NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(load_var_vec3,   0, ARR(), true, 3, 1, 0,
-          NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(load_var_vec4,   0, ARR(), true, 4, 1, 0,
-          NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0)
-INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0)
-INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0)
-INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0)
-INTRINSIC(copy_var,       0, ARR(),  false, 0, 2, 0, 0)
+INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
+INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
 
 /*
  * a barrier is an intrinsic with no inputs/outputs but which can't be moved
@@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2)
 SYSTEM_VALUE(sample_mask_in, 1)
 SYSTEM_VALUE(invocation_id, 1)
 
-#define LOAD_OR_INTERP(name, num_srcs, src_comps, num_indices, flags) \
-   INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps), true, 1, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps), true, 2, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps), true, 3, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
-   INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps), true, 4, \
-             0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags)
-
-#define LOAD(name, num_indices, flags) \
-   LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags)
-
 /*
  * The first index is the address to load from, and the second index is the
  * number of array elements to load. For UBO's (and SSBO's), the first index
@@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1)
  * elements begin immediately after the previous array element.
  */
 
+#define LOAD(name, num_indices, flags) \
+   INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \
+             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+   INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \
+             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+
 LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)
 LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)
 LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
@@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
  * interp_at_offset* intrinsics take a second source that is either a
  * sample id or a vec2 position offset.
  */
-#define INTERP(name, flags) \
-   LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags)
-
-#define INTERP_WITH_ARG(name, src_comps, flags) \
-   LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags)
 
-INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER)
-INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER)
-INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
+#define INTERP(name, num_srcs, src_comps) \
+   INTRINSIC(interp_##name, num_srcs, ARR(src_comps), true, \
+             0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+   INTRINSIC(interp_##name##_indirect, 1 + num_srcs, ARR(1, src_comps), true, \
+             0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
-#define STORE(name, num_indices, flags) \
-   INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \
-             num_indices, flags) \
-   INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \
-             num_indices, flags) \
-   INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \
-             num_indices, flags) \
-   INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \
-             num_indices, flags) \
+INTERP(at_centroid, 0, 0)
+INTERP(at_sample, 1, 1)
+INTERP(at_offset, 1, 1)
 
 /*
  * Stores work the same way as loads, except now the first register input is
@@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
  * offset.
  */
 
+#define STORE(name, num_indices, flags) \
+   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
+   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
+             num_indices, flags) \
+
 STORE(output, 2, 0)
 /* STORE(ssbo, 3, 0) */
 
-LAST_INTRINSIC(store_output_vec4_indirect)
+LAST_INTRINSIC(store_output_indirect)
index e00970e4afa03c6ab8819dfc73d83129ef552308..6a043fdedd1d0cba22a6ca27fe8d50634e360257 100644 (file)
@@ -189,66 +189,6 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
    return base_offset;
 }
 
-static nir_intrinsic_op
-get_load_op(nir_variable_mode mode, bool indirect, unsigned num_components)
-{
-   if (indirect) {
-      switch (mode) {
-      case nir_var_shader_in:
-         switch (num_components) {
-         case 1: return nir_intrinsic_load_input_vec1_indirect;
-         case 2: return nir_intrinsic_load_input_vec2_indirect;
-         case 3: return nir_intrinsic_load_input_vec3_indirect;
-         case 4: return nir_intrinsic_load_input_vec4_indirect;
-         default: unreachable("Invalid number of components"); break;
-         }
-         break;
-
-      case nir_var_uniform:
-         switch (num_components) {
-         case 1: return nir_intrinsic_load_uniform_vec1_indirect;
-         case 2: return nir_intrinsic_load_uniform_vec2_indirect;
-         case 3: return nir_intrinsic_load_uniform_vec3_indirect;
-         case 4: return nir_intrinsic_load_uniform_vec4_indirect;
-         default: unreachable("Invalid number of components"); break;
-         }
-         break;
-
-      default:
-         unreachable("Invalid input type");
-         break;
-      }
-   } else {
-      switch (mode) {
-      case nir_var_shader_in:
-         switch (num_components) {
-         case 1: return nir_intrinsic_load_input_vec1;
-         case 2: return nir_intrinsic_load_input_vec2;
-         case 3: return nir_intrinsic_load_input_vec3;
-         case 4: return nir_intrinsic_load_input_vec4;
-         default: unreachable("Invalid number of components"); break;
-         }
-         break;
-
-      case nir_var_uniform:
-         switch (num_components) {
-         case 1: return nir_intrinsic_load_uniform_vec1;
-         case 2: return nir_intrinsic_load_uniform_vec2;
-         case 3: return nir_intrinsic_load_uniform_vec3;
-         case 4: return nir_intrinsic_load_uniform_vec4;
-         default: unreachable("Invalid number of components"); break;
-         }
-         break;
-
-      default:
-         unreachable("Invalid input type");
-         break;
-      }
-   }
-
-   return nir_intrinsic_load_input_vec1;
-}
-
 static bool
 nir_lower_io_block(nir_block *block, void *void_state)
 {
@@ -261,22 +201,35 @@ nir_lower_io_block(nir_block *block, void *void_state)
       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
       switch (intrin->intrinsic) {
-      case nir_intrinsic_load_var_vec1:
-      case nir_intrinsic_load_var_vec2:
-      case nir_intrinsic_load_var_vec3:
-      case nir_intrinsic_load_var_vec4: {
+      case nir_intrinsic_load_var: {
          nir_variable_mode mode = intrin->variables[0]->var->data.mode;
          if (mode != nir_var_shader_in && mode != nir_var_uniform)
             continue;
 
          bool has_indirect = deref_has_indirect(intrin->variables[0]);
-         unsigned num_components =
-            nir_intrinsic_infos[intrin->intrinsic].dest_components;
 
-         nir_intrinsic_op load_op = get_load_op(mode, has_indirect,
-                                                num_components);
+         nir_intrinsic_op load_op;
+         switch (mode) {
+         case nir_var_shader_in:
+            if (has_indirect) {
+               load_op = nir_intrinsic_load_input_indirect;
+            } else {
+               load_op = nir_intrinsic_load_input;
+            }
+            break;
+         case nir_var_uniform:
+            if (has_indirect) {
+               load_op = nir_intrinsic_load_uniform_indirect;
+            } else {
+               load_op = nir_intrinsic_load_uniform;
+            }
+            break;
+         default:
+            unreachable("Unknown variable mode");
+         }
          nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
                                                                 load_op);
+         load->num_components = intrin->num_components;
 
          nir_src indirect;
          unsigned offset = get_io_offset(intrin->variables[0],
@@ -292,7 +245,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
          if (intrin->dest.is_ssa) {
             load->dest.is_ssa = true;
             nir_ssa_def_init(&load->instr, &load->dest.ssa,
-                             num_components, NULL);
+                             intrin->num_components, NULL);
 
             nir_src new_src = {
                .is_ssa = true,
@@ -310,38 +263,22 @@ nir_lower_io_block(nir_block *block, void *void_state)
          break;
       }
 
-      case nir_intrinsic_store_var_vec1:
-      case nir_intrinsic_store_var_vec2:
-      case nir_intrinsic_store_var_vec3:
-      case nir_intrinsic_store_var_vec4: {
+      case nir_intrinsic_store_var: {
          if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
             continue;
 
          bool has_indirect = deref_has_indirect(intrin->variables[0]);
-         unsigned num_components =
-            nir_intrinsic_infos[intrin->intrinsic].src_components[0];
 
          nir_intrinsic_op store_op;
          if (has_indirect) {
-            switch (num_components) {
-            case 1: store_op = nir_intrinsic_store_output_vec1_indirect; break;
-            case 2: store_op = nir_intrinsic_store_output_vec2_indirect; break;
-            case 3: store_op = nir_intrinsic_store_output_vec3_indirect; break;
-            case 4: store_op = nir_intrinsic_store_output_vec4_indirect; break;
-            default: unreachable("Invalid number of components"); break;
-            }
+            store_op = nir_intrinsic_store_output_indirect;
          } else {
-            switch (num_components) {
-            case 1: store_op = nir_intrinsic_store_output_vec1; break;
-            case 2: store_op = nir_intrinsic_store_output_vec2; break;
-            case 3: store_op = nir_intrinsic_store_output_vec3; break;
-            case 4: store_op = nir_intrinsic_store_output_vec4; break;
-            default: unreachable("Invalid number of components"); break;
-            }
+            store_op = nir_intrinsic_store_output;
          }
 
          nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
                                                                  store_op);
+         store->num_components = intrin->num_components;
 
          nir_src indirect;
          unsigned offset = get_io_offset(intrin->variables[0],
index 715c634bcd9c2ffb7367f08e04e2a25fb472e5b2..e2a8fa74e109b3d4ad8713c734f63f71a2791e78 100644 (file)
@@ -219,22 +219,18 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
       switch (intrin->intrinsic) {
-      case nir_intrinsic_load_var_vec1:
-      case nir_intrinsic_load_var_vec2:
-      case nir_intrinsic_load_var_vec3:
-      case nir_intrinsic_load_var_vec4: {
+      case nir_intrinsic_load_var: {
          if (intrin->variables[0]->var->data.mode != nir_var_local)
             continue;
 
          nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
          mov->src[0].src = get_deref_reg_src(intrin->variables[0],
                                              &intrin->instr, state);
-         unsigned num_components = mov->src[0].src.reg.reg->num_components;
-         mov->dest.write_mask = (1 << num_components) - 1;
+         mov->dest.write_mask = (1 << intrin->num_components) - 1;
          if (intrin->dest.is_ssa) {
             mov->dest.dest.is_ssa = true;
             nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
-                             num_components, NULL);
+                             intrin->num_components, NULL);
 
             nir_src new_src = {
                .is_ssa = true,
@@ -252,20 +248,16 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
          break;
       }
 
-      case nir_intrinsic_store_var_vec1:
-      case nir_intrinsic_store_var_vec2:
-      case nir_intrinsic_store_var_vec3:
-      case nir_intrinsic_store_var_vec4: {
+      case nir_intrinsic_store_var: {
          if (intrin->variables[0]->var->data.mode != nir_var_local)
             continue;
 
          nir_src reg_src = get_deref_reg_src(intrin->variables[0],
                                              &intrin->instr, state);
-         unsigned num_components = reg_src.reg.reg->num_components;
 
          nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
          mov->src[0].src = nir_src_copy(intrin->src[0], state->mem_ctx);
-         mov->dest.write_mask = (1 << num_components) - 1;
+         mov->dest.write_mask = (1 << intrin->num_components) - 1;
          mov->dest.dest.is_ssa = false;
          mov->dest.dest.reg.reg = reg_src.reg.reg;
          mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
index cbd1dace9c6352cee8aa88ad012b3930727b010d..e700df43bb48430b6ba27e9dc116cd0863e4d4c5 100644 (file)
@@ -30,8 +30,7 @@
 static void
 convert_instr(nir_intrinsic_instr *instr)
 {
-   if (instr->intrinsic != nir_intrinsic_load_var_vec1 &&
-       instr->intrinsic != nir_intrinsic_load_var_vec2)
+   if (instr->intrinsic != nir_intrinsic_load_var)
       return;
 
    nir_variable *var = instr->variables[0]->var;
index ed3e0fd2876f66c0d06d72db6f594502151af721..8c231240183c6d211104d57943c8ba03a45e933e 100644 (file)
@@ -445,17 +445,11 @@ fill_deref_tables_block(nir_block *block, void *void_state)
       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
       switch (intrin->intrinsic) {
-      case nir_intrinsic_load_var_vec1:
-      case nir_intrinsic_load_var_vec2:
-      case nir_intrinsic_load_var_vec3:
-      case nir_intrinsic_load_var_vec4:
+      case nir_intrinsic_load_var:
          register_load_instr(intrin, true, state);
          break;
 
-      case nir_intrinsic_store_var_vec1:
-      case nir_intrinsic_store_var_vec2:
-      case nir_intrinsic_store_var_vec3:
-      case nir_intrinsic_store_var_vec4:
+      case nir_intrinsic_store_var:
          register_store_instr(intrin, true, state);
          break;
 
@@ -537,17 +531,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
       nir_deref *src_deref = nir_copy_deref(state->mem_ctx, &src_head->deref);
       nir_deref *dest_deref = nir_copy_deref(state->mem_ctx, &dest_head->deref);
 
-      nir_intrinsic_op load_op;
-      switch (num_components) {
-         case 1: load_op = nir_intrinsic_load_var_vec1; break;
-         case 2: load_op = nir_intrinsic_load_var_vec2; break;
-         case 3: load_op = nir_intrinsic_load_var_vec3; break;
-         case 4: load_op = nir_intrinsic_load_var_vec4; break;
-         default: unreachable("Invalid number of components"); break;
-      }
-
-      nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
-                                                             load_op);
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_load_var);
+      load->num_components = num_components;
       load->variables[0] = nir_deref_as_var(src_deref);
       load->dest.is_ssa = true;
       nir_ssa_def_init(&load->instr, &load->dest.ssa, num_components, NULL);
@@ -555,17 +541,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
       nir_instr_insert_before(&copy_instr->instr, &load->instr);
       register_load_instr(load, false, state);
 
-      nir_intrinsic_op store_op;
-      switch (num_components) {
-         case 1: store_op = nir_intrinsic_store_var_vec1; break;
-         case 2: store_op = nir_intrinsic_store_var_vec2; break;
-         case 3: store_op = nir_intrinsic_store_var_vec3; break;
-         case 4: store_op = nir_intrinsic_store_var_vec4; break;
-         default: unreachable("Invalid number of components"); break;
-      }
-
-      nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
-                                                              store_op);
+      nir_intrinsic_instr *store =
+         nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_store_var);
+      store->num_components = num_components;
       store->variables[0] = nir_deref_as_var(dest_deref);
       store->src[0].is_ssa = true;
       store->src[0].ssa = &load->dest.ssa;
@@ -776,14 +754,9 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
          switch (intrin->intrinsic) {
-         case nir_intrinsic_load_var_vec1:
-         case nir_intrinsic_load_var_vec2:
-         case nir_intrinsic_load_var_vec3:
-         case nir_intrinsic_load_var_vec4: {
+         case nir_intrinsic_load_var: {
             struct deref_node *node = get_deref_node(intrin->variables[0],
                                                      false, state);
-            unsigned num_chans =
-               nir_intrinsic_infos[intrin->intrinsic].dest_components;
 
             if (node == NULL) {
                /* If we hit this path then we are referencing an invalid
@@ -793,7 +766,8 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
                 */
                nir_ssa_undef_instr *undef =
                   nir_ssa_undef_instr_create(state->mem_ctx);
-               nir_ssa_def_init(&undef->instr, &undef->def, num_chans, NULL);
+               nir_ssa_def_init(&undef->instr, &undef->def,
+                                intrin->num_components, NULL);
 
                nir_instr_insert_before(&intrin->instr, &undef->instr);
                nir_instr_remove(&intrin->instr);
@@ -815,14 +789,15 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
                                                       nir_op_imov);
             mov->src[0].src.is_ssa = true;
             mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);
-            for (unsigned i = num_chans; i < 4; i++)
+            for (unsigned i = intrin->num_components; i < 4; i++)
                mov->src[0].swizzle[i] = 0;
 
             assert(intrin->dest.is_ssa);
 
-            mov->dest.write_mask = (1 << num_chans) - 1;
+            mov->dest.write_mask = (1 << intrin->num_components) - 1;
             mov->dest.dest.is_ssa = true;
-            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL);
+            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
+                             intrin->num_components, NULL);
 
             nir_instr_insert_before(&intrin->instr, &mov->instr);
             nir_instr_remove(&intrin->instr);
@@ -837,10 +812,7 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
             break;
          }
 
-         case nir_intrinsic_store_var_vec1:
-         case nir_intrinsic_store_var_vec2:
-         case nir_intrinsic_store_var_vec3:
-         case nir_intrinsic_store_var_vec4: {
+         case nir_intrinsic_store_var: {
             struct deref_node *node = get_deref_node(intrin->variables[0],
                                                      false, state);
 
@@ -854,7 +826,8 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
             if (!node->lower_to_ssa)
                continue;
 
-            unsigned num_chans = glsl_get_vector_elements(node->type);
+            assert(intrin->num_components ==
+                   glsl_get_vector_elements(node->type));
 
             assert(intrin->src[0].is_ssa);
 
@@ -867,12 +840,12 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
 
                mov->src[1].src.is_ssa = true;
                mov->src[1].src.ssa = intrin->src[0].ssa;
-               for (unsigned i = num_chans; i < 4; i++)
+               for (unsigned i = intrin->num_components; i < 4; i++)
                   mov->src[1].swizzle[i] = 0;
 
                mov->src[2].src.is_ssa = true;
                mov->src[2].src.ssa = get_ssa_def_for_block(node, block, state);
-               for (unsigned i = num_chans; i < 4; i++)
+               for (unsigned i = intrin->num_components; i < 4; i++)
                   mov->src[2].swizzle[i] = 0;
 
             } else {
@@ -880,13 +853,14 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
 
                mov->src[0].src.is_ssa = true;
                mov->src[0].src.ssa = intrin->src[0].ssa;
-               for (unsigned i = num_chans; i < 4; i++)
+               for (unsigned i = intrin->num_components; i < 4; i++)
                   mov->src[0].swizzle[i] = 0;
             }
 
-            mov->dest.write_mask = (1 << num_chans) - 1;
+            mov->dest.write_mask = (1 << intrin->num_components) - 1;
             mov->dest.dest.is_ssa = true;
-            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL);
+            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
+                             intrin->num_components, NULL);
 
             nir_instr_insert_before(&intrin->instr, &mov->instr);
             nir_instr_remove(&intrin->instr);
index 73ca1b0e7869e7acf7e2d179d9f3029c2d1a5b69..bd7f5d71c94c8a7132bfecea383ebbec88cb1518 100644 (file)
@@ -331,16 +331,10 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
    }
 
    switch (instr->intrinsic) {
-   case nir_intrinsic_load_var_vec1:
-   case nir_intrinsic_load_var_vec2:
-   case nir_intrinsic_load_var_vec3:
-   case nir_intrinsic_load_var_vec4:
+   case nir_intrinsic_load_var:
       assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
       break;
-   case nir_intrinsic_store_var_vec1:
-   case nir_intrinsic_store_var_vec2:
-   case nir_intrinsic_store_var_vec3:
-   case nir_intrinsic_store_var_vec4:
+   case nir_intrinsic_store_var:
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform);
       break;
index dbb2470f34b9c8ab271d6af97f29b9103f43ab3c..4c1805d4b47218fd6aab7cb02f17df77839f4547 100644 (file)
@@ -1312,14 +1312,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
-   case nir_intrinsic_load_uniform_vec1:
-   case nir_intrinsic_load_uniform_vec2:
-   case nir_intrinsic_load_uniform_vec3:
-   case nir_intrinsic_load_uniform_vec4: {
+   case nir_intrinsic_load_uniform: {
       unsigned index = 0;
       for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0;
-            j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
+         for (unsigned j = 0; j < instr->num_components; j++) {
             fs_reg src = nir_uniforms;
             src.reg_offset = instr->const_index[0] + index;
             src.type = dest.type;
@@ -1335,14 +1331,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
-   case nir_intrinsic_load_uniform_vec1_indirect:
-   case nir_intrinsic_load_uniform_vec2_indirect:
-   case nir_intrinsic_load_uniform_vec3_indirect:
-   case nir_intrinsic_load_uniform_vec4_indirect: {
+   case nir_intrinsic_load_uniform_indirect: {
       unsigned index = 0;
       for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0;
-            j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
+         for (unsigned j = 0; j < instr->num_components; j++) {
             fs_reg src = nir_uniforms;
             src.reg_offset = instr->const_index[0] + index;
             src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
@@ -1360,10 +1352,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
-   case nir_intrinsic_load_ubo_vec1:
-   case nir_intrinsic_load_ubo_vec2:
-   case nir_intrinsic_load_ubo_vec3:
-   case nir_intrinsic_load_ubo_vec4: {
+   case nir_intrinsic_load_ubo: {
       fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
                                  (unsigned) instr->const_index[0]);
       fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
@@ -1373,8 +1362,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
                                 packed_consts, surf_index, const_offset_reg));
 
-      for (unsigned i = 0;
-           i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
+      for (unsigned i = 0; i < instr->num_components; i++) {
          packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);
 
          /* The std140 packing rules don't allow vectors to cross 16-byte
@@ -1392,10 +1380,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
-   case nir_intrinsic_load_ubo_vec1_indirect:
-   case nir_intrinsic_load_ubo_vec2_indirect:
-   case nir_intrinsic_load_ubo_vec3_indirect:
-   case nir_intrinsic_load_ubo_vec4_indirect: {
+   case nir_intrinsic_load_ubo_indirect: {
       fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
                                  instr->const_index[0]);
       /* Turn the byte offset into a dword offset. */
@@ -1404,8 +1389,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
                fs_reg(2)));
 
-      for (unsigned i = 0;
-           i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
+      for (unsigned i = 0; i < instr->num_components; i++) {
          exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
                                                      offset, base_offset + i);
          fs_inst *last_inst = (fs_inst *) list.get_tail();
@@ -1418,14 +1402,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
-   case nir_intrinsic_load_input_vec1:
-   case nir_intrinsic_load_input_vec2:
-   case nir_intrinsic_load_input_vec3:
-   case nir_intrinsic_load_input_vec4: {
+   case nir_intrinsic_load_input: {
       unsigned index = 0;
       for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0;
-            j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
+         for (unsigned j = 0; j < instr->num_components; j++) {
             fs_reg src = nir_inputs;
             src.reg_offset = instr->const_index[0] + index;
             src.type = dest.type;
@@ -1441,14 +1421,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
-   case nir_intrinsic_load_input_vec1_indirect:
-   case nir_intrinsic_load_input_vec2_indirect:
-   case nir_intrinsic_load_input_vec3_indirect:
-   case nir_intrinsic_load_input_vec4_indirect: {
+   case nir_intrinsic_load_input_indirect: {
       unsigned index = 0;
       for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0;
-            j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
+         for (unsigned j = 0; j < instr->num_components; j++) {
             fs_reg src = nir_inputs;
             src.reg_offset = instr->const_index[0] + index;
             src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
@@ -1466,15 +1442,11 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
-   case nir_intrinsic_store_output_vec1:
-   case nir_intrinsic_store_output_vec2:
-   case nir_intrinsic_store_output_vec3:
-   case nir_intrinsic_store_output_vec4: {
+   case nir_intrinsic_store_output: {
       fs_reg src = get_nir_src(instr->src[0]);
       unsigned index = 0;
       for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0;
-            j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
+         for (unsigned j = 0; j < instr->num_components; j++) {
             fs_reg new_dest = nir_outputs;
             new_dest.reg_offset = instr->const_index[0] + index;
             new_dest.type = src.type;
@@ -1489,16 +1461,12 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
-   case nir_intrinsic_store_output_vec1_indirect:
-   case nir_intrinsic_store_output_vec2_indirect:
-   case nir_intrinsic_store_output_vec3_indirect:
-   case nir_intrinsic_store_output_vec4_indirect: {
+   case nir_intrinsic_store_output_indirect: {
       fs_reg src = get_nir_src(instr->src[0]);
       fs_reg indirect = get_nir_src(instr->src[1]);
       unsigned index = 0;
       for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0;
-            j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
+         for (unsigned j = 0; j < instr->num_components; j++) {
             fs_reg new_dest = nir_outputs;
             new_dest.reg_offset = instr->const_index[0] + index;
             new_dest.reladdr = new(mem_ctx) fs_reg(indirect);