/*
* The first index is the address to load from, and the second index is the
- * number of array elements to load. For UBO's (and SSBO's), the first index
- * is the UBO buffer index (TODO nonconstant UBO buffer index) and the second
- * and third indices play the role of the first and second indices in the other
- * loads. Indirect loads have an additional register input, which is added
- * to the constant address to compute the final address to load from.
+ * number of array elements to load. Indirect loads have an additional
+ * register input, which is added to the constant address to compute the
+ * final address to load from. For UBOs (and SSBOs), the first source is
+ * the (possibly constant) UBO buffer index and the indirect (if it exists)
+ * is the second source.
*
* For vector backends, the address is in terms of one vec4, and so each array
* element is +4 scalar components from the previous array element. For scalar
 * backends, the address is in terms of scalar components, so array elements
 * begin immediately after the previous array element.
*/
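Illustration (not part of the patch): a minimal sketch of the new source/index
layout for an indirect UBO load. The shader, block_index, and indirect_offset
names are hypothetical; the helpers are the stock ones from nir.h.

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(shader, nir_intrinsic_load_ubo_indirect);
   load->num_components = 4;
   load->src[0] = nir_src_for_ssa(block_index);      /* UBO buffer index */
   load->src[1] = nir_src_for_ssa(indirect_offset);  /* added to the constant offset */
   load->const_index[0] = 16;   /* constant base offset */
   load->const_index[1] = 1;    /* number of array elements */
   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL);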
-#define LOAD(name, num_indices, flags) \
- INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \
- NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
- INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \
- NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+#define LOAD(name, extra_srcs, flags) \
+ INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
+ INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
+ true, 0, 0, 2, flags)
-LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)
-LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)
-LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
-/* LOAD(ssbo, 2, 0) */
+LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* LOAD(ssbo, 1, 0) */
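For reference (not part of the patch), assuming the usual INTRINSIC(name,
num_srcs, src_components, has_dest, dest_components, num_variables,
num_indices, flags) signature, the new LOAD(ubo, 1, ...) line expands to:

   INTRINSIC(load_ubo, 1, ARR(1), true, 0, 0, 2,
             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
   INTRINSIC(load_ubo_indirect, 2, ARR(1, 1), true, 0, 0, 2,
             NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)

i.e. load_ubo takes a single source (the buffer index) and load_ubo_indirect
takes two (the buffer index, then the indirect offset), each with two constant
indices.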
/*
* Stores work the same way as loads, except now the first register input is
break;
}
+ case nir_intrinsic_load_ubo_indirect:
+ has_indirect = true;
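+      /* fallthrough */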
case nir_intrinsic_load_ubo: {
- fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
- (unsigned) instr->const_index[0]);
- fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
- packed_consts.type = dest.type;
-
- fs_reg const_offset_reg = fs_reg((unsigned) instr->const_index[1] & ~15);
- emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- packed_consts, surf_index, const_offset_reg));
+ nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
+ fs_reg surf_index;
- for (unsigned i = 0; i < instr->num_components; i++) {
- packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);
+ if (const_index) {
+ surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
+ const_index->u[0]);
+ } else {
+ /* The block index is not a constant. Evaluate the index expression
+ * per-channel and add the base UBO index; the generator will select
+ * a value from any live channel.
+ */
+ surf_index = fs_reg(this, glsl_type::uint_type);
+ emit(ADD(surf_index, get_nir_src(instr->src[0]),
+ fs_reg(stage_prog_data->binding_table.ubo_start)))
+ ->force_writemask_all = true;
- /* The std140 packing rules don't allow vectors to cross 16-byte
- * boundaries, and a reg is 32 bytes.
+ /* Assume this may touch any UBO. It would be nice to provide
+ * a tighter bound, but the array information is already lowered away.
*/
- assert(packed_consts.subreg_offset < 32);
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumUniformBlocks - 1);
+ }
- fs_inst *inst = MOV(dest, packed_consts);
- if (instr->has_predicate)
- inst->predicate = BRW_PREDICATE_NORMAL;
- emit(inst);
+ if (has_indirect) {
+ /* Turn the byte offset into a dword offset. */
+ fs_reg base_offset = fs_reg(this, glsl_type::int_type);
+ emit(SHR(base_offset, retype(get_nir_src(instr->src[1]),
+ BRW_REGISTER_TYPE_D),
+ fs_reg(2)));
- dest.reg_offset++;
- }
- break;
- }
+ unsigned vec4_offset = instr->const_index[0] / 4;
+ for (int i = 0; i < instr->num_components; i++) {
+ exec_list list = VARYING_PULL_CONSTANT_LOAD(offset(dest, i),
+ surf_index, base_offset,
+ vec4_offset + i);
- case nir_intrinsic_load_ubo_indirect: {
- fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
- instr->const_index[0]);
- /* Turn the byte offset into a dword offset. */
- unsigned base_offset = instr->const_index[1] / 4;
- fs_reg offset = fs_reg(this, glsl_type::int_type);
- emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
- fs_reg(2)));
+ fs_inst *last_inst = (fs_inst *) list.get_tail();
+ if (instr->has_predicate)
+ last_inst->predicate = BRW_PREDICATE_NORMAL;
+ emit(list);
+ }
+ } else {
+ fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
+ packed_consts.type = dest.type;
- for (unsigned i = 0; i < instr->num_components; i++) {
- exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
- offset, base_offset + i);
- fs_inst *last_inst = (fs_inst *) list.get_tail();
- if (instr->has_predicate)
- last_inst->predicate = BRW_PREDICATE_NORMAL;
- emit(list);
+ fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
+ emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
+ surf_index, const_offset_reg);
- dest.reg_offset++;
+ for (unsigned i = 0; i < instr->num_components; i++) {
+ packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i);
+
+ /* The std140 packing rules don't allow vectors to cross 16-byte
+ * boundaries, and a reg is 32 bytes.
+ */
+ assert(packed_consts.subreg_offset < 32);
+
+ fs_inst *inst = MOV(dest, packed_consts);
+ if (instr->has_predicate)
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ emit(inst);
+
+ dest.reg_offset++;
+ }
}
break;
}
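For context (not part of the patch): nir_src_as_const_value() only reports a
constant when the source is an SSA value produced by a load_const instruction,
so a dynamically uniform but non-literal block index (e.g. one read from a
uniform) still takes the ADD + brw_mark_surface_used path above. Roughly, as a
sketch of the assumed semantics (see nir.h for the real helper):

   static inline nir_const_value *
   src_as_const_value_sketch(nir_src src)
   {
      if (!src.is_ssa ||
          src.ssa->parent_instr->type != nir_instr_type_load_const)
         return NULL;
      return &nir_instr_as_load_const(src.ssa->parent_instr)->value;
   }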