i965/vs: Start adding support for uniforms
author Eric Anholt <eric@anholt.net>
Wed, 4 May 2011 19:50:16 +0000 (12:50 -0700)
committer Eric Anholt <eric@anholt.net>
Tue, 16 Aug 2011 20:04:41 +0000 (13:04 -0700)
There's no clever packing here, no pull constants, and no array support.

src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_curbe.c
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
src/mesa/drivers/dri/i965/gen6_vs_state.c
src/mesa/drivers/dri/i965/gen6_wm_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c

index 7b6b64c1a5c384eafac9872e6b1e8782d2227a0a..4a1abd6252e876d66676c512daeb69b5b978180e 100644 (file)
@@ -248,6 +248,7 @@ enum param_conversion {
    PARAM_CONVERT_F2I,
    PARAM_CONVERT_F2U,
    PARAM_CONVERT_F2B,
+   PARAM_CONVERT_ZERO,
 };
 
 /* Data about a particular attempt to compile a program.  Note that
@@ -317,6 +318,13 @@ struct brw_vs_prog_data {
    /* Used for calculating urb partitions:
     */
    GLuint urb_entry_size;
+
+   const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+   enum param_conversion param_convert[MAX_UNIFORMS * 4];
+   const float *pull_param[MAX_UNIFORMS * 4];
+   enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
+
+   bool uses_new_param_layout;
 };
 
 
@@ -898,7 +906,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
 }
 
 static inline
-float convert_param(enum param_conversion conversion, float param)
+float convert_param(enum param_conversion conversion, const float *param)
 {
    union {
       float f;
@@ -908,21 +916,23 @@ float convert_param(enum param_conversion conversion, float param)
 
    switch (conversion) {
    case PARAM_NO_CONVERT:
-      return param;
+      return *param;
    case PARAM_CONVERT_F2I:
-      fi.i = param;
+      fi.i = *param;
       return fi.f;
    case PARAM_CONVERT_F2U:
-      fi.u = param;
+      fi.u = *param;
       return fi.f;
    case PARAM_CONVERT_F2B:
-      if (param != 0.0)
+      if (*param != 0.0)
         fi.i = 1;
       else
         fi.i = 0;
       return fi.f;
+   case PARAM_CONVERT_ZERO:
+      return 0.0;
    default:
-      return param;
+      return *param;
    }
 }
 
index ae11c487a2c7d949f7d0be9c9f6e01888a7c9aa0..960be10006e2be843a3baf8610ca49ce041918cb 100644 (file)
@@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
       /* copy float constants */
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
         buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
-                                        *brw->wm.prog_data->param[i]);
+                                        brw->wm.prog_data->param[i]);
       }
    }
 
@@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw)
       GLuint offset = brw->curbe.vs_start * 16;
       GLuint nr = brw->vs.prog_data->nr_params / 4;
 
-      /* Load the subset of push constants that will get used when
-       * we also have a pull constant buffer.
-       */
-      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
-        if (brw->vs.constant_map[i] != -1) {
-           assert(brw->vs.constant_map[i] <= nr);
-           memcpy(buf + offset + brw->vs.constant_map[i] * 4,
-                  vp->program.Base.Parameters->ParameterValues[i],
-                  4 * sizeof(float));
+      if (brw->vs.prog_data->uses_new_param_layout) {
+        for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+           buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i],
+                                           brw->vs.prog_data->param[i]);
+        }
+      } else {
+        /* Load the subset of push constants that will get used when
+         * we also have a pull constant buffer.
+         */
+        for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+           if (brw->vs.constant_map[i] != -1) {
+              assert(brw->vs.constant_map[i] <= nr);
+              memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+                     vp->program.Base.Parameters->ParameterValues[i],
+                     4 * sizeof(float));
+           }
         }
       }
    }
index 10168fc1cb0f3ff08ac6b33a63775c4346049eb6..01058243f046ad0b5d52c7086f62093b0fa36919 100644 (file)
@@ -356,6 +356,8 @@ public:
     * for the ir->location's used.
     */
    dst_reg output_reg[VERT_RESULT_MAX];
+   int uniform_size[MAX_UNIFORMS];
+   int uniforms;
 
    struct hash_table *variable_ht;
 
@@ -363,7 +365,10 @@ public:
    void fail(const char *msg, ...);
 
    int virtual_grf_alloc(int size);
+   int setup_uniform_values(int loc, const glsl_type *type);
+   void setup_builtin_uniform_values(ir_variable *ir);
    int setup_attributes(int payload_reg);
+   int setup_uniforms(int payload_reg);
    void setup_payload();
    void reg_allocate_trivial();
    void reg_allocate();
index bdc7a79d83d7fc68d6755aaf34d94dd63b52d4b7..1f2853e11186039ca34256c2b37ac637c5da0d19 100644 (file)
@@ -67,20 +67,12 @@ vec4_visitor::setup_attributes(int payload_reg)
 
    prog_data->urb_read_length = (nr_attributes + 1) / 2;
 
-   return nr_attributes;
+   return payload_reg + nr_attributes;
 }
 
-void
-vec4_visitor::setup_payload(void)
+int
+vec4_visitor::setup_uniforms(int reg)
 {
-   int reg = 0;
-
-   /* r0 is always reserved, as it contains the payload with the URB
-    * handles that are passed on to the URB write at the end of the
-    * thread.
-    */
-   reg++;
-
    /* User clip planes from curbe:
     */
    if (c->key.nr_userclip) {
@@ -99,14 +91,49 @@ vec4_visitor::setup_payload(void)
       }
    }
 
-   /* FINISHME: push constants */
+   /* The pre-gen6 VS requires that some push constants get loaded no
+    * matter what, or the GPU would hang.
+    */
+   if (this->uniforms == 0) {
+      this->uniform_size[this->uniforms] = 1;
+
+      for (unsigned int i = 0; i < 4; i++) {
+        unsigned int slot = this->uniforms * 4 + i;
+
+        c->prog_data.param[slot] = NULL;
+        c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
+      }
+
+      this->uniforms++;
+   } else {
+      reg += ALIGN(uniforms, 2) / 2;
+   }
+
+   /* for now, we are not doing any elimination of unused slots, nor
+    * are we packing our uniforms.
+    */
+   c->prog_data.nr_params = this->uniforms * 4;
+
    c->prog_data.curb_read_length = reg - 1;
-   c->prog_data.nr_params = 0;
-   /* XXX 0 causes a bug elsewhere... */
-   if (intel->gen < 6 && c->prog_data.nr_params == 0)
-      c->prog_data.nr_params = 4;
+   c->prog_data.uses_new_param_layout = true;
+
+   return reg;
+}
+
+void
+vec4_visitor::setup_payload(void)
+{
+   int reg = 0;
+
+   /* The payload always contains important data in g0, which contains
+    * the URB handles that are passed on to the URB write at the end
+    * of the thread.  So, we always start push constants at g1.
+    */
+   reg++;
 
-   reg += setup_attributes(reg);
+   reg = setup_uniforms(reg);
+
+   reg = setup_attributes(reg);
 
    this->first_non_payload_grf = reg;
 }
@@ -174,6 +201,18 @@ vec4_instruction::get_src(int i)
       }
       break;
 
+   case UNIFORM:
+      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
+                                   ((src[i].reg + src[i].reg_offset) % 2) * 4),
+                      0, 4, 1);
+      brw_reg = retype(brw_reg, src[i].type);
+      brw_reg.dw1.bits.swizzle = src[i].swizzle;
+      if (src[i].abs)
+        brw_reg = brw_abs(brw_reg);
+      if (src[i].negate)
+        brw_reg = negate(brw_reg);
+      break;
+
    case HW_REG:
       brw_reg = src[i].fixed_hw_reg;
       break;
index bba1d810f194f3deed174e770779ce6633520349..91abd40faadbb2f5abe780277d54d0045f826619 100644 (file)
  */
 
 #include "brw_vec4.h"
+extern "C" {
 #include "main/macros.h"
+#include "program/prog_parameter.h"
+}
 
 namespace brw {
 
@@ -306,6 +309,130 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
    this->type = brw_type_for_base_type(type);
 }
 
+/* Map a non-builtin uniform onto push-constant slots.
+ *
+ * The values are read straight out of the vertex program's parameter
+ * list (this->vp), because that's where they actually get stored, rather
+ * than in some global gl_shader_program uniform store.  Each scalar or
+ * vector takes a whole vec4 uniform, zero-padded; matrices, structs and
+ * arrays recurse over their columns, fields and elements.
+ *
+ * Returns how many ParameterValues entries the type occupies.
+ */
+int
+vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
+{
+   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
+   unsigned int consumed = 0;
+
+   if (type->is_matrix()) {
+      /* A matrix is laid out as matrix_columns float column vectors. */
+      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+                                                        type->vector_elements,
+                                                        1);
+
+      for (unsigned int col = 0; col < type->matrix_columns; col++)
+         consumed += setup_uniform_values(loc + consumed, column);
+
+      return consumed;
+   }
+
+   /* How the stored float gets converted at upload time. */
+   enum param_conversion convert;
+
+   switch (type->base_type) {
+   case GLSL_TYPE_STRUCT:
+      for (unsigned int f = 0; f < type->length; f++) {
+         consumed += setup_uniform_values(loc + consumed,
+                                          type->fields.structure[f].type);
+      }
+      return consumed;
+
+   case GLSL_TYPE_ARRAY:
+      for (unsigned int e = 0; e < type->length; e++)
+         consumed += setup_uniform_values(loc + consumed, type->fields.array);
+      return consumed;
+
+   case GLSL_TYPE_SAMPLER:
+      /* The sampler takes up a slot, but we don't use any values from it. */
+      return 1;
+
+   case GLSL_TYPE_FLOAT:
+      convert = PARAM_NO_CONVERT;
+      break;
+   case GLSL_TYPE_UINT:
+      convert = PARAM_CONVERT_F2U;
+      break;
+   case GLSL_TYPE_INT:
+      convert = PARAM_CONVERT_F2I;
+      break;
+   case GLSL_TYPE_BOOL:
+      convert = PARAM_CONVERT_F2B;
+      break;
+
+   default:
+      assert(!"not reached");
+      return 0;
+   }
+
+   const unsigned int first_slot = this->uniforms * 4;
+
+   for (unsigned int chan = 0; chan < type->vector_elements; chan++) {
+      c->prog_data.param_convert[first_slot + chan] = convert;
+      c->prog_data.param[first_slot + chan] = &values[chan];
+   }
+
+   /* Unused trailing channels of the vec4 are uploaded as zero. */
+   for (unsigned int chan = type->vector_elements; chan < 4; chan++) {
+      c->prog_data.param_convert[first_slot + chan] = PARAM_CONVERT_ZERO;
+      c->prog_data.param[first_slot + chan] = NULL;
+   }
+
+   this->uniform_size[this->uniforms] = type->vector_elements;
+   this->uniforms++;
+
+   return 1;
+}
+
+/* Set up the push constants for a builtin (gl_*) uniform.  This is even
+ * scarier than non-builtin support: it sits on top of the PROG_STATE_VAR
+ * parameters that are automatically updated from GL context state.
+ */
+void
+vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
+{
+   const ir_state_slot *const slots = ir->state_slots;
+   assert(ir->state_slots != NULL);
+
+   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+      /* This state reference has already been setup by ir_to_mesa,
+       * but we'll get the same index back here.  We can reference
+       * ParameterValues directly, since unlike brw_fs.cpp, we never
+       * add new state references during compile.
+       */
+      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
+                                            (gl_state_index *)slots[i].tokens);
+      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
+
+      /* Channels up to the first repeated swizzle are live (this matches the
+       * field's glsl_type size); the rest are zero padding, never left unset.
+       */
+      int size = 0, last_swiz = -1;
+      for (int j = 0; j < 4; j++) {
+         const int slot = this->uniforms * 4 + j;
+         const int swiz = GET_SWZ(slots[i].swizzle, j);
+         const bool live = size == j && swiz != last_swiz;
+         c->prog_data.param[slot] = live ? &values[swiz] : NULL;
+         c->prog_data.param_convert[slot] =
+            live ? PARAM_NO_CONVERT : PARAM_CONVERT_ZERO;
+         last_swiz = swiz;
+         size += live;
+      }
+      this->uniform_size[this->uniforms] = size;
+      this->uniforms++;
+   }
+}
+
 dst_reg *
 vec4_visitor::variable_storage(ir_variable *var)
 {
@@ -496,13 +623,10 @@ vec4_visitor::visit(ir_variable *ir)
    switch (ir->mode) {
    case ir_var_in:
       reg = new(mem_ctx) dst_reg(ATTR, ir->location);
-      reg->type = brw_type_for_base_type(ir->type);
-      hash_table_insert(this->variable_ht, reg, ir);
       break;
 
    case ir_var_out:
       reg = new(mem_ctx) dst_reg(this, ir->type);
-      hash_table_insert(this->variable_ht, reg, ir);
 
       for (int i = 0; i < type_size(ir->type); i++) {
         output_reg[ir->location + i] = *reg;
@@ -512,14 +636,21 @@ vec4_visitor::visit(ir_variable *ir)
 
    case ir_var_temporary:
       reg = new(mem_ctx) dst_reg(this, ir->type);
-      hash_table_insert(this->variable_ht, reg, ir);
-
       break;
 
    case ir_var_uniform:
-      /* FINISHME: uniforms */
+      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
+
+      if (!strncmp(ir->name, "gl_", 3)) {
+        setup_builtin_uniform_values(ir);
+      } else {
+        setup_uniform_values(ir->location, ir->type);
+      }
       break;
    }
+
+   reg->type = brw_type_for_base_type(ir->type);
+   hash_table_insert(this->variable_ht, reg, ir);
 }
 
 void
@@ -1606,6 +1737,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->current_annotation = NULL;
 
    this->c = c;
+   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
    this->prog_data = &c->prog_data;
 
    this->variable_ht = hash_table_ctor(0,
@@ -1615,6 +1747,12 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->virtual_grf_sizes = NULL;
    this->virtual_grf_count = 0;
    this->virtual_grf_array_size = 0;
+
+   this->uniforms = 0;
+
+   this->variable_ht = hash_table_ctor(0,
+                                      hash_table_pointer_hash,
+                                      hash_table_pointer_compare);
 }
 
 vec4_visitor::~vec4_visitor()
index fb4fb146f8d3fea07a143638ebbb4ae62d606f77..ad909789d827be0824d7e9eddcaf0c8ce507c75d 100644 (file)
@@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw)
    constants = brw->wm.const_bo->virtual;
    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
       constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i],
-                                  *brw->wm.prog_data->pull_param[i]);
+                                  brw->wm.prog_data->pull_param[i]);
    }
    drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
 
index e70454416bfb1a7b90403b8a7b240d43763d5f64..affa72c732418b0e99bb134a20ec222abb1ea9cf 100644 (file)
@@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw)
         params_uploaded++;
       }
 
-      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
-        if (brw->vs.constant_map[i] != -1) {
-           memcpy(param + brw->vs.constant_map[i] * 4,
-                  vp->program.Base.Parameters->ParameterValues[i],
-                  4 * sizeof(float));
-           params_uploaded++;
+      if (brw->vs.prog_data->uses_new_param_layout) {
+        for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+           *param = convert_param(brw->vs.prog_data->param_convert[i],
+                                  brw->vs.prog_data->param[i]);
+           param++;
+        }
+        params_uploaded += brw->vs.prog_data->nr_params / 4;
+      } else {
+        for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+           if (brw->vs.constant_map[i] != -1) {
+              memcpy(param + brw->vs.constant_map[i] * 4,
+                     vp->program.Base.Parameters->ParameterValues[i],
+                     4 * sizeof(float));
+              params_uploaded++;
+           }
         }
       }
 
index 3d525248f258346eaf5f28c3f7d7f607766281eb..07e9995f53b80c3aa89e83243bf68179bc2413a8 100644 (file)
@@ -61,7 +61,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
 
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
         constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
-                                     *brw->wm.prog_data->param[i]);
+                                     brw->wm.prog_data->param[i]);
       }
 
       if (0) {
index a102ca772b323dc19e6d9e9746b763f7a8235dea..1d80e96778ef2fc368e50f05c1c3b3a5e21ca291 100644 (file)
@@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw)
 
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
         constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
-                                     *brw->wm.prog_data->param[i]);
+                                     brw->wm.prog_data->param[i]);
       }
 
       if (0) {