st/mesa: add double input support including lowering (v3.1)
author: Dave Airlie <airlied@redhat.com>
Fri, 20 Feb 2015 01:42:19 +0000 (11:42 +1000)
committer: Dave Airlie <airlied@redhat.com>
Fri, 8 May 2015 00:21:02 +0000 (10:21 +1000)
This takes a different approach from the previous one: we cannot index into
inputMapping with anything but the mesa attribute index, so we can't use the
"just add one to the index" trick; we need extra information so that we can
add one after the input has been mapped.

(Fixed copy propagation and cleaned up a little)

v2: drop float64 format check, just attr->Doubles.
merge enable patch.
v3: cleanup code a bit.
v3.1: minor review fixups (comment, newline) (Ilia)

Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/mesa/state_tracker/st_atom_array.c
src/mesa/state_tracker/st_extensions.c
src/mesa/state_tracker/st_glsl_to_tgsi.cpp
src/mesa/state_tracker/st_program.c
src/mesa/state_tracker/st_program.h

index d4fb8b862f60abe5ea2b46bafaa1994b147157e4..56b8019a36fbccc8956fa43c13793866d8d2571d 100644 (file)
@@ -44,7 +44,6 @@
 
 #include "cso_cache/cso_context.h"
 #include "util/u_math.h"
-
 #include "main/bufferobj.h"
 #include "main/glformats.h"
 
@@ -311,6 +310,18 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
    return PIPE_FORMAT_NONE; /* silence compiler warning */
 }
 
+static const struct gl_client_array *
+get_client_array(const struct st_vertex_program *vp,
+                 const struct gl_client_array **arrays,
+                 int attr) /* index into vp->index_to_input[], not a mesa attribute */
+{
+   const GLuint mesaAttr = vp->index_to_input[attr];
+   /* a placeholder slot (ST_DOUBLE_ATTRIB_PLACEHOLDER, 0xffffffff) has no client array */
+   if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
+      return NULL; /* callers must check for NULL before dereferencing */
+   return arrays[mesaAttr];
+}
+
 /**
  * Examine the active arrays to determine if we have interleaved
  * vertex arrays all living in one VBO, or all living in user space.
@@ -327,11 +338,16 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
    GLboolean userSpaceBuffer = GL_FALSE;
 
    for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_client_array *array = arrays[mesaAttr];
-      const struct gl_buffer_object *bufObj = array->BufferObj;
-      const GLsizei stride = array->StrideB; /* in bytes */
+      const struct gl_client_array *array;
+      const struct gl_buffer_object *bufObj;
+      GLsizei stride;
+
+      array = get_client_array(vp, arrays, attr);
+      if (!array)
+        continue;
 
+      stride = array->StrideB; /* in bytes */
+      bufObj = array->BufferObj;
       if (attr == 0) {
          /* save info about the first array */
          firstStride = stride;
@@ -358,6 +374,55 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
    return GL_TRUE;
 }
 
+static void init_velement(struct pipe_vertex_element *velement, /* element to fill in */
+                          int src_offset, int format,
+                          int instance_divisor, int vbo_index)
+{
+   velement->src_offset = src_offset;
+   velement->src_format = format;
+   velement->instance_divisor = instance_divisor;
+   velement->vertex_buffer_index = vbo_index;
+   assert(velement->src_format); /* a zero format value is treated as invalid */
+}
+
+static void init_velement_lowered(struct st_context *st, /* not used in this body */
+                                  struct pipe_vertex_element *velements,
+                                  int src_offset, int format,
+                                  int instance_divisor, int vbo_index,
+                                  int nr_components, GLboolean doubles,
+                                  GLuint *attr_idx) /* in/out: next free slot in velements[] */
+{
+   int idx = *attr_idx;
+   if (doubles) {
+      int lower_format;
+      /* doubles are lowered to 32-bit uint pairs; one element holds at most two doubles */
+      if (nr_components < 2)
+         lower_format = PIPE_FORMAT_R32G32_UINT;
+      else /* exhaustive, so lower_format is always initialized */
+         lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
+
+      init_velement(&velements[idx], src_offset,
+                    lower_format, instance_divisor, vbo_index);
+      idx++;
+
+      if (nr_components > 2) { /* 3 or 4 doubles: the remainder goes in a second element */
+         if (nr_components == 3)
+            lower_format = PIPE_FORMAT_R32G32_UINT;
+         else
+            lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
+
+         init_velement(&velements[idx], src_offset + 4 * sizeof(float),
+                       lower_format, instance_divisor, vbo_index);
+         idx++;
+      }
+   } else {
+      init_velement(&velements[idx], src_offset,
+                    format, instance_divisor, vbo_index);
+      idx++;
+   }
+   *attr_idx = idx;
+}
+
 /**
  * Set up for drawing interleaved arrays that all live in one VBO
  * or all live in user space.
@@ -365,13 +430,15 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
  * \param velements  returns vertex element info
  */
 static boolean
-setup_interleaved_attribs(const struct st_vertex_program *vp,
+setup_interleaved_attribs(struct st_context *st,
+                          const struct st_vertex_program *vp,
                           const struct st_vp_variant *vpv,
                           const struct gl_client_array **arrays,
                           struct pipe_vertex_buffer *vbuffer,
-                          struct pipe_vertex_element velements[])
+                          struct pipe_vertex_element velements[],
+                          unsigned *num_velements)
 {
-   GLuint attr;
+   GLuint attr, attr_idx;
    const GLubyte *low_addr = NULL;
    GLboolean usingVBO;      /* all arrays in a VBO? */
    struct gl_buffer_object *bufobj;
@@ -381,8 +448,10 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
     * Init bufobj and stride.
     */
    if (vpv->num_inputs) {
-      const GLuint mesaAttr0 = vp->index_to_input[0];
-      const struct gl_client_array *array = arrays[mesaAttr0];
+      const struct gl_client_array *array;
+
+      array = get_client_array(vp, arrays, 0);
+      assert(array);
 
       /* Since we're doing interleaved arrays, we know there'll be at most
        * one buffer object and the stride will be the same for all arrays.
@@ -394,7 +463,11 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
       low_addr = arrays[vp->index_to_input[0]]->Ptr;
 
       for (attr = 1; attr < vpv->num_inputs; attr++) {
-         const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
+         const GLubyte *start;
+         array = get_client_array(vp, arrays, attr);
+         if (!array)
+            continue;
+         start = array->Ptr;
          low_addr = MIN2(low_addr, start);
       }
    }
@@ -408,25 +481,33 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
    /* are the arrays in user space? */
    usingVBO = _mesa_is_bufferobj(bufobj);
 
+   attr_idx = 0;
    for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_client_array *array = arrays[mesaAttr];
-      unsigned src_offset = (unsigned) (array->Ptr - low_addr);
+      const struct gl_client_array *array;
+      unsigned src_offset;
+      unsigned src_format;
+
+      array = get_client_array(vp, arrays, attr);
+      if (!array)
+         continue;
 
+      src_offset = (unsigned) (array->Ptr - low_addr);
       assert(array->_ElementSize ==
              _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
 
-      velements[attr].src_offset = src_offset;
-      velements[attr].instance_divisor = array->InstanceDivisor;
-      velements[attr].vertex_buffer_index = 0;
-      velements[attr].src_format = st_pipe_vertex_format(array->Type,
-                                                         array->Size,
-                                                         array->Format,
-                                                         array->Normalized,
-                                                         array->Integer);
-      assert(velements[attr].src_format);
+      src_format = st_pipe_vertex_format(array->Type,
+                                         array->Size,
+                                         array->Format,
+                                         array->Normalized,
+                                         array->Integer);
+
+      init_velement_lowered(st, velements, src_offset, src_format,
+                            array->InstanceDivisor, 0,
+                            array->Size, array->Doubles, &attr_idx);
    }
 
+   *num_velements = attr_idx;
+
    /*
     * Return the vbuffer info and setup user-space attrib info, if needed.
     */
@@ -472,17 +553,25 @@ setup_non_interleaved_attribs(struct st_context *st,
                               const struct st_vp_variant *vpv,
                               const struct gl_client_array **arrays,
                               struct pipe_vertex_buffer vbuffer[],
-                              struct pipe_vertex_element velements[])
+                              struct pipe_vertex_element velements[],
+                              unsigned *num_velements)
 {
    struct gl_context *ctx = st->ctx;
-   GLuint attr;
+   GLuint attr, attr_idx = 0;
 
    for (attr = 0; attr < vpv->num_inputs; attr++) {
       const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_client_array *array = arrays[mesaAttr];
-      struct gl_buffer_object *bufobj = array->BufferObj;
-      GLsizei stride = array->StrideB;
+      const struct gl_client_array *array;
+      struct gl_buffer_object *bufobj;
+      GLsizei stride;
+      unsigned src_format;
 
+      array = get_client_array(vp, arrays, attr);
+      if (!array)
+         continue;
+
+      stride = array->StrideB;
+      bufobj = array->BufferObj;
       assert(array->_ElementSize ==
              _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
 
@@ -524,16 +613,19 @@ setup_non_interleaved_attribs(struct st_context *st,
       /* common-case setup */
       vbuffer[attr].stride = stride; /* in bytes */
 
-      velements[attr].src_offset = 0;
-      velements[attr].instance_divisor = array->InstanceDivisor;
-      velements[attr].vertex_buffer_index = attr;
-      velements[attr].src_format = st_pipe_vertex_format(array->Type,
-                                                         array->Size,
-                                                         array->Format,
-                                                         array->Normalized,
-                                                         array->Integer);
-      assert(velements[attr].src_format);
+      src_format = st_pipe_vertex_format(array->Type,
+                                         array->Size,
+                                         array->Format,
+                                         array->Normalized,
+                                         array->Integer);
+
+      init_velement_lowered(st, velements, 0, src_format,
+                            array->InstanceDivisor, attr,
+                            array->Size, array->Doubles, &attr_idx);
+
    }
+
+   *num_velements = attr_idx;
    return TRUE;
 }
 
@@ -563,25 +655,23 @@ static void update_array(struct st_context *st)
     * Setup the vbuffer[] and velements[] arrays.
     */
    if (is_interleaved_arrays(vp, vpv, arrays)) {
-      if (!setup_interleaved_attribs(vp, vpv, arrays, vbuffer, velements)) {
+      if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements, &num_velements)) {
          st->vertex_array_out_of_memory = TRUE;
          return;
       }
 
       num_vbuffers = 1;
-      num_velements = vpv->num_inputs;
       if (num_velements == 0)
          num_vbuffers = 0;
    }
    else {
       if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer,
-                                         velements)) {
+                                         velements, &num_velements)) {
          st->vertex_array_out_of_memory = TRUE;
          return;
       }
 
       num_vbuffers = vpv->num_inputs;
-      num_velements = vpv->num_inputs;
    }
 
    cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer);
index 82e4a30093f0012dc85e3b4859a75e879f6d9556..b1057f3eaddd2a57a1f4fb064b3d9672237329f4 100644 (file)
@@ -909,6 +909,8 @@ void st_init_extensions(struct pipe_screen *screen,
    if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                 PIPE_SHADER_CAP_DOUBLES) &&
        screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
-                                PIPE_SHADER_CAP_DOUBLES))
+                                PIPE_SHADER_CAP_DOUBLES)) {
       extensions->ARB_gpu_shader_fp64 = GL_TRUE;
+      extensions->ARB_vertex_attrib_64bit = GL_TRUE;
+   }
 }
index 08957dc2e1d9cb675c19e7eddcfaa15d2790ad49..1fea8600a75d0fdc13dd5e47f2eca7450e8bbcb0 100644 (file)
@@ -88,6 +88,7 @@ public:
       this->reladdr = NULL;
       this->reladdr2 = NULL;
       this->has_index2 = false;
+      this->double_reg2 = false;
    }
 
    st_src_reg(gl_register_file file, int index, int type)
@@ -101,6 +102,7 @@ public:
       this->reladdr = NULL;
       this->reladdr2 = NULL;
       this->has_index2 = false;
+      this->double_reg2 = false;
    }
 
    st_src_reg(gl_register_file file, int index, int type, int index2D)
@@ -114,6 +116,7 @@ public:
       this->reladdr = NULL;
       this->reladdr2 = NULL;
       this->has_index2 = false;
+      this->double_reg2 = false;
    }
 
    st_src_reg()
@@ -127,6 +130,7 @@ public:
       this->reladdr = NULL;
       this->reladdr2 = NULL;
       this->has_index2 = false;
+      this->double_reg2 = false;
    }
 
    explicit st_src_reg(st_dst_reg reg);
@@ -141,6 +145,11 @@ public:
    st_src_reg *reladdr;
    st_src_reg *reladdr2;
    bool has_index2;
+   /*
+    * Is this the second half of a double register pair?
+    * currently used for input mapping only.
+    */
+   bool double_reg2;
 };
 
 class st_dst_reg {
@@ -197,6 +206,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
    this->index2D = 0;
    this->reladdr2 = NULL;
    this->has_index2 = false;
+   this->double_reg2 = false;
 }
 
 st_dst_reg::st_dst_reg(st_src_reg reg)
@@ -677,8 +687,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
 
             if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
                dinst->src[j].index = initial_src_idx[j];
-               if (swz > 1)
+               if (swz > 1) {
+                  dinst->src[j].double_reg2 = true;
                   dinst->src[j].index++;
+              }
 
                if (swz & 1)
                   dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
@@ -3705,6 +3717,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
             } else {
                if (first->src[0].file != copy_chan->src[0].file ||
                    first->src[0].index != copy_chan->src[0].index ||
+                   first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
                    first->src[0].index2D != copy_chan->src[0].index2D) {
                   good = false;
                   break;
@@ -3720,6 +3733,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
             inst->src[r].index = first->src[0].index;
             inst->src[r].index2D = first->src[0].index2D;
             inst->src[r].has_index2 = first->src[0].has_index2;
+            inst->src[r].double_reg2 = first->src[0].double_reg2;
 
             int swizzle = 0;
             for (int i = 0; i < 4; i++) {
@@ -4552,6 +4566,9 @@ dst_register(struct st_translate *t,
 static struct ureg_src
 src_register(struct st_translate *t, const st_src_reg *reg)
 {
+   int index = reg->index;
+   int double_reg2 = reg->double_reg2 ? 1 : 0;
+
    switch(reg->file) {
    case PROGRAM_UNDEFINED:
       return ureg_imm4f(t->ureg, 0, 0, 0, 0);
@@ -4577,8 +4594,12 @@ src_register(struct st_translate *t, const st_src_reg *reg)
       return t->immediates[reg->index];
 
    case PROGRAM_INPUT:
-      assert(t->inputMapping[reg->index] < ARRAY_SIZE(t->inputs));
-      return t->inputs[t->inputMapping[reg->index]];
+      /* GLSL inputs are 64-bit containers, so we have to
+       * map back to the original index and add the offset after
+       * mapping. */
+      index -= double_reg2;
+      assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
+      return t->inputs[t->inputMapping[index] + double_reg2];
 
    case PROGRAM_OUTPUT:
       assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs));
index d93b3c7bcb86f5231352f0880b03fad4907352b0..a9110d3c674d54e186446a74e3839e0933a42b45 100644 (file)
@@ -194,6 +194,11 @@ st_prepare_vertex_program(struct gl_context *ctx,
          stvp->input_to_index[attr] = stvp->num_inputs;
          stvp->index_to_input[stvp->num_inputs] = attr;
          stvp->num_inputs++;
+         if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
+            /* add placeholder for second part of a double attribute */
+            stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
+            stvp->num_inputs++;
+         }
       }
    }
    /* bit of a hack, presetup potentially unused edgeflag input */
index b2c86faecb7d411b295911ecf15e5a64d85237ae..a2c56062d6e1a5b2309f2f3d9d5157493e483a67 100644 (file)
@@ -45,6 +45,7 @@
 extern "C" {
 #endif
 
+#define ST_DOUBLE_ATTRIB_PLACEHOLDER 0xffffffff
 
 /** Fragment program variant key */
 struct st_fp_variant_key