Merge branch 'gallium-userbuf'
[mesa.git] / src / mesa / state_tracker / st_draw.c
index 34f75a379693f50ddd016a48c2faf5bf124c4c09..a8c20f45acda0f58e9a20347a5a85b84eefb2058 100644 (file)
@@ -33,7 +33,7 @@
  *
  * We basically convert the VBO's vertex attribute/array information into
  * Gallium vertex state, bind the vertex buffer objects and call
- * pipe->draw_elements(), pipe->draw_range_elements() or pipe->draw_arrays().
+ * pipe->draw_vbo().
  *
  * Authors:
  *   Keith Whitwell <keith@tungstengraphics.com>
 
 #include "main/imports.h"
 #include "main/image.h"
+#include "main/bufferobj.h"
 #include "main/macros.h"
 #include "main/mfeatures.h"
-#include "program/prog_uniform.h"
 
 #include "vbo/vbo.h"
 
 #include "st_context.h"
 #include "st_atom.h"
 #include "st_cb_bufferobjects.h"
+#include "st_cb_xformfb.h"
 #include "st_draw.h"
 #include "st_program.h"
 
 #include "util/u_format.h"
 #include "util/u_prim.h"
 #include "util/u_draw_quad.h"
+#include "util/u_upload_mgr.h"
 #include "draw/draw_context.h"
 #include "cso_cache/cso_context.h"
 
+#include "../glsl/ir_uniform.h"
+
 
 static GLuint double_types[4] = {
    PIPE_FORMAT_R64_FLOAT,
@@ -99,6 +103,13 @@ static GLuint uint_types_scale[4] = {
    PIPE_FORMAT_R32G32B32A32_USCALED
 };
 
+static GLuint uint_types_int[4] = {   /* GL_UNSIGNED_INT, integer attribs; index is size-1 */
+   PIPE_FORMAT_R32_UINT,
+   PIPE_FORMAT_R32G32_UINT,
+   PIPE_FORMAT_R32G32B32_UINT,
+   PIPE_FORMAT_R32G32B32A32_UINT
+};
+
 static GLuint int_types_norm[4] = {
    PIPE_FORMAT_R32_SNORM,
    PIPE_FORMAT_R32G32_SNORM,
@@ -113,6 +124,13 @@ static GLuint int_types_scale[4] = {
    PIPE_FORMAT_R32G32B32A32_SSCALED
 };
 
+static GLuint int_types_int[4] = {   /* GL_INT, integer attribs; index is size-1 */
+   PIPE_FORMAT_R32_SINT,
+   PIPE_FORMAT_R32G32_SINT,
+   PIPE_FORMAT_R32G32B32_SINT,
+   PIPE_FORMAT_R32G32B32A32_SINT
+};
+
 static GLuint ushort_types_norm[4] = {
    PIPE_FORMAT_R16_UNORM,
    PIPE_FORMAT_R16G16_UNORM,
@@ -127,6 +145,13 @@ static GLuint ushort_types_scale[4] = {
    PIPE_FORMAT_R16G16B16A16_USCALED
 };
 
+static GLuint ushort_types_int[4] = {   /* GL_UNSIGNED_SHORT, integer attribs; index is size-1 */
+   PIPE_FORMAT_R16_UINT,
+   PIPE_FORMAT_R16G16_UINT,
+   PIPE_FORMAT_R16G16B16_UINT,
+   PIPE_FORMAT_R16G16B16A16_UINT
+};
+
 static GLuint short_types_norm[4] = {
    PIPE_FORMAT_R16_SNORM,
    PIPE_FORMAT_R16G16_SNORM,
@@ -141,6 +166,13 @@ static GLuint short_types_scale[4] = {
    PIPE_FORMAT_R16G16B16A16_SSCALED
 };
 
+static GLuint short_types_int[4] = {   /* GL_SHORT, integer attribs; index is size-1 */
+   PIPE_FORMAT_R16_SINT,
+   PIPE_FORMAT_R16G16_SINT,
+   PIPE_FORMAT_R16G16B16_SINT,
+   PIPE_FORMAT_R16G16B16A16_SINT
+};
+
 static GLuint ubyte_types_norm[4] = {
    PIPE_FORMAT_R8_UNORM,
    PIPE_FORMAT_R8G8_UNORM,
@@ -155,6 +187,13 @@ static GLuint ubyte_types_scale[4] = {
    PIPE_FORMAT_R8G8B8A8_USCALED
 };
 
+static GLuint ubyte_types_int[4] = {   /* GL_UNSIGNED_BYTE, integer attribs; index is size-1 */
+   PIPE_FORMAT_R8_UINT,
+   PIPE_FORMAT_R8G8_UINT,
+   PIPE_FORMAT_R8G8B8_UINT,
+   PIPE_FORMAT_R8G8B8A8_UINT
+};
+
 static GLuint byte_types_norm[4] = {
    PIPE_FORMAT_R8_SNORM,
    PIPE_FORMAT_R8G8_SNORM,
@@ -169,6 +208,13 @@ static GLuint byte_types_scale[4] = {
    PIPE_FORMAT_R8G8B8A8_SSCALED
 };
 
+static GLuint byte_types_int[4] = {   /* GL_BYTE, integer attribs; index is size-1 */
+   PIPE_FORMAT_R8_SINT,
+   PIPE_FORMAT_R8G8_SINT,
+   PIPE_FORMAT_R8G8B8_SINT,
+   PIPE_FORMAT_R8G8B8A8_SINT
+};
+
 static GLuint fixed_types[4] = {
    PIPE_FORMAT_R32_FIXED,
    PIPE_FORMAT_R32G32_FIXED,
@@ -181,16 +227,50 @@ static GLuint fixed_types[4] = {
 /**
  * Return a PIPE_FORMAT_x for the given GL datatype and size.
  */
-GLuint
+enum pipe_format
 st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
-                      GLboolean normalized)
+                      GLboolean normalized, GLboolean integer)
 {
    assert((type >= GL_BYTE && type <= GL_DOUBLE) ||
-          type == GL_FIXED || type == GL_HALF_FLOAT);
+          type == GL_FIXED || type == GL_HALF_FLOAT ||
+          type == GL_INT_2_10_10_10_REV ||
+          type == GL_UNSIGNED_INT_2_10_10_10_REV);
    assert(size >= 1);
    assert(size <= 4);
    assert(format == GL_RGBA || format == GL_BGRA);
 
+   if (type == GL_INT_2_10_10_10_REV ||
+       type == GL_UNSIGNED_INT_2_10_10_10_REV) {
+      assert(size == 4);
+      assert(!integer);
+
+      if (format == GL_BGRA) {
+         if (type == GL_INT_2_10_10_10_REV) {
+            if (normalized)
+               return PIPE_FORMAT_B10G10R10A2_SNORM;
+            else
+               return PIPE_FORMAT_B10G10R10A2_SSCALED;
+         } else {
+            if (normalized)
+               return PIPE_FORMAT_B10G10R10A2_UNORM;
+            else
+               return PIPE_FORMAT_B10G10R10A2_USCALED;
+         }
+      } else {
+         if (type == GL_INT_2_10_10_10_REV) {
+            if (normalized)
+               return PIPE_FORMAT_R10G10B10A2_SNORM;
+            else
+               return PIPE_FORMAT_R10G10B10A2_SSCALED;
+         } else {
+            if (normalized)
+               return PIPE_FORMAT_R10G10B10A2_UNORM;
+            else
+               return PIPE_FORMAT_R10G10B10A2_USCALED;
+         }
+      }
+   }
+
    if (format == GL_BGRA) {
       /* this is an odd-ball case */
       assert(type == GL_UNSIGNED_BYTE);
@@ -198,7 +278,18 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
       return PIPE_FORMAT_B8G8R8A8_UNORM;
    }
 
-   if (normalized) {
+   if (integer) {
+      switch (type) {
+      case GL_INT: return int_types_int[size-1];
+      case GL_SHORT: return short_types_int[size-1];
+      case GL_BYTE: return byte_types_int[size-1];
+      case GL_UNSIGNED_INT: return uint_types_int[size-1];
+      case GL_UNSIGNED_SHORT: return ushort_types_int[size-1];
+      case GL_UNSIGNED_BYTE: return ubyte_types_int[size-1];
+      default: assert(0); return 0;
+      }
+   }
+   else if (normalized) {
       switch (type) {
       case GL_DOUBLE: return double_types[size-1];
       case GL_FLOAT: return float_types[size-1];
@@ -211,7 +302,7 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
       case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1];
       case GL_FIXED: return fixed_types[size-1];
       default: assert(0); return 0;
-      }      
+      }
    }
    else {
       switch (type) {
@@ -226,19 +317,34 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
       case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1];
       case GL_FIXED: return fixed_types[size-1];
       default: assert(0); return 0;
-      }      
+      }
    }
-   return 0; /* silence compiler warning */
+   return PIPE_FORMAT_NONE; /* silence compiler warning */
 }
 
 
+/**
+ * Like vbo_all_varyings_in_vbos() but only per-vertex data counts (bug 38626):
+ * GL_FALSE if a strided, non-instanced array fails _mesa_is_bufferobj().
+ */
+static GLboolean
+all_varyings_in_vbos(const struct gl_client_array *arrays[])
+{
+   GLuint i;
+   /* every arrays[0..VERT_ATTRIB_MAX-1] entry is dereferenced below */
+   for (i = 0; i < VERT_ATTRIB_MAX; i++)
+      if (arrays[i]->StrideB &&
+          !arrays[i]->InstanceDivisor &&
+          !_mesa_is_bufferobj(arrays[i]->BufferObj))
+        return GL_FALSE;
 
+   return GL_TRUE;
+}
 
 
 /**
  * Examine the active arrays to determine if we have interleaved
  * vertex arrays all living in one VBO, or all living in user space.
- * \param userSpace  returns whether the arrays are in user space.
  */
 static GLboolean
 is_interleaved_arrays(const struct st_vertex_program *vp,
@@ -248,37 +354,35 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
    GLuint attr;
    const struct gl_buffer_object *firstBufObj = NULL;
    GLint firstStride = -1;
-   const GLubyte *client_addr = NULL;
+   const GLubyte *firstPtr = NULL;
+   GLboolean userSpaceBuffer = GL_FALSE;
 
    for (attr = 0; attr < vpv->num_inputs; attr++) {
       const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj;
-      const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */
+      const struct gl_client_array *array = arrays[mesaAttr];
+      const struct gl_buffer_object *bufObj = array->BufferObj;
+      const GLsizei stride = array->StrideB; /* in bytes */
 
-      if (firstStride < 0) {
+      if (attr == 0) {
+         /* save info about the first array */
          firstStride = stride;
-      }
-      else if (firstStride != stride) {
-         return GL_FALSE;
-      }
-
-      if (!bufObj || !bufObj->Name) {
-         /* Try to detect if the client-space arrays are
-          * "close" to each other.
-          */
-         if (!client_addr) {
-            client_addr = arrays[mesaAttr]->Ptr;
-         }
-         else if (abs(arrays[mesaAttr]->Ptr - client_addr) > firstStride) {
-            /* arrays start too far apart */
-            return GL_FALSE;
-         }
-      }
-      else if (!firstBufObj) {
+         firstPtr = array->Ptr;         
          firstBufObj = bufObj;
+         userSpaceBuffer = !bufObj || !bufObj->Name;
       }
-      else if (bufObj != firstBufObj) {
-         return GL_FALSE;
+      else {
+         /* check if other arrays interleave with the first, in same buffer */
+         if (stride != firstStride)
+            return GL_FALSE; /* strides don't match */
+
+         if (bufObj != firstBufObj)
+            return GL_FALSE; /* arrays in different VBOs */
+
+         if (abs(array->Ptr - firstPtr) > firstStride)
+            return GL_FALSE; /* arrays start too far apart */
+
+         if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
+            return GL_FALSE; /* mix of VBO and user-space arrays */
       }
    }
 
@@ -291,60 +395,105 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
  * or all live in user space.
  * \param vbuffer  returns vertex buffer info
  * \param velements  returns vertex element info
+ * \return GL_TRUE for success, GL_FALSE otherwise (probably out of memory)
  */
-static void
+static GLboolean
 setup_interleaved_attribs(struct gl_context *ctx,
                           const struct st_vertex_program *vp,
                           const struct st_vp_variant *vpv,
                           const struct gl_client_array **arrays,
                           struct pipe_vertex_buffer *vbuffer,
-                          struct pipe_vertex_element velements[],
-                          unsigned max_index)
+                          struct pipe_vertex_element velements[])
 {
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
    GLuint attr;
    const GLubyte *low_addr = NULL;
+   GLboolean usingVBO;      /* all arrays in a VBO? */
+   struct gl_buffer_object *bufobj;   /* taken from the first input's array */
+   GLsizei stride;                    /* likewise taken from the first input */
 
-   /* Find the lowest address. */
-   for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
-
-      low_addr = !low_addr ? start : MIN2(low_addr, start);
+   /* Find the lowest address of the arrays we're drawing,
+    * Init bufobj and stride.
+    */
+   if (vpv->num_inputs) {
+      const GLuint mesaAttr0 = vp->index_to_input[0];
+      const struct gl_client_array *array = arrays[mesaAttr0];
+
+      /* Since we're doing interleaved arrays, we know there'll be at most
+       * one buffer object and the stride will be the same for all arrays.
+       * Grab them now.
+       */
+      bufobj = array->BufferObj;
+      stride = array->StrideB;
+
+      low_addr = arrays[vp->index_to_input[0]]->Ptr;
+      /* fold the remaining inputs into the minimum start address */
+      for (attr = 1; attr < vpv->num_inputs; attr++) {
+         const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
+         low_addr = MIN2(low_addr, start);
+      }
+   }
+   else {
+      /* not sure we'll ever have zero inputs, but play it safe */
+      bufobj = NULL;
+      stride = 0;
+      low_addr = 0;
    }
 
+   /* are the arrays in a VBO (as opposed to user space)? */
+   usingVBO = _mesa_is_bufferobj(bufobj);
+
    for (attr = 0; attr < vpv->num_inputs; attr++) {
       const GLuint mesaAttr = vp->index_to_input[attr];
-      struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
-      struct st_buffer_object *stobj = st_buffer_object(bufobj);
-      GLsizei stride = arrays[mesaAttr]->StrideB;
+      const struct gl_client_array *array = arrays[mesaAttr];
+      unsigned src_offset = (unsigned) (array->Ptr - low_addr); /* offset from lowest array start */
+      GLuint element_size = array->_ElementSize; /* only read by the assert below */
 
-      if (attr == 0) {
-         if (bufobj && bufobj->Name) {
-            vbuffer->buffer = NULL;
-            pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
-            vbuffer->buffer_offset = pointer_to_offset(low_addr);
-         } else {
-            vbuffer->buffer =
-               pipe_user_buffer_create(pipe->screen, (void*)low_addr,
-                                       stride * (max_index + 1),
-                                      PIPE_BIND_VERTEX_BUFFER);
-            vbuffer->buffer_offset = 0;
-         }
-         vbuffer->stride = stride; /* in bytes */
-      }
+      assert(element_size == array->Size * _mesa_sizeof_type(array->Type));
 
-      velements[attr].src_offset =
-         (unsigned) (arrays[mesaAttr]->Ptr - low_addr);
-      velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
+      velements[attr].src_offset = src_offset;
+      velements[attr].instance_divisor = array->InstanceDivisor;
       velements[attr].vertex_buffer_index = 0;
-      velements[attr].src_format =
-         st_pipe_vertex_format(arrays[mesaAttr]->Type,
-                               arrays[mesaAttr]->Size,
-                               arrays[mesaAttr]->Format,
-                               arrays[mesaAttr]->Normalized);
+      velements[attr].src_format = st_pipe_vertex_format(array->Type,
+                                                         array->Size,
+                                                         array->Format,
+                                                         array->Normalized,
+                                                         array->Integer);
       assert(velements[attr].src_format);
    }
+
+   /*
+    * Return the vbuffer info and setup user-space attrib info, if needed.
+    */
+   if (vpv->num_inputs == 0) {
+      /* just defensive coding here */
+      vbuffer->buffer = NULL;
+      vbuffer->user_buffer = NULL;
+      vbuffer->buffer_offset = 0;
+      vbuffer->stride = 0;
+   }
+   else if (usingVBO) {
+      /* all interleaved arrays in a VBO */
+      struct st_buffer_object *stobj = st_buffer_object(bufobj);
+
+      if (!stobj || !stobj->buffer) {
+         /* probably out of memory (or zero-sized buffer) */
+         return GL_FALSE;
+      }
+
+      vbuffer->buffer = stobj->buffer;   /* plain pointer copy, no reference taken here */
+      vbuffer->user_buffer = NULL;
+      vbuffer->buffer_offset = pointer_to_offset(low_addr);
+      vbuffer->stride = stride;
+   }
+   else {
+      /* all interleaved arrays in user memory */
+      vbuffer->buffer = NULL;
+      vbuffer->user_buffer = low_addr;
+      vbuffer->buffer_offset = 0;
+      vbuffer->stride = stride;
+   }
+
+   return GL_TRUE;
 }
 
 
@@ -353,57 +502,59 @@ setup_interleaved_attribs(struct gl_context *ctx,
  * vertex attribute.
  * \param vbuffer  returns vertex buffer info
  * \param velements  returns vertex element info
+ * \return GL_TRUE for success, GL_FALSE otherwise (probably out of memory)
  */
-static void
+static GLboolean
 setup_non_interleaved_attribs(struct gl_context *ctx,
                               const struct st_vertex_program *vp,
                               const struct st_vp_variant *vpv,
                               const struct gl_client_array **arrays,
                               struct pipe_vertex_buffer vbuffer[],
-                              struct pipe_vertex_element velements[],
-                              unsigned max_index)
+                              struct pipe_vertex_element velements[])
 {
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
    GLuint attr;
 
    for (attr = 0; attr < vpv->num_inputs; attr++) {
       const GLuint mesaAttr = vp->index_to_input[attr];
-      struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
-      GLsizei stride = arrays[mesaAttr]->StrideB;
+      const struct gl_client_array *array = arrays[mesaAttr];
+      struct gl_buffer_object *bufobj = array->BufferObj;
+      GLsizei stride = array->StrideB;
+
+      assert(array->_ElementSize == array->Size * _mesa_sizeof_type(array->Type));
 
-      if (bufobj && bufobj->Name) {
+      if (_mesa_is_bufferobj(bufobj)) {
          /* Attribute data is in a VBO.
           * Recall that for VBOs, the gl_client_array->Ptr field is
           * really an offset from the start of the VBO, not a pointer.
           */
          struct st_buffer_object *stobj = st_buffer_object(bufobj);
-         assert(stobj->buffer);
 
-         vbuffer[attr].buffer = NULL;
-         pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer);
-         vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr);
+         if (!stobj || !stobj->buffer) {
+            /* probably out of memory (or zero-sized buffer) */
+            return GL_FALSE;
+         }
+
+         vbuffer[attr].buffer = stobj->buffer;
+         vbuffer[attr].user_buffer = NULL;
+         vbuffer[attr].buffer_offset = pointer_to_offset(array->Ptr);
       }
       else {
          /* wrap user data */
-         if (arrays[mesaAttr]->Ptr) {
-            vbuffer[attr].buffer = 
-              pipe_user_buffer_create(pipe->screen,
-                                      (void *) arrays[mesaAttr]->Ptr,
-                                      stride * (max_index + 1),
-                                      PIPE_BIND_VERTEX_BUFFER);
+         void *ptr;
+
+         if (array->Ptr) {
+            ptr = (void *) array->Ptr;
          }
          else {
             /* no array, use ctx->Current.Attrib[] value */
-            uint bytes = sizeof(ctx->Current.Attrib[0]);
-            vbuffer[attr].buffer = 
-              pipe_user_buffer_create(pipe->screen,
-                                      (void *) ctx->Current.Attrib[mesaAttr],
-                                      bytes,
-                                      PIPE_BIND_VERTEX_BUFFER);
+            ptr = (void *) ctx->Current.Attrib[mesaAttr];
             stride = 0;
          }
 
+         assert(ptr);
+
+         vbuffer[attr].buffer = NULL;
+         vbuffer[attr].user_buffer = ptr;
          vbuffer[attr].buffer_offset = 0;
       }
 
@@ -411,62 +562,49 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
       vbuffer[attr].stride = stride; /* in bytes */
 
       velements[attr].src_offset = 0;
-      velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
+      velements[attr].instance_divisor = array->InstanceDivisor;
       velements[attr].vertex_buffer_index = attr;
-      velements[attr].src_format
-         = st_pipe_vertex_format(arrays[mesaAttr]->Type,
-                                 arrays[mesaAttr]->Size,
-                                 arrays[mesaAttr]->Format,
-                                 arrays[mesaAttr]->Normalized);
+      velements[attr].src_format = st_pipe_vertex_format(array->Type,
+                                                         array->Size,
+                                                         array->Format,
+                                                         array->Normalized,
+                                                         array->Integer);
       assert(velements[attr].src_format);
    }
+
+   return GL_TRUE;
 }
 
 
 static void
-setup_index_buffer(struct gl_context *ctx,
+setup_index_buffer(struct st_context *st,
                    const struct _mesa_index_buffer *ib,
                    struct pipe_index_buffer *ibuffer)
 {
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
+   struct gl_buffer_object *bufobj = ib->obj;   /* ib is dereferenced unconditionally */
 
-   memset(ibuffer, 0, sizeof(*ibuffer));
-   if (ib) {
-      struct gl_buffer_object *bufobj = ib->obj;
-
-      switch (ib->type) {
-      case GL_UNSIGNED_INT:
-         ibuffer->index_size = 4;
-         break;
-      case GL_UNSIGNED_SHORT:
-         ibuffer->index_size = 2;
-         break;
-      case GL_UNSIGNED_BYTE:
-         ibuffer->index_size = 1;
-         break;
-      default:
-         assert(0);
-        return;
-      }
+   ibuffer->index_size = vbo_sizeof_ib_type(ib->type);   /* fields not assigned below keep the caller's values */
 
-      /* get/create the index buffer object */
-      if (bufobj && bufobj->Name) {
-         /* elements/indexes are in a real VBO */
-         struct st_buffer_object *stobj = st_buffer_object(bufobj);
-         pipe_resource_reference(&ibuffer->buffer, stobj->buffer);
-         ibuffer->offset = pointer_to_offset(ib->ptr);
-      }
-      else {
-         /* element/indicies are in user space memory */
-         ibuffer->buffer =
-            pipe_user_buffer_create(pipe->screen, (void *) ib->ptr,
-                                    ib->count * ibuffer->index_size,
-                                    PIPE_BIND_INDEX_BUFFER);
-      }
+   /* get/create the index buffer object */
+   if (_mesa_is_bufferobj(bufobj)) {
+      /* indices are in a real VBO */
+      ibuffer->buffer = st_buffer_object(bufobj)->buffer;
+      ibuffer->offset = pointer_to_offset(ib->ptr);
    }
+   else if (st->indexbuf_uploader) {   /* not a VBO, but an uploader exists: pass ib->ptr to it */
+      u_upload_data(st->indexbuf_uploader, 0, ib->count * ibuffer->index_size,
+                    ib->ptr, &ibuffer->offset, &ibuffer->buffer);
+      u_upload_unmap(st->indexbuf_uploader);
+   }
+   else {
+      /* indices are in user space memory */
+      ibuffer->user_buffer = ib->ptr;
+   }
+   /* hand the index buffer description to the CSO context */
+   cso_set_index_buffer(st->cso_context, ibuffer);
 }
 
+
 /**
  * Prior to drawing, check that any uniforms referenced by the
  * current shader have been set.  If a uniform has not been set,
@@ -488,18 +626,187 @@ check_uniforms(struct gl_context *ctx)
       if (shProg[j] == NULL || !shProg[j]->LinkStatus)
         continue;
 
-      for (i = 0; i < shProg[j]->Uniforms->NumUniforms; i++) {
-         const struct gl_uniform *u = &shProg[j]->Uniforms->Uniforms[i];
-         if (!u->Initialized) {
+      for (i = 0; i < shProg[j]->NumUserUniformStorage; i++) {
+         const struct gl_uniform_storage *u = &shProg[j]->UniformStorage[i];
+         if (!u->initialized) {
             _mesa_warning(ctx,
                           "Using shader with uninitialized uniform: %s",
-                          u->Name);
+                          u->name);
          }
       }
    }
 }
 
 
+/*
+ * Notes on primitive restart:
+ * The code below is used when the gallium driver does not support primitive
+ * restart itself.  We map the index buffer, find the restart indexes, unmap
+ * the index buffer then draw the sub-primitives delineated by the restarts.
+ * A couple possible optimizations:
+ * 1. Save the list of sub-primitive (start, count) values in a list attached
+ *    to the index buffer for re-use in subsequent draws.  The list would be
+ *    invalidated when the contents of the buffer changed.
+ * 2. If drawing triangle strips or quad strips, create a new index buffer
+ *    that uses duplicated vertices to render the disjoint strips as one
+ *    long strip.  We'd have to be careful to avoid using too much memory
+ *    for this.
+ * Finally, some apps might perform better if they don't use primitive restart
+ * at all rather than this fallback path.  Set MESA_EXTENSION_OVERRIDE to
+ * "-GL_NV_primitive_restart" to test that.
+ */
+
+
+struct sub_primitive
+{
+   unsigned start, count;   /* position of the first index and number of indices in the run */
+};
+
+
+/**
+ * Scan elements[start..end) for restart_index and return a malloc'ed array
+ * of (start,count) runs lying between restarts; empty runs are skipped.
+ * *num_sub_prims gets the run count (0, with NULL returned, if malloc fails).
+ */
+static struct sub_primitive *
+find_sub_primitives(const void *elements, unsigned element_size,
+                    unsigned start, unsigned end, unsigned restart_index,
+                    unsigned *num_sub_prims)
+{
+   const unsigned max_prims = end - start;
+   struct sub_primitive *sub_prims;
+   unsigned i, cur_start, cur_count, num;
+
+   sub_prims = (struct sub_primitive *)
+      malloc(max_prims * sizeof(struct sub_primitive));
+
+   if (!sub_prims) {
+      *num_sub_prims = 0;
+      return NULL;
+   }
+   /* cur_start/cur_count track the run currently being accumulated */
+   cur_start = start;
+   cur_count = 0;
+   num = 0;
+   /* a run is stored only when non-empty, so at most max_prims runs exist */
+#define SCAN_ELEMENTS(TYPE) \
+   for (i = start; i < end; i++) { \
+      if (((const TYPE *) elements)[i] == restart_index) { \
+         if (cur_count > 0) { \
+            assert(num < max_prims); \
+            sub_prims[num].start = cur_start; \
+            sub_prims[num].count = cur_count; \
+            num++; \
+         } \
+         cur_start = i + 1; \
+         cur_count = 0; \
+      } \
+      else { \
+         cur_count++; \
+      } \
+   } \
+   if (cur_count > 0) { \
+      assert(num < max_prims); \
+      sub_prims[num].start = cur_start; \
+      sub_prims[num].count = cur_count; \
+      num++; \
+   }
+
+   switch (element_size) {   /* bytes per index selects the element type */
+   case 1:
+      SCAN_ELEMENTS(ubyte);
+      break;
+   case 2:
+      SCAN_ELEMENTS(ushort);
+      break;
+   case 4:
+      SCAN_ELEMENTS(uint);
+      break;
+   default:
+      assert(0 && "bad index_size in find_sub_primitives()");
+   }
+
+#undef SCAN_ELEMENTS
+
+   *num_sub_prims = num;
+
+   return sub_prims;
+}
+
+
+/**
+ * Fallback for gallium drivers that lack primitive restart: scan the index
+ * data for orig_info->restart_index and issue one cso_draw_vbo() call per
+ * sub-primitive, repeated for every instance.  Returns silently on bad input.
+ */
+static void
+handle_fallback_primitive_restart(struct cso_context *cso,
+                                  struct pipe_context *pipe,
+                                  const struct _mesa_index_buffer *ib,
+                                  struct pipe_index_buffer *ibuffer,
+                                  struct pipe_draw_info *orig_info)
+{
+   const unsigned start = orig_info->start;
+   const unsigned count = orig_info->count;
+   struct pipe_draw_info info = *orig_info;
+   struct pipe_transfer *transfer = NULL;
+   unsigned instance, i;
+   const void *ptr = NULL;
+   struct sub_primitive *sub_prims;
+   unsigned num_sub_prims;
+
+   assert(info.indexed);
+   assert(ibuffer->buffer || ibuffer->user_buffer);
+   assert(ib);
+   /* release-build guard mirroring the asserts: either index source suffices */
+   if ((!ibuffer->buffer && !ibuffer->user_buffer) || !ib)
+      return;
+
+   info.primitive_restart = FALSE;
+   info.instance_count = 1;   /* instances are looped over explicitly below */
+
+   if (_mesa_is_bufferobj(ib->obj)) {   /* indices in a buffer object: map for reading */
+      ptr = pipe_buffer_map_range(pipe, ibuffer->buffer,
+                                  start * ibuffer->index_size, /* start */
+                                  count * ibuffer->index_size, /* length */
+                                  PIPE_TRANSFER_READ, &transfer);
+      if (!ptr)
+         return;
+
+      ptr = (uint8_t*)ptr + (ibuffer->offset - start * ibuffer->index_size);
+   }
+   else {
+      ptr = ib->ptr;
+      if (!ptr)
+         return;
+   }
+   /* on allocation failure this returns NULL with num_sub_prims == 0 */
+   sub_prims = find_sub_primitives(ptr, ibuffer->index_size,
+                                   0, count, orig_info->restart_index,
+                                   &num_sub_prims);
+
+   if (transfer)
+      pipe_buffer_unmap(pipe, transfer);
+
+   /* Now draw the sub primitives.
+    * Need to loop over instances as well to preserve draw order.
+    */
+   for (instance = 0; instance < orig_info->instance_count; instance++) {
+      info.start_instance = instance + orig_info->start_instance;
+      for (i = 0; i < num_sub_prims; i++) {
+         info.start = sub_prims[i].start;
+         info.count = sub_prims[i].count;
+         if (u_trim_pipe_prim(info.mode, &info.count)) {
+            cso_draw_vbo(cso, &info);
+         }
+      }
+   }
+
+   if (sub_prims)
+      free(sub_prims);
+}
+
+
 /**
  * Translate OpenGL primtive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to
  * the corresponding Gallium type.
@@ -513,8 +820,8 @@ translate_prim(const struct gl_context *ctx, unsigned prim)
    assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY);
 
    /* Avoid quadstrips if it's easy to do so:
-    * Note: it's imporant to do the correct trimming if we change the prim type!
-    * We do that wherever this function is called.
+    * Note: it's important to do the correct trimming if we change the
+    * prim type!  We do that wherever this function is called.
     */
    if (prim == GL_QUAD_STRIP &&
        ctx->Light.ShadeModel != GL_FLAT &&
@@ -526,10 +833,13 @@ translate_prim(const struct gl_context *ctx, unsigned prim)
 }
 
 
-static void
+/**
+ * Setup vertex arrays and buffers prior to drawing.
+ * \return GL_TRUE for success, GL_FALSE otherwise (probably out of memory)
+ */
+static GLboolean
 st_validate_varrays(struct gl_context *ctx,
-                    const struct gl_client_array **arrays,
-                    unsigned max_index)
+                    const struct gl_client_array **arrays)
 {
    struct st_context *st = st_context(ctx);
    const struct st_vertex_program *vp;
@@ -537,27 +847,33 @@ st_validate_varrays(struct gl_context *ctx,
    struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
    struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
    unsigned num_vbuffers, num_velements;
-   GLuint attr;
 
    /* must get these after state validation! */
    vp = st->vp;
    vpv = st->vp_variant;
 
    memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
+
    /*
     * Setup the vbuffer[] and velements[] arrays.
     */
    if (is_interleaved_arrays(vp, vpv, arrays)) {
-      setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
-                                max_index);
+      if (!setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer,
+                                     velements)) {
+         return GL_FALSE;
+      }
+
       num_vbuffers = 1;
       num_velements = vpv->num_inputs;
       if (num_velements == 0)
          num_vbuffers = 0;
    }
    else {
-      setup_non_interleaved_attribs(ctx, vp, vpv, arrays,
-                                    vbuffer, velements, max_index);
+      if (!setup_non_interleaved_attribs(ctx, vp, vpv, arrays,
+                                         vbuffer, velements)) {
+         return GL_FALSE;
+      }
+
       num_vbuffers = vpv->num_inputs;
       num_velements = vpv->num_inputs;
    }
@@ -565,13 +881,7 @@ st_validate_varrays(struct gl_context *ctx,
    cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
    cso_set_vertex_elements(st->cso_context, num_velements, velements);
 
-   /* unreference buffers (frees wrapped user-space buffer objects)
-    * This is OK, because the pipe driver should reference buffers by itself
-    * in set_vertex_buffers. */
-   for (attr = 0; attr < num_vbuffers; attr++) {
-      pipe_resource_reference(&vbuffer[attr].buffer, NULL);
-      assert(!vbuffer[attr].buffer);
-   }
+   return GL_TRUE;
 }
 
 
@@ -582,48 +892,37 @@ st_validate_varrays(struct gl_context *ctx,
  */
 void
 st_draw_vbo(struct gl_context *ctx,
-            const struct gl_client_array **arrays,
             const struct _mesa_prim *prims,
             GLuint nr_prims,
             const struct _mesa_index_buffer *ib,
            GLboolean index_bounds_valid,
             GLuint min_index,
-            GLuint max_index)
+            GLuint max_index,
+            struct gl_transform_feedback_object *tfb_vertcount)
 {
    struct st_context *st = st_context(ctx);
    struct pipe_context *pipe = st->pipe;
-   struct pipe_index_buffer ibuffer;
+   struct pipe_index_buffer ibuffer = {0};
    struct pipe_draw_info info;
+   const struct gl_client_array **arrays = ctx->Array._DrawArrays;
    unsigned i;
-   GLboolean new_array =
-         st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0;
+   GLboolean new_array;
 
    /* Mesa core state should have been validated already */
    assert(ctx->NewState == 0x0);
 
-   if (ib) {
-      /* Gallium probably doesn't want this in some cases. */
-      if (!index_bounds_valid)
-         if (!vbo_all_varyings_in_vbos(arrays))
-            vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
-   } else {
-      /* Get min/max index for non-indexed drawing. */
-      min_index = ~0;
-      max_index = 0;
-
-      for (i = 0; i < nr_prims; i++) {
-         min_index = MIN2(min_index, prims[i].start);
-         max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
-      }
-   }
+   /* Get Mesa driver state. */
+   st->dirty.st |= ctx->NewDriverState;
+   ctx->NewDriverState = 0;
+
+   new_array =
+      (st->dirty.st & (ST_NEW_VERTEX_ARRAYS | ST_NEW_VERTEX_PROGRAM)) ||
+      (st->dirty.mesa & (_NEW_PROGRAM | _NEW_BUFFER_OBJECT)) != 0;
 
    /* Validate state. */
    if (st->dirty.st) {
       GLboolean vertDataEdgeFlags;
 
-      /* sanity check for pointer arithmetic below */
-      assert(sizeof(arrays[0]->Ptr[0]) == 1);
-
       vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
                           arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
       if (vertDataEdgeFlags != st->vertdata_edgeflags) {
@@ -634,7 +933,10 @@ st_draw_vbo(struct gl_context *ctx,
       st_validate_state(st);
 
       if (new_array) {
-         st_validate_varrays(ctx, arrays, max_index);
+         if (!st_validate_varrays(ctx, arrays)) {
+            /* probably out of memory, no-op the draw call */
+            return;
+         }
       }
 
 #if 0
@@ -646,20 +948,35 @@ st_draw_vbo(struct gl_context *ctx,
 #endif
    }
 
-   setup_index_buffer(ctx, ib, &ibuffer);
-   pipe->set_index_buffer(pipe, &ibuffer);
-
    util_draw_init_info(&info);
    if (ib) {
+      /* Get index bounds for user buffers. */
+      if (!index_bounds_valid)
+         if (!all_varyings_in_vbos(arrays))
+            vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index,
+                                   nr_prims);
+
+      setup_index_buffer(st, ib, &ibuffer);
+
       info.indexed = TRUE;
       if (min_index != ~0 && max_index != ~0) {
          info.min_index = min_index;
          info.max_index = max_index;
       }
-   }
 
-   info.primitive_restart = st->ctx->Array.PrimitiveRestart;
-   info.restart_index = st->ctx->Array.RestartIndex;
+      /* The VBO module handles restart for the non-indexed GLDrawArrays
+       * so we only set these fields for indexed drawing:
+       */
+      info.primitive_restart = ctx->Array.PrimitiveRestart;
+      info.restart_index = ctx->Array.RestartIndex;
+   }
+   else {
+      /* Transform feedback drawing is always non-indexed. */
+      /* Set info.count_from_stream_output. */
+      if (tfb_vertcount) {
+         st_transform_feedback_draw_init(tfb_vertcount, &info);
+      }
+   }
 
    /* do actual drawing */
    for (i = 0; i < nr_prims; i++) {
@@ -673,15 +990,32 @@ st_draw_vbo(struct gl_context *ctx,
          info.max_index = info.start + info.count - 1;
       }
 
-      if (u_trim_pipe_prim(info.mode, &info.count))
-         pipe->draw_vbo(pipe, &info);
+      if (info.count_from_stream_output) {
+         cso_draw_vbo(st->cso_context, &info);
+      }
+      else if (info.primitive_restart) {
+         if (st->sw_primitive_restart) {
+            /* Handle primitive restart for drivers that don't support it */
+            handle_fallback_primitive_restart(st->cso_context, pipe, ib,
+                                              &ibuffer, &info);
+         }
+         else {
+            /* don't trim, restarts might be inside index list */
+            cso_draw_vbo(st->cso_context, &info);
+         }
+      }
+      else if (u_trim_pipe_prim(info.mode, &info.count))
+         cso_draw_vbo(st->cso_context, &info);
    }
 
-   pipe_resource_reference(&ibuffer.buffer, NULL);
+   if (ib && st->indexbuf_uploader && !_mesa_is_bufferobj(ib->obj)) {
+      pipe_resource_reference(&ibuffer.buffer, NULL);
+   }
 }
 
 
-void st_init_draw( struct st_context *st )
+void
+st_init_draw(struct st_context *st)
 {
    struct gl_context *ctx = st->ctx;
 
@@ -701,11 +1035,10 @@ void st_init_draw( struct st_context *st )
 }
 
 
-void st_destroy_draw( struct st_context *st )
+void
+st_destroy_draw(struct st_context *st)
 {
 #if FEATURE_feedback || FEATURE_rastpos
    draw_destroy(st->draw);
 #endif
 }
-
-