gallium: add PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
[mesa.git] / src / mesa / state_tracker / st_atom_array.c
index 87a0a17f1b9db8ae6667529ec298c4e6ca5e6b83..6af1355ee17bd41268208fa8100711f2b1e6443d 100644 (file)
@@ -1,7 +1,7 @@
 
 /**************************************************************************
  *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 VMware, Inc.
  * Copyright 2012 Marek Olšák <maraeo@gmail.com>
  * All Rights Reserved.
  *
@@ -32,7 +32,7 @@
  * Gallium vertex state and binds it.
  *
  * Authors:
- *   Keith Whitwell <keith@tungstengraphics.com>
+ *   Keith Whitwell <keithw@vmware.com>
  *   Marek Olšák <maraeo@gmail.com>
  */
 
 
 #include "cso_cache/cso_context.h"
 #include "util/u_math.h"
-
+#include "util/u_upload_mgr.h"
 #include "main/bufferobj.h"
 #include "main/glformats.h"
 
-
-static GLuint double_types[4] = {
-   PIPE_FORMAT_R64_FLOAT,
-   PIPE_FORMAT_R64G64_FLOAT,
-   PIPE_FORMAT_R64G64B64_FLOAT,
-   PIPE_FORMAT_R64G64B64A64_FLOAT
-};
-
-static GLuint float_types[4] = {
-   PIPE_FORMAT_R32_FLOAT,
-   PIPE_FORMAT_R32G32_FLOAT,
-   PIPE_FORMAT_R32G32B32_FLOAT,
-   PIPE_FORMAT_R32G32B32A32_FLOAT
-};
-
-static GLuint half_float_types[4] = {
-   PIPE_FORMAT_R16_FLOAT,
-   PIPE_FORMAT_R16G16_FLOAT,
-   PIPE_FORMAT_R16G16B16_FLOAT,
-   PIPE_FORMAT_R16G16B16A16_FLOAT
-};
-
-static GLuint uint_types_norm[4] = {
-   PIPE_FORMAT_R32_UNORM,
-   PIPE_FORMAT_R32G32_UNORM,
-   PIPE_FORMAT_R32G32B32_UNORM,
-   PIPE_FORMAT_R32G32B32A32_UNORM
-};
-
-static GLuint uint_types_scale[4] = {
-   PIPE_FORMAT_R32_USCALED,
-   PIPE_FORMAT_R32G32_USCALED,
-   PIPE_FORMAT_R32G32B32_USCALED,
-   PIPE_FORMAT_R32G32B32A32_USCALED
-};
-
-static GLuint uint_types_int[4] = {
-   PIPE_FORMAT_R32_UINT,
-   PIPE_FORMAT_R32G32_UINT,
-   PIPE_FORMAT_R32G32B32_UINT,
-   PIPE_FORMAT_R32G32B32A32_UINT
-};
-
-static GLuint int_types_norm[4] = {
-   PIPE_FORMAT_R32_SNORM,
-   PIPE_FORMAT_R32G32_SNORM,
-   PIPE_FORMAT_R32G32B32_SNORM,
-   PIPE_FORMAT_R32G32B32A32_SNORM
-};
-
-static GLuint int_types_scale[4] = {
-   PIPE_FORMAT_R32_SSCALED,
-   PIPE_FORMAT_R32G32_SSCALED,
-   PIPE_FORMAT_R32G32B32_SSCALED,
-   PIPE_FORMAT_R32G32B32A32_SSCALED
-};
-
-static GLuint int_types_int[4] = {
-   PIPE_FORMAT_R32_SINT,
-   PIPE_FORMAT_R32G32_SINT,
-   PIPE_FORMAT_R32G32B32_SINT,
-   PIPE_FORMAT_R32G32B32A32_SINT
-};
-
-static GLuint ushort_types_norm[4] = {
-   PIPE_FORMAT_R16_UNORM,
-   PIPE_FORMAT_R16G16_UNORM,
-   PIPE_FORMAT_R16G16B16_UNORM,
-   PIPE_FORMAT_R16G16B16A16_UNORM
-};
-
-static GLuint ushort_types_scale[4] = {
-   PIPE_FORMAT_R16_USCALED,
-   PIPE_FORMAT_R16G16_USCALED,
-   PIPE_FORMAT_R16G16B16_USCALED,
-   PIPE_FORMAT_R16G16B16A16_USCALED
-};
-
-static GLuint ushort_types_int[4] = {
-   PIPE_FORMAT_R16_UINT,
-   PIPE_FORMAT_R16G16_UINT,
-   PIPE_FORMAT_R16G16B16_UINT,
-   PIPE_FORMAT_R16G16B16A16_UINT
-};
-
-static GLuint short_types_norm[4] = {
-   PIPE_FORMAT_R16_SNORM,
-   PIPE_FORMAT_R16G16_SNORM,
-   PIPE_FORMAT_R16G16B16_SNORM,
-   PIPE_FORMAT_R16G16B16A16_SNORM
-};
-
-static GLuint short_types_scale[4] = {
-   PIPE_FORMAT_R16_SSCALED,
-   PIPE_FORMAT_R16G16_SSCALED,
-   PIPE_FORMAT_R16G16B16_SSCALED,
-   PIPE_FORMAT_R16G16B16A16_SSCALED
-};
-
-static GLuint short_types_int[4] = {
-   PIPE_FORMAT_R16_SINT,
-   PIPE_FORMAT_R16G16_SINT,
-   PIPE_FORMAT_R16G16B16_SINT,
-   PIPE_FORMAT_R16G16B16A16_SINT
-};
-
-static GLuint ubyte_types_norm[4] = {
-   PIPE_FORMAT_R8_UNORM,
-   PIPE_FORMAT_R8G8_UNORM,
-   PIPE_FORMAT_R8G8B8_UNORM,
-   PIPE_FORMAT_R8G8B8A8_UNORM
-};
-
-static GLuint ubyte_types_scale[4] = {
-   PIPE_FORMAT_R8_USCALED,
-   PIPE_FORMAT_R8G8_USCALED,
-   PIPE_FORMAT_R8G8B8_USCALED,
-   PIPE_FORMAT_R8G8B8A8_USCALED
-};
-
-static GLuint ubyte_types_int[4] = {
-   PIPE_FORMAT_R8_UINT,
-   PIPE_FORMAT_R8G8_UINT,
-   PIPE_FORMAT_R8G8B8_UINT,
-   PIPE_FORMAT_R8G8B8A8_UINT
-};
-
-static GLuint byte_types_norm[4] = {
-   PIPE_FORMAT_R8_SNORM,
-   PIPE_FORMAT_R8G8_SNORM,
-   PIPE_FORMAT_R8G8B8_SNORM,
-   PIPE_FORMAT_R8G8B8A8_SNORM
-};
-
-static GLuint byte_types_scale[4] = {
-   PIPE_FORMAT_R8_SSCALED,
-   PIPE_FORMAT_R8G8_SSCALED,
-   PIPE_FORMAT_R8G8B8_SSCALED,
-   PIPE_FORMAT_R8G8B8A8_SSCALED
-};
-
-static GLuint byte_types_int[4] = {
-   PIPE_FORMAT_R8_SINT,
-   PIPE_FORMAT_R8G8_SINT,
-   PIPE_FORMAT_R8G8B8_SINT,
-   PIPE_FORMAT_R8G8B8A8_SINT
-};
-
-static GLuint fixed_types[4] = {
-   PIPE_FORMAT_R32_FIXED,
-   PIPE_FORMAT_R32G32_FIXED,
-   PIPE_FORMAT_R32G32B32_FIXED,
-   PIPE_FORMAT_R32G32B32A32_FIXED
+/* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1]
+ *
+ * Second index: 0 = neither integer nor normalized, 1 = normalized,
+ * 2 = integer.  Rows for the float-like types (GL_FLOAT, GL_DOUBLE,
+ * GL_HALF_FLOAT, GL_FIXED) list the same formats for 0 and 1 and have
+ * no integer entry.  GL_2_BYTES..GL_4_BYTES are all-zero filler rows
+ * that keep later rows addressable by gltype - GL_BYTE.  Every slot
+ * not listed explicitly is zero-initialized.
+ */
+static const uint16_t vertex_formats[][4][4] = {
+   { /* GL_BYTE */
+      {
+         PIPE_FORMAT_R8_SSCALED,
+         PIPE_FORMAT_R8G8_SSCALED,
+         PIPE_FORMAT_R8G8B8_SSCALED,
+         PIPE_FORMAT_R8G8B8A8_SSCALED
+      },
+      {
+         PIPE_FORMAT_R8_SNORM,
+         PIPE_FORMAT_R8G8_SNORM,
+         PIPE_FORMAT_R8G8B8_SNORM,
+         PIPE_FORMAT_R8G8B8A8_SNORM
+      },
+      {
+         PIPE_FORMAT_R8_SINT,
+         PIPE_FORMAT_R8G8_SINT,
+         PIPE_FORMAT_R8G8B8_SINT,
+         PIPE_FORMAT_R8G8B8A8_SINT
+      },
+   },
+   { /* GL_UNSIGNED_BYTE */
+      {
+         PIPE_FORMAT_R8_USCALED,
+         PIPE_FORMAT_R8G8_USCALED,
+         PIPE_FORMAT_R8G8B8_USCALED,
+         PIPE_FORMAT_R8G8B8A8_USCALED
+      },
+      {
+         PIPE_FORMAT_R8_UNORM,
+         PIPE_FORMAT_R8G8_UNORM,
+         PIPE_FORMAT_R8G8B8_UNORM,
+         PIPE_FORMAT_R8G8B8A8_UNORM
+      },
+      {
+         PIPE_FORMAT_R8_UINT,
+         PIPE_FORMAT_R8G8_UINT,
+         PIPE_FORMAT_R8G8B8_UINT,
+         PIPE_FORMAT_R8G8B8A8_UINT
+      },
+   },
+   { /* GL_SHORT */
+      {
+         PIPE_FORMAT_R16_SSCALED,
+         PIPE_FORMAT_R16G16_SSCALED,
+         PIPE_FORMAT_R16G16B16_SSCALED,
+         PIPE_FORMAT_R16G16B16A16_SSCALED
+      },
+      {
+         PIPE_FORMAT_R16_SNORM,
+         PIPE_FORMAT_R16G16_SNORM,
+         PIPE_FORMAT_R16G16B16_SNORM,
+         PIPE_FORMAT_R16G16B16A16_SNORM
+      },
+      {
+         PIPE_FORMAT_R16_SINT,
+         PIPE_FORMAT_R16G16_SINT,
+         PIPE_FORMAT_R16G16B16_SINT,
+         PIPE_FORMAT_R16G16B16A16_SINT
+      },
+   },
+   { /* GL_UNSIGNED_SHORT */
+      {
+         PIPE_FORMAT_R16_USCALED,
+         PIPE_FORMAT_R16G16_USCALED,
+         PIPE_FORMAT_R16G16B16_USCALED,
+         PIPE_FORMAT_R16G16B16A16_USCALED
+      },
+      {
+         PIPE_FORMAT_R16_UNORM,
+         PIPE_FORMAT_R16G16_UNORM,
+         PIPE_FORMAT_R16G16B16_UNORM,
+         PIPE_FORMAT_R16G16B16A16_UNORM
+      },
+      {
+         PIPE_FORMAT_R16_UINT,
+         PIPE_FORMAT_R16G16_UINT,
+         PIPE_FORMAT_R16G16B16_UINT,
+         PIPE_FORMAT_R16G16B16A16_UINT
+      },
+   },
+   { /* GL_INT */
+      {
+         PIPE_FORMAT_R32_SSCALED,
+         PIPE_FORMAT_R32G32_SSCALED,
+         PIPE_FORMAT_R32G32B32_SSCALED,
+         PIPE_FORMAT_R32G32B32A32_SSCALED
+      },
+      {
+         PIPE_FORMAT_R32_SNORM,
+         PIPE_FORMAT_R32G32_SNORM,
+         PIPE_FORMAT_R32G32B32_SNORM,
+         PIPE_FORMAT_R32G32B32A32_SNORM
+      },
+      {
+         PIPE_FORMAT_R32_SINT,
+         PIPE_FORMAT_R32G32_SINT,
+         PIPE_FORMAT_R32G32B32_SINT,
+         PIPE_FORMAT_R32G32B32A32_SINT
+      },
+   },
+   { /* GL_UNSIGNED_INT */
+      {
+         PIPE_FORMAT_R32_USCALED,
+         PIPE_FORMAT_R32G32_USCALED,
+         PIPE_FORMAT_R32G32B32_USCALED,
+         PIPE_FORMAT_R32G32B32A32_USCALED
+      },
+      {
+         PIPE_FORMAT_R32_UNORM,
+         PIPE_FORMAT_R32G32_UNORM,
+         PIPE_FORMAT_R32G32B32_UNORM,
+         PIPE_FORMAT_R32G32B32A32_UNORM
+      },
+      {
+         PIPE_FORMAT_R32_UINT,
+         PIPE_FORMAT_R32G32_UINT,
+         PIPE_FORMAT_R32G32B32_UINT,
+         PIPE_FORMAT_R32G32B32A32_UINT
+      },
+   },
+   { /* GL_FLOAT */
+      {
+         PIPE_FORMAT_R32_FLOAT,
+         PIPE_FORMAT_R32G32_FLOAT,
+         PIPE_FORMAT_R32G32B32_FLOAT,
+         PIPE_FORMAT_R32G32B32A32_FLOAT
+      },
+      {
+         PIPE_FORMAT_R32_FLOAT,
+         PIPE_FORMAT_R32G32_FLOAT,
+         PIPE_FORMAT_R32G32B32_FLOAT,
+         PIPE_FORMAT_R32G32B32A32_FLOAT
+      },
+   },
+   {{0}}, /* GL_2_BYTES */
+   {{0}}, /* GL_3_BYTES */
+   {{0}}, /* GL_4_BYTES */
+   { /* GL_DOUBLE */
+      {
+         PIPE_FORMAT_R64_FLOAT,
+         PIPE_FORMAT_R64G64_FLOAT,
+         PIPE_FORMAT_R64G64B64_FLOAT,
+         PIPE_FORMAT_R64G64B64A64_FLOAT
+      },
+      {
+         PIPE_FORMAT_R64_FLOAT,
+         PIPE_FORMAT_R64G64_FLOAT,
+         PIPE_FORMAT_R64G64B64_FLOAT,
+         PIPE_FORMAT_R64G64B64A64_FLOAT
+      },
+   },
+   { /* GL_HALF_FLOAT */
+      {
+         PIPE_FORMAT_R16_FLOAT,
+         PIPE_FORMAT_R16G16_FLOAT,
+         PIPE_FORMAT_R16G16B16_FLOAT,
+         PIPE_FORMAT_R16G16B16A16_FLOAT
+      },
+      {
+         PIPE_FORMAT_R16_FLOAT,
+         PIPE_FORMAT_R16G16_FLOAT,
+         PIPE_FORMAT_R16G16B16_FLOAT,
+         PIPE_FORMAT_R16G16B16A16_FLOAT
+      },
+   },
+   { /* GL_FIXED */
+      {
+         PIPE_FORMAT_R32_FIXED,
+         PIPE_FORMAT_R32G32_FIXED,
+         PIPE_FORMAT_R32G32B32_FIXED,
+         PIPE_FORMAT_R32G32B32A32_FIXED
+      },
+      {
+         PIPE_FORMAT_R32_FIXED,
+         PIPE_FORMAT_R32G32_FIXED,
+         PIPE_FORMAT_R32G32B32_FIXED,
+         PIPE_FORMAT_R32G32B32A32_FIXED
+      },
+   },
 };
 
 
@@ -211,95 +239,75 @@ enum pipe_format
 st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
                       GLboolean normalized, GLboolean integer)
 {
-   assert((type >= GL_BYTE && type <= GL_DOUBLE) ||
-          type == GL_FIXED || type == GL_HALF_FLOAT ||
-          type == GL_INT_2_10_10_10_REV ||
-          type == GL_UNSIGNED_INT_2_10_10_10_REV);
-   assert(size >= 1);
-   assert(size <= 4);
+   unsigned index;
+
+   assert(size >= 1 && size <= 4);
    assert(format == GL_RGBA || format == GL_BGRA);
 
-   if (type == GL_INT_2_10_10_10_REV ||
-       type == GL_UNSIGNED_INT_2_10_10_10_REV) {
-      assert(size == 4);
-      assert(!integer);
+   switch (type) {
+   case GL_HALF_FLOAT_OES:
+      type = GL_HALF_FLOAT; /* use the GL_HALF_FLOAT row of the table */
+      break;
+
+   case GL_INT_2_10_10_10_REV:
+      assert(size == 4 && !integer);
 
       if (format == GL_BGRA) {
-         if (type == GL_INT_2_10_10_10_REV) {
-            if (normalized)
-               return PIPE_FORMAT_B10G10R10A2_SNORM;
-            else
-               return PIPE_FORMAT_B10G10R10A2_SSCALED;
-         } else {
-            if (normalized)
-               return PIPE_FORMAT_B10G10R10A2_UNORM;
-            else
-               return PIPE_FORMAT_B10G10R10A2_USCALED;
-         }
+         if (normalized)
+            return PIPE_FORMAT_B10G10R10A2_SNORM;
+         else
+            return PIPE_FORMAT_B10G10R10A2_SSCALED;
       } else {
-         if (type == GL_INT_2_10_10_10_REV) {
-            if (normalized)
-               return PIPE_FORMAT_R10G10B10A2_SNORM;
-            else
-               return PIPE_FORMAT_R10G10B10A2_SSCALED;
-         } else {
-            if (normalized)
-               return PIPE_FORMAT_R10G10B10A2_UNORM;
-            else
-               return PIPE_FORMAT_R10G10B10A2_USCALED;
-         }
+         if (normalized)
+            return PIPE_FORMAT_R10G10B10A2_SNORM;
+         else
+            return PIPE_FORMAT_R10G10B10A2_SSCALED;
       }
-   }
+      break; /* not reached: every branch above returns */
 
-   if (format == GL_BGRA) {
-      /* this is an odd-ball case */
-      assert(type == GL_UNSIGNED_BYTE);
-      assert(normalized);
-      return PIPE_FORMAT_B8G8R8A8_UNORM;
-   }
+   case GL_UNSIGNED_INT_2_10_10_10_REV:
+      assert(size == 4 && !integer);
 
-   if (integer) {
-      switch (type) {
-      case GL_INT: return int_types_int[size-1];
-      case GL_SHORT: return short_types_int[size-1];
-      case GL_BYTE: return byte_types_int[size-1];
-      case GL_UNSIGNED_INT: return uint_types_int[size-1];
-      case GL_UNSIGNED_SHORT: return ushort_types_int[size-1];
-      case GL_UNSIGNED_BYTE: return ubyte_types_int[size-1];
-      default: assert(0); return 0;
-      }
-   }
-   else if (normalized) {
-      switch (type) {
-      case GL_DOUBLE: return double_types[size-1];
-      case GL_FLOAT: return float_types[size-1];
-      case GL_HALF_FLOAT: return half_float_types[size-1];
-      case GL_INT: return int_types_norm[size-1];
-      case GL_SHORT: return short_types_norm[size-1];
-      case GL_BYTE: return byte_types_norm[size-1];
-      case GL_UNSIGNED_INT: return uint_types_norm[size-1];
-      case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1];
-      case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1];
-      case GL_FIXED: return fixed_types[size-1];
-      default: assert(0); return 0;
+      if (format == GL_BGRA) {
+         if (normalized)
+            return PIPE_FORMAT_B10G10R10A2_UNORM;
+         else
+            return PIPE_FORMAT_B10G10R10A2_USCALED;
+      } else {
+         if (normalized)
+            return PIPE_FORMAT_R10G10B10A2_UNORM;
+         else
+            return PIPE_FORMAT_R10G10B10A2_USCALED;
       }
-   }
-   else {
-      switch (type) {
-      case GL_DOUBLE: return double_types[size-1];
-      case GL_FLOAT: return float_types[size-1];
-      case GL_HALF_FLOAT: return half_float_types[size-1];
-      case GL_INT: return int_types_scale[size-1];
-      case GL_SHORT: return short_types_scale[size-1];
-      case GL_BYTE: return byte_types_scale[size-1];
-      case GL_UNSIGNED_INT: return uint_types_scale[size-1];
-      case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1];
-      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1];
-      case GL_FIXED: return fixed_types[size-1];
-      default: assert(0); return 0;
+      break; /* not reached: every branch above returns */
+
+   case GL_UNSIGNED_INT_10F_11F_11F_REV:
+      assert(size == 3 && !integer && format == GL_RGBA);
+      return PIPE_FORMAT_R11G11B10_FLOAT;
+
+   case GL_UNSIGNED_BYTE:
+      if (format == GL_BGRA) {
+         /* this is an odd-ball case: BGRA-ordered unsigned bytes are
+          * expected to be normalized and always map to B8G8R8A8_UNORM,
+          * bypassing the table lookup below
+          */
+         assert(normalized);
+         return PIPE_FORMAT_B8G8R8A8_UNORM;
       }
+      break;
    }
-   return PIPE_FORMAT_NONE; /* silence compiler warning */
+
+   index = integer*2 + normalized; /* 0: neither, 1: normalized, 2: integer */
+   assert(index <= 2); /* integer and normalized together is rejected */
+   assert(type >= GL_BYTE && type <= GL_FIXED);
+   return vertex_formats[type - GL_BYTE][index][size-1];
+}
+
+/**
+ * Look up the vertex array for a Mesa attribute index.
+ *
+ * \param arrays    table of vertex arrays, indexed by Mesa attribute
+ * \param mesaAttr  attribute index, or ST_DOUBLE_ATTRIB_PLACEHOLDER
+ * \return arrays[mesaAttr], or NULL when \p mesaAttr is the placeholder
+ */
+static const struct gl_vertex_array *
+get_client_array(const struct gl_vertex_array **arrays,
+                 unsigned mesaAttr)
+{
+   /* st_program uses 0xffffffff to denote a double placeholder attribute;
+    * such an entry has no array to index.
+    */
+   return (mesaAttr != ST_DOUBLE_ATTRIB_PLACEHOLDER) ? arrays[mesaAttr]
+                                                     : NULL;
 }
 
 /**
@@ -308,8 +316,8 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
  */
 static GLboolean
 is_interleaved_arrays(const struct st_vertex_program *vp,
-                      const struct st_vp_variant *vpv,
-                      const struct gl_client_array **arrays)
+                      const struct gl_vertex_array **arrays,
+                      unsigned num_inputs)
 {
    GLuint attr;
    const struct gl_buffer_object *firstBufObj = NULL;
@@ -317,12 +325,22 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
    const GLubyte *firstPtr = NULL;
    GLboolean userSpaceBuffer = GL_FALSE;
 
-   for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_client_array *array = arrays[mesaAttr];
-      const struct gl_buffer_object *bufObj = array->BufferObj;
-      const GLsizei stride = array->StrideB; /* in bytes */
+   for (attr = 0; attr < num_inputs; attr++) {
+      const struct gl_vertex_array *array;
+      const struct gl_buffer_object *bufObj;
+      GLsizei stride;
+
+      array = get_client_array(arrays, vp->index_to_input[attr]);
+      if (!array)
+        continue;
+
+      stride = array->StrideB; /* in bytes */
 
+      /* To keep things simple, don't allow interleaved zero-stride attribs. */
+      if (stride == 0)
+         return false;
+
+      bufObj = array->BufferObj;
       if (attr == 0) {
          /* save info about the first array */
          firstStride = stride;
@@ -338,7 +356,7 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
          if (bufObj != firstBufObj)
             return GL_FALSE; /* arrays in different VBOs */
 
-         if (abs(array->Ptr - firstPtr) > firstStride)
+         if (llabs(array->Ptr - firstPtr) > firstStride)
             return GL_FALSE; /* arrays start too far apart */
 
          if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer)
@@ -349,19 +367,98 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
    return GL_TRUE;
 }
 
+/**
+ * Fill in one pipe_vertex_element.
+ *
+ * \param velement          element to write
+ * \param src_offset        value stored in src_offset
+ * \param format            value stored in src_format; must be non-zero
+ * \param instance_divisor  value stored in instance_divisor
+ * \param vbo_index         value stored in vertex_buffer_index
+ */
+static void init_velement(struct pipe_vertex_element *velement,
+                          int src_offset, int format,
+                          int instance_divisor, int vbo_index)
+{
+   /* The four stores are independent of each other. */
+   velement->vertex_buffer_index = vbo_index;
+   velement->instance_divisor = instance_divisor;
+   velement->src_offset = src_offset;
+   velement->src_format = format;
+
+   /* Check the stored value, not the argument. */
+   assert(velement->src_format);
+}
+
+/**
+ * Write the vertex element(s) for one attribute, lowering doubles to
+ * 32-bit unsigned integer formats.
+ *
+ * A non-double attribute produces one element using \p format.  A double
+ * attribute produces one element, plus a second one when the next program
+ * input is ST_DOUBLE_ATTRIB_PLACEHOLDER.
+ *
+ * \param vp                vertex program (num_inputs, index_to_input)
+ * \param velements         element array to write into
+ * \param src_offset        offset of the attribute data
+ * \param format            format used when \p doubles is false
+ * \param instance_divisor  forwarded to every element written
+ * \param vbo_index         forwarded to every element written
+ * \param nr_components     number of components of the attribute
+ * \param doubles           whether the attribute holds doubles
+ * \param attr_idx          in: first element slot to write;
+ *                          out: slot following the last one written
+ */
+static void init_velement_lowered(const struct st_vertex_program *vp,
+                                  struct pipe_vertex_element *velements,
+                                  int src_offset, int format,
+                                  int instance_divisor, int vbo_index,
+                                  int nr_components, GLboolean doubles,
+                                  GLuint *attr_idx)
+{
+   int slot = *attr_idx;
+
+   if (!doubles) {
+      /* Ordinary attribute: a single element in the caller's format. */
+      init_velement(&velements[slot], src_offset,
+                    format, instance_divisor, vbo_index);
+      *attr_idx = slot + 1;
+      return;
+   }
+
+   /* First slot: one double fits R32G32_UINT, more need all four
+    * 32-bit channels.
+    */
+   init_velement(&velements[slot], src_offset,
+                 nr_components < 2 ? PIPE_FORMAT_R32G32_UINT
+                                   : PIPE_FORMAT_R32G32B32A32_UINT,
+                 instance_divisor, vbo_index);
+   slot++;
+
+   if (slot < vp->num_inputs &&
+       vp->index_to_input[slot] == ST_DOUBLE_ATTRIB_PLACEHOLDER) {
+      /* Defaults cover nr_components < 3, where the values are undefined
+       * and conservative dummy values are filled in.
+       */
+      int second_offset = src_offset;
+      int second_format = PIPE_FORMAT_R32G32_UINT;
+
+      if (nr_components >= 3) {
+         /* Components 3 and 4 start after the first four 32-bit words. */
+         second_offset = src_offset + 4 * sizeof(float);
+         if (nr_components != 3)
+            second_format = PIPE_FORMAT_R32G32B32A32_UINT;
+      }
+
+      init_velement(&velements[slot], second_offset, second_format,
+                    instance_divisor, vbo_index);
+      slot++;
+   }
+
+   *attr_idx = slot;
+}
+
+/**
+ * Bind vertex buffers and vertex elements through the CSO context.
+ *
+ * Buffer slots bound by the previous call but not covered by
+ * \p num_vbuffers are unbound, and st->last_num_vbuffers is updated.
+ *
+ * \param st             state tracker context
+ * \param vbuffers       vertex buffers to bind starting at slot 0
+ * \param num_vbuffers   number of entries in \p vbuffers
+ * \param velements      vertex elements to bind
+ * \param num_velements  number of entries in \p velements
+ */
+static void
+set_vertex_attribs(struct st_context *st,
+                   struct pipe_vertex_buffer *vbuffers,
+                   unsigned num_vbuffers,
+                   struct pipe_vertex_element *velements,
+                   unsigned num_velements)
+{
+   cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffers);
+
+   /* Unbind remaining buffers, if any. */
+   if (num_vbuffers < st->last_num_vbuffers) {
+      const unsigned first_stale = num_vbuffers;
+
+      cso_set_vertex_buffers(st->cso_context, first_stale,
+                             st->last_num_vbuffers - first_stale, NULL);
+   }
+   st->last_num_vbuffers = num_vbuffers;
+
+   cso_set_vertex_elements(st->cso_context, num_velements, velements);
+}
+
 /**
  * Set up for drawing interleaved arrays that all live in one VBO
  * or all live in user space.
  * \param vbuffer  returns vertex buffer info
  * \param velements  returns vertex element info
  */
-static boolean
-setup_interleaved_attribs(const struct st_vertex_program *vp,
-                          const struct st_vp_variant *vpv,
-                          const struct gl_client_array **arrays,
-                          struct pipe_vertex_buffer *vbuffer,
-                          struct pipe_vertex_element velements[])
+static void
+setup_interleaved_attribs(struct st_context *st,
+                          const struct st_vertex_program *vp,
+                          const struct gl_vertex_array **arrays,
+                          unsigned num_inputs)
 {
+   struct pipe_vertex_buffer vbuffer;
+   struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
    GLuint attr;
    const GLubyte *low_addr = NULL;
    GLboolean usingVBO;      /* all arrays in a VBO? */
@@ -371,9 +468,11 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
    /* Find the lowest address of the arrays we're drawing,
     * Init bufobj and stride.
     */
-   if (vpv->num_inputs) {
-      const GLuint mesaAttr0 = vp->index_to_input[0];
-      const struct gl_client_array *array = arrays[mesaAttr0];
+   if (num_inputs) {
+      const struct gl_vertex_array *array;
+
+      array = get_client_array(arrays, vp->index_to_input[0]);
+      assert(array);
 
       /* Since we're doing interleaved arrays, we know there'll be at most
        * one buffer object and the stride will be the same for all arrays.
@@ -384,8 +483,12 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
 
       low_addr = arrays[vp->index_to_input[0]]->Ptr;
 
-      for (attr = 1; attr < vpv->num_inputs; attr++) {
-         const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
+      for (attr = 1; attr < num_inputs; attr++) {
+         const GLubyte *start;
+         array = get_client_array(arrays, vp->index_to_input[attr]);
+         if (!array)
+            continue;
+         start = array->Ptr;
          low_addr = MIN2(low_addr, start);
       }
    }
@@ -399,56 +502,66 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
    /* are the arrays in user space? */
    usingVBO = _mesa_is_bufferobj(bufobj);
 
-   for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_client_array *array = arrays[mesaAttr];
-      unsigned src_offset = (unsigned) (array->Ptr - low_addr);
+   for (attr = 0; attr < num_inputs;) {
+      const struct gl_vertex_array *array;
+      unsigned src_offset;
+      unsigned src_format;
+
+      array = get_client_array(arrays, vp->index_to_input[attr]);
+      assert(array);
 
+      src_offset = (unsigned) (array->Ptr - low_addr);
       assert(array->_ElementSize ==
              _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
 
-      velements[attr].src_offset = src_offset;
-      velements[attr].instance_divisor = array->InstanceDivisor;
-      velements[attr].vertex_buffer_index = 0;
-      velements[attr].src_format = st_pipe_vertex_format(array->Type,
-                                                         array->Size,
-                                                         array->Format,
-                                                         array->Normalized,
-                                                         array->Integer);
-      assert(velements[attr].src_format);
+      src_format = st_pipe_vertex_format(array->Type,
+                                         array->Size,
+                                         array->Format,
+                                         array->Normalized,
+                                         array->Integer);
+
+      init_velement_lowered(vp, velements, src_offset, src_format,
+                            array->InstanceDivisor, 0,
+                            array->Size, array->Doubles, &attr);
    }
 
    /*
     * Return the vbuffer info and setup user-space attrib info, if needed.
     */
-   if (vpv->num_inputs == 0) {
+   if (num_inputs == 0) {
       /* just defensive coding here */
-      vbuffer->buffer = NULL;
-      vbuffer->user_buffer = NULL;
-      vbuffer->buffer_offset = 0;
-      vbuffer->stride = 0;
+      vbuffer.buffer.resource = NULL;
+      vbuffer.is_user_buffer = false;
+      vbuffer.buffer_offset = 0;
+      vbuffer.stride = 0;
    }
    else if (usingVBO) {
       /* all interleaved arrays in a VBO */
       struct st_buffer_object *stobj = st_buffer_object(bufobj);
 
       if (!stobj || !stobj->buffer) {
-         return FALSE; /* out-of-memory error probably */
+         st->vertex_array_out_of_memory = true;
+         return; /* out-of-memory error probably */
       }
 
-      vbuffer->buffer = stobj->buffer;
-      vbuffer->user_buffer = NULL;
-      vbuffer->buffer_offset = pointer_to_offset(low_addr);
-      vbuffer->stride = stride;
+      vbuffer.buffer.resource = stobj->buffer;
+      vbuffer.is_user_buffer = false;
+      vbuffer.buffer_offset = pointer_to_offset(low_addr);
+      vbuffer.stride = stride;
    }
    else {
       /* all interleaved arrays in user memory */
-      vbuffer->buffer = NULL;
-      vbuffer->user_buffer = low_addr;
-      vbuffer->buffer_offset = 0;
-      vbuffer->stride = stride;
+      vbuffer.buffer.user = low_addr;
+      vbuffer.is_user_buffer = !!low_addr; /* if NULL, then unbind */
+      vbuffer.buffer_offset = 0;
+      vbuffer.stride = stride;
+
+      if (low_addr)
+         st->draw_needs_minmax_index = true;
    }
-   return TRUE;
+
+   set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0,
+                      velements, num_inputs);
 }
 
 /**
@@ -457,88 +570,128 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
  * \param vbuffer  returns vertex buffer info
  * \param velements  returns vertex element info
  */
-static boolean
+static void
 setup_non_interleaved_attribs(struct st_context *st,
                               const struct st_vertex_program *vp,
-                              const struct st_vp_variant *vpv,
-                              const struct gl_client_array **arrays,
-                              struct pipe_vertex_buffer vbuffer[],
-                              struct pipe_vertex_element velements[])
+                              const struct gl_vertex_array **arrays,
+                              unsigned num_inputs)
 {
    struct gl_context *ctx = st->ctx;
+   struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
+   struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
+   unsigned num_vbuffers = 0;
+   /* Bitmask of vbuffer[] slots holding a reference taken by an upload;
+    * those references are dropped before returning.
+    */
+   unsigned unref_buffers = 0;
+   /* Use const_uploader for zero-stride vertex attributes, because
+    * it may use a better memory placement than stream_uploader.
+    * The reason is that zero-stride attributes can be fetched many
+    * times (thousands of times), so a better placement is going to
+    * perform better.
+    */
+   struct u_upload_mgr *zero_stride_uploader =
+      st->can_bind_const_buffer_as_vertex ? st->pipe->const_uploader :
+                                            st->pipe->stream_uploader;
    GLuint attr;
 
-   for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_client_array *array = arrays[mesaAttr];
-      struct gl_buffer_object *bufobj = array->BufferObj;
-      GLsizei stride = array->StrideB;
+   /* attr is advanced by init_velement_lowered(), which may consume two
+    * element slots for a double attribute.
+    */
+   for (attr = 0; attr < num_inputs;) {
+      const unsigned mesaAttr = vp->index_to_input[attr];
+      const struct gl_vertex_array *array;
+      struct gl_buffer_object *bufobj;
+      GLsizei stride;
+      unsigned src_format;
+      unsigned bufidx;
+
+      array = get_client_array(arrays, mesaAttr);
+      assert(array);
+
+      bufidx = num_vbuffers++;
 
+      stride = array->StrideB;
+      bufobj = array->BufferObj;
       assert(array->_ElementSize ==
              _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
 
       if (_mesa_is_bufferobj(bufobj)) {
          /* Attribute data is in a VBO.
-          * Recall that for VBOs, the gl_client_array->Ptr field is
+          * Recall that for VBOs, the gl_vertex_array->Ptr field is
           * really an offset from the start of the VBO, not a pointer.
           */
          struct st_buffer_object *stobj = st_buffer_object(bufobj);
 
          if (!stobj || !stobj->buffer) {
-            return FALSE; /* out-of-memory error probably */
+            st->vertex_array_out_of_memory = true;
+            /* out-of-memory error probably; still release the zero-stride
+             * buffers uploaded by earlier iterations
+             */
+            goto out_unref;
          }
 
-         vbuffer[attr].buffer = stobj->buffer;
-         vbuffer[attr].user_buffer = NULL;
-         vbuffer[attr].buffer_offset = pointer_to_offset(array->Ptr);
+         vbuffer[bufidx].buffer.resource = stobj->buffer;
+         vbuffer[bufidx].is_user_buffer = false;
+         vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr);
       }
       else {
-         /* wrap user data */
-         void *ptr;
+         if (stride == 0) {
+            unsigned size = array->_ElementSize;
+            /* This is optimal for GPU cache line usage if the upload size
+             * is <= cache line size.
+             */
+            unsigned alignment = util_next_power_of_two(size);
+            /* no array pointer: use the ctx->Current.Attrib[] value */
+            void *ptr = array->Ptr ? (void*)array->Ptr :
+                                     (void*)ctx->Current.Attrib[mesaAttr];
+
+            vbuffer[bufidx].is_user_buffer = false;
+            vbuffer[bufidx].buffer.resource = NULL;
+
+            /* Only the attribute's own element size is uploaded. */
+            u_upload_data(zero_stride_uploader,
+                          0, size, alignment, ptr,
+                          &vbuffer[bufidx].buffer_offset,
+                          &vbuffer[bufidx].buffer.resource);
+            unref_buffers |= 1u << bufidx;
+         } else {
+            assert(array->Ptr);
+            vbuffer[bufidx].buffer.user = array->Ptr;
+            vbuffer[bufidx].is_user_buffer = true;
+            vbuffer[bufidx].buffer_offset = 0;
 
-         if (array->Ptr) {
-            ptr = (void *) array->Ptr;
-         }
-         else {
-            /* no array, use ctx->Current.Attrib[] value */
-            ptr = (void *) ctx->Current.Attrib[mesaAttr];
-            stride = 0;
+            if (!array->InstanceDivisor)
+               st->draw_needs_minmax_index = true;
          }
-
-         assert(ptr);
-
-         vbuffer[attr].buffer = NULL;
-         vbuffer[attr].user_buffer = ptr;
-         vbuffer[attr].buffer_offset = 0;
       }
 
       /* common-case setup */
-      vbuffer[attr].stride = stride; /* in bytes */
-
-      velements[attr].src_offset = 0;
-      velements[attr].instance_divisor = array->InstanceDivisor;
-      velements[attr].vertex_buffer_index = attr;
-      velements[attr].src_format = st_pipe_vertex_format(array->Type,
-                                                         array->Size,
-                                                         array->Format,
-                                                         array->Normalized,
-                                                         array->Integer);
-      assert(velements[attr].src_format);
+      vbuffer[bufidx].stride = stride; /* in bytes */
+
+      src_format = st_pipe_vertex_format(array->Type,
+                                         array->Size,
+                                         array->Format,
+                                         array->Normalized,
+                                         array->Integer);
+
+      init_velement_lowered(vp, velements, 0, src_format,
+                            array->InstanceDivisor, bufidx,
+                            array->Size, array->Doubles, &attr);
+   }
+
+   if (!ctx->Const.AllowMappedBuffersDuringExecution) {
+      u_upload_unmap(st->pipe->stream_uploader);
+      /* Zero-stride data may have gone through const_uploader instead;
+       * unmap that one too when it was actually used.
+       */
+      if (unref_buffers && zero_stride_uploader != st->pipe->stream_uploader)
+         u_upload_unmap(zero_stride_uploader);
+   }
+
+   set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);
+
+out_unref:
+   /* Unreference uploaded zero-stride vertex buffers. */
+   while (unref_buffers) {
+      unsigned i = u_bit_scan(&unref_buffers);
+      pipe_resource_reference(&vbuffer[i].buffer.resource, NULL);
    }
-   return TRUE;
 }
 
-static void update_array(struct st_context *st)
+void st_update_array(struct st_context *st)
 {
    struct gl_context *ctx = st->ctx;
-   const struct gl_client_array **arrays = ctx->Array._DrawArrays;
+   const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
    const struct st_vertex_program *vp;
-   const struct st_vp_variant *vpv;
-   struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
-   struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
-   unsigned num_vbuffers, num_velements;
+   unsigned num_inputs;
 
    st->vertex_array_out_of_memory = FALSE;
+   st->draw_needs_minmax_index = false;
 
    /* No drawing has been done yet, so do nothing. */
    if (!arrays)
@@ -546,51 +699,10 @@ static void update_array(struct st_context *st)
 
    /* vertex program validation must be done before this */
    vp = st->vp;
-   vpv = st->vp_variant;
-
-   memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
-
-   /*
-    * Setup the vbuffer[] and velements[] arrays.
-    */
-   if (is_interleaved_arrays(vp, vpv, arrays)) {
-      if (!setup_interleaved_attribs(vp, vpv, arrays, vbuffer, velements)) {
-         st->vertex_array_out_of_memory = TRUE;
-         return;
-      }
-
-      num_vbuffers = 1;
-      num_velements = vpv->num_inputs;
-      if (num_velements == 0)
-         num_vbuffers = 0;
-   }
-   else {
-      if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer,
-                                         velements)) {
-         st->vertex_array_out_of_memory = TRUE;
-         return;
-      }
-
-      num_vbuffers = vpv->num_inputs;
-      num_velements = vpv->num_inputs;
-   }
+   num_inputs = st->vp_variant->num_inputs;
 
-   cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer);
-   if (st->last_num_vbuffers > num_vbuffers) {
-      /* Unbind remaining buffers, if any. */
-      cso_set_vertex_buffers(st->cso_context, num_vbuffers,
-                             st->last_num_vbuffers - num_vbuffers, NULL);
-   }
-   st->last_num_vbuffers = num_vbuffers;
-   cso_set_vertex_elements(st->cso_context, num_velements, velements);
+   if (is_interleaved_arrays(vp, arrays, num_inputs))
+      setup_interleaved_attribs(st, vp, arrays, num_inputs);
+   else
+      setup_non_interleaved_attribs(st, vp, arrays, num_inputs);
 }
-
-
-const struct st_tracked_state st_update_array = {
-   "st_update_array",                                  /* name */
-   {                                                   /* dirty */
-      0,                                                /* mesa */
-      ST_NEW_VERTEX_ARRAYS | ST_NEW_VERTEX_PROGRAM,     /* st */
-   },
-   update_array                                                /* update */
-};