ilo: introduce vertex element CSO
author Chia-I Wu <olvaffe@gmail.com>
Fri, 31 May 2013 18:00:55 +0000 (02:00 +0800)
committer Chia-I Wu <olvaffe@gmail.com>
Fri, 7 Jun 2013 03:13:15 +0000 (11:13 +0800)
Introduce ilo_ve_cso and initialize it in create_vertex_elements_state().
This commit goes a step further by setting up mappings from HW VB to PIPE VB,
which we failed to do previously.  That allows us to support instanced
rendering.

src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
src/gallium/drivers/ilo/ilo_gpe.h
src/gallium/drivers/ilo/ilo_gpe_gen6.c
src/gallium/drivers/ilo/ilo_gpe_gen6.h
src/gallium/drivers/ilo/ilo_screen.c
src/gallium/drivers/ilo/ilo_state.c

index a504acd586a3cb6b7987aa35c1625e6d1d4fb389..6c5125128c735a6ddbabcfd24e7b1d4e03fc98b1 100644 (file)
@@ -402,9 +402,9 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p,
    }
 
    /* 3DSTATE_VERTEX_BUFFERS */
-   if (DIRTY(VERTEX_BUFFERS)) {
+   if (DIRTY(VERTEX_BUFFERS) || DIRTY(VERTEX_ELEMENTS)) {
       p->gen6_3DSTATE_VERTEX_BUFFERS(p->dev,
-            ilo->vb.states, NULL, ilo->vb.enabled_mask, p->cp);
+            ilo->vb.states, ilo->vb.enabled_mask, ilo->ve, p->cp);
    }
 
    /* 3DSTATE_VERTEX_ELEMENTS */
@@ -425,8 +425,7 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p,
          prepend_generate_ids = (info->has_instanceid || info->has_vertexid);
       }
 
-      p->gen6_3DSTATE_VERTEX_ELEMENTS(p->dev,
-            ve->states, ve->count,
+      p->gen6_3DSTATE_VERTEX_ELEMENTS(p->dev, ve,
             last_velement_edgeflag, prepend_generate_ids, p->cp);
    }
 }
index dc4e80eb97eee67ee211426b4a369a975377a9a3..ae813b1e83903f253a6bb4f35b86561c8b57a047 100644 (file)
@@ -62,9 +62,18 @@ struct ilo_ib_state {
    struct pipe_index_buffer state;
 };
 
+struct ilo_ve_cso {
+   /* VERTEX_ELEMENT_STATE */
+   uint32_t payload[2];
+};
+
 struct ilo_ve_state {
-   struct pipe_vertex_element states[PIPE_MAX_ATTRIBS];
+   struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
    unsigned count;
+
+   unsigned instance_divisors[PIPE_MAX_ATTRIBS];
+   unsigned vb_mapping[PIPE_MAX_ATTRIBS];
+   unsigned vb_count;
 };
 
 struct ilo_so_state {
@@ -190,6 +199,12 @@ struct ilo_global_binding {
    unsigned count;
 };
 
+void
+ilo_gpe_init_ve(const struct ilo_dev_info *dev,
+                unsigned num_states,
+                const struct pipe_vertex_element *states,
+                struct ilo_ve_state *ve);
+
 void
 ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
                          const struct pipe_viewport_state *state,
index a585819961f3a8bc063497fc3f4c63ce1cec3046..3167bd692ee725b8746b9d092a81d31d496a086a 100644 (file)
@@ -712,12 +712,13 @@ gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
 static void
 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
                                  const struct pipe_vertex_buffer *vbuffers,
-                                 const int *instance_divisors,
-                                 uint32_t vbuffer_mask,
+                                 uint64_t vbuffer_mask,
+                                 const struct ilo_ve_state *ve,
                                  struct ilo_cp *cp)
 {
    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
    uint8_t cmd_len;
+   unsigned hw_idx;
 
    ILO_GPE_VALID_GEN(dev, 6, 7);
 
@@ -725,27 +726,34 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
     * From the Sandy Bridge PRM, volume 2 part 1, page 82:
     *
     *     "From 1 to 33 VBs can be specified..."
-    *
-    * Because of the type of vbuffer_mask, this is always the case.
     */
    assert(vbuffer_mask <= (1UL << 33));
 
    if (!vbuffer_mask)
       return;
 
-   cmd_len = 4 * util_bitcount(vbuffer_mask) + 1;
+   cmd_len = 1;
+
+   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
+
+      if (vbuffer_mask & (1 << pipe_idx))
+         cmd_len += 4;
+   }
 
    ilo_cp_begin(cp, cmd_len);
    ilo_cp_write(cp, cmd | (cmd_len - 2));
 
-   while (vbuffer_mask) {
-      const int index = u_bit_scan(&vbuffer_mask);
-      const struct pipe_vertex_buffer *vb = &vbuffers[index];
-      const int instance_divisor =
-         (instance_divisors) ? instance_divisors[index] : 0;
+   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+      const unsigned instance_divisor = ve->instance_divisors[hw_idx];
+      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
+      const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx];
       uint32_t dw;
 
-      dw = index << GEN6_VB0_INDEX_SHIFT;
+      if (!(vbuffer_mask & (1 << pipe_idx)))
+         continue;
+
+      dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
 
       if (instance_divisor)
          dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
@@ -781,17 +789,164 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
    ilo_cp_end(cp);
 }
 
+static void
+ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
+                    struct ilo_ve_cso *cso)
+{
+   int format;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+    *
+    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
+    *        valid VERTEX_ELEMENT structure.
+    *
+    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
+    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
+    *
+    *      - The Source Element Format must be set to the UINT format.
+    *
+    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
+    *        primitives.  Software may elect to convert QUADLIST primitives
+    *        to some set of corresponding edge-flag-supported primitive
+    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
+    */
+
+   cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
+   cso->payload[1] =
+         BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
+         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
+         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
+
+   /*
+    * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
+    * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
+    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
+    *
+    * Since all the hardware cares about is whether the flags are zero or not,
+    * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
+    */
+   format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
+   if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
+      STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
+            BRW_SURFACEFORMAT_R32_FLOAT - 1);
+
+      cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
+   }
+   else {
+      assert(format == BRW_SURFACEFORMAT_R8_UINT);
+   }
+}
+
+static void
+ve_init_cso_with_components(const struct ilo_dev_info *dev,
+                            int comp0, int comp1, int comp2, int comp3,
+                            struct ilo_ve_cso *cso)
+{
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+   cso->payload[0] = GEN6_VE0_VALID;
+   cso->payload[1] =
+         comp0 << BRW_VE1_COMPONENT_0_SHIFT |
+         comp1 << BRW_VE1_COMPONENT_1_SHIFT |
+         comp2 << BRW_VE1_COMPONENT_2_SHIFT |
+         comp3 << BRW_VE1_COMPONENT_3_SHIFT;
+}
+
+static void
+ve_init_cso(const struct ilo_dev_info *dev,
+            const struct pipe_vertex_element *state,
+            unsigned vb_index,
+            struct ilo_ve_cso *cso)
+{
+   int comp[4] = {
+      BRW_VE1_COMPONENT_STORE_SRC,
+      BRW_VE1_COMPONENT_STORE_SRC,
+      BRW_VE1_COMPONENT_STORE_SRC,
+      BRW_VE1_COMPONENT_STORE_SRC,
+   };
+   int format;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   switch (util_format_get_nr_components(state->src_format)) {
+   case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
+   case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
+   case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
+                     BRW_VE1_COMPONENT_STORE_1_INT :
+                     BRW_VE1_COMPONENT_STORE_1_FLT;
+   }
+
+   format = ilo_translate_vertex_format(state->src_format);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+   cso->payload[0] =
+      vb_index << GEN6_VE0_INDEX_SHIFT |
+      GEN6_VE0_VALID |
+      format << BRW_VE0_FORMAT_SHIFT |
+      state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
+
+   cso->payload[1] =
+         comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
+         comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
+         comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
+         comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
+}
+
+void
+ilo_gpe_init_ve(const struct ilo_dev_info *dev,
+                unsigned num_states,
+                const struct pipe_vertex_element *states,
+                struct ilo_ve_state *ve)
+{
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ve->count = num_states;
+   ve->vb_count = 0;
+
+   for (i = 0; i < num_states; i++) {
+      const unsigned pipe_idx = states[i].vertex_buffer_index;
+      const unsigned instance_divisor = states[i].instance_divisor;
+      unsigned hw_idx;
+
+      /*
+       * map the pipe vb to the hardware vb, which has a fixed instance
+       * divisor
+       */
+      for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+         if (ve->vb_mapping[hw_idx] == pipe_idx &&
+             ve->instance_divisors[hw_idx] == instance_divisor)
+            break;
+      }
+
+      /* create one if there is no matching hardware vb */
+      if (hw_idx >= ve->vb_count) {
+         hw_idx = ve->vb_count++;
+
+         ve->vb_mapping[hw_idx] = pipe_idx;
+         ve->instance_divisors[hw_idx] = instance_divisor;
+      }
+
+      ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
+   }
+}
+
 static void
 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
-                                  const struct pipe_vertex_element *velements,
-                                  int num_velements,
+                                  const struct ilo_ve_state *ve,
                                   bool last_velement_edgeflag,
                                   bool prepend_generated_ids,
                                   struct ilo_cp *cp)
 {
    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
    uint8_t cmd_len;
-   int format, i;
+   unsigned i;
 
    ILO_GPE_VALID_GEN(dev, 6, 7);
 
@@ -800,118 +955,58 @@ gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
     *
     *     "Up to 34 (DevSNB+) vertex elements are supported."
     */
-   assert(num_velements + prepend_generated_ids <= 34);
+   assert(ve->count + prepend_generated_ids <= 34);
 
-   if (!num_velements && !prepend_generated_ids) {
-      cmd_len = 3;
-      format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+   if (!ve->count && !prepend_generated_ids) {
+      struct ilo_ve_cso dummy;
+
+      ve_init_cso_with_components(dev,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_1_FLT,
+            &dummy);
 
+      cmd_len = 3;
       ilo_cp_begin(cp, cmd_len);
       ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp,
-            0 << GEN6_VE0_INDEX_SHIFT |
-            GEN6_VE0_VALID |
-            format << BRW_VE0_FORMAT_SHIFT |
-            0 << BRW_VE0_SRC_OFFSET_SHIFT);
-      ilo_cp_write(cp,
-            BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT |
-            BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT |
-            BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
-            BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT);
+      ilo_cp_write_multi(cp, dummy.payload, 2);
       ilo_cp_end(cp);
 
       return;
    }
 
-   cmd_len = 2 * (num_velements + prepend_generated_ids) + 1;
+   cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
 
    ilo_cp_begin(cp, cmd_len);
    ilo_cp_write(cp, cmd | (cmd_len - 2));
 
    if (prepend_generated_ids) {
-      ilo_cp_write(cp, GEN6_VE0_VALID);
-      ilo_cp_write(cp,
-            BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT |
-            BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT |
-            BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
-            BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT);
-   }
-
-   for (i = 0; i < num_velements; i++) {
-      const struct pipe_vertex_element *ve = &velements[i];
-      int comp[4] = {
-         BRW_VE1_COMPONENT_STORE_SRC,
-         BRW_VE1_COMPONENT_STORE_SRC,
-         BRW_VE1_COMPONENT_STORE_SRC,
-         BRW_VE1_COMPONENT_STORE_SRC,
-      };
-      int edgeflag_enable;
-
-      if (last_velement_edgeflag && i == num_velements - 1) {
-         /*
-          * From the Sandy Bridge PRM, volume 2 part 1, page 94:
-          *
-          *     "* This bit (Edge Flag Enable) must only be ENABLED on the
-          *        last valid VERTEX_ELEMENT structure.
-          *
-          *      * When set, Component 0 Control must be set to
-          *        VFCOMP_STORE_SRC, and Component 1-3 Control must be set to
-          *        VFCOMP_NOSTORE.
-          *
-          *      * The Source Element Format must be set to the UINT format.
-          *
-          *      * [DevSNB]: Edge Flags are not supported for QUADLIST
-          *        primitives.  Software may elect to convert QUADLIST
-          *        primitives to some set of corresponding edge-flag-supported
-          *        primitive types (e.g., POLYGONs) prior to submission to the
-          *        3D pipeline."
-          *
-          * Only a limitied set of primitive types could have Edge Flag Enable
-          * set.  The caller should not set last_velement_edgeflag for such
-          * primitive types.
-          */
-         comp[1] = BRW_VE1_COMPONENT_NOSTORE;
-         comp[2] = BRW_VE1_COMPONENT_NOSTORE;
-         comp[3] = BRW_VE1_COMPONENT_NOSTORE;
+      struct ilo_ve_cso gen_ids;
 
-         switch (ve->src_format) {
-         case PIPE_FORMAT_R32_FLOAT:
-            format = ilo_translate_vertex_format(PIPE_FORMAT_R32_UINT);
-            break;
-         default:
-            assert(ve->src_format == PIPE_FORMAT_R8_UINT);
-            format = ilo_translate_vertex_format(ve->src_format);
-            break;
-         }
+      ve_init_cso_with_components(dev,
+            BRW_VE1_COMPONENT_STORE_VID,
+            BRW_VE1_COMPONENT_STORE_IID,
+            BRW_VE1_COMPONENT_NOSTORE,
+            BRW_VE1_COMPONENT_NOSTORE,
+            &gen_ids);
 
-         edgeflag_enable = GEN6_VE0_EDGE_FLAG_ENABLE;
-      }
-      else {
-         switch (util_format_get_nr_components(ve->src_format)) {
-         case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
-         case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
-         case 3: comp[3] = (util_format_is_pure_integer(ve->src_format)) ?
-                           BRW_VE1_COMPONENT_STORE_1_INT :
-                           BRW_VE1_COMPONENT_STORE_1_FLT;
-         }
+      ilo_cp_write_multi(cp, gen_ids.payload, 2);
+   }
 
-         format = ilo_translate_vertex_format(ve->src_format);
+   if (last_velement_edgeflag) {
+      struct ilo_ve_cso edgeflag;
 
-         edgeflag_enable = 0;
-      }
+      for (i = 0; i < ve->count - 1; i++)
+         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
 
-      ilo_cp_write(cp,
-            ve->vertex_buffer_index << GEN6_VE0_INDEX_SHIFT |
-            GEN6_VE0_VALID |
-            format << BRW_VE0_FORMAT_SHIFT |
-            edgeflag_enable |
-            ve->src_offset << BRW_VE0_SRC_OFFSET_SHIFT);
-
-      ilo_cp_write(cp,
-            comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
-            comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
-            comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
-            comp[3] << BRW_VE1_COMPONENT_3_SHIFT);
+      edgeflag = ve->cso[i];
+      ve_set_cso_edgeflag(dev, &edgeflag);
+      ilo_cp_write_multi(cp, edgeflag.payload, 2);
+   }
+   else {
+      for (i = 0; i < ve->count; i++)
+         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
    }
 
    ilo_cp_end(cp);
index c7cd7b3a53ec15959de3ef9b77d325d5abd02fec..5c94e7def262b62e660aacc920b1a6fc53000019 100644 (file)
@@ -195,14 +195,13 @@ typedef void
 typedef void
 (*ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS)(const struct ilo_dev_info *dev,
                                        const struct pipe_vertex_buffer *vbuffers,
-                                       const int *instance_divisors,
-                                       uint32_t vbuffer_mask,
+                                       uint64_t vbuffer_mask,
+                                       const struct ilo_ve_state *ve,
                                        struct ilo_cp *cp);
 
 typedef void
 (*ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS)(const struct ilo_dev_info *dev,
-                                        const struct pipe_vertex_element *velements,
-                                        int num_elements,
+                                        const struct ilo_ve_state *ve,
                                         bool last_velement_edgeflag,
                                         bool prepend_generated_ids,
                                         struct ilo_cp *cp);
index c4a5e9444b69c3d8d171abd08f30c3d023177e6d..9e95bb947c3b2da8bc4ebe4cce7018b5a7891085 100644 (file)
@@ -347,7 +347,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return false;
    case PIPE_CAP_TGSI_INSTANCEID:
    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
-      return false; /* TODO */
+      return true;
    case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
       return false;
    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
index c032e177673e0052cfd7eb9564c254fc1d4382a5..502297e559186d57c061c9abf29297f8585ca1ad 100644 (file)
@@ -438,13 +438,13 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe,
                                  unsigned num_elements,
                                  const struct pipe_vertex_element *elements)
 {
+   struct ilo_context *ilo = ilo_context(pipe);
    struct ilo_ve_state *ve;
 
    ve = MALLOC_STRUCT(ilo_ve_state);
    assert(ve);
 
-   memcpy(ve->states, elements, sizeof(*elements) * num_elements);
-   ve->count = num_elements;
+   ilo_gpe_init_ve(ilo->dev, num_elements, elements, ve);
 
    return ve;
 }