ilo: embed ilo_state_urb in ilo_state_vector
author Chia-I Wu <olvaffe@gmail.com>
Thu, 28 May 2015 05:43:56 +0000 (13:43 +0800)
committer Chia-I Wu <olvaffe@gmail.com>
Sun, 14 Jun 2015 17:07:09 +0000 (01:07 +0800)
src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
src/gallium/drivers/ilo/ilo_blitter.h
src/gallium/drivers/ilo/ilo_blitter_rectlist.c
src/gallium/drivers/ilo/ilo_render.c
src/gallium/drivers/ilo/ilo_render_gen.h
src/gallium/drivers/ilo/ilo_render_gen6.c
src/gallium/drivers/ilo/ilo_render_gen7.c
src/gallium/drivers/ilo/ilo_state.c
src/gallium/drivers/ilo/ilo_state.h

index 3a2522186be9e3e22e0e34a1952aff41a3ffd35c..2a475cb15ca4075e31643d2bcd45f0ef7b7a339e 100644 (file)
 #include "ilo_state_3d.h"
 #include "ilo_state_sampler.h"
 #include "ilo_state_sol.h"
+#include "ilo_state_urb.h"
 #include "ilo_builder.h"
 
 static inline void
 gen6_3DSTATE_URB(struct ilo_builder *builder,
-                 int vs_total_size, int gs_total_size,
-                 int vs_entry_size, int gs_entry_size)
+                 const struct ilo_state_urb *urb)
 {
    const uint8_t cmd_len = 3;
-   const int row_size = 128; /* 1024 bits */
-   int vs_alloc_size, gs_alloc_size;
-   int vs_num_entries, gs_num_entries;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 6, 6);
-
-   /* in 1024-bit URB rows */
-   vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
-   gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
-
-   /* the valid range is [1, 5] */
-   if (!vs_alloc_size)
-      vs_alloc_size = 1;
-   if (!gs_alloc_size)
-      gs_alloc_size = 1;
-   assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
-
-   /* the valid range is [24, 256] in multiples of 4 */
-   vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
-   if (vs_num_entries > 256)
-      vs_num_entries = 256;
-   assert(vs_num_entries >= 24);
-
-   /* the valid range is [0, 256] in multiples of 4 */
-   gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
-   if (gs_num_entries > 256)
-      gs_num_entries = 256;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
-   dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
-           vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
-   dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
-           (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
+   /* see urb_set_gen6_3DSTATE_URB() */
+   dw[1] = urb->urb[0];
+   dw[2] = urb->urb[1];
 }
 
 static inline void
-gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
-                                 int subop, int offset, int size)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
+                                    const struct ilo_state_urb *urb)
 {
-   const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
-                        GEN6_RENDER_SUBTYPE_3D |
-                        subop;
    const uint8_t cmd_len = 2;
-   const int slice_count = ((ilo_dev_gen(builder->dev) == ILO_GEN(7.5) &&
-                             builder->dev->gt == 3) ||
-                            ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 2 : 1;
    uint32_t *dw;
-   int end;
-
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
-
-   /* VS, HS, DS, GS, and PS variants */
-   assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
-          subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
-    *
-    *     "(A table that says the maximum size of each constant buffer is
-    *      16KB")
-    *
-    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
-    *
-    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
-    *      may not exceed the maximum value of the Constant Buffer Size."
-    *
-    * Thus, the valid range of buffer end is [0KB, 16KB].
-    */
-   end = (offset + size) / 1024;
-   if (end > 16 * slice_count) {
-      assert(!"invalid constant buffer end");
-      end = 16 * slice_count;
-   }
-
-   /* the valid range of buffer offset is [0KB, 15KB] */
-   offset = (offset + 1023) / 1024;
-   if (offset > 15 * slice_count) {
-      assert(!"invalid constant buffer offset");
-      offset = 15 * slice_count;
-   }
-
-   if (offset > end) {
-      assert(!size);
-      offset = end;
-   }
-
-   /* the valid range of buffer size is [0KB, 15KB] */
-   size = end - offset;
-   if (size > 15 * slice_count) {
-      assert(!"invalid constant buffer size");
-      size = 15 * slice_count;
-   }
-
-   assert(offset % slice_count == 0 && size % slice_count == 0);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = cmd | (cmd_len - 2);
-   dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
-           size;
-}
-
-static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
-                                    int offset, int size)
-{
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[0];
 }
 
 static inline void
 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
-                                    int offset, int size)
+                                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[1];
 }
 
 static inline void
 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
-                                    int offset, int size)
+                                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_DS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[2];
 }
 
 static inline void
 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
-                                    int offset, int size)
+                                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size);
-}
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
 
-static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
-                                    int offset, int size)
-{
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_GS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[3];
 }
 
 static inline void
-gen7_3dstate_urb(struct ilo_builder *builder,
-                 int subop, int offset, int size,
-                 int entry_size)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
+                                    const struct ilo_state_urb *urb)
 {
-   const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
-                        GEN6_RENDER_SUBTYPE_3D |
-                        subop;
    const uint8_t cmd_len = 2;
-   const int row_size = 64; /* 512 bits */
-   int alloc_size, num_entries, min_entries, max_entries;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
-
-   /* VS, HS, DS, and GS variants */
-   assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
-          subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);
-
-   /* in multiples of 8KB */
-   assert(offset % 8192 == 0);
-   offset /= 8192;
-
-   /* in multiple of 512-bit rows */
-   alloc_size = (entry_size + row_size - 1) / row_size;
-   if (!alloc_size)
-      alloc_size = 1;
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
-    *
-    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
-    *      cause performance to decrease due to banking in the URB. Element
-    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
-    */
-   if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
-      alloc_size = 6;
-
-   /* in multiples of 8 */
-   num_entries = (size / row_size / alloc_size) & ~7;
-
-   switch (subop) {
-   case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
-      switch (ilo_dev_gen(builder->dev)) {
-      case ILO_GEN(8):
-         max_entries = 2560;
-         min_entries = 64;
-         break;
-      case ILO_GEN(7.5):
-         max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
-         min_entries = (builder->dev->gt >= 2) ? 64 : 32;
-         break;
-      case ILO_GEN(7):
-      default:
-         max_entries = (builder->dev->gt == 2) ? 704 : 512;
-         min_entries = 32;
-         break;
-      }
-
-      assert(num_entries >= min_entries);
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
-      max_entries = (builder->dev->gt == 2) ? 64 : 32;
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
-      if (num_entries)
-         assert(num_entries >= 138);
-      break;
-   case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
-      switch (ilo_dev_gen(builder->dev)) {
-      case ILO_GEN(8):
-         max_entries = 960;
-         break;
-      case ILO_GEN(7.5):
-         max_entries = (builder->dev->gt >= 2) ? 640 : 256;
-         break;
-      case ILO_GEN(7):
-      default:
-         max_entries = (builder->dev->gt == 2) ? 320 : 192;
-         break;
-      }
-
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   default:
-      break;
-   }
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = cmd | (cmd_len - 2);
-   dw[1] = offset << GEN7_URB_DW1_OFFSET__SHIFT |
-           (alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
-           num_entries;
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[4];
 }
 
 static inline void
 gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
-                    int offset, int size, int entry_size)
+                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
-         offset, size, entry_size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_urb() (NOTE(review): confirm name) */
+   dw[1] = urb->urb[0];
 }
 
 static inline void
 gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
-                    int offset, int size, int entry_size)
+                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS,
-         offset, size, entry_size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_urb() (NOTE(review): confirm name) */
+   dw[1] = urb->urb[1];
 }
 
 static inline void
 gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
-                    int offset, int size, int entry_size)
+                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS,
-         offset, size, entry_size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_urb() (NOTE(review): confirm name) */
+   dw[1] = urb->urb[2];
 }
 
 static inline void
 gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
-                    int offset, int size, int entry_size)
+                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS,
-         offset, size, entry_size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_urb() (NOTE(review): confirm name) */
+   dw[1] = urb->urb[3];
 }
 
 static inline void
index 3d02063f8092c1037f85679a528ff29591a64e9e..c257c6048e129fec4edeb1a1d94a08b76ebbe9c9 100644 (file)
@@ -70,6 +70,8 @@ struct ilo_blitter {
 
    uint32_t depth_clear_value;
 
+   struct ilo_state_urb urb;
+
    struct {
       struct ilo_surface_cso dst;
       unsigned width, height;
index b2b839cbb41405b770d5125374a469d071114b02..b106e79958a962066020fca5e19486f1fb0af45d 100644 (file)
@@ -78,6 +78,9 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter)
    ilo_state_viewport_init_for_rectlist(&blitter->vp, blitter->ilo->dev,
          blitter->vp_data, sizeof(blitter->vp_data));
 
+   ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev,
+         blitter->ve.count + blitter->ve.prepend_nosrc_cso);
+
    blitter->initialized = true;
 
    return true;
index 6935138f8d9f513c35c4186423703b3b1c79cfeb..0fd19e3fdff433d4ba22af5790cc35e514c3a885 100644 (file)
@@ -448,6 +448,8 @@ draw_session_prepare(struct ilo_render *render,
       session->prim_changed = true;
       session->primitive_restart_changed = true;
 
+      ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta);
+
       ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev,
             &session->rs_delta);
 
@@ -462,6 +464,9 @@ draw_session_prepare(struct ilo_render *render,
       session->primitive_restart_changed =
          (render->state.primitive_restart != vec->draw->primitive_restart);
 
+      ilo_state_urb_get_delta(&vec->urb, render->dev,
+            &render->state.urb, &session->urb_delta);
+
       if (vec->dirty & ILO_DIRTY_RASTERIZER) {
          ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev,
                &render->state.rs, &session->rs_delta);
@@ -493,6 +498,7 @@ draw_session_end(struct ilo_render *render,
    render->state.reduced_prim = session->reduced_prim;
    render->state.primitive_restart = vec->draw->primitive_restart;
 
+   render->state.urb = vec->urb;
    render->state.rs = vec->rasterizer->rs;
    render->state.cc = vec->blend->cc;
 }
index cc6f77d97505285a22b8baa50b13eb20d4fe4998..74c138017672eeaa7da1d40dac2fff39ab213ede 100644 (file)
@@ -90,6 +90,7 @@ struct ilo_render {
       int reduced_prim;
       int so_max_vertices;
 
+      struct ilo_state_urb urb;
       struct ilo_state_raster rs;
       struct ilo_state_cc cc;
 
@@ -148,6 +149,7 @@ struct ilo_render_draw_session {
    bool prim_changed;
    bool primitive_restart_changed;
 
+   struct ilo_state_urb_delta urb_delta;
    struct ilo_state_raster_delta rs_delta;
    struct ilo_state_viewport_delta vp_delta;
    struct ilo_state_cc_delta cc_delta;
index ff0bf2fb820bbd80b50f5ca95d391fb4629f15dc..9d199955be3e20ddb4b3258f2587fdc8ae43f07a 100644 (file)
@@ -329,64 +329,19 @@ gen6_draw_common_urb(struct ilo_render *r,
                      const struct ilo_state_vector *vec,
                      struct ilo_render_draw_session *session)
 {
-   /* 3DSTATE_URB */
-   if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) {
-      const bool gs_active = (vec->gs || (vec->vs &&
-               ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
-      int vs_entry_size, gs_entry_size;
-      int vs_total_size, gs_total_size;
-
-      vs_entry_size = (vec->vs) ?
-         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
-
-      /*
-       * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS
-       * share VUE handles.  The VUE allocation size must be large enough to
-       * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs.
-       *
-       * I am not sure if the PRM explicitly states that VF and VS share VUE
-       * handles.  But here is a citation that implies so:
-       *
-       * From the Sandy Bridge PRM, volume 2 part 1, page 44:
-       *
-       *     "Once a FF stage that spawn threads has sufficient input to
-       *      initiate a thread, it must guarantee that it is safe to request
-       *      the thread initiation. For all these FF stages, this check is
-       *      based on :
-       *
-       *      - The availability of output URB entries:
-       *        - VS: As the input URB entries are overwritten with the
-       *          VS-generated output data, output URB availability isn't a
-       *          factor."
-       */
-      if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
-         vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
+   const bool gs_active = (vec->gs || (vec->vs &&
+            ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
 
-      gs_entry_size = (vec->gs) ?
-         ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) :
-         (gs_active) ? vs_entry_size : 0;
-
-      /* in bytes */
-      vs_entry_size *= sizeof(float) * 4;
-      gs_entry_size *= sizeof(float) * 4;
-      vs_total_size = r->dev->urb_size;
-
-      if (gs_active) {
-         vs_total_size /= 2;
-         gs_total_size = vs_total_size;
-      }
-      else {
-         gs_total_size = 0;
-      }
-
-      gen6_3DSTATE_URB(r->builder, vs_total_size, gs_total_size,
-            vs_entry_size, gs_entry_size);
+   /* 3DSTATE_URB */
+   if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
+                                   ILO_STATE_URB_3DSTATE_URB_GS)) {
+      gen6_3DSTATE_URB(r->builder, &vec->urb);
 
       if (r->state.gs.active && !gs_active)
          gen6_wa_post_3dstate_urb_no_gs(r);
-
-      r->state.gs.active = gs_active;
    }
+
+   r->state.gs.active = gs_active;
 }
 
 static void
@@ -920,9 +875,7 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r,
 
    gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
 
-   gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0,
-         (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float),
-         0);
+   gen6_3DSTATE_URB(r->builder, &blitter->urb);
 
    if (r->state.gs.active) {
       gen6_wa_post_3dstate_urb_no_gs(r);
index 95884a0d51d4fcdfde20cab60fddfb819091e1cb..f5c1a82d671bfd5cd83bd10300d02d5fa4f57ac6 100644 (file)
@@ -200,40 +200,17 @@ gen7_draw_common_urb(struct ilo_render *r,
                      struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_URB_{VS,GS,HS,DS} */
-   if (DIRTY(VE) || DIRTY(VS)) {
-      /* the first 16KB are reserved for VS and PS PCBs */
-      const int offset =
-         (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
-          (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
-          32768 : 16384;
-      int vs_entry_size, vs_total_size;
-
-      vs_entry_size = (vec->vs) ?
-         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
-
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 35:
-       *
-       *     "Programming Restriction: As the VS URB entry serves as both the
-       *      per-vertex input and output of the VS shader, the VS URB
-       *      Allocation Size must be sized to the maximum of the vertex input
-       *      and output structures."
-       */
-      if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
-         vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
-
-      vs_entry_size *= sizeof(float) * 4;
-      vs_total_size = r->dev->urb_size - offset;
-
+   if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
+                                   ILO_STATE_URB_3DSTATE_URB_HS |
+                                   ILO_STATE_URB_3DSTATE_URB_DS |
+                                   ILO_STATE_URB_3DSTATE_URB_GS)) {
       if (ilo_dev_gen(r->dev) == ILO_GEN(7))
          gen7_wa_pre_vs(r);
 
-      gen7_3DSTATE_URB_VS(r->builder,
-            offset, vs_total_size, vs_entry_size);
-
-      gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
-      gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
-      gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
+      gen7_3DSTATE_URB_VS(r->builder, &vec->urb);
+      gen7_3DSTATE_URB_GS(r->builder, &vec->urb);
+      gen7_3DSTATE_URB_HS(r->builder, &vec->urb);
+      gen7_3DSTATE_URB_DS(r->builder, &vec->urb);
    }
 }
 
@@ -243,22 +220,15 @@ gen7_draw_common_pcb_alloc(struct ilo_render *r,
                            struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
-   if (r->hw_ctx_changed) {
-      /*
-       * Push constant buffers are only allowed to take up at most the first
-       * 16KB of the URB.  Split the space evenly for VS and FS.
-       */
-      const int max_size =
-         (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
-          (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
-          32768 : 16384;
-      const int size = max_size / 2;
-      int offset = 0;
-
-      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
-      offset += size;
-
-      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
+   if (session->urb_delta.dirty &
+         (ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS |
+          ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS |
+          ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS |
+          ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS |
+          ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS)) {
+      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &vec->urb);
+      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(r->builder, &vec->urb);
+      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &vec->urb);
 
       if (ilo_dev_gen(r->dev) == ILO_GEN(7))
          gen7_wa_post_3dstate_push_constant_alloc_ps(r);
@@ -671,21 +641,8 @@ static void
 gen7_rectlist_pcb_alloc(struct ilo_render *r,
                         const struct ilo_blitter *blitter)
 {
-   /*
-    * Push constant buffers are only allowed to take up at most the first
-    * 16KB of the URB.  Split the space evenly for VS and FS.
-    */
-   const int max_size =
-      (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
-       (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
-       32768 : 16384;
-   const int size = max_size / 2;
-   int offset = 0;
-
-   gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
-   offset += size;
-
-   gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
+   gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &blitter->urb);
+   gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &blitter->urb);
 
    if (ilo_dev_gen(r->dev) == ILO_GEN(7))
       gen7_wa_post_3dstate_push_constant_alloc_ps(r);
@@ -695,19 +652,10 @@ static void
 gen7_rectlist_urb(struct ilo_render *r,
                   const struct ilo_blitter *blitter)
 {
-   /* the first 16KB are reserved for VS and PS PCBs */
-   const int offset =
-      (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
-       (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
-       32768 : 16384;
-
-   gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset,
-         (blitter->ve.count + blitter->ve.prepend_nosrc_cso) *
-         4 * sizeof(float));
-
-   gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
-   gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
-   gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
+   gen7_3DSTATE_URB_VS(r->builder, &blitter->urb);
+   gen7_3DSTATE_URB_GS(r->builder, &blitter->urb);
+   gen7_3DSTATE_URB_HS(r->builder, &blitter->urb);
+   gen7_3DSTATE_URB_DS(r->builder, &blitter->urb);
 }
 
 static void
index 0a568bfbdc28adbfe11ddfaeb83ed5f39fc365b2..896402c9109356a60285f3a87feaa8852c9a0fba 100644 (file)
@@ -477,6 +477,55 @@ finalize_vertex_elements(struct ilo_context *ilo)
    }
 }
 
+static void
+finalize_urb(struct ilo_context *ilo)
+{
+   const uint16_t attr_size = sizeof(uint32_t) * 4;
+   const struct ilo_dev *dev = ilo->dev;
+   struct ilo_state_vector *vec = &ilo->state_vector;
+   struct ilo_state_urb_info info;
+
+   if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS |
+                       ILO_DIRTY_GS | ILO_DIRTY_FS)))
+      return;
+
+   memset(&info, 0, sizeof(info));
+
+   info.ve_entry_size = attr_size *
+      (vec->ve->count + vec->ve->prepend_nosrc_cso);
+
+   if (vec->vs) {
+      info.vs_const_data = (bool)
+         (ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_PCB_CBUF0_SIZE) +
+          ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_PCB_UCP_SIZE));
+      info.vs_entry_size = attr_size *
+         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT);
+   }
+
+   if (vec->gs) {
+      info.gs_const_data = (bool)
+         ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_PCB_CBUF0_SIZE);
+
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 189:
+       *
+       *     "All outputs of a GS thread will be stored in the single GS
+       *      thread output URB entry."
+       *
+       * TODO
+       */
+      info.gs_entry_size = attr_size *
+         ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT);
+   }
+
+   if (vec->fs) {
+      info.ps_const_data = (bool)
+         ilo_shader_get_kernel_param(vec->fs, ILO_KERNEL_PCB_CBUF0_SIZE);
+   }
+
+   ilo_state_urb_set_info(&vec->urb, dev, &info);
+}
+
 static void
 finalize_viewport(struct ilo_context *ilo)
 {
@@ -680,6 +729,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo,
    finalize_index_buffer(ilo);
    finalize_vertex_elements(ilo);
 
+   finalize_urb(ilo);
    finalize_rasterizer(ilo);
    finalize_viewport(ilo);
    finalize_blend(ilo);
@@ -2065,6 +2115,8 @@ void
 ilo_state_vector_init(const struct ilo_dev *dev,
                       struct ilo_state_vector *vec)
 {
+   struct ilo_state_urb_info urb_info;
+
    vec->sample_mask = ~0u;
 
    ilo_state_viewport_init_data_only(&vec->viewport.vp, dev,
@@ -2079,6 +2131,9 @@ ilo_state_vector_init(const struct ilo_dev *dev,
 
    ilo_state_sampler_init_disabled(&vec->disabled_sampler, dev);
 
+   memset(&urb_info, 0, sizeof(urb_info));
+   ilo_state_urb_init(&vec->urb, dev, &urb_info);
+
    util_dynarray_init(&vec->global_binding.bindings);
 
    vec->dirty = ILO_DIRTY_ALL;
index ae4639fe3f7806157424e15a62b5fbd4b75715cc..908585a507eb8d065e568f195b3c4f1b195148bd 100644 (file)
@@ -34,6 +34,7 @@
 #include "core/ilo_state_sampler.h"
 #include "core/ilo_state_sol.h"
 #include "core/ilo_state_surface.h"
+#include "core/ilo_state_urb.h"
 #include "core/ilo_state_viewport.h"
 #include "core/ilo_state_zs.h"
 #include "pipe/p_state.h"
@@ -275,6 +276,8 @@ struct ilo_state_vector {
 
    struct ilo_fb_state fb;
 
+   struct ilo_state_urb urb;
+
    /* shader resources */
    struct ilo_sampler_state sampler[PIPE_SHADER_TYPES];
    struct ilo_view_state view[PIPE_SHADER_TYPES];