X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vs_state.c;h=fc4373ab311f03d9a7375251c213e719e10dd88d;hb=636d01bd61cac83e13c3c64874e7e34e828ca93a;hp=d790ab65553a31295b2691de6f76df924dd54ed4;hpb=ba002eb1965ccdb09d7c3657d32e22e71ed4096a;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index d790ab65553..fc4373ab311 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -36,150 +36,129 @@
 #include "brw_defines.h"
 #include "main/macros.h"
 
-struct brw_vs_unit_key {
-   unsigned int total_grf;
-   unsigned int urb_entry_read_length;
-   unsigned int curb_entry_read_length;
-
-   unsigned int curbe_offset;
-
-   unsigned int nr_urb_entries, urb_size;
-
-   unsigned int nr_surfaces;
-};
-
 static void
-vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+brw_prepare_vs_unit(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-
-   memset(key, 0, sizeof(*key));
-
-   /* CACHE_NEW_VS_PROG */
-   key->total_grf = brw->vs.prog_data->total_grf;
-   key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
-   key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
-
-   /* BRW_NEW_URB_FENCE */
-   key->nr_urb_entries = brw->urb.nr_vs_entries;
-   key->urb_size = brw->urb.vsize;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_vs_unit_state *vs;
+
+   vs = brw_state_batch(brw, AUB_TRACE_VS_STATE,
+			sizeof(*vs), 32, &brw->vs.state_offset);
+   memset(vs, 0, sizeof(*vs));
+
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */
+   vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+   vs->thread0.kernel_start_pointer =
+      brw_program_reloc(brw,
+			brw->vs.state_offset +
+			offsetof(struct brw_vs_unit_state, thread0),
+			brw->vs.prog_offset +
+			(vs->thread0.grf_reg_count << 1)) >> 6;
+
+   vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   /* Choosing multiple program flow means that we may get 2-vertex threads,
+    * which will have the channel mask for dwords 4-7 enabled in the thread,
+    * and those dwords will be written to the second URB handle when we
+    * brw_urb_WRITE() results.
+    */
+   /* Disable single program flow on Ironlake.  We cannot reliably get
+    * all applications working without it.  See:
+    * https://bugs.freedesktop.org/show_bug.cgi?id=29172
+    *
+    * The most notable and reliably failing application is the Humus
+    * demo "CelShading"
+   */
+   vs->thread1.single_program_flow = (intel->gen == 5);
 
    /* BRW_NEW_NR_VS_SURFACES */
-   key->nr_surfaces = brw->vs.nr_surfaces;
+   if (intel->gen == 5)
+      vs->thread1.binding_table_entry_count = 0; /* hardware requirement */
+   else
+      vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
+
+   vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+   vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
+   vs->thread3.dispatch_grf_start_reg = 1;
+   vs->thread3.urb_entry_read_offset = 0;
 
    /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
    if (ctx->Transform.ClipPlanesEnabled) {
       /* Note that we read in the userclip planes as well, hence
        * clip_start:
        */
-      key->curbe_offset = brw->curbe.clip_start;
+      vs->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
    }
    else {
-      key->curbe_offset = brw->curbe.vs_start;
+      vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
    }
-}
-
-static dri_bo *
-vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
-{
-   struct brw_vs_unit_state vs;
-   dri_bo *bo;
-   int chipset_max_threads;
 
-   memset(&vs, 0, sizeof(vs));
 
-   vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
-   vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
-   vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-   /* Choosing multiple program flow means that we may get 2-vertex threads,
-    * which will have the channel mask for dwords 4-7 enabled in the thread,
-    * and those dwords will be written to the second URB handle when we
-    * brw_urb_WRITE() results.
-    */
-   vs.thread1.single_program_flow = 0;
-
-   if (BRW_IS_IGDNG(brw))
-      vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
-   else
-      vs.thread1.binding_table_entry_count = key->nr_surfaces;
-
-   vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
-   vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
-   vs.thread3.dispatch_grf_start_reg = 1;
-   vs.thread3.urb_entry_read_offset = 0;
-   vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
-
-   if (BRW_IS_IGDNG(brw))
-       vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
-   else
-       vs.thread4.nr_urb_entries = key->nr_urb_entries;
+   /* BRW_NEW_URB_FENCE */
+   if (intel->gen == 5) {
+      switch (brw->urb.nr_vs_entries) {
+      case 8:
+      case 12:
+      case 16:
+      case 32:
+      case 64:
+      case 96:
+      case 128:
+      case 168:
+      case 192:
+      case 224:
+      case 256:
+	 vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
+	 break;
+      default:
+	 assert(0);
+      }
+   } else {
+      switch (brw->urb.nr_vs_entries) {
+      case 8:
+      case 12:
+      case 16:
+      case 32:
+	 break;
+      case 64:
+	 assert(intel->is_g4x);
+	 break;
+      default:
+	 assert(0);
+      }
+      vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
+   }
 
-   vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
 
-   if (BRW_IS_IGDNG(brw))
-      chipset_max_threads = 72;
-   else if (BRW_IS_G4X(brw))
-      chipset_max_threads = 32;
-   else
-      chipset_max_threads = 16;
-   vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
-				  1, chipset_max_threads) - 1;
-
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
-      vs.thread4.max_threads = 0;
+   vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
+				   1, brw->vs_max_threads) - 1;
 
    /* No samplers for ARB_vp programs:
     */
-   /* It has to be set to 0 for IGDNG
+   /* It has to be set to 0 for Ironlake
     */
-   vs.vs5.sampler_count = 0;
+   vs->vs5.sampler_count = 0;
 
-   if (INTEL_DEBUG & DEBUG_STATS)
-      vs.thread4.stats_enable = 1;
+   if (unlikely(INTEL_DEBUG & DEBUG_STATS))
+      vs->thread4.stats_enable = 1;
 
    /* Vertex program always enabled:
     */
-   vs.vs6.vs_enable = 1;
-
-   bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
-			 key, sizeof(*key),
-			 &brw->vs.prog_bo, 1,
-			 &vs, sizeof(vs),
-			 NULL, NULL);
-
-   /* Emit VS program relocation */
-   dri_bo_emit_reloc(bo,
-		     I915_GEM_DOMAIN_INSTRUCTION, 0,
-		     vs.thread0.grf_reg_count << 1,
-		     offsetof(struct brw_vs_unit_state, thread0),
-		     brw->vs.prog_bo);
-
-   return bo;
-}
+   vs->vs6.vs_enable = 1;
 
-static void prepare_vs_unit(struct brw_context *brw)
-{
-   struct brw_vs_unit_key key;
-
-   vs_unit_populate_key(brw, &key);
-
-   dri_bo_unreference(brw->vs.state_bo);
-   brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT,
-				       &key, sizeof(key),
-				       &brw->vs.prog_bo, 1,
-				       NULL);
-   if (brw->vs.state_bo == NULL) {
-      brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
-   }
+   brw->state.dirty.cache |= CACHE_NEW_VS_UNIT;
 }
 
 const struct brw_tracked_state brw_vs_unit = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM,
-      .brw   = (BRW_NEW_CURBE_OFFSETS |
+      .brw   = (BRW_NEW_BATCH |
+		BRW_NEW_PROGRAM_CACHE |
+		BRW_NEW_CURBE_OFFSETS |
                 BRW_NEW_NR_VS_SURFACES |
 		BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_VS_PROG
    },
-   .prepare = prepare_vs_unit,
+   .prepare = brw_prepare_vs_unit,
 };