i965: Increase G4X default VS URB allocation to actually allow 32 threads.
author Eric Anholt <eric@anholt.net>
Tue, 30 Jun 2009 21:26:06 +0000 (14:26 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 1 Jul 2009 00:55:23 +0000 (17:55 -0700)
This improves the performance of my GLSL demo by 30%.  It also fixes the
VS deadlock that ut2004 had, for reasons I can't explain. Bug #21330.

src/mesa/drivers/dri/i965/brw_urb.c

index 7673dd36eb90b5e44e6216fbdfb081e8b4b82323..47bc45c912c509522d8a32dcc0fd7b0c0a4fa8e9 100644 (file)
@@ -143,7 +143,19 @@ static void recalculate_urb_fence( struct brw_context *brw )
       brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
       brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;        
       brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;        
-      
+
+      brw->urb.constrained = 0;
+
+      if (BRW_IS_G4X(brw)) {
+        brw->urb.nr_vs_entries = 64;
+        if (check_urb_layout(brw)) {
+           goto done;
+        } else {
+           brw->urb.constrained = 1;
+           brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+        }
+      }
+
       if (!check_urb_layout(brw)) {
         brw->urb.nr_vs_entries = limits[VS].min_nr_entries;    
         brw->urb.nr_gs_entries = limits[GS].min_nr_entries;    
@@ -169,9 +181,8 @@ static void recalculate_urb_fence( struct brw_context *brw )
         if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
            _mesa_printf("URB CONSTRAINED\n");
       }
-      else 
-        brw->urb.constrained = 0;
 
+done:
       if (INTEL_DEBUG & DEBUG_URB)
         _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
                      brw->urb.vs_start,