[965] Convert WM unit to use a cache key instead of brw_cache_data.
author: Eric Anholt <eric@anholt.net>
Wed, 2 Jan 2008 23:47:47 +0000 (15:47 -0800)
committer: Eric Anholt <eric@anholt.net>
Wed, 2 Jan 2008 23:51:49 +0000 (15:51 -0800)
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_wm_state.c

index fd18fcdc210780039c2ad5c460435350ceb8c73b..05111b351ab88166c7b2702fa3cfa14e88f2e9df 100644 (file)
@@ -590,7 +590,6 @@ struct brw_context
 
       GLuint max_threads;
       dri_bo *scratch_buffer;
-      GLuint scratch_buffer_size;
 
       GLuint sampler_count;
       dri_bo *sampler_bo;
index 8a7236e62f4b10bdd523fbd80f6063f3866f2db2..02443c50d0589c839933a13ce9faf9d3b56c138a 100644 (file)
  * WM unit - fragment programs and rasterization
  */
 
-static void upload_wm_unit(struct brw_context *brw )
+struct brw_wm_unit_key {   /* inputs the WM unit state is built from; handed to the state cache as the lookup key */
+   unsigned int total_grf, total_scratch;   /* from wm.prog_data; total_scratch is ALIGNed to 1024 */
+   unsigned int urb_entry_read_length;      /* wm.prog_data->urb_read_length */
+   unsigned int curb_entry_read_length;     /* programmed into thread3.const_urb_entry_read_length */
+   unsigned int dispatch_grf_start_reg;     /* wm.prog_data->first_curbe_grf */
+
+   unsigned int curbe_offset;               /* doubled to form thread3.const_urb_entry_read_offset */
+   unsigned int urb_size;                   /* brw->urb.vsize */
+
+   unsigned int max_threads;                /* thread count (1 or 32); wm5.max_threads is programmed with max_threads - 1 */
+
+   unsigned int nr_surfaces, sampler_count; /* binding table entry count; sampler count, stored in wm4 as (count + 1) / 4 */
+   GLboolean uses_depth, computes_depth, uses_kill, is_glsl; /* is_glsl selects 8-pixel instead of 16-pixel dispatch */
+   GLboolean polygon_stipple, stats_wm;     /* polygon stipple enable; stats_wm turns on wm4.stats_enable */
+};
+
+/**
+ * Fills in the WM unit cache key from the current context state.
+ *
+ * The key is handed to the state cache as a pointer plus sizeof(key), so
+ * presumably every byte of it (struct padding included) takes part in the
+ * lookup.  It is therefore zeroed first, and every field that
+ * wm_unit_create_from_key() reads must be assigned here.
+ *
+ * brw: context whose WM program data, URB/CURBE layout, surface and sampler
+ *      counts, polygon stipple flag and fragment program are sampled.
+ * key: key to fill in; fully overwritten.
+ */
+static void
+wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 {
+   const struct gl_fragment_program *fp = brw->fragment_program;
    struct intel_context *intel = &brw->intel;
-   struct brw_wm_unit_state wm;
-   GLuint max_threads;
-   GLuint per_thread;
-   dri_bo *reloc_bufs[3];
+
+   /* Give struct padding and any field not set below a defined value. */
+   memset(key, 0, sizeof(*key));
 
    if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
-      max_threads = 0; 
+      key->max_threads = 1;
    else
-      max_threads = 31;
+      key->max_threads = 32;
 
+   /* CACHE_NEW_WM_PROG */
+   key->total_grf = brw->wm.prog_data->total_grf;
+   key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+   key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
+   key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
 
-   memset(&wm, 0, sizeof(wm));
+   /* BRW_NEW_URB_FENCE */
+   key->urb_size = brw->urb.vsize;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.wm_start;
 
-   /* CACHE_NEW_WM_PROG */
-   wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
-   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
-   wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
-   wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
-   wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length;
+   /* CACHE_NEW_SURFACE */
+   key->nr_surfaces = brw->wm.nr_surfaces;
 
-   wm.wm5.max_threads = max_threads;      
+   /* CACHE_NEW_SAMPLER */
+   key->sampler_count = brw->wm.sampler_count;
 
-   per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024);
-   assert(per_thread <= 12 * 1024);
+   /* _NEW_POLYGONSTIPPLE */
+   key->polygon_stipple = brw->attribs.Polygon->StippleFlag;
 
-   if (brw->wm.prog_data->total_scratch) {
-      GLuint total = per_thread * (max_threads + 1);
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
 
-      /* Scratch space -- just have to make sure there is sufficient
-       * allocated for the active program and current number of threads.
-       */
-      brw->wm.scratch_buffer_size = total;
-      if (brw->wm.scratch_buffer &&
-         brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) {
-        dri_bo_unreference(brw->wm.scratch_buffer);
-        brw->wm.scratch_buffer = NULL;
-      }
-      if (!brw->wm.scratch_buffer) {
-        brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr,
-                                              "wm scratch",
-                                              brw->wm.scratch_buffer_size,
-                                              4096, DRM_BO_FLAG_MEM_TT);
-      }
-   }
+   /* as far as we can tell */
+   key->computes_depth =
+      (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0;
 
-   /* CACHE_NEW_SURFACE */
-   wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
+   /* _NEW_COLOR */
+   key->uses_kill = fp->UsesKill || brw->attribs.Color->AlphaEnabled;
+   key->is_glsl = brw_wm_is_glsl(fp);
 
-   /* CACHE_NEW_WM_PROG */
-   if (per_thread != 0) {
-   /* reloc */
+   /* XXX: This needs a flag to indicate when it changes. */
+   key->stats_wm = intel->stats_wm;
+}
+
+static dri_bo *
+wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
+                       dri_bo **reloc_bufs)
+{
+   struct brw_wm_unit_state wm;
+
+   memset(&wm, 0, sizeof(wm));
+
+   wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+   wm.thread1.depth_coef_urb_read_offset = 1;
+   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   wm.thread1.binding_table_entry_count = key->nr_surfaces;
+
+   if (key->total_scratch != 0) {
       wm.thread2.scratch_space_base_pointer =
-        brw->wm.scratch_buffer->offset >> 10;
-      wm.thread2.per_thread_scratch_space = per_thread / 1024 - 1;
+        brw->wm.scratch_buffer->offset >> 10; /* reloc */
+      wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
    } else {
       wm.thread2.scratch_space_base_pointer = 0;
       wm.thread2.per_thread_scratch_space = 0;
    }
 
-   /* BRW_NEW_CURBE_OFFSETS */
-   wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
-
+   wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
+   wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
    wm.thread3.urb_entry_read_offset = 0;
-   wm.thread1.depth_coef_urb_read_offset = 1;
-   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
-   /* CACHE_NEW_SAMPLER */
-   wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
+   wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
    if (brw->wm.sampler_bo != NULL) {
       /* reloc */
       wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
@@ -119,27 +138,16 @@ static void upload_wm_unit(struct brw_context *brw )
       wm.wm4.sampler_state_pointer = 0;
    }
 
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   {
-      const struct gl_fragment_program *fp = brw->fragment_program; 
-
-      if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS)) 
-        wm.wm5.program_uses_depth = 1; /* as far as we can tell */
-   
-      if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) 
-        wm.wm5.program_computes_depth = 1;
-   
-      /* _NEW_COLOR */
-      if (fp->UsesKill || 
-         brw->attribs.Color->AlphaEnabled) 
-        wm.wm5.program_uses_killpixel = 1; 
-      
-      if (brw_wm_is_glsl(fp))
-         wm.wm5.enable_8_pix = 1;
-      else
-         wm.wm5.enable_16_pix = 1;
-   }
+   wm.wm5.program_uses_depth = key->uses_depth;
+   wm.wm5.program_computes_depth = key->computes_depth;
+   wm.wm5.program_uses_killpixel = key->uses_kill;
+
+   if (key->is_glsl)
+      wm.wm5.enable_8_pix = 1;
+   else
+      wm.wm5.enable_16_pix = 1;
 
+   wm.wm5.max_threads = key->max_threads - 1;
    wm.wm5.thread_dispatch_enable = 1;  /* AKA: color_write */
    wm.wm5.legacy_line_rast = 0;
    wm.wm5.legacy_global_depth_bias = 0;
@@ -147,9 +155,7 @@ static void upload_wm_unit(struct brw_context *brw )
    wm.wm5.line_aa_region_width = 0;
    wm.wm5.line_endcap_aa_region_width = 1;
 
-   /* _NEW_POLYGONSTIPPLE */
-   if (brw->attribs.Polygon->StippleFlag) 
-      wm.wm5.polygon_stipple = 1;
+   wm.wm5.polygon_stipple = key->polygon_stipple;
 
    /* _NEW_POLYGON */
    if (brw->attribs.Polygon->OffsetFill) {
@@ -171,20 +177,61 @@ static void upload_wm_unit(struct brw_context *brw )
       wm.wm5.line_stipple = 1;
    }
 
-   if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm)
+   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
       wm.wm4.stats_enable = 1;
 
-   reloc_bufs[0] = brw->wm.prog_bo;
-   reloc_bufs[1] = brw->wm.scratch_buffer;
-   reloc_bufs[2] = brw->wm.sampler_bo;
-
    brw->wm.thread0_delta = wm.thread0.grf_reg_count << 1;
    brw->wm.thread2_delta = wm.thread2.per_thread_scratch_space;
    brw->wm.wm4_delta = wm.wm4.stats_enable | (wm.wm4.sampler_count << 2);
 
+   return brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+                          key, sizeof(*key),
+                          reloc_bufs, 3,
+                          &wm, sizeof(wm),
+                          NULL, NULL);
+}
+
+
+static void upload_wm_unit( struct brw_context *brw )   /* builds the WM unit key, sizes the scratch buffer, then looks up or creates brw->wm.state_bo */
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_wm_unit_key key;
+   dri_bo *reloc_bufs[3];
+
+   wm_unit_populate_key(brw, &key);
+
+   /* Allocate the necessary scratch space if we haven't already.  Don't
+    * bother reducing the allocation later, since we use scratch so
+    * rarely.
+    */
+   assert(key.total_scratch <= 12 * 1024);
+   if (key.total_scratch) {
+      GLuint total = key.total_scratch * key.max_threads;   /* per-thread scratch size times thread count */
+
+      if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) {   /* existing buffer too small: drop it so it is reallocated below */
+        dri_bo_unreference(brw->wm.scratch_buffer);
+        brw->wm.scratch_buffer = NULL;
+      }
+      if (brw->wm.scratch_buffer == NULL) {
+        brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr,
+                                              "wm scratch",
+                                              total,
+                                              4096, DRM_BO_FLAG_MEM_TT);
+      }
+   }
+
+   reloc_bufs[0] = brw->wm.prog_bo;          /* reloc order: program, scratch, sampler */
+   reloc_bufs[1] = brw->wm.scratch_buffer;   /* may still be NULL when the program needs no scratch and none was allocated earlier */
+   reloc_bufs[2] = brw->wm.sampler_bo;
+
    dri_bo_unreference(brw->wm.state_bo);
-   brw->wm.state_bo = brw_cache_data( &brw->cache, BRW_WM_UNIT, &wm,
-                                     reloc_bufs, 3 );
+   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,   /* look for existing state matching this key and reloc buffers */
+                                      &key, sizeof(key),
+                                      reloc_bufs, 3,
+                                      NULL);
+   if (brw->wm.state_bo == NULL) {   /* nothing returned by the search: build the unit state and upload it */
+      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
+   }
 }
 
 static void emit_reloc_wm_unit(struct brw_context *brw)