i965/gen4: Move clip state to state streaming
author Eric Anholt <eric@anholt.net>
Mon, 25 Apr 2011 03:20:16 +0000 (20:20 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 29 Apr 2011 22:26:50 +0000 (15:26 -0700)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_clip_state.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_vtbl.c

index 60fd5fa7d9e30833f50b1c4413a7b266b4be058f..6015c8cbe9fde8e93830b256d3c1641d745d8bab 100644 (file)
 #include "brw_state.h"
 #include "brw_defines.h"
 
-struct brw_clip_unit_key {
-   unsigned int total_grf;
-   unsigned int urb_entry_read_length;
-   unsigned int curb_entry_read_length;
-   unsigned int clip_mode;
-
-   unsigned int curbe_offset;
-
-   unsigned int nr_urb_entries, urb_size;
-
-   GLboolean depth_clamp;
-};
-
 static void
-clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
-{
-   struct gl_context *ctx = &brw->intel.ctx;
-   memset(key, 0, sizeof(*key));
-
-   /* CACHE_NEW_CLIP_PROG */
-   key->total_grf = brw->clip.prog_data->total_grf;
-   key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
-   key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
-   key->clip_mode = brw->clip.prog_data->clip_mode;
-
-   /* BRW_NEW_CURBE_OFFSETS */
-   key->curbe_offset = brw->curbe.clip_start;
-
-   /* BRW_NEW_URB_FENCE */
-   key->nr_urb_entries = brw->urb.nr_clip_entries;
-   key->urb_size = brw->urb.vsize;
-
-   /* _NEW_TRANSOFORM */
-   key->depth_clamp = ctx->Transform.DepthClamp;
-}
-
-static drm_intel_bo *
-clip_unit_create_from_key(struct brw_context *brw,
-                         struct brw_clip_unit_key *key)
+brw_prepare_clip_unit(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
-   struct brw_clip_unit_state clip;
-   drm_intel_bo *bo;
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_clip_unit_state *clip;
 
-   memset(&clip, 0, sizeof(clip));
+   clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
+   memset(clip, 0, sizeof(*clip));
 
-   clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   /* CACHE_NEW_CLIP_PROG */
+   clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
+                                16 - 1);
    /* reloc */
-   clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+   clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
 
-   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-   clip.thread1.single_program_flow = 1;
+   clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   clip->thread1.single_program_flow = 1;
 
-   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
-   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
-   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
-   clip.thread3.dispatch_grf_start_reg = 1;
-   clip.thread3.urb_entry_read_offset = 0;
+   clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+   clip->thread3.const_urb_entry_read_length =
+      brw->clip.prog_data->curb_read_length;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+   clip->thread3.dispatch_grf_start_reg = 1;
+   clip->thread3.urb_entry_read_offset = 0;
 
-   clip.thread4.nr_urb_entries = key->nr_urb_entries;
-   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   /* BRW_NEW_URB_FENCE */
+   clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
+   clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
    /* If we have enough clip URB entries to run two threads, do so.
     */
-   if (key->nr_urb_entries >= 10) {
+   if (brw->urb.nr_clip_entries >= 10) {
       /* Half of the URB entries go to each thread, and it has to be an
        * even number.
        */
-      assert(key->nr_urb_entries % 2 == 0);
+      assert(brw->urb.nr_clip_entries % 2 == 0);
       
       /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
        * only 2 threads can output VUEs at a time.
        */
       if (intel->gen == 5)
-         clip.thread4.max_threads = 16 - 1;        
+         clip->thread4.max_threads = 16 - 1;
       else
-         clip.thread4.max_threads = 2 - 1;
+         clip->thread4.max_threads = 2 - 1;
    } else {
-      assert(key->nr_urb_entries >= 5);
-      clip.thread4.max_threads = 1 - 1;
+      assert(brw->urb.nr_clip_entries >= 5);
+      clip->thread4.max_threads = 1 - 1;
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD))
-      clip.thread4.max_threads = 0;
+      clip->thread4.max_threads = 0;
 
    if (unlikely(INTEL_DEBUG & DEBUG_STATS))
-      clip.thread4.stats_enable = 1;
-
-   clip.clip5.userclip_enable_flags = 0x7f;
-   clip.clip5.userclip_must_clip = 1;
-   clip.clip5.guard_band_enable = 0;
-   if (!key->depth_clamp)
-      clip.clip5.viewport_z_clip_enable = 1;
-   clip.clip5.viewport_xy_clip_enable = 1;
-   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
-   clip.clip5.api_mode = BRW_CLIP_API_OGL;
-   clip.clip5.clip_mode = key->clip_mode;
+      clip->thread4.stats_enable = 1;
 
-   if (intel->is_g4x)
-      clip.clip5.negative_w_clip_test = 1;
+   clip->clip5.userclip_enable_flags = 0x7f;
+   clip->clip5.userclip_must_clip = 1;
+   clip->clip5.guard_band_enable = 0;
+   /* _NEW_TRANSFORM */
+   if (!ctx->Transform.DepthClamp)
+      clip->clip5.viewport_z_clip_enable = 1;
+   clip->clip5.viewport_xy_clip_enable = 1;
+   clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+   clip->clip5.api_mode = BRW_CLIP_API_OGL;
+   clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
 
-   clip.clip6.clipper_viewport_state_ptr = 0;
-   clip.viewport_xmin = -1;
-   clip.viewport_xmax = 1;
-   clip.viewport_ymin = -1;
-   clip.viewport_ymax = 1;
+   if (intel->is_g4x)
+      clip->clip5.negative_w_clip_test = 1;
 
-   bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
-                        key, sizeof(*key),
-                        &brw->clip.prog_bo, 1,
-                        &clip, sizeof(clip));
+   clip->clip6.clipper_viewport_state_ptr = 0;
+   clip->viewport_xmin = -1;
+   clip->viewport_xmax = 1;
+   clip->viewport_ymin = -1;
+   clip->viewport_ymax = 1;
 
    /* Emit clip program relocation */
    assert(brw->clip.prog_bo);
-   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_clip_unit_state, thread0),
-                          brw->clip.prog_bo, clip.thread0.grf_reg_count << 1,
+   drm_intel_bo_emit_reloc(intel->batch.bo,
+                          (brw->clip.state_offset +
+                           offsetof(struct brw_clip_unit_state, thread0)),
+                          brw->clip.prog_bo, clip->thread0.grf_reg_count << 1,
                           I915_GEM_DOMAIN_INSTRUCTION, 0);
 
-   return bo;
-}
-
-static void upload_clip_unit( struct brw_context *brw )
-{
-   struct brw_clip_unit_key key;
-
-   clip_unit_populate_key(brw, &key);
-
-   drm_intel_bo_unreference(brw->clip.state_bo);
-   brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
-                                        &key, sizeof(key),
-                                        &brw->clip.prog_bo, 1,
-                                        NULL);
-   if (brw->clip.state_bo == NULL) {
-      brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
-   }
+   brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
 }
 
 const struct brw_tracked_state brw_clip_unit = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM,
-      .brw   = (BRW_NEW_CURBE_OFFSETS |
+      .brw   = (BRW_NEW_BATCH |
+               BRW_NEW_CURBE_OFFSETS |
                BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_CLIP_PROG
    },
-   .prepare = upload_clip_unit,
+   .prepare = brw_prepare_clip_unit,
 };
index a81d6157390dbaf8121ae63a2da10f60b961d440..4b97bfb2ac54e41f4bb7fd03b62674c08cd7ab5a 100644 (file)
@@ -655,7 +655,9 @@ struct brw_context
       struct brw_clip_prog_data *prog_data;
 
       drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
+
+      /* Offset in the batch to the CLIP state on pre-gen6. */
+      uint32_t state_offset;
 
       /* As of gen6, this is the offset in the batch to the CLIP VP,
        * instead of vp_bo.
index b61a7ad099ef4a89d97c58aabc21bfc7836da4de..3552cce62adc374527df38c4d7ea35a6a81f7168 100644 (file)
@@ -149,7 +149,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
       OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    else
       OUT_BATCH(0);
-   OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+            brw->clip.state_offset | 1);
    OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
    OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -165,7 +166,6 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
 static void prepare_psp_urb_cbs(struct brw_context *brw)
 {
    brw_add_validated_bo(brw, brw->gs.state_bo);
-   brw_add_validated_bo(brw, brw->clip.state_bo);
 }
 
 static void upload_psp_urb_cbs(struct brw_context *brw )
index 8d1497c8215e85bfb822eb7f1eaf7aa3653f4bc7..49d771133bffa404a85e1f06f3c2aacea698b04f 100644 (file)
@@ -80,7 +80,6 @@ static void brw_destroy_context( struct intel_context *intel )
    dri_bo_release(&brw->gs.prog_bo);
    dri_bo_release(&brw->gs.state_bo);
    dri_bo_release(&brw->clip.prog_bo);
-   dri_bo_release(&brw->clip.state_bo);
    dri_bo_release(&brw->sf.prog_bo);
    dri_bo_release(&brw->wm.prog_bo);
    dri_bo_release(&brw->wm.const_bo);