i965: Use state streaming on programs, and state base address on gen5+.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_clip_state.c
index c8f24a94e4d5feb574ccfd0fbb6ec7f48ef97557..b9efbb74c87a38492d0f38cc482b56716e5c397e 100644 (file)
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "main/macros.h"
-
-struct brw_clip_unit_key {
-   unsigned int total_grf;
-   unsigned int urb_entry_read_length;
-   unsigned int curb_entry_read_length;
-   unsigned int clip_mode;
-
-   unsigned int curbe_offset;
-
-   unsigned int nr_urb_entries, urb_size;
-
-   GLboolean depth_clamp;
-};
 
 static void
-clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
-{
-   GLcontext *ctx = &brw->intel.ctx;
-   memset(key, 0, sizeof(*key));
-
-   /* CACHE_NEW_CLIP_PROG */
-   key->total_grf = brw->clip.prog_data->total_grf;
-   key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
-   key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
-   key->clip_mode = brw->clip.prog_data->clip_mode;
-
-   /* BRW_NEW_CURBE_OFFSETS */
-   key->curbe_offset = brw->curbe.clip_start;
-
-   /* BRW_NEW_URB_FENCE */
-   key->nr_urb_entries = brw->urb.nr_clip_entries;
-   key->urb_size = brw->urb.vsize;
-
-   /* _NEW_TRANSOFORM */
-   key->depth_clamp = ctx->Transform.DepthClamp;
-}
-
-static dri_bo *
-clip_unit_create_from_key(struct brw_context *brw,
-                         struct brw_clip_unit_key *key)
+brw_prepare_clip_unit(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
-   struct brw_clip_unit_state clip;
-   dri_bo *bo;
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_clip_unit_state *clip;
+   /* Fill in the clip unit state directly in the buffer handed back by
+    * brw_state_batch() instead of building it on the stack and uploading it
+    * through the state cache.  brw->clip.state_offset is passed so the helper
+    * can report where that buffer sits (presumably an offset within the
+    * batch, with 32 as the required alignment -- confirm against
+    * brw_state_batch()).
+    */
+   clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
+   memset(clip, 0, sizeof(*clip));
 
-   memset(&clip, 0, sizeof(clip));
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */
+   clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
+                                16 - 1);
+   clip->thread0.kernel_start_pointer =
+      brw_program_reloc(brw,
+                       brw->clip.state_offset +
+                       offsetof(struct brw_clip_unit_state, thread0),
+                       brw->clip.prog_offset +
+                       (clip->thread0.grf_reg_count << 1)) >> 6;
 
-   clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
-   /* reloc */
-   clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+   clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   clip->thread1.single_program_flow = 1;
 
-   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-   clip.thread1.single_program_flow = 1;
+   clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+   clip->thread3.const_urb_entry_read_length =
+      brw->clip.prog_data->curb_read_length;
 
-   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
-   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
-   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
-   clip.thread3.dispatch_grf_start_reg = 1;
-   clip.thread3.urb_entry_read_offset = 0;
+   /* BRW_NEW_CURBE_OFFSETS */
+   clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+   clip->thread3.dispatch_grf_start_reg = 1;
+   clip->thread3.urb_entry_read_offset = 0;
 
-   clip.thread4.nr_urb_entries = key->nr_urb_entries;
-   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   /* BRW_NEW_URB_FENCE */
+   clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
+   clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
    /* If we have enough clip URB entries to run two threads, do so.
     */
-   if (key->nr_urb_entries >= 10) {
+   if (brw->urb.nr_clip_entries >= 10) {
       /* Half of the URB entries go to each thread, and it has to be an
        * even number.
        */
-      assert(key->nr_urb_entries % 2 == 0);
+      assert(brw->urb.nr_clip_entries % 2 == 0);
       
-      /* Although up to 16 concurrent Clip threads are allowed on IGDNG, 
+      /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
        * only 2 threads can output VUEs at a time.
        */
-      if (intel->is_ironlake)
-         clip.thread4.max_threads = 16 - 1;        
+      if (intel->gen == 5)
+         clip->thread4.max_threads = 16 - 1;
       else
-         clip.thread4.max_threads = 2 - 1;
+         clip->thread4.max_threads = 2 - 1;
    } else {
-      assert(key->nr_urb_entries >= 5);
-      clip.thread4.max_threads = 1 - 1;
+      assert(brw->urb.nr_clip_entries >= 5);
+      clip->thread4.max_threads = 1 - 1;
    }
 
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
-      clip.thread4.max_threads = 0;
+   if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD))
+      clip->thread4.max_threads = 0;
 
-   if (INTEL_DEBUG & DEBUG_STATS)
-      clip.thread4.stats_enable = 1;
+   if (unlikely(INTEL_DEBUG & DEBUG_STATS))
+      clip->thread4.stats_enable = 1;
 
-   clip.clip5.userclip_enable_flags = 0x7f;
-   clip.clip5.userclip_must_clip = 1;
-   clip.clip5.guard_band_enable = 0;
-   if (!key->depth_clamp)
-      clip.clip5.viewport_z_clip_enable = 1;
-   clip.clip5.viewport_xy_clip_enable = 1;
-   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
-   clip.clip5.api_mode = BRW_CLIP_API_OGL;
-   clip.clip5.clip_mode = key->clip_mode;
+   clip->clip5.userclip_enable_flags = 0x7f;
+   clip->clip5.userclip_must_clip = 1;
+   clip->clip5.guard_band_enable = 0;
+   /* _NEW_TRANSFORM */
+   if (!ctx->Transform.DepthClamp)
+      clip->clip5.viewport_z_clip_enable = 1;
+   clip->clip5.viewport_xy_clip_enable = 1;
+   clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+   clip->clip5.api_mode = BRW_CLIP_API_OGL;
+   clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
 
    if (intel->is_g4x)
-      clip.clip5.negative_w_clip_test = 1;
-
-   clip.clip6.clipper_viewport_state_ptr = 0;
-   clip.viewport_xmin = -1;
-   clip.viewport_xmax = 1;
-   clip.viewport_ymin = -1;
-   clip.viewport_ymax = 1;
-
-   bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
-                        key, sizeof(*key),
-                        &brw->clip.prog_bo, 1,
-                        &clip, sizeof(clip),
-                        NULL, NULL);
+      clip->clip5.negative_w_clip_test = 1;
 
-   /* Emit clip program relocation */
-   assert(brw->clip.prog_bo);
-   dri_bo_emit_reloc(bo,
-                    I915_GEM_DOMAIN_INSTRUCTION,
-                    0,
-                    clip.thread0.grf_reg_count << 1,
-                    offsetof(struct brw_clip_unit_state, thread0),
-                    brw->clip.prog_bo);
+   clip->clip6.clipper_viewport_state_ptr = 0;
+   clip->viewport_xmin = -1;
+   clip->viewport_xmax = 1;
+   clip->viewport_ymin = -1;
+   clip->viewport_ymax = 1;
 
-   return bo;
-}
-
-static void upload_clip_unit( struct brw_context *brw )
-{
-   struct brw_clip_unit_key key;
-
-   clip_unit_populate_key(brw, &key);
-
-   dri_bo_unreference(brw->clip.state_bo);
-   brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
-                                        &key, sizeof(key),
-                                        &brw->clip.prog_bo, 1,
-                                        NULL);
-   if (brw->clip.state_bo == NULL) {
-      brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
-   }
+   brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
 }
 
 const struct brw_tracked_state brw_clip_unit = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM,
-      .brw   = (BRW_NEW_CURBE_OFFSETS |
+      .brw   = (BRW_NEW_BATCH | /* NOTE(review): presumably because the state is now written via brw_state_batch() -- confirm */
+               BRW_NEW_PROGRAM_CACHE |
+               BRW_NEW_CURBE_OFFSETS |
                BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_CLIP_PROG
    },
-   .prepare = upload_clip_unit,
+   .prepare = brw_prepare_clip_unit,
 };