i965/vs: Add support for pull constant loads for uniform arrays.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_clip_state.c
index 60fd5fa7d9e30833f50b1c4413a7b266b4be058f..31fbadf5ef2f134ab2563f72988138f641b47174 100644 (file)
 #include "brw_state.h"
 #include "brw_defines.h"
 
-struct brw_clip_unit_key {
-   unsigned int total_grf;
-   unsigned int urb_entry_read_length;
-   unsigned int curb_entry_read_length;
-   unsigned int clip_mode;
-
-   unsigned int curbe_offset;
-
-   unsigned int nr_urb_entries, urb_size;
-
-   GLboolean depth_clamp;
-};
-
 static void
-clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+brw_prepare_clip_unit(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->intel.ctx;
-   memset(key, 0, sizeof(*key));
-
-   /* CACHE_NEW_CLIP_PROG */
-   key->total_grf = brw->clip.prog_data->total_grf;
-   key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
-   key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
-   key->clip_mode = brw->clip.prog_data->clip_mode;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_clip_unit_state *clip;
+
+   clip = brw_state_batch(brw, AUB_TRACE_CLIP_STATE,
+                         sizeof(*clip), 32, &brw->clip.state_offset);
+   memset(clip, 0, sizeof(*clip));
+
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */
+   clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
+                                16 - 1);
+   clip->thread0.kernel_start_pointer =
+      brw_program_reloc(brw,
+                       brw->clip.state_offset +
+                       offsetof(struct brw_clip_unit_state, thread0),
+                       brw->clip.prog_offset +
+                       (clip->thread0.grf_reg_count << 1)) >> 6;
+
+   clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   clip->thread1.single_program_flow = 1;
+
+   clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+   clip->thread3.const_urb_entry_read_length =
+      brw->clip.prog_data->curb_read_length;
 
    /* BRW_NEW_CURBE_OFFSETS */
-   key->curbe_offset = brw->curbe.clip_start;
+   clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+   clip->thread3.dispatch_grf_start_reg = 1;
+   clip->thread3.urb_entry_read_offset = 0;
 
    /* BRW_NEW_URB_FENCE */
-   key->nr_urb_entries = brw->urb.nr_clip_entries;
-   key->urb_size = brw->urb.vsize;
-
-   /* _NEW_TRANSOFORM */
-   key->depth_clamp = ctx->Transform.DepthClamp;
-}
-
-static drm_intel_bo *
-clip_unit_create_from_key(struct brw_context *brw,
-                         struct brw_clip_unit_key *key)
-{
-   struct intel_context *intel = &brw->intel;
-   struct brw_clip_unit_state clip;
-   drm_intel_bo *bo;
-
-   memset(&clip, 0, sizeof(clip));
-
-   clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
-   /* reloc */
-   clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
-
-   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-   clip.thread1.single_program_flow = 1;
-
-   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
-   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
-   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
-   clip.thread3.dispatch_grf_start_reg = 1;
-   clip.thread3.urb_entry_read_offset = 0;
-
-   clip.thread4.nr_urb_entries = key->nr_urb_entries;
-   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
+   clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
    /* If we have enough clip URB entries to run two threads, do so.
     */
-   if (key->nr_urb_entries >= 10) {
+   if (brw->urb.nr_clip_entries >= 10) {
       /* Half of the URB entries go to each thread, and it has to be an
        * even number.
        */
-      assert(key->nr_urb_entries % 2 == 0);
+      assert(brw->urb.nr_clip_entries % 2 == 0);
       
       /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
        * only 2 threads can output VUEs at a time.
        */
       if (intel->gen == 5)
-         clip.thread4.max_threads = 16 - 1;        
+         clip->thread4.max_threads = 16 - 1;
       else
-         clip.thread4.max_threads = 2 - 1;
+         clip->thread4.max_threads = 2 - 1;
    } else {
-      assert(key->nr_urb_entries >= 5);
-      clip.thread4.max_threads = 1 - 1;
+      assert(brw->urb.nr_clip_entries >= 5);
+      clip->thread4.max_threads = 1 - 1;
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD))
-      clip.thread4.max_threads = 0;
+      clip->thread4.max_threads = 0;
 
    if (unlikely(INTEL_DEBUG & DEBUG_STATS))
-      clip.thread4.stats_enable = 1;
+      clip->thread4.stats_enable = 1;
 
-   clip.clip5.userclip_enable_flags = 0x7f;
-   clip.clip5.userclip_must_clip = 1;
-   clip.clip5.guard_band_enable = 0;
-   if (!key->depth_clamp)
-      clip.clip5.viewport_z_clip_enable = 1;
-   clip.clip5.viewport_xy_clip_enable = 1;
-   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
-   clip.clip5.api_mode = BRW_CLIP_API_OGL;
-   clip.clip5.clip_mode = key->clip_mode;
+   clip->clip5.userclip_enable_flags = 0x7f;
+   clip->clip5.userclip_must_clip = 1;
+   clip->clip5.guard_band_enable = 0;
+   /* _NEW_TRANSFORM */
+   if (!ctx->Transform.DepthClamp)
+      clip->clip5.viewport_z_clip_enable = 1;
+   clip->clip5.viewport_xy_clip_enable = 1;
+   clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+   clip->clip5.api_mode = BRW_CLIP_API_OGL;
+   clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
 
    if (intel->is_g4x)
-      clip.clip5.negative_w_clip_test = 1;
-
-   clip.clip6.clipper_viewport_state_ptr = 0;
-   clip.viewport_xmin = -1;
-   clip.viewport_xmax = 1;
-   clip.viewport_ymin = -1;
-   clip.viewport_ymax = 1;
-
-   bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
-                        key, sizeof(*key),
-                        &brw->clip.prog_bo, 1,
-                        &clip, sizeof(clip));
+      clip->clip5.negative_w_clip_test = 1;
 
-   /* Emit clip program relocation */
-   assert(brw->clip.prog_bo);
-   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_clip_unit_state, thread0),
-                          brw->clip.prog_bo, clip.thread0.grf_reg_count << 1,
-                          I915_GEM_DOMAIN_INSTRUCTION, 0);
+   clip->clip6.clipper_viewport_state_ptr = 0;
+   clip->viewport_xmin = -1;
+   clip->viewport_xmax = 1;
+   clip->viewport_ymin = -1;
+   clip->viewport_ymax = 1;
 
-   return bo;
-}
-
-static void upload_clip_unit( struct brw_context *brw )
-{
-   struct brw_clip_unit_key key;
-
-   clip_unit_populate_key(brw, &key);
-
-   drm_intel_bo_unreference(brw->clip.state_bo);
-   brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
-                                        &key, sizeof(key),
-                                        &brw->clip.prog_bo, 1,
-                                        NULL);
-   if (brw->clip.state_bo == NULL) {
-      brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
-   }
+   brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
 }
 
 const struct brw_tracked_state brw_clip_unit = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM,
-      .brw   = (BRW_NEW_CURBE_OFFSETS |
+      .brw   = (BRW_NEW_BATCH |
+               BRW_NEW_PROGRAM_CACHE |
+               BRW_NEW_CURBE_OFFSETS |
                BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_CLIP_PROG
    },
-   .prepare = upload_clip_unit,
+   .prepare = brw_prepare_clip_unit,
 };