#include "brw_state.h"
#include "brw_defines.h"
-struct brw_clip_unit_key {
- unsigned int total_grf;
- unsigned int urb_entry_read_length;
- unsigned int curb_entry_read_length;
- unsigned int clip_mode;
+static void
+upload_clip_vp(struct brw_context *brw)
+{ /* Reserve a brw_clipper_viewport through brw_state_batch(), remember where
+   * it landed in brw->clip.vp_offset, and fill in x/y extents computed from
+   * the current GL viewport size.  brw_upload_clip_unit() stores this offset
+   * in clip6.clipper_viewport_state_ptr when it enables the guard band.
+   *
+   * NOTE(review): the extents divide by ctx->Viewport.Width/Height with no
+   * check for zero; confirm callers never reach here with an empty viewport.
+   */
+ struct gl_context *ctx = &brw->ctx;
+ struct brw_clipper_viewport *vp;
- unsigned int curbe_offset;
+ vp = brw_state_batch(brw, AUB_TRACE_CLIP_VP_STATE,
+ sizeof(*vp), 32, &brw->clip.vp_offset); /* 32: presumably the alignment -- confirm against brw_state_batch() */
- unsigned int nr_urb_entries, urb_size;
+ const float maximum_post_clamp_delta = 4096; /* shared numerator for both extents */
+ float gbx = maximum_post_clamp_delta / (float) ctx->Viewport.Width;
+ float gby = maximum_post_clamp_delta / (float) ctx->Viewport.Height;
- GLboolean depth_clamp;
-};
+ vp->xmin = -gbx; /* extents are symmetric about zero on both axes */
+ vp->xmax = gbx;
+ vp->ymin = -gby;
+ vp->ymax = gby;
+}
static void
-clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+brw_upload_clip_unit(struct brw_context *brw) /* Emit the clip unit state.
+ * The state is written straight into space obtained from brw_state_batch()
+ * (offset kept in brw->clip.state_offset) instead of going through the
+ * brw_search_cache()/brw_upload_cache() key lookup implemented by the
+ * functions this change removes.  Sets CACHE_NEW_CLIP_UNIT when done.
+ */
{
- GLcontext *ctx = &brw->intel.ctx;
- memset(key, 0, sizeof(*key));
+ struct gl_context *ctx = &brw->ctx;
+ struct brw_clip_unit_state *clip;
- /* CACHE_NEW_CLIP_PROG */
- key->total_grf = brw->clip.prog_data->total_grf;
- key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
- key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
- key->clip_mode = brw->clip.prog_data->clip_mode;
+ /* _NEW_BUFFERS: fb->Width/Height feed the guard-band test further down */
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
- /* BRW_NEW_CURBE_OFFSETS */
- key->curbe_offset = brw->curbe.clip_start;
+ upload_clip_vp(brw); /* unconditional: emitted even if the guard-band test below fails */
- /* BRW_NEW_URB_FENCE */
- key->nr_urb_entries = brw->urb.nr_clip_entries;
- key->urb_size = brw->urb.vsize;
-
- /* _NEW_TRANSOFORM */
- key->depth_clamp = ctx->Transform.DepthClamp;
-}
+ clip = brw_state_batch(brw, AUB_TRACE_CLIP_STATE,
+ sizeof(*clip), 32, &brw->clip.state_offset);
+ memset(clip, 0, sizeof(*clip)); /* every field not assigned below stays zero */
-static drm_intel_bo *
-clip_unit_create_from_key(struct brw_context *brw,
- struct brw_clip_unit_key *key)
-{
- struct intel_context *intel = &brw->intel;
- struct brw_clip_unit_state clip;
- drm_intel_bo *bo;
+ /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG
+  *
+  * grf_reg_count is total_grf counted in blocks of 16, minus one (ALIGN
+  * presumably rounds up to a multiple of 16 -- confirm).  The same value
+  * shifted left by one is added to the program offset handed to
+  * brw_program_reloc(), matching the delta the removed
+  * drm_intel_bo_emit_reloc() call used; the result is stored shifted
+  * right by 6.
+  */
+ clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
+ 16 - 1);
+ clip->thread0.kernel_start_pointer =
+ brw_program_reloc(brw,
+ brw->clip.state_offset +
+ offsetof(struct brw_clip_unit_state, thread0),
+ brw->clip.prog_offset +
+ (clip->thread0.grf_reg_count << 1)) >> 6;
- memset(&clip, 0, sizeof(clip));
+ clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ clip->thread1.single_program_flow = 1;
- clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
- /* reloc */
- clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+ clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+ clip->thread3.const_urb_entry_read_length =
+ brw->clip.prog_data->curb_read_length;
- clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- clip.thread1.single_program_flow = 1;
-
- clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
- clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
- clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- clip.thread3.dispatch_grf_start_reg = 1;
- clip.thread3.urb_entry_read_offset = 0;
+ /* BRW_NEW_CURBE_OFFSETS: the read offset is programmed as clip_start * 2 */
+ clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+ clip->thread3.dispatch_grf_start_reg = 1;
+ clip->thread3.urb_entry_read_offset = 0;
- clip.thread4.nr_urb_entries = key->nr_urb_entries;
- clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+ /* BRW_NEW_URB_FENCE: allocation size is programmed as vsize - 1 */
+ clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
+ clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
/* If we have enough clip URB entries to run two threads, do so.
*/
- if (key->nr_urb_entries >= 10) {
+ if (brw->urb.nr_clip_entries >= 10) {
/* Half of the URB entries go to each thread, and it has to be an
* even number.
*/
- assert(key->nr_urb_entries % 2 == 0);
+ assert(brw->urb.nr_clip_entries % 2 == 0);
/* Although up to 16 concurrent Clip threads are allowed on Ironlake,
* only 2 threads can output VUEs at a time.
*/
- if (intel->gen == 5)
- clip.thread4.max_threads = 16 - 1;
+ if (brw->gen == 5)
+ clip->thread4.max_threads = 16 - 1;
else
- clip.thread4.max_threads = 2 - 1;
+ clip->thread4.max_threads = 2 - 1;
} else {
- assert(key->nr_urb_entries >= 5);
- clip.thread4.max_threads = 1 - 1;
+ assert(brw->urb.nr_clip_entries >= 5);
+ clip->thread4.max_threads = 1 - 1;
}
- if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
- clip.thread4.max_threads = 0;
-
- if (INTEL_DEBUG & DEBUG_STATS)
- clip.thread4.stats_enable = 1;
-
- clip.clip5.userclip_enable_flags = 0x7f;
- clip.clip5.userclip_must_clip = 1;
- clip.clip5.guard_band_enable = 0;
- if (!key->depth_clamp)
- clip.clip5.viewport_z_clip_enable = 1;
- clip.clip5.viewport_xy_clip_enable = 1;
- clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
- clip.clip5.api_mode = BRW_CLIP_API_OGL;
- clip.clip5.clip_mode = key->clip_mode;
-
- if (intel->is_g4x)
- clip.clip5.negative_w_clip_test = 1;
-
- clip.clip6.clipper_viewport_state_ptr = 0;
- clip.viewport_xmin = -1;
- clip.viewport_xmax = 1;
- clip.viewport_ymin = -1;
- clip.viewport_ymax = 1;
-
- bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
- key, sizeof(*key),
- &brw->clip.prog_bo, 1,
- &clip, sizeof(clip));
-
- /* Emit clip program relocation */
- assert(brw->clip.prog_bo);
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_clip_unit_state, thread0),
- brw->clip.prog_bo, clip.thread0.grf_reg_count << 1,
- I915_GEM_DOMAIN_INSTRUCTION, 0);
-
- return bo;
-}
+ if (unlikely(INTEL_DEBUG & DEBUG_STATS))
+ clip->thread4.stats_enable = 1; /* NOTE(review): the removed code also forced
+  * max_threads = 0 under DEBUG_SINGLE_THREAD; that override has no
+  * counterpart in the added code -- confirm this is intended. */
+
+ /* _NEW_TRANSFORM: gen5 and G4X take ClipPlanesEnabled unmodified */
+ if (brw->gen == 5 || brw->is_g4x)
+ clip->clip5.userclip_enable_flags = ctx->Transform.ClipPlanesEnabled;
+ else
+ /* Up to 6 actual clip flags, plus the 7th for negative RHW workaround. */
+ clip->clip5.userclip_enable_flags = (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40;
+
+ clip->clip5.userclip_must_clip = 1;
+
+ /* enable guardband clipping if we can, i.e. only when the viewport starts
+  * at (0, 0) and its size equals the draw framebuffer's.  Otherwise
+  * guard_band_enable and clipper_viewport_state_ptr keep the zero written
+  * by the memset above.
+  */
+ if (ctx->Viewport.X == 0 &&
+ ctx->Viewport.Y == 0 &&
+ ctx->Viewport.Width == fb->Width &&
+ ctx->Viewport.Height == fb->Height)
+ {
+ clip->clip5.guard_band_enable = 1;
+ clip->clip6.clipper_viewport_state_ptr =
+ (brw->batch.bo->offset + brw->clip.vp_offset) >> 5;
+
+ /* emit clip viewport relocation: the batch buffer relocates against
+  * itself -- the target is vp_offset inside brw->batch.bo, patched at the
+  * clip6 member of the state emitted above.
+  */
+ drm_intel_bo_emit_reloc(brw->batch.bo,
+ (brw->clip.state_offset +
+ offsetof(struct brw_clip_unit_state, clip6)),
+ brw->batch.bo, brw->clip.vp_offset,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
-static void upload_clip_unit( struct brw_context *brw )
-{
- struct brw_clip_unit_key key;
+ /* _NEW_TRANSFORM: viewport Z clipping is enabled only when depth clamp is off */
+ if (!ctx->Transform.DepthClamp)
+ clip->clip5.viewport_z_clip_enable = 1;
+ clip->clip5.viewport_xy_clip_enable = 1;
+ clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+ clip->clip5.api_mode = BRW_CLIP_API_OGL;
+ clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
- clip_unit_populate_key(brw, &key);
+ if (brw->is_g4x)
+ clip->clip5.negative_w_clip_test = 1;
- drm_intel_bo_unreference(brw->clip.state_bo);
- brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
- &key, sizeof(key),
- &brw->clip.prog_bo, 1,
- NULL);
- if (brw->clip.state_bo == NULL) {
- brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
- }
+ clip->viewport_xmin = -1; /* same -1..1 extents the removed code path programmed */
+ clip->viewport_xmax = 1;
+ clip->viewport_ymin = -1;
+ clip->viewport_ymax = 1;
+
+ brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT; /* record in the dirty cache flags that the clip unit state changed */
}
const struct brw_tracked_state brw_clip_unit = {
.dirty = {
- .mesa = _NEW_TRANSFORM,
- .brw = (BRW_NEW_CURBE_OFFSETS |
+ .mesa = _NEW_TRANSFORM | _NEW_BUFFERS | _NEW_VIEWPORT, /* brw_upload_clip_unit() reads ctx->DrawBuffer and ctx->Viewport in addition to ctx->Transform */
+ .brw = (BRW_NEW_BATCH | /* presumably because the state now lives in batch space (brw_state_batch) -- confirm */
+ BRW_NEW_PROGRAM_CACHE | /* kernel_start_pointer is relocated via brw_program_reloc() using brw->clip.prog_offset */
+ BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
},
- .prepare = upload_clip_unit,
+ .emit = brw_upload_clip_unit, /* registered as the .emit hook; the removed entry used .prepare */
};