intel: Add a batch flush between front-buffer downsample and X protocol.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_clip_state.c
index 6015c8cbe9fde8e93830b256d3c1641d745d8bab..634719bd031b7026f39cafa3fc57287bbb99c604 100644 (file)
 #include "brw_defines.h"
 
 static void
-brw_prepare_clip_unit(struct brw_context *brw)
+upload_clip_vp(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
-   struct gl_context *ctx = &intel->ctx;
+   struct gl_context *ctx = &brw->ctx;
+   struct brw_clipper_viewport *vp;
+
+   vp = brw_state_batch(brw, AUB_TRACE_CLIP_VP_STATE,
+                        sizeof(*vp), 32, &brw->clip.vp_offset);
+
+   const float maximum_post_clamp_delta = 4096;
+   float gbx = maximum_post_clamp_delta / (float) ctx->Viewport.Width;
+   float gby = maximum_post_clamp_delta / (float) ctx->Viewport.Height;
+
+   vp->xmin = -gbx;
+   vp->xmax = gbx;
+   vp->ymin = -gby;
+   vp->ymax = gby;
+}
+
+static void
+brw_upload_clip_unit(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
    struct brw_clip_unit_state *clip;
 
-   clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
+   /* _NEW_BUFFERS */
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+   upload_clip_vp(brw);
+
+   clip = brw_state_batch(brw, AUB_TRACE_CLIP_STATE,
+                         sizeof(*clip), 32, &brw->clip.state_offset);
    memset(clip, 0, sizeof(*clip));
 
-   /* CACHE_NEW_CLIP_PROG */
+   /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */
    clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
                                 16 - 1);
-   /* reloc */
-   clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+   clip->thread0.kernel_start_pointer =
+      brw_program_reloc(brw,
+                       brw->clip.state_offset +
+                       offsetof(struct brw_clip_unit_state, thread0),
+                       brw->clip.prog_offset +
+                       (clip->thread0.grf_reg_count << 1)) >> 6;
 
    clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    clip->thread1.single_program_flow = 1;
@@ -75,7 +103,7 @@ brw_prepare_clip_unit(struct brw_context *brw)
       /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
        * only 2 threads can output VUEs at a time.
        */
-      if (intel->gen == 5)
+      if (brw->gen == 5)
          clip->thread4.max_threads = 16 - 1;
       else
          clip->thread4.max_threads = 2 - 1;
@@ -84,16 +112,37 @@ brw_prepare_clip_unit(struct brw_context *brw)
       clip->thread4.max_threads = 1 - 1;
    }
 
-   if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD))
-      clip->thread4.max_threads = 0;
-
    if (unlikely(INTEL_DEBUG & DEBUG_STATS))
       clip->thread4.stats_enable = 1;
 
-   clip->clip5.userclip_enable_flags = 0x7f;
+   /* _NEW_TRANSFORM */
+   if (brw->gen == 5 || brw->is_g4x)
+      clip->clip5.userclip_enable_flags = ctx->Transform.ClipPlanesEnabled;
+   else
+      /* Up to 6 actual clip flags, plus the 7th for negative RHW workaround. */
+      clip->clip5.userclip_enable_flags = (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40;
+
    clip->clip5.userclip_must_clip = 1;
-   clip->clip5.guard_band_enable = 0;
-   /* _NEW_TRANSOFORM */
+
+   /* enable guardband clipping if we can */
+   if (ctx->Viewport.X == 0 &&
+       ctx->Viewport.Y == 0 &&
+       ctx->Viewport.Width == fb->Width &&
+       ctx->Viewport.Height == fb->Height)
+   {
+      clip->clip5.guard_band_enable = 1;
+      clip->clip6.clipper_viewport_state_ptr =
+         (brw->batch.bo->offset + brw->clip.vp_offset) >> 5;
+
+      /* emit clip viewport relocation */
+      drm_intel_bo_emit_reloc(brw->batch.bo,
+                              (brw->clip.state_offset +
+                               offsetof(struct brw_clip_unit_state, clip6)),
+                              brw->batch.bo, brw->clip.vp_offset,
+                              I915_GEM_DOMAIN_INSTRUCTION, 0);
+   }
+
+   /* _NEW_TRANSFORM */
    if (!ctx->Transform.DepthClamp)
       clip->clip5.viewport_z_clip_enable = 1;
    clip->clip5.viewport_xy_clip_enable = 1;
@@ -101,33 +150,25 @@ brw_prepare_clip_unit(struct brw_context *brw)
    clip->clip5.api_mode = BRW_CLIP_API_OGL;
    clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
 
-   if (intel->is_g4x)
+   if (brw->is_g4x)
       clip->clip5.negative_w_clip_test = 1;
 
-   clip->clip6.clipper_viewport_state_ptr = 0;
    clip->viewport_xmin = -1;
    clip->viewport_xmax = 1;
    clip->viewport_ymin = -1;
    clip->viewport_ymax = 1;
 
-   /* Emit clip program relocation */
-   assert(brw->clip.prog_bo);
-   drm_intel_bo_emit_reloc(intel->batch.bo,
-                          (brw->clip.state_offset +
-                           offsetof(struct brw_clip_unit_state, thread0)),
-                          brw->clip.prog_bo, clip->thread0.grf_reg_count << 1,
-                          I915_GEM_DOMAIN_INSTRUCTION, 0);
-
    brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
 }
 
 const struct brw_tracked_state brw_clip_unit = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM,
+      .mesa  = _NEW_TRANSFORM | _NEW_BUFFERS | _NEW_VIEWPORT,
       .brw   = (BRW_NEW_BATCH |
+               BRW_NEW_PROGRAM_CACHE |
                BRW_NEW_CURBE_OFFSETS |
                BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_CLIP_PROG
    },
-   .prepare = brw_prepare_clip_unit,
+   .emit = brw_upload_clip_unit,
 };