i965: Assign PS kernel start pointers when we decide which kernels to use
authorKristian Høgsberg <krh@bitplanet.net>
Mon, 7 Jul 2014 21:43:30 +0000 (14:43 -0700)
committerKristian Høgsberg <krh@bitplanet.net>
Thu, 14 Aug 2014 20:57:26 +0000 (13:57 -0700)
Right now we decide which kernels to use and the GRF start offsets in
one place and emit the kernel pointers later.  The logic of how to map
8, 16 and 32 kernels to kernel start pointers follows the same logic as which
GRF start offsets to use, so lets figure out these two things in one place.

Signed-off-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
src/mesa/drivers/dri/i965/gen6_wm_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c
src/mesa/drivers/dri/i965/gen8_ps_state.c

index 047e036f5b66641ca8c3cf750244d9993ad57dab..de95db8b8ab46c16c52cf61eb21b01abf9506672 100644 (file)
@@ -71,7 +71,7 @@ upload_wm_state(struct brw_context *brw)
    struct gl_context *ctx = &brw->ctx;
    const struct brw_fragment_program *fp =
       brw_fragment_program_const(brw->fragment_program);
-   uint32_t dw2, dw4, dw5, dw6;
+   uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;
 
    /* _NEW_BUFFERS */
    bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
@@ -109,7 +109,7 @@ upload_wm_state(struct brw_context *brw)
       ADVANCE_BATCH();
    }
 
-   dw2 = dw4 = dw5 = dw6 = 0;
+   dw2 = dw4 = dw5 = dw6 = ksp2 = 0;
    dw4 |= GEN6_WM_STATISTICS_ENABLE;
    dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
    dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
@@ -151,14 +151,19 @@ upload_wm_state(struct brw_context *brw)
                  GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
          dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
                  GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
-      } else
+         ksp0 = brw->wm.base.prog_offset;
+         ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
+      } else {
          dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
                 GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+         ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
+      }
    }
    else {
       dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
       dw4 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
               GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+      ksp0 = brw->wm.base.prog_offset;
    }
 
    /* CACHE_NEW_WM_PROG | _NEW_COLOR */
@@ -277,10 +282,7 @@ upload_wm_state(struct brw_context *brw)
 
    BEGIN_BATCH(9);
    OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
-   if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1)
-      OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
-   else
-      OUT_BATCH(brw->wm.base.prog_offset);
+   OUT_BATCH(ksp0);
    OUT_BATCH(dw2);
    if (brw->wm.prog_data->total_scratch) {
       OUT_RELOC(brw->wm.base.scratch_bo,
@@ -293,8 +295,7 @@ upload_wm_state(struct brw_context *brw)
    OUT_BATCH(dw5);
    OUT_BATCH(dw6);
    OUT_BATCH(0); /* kernel 1 pointer */
-   /* kernel 2 pointer */
-   OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
+   OUT_BATCH(ksp2);
    ADVANCE_BATCH();
 }
 
index 27a3f44cfc8fcda6cc554143aa498e866845a58b..c03d19db1639a4e27eef21c06d4ee8852b23f96f 100644 (file)
@@ -139,11 +139,11 @@ static void
 upload_ps_state(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
-   uint32_t dw2, dw4, dw5;
+   uint32_t dw2, dw4, dw5, ksp0, ksp2;
    const int max_threads_shift = brw->is_haswell ?
       HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
 
-   dw2 = dw4 = dw5 = 0;
+   dw2 = dw4 = dw5 = ksp2 = 0;
 
    dw2 |=
       (ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
@@ -231,22 +231,24 @@ upload_ps_state(struct brw_context *brw)
                  GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
          dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
                  GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
-      } else
+         ksp0 = brw->wm.base.prog_offset;
+         ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
+      } else {
          dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
                  GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
+         ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
+      }
    }
    else {
       dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
       dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
               GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
+      ksp0 = brw->wm.base.prog_offset;
    }
 
    BEGIN_BATCH(8);
    OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
-   if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1)
-      OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
-   else
-      OUT_BATCH(brw->wm.base.prog_offset);
+   OUT_BATCH(ksp0);
    OUT_BATCH(dw2);
    if (brw->wm.prog_data->total_scratch) {
       OUT_RELOC(brw->wm.base.scratch_bo,
@@ -258,7 +260,7 @@ upload_ps_state(struct brw_context *brw)
    OUT_BATCH(dw4);
    OUT_BATCH(dw5);
    OUT_BATCH(0); /* kernel 1 pointer */
-   OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
+   OUT_BATCH(ksp2);
    ADVANCE_BATCH();
 }
 
index 3d6d7f0f7a2956c7fb2b2a3bd11e8730c8837c89..f58d49c55cb3b17b03c317f52f22f373e7d93e29 100644 (file)
@@ -134,7 +134,7 @@ static void
 upload_ps_state(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
-   uint32_t dw3 = 0, dw6 = 0, dw7 = 0;
+   uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
 
    /* Initialize the execution mask with VMask.  Otherwise, derivatives are
     * incorrect for subspans where some of the pixels are unlit.  We believe
@@ -203,22 +203,24 @@ upload_ps_state(struct brw_context *brw)
                  GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
          dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
                  GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
+         ksp0 = brw->wm.base.prog_offset;
+         ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
       } else {
          dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
                  GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
+
+         ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
       }
    } else {
       dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
       dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
               GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
+      ksp0 = brw->wm.base.prog_offset;
    }
 
    BEGIN_BATCH(12);
    OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2));
-   if (brw->wm.prog_data->prog_offset_16 && min_invocations_per_fragment > 1)
-      OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
-   else
-      OUT_BATCH(brw->wm.base.prog_offset);
+   OUT_BATCH(ksp0);
    OUT_BATCH(0);
    OUT_BATCH(dw3);
    if (brw->wm.prog_data->total_scratch) {
@@ -233,7 +235,7 @@ upload_ps_state(struct brw_context *brw)
    OUT_BATCH(dw7);
    OUT_BATCH(0); /* kernel 1 pointer */
    OUT_BATCH(0);
-   OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
+   OUT_BATCH(ksp2);
    OUT_BATCH(0);
    ADVANCE_BATCH();
 }