i965/gen7+: Move sampler state packets to the stage sampler state table update.
author Eric Anholt <eric@anholt.net>
Mon, 28 Apr 2014 18:27:22 +0000 (11:27 -0700)
committer Eric Anholt <eric@anholt.net>
Sat, 3 May 2014 00:01:40 +0000 (17:01 -0700)
Now that the stage state is passed into our setup of sampler states,
it's easy to store an identifier in it recording which stage the stage_state
belongs to, and then look up which packet to emit in a little table.

No performance difference on cairo on glamor (n=492).

v2: Don't forget to do the workaround flush on IVB.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/gen7_gs_state.c
src/mesa/drivers/dri/i965/gen7_sampler_state.c
src/mesa/drivers/dri/i965/gen7_vs_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c
src/mesa/drivers/dri/i965/gen8_gs_state.c
src/mesa/drivers/dri/i965/gen8_ps_state.c
src/mesa/drivers/dri/i965/gen8_vs_state.c

index 449fcfc478931068029c0b85e56390de7b485ba4..17ae6857b10a2e6a5971e929853a8baefc5e514a 100644 (file)
@@ -628,6 +628,9 @@ brwCreateContext(gl_api api,
    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
    brw->has_swizzling = screen->hw_has_swizzling;
 
+   brw->vs.base.stage = MESA_SHADER_VERTEX;
+   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
+   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
    if (brw->gen >= 8) {
       gen8_init_vtable_surface_functions(brw);
       gen7_init_vtable_sampler_functions(brw);
index 379af38cba01d6d91d09fdb8648cd8f556188d1b..92e1592bfa4a2bbea9604836a3e9b0f687faf861 100644 (file)
@@ -925,6 +925,7 @@ struct brw_transform_feedback_object {
  */
 struct brw_stage_state
 {
+   gl_shader_stage stage;
    struct brw_stage_prog_data *prog_data;
 
    /**
index d18ae155185ed2d6ea7936e212d5e014d970d0a6..06e6cf7500e72337f3b2e88ead52646960c98140 100644 (file)
@@ -66,12 +66,6 @@ upload_gs_state(struct brw_context *brw)
    /* CACHE_NEW_GS_PROG */
    const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
 
-   /* CACHE_NEW_SAMPLER */
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2));
-   OUT_BATCH(stage_state->sampler_offset);
-   ADVANCE_BATCH();
-
    gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
 
    /**
@@ -198,7 +192,7 @@ const struct brw_tracked_state gen7_gs_state = {
                 BRW_NEW_GS_BINDING_TABLE |
                 BRW_NEW_BATCH |
                 BRW_NEW_PUSH_CONSTANT_ALLOCATION),
-      .cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER
+      .cache = CACHE_NEW_GS_PROG
    },
    .emit = upload_gs_state,
 };
index 8eb337d5492e557d4911a41bea508e23d14c32c7..74d5e9e0a7e3856f41f8735021a0b45ad10fa39c 100644 (file)
@@ -187,6 +187,11 @@ gen7_upload_sampler_state_table(struct brw_context *brw,
    struct gl_context *ctx = &brw->ctx;
    struct gen7_sampler_state *samplers;
    uint32_t sampler_count = stage_state->sampler_count;
+   static const uint16_t packet_headers[] = {
+      [MESA_SHADER_VERTEX] = _3DSTATE_SAMPLER_STATE_POINTERS_VS,
+      [MESA_SHADER_GEOMETRY] = _3DSTATE_SAMPLER_STATE_POINTERS_GS,
+      [MESA_SHADER_FRAGMENT] = _3DSTATE_SAMPLER_STATE_POINTERS_PS,
+   };
 
    GLbitfield SamplersUsed = prog->SamplersUsed;
 
@@ -207,7 +212,15 @@ gen7_upload_sampler_state_table(struct brw_context *brw,
       }
    }
 
-   brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+  if (brw->gen == 7 && !brw->is_haswell &&
+      stage_state->stage == MESA_SHADER_VERTEX) {
+      gen7_emit_vs_workaround_flush(brw);
+  }
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(packet_headers[stage_state->stage] << 16 | (2 - 2));
+   OUT_BATCH(stage_state->sampler_offset);
+   ADVANCE_BATCH();
 }
 
 void
index b5fc871cfe10fe869a1f7d12056bd019334b5e6d..6b1f6807950d5fc3f2dbf72ac8c244f7feb7dcb2 100644 (file)
@@ -75,12 +75,6 @@ upload_vs_state(struct brw_context *brw)
    if (!brw->is_haswell)
       gen7_emit_vs_workaround_flush(brw);
 
-   /* CACHE_NEW_SAMPLER */
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
-   OUT_BATCH(stage_state->sampler_offset);
-   ADVANCE_BATCH();
-
    gen7_upload_constant_state(brw, stage_state, true /* active */,
                               _3DSTATE_CONSTANT_VS);
 
@@ -126,7 +120,7 @@ const struct brw_tracked_state gen7_vs_state = {
                BRW_NEW_VS_BINDING_TABLE |
                BRW_NEW_BATCH |
                 BRW_NEW_PUSH_CONSTANT_ALLOCATION),
-      .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
+      .cache = CACHE_NEW_VS_PROG
    },
    .emit = upload_vs_state,
 };
index eabadee3bf992aa023446b1037c8980f27d14772..2b95ef1ffd2f716f45430f9135c45a048ba498e4 100644 (file)
@@ -143,12 +143,6 @@ upload_ps_state(struct brw_context *brw)
    const int max_threads_shift = brw->is_haswell ?
       HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
 
-   /* CACHE_NEW_SAMPLER */
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
-   OUT_BATCH(brw->wm.base.sampler_offset);
-   ADVANCE_BATCH();
-
    /* CACHE_NEW_WM_PROG */
    gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
 
@@ -281,8 +275,7 @@ const struct brw_tracked_state gen7_ps_state = {
                BRW_NEW_PS_BINDING_TABLE |
                BRW_NEW_BATCH |
                 BRW_NEW_PUSH_CONSTANT_ALLOCATION),
-      .cache = (CACHE_NEW_SAMPLER |
-               CACHE_NEW_WM_PROG)
+      .cache = (CACHE_NEW_WM_PROG)
    },
    .emit = upload_ps_state,
 };
index 97fbf84f7ff71329aa6ef0bee592f209731955ea..6baada31ee9716aa7baf072f2b2546f12e43c644 100644 (file)
@@ -36,12 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw)
    /* CACHE_NEW_GS_PROG */
    const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
 
-   /* CACHE_NEW_SAMPLER */
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2));
-   OUT_BATCH(stage_state->sampler_offset);
-   ADVANCE_BATCH();
-
    gen8_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
 
    if (active) {
@@ -135,7 +129,7 @@ const struct brw_tracked_state gen8_gs_state = {
                 BRW_NEW_GS_BINDING_TABLE |
                 BRW_NEW_BATCH |
                 BRW_NEW_PUSH_CONSTANT_ALLOCATION),
-      .cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER
+      .cache = CACHE_NEW_GS_PROG
    },
    .emit = gen8_upload_gs_state,
 };
index 085606072bded3a84e924befe28a72e1a9223d51..aa7183ba932a371a61e64846afbfe67a0dd4e612 100644 (file)
@@ -136,12 +136,6 @@ upload_ps_state(struct brw_context *brw)
    struct gl_context *ctx = &brw->ctx;
    uint32_t dw3 = 0, dw6 = 0, dw7 = 0;
 
-   /* CACHE_NEW_SAMPLER */
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
-   OUT_BATCH(brw->wm.base.sampler_offset);
-   ADVANCE_BATCH();
-
    /* CACHE_NEW_WM_PROG */
    gen8_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
 
@@ -254,7 +248,7 @@ const struct brw_tracked_state gen8_ps_state = {
                BRW_NEW_PS_BINDING_TABLE |
                BRW_NEW_BATCH |
                BRW_NEW_PUSH_CONSTANT_ALLOCATION,
-      .cache = CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG
+      .cache = CACHE_NEW_WM_PROG
    },
    .emit = upload_ps_state,
 };
index 373cfe4b6f86d48ce59b06edef5127e9f54f6053..e7634eec7cf5096575efc7faf61e7218b35dcb43 100644 (file)
@@ -62,12 +62,6 @@ upload_vs_state(struct brw_context *brw)
    /* CACHE_NEW_VS_PROG */
    const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base;
 
-   /* CACHE_NEW_SAMPLER */
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
-   OUT_BATCH(stage_state->sampler_offset);
-   ADVANCE_BATCH();
-
    gen8_upload_constant_state(brw, stage_state, true /* active */,
                               _3DSTATE_CONSTANT_VS);
 
@@ -119,7 +113,7 @@ const struct brw_tracked_state gen8_vs_state = {
                BRW_NEW_VS_BINDING_TABLE |
                BRW_NEW_BATCH |
                BRW_NEW_PUSH_CONSTANT_ALLOCATION,
-      .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
+      .cache = CACHE_NEW_VS_PROG
    },
    .emit = upload_vs_state,
 };