i965: make sure check_and_emit_atom gets inlined
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
index 9a858e64a2e3ae9139757e2882378554c4eec91b..1ae45ba2ac1c2c52daff9d3a7a852f47caf89eb2 100644 (file)
@@ -60,8 +60,10 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
 
    brw_upload_invariant_state(brw);
 
-   /* Recommended optimization for Victim Cache eviction in pixel backend. */
-   if (brw->gen >= 9) {
+   if (brw->gen == 9) {
+      /* Recommended optimizations for Victim Cache eviction and floating
+       * point blending.
+       */
       BEGIN_BATCH(3);
       OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
       OUT_BATCH(GEN7_CACHE_MODE_1);
@@ -70,6 +72,15 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
                 GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
       ADVANCE_BATCH();
+
+      if (brw->is_broxton) {
+         BEGIN_BATCH(3);
+         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+         OUT_BATCH(GEN7_GT_MODE);
+         OUT_BATCH(GEN9_SUBSLICE_HASHING_MASK_BITS |
+                   GEN9_SUBSLICE_HASHING_16x16);
+         ADVANCE_BATCH();
+      }
    }
 
    if (brw->gen >= 8) {
@@ -88,6 +99,30 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
       OUT_BATCH(0);
       ADVANCE_BATCH();
    }
+
+   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
+    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
+    *
+    * On Gen6-7.5, we use an execbuf parameter to do this for us.
+    * However, the kernel ignores that when execlists are in use.
+    * Fortunately, we can just write the registers from userspace
+    * on Gen8+, and they're context saved/restored.
+    */
+   if (brw->gen >= 9) {
+      BEGIN_BATCH(3);
+      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+      OUT_BATCH(CS_DEBUG_MODE2);
+      OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+                CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+      ADVANCE_BATCH();
+   } else if (brw->gen == 8) {
+      BEGIN_BATCH(3);
+      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+      OUT_BATCH(INSTPM);
+      OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+                INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+      ADVANCE_BATCH();
+   }
 }
 
 static inline const struct brw_tracked_state *
@@ -250,7 +285,6 @@ static struct dirty_bit_map mesa_bits[] = {
    DEFINE_BIT(_NEW_TRACK_MATRIX),
    DEFINE_BIT(_NEW_PROGRAM),
    DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
-   DEFINE_BIT(_NEW_BUFFER_OBJECT),
    DEFINE_BIT(_NEW_FRAG_CLAMP),
    /* Avoid sign extension problems. */
    {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
@@ -315,6 +349,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_BLORP),
    DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
    DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
+   DEFINE_BIT(BRW_NEW_DRAW_CALL),
    {0, 0, 0}
 };
 
@@ -412,7 +447,7 @@ merge_ctx_state(struct brw_context *brw,
    state->brw |= brw->ctx.NewDriverState;
 }
 
-static inline void
+static ALWAYS_INLINE void
 check_and_emit_atom(struct brw_context *brw,
                     struct brw_state_flags *state,
                     const struct brw_tracked_state *atom)
@@ -431,7 +466,8 @@ brw_upload_pipeline_state(struct brw_context *brw,
    int i;
    static int dirty_count = 0;
    struct brw_state_flags state = brw->state.pipelines[pipeline];
-   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
+   const unsigned fb_samples =
+      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
 
    brw_select_pipeline(brw, pipeline);