Revert "i965/icl: Add WA_2204188704 to disable pixel shader panic dispatch"
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
index 8ee2dff072cfdebfc2872a8ad2b022992bff87ca..c41d9551a1e22c48c12d147ab64b7954cb8e800d 100644 (file)
 #include "brw_cs.h"
 #include "main/framebuffer.h"
 
+void
+brw_enable_obj_preemption(struct brw_context *brw, bool enable)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen >= 9);
+
+   if (enable == brw->object_preemption)
+      return;
+
+   /* A fixed function pipe flush is required before modifying this field */
+   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
+
+   bool replay_mode = enable ?
+      GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;
+
+   /* enable object level preemption */
+   brw_load_register_imm32(brw, CS_CHICKEN1,
+                           replay_mode | GEN9_REPLAY_MODE_MASK);
+
+   brw->object_preemption = enable;
+}
+
 static void
 brw_upload_initial_gpu_state(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   const struct brw_compiler *compiler = brw->screen->compiler;
+
    /* On platforms with hardware contexts, we can set our initial GPU state
     * right away rather than doing it via state atoms.  This saves a small
     * amount of overhead on every draw call.
@@ -55,26 +80,75 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
    if (!brw->hw_ctx)
       return;
 
-   if (brw->gen == 6)
+   if (devinfo->gen == 6)
       brw_emit_post_sync_nonzero_flush(brw);
 
    brw_upload_invariant_state(brw);
 
-   if (brw->gen == 9) {
+   if (devinfo->gen == 11) {
+      /* The default behavior of bit 5 "Headerless Message for Pre-emptable
+       * Contexts" in SAMPLER MODE register is set to 0, which means
+       * headerless sampler messages are not allowed for pre-emptable
+       * contexts. Set the bit 5 to 1 to allow them.
+       */
+      brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
+                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
+                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);
+
+      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
+       * HALF_SLICE_CHICKEN7 register.
+       */
+      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
+                              TEXEL_OFFSET_FIX_MASK |
+                              TEXEL_OFFSET_FIX_ENABLE);
+
+      /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
+       * in L3CNTLREG register. The default setting of the bit is not the
+       * desirable behavior.
+       */
+      brw_load_register_imm32(brw, GEN8_L3CNTLREG,
+                              GEN8_L3CNTLREG_EDBC_NO_HANG);
+
+       /* WaEnableStateCacheRedirectToCS:icl */
+       brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
+                               GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE |
+                               REG_MASK(GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE));
+   }
+
+   if (devinfo->gen == 10 || devinfo->gen == 11) {
+      /* From gen10 workaround table in h/w specs:
+       *
+       *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
+       *     a value of 0xFFFF"
+       *
+       * This means that we end up setting the entire 3D_MODE state. Bits
+       * in this register control things such as slice hashing and we want
+       * the default values of zero at the moment.
+       */
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_3D_MODE  << 16 | (2 - 2));
+      OUT_BATCH(0xFFFF << 16);
+      ADVANCE_BATCH();
+   }
+
+   if (devinfo->gen == 9) {
       /* Recommended optimizations for Victim Cache eviction and floating
        * point blending.
        */
-      BEGIN_BATCH(3);
-      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-      OUT_BATCH(GEN7_CACHE_MODE_1);
-      OUT_BATCH(REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
-                REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
-                GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
-                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
-      ADVANCE_BATCH();
+      brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
+                              REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
+                              REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
+                              GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
+                              GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
+
+      if (gen_device_info_is_9lp(devinfo)) {
+         brw_load_register_imm32(brw, GEN7_GT_MODE,
+                                 GEN9_SUBSLICE_HASHING_MASK_BITS |
+                                 GEN9_SUBSLICE_HASHING_16x16);
+      }
    }
 
-   if (brw->gen >= 8) {
+   if (devinfo->gen >= 8) {
       gen8_emit_3dstate_sample_pattern(brw);
 
       BEGIN_BATCH(5);
@@ -94,26 +168,30 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
    /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
     * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
     *
-    * On Gen6-7.5, we use an execbuf parameter to do this for us.
-    * However, the kernel ignores that when execlists are in use.
-    * Fortunately, we can just write the registers from userspace
-    * on Gen8+, and they're context saved/restored.
+    * This is only safe on kernels with context isolation support.
     */
-   if (brw->gen >= 9) {
-      BEGIN_BATCH(3);
-      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-      OUT_BATCH(CS_DEBUG_MODE2);
-      OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
-                CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
-      ADVANCE_BATCH();
-   } else if (brw->gen == 8) {
-      BEGIN_BATCH(3);
-      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-      OUT_BATCH(INSTPM);
-      OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
-                INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
-      ADVANCE_BATCH();
+   if (!compiler->constant_buffer_0_is_relative) {
+      if (devinfo->gen >= 9) {
+         BEGIN_BATCH(3);
+         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+         OUT_BATCH(CS_DEBUG_MODE2);
+         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+         ADVANCE_BATCH();
+      } else if (devinfo->gen == 8) {
+         BEGIN_BATCH(3);
+         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+         OUT_BATCH(INSTPM);
+         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+         ADVANCE_BATCH();
+      }
    }
+
+   brw->object_preemption = false;
+
+   if (devinfo->gen >= 10)
+      brw_enable_obj_preemption(brw, true);
 }
 
 static inline const struct brw_tracked_state *
@@ -156,27 +234,30 @@ brw_copy_pipeline_atoms(struct brw_context *brw,
 void brw_init_state( struct brw_context *brw )
 {
    struct gl_context *ctx = &brw->ctx;
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
    /* Force the first brw_select_pipeline to emit pipeline select */
    brw->last_pipeline = BRW_NUM_PIPELINES;
 
    brw_init_caches(brw);
 
-   if (brw->gen >= 10)
+   if (devinfo->gen >= 11)
+      gen11_init_atoms(brw);
+   else if (devinfo->gen >= 10)
       gen10_init_atoms(brw);
-   else if (brw->gen >= 9)
+   else if (devinfo->gen >= 9)
       gen9_init_atoms(brw);
-   else if (brw->gen >= 8)
+   else if (devinfo->gen >= 8)
       gen8_init_atoms(brw);
-   else if (brw->is_haswell)
+   else if (devinfo->is_haswell)
       gen75_init_atoms(brw);
-   else if (brw->gen >= 7)
+   else if (devinfo->gen >= 7)
       gen7_init_atoms(brw);
-   else if (brw->gen >= 6)
+   else if (devinfo->gen >= 6)
       gen6_init_atoms(brw);
-   else if (brw->gen >= 5)
+   else if (devinfo->gen >= 5)
       gen5_init_atoms(brw);
-   else if (brw->is_g4x)
+   else if (devinfo->is_g4x)
       gen45_init_atoms(brw);
    else
       gen4_init_atoms(brw);
@@ -202,7 +283,7 @@ void brw_init_state( struct brw_context *brw )
    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
    ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
    ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
-   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
+   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
    ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
    ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
@@ -268,7 +349,6 @@ static struct dirty_bit_map mesa_bits[] = {
    DEFINE_BIT(_NEW_TRANSFORM),
    DEFINE_BIT(_NEW_VIEWPORT),
    DEFINE_BIT(_NEW_TEXTURE_STATE),
-   DEFINE_BIT(_NEW_ARRAY),
    DEFINE_BIT(_NEW_RENDERMODE),
    DEFINE_BIT(_NEW_BUFFERS),
    DEFINE_BIT(_NEW_CURRENT_ATTRIB),
@@ -321,7 +401,6 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
    DEFINE_BIT(BRW_NEW_STATS_WM),
    DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
-   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
    DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
    DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
    DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
@@ -341,6 +420,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
    DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
    DEFINE_BIT(BRW_NEW_DRAW_CALL),
+   DEFINE_BIT(BRW_NEW_AUX_STATE),
    {0, 0, 0}
 };
 
@@ -367,7 +447,7 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map)
 static inline void
 brw_upload_tess_programs(struct brw_context *brw)
 {
-   if (brw->tess_eval_program) {
+   if (brw->programs[MESA_SHADER_TESS_EVAL]) {
       brw_upload_tcs_prog(brw);
       brw_upload_tes_prog(brw);
    } else {
@@ -381,15 +461,19 @@ brw_upload_programs(struct brw_context *brw,
                     enum brw_pipeline pipeline)
 {
    struct gl_context *ctx = &brw->ctx;
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
    if (pipeline == BRW_RENDER_PIPELINE) {
       brw_upload_vs_prog(brw);
       brw_upload_tess_programs(brw);
 
-      if (brw->gen < 6)
-         brw_upload_ff_gs_prog(brw);
-      else
+      if (brw->programs[MESA_SHADER_GEOMETRY]) {
          brw_upload_gs_prog(brw);
+      } else {
+         brw->gs.base.prog_data = NULL;
+         if (devinfo->gen < 7)
+            brw_upload_ff_gs_prog(brw);
+      }
 
       /* Update the VUE map for data exiting the GS stage of the pipeline.
        * This comes from the last enabled shader stage.
@@ -397,9 +481,9 @@ brw_upload_programs(struct brw_context *brw,
       GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
       bool old_separate = brw->vue_map_geom_out.separate;
       struct brw_vue_prog_data *vue_prog_data;
-      if (brw->geometry_program)
+      if (brw->programs[MESA_SHADER_GEOMETRY])
          vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
-      else if (brw->tess_eval_program)
+      else if (brw->programs[MESA_SHADER_TESS_EVAL])
          vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
       else
          vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
@@ -421,12 +505,15 @@ brw_upload_programs(struct brw_context *brw,
 
       brw_upload_wm_prog(brw);
 
-      if (brw->gen < 6) {
+      if (devinfo->gen < 6) {
          brw_upload_clip_prog(brw);
          brw_upload_sf_prog(brw);
       }
+
+      brw_disk_cache_write_render_programs(brw);
    } else if (pipeline == BRW_COMPUTE_PIPELINE) {
       brw_upload_cs_prog(brw);
+      brw_disk_cache_write_compute_program(brw);
    }
 }
 
@@ -438,7 +525,7 @@ merge_ctx_state(struct brw_context *brw,
    state->brw |= brw->ctx.NewDriverState;
 }
 
-static inline void
+static ALWAYS_INLINE void
 check_and_emit_atom(struct brw_context *brw,
                     struct brw_state_flags *state,
                     const struct brw_tracked_state *atom)
@@ -453,49 +540,55 @@ static inline void
 brw_upload_pipeline_state(struct brw_context *brw,
                           enum brw_pipeline pipeline)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    struct gl_context *ctx = &brw->ctx;
    int i;
    static int dirty_count = 0;
    struct brw_state_flags state = brw->state.pipelines[pipeline];
-   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
+   const unsigned fb_samples =
+      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
 
    brw_select_pipeline(brw, pipeline);
 
-   if (0) {
+   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
       /* Always re-emit all state. */
       brw->NewGLState = ~0;
       ctx->NewDriverState = ~0ull;
    }
 
    if (pipeline == BRW_RENDER_PIPELINE) {
-      if (brw->fragment_program != ctx->FragmentProgram._Current) {
-         brw->fragment_program = ctx->FragmentProgram._Current;
+      if (brw->programs[MESA_SHADER_FRAGMENT] !=
+          ctx->FragmentProgram._Current) {
+         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
       }
 
-      if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
-         brw->tess_eval_program = ctx->TessEvalProgram._Current;
+      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
+          ctx->TessEvalProgram._Current) {
+         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
          brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
       }
 
-      if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
-         brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
+      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
+          ctx->TessCtrlProgram._Current) {
+         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
          brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
       }
 
-      if (brw->geometry_program != ctx->GeometryProgram._Current) {
-         brw->geometry_program = ctx->GeometryProgram._Current;
+      if (brw->programs[MESA_SHADER_GEOMETRY] !=
+          ctx->GeometryProgram._Current) {
+         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
          brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
       }
 
-      if (brw->vertex_program != ctx->VertexProgram._Current) {
-         brw->vertex_program = ctx->VertexProgram._Current;
+      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
+         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
       }
    }
 
-   if (brw->compute_program != ctx->ComputeProgram._Current) {
-      brw->compute_program = ctx->ComputeProgram._Current;
+   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
+      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
       brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
    }
 
@@ -515,7 +608,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
       return;
 
    /* Emit Sandybridge workaround flushes on every primitive, for safety. */
-   if (brw->gen == 6)
+   if (devinfo->gen == 6)
       brw_emit_post_sync_nonzero_flush(brw);
 
    brw_upload_programs(brw, pipeline);