#include "brw_cs.h"
#include "main/framebuffer.h"
+/* Toggle mid-object preemption on Gen9+ hardware.
+ *
+ * Programs the replay-mode field of the CS_CHICKEN1 register via an
+ * MI_LOAD_REGISTER_IMM: mid-object replay when \p enable is true,
+ * mid-buffer replay when it is false.  The current setting is cached in
+ * brw->object_preemption, so a request for the already-active mode is a
+ * no-op (avoiding a redundant pipeline flush).
+ *
+ * NOTE(review): replay_mode is declared bool — this assumes the
+ * GEN9_REPLAY_MODE_* field values are 0 and 1; verify against the
+ * register definitions.
+ */
+void
+brw_enable_obj_preemption(struct brw_context *brw, bool enable)
+{
+ ASSERTED const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ assert(devinfo->gen >= 9);
+
+ /* Skip the flush and register write if we are already in the
+ * requested mode.
+ */
+ if (enable == brw->object_preemption)
+ return;
+
+ /* A fixed function pipe flush is required before modifying this field */
+ brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
+
+ bool replay_mode = enable ?
+ GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;
+
+ /* Select the replay mode (the MASK bits make the masked LRI write take
+ * effect for this field).
+ */
+ brw_load_register_imm32(brw, CS_CHICKEN1,
+ replay_mode | GEN9_REPLAY_MODE_MASK);
+
+ /* Cache the new state so the next call can early-out. */
+ brw->object_preemption = enable;
+}
+
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ const struct brw_compiler *compiler = brw->screen->compiler;
+
/* On platforms with hardware contexts, we can set our initial GPU state
* right away rather than doing it via state atoms. This saves a small
* amount of overhead on every draw call.
if (!brw->hw_ctx)
return;
- if (brw->gen == 6)
+ if (devinfo->gen == 6)
brw_emit_post_sync_nonzero_flush(brw);
brw_upload_invariant_state(brw);
- /* Recommended optimization for Victim Cache eviction in pixel backend. */
- if (brw->gen >= 9) {
- BEGIN_BATCH(3);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
- OUT_BATCH(GEN7_CACHE_MODE_1);
- OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
- GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
- ADVANCE_BATCH();
+ if (devinfo->gen == 11) {
+ /* The default behavior of bit 5 "Headerless Message for Pre-emptable
+ * Contexts" in SAMPLER MODE register is set to 0, which means
+ * headerless sampler messages are not allowed for pre-emptable
+ * contexts. Set the bit 5 to 1 to allow them.
+ */
+ brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
+ HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
+ HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);
+
+ /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
+ * HALF_SLICE_CHICKEN7 register.
+ */
+ brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
+ TEXEL_OFFSET_FIX_MASK |
+ TEXEL_OFFSET_FIX_ENABLE);
+
+ /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
+ * in L3CNTLREG register. The default setting of the bit is not the
+ * desirable behavior.
+ */
+ brw_load_register_imm32(brw, GEN8_L3CNTLREG,
+ GEN8_L3CNTLREG_EDBC_NO_HANG);
+
+ /* WaEnableStateCacheRedirectToCS:icl */
+ brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
+ GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE |
+ REG_MASK(GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE));
}
- if (brw->gen >= 8) {
+ /* The hardware specification recommends disabling CCS repacking for
+ * compatibility with the decompression mechanism in the display
+ * controller.
+ */
+ if (devinfo->disable_ccs_repack) {
+ brw_load_register_imm32(brw, GEN7_CACHE_MODE_0,
+ GEN11_DISABLE_REPACKING_FOR_COMPRESSION |
+ REG_MASK(GEN11_DISABLE_REPACKING_FOR_COMPRESSION));
+ }
+
+ if (devinfo->gen == 9) {
+ /* Recommended optimizations for Victim Cache eviction and floating
+ * point blending.
+ */
+ brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
+ REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
+ REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
+ GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
+ GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
+ }
+
+ if (devinfo->gen >= 8) {
gen8_emit_3dstate_sample_pattern(brw);
BEGIN_BATCH(5);
OUT_BATCH(0);
ADVANCE_BATCH();
}
+
+ /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
+ * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
+ *
+ * This is only safe on kernels with context isolation support.
+ */
+ if (!compiler->constant_buffer_0_is_relative) {
+ if (devinfo->gen >= 9) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(CS_DEBUG_MODE2);
+ OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+ CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+ ADVANCE_BATCH();
+ } else if (devinfo->gen == 8) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(INSTPM);
+ OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+ INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+ ADVANCE_BATCH();
+ }
+ }
+
+ brw->object_preemption = false;
+
+ if (devinfo->gen >= 10)
+ brw_enable_obj_preemption(brw, true);
}
static inline const struct brw_tracked_state *
void brw_init_state( struct brw_context *brw )
{
struct gl_context *ctx = &brw->ctx;
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
/* Force the first brw_select_pipeline to emit pipeline select */
brw->last_pipeline = BRW_NUM_PIPELINES;
brw_init_caches(brw);
- if (brw->gen >= 9)
+ if (devinfo->gen >= 11)
+ gen11_init_atoms(brw);
+ else if (devinfo->gen >= 10)
+ gen10_init_atoms(brw);
+ else if (devinfo->gen >= 9)
gen9_init_atoms(brw);
- else if (brw->gen >= 8)
+ else if (devinfo->gen >= 8)
gen8_init_atoms(brw);
- else if (brw->is_haswell)
+ else if (devinfo->is_haswell)
gen75_init_atoms(brw);
- else if (brw->gen >= 7)
+ else if (devinfo->gen >= 7)
gen7_init_atoms(brw);
- else if (brw->gen >= 6)
+ else if (devinfo->gen >= 6)
gen6_init_atoms(brw);
- else if (brw->gen >= 5)
+ else if (devinfo->gen >= 5)
gen5_init_atoms(brw);
- else if (brw->is_g4x)
+ else if (devinfo->is_g4x)
gen45_init_atoms(brw);
else
gen4_init_atoms(brw);
ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
- ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
+ ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
DEFINE_BIT(_NEW_TRANSFORM),
DEFINE_BIT(_NEW_VIEWPORT),
DEFINE_BIT(_NEW_TEXTURE_STATE),
- DEFINE_BIT(_NEW_ARRAY),
DEFINE_BIT(_NEW_RENDERMODE),
DEFINE_BIT(_NEW_BUFFERS),
DEFINE_BIT(_NEW_CURRENT_ATTRIB),
DEFINE_BIT(_NEW_TRACK_MATRIX),
DEFINE_BIT(_NEW_PROGRAM),
DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
- DEFINE_BIT(_NEW_BUFFER_OBJECT),
DEFINE_BIT(_NEW_FRAG_CLAMP),
/* Avoid sign extension problems. */
{(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
DEFINE_BIT(BRW_NEW_STATS_WM),
DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
- DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
DEFINE_BIT(BRW_NEW_BLORP),
DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
+ DEFINE_BIT(BRW_NEW_DRAW_CALL),
+ DEFINE_BIT(BRW_NEW_AUX_STATE),
{0, 0, 0}
};
static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
- if (brw->tess_eval_program) {
+ if (brw->programs[MESA_SHADER_TESS_EVAL]) {
brw_upload_tcs_prog(brw);
brw_upload_tes_prog(brw);
} else {
enum brw_pipeline pipeline)
{
struct gl_context *ctx = &brw->ctx;
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
if (pipeline == BRW_RENDER_PIPELINE) {
brw_upload_vs_prog(brw);
brw_upload_tess_programs(brw);
- if (brw->gen < 6)
- brw_upload_ff_gs_prog(brw);
- else
+ if (brw->programs[MESA_SHADER_GEOMETRY]) {
brw_upload_gs_prog(brw);
+ } else {
+ brw->gs.base.prog_data = NULL;
+ if (devinfo->gen < 7)
+ brw_upload_ff_gs_prog(brw);
+ }
/* Update the VUE map for data exiting the GS stage of the pipeline.
* This comes from the last enabled shader stage.
GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
bool old_separate = brw->vue_map_geom_out.separate;
struct brw_vue_prog_data *vue_prog_data;
- if (brw->geometry_program)
+ if (brw->programs[MESA_SHADER_GEOMETRY])
vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
- else if (brw->tess_eval_program)
+ else if (brw->programs[MESA_SHADER_TESS_EVAL])
vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
else
vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
brw_upload_wm_prog(brw);
- if (brw->gen < 6) {
+ if (devinfo->gen < 6) {
brw_upload_clip_prog(brw);
brw_upload_sf_prog(brw);
}
+
+ brw_disk_cache_write_render_programs(brw);
} else if (pipeline == BRW_COMPUTE_PIPELINE) {
brw_upload_cs_prog(brw);
+ brw_disk_cache_write_compute_program(brw);
}
}
state->brw |= brw->ctx.NewDriverState;
}
-static inline void
+static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
struct brw_state_flags *state,
const struct brw_tracked_state *atom)
brw_upload_pipeline_state(struct brw_context *brw,
enum brw_pipeline pipeline)
{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
int i;
static int dirty_count = 0;
struct brw_state_flags state = brw->state.pipelines[pipeline];
- unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
+ const unsigned fb_samples =
+ MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
brw_select_pipeline(brw, pipeline);
- if (0) {
+ if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
+ brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
/* Always re-emit all state. */
brw->NewGLState = ~0;
ctx->NewDriverState = ~0ull;
}
if (pipeline == BRW_RENDER_PIPELINE) {
- if (brw->fragment_program != ctx->FragmentProgram._Current) {
- brw->fragment_program = ctx->FragmentProgram._Current;
+ if (brw->programs[MESA_SHADER_FRAGMENT] !=
+ ctx->FragmentProgram._Current) {
+ brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
}
- if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
- brw->tess_eval_program = ctx->TessEvalProgram._Current;
+ if (brw->programs[MESA_SHADER_TESS_EVAL] !=
+ ctx->TessEvalProgram._Current) {
+ brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
}
- if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
- brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
+ if (brw->programs[MESA_SHADER_TESS_CTRL] !=
+ ctx->TessCtrlProgram._Current) {
+ brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
}
- if (brw->geometry_program != ctx->GeometryProgram._Current) {
- brw->geometry_program = ctx->GeometryProgram._Current;
+ if (brw->programs[MESA_SHADER_GEOMETRY] !=
+ ctx->GeometryProgram._Current) {
+ brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
}
- if (brw->vertex_program != ctx->VertexProgram._Current) {
- brw->vertex_program = ctx->VertexProgram._Current;
+ if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
+ brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
}
}
- if (brw->compute_program != ctx->ComputeProgram._Current) {
- brw->compute_program = ctx->ComputeProgram._Current;
+ if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
+ brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
}
return;
/* Emit Sandybridge workaround flushes on every primitive, for safety. */
- if (brw->gen == 6)
+ if (devinfo->gen == 6)
brw_emit_post_sync_nonzero_flush(brw);
brw_upload_programs(brw, pipeline);