#include "brw_context.h"
+#include "brw_defines.h"
#include "brw_state.h"
+#include "brw_program.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_cs.h"
#include "main/framebuffer.h"
-static const struct brw_tracked_state *gen4_atoms[] =
+void
+brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
- &brw_interpolation_map,
-
- &brw_clip_prog, /* must do before state base address */
- &brw_sf_prog, /* must do before state base address */
-
- /* Once all the programs are done, we know how large urb entry
- * sizes need to be and can decide if we need to change the urb
- * layout.
- */
- &brw_curbe_offsets,
- &brw_recalculate_urb_fence,
-
- &brw_cc_vp,
- &brw_cc_unit,
+ ASSERTED const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ assert(devinfo->gen >= 9);
- /* Surface state setup. Must come before the VS/WM unit. The binding
- * table upload must be last.
- */
- &brw_vs_pull_constants,
- &brw_wm_pull_constants,
- &brw_renderbuffer_surfaces,
- &brw_texture_surfaces,
- &brw_vs_binding_table,
- &brw_wm_binding_table,
-
- &brw_fs_samplers,
- &brw_vs_samplers,
-
- /* These set up state for brw_psp_urb_cbs */
- &brw_wm_unit,
- &brw_sf_vp,
- &brw_sf_unit,
- &brw_vs_unit, /* always required, enabled or not */
- &brw_clip_unit,
- &brw_gs_unit,
-
- /* Command packets:
- */
- &brw_invariant_state,
- &brw_state_base_address,
-
- &brw_binding_table_pointers,
- &brw_blend_constant_color,
-
- &brw_depthbuffer,
-
- &brw_polygon_stipple,
- &brw_polygon_stipple_offset,
+ if (enable == brw->object_preemption)
+ return;
- &brw_line_stipple,
- &brw_aa_line_parameters,
+ /* A fixed function pipe flush is required before modifying this field */
+ brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
- &brw_psp_urb_cbs,
+ bool replay_mode = enable ?
+ GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;
- &brw_drawing_rect,
- &brw_indices, /* must come before brw_vertices */
- &brw_index_buffer,
- &brw_vertices,
+ /* Enable or disable object-level preemption, per replay_mode. */
+ brw_load_register_imm32(brw, CS_CHICKEN1,
+ replay_mode | GEN9_REPLAY_MODE_MASK);
- &brw_constant_buffer
-};
+ brw->object_preemption = enable;
+}
-static const struct brw_tracked_state *gen6_atoms[] =
+static void
+brw_upload_initial_gpu_state(struct brw_context *brw)
{
- &gen6_clip_vp,
- &gen6_sf_vp,
-
- /* Command packets: */
-
- /* must do before binding table pointers, cc state ptrs */
- &brw_state_base_address,
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ const struct brw_compiler *compiler = brw->screen->compiler;
- &brw_cc_vp,
- &gen6_viewport_state, /* must do after *_vp stages */
-
- &gen6_urb,
- &gen6_blend_state, /* must do before cc unit */
- &gen6_color_calc_state, /* must do before cc unit */
- &gen6_depth_stencil_state, /* must do before cc unit */
-
- &gen6_vs_push_constants, /* Before vs_state */
- &gen6_gs_push_constants, /* Before gs_state */
- &gen6_wm_push_constants, /* Before wm_state */
-
- /* Surface state setup. Must come before the VS/WM unit. The binding
- * table upload must be last.
+ /* On platforms with hardware contexts, we can set our initial GPU state
+ * right away rather than doing it via state atoms. This saves a small
+ * amount of overhead on every draw call.
*/
- &brw_vs_pull_constants,
- &brw_vs_ubo_surfaces,
- &brw_gs_pull_constants,
- &brw_gs_ubo_surfaces,
- &brw_wm_pull_constants,
- &brw_wm_ubo_surfaces,
- &gen6_renderbuffer_surfaces,
- &brw_texture_surfaces,
- &gen6_sol_surface,
- &brw_vs_binding_table,
- &gen6_gs_binding_table,
- &brw_wm_binding_table,
-
- &brw_fs_samplers,
- &brw_vs_samplers,
- &brw_gs_samplers,
- &gen6_sampler_state,
- &gen6_multisample_state,
-
- &gen6_vs_state,
- &gen6_gs_state,
- &gen6_clip_state,
- &gen6_sf_state,
- &gen6_wm_state,
-
- &gen6_scissor_state,
-
- &gen6_binding_table_pointers,
-
- &brw_depthbuffer,
-
- &brw_polygon_stipple,
- &brw_polygon_stipple_offset,
-
- &brw_line_stipple,
- &brw_aa_line_parameters,
-
- &brw_drawing_rect,
-
- &brw_indices, /* must come before brw_vertices */
- &brw_index_buffer,
- &brw_vertices,
-};
-
-static const struct brw_tracked_state *gen7_render_atoms[] =
-{
- /* Command packets: */
-
- /* must do before binding table pointers, cc state ptrs */
- &brw_state_base_address,
-
- &brw_cc_vp,
- &gen7_sf_clip_viewport,
+ if (!brw->hw_ctx)
+ return;
- &gen7_l3_state,
- &gen7_push_constant_space,
- &gen7_urb,
- &gen6_blend_state, /* must do before cc unit */
- &gen6_color_calc_state, /* must do before cc unit */
- &gen6_depth_stencil_state, /* must do before cc unit */
+ if (devinfo->gen == 6)
+ brw_emit_post_sync_nonzero_flush(brw);
- &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
+ brw_upload_invariant_state(brw);
- &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
- &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
- &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
- &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
- &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
+ if (devinfo->gen == 11) {
+ /* The default value of bit 5 "Headerless Message for Pre-emptable
+ * Contexts" in the SAMPLER MODE register is 0, which means
+ * headerless sampler messages are not allowed for pre-emptable
+ * contexts. Set bit 5 to 1 to allow them.
+ */
+ brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
+ HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
+ HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);
- &gen6_vs_push_constants, /* Before vs_state */
- &gen7_tcs_push_constants,
- &gen7_tes_push_constants,
- &gen6_gs_push_constants, /* Before gs_state */
- &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
+ /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
+ * HALF_SLICE_CHICKEN7 register.
+ */
+ brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
+ TEXEL_OFFSET_FIX_MASK |
+ TEXEL_OFFSET_FIX_ENABLE);
- /* Surface state setup. Must come before the VS/WM unit. The binding
- * table upload must be last.
- */
- &brw_vs_pull_constants,
- &brw_vs_ubo_surfaces,
- &brw_vs_abo_surfaces,
- &brw_tcs_pull_constants,
- &brw_tcs_ubo_surfaces,
- &brw_tcs_abo_surfaces,
- &brw_tes_pull_constants,
- &brw_tes_ubo_surfaces,
- &brw_tes_abo_surfaces,
- &brw_gs_pull_constants,
- &brw_gs_ubo_surfaces,
- &brw_gs_abo_surfaces,
- &brw_wm_pull_constants,
- &brw_wm_ubo_surfaces,
- &brw_wm_abo_surfaces,
- &gen6_renderbuffer_surfaces,
- &brw_texture_surfaces,
- &brw_vs_binding_table,
- &brw_tcs_binding_table,
- &brw_tes_binding_table,
- &brw_gs_binding_table,
- &brw_wm_binding_table,
-
- &brw_fs_samplers,
- &brw_vs_samplers,
- &brw_tcs_samplers,
- &brw_tes_samplers,
- &brw_gs_samplers,
- &gen6_multisample_state,
-
- &gen7_vs_state,
- &gen7_hs_state,
- &gen7_te_state,
- &gen7_ds_state,
- &gen7_gs_state,
- &gen7_sol_state,
- &gen7_clip_state,
- &gen7_sbe_state,
- &gen7_sf_state,
- &gen7_wm_state,
- &gen7_ps_state,
-
- &gen6_scissor_state,
-
- &gen7_depthbuffer,
-
- &brw_polygon_stipple,
- &brw_polygon_stipple_offset,
-
- &brw_line_stipple,
- &brw_aa_line_parameters,
-
- &brw_drawing_rect,
-
- &brw_indices, /* must come before brw_vertices */
- &brw_index_buffer,
- &brw_vertices,
-
- &haswell_cut_index,
-};
+ /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
+ * in the L3CNTLREG register. The default setting of the bit is not
+ * the desired behavior.
+ */
+ brw_load_register_imm32(brw, GEN8_L3CNTLREG,
+ GEN8_L3CNTLREG_EDBC_NO_HANG);
-static const struct brw_tracked_state *gen7_compute_atoms[] =
-{
- &brw_state_base_address,
- &gen7_l3_state,
- &brw_cs_image_surfaces,
- &gen7_cs_push_constants,
- &brw_cs_pull_constants,
- &brw_cs_ubo_surfaces,
- &brw_cs_abo_surfaces,
- &brw_cs_texture_surfaces,
- &brw_cs_work_groups_surface,
- &brw_cs_samplers,
- &brw_cs_state,
-};
+ /* WaEnableStateCacheRedirectToCS:icl */
+ brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
+ GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE |
+ REG_MASK(GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE));
+ }
-static const struct brw_tracked_state *gen8_render_atoms[] =
-{
- /* Command packets: */
- &gen8_state_base_address,
-
- &brw_cc_vp,
- &gen8_sf_clip_viewport,
-
- &gen7_l3_state,
- &gen7_push_constant_space,
- &gen7_urb,
- &gen8_blend_state,
- &gen6_color_calc_state,
-
- &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
-
- &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
- &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
- &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
- &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
- &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
-
- &gen6_vs_push_constants, /* Before vs_state */
- &gen7_tcs_push_constants,
- &gen7_tes_push_constants,
- &gen6_gs_push_constants, /* Before gs_state */
- &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
-
- /* Surface state setup. Must come before the VS/WM unit. The binding
- * table upload must be last.
+ /* The hardware specification recommends disabling repacking for
+ * compatibility with the decompression mechanism in the display controller.
*/
- &brw_vs_pull_constants,
- &brw_vs_ubo_surfaces,
- &brw_vs_abo_surfaces,
- &brw_tcs_pull_constants,
- &brw_tcs_ubo_surfaces,
- &brw_tcs_abo_surfaces,
- &brw_tes_pull_constants,
- &brw_tes_ubo_surfaces,
- &brw_tes_abo_surfaces,
- &brw_gs_pull_constants,
- &brw_gs_ubo_surfaces,
- &brw_gs_abo_surfaces,
- &brw_wm_pull_constants,
- &brw_wm_ubo_surfaces,
- &brw_wm_abo_surfaces,
- &gen6_renderbuffer_surfaces,
- &brw_texture_surfaces,
- &brw_vs_binding_table,
- &brw_tcs_binding_table,
- &brw_tes_binding_table,
- &brw_gs_binding_table,
- &brw_wm_binding_table,
-
- &brw_fs_samplers,
- &brw_vs_samplers,
- &brw_tcs_samplers,
- &brw_tes_samplers,
- &brw_gs_samplers,
- &gen8_multisample_state,
-
- &gen8_disable_stages,
- &gen8_vs_state,
- &gen8_hs_state,
- &gen7_te_state,
- &gen8_ds_state,
- &gen8_gs_state,
- &gen8_sol_state,
- &gen6_clip_state,
- &gen8_raster_state,
- &gen8_sbe_state,
- &gen8_sf_state,
- &gen8_ps_blend,
- &gen8_ps_extra,
- &gen8_ps_state,
- &gen8_wm_depth_stencil,
- &gen8_wm_state,
-
- &gen6_scissor_state,
-
- &gen7_depthbuffer,
-
- &brw_polygon_stipple,
- &brw_polygon_stipple_offset,
-
- &brw_line_stipple,
- &brw_aa_line_parameters,
-
- &brw_drawing_rect,
-
- &gen8_vf_topology,
-
- &brw_indices,
- &gen8_index_buffer,
- &gen8_vertices,
-
- &haswell_cut_index,
- &gen8_pma_fix,
-};
-
-static const struct brw_tracked_state *gen8_compute_atoms[] =
-{
- &gen8_state_base_address,
- &gen7_l3_state,
- &brw_cs_image_surfaces,
- &gen7_cs_push_constants,
- &brw_cs_pull_constants,
- &brw_cs_ubo_surfaces,
- &brw_cs_abo_surfaces,
- &brw_cs_texture_surfaces,
- &brw_cs_work_groups_surface,
- &brw_cs_samplers,
- &brw_cs_state,
-};
+ if (devinfo->disable_ccs_repack) {
+ brw_load_register_imm32(brw, GEN7_CACHE_MODE_0,
+ GEN11_DISABLE_REPACKING_FOR_COMPRESSION |
+ REG_MASK(GEN11_DISABLE_REPACKING_FOR_COMPRESSION));
+ }
-static void
-brw_upload_initial_gpu_state(struct brw_context *brw)
-{
- /* On platforms with hardware contexts, we can set our initial GPU state
- * right away rather than doing it via state atoms. This saves a small
- * amount of overhead on every draw call.
- */
- if (!brw->hw_ctx)
- return;
+ if (devinfo->gen == 9) {
+ /* Recommended optimizations for Victim Cache eviction and floating
+ * point blending.
+ */
+ brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
+ REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
+ REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
+ GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
+ GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
+ }
- if (brw->gen == 6)
- brw_emit_post_sync_nonzero_flush(brw);
+ if (devinfo->gen >= 8) {
+ gen8_emit_3dstate_sample_pattern(brw);
- brw_upload_invariant_state(brw);
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
- /* Recommended optimization for Victim Cache eviction in pixel backend. */
- if (brw->gen >= 9) {
- BEGIN_BATCH(3);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
- OUT_BATCH(GEN7_CACHE_MODE_1);
- OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
- GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
+ OUT_BATCH(0);
ADVANCE_BATCH();
}
- if (brw->gen >= 8) {
- gen8_emit_3dstate_sample_pattern(brw);
+ /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
+ * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
+ *
+ * This is only safe on kernels with context isolation support.
+ */
+ if (!compiler->constant_buffer_0_is_relative) {
+ if (devinfo->gen >= 9) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(CS_DEBUG_MODE2);
+ OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+ CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+ ADVANCE_BATCH();
+ } else if (devinfo->gen == 8) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(INSTPM);
+ OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+ INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+ ADVANCE_BATCH();
+ }
}
+
+ brw->object_preemption = false;
+
+ if (devinfo->gen >= 10)
+ brw_enable_obj_preemption(brw, true);
}
static inline const struct brw_tracked_state *
}
}
-static void
+void
brw_copy_pipeline_atoms(struct brw_context *brw,
enum brw_pipeline pipeline,
const struct brw_tracked_state **atoms,
void brw_init_state( struct brw_context *brw )
{
struct gl_context *ctx = &brw->ctx;
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
/* Force the first brw_select_pipeline to emit pipeline select */
brw->last_pipeline = BRW_NUM_PIPELINES;
- STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
- STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
- STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
- ARRAY_SIZE(brw->render_atoms));
- STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
- ARRAY_SIZE(brw->render_atoms));
- STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
- ARRAY_SIZE(brw->compute_atoms));
- STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
- ARRAY_SIZE(brw->compute_atoms));
-
brw_init_caches(brw);
- if (brw->gen >= 8) {
- brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
- gen8_render_atoms,
- ARRAY_SIZE(gen8_render_atoms));
- brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
- gen8_compute_atoms,
- ARRAY_SIZE(gen8_compute_atoms));
- } else if (brw->gen == 7) {
- brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
- gen7_render_atoms,
- ARRAY_SIZE(gen7_render_atoms));
- brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
- gen7_compute_atoms,
- ARRAY_SIZE(gen7_compute_atoms));
- } else if (brw->gen == 6) {
- brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
- gen6_atoms, ARRAY_SIZE(gen6_atoms));
- } else {
- brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
- gen4_atoms, ARRAY_SIZE(gen4_atoms));
- }
+ if (devinfo->gen >= 11)
+ gen11_init_atoms(brw);
+ else if (devinfo->gen >= 10)
+ gen10_init_atoms(brw);
+ else if (devinfo->gen >= 9)
+ gen9_init_atoms(brw);
+ else if (devinfo->gen >= 8)
+ gen8_init_atoms(brw);
+ else if (devinfo->is_haswell)
+ gen75_init_atoms(brw);
+ else if (devinfo->gen >= 7)
+ gen7_init_atoms(brw);
+ else if (devinfo->gen >= 6)
+ gen6_init_atoms(brw);
+ else if (devinfo->gen >= 5)
+ gen5_init_atoms(brw);
+ else if (devinfo->is_g4x)
+ gen45_init_atoms(brw);
+ else
+ gen4_init_atoms(brw);
brw_upload_initial_gpu_state(brw);
ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
- ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
+ ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
+ ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
}
DEFINE_BIT(_NEW_POLYGONSTIPPLE),
DEFINE_BIT(_NEW_SCISSOR),
DEFINE_BIT(_NEW_STENCIL),
- DEFINE_BIT(_NEW_TEXTURE),
+ DEFINE_BIT(_NEW_TEXTURE_OBJECT),
DEFINE_BIT(_NEW_TRANSFORM),
DEFINE_BIT(_NEW_VIEWPORT),
- DEFINE_BIT(_NEW_ARRAY),
+ DEFINE_BIT(_NEW_TEXTURE_STATE),
DEFINE_BIT(_NEW_RENDERMODE),
DEFINE_BIT(_NEW_BUFFERS),
DEFINE_BIT(_NEW_CURRENT_ATTRIB),
DEFINE_BIT(_NEW_TRACK_MATRIX),
DEFINE_BIT(_NEW_PROGRAM),
DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
- DEFINE_BIT(_NEW_BUFFER_OBJECT),
DEFINE_BIT(_NEW_FRAG_CLAMP),
/* Avoid sign extension problems. */
{(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
- DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
DEFINE_BIT(BRW_NEW_PRIMITIVE),
DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
DEFINE_BIT(BRW_NEW_STATS_WM),
DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
- DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
- DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
DEFINE_BIT(BRW_NEW_URB_SIZE),
DEFINE_BIT(BRW_NEW_CC_STATE),
+ DEFINE_BIT(BRW_NEW_BLORP),
+ DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
+ DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
+ DEFINE_BIT(BRW_NEW_DRAW_CALL),
+ DEFINE_BIT(BRW_NEW_AUX_STATE),
{0, 0, 0}
};
{
for (int i = 0; bit_map[i].bit != 0; i++) {
if (bit_map[i].count > 1) {
- fprintf(stderr, "0x%016lx: %12d (%s)\n",
+ fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
bit_map[i].bit, bit_map[i].count, bit_map[i].name);
}
}
static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
- if (brw->tess_eval_program) {
- uint64_t per_vertex_slots = brw->tess_eval_program->Base.InputsRead;
- uint32_t per_patch_slots =
- brw->tess_eval_program->Base.PatchInputsRead;
-
- /* The TCS may have additional outputs which aren't read by the
- * TES (possibly for cross-thread communication). These need to
- * be stored in the Patch URB Entry as well.
- */
- if (brw->tess_ctrl_program) {
- per_vertex_slots |= brw->tess_ctrl_program->Base.OutputsWritten;
- per_patch_slots |=
- brw->tess_ctrl_program->Base.PatchOutputsWritten;
- }
-
- brw_upload_tcs_prog(brw, per_vertex_slots, per_patch_slots);
- brw_upload_tes_prog(brw, per_vertex_slots, per_patch_slots);
+ if (brw->programs[MESA_SHADER_TESS_EVAL]) {
+ brw_upload_tcs_prog(brw);
+ brw_upload_tes_prog(brw);
} else {
- brw->tcs.prog_data = NULL;
brw->tcs.base.prog_data = NULL;
- brw->tes.prog_data = NULL;
brw->tes.base.prog_data = NULL;
}
}
brw_upload_programs(struct brw_context *brw,
enum brw_pipeline pipeline)
{
+ struct gl_context *ctx = &brw->ctx;
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
if (pipeline == BRW_RENDER_PIPELINE) {
brw_upload_vs_prog(brw);
brw_upload_tess_programs(brw);
- if (brw->gen < 6)
- brw_upload_ff_gs_prog(brw);
- else
+ if (brw->programs[MESA_SHADER_GEOMETRY]) {
brw_upload_gs_prog(brw);
+ } else {
+ brw->gs.base.prog_data = NULL;
+ if (devinfo->gen < 7)
+ brw_upload_ff_gs_prog(brw);
+ }
/* Update the VUE map for data exiting the GS stage of the pipeline.
* This comes from the last enabled shader stage.
*/
GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
bool old_separate = brw->vue_map_geom_out.separate;
- if (brw->geometry_program)
- brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
- else if (brw->tess_eval_program)
- brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map;
+ struct brw_vue_prog_data *vue_prog_data;
+ if (brw->programs[MESA_SHADER_GEOMETRY])
+ vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
+ else if (brw->programs[MESA_SHADER_TESS_EVAL])
+ vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
else
- brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
+ vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
+
+ brw->vue_map_geom_out = vue_prog_data->vue_map;
/* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
if (old_slots != brw->vue_map_geom_out.slots_valid ||
old_separate != brw->vue_map_geom_out.separate)
brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
+ if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
+ VARYING_BIT_VIEWPORT) {
+ ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
+ brw->clip.viewport_count =
+ (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
+ ctx->Const.MaxViewports : 1;
+ }
+
brw_upload_wm_prog(brw);
+
+ if (devinfo->gen < 6) {
+ brw_upload_clip_prog(brw);
+ brw_upload_sf_prog(brw);
+ }
+
+ brw_disk_cache_write_render_programs(brw);
} else if (pipeline == BRW_COMPUTE_PIPELINE) {
brw_upload_cs_prog(brw);
+ brw_disk_cache_write_compute_program(brw);
}
}
state->brw |= brw->ctx.NewDriverState;
}
-static inline void
+static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
struct brw_state_flags *state,
const struct brw_tracked_state *atom)
brw_upload_pipeline_state(struct brw_context *brw,
enum brw_pipeline pipeline)
{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
int i;
static int dirty_count = 0;
struct brw_state_flags state = brw->state.pipelines[pipeline];
- unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
+ const unsigned fb_samples =
+ MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
brw_select_pipeline(brw, pipeline);
- if (0) {
+ if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
+ brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
/* Always re-emit all state. */
brw->NewGLState = ~0;
ctx->NewDriverState = ~0ull;
}
if (pipeline == BRW_RENDER_PIPELINE) {
- if (brw->fragment_program != ctx->FragmentProgram._Current) {
- brw->fragment_program = ctx->FragmentProgram._Current;
+ if (brw->programs[MESA_SHADER_FRAGMENT] !=
+ ctx->FragmentProgram._Current) {
+ brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
}
- if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
- brw->tess_eval_program = ctx->TessEvalProgram._Current;
+ if (brw->programs[MESA_SHADER_TESS_EVAL] !=
+ ctx->TessEvalProgram._Current) {
+ brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
}
- if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
- brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
+ if (brw->programs[MESA_SHADER_TESS_CTRL] !=
+ ctx->TessCtrlProgram._Current) {
+ brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
}
- if (brw->geometry_program != ctx->GeometryProgram._Current) {
- brw->geometry_program = ctx->GeometryProgram._Current;
+ if (brw->programs[MESA_SHADER_GEOMETRY] !=
+ ctx->GeometryProgram._Current) {
+ brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
}
- if (brw->vertex_program != ctx->VertexProgram._Current) {
- brw->vertex_program = ctx->VertexProgram._Current;
+ if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
+ brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
}
}
- if (brw->compute_program != ctx->ComputeProgram._Current) {
- brw->compute_program = ctx->ComputeProgram._Current;
+ if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
+ brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
}
return;
/* Emit Sandybridge workaround flushes on every primitive, for safety. */
- if (brw->gen == 6)
+ if (devinfo->gen == 6)
brw_emit_post_sync_nonzero_flush(brw);
brw_upload_programs(brw, pipeline);
merge_ctx_state(brw, &state);
+ brw_upload_state_base_address(brw);
+
const struct brw_tracked_state *atoms =
brw_get_pipeline_atoms(brw, pipeline);
const int num_atoms = brw->num_atoms[pipeline];