X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_state_upload.c;h=1b848593de654ba9fb22788abcf7ee165a7e1418;hb=46c35c61e9c5c1b56fdd9fcd4eb45591dd16d21d;hp=3c5af09f24fe661c359c2f8c8ca36d0e8866586b;hpb=fd91ab662d64746ceaddc6de9c5d684ac725799f;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 3c5af09f24f..1b848593de6 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -36,17 +36,17 @@ #include "drivers/common/meta.h" #include "intel_batchbuffer.h" #include "intel_buffers.h" +#include "brw_vs.h" +#include "brw_ff_gs.h" +#include "brw_gs.h" +#include "brw_wm.h" static const struct brw_tracked_state *gen4_atoms[] = { - &brw_vs_prog, /* must do before GS prog, state base address. */ - &brw_ff_gs_prog, /* must do before state base address */ - &brw_interpolation_map, &brw_clip_prog, /* must do before state base address */ &brw_sf_prog, /* must do before state base address */ - &brw_wm_prog, /* must do before state base address */ /* Once all the programs are done, we know how large urb entry * sizes need to be and can decide if we need to change the urb @@ -98,7 +98,7 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_psp_urb_cbs, &brw_drawing_rect, - &brw_indices, + &brw_indices, /* must come before brw_vertices */ &brw_index_buffer, &brw_vertices, @@ -107,10 +107,6 @@ static const struct brw_tracked_state *gen4_atoms[] = static const struct brw_tracked_state *gen6_atoms[] = { - &brw_vs_prog, /* must do before state base address */ - &brw_ff_gs_prog, /* must do before state base address */ - &brw_wm_prog, /* must do before state base address */ - &gen6_clip_vp, &gen6_sf_vp, @@ -128,6 +124,7 @@ static const struct brw_tracked_state *gen6_atoms[] = &gen6_depth_stencil_state, /* must do before cc unit */ &gen6_vs_push_constants, /* Before vs_state */ + &gen6_gs_push_constants, /* Before gs_state */ &gen6_wm_push_constants, /* Before wm_state */ /* Surface state setup. Must come before the VS/WM unit. The binding @@ -135,6 +132,8 @@ static const struct brw_tracked_state *gen6_atoms[] = */ &brw_vs_pull_constants, &brw_vs_ubo_surfaces, + &brw_gs_pull_constants, + &brw_gs_ubo_surfaces, &brw_wm_pull_constants, &brw_wm_ubo_surfaces, &gen6_renderbuffer_surfaces, @@ -146,6 +145,7 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_fs_samplers, &brw_vs_samplers, + &brw_gs_samplers, &gen6_sampler_state, &gen6_multisample_state, @@ -169,24 +169,19 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_drawing_rect, - &brw_indices, + &brw_indices, /* must come before brw_vertices */ &brw_index_buffer, &brw_vertices, }; static const struct brw_tracked_state *gen7_atoms[] = { - &brw_vs_prog, - &brw_gs_prog, - &brw_wm_prog, - /* Command packets: */ /* must do before binding table pointers, cc state ptrs */ &brw_state_base_address, &brw_cc_vp, - &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */ &gen7_sf_clip_viewport, &gen7_push_constant_space, @@ -196,7 +191,7 @@ static const struct brw_tracked_state *gen7_atoms[] = &gen6_depth_stencil_state, /* must do before cc unit */ &gen6_vs_push_constants, /* Before vs_state */ - &gen7_gs_push_constants, /* Before gs_state */ + &gen6_gs_push_constants, /* Before gs_state */ &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */ /* Surface state setup. Must come before the VS/WM unit. The binding @@ -244,7 +239,7 @@ static const struct brw_tracked_state *gen7_atoms[] = &brw_drawing_rect, - &brw_indices, + &brw_indices, /* must come before brw_vertices */ &brw_index_buffer, &brw_vertices, @@ -253,24 +248,19 @@ static const struct brw_tracked_state *gen7_atoms[] = static const struct brw_tracked_state *gen8_atoms[] = { - &brw_vs_prog, - &brw_gs_prog, - &brw_wm_prog, - /* Command packets: */ &gen8_state_base_address, &brw_cc_vp, - &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */ - &gen7_sf_clip_viewport, + &gen8_sf_clip_viewport, &gen7_push_constant_space, &gen7_urb, - &gen6_blend_state, + &gen8_blend_state, &gen6_color_calc_state, &gen6_vs_push_constants, /* Before vs_state */ - &gen7_gs_push_constants, /* Before gs_state */ + &gen6_gs_push_constants, /* Before gs_state */ &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */ /* Surface state setup. Must come before the VS/WM unit. The binding @@ -294,12 +284,12 @@ static const struct brw_tracked_state *gen8_atoms[] = &brw_fs_samplers, &brw_vs_samplers, &brw_gs_samplers, - &gen6_multisample_state, + &gen8_multisample_state, &gen8_disable_stages, &gen8_vs_state, - &gen7_gs_state, - &gen7_sol_state, + &gen8_gs_state, + &gen8_sol_state, &gen6_clip_state, &gen8_raster_state, &gen8_sbe_state, @@ -329,6 +319,7 @@ static const struct brw_tracked_state *gen8_atoms[] = &gen8_vertices, &haswell_cut_index, + &gen8_pma_fix, }; static void @@ -341,7 +332,14 @@ brw_upload_initial_gpu_state(struct brw_context *brw) if (!brw->hw_ctx) return; + if (brw->gen == 6) + intel_emit_post_sync_nonzero_flush(brw); + brw_upload_invariant_state(brw); + + if (brw->gen >= 8) { + gen8_emit_3dstate_sample_pattern(brw); + } } void brw_init_state( struct brw_context *brw ) @@ -350,6 +348,11 @@ void brw_init_state( struct brw_context *brw ) const struct brw_tracked_state **atoms; int num_atoms; + STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->atoms)); + STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->atoms)); + STATIC_ASSERT(ARRAY_SIZE(gen7_atoms) <= ARRAY_SIZE(brw->atoms)); + STATIC_ASSERT(ARRAY_SIZE(gen8_atoms) <= ARRAY_SIZE(brw->atoms)); + brw_init_caches(brw); if (brw->gen >= 8) { @@ -366,13 +369,19 @@ void brw_init_state( struct brw_context *brw ) num_atoms = ARRAY_SIZE(gen4_atoms); } - brw->atoms = atoms; brw->num_atoms = num_atoms; + /* This is to work around brw_context::atoms being declared const. We want + * it to be const, but it needs to be initialized somehow! + */ + struct brw_tracked_state *context_atoms = + (struct brw_tracked_state *) &brw->atoms[0]; + + for (int i = 0; i < num_atoms; i++) + context_atoms[i] = *atoms[i]; + while (num_atoms--) { - assert((*atoms)->dirty.mesa | - (*atoms)->dirty.brw | - (*atoms)->dirty.cache); + assert((*atoms)->dirty.mesa | (*atoms)->dirty.brw); assert((*atoms)->emit); atoms++; } @@ -380,7 +389,12 @@ void brw_init_state( struct brw_context *brw ) brw_upload_initial_gpu_state(brw); brw->state.dirty.mesa = ~0; - brw->state.dirty.brw = ~0; + brw->state.dirty.brw = ~0ull; + + /* ~0 is a nonsensical value which won't match anything we program, so + * the programming will take effect on the first time around. + */ + brw->pma_stall_bits = ~0; /* Make sure that brw->state.dirty.brw has enough bits to hold all possible * dirty flags. @@ -391,6 +405,7 @@ void brw_init_state( struct brw_context *brw ) ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK; ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD; ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER; + ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER; ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER; } @@ -406,9 +421,7 @@ void brw_destroy_state( struct brw_context *brw ) static bool check_state(const struct brw_state_flags *a, const struct brw_state_flags *b) { - return ((a->mesa & b->mesa) | - (a->brw & b->brw) | - (a->cache & b->cache)) != 0; + return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0; } static void accumulate_state( struct brw_state_flags *a, @@ -416,7 +429,6 @@ static void accumulate_state( struct brw_state_flags *a, { a->mesa |= b->mesa; a->brw |= b->brw; - a->cache |= b->cache; } @@ -426,11 +438,10 @@ static void xor_states( struct brw_state_flags *result, { result->mesa = a->mesa ^ b->mesa; result->brw = a->brw ^ b->brw; - result->cache = a->cache ^ b->cache; } struct dirty_bit_map { - uint32_t bit; + uint64_t bit; char *name; uint32_t count; }; @@ -467,11 +478,19 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), DEFINE_BIT(_NEW_BUFFER_OBJECT), DEFINE_BIT(_NEW_FRAG_CLAMP), - DEFINE_BIT(_NEW_VARYING_VP_INPUTS), + /* Avoid sign extension problems. */ + {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0}, {0, 0, 0} }; static struct dirty_bit_map brw_bits[] = { + DEFINE_BIT(BRW_NEW_FS_PROG_DATA), + DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA), + DEFINE_BIT(BRW_NEW_SF_PROG_DATA), + DEFINE_BIT(BRW_NEW_VS_PROG_DATA), + DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA), + DEFINE_BIT(BRW_NEW_GS_PROG_DATA), + DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA), DEFINE_BIT(BRW_NEW_URB_FENCE), DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM), DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM), @@ -503,41 +522,21 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_META_IN_PROGRESS), DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP), DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION), + DEFINE_BIT(BRW_NEW_NUM_SAMPLES), + DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER), + DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE), + DEFINE_BIT(BRW_NEW_CC_VP), + DEFINE_BIT(BRW_NEW_SF_VP), + DEFINE_BIT(BRW_NEW_CLIP_VP), + DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE), + DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS), {0, 0, 0} }; -static struct dirty_bit_map cache_bits[] = { - DEFINE_BIT(CACHE_NEW_CC_VP), - DEFINE_BIT(CACHE_NEW_CC_UNIT), - DEFINE_BIT(CACHE_NEW_WM_PROG), - DEFINE_BIT(CACHE_NEW_BLORP_BLIT_PROG), - DEFINE_BIT(CACHE_NEW_BLORP_CONST_COLOR_PROG), - DEFINE_BIT(CACHE_NEW_SAMPLER), - DEFINE_BIT(CACHE_NEW_WM_UNIT), - DEFINE_BIT(CACHE_NEW_SF_PROG), - DEFINE_BIT(CACHE_NEW_SF_VP), - DEFINE_BIT(CACHE_NEW_SF_UNIT), - DEFINE_BIT(CACHE_NEW_VS_UNIT), - DEFINE_BIT(CACHE_NEW_VS_PROG), - DEFINE_BIT(CACHE_NEW_FF_GS_UNIT), - DEFINE_BIT(CACHE_NEW_FF_GS_PROG), - DEFINE_BIT(CACHE_NEW_GS_PROG), - DEFINE_BIT(CACHE_NEW_CLIP_VP), - DEFINE_BIT(CACHE_NEW_CLIP_UNIT), - DEFINE_BIT(CACHE_NEW_CLIP_PROG), - {0, 0, 0} -}; - - static void -brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) +brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits) { - int i; - - for (i = 0; i < 32; i++) { - if (bit_map[i].bit == 0) - return; - + for (int i = 0; bit_map[i].bit != 0; i++) { if (bit_map[i].bit & bits) bit_map[i].count++; } @@ -546,15 +545,25 @@ brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) static void brw_print_dirty_count(struct dirty_bit_map *bit_map) { - int i; + for (int i = 0; bit_map[i].bit != 0; i++) { + if (bit_map[i].count > 1) { + fprintf(stderr, "0x%016lx: %12d (%s)\n", + bit_map[i].bit, bit_map[i].count, bit_map[i].name); + } + } +} + +static void +brw_upload_programs(struct brw_context *brw) +{ + brw_upload_vs_prog(brw); - for (i = 0; i < 32; i++) { - if (bit_map[i].bit == 0) - return; + if (brw->gen < 6) + brw_upload_ff_gs_prog(brw); + else + brw_upload_gs_prog(brw); - fprintf(stderr, "0x%08x: %12d (%s)\n", - bit_map[i].bit, bit_map[i].count, bit_map[i].name); - } + brw_upload_wm_prog(brw); } /*********************************************************************** @@ -576,8 +585,7 @@ void brw_upload_state(struct brw_context *brw) if (0) { /* Always re-emit all state. */ state->mesa |= ~0; - state->brw |= ~0; - state->cache |= ~0; + state->brw |= ~0ull; } if (brw->fragment_program != ctx->FragmentProgram._Current) { @@ -600,10 +608,19 @@ void brw_upload_state(struct brw_context *brw) brw->state.dirty.brw |= BRW_NEW_META_IN_PROGRESS; } - if ((state->mesa | state->cache | state->brw) == 0) + if (brw->num_samples != ctx->DrawBuffer->Visual.samples) { + brw->num_samples = ctx->DrawBuffer->Visual.samples; + brw->state.dirty.brw |= BRW_NEW_NUM_SAMPLES; + } + + if ((state->mesa | state->brw) == 0) return; - intel_check_front_buffer_rendering(brw); + /* Emit Sandybridge workaround flushes on every primitive, for safety. */ + if (brw->gen == 6) + intel_emit_post_sync_nonzero_flush(brw); + + brw_upload_programs(brw); if (unlikely(INTEL_DEBUG)) { /* Debug version which enforces various sanity checks on the @@ -615,7 +632,7 @@ void brw_upload_state(struct brw_context *brw) prev = *state; for (i = 0; i < brw->num_atoms; i++) { - const struct brw_tracked_state *atom = brw->atoms[i]; + const struct brw_tracked_state *atom = &brw->atoms[i]; struct brw_state_flags generated; if (check_state(state, &atom->dirty)) { @@ -635,7 +652,7 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < brw->num_atoms; i++) { - const struct brw_tracked_state *atom = brw->atoms[i]; + const struct brw_tracked_state *atom = &brw->atoms[i]; if (check_state(state, &atom->dirty)) { atom->emit(brw); @@ -645,15 +662,12 @@ void brw_upload_state(struct brw_context *brw) if (unlikely(INTEL_DEBUG & DEBUG_STATE)) { STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1); - STATIC_ASSERT(ARRAY_SIZE(cache_bits) == BRW_MAX_CACHE + 1); brw_update_dirty_count(mesa_bits, state->mesa); brw_update_dirty_count(brw_bits, state->brw); - brw_update_dirty_count(cache_bits, state->cache); if (dirty_count++ % 1000 == 0) { brw_print_dirty_count(mesa_bits); brw_print_dirty_count(brw_bits); - brw_print_dirty_count(cache_bits); fprintf(stderr, "\n"); } }