From 6b4303f358494e6c39476291c539af50d730910a Mon Sep 17 00:00:00 2001 From: Glenn Kennard Date: Sun, 5 Mar 2017 18:26:51 +0100 Subject: [PATCH] r600g: Implement scratch buffer state management (v2) v2: add Glenn's fixes Signed-off-by: Glenn Kennard Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 24 +++++ src/gallium/drivers/r600/r600_hw_context.c | 4 + src/gallium/drivers/r600/r600_pipe.c | 3 + src/gallium/drivers/r600/r600_pipe.h | 16 ++- src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600_state_common.c | 105 +++++++++++++++++++ 6 files changed, 152 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index fcd742c5f95..48934158bdf 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2297,6 +2297,30 @@ static void evergreen_emit_tcs_constant_buffers(struct r600_context *rctx, struc 0); } +void evergreen_setup_scratch_buffers(struct r600_context *rctx) { + static const struct { + unsigned ring_base; + unsigned item_size; + unsigned ring_size; + } regs[EG_NUM_HW_STAGES] = { + [R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_028914_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE }, + [R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_028910_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE }, + [R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_02890C_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE }, + [R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_028908_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE }, + [EG_HW_STAGE_LS] = { R_008E10_SQ_LSTMP_RING_BASE, R_028830_SQ_LSTMP_RING_ITEMSIZE, R_008E14_SQ_LSTMP_RING_SIZE }, + [EG_HW_STAGE_HS] = { R_008E18_SQ_HSTMP_RING_BASE, R_028834_SQ_HSTMP_RING_ITEMSIZE, R_008E1C_SQ_HSTMP_RING_SIZE } + }; + + for (unsigned i = 0; i < EG_NUM_HW_STAGES; i++) { + struct r600_pipe_shader *stage = rctx->hw_shader_stages[i].shader; + + if (stage && unlikely(stage->scratch_space_needed)) { + r600_setup_scratch_area_for_shader(rctx, stage, + &rctx->scratch_buffers[i], regs[i].ring_base, regs[i].item_size, regs[i].ring_size); + } + } +} + static void evergreen_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base, unsigned pkt_flags) diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 4b6d951af6b..3ce18251044 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -415,6 +415,10 @@ void r600_begin_new_cs(struct r600_context *ctx) r600_sampler_states_dirty(ctx, &samplers->states); } + for (shader = 0; shader < ARRAY_SIZE(ctx->scratch_buffers); shader++) { + ctx->scratch_buffers[shader].dirty = true; + } + r600_postflush_resume_features(&ctx->b); /* Re-emit the draw state. */ diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 6c021e568d8..cc35d867092 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -71,6 +71,9 @@ static void r600_destroy_context(struct pipe_context *context) r600_sb_context_destroy(rctx->sb_context); + for (sh = 0; sh < (rctx->b.chip_class < EVERGREEN ? R600_NUM_HW_STAGES : EG_NUM_HW_STAGES); sh++) { + r600_resource_reference(&rctx->scratch_buffers[sh].buffer, NULL); + } r600_resource_reference(&rctx->dummy_cmask, NULL); r600_resource_reference(&rctx->dummy_fmask, NULL); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 740b50aec5e..6d090930522 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -478,6 +478,14 @@ struct r600_image_state { struct r600_image_view views[R600_MAX_IMAGES]; }; +/* Used to spill shader temps */ +struct r600_scratch_buffer { + struct r600_resource *buffer; + boolean dirty; + unsigned size; + unsigned item_size; +}; + struct r600_context { struct r600_common_context b; struct r600_screen *screen; @@ -594,6 +602,8 @@ struct r600_context { unsigned last_num_tcs_input_cp; unsigned lds_alloc; + struct r600_scratch_buffer scratch_buffers[MAX2(R600_NUM_HW_STAGES, EG_NUM_HW_STAGES)]; + /* Debug state. */ bool is_debug; struct radeon_saved_cs last_gfx; @@ -703,7 +713,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx, struct r600_surface *surf); void evergreen_update_db_shader_control(struct r600_context * rctx); bool evergreen_adjust_gprs(struct r600_context *rctx); - +void evergreen_setup_scratch_buffers(struct r600_context *rctx); uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_misc_state *a, unsigned nr_cbufs); /* r600_blit.c */ @@ -754,6 +764,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen, unsigned sample_count, unsigned usage); void r600_update_db_shader_control(struct r600_context * rctx); +void r600_setup_scratch_buffers(struct r600_context *rctx); /* r600_hw_context.c */ void r600_context_gfx_flush(void *context, unsigned flags, @@ -819,6 +830,9 @@ void r600_sampler_states_dirty(struct r600_context *rctx, struct r600_sampler_states *state); void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state); void r600_set_sample_locations_constant_buffer(struct r600_context *rctx); +void r600_setup_scratch_area_for_shader(struct r600_context *rctx, + struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch, + unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg); uint32_t r600_translate_stencil_op(int s_op); uint32_t r600_translate_fill(uint32_t func); unsigned r600_tex_wrap(unsigned wrap); diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 4b23facf6f8..b837d1309ae 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -180,6 +180,7 @@ struct r600_pipe_shader { unsigned db_shader_control; unsigned ps_depth_export; unsigned enabled_stream_buffers_mask; + unsigned scratch_space_needed; /* size of scratch space (if > 0) counted in vec4 */ }; /* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index b697f2e24ed..9994f476cc2 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1610,6 +1610,104 @@ void r600_update_compressed_resource_state(struct r600_context *rctx, bool compu } } +/* update MEM_SCRATCH buffers if needed */ +void r600_setup_scratch_area_for_shader(struct r600_context *rctx, + struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch, + unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg) +{ + unsigned num_ses = rctx->screen->b.info.max_se; + unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes; + unsigned nthreads = 128; + + unsigned itemsize = shader->scratch_space_needed * 4; + unsigned size = align(itemsize * nthreads * num_pipes * num_ses * 4, 256); + + if (scratch->dirty || + unlikely(shader->scratch_space_needed != scratch->item_size || + size > scratch->size)) { + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; + + scratch->dirty = false; + + if (size > scratch->size) { + // Release prior one if any + if (scratch->buffer) { + pipe_resource_reference((struct pipe_resource**)&scratch->buffer, NULL); + } + + scratch->buffer = (struct r600_resource *)pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_DEFAULT, size); + if (scratch->buffer) { + scratch->size = size; + } + } + + scratch->item_size = shader->scratch_space_needed; + + radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); + + // multi-SE chips need programming per SE + for (unsigned se = 0; se < num_ses; se++) { + struct r600_resource *rbuffer = scratch->buffer; + unsigned size_per_se = size / num_ses; + + // Direct to particular SE + if (num_ses > 1) { + radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX, + S_0802C_INSTANCE_INDEX(0) | + S_0802C_SE_INDEX(se) | + S_0802C_INSTANCE_BROADCAST_WRITES(1) | + S_0802C_SE_BROADCAST_WRITES(0)); + } + + radeon_set_config_reg(cs, ring_base_reg, (rbuffer->gpu_address + size_per_se * se) >> 8); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, + RADEON_USAGE_READWRITE, + RADEON_PRIO_SCRATCH_BUFFER)); + radeon_set_context_reg(cs, item_size_reg, itemsize); + radeon_set_config_reg(cs, ring_size_reg, size_per_se >> 8); + } + + // Restore broadcast mode + if (num_ses > 1) { + radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX, + S_0802C_INSTANCE_INDEX(0) | + S_0802C_SE_INDEX(0) | + S_0802C_INSTANCE_BROADCAST_WRITES(1) | + S_0802C_SE_BROADCAST_WRITES(1)); + } + + radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); + } +} + +void r600_setup_scratch_buffers(struct r600_context *rctx) { + static const struct { + unsigned ring_base; + unsigned item_size; + unsigned ring_size; + } regs[R600_NUM_HW_STAGES] = { + [R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE }, + [R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE }, + [R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE }, + [R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE } + }; + + for (unsigned i = 0; i < R600_NUM_HW_STAGES; i++) { + struct r600_pipe_shader *stage = rctx->hw_shader_stages[i].shader; + + if (stage && unlikely(stage->scratch_space_needed)) { + r600_setup_scratch_area_for_shader(rctx, stage, + &rctx->scratch_buffers[i], regs[i].ring_base, regs[i].item_size, regs[i].ring_size); + } + } +} + #define SELECT_SHADER_OR_FAIL(x) do { \ r600_shader_select(ctx, rctx->x##_shader, &x##_dirty); \ if (unlikely(!rctx->x##_shader->current)) \ @@ -1785,6 +1883,13 @@ static bool r600_update_derived_state(struct r600_context *rctx) r600_update_db_shader_control(rctx); } + /* For each shader stage that needs to spill, set up buffer for MEM_SCRATCH */ + if (rctx->b.chip_class >= EVERGREEN) { + evergreen_setup_scratch_buffers(rctx); + } else { + r600_setup_scratch_buffers(rctx); + } + /* on R600 we stuff masks + txq info into one constant buffer */ /* on evergreen we only need a txq info one */ if (rctx->ps_shader) { -- 2.30.2