From: Marek Olšák Date: Mon, 15 Sep 2014 21:34:28 +0000 (+0200) Subject: radeonsi: rework how shader pointers to descriptors are set X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3ce91c727f2a00a05f414351266b0b45d677611e;p=mesa.git radeonsi: rework how shader pointers to descriptors are set This is mainly needed for tessellation where a VS can be bound as VS, ES, or LS, and TES (tess. evaluationshader) can be bound as VS or ES or neither. Therefore we need the ability to move pointers to descriptors between shaders arbitrarily. The idea is that the context has a mapping from PIPE_SHADER_x to SPI_SHADER_USER_DATA_x. After a shader is enabled or disabled, si_shader_change_notify should be called to update this mapping accordingly. There is a dirty flag for each shader pointer, but only one emit function for all pointers in the whole context, whose code and logic is separated from descriptors. Reviewed-by: Michel Dänzer --- diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index bbfd36dcbeb..f31cccbba17 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -141,7 +141,7 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx, static void si_init_descriptors(struct si_context *sctx, struct si_descriptors *desc, - unsigned shader_userdata_reg, + unsigned shader_userdata_index, unsigned element_dw_size, unsigned num_elements, void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) @@ -150,7 +150,7 @@ static void si_init_descriptors(struct si_context *sctx, assert(num_elements <= sizeof(desc->dirty_mask)*8); desc->atom.emit = (void*)emit_func; - desc->shader_userdata_reg = shader_userdata_reg; + desc->shader_userdata_offset = shader_userdata_index * 4; desc->element_dw_size = element_dw_size; desc->num_elements = num_elements; desc->context_size = num_elements * element_dw_size * 4; @@ -181,14 +181,11 @@ static void si_update_descriptors(struct si_context *sctx, if (desc->dirty_mask) { desc->atom.num_dw = 7 + /* copy */ - (4 + desc->element_dw_size) * util_bitcount64(desc->dirty_mask) + /* update */ - 4; /* pointer update */ - - if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 && - desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0) - desc->atom.num_dw += 4; /* second pointer update */ + (4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask); /* update */ desc->atom.dirty = true; + desc->pointer_dirty = true; + sctx->shader_userdata.atom.dirty = true; /* TODO: Investigate if these flushes can be removed after * adding CE support. */ @@ -206,32 +203,6 @@ static void si_update_descriptors(struct si_context *sctx, } } -static void si_emit_shader_pointer(struct si_context *sctx, - struct r600_atom *atom) -{ - struct si_descriptors *desc = (struct si_descriptors*)atom; - struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - uint64_t va = desc->buffer->gpu_address + - desc->current_context_id * desc->context_size + - desc->buffer_offset; - - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0)); - radeon_emit(cs, (desc->shader_userdata_reg - SI_SH_REG_OFFSET) >> 2); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - - if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 && - desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0) { - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0)); - radeon_emit(cs, (desc->shader_userdata_reg + - (R_00B330_SPI_SHADER_USER_DATA_ES_0 - - R_00B130_SPI_SHADER_USER_DATA_VS_0) - - SI_SH_REG_OFFSET) >> 2); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - } -} - static void si_emit_descriptors(struct si_context *sctx, struct si_descriptors *desc, uint32_t **descriptors) @@ -295,24 +266,6 @@ static void si_emit_descriptors(struct si_context *sctx, desc->dirty_mask = 0; desc->current_context_id = new_context_id; - - /* Now update the shader userdata pointer. */ - si_emit_shader_pointer(sctx, &desc->atom); -} - -static unsigned si_get_shader_user_data_base(unsigned shader) -{ - switch (shader) { - case PIPE_SHADER_VERTEX: - return R_00B130_SPI_SHADER_USER_DATA_VS_0; - case PIPE_SHADER_GEOMETRY: - return R_00B230_SPI_SHADER_USER_DATA_GS_0; - case PIPE_SHADER_FRAGMENT: - return R_00B030_SPI_SHADER_USER_DATA_PS_0; - default: - assert(0); - return 0; - } } /* SAMPLER VIEWS */ @@ -325,14 +278,11 @@ static void si_emit_sampler_views(struct si_context *sctx, struct r600_atom *ato } static void si_init_sampler_views(struct si_context *sctx, - struct si_sampler_views *views, - unsigned shader) + struct si_sampler_views *views) { int i; - si_init_descriptors(sctx, &views->desc, - si_get_shader_user_data_base(shader) + - SI_SGPR_RESOURCE * 4, + si_init_descriptors(sctx, &views->desc, SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS, si_emit_sampler_views); for (i = 0; i < views->desc.num_elements; i++) { @@ -384,8 +334,6 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); - - si_emit_shader_pointer(sctx, &views->desc.atom); } static void si_set_sampler_view(struct si_context *sctx, unsigned shader, @@ -493,7 +441,6 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx, { r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); - si_emit_shader_pointer(sctx, &states->desc.atom); } void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader, @@ -536,7 +483,7 @@ static void si_emit_buffer_resources(struct si_context *sctx, struct r600_atom * static void si_init_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, - unsigned num_buffers, unsigned shader, + unsigned num_buffers, unsigned shader_userdata_index, enum radeon_bo_usage shader_usage, enum radeon_bo_priority priority) @@ -556,10 +503,8 @@ static void si_init_buffer_resources(struct si_context *sctx, buffers->desc_data[i] = &buffers->desc_storage[i*4]; } - si_init_descriptors(sctx, &buffers->desc, - si_get_shader_user_data_base(shader) + - shader_userdata_index*4, 4, num_buffers, - si_emit_buffer_resources); + si_init_descriptors(sctx, &buffers->desc, shader_userdata_index, 4, + num_buffers, si_emit_buffer_resources); } static void si_release_buffer_resources(struct si_buffer_resources *buffers) @@ -593,8 +538,6 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, buffers->desc.buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); - - si_emit_shader_pointer(sctx, &buffers->desc.atom); } /* VERTEX BUFFERS */ @@ -620,8 +563,6 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); - - si_emit_shader_pointer(sctx, &desc->atom); } void si_update_vertex_buffers(struct si_context *sctx) @@ -693,13 +634,12 @@ void si_update_vertex_buffers(struct si_context *sctx) } } - desc->atom.num_dw = 8; /* update 2 shader pointers (VS+ES) */ - desc->atom.dirty = true; - /* Don't flush the const cache. It would have a very negative effect * on performance (confirmed by testing). New descriptors are always * uploaded to a fresh new buffer, so I don't think flushing the const * cache is needed. */ + desc->pointer_dirty = true; + sctx->shader_userdata.atom.dirty = true; } @@ -1277,6 +1217,112 @@ void si_copy_buffer(struct si_context *sctx, r600_resource(dst)->TC_L2_dirty = true; } +/* SHADER USER DATA */ + +static void si_mark_shader_pointers_dirty(struct si_context *sctx, + unsigned shader) +{ + sctx->const_buffers[shader].desc.pointer_dirty = true; + sctx->rw_buffers[shader].desc.pointer_dirty = true; + sctx->samplers[shader].views.desc.pointer_dirty = true; + sctx->samplers[shader].states.desc.pointer_dirty = true; + + if (shader == PIPE_SHADER_VERTEX) + sctx->vertex_buffers.pointer_dirty = true; + + sctx->shader_userdata.atom.dirty = true; +} + +static void si_shader_userdata_begin_new_cs(struct si_context *sctx) +{ + int i; + + for (i = 0; i < SI_NUM_SHADERS; i++) { + si_mark_shader_pointers_dirty(sctx, i); + } +} + +/* Set a base register address for user data constants in the given shader. + * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*. + */ +static void si_set_user_data_base(struct si_context *sctx, + unsigned shader, uint32_t new_base) +{ + uint32_t *base = &sctx->shader_userdata.sh_base[shader]; + + if (*base != new_base) { + *base = new_base; + + if (new_base) + si_mark_shader_pointers_dirty(sctx, shader); + } +} + +/* This must be called when these shaders are changed from non-NULL to NULL + * and vice versa: + * - geometry shader + * - tessellation control shader + * - tessellation evaluation shader + */ +void si_shader_change_notify(struct si_context *sctx) +{ + /* VS can be bound as VS or ES. */ + if (sctx->gs_shader) + si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, + R_00B330_SPI_SHADER_USER_DATA_ES_0); + else + si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, + R_00B130_SPI_SHADER_USER_DATA_VS_0); +} + +static void si_emit_shader_pointer(struct si_context *sctx, + struct si_descriptors *desc, + unsigned sh_base, bool keep_dirty) +{ + struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; + uint64_t va; + + if (!desc->pointer_dirty) + return; + + va = desc->buffer->gpu_address + + desc->current_context_id * desc->context_size + + desc->buffer_offset; + + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0)); + radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + + desc->pointer_dirty = keep_dirty; +} + +static void si_emit_shader_userdata(struct si_context *sctx, + struct r600_atom *atom) +{ + unsigned i; + uint32_t *sh_base = sctx->shader_userdata.sh_base; + + /* The VS copy shader needs these for clipping, streamout, and rings. */ + if (sctx->gs_shader) { + unsigned base = R_00B130_SPI_SHADER_USER_DATA_VS_0; + unsigned i = PIPE_SHADER_VERTEX; + + si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, true); + si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, true); + } + + for (i = 0; i < SI_NUM_SHADERS; i++) { + unsigned base = sh_base[i]; + + si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false); + si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false); + si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false); + si_emit_shader_pointer(sctx, &sctx->samplers[i].states.desc, base, false); + } + si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false); +} + /* INIT/DEINIT */ void si_init_all_descriptors(struct si_context *sctx) @@ -1285,19 +1331,17 @@ void si_init_all_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { si_init_buffer_resources(sctx, &sctx->const_buffers[i], - SI_NUM_CONST_BUFFERS, i, SI_SGPR_CONST, + SI_NUM_CONST_BUFFERS, SI_SGPR_CONST, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); si_init_buffer_resources(sctx, &sctx->rw_buffers[i], - i == PIPE_SHADER_VERTEX ? - SI_NUM_RW_BUFFERS : SI_NUM_RING_BUFFERS, - i, SI_SGPR_RW_BUFFERS, + SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW); - si_init_sampler_views(sctx, &sctx->samplers[i].views, i); + si_init_sampler_views(sctx, &sctx->samplers[i].views); si_init_descriptors(sctx, &sctx->samplers[i].states.desc, - si_get_shader_user_data_base(i) + SI_SGPR_SAMPLER * 4, - 4, SI_NUM_SAMPLER_STATES, si_emit_sampler_states); + SI_SGPR_SAMPLER, 4, SI_NUM_SAMPLER_STATES, + si_emit_sampler_states); sctx->atoms.s.const_buffers[i] = &sctx->const_buffers[i].desc.atom; sctx->atoms.s.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom; @@ -1305,11 +1349,8 @@ void si_init_all_descriptors(struct si_context *sctx) sctx->atoms.s.sampler_states[i] = &sctx->samplers[i].states.desc.atom; } - si_init_descriptors(sctx, &sctx->vertex_buffers, - si_get_shader_user_data_base(PIPE_SHADER_VERTEX) + - SI_SGPR_VERTEX_BUFFER*4, 4, SI_NUM_VERTEX_BUFFERS, - si_emit_shader_pointer); - sctx->atoms.s.vertex_buffers = &sctx->vertex_buffers.atom; + si_init_descriptors(sctx, &sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFER, + 4, SI_NUM_VERTEX_BUFFERS, NULL); /* Set pipe_context functions. */ sctx->b.b.set_constant_buffer = si_set_constant_buffer; @@ -1317,6 +1358,18 @@ void si_init_all_descriptors(struct si_context *sctx) sctx->b.b.set_stream_output_targets = si_set_streamout_targets; sctx->b.clear_buffer = si_clear_buffer; sctx->b.invalidate_buffer = si_invalidate_buffer; + + /* Shader user data. */ + sctx->atoms.s.shader_userdata = &sctx->shader_userdata.atom; + sctx->shader_userdata.atom.emit = (void*)si_emit_shader_userdata; + + /* Upper bound, 4 pointers per shader, +1 for vertex buffers, +2 for the VS copy shader. */ + sctx->shader_userdata.atom.num_dw = (SI_NUM_SHADERS * 4 + 1 + 2) * 4; + + /* Set default and immutable mappings. */ + si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0); + si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0); + si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0); } void si_release_all_descriptors(struct si_context *sctx) @@ -1343,4 +1396,5 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx) si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states); } si_vertex_buffers_begin_new_cs(sctx); + si_shader_userdata_begin_new_cs(sctx); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 266eeef61e9..11900105d82 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -125,8 +125,6 @@ struct si_framebuffer { #define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0])) -#define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1) - struct si_context { struct r600_common_context b; struct blitter_context *blitter; @@ -142,7 +140,6 @@ struct si_context { union { struct { /* The order matters. */ - struct r600_atom *vertex_buffers; struct r600_atom *const_buffers[SI_NUM_SHADERS]; struct r600_atom *rw_buffers[SI_NUM_SHADERS]; struct r600_atom *sampler_views[SI_NUM_SHADERS]; @@ -157,6 +154,7 @@ struct si_context { struct r600_atom *db_render_state; struct r600_atom *msaa_config; struct r600_atom *clip_regs; + struct r600_atom *shader_userdata; } s; struct r600_atom *array[0]; } atoms; @@ -170,6 +168,7 @@ struct si_context { struct si_shader_selector *gs_shader; struct si_shader_selector *vs_shader; struct si_cs_shader_state cs_shader_state; + struct si_shader_data shader_userdata; /* shader information */ unsigned sprite_coord_enable; bool flatshade; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 0c1fdb41408..1510c2284d6 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -30,6 +30,8 @@ #include "si_pm4.h" #include "radeon/r600_pipe_common.h" +#define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1) + struct si_screen; struct si_shader; @@ -111,6 +113,11 @@ union si_state { struct si_pm4_state *array[0]; }; +struct si_shader_data { + struct r600_atom atom; + uint32_t sh_base[SI_NUM_SHADERS]; +}; + #define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */ #define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS #define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1) @@ -172,9 +179,10 @@ struct si_descriptors { /* The size of a context, should be equal to 4*element_dw_size*num_elements. */ unsigned context_size; - /* The shader userdata register where the 64-bit pointer to the descriptor + /* The shader userdata offset within a shader where the 64-bit pointer to the descriptor * array will be stored. */ - unsigned shader_userdata_reg; + unsigned shader_userdata_offset; + bool pointer_dirty; }; struct si_sampler_views { @@ -246,6 +254,7 @@ void si_copy_buffer(struct si_context *sctx, uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer); void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, const uint8_t *ptr, unsigned size, uint32_t *const_offset); +void si_shader_change_notify(struct si_context *sctx); /* si_state.c */ struct si_shader_selector; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e85ed15deb7..abfae1c8ba3 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -249,8 +249,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_index_buffer *ib) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - unsigned sh_base_reg = (sctx->gs_shader ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : - R_00B130_SPI_SHADER_USER_DATA_VS_0); + unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX]; if (info->count_from_stream_output) { struct r600_so_target *t = diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b153228fb2c..23aee3864f8 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -533,6 +533,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; + bool enable_changed = !!sctx->gs_shader != !!sel; if (sctx->gs_shader == sel) return; @@ -540,6 +541,9 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state) sctx->gs_shader = sel; sctx->clip_regs.dirty = true; sctx->last_rast_prim = -1; /* reset this so that it gets updated */ + + if (enable_changed) + si_shader_change_notify(sctx); } static void si_make_dummy_ps(struct si_context *sctx)