Also mark the constant buffers as read-only for the GPU again.
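For reference, each shader stage now owns a single read-write buffer descriptor list laid out as sketched below (a minimal illustration assuming the SI_RING_ESGS/SI_RING_GSVS/SI_RW_SO values introduced by this patch; the program itself is not part of the driver):

    /* Sketch of the per-shader RW buffer descriptor layout. */
    #include <stdio.h>

    #define SI_RING_ESGS 0  /* ES -> GS ring */
    #define SI_RING_GSVS 1  /* GS -> VS ring */
    #define SI_RW_SO     2  /* streamout targets start here, VS only */

    int main(void)
    {
        /* The vertex shader list has SI_RW_SO + 4 slots, others SI_RW_SO. */
        for (int i = 0; i < SI_RW_SO + 4; i++) {
            if (i == SI_RING_ESGS)
                printf("slot %d: ESGS ring descriptor\n", i);
            else if (i == SI_RING_GSVS)
                printf("slot %d: GSVS ring descriptor\n", i);
            else
                printf("slot %d: streamout target %d\n", i, i - SI_RW_SO);
        }
        return 0;
    }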
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
unsigned element_size, unsigned index_stride)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_buffer_resources *buffers = &sctx->const_buffers[shader];
+ struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
if (shader >= SI_NUM_SHADERS)
return;
unsigned append_bitmask)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_buffer_resources *buffers = &sctx->streamout_buffers;
+ struct si_buffer_resources *buffers = &sctx->rw_buffers[PIPE_SHADER_VERTEX];
unsigned old_num_targets = sctx->b.streamout.num_targets;
- unsigned i;
+ unsigned i, bufidx;
/* Streamout buffers must be bound in 2 places:
* 1) in VGT by setting the VGT_STRMOUT registers
/* Set the shader resources. */
for (i = 0; i < num_targets; i++) {
+ bufidx = SI_RW_SO + i;
+
if (targets[i]) {
struct pipe_resource *buffer = targets[i]->buffer;
uint64_t va = r600_resource_va(ctx->screen, buffer);
/* Set the descriptor. */
- uint32_t *desc = buffers->desc_data[i];
+ uint32_t *desc = buffers->desc_data[bufidx];
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
desc[2] = 0xffffffff;
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
/* Set the resource. */
- pipe_resource_reference(&buffers->buffers[i], buffer);
+ pipe_resource_reference(&buffers->buffers[bufidx],
+ buffer);
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)buffer,
buffers->shader_usage);
- buffers->desc.enabled_mask |= 1 << i;
+ buffers->desc.enabled_mask |= 1 << bufidx;
} else {
/* Clear the descriptor and unset the resource. */
- memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4);
- pipe_resource_reference(&buffers->buffers[i], NULL);
- buffers->desc.enabled_mask &= ~(1 << i);
+ memset(buffers->desc_data[bufidx], 0,
+ sizeof(uint32_t) * 4);
+ pipe_resource_reference(&buffers->buffers[bufidx],
+ NULL);
+ buffers->desc.enabled_mask &= ~(1 << bufidx);
}
- buffers->desc.dirty_mask |= 1 << i;
+ buffers->desc.dirty_mask |= 1 << bufidx;
}
for (; i < old_num_targets; i++) {
+ bufidx = SI_RW_SO + i;
/* Clear the descriptor and unset the resource. */
- memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4);
- pipe_resource_reference(&buffers->buffers[i], NULL);
- buffers->desc.enabled_mask &= ~(1 << i);
- buffers->desc.dirty_mask |= 1 << i;
+ memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4);
+ pipe_resource_reference(&buffers->buffers[bufidx], NULL);
+ buffers->desc.enabled_mask &= ~(1 << bufidx);
+ buffers->desc.dirty_mask |= 1 << bufidx;
}
si_update_descriptors(sctx, &buffers->desc);
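The enabled/dirty mask bookkeeping above is the standard pattern for these descriptor arrays: binding a slot writes the descriptor and sets both bits, while unbinding zeroes the descriptor and clears the enabled bit but still marks the slot dirty so the cleared descriptor is re-uploaded. A self-contained sketch of that pattern (the demo_* names are illustrative only, assuming 4-dword descriptors):

    #include <stdint.h>
    #include <string.h>

    struct demo_descriptors {
        uint32_t desc_data[8][4];
        uint32_t enabled_mask;
        uint32_t dirty_mask;
    };

    static void demo_bind(struct demo_descriptors *d, unsigned slot,
                          const uint32_t desc[4])
    {
        memcpy(d->desc_data[slot], desc, sizeof(uint32_t) * 4);
        d->enabled_mask |= 1u << slot;
        d->dirty_mask |= 1u << slot;
    }

    static void demo_unbind(struct demo_descriptors *d, unsigned slot)
    {
        memset(d->desc_data[slot], 0, sizeof(uint32_t) * 4);
        d->enabled_mask &= ~(1u << slot);
        d->dirty_mask |= 1u << slot;
    }

    int main(void)
    {
        struct demo_descriptors d = {{{0}}, 0, 0};
        const uint32_t desc[4] = {0x1000, 0, 0xffffffff, 0};

        demo_bind(&d, 2, desc);   /* e.g. streamout target 0 at SI_RW_SO */
        demo_unbind(&d, 2);       /* slot disabled, but still dirty */
        return d.dirty_mask == 0; /* slot 2 must still be dirty */
    }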
/* Vertex buffers. */
/* Nothing to do. Vertex buffer bindings are updated before every draw call. */
- /* Streamout buffers. */
- for (i = 0; i < sctx->streamout_buffers.num_buffers; i++) {
- if (sctx->streamout_buffers.buffers[i] == buf) {
- /* Update the descriptor. */
- si_desc_reset_buffer_offset(ctx, sctx->streamout_buffers.desc_data[i],
- old_va, buf);
+ /* Read/Write buffers. */
+ for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
+ struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
+ bool found = false;
+ uint32_t mask = buffers->desc.enabled_mask;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
- (struct r600_resource*)buf,
- sctx->streamout_buffers.shader_usage);
- sctx->streamout_buffers.desc.dirty_mask |= 1 << i;
- si_update_descriptors(sctx, &sctx->streamout_buffers.desc);
-
- /* Update the streamout state. */
- if (sctx->b.streamout.begin_emitted) {
- r600_emit_streamout_end(&sctx->b);
+ while (mask) {
+ i = u_bit_scan(&mask);
+ if (buffers->buffers[i] == buf) {
+ si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
+ old_va, buf);
+
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ rbuffer, buffers->shader_usage);
+
+ buffers->desc.dirty_mask |= 1 << i;
+ found = true;
+
+ if (i >= SI_RW_SO && shader == PIPE_SHADER_VERTEX) {
+ /* Update the streamout state. */
+ if (sctx->b.streamout.begin_emitted) {
+ r600_emit_streamout_end(&sctx->b);
+ }
+ sctx->b.streamout.append_bitmask =
+ sctx->b.streamout.enabled_mask;
+ r600_streamout_buffers_dirty(&sctx->b);
+ }
}
- sctx->b.streamout.append_bitmask = sctx->b.streamout.enabled_mask;
- r600_streamout_buffers_dirty(&sctx->b);
+ }
+ if (found) {
+ si_update_descriptors(sctx, &buffers->desc);
}
}
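u_bit_scan() (from Mesa's util headers) returns the index of the lowest set bit and clears it from the mask, so the while loop above visits exactly the enabled slots. A self-contained sketch of the idiom, with a local stand-in for u_bit_scan():

    #include <stdint.h>
    #include <stdio.h>
    #include <strings.h> /* ffs() */

    /* Local stand-in for Mesa's u_bit_scan(): return the index of the
     * lowest set bit and clear it from *mask. */
    static int bit_scan(uint32_t *mask)
    {
        int i = ffs(*mask) - 1;
        *mask &= ~(1u << i);
        return i;
    }

    int main(void)
    {
        uint32_t mask = 0x2d; /* slots 0, 2, 3, 5 enabled */
        while (mask)
            printf("visiting enabled slot %d\n", bit_scan(&mask));
        return 0;
    }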
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_init_buffer_resources(sctx, &sctx->const_buffers[i],
NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
+ RADEON_USAGE_READ);
+ si_init_buffer_resources(sctx, &sctx->rw_buffers[i],
+ i == PIPE_SHADER_VERTEX ?
+ SI_RW_SO + 4 : SI_RW_SO,
+ i, SI_SGPR_RW_BUFFERS,
RADEON_USAGE_READWRITE);
si_init_sampler_views(sctx, &sctx->samplers[i].views, i);
sctx->atoms.const_buffers[i] = &sctx->const_buffers[i].desc.atom;
+ sctx->atoms.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom;
sctx->atoms.sampler_views[i] = &sctx->samplers[i].views.desc.atom;
}
- si_init_buffer_resources(sctx, &sctx->streamout_buffers, 4, PIPE_SHADER_VERTEX,
- SI_SGPR_SO_BUFFER, RADEON_USAGE_WRITE);
- sctx->atoms.streamout_buffers = &sctx->streamout_buffers.desc.atom;
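Only the vertex shader list gets the four extra streamout slots; the other stages just carry the two ring descriptors. A quick sketch of the sizing logic (the PIPE_SHADER_* values here are local stand-ins for the gallium enums):

    #include <stdio.h>

    enum {
        PIPE_SHADER_VERTEX = 0,
        PIPE_SHADER_FRAGMENT,
        PIPE_SHADER_GEOMETRY,
        SI_NUM_SHADERS
    };
    #define SI_RW_SO 2

    int main(void)
    {
        for (int i = 0; i < SI_NUM_SHADERS; i++) {
            unsigned num = (i == PIPE_SHADER_VERTEX) ? SI_RW_SO + 4 : SI_RW_SO;
            printf("stage %d: %u RW buffer slots\n", i, num);
        }
        return 0;
    }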
/* Set pipe_context functions. */
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_release_buffer_resources(&sctx->const_buffers[i]);
+ si_release_buffer_resources(&sctx->rw_buffers[i]);
si_release_sampler_views(&sctx->samplers[i].views);
}
- si_release_buffer_resources(&sctx->streamout_buffers);
}
void si_all_descriptors_begin_new_cs(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
+ si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
}
- si_buffer_resources_begin_new_cs(sctx, &sctx->streamout_buffers);
}
#define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1)
+#define SI_RW_SO 2 /* Streamout buffer descriptors after ring buffers */
+
struct si_context {
struct r600_common_context b;
struct blitter_context *blitter;
struct {
/* The order matters. */
struct r600_atom *const_buffers[SI_NUM_SHADERS];
+ struct r600_atom *rw_buffers[SI_NUM_SHADERS];
struct r600_atom *sampler_views[SI_NUM_SHADERS];
- struct r600_atom *streamout_buffers;
/* Caches must be flushed after resource descriptors are
* updated in memory. */
struct r600_atom *cache_flush;
unsigned sprite_coord_enable;
unsigned export_16bpc;
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
- struct si_buffer_resources streamout_buffers;
+ struct si_buffer_resources rw_buffers[SI_NUM_SHADERS];
struct si_textures_info samplers[SI_NUM_SHADERS];
struct r600_resource *border_color_table;
unsigned border_color_offset;
4);
/* Load the ESGS ring resource descriptor */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
lp_build_const_int32(gallivm, SI_RING_ESGS));
}
/* Load the ESGS ring resource descriptor */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
lp_build_const_int32(gallivm, SI_RING_ESGS));
int i;
/* Load the GSVS ring resource descriptor */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
lp_build_const_int32(gallivm, SI_RING_GSVS));
struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_pipe_shader *shader = si_shader_ctx->shader;
- LLVMTypeRef params[21], f32, i8, i32, v2i32, v3i32;
+ LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32;
unsigned i, last_sgpr, num_params;
i8 = LLVMInt8TypeInContext(gallivm->context);
params[SI_PARAM_CONST] = LLVMPointerType(
LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE);
+ params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST];
+
/* We assume at most 16 textures per program at the moment.
 * This probably needs to be changed to support bindless textures */
params[SI_PARAM_SAMPLER] = LLVMPointerType(
switch (si_shader_ctx->type) {
case TGSI_PROCESSOR_VERTEX:
params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST];
- params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST];
params[SI_PARAM_START_INSTANCE] = i32;
num_params = SI_PARAM_START_INSTANCE+1;
if (shader->key.vs.as_es) {
return;
LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_SO_BUFFER);
+ SI_PARAM_RW_BUFFERS);
/* Load the resources, we rely on the code sinking to do the rest */
for (i = 0; i < 4; ++i) {
if (si_shader_ctx->shader->selector->so.stride[i]) {
- LLVMValueRef offset = lp_build_const_int32(gallivm, i);
+ LLVMValueRef offset = lp_build_const_int32(gallivm,
+ SI_RW_SO + i);
si_shader_ctx->so_buffers[i] = build_indexed_load(si_shader_ctx, buf_ptr, offset);
}
preload_streamout_buffers(si_shader_ctx);
/* Load the GSVS ring resource descriptor */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
lp_build_const_int32(gallivm, SI_RING_GSVS));
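All of these descriptor loads now go through the same SI_PARAM_RW_BUFFERS pointer, just with different constant slot indices. Conceptually (a plain-C sketch; 'descs' is a hypothetical stand-in for the GPU-side descriptor array, and load_desc() mimics what build_indexed_load() emits as LLVM IR):

    #include <stdio.h>

    #define SI_RING_ESGS 0
    #define SI_RING_GSVS 1

    typedef struct { unsigned dw[4]; } v4dw;

    static v4dw load_desc(const v4dw *descs, int slot)
    {
        return descs[slot];
    }

    int main(void)
    {
        v4dw descs[2] = {{{0xe5, 0, 0, 0}}, {{0x65, 0, 0, 0}}};
        printf("ESGS dw0: 0x%x\n", load_desc(descs, SI_RING_ESGS).dw[0]);
        printf("GSVS dw0: 0x%x\n", load_desc(descs, SI_RING_GSVS).dw[0]);
        return 0;
    }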
#define SI_SGPR_CONST 0
#define SI_SGPR_SAMPLER 2
#define SI_SGPR_RESOURCE 4
-#define SI_SGPR_VERTEX_BUFFER 6 /* VS only */
-#define SI_SGPR_SO_BUFFER 8 /* VS only, stream-out */
+#define SI_SGPR_RW_BUFFERS 6 /* rings (& stream-out, VS only) */
+#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */
#define SI_SGPR_START_INSTANCE 10 /* VS only */
-#define SI_SGPR_ALPHA_REF 6 /* PS only */
+#define SI_SGPR_ALPHA_REF 8 /* PS only */
#define SI_VS_NUM_USER_SGPR 11
-#define SI_GS_NUM_USER_SGPR 6
-#define SI_PS_NUM_USER_SGPR 7
+#define SI_GS_NUM_USER_SGPR 8
+#define SI_PS_NUM_USER_SGPR 9
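Resource pointers are 64-bit, so each pointer parameter above occupies two SGPRs; that is where the updated user-SGPR totals come from. A sketch of the accounting (pure arithmetic, not driver code):

    #include <stdio.h>

    int main(void)
    {
        /* VS: const(2) + sampler(2) + resource(2) + rw_buffers(2)
         *     + vertex_buffer(2) + start_instance(1) = 11 */
        printf("VS user SGPRs: %d\n", 2 + 2 + 2 + 2 + 2 + 1);
        /* GS: const(2) + sampler(2) + resource(2) + rw_buffers(2) = 8 */
        printf("GS user SGPRs: %d\n", 2 + 2 + 2 + 2);
        /* PS: const(2) + sampler(2) + resource(2) + rw_buffers(2)
         *     + alpha_ref(1) = 9 */
        printf("PS user SGPRs: %d\n", 2 + 2 + 2 + 2 + 1);
        return 0;
    }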
/* LLVM function parameter indices */
#define SI_PARAM_CONST 0
#define SI_PARAM_SAMPLER 1
#define SI_PARAM_RESOURCE 2
+#define SI_PARAM_RW_BUFFERS 3
/* VS only parameters */
-#define SI_PARAM_VERTEX_BUFFER 3
-#define SI_PARAM_SO_BUFFER 4
+#define SI_PARAM_VERTEX_BUFFER 4
#define SI_PARAM_START_INSTANCE 5
/* the other VS parameters are assigned dynamically */
#define SI_PARAM_ES2GS_OFFSET 6
/* GS only parameters */
-#define SI_PARAM_GS2VS_OFFSET 3
-#define SI_PARAM_GS_WAVE_ID 4
-#define SI_PARAM_VTX0_OFFSET 5
-#define SI_PARAM_VTX1_OFFSET 6
-#define SI_PARAM_PRIMITIVE_ID 7
-#define SI_PARAM_VTX2_OFFSET 8
-#define SI_PARAM_VTX3_OFFSET 9
-#define SI_PARAM_VTX4_OFFSET 10
-#define SI_PARAM_VTX5_OFFSET 11
-#define SI_PARAM_GS_INSTANCE_ID 12
+#define SI_PARAM_GS2VS_OFFSET 4
+#define SI_PARAM_GS_WAVE_ID 5
+#define SI_PARAM_VTX0_OFFSET 6
+#define SI_PARAM_VTX1_OFFSET 7
+#define SI_PARAM_PRIMITIVE_ID 8
+#define SI_PARAM_VTX2_OFFSET 9
+#define SI_PARAM_VTX3_OFFSET 10
+#define SI_PARAM_VTX4_OFFSET 11
+#define SI_PARAM_VTX5_OFFSET 12
+#define SI_PARAM_GS_INSTANCE_ID 13
/* PS only parameters */
-#define SI_PARAM_ALPHA_REF 3
-#define SI_PARAM_PRIM_MASK 4
-#define SI_PARAM_PERSP_SAMPLE 5
-#define SI_PARAM_PERSP_CENTER 6
-#define SI_PARAM_PERSP_CENTROID 7
-#define SI_PARAM_PERSP_PULL_MODEL 8
-#define SI_PARAM_LINEAR_SAMPLE 9
-#define SI_PARAM_LINEAR_CENTER 10
-#define SI_PARAM_LINEAR_CENTROID 11
-#define SI_PARAM_LINE_STIPPLE_TEX 12
-#define SI_PARAM_POS_X_FLOAT 13
-#define SI_PARAM_POS_Y_FLOAT 14
-#define SI_PARAM_POS_Z_FLOAT 15
-#define SI_PARAM_POS_W_FLOAT 16
-#define SI_PARAM_FRONT_FACE 17
-#define SI_PARAM_ANCILLARY 18
-#define SI_PARAM_SAMPLE_COVERAGE 19
-#define SI_PARAM_POS_FIXED_PT 20
+#define SI_PARAM_ALPHA_REF 4
+#define SI_PARAM_PRIM_MASK 5
+#define SI_PARAM_PERSP_SAMPLE 6
+#define SI_PARAM_PERSP_CENTER 7
+#define SI_PARAM_PERSP_CENTROID 8
+#define SI_PARAM_PERSP_PULL_MODEL 9
+#define SI_PARAM_LINEAR_SAMPLE 10
+#define SI_PARAM_LINEAR_CENTER 11
+#define SI_PARAM_LINEAR_CENTROID 12
+#define SI_PARAM_LINE_STIPPLE_TEX 13
+#define SI_PARAM_POS_X_FLOAT 14
+#define SI_PARAM_POS_Y_FLOAT 15
+#define SI_PARAM_POS_Z_FLOAT 16
+#define SI_PARAM_POS_W_FLOAT 17
+#define SI_PARAM_FRONT_FACE 18
+#define SI_PARAM_ANCILLARY 19
+#define SI_PARAM_SAMPLE_COVERAGE 20
+#define SI_PARAM_POS_FIXED_PT 21
+
+#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
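SI_NUM_PARAMS sizes the params[] array in create_function() and must stay one past the highest index. A compile-time guard along these lines would catch a mismatch (illustrative C11 sketch, not part of the patch):

    /* params[] must cover indices 0..SI_PARAM_POS_FIXED_PT. */
    #define SI_PARAM_POS_FIXED_PT 21
    #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)

    _Static_assert(SI_NUM_PARAMS == 22,
                   "highest parameter is SI_PARAM_POS_FIXED_PT (21)");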
struct si_shader_input {
unsigned name;
#define NUM_SAMPLER_STATES NUM_TEX_UNITS
#define NUM_PIPE_CONST_BUFFERS 16
-#define SI_RING_ESGS 17
-#define SI_RING_GSVS 18
-#define NUM_CONST_BUFFERS (SI_RING_GSVS + 1)
+#define NUM_CONST_BUFFERS (NUM_PIPE_CONST_BUFFERS + 1)
+#define SI_RING_ESGS 0
+#define SI_RING_GSVS 1
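With the rings moved into the RW list, the const buffer array shrinks back to the 16 pipe slots plus one driver-internal slot, and the ring descriptors are addressed by their new RW slot indices. A small sketch of the resulting numbering (values copied from the defines above; 17/18 are the old const slots removed by this patch):

    #include <stdio.h>

    #define NUM_PIPE_CONST_BUFFERS 16
    #define NUM_CONST_BUFFERS (NUM_PIPE_CONST_BUFFERS + 1)
    #define SI_RING_ESGS 0
    #define SI_RING_GSVS 1

    int main(void)
    {
        printf("const buffer slots: 0..%d\n", NUM_CONST_BUFFERS - 1);
        printf("ESGS ring: RW slot %d (was const slot 17)\n", SI_RING_ESGS);
        printf("GSVS ring: RW slot %d (was const slot 18)\n", SI_RING_GSVS);
        return 0;
    }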
/* This represents resource descriptors in memory, such as buffer resources,
* image resources, and sampler states.