* Authors:
* Marek Olšák <marek.olsak@amd.com>
*/
-#include "../radeon/r600_cs.h"
+
+/* Resource binding slots and sampler states (each described with 8 or 4 dwords)
+ * live in memory on SI.
+ *
+ * This file is responsible for managing lists of resources and sampler states
+ * in memory and for binding them, which means keeping those structures in
+ * memory up to date.
+ *
+ * There is also code for updating shader pointers to resources and sampler
+ * states. CP DMA functions are here too.
+ */
+
+#include "radeon/r600_cs.h"
#include "si_pipe.h"
-#include "si_resource.h"
#include "si_shader.h"
+#include "sid.h"
#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
#define SI_NUM_CONTEXTS 16
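+
+/* Descriptor lists are multi-buffered across SI_NUM_CONTEXTS slots within
+ * one buffer: each update copies the descriptors into a fresh slot (the
+ * "copy" step counted in si_update_descriptors), so draws still reading an
+ * older slot are left undisturbed. */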
unsigned num_elements,
void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
{
- uint64_t va;
-
assert(num_elements <= sizeof(desc->enabled_mask)*8);
assert(num_elements <= sizeof(desc->dirty_mask)*8);
desc->buffer = (struct r600_resource*)
pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
- PIPE_USAGE_STATIC,
+ PIPE_USAGE_DEFAULT,
SI_NUM_CONTEXTS * desc->context_size);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer, RADEON_USAGE_READWRITE);
- va = r600_resource_va(sctx->b.b.screen, &desc->buffer->b.b);
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
/* We don't check for CS space here, because this should be called
* only once at context initialization. */
- si_emit_cp_dma_clear_buffer(sctx, va, desc->buffer->b.b.width0, 0,
+ si_emit_cp_dma_clear_buffer(sctx, desc->buffer->gpu_address,
+ desc->buffer->b.b.width0, 0,
R600_CP_DMA_SYNC);
}
7 + /* copy */
(4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask) + /* update */
4; /* pointer update */
+
+ if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
+ desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0)
+ desc->atom.num_dw += 4; /* second pointer update */
+
desc->atom.dirty = true;
/* The descriptors are read with the K cache. */
sctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
}
static void si_emit_shader_pointer(struct si_context *sctx,
- struct si_descriptors *desc)
+ struct r600_atom *atom)
{
+ struct si_descriptors *desc = (struct si_descriptors*)atom;
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint64_t va = r600_resource_va(sctx->b.b.screen, &desc->buffer->b.b) +
- desc->current_context_id * desc->context_size;
+ uint64_t va = desc->buffer->gpu_address +
+ desc->current_context_id * desc->context_size +
+ desc->buffer_offset;
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
radeon_emit(cs, (desc->shader_userdata_reg - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
+
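+	/* Also mirror the pointer into the ES user data registers: the same
+	 * vertex shader (and thus the same descriptors) runs in the ES stage
+	 * when a geometry shader is bound. */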
+ if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
+ desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0) {
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
+ radeon_emit(cs, (desc->shader_userdata_reg +
+ (R_00B330_SPI_SHADER_USER_DATA_ES_0 -
+ R_00B130_SPI_SHADER_USER_DATA_VS_0) -
+ SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ }
}
static void si_emit_descriptors(struct si_context *sctx,
assert(dirty_mask);
- va_base = r600_resource_va(sctx->b.b.screen, &desc->buffer->b.b);
+ va_base = desc->buffer->gpu_address;
/* Copy the descriptors to a new context slot. */
/* XXX Consider using TC or L2 for this copy on CIK. */
desc->current_context_id = new_context_id;
/* Now update the shader userdata pointer. */
- si_emit_shader_pointer(sctx, desc);
+ si_emit_shader_pointer(sctx, &desc->atom);
}
static unsigned si_get_shader_user_data_base(unsigned shader)
si_init_descriptors(sctx, &views->desc,
si_get_shader_user_data_base(shader) +
SI_SGPR_RESOURCE * 4,
- 8, NUM_SAMPLER_VIEWS, si_emit_sampler_views);
+ 8, SI_NUM_SAMPLER_VIEWS, si_emit_sampler_views);
}
static void si_release_sampler_views(struct si_sampler_views *views)
si_release_descriptors(&views->desc);
}
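+
+/* Pick a buffer-list priority for read-only resources; the assumption here
+ * is that MSAA textures are the costliest to page back in, followed by
+ * other textures, then plain buffers. */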
+static enum radeon_bo_priority si_get_resource_ro_priority(struct r600_resource *res)
+{
+ if (res->b.b.target == PIPE_BUFFER)
+ return RADEON_PRIO_SHADER_BUFFER_RO;
+
+ if (res->b.b.nr_samples > 1)
+ return RADEON_PRIO_SHADER_TEXTURE_MSAA;
+
+ return RADEON_PRIO_SHADER_TEXTURE_RO;
+}
+
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
struct si_sampler_views *views)
{
/* Add relocations to the CS. */
while (mask) {
int i = u_bit_scan(&mask);
- struct si_pipe_sampler_view *rview =
- (struct si_pipe_sampler_view*)views->views[i];
+ struct si_sampler_view *rview =
+ (struct si_sampler_view*)views->views[i];
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, rview->resource, RADEON_USAGE_READ);
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ rview->resource, RADEON_USAGE_READ,
+ si_get_resource_ro_priority(rview->resource));
}
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer, RADEON_USAGE_READWRITE);
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
- si_emit_shader_pointer(sctx, &views->desc);
+ si_emit_shader_pointer(sctx, &views->desc.atom);
}
-void si_set_sampler_view(struct si_context *sctx, unsigned shader,
- unsigned slot, struct pipe_sampler_view *view,
- unsigned *view_desc)
+static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
+ unsigned slot, struct pipe_sampler_view *view,
+ unsigned *view_desc)
{
struct si_sampler_views *views = &sctx->samplers[shader].views;
return;
if (view) {
- struct si_pipe_sampler_view *rview =
- (struct si_pipe_sampler_view*)view;
+ struct si_sampler_view *rview =
+ (struct si_sampler_view*)view;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, rview->resource, RADEON_USAGE_READ);
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ rview->resource, RADEON_USAGE_READ,
+ si_get_resource_ro_priority(rview->resource));
pipe_sampler_view_reference(&views->views[slot], view);
views->desc_data[slot] = view_desc;
}
views->desc.dirty_mask |= 1 << slot;
- si_update_descriptors(sctx, &views->desc);
+}
+
+static void si_set_sampler_views(struct pipe_context *ctx,
+ unsigned shader, unsigned start,
+ unsigned count,
+ struct pipe_sampler_view **views)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_textures_info *samplers = &sctx->samplers[shader];
+ struct si_sampler_view **rviews = (struct si_sampler_view **)views;
+ int i;
+
+ if (!count || shader >= SI_NUM_SHADERS)
+ return;
+
+ for (i = 0; i < count; i++) {
+ unsigned slot = start + i;
+
+ if (!views[i]) {
+ samplers->depth_texture_mask &= ~(1 << slot);
+ samplers->compressed_colortex_mask &= ~(1 << slot);
+ si_set_sampler_view(sctx, shader, slot, NULL, NULL);
+ si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
+ NULL, NULL);
+ continue;
+ }
+
+ si_set_sampler_view(sctx, shader, slot, views[i], rviews[i]->state);
+
+ if (views[i]->texture->target != PIPE_BUFFER) {
+ struct r600_texture *rtex =
+ (struct r600_texture*)views[i]->texture;
+
+ if (rtex->is_depth && !rtex->is_flushing_texture) {
+ samplers->depth_texture_mask |= 1 << slot;
+ } else {
+ samplers->depth_texture_mask &= ~(1 << slot);
+ }
+ if (rtex->cmask.size || rtex->fmask.size) {
+ samplers->compressed_colortex_mask |= 1 << slot;
+ } else {
+ samplers->compressed_colortex_mask &= ~(1 << slot);
+ }
+
+ if (rtex->fmask.size) {
+ si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
+ views[i], rviews[i]->fmask_state);
+ } else {
+ si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
+ NULL, NULL);
+ }
+ } else {
+ samplers->depth_texture_mask &= ~(1 << slot);
+ samplers->compressed_colortex_mask &= ~(1 << slot);
+ si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
+ NULL, NULL);
+ }
+ }
+
+ sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
+ si_update_descriptors(sctx, &samplers->views.desc);
+}
+
+/* SAMPLER STATES */
+
+static void si_emit_sampler_states(struct si_context *sctx, struct r600_atom *atom)
+{
+ struct si_sampler_states *states = (struct si_sampler_states*)atom;
+
+ si_emit_descriptors(sctx, &states->desc, states->desc_data);
+}
+
+static void si_sampler_states_begin_new_cs(struct si_context *sctx,
+ struct si_sampler_states *states)
+{
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
+ si_emit_shader_pointer(sctx, &states->desc.atom);
+}
+
+void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
+ unsigned start, unsigned count, void **states)
+{
+ struct si_sampler_states *samplers = &sctx->samplers[shader].states;
+ struct si_sampler_state **sstates = (struct si_sampler_state**)states;
+ int i;
+
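+	/* Keep the states bound to slots 0 and 1 around; these appear to be
+	 * the ones saved and restored around internal (e.g. blitter) draws. */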
+ if (start == 0)
+ samplers->saved_states[0] = states[0];
+ if (start == 1)
+ samplers->saved_states[1] = states[0];
+ else if (start == 0 && count >= 2)
+ samplers->saved_states[1] = states[1];
+
+ for (i = 0; i < count; i++) {
+ unsigned slot = start + i;
+
+ if (!sstates[i]) {
+ samplers->desc.dirty_mask &= ~(1 << slot);
+ continue;
+ }
+
+ samplers->desc_data[slot] = sstates[i]->val;
+ samplers->desc.dirty_mask |= 1 << slot;
+ }
+
+ si_update_descriptors(sctx, &samplers->desc);
}
/* BUFFER RESOURCES */
struct si_buffer_resources *buffers,
unsigned num_buffers, unsigned shader,
unsigned shader_userdata_index,
- enum radeon_bo_usage shader_usage)
+ enum radeon_bo_usage shader_usage,
+ enum radeon_bo_priority priority)
{
int i;
buffers->num_buffers = num_buffers;
buffers->shader_usage = shader_usage;
+ buffers->priority = priority;
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
buffers->desc_storage = CALLOC(num_buffers, sizeof(uint32_t) * 4);
{
int i;
- for (i = 0; i < Elements(buffers->buffers); i++) {
+ for (i = 0; i < buffers->num_buffers; i++) {
pipe_resource_reference(&buffers->buffers[i], NULL);
}
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)buffers->buffers[i],
- buffers->shader_usage);
+ buffers->shader_usage, buffers->priority);
}
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
- buffers->desc.buffer, RADEON_USAGE_READWRITE);
+ buffers->desc.buffer, RADEON_USAGE_READWRITE,
+ RADEON_PRIO_SHADER_DATA);
+
+ si_emit_shader_pointer(sctx, &buffers->desc.atom);
+}
+
+/* VERTEX BUFFERS */
+
+static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
+{
+ struct si_descriptors *desc = &sctx->vertex_buffers;
+ int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
+
+ if (vb >= Elements(sctx->vertex_buffer))
+ continue;
+ if (!sctx->vertex_buffer[vb].buffer)
+ continue;
+
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
+ RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+ }
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ desc->buffer, RADEON_USAGE_READ,
+ RADEON_PRIO_SHADER_DATA);
+
+ si_emit_shader_pointer(sctx, &desc->atom);
+}
+
+void si_update_vertex_buffers(struct si_context *sctx)
+{
+ struct si_descriptors *desc = &sctx->vertex_buffers;
+ bool bound[SI_NUM_VERTEX_BUFFERS] = {};
+	unsigned i, count;
+	uint64_t va;
+	uint32_t *ptr;
+
+	if (!sctx->vertex_elements)
+		return;
+
+	count = sctx->vertex_elements->count;
+	if (!count)
+		return;
+
+ /* Vertex buffer descriptors are the only ones which are uploaded
+ * directly through a staging buffer and don't go through
+ * the fine-grained upload path.
+ */
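+	/* Each descriptor is one V# buffer resource: 4 dwords = 16 bytes. */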
+ u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
+ (struct pipe_resource**)&desc->buffer, (void**)&ptr);
+
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ desc->buffer, RADEON_USAGE_READ,
+ RADEON_PRIO_SHADER_DATA);
+
+ assert(count <= SI_NUM_VERTEX_BUFFERS);
+ assert(desc->current_context_id == 0);
+
+ for (i = 0; i < count; i++) {
+ struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
+ struct pipe_vertex_buffer *vb;
+ struct r600_resource *rbuffer;
+ unsigned offset;
+ uint32_t *desc = &ptr[i*4];
+
+ if (ve->vertex_buffer_index >= Elements(sctx->vertex_buffer)) {
+ memset(desc, 0, 16);
+ continue;
+ }
- si_emit_shader_pointer(sctx, &buffers->desc);
+ vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
+ rbuffer = (struct r600_resource*)vb->buffer;
+ if (rbuffer == NULL) {
+ memset(desc, 0, 16);
+ continue;
+ }
+
+ offset = vb->buffer_offset + ve->src_offset;
+ va = rbuffer->gpu_address + offset;
+
+ /* Fill in T# buffer resource description */
+ desc[0] = va & 0xFFFFFFFF;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
+ S_008F04_STRIDE(vb->stride);
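+		/* Worked example for the strided case below (hypothetical
+		 * numbers): width0 = 1000, offset = 4, stride = 16,
+		 * format_size = 12 -> (1000 - 4 - 12) / 16 + 1 = 62 records;
+		 * the last record starts at 4 + 61*16 = 980 and its 12 bytes
+		 * end at 992 <= 1000. */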
+ if (vb->stride)
+			/* Round up by rounding down and adding 1: the last
+			 * vertex needs only format_size bytes, not a whole
+			 * stride. */
+ desc[2] = (vb->buffer->width0 - offset -
+ sctx->vertex_elements->format_size[i]) /
+ vb->stride + 1;
+ else
+ desc[2] = vb->buffer->width0 - offset;
+
+ desc[3] = sctx->vertex_elements->rsrc_word3[i];
+
+ if (!bound[ve->vertex_buffer_index]) {
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ (struct r600_resource*)vb->buffer,
+ RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+ bound[ve->vertex_buffer_index] = true;
+ }
+ }
+
+ desc->atom.num_dw = 8; /* update 2 shader pointers (VS+ES) */
+ desc->atom.dirty = true;
+
+	/* Don't flush the const cache. It would have a very negative effect
+	 * on performance (confirmed by testing). New descriptors are always
+	 * uploaded to a fresh buffer, so flushing the const cache should not
+	 * be needed. */
+ sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
}
+
/* CONSTANT BUFFERS */
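+
+/* Upload user-memory constants through u_upload_mgr; on big-endian hosts
+ * util_memcpy_cpu_to_le32 swaps to LE, elsewhere it is a plain memcpy. */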
+void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
+ const uint8_t *ptr, unsigned size, uint32_t *const_offset)
+{
+ void *tmp;
+
+ u_upload_alloc(sctx->b.uploader, 0, size, const_offset,
+ (struct pipe_resource**)rbuffer, &tmp);
+ util_memcpy_cpu_to_le32(tmp, ptr, size);
+}
+
static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot,
struct pipe_constant_buffer *input)
{
si_upload_const_buffer(sctx,
(struct r600_resource**)&buffer, input->user_buffer,
input->buffer_size, &buffer_offset);
- va = r600_resource_va(ctx->screen, buffer) + buffer_offset;
+ va = r600_resource(buffer)->gpu_address + buffer_offset;
} else {
pipe_resource_reference(&buffer, input->buffer);
- va = r600_resource_va(ctx->screen, buffer) + input->buffer_offset;
+ va = r600_resource(buffer)->gpu_address + input->buffer_offset;
}
/* Set the descriptor. */
buffers->buffers[slot] = buffer;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
- (struct r600_resource*)buffer, buffers->shader_usage);
+ (struct r600_resource*)buffer,
+ buffers->shader_usage, buffers->priority);
+ buffers->desc.enabled_mask |= 1 << slot;
+ } else {
+ /* Clear the descriptor. */
+ memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
+ buffers->desc.enabled_mask &= ~(1 << slot);
+ }
+
+ buffers->desc.dirty_mask |= 1 << slot;
+ si_update_descriptors(sctx, &buffers->desc);
+}
+
+/* RING BUFFERS */
+
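+/* si_set_ring_buffer is used e.g. for the ESGS/GSVS rings that pass data
+ * between shader stages when a geometry shader is active. */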
+void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
+ struct pipe_resource *buffer,
+ unsigned stride, unsigned num_records,
+ bool add_tid, bool swizzle,
+ unsigned element_size, unsigned index_stride)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
+
+ if (shader >= SI_NUM_SHADERS)
+ return;
+
+ /* The stride field in the resource descriptor has 14 bits */
+ assert(stride < (1 << 14));
+
+ assert(slot < buffers->num_buffers);
+ pipe_resource_reference(&buffers->buffers[slot], NULL);
+
+ if (buffer) {
+ uint64_t va;
+
+ va = r600_resource(buffer)->gpu_address;
+
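+		/* Map the element size in bytes to the hardware encoding:
+		 * log2(size) - 1, with 0 and 2 both encoded as 0. */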
+ switch (element_size) {
+ default:
+ assert(!"Unsupported ring buffer element size");
+ case 0:
+ case 2:
+ element_size = 0;
+ break;
+ case 4:
+ element_size = 1;
+ break;
+ case 8:
+ element_size = 2;
+ break;
+ case 16:
+ element_size = 3;
+ break;
+ }
+
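+		/* Likewise, the index stride encoding is log2(stride) - 3,
+		 * with 0 and 8 both encoded as 0. */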
+ switch (index_stride) {
+ default:
+ assert(!"Unsupported ring buffer index stride");
+ case 0:
+ case 8:
+ index_stride = 0;
+ break;
+ case 16:
+ index_stride = 1;
+ break;
+ case 32:
+ index_stride = 2;
+ break;
+ case 64:
+ index_stride = 3;
+ break;
+ }
+
+ /* Set the descriptor. */
+ uint32_t *desc = buffers->desc_data[slot];
+ desc[0] = va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
+ S_008F04_STRIDE(stride) |
+ S_008F04_SWIZZLE_ENABLE(swizzle);
+ desc[2] = num_records;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(element_size) |
+ S_008F0C_INDEX_STRIDE(index_stride) |
+ S_008F0C_ADD_TID_ENABLE(add_tid);
+
+ pipe_resource_reference(&buffers->buffers[slot], buffer);
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ (struct r600_resource*)buffer,
+ buffers->shader_usage, buffers->priority);
buffers->desc.enabled_mask |= 1 << slot;
} else {
/* Clear the descriptor. */
static void si_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
- unsigned append_bitmask)
+ const unsigned *offsets)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_buffer_resources *buffers = &sctx->streamout_buffers;
+ struct si_buffer_resources *buffers = &sctx->rw_buffers[PIPE_SHADER_VERTEX];
unsigned old_num_targets = sctx->b.streamout.num_targets;
- unsigned i;
+ unsigned i, bufidx;
/* Streamout buffers must be bound in 2 places:
	 * 1) in VGT by setting the VGT_STRMOUT registers
	 * 2) as shader resources
	 */
/* Set the VGT regs. */
- r600_set_streamout_targets(ctx, num_targets, targets, append_bitmask);
+ r600_set_streamout_targets(ctx, num_targets, targets, offsets);
	/* Set the shader resources. */
for (i = 0; i < num_targets; i++) {
+ bufidx = SI_SO_BUF_OFFSET + i;
+
if (targets[i]) {
struct pipe_resource *buffer = targets[i]->buffer;
- uint64_t va = r600_resource_va(ctx->screen, buffer);
+ uint64_t va = r600_resource(buffer)->gpu_address;
/* Set the descriptor. */
- uint32_t *desc = buffers->desc_data[i];
+ uint32_t *desc = buffers->desc_data[bufidx];
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
desc[2] = 0xffffffff;
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
/* Set the resource. */
- pipe_resource_reference(&buffers->buffers[i], buffer);
+ pipe_resource_reference(&buffers->buffers[bufidx],
+ buffer);
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)buffer,
- buffers->shader_usage);
- buffers->desc.enabled_mask |= 1 << i;
+ buffers->shader_usage, buffers->priority);
+ buffers->desc.enabled_mask |= 1 << bufidx;
} else {
/* Clear the descriptor and unset the resource. */
- memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4);
- pipe_resource_reference(&buffers->buffers[i], NULL);
- buffers->desc.enabled_mask &= ~(1 << i);
+ memset(buffers->desc_data[bufidx], 0,
+ sizeof(uint32_t) * 4);
+ pipe_resource_reference(&buffers->buffers[bufidx],
+ NULL);
+ buffers->desc.enabled_mask &= ~(1 << bufidx);
}
- buffers->desc.dirty_mask |= 1 << i;
+ buffers->desc.dirty_mask |= 1 << bufidx;
}
for (; i < old_num_targets; i++) {
+ bufidx = SI_SO_BUF_OFFSET + i;
/* Clear the descriptor and unset the resource. */
- memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4);
- pipe_resource_reference(&buffers->buffers[i], NULL);
- buffers->desc.enabled_mask &= ~(1 << i);
- buffers->desc.dirty_mask |= 1 << i;
+ memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4);
+ pipe_resource_reference(&buffers->buffers[bufidx], NULL);
+ buffers->desc.enabled_mask &= ~(1 << bufidx);
+ buffers->desc.dirty_mask |= 1 << bufidx;
}
si_update_descriptors(sctx, &buffers->desc);
uint64_t offset_within_buffer = old_desc_va - old_buf_va;
/* Update the descriptor. */
- uint64_t va = r600_resource_va(ctx->screen, new_buf) + offset_within_buffer;
+ uint64_t va = r600_resource(new_buf)->gpu_address + offset_within_buffer;
desc[0] = va;
desc[1] = (desc[1] & C_008F04_BASE_ADDRESS_HI) |
struct si_context *sctx = (struct si_context*)ctx;
struct r600_resource *rbuffer = r600_resource(buf);
unsigned i, shader, alignment = rbuffer->buf->alignment;
- uint64_t old_va = r600_resource_va(ctx->screen, buf);
+ uint64_t old_va = rbuffer->gpu_address;
+ unsigned num_elems = sctx->vertex_elements ?
+ sctx->vertex_elements->count : 0;
+ struct si_sampler_view *view;
- /* Discard the buffer. */
- pb_reference(&rbuffer->buf, NULL);
-
- /* Create a new one in the same pipe_resource. */
- r600_init_resource(&sctx->screen->b, rbuffer, rbuffer->b.b.width0, alignment,
- TRUE, rbuffer->b.b.usage);
+ /* Reallocate the buffer in the same pipe_resource. */
+ r600_init_resource(&sctx->screen->b, rbuffer, rbuffer->b.b.width0,
+ alignment, TRUE);
	/* We changed the buffer, now we need to bind it where the old one
	 * was bound. This consists of 2 things:
	 *   1) Updating the resource descriptor and dirtying it.
	 *   2) Adding a relocation to the CS, so that it's usable.
	 */
/* Vertex buffers. */
- /* Nothing to do. Vertex buffer bindings are updated before every draw call. */
+ for (i = 0; i < num_elems; i++) {
+ int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
- /* Streamout buffers. */
- for (i = 0; i < sctx->streamout_buffers.num_buffers; i++) {
- if (sctx->streamout_buffers.buffers[i] == buf) {
- /* Update the descriptor. */
- si_desc_reset_buffer_offset(ctx, sctx->streamout_buffers.desc_data[i],
- old_va, buf);
+ if (vb >= Elements(sctx->vertex_buffer))
+ continue;
+ if (!sctx->vertex_buffer[vb].buffer)
+ continue;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
- (struct r600_resource*)buf,
- sctx->streamout_buffers.shader_usage);
- sctx->streamout_buffers.desc.dirty_mask |= 1 << i;
- si_update_descriptors(sctx, &sctx->streamout_buffers.desc);
-
- /* Update the streamout state. */
- if (sctx->b.streamout.begin_emitted) {
- r600_emit_streamout_end(&sctx->b);
+ if (sctx->vertex_buffer[vb].buffer == buf) {
+ sctx->vertex_buffers_dirty = true;
+ break;
+ }
+ }
+
+ /* Read/Write buffers. */
+ for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
+ struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
+ bool found = false;
+ uint32_t mask = buffers->desc.enabled_mask;
+
+ while (mask) {
+ i = u_bit_scan(&mask);
+ if (buffers->buffers[i] == buf) {
+ si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
+ old_va, buf);
+
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ rbuffer, buffers->shader_usage,
+ buffers->priority);
+
+ buffers->desc.dirty_mask |= 1 << i;
+ found = true;
+
+ if (i >= SI_SO_BUF_OFFSET && shader == PIPE_SHADER_VERTEX) {
+ /* Update the streamout state. */
+ if (sctx->b.streamout.begin_emitted) {
+ r600_emit_streamout_end(&sctx->b);
+ }
+ sctx->b.streamout.append_bitmask =
+ sctx->b.streamout.enabled_mask;
+ r600_streamout_buffers_dirty(&sctx->b);
+ }
}
- sctx->b.streamout.append_bitmask = sctx->b.streamout.enabled_mask;
- r600_streamout_buffers_dirty(&sctx->b);
+ }
+ if (found) {
+ si_update_descriptors(sctx, &buffers->desc);
}
}
old_va, buf);
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
- rbuffer, buffers->shader_usage);
+ rbuffer, buffers->shader_usage,
+ buffers->priority);
buffers->desc.dirty_mask |= 1 << i;
found = true;
}
}
- /* Texture buffers. */
+ /* Texture buffers - update virtual addresses in sampler view descriptors. */
+ LIST_FOR_EACH_ENTRY(view, &sctx->b.texture_buffers, list) {
+ if (view->base.texture == buf) {
+ si_desc_reset_buffer_offset(ctx, view->state, old_va, buf);
+ }
+ }
+ /* Texture buffers - update bindings. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_sampler_views *views = &sctx->samplers[shader].views;
bool found = false;
while (mask) {
unsigned i = u_bit_scan(&mask);
if (views->views[i]->texture == buf) {
- /* This updates the sampler view directly. */
- si_desc_reset_buffer_offset(ctx, views->desc_data[i],
- old_va, buf);
-
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
- rbuffer, RADEON_USAGE_READ);
+ rbuffer, RADEON_USAGE_READ,
+ RADEON_PRIO_SHADER_BUFFER_RO);
views->desc.dirty_mask |= 1 << i;
found = true;
return;
}
- uint64_t va = r600_resource_va(&sctx->screen->b.b, dst) + offset;
+ uint64_t va = r600_resource(dst)->gpu_address + offset;
/* Flush the caches where the resource is bound. */
/* XXX only flush the caches where the buffer is bound. */
/* This must be done after need_cs_space. */
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
- (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+ (struct r600_resource*)dst, RADEON_USAGE_WRITE,
+ RADEON_PRIO_MIN);
/* Flush the caches for the first copy only.
* Also wait for the previous CP DMA operations. */
util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
dst_offset + size);
- dst_offset += r600_resource_va(&sctx->screen->b.b, dst);
- src_offset += r600_resource_va(&sctx->screen->b.b, src);
+ dst_offset += r600_resource(dst)->gpu_address;
+ src_offset += r600_resource(src)->gpu_address;
/* Flush the caches where the resource is bound. */
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
}
/* This must be done after r600_need_cs_space. */
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src, RADEON_USAGE_READ);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
+ RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
+ RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_init_buffer_resources(sctx, &sctx->const_buffers[i],
- NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
- RADEON_USAGE_READ);
+ SI_NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
+ RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+ si_init_buffer_resources(sctx, &sctx->rw_buffers[i],
+ i == PIPE_SHADER_VERTEX ?
+ SI_NUM_RW_BUFFERS : SI_NUM_RING_BUFFERS,
+ i, SI_SGPR_RW_BUFFERS,
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
si_init_sampler_views(sctx, &sctx->samplers[i].views, i);
- sctx->atoms.const_buffers[i] = &sctx->const_buffers[i].desc.atom;
- sctx->atoms.sampler_views[i] = &sctx->samplers[i].views.desc.atom;
+ si_init_descriptors(sctx, &sctx->samplers[i].states.desc,
+ si_get_shader_user_data_base(i) + SI_SGPR_SAMPLER * 4,
+ 4, SI_NUM_SAMPLER_STATES, si_emit_sampler_states);
+
+ sctx->atoms.s.const_buffers[i] = &sctx->const_buffers[i].desc.atom;
+ sctx->atoms.s.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom;
+ sctx->atoms.s.sampler_views[i] = &sctx->samplers[i].views.desc.atom;
+ sctx->atoms.s.sampler_states[i] = &sctx->samplers[i].states.desc.atom;
}
- si_init_buffer_resources(sctx, &sctx->streamout_buffers, 4, PIPE_SHADER_VERTEX,
- SI_SGPR_SO_BUFFER, RADEON_USAGE_WRITE);
- sctx->atoms.streamout_buffers = &sctx->streamout_buffers.desc.atom;
+ si_init_descriptors(sctx, &sctx->vertex_buffers,
+ si_get_shader_user_data_base(PIPE_SHADER_VERTEX) +
+ SI_SGPR_VERTEX_BUFFER*4, 4, SI_NUM_VERTEX_BUFFERS,
+ si_emit_shader_pointer);
+ sctx->atoms.s.vertex_buffers = &sctx->vertex_buffers.atom;
/* Set pipe_context functions. */
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
+ sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
sctx->b.clear_buffer = si_clear_buffer;
sctx->b.invalidate_buffer = si_invalidate_buffer;
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_release_buffer_resources(&sctx->const_buffers[i]);
+ si_release_buffer_resources(&sctx->rw_buffers[i]);
si_release_sampler_views(&sctx->samplers[i].views);
+ si_release_descriptors(&sctx->samplers[i].states.desc);
}
- si_release_buffer_resources(&sctx->streamout_buffers);
+ si_release_descriptors(&sctx->vertex_buffers);
}
void si_all_descriptors_begin_new_cs(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
+ si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
+ si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states);
}
- si_buffer_resources_begin_new_cs(sctx, &sctx->streamout_buffers);
+ si_vertex_buffers_begin_new_cs(sctx);
}