* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Marek Olšák <marek.olsak@amd.com>
*/
/* Resource binding slots and sampler states (each described with 8 or
 * 4 dwords) are stored in lists in memory which is accessed by shaders
 * using scalar load instructions. */
};
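+/* Return the 48-bit buffer address packed into dwords 0-1 of a buffer
+ * descriptor; BASE_ADDRESS_HI in dword 1 holds the top 16 bits. */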
+static uint64_t si_desc_extract_buffer_address(uint32_t *desc)
+{
+ return desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
+}
+
static void si_init_descriptor_list(uint32_t *desc_list,
unsigned element_dw_size,
unsigned num_elements,
desc->element_dw_size = element_dw_size;
desc->num_elements = num_elements;
desc->shader_userdata_offset = shader_userdata_index * 4;
+ desc->slot_index_to_bind_directly = -1;
}
static void si_release_descriptors(struct si_descriptors *desc)
if (!upload_size)
return true;
+ /* If there is just one active descriptor, bind it directly. */
+ if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
+ desc->num_active_slots == 1) {
+ uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly *
+ desc->element_dw_size];
+
+ /* The buffer is already in the buffer list. */
+ r600_resource_reference(&desc->buffer, NULL);
+ desc->gpu_list = NULL;
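+ /* Use the buffer address from the descriptor itself as the shader
+ * pointer, skipping the descriptor list upload entirely. */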
+ desc->gpu_address = si_desc_extract_buffer_address(descriptor);
+ si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+ return true;
+ }
+
uint32_t *ptr;
+ int buffer_offset;
u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size,
si_optimal_tcc_alignment(sctx, upload_size),
- (unsigned*)&desc->buffer_offset,
+ (unsigned*)&buffer_offset,
(struct pipe_resource**)&desc->buffer,
(void**)&ptr);
- if (!desc->buffer)
+ if (!desc->buffer) {
+ desc->gpu_address = 0;
return false; /* skip the draw call */
+ }
util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
upload_size);
RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
/* The shader pointer should point to slot 0. */
- desc->buffer_offset -= first_slot_offset;
+ buffer_offset -= first_slot_offset;
+ desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
return true;
/* SAMPLER VIEWS */
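+/* Pick a buffer-list priority for a resource bound as a sampler view. */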
+static inline enum radeon_bo_priority
+si_get_sampler_view_priority(struct r600_resource *res)
+{
+ if (res->b.b.target == PIPE_BUFFER)
+ return RADEON_PRIO_SAMPLER_BUFFER;
+
+ if (res->b.b.nr_samples > 1)
+ return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
+
+ return RADEON_PRIO_SAMPLER_TEXTURE;
+}
+
static unsigned
si_sampler_and_image_descriptors_idx(unsigned shader)
{
if (resource->target != PIPE_BUFFER) {
struct r600_texture *tex = (struct r600_texture*)resource;
- if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil_sampler))
+ if (tex->is_depth && !si_can_sample_zs(tex, is_stencil_sampler))
resource = &tex->flushed_depth_texture->resource.b.b;
}
rres = (struct r600_resource*)resource;
- priority = r600_get_sampler_view_priority(rres);
+ priority = si_get_sampler_view_priority(rres);
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
rres, usage, priority,
{
uint64_t va, meta_va = 0;
- if (tex->is_depth && !r600_can_sample_zs(tex, is_stencil)) {
+ if (tex->is_depth && !si_can_sample_zs(tex, is_stencil)) {
tex = tex->flushed_depth_texture;
is_stencil = false;
}
va = tex->resource.gpu_address;
- if (sscreen->b.chip_class >= GFX9) {
+ if (sscreen->info.chip_class >= GFX9) {
/* Only stencil_offset needs to be added here. */
if (is_stencil)
va += tex->surface.u.gfx9.stencil_offset;
/* Only macrotiled modes can set tile swizzle.
* GFX9 doesn't use (legacy) base_level_info.
*/
- if (sscreen->b.chip_class >= GFX9 ||
+ if (sscreen->info.chip_class >= GFX9 ||
base_level_info->mode == RADEON_SURF_MODE_2D)
state[0] |= tex->surface.tile_swizzle;
- if (sscreen->b.chip_class >= VI) {
+ if (sscreen->info.chip_class >= VI) {
state[6] &= C_008F28_COMPRESSION_EN;
state[7] = 0;
meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
tex->dcc_offset;
- if (sscreen->b.chip_class == VI) {
+ if (sscreen->info.chip_class == VI) {
meta_va += base_level_info->dcc_offset;
assert(base_level_info->mode == RADEON_SURF_MODE_2D);
}
}
}
- if (sscreen->b.chip_class >= GFX9) {
+ if (sscreen->info.chip_class >= GFX9) {
state[3] &= C_008F1C_SW_MODE;
state[4] &= C_008F20_PITCH_GFX9;
unsigned level = view->u.tex.level;
unsigned width, height, depth, hw_level;
bool uses_dcc = vi_dcc_enabled(tex, level);
+ unsigned access = view->access;
+
+ /* Clear the write flag when writes can't occur.
+ * Note that DCC_DECOMPRESS for MSAA doesn't work in some cases,
+ * so we don't want to trigger it.
+ */
+ if (tex->is_depth || tex->resource.b.b.nr_samples >= 2) {
+ assert(!"Z/S and MSAA image stores are not supported");
+ access &= ~PIPE_IMAGE_ACCESS_WRITE;
+ }
- assert(!tex->is_depth);
assert(tex->fmask.size == 0);
unsigned i, count;
unsigned desc_list_byte_size;
unsigned first_vb_use_mask;
- uint64_t va;
uint32_t *ptr;
if (!sctx->vertex_buffers_dirty || !velems)
* directly through a staging buffer and don't go through
* the fine-grained upload path.
*/
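+ /* The offset is only needed to compute gpu_address below, so keep it
+ * in a local variable instead of in the descriptor struct. */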
+ unsigned buffer_offset = 0;
u_upload_alloc(sctx->b.b.const_uploader, 0,
desc_list_byte_size,
si_optimal_tcc_alignment(sctx, desc_list_byte_size),
- (unsigned*)&desc->buffer_offset,
+ &buffer_offset,
(struct pipe_resource**)&desc->buffer, (void**)&ptr);
- if (!desc->buffer)
+ if (!desc->buffer) {
+ desc->gpu_address = 0;
return false;
+ }
+ desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
desc->list = ptr;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
desc->buffer, RADEON_USAGE_READ,
for (i = 0; i < count; i++) {
struct pipe_vertex_buffer *vb;
struct r600_resource *rbuffer;
- unsigned offset;
unsigned vbo_index = velems->vertex_buffer_index[i];
uint32_t *desc = &ptr[i*4];
continue;
}
- offset = vb->buffer_offset + velems->src_offset[i];
- va = rbuffer->gpu_address + offset;
-
- /* Fill in T# buffer resource description */
- desc[0] = va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
- S_008F04_STRIDE(vb->stride);
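+ /* Compute the address and the number of records in 64-bit signed
+ * math so that out-of-range offsets can't wrap around; the asserts
+ * catch any such case. */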
+ int offset = (int)vb->buffer_offset + (int)velems->src_offset[i];
+ int64_t va = (int64_t)rbuffer->gpu_address + offset;
+ assert(va > 0);
+ int64_t num_records = (int64_t)rbuffer->b.b.width0 - offset;
if (sctx->b.chip_class != VI && vb->stride) {
/* Round up by rounding down and adding 1 */
- desc[2] = (vb->buffer.resource->width0 - offset -
- velems->format_size[i]) /
- vb->stride + 1;
- } else {
- desc[2] = vb->buffer.resource->width0 - offset;
+ num_records = (num_records - velems->format_size[i]) /
+ vb->stride + 1;
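+ /* e.g. 15 usable bytes with stride 4 and format size 4:
+ * (15 - 4) / 4 + 1 = 3 records at offsets 0, 4, 8. */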
}
+ assert(num_records >= 0 && num_records <= UINT_MAX);
+ desc[0] = va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
+ S_008F04_STRIDE(vb->stride);
+ desc[2] = num_records;
desc[3] = velems->rsrc_word3[i];
if (first_vb_use_mask & (1 << i)) {
struct pipe_resource *new_buf)
{
/* Retrieve the buffer offset from the descriptor. */
- uint64_t old_desc_va =
- desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
+ uint64_t old_desc_va = si_desc_extract_buffer_address(desc);
assert(old_buf_va <= old_desc_va);
uint64_t offset_within_buffer = old_desc_va - old_buf_va;
uint64_t old_va = rbuffer->gpu_address;
/* Reallocate the buffer in the same pipe_resource. */
- si_alloc_resource(&sctx->screen->b, rbuffer);
+ si_alloc_resource(sctx->screen, rbuffer);
si_rebind_buffer(ctx, buf, old_va);
}
uint64_t va;
data = desc->list + desc_slot_offset;
-
- va = desc->buffer->gpu_address + desc->buffer_offset +
- desc_slot_offset * 4;
+ va = desc->gpu_address + desc_slot_offset * 4;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) |
static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs,
struct si_descriptors *desc)
{
- uint64_t va = 0;
-
- if (desc->buffer)
- va = desc->buffer->gpu_address + desc->buffer_offset;
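+ /* gpu_address is always valid here: it points either at an uploaded
+ * descriptor list or directly at a bound buffer, and a failed upload
+ * already skipped the draw call. */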
+ uint64_t va = desc->gpu_address;
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
assert(resource->target == PIPE_BUFFER);
/* Retrieve the old buffer addr from the descriptor. */
- old_desc_va = desc_list[0];
- old_desc_va |= ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc_list[1]) << 32);
+ old_desc_va = si_desc_extract_buffer_address(desc_list);
if (old_desc_va != buf->gpu_address + offset) {
/* The buffer has been invalidated when the handle wasn't
bool gfx9_gs = false;
unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
+ struct si_descriptors *desc;
if (sctx->b.chip_class >= GFX9) {
gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
gfx9_gs = i == PIPE_SHADER_GEOMETRY;
}
- si_init_buffer_resources(&sctx->const_and_shader_buffers[i],
- si_const_and_shader_buffer_descriptors(sctx, i),
+ desc = si_const_and_shader_buffer_descriptors(sctx, i);
+ si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc,
num_buffer_slots,
gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
RADEON_USAGE_READ,
RADEON_PRIO_SHADER_RW_BUFFER,
RADEON_PRIO_CONST_BUFFER);
+ desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
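+ /* Constant buffer 0 is often the only buffer that is bound, in which
+ * case si_upload_descriptors can bind it directly instead of
+ * uploading a descriptor list. */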
- struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, i);
+ desc = si_sampler_and_image_descriptors(sctx, i);
si_init_descriptors(desc,
gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :