static void si_bind_compute_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context*)ctx;
- sctx->cs_shader_state.program = (struct si_compute*)state;
+ struct si_compute *program = (struct si_compute*)state;
+
+ sctx->cs_shader_state.program = program;
+ if (!program)
+ return;
+
+ /* Wait because we need active slot usage masks. */
+ if (program->ir_type == PIPE_SHADER_IR_TGSI)
+ util_queue_fence_wait(&program->ready);
+
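+	/* Tell the descriptor code which slots this program actually uses. */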
+ si_set_active_descriptors(sctx,
+ SI_DESCS_FIRST_COMPUTE +
+ SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS,
+ program->active_const_and_shader_buffers);
+ si_set_active_descriptors(sctx,
+ SI_DESCS_FIRST_COMPUTE +
+ SI_SHADER_DESCS_SAMPLERS_AND_IMAGES,
+ program->active_samplers_and_images);
}
static void si_launch_grid(
sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
SI_CONTEXT_CS_PARTIAL_FLUSH;
- if (program->ir_type == PIPE_SHADER_IR_TGSI) {
- util_queue_fence_wait(&program->ready);
-
- if (program->shader.compilation_failed)
- return;
- }
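+	/* No fence wait is needed here: si_bind_compute_state already waited
+	 * for TGSI shaders, so compilation_failed is valid by now. */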
+ if (program->ir_type == PIPE_SHADER_IR_TGSI &&
+ program->shader.compilation_failed)
+ return;
si_decompress_compute_textures(sctx);
}
static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned size,
- unsigned *out_offset, struct r600_resource **out_buf) {
+ unsigned *out_offset, struct r600_resource **out_buf)
+{
uint64_t va;
u_suballocator_alloc(sctx->ce_suballocator, size,
- sctx->screen->b.info.tcc_cache_line_size,
- out_offset, (struct pipe_resource**)out_buf);
+ si_optimal_tcc_alignment(sctx, size),
+ out_offset,
+ (struct pipe_resource**)out_buf);
if (!out_buf)
return false;
struct si_descriptors *desc,
struct r600_atom * atom)
{
- unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
+ unsigned slot_size = desc->element_dw_size * 4;
+ unsigned first_slot_offset = desc->first_active_slot * slot_size;
+ unsigned upload_size = desc->num_active_slots * slot_size;
+
+ /* Skip the upload if no shader is using the descriptors. dirty_mask
+ * will stay dirty and the descriptors will be uploaded when there is
+ * a shader using them.
+ */
+ if (!upload_size)
+ return true;
if (sctx->ce_ib && desc->uses_ce) {
uint32_t const* list = (uint32_t const*)desc->list;
radeon_emit_array(sctx->ce_ib, list + begin, count);
}
- if (!si_ce_upload(sctx, desc->ce_offset, list_size,
- &desc->buffer_offset, &desc->buffer))
+ if (!si_ce_upload(sctx, desc->ce_offset + first_slot_offset,
+ upload_size, (unsigned*)&desc->buffer_offset,
+ &desc->buffer))
return false;
} else {
- void *ptr;
+ uint32_t *ptr;
- u_upload_alloc(sctx->b.b.const_uploader, 0, list_size,
- sctx->screen->b.info.tcc_cache_line_size,
- &desc->buffer_offset,
- (struct pipe_resource**)&desc->buffer, &ptr);
+ u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size,
+ si_optimal_tcc_alignment(sctx, upload_size),
+ (unsigned*)&desc->buffer_offset,
+ (struct pipe_resource**)&desc->buffer,
+ (void**)&ptr);
if (!desc->buffer)
return false; /* skip the draw call */
- util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
- desc->gpu_list = ptr;
+ util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
+ upload_size);
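+	/* Bias the CPU pointer back to where slot 0 would be, so that
+	 * desc->gpu_list can still be indexed with absolute slot numbers
+	 * even though only the active range was copied. */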
+ desc->gpu_list = ptr - first_slot_offset / 4;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
}
+
+ /* The shader pointer should point to slot 0. */
+ desc->buffer_offset -= first_slot_offset;
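+	/* This can make buffer_offset negative. The resulting base address is
+	 * only dereferenced for active slots, which always fall inside the
+	 * uploaded range. */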
+
desc->dirty_mask = 0;
if (atom)
u_upload_alloc(sctx->b.b.const_uploader, 0,
desc_list_byte_size,
si_optimal_tcc_alignment(sctx, desc_list_byte_size),
- &desc->buffer_offset,
+ (unsigned*)&desc->buffer_offset,
(struct pipe_resource**)&desc->buffer, (void**)&ptr);
if (!desc->buffer)
return false;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
uint64_t va;
- assert(desc->buffer);
+ if (!desc->buffer)
+ return; /* the pointer is not used by current shaders */
va = desc->buffer->gpu_address +
desc->buffer_offset;
RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER,
&ce_offset);
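+	/* No shader bind point updates the active mask for RW buffers, so
+	 * keep all of their slots active. */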
+ sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
+
si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
4, SI_NUM_VERTEX_BUFFERS, NULL);
si_shader_userdata_begin_new_cs(sctx);
}
+
+void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
+ uint64_t new_active_mask)
+{
+ struct si_descriptors *desc = &sctx->descriptors[desc_idx];
+
+ /* Ignore no-op updates and updates that disable all slots. */
+ if (!new_active_mask ||
+ new_active_mask == u_bit_consecutive64(desc->first_active_slot,
+ desc->num_active_slots))
+ return;
+
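+	/* The active slots are assumed to form one contiguous range, so a
+	 * single consecutive-range scan must consume the whole mask,
+	 * e.g. new_active_mask = 0x3c0 yields first = 6, count = 4. */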
+ int first, count;
+ u_bit_scan_consecutive_range64(&new_active_mask, &first, &count);
+ assert(new_active_mask == 0);
+
+ /* Upload/dump descriptors if slots are being enabled. */
+ if (first < desc->first_active_slot ||
+ first + count > desc->first_active_slot + desc->num_active_slots)
+ sctx->descriptors_dirty |= 1u << desc_idx;
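+	/* If the new range is a subset of the old one, the previously uploaded
+	 * descriptors already cover it, so nothing needs to be re-uploaded. */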
+
+ desc->first_active_slot = first;
+ desc->num_active_slots = count;
+}
+
+void si_set_active_descriptors_for_shader(struct si_context *sctx,
+ struct si_shader_selector *sel)
+{
+ if (!sel)
+ return;
+
+ si_set_active_descriptors(sctx,
+ si_const_and_shader_buffer_descriptors_idx(sel->type),
+ sel->active_const_and_shader_buffers);
+ si_set_active_descriptors(sctx,
+ si_sampler_and_image_descriptors_idx(sel->type),
+ sel->active_samplers_and_images);
+}
struct si_screen;
struct si_shader;
+struct si_shader_selector;
struct si_state_blend {
struct si_pm4_state pm4;
/* The buffer where the descriptors have been uploaded. */
struct r600_resource *buffer;
- unsigned buffer_offset;
+ int buffer_offset; /* can be negative if not using lower slots */
/* Offset in CE RAM */
unsigned ce_offset;
- /* elements of the list that are changed and need to be uploaded */
+ /* Slots that are used by currently-bound shaders.
+ * With CE: It determines which slots are dumped to L2.
+ * It doesn't skip uploads to CE RAM.
+ * Without CE: It determines which slots are uploaded.
+ */
+ unsigned first_active_slot;
+ unsigned num_active_slots;
+
+ /* Slots that have been changed and need to be uploaded. */
uint64_t dirty_mask;
/* Whether CE is used to upload this descriptor array. */
void si_emit_compute_shader_userdata(struct si_context *sctx);
void si_set_rw_buffer(struct si_context *sctx,
uint slot, const struct pipe_constant_buffer *input);
+void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
+ uint64_t new_active_mask);
+void si_set_active_descriptors_for_shader(struct si_context *sctx,
+ struct si_shader_selector *sel);
+
/* si_state.c */
struct si_shader_selector;
sctx->do_update_shaders = true;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
+ si_set_active_descriptors_for_shader(sctx, sel);
}
static void si_update_tess_uses_prim_id(struct si_context *sctx)
si_update_tess_uses_prim_id(sctx);
}
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
+ si_set_active_descriptors_for_shader(sctx, sel);
}
static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
if (enable_changed)
sctx->last_tcs = NULL; /* invalidate derived tess state */
+
+ si_set_active_descriptors_for_shader(sctx, sel);
}
static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
}
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
+ si_set_active_descriptors_for_shader(sctx, sel);
}
static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
si_update_tess_uses_prim_id(sctx);
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+ si_set_active_descriptors_for_shader(sctx, sel);
}
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)