struct si_screen *sscreen = (struct si_screen *)ctx->screen;
struct si_compute *program = CALLOC_STRUCT(si_compute);
+ pipe_reference_init(&program->reference, 1);
program->screen = (struct si_screen *)ctx->screen;
program->ir_type = cso->ir_type;
program->local_size = cso->req_local_mem;
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
sctx->is_debug ||
- r600_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
+ si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
si_create_compute_state_async(program, -1);
else
util_queue_add_job(&sscreen->shader_compiler_queue,
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
uint64_t bc_va;
- radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
-
radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
sctx->compute_scratch_buffer = (struct r600_resource*)
- r600_aligned_buffer_create(&sctx->screen->b.b,
+ si_aligned_buffer_create(&sctx->screen->b.b,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
scratch_needed, 256);
radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
+ unsigned dispatch_initiator =
+ S_00B800_COMPUTE_SHADER_EN(1) |
+ S_00B800_FORCE_START_AT_000(1) |
+ /* If the KMD allows it (there is a KMD hw register for it),
+ * allow launching waves out-of-order. (same as Vulkan) */
+ S_00B800_ORDER_MODE(sctx->b.chip_class >= CIK);
+
if (info->indirect) {
uint64_t base_va = r600_resource(info->indirect)->gpu_address;
radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, render_cond_bit) |
PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, info->indirect_offset);
- radeon_emit(cs, 1);
+ radeon_emit(cs, dispatch_initiator);
} else {
radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, render_cond_bit) |
PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, info->grid[0]);
radeon_emit(cs, info->grid[1]);
radeon_emit(cs, info->grid[2]);
- radeon_emit(cs, 1);
+ radeon_emit(cs, dispatch_initiator);
}
}
program->shader.compilation_failed)
return;
+ if (sctx->b.last_num_draw_calls != sctx->b.num_draw_calls) {
+ si_update_fb_dirtiness_after_rendering(sctx);
+ sctx->b.last_num_draw_calls = sctx->b.num_draw_calls;
+ }
+
si_decompress_compute_textures(sctx);
/* Add buffer sizes for memory checking in need_cs_space. */
si_need_cs_space(sctx);
+ if (sctx->b.log)
+ si_log_compute_state(sctx, sctx->b.log);
+
if (!sctx->cs_shader_state.initialized)
si_initialize_compute(sctx);
return;
si_upload_compute_shader_descriptors(sctx);
- si_emit_compute_shader_userdata(sctx);
+ si_emit_compute_shader_pointers(sctx);
if (si_is_atom_dirty(sctx, sctx->atoms.s.render_cond)) {
sctx->atoms.s.render_cond->emit(&sctx->b,
if (program->ir_type == PIPE_SHADER_IR_TGSI)
si_setup_tgsi_grid(sctx, info);
- si_ce_pre_draw_synchronization(sctx);
-
si_emit_dispatch_packets(sctx, info);
- si_ce_post_draw_synchronization(sctx);
+ if (unlikely(sctx->current_saved_cs))
+ si_trace_emit(sctx);
sctx->compute_is_busy = true;
sctx->b.num_compute_calls++;
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
}
+void si_destroy_compute(struct si_compute *program)
+{ /* Destroys a compute program: calls si_shader_destroy on its embedded
+   * shader and then frees the si_compute allocation. program is
+   * dereferenced without a check, so it must be non-NULL, and it must
+   * not be touched after this returns. */
+ if (program->ir_type == PIPE_SHADER_IR_TGSI) { /* TGSI programs only:
+   * program->ready is dropped from the screen's shader compiler queue
+   * and then destroyed, before the struct is freed below.
+   * NOTE(review): presumably this keeps a still-queued async compile
+   * job from running on freed memory -- confirm against
+   * util_queue_drop_job. */
+ util_queue_drop_job(&program->screen->shader_compiler_queue,
+ &program->ready);
+ util_queue_fence_destroy(&program->ready);
+ }
+
+ si_shader_destroy(&program->shader);
+ FREE(program);
+}
static void si_delete_compute_state(struct pipe_context *ctx, void* state){
struct si_compute *program = (struct si_compute *)state;
struct si_context *sctx = (struct si_context*)ctx;
- if (!state) {
+ if (!state) /* Deleting a NULL compute state is a no-op. */
return;
- }
-
- if (program->ir_type == PIPE_SHADER_IR_TGSI) {
- util_queue_drop_job(&sctx->screen->shader_compiler_queue,
- &program->ready);
- util_queue_fence_destroy(&program->ready);
- }
if (program == sctx->cs_shader_state.program)
sctx->cs_shader_state.program = NULL;
if (program == sctx->cs_shader_state.emitted_program)
sctx->cs_shader_state.emitted_program = NULL;
- si_shader_destroy(&program->shader);
- FREE(program);
+ si_compute_reference(&program, NULL); /* Gives up this caller's reference
+   * rather than freeing the program here; the context's cached
+   * program/emitted_program pointers were cleared just above so they
+   * cannot keep pointing at it.
+   * NOTE(review): presumably si_compute_reference invokes
+   * si_destroy_compute once the count set up by pipe_reference_init
+   * drops to zero -- confirm against its definition. */
}
static void si_set_compute_resources(struct pipe_context * ctx_,