gallium/radeon: add driver queries for compute/dma call stats and spills
[mesa.git] / src / gallium / drivers / radeonsi / si_compute.c
index 557e89236fa62010ede4f7763630c0244e574751..2f7e1721c89b98f764326e64a4335a61fc58bf0e 100644 (file)
@@ -70,6 +70,7 @@ static void *si_create_compute_state(
 
                sel.tokens = tgsi_dup_tokens(cso->prog);
                if (!sel.tokens) {
+                       FREE(program);
                        return NULL;
                }
 
@@ -81,14 +82,21 @@ static void *si_create_compute_state(
 
                program->shader.selector = &sel;
 
-               if (si_compile_tgsi_shader(sscreen, sctx->tm, &program->shader,
-                                          true, &sctx->b.debug)) {
+               if (si_shader_create(sscreen, sctx->tm, &program->shader,
+                                    &sctx->b.debug)) {
                        FREE(sel.tokens);
+                       FREE(program);
                        return NULL;
                }
 
                scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
 
+               shader->config.rsrc1 =
+                          S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
+                          S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
+                          S_00B848_DX10_CLAMP(1) |
+                          S_00B848_FLOAT_MODE(shader->config.float_mode);
+
                shader->config.rsrc2 = S_00B84C_USER_SGPR(SI_CS_NUM_USER_SGPR) |
                           S_00B84C_SCRATCH_EN(scratch_enabled) |
                           S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
@@ -105,10 +113,10 @@ static void *si_create_compute_state(
                radeon_elf_read(code, header->num_bytes, &program->shader.binary);
                si_shader_binary_read_config(&program->shader.binary,
                             &program->shader.config, 0);
+               si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug,
+                              PIPE_SHADER_COMPUTE, stderr);
+               si_shader_binary_upload(sctx->screen, &program->shader);
        }
-       si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug,
-                      TGSI_PROCESSOR_COMPUTE, stderr);
-       si_shader_binary_upload(sctx->screen, &program->shader);
 
        return program;
 }
@@ -215,7 +223,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
        if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) {
                uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
 
-               si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
+               si_shader_apply_scratch_relocs(sctx, shader, config, scratch_va);
 
                if (si_shader_binary_upload(sctx->screen, shader))
                        return false;
@@ -300,6 +308,8 @@ static bool si_switch_compute_shader(struct si_context *sctx,
 
        sctx->cs_shader_state.emitted_program = program;
        sctx->cs_shader_state.offset = offset;
+       sctx->cs_shader_state.uses_scratch =
+               config->scratch_bytes_per_wave != 0;
 
        return true;
 }
@@ -479,6 +489,10 @@ static void si_launch_grid(
        si_emit_dispatch_packets(sctx, info);
 
        si_ce_post_draw_synchronization(sctx);
+
+       sctx->b.num_compute_calls++;
+       if (sctx->cs_shader_state.uses_scratch)
+               sctx->b.num_spill_compute_calls++;
 }