gallium: add AMD-specific compute TGSI enums
[mesa.git] / src / gallium / drivers / radeonsi / si_compute.c
index c0b59915b1ff9b01e6bedfcfd4a597c88fab3471..22975069c999be515daf341c9d3031e9c8910a78 100644 (file)
@@ -23,6 +23,7 @@
  *
  */
 
+#include "nir/tgsi_to_nir.h"
 #include "tgsi/tgsi_parse.h"
 #include "util/u_async_debug.h"
 #include "util/u_memory.h"
@@ -68,6 +69,7 @@ static const amd_kernel_code_t *si_compute_get_code_object(
        if (!ac_rtld_open(&rtld, (struct ac_rtld_open_info){
                        .info = &sel->screen->info,
                        .shader_type = MESA_SHADER_COMPUTE,
+                       .wave_size = sel->screen->compute_wave_size,
                        .num_parts = 1,
                        .elf_ptrs = &program->shader.binary.elf_buffer,
                        .elf_sizes = &program->shader.binary.elf_size }))
@@ -126,7 +128,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
 
                si_nir_opts(sel->nir);
                si_nir_scan_shader(sel->nir, &sel->info);
-               si_lower_nir(sel);
+               si_lower_nir(sel, sscreen->compute_wave_size);
        }
 
        /* Store the declared LDS size into tgsi_shader_info for the shader
@@ -143,7 +145,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
                sel->info.uses_block_size &&
                sel->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
        program->num_cs_user_data_dwords =
-               sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
+               sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
 
        void *ir_binary = si_get_ir_binary(sel);
 
@@ -177,7 +179,8 @@ static void si_create_compute_state_async(void *job, int thread_index)
                                      program->num_cs_user_data_dwords;
 
                shader->config.rsrc1 =
-                       S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
+                       S_00B848_VGPRS((shader->config.num_vgprs - 1) /
+                                      (sscreen->compute_wave_size == 32 ? 8 : 4)) |
                        S_00B848_DX10_CLAMP(1) |
                        S_00B848_MEM_ORDERED(sscreen->info.chip_class >= GFX10) |
                        S_00B848_WGP_MODE(sscreen->info.chip_class >= GFX10) |
@@ -229,7 +232,11 @@ static void *si_create_compute_state(
        program->input_size = cso->req_input_mem;
 
        if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
-               if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
+               if (sscreen->options.always_nir &&
+                   cso->ir_type == PIPE_SHADER_IR_TGSI) {
+                       program->ir_type = PIPE_SHADER_IR_NIR;
+                       sel->nir = tgsi_to_nir(cso->prog, ctx->screen);
+               } else if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
                        sel->tokens = tgsi_dup_tokens(cso->prog);
                        if (!sel->tokens) {
                                FREE(program);
@@ -745,7 +752,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
        unsigned threads_per_threadgroup =
                info->block[0] * info->block[1] * info->block[2];
        unsigned waves_per_threadgroup =
-               DIV_ROUND_UP(threads_per_threadgroup, 64);
+               DIV_ROUND_UP(threads_per_threadgroup, sscreen->compute_wave_size);
        unsigned threadgroups_per_cu = 1;
 
        if (sctx->chip_class >= GFX10 && waves_per_threadgroup == 1)
@@ -762,7 +769,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
                S_00B800_FORCE_START_AT_000(1) |
                /* If the KMD allows it (there is a KMD hw register for it),
                 * allow launching waves out-of-order. (same as Vulkan) */
-               S_00B800_ORDER_MODE(sctx->chip_class >= GFX7);
+               S_00B800_ORDER_MODE(sctx->chip_class >= GFX7) |
+               S_00B800_CS_W32_EN(sscreen->compute_wave_size == 32);
 
        const uint *last_block = info->last_block;
        bool partial_block_en = last_block[0] || last_block[1] || last_block[2];
@@ -945,6 +953,7 @@ void si_destroy_compute(struct si_compute *program)
        }
 
        si_shader_destroy(&program->shader);
+       ralloc_free(program->sel.nir);
        FREE(program);
 }
 
@@ -961,7 +970,6 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
        if (program == sctx->cs_shader_state.emitted_program)
                sctx->cs_shader_state.emitted_program = NULL;
 
-       ralloc_free(program->sel.nir);
        si_compute_reference(&program, NULL);
 }