*
*/
+#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_async_debug.h"
#include "util/u_memory.h"
if (!ac_rtld_open(&rtld, (struct ac_rtld_open_info){
.info = &sel->screen->info,
.shader_type = MESA_SHADER_COMPUTE,
+ .wave_size = sel->screen->compute_wave_size,
.num_parts = 1,
.elf_ptrs = &program->shader.binary.elf_buffer,
.elf_sizes = &program->shader.binary.elf_size }))
si_nir_opts(sel->nir);
si_nir_scan_shader(sel->nir, &sel->info);
- si_lower_nir(sel);
+ si_lower_nir(sel, sscreen->compute_wave_size);
}
/* Store the declared LDS size into tgsi_shader_info for the shader
sel->info.uses_block_size &&
sel->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
program->num_cs_user_data_dwords =
- sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
+ sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
void *ir_binary = si_get_ir_binary(sel);
program->num_cs_user_data_dwords;
shader->config.rsrc1 =
- S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
+ S_00B848_VGPRS((shader->config.num_vgprs - 1) /
+ (sscreen->compute_wave_size == 32 ? 8 : 4)) |
S_00B848_DX10_CLAMP(1) |
S_00B848_MEM_ORDERED(sscreen->info.chip_class >= GFX10) |
S_00B848_WGP_MODE(sscreen->info.chip_class >= GFX10) |
program->input_size = cso->req_input_mem;
if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
- if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
+ if (sscreen->options.always_nir &&
+ cso->ir_type == PIPE_SHADER_IR_TGSI) {
+ program->ir_type = PIPE_SHADER_IR_NIR;
+ sel->nir = tgsi_to_nir(cso->prog, ctx->screen);
+ } else if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
sel->tokens = tgsi_dup_tokens(cso->prog);
if (!sel->tokens) {
FREE(program);
unsigned threads_per_threadgroup =
info->block[0] * info->block[1] * info->block[2];
unsigned waves_per_threadgroup =
- DIV_ROUND_UP(threads_per_threadgroup, 64);
+ DIV_ROUND_UP(threads_per_threadgroup, sscreen->compute_wave_size);
unsigned threadgroups_per_cu = 1;
if (sctx->chip_class >= GFX10 && waves_per_threadgroup == 1)
S_00B800_FORCE_START_AT_000(1) |
/* If the KMD allows it (there is a KMD hw register for it),
* allow launching waves out-of-order. (same as Vulkan) */
- S_00B800_ORDER_MODE(sctx->chip_class >= GFX7);
+ S_00B800_ORDER_MODE(sctx->chip_class >= GFX7) |
+ S_00B800_CS_W32_EN(sscreen->compute_wave_size == 32);
const uint *last_block = info->last_block;
bool partial_block_en = last_block[0] || last_block[1] || last_block[2];
}
si_shader_destroy(&program->shader);
+ ralloc_free(program->sel.nir);
FREE(program);
}
if (program == sctx->cs_shader_state.emitted_program)
sctx->cs_shader_state.emitted_program = NULL;
- ralloc_free(program->sel.nir);
si_compute_reference(&program, NULL);
}