/* Currently not implemented on GFX6-7 */
assert(ctx->options->chip_class >= GFX8);
- if (ctx->options->chip_class <= GFX9 || ctx->options->wave_size == 32) {
+ if (ctx->options->chip_class <= GFX9 || ctx->program->wave_size == 32) {
return bld.ds(aco_opcode::ds_bpermute_b32, bld.def(v1), index_x4, data);
}
scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand(0u));
uint32_t rsrc_conf = S_008F0C_ADD_TID_ENABLE(1) |
- S_008F0C_INDEX_STRIDE(ctx->options->wave_size == 64 ? 3 : 2);;
+ S_008F0C_INDEX_STRIDE(ctx->program->wave_size == 64 ? 3 : 2);;
if (ctx->program->chip_class >= GFX10) {
rsrc_conf |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
program->info = info;
program->chip_class = options->chip_class;
program->family = options->family;
- program->wave_size = options->wave_size;
+ program->wave_size = info->wave_size;
program->lds_alloc_granule = options->chip_class >= GFX7 ? 512 : 256;
program->lds_limit = options->chip_class >= GFX7 ? 65536 : 32768;
for (unsigned i = 0; i < shader_count; i++)
scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
ctx.scratch_enabled = scratch_size > 0;
- ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.options->wave_size, 1024);
+ ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024);
ctx.program->config->float_mode = V_00B028_FP_64_DENORMS;
- ctx.program->info->wave_size = ctx.options->wave_size;
ctx.block = ctx.program->create_and_insert_block();
ctx.block->loop_nest_depth = 0;
}
ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class,
- options->family, float_mode, options->wave_size, 64);
+ options->family, float_mode, shader_info->wave_size, 64);
ctx.context = ctx.ac.context;
for (i = 0; i < MAX_SETS; i++)
shader_info->gs.es_type = nir[0]->info.stage;
}
}
- shader_info->wave_size = options->wave_size;
}
static void
keys[MESA_SHADER_FRAGMENT].fs.num_samples = key->num_samples;
}
+/**
+ * Select the wave size used for a shader stage.
+ *
+ * \param device  device whose physical_device carries the per-stage
+ *                wave sizes (cs_wave_size, ps_wave_size, ge_wave_size).
+ * \param stage   shader stage being queried.
+ * \param key     variant key; only vs_common_out.as_ngg is read.
+ * \return 64 for a geometry shader that is not compiled as NGG, the
+ *         compute or fragment wave size for those stages, and
+ *         ge_wave_size for every other stage.
+ */
+static uint8_t
+radv_get_wave_size(struct radv_device *device,
+		   gl_shader_stage stage,
+		   const struct radv_shader_variant_key *key)
+{
+	switch (stage) {
+	case MESA_SHADER_COMPUTE:
+		return device->physical_device->cs_wave_size;
+	case MESA_SHADER_FRAGMENT:
+		return device->physical_device->ps_wave_size;
+	case MESA_SHADER_GEOMETRY:
+		/* Geometry shaders not compiled as NGG are pinned to 64. */
+		if (!key->vs_common_out.as_ngg)
+			return 64;
+		/* fallthrough: NGG geometry shaders use ge_wave_size. */
+	default:
+		return device->physical_device->ge_wave_size;
+	}
+}
+
static void
radv_fill_shader_info(struct radv_pipeline *pipeline,
struct radv_shader_variant_key *keys,
radv_nir_shader_info_pass(nir[i], pipeline->layout,
&keys[i], &infos[i]);
}
+
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (nir[i])
+ infos[i].wave_size =
+ radv_get_wave_size(pipeline->device, i, &keys[i]);
+ }
}
static void
radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY],
pipeline->layout, &key,
&info);
+ info.wave_size = 64; /* Wave32 not supported. */
pipeline->gs_copy_shader = radv_create_gs_copy_shader(
device, nir[MESA_SHADER_GEOMETRY], &info,
options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
- if ((stage == MESA_SHADER_GEOMETRY && !options->key.vs_common_out.as_ngg) ||
- gs_copy_shader)
- options->wave_size = 64;
- else if (stage == MESA_SHADER_COMPUTE)
- options->wave_size = device->physical_device->cs_wave_size;
- else if (stage == MESA_SHADER_FRAGMENT)
- options->wave_size = device->physical_device->ps_wave_size;
- else
- options->wave_size = device->physical_device->ge_wave_size;
-
if (!use_aco || options->dump_shader || options->record_ir)
ac_init_llvm_once();
radv_init_llvm_compiler(&ac_llvm,
thread_compiler,
chip_family, tm_options,
- options->wave_size);
+ info->wave_size);
if (gs_copy_shader) {
assert(shader_count == 1);
enum chip_class chip_class;
uint32_t tess_offchip_block_dw_size;
uint32_t address32_hi;
- uint8_t wave_size;
};
enum radv_ud_index {