program->sgpr_alloc_granule = 7;
program->sgpr_limit = 104;
}
+
/* TODO: we don't have to allocate VCC if we don't need it */
program->needs_vcc = true;
+ calc_min_waves(program);
+ program->vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
+ program->sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
+
isel_context ctx = {};
ctx.program = program;
ctx.args = args;
Temp private_segment_buffer;
Temp scratch_offset;
+ uint16_t min_waves = 0;
uint16_t lds_alloc_granule;
uint32_t lds_limit; /* in bytes */
uint16_t vgpr_limit;
void lower_wqm(Program* program, live& live_vars,
const struct radv_nir_compiler_options *options);
void lower_bool_phis(Program* program);
+void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
std::vector<uint16_t> dead_code_analysis(Program *program);
assert(block->index != 0 || new_demand == RegisterDemand());
}
+
+unsigned calc_waves_per_workgroup(Program *program)
+{
+ unsigned workgroup_size = program->wave_size;
+ if (program->stage == compute_cs) {
+ unsigned* bsize = program->info->cs.block_size;
+ workgroup_size = bsize[0] * bsize[1] * bsize[2];
+ }
+ return align(workgroup_size, program->wave_size) / program->wave_size;
+}
} /* end namespace */
uint16_t get_extra_sgprs(Program *program)
return std::min(vgprs, program->vgpr_limit);
}
+void calc_min_waves(Program* program)
+{
+ unsigned waves_per_workgroup = calc_waves_per_workgroup(program);
+ /* currently min_waves is in wave64 waves */
+ if (program->wave_size == 32)
+ waves_per_workgroup = DIV_ROUND_UP(waves_per_workgroup, 2);
+
+ unsigned simd_per_cu = 4; /* TODO: different on Navi */
+ bool wgp = program->chip_class >= GFX10; /* assume WGP is used on Navi */
+ unsigned simd_per_cu_wgp = wgp ? simd_per_cu * 2 : simd_per_cu;
+
+ program->min_waves = DIV_ROUND_UP(waves_per_workgroup, simd_per_cu_wgp);
+}
+
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
{
/* TODO: max_waves_per_simd, simd_per_cu and the number of physical vgprs for Navi */
program->max_waves = max_waves_per_simd;
/* adjust max_waves for workgroup and LDS limits */
- unsigned workgroup_size = program->wave_size;
- if (program->stage == compute_cs) {
- unsigned* bsize = program->info->cs.block_size;
- workgroup_size = bsize[0] * bsize[1] * bsize[2];
- }
- unsigned waves_per_workgroup = align(workgroup_size, program->wave_size) / program->wave_size;
-
+ unsigned waves_per_workgroup = calc_waves_per_workgroup(program);
unsigned workgroups_per_cu_wgp = max_waves_per_simd * simd_per_cu_wgp / waves_per_workgroup;
if (program->config->lds_size) {
unsigned lds = program->config->lds_size * program->lds_alloc_granule;