}
ctx->buffer_resource_flags = std::vector<uint8_t>(resource_flag_count);
- nir_foreach_variable(var, &impl->function->shader->uniforms) {
- if (var->data.mode == nir_var_mem_ssbo && (var->data.access & ACCESS_RESTRICT)) {
+ nir_foreach_variable_with_modes(var, impl->function->shader, nir_var_mem_ssbo) {
+ if (var->data.access & ACCESS_RESTRICT) {
uint32_t offset = ctx->resource_flag_offsets[var->data.descriptor_set];
ctx->buffer_resource_flags[offset + var->data.binding] |= buffer_is_restrict;
}
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- if (!(nir_intrinsic_infos[intrin->intrinsic].index_map[NIR_INTRINSIC_ACCESS]))
+ if (!nir_intrinsic_has_access(intrin))
continue;
nir_ssa_def *res = NULL;
res = intrin->src[0].ssa;
break;
case nir_intrinsic_store_ssbo:
- if (nir_src_is_divergent(intrin->src[2]) || ctx->program->chip_class < GFX8 ||
+ if (nir_src_is_divergent(intrin->src[2]) ||
+ ctx->program->chip_class < GFX8 || ctx->program->chip_class >= GFX10_3 ||
(intrin->src[0].ssa->bit_size < 32 && !can_subdword_ssbo_store_use_smem(intrin)))
flags |= glc ? has_glc_vmem_store : has_nonglc_vmem_store;
res = intrin->src[1].ssa;
case nir_op_fsub:
case nir_op_fmax:
case nir_op_fmin:
- case nir_op_fmax3:
- case nir_op_fmin3:
- case nir_op_fmed3:
case nir_op_fneg:
case nir_op_fabs:
case nir_op_fsat:
case nir_intrinsic_shared_atomic_xor:
case nir_intrinsic_shared_atomic_exchange:
case nir_intrinsic_shared_atomic_comp_swap:
+ case nir_intrinsic_shared_atomic_fadd:
case nir_intrinsic_load_scratch:
case nir_intrinsic_load_invocation_id:
case nir_intrinsic_load_primitive_id:
ctx->allocated.reset(allocated.release());
ctx->cf_info.nir_to_aco.reset(nir_to_aco.release());
+
+ /* align and copy constant data */
+ while (ctx->program->constant_data.size() % 4u)
+ ctx->program->constant_data.push_back(0);
+ ctx->constant_data_offset = ctx->program->constant_data.size();
+ ctx->program->constant_data.insert(ctx->program->constant_data.end(),
+ (uint8_t*)shader->constant_data,
+ (uint8_t*)shader->constant_data + shader->constant_data_size);
}
Pseudo_instruction *add_startpgm(struct isel_context *ctx)
void
setup_vs_variables(isel_context *ctx, nir_shader *nir)
{
- nir_foreach_variable(variable, &nir->inputs)
+ nir_foreach_shader_in_variable(variable, nir)
{
variable->data.driver_location = variable->data.location * 4;
}
- nir_foreach_variable(variable, &nir->outputs)
+ nir_foreach_shader_out_variable(variable, nir)
{
if (ctx->stage == vertex_vs || ctx->stage == ngg_vertex_gs)
variable->data.driver_location = variable->data.location * 4;
if (ctx->stage == vertex_geometry_gs || ctx->stage == tess_eval_geometry_gs)
ctx->program->config->lds_size = ctx->program->info->gs_ring_info.lds_size; /* Already in units of the alloc granularity */
- nir_foreach_variable(variable, &nir->outputs) {
+ nir_foreach_shader_out_variable(variable, nir) {
variable->data.driver_location = variable->data.location * 4;
}
ctx->args->options->chip_class,
ctx->args->options->family);
unsigned lds_size = calculate_tess_lds_size(
+ ctx->args->options->chip_class,
ctx->args->options->key.tcs.input_vertices,
nir->info.tess.tcs_vertices_out,
ctx->tcs_num_inputs,
ctx->tcs_num_patch_outputs);
ctx->args->shader_info->tcs.num_patches = ctx->tcs_num_patches;
- ctx->args->shader_info->tcs.lds_size = lds_size;
+ ctx->args->shader_info->tcs.num_lds_blocks = lds_size;
ctx->program->config->lds_size = (lds_size + ctx->program->lds_alloc_granule - 1) /
ctx->program->lds_alloc_granule;
}
void
setup_tcs_variables(isel_context *ctx, nir_shader *nir)
{
- nir_foreach_variable(variable, &nir->outputs) {
+ nir_foreach_shader_out_variable(variable, nir) {
assert(variable->data.location >= 0 && variable->data.location <= UINT8_MAX);
if (variable->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
ctx->tcs_num_patches = ctx->args->options->key.tes.num_patches;
ctx->tcs_num_outputs = ctx->program->info->tes.num_linked_inputs;
- nir_foreach_variable(variable, &nir->outputs) {
+ nir_foreach_shader_out_variable(variable, nir) {
if (ctx->stage == tess_eval_vs || ctx->stage == ngg_tess_eval_gs)
variable->data.driver_location = variable->data.location * 4;
}
{
switch (nir->info.stage) {
case MESA_SHADER_FRAGMENT: {
- nir_foreach_variable(variable, &nir->outputs)
+ nir_foreach_shader_out_variable(variable, nir)
{
int idx = variable->data.location + variable->data.index;
variable->data.driver_location = idx * 4;
void
setup_nir(isel_context *ctx, nir_shader *nir)
{
- Program *program = ctx->program;
-
- /* align and copy constant data */
- while (program->constant_data.size() % 4u)
- program->constant_data.push_back(0);
- ctx->constant_data_offset = program->constant_data.size();
- program->constant_data.insert(program->constant_data.end(),
- (uint8_t*)nir->constant_data,
- (uint8_t*)nir->constant_data + nir->constant_data_size);
-
/* the variable setup has to be done before lower_io / CSE */
setup_variables(ctx, nir);
if (nir->info.stage != MESA_SHADER_COMPUTE)
nir_lower_io(nir, (nir_variable_mode)(nir_var_shader_in | nir_var_shader_out), type_size, (nir_lower_io_options)0);
+ lower_to_scalar |= nir_opt_shrink_vectors(nir);
+
if (lower_to_scalar)
nir_lower_alu_to_scalar(nir, NULL, NULL);
if (lower_pack)
nir_lower_pack(nir);
/* lower ALU operations */
- nir_lower_int64(nir, nir->options->lower_int64_options);
+ nir_lower_int64(nir);
if (nir_lower_bit_size(nir, lower_bit_size_callback, NULL))
nir_copy_prop(nir); /* allow nir_opt_idiv_const() to optimize lowered divisions */
/* cleanup passes */
nir_lower_load_const_to_scalar(nir);
- nir_opt_shrink_load(nir);
nir_move_options move_opts = (nir_move_options)(
nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
nir_move_comparisons | nir_move_copies);
struct radv_shader_args *args,
bool is_gs_copy_shader)
{
- program->stage = 0;
+ Stage stage = 0;
for (unsigned i = 0; i < shader_count; i++) {
switch (shaders[i]->info.stage) {
case MESA_SHADER_VERTEX:
- program->stage |= sw_vs;
+ stage |= sw_vs;
break;
case MESA_SHADER_TESS_CTRL:
- program->stage |= sw_tcs;
+ stage |= sw_tcs;
break;
case MESA_SHADER_TESS_EVAL:
- program->stage |= sw_tes;
+ stage |= sw_tes;
break;
case MESA_SHADER_GEOMETRY:
- program->stage |= is_gs_copy_shader ? sw_gs_copy : sw_gs;
+ stage |= is_gs_copy_shader ? sw_gs_copy : sw_gs;
break;
case MESA_SHADER_FRAGMENT:
- program->stage |= sw_fs;
+ stage |= sw_fs;
break;
case MESA_SHADER_COMPUTE:
- program->stage |= sw_cs;
+ stage |= sw_cs;
break;
default:
unreachable("Shader stage not implemented");
}
bool gfx9_plus = args->options->chip_class >= GFX9;
bool ngg = args->shader_info->is_ngg && args->options->chip_class >= GFX10;
- if (program->stage == sw_vs && args->shader_info->vs.as_es && !ngg)
- program->stage |= hw_es;
- else if (program->stage == sw_vs && !args->shader_info->vs.as_ls && !ngg)
- program->stage |= hw_vs;
- else if (program->stage == sw_vs && ngg)
- program->stage |= hw_ngg_gs; /* GFX10/NGG: VS without GS uses the HW GS stage */
- else if (program->stage == sw_gs)
- program->stage |= hw_gs;
- else if (program->stage == sw_fs)
- program->stage |= hw_fs;
- else if (program->stage == sw_cs)
- program->stage |= hw_cs;
- else if (program->stage == sw_gs_copy)
- program->stage |= hw_vs;
- else if (program->stage == (sw_vs | sw_gs) && gfx9_plus && !ngg)
- program->stage |= hw_gs;
- else if (program->stage == sw_vs && args->shader_info->vs.as_ls)
- program->stage |= hw_ls; /* GFX6-8: VS is a Local Shader, when tessellation is used */
- else if (program->stage == sw_tcs)
- program->stage |= hw_hs; /* GFX6-8: TCS is a Hull Shader */
- else if (program->stage == (sw_vs | sw_tcs))
- program->stage |= hw_hs; /* GFX9-10: VS+TCS merged into a Hull Shader */
- else if (program->stage == sw_tes && !args->shader_info->tes.as_es && !ngg)
- program->stage |= hw_vs; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
- else if (program->stage == sw_tes && !args->shader_info->tes.as_es && ngg)
- program->stage |= hw_ngg_gs; /* GFX10/NGG: TES without GS uses the HW GS stage */
- else if (program->stage == sw_tes && args->shader_info->tes.as_es && !ngg)
- program->stage |= hw_es; /* GFX6-8: TES is an Export Shader */
- else if (program->stage == (sw_tes | sw_gs) && gfx9_plus && !ngg)
- program->stage |= hw_gs; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
+ if (stage == sw_vs && args->shader_info->vs.as_es && !ngg)
+ stage |= hw_es;
+ else if (stage == sw_vs && !args->shader_info->vs.as_ls && !ngg)
+ stage |= hw_vs;
+ else if (stage == sw_vs && ngg)
+ stage |= hw_ngg_gs; /* GFX10/NGG: VS without GS uses the HW GS stage */
+ else if (stage == sw_gs)
+ stage |= hw_gs;
+ else if (stage == sw_fs)
+ stage |= hw_fs;
+ else if (stage == sw_cs)
+ stage |= hw_cs;
+ else if (stage == sw_gs_copy)
+ stage |= hw_vs;
+ else if (stage == (sw_vs | sw_gs) && gfx9_plus && !ngg)
+ stage |= hw_gs;
+ else if (stage == sw_vs && args->shader_info->vs.as_ls)
+ stage |= hw_ls; /* GFX6-8: VS is a Local Shader, when tessellation is used */
+ else if (stage == sw_tcs)
+ stage |= hw_hs; /* GFX6-8: TCS is a Hull Shader */
+ else if (stage == (sw_vs | sw_tcs))
+ stage |= hw_hs; /* GFX9-10: VS+TCS merged into a Hull Shader */
+ else if (stage == sw_tes && !args->shader_info->tes.as_es && !ngg)
+ stage |= hw_vs; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
+ else if (stage == sw_tes && !args->shader_info->tes.as_es && ngg)
+ stage |= hw_ngg_gs; /* GFX10/NGG: TES without GS uses the HW GS stage */
+ else if (stage == sw_tes && args->shader_info->tes.as_es && !ngg)
+ stage |= hw_es; /* GFX6-8: TES is an Export Shader */
+ else if (stage == (sw_tes | sw_gs) && gfx9_plus && !ngg)
+ stage |= hw_gs; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
else
unreachable("Shader stage not implemented");
- program->config = config;
- program->info = args->shader_info;
- program->chip_class = args->options->chip_class;
- program->family = args->options->family;
- program->wave_size = args->shader_info->wave_size;
- program->lane_mask = program->wave_size == 32 ? s1 : s2;
-
- program->lds_alloc_granule = args->options->chip_class >= GFX7 ? 512 : 256;
- program->lds_limit = args->options->chip_class >= GFX7 ? 65536 : 32768;
- /* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
- program->has_16bank_lds = args->options->family == CHIP_KABINI || args->options->family == CHIP_STONEY;
-
- program->vgpr_limit = 256;
- program->vgpr_alloc_granule = 3;
-
- if (args->options->chip_class >= GFX10) {
- program->physical_sgprs = 2560; /* doesn't matter as long as it's at least 128 * 20 */
- program->sgpr_alloc_granule = 127;
- program->sgpr_limit = 106;
- program->vgpr_alloc_granule = program->wave_size == 32 ? 7 : 3;
- } else if (program->chip_class >= GFX8) {
- program->physical_sgprs = 800;
- program->sgpr_alloc_granule = 15;
- if (args->options->family == CHIP_TONGA || args->options->family == CHIP_ICELAND)
- program->sgpr_limit = 94; /* workaround hardware bug */
- else
- program->sgpr_limit = 102;
- } else {
- program->physical_sgprs = 512;
- program->sgpr_alloc_granule = 7;
- program->sgpr_limit = 104;
- }
+ init_program(program, stage, args->shader_info,
+ args->options->chip_class, args->options->family, config);
isel_context ctx = {};
ctx.program = program;