RADEON_LLVM_AMDGPU_GS = 88,
RADEON_LLVM_AMDGPU_PS = 89,
RADEON_LLVM_AMDGPU_CS = 90,
+ RADEON_LLVM_AMDGPU_HS = 93,
};
#define CONST_ADDR_SPACE 2
LLVMValueRef tcs_out_layout;
LLVMValueRef tcs_in_layout;
LLVMValueRef oc_lds;
+ LLVMValueRef merged_wave_info;
LLVMValueRef tess_factor_offset;
LLVMValueRef tcs_patch_id;
LLVMValueRef tcs_rel_ids;
switch (stage) {
case MESA_SHADER_VERTEX:
- case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
calling_conv = RADEON_LLVM_AMDGPU_VS;
break;
case MESA_SHADER_GEOMETRY:
calling_conv = RADEON_LLVM_AMDGPU_GS;
break;
+ case MESA_SHADER_TESS_CTRL:
+ calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
+ break;
case MESA_SHADER_FRAGMENT:
calling_conv = RADEON_LLVM_AMDGPU_PS;
break;
}
}
-static void create_function(struct nir_to_llvm_context *ctx)
+static void
+radv_define_common_user_sgprs_phase1(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ const struct user_sgpr_info *user_sgpr_info,
+ struct arg_info *args,
+ LLVMValueRef *desc_sets)
{
unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
- uint8_t user_sgpr_idx;
- struct user_sgpr_info user_sgpr_info;
- struct arg_info args = {};
- LLVMValueRef desc_sets;
-
- allocate_user_sgprs(ctx, &user_sgpr_info);
- if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
- add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->ring_offsets); /* address of rings */
- }
+ unsigned stage_mask = 1 << stage;
+ if (has_previous_stage)
+ stage_mask |= 1 << previous_stage;
/* 1 for each descriptor set */
- if (!user_sgpr_info.indirect_all_descriptor_sets) {
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
for (unsigned i = 0; i < num_sets; ++i) {
- if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
- add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->descriptor_sets[i]);
+ if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ add_user_sgpr_array_argument(args, const_array(ctx->i8, 1024 * 1024), &ctx->descriptor_sets[i]);
}
}
} else
- add_user_sgpr_array_argument(&args, const_array(const_array(ctx->i8, 1024 * 1024), 32), &desc_sets);
+ add_user_sgpr_array_argument(args, const_array(const_array(ctx->i8, 1024 * 1024), 32), desc_sets);
if (ctx->shader_info->info.needs_push_constants) {
/* 1 for push constants and dynamic descriptors */
- add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->push_constants);
+ add_user_sgpr_array_argument(args, const_array(ctx->i8, 1024 * 1024), &ctx->push_constants);
}
+}
- switch (ctx->stage) {
+static void
+radv_define_common_user_sgprs_phase2(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ const struct user_sgpr_info *user_sgpr_info,
+ LLVMValueRef desc_sets,
+ uint8_t *user_sgpr_idx)
+{
+ unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
+ unsigned stage_mask = 1 << stage;
+ if (has_previous_stage)
+ stage_mask |= 1 << previous_stage;
+
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
+ for (unsigned i = 0; i < num_sets; ++i) {
+ if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
+ } else
+ ctx->descriptor_sets[i] = NULL;
+ }
+ } else {
+ uint32_t desc_sgpr_idx = *user_sgpr_idx;
+ set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx, 2);
+
+ for (unsigned i = 0; i < num_sets; ++i) {
+ if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
+ ctx->descriptor_sets[i] = ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
+
+ } else
+ ctx->descriptor_sets[i] = NULL;
+ }
+ ctx->shader_info->need_indirect_descriptor_sets = true;
+ }
+
+ if (ctx->shader_info->info.needs_push_constants) {
+ set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
+ }
+}
+
+static void
+radv_define_vs_user_sgprs_phase1(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ struct arg_info *args)
+{
+ if (!ctx->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (ctx->shader_info->info.vs.has_vertex_buffers)
+ add_user_sgpr_argument(args, const_array(ctx->v4i32, 16), &ctx->vertex_buffers); /* vertex buffers */
+ add_user_sgpr_argument(args, ctx->i32, &ctx->abi.base_vertex); // base vertex
+ add_user_sgpr_argument(args, ctx->i32, &ctx->abi.start_instance);// start instance
+ if (ctx->shader_info->info.vs.needs_draw_id)
+ add_user_sgpr_argument(args, ctx->i32, &ctx->abi.draw_id); // draw id
+ }
+}
+
+static void
+radv_define_vs_user_sgprs_phase2(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ uint8_t *user_sgpr_idx)
+{
+ if (!ctx->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (ctx->shader_info->info.vs.has_vertex_buffers) {
+ set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
+ }
+ unsigned vs_num = 2;
+ if (ctx->shader_info->info.vs.needs_draw_id)
+ vs_num++;
+
+ set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
+ }
+}
+
+
+static void create_function(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage)
+{
+ uint8_t user_sgpr_idx;
+ struct user_sgpr_info user_sgpr_info;
+ struct arg_info args = {};
+ LLVMValueRef desc_sets;
+
+ allocate_user_sgprs(ctx, &user_sgpr_info);
+
+ if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
+ add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->ring_offsets); /* address of rings */
+ }
+
+ switch (stage) {
case MESA_SHADER_COMPUTE:
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
if (ctx->shader_info->info.cs.grid_components_used)
add_user_sgpr_argument(&args, LLVMVectorType(ctx->i32, ctx->shader_info->info.cs.grid_components_used), &ctx->num_work_groups); /* grid size */
add_sgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->workgroup_ids);
add_vgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->local_invocation_ids);
break;
case MESA_SHADER_VERTEX:
- if (!ctx->is_gs_copy_shader) {
- if (ctx->shader_info->info.vs.has_vertex_buffers)
- add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->vertex_buffers); /* vertex buffers */
- add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.base_vertex); // base vertex
- add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.start_instance);// start instance
- if (ctx->shader_info->info.vs.needs_draw_id)
- add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.draw_id); // draw id
- }
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
+ radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index))
add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
if (ctx->options->key.vs.as_es)
}
break;
case MESA_SHADER_TESS_CTRL:
- add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
- add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
- add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
- add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
- if (ctx->shader_info->info.needs_multiview_view_index)
- add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
- add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
- add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor offset
- add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id
- add_vgpr_argument(&args, ctx->i32, &ctx->tcs_rel_ids); // rel ids;
+ if (has_previous_stage) {
+ // First 6 system regs
+ add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
+ add_sgpr_argument(&args, ctx->i32, &ctx->merged_wave_info); // merged wave info
+ add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor offset
+
+ add_sgpr_argument(&args, ctx->i32, NULL); // scratch offset
+ add_sgpr_argument(&args, ctx->i32, NULL); // unknown
+ add_sgpr_argument(&args, ctx->i32, NULL); // unknown
+
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
+ radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->ls_out_layout); // ls out layout
+
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
+
+ add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id
+ add_vgpr_argument(&args, ctx->i32, &ctx->tcs_rel_ids); // rel ids;
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.vertex_id); // vertex id
+ add_vgpr_argument(&args, ctx->i32, &ctx->rel_auto_id); // rel auto id
+ add_vgpr_argument(&args, ctx->i32, &ctx->vs_prim_id); // vs prim id
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.instance_id); // instance id
+ } else {
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
+ add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
+ add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor offset
+ add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id
+ add_vgpr_argument(&args, ctx->i32, &ctx->tcs_rel_ids); // rel ids;
+ }
break;
case MESA_SHADER_TESS_EVAL:
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
add_vgpr_argument(&args, ctx->i32, &ctx->tes_patch_id); // tes patch id
break;
case MESA_SHADER_GEOMETRY:
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
+ radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_ring_stride); // gsvs stride
add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_num_entries); // gsvs num entires
if (ctx->shader_info->info.needs_multiview_view_index)
add_vgpr_argument(&args, ctx->i32, &ctx->gs_invocation_id);
break;
case MESA_SHADER_FRAGMENT:
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
if (ctx->shader_info->info.ps.needs_sample_positions)
add_user_sgpr_argument(&args, ctx->i32, &ctx->sample_pos_offset); /* sample position offset */
add_sgpr_argument(&args, ctx->i32, &ctx->prim_mask); /* prim mask */
ctx->context, ctx->module, ctx->builder, NULL, 0, &args,
ctx->max_workgroup_size,
ctx->options->unsafe_math);
- set_llvm_calling_convention(ctx->main_function, ctx->stage);
+ set_llvm_calling_convention(ctx->main_function, stage);
ctx->shader_info->num_input_vgprs = 0;
- ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs =
- ctx->options->supports_spill ? 2 : 0;
+ ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 2 : 0;
- ctx->shader_info->num_user_sgprs += args.num_user_sgprs_used;
ctx->shader_info->num_input_sgprs += args.num_sgprs_used;
if (ctx->stage != MESA_SHADER_FRAGMENT)
const_array(ctx->v4i32, 16), "");
}
}
+
+ /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
+ * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
+ if (has_previous_stage)
+ user_sgpr_idx = 0;
- if (!user_sgpr_info.indirect_all_descriptor_sets) {
- for (unsigned i = 0; i < num_sets; ++i) {
- if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
- set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], &user_sgpr_idx, 2);
- } else
- ctx->descriptor_sets[i] = NULL;
- }
- } else {
- uint32_t desc_sgpr_idx = user_sgpr_idx;
- set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, &user_sgpr_idx, 2);
-
- for (unsigned i = 0; i < num_sets; ++i) {
- if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
- set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
- ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
-
- } else
- ctx->descriptor_sets[i] = NULL;
- }
- ctx->shader_info->need_indirect_descriptor_sets = true;
- }
-
- if (ctx->shader_info->info.needs_push_constants) {
- set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, &user_sgpr_idx, 2);
- }
+ radv_define_common_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, desc_sets, &user_sgpr_idx);
- switch (ctx->stage) {
+ switch (stage) {
case MESA_SHADER_COMPUTE:
if (ctx->shader_info->info.cs.grid_components_used) {
set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used);
}
break;
case MESA_SHADER_VERTEX:
- if (!ctx->is_gs_copy_shader) {
- if (ctx->shader_info->info.vs.has_vertex_buffers) {
- set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, &user_sgpr_idx, 2);
- }
- unsigned vs_num = 2;
- if (ctx->shader_info->info.vs.needs_draw_id)
- vs_num++;
-
- set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, &user_sgpr_idx, vs_num);
- }
+ radv_define_vs_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
if (ctx->view_index)
set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
if (ctx->options->key.vs.as_ls) {
declare_tess_lds(ctx);
break;
case MESA_SHADER_TESS_CTRL:
+ radv_define_vs_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
+ if (has_previous_stage)
+ set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1);
set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
if (ctx->view_index)
set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_GEOMETRY:
+ radv_define_vs_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
set_userdata_location_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, &user_sgpr_idx, 2);
if (ctx->view_index)
set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
default:
unreachable("Shader stage not implemented");
}
+
+ ctx->shader_info->num_user_sgprs = user_sgpr_idx;
}
static void setup_types(struct nir_to_llvm_context *ctx)
unsigned mask;
int idx;
LLVMValueRef result;
- bool has_ds_bpermute = ctx->abi->chip_class >= VI;
if (op == nir_op_fddx_fine || op == nir_op_fddx)
mask = AC_TID_MASK_LEFT;
else
idx = 2;
- result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
- mask, idx,
- src0);
+ result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
return result;
}
result);
break;
case nir_op_ffma:
- result = emit_intrin_3f_param(&ctx->ac, "llvm.fma",
+ result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
break;
case nir_op_ibitfield_extract:
LLVMConstInt(ctx->ac.i32, 2, false), "");
/* VI only */
- if (ctx->abi->chip_class == VI && in_elements) {
+ if (ctx->ac.chip_class == VI && in_elements) {
/* On VI, the descriptor contains the size in bytes,
* but TXQ must return the size in elements.
* The stride is always non-zero for resources using TXQ.
break;
}
- if (instr->op == nir_texop_tg4 && ctx->abi->chip_class <= VI) {
+ if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
return radv_lower_gather4_integer(&ctx->ac, args, instr);
LLVMValueRef dw_addr)
{
LLVMValueRef value;
- value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+ value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
return value;
}
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
- bool gfx9_1d = ctx->abi->chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
+ bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
count = image_type_to_components_count(dim, is_array);
if (is_ms) {
LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false);
LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false);
LLVMValueRef glc = i1false;
- bool force_glc = ctx->abi->chip_class == SI;
+ bool force_glc = ctx->ac.chip_class == SI;
if (force_glc)
glc = i1true;
static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
- LLVMValueRef params[6];
+ LLVMValueRef params[7];
int param_count = 0;
const nir_variable *var = instr->variables[0]->var;
z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
}
- if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
+ if (ctx->ac.chip_class >= GFX9 &&
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
glsl_sampler_type_is_array(type)) {
LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
const_array(ctx->v2f32, 64), "");
sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
- result = ac_build_indexed_load(&ctx->ac, ptr, sample_id, false);
+ result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
return result;
}
list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->i32, offset, 0));
list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
- return ac_build_indexed_load_const(&ctx->ac, list, index);
+ return ac_build_load_to_sgpr(&ctx->ac, list, index);
}
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef img7, samp0;
- if (ctx->abi->chip_class >= VI)
+ if (ctx->ac.chip_class >= VI)
return samp;
img7 = LLVMBuildExtractElement(builder, res,
* It's unnecessary if the original texture format was
* Z32_FLOAT, but we don't know that here.
*/
- if (ctx->abi->chip_class == VI)
+ if (ctx->ac.chip_class == VI)
z = ac_build_clamp(&ctx->ac, z);
address[count++] = z;
break;
case GLSL_SAMPLER_DIM_1D:
num_src_deriv_channels = 1;
- if (ctx->abi->chip_class >= GFX9) {
+ if (ctx->ac.chip_class >= GFX9) {
num_dest_deriv_channels = 2;
num_deriv_comp = 2;
} else {
}
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
- if (instr->is_array && instr->op != nir_texop_lod)
- coords[3] = apply_round_slice(&ctx->ac, coords[3]);
for (chan = 0; chan < instr->coord_components; chan++)
coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
if (instr->coord_components == 3)
coords[3] = LLVMGetUndef(ctx->ac.f32);
ac_prepare_cube_coords(&ctx->ac,
instr->op == nir_texop_txd, instr->is_array,
- coords, derivs);
+ instr->op == nir_texop_lod, coords, derivs);
if (num_deriv_comp)
num_deriv_comp--;
}
address[count++] = coords[2];
}
- if (ctx->abi->chip_class >= GFX9) {
+ if (ctx->ac.chip_class >= GFX9) {
LLVMValueRef filler;
if (instr->op == nir_texop_txf)
filler = ctx->ac.i32_0;
LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
- } else if (ctx->abi->chip_class >= GFX9 &&
+ } else if (ctx->ac.chip_class >= GFX9 &&
instr->op == nir_texop_txs &&
instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
instr->is_array) {
for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
t_offset = LLVMConstInt(ctx->i32, index + i, false);
- t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
+ t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
input = ac_build_buffer_load_format(&ctx->ac, t_list,
buffer_index,
static void
scan_shader_output_decl(struct nir_to_llvm_context *ctx,
- struct nir_variable *variable)
+ struct nir_variable *variable,
+ struct nir_shader *shader,
+ gl_shader_stage stage)
{
int idx = variable->data.location + variable->data.index;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
variable->data.driver_location = idx * 4;
/* tess ctrl has it's own load/store paths for outputs */
- if (ctx->stage == MESA_SHADER_TESS_CTRL)
+ if (stage == MESA_SHADER_TESS_CTRL)
return;
mask_attribs = ((1ull << attrib_count) - 1) << idx;
- if (ctx->stage == MESA_SHADER_VERTEX ||
- ctx->stage == MESA_SHADER_TESS_EVAL ||
- ctx->stage == MESA_SHADER_GEOMETRY) {
+ if (stage == MESA_SHADER_VERTEX ||
+ stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY) {
if (idx == VARYING_SLOT_CLIP_DIST0) {
- int length = ctx->num_output_clips + ctx->num_output_culls;
- if (ctx->stage == MESA_SHADER_VERTEX) {
- ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
- ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
+ int length = shader->info.clip_distance_array_size +
+ shader->info.cull_distance_array_size;
+ if (stage == MESA_SHADER_VERTEX) {
+ ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
+ ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
}
- if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
- ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
+ if (stage == MESA_SHADER_TESS_EVAL) {
+ ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
+ ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
}
if (length > 4)
{
if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
(ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
- ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
+ ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
}
if (ctx->is_gs_copy_shader) {
- ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
+ ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
}
if (ctx->stage == MESA_SHADER_GEOMETRY) {
LLVMValueRef tmp;
- ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
- ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
+ ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
+ ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v4i32, "");
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
ctx->stage == MESA_SHADER_TESS_EVAL) {
- ctx->hs_ring_tess_offchip = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
- ctx->hs_ring_tess_factor = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
+ ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
+ ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
}
}
ctx.context = LLVMContextCreate();
ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
- ac_llvm_context_init(&ctx.ac, ctx.context);
+ ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class);
ctx.ac.module = ctx.module;
memset(shader_info, 0, sizeof(*shader_info));
for (i = 0; i < AC_UD_MAX_UD; i++)
shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
- create_function(&ctx);
+ create_function(&ctx, nir->stage, false, MESA_SHADER_VERTEX);
if (nir->stage == MESA_SHADER_GEOMETRY) {
ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.i32, "gs_next_vertex");
else if(nir->stage == MESA_SHADER_VERTEX)
handle_vs_inputs(&ctx, nir);
- ctx.abi.chip_class = options->chip_class;
ctx.abi.inputs = &ctx.inputs[0];
ctx.abi.emit_outputs = handle_shader_outputs_post;
ctx.abi.load_ssbo = radv_load_ssbo;
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
nir_foreach_variable(variable, &nir->outputs)
- scan_shader_output_decl(&ctx, variable);
+ scan_shader_output_decl(&ctx, variable, nir, nir->stage);
ac_nir_translate(&ctx.ac, &ctx.abi, nir, &ctx);
shader_info->num_input_sgprs + 3);
}
+static void
+ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options)
+{
+ switch (nir->stage) {
+ case MESA_SHADER_COMPUTE:
+ for (int i = 0; i < 3; ++i)
+ shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
+ break;
+ case MESA_SHADER_FRAGMENT:
+ shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ shader_info->gs.vertices_in = nir->info.gs.vertices_in;
+ shader_info->gs.vertices_out = nir->info.gs.vertices_out;
+ shader_info->gs.output_prim = nir->info.gs.output_primitive;
+ shader_info->gs.invocations = nir->info.gs.invocations;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
+ shader_info->tes.spacing = nir->info.tess.spacing;
+ shader_info->tes.ccw = nir->info.tess.ccw;
+ shader_info->tes.point_mode = nir->info.tess.point_mode;
+ shader_info->tes.as_es = options->key.tes.as_es;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
+ break;
+ case MESA_SHADER_VERTEX:
+ shader_info->vs.as_es = options->key.vs.as_es;
+ shader_info->vs.as_ls = options->key.vs.as_ls;
+ /* in LS mode we need at least 1, invocation id needs 3, handled elsewhere */
+ if (options->key.vs.as_ls)
+ shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
+ break;
+ default:
+ break;
+ }
+}
+
void ac_compile_nir_shader(LLVMTargetMachineRef tm,
struct ac_shader_binary *binary,
struct ac_shader_config *config,
struct ac_shader_variant_info *shader_info,
- struct nir_shader *nir,
+ struct nir_shader *const *nir,
+ int nir_count,
const struct ac_nir_compiler_options *options,
bool dump_shader)
{
- LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
+ LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir[0], shader_info,
options);
- ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader, options->supports_spill);
- switch (nir->stage) {
- case MESA_SHADER_COMPUTE:
- for (int i = 0; i < 3; ++i)
- shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
- break;
- case MESA_SHADER_FRAGMENT:
- shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
- break;
- case MESA_SHADER_GEOMETRY:
- shader_info->gs.vertices_in = nir->info.gs.vertices_in;
- shader_info->gs.vertices_out = nir->info.gs.vertices_out;
- shader_info->gs.output_prim = nir->info.gs.output_primitive;
- shader_info->gs.invocations = nir->info.gs.invocations;
- break;
- case MESA_SHADER_TESS_EVAL:
- shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
- shader_info->tes.spacing = nir->info.tess.spacing;
- shader_info->tes.ccw = nir->info.tess.ccw;
- shader_info->tes.point_mode = nir->info.tess.point_mode;
- shader_info->tes.as_es = options->key.tes.as_es;
- break;
- case MESA_SHADER_TESS_CTRL:
- shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
- break;
- case MESA_SHADER_VERTEX:
- shader_info->vs.as_es = options->key.vs.as_es;
- shader_info->vs.as_ls = options->key.vs.as_ls;
- /* in LS mode we need at least 1, invocation id needs 3, handled elsewhere */
- if (options->key.vs.as_ls)
- shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
- break;
- default:
- break;
- }
+ ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->stage, dump_shader, options->supports_spill);
+ for (int i = 0; i < nir_count; ++i)
+ ac_fill_shader_info(shader_info, nir[i], options);
}
static void
ctx.options = options;
ctx.shader_info = shader_info;
- ac_llvm_context_init(&ctx.ac, ctx.context);
+ ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class);
ctx.ac.module = ctx.module;
ctx.is_gs_copy_shader = true;
ctx.ac.builder = ctx.builder;
ctx.stage = MESA_SHADER_VERTEX;
- create_function(&ctx);
+ create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);
ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
ac_setup_rings(&ctx);
ctx.nir = &nir_ctx;
nir_foreach_variable(variable, &geom_shader->outputs) {
- scan_shader_output_decl(&ctx, variable);
+ scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
handle_shader_output_decl(&nir_ctx, geom_shader, variable);
}