unsigned num_return_elems,
struct arg_info *args,
unsigned max_workgroup_size,
- bool unsafe_math)
+ const struct radv_nir_compiler_options *options)
{
LLVMTypeRef main_function_type, ret_type;
LLVMBasicBlockRef main_function_body;
}
}
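+ /* Tell the backend which value to assume for the high 32 bits when it
+ * extends 32-bit address space pointers to full 64-bit addresses. */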
+ if (options->address32_hi) {
+ ac_llvm_add_target_dep_function_attr(main_function,
+ "amdgpu-32bit-address-high-bits",
+ options->address32_hi);
+ }
+
if (max_workgroup_size) {
ac_llvm_add_target_dep_function_attr(main_function,
"amdgpu-max-work-group-size",
max_workgroup_size);
}
- if (unsafe_math) {
+ if (options->unsafe_math) {
/* These were copied from some LLVM test. */
LLVMAddTargetDependentFunctionAttr(main_function,
"less-precise-fpmad",
set_loc(ud_info, sgpr_idx, num_sgprs, 0);
}
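+/* Pointer-sized user SGPRs shrink to a single SGPR with 32-bit pointers,
+ * except for the scratch ring offsets, which always need a 64-bit pointer. */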
+static void
+set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
+{
+ bool use_32bit_pointers = HAVE_32BIT_POINTERS &&
+ idx != AC_UD_SCRATCH_RING_OFFSETS;
+
+ set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
+}
+
static void
set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
uint32_t indirect_offset)
&ctx->shader_info->user_sgprs_locs.descriptor_sets[idx];
assert(ud_info);
- set_loc(ud_info, sgpr_idx, 2, indirect_offset);
+ set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect_offset);
}
struct user_sgpr_info {
{
uint8_t count = 0;
- count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
+ if (ctx->shader_info->info.vs.has_vertex_buffers)
+ count += HAVE_32BIT_POINTERS ? 1 : 2;
count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
return count;
user_sgpr_info->sgpr_count++;
if (ctx->shader_info->info.loads_push_constants)
- user_sgpr_info->sgpr_count += 2;
+ user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count;
if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
- user_sgpr_info->sgpr_count += 2;
+ user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
user_sgpr_info->indirect_all_descriptor_sets = true;
} else {
user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
struct arg_info *args,
LLVMValueRef *desc_sets)
{
- LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8);
+ LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8);
unsigned num_sets = ctx->options->layout ?
ctx->options->layout->num_sets : 0;
unsigned stage_mask = 1 << stage;
}
}
} else {
- add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets);
+ add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets);
}
if (ctx->shader_info->info.loads_push_constants) {
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (ctx->shader_info->info.vs.has_vertex_buffers) {
- add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32),
+ add_arg(args, ARG_SGPR,
+ ac_array_in_const32_addr_space(ctx->ac.v4i32),
&ctx->vertex_buffers);
}
add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
ctx->descriptor_sets[i] = NULL;
}
} else {
- set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
- user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
+ user_sgpr_idx);
for (unsigned i = 0; i < num_sets; ++i) {
if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
}
if (ctx->shader_info->info.loads_push_constants) {
- set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
}
}
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (ctx->shader_info->info.vs.has_vertex_buffers) {
- set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS,
- user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_VS_VERTEX_BUFFERS,
+ user_sgpr_idx);
}
unsigned vs_num = 2;
calling_conv = RADEON_LLVM_AMDGPU_GS;
break;
case MESA_SHADER_TESS_CTRL:
- calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
+ calling_conv = RADEON_LLVM_AMDGPU_HS;
break;
case MESA_SHADER_FRAGMENT:
calling_conv = RADEON_LLVM_AMDGPU_PS;
ctx->main_function = create_llvm_function(
ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args,
- ctx->max_workgroup_size,
- ctx->options->unsafe_math);
+ ctx->max_workgroup_size, ctx->options);
set_llvm_calling_convention(ctx->main_function, stage);
user_sgpr_idx = 0;
if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
- set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS,
- &user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS,
+ &user_sgpr_idx);
if (ctx->options->supports_spill) {
ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE),
/* loop num outputs */
idx = 0;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[i];
LLVMValueRef *out_ptr = &addrs[i * 4];
int length = 4;
int slot = idx;
length = ctx->num_output_clips + ctx->num_output_culls;
if (length > 4)
slot_inc = 2;
+ output_usage_mask = (1 << length) - 1;
}
+
for (unsigned j = 0; j < length; j++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
out_ptr[j], "");
LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
- list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), "");
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(type), "");
return ac_build_load_to_sgpr(&ctx->ac, list, index);
}
+/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-Vega HW,
+ * so we may need to fix it up. */
+static LLVMValueRef
+adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
+ unsigned adjustment,
+ LLVMValueRef alpha)
+{
+ if (adjustment == RADV_ALPHA_ADJUST_NONE)
+ return alpha;
+
+ LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
+
+ if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
+ alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
+ else
+ alpha = ac_to_integer(&ctx->ac, alpha);
+
+ /* For the integer-like cases, do a natural sign extension.
+ *
+ * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
+ * and happen to contain 0, 1, 2, 3 as the two LSBs of the
+ * exponent.
+ */
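+ /* The shift pair moves the two relevant bits into [31:30] and the
+ * arithmetic shift sign-extends them, so 0, 1, 2, 3 become 0, 1, -2, -1. */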
+ alpha = LLVMBuildShl(ctx->ac.builder, alpha,
+ adjustment == RADV_ALPHA_ADJUST_SNORM ?
+ LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
+ alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
+
+ /* Convert back to the right type. */
+ if (adjustment == RADV_ALPHA_ADJUST_SNORM) {
+ LLVMValueRef clamp;
+ LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, "");
+ alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, "");
+ } else if (adjustment == RADV_ALPHA_ADJUST_SSCALED) {
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ }
+
+ return alpha;
+}
static void
handle_vs_input_decl(struct radv_shader_context *ctx,
LLVMValueRef t_list;
LLVMValueRef input;
LLVMValueRef buffer_index;
- int index = variable->data.location - VERT_ATTRIB_GENERIC0;
- int idx = variable->data.location;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
uint8_t input_usage_mask =
ctx->shader_info->info.vs.input_usage_mask[variable->data.location];
unsigned num_channels = util_last_bit(input_usage_mask);
- variable->data.driver_location = idx * 4;
+ variable->data.driver_location = variable->data.location * 4;
+
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ LLVMValueRef output[4];
+ unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
- for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
- if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) {
- uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[index + i];
+ if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
+ uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];
if (divisor) {
buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
} else
buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
ctx->abi.base_vertex, "");
- t_offset = LLVMConstInt(ctx->ac.i32, index + i, false);
+ t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false);
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
for (unsigned chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
- ctx->inputs[ac_llvm_reg_index_soa(idx, chan)] =
- ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder,
- input, llvm_chan, ""));
+ output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
+ }
+
+ unsigned alpha_adjust = (ctx->options->key.vs.alpha_adjust >> (attrib_index * 2)) & 3;
+ output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] =
+ ac_to_integer(&ctx->ac, output[chan]);
}
}
}
prepare_interp_optimize(struct radv_shader_context *ctx,
struct nir_shader *nir)
{
- if (!ctx->options->key.fs.multisample)
- return;
-
bool uses_center = false;
bool uses_centroid = false;
nir_foreach_variable(variable, &nir->inputs) {
output_usage_mask =
ctx->shader_info->info.tes.output_usage_mask[i];
} else {
- /* Enable all channels for the GS copy shader because
- * we don't know the output usage mask currently.
- */
- output_usage_mask = 0xf;
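+ /* The GS info pass now runs before the copy shader is compiled, so the
+ * real GS output usage mask is available here. */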
+ assert(ctx->is_gs_copy_shader);
+ output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[i];
}
radv_export_param(ctx, param_count, values, output_usage_mask);
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
LLVMValueRef dw_addr = NULL;
LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
+ unsigned output_usage_mask;
int param_index;
int length = 4;
if (!(ctx->output_mask & (1ull << i)))
continue;
- if (i == VARYING_SLOT_CLIP_DIST0)
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ output_usage_mask =
+ ctx->shader_info->info.vs.output_usage_mask[i];
+ } else {
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
+ output_usage_mask =
+ ctx->shader_info->info.tes.output_usage_mask[i];
+ }
+
+ if (i == VARYING_SLOT_CLIP_DIST0) {
length = ctx->num_output_clips + ctx->num_output_culls;
+ output_usage_mask = (1 << length) - 1;
+ }
param_index = shader_io_get_unique_index(i);
LLVMConstInt(ctx->ac.i32, param_index * 4, false),
"");
}
+
for (j = 0; j < length; j++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
if (ctx->ac.chip_class >= GFX9) {
- ac_lds_store(&ctx->ac, dw_addr,
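+ /* Channels can now be skipped, so compute the LDS address from the
+ * channel index instead of bumping dw_addr after every store. */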
+ LLVMValueRef dw_addr_offset =
+ LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32,
+ j, false), "");
+
+ ac_lds_store(&ctx->ac, dw_addr_offset,
LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""));
- dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, "");
} else {
ac_build_buffer_store_dword(&ctx->ac,
ctx->esgs_ring,
static void
ac_setup_rings(struct radv_shader_context *ctx)
{
- if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
- (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
- ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false));
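+ /* On GFX9 the ESGS transfer goes through LDS, so the ESGS ring buffer
+ * descriptor is only needed on VI and older. */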
+ if (ctx->options->chip_class <= VI &&
+ (ctx->stage == MESA_SHADER_GEOMETRY ||
+ ctx->options->key.vs.as_es || ctx->options->key.tes.as_es)) {
+ unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS
+ : RING_ESGS_VS;
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
+
+ ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac,
+ ctx->ring_offsets,
+ offset);
}
if (ctx->is_gs_copy_shader) {
uint32_t num_entries = 64;
LLVMValueRef gsvs_ring_stride = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size, false);
LLVMValueRef gsvs_ring_desc = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size << 16, false);
- ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false));
ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
ctx->gsvs_ring = LLVMBuildBitCast(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.v4i32, "");
ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
ctx.stage = MESA_SHADER_VERTEX;
+ radv_nir_shader_info_pass(geom_shader, options, &shader_info->info);
+
create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);
ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;