#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
#include <llvm-c/Transforms/Scalar.h>
+#if HAVE_LLVM >= 0x0700
+#include <llvm-c/Transforms/Utils.h>
+#endif
#include "sid.h"
#include "gfx9d.h"
struct radv_shader_context {
struct ac_llvm_context ac;
- const struct ac_nir_compiler_options *options;
+ const struct radv_nir_compiler_options *options;
struct radv_shader_variant_info *shader_info;
struct ac_shader_abi abi;
LLVMValueRef vertex_buffers;
LLVMValueRef rel_auto_id;
LLVMValueRef vs_prim_id;
- LLVMValueRef ls_out_layout;
LLVMValueRef es2gs_offset;
- LLVMValueRef tcs_offchip_layout;
- LLVMValueRef tcs_out_offsets;
- LLVMValueRef tcs_out_layout;
- LLVMValueRef tcs_in_layout;
LLVMValueRef oc_lds;
LLVMValueRef merged_wave_info;
LLVMValueRef tess_factor_offset;
LLVMValueRef tes_u;
LLVMValueRef tes_v;
- LLVMValueRef gsvs_ring_stride;
- LLVMValueRef gsvs_num_entries;
LLVMValueRef gs2vs_offset;
LLVMValueRef gs_wave_id;
LLVMValueRef gs_vtx_offset[6];
unsigned gs_max_out_vertices;
unsigned tes_primitive_mode;
- uint64_t tess_outputs_written;
- uint64_t tess_patch_outputs_written;
uint32_t tcs_patch_outputs_read;
uint64_t tcs_outputs_read;
uint32_t tcs_vertices_per_patch;
+ uint32_t tcs_num_inputs;
+ uint32_t tcs_num_patches;
+ uint32_t max_gsvs_emit_size;
+ uint32_t gsvs_vertex_size;
};
enum radeon_llvm_calling_convention {
}
}
+/**
+ * Pick the number of tessellation patches processed per threadgroup.
+ *
+ * Starts from a limit derived from the larger of the input/output control
+ * point counts, then clamps it by LDS capacity, by the offchip buffer block
+ * size, by a fixed cap of 40 and, on SI, by a one-wave limit.
+ *
+ * Every input/output slot is sized at 16 (presumably bytes, i.e. one
+ * 4-component attribute).
+ *
+ * \param ctx  shader context; reads the TCS key, tcs_vertices_per_patch,
+ *             tcs_num_inputs and the TCS outputs_written masks.
+ * \return the number of patches per threadgroup.
+ */
+static unsigned
+get_tcs_num_patches(struct radv_shader_context *ctx)
+{
+	unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices;
+	unsigned num_tcs_output_cp = ctx->tcs_vertices_per_patch;
+	unsigned max_cp = MAX2(num_tcs_input_cp, num_tcs_output_cp);
+	uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+	uint32_t input_patch_size = num_tcs_input_cp * input_vertex_size;
+	uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+	uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+	uint32_t output_vertex_size = num_tcs_outputs * 16;
+	uint32_t pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+	uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+	uint32_t lds_patch_size = input_patch_size + output_patch_size;
+	unsigned num_patches;
+	unsigned hardware_lds_size;
+
+	/* Ensure that we only need one wave per SIMD so we don't need to check
+	 * resource usage. Also ensures that the number of tcs in and out
+	 * vertices per threadgroup are at most 256.
+	 */
+	num_patches = 64 / max_cp * 4;
+	/* Make sure that the data fits in LDS. This assumes the shaders only
+	 * use LDS for the inputs and outputs.
+	 *
+	 * A shader with no inputs and no outputs needs no LDS at all, so
+	 * there is nothing to clamp (and nothing to divide by).
+	 */
+	hardware_lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
+	if (lds_patch_size)
+		num_patches = MIN2(num_patches, hardware_lds_size / lds_patch_size);
+	/* Make sure the output data fits in the offchip buffer. Skip the
+	 * clamp when the TCS writes no outputs to avoid dividing by zero.
+	 */
+	if (output_patch_size)
+		num_patches = MIN2(num_patches, (ctx->options->tess_offchip_block_dw_size * 4) / output_patch_size);
+	/* Not necessary for correctness, but improves performance. The
+	 * specific value is taken from the proprietary driver.
+	 */
+	num_patches = MIN2(num_patches, 40);
+
+	/* SI bug workaround - limit LS-HS threadgroups to only one wave. */
+	if (ctx->options->chip_class == SI) {
+		unsigned one_wave = 64 / max_cp;
+		num_patches = MIN2(num_patches, one_wave);
+	}
+	return num_patches;
+}
+
+/**
+ * Compute the LDS size needed by the tessellation control stage.
+ *
+ * The result is the space for all input patches followed by the space for
+ * all output patches, using ctx->tcs_num_patches as the patch count and 16
+ * per attribute slot.
+ *
+ * \param ctx  shader context; tcs_num_patches, tcs_num_inputs and
+ *             tcs_vertices_per_patch must already be set.
+ * \return the combined input + output size for all patches.
+ */
+static unsigned
+calculate_tess_lds_size(struct radv_shader_context *ctx)
+{
+	const unsigned patch_count = ctx->tcs_num_patches;
+	const unsigned in_cp = ctx->options->key.tcs.input_vertices;
+	const unsigned out_cp = ctx->tcs_vertices_per_patch;
+
+	/* Highest written slot index (+1) for per-vertex and per-patch outputs. */
+	const unsigned vertex_outputs =
+		util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+	const unsigned patch_outputs =
+		util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+
+	/* Input region: one entry per input control point. */
+	const unsigned in_vertex_size = ctx->tcs_num_inputs * 16;
+	const unsigned in_patch_size = in_cp * in_vertex_size;
+
+	/* Output region: per-vertex outputs followed by per-patch outputs. */
+	const unsigned out_vertex_size = vertex_outputs * 16;
+	const unsigned out_pervertex_size = out_cp * out_vertex_size;
+	const unsigned out_patch_size = out_pervertex_size + patch_outputs * 16;
+
+	/* Outputs of patch 0 start right after the inputs of every patch. */
+	const unsigned out_patch0_offset = in_patch_size * patch_count;
+
+	return out_patch0_offset + out_patch_size * patch_count;
+}
+
/* Tessellation shaders pass outputs to the next shader using LDS.
*
* LS outputs = TCS inputs
static LLVMValueRef
get_tcs_in_patch_stride(struct radv_shader_context *ctx)
{
- if (ctx->stage == MESA_SHADER_VERTEX)
- return ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13);
- else if (ctx->stage == MESA_SHADER_TESS_CTRL)
- return ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13);
- else {
- assert(0);
- return NULL;
- }
+ /* Returns the TCS input patch stride as an i32 constant computed from
+  * the shader key and tcs_num_inputs. Only valid for the tessellation
+  * control stage (asserted below).
+  */
+ assert (ctx->stage == MESA_SHADER_TESS_CTRL);
+ /* 16 per input slot, times the number of input control points. */
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+
+ /* NOTE(review): presumably converts a byte size to dwords - confirm. */
+ input_patch_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, input_patch_size, false);
}
static LLVMValueRef
get_tcs_out_patch_stride(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13);
+ /* Returns the TCS output patch stride as an i32 constant: the
+  * per-vertex outputs of all output control points plus the per-patch
+  * outputs, with 16 per written slot.
+  */
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+ /* NOTE(review): presumably converts a byte size to dwords - confirm. */
+ output_patch_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch_size, false);
}
static LLVMValueRef
get_tcs_out_vertex_stride(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
+ /* Returns the stride between two TCS output vertices as an i32
+  * constant, derived from the highest written per-vertex output slot.
+  */
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ /* NOTE(review): presumably converts a byte size to dwords - confirm. */
+ output_vertex_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_vertex_size, false);
}
static LLVMValueRef
get_tcs_out_patch0_offset(struct radv_shader_context *ctx)
{
- return LLVMBuildMul(ctx->ac.builder,
- ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16),
- LLVMConstInt(ctx->ac.i32, 4, false), "");
+ /* Returns the offset of the first output patch as an i32 constant:
+  * the input patch size multiplied by tcs_num_patches. Only valid for
+  * the tessellation control stage (asserted below).
+  */
+ assert (ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t output_patch0_offset = input_patch_size;
+ unsigned num_patches = ctx->tcs_num_patches;
+
+ output_patch0_offset *= num_patches;
+ /* NOTE(review): presumably converts a byte offset to dwords - confirm. */
+ output_patch0_offset /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false);
}
static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx)
{
- return LLVMBuildMul(ctx->ac.builder,
- ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16),
- LLVMConstInt(ctx->ac.i32, 4, false), "");
+ /* Returns the offset of the per-patch data of the first output patch
+  * as an i32 constant: the input size of all patches plus the
+  * per-vertex output size of one patch. Only valid for the tessellation
+  * control stage (asserted below).
+  */
+ assert (ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t output_patch0_offset = input_patch_size;
+
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ unsigned num_patches = ctx->tcs_num_patches;
+
+ /* Skip the inputs of every patch, then the per-vertex outputs of patch 0. */
+ output_patch0_offset *= num_patches;
+ output_patch0_offset += pervertex_output_patch_size;
+ /* NOTE(review): presumably converts a byte offset to dwords - confirm. */
+ output_patch0_offset /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false);
}
static LLVMValueRef
case MESA_SHADER_VERTEX:
if (!ctx->is_gs_copy_shader)
user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
- if (ctx->options->key.vs.as_ls)
- user_sgpr_info->sgpr_count++;
break;
case MESA_SHADER_TESS_CTRL:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX)
user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
- user_sgpr_info->sgpr_count++;
}
- user_sgpr_info->sgpr_count += 4;
break;
case MESA_SHADER_TESS_EVAL:
- user_sgpr_info->sgpr_count += 1;
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX) {
user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
- } else {
- user_sgpr_info->sgpr_count++;
}
}
- user_sgpr_info->sgpr_count += 2;
break;
default:
break;
}
}
-static unsigned shader_io_get_unique_index(gl_varying_slot slot)
-{
- /* handle patch indices separate */
- if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
- return 0;
- if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
- return 1;
- if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
- return 2 + (slot - VARYING_SLOT_PATCH0);
-
- if (slot == VARYING_SLOT_POS)
- return 0;
- if (slot == VARYING_SLOT_PSIZ)
- return 1;
- if (slot == VARYING_SLOT_CLIP_DIST0)
- return 2;
- /* 3 is reserved for clip dist as well */
- if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
- return 4 + (slot - VARYING_SLOT_VAR0);
- unreachable("illegal slot in get unique index\n");
-}
-
static void set_llvm_calling_convention(LLVMValueRef func,
gl_shader_stage stage)
{
if (ctx->options->key.vs.as_es)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->es2gs_offset);
- else if (ctx->options->key.vs.as_ls)
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->ls_out_layout);
declare_vs_input_vgprs(ctx, &args);
break;
has_previous_stage,
previous_stage, &args);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->ls_out_layout);
-
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_offchip_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_offsets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_in_layout);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
&user_sgpr_info, &args,
&desc_sets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_offchip_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_offsets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_in_layout);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
previous_stage, &user_sgpr_info,
&args, &desc_sets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
&user_sgpr_info, &args,
&desc_sets);
- if (previous_stage == MESA_SHADER_TESS_EVAL) {
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_offchip_layout);
- } else {
+ if (previous_stage != MESA_SHADER_TESS_EVAL) {
declare_vs_specific_input_sgprs(ctx, stage,
has_previous_stage,
previous_stage,
&args);
}
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_ring_stride);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_num_entries);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
&user_sgpr_info, &args,
&desc_sets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_ring_stride);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_num_entries);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
previous_stage, &user_sgpr_idx);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
- if (ctx->options->key.vs.as_ls) {
- set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
- &user_sgpr_idx, 1);
- }
break;
case MESA_SHADER_TESS_CTRL:
set_vs_specific_input_locs(ctx, stage, has_previous_stage,
previous_stage, &user_sgpr_idx);
- if (has_previous_stage)
- set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
- &user_sgpr_idx, 1);
- set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_TESS_EVAL:
- set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
has_previous_stage,
previous_stage,
&user_sgpr_idx);
- else
- set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT,
- &user_sgpr_idx, 1);
}
- set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES,
- &user_sgpr_idx, 2);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
*
* Note that every attribute has 4 components.
*/
+/* Build the i32 constant used as the offset of per-patch data when no vertex
+ * index is given: the per-vertex output size of one patch multiplied by
+ * tcs_num_patches. The output count comes from the TCS info in the TCS stage
+ * and from the TES shader key otherwise.
+ */
+static LLVMValueRef get_non_vertex_index_offset(struct radv_shader_context *ctx)
+{
+	const bool is_tcs = ctx->stage == MESA_SHADER_TESS_CTRL;
+	const uint32_t output_count = is_tcs
+		? util_last_bit64(ctx->shader_info->info.tcs.outputs_written)
+		: ctx->options->key.tes.tcs_num_outputs;
+
+	/* 16 per output slot, for every vertex of the patch. */
+	const uint32_t vertex_size = output_count * 16;
+	const uint32_t pervertex_patch_size = ctx->tcs_vertices_per_patch * vertex_size;
+	const uint32_t patch_count = ctx->tcs_num_patches;
+
+	return LLVMConstInt(ctx->ac.i32, pervertex_patch_size * patch_count, false);
+}
+
+/* Build the i32 constant stride between consecutive parameters: the patch
+ * count for per-patch data (vertex_index == NULL), or the patch count times
+ * the vertices per patch for per-vertex data.
+ */
+static LLVMValueRef calc_param_stride(struct radv_shader_context *ctx,
+				      LLVMValueRef vertex_index)
+{
+	uint32_t stride = ctx->tcs_num_patches;
+
+	if (vertex_index)
+		stride *= ctx->tcs_vertices_per_patch;
+
+	return LLVMConstInt(ctx->ac.i32, stride, false);
+}
+
static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx,
LLVMValueRef vertex_index,
LLVMValueRef param_index)
{
- LLVMValueRef base_addr, vertices_per_patch, num_patches;
+ LLVMValueRef base_addr;
LLVMValueRef param_stride, constant16;
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
-
- vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false);
- num_patches = ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9);
-
+ LLVMValueRef vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false);
constant16 = LLVMConstInt(ctx->ac.i32, 16, false);
+ param_stride = calc_param_stride(ctx, vertex_index);
if (vertex_index) {
base_addr = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
vertices_per_patch, "");
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
vertex_index, "");
-
- param_stride = LLVMBuildMul(ctx->ac.builder, vertices_per_patch,
- num_patches, "");
} else {
base_addr = rel_patch_id;
- param_stride = num_patches;
}
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
if (!vertex_index) {
- LLVMValueRef patch_data_offset =
- ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16);
+ LLVMValueRef patch_data_offset = get_non_vertex_index_offset(ctx);
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
patch_data_offset, "");
return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
}
-static void
-mark_tess_output(struct radv_shader_context *ctx,
- bool is_patch, uint32_t param)
-
-{
- if (is_patch) {
- ctx->tess_patch_outputs_written |= (1ull << param);
- } else
- ctx->tess_outputs_written |= (1ull << param);
-}
-
static LLVMValueRef
get_dw_address(struct radv_shader_context *ctx,
LLVMValueRef dw_addr,
LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
else if (const_index && !compact_const_index)
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
- LLVMConstInt(ctx->ac.i32, const_index, false), "");
+ LLVMConstInt(ctx->ac.i32, const_index * 4, false), "");
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
LLVMConstInt(ctx->ac.i32, param * 4, false), "");
unsigned param = shader_io_get_unique_index(location);
if (load_input) {
- stride = ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
+ uint32_t input_vertex_size = (ctx->tcs_num_inputs * 16) / 4;
+ stride = LLVMConstInt(ctx->ac.i32, input_vertex_size, false);
dw_addr = get_tcs_in_current_patch_offset(ctx);
} else {
if (!is_patch) {
static void
store_tcs_output(struct ac_shader_abi *abi,
+ const nir_variable *var,
LLVMValueRef vertex_index,
LLVMValueRef param_index,
unsigned const_index,
- unsigned location,
- unsigned driver_location,
LLVMValueRef src,
- unsigned component,
- bool is_patch,
- bool is_compact,
unsigned writemask)
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ const unsigned location = var->data.location;
+ const unsigned component = var->data.location_frac;
+ const bool is_patch = var->data.patch;
+ const bool is_compact = var->data.compact;
LLVMValueRef dw_addr;
LLVMValueRef stride = NULL;
LLVMValueRef buf_addr = NULL;
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
}
- mark_tess_output(ctx, is_patch, param);
-
dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
param_index);
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
unsigned constant_index,
LLVMValueRef index,
enum ac_descriptor_type desc_type,
- bool image, bool write)
+ bool image, bool write,
+ bool bindless)
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) {
- buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
- ctx->abi.start_instance, "");
- if (ctx->options->key.vs.as_ls) {
- ctx->shader_info->vs.vgpr_comp_cnt =
- MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
+ uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[index + i];
+
+ if (divisor) {
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
+ ctx->abi.start_instance, "");
+
+ if (divisor != 1) {
+ buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
+ LLVMConstInt(ctx->ac.i32, divisor, 0), "");
+ }
+
+ if (ctx->options->key.vs.as_ls) {
+ ctx->shader_info->vs.vgpr_comp_cnt =
+ MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
+ } else {
+ ctx->shader_info->vs.vgpr_comp_cnt =
+ MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
+ }
} else {
- ctx->shader_info->vs.vgpr_comp_cnt =
- MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
+ buffer_index = ctx->ac.i32_0;
}
} else
buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
for (unsigned chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
- ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
+ ctx->inputs[ac_llvm_reg_index_soa(idx, chan)] =
ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder,
input, llvm_chan, ""));
}
interp = NULL;
for (unsigned i = 0; i < attrib_count; ++i)
- ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
+ ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
}
for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
LLVMValueRef interp_param;
- LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
+ LLVMValueRef *inputs = ctx->inputs +ac_llvm_reg_index_soa(i, 0);
if (!(ctx->input_mask & (1ull << i)))
continue;
ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
if (ctx->shader_info->info.needs_multiview_view_index)
- ctx->abi.view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ ctx->abi.view_index = ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
}
static void
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
{
LLVMValueRef output =
- ctx->abi.outputs[radeon_llvm_reg_index_soa(index, chan)];
+ ctx->abi.outputs[ac_llvm_reg_index_soa(index, chan)];
return LLVMBuildLoad(ctx->ac.builder, output, "");
}
static void
handle_vs_outputs_post(struct radv_shader_context *ctx,
- bool export_prim_id,
+ bool export_prim_id, bool export_layer_id,
struct radv_vs_output_info *outinfo)
{
uint32_t param_count = 0;
int i;
if (ctx->options->key.has_multiview_view_index) {
- LLVMValueRef* tmp_out = &ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ LLVMValueRef* tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
if(!*tmp_out) {
for(unsigned i = 0; i < 4; ++i)
- ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
+ ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
}
for (unsigned j = 1; j < 4; j++)
values[j] = ctx->ac.f32_0;
- radv_export_param(ctx, param_count, values, 0xf);
+ radv_export_param(ctx, param_count, values, 0x1);
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++;
outinfo->export_prim_id = true;
}
+ if (export_layer_id && layer_value) {
+ LLVMValueRef values[4];
+
+ values[0] = layer_value;
+ for (unsigned j = 1; j < 4; j++)
+ values[j] = ctx->ac.f32_0;
+
+ radv_export_param(ctx, param_count, values, 0x1);
+
+ outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count++;
+ }
+
outinfo->pos_exports = num_pos_exports;
outinfo->param_exports = param_count;
}
handle_ls_outputs_post(struct radv_shader_context *ctx)
{
LLVMValueRef vertex_id = ctx->rel_auto_id;
- LLVMValueRef vertex_dw_stride = ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8);
+ uint32_t num_tcs_inputs = util_last_bit64(ctx->shader_info->info.vs.ls_outputs_written);
+ LLVMValueRef vertex_dw_stride = LLVMConstInt(ctx->ac.i32, num_tcs_inputs * 4, false);
LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
vertex_dw_stride, "");
if (i == VARYING_SLOT_CLIP_DIST0)
length = ctx->num_output_clips + ctx->num_output_culls;
int param = shader_io_get_unique_index(i);
- mark_tess_output(ctx, false, param);
- if (length > 4)
- mark_tess_output(ctx, false, param + 1);
LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
LLVMConstInt(ctx->ac.i32, param * 4, false),
"");
if (inner_comps) {
tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
- mark_tess_output(ctx, true, tess_inner_index);
lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), "");
}
tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
- mark_tess_output(ctx, true, tess_outer_index);
lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), "");
handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
else
handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
+ ctx->options->key.vs.export_layer_id,
&ctx->shader_info->vs.outinfo);
break;
case MESA_SHADER_FRAGMENT:
handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
else
handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
+ ctx->options->key.tes.export_layer_id,
&ctx->shader_info->tes.outinfo);
break;
default:
}
if (ctx->stage == MESA_SHADER_GEOMETRY) {
LLVMValueRef tmp;
+ uint32_t num_entries = 64;
+ LLVMValueRef gsvs_ring_stride = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size, false);
+ LLVMValueRef gsvs_ring_desc = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size << 16, false);
ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false));
ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
ctx->gsvs_ring = LLVMBuildBitCast(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.v4i32, "");
- ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), "");
+ tmp = LLVMConstInt(ctx->ac.i32, num_entries, false);
+ if (ctx->options->chip_class >= VI)
+ tmp = LLVMBuildMul(ctx->ac.builder, gsvs_ring_stride, tmp, "");
+ ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, LLVMConstInt(ctx->ac.i32, 2, false), "");
tmp = LLVMBuildExtractElement(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.i32_1, "");
- tmp = LLVMBuildOr(ctx->ac.builder, tmp, ctx->gsvs_ring_stride, "");
+ tmp = LLVMBuildOr(ctx->ac.builder, tmp, gsvs_ring_desc, "");
ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, "");
}
struct nir_shader *const *shaders,
int shader_count,
struct radv_shader_variant_info *shader_info,
- const struct ac_nir_compiler_options *options,
- bool dump_shader)
+ const struct radv_nir_compiler_options *options)
{
struct radv_shader_context ctx = {0};
unsigned i;
memset(shader_info, 0, sizeof(*shader_info));
for(int i = 0; i < shader_count; ++i)
- ac_nir_shader_info_pass(shaders[i], options, &shader_info->info);
+ radv_nir_shader_info_pass(shaders[i], options, &shader_info->info);
for (i = 0; i < RADV_UD_MAX_SETS; i++)
shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
ctx.abi.load_resource = radv_load_resource;
ctx.abi.clamp_shadow_reference = false;
+ ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
if (shader_count >= 2)
ac_init_exec_full_mask(&ctx.ac);
for(int i = 0; i < shader_count; ++i) {
ctx.stage = shaders[i]->info.stage;
ctx.output_mask = 0;
- ctx.tess_outputs_written = 0;
ctx.num_output_clips = shaders[i]->info.clip_distance_array_size;
ctx.num_output_culls = shaders[i]->info.cull_distance_array_size;
ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
ctx.abi.store_tcs_outputs = store_tcs_output;
ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out;
+ if (shader_count == 1)
+ ctx.tcs_num_inputs = ctx.options->key.tcs.num_inputs;
+ else
+ ctx.tcs_num_inputs = util_last_bit64(shader_info->info.vs.ls_outputs_written);
+ ctx.tcs_num_patches = get_tcs_num_patches(&ctx);
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
ctx.abi.load_tess_varyings = load_tes_input;
ctx.abi.load_tess_coord = load_tess_coord;
ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out;
+ ctx.tcs_num_patches = ctx.options->key.tes.num_patches;
} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
if (shader_info->info.vs.needs_instance_id) {
if (ctx.options->key.vs.as_ls) {
if (i)
ac_emit_barrier(&ctx.ac, ctx.stage);
+ nir_foreach_variable(variable, &shaders[i]->outputs)
+ scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
+
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
+ unsigned addclip = shaders[i]->info.clip_distance_array_size +
+ shaders[i]->info.cull_distance_array_size > 4;
+ ctx.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
+ ctx.max_gsvs_emit_size = ctx.gsvs_vertex_size *
+ shaders[i]->info.gs.vertices_out;
+ }
+
ac_setup_rings(&ctx);
LLVMBasicBlockRef merge_block;
else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
prepare_gs_input_vgprs(&ctx);
- nir_foreach_variable(variable, &shaders[i]->outputs)
- scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
-
ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i]);
if (shader_count >= 2) {
}
if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
- unsigned addclip = shaders[i]->info.clip_distance_array_size +
- shaders[i]->info.cull_distance_array_size > 4;
- shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
- shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
- shaders[i]->info.gs.vertices_out;
+ shader_info->gs.gsvs_vertex_size = ctx.gsvs_vertex_size;
+ shader_info->gs.max_gsvs_emit_size = ctx.max_gsvs_emit_size;
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
- shader_info->tcs.outputs_written = ctx.tess_outputs_written;
- shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
- } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
- shader_info->vs.outputs_written = ctx.tess_outputs_written;
+ shader_info->tcs.num_patches = ctx.tcs_num_patches;
+ shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx);
}
}
if (shader_count == 1)
ac_nir_eliminate_const_vs_outputs(&ctx);
- if (dump_shader) {
+ if (options->dump_shader) {
ctx.shader_info->private_mem_vgprs =
ac_count_scratch_private_memory(ctx.main_function);
}
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info,
gl_shader_stage stage,
- bool dump_shader, bool supports_spill)
+ const struct radv_nir_compiler_options *options)
{
- if (dump_shader)
+ if (options->dump_shader)
ac_dump_module(llvm_module);
memset(binary, 0, sizeof(*binary));
+
+ if (options->record_llvm_ir) {
+ char *llvm_ir = LLVMPrintModuleToString(llvm_module);
+ binary->llvm_ir_string = strdup(llvm_ir);
+ LLVMDisposeMessage(llvm_ir);
+ }
+
int v = ac_llvm_compile(llvm_module, binary, tm);
if (v) {
fprintf(stderr, "compile failed\n");
}
- if (dump_shader)
+ if (options->dump_shader)
fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
- ac_shader_binary_read_config(binary, config, 0, supports_spill);
+ ac_shader_binary_read_config(binary, config, 0, options->supports_spill);
LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
LLVMDisposeModule(llvm_module);
}
static void
-ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options)
+ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_shader *nir, const struct radv_nir_compiler_options *options)
{
switch (nir->info.stage) {
case MESA_SHADER_COMPUTE:
struct radv_shader_variant_info *shader_info,
struct nir_shader *const *nir,
int nir_count,
- const struct ac_nir_compiler_options *options,
- bool dump_shader)
+ const struct radv_nir_compiler_options *options)
{
- LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
- options, dump_shader);
+ LLVMModuleRef llvm_module;
+
+ llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
+ options);
+
+ ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info,
+ nir[0]->info.stage, options);
- ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill);
for (int i = 0; i < nir_count; ++i)
ac_fill_shader_info(shader_info, nir[i], options);
0, 1, 1, true, false);
LLVMBuildStore(ctx->ac.builder,
- ac_to_float(&ctx->ac, value), ctx->abi.outputs[radeon_llvm_reg_index_soa(i, j)]);
+ ac_to_float(&ctx->ac, value), ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
}
idx += slot_inc;
}
- handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo);
+ handle_vs_outputs_post(ctx, false, false, &ctx->shader_info->vs.outinfo);
}
void
struct ac_shader_binary *binary,
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info,
- const struct ac_nir_compiler_options *options,
- bool dump_shader)
+ const struct radv_nir_compiler_options *options)
{
struct radv_shader_context ctx = {0};
ctx.context = LLVMContextCreate();
ac_llvm_finalize_module(&ctx);
ac_compile_llvm_module(tm, ctx.ac.module, binary, config, shader_info,
- MESA_SHADER_VERTEX,
- dump_shader, options->supports_spill);
+ MESA_SHADER_VERTEX, options);
}