#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
#include <llvm-c/Transforms/Scalar.h>
+#if HAVE_LLVM >= 0x0700
+#include <llvm-c/Transforms/Utils.h>
+#endif
#include "sid.h"
#include "gfx9d.h"
struct radv_shader_context {
struct ac_llvm_context ac;
- const struct ac_nir_compiler_options *options;
+ const struct radv_nir_compiler_options *options;
struct radv_shader_variant_info *shader_info;
struct ac_shader_abi abi;
LLVMValueRef vertex_buffers;
LLVMValueRef rel_auto_id;
LLVMValueRef vs_prim_id;
- LLVMValueRef ls_out_layout;
LLVMValueRef es2gs_offset;
- LLVMValueRef tcs_offchip_layout;
- LLVMValueRef tcs_out_offsets;
- LLVMValueRef tcs_out_layout;
- LLVMValueRef tcs_in_layout;
LLVMValueRef oc_lds;
LLVMValueRef merged_wave_info;
LLVMValueRef tess_factor_offset;
LLVMValueRef tes_u;
LLVMValueRef tes_v;
- LLVMValueRef gsvs_ring_stride;
- LLVMValueRef gsvs_num_entries;
LLVMValueRef gs2vs_offset;
LLVMValueRef gs_wave_id;
LLVMValueRef gs_vtx_offset[6];
unsigned gs_max_out_vertices;
unsigned tes_primitive_mode;
- uint64_t tess_outputs_written;
- uint64_t tess_patch_outputs_written;
uint32_t tcs_patch_outputs_read;
uint64_t tcs_outputs_read;
uint32_t tcs_vertices_per_patch;
+ uint32_t tcs_num_inputs;
+ uint32_t tcs_num_patches;
+ uint32_t max_gsvs_emit_size;
+ uint32_t gsvs_vertex_size;
};
enum radeon_llvm_calling_convention {
return container_of(abi, ctx, abi);
}
+struct ac_build_if_state
+{
+ struct radv_shader_context *ctx;
+ LLVMValueRef condition;
+ LLVMBasicBlockRef entry_block;
+ LLVMBasicBlockRef true_block;
+ LLVMBasicBlockRef false_block;
+ LLVMBasicBlockRef merge_block;
+};
+
+static LLVMBasicBlockRef
+ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name)
+{
+ LLVMBasicBlockRef current_block;
+ LLVMBasicBlockRef next_block;
+ LLVMBasicBlockRef new_block;
+
+ /* get current basic block */
+ current_block = LLVMGetInsertBlock(ctx->ac.builder);
+
+	/* check if there's another block after this one */
+ next_block = LLVMGetNextBasicBlock(current_block);
+ if (next_block) {
+ /* insert the new block before the next block */
+ new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
+ }
+ else {
+ /* append new block after current block */
+ LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+ new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
+ }
+ return new_block;
+}
+
+static void
+ac_nir_build_if(struct ac_build_if_state *ifthen,
+ struct radv_shader_context *ctx,
+ LLVMValueRef condition)
+{
+ LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder);
+
+ memset(ifthen, 0, sizeof *ifthen);
+ ifthen->ctx = ctx;
+ ifthen->condition = condition;
+ ifthen->entry_block = block;
+
+ /* create endif/merge basic block for the phi functions */
+ ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
+
+ /* create/insert true_block before merge_block */
+ ifthen->true_block =
+ LLVMInsertBasicBlockInContext(ctx->context,
+ ifthen->merge_block,
+ "if-true-block");
+
+ /* successive code goes into the true block */
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block);
+}
+
+/**
+ * End a conditional.
+ */
+static void
+ac_nir_build_endif(struct ac_build_if_state *ifthen)
+{
+ LLVMBuilderRef builder = ifthen->ctx->ac.builder;
+
+ /* Insert branch to the merge block from current block */
+ LLVMBuildBr(builder, ifthen->merge_block);
+
+ /*
+ * Now patch in the various branch instructions.
+ */
+
+ /* Insert the conditional branch instruction at the end of entry_block */
+ LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
+ if (ifthen->false_block) {
+ /* we have an else clause */
+ LLVMBuildCondBr(builder, ifthen->condition,
+ ifthen->true_block, ifthen->false_block);
+ }
+ else {
+ /* no else clause */
+ LLVMBuildCondBr(builder, ifthen->condition,
+ ifthen->true_block, ifthen->merge_block);
+ }
+
+ /* Resume building code at end of the ifthen->merge_block */
+ LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
+}
+
+
static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx)
{
switch (ctx->stage) {
}
}
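+/* Chooses how many patches each TCS threadgroup processes, bounded by wave
+ * size, LDS space, the off-chip buffer and a fixed performance cap.
+ *
+ * Worked example with hypothetical numbers: 3 input and 3 output control
+ * points, 8 per-vertex outputs and 2 per-patch outputs give
+ * input_patch_size = 3 * 8 * 16 = 384 bytes and
+ * output_patch_size = 3 * 8 * 16 + 2 * 16 = 416 bytes. The wave limit is
+ * 64 / 3 * 4 = 84 patches, 64K of LDS (CIK+) allows 65536 / 800 = 81, an
+ * 8K-dword off-chip block allows 32768 / 416 = 78, so the final value is
+ * the performance cap of 40.
+ */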
+static unsigned
+get_tcs_num_patches(struct radv_shader_context *ctx)
+{
+ unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices;
+ unsigned num_tcs_output_cp = ctx->tcs_vertices_per_patch;
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+ unsigned num_patches;
+ unsigned hardware_lds_size;
+
+	/* Ensure that we only need one wave per SIMD so we don't need to check
+	 * resource usage. Also ensures that the number of TCS input and output
+	 * vertices per threadgroup is at most 256.
+	 */
+ num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4;
+ /* Make sure that the data fits in LDS. This assumes the shaders only
+ * use LDS for the inputs and outputs.
+ */
+ hardware_lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
+ num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
+ /* Make sure the output data fits in the offchip buffer */
+ num_patches = MIN2(num_patches, (ctx->options->tess_offchip_block_dw_size * 4) / output_patch_size);
+ /* Not necessary for correctness, but improves performance. The
+ * specific value is taken from the proprietary driver.
+ */
+ num_patches = MIN2(num_patches, 40);
+
+ /* SI bug workaround - limit LS-HS threadgroups to only one wave. */
+ if (ctx->options->chip_class == SI) {
+ unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+ num_patches = MIN2(num_patches, one_wave);
+ }
+ return num_patches;
+}
+
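+/* Total LDS footprint for tessellation: the input patches for all patches are
+ * laid out first, followed by the output patches. All sizes are in bytes
+ * (one vec4 attribute slot = 16 bytes).
+ */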
+static unsigned
+calculate_tess_lds_size(struct radv_shader_context *ctx)
+{
+ unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices;
+ unsigned num_tcs_output_cp;
+ unsigned num_tcs_outputs, num_tcs_patch_outputs;
+ unsigned input_vertex_size, output_vertex_size;
+ unsigned input_patch_size, output_patch_size;
+ unsigned pervertex_output_patch_size;
+ unsigned output_patch0_offset;
+ unsigned num_patches;
+ unsigned lds_size;
+
+ num_tcs_output_cp = ctx->tcs_vertices_per_patch;
+ num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+
+ input_vertex_size = ctx->tcs_num_inputs * 16;
+ output_vertex_size = num_tcs_outputs * 16;
+
+ input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+ pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+ output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+
+ num_patches = ctx->tcs_num_patches;
+ output_patch0_offset = input_patch_size * num_patches;
+
+ lds_size = output_patch0_offset + output_patch_size * num_patches;
+ return lds_size;
+}
+
/* Tessellation shaders pass outputs to the next shader using LDS.
*
* LS outputs = TCS inputs
static LLVMValueRef
get_tcs_in_patch_stride(struct radv_shader_context *ctx)
{
- if (ctx->stage == MESA_SHADER_VERTEX)
- return ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13);
- else if (ctx->stage == MESA_SHADER_TESS_CTRL)
- return ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13);
- else {
- assert(0);
- return NULL;
- }
+	assert(ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+
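+	/* Convert the per-patch input size from bytes to dwords. */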
+ input_patch_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, input_patch_size, false);
}
static LLVMValueRef
get_tcs_out_patch_stride(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13);
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+ output_patch_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch_size, false);
}
static LLVMValueRef
get_tcs_out_vertex_stride(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ output_vertex_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_vertex_size, false);
}
static LLVMValueRef
get_tcs_out_patch0_offset(struct radv_shader_context *ctx)
{
- return LLVMBuildMul(ctx->ac.builder,
- ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16),
- LLVMConstInt(ctx->ac.i32, 4, false), "");
+	assert(ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t output_patch0_offset = input_patch_size;
+ unsigned num_patches = ctx->tcs_num_patches;
+
+ output_patch0_offset *= num_patches;
+ output_patch0_offset /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false);
}
static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx)
{
- return LLVMBuildMul(ctx->ac.builder,
- ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16),
- LLVMConstInt(ctx->ac.i32, 4, false), "");
+	assert(ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t output_patch0_offset = input_patch_size;
+
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ unsigned num_patches = ctx->tcs_num_patches;
+
+ output_patch0_offset *= num_patches;
+ output_patch0_offset += pervertex_output_patch_size;
+ output_patch0_offset /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false);
}
static LLVMValueRef
case MESA_SHADER_VERTEX:
if (!ctx->is_gs_copy_shader)
user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
- if (ctx->options->key.vs.as_ls)
- user_sgpr_info->sgpr_count++;
break;
case MESA_SHADER_TESS_CTRL:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX)
user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
- user_sgpr_info->sgpr_count++;
}
- user_sgpr_info->sgpr_count += 4;
break;
case MESA_SHADER_TESS_EVAL:
- user_sgpr_info->sgpr_count += 1;
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX) {
user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
- } else {
- user_sgpr_info->sgpr_count++;
}
}
- user_sgpr_info->sgpr_count += 2;
break;
default:
break;
}
}
-static unsigned shader_io_get_unique_index(gl_varying_slot slot)
-{
- /* handle patch indices separate */
- if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
- return 0;
- if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
- return 1;
- if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
- return 2 + (slot - VARYING_SLOT_PATCH0);
-
- if (slot == VARYING_SLOT_POS)
- return 0;
- if (slot == VARYING_SLOT_PSIZ)
- return 1;
- if (slot == VARYING_SLOT_CLIP_DIST0)
- return 2;
- /* 3 is reserved for clip dist as well */
- if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
- return 4 + (slot - VARYING_SLOT_VAR0);
- unreachable("illegal slot in get unique index\n");
-}
-
static void set_llvm_calling_convention(LLVMValueRef func,
gl_shader_stage stage)
{
if (ctx->options->key.vs.as_es)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->es2gs_offset);
- else if (ctx->options->key.vs.as_ls)
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->ls_out_layout);
declare_vs_input_vgprs(ctx, &args);
break;
has_previous_stage,
previous_stage, &args);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->ls_out_layout);
-
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_offchip_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_offsets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_in_layout);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
&user_sgpr_info, &args,
&desc_sets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_offchip_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_offsets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_in_layout);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
previous_stage, &user_sgpr_info,
&args, &desc_sets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
&user_sgpr_info, &args,
&desc_sets);
- if (previous_stage == MESA_SHADER_TESS_EVAL) {
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_offchip_layout);
- } else {
+ if (previous_stage != MESA_SHADER_TESS_EVAL) {
declare_vs_specific_input_sgprs(ctx, stage,
has_previous_stage,
previous_stage,
&args);
}
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_ring_stride);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_num_entries);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
&user_sgpr_info, &args,
&desc_sets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_ring_stride);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_num_entries);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
previous_stage, &user_sgpr_idx);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
- if (ctx->options->key.vs.as_ls) {
- set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
- &user_sgpr_idx, 1);
- }
break;
case MESA_SHADER_TESS_CTRL:
set_vs_specific_input_locs(ctx, stage, has_previous_stage,
previous_stage, &user_sgpr_idx);
- if (has_previous_stage)
- set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
- &user_sgpr_idx, 1);
- set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_TESS_EVAL:
- set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
has_previous_stage,
previous_stage,
&user_sgpr_idx);
- else
- set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT,
- &user_sgpr_idx, 1);
}
- set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES,
- &user_sgpr_idx, 2);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
*
* Note that every attribute has 4 components.
*/
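+/* Per-patch data in the off-chip buffer starts after the per-vertex outputs
+ * of every patch, so this offset skips all of them.
+ */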
+static LLVMValueRef get_non_vertex_index_offset(struct radv_shader_context *ctx)
+{
+ uint32_t num_patches = ctx->tcs_num_patches;
+ uint32_t num_tcs_outputs;
+ if (ctx->stage == MESA_SHADER_TESS_CTRL)
+ num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ else
+ num_tcs_outputs = ctx->options->key.tes.tcs_num_outputs;
+
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+
+ return LLVMConstInt(ctx->ac.i32, pervertex_output_patch_size * num_patches, false);
+}
+
+static LLVMValueRef calc_param_stride(struct radv_shader_context *ctx,
+ LLVMValueRef vertex_index)
+{
+ LLVMValueRef param_stride;
+ if (vertex_index)
+ param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch * ctx->tcs_num_patches, false);
+ else
+ param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_num_patches, false);
+ return param_stride;
+}
+
static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx,
LLVMValueRef vertex_index,
LLVMValueRef param_index)
{
- LLVMValueRef base_addr, vertices_per_patch, num_patches;
+ LLVMValueRef base_addr;
LLVMValueRef param_stride, constant16;
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
-
- vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false);
- num_patches = ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9);
-
+ LLVMValueRef vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false);
constant16 = LLVMConstInt(ctx->ac.i32, 16, false);
+ param_stride = calc_param_stride(ctx, vertex_index);
if (vertex_index) {
base_addr = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
vertices_per_patch, "");
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
vertex_index, "");
-
- param_stride = LLVMBuildMul(ctx->ac.builder, vertices_per_patch,
- num_patches, "");
} else {
base_addr = rel_patch_id;
- param_stride = num_patches;
}
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
if (!vertex_index) {
- LLVMValueRef patch_data_offset =
- ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16);
+ LLVMValueRef patch_data_offset = get_non_vertex_index_offset(ctx);
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
patch_data_offset, "");
return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
}
-static void
-mark_tess_output(struct radv_shader_context *ctx,
- bool is_patch, uint32_t param)
-
-{
- if (is_patch) {
- ctx->tess_patch_outputs_written |= (1ull << param);
- } else
- ctx->tess_outputs_written |= (1ull << param);
-}
-
static LLVMValueRef
get_dw_address(struct radv_shader_context *ctx,
LLVMValueRef dw_addr,
LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
else if (const_index && !compact_const_index)
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
- LLVMConstInt(ctx->ac.i32, const_index, false), "");
+ LLVMConstInt(ctx->ac.i32, const_index * 4, false), "");
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
LLVMConstInt(ctx->ac.i32, param * 4, false), "");
unsigned param = shader_io_get_unique_index(location);
if (load_input) {
- stride = ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
+ uint32_t input_vertex_size = (ctx->tcs_num_inputs * 16) / 4;
+ stride = LLVMConstInt(ctx->ac.i32, input_vertex_size, false);
dw_addr = get_tcs_in_current_patch_offset(ctx);
} else {
if (!is_patch) {
static void
store_tcs_output(struct ac_shader_abi *abi,
+ const nir_variable *var,
LLVMValueRef vertex_index,
LLVMValueRef param_index,
unsigned const_index,
- unsigned location,
- unsigned driver_location,
LLVMValueRef src,
- unsigned component,
- bool is_patch,
- bool is_compact,
unsigned writemask)
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ const unsigned location = var->data.location;
+ const unsigned component = var->data.location_frac;
+ const bool is_patch = var->data.patch;
+ const bool is_compact = var->data.compact;
LLVMValueRef dw_addr;
LLVMValueRef stride = NULL;
LLVMValueRef buf_addr = NULL;
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
}
- mark_tess_output(ctx, is_patch, param);
-
dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
param_index);
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
/* loop num outputs */
idx = 0;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[i];
LLVMValueRef *out_ptr = &addrs[i * 4];
int length = 4;
int slot = idx;
length = ctx->num_output_clips + ctx->num_output_culls;
if (length > 4)
slot_inc = 2;
+ output_usage_mask = (1 << length) - 1;
}
+
for (unsigned j = 0; j < length; j++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
out_ptr[j], "");
LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
unsigned constant_index,
LLVMValueRef index,
enum ac_descriptor_type desc_type,
- bool image, bool write)
+ bool image, bool write,
+ bool bindless)
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
return ac_build_load_to_sgpr(&ctx->ac, list, index);
}
+/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-Vega HW,
+ * so we may need to fix it up. */
+static LLVMValueRef
+adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
+ unsigned adjustment,
+ LLVMValueRef alpha)
+{
+ if (adjustment == RADV_ALPHA_ADJUST_NONE)
+ return alpha;
+
+ LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
+
+ if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
+ alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
+ else
+ alpha = ac_to_integer(&ctx->ac, alpha);
+
+ /* For the integer-like cases, do a natural sign extension.
+ *
+ * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
+ * and happen to contain 0, 1, 2, 3 as the two LSBs of the
+ * exponent.
+ */
+ alpha = LLVMBuildShl(ctx->ac.builder, alpha,
+ adjustment == RADV_ALPHA_ADJUST_SNORM ?
+ LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
+ alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
+
+ /* Convert back to the right type. */
+ if (adjustment == RADV_ALPHA_ADJUST_SNORM) {
+ LLVMValueRef clamp;
+ LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, "");
+ alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, "");
+ } else if (adjustment == RADV_ALPHA_ADJUST_SSCALED) {
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ }
+
+ return alpha;
+}
static void
handle_vs_input_decl(struct radv_shader_context *ctx,
LLVMValueRef t_list;
LLVMValueRef input;
LLVMValueRef buffer_index;
- int index = variable->data.location - VERT_ATTRIB_GENERIC0;
- int idx = variable->data.location;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
uint8_t input_usage_mask =
ctx->shader_info->info.vs.input_usage_mask[variable->data.location];
unsigned num_channels = util_last_bit(input_usage_mask);
- variable->data.driver_location = idx * 4;
+ variable->data.driver_location = variable->data.location * 4;
+
-	for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
-		if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) {
-			buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
-			                            ctx->abi.start_instance, "");
-			if (ctx->options->key.vs.as_ls) {
-				ctx->shader_info->vs.vgpr_comp_cnt =
-					MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
+	for (unsigned i = 0; i < attrib_count; ++i) {
+		LLVMValueRef output[4];
+		unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
+
+		if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
+			uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];
+
+			if (divisor) {
+				buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
+				                            ctx->abi.start_instance, "");
+
+				if (divisor != 1) {
+					buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
+					                             LLVMConstInt(ctx->ac.i32, divisor, 0), "");
+				}
+
+				if (ctx->options->key.vs.as_ls) {
+					ctx->shader_info->vs.vgpr_comp_cnt =
+						MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
+				} else {
+					ctx->shader_info->vs.vgpr_comp_cnt =
+						MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
+				}
} else {
- ctx->shader_info->vs.vgpr_comp_cnt =
- MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
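+				/* A divisor of 0 means every instance fetches element 0. */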
+ buffer_index = ctx->ac.i32_0;
}
} else
buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
ctx->abi.base_vertex, "");
- t_offset = LLVMConstInt(ctx->ac.i32, index + i, false);
+ t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false);
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
for (unsigned chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
- ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
- ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder,
- input, llvm_chan, ""));
+ output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
+ }
+
+ unsigned alpha_adjust = (ctx->options->key.vs.alpha_adjust >> (attrib_index * 2)) & 3;
+ output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] =
+ ac_to_integer(&ctx->ac, output[chan]);
}
}
}
interp = NULL;
for (unsigned i = 0; i < attrib_count; ++i)
- ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
+ ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
}
for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
LLVMValueRef interp_param;
- LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
+ LLVMValueRef *inputs = ctx->inputs +ac_llvm_reg_index_soa(i, 0);
if (!(ctx->input_mask & (1ull << i)))
continue;
ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
if (ctx->shader_info->info.needs_multiview_view_index)
- ctx->abi.view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ ctx->abi.view_index = ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
}
static void
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
{
LLVMValueRef output =
- ctx->abi.outputs[radeon_llvm_reg_index_soa(index, chan)];
+ ctx->abi.outputs[ac_llvm_reg_index_soa(index, chan)];
return LLVMBuildLoad(ctx->ac.builder, output, "");
}
static void
handle_vs_outputs_post(struct radv_shader_context *ctx,
- bool export_prim_id,
+ bool export_prim_id, bool export_layer_id,
struct radv_vs_output_info *outinfo)
{
uint32_t param_count = 0;
int i;
if (ctx->options->key.has_multiview_view_index) {
- LLVMValueRef* tmp_out = &ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ LLVMValueRef* tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
if(!*tmp_out) {
for(unsigned i = 0; i < 4; ++i)
- ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
+ ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
}
output_usage_mask =
ctx->shader_info->info.tes.output_usage_mask[i];
} else {
- /* Enable all channels for the GS copy shader because
- * we don't know the output usage mask currently.
- */
- output_usage_mask = 0xf;
+ assert(ctx->is_gs_copy_shader);
+ output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[i];
}
radv_export_param(ctx, param_count, values, output_usage_mask);
for (unsigned j = 1; j < 4; j++)
values[j] = ctx->ac.f32_0;
- radv_export_param(ctx, param_count, values, 0xf);
+ radv_export_param(ctx, param_count, values, 0x1);
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++;
outinfo->export_prim_id = true;
}
+ if (export_layer_id && layer_value) {
+ LLVMValueRef values[4];
+
+ values[0] = layer_value;
+ for (unsigned j = 1; j < 4; j++)
+ values[j] = ctx->ac.f32_0;
+
+ radv_export_param(ctx, param_count, values, 0x1);
+
+ outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count++;
+ }
+
outinfo->pos_exports = num_pos_exports;
outinfo->param_exports = param_count;
}
handle_ls_outputs_post(struct radv_shader_context *ctx)
{
LLVMValueRef vertex_id = ctx->rel_auto_id;
- LLVMValueRef vertex_dw_stride = ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8);
+ uint32_t num_tcs_inputs = util_last_bit64(ctx->shader_info->info.vs.ls_outputs_written);
+ LLVMValueRef vertex_dw_stride = LLVMConstInt(ctx->ac.i32, num_tcs_inputs * 4, false);
LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
vertex_dw_stride, "");
if (i == VARYING_SLOT_CLIP_DIST0)
length = ctx->num_output_clips + ctx->num_output_culls;
int param = shader_io_get_unique_index(i);
- mark_tess_output(ctx, false, param);
- if (length > 4)
- mark_tess_output(ctx, false, param + 1);
LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
LLVMConstInt(ctx->ac.i32, param * 4, false),
"");
}
}
-struct ac_build_if_state
-{
- struct radv_shader_context *ctx;
- LLVMValueRef condition;
- LLVMBasicBlockRef entry_block;
- LLVMBasicBlockRef true_block;
- LLVMBasicBlockRef false_block;
- LLVMBasicBlockRef merge_block;
-};
-
-static LLVMBasicBlockRef
-ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name)
-{
- LLVMBasicBlockRef current_block;
- LLVMBasicBlockRef next_block;
- LLVMBasicBlockRef new_block;
-
- /* get current basic block */
- current_block = LLVMGetInsertBlock(ctx->ac.builder);
-
- /* chqeck if there's another block after this one */
- next_block = LLVMGetNextBasicBlock(current_block);
- if (next_block) {
- /* insert the new block before the next block */
- new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
- }
- else {
- /* append new block after current block */
- LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
- new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
- }
- return new_block;
-}
-
-static void
-ac_nir_build_if(struct ac_build_if_state *ifthen,
- struct radv_shader_context *ctx,
- LLVMValueRef condition)
-{
- LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder);
-
- memset(ifthen, 0, sizeof *ifthen);
- ifthen->ctx = ctx;
- ifthen->condition = condition;
- ifthen->entry_block = block;
-
- /* create endif/merge basic block for the phi functions */
- ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
-
- /* create/insert true_block before merge_block */
- ifthen->true_block =
- LLVMInsertBasicBlockInContext(ctx->context,
- ifthen->merge_block,
- "if-true-block");
-
- /* successive code goes into the true block */
- LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block);
-}
-
-/**
- * End a conditional.
- */
-static void
-ac_nir_build_endif(struct ac_build_if_state *ifthen)
-{
- LLVMBuilderRef builder = ifthen->ctx->ac.builder;
-
- /* Insert branch to the merge block from current block */
- LLVMBuildBr(builder, ifthen->merge_block);
-
- /*
- * Now patch in the various branch instructions.
- */
-
- /* Insert the conditional branch instruction at the end of entry_block */
- LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
- if (ifthen->false_block) {
- /* we have an else clause */
- LLVMBuildCondBr(builder, ifthen->condition,
- ifthen->true_block, ifthen->false_block);
- }
- else {
- /* no else clause */
- LLVMBuildCondBr(builder, ifthen->condition,
- ifthen->true_block, ifthen->merge_block);
- }
-
- /* Resume building code at end of the ifthen->merge_block */
- LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
-}
-
static void
write_tess_factors(struct radv_shader_context *ctx)
{
if (inner_comps) {
tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
- mark_tess_output(ctx, true, tess_inner_index);
lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), "");
}
tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
- mark_tess_output(ctx, true, tess_outer_index);
lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), "");
outer[i] = LLVMGetUndef(ctx->ac.i32);
}
- // LINES reverseal
+ // LINES reversal
if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer,
handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
else
handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
+ ctx->options->key.vs.export_layer_id,
&ctx->shader_info->vs.outinfo);
break;
case MESA_SHADER_FRAGMENT:
handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
else
handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
+ ctx->options->key.tes.export_layer_id,
&ctx->shader_info->tes.outinfo);
break;
default:
}
if (ctx->stage == MESA_SHADER_GEOMETRY) {
LLVMValueRef tmp;
+ uint32_t num_entries = 64;
+ LLVMValueRef gsvs_ring_stride = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size, false);
+ LLVMValueRef gsvs_ring_desc = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size << 16, false);
ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false));
ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
ctx->gsvs_ring = LLVMBuildBitCast(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.v4i32, "");
- ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), "");
+ tmp = LLVMConstInt(ctx->ac.i32, num_entries, false);
+ if (ctx->options->chip_class >= VI)
+ tmp = LLVMBuildMul(ctx->ac.builder, gsvs_ring_stride, tmp, "");
+ ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, LLVMConstInt(ctx->ac.i32, 2, false), "");
tmp = LLVMBuildExtractElement(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.i32_1, "");
- tmp = LLVMBuildOr(ctx->ac.builder, tmp, ctx->gsvs_ring_stride, "");
+ tmp = LLVMBuildOr(ctx->ac.builder, tmp, gsvs_ring_desc, "");
ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, "");
}
struct nir_shader *const *shaders,
int shader_count,
struct radv_shader_variant_info *shader_info,
- const struct ac_nir_compiler_options *options,
- bool dump_shader)
+ const struct radv_nir_compiler_options *options)
{
struct radv_shader_context ctx = {0};
unsigned i;
memset(shader_info, 0, sizeof(*shader_info));
for(int i = 0; i < shader_count; ++i)
- ac_nir_shader_info_pass(shaders[i], options, &shader_info->info);
+ radv_nir_shader_info_pass(shaders[i], options, &shader_info->info);
for (i = 0; i < RADV_UD_MAX_SETS; i++)
shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
ctx.abi.load_resource = radv_load_resource;
ctx.abi.clamp_shadow_reference = false;
+ ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
if (shader_count >= 2)
ac_init_exec_full_mask(&ctx.ac);
for(int i = 0; i < shader_count; ++i) {
ctx.stage = shaders[i]->info.stage;
ctx.output_mask = 0;
- ctx.tess_outputs_written = 0;
ctx.num_output_clips = shaders[i]->info.clip_distance_array_size;
ctx.num_output_culls = shaders[i]->info.cull_distance_array_size;
ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
ctx.abi.store_tcs_outputs = store_tcs_output;
ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out;
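+			/* With a single shader (separate TCS) the number of inputs comes
+			 * from the pipeline key; with merged VS+TCS (GFX9) it is derived
+			 * from the LS outputs actually written by the vertex shader.
+			 */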
+ if (shader_count == 1)
+ ctx.tcs_num_inputs = ctx.options->key.tcs.num_inputs;
+ else
+ ctx.tcs_num_inputs = util_last_bit64(shader_info->info.vs.ls_outputs_written);
+ ctx.tcs_num_patches = get_tcs_num_patches(&ctx);
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
ctx.abi.load_tess_varyings = load_tes_input;
ctx.abi.load_tess_coord = load_tess_coord;
ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out;
+ ctx.tcs_num_patches = ctx.options->key.tes.num_patches;
} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
if (shader_info->info.vs.needs_instance_id) {
if (ctx.options->key.vs.as_ls) {
if (i)
ac_emit_barrier(&ctx.ac, ctx.stage);
+ nir_foreach_variable(variable, &shaders[i]->outputs)
+ scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
+
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
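+			/* Each output slot is a 16-byte vec4; more than 4 clip+cull
+			 * distances need one extra slot. */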
+ unsigned addclip = shaders[i]->info.clip_distance_array_size +
+ shaders[i]->info.cull_distance_array_size > 4;
+ ctx.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
+ ctx.max_gsvs_emit_size = ctx.gsvs_vertex_size *
+ shaders[i]->info.gs.vertices_out;
+ }
+
ac_setup_rings(&ctx);
LLVMBasicBlockRef merge_block;
else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
prepare_gs_input_vgprs(&ctx);
- nir_foreach_variable(variable, &shaders[i]->outputs)
- scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
-
ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i]);
if (shader_count >= 2) {
}
if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
- unsigned addclip = shaders[i]->info.clip_distance_array_size +
- shaders[i]->info.cull_distance_array_size > 4;
- shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
- shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
- shaders[i]->info.gs.vertices_out;
+ shader_info->gs.gsvs_vertex_size = ctx.gsvs_vertex_size;
+ shader_info->gs.max_gsvs_emit_size = ctx.max_gsvs_emit_size;
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
- shader_info->tcs.outputs_written = ctx.tess_outputs_written;
- shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
- } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
- shader_info->vs.outputs_written = ctx.tess_outputs_written;
+ shader_info->tcs.num_patches = ctx.tcs_num_patches;
+ shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx);
}
}
if (shader_count == 1)
ac_nir_eliminate_const_vs_outputs(&ctx);
- if (dump_shader) {
+ if (options->dump_shader) {
ctx.shader_info->private_mem_vgprs =
ac_count_scratch_private_memory(ctx.main_function);
}
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info,
gl_shader_stage stage,
- bool dump_shader, bool supports_spill)
+ const struct radv_nir_compiler_options *options)
{
- if (dump_shader)
+ if (options->dump_shader)
ac_dump_module(llvm_module);
memset(binary, 0, sizeof(*binary));
+
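+	/* Keep a copy of the LLVM IR so it can be reported later (e.g. through
+	 * VK_AMD_shader_info). */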
+ if (options->record_llvm_ir) {
+ char *llvm_ir = LLVMPrintModuleToString(llvm_module);
+ binary->llvm_ir_string = strdup(llvm_ir);
+ LLVMDisposeMessage(llvm_ir);
+ }
+
int v = ac_llvm_compile(llvm_module, binary, tm);
if (v) {
fprintf(stderr, "compile failed\n");
}
- if (dump_shader)
+ if (options->dump_shader)
fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
- ac_shader_binary_read_config(binary, config, 0, supports_spill);
+ ac_shader_binary_read_config(binary, config, 0, options->supports_spill);
LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
LLVMDisposeModule(llvm_module);
}
static void
-ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options)
+ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_shader *nir, const struct radv_nir_compiler_options *options)
{
switch (nir->info.stage) {
case MESA_SHADER_COMPUTE:
struct radv_shader_variant_info *shader_info,
struct nir_shader *const *nir,
int nir_count,
- const struct ac_nir_compiler_options *options,
- bool dump_shader)
+ const struct radv_nir_compiler_options *options)
{
- LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
- options, dump_shader);
-	ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill);
+	LLVMModuleRef llvm_module;
+
+	llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
+					       options);
+
+	ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info,
+			       nir[0]->info.stage, options);
for (int i = 0; i < nir_count; ++i)
ac_fill_shader_info(shader_info, nir[i], options);
0, 1, 1, true, false);
LLVMBuildStore(ctx->ac.builder,
- ac_to_float(&ctx->ac, value), ctx->abi.outputs[radeon_llvm_reg_index_soa(i, j)]);
+ ac_to_float(&ctx->ac, value), ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
}
idx += slot_inc;
}
- handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo);
+ handle_vs_outputs_post(ctx, false, false, &ctx->shader_info->vs.outinfo);
}
void
struct ac_shader_binary *binary,
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info,
- const struct ac_nir_compiler_options *options,
- bool dump_shader)
+ const struct radv_nir_compiler_options *options)
{
struct radv_shader_context ctx = {0};
ctx.context = LLVMContextCreate();
ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
ctx.stage = MESA_SHADER_VERTEX;
+ radv_nir_shader_info_pass(geom_shader, options, &shader_info->info);
+
create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);
ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
ac_llvm_finalize_module(&ctx);
ac_compile_llvm_module(tm, ctx.ac.module, binary, config, shader_info,
- MESA_SHADER_VERTEX,
- dump_shader, options->supports_spill);
+ MESA_SHADER_VERTEX, options);
}