#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
#include <llvm-c/Transforms/Scalar.h>
+#if HAVE_LLVM >= 0x0700
+#include <llvm-c/Transforms/Utils.h>
+#endif
#include "sid.h"
#include "gfx9d.h"
LLVMValueRef vs_prim_id;
LLVMValueRef es2gs_offset;
- LLVMValueRef tcs_offchip_layout;
- LLVMValueRef tcs_out_offsets;
- LLVMValueRef tcs_out_layout;
LLVMValueRef oc_lds;
LLVMValueRef merged_wave_info;
LLVMValueRef tess_factor_offset;
LLVMValueRef tes_u;
LLVMValueRef tes_v;
- LLVMValueRef gsvs_ring_stride;
- LLVMValueRef gsvs_num_entries;
LLVMValueRef gs2vs_offset;
LLVMValueRef gs_wave_id;
LLVMValueRef gs_vtx_offset[6];
unsigned gs_max_out_vertices;
unsigned tes_primitive_mode;
- uint64_t tess_outputs_written;
- uint64_t tess_patch_outputs_written;
uint32_t tcs_patch_outputs_read;
uint64_t tcs_outputs_read;
uint32_t tcs_vertices_per_patch;
uint32_t tcs_num_inputs;
+ uint32_t tcs_num_patches;
+ uint32_t max_gsvs_emit_size;
+ uint32_t gsvs_vertex_size;
};
enum radeon_llvm_calling_convention {
return container_of(abi, ctx, abi);
}
+struct ac_build_if_state
+{
+ struct radv_shader_context *ctx;
+ LLVMValueRef condition;
+ LLVMBasicBlockRef entry_block;
+ LLVMBasicBlockRef true_block;
+ LLVMBasicBlockRef false_block;
+ LLVMBasicBlockRef merge_block;
+};
+
+static LLVMBasicBlockRef
+ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name)
+{
+ LLVMBasicBlockRef current_block;
+ LLVMBasicBlockRef next_block;
+ LLVMBasicBlockRef new_block;
+
+ /* get current basic block */
+ current_block = LLVMGetInsertBlock(ctx->ac.builder);
+
+ /* chqeck if there's another block after this one */
+ next_block = LLVMGetNextBasicBlock(current_block);
+ if (next_block) {
+ /* insert the new block before the next block */
+ new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
+ }
+ else {
+ /* append new block after current block */
+ LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+ new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
+ }
+ return new_block;
+}
+
+static void
+ac_nir_build_if(struct ac_build_if_state *ifthen,
+ struct radv_shader_context *ctx,
+ LLVMValueRef condition)
+{
+ LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder);
+
+ memset(ifthen, 0, sizeof *ifthen);
+ ifthen->ctx = ctx;
+ ifthen->condition = condition;
+ ifthen->entry_block = block;
+
+ /* create endif/merge basic block for the phi functions */
+ ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
+
+ /* create/insert true_block before merge_block */
+ ifthen->true_block =
+ LLVMInsertBasicBlockInContext(ctx->context,
+ ifthen->merge_block,
+ "if-true-block");
+
+ /* successive code goes into the true block */
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block);
+}
+
+/**
+ * End a conditional.
+ */
+static void
+ac_nir_build_endif(struct ac_build_if_state *ifthen)
+{
+ LLVMBuilderRef builder = ifthen->ctx->ac.builder;
+
+ /* Insert branch to the merge block from current block */
+ LLVMBuildBr(builder, ifthen->merge_block);
+
+ /*
+ * Now patch in the various branch instructions.
+ */
+
+ /* Insert the conditional branch instruction at the end of entry_block */
+ LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
+ if (ifthen->false_block) {
+ /* we have an else clause */
+ LLVMBuildCondBr(builder, ifthen->condition,
+ ifthen->true_block, ifthen->false_block);
+ }
+ else {
+ /* no else clause */
+ LLVMBuildCondBr(builder, ifthen->condition,
+ ifthen->true_block, ifthen->merge_block);
+ }
+
+ /* Resume building code at end of the ifthen->merge_block */
+ LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
+}
+
+
static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx)
{
switch (ctx->stage) {
}
}
+static unsigned
+get_tcs_num_patches(struct radv_shader_context *ctx)
+{
+ unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices;
+ unsigned num_tcs_output_cp = ctx->tcs_vertices_per_patch;
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+ unsigned num_patches;
+ unsigned hardware_lds_size;
+
+ /* Ensure that we only need one wave per SIMD so we don't need to check
+ * resource usage. Also ensures that the number of tcs in and out
+ * vertices per threadgroup are at most 256.
+ */
+ num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4;
+ /* Make sure that the data fits in LDS. This assumes the shaders only
+ * use LDS for the inputs and outputs.
+ */
+ hardware_lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
+ num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
+ /* Make sure the output data fits in the offchip buffer */
+ num_patches = MIN2(num_patches, (ctx->options->tess_offchip_block_dw_size * 4) / output_patch_size);
+ /* Not necessary for correctness, but improves performance. The
+ * specific value is taken from the proprietary driver.
+ */
+ num_patches = MIN2(num_patches, 40);
+
+ /* SI bug workaround - limit LS-HS threadgroups to only one wave. */
+ if (ctx->options->chip_class == SI) {
+ unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+ num_patches = MIN2(num_patches, one_wave);
+ }
+ return num_patches;
+}
+
+static unsigned
+calculate_tess_lds_size(struct radv_shader_context *ctx)
+{
+ unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices;
+ unsigned num_tcs_output_cp;
+ unsigned num_tcs_outputs, num_tcs_patch_outputs;
+ unsigned input_vertex_size, output_vertex_size;
+ unsigned input_patch_size, output_patch_size;
+ unsigned pervertex_output_patch_size;
+ unsigned output_patch0_offset;
+ unsigned num_patches;
+ unsigned lds_size;
+
+ num_tcs_output_cp = ctx->tcs_vertices_per_patch;
+ num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+
+ input_vertex_size = ctx->tcs_num_inputs * 16;
+ output_vertex_size = num_tcs_outputs * 16;
+
+ input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+ pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+ output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+
+ num_patches = ctx->tcs_num_patches;
+ output_patch0_offset = input_patch_size * num_patches;
+
+ lds_size = output_patch0_offset + output_patch_size * num_patches;
+ return lds_size;
+}
+
/* Tessellation shaders pass outputs to the next shader using LDS.
*
* LS outputs = TCS inputs
static LLVMValueRef
get_tcs_out_patch_stride(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13);
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+ output_patch_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch_size, false);
}
static LLVMValueRef
get_tcs_out_vertex_stride(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ output_vertex_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_vertex_size, false);
}
static LLVMValueRef
get_tcs_out_patch0_offset(struct radv_shader_context *ctx)
{
- return LLVMBuildMul(ctx->ac.builder,
- ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16),
- LLVMConstInt(ctx->ac.i32, 4, false), "");
+ assert (ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t output_patch0_offset = input_patch_size;
+ unsigned num_patches = ctx->tcs_num_patches;
+
+ output_patch0_offset *= num_patches;
+ output_patch0_offset /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false);
}
static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx)
{
- return LLVMBuildMul(ctx->ac.builder,
- ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16),
- LLVMConstInt(ctx->ac.i32, 4, false), "");
+ assert (ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t output_patch0_offset = input_patch_size;
+
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ unsigned num_patches = ctx->tcs_num_patches;
+
+ output_patch0_offset *= num_patches;
+ output_patch0_offset += pervertex_output_patch_size;
+ output_patch0_offset /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false);
}
static LLVMValueRef
unsigned num_return_elems,
struct arg_info *args,
unsigned max_workgroup_size,
- bool unsafe_math)
+ const struct radv_nir_compiler_options *options)
{
LLVMTypeRef main_function_type, ret_type;
LLVMBasicBlockRef main_function_body;
}
}
+ if (options->address32_hi) {
+ ac_llvm_add_target_dep_function_attr(main_function,
+ "amdgpu-32bit-address-high-bits",
+ options->address32_hi);
+ }
+
if (max_workgroup_size) {
ac_llvm_add_target_dep_function_attr(main_function,
"amdgpu-max-work-group-size",
max_workgroup_size);
}
- if (unsafe_math) {
+ if (options->unsafe_math) {
/* These were copied from some LLVM test. */
LLVMAddTargetDependentFunctionAttr(main_function,
"less-precise-fpmad",
set_loc(ud_info, sgpr_idx, num_sgprs, 0);
}
+static void
+set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
+{
+ bool use_32bit_pointers = HAVE_32BIT_POINTERS &&
+ idx != AC_UD_SCRATCH_RING_OFFSETS;
+
+ set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
+}
+
static void
set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
uint32_t indirect_offset)
&ctx->shader_info->user_sgprs_locs.descriptor_sets[idx];
assert(ud_info);
- set_loc(ud_info, sgpr_idx, 2, indirect_offset);
+ set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect_offset);
}
struct user_sgpr_info {
{
uint8_t count = 0;
- count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
+ if (ctx->shader_info->info.vs.has_vertex_buffers)
+ count += HAVE_32BIT_POINTERS ? 1 : 2;
count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
return count;
if (previous_stage == MESA_SHADER_VERTEX)
user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
}
- user_sgpr_info->sgpr_count += 3;
break;
case MESA_SHADER_TESS_EVAL:
- user_sgpr_info->sgpr_count += 1;
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX) {
user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
- } else {
- user_sgpr_info->sgpr_count++;
}
}
- user_sgpr_info->sgpr_count += 2;
break;
default:
break;
user_sgpr_info->sgpr_count++;
if (ctx->shader_info->info.loads_push_constants)
- user_sgpr_info->sgpr_count += 2;
+ user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count;
if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
- user_sgpr_info->sgpr_count += 2;
+ user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
user_sgpr_info->indirect_all_descriptor_sets = true;
} else {
user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
struct arg_info *args,
LLVMValueRef *desc_sets)
{
- LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8);
+ LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8);
unsigned num_sets = ctx->options->layout ?
ctx->options->layout->num_sets : 0;
unsigned stage_mask = 1 << stage;
}
}
} else {
- add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets);
+ add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets);
}
if (ctx->shader_info->info.loads_push_constants) {
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (ctx->shader_info->info.vs.has_vertex_buffers) {
- add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32),
+ add_arg(args, ARG_SGPR,
+ ac_array_in_const32_addr_space(ctx->ac.v4i32),
&ctx->vertex_buffers);
}
add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
ctx->descriptor_sets[i] = NULL;
}
} else {
- set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
- user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
+ user_sgpr_idx);
for (unsigned i = 0; i < num_sets; ++i) {
if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
}
if (ctx->shader_info->info.loads_push_constants) {
- set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
}
}
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (ctx->shader_info->info.vs.has_vertex_buffers) {
- set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS,
- user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_VS_VERTEX_BUFFERS,
+ user_sgpr_idx);
}
unsigned vs_num = 2;
calling_conv = RADEON_LLVM_AMDGPU_GS;
break;
case MESA_SHADER_TESS_CTRL:
- calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
+ calling_conv = RADEON_LLVM_AMDGPU_HS;
break;
case MESA_SHADER_FRAGMENT:
calling_conv = RADEON_LLVM_AMDGPU_PS;
has_previous_stage,
previous_stage, &args);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_offchip_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_offsets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_layout);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
&user_sgpr_info, &args,
&desc_sets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_offchip_layout);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_offsets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_out_layout);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
previous_stage, &user_sgpr_info,
&args, &desc_sets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
&user_sgpr_info, &args,
&desc_sets);
- if (previous_stage == MESA_SHADER_TESS_EVAL) {
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->tcs_offchip_layout);
- } else {
+ if (previous_stage != MESA_SHADER_TESS_EVAL) {
declare_vs_specific_input_sgprs(ctx, stage,
has_previous_stage,
previous_stage,
&args);
}
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_ring_stride);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_num_entries);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
&user_sgpr_info, &args,
&desc_sets);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_ring_stride);
- add_arg(&args, ARG_SGPR, ctx->ac.i32,
- &ctx->gsvs_num_entries);
if (needs_view_index)
add_arg(&args, ARG_SGPR, ctx->ac.i32,
&ctx->abi.view_index);
ctx->main_function = create_llvm_function(
ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args,
- ctx->max_workgroup_size,
- ctx->options->unsafe_math);
+ ctx->max_workgroup_size, ctx->options);
set_llvm_calling_convention(ctx->main_function, stage);
user_sgpr_idx = 0;
if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
- set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS,
- &user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS,
+ &user_sgpr_idx);
if (ctx->options->supports_spill) {
ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE),
case MESA_SHADER_TESS_CTRL:
set_vs_specific_input_locs(ctx, stage, has_previous_stage,
previous_stage, &user_sgpr_idx);
- set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 3);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_TESS_EVAL:
- set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
has_previous_stage,
previous_stage,
&user_sgpr_idx);
- else
- set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT,
- &user_sgpr_idx, 1);
}
- set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES,
- &user_sgpr_idx, 2);
if (ctx->abi.view_index)
set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
*
* Note that every attribute has 4 components.
*/
+static LLVMValueRef get_non_vertex_index_offset(struct radv_shader_context *ctx)
+{
+ uint32_t num_patches = ctx->tcs_num_patches;
+ uint32_t num_tcs_outputs;
+ if (ctx->stage == MESA_SHADER_TESS_CTRL)
+ num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ else
+ num_tcs_outputs = ctx->options->key.tes.tcs_num_outputs;
+
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+
+ return LLVMConstInt(ctx->ac.i32, pervertex_output_patch_size * num_patches, false);
+}
+
+static LLVMValueRef calc_param_stride(struct radv_shader_context *ctx,
+ LLVMValueRef vertex_index)
+{
+ LLVMValueRef param_stride;
+ if (vertex_index)
+ param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch * ctx->tcs_num_patches, false);
+ else
+ param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_num_patches, false);
+ return param_stride;
+}
+
static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx,
LLVMValueRef vertex_index,
LLVMValueRef param_index)
{
- LLVMValueRef base_addr, vertices_per_patch, num_patches;
+ LLVMValueRef base_addr;
LLVMValueRef param_stride, constant16;
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
-
- vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false);
- num_patches = ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9);
-
+ LLVMValueRef vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false);
constant16 = LLVMConstInt(ctx->ac.i32, 16, false);
+ param_stride = calc_param_stride(ctx, vertex_index);
if (vertex_index) {
base_addr = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
vertices_per_patch, "");
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
vertex_index, "");
-
- param_stride = LLVMBuildMul(ctx->ac.builder, vertices_per_patch,
- num_patches, "");
} else {
base_addr = rel_patch_id;
- param_stride = num_patches;
}
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
if (!vertex_index) {
- LLVMValueRef patch_data_offset =
- ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16);
+ LLVMValueRef patch_data_offset = get_non_vertex_index_offset(ctx);
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
patch_data_offset, "");
return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
}
-static void
-mark_tess_output(struct radv_shader_context *ctx,
- bool is_patch, uint32_t param, int num_slots)
-
-{
- uint64_t slot_mask = (1ull << num_slots) - 1;
- if (is_patch) {
- ctx->tess_patch_outputs_written |= (slot_mask << param);
- } else
- ctx->tess_outputs_written |= (slot_mask << param);
-}
-
static LLVMValueRef
get_dw_address(struct radv_shader_context *ctx,
LLVMValueRef dw_addr,
const unsigned component = var->data.location_frac;
const bool is_patch = var->data.patch;
const bool is_compact = var->data.compact;
- const unsigned count = glsl_count_attribute_slots(var->type, false);
LLVMValueRef dw_addr;
LLVMValueRef stride = NULL;
LLVMValueRef buf_addr = NULL;
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
}
- if (param_index)
- mark_tess_output(ctx, is_patch, param, count);
- else
- mark_tess_output(ctx, is_patch, param, 1);
-
dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
param_index);
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
/* loop num outputs */
idx = 0;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[i];
LLVMValueRef *out_ptr = &addrs[i * 4];
int length = 4;
int slot = idx;
length = ctx->num_output_clips + ctx->num_output_culls;
if (length > 4)
slot_inc = 2;
+ output_usage_mask = (1 << length) - 1;
}
+
for (unsigned j = 0; j < length; j++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
out_ptr[j], "");
LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
unsigned constant_index,
LLVMValueRef index,
enum ac_descriptor_type desc_type,
- bool image, bool write)
+ bool image, bool write,
+ bool bindless)
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
- list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), "");
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(type), "");
return ac_build_load_to_sgpr(&ctx->ac, list, index);
}
+/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
+ * so we may need to fix it up. */
+static LLVMValueRef
+adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
+ unsigned adjustment,
+ LLVMValueRef alpha)
+{
+ if (adjustment == RADV_ALPHA_ADJUST_NONE)
+ return alpha;
+
+ LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
+
+ if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
+ alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
+ else
+ alpha = ac_to_integer(&ctx->ac, alpha);
+
+ /* For the integer-like cases, do a natural sign extension.
+ *
+ * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
+ * and happen to contain 0, 1, 2, 3 as the two LSBs of the
+ * exponent.
+ */
+ alpha = LLVMBuildShl(ctx->ac.builder, alpha,
+ adjustment == RADV_ALPHA_ADJUST_SNORM ?
+ LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
+ alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
+
+ /* Convert back to the right type. */
+ if (adjustment == RADV_ALPHA_ADJUST_SNORM) {
+ LLVMValueRef clamp;
+ LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, "");
+ alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, "");
+ } else if (adjustment == RADV_ALPHA_ADJUST_SSCALED) {
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ }
+
+ return alpha;
+}
static void
handle_vs_input_decl(struct radv_shader_context *ctx,
LLVMValueRef t_list;
LLVMValueRef input;
LLVMValueRef buffer_index;
- int index = variable->data.location - VERT_ATTRIB_GENERIC0;
- int idx = variable->data.location;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
uint8_t input_usage_mask =
ctx->shader_info->info.vs.input_usage_mask[variable->data.location];
unsigned num_channels = util_last_bit(input_usage_mask);
- variable->data.driver_location = idx * 4;
+ variable->data.driver_location = variable->data.location * 4;
+
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ LLVMValueRef output[4];
+ unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
- for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
- if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) {
- buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
- ctx->abi.start_instance, "");
- if (ctx->options->key.vs.as_ls) {
- ctx->shader_info->vs.vgpr_comp_cnt =
- MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
+ if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
+ uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];
+
+ if (divisor) {
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
+ ctx->abi.start_instance, "");
+
+ if (divisor != 1) {
+ buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
+ LLVMConstInt(ctx->ac.i32, divisor, 0), "");
+ }
+
+ if (ctx->options->key.vs.as_ls) {
+ ctx->shader_info->vs.vgpr_comp_cnt =
+ MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
+ } else {
+ ctx->shader_info->vs.vgpr_comp_cnt =
+ MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
+ }
} else {
- ctx->shader_info->vs.vgpr_comp_cnt =
- MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
+ buffer_index = ctx->ac.i32_0;
}
} else
buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
ctx->abi.base_vertex, "");
- t_offset = LLVMConstInt(ctx->ac.i32, index + i, false);
+ t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false);
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
for (unsigned chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
- ctx->inputs[ac_llvm_reg_index_soa(idx, chan)] =
- ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder,
- input, llvm_chan, ""));
+ output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
+ }
+
+ unsigned alpha_adjust = (ctx->options->key.vs.alpha_adjust >> (attrib_index * 2)) & 3;
+ output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] =
+ ac_to_integer(&ctx->ac, output[chan]);
}
}
}
prepare_interp_optimize(struct radv_shader_context *ctx,
struct nir_shader *nir)
{
- if (!ctx->options->key.fs.multisample)
- return;
-
bool uses_center = false;
bool uses_centroid = false;
nir_foreach_variable(variable, &nir->inputs) {
static void
handle_vs_outputs_post(struct radv_shader_context *ctx,
- bool export_prim_id,
+ bool export_prim_id, bool export_layer_id,
struct radv_vs_output_info *outinfo)
{
uint32_t param_count = 0;
output_usage_mask =
ctx->shader_info->info.tes.output_usage_mask[i];
} else {
- /* Enable all channels for the GS copy shader because
- * we don't know the output usage mask currently.
- */
- output_usage_mask = 0xf;
+ assert(ctx->is_gs_copy_shader);
+ output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[i];
}
radv_export_param(ctx, param_count, values, output_usage_mask);
for (unsigned j = 1; j < 4; j++)
values[j] = ctx->ac.f32_0;
- radv_export_param(ctx, param_count, values, 0xf);
+ radv_export_param(ctx, param_count, values, 0x1);
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++;
outinfo->export_prim_id = true;
}
+ if (export_layer_id && layer_value) {
+ LLVMValueRef values[4];
+
+ values[0] = layer_value;
+ for (unsigned j = 1; j < 4; j++)
+ values[j] = ctx->ac.f32_0;
+
+ radv_export_param(ctx, param_count, values, 0x1);
+
+ outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count++;
+ }
+
outinfo->pos_exports = num_pos_exports;
outinfo->param_exports = param_count;
}
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
LLVMValueRef dw_addr = NULL;
LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
+ unsigned output_usage_mask;
int param_index;
int length = 4;
if (!(ctx->output_mask & (1ull << i)))
continue;
- if (i == VARYING_SLOT_CLIP_DIST0)
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ output_usage_mask =
+ ctx->shader_info->info.vs.output_usage_mask[i];
+ } else {
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
+ output_usage_mask =
+ ctx->shader_info->info.tes.output_usage_mask[i];
+ }
+
+ if (i == VARYING_SLOT_CLIP_DIST0) {
length = ctx->num_output_clips + ctx->num_output_culls;
+ output_usage_mask = (1 << length) - 1;
+ }
param_index = shader_io_get_unique_index(i);
LLVMConstInt(ctx->ac.i32, param_index * 4, false),
"");
}
+
for (j = 0; j < length; j++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
if (ctx->ac.chip_class >= GFX9) {
- ac_lds_store(&ctx->ac, dw_addr,
+ LLVMValueRef dw_addr_offset =
+ LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32,
+ j, false), "");
+
+ ac_lds_store(&ctx->ac, dw_addr_offset,
LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""));
- dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, "");
} else {
ac_build_buffer_store_dword(&ctx->ac,
ctx->esgs_ring,
if (i == VARYING_SLOT_CLIP_DIST0)
length = ctx->num_output_clips + ctx->num_output_culls;
int param = shader_io_get_unique_index(i);
- mark_tess_output(ctx, false, param, 1);
- if (length > 4)
- mark_tess_output(ctx, false, param + 1, 1);
LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
LLVMConstInt(ctx->ac.i32, param * 4, false),
"");
}
}
-struct ac_build_if_state
-{
- struct radv_shader_context *ctx;
- LLVMValueRef condition;
- LLVMBasicBlockRef entry_block;
- LLVMBasicBlockRef true_block;
- LLVMBasicBlockRef false_block;
- LLVMBasicBlockRef merge_block;
-};
-
-static LLVMBasicBlockRef
-ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name)
-{
- LLVMBasicBlockRef current_block;
- LLVMBasicBlockRef next_block;
- LLVMBasicBlockRef new_block;
-
- /* get current basic block */
- current_block = LLVMGetInsertBlock(ctx->ac.builder);
-
- /* chqeck if there's another block after this one */
- next_block = LLVMGetNextBasicBlock(current_block);
- if (next_block) {
- /* insert the new block before the next block */
- new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
- }
- else {
- /* append new block after current block */
- LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
- new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
- }
- return new_block;
-}
-
-static void
-ac_nir_build_if(struct ac_build_if_state *ifthen,
- struct radv_shader_context *ctx,
- LLVMValueRef condition)
-{
- LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder);
-
- memset(ifthen, 0, sizeof *ifthen);
- ifthen->ctx = ctx;
- ifthen->condition = condition;
- ifthen->entry_block = block;
-
- /* create endif/merge basic block for the phi functions */
- ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
-
- /* create/insert true_block before merge_block */
- ifthen->true_block =
- LLVMInsertBasicBlockInContext(ctx->context,
- ifthen->merge_block,
- "if-true-block");
-
- /* successive code goes into the true block */
- LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block);
-}
-
-/**
- * End a conditional.
- */
-static void
-ac_nir_build_endif(struct ac_build_if_state *ifthen)
-{
- LLVMBuilderRef builder = ifthen->ctx->ac.builder;
-
- /* Insert branch to the merge block from current block */
- LLVMBuildBr(builder, ifthen->merge_block);
-
- /*
- * Now patch in the various branch instructions.
- */
-
- /* Insert the conditional branch instruction at the end of entry_block */
- LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
- if (ifthen->false_block) {
- /* we have an else clause */
- LLVMBuildCondBr(builder, ifthen->condition,
- ifthen->true_block, ifthen->false_block);
- }
- else {
- /* no else clause */
- LLVMBuildCondBr(builder, ifthen->condition,
- ifthen->true_block, ifthen->merge_block);
- }
-
- /* Resume building code at end of the ifthen->merge_block */
- LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
-}
-
static void
write_tess_factors(struct radv_shader_context *ctx)
{
if (inner_comps) {
tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
- mark_tess_output(ctx, true, tess_inner_index, 1);
lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), "");
}
tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
- mark_tess_output(ctx, true, tess_outer_index, 1);
lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), "");
outer[i] = LLVMGetUndef(ctx->ac.i32);
}
- // LINES reverseal
+ // LINES reversal
if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer,
handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
else
handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
+ ctx->options->key.vs.export_layer_id,
&ctx->shader_info->vs.outinfo);
break;
case MESA_SHADER_FRAGMENT:
handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
else
handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
+ ctx->options->key.tes.export_layer_id,
&ctx->shader_info->tes.outinfo);
break;
default:
static void
ac_setup_rings(struct radv_shader_context *ctx)
{
- if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
- (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
- ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false));
+ if (ctx->options->chip_class <= VI &&
+ (ctx->stage == MESA_SHADER_GEOMETRY ||
+ ctx->options->key.vs.as_es || ctx->options->key.tes.as_es)) {
+ unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS
+ : RING_ESGS_VS;
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
+
+ ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac,
+ ctx->ring_offsets,
+ offset);
}
if (ctx->is_gs_copy_shader) {
}
if (ctx->stage == MESA_SHADER_GEOMETRY) {
LLVMValueRef tmp;
- ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false));
+ uint32_t num_entries = 64;
+ LLVMValueRef gsvs_ring_stride = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size, false);
+ LLVMValueRef gsvs_ring_desc = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size << 16, false);
ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
ctx->gsvs_ring = LLVMBuildBitCast(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.v4i32, "");
- ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), "");
+ tmp = LLVMConstInt(ctx->ac.i32, num_entries, false);
+ if (ctx->options->chip_class >= VI)
+ tmp = LLVMBuildMul(ctx->ac.builder, gsvs_ring_stride, tmp, "");
+ ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, LLVMConstInt(ctx->ac.i32, 2, false), "");
tmp = LLVMBuildExtractElement(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.i32_1, "");
- tmp = LLVMBuildOr(ctx->ac.builder, tmp, ctx->gsvs_ring_stride, "");
+ tmp = LLVMBuildOr(ctx->ac.builder, tmp, gsvs_ring_desc, "");
ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, "");
}
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
ctx.abi.load_resource = radv_load_resource;
ctx.abi.clamp_shadow_reference = false;
+ ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
if (shader_count >= 2)
ac_init_exec_full_mask(&ctx.ac);
for(int i = 0; i < shader_count; ++i) {
ctx.stage = shaders[i]->info.stage;
ctx.output_mask = 0;
- ctx.tess_outputs_written = 0;
ctx.num_output_clips = shaders[i]->info.clip_distance_array_size;
ctx.num_output_culls = shaders[i]->info.cull_distance_array_size;
ctx.tcs_num_inputs = ctx.options->key.tcs.num_inputs;
else
ctx.tcs_num_inputs = util_last_bit64(shader_info->info.vs.ls_outputs_written);
+ ctx.tcs_num_patches = get_tcs_num_patches(&ctx);
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
ctx.abi.load_tess_varyings = load_tes_input;
ctx.abi.load_tess_coord = load_tess_coord;
ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out;
+ ctx.tcs_num_patches = ctx.options->key.tes.num_patches;
} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
if (shader_info->info.vs.needs_instance_id) {
if (ctx.options->key.vs.as_ls) {
if (i)
ac_emit_barrier(&ctx.ac, ctx.stage);
+ nir_foreach_variable(variable, &shaders[i]->outputs)
+ scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
+
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
+ unsigned addclip = shaders[i]->info.clip_distance_array_size +
+ shaders[i]->info.cull_distance_array_size > 4;
+ ctx.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
+ ctx.max_gsvs_emit_size = ctx.gsvs_vertex_size *
+ shaders[i]->info.gs.vertices_out;
+ }
+
ac_setup_rings(&ctx);
LLVMBasicBlockRef merge_block;
else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
prepare_gs_input_vgprs(&ctx);
- nir_foreach_variable(variable, &shaders[i]->outputs)
- scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
-
ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i]);
if (shader_count >= 2) {
}
if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
- unsigned addclip = shaders[i]->info.clip_distance_array_size +
- shaders[i]->info.cull_distance_array_size > 4;
- shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
- shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
- shaders[i]->info.gs.vertices_out;
+ shader_info->gs.gsvs_vertex_size = ctx.gsvs_vertex_size;
+ shader_info->gs.max_gsvs_emit_size = ctx.max_gsvs_emit_size;
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
- shader_info->tcs.outputs_written = ctx.tess_outputs_written;
- shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
- assert(ctx.tess_outputs_written == ctx.shader_info->info.tcs.outputs_written);
- assert(ctx.tess_patch_outputs_written == ctx.shader_info->info.tcs.patch_outputs_written);
- } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
- shader_info->vs.outputs_written = ctx.tess_outputs_written;
- assert(ctx.tess_outputs_written == ctx.shader_info->info.vs.ls_outputs_written);
+ shader_info->tcs.num_patches = ctx.tcs_num_patches;
+ shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx);
}
}
}
idx += slot_inc;
}
- handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo);
+ handle_vs_outputs_post(ctx, false, false, &ctx->shader_info->vs.outinfo);
}
void
ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
ctx.stage = MESA_SHADER_VERTEX;
+ radv_nir_shader_info_pass(geom_shader, options, &shader_info->info);
+
create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);
ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;