#include "radv_shader.h"
#include "radv_shader_helper.h"
#include "radv_shader_args.h"
+#include "radv_debug.h"
#include "nir/nir.h"
-#include <llvm-c/Core.h>
-#include <llvm-c/TargetMachine.h>
-#include <llvm-c/Transforms/Scalar.h>
-#include <llvm-c/Transforms/Utils.h>
-
#include "sid.h"
#include "ac_binary.h"
#include "ac_llvm_util.h"
}
}
-static unsigned
-get_tcs_num_patches(struct radv_shader_context *ctx)
-{
- unsigned num_tcs_input_cp = ctx->args->options->key.tcs.input_vertices;
- unsigned num_tcs_output_cp = ctx->shader->info.tess.tcs_vertices_out;
- uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
- uint32_t input_patch_size = ctx->args->options->key.tcs.input_vertices * input_vertex_size;
- uint32_t num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
- uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->args->shader_info->tcs.patch_outputs_written);
- uint32_t output_vertex_size = num_tcs_outputs * 16;
- uint32_t pervertex_output_patch_size = ctx->shader->info.tess.tcs_vertices_out * output_vertex_size;
- uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
- unsigned num_patches;
- unsigned hardware_lds_size;
-
- /* Ensure that we only need one wave per SIMD so we don't need to check
- * resource usage. Also ensures that the number of tcs in and out
- * vertices per threadgroup are at most 256.
- */
- num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4;
- /* Make sure that the data fits in LDS. This assumes the shaders only
- * use LDS for the inputs and outputs.
- */
- hardware_lds_size = 32768;
-
- /* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
- * threadgroup, even though there is more than 32 KiB LDS.
- *
- * Test: dEQP-VK.tessellation.shader_input_output.barrier
- */
- if (ctx->args->options->chip_class >= GFX7 && ctx->args->options->family != CHIP_STONEY)
- hardware_lds_size = 65536;
-
- num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
- /* Make sure the output data fits in the offchip buffer */
- num_patches = MIN2(num_patches, (ctx->args->options->tess_offchip_block_dw_size * 4) / output_patch_size);
- /* Not necessary for correctness, but improves performance. The
- * specific value is taken from the proprietary driver.
- */
- num_patches = MIN2(num_patches, 40);
-
- /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
- if (ctx->args->options->chip_class == GFX6) {
- unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
- num_patches = MIN2(num_patches, one_wave);
- }
- return num_patches;
-}
-
-static unsigned
-calculate_tess_lds_size(struct radv_shader_context *ctx)
-{
- unsigned num_tcs_input_cp = ctx->args->options->key.tcs.input_vertices;
- unsigned num_tcs_output_cp;
- unsigned num_tcs_outputs, num_tcs_patch_outputs;
- unsigned input_vertex_size, output_vertex_size;
- unsigned input_patch_size, output_patch_size;
- unsigned pervertex_output_patch_size;
- unsigned output_patch0_offset;
- unsigned num_patches;
- unsigned lds_size;
-
- num_tcs_output_cp = ctx->shader->info.tess.tcs_vertices_out;
- num_tcs_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
- num_tcs_patch_outputs = util_last_bit64(ctx->args->shader_info->tcs.patch_outputs_written);
-
- input_vertex_size = ctx->tcs_num_inputs * 16;
- output_vertex_size = num_tcs_outputs * 16;
-
- input_patch_size = num_tcs_input_cp * input_vertex_size;
-
- pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
- output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
-
- num_patches = ctx->tcs_num_patches;
- output_patch0_offset = input_patch_size * num_patches;
-
- lds_size = output_patch0_offset + output_patch_size * num_patches;
- return lds_size;
-}
-
/* Tessellation shaders pass outputs to the next shader using LDS.
*
* LS outputs = TCS inputs
return result;
}
-
-static void radv_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- ac_build_kill_if_false(&ctx->ac, visible);
-}
-
static uint32_t
radv_get_sample_pos_offset(uint32_t num_samples)
{
static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
unsigned stream,
+ LLVMValueRef vertexidx,
LLVMValueRef *addrs);
static void
-visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
+visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream,
+ LLVMValueRef vertexidx, LLVMValueRef *addrs)
{
- LLVMValueRef gs_next_vertex;
- LLVMValueRef can_emit;
unsigned offset = 0;
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
if (ctx->args->options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
+ gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
return;
}
- /* Write vertex attribute values to GSVS ring */
- gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
- ctx->gs_next_vertex[stream],
- "");
-
- /* If this thread has already emitted the declared maximum number of
- * vertices, don't emit any more: excessive vertex emissions are not
- * supposed to have any effect.
- */
- can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
- LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
-
- bool use_kill = !ctx->args->shader_info->gs.writes_memory;
- if (use_kill)
- ac_build_kill_if_false(&ctx->ac, can_emit);
- else
- ac_build_ifcc(&ctx->ac, can_emit, 6505);
-
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask =
ctx->args->shader_info->gs.output_usage_mask[i];
offset++;
- voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");
+ voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
out_val = ac_to_integer(&ctx->ac, out_val);
}
}
- gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex,
- ctx->ac.i32_1, "");
- LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
-
ac_build_sendmsg(&ctx->ac,
AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
ctx->gs_wave_id);
-
- if (!use_kill)
- ac_build_endif(&ctx->ac, 6505);
}
static void
return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
}
-static const struct vertex_format_info {
- uint8_t vertex_byte_size;
- uint8_t num_channels;
- uint8_t chan_byte_size;
- uint8_t chan_format;
-} vertex_format_table[] = {
- { 0, 4, 0, V_008F0C_BUF_DATA_FORMAT_INVALID }, /* BUF_DATA_FORMAT_INVALID */
- { 1, 1, 1, V_008F0C_BUF_DATA_FORMAT_8 }, /* BUF_DATA_FORMAT_8 */
- { 2, 1, 2, V_008F0C_BUF_DATA_FORMAT_16 }, /* BUF_DATA_FORMAT_16 */
- { 2, 2, 1, V_008F0C_BUF_DATA_FORMAT_8 }, /* BUF_DATA_FORMAT_8_8 */
- { 4, 1, 4, V_008F0C_BUF_DATA_FORMAT_32 }, /* BUF_DATA_FORMAT_32 */
- { 4, 2, 2, V_008F0C_BUF_DATA_FORMAT_16 }, /* BUF_DATA_FORMAT_16_16 */
- { 4, 3, 0, V_008F0C_BUF_DATA_FORMAT_10_11_11 }, /* BUF_DATA_FORMAT_10_11_11 */
- { 4, 3, 0, V_008F0C_BUF_DATA_FORMAT_11_11_10 }, /* BUF_DATA_FORMAT_11_11_10 */
- { 4, 4, 0, V_008F0C_BUF_DATA_FORMAT_10_10_10_2 }, /* BUF_DATA_FORMAT_10_10_10_2 */
- { 4, 4, 0, V_008F0C_BUF_DATA_FORMAT_2_10_10_10 }, /* BUF_DATA_FORMAT_2_10_10_10 */
- { 4, 4, 1, V_008F0C_BUF_DATA_FORMAT_8 }, /* BUF_DATA_FORMAT_8_8_8_8 */
- { 8, 2, 4, V_008F0C_BUF_DATA_FORMAT_32 }, /* BUF_DATA_FORMAT_32_32 */
- { 8, 4, 2, V_008F0C_BUF_DATA_FORMAT_16 }, /* BUF_DATA_FORMAT_16_16_16_16 */
- { 12, 3, 4, V_008F0C_BUF_DATA_FORMAT_32 }, /* BUF_DATA_FORMAT_32_32_32 */
- { 16, 4, 4, V_008F0C_BUF_DATA_FORMAT_32 }, /* BUF_DATA_FORMAT_32_32_32_32 */
-};
-
static LLVMValueRef
radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
LLVMValueRef value,
ctx->args->ac.base_vertex), "");
}
- assert(data_format < ARRAY_SIZE(vertex_format_table));
- const struct vertex_format_info *vtx_info = &vertex_format_table[data_format];
+ const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
/* Adjust the number of channels to load based on the vertex
* attribute format.
bool unaligned_vertex_fetches = false;
if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class == GFX10) &&
vtx_info->chan_format != data_format &&
- ((attrib_offset % vtx_info->vertex_byte_size) ||
- (attrib_stride % vtx_info->vertex_byte_size)))
+ ((attrib_offset % vtx_info->element_size) ||
+ (attrib_stride % vtx_info->element_size)))
unaligned_vertex_fetches = true;
if (unaligned_vertex_fetches) {
return ngg_gs_vertex_ptr(ctx, vertexidx);
}
+static LLVMValueRef
+ngg_gs_get_emit_output_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexptr,
+ unsigned out_idx)
+{
+ LLVMValueRef gep_idx[3] = {
+ ctx->ac.i32_0, /* implied C-style array */
+ ctx->ac.i32_0, /* first struct entry */
+ LLVMConstInt(ctx->ac.i32, out_idx, false),
+ };
+ return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
+}
+
+static LLVMValueRef
+ngg_gs_get_emit_primflag_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexptr,
+ unsigned stream)
+{
+ LLVMValueRef gep_idx[3] = {
+ ctx->ac.i32_0, /* implied C-style array */
+ ctx->ac.i32_1, /* second struct entry */
+ LLVMConstInt(ctx->ac.i32, stream, false),
+ };
+ return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
+}
+
static struct radv_stream_output *
radv_get_stream_output_by_loc(struct radv_streamout_info *so, unsigned location)
{
LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
tmp = ngg_gs_emit_vertex_ptr(ctx, gsthread, vertexidx);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_1, /* second entry of struct */
- LLVMConstInt(ctx->ac.i32, stream, false),
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
- LLVMBuildStore(builder, i8_0, tmp);
+ LLVMBuildStore(builder, i8_0,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, stream));
ac_build_endloop(&ctx->ac, 5100);
}
if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
continue;
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- LLVMConstInt(ctx->ac.i32, stream, false),
- };
- tmp = LLVMBuildGEP(builder, vertexptr, gep_idx, 3, "");
- tmp = LLVMBuildLoad(builder, tmp, "");
+ tmp = LLVMBuildLoad(builder,
+ ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream), "");
tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
tmp2 = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
nggso.prim_enable[stream] = LLVMBuildAnd(builder, tmp, tmp2, "");
build_streamout(ctx, &nggso);
}
+ /* Write shader query data. */
+ tmp = ac_get_arg(&ctx->ac, ctx->args->ngg_gs_state);
+ tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5109);
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
+ LLVMConstInt(ctx->ac.i32, 4, false), "");
+ ac_build_ifcc(&ctx->ac, tmp, 5110);
+ {
+ tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), "");
+
+ ac_llvm_add_target_dep_function_attr(ctx->main_function,
+ "amdgpu-gds-size", 256);
+
+ LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
+ LLVMValueRef gdsbase = LLVMBuildIntToPtr(builder, ctx->ac.i32_0, gdsptr, "");
+
+ const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
+
+ /* Use a plain GDS atomic to accumulate the number of generated
+ * primitives.
+ */
+ ac_build_atomic_rmw(&ctx->ac, LLVMAtomicRMWBinOpAdd, gdsbase,
+ tmp, sync_scope);
+ }
+ ac_build_endif(&ctx->ac, 5110);
+ ac_build_endif(&ctx->ac, 5109);
+
/* TODO: culling */
/* Determine vertex liveness. */
/* Load primitive liveness */
tmp = ngg_gs_vertex_ptr(ctx, primidx);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- ctx->ac.i32_0, /* stream 0 */
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
- tmp = LLVMBuildLoad(builder, tmp, "");
+ tmp = LLVMBuildLoad(builder,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
const LLVMValueRef primlive =
LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
ac_build_ifcc(&ctx->ac, vertlive, 5130);
{
tmp = ngg_gs_vertex_ptr(ctx, vertlive_scan.result_exclusive);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- ctx->ac.i32_1, /* stream 1 */
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
tmp2 = LLVMBuildTrunc(builder, tid, ctx->ac.i8, "");
- LLVMBuildStore(builder, tmp2, tmp);
+ LLVMBuildStore(builder, tmp2,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1));
}
ac_build_endif(&ctx->ac, 5130);
prim.num_vertices = verts_per_prim;
tmp = ngg_gs_vertex_ptr(ctx, tid);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- ctx->ac.i32_0, /* primflag */
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
- flags = LLVMBuildLoad(builder, tmp, "");
+ flags = LLVMBuildLoad(builder,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
prim.isnull = LLVMBuildNot(builder, LLVMBuildTrunc(builder, flags, ctx->ac.i1, ""), "");
for (unsigned i = 0; i < verts_per_prim; ++i) {
outinfo->pos_exports = 0;
tmp = ngg_gs_vertex_ptr(ctx, tid);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- ctx->ac.i32_1, /* stream 1: source data index */
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
- tmp = LLVMBuildLoad(builder, tmp, "");
+ tmp = LLVMBuildLoad(builder,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1), "");
tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
const LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tmp);
unsigned out_idx = 0;
- gep_idx[1] = ctx->ac.i32_0;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask =
ctx->args->shader_info->gs.output_usage_mask[i];
if (!(output_usage_mask & (1 << j)))
continue;
- gep_idx[2] = LLVMConstInt(ctx->ac.i32, out_idx, false);
- tmp = LLVMBuildGEP(builder, vertexptr, gep_idx, 3, "");
+ tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
tmp = LLVMBuildLoad(builder, tmp, "");
LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
unsigned stream,
+ LLVMValueRef vertexidx,
LLVMValueRef *addrs)
{
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef tmp;
- const LLVMValueRef vertexidx =
- LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
-
- /* If this thread has already emitted the declared maximum number of
- * vertices, skip the write: excessive vertex emissions are not
- * supposed to have any effect.
- */
- const LLVMValueRef can_emit =
- LLVMBuildICmp(builder, LLVMIntULT, vertexidx,
- LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
- ac_build_ifcc(&ctx->ac, can_emit, 9001);
-
- tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
- tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, "");
- LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
const LLVMValueRef vertexptr =
ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
out_ptr[j], "");
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_0, /* first entry of struct */
- LLVMConstInt(ctx->ac.i32, out_idx, false),
- };
- LLVMValueRef ptr = LLVMBuildGEP(builder, vertexptr, gep_idx, 3, "");
-
out_val = ac_to_integer(&ctx->ac, out_val);
out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
- LLVMBuildStore(builder, out_val, ptr);
+ LLVMBuildStore(builder, out_val,
+ ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx));
}
}
assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
+ /* Store the current number of emitted vertices to zero out remaining
+ * primitive flags in case the geometry shader doesn't emit the maximum
+ * number of vertices.
+ */
+ tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
+ LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
+
/* Determine and store whether this vertex completed a primitive. */
const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
tmp = LLVMBuildAdd(builder, curverts, ctx->ac.i32_1, "");
LLVMBuildStore(builder, tmp, ctx->gs_curprim_verts[stream]);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_1, /* second struct entry */
- LLVMConstInt(ctx->ac.i32, stream, false),
- };
- const LLVMValueRef primflagptr =
- LLVMBuildGEP(builder, vertexptr, gep_idx, 3, "");
-
/* The per-vertex primitive flag encoding:
* bit 0: whether this vertex finishes a primitive
* bit 1: whether the primitive is odd (if we are emitting triangle strips)
LLVMBuildShl(builder,
LLVMBuildZExt(builder, is_odd, ctx->ac.i8, ""),
ctx->ac.i8_1, ""), "");
- LLVMBuildStore(builder, tmp, primflagptr);
+ LLVMBuildStore(builder, tmp,
+ ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream));
tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
-
- ac_build_endif(&ctx->ac, 9001);
}
static void
ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
args->options->family, float_mode,
- args->shader_info->wave_size, 64);
+ args->shader_info->wave_size,
+ args->shader_info->ballot_bit_size);
ctx.context = ctx.ac.context;
ctx.max_workgroup_size = 0;
ctx.abi.inputs = &ctx.inputs[0];
ctx.abi.emit_outputs = handle_shader_outputs_post;
- ctx.abi.emit_vertex = visit_emit_vertex;
+ ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
ctx.abi.load_ubo = radv_load_ubo;
ctx.abi.load_ssbo = radv_load_ssbo;
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
ctx.tcs_num_inputs = args->options->key.tcs.num_inputs;
else
ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written);
- ctx.tcs_num_patches = get_tcs_num_patches(&ctx);
+ ctx.tcs_num_patches =
+ get_tcs_num_patches(
+ ctx.args->options->key.tcs.input_vertices,
+ ctx.shader->info.tess.tcs_vertices_out,
+ ctx.tcs_num_inputs,
+ ctx.args->shader_info->tcs.outputs_written,
+ ctx.args->shader_info->tcs.patch_outputs_written,
+ ctx.args->options->tess_offchip_block_dw_size,
+ ctx.args->options->chip_class,
+ ctx.args->options->family);
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.abi.load_tess_varyings = load_tes_input;
ctx.abi.load_tess_coord = load_tess_coord;
} else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
ctx.abi.load_sample_position = load_sample_position;
ctx.abi.load_sample_mask_in = load_sample_mask_in;
- ctx.abi.emit_kill = radv_emit_kill;
}
if (shaders[i]->info.stage == MESA_SHADER_VERTEX &&
ac_setup_rings(&ctx);
- LLVMBasicBlockRef merge_block;
+ LLVMBasicBlockRef merge_block = NULL;
if (shader_count >= 2 || is_ngg) {
LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
args->shader_info->tcs.num_patches = ctx.tcs_num_patches;
- args->shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx);
+ args->shader_info->tcs.lds_size =
+ calculate_tess_lds_size(
+ ctx.args->options->key.tcs.input_vertices,
+ ctx.shader->info.tess.tcs_vertices_out,
+ ctx.tcs_num_inputs,
+ ctx.tcs_num_patches,
+ ctx.args->shader_info->tcs.outputs_written,
+ ctx.args->shader_info->tcs.patch_outputs_written);
}
}
free(elf_buffer);
}
-void
+static void
radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
struct radv_shader_binary **rbinary,
const struct radv_shader_args *args,
LLVMPositionBuilderAtEnd(ctx->ac.builder, end_bb);
}
-void
+static void
radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *geom_shader,
struct radv_shader_binary **rbinary,
(*rbinary)->is_gs_copy_shader = true;
}
+
+void
+llvm_compile_shader(struct radv_device *device,
+ unsigned shader_count,
+ struct nir_shader *const *shaders,
+ struct radv_shader_binary **binary,
+ struct radv_shader_args *args)
+{
+ enum ac_target_machine_options tm_options = 0;
+ struct ac_llvm_compiler ac_llvm;
+ bool thread_compiler;
+
+ tm_options |= AC_TM_SUPPORTS_SPILL;
+ if (args->options->check_ir)
+ tm_options |= AC_TM_CHECK_IR;
+ if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
+ tm_options |= AC_TM_NO_LOAD_STORE_OPT;
+
+ thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
+
+ radv_init_llvm_compiler(&ac_llvm, thread_compiler,
+ args->options->family, tm_options,
+ args->shader_info->wave_size);
+
+ if (args->is_gs_copy_shader) {
+ radv_compile_gs_copy_shader(&ac_llvm, *shaders, binary, args);
+ } else {
+ radv_compile_nir_shader(&ac_llvm, binary, args,
+ shaders, shader_count);
+ }
+
+ radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
+}