LLVMValueRef base_addr,
LLVMValueRef vertex_index,
LLVMValueRef param_index,
- unsigned input_index,
- ubyte *name,
- ubyte *index,
- bool is_patch)
+ ubyte name, ubyte index)
{
if (vertex_dw_stride) {
base_addr = ac_build_imad(&ctx->ac, vertex_index,
LLVMConstInt(ctx->i32, 4, 0), base_addr);
}
- int param = is_patch ?
- si_shader_io_get_unique_index_patch(name[input_index],
- index[input_index]) :
- si_shader_io_get_unique_index(name[input_index],
- index[input_index], false);
+ int param = name == TGSI_SEMANTIC_PATCH ||
+ name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER ?
+ si_shader_io_get_unique_index_patch(name, index) :
+ si_shader_io_get_unique_index(name, index, false);
/* Add the base address of the element. */
return LLVMBuildAdd(ctx->ac.builder, base_addr,
return get_dw_address_from_generic_indices(ctx, vertex_dw_stride,
base_addr, vertex_index,
- ind_index, input_index,
- name, index,
- !reg.Register.Dimension);
+ ind_index, name[input_index],
+ index[input_index]);
}
/* The offchip buffer layout for TCS->TES is
struct si_shader_context *ctx,
LLVMValueRef vertex_index,
LLVMValueRef param_index,
- unsigned param_base,
- ubyte *name,
- ubyte *index,
- bool is_patch)
+ ubyte name, ubyte index)
{
unsigned param_index_base;
- param_index_base = is_patch ?
- si_shader_io_get_unique_index_patch(name[param_base], index[param_base]) :
- si_shader_io_get_unique_index(name[param_base], index[param_base], false);
+ param_index_base = name == TGSI_SEMANTIC_PATCH ||
+ name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER ?
+ si_shader_io_get_unique_index_patch(name, index) :
+ si_shader_io_get_unique_index(name, index, false);
if (param_index) {
param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
reg = src ? *src : tgsi_full_src_register_from_dst(dst);
if (reg.Register.Dimension) {
-
if (reg.Dimension.Indirect)
vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
1, reg.Dimension.Index);
param_index = si_get_indirect_index(ctx, &reg.Indirect,
1, reg.Register.Index - param_base);
-
} else {
param_base = reg.Register.Index;
}
return get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
- param_index, param_base,
- name, index, !reg.Register.Dimension);
+ param_index, name[param_base],
+ index[param_base]);
}
static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
struct tgsi_shader_info *info = &ctx->shader->selector->info;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
LLVMValueRef dw_addr, stride;
+ ubyte name, index;
driver_location = driver_location / 4;
+ if (load_input) {
+ name = info->input_semantic_name[driver_location];
+ index = info->input_semantic_index[driver_location];
+ } else {
+ name = info->output_semantic_name[driver_location];
+ index = info->output_semantic_index[driver_location];
+ }
+
+ assert((name == TGSI_SEMANTIC_PATCH ||
+ name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
+
if (load_input) {
stride = get_tcs_in_vertex_dw_stride(ctx);
dw_addr = get_tcs_in_current_patch_offset(ctx);
param_index = LLVMConstInt(ctx->i32, const_index, 0);
}
- ubyte *names;
- ubyte *indices;
- if (load_input) {
- names = info->input_semantic_name;
- indices = info->input_semantic_index;
- } else {
- names = info->output_semantic_name;
- indices = info->output_semantic_index;
- }
-
dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
vertex_index, param_index,
- driver_location,
- names, indices,
- is_patch);
+ name, index);
LLVMValueRef value[4];
for (unsigned i = 0; i < num_components; i++) {
LLVMValueRef base, addr;
driver_location = driver_location / 4;
+ ubyte name = info->input_semantic_name[driver_location];
+ ubyte index = info->input_semantic_index[driver_location];
+
+ assert((name == TGSI_SEMANTIC_PATCH ||
+ name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
}
addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
- param_index, driver_location,
- info->input_semantic_name,
- info->input_semantic_index,
- is_patch);
+ param_index,
+ name, index);
/* TODO: This will generate rather ordinary llvm code, although it
* should be easy for the optimiser to fix up. In future we might want
if (llvm_type_is_64bit(ctx, type)) {
offset *= 2;
if (offset == 4) {
+ ubyte name = info->input_semantic_name[driver_location + 1];
+ ubyte index = info->input_semantic_index[driver_location + 1];
addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
vertex_index,
param_index,
- driver_location + 1,
- info->input_semantic_name,
- info->input_semantic_index,
- is_patch);
+ name, index);
}
offset = offset % 4;
if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
buf_addr, base,
- 4 * chan_index, ac_glc, false);
+ 4 * chan_index, ac_glc);
}
/* Write tess factors into VGPRs for the epilog. */
LLVMValueRef value = ac_build_gather_values(&ctx->ac,
values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
- base, 0, ac_glc, false);
+ base, 0, ac_glc);
}
}
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct tgsi_shader_info *info = &ctx->shader->selector->info;
const unsigned component = var->data.location_frac;
- const bool is_patch = var->data.patch;
unsigned driver_location = var->data.driver_location;
LLVMValueRef dw_addr, stride;
LLVMValueRef buffer, base, addr;
bool is_tess_factor = false, is_tess_inner = false;
driver_location = driver_location / 4;
+ ubyte name = info->output_semantic_name[driver_location];
+ ubyte index = info->output_semantic_index[driver_location];
bool is_const = !param_index;
if (!param_index)
param_index = LLVMConstInt(ctx->i32, const_index, 0);
+ const bool is_patch = var->data.patch ||
+ var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
+ var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
+
+ assert((name == TGSI_SEMANTIC_PATCH ||
+ name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
+
if (!is_patch) {
stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
vertex_index, param_index,
- driver_location,
- info->output_semantic_name,
- info->output_semantic_index,
- is_patch);
+ name, index);
skip_lds_store = !info->reads_pervertex_outputs;
} else {
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr,
vertex_index, param_index,
- driver_location,
- info->output_semantic_name,
- info->output_semantic_index,
- is_patch);
+ name, index);
skip_lds_store = !info->reads_perpatch_outputs;
base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
- param_index, driver_location,
- info->output_semantic_name,
- info->output_semantic_index,
- is_patch);
+ param_index, name, index);
- for (unsigned chan = 0; chan < 8; chan++) {
+ for (unsigned chan = component; chan < 8; chan++) {
if (!(writemask & (1 << chan)))
continue;
LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
unsigned buffer_store_offset = chan % 4;
if (chan == 4) {
+ ubyte name = info->output_semantic_name[driver_location + 1];
+ ubyte index = info->output_semantic_index[driver_location + 1];
addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
vertex_index,
param_index,
- driver_location + 1,
- info->output_semantic_name,
- info->output_semantic_index,
- is_patch);
+ name, index);
}
/* Skip LDS stores if there is no LDS read of this output. */
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
addr, base,
4 * buffer_store_offset,
- ac_glc, false);
+ ac_glc);
}
/* Write tess factors into VGPRs for the epilog. */
LLVMValueRef value = ac_build_gather_values(&ctx->ac,
values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
- base, 0, ac_glc, false);
+ base, 0, ac_glc);
}
}
vdata, num_comps,
so_write_offsets[buf_idx],
ctx->i32_0,
- stream_out->dst_offset * 4, ac_glc | ac_slc, false);
+ stream_out->dst_offset * 4, ac_glc | ac_slc);
}
/**
LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
- buffer_offset, 0, ac_glc, false);
+ buffer_offset, 0, ac_glc);
}
}
ac_build_buffer_store_dword(&ctx->ac, buffer,
LLVMConstInt(ctx->i32, 0x80000000, 0),
1, ctx->i32_0, tf_base,
- offset, ac_glc, false);
+ offset, ac_glc);
offset += 4;
}
/* Store the tessellation factors. */
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
MIN2(stride, 4), byteoffset, tf_base,
- offset, ac_glc, false);
+ offset, ac_glc);
offset += 16;
if (vec1)
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
stride - 4, byteoffset, tf_base,
- offset, ac_glc, false);
+ offset, ac_glc);
/* Store the tess factors into the offchip buffer if TES reads them. */
if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
outer_comps, tf_outer_offset,
- base, 0, ac_glc, false);
+ base, 0, ac_glc);
if (inner_comps) {
param_inner = si_shader_io_get_unique_index_patch(
TGSI_SEMANTIC_TESSINNER, 0);
ac_build_gather_values(&ctx->ac, inner, inner_comps);
ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
inner_comps, tf_inner_offset,
- base, 0, ac_glc, false);
+ base, 0, ac_glc);
}
}
out_val, 1, NULL,
ac_get_arg(&ctx->ac, ctx->es2gs_offset),
(4 * param + chan) * 4,
- ac_glc | ac_slc, true);
+ ac_glc | ac_slc | ac_swizzled);
}
}
ctx->gsvs_ring[stream],
out_val, 1,
voffset, soffset, 0,
- ac_glc | ac_slc, true);
+ ac_glc | ac_slc | ac_swizzled);
}
}
if (!shader->is_gs_copy_shader) {
/* Vertex load indices. */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->vertex_index0);
- for (unsigned i = 1; i < shader->selector->info.num_inputs; i++)
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
+ if (shader->selector->info.num_inputs) {
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT,
+ &ctx->vertex_index0);
+ for (unsigned i = 1; i < shader->selector->info.num_inputs; i++)
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
+ }
*num_prolog_vgprs += shader->selector->info.num_inputs;
}
}
&ctx->cs_user_data);
}
+ /* Hardware SGPRs. */
for (i = 0; i < 3; i++) {
if (shader->selector->info.uses_block_id[i]) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
&ctx->args.workgroup_ids[i]);
}
}
+ if (shader->selector->info.uses_subgroup_info)
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tg_size);
+ /* Hardware VGPRs. */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 3, AC_ARG_INT,
&ctx->args.local_invocation_ids);
break;
return sel->vs_needs_prolog || key->ls_vgpr_fix;
}
+LLVMValueRef si_is_es_thread(struct si_shader_context *ctx)
+{
+ /* Return true if the current thread should execute an ES thread.
+  *
+  * Emits, through ctx->ac.builder, an unsigned less-than compare
+  * (LLVMIntULT) of ac_get_thread_id() against the count unpacked
+  * from ctx->merged_wave_info with arguments (0, 8) and returns the
+  * resulting LLVM value.
+  *
+  * NOTE(review): (0, 8) is presumably "bit offset 0, width 8", i.e.
+  * the low byte of merged_wave_info holding the ES thread count;
+  * confirm against si_unpack_param. */
+ return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+ ac_get_thread_id(&ctx->ac),
+ si_unpack_param(ctx, ctx->merged_wave_info, 0, 8), "");
+}
+
+LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
+{
+ /* Return true if the current thread should execute a GS thread.
+  *
+  * Emits, through ctx->ac.builder, an unsigned less-than compare
+  * (LLVMIntULT) of ac_get_thread_id() against the count unpacked
+  * from ctx->merged_wave_info with arguments (8, 8) and returns the
+  * resulting LLVM value.
+  *
+  * NOTE(review): (8, 8) is presumably "bit offset 8, width 8", i.e.
+  * the second byte of merged_wave_info holding the GS thread count;
+  * confirm against si_unpack_param. */
+ return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+ ac_get_thread_id(&ctx->ac),
+ si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), "");
+}
+
static bool si_compile_tgsi_main(struct si_shader_context *ctx,
struct nir_shader *nir, bool free_nir)
{
} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
ctx->type == PIPE_SHADER_GEOMETRY ||
(shader->key.as_ngg && !shader->key.as_es)) {
- LLVMValueRef num_threads;
+ LLVMValueRef thread_enabled;
bool nested_barrier;
if (!shader->is_monolithic ||
nested_barrier = true;
}
- /* Number of patches / primitives */
- num_threads = si_unpack_param(ctx, ctx->merged_wave_info, 8, 8);
+ thread_enabled = si_is_gs_thread(ctx);
} else {
- /* Number of vertices */
- num_threads = si_unpack_param(ctx, ctx->merged_wave_info, 0, 8);
+ thread_enabled = si_is_es_thread(ctx);
nested_barrier = false;
}
- LLVMValueRef ena =
- LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
- ac_get_thread_id(&ctx->ac), num_threads, "");
-
ctx->merged_wrap_if_entry_block = LLVMGetInsertBlock(ctx->ac.builder);
ctx->merged_wrap_if_label = 11500;
- ac_build_ifcc(&ctx->ac, ena, ctx->merged_wrap_if_label);
+ ac_build_ifcc(&ctx->ac, thread_enabled, ctx->merged_wrap_if_label);
if (nested_barrier) {
/* Execute a barrier before the second shader in
memset(key, 0, sizeof(*key));
key->vs_prolog.states = *prolog_key;
key->vs_prolog.num_input_sgprs = num_input_sgprs;
- key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
+ key->vs_prolog.num_inputs = info->num_inputs;
key->vs_prolog.as_ls = shader_out->key.as_ls;
key->vs_prolog.as_es = shader_out->key.as_es;
key->vs_prolog.as_ngg = shader_out->key.as_ngg;
shader->info.num_input_sgprs,
&shader->key.part.vs.prolog,
shader, &prolog_key);
+ prolog_key.vs_prolog.is_monolithic = true;
si_build_vs_prolog_function(&ctx, &prolog_key);
parts[0] = ctx.main_fn;
}
memset(&ctx->args, 0, sizeof(ctx->args));
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
- returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
+ returns = alloca((num_all_input_regs + key->vs_prolog.num_inputs) *
sizeof(LLVMTypeRef));
num_returns = 0;
}
/* Vertex load indices. */
- for (i = 0; i <= key->vs_prolog.last_input; i++)
+ for (i = 0; i < key->vs_prolog.num_inputs; i++)
returns[num_returns++] = ctx->f32;
/* Create the function. */
key->vs_prolog.num_input_sgprs + i, "");
}
- LLVMValueRef original_ret = ret;
- bool wrapped = false;
- LLVMBasicBlockRef if_entry_block = NULL;
-
- if (key->vs_prolog.is_monolithic && key->vs_prolog.as_ngg) {
- LLVMValueRef num_threads;
- LLVMValueRef ena;
-
- num_threads = si_unpack_param(ctx, merged_wave_info, 0, 8);
- ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
- ac_get_thread_id(&ctx->ac), num_threads, "");
- if_entry_block = LLVMGetInsertBlock(ctx->ac.builder);
- ac_build_ifcc(&ctx->ac, ena, 11501);
- wrapped = true;
- }
-
/* Compute vertex load indices from instance divisors. */
LLVMValueRef instance_divisor_constbuf = NULL;
ac_build_load_to_sgpr(&ctx->ac, list, buf_index);
}
- for (i = 0; i <= key->vs_prolog.last_input; i++) {
+ for (i = 0; i < key->vs_prolog.num_inputs; i++) {
bool divisor_is_one =
key->vs_prolog.states.instance_divisor_is_one & (1u << i);
bool divisor_is_fetched =
ctx->args.arg_count + i, "");
}
- if (wrapped) {
- LLVMBasicBlockRef bbs[2] = {
- LLVMGetInsertBlock(ctx->ac.builder),
- if_entry_block,
- };
- ac_build_endif(&ctx->ac, 11501);
-
- LLVMValueRef values[2] = {
- ret,
- original_ret
- };
- ret = ac_build_phi(&ctx->ac, LLVMTypeOf(ret), 2, values, bbs);
- }
-
si_llvm_build_ret(ctx, ret);
}