union si_shader_part_key *key);
static void si_build_ps_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
+static void si_fix_resource_usage(struct si_screen *sscreen,
+ struct si_shader *shader);
/* Ideally pass the sample mask input to the PS epilog as v14, which
* is its usual location, so that the shader doesn't have to add v_mov.
}
}
-static LLVMValueRef get_instance_index_for_fetch(
- struct si_shader_context *ctx,
- unsigned param_start_instance, LLVMValueRef divisor)
-{
- LLVMValueRef result = ctx->abi.instance_id;
-
- /* The division must be done before START_INSTANCE is added. */
- if (divisor != ctx->i32_1)
- result = LLVMBuildUDiv(ctx->ac.builder, result, divisor, "");
-
- return LLVMBuildAdd(ctx->ac.builder, result,
- LLVMGetParam(ctx->main_fn, param_start_instance), "");
-}
-
/* Bitcast <4 x float> to <2 x double>, extract the component, and convert
* to float. */
static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx,
/* Do multiple loads for special formats. */
switch (fix_fetch) {
+ case SI_FIX_FETCH_RG_64_FLOAT:
+ num_fetches = 1; /* 1 2-dword or 4-dword load */
+ fetch_stride = 0;
+ if (util_last_bit(info->input_usage_mask[input_index]) >= 2)
+ num_channels = 4; /* 2 doubles in 4 dwords */
+ else
+ num_channels = 2; /* 1 double in 2 dwords */
+ break;
case SI_FIX_FETCH_RGB_64_FLOAT:
num_fetches = 3; /* 3 2-dword loads */
fetch_stride = 8;
static LLVMValueRef fetch_input_tcs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type, unsigned swizzle)
+ enum tgsi_opcode_type type, unsigned swizzle_in)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef dw_addr, stride;
-
+ unsigned swizzle = swizzle_in & 0xffff;
stride = get_tcs_in_vertex_dw_stride(ctx);
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
static LLVMValueRef fetch_output_tcs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type, unsigned swizzle)
+ enum tgsi_opcode_type type, unsigned swizzle_in)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef dw_addr, stride;
+ unsigned swizzle = (swizzle_in & 0xffff);
if (reg->Register.Dimension) {
stride = get_tcs_out_vertex_dw_stride(ctx);
static LLVMValueRef fetch_input_tes(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type, unsigned swizzle)
+ enum tgsi_opcode_type type, unsigned swizzle_in)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef base, addr;
+ unsigned swizzle = (swizzle_in & 0xffff);
base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ unsigned swizzle = swizzle_in & 0xffff;
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
break;
}
+ case TGSI_SEMANTIC_CS_USER_DATA:
+ value = LLVMGetParam(ctx->main_fn, ctx->param_cs_user_data);
+ break;
+
default:
assert(!"unknown system value");
return;
struct si_shader_selector *sel = ctx->shader->selector;
unsigned lds_size = sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE];
- LLVMTypeRef i8p = LLVMPointerType(ctx->i8, AC_LOCAL_ADDR_SPACE);
+ LLVMTypeRef i8p = LLVMPointerType(ctx->i8, AC_ADDR_SPACE_LDS);
LLVMValueRef var;
assert(!ctx->ac.lds);
var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
LLVMArrayType(ctx->i8, lds_size),
"compute_lds",
- AC_LOCAL_ADDR_SPACE);
+ AC_ADDR_SPACE_LDS);
LLVMSetAlignment(var, 4);
ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
LLVMValueRef desc0, desc1;
- if (HAVE_32BIT_POINTERS) {
- desc0 = ptr;
- desc1 = LLVMConstInt(ctx->i32,
- S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
- } else {
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
- desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, "");
- desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, "");
- /* Mask out all bits except BASE_ADDRESS_HI. */
- desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
- LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), "");
- }
+ desc0 = ptr;
+ desc1 = LLVMConstInt(ctx->i32,
+ S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
LLVMValueRef desc_elems[] = {
desc0,
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader_selector *sel = ctx->shader->selector;
const struct tgsi_ind_register *ireg = ®->Indirect;
unsigned buf, idx;
+ unsigned swizzle = swizzle_in & 0xffff;
LLVMValueRef addr, bufp;
- if (swizzle == LP_CHAN_ALL) {
+ if (swizzle_in == LP_CHAN_ALL) {
unsigned chan;
LLVMValueRef values[4];
for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
LLVMValueRef lo, hi;
lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle);
- hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle + 1);
+ hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, (swizzle_in >> 16));
return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
lo, hi);
}
LLVMBuildFCmp(ctx->ac.builder, cond, alpha, alpha_ref, "");
ac_build_kill_if_false(&ctx->ac, alpha_pass);
} else {
- ac_build_kill_if_false(&ctx->ac, LLVMConstInt(ctx->i1, 0, 0));
+ ac_build_kill_if_false(&ctx->ac, ctx->i1false);
}
}
break;
case 2: /* as v2i32 */
case 3: /* as v4i32 (aligned to 4) */
+ out[3] = LLVMGetUndef(ctx->i32);
+ /* fall through */
case 4: /* as v4i32 */
- vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps)));
- for (int j = 0; j < num_comps; j++) {
- vdata = LLVMBuildInsertElement(ctx->ac.builder, vdata, out[j],
- LLVMConstInt(ctx->i32, j, 0), "");
- }
+ vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps));
break;
}
unsigned param, unsigned return_index)
{
LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef ptr, lo, hi;
-
- if (HAVE_32BIT_POINTERS) {
- ptr = LLVMGetParam(ctx->main_fn, param);
- ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
- return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
- }
-
- ptr = LLVMGetParam(ctx->main_fn, param);
- ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, "");
- ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, "");
- lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, "");
- hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, "");
- ret = LLVMBuildInsertValue(builder, ret, lo, return_index, "");
- return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, "");
+ LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, param);
+ ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
+ return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
}
/* This only writes the tessellation factor levels. */
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, 0, 0);
- if (HAVE_32BIT_POINTERS)
- ret = si_insert_input_ptr(ctx, ret, 1, 1);
+ ret = si_insert_input_ptr(ctx, ret, 1, 1);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
8 + SI_SGPR_VS_STATE_BITS);
-#if !HAVE_32BIT_POINTERS
- ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
- 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
-#endif
-
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, 0, 0);
- if (HAVE_32BIT_POINTERS)
- ret = si_insert_input_ptr(ctx, ret, 1, 1);
+ ret = si_insert_input_ptr(ctx, ret, 1, 1);
ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
ctx->param_bindless_samplers_and_images,
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
-#if !HAVE_32BIT_POINTERS
- ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
- 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
-#endif
-
unsigned vgpr;
if (ctx->type == PIPE_SHADER_VERTEX)
vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR;
gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, ctx->i32_1, "");
LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
- /* Signal vertex emission */
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
- si_get_gs_wave_id(ctx));
+ /* Signal vertex emission if vertex data was written. */
+ if (offset) {
+ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
+ si_get_gs_wave_id(ctx));
+ }
+
if (!use_kill)
lp_build_endif(&if_state);
}
}
}
+static void declare_vs_blit_inputs(struct si_shader_context *ctx,
+ struct si_function_info *fninfo,
+ unsigned vs_blit_property)
+{
+ ctx->param_vs_blit_inputs = fninfo->num_params;
+ add_arg(fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */
+ add_arg(fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* depth */
+
+ if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* color0 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* color1 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* color2 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* color3 */
+ } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) {
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.x2 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */
+ }
+}
+
static void declare_tes_input_vgprs(struct si_shader_context *ctx,
struct si_function_info *fninfo)
{
declare_global_desc_pointers(ctx, &fninfo);
if (vs_blit_property) {
- ctx->param_vs_blit_inputs = fninfo.num_params;
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* depth */
-
- if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color0 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color1 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color2 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color3 */
- } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) {
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x2 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */
- }
+ declare_vs_blit_inputs(ctx, &fninfo, vs_blit_property);
/* VGPRs */
declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs);
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
/* Merged stages have 8 system SGPRs at the beginning. */
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
- if (HAVE_32BIT_POINTERS) {
- declare_per_stage_desc_pointers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- } else {
- declare_const_and_shader_buffers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- }
+ declare_per_stage_desc_pointers(ctx, &fninfo,
+ ctx->type == PIPE_SHADER_TESS_CTRL);
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->type == PIPE_SHADER_VERTEX);
declare_vs_specific_input_sgprs(ctx, &fninfo);
- if (!HAVE_32BIT_POINTERS) {
- declare_samplers_and_images(ctx, &fninfo,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- }
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
- if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
ac_array_in_const32_addr_space(ctx->v4i32));
case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
/* Merged stages have 8 system SGPRs at the beginning. */
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
- if (HAVE_32BIT_POINTERS) {
- declare_per_stage_desc_pointers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_GEOMETRY);
- } else {
- declare_const_and_shader_buffers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_GEOMETRY);
- }
+ declare_per_stage_desc_pointers(ctx, &fninfo,
+ ctx->type == PIPE_SHADER_GEOMETRY);
ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* Declare as many input SGPRs as the VS has. */
- if (!HAVE_32BIT_POINTERS)
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
}
- if (!HAVE_32BIT_POINTERS) {
- declare_samplers_and_images(ctx, &fninfo,
- ctx->type == PIPE_SHADER_GEOMETRY);
- }
if (ctx->type == PIPE_SHADER_VERTEX) {
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
ac_array_in_const32_addr_space(ctx->v4i32));
shader->selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
ctx->param_block_size = add_arg(&fninfo, ARG_SGPR, v3i32);
+ unsigned cs_user_data_dwords =
+ shader->selector->info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
+ if (cs_user_data_dwords) {
+ ctx->param_cs_user_data = add_arg(&fninfo, ARG_SGPR,
+ LLVMVectorType(ctx->i32, cs_user_data_dwords));
+ }
+
for (i = 0; i < 3; i++) {
ctx->abi.workgroup_ids[i] = NULL;
if (shader->selector->info.uses_block_id[i])
!mainb->rodata_size);
assert(!epilog || !epilog->rodata_size);
- r600_resource_reference(&shader->bo, NULL);
+ si_resource_reference(&shader->bo, NULL);
shader->bo = si_aligned_buffer_create(&sscreen->b,
sscreen->cpdma_prefetch_writes_memory ?
0 : SI_RESOURCE_FLAG_READ_ONLY,
/* Upload. */
ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED);
+ PIPE_TRANSFER_UNSYNCHRONIZED |
+ RADEON_TRANSFER_TEMPORARY);
/* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
* endian-independent. */
if (r != 0) {
FREE(shader);
shader = NULL;
+ } else {
+ si_fix_resource_usage(sscreen, shader);
}
return shader;
}
if (sel->force_correct_derivs_after_kill) {
ctx->postponed_kill = ac_build_alloca_undef(&ctx->ac, ctx->i1, "");
/* true = don't kill. */
- LLVMBuildStore(ctx->ac.builder, LLVMConstInt(ctx->i1, 1, 0),
+ LLVMBuildStore(ctx->ac.builder, ctx->i1true,
ctx->postponed_kill);
}
if (LLVMTypeOf(arg) != param_type) {
if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
if (LLVMGetPointerAddressSpace(param_type) ==
- AC_CONST_32BIT_ADDR_SPACE) {
+ AC_ADDR_SPACE_CONST_32BIT) {
arg = LLVMBuildBitCast(builder, arg, ctx->i32, "");
arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
} else {
LLVMValueRef ptr[2], list;
bool merged_shader = is_merged_shader(ctx);
- if (HAVE_32BIT_POINTERS) {
- ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
- list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
- ac_array_in_const32_addr_space(ctx->v4i32), "");
- return list;
- }
-
- /* Get the pointer to rw buffers. */
ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
- ptr[1] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS + 1);
- list = ac_build_gather_values(&ctx->ac, ptr, 2);
- list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
- list = LLVMBuildIntToPtr(ctx->ac.builder, list,
- ac_array_in_const_addr_space(ctx->v4i32), "");
+ list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
+ ac_array_in_const32_addr_space(ctx->v4i32), "");
return list;
}
key->vs_prolog.states.instance_divisor_is_one & (1u << i);
bool divisor_is_fetched =
key->vs_prolog.states.instance_divisor_is_fetched & (1u << i);
- LLVMValueRef index;
-
- if (divisor_is_one || divisor_is_fetched) {
- LLVMValueRef divisor = ctx->i32_1;
-
- if (divisor_is_fetched) {
- divisor = buffer_load_const(ctx, instance_divisor_constbuf,
- LLVMConstInt(ctx->i32, i * 4, 0));
- divisor = ac_to_integer(&ctx->ac, divisor);
+ LLVMValueRef index = NULL;
+
+ if (divisor_is_one) {
+ index = ctx->abi.instance_id;
+ } else if (divisor_is_fetched) {
+ LLVMValueRef udiv_factors[4];
+
+ for (unsigned j = 0; j < 4; j++) {
+ udiv_factors[j] =
+ buffer_load_const(ctx, instance_divisor_constbuf,
+ LLVMConstInt(ctx->i32, i*16 + j*4, 0));
+ udiv_factors[j] = ac_to_integer(&ctx->ac, udiv_factors[j]);
}
+ /* The faster NUW version doesn't work when InstanceID == UINT_MAX.
+ * Such InstanceID might not be achievable in a reasonable time though.
+ */
+ index = ac_build_fast_udiv_nuw(&ctx->ac, ctx->abi.instance_id,
+ udiv_factors[0], udiv_factors[1],
+ udiv_factors[2], udiv_factors[3]);
+ }
- /* InstanceID / Divisor + StartInstance */
- index = get_instance_index_for_fetch(ctx,
- user_sgpr_base +
- SI_SGPR_START_INSTANCE,
- divisor);
+ if (divisor_is_one || divisor_is_fetched) {
+ /* Add StartInstance. */
+ index = LLVMBuildAdd(ctx->ac.builder, index,
+ LLVMGetParam(ctx->main_fn, user_sgpr_base +
+ SI_SGPR_START_INSTANCE), "");
} else {
/* VertexID + BaseVertex */
index = LLVMBuildAdd(ctx->ac.builder,
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
- if (!HAVE_32BIT_POINTERS)
- add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
void si_shader_destroy(struct si_shader *shader)
{
if (shader->scratch_bo)
- r600_resource_reference(&shader->scratch_bo, NULL);
+ si_resource_reference(&shader->scratch_bo, NULL);
- r600_resource_reference(&shader->bo, NULL);
+ si_resource_reference(&shader->bo, NULL);
if (!shader->is_binary_shared)
ac_shader_binary_clean(&shader->binary);