"");
}
+/**
+ * Emit a two-index GEP on \p base_ptr: the first index is a constant i32
+ * zero and the second one is \p index.
+ *
+ * \return the pointer value produced by LLVMBuildGEP.
+ */
+static LLVMValueRef build_gep0(struct si_shader_context *ctx,
+			       LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
+	LLVMValueRef gep_indices[2];
+
+	gep_indices[0] = LLVMConstInt(ctx->i32, 0, 0);
+	gep_indices[1] = index;
+
+	return LLVMBuildGEP(builder, base_ptr, gep_indices, 2, "");
+}
+
static void build_indexed_store(struct si_shader_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index,
LLVMValueRef value)
{
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMValueRef indices[2], pointer;
-
- indices[0] = bld_base->uint_bld.zero;
- indices[1] = index;
- pointer = LLVMBuildGEP(gallivm->builder, base_ptr, indices, 2, "");
- LLVMBuildStore(gallivm->builder, value, pointer);
+	/* Compute the element address with build_gep0() (indices {0, index}),
+	 * then store the value through it.
+	 */
+	LLVMValueRef element_ptr = build_gep0(ctx, base_ptr, index);
+
+	LLVMBuildStore(gallivm->builder, value, element_ptr);
}
/**
{
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMValueRef indices[2], pointer;
-
- indices[0] = bld_base->uint_bld.zero;
- indices[1] = index;
+ LLVMValueRef pointer;
- pointer = LLVMBuildGEP(gallivm->builder, base_ptr, indices, 2, "");
+ pointer = build_gep0(ctx, base_ptr, index);
if (uniform)
LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
return LLVMBuildLoad(gallivm->builder, pointer, "");
LLVMValueRef values[3];
unsigned i;
unsigned *properties = ctx->shader->selector->info.properties;
- unsigned sizes[3] = {
- properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
- properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
- properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
- };
- for (i = 0; i < 3; ++i)
- values[i] = lp_build_const_int32(gallivm, sizes[i]);
+ if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
+ unsigned sizes[3] = {
+ properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
+ properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
+ properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
+ };
+
+ for (i = 0; i < 3; ++i)
+ values[i] = lp_build_const_int32(gallivm, sizes[i]);
- value = lp_build_gather_values(gallivm, values, 3);
+ value = lp_build_gather_values(gallivm, values, 3);
+ } else {
+ value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_BLOCK_SIZE);
+ }
break;
}
result = bitcast(bld_base, type, result);
else {
LLVMValueRef addr2, result2;
- addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
+ addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
addr2 = lp_build_add(&bld_base->uint_bld, addr2,
- lp_build_const_int32(base->gallivm, idx * 4));
+ lp_build_const_int32(base->gallivm, (idx + 1) * 4));
result2 = buffer_load_const(ctx, bufp, addr2);
* Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
* intrinsic names).
*/
-static void build_int_type_name(
+static void build_type_name_for_intr(
LLVMTypeRef type,
char *buf, unsigned bufsize)
{
- assert(bufsize >= 6);
+	LLVMTypeRef elem_type = type;
- if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
- snprintf(buf, bufsize, "v%ui32",
- LLVMGetVectorSize(type));
- else
- strcpy(buf, "i32");
+	assert(bufsize >= 8);
+
+	/* Always hand back a terminated string: the switch below writes
+	 * nothing for element kinds it does not know about, and callers
+	 * format the buffer with "%s" unconditionally.
+	 */
+	buf[0] = '\0';
+
+	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+		/* Vector types get a "v<N>" prefix before the element name. */
+		int ret = snprintf(buf, bufsize, "v%u",
+				   LLVMGetVectorSize(type));
+
+		/* Treat truncation like an encoding error: advancing buf by
+		 * a length that was not actually written would step past the
+		 * buffer and wrap the unsigned bufsize.
+		 */
+		if (ret < 0 || (unsigned)ret >= bufsize) {
+			char *type_name = LLVMPrintTypeToString(type);
+			fprintf(stderr, "Error building type name for: %s\n",
+				type_name);
+			/* The string is owned by the caller of
+			 * LLVMPrintTypeToString and must be released.
+			 */
+			LLVMDisposeMessage(type_name);
+			buf[0] = '\0';
+			return;
+		}
+		elem_type = LLVMGetElementType(type);
+		buf += ret;
+		bufsize -= ret;
+	}
+
+	/* Append the scalar (element) type name: i<width>, f32 or f64. */
+	switch (LLVMGetTypeKind(elem_type)) {
+	default:
+		break;
+	case LLVMIntegerTypeKind:
+		/* LLVMGetIntTypeWidth returns unsigned, hence %u. */
+		snprintf(buf, bufsize, "i%u", LLVMGetIntTypeWidth(elem_type));
+		break;
+	case LLVMFloatTypeKind:
+		snprintf(buf, bufsize, "f32");
+		break;
+	case LLVMDoubleTypeKind:
+		snprintf(buf, bufsize, "f64");
+		break;
+	}
}
static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
const struct tgsi_full_instruction *inst = emit_data->inst;
LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
-
- emit_data->args[emit_data->arg_count++] = i1false; /* r128 */
- emit_data->args[emit_data->arg_count++] =
- tgsi_is_array_image(target) ? i1true : i1false; /* da */
- if (!atomic) {
- emit_data->args[emit_data->arg_count++] =
- inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
- i1true : i1false; /* glc */
+ LLVMValueRef r128 = i1false;
+ LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
+ LLVMValueRef glc =
+ inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
+ i1true : i1false;
+ LLVMValueRef slc = i1false;
+ LLVMValueRef lwe = i1false;
+
+ if (atomic || (HAVE_LLVM <= 0x0309)) {
+ emit_data->args[emit_data->arg_count++] = r128;
+ emit_data->args[emit_data->arg_count++] = da;
+ if (!atomic) {
+ emit_data->args[emit_data->arg_count++] = glc;
+ }
+ emit_data->args[emit_data->arg_count++] = slc;
+ return;
}
- emit_data->args[emit_data->arg_count++] = i1false; /* slc */
+
+ /* HAVE_LLVM >= 0x0400 */
+ emit_data->args[emit_data->arg_count++] = glc;
+ emit_data->args[emit_data->arg_count++] = slc;
+ emit_data->args[emit_data->arg_count++] = lwe;
+ emit_data->args[emit_data->arg_count++] = da;
}
/**
emit_data->output[emit_data->chan] = lp_build_gather_values(gallivm, channels, 4);
}
+/**
+ * Write the full name of an image intrinsic into \p out_name (at most
+ * \p out_len bytes, via snprintf).
+ *
+ * When HAVE_LLVM <= 0x0309 the name is "<base_name>.<coords type>";
+ * otherwise it is "<base_name>.<data type>.<coords type>.<rsrc type>".
+ * Each type suffix is produced by build_type_name_for_intr().
+ */
+static void get_image_intr_name(const char *base_name,
+				LLVMTypeRef data_type,
+				LLVMTypeRef coords_type,
+				LLVMTypeRef rsrc_type,
+				char *out_name, unsigned out_len)
+{
+	char coords_suffix[8];
+	char data_suffix[8];
+	char rsrc_suffix[8];
+
+	build_type_name_for_intr(coords_type, coords_suffix,
+				 sizeof(coords_suffix));
+
+	/* Older LLVM: only the coordinate type is part of the name. */
+	if (HAVE_LLVM <= 0x0309) {
+		snprintf(out_name, out_len, "%s.%s", base_name, coords_suffix);
+		return;
+	}
+
+	build_type_name_for_intr(data_type, data_suffix, sizeof(data_suffix));
+	build_type_name_for_intr(rsrc_type, rsrc_suffix, sizeof(rsrc_suffix));
+	snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
+		 data_suffix, coords_suffix, rsrc_suffix);
+}
+
static void load_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
- char intrinsic_name[32];
- char coords_type[8];
+ char intrinsic_name[64];
if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
load_emit_memory(ctx, emit_data);
emit_data->args, emit_data->arg_count,
LLVMReadOnlyAttribute);
} else {
- build_int_type_name(LLVMTypeOf(emit_data->args[0]),
- coords_type, sizeof(coords_type));
-
- snprintf(intrinsic_name, sizeof(intrinsic_name),
- "llvm.amdgcn.image.load.%s", coords_type);
+ get_image_intr_name("llvm.amdgcn.image.load",
+ emit_data->dst_type, /* vdata */
+ LLVMTypeOf(emit_data->args[0]), /* coords */
+ LLVMTypeOf(emit_data->args[1]), /* rsrc */
+ intrinsic_name, sizeof(intrinsic_name));
emit_data->output[emit_data->chan] =
lp_build_intrinsic(
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
unsigned target = inst->Memory.Texture;
- char intrinsic_name[32];
- char coords_type[8];
+ char intrinsic_name[64];
if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
store_emit_memory(ctx, emit_data);
emit_data->dst_type, emit_data->args,
emit_data->arg_count, 0);
} else {
- build_int_type_name(LLVMTypeOf(emit_data->args[1]),
- coords_type, sizeof(coords_type));
- snprintf(intrinsic_name, sizeof(intrinsic_name),
- "llvm.amdgcn.image.store.%s", coords_type);
+ get_image_intr_name("llvm.amdgcn.image.store",
+ LLVMTypeOf(emit_data->args[0]), /* vdata */
+ LLVMTypeOf(emit_data->args[1]), /* coords */
+ LLVMTypeOf(emit_data->args[2]), /* rsrc */
+ intrinsic_name, sizeof(intrinsic_name));
emit_data->output[emit_data->chan] =
lp_build_intrinsic(
snprintf(intrinsic_name, sizeof(intrinsic_name),
"llvm.amdgcn.buffer.atomic.%s", action->intr_name);
} else {
+ LLVMValueRef coords;
char coords_type[8];
- build_int_type_name(LLVMTypeOf(emit_data->args[1]),
- coords_type, sizeof(coords_type));
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+ coords = emit_data->args[2];
+ else
+ coords = emit_data->args[1];
+
+ build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type));
snprintf(intrinsic_name, sizeof(intrinsic_name),
"llvm.amdgcn.image.atomic.%s.%s",
action->intr_name, coords_type);
/* Pack depth comparison value */
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
+ LLVMValueRef z;
+
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
+ z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
} else {
assert(ref_pos >= 0);
- address[count++] = coords[ref_pos];
+ z = coords[ref_pos];
}
+
+ /* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+ * so the depth comparison value isn't clamped for Z16 and
+ * Z24 anymore. Do it manually here.
+ *
+ * It's unnecessary if the original texture format was
+ * Z32_FLOAT, but we don't know that here.
+ */
+ if (ctx->screen->b.chip_class == VI)
+ z = radeon_llvm_saturate(bld_base, z);
+
+ address[count++] = z;
}
/* Pack user derivatives */
}
/* Add the type and suffixes .c, .o if needed. */
- build_int_type_name(LLVMTypeOf(emit_data->args[0]), type, sizeof(type));
+ build_type_name_for_intr(LLVMTypeOf(emit_data->args[0]), type, sizeof(type));
sprintf(intr_name, "%s%s%s%s.%s",
name, is_shadow ? ".c" : "", infix,
has_offset ? ".o" : "", type);
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- const struct tgsi_full_instruction *inst = emit_data->inst;
- unsigned opcode = inst->Instruction.Opcode;
- LLVMValueRef indices[2];
- LLVMValueRef store_ptr, load_ptr0, load_ptr1;
- LLVMValueRef tl, trbl, result[4];
- LLVMValueRef tl_tid, trbl_tid;
- unsigned swizzle[4];
- unsigned c;
+ unsigned opcode = emit_data->info->opcode;
+ LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2];
int idx;
unsigned mask;
- indices[0] = bld_base->uint_bld.zero;
- indices[1] = get_thread_id(ctx);
- store_ptr = LLVMBuildGEP(gallivm->builder, ctx->lds,
- indices, 2, "");
+ thread_id = get_thread_id(ctx);
if (opcode == TGSI_OPCODE_DDX_FINE)
mask = TID_MASK_LEFT;
else
mask = TID_MASK_TOP_LEFT;
- tl_tid = LLVMBuildAnd(gallivm->builder, indices[1],
+ tl_tid = LLVMBuildAnd(gallivm->builder, thread_id,
lp_build_const_int32(gallivm, mask), "");
- indices[1] = tl_tid;
- load_ptr0 = LLVMBuildGEP(gallivm->builder, ctx->lds,
- indices, 2, "");
/* for DDX we want to next X pixel, DDY next Y pixel. */
idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
- trbl_tid = LLVMBuildAdd(gallivm->builder, indices[1],
+ trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid,
lp_build_const_int32(gallivm, idx), "");
- indices[1] = trbl_tid;
- load_ptr1 = LLVMBuildGEP(gallivm->builder, ctx->lds,
- indices, 2, "");
-
- for (c = 0; c < 4; ++c) {
- unsigned i;
- LLVMValueRef val;
- LLVMValueRef args[2];
-
- swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c);
- for (i = 0; i < c; ++i) {
- if (swizzle[i] == swizzle[c]) {
- result[c] = result[i];
- break;
- }
- }
- if (i != c)
- continue;
-
- val = LLVMBuildBitCast(gallivm->builder,
- lp_build_emit_fetch(bld_base, inst, 0, c),
- ctx->i32, "");
- if ((HAVE_LLVM >= 0x0309) && ctx->screen->b.family >= CHIP_TONGA) {
+ val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
- args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
- lp_build_const_int32(gallivm, 4), "");
- args[1] = val;
- tl = lp_build_intrinsic(gallivm->builder,
+ if (ctx->screen->has_ds_bpermute) {
+ args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
+ lp_build_const_int32(gallivm, 4), "");
+ args[1] = val;
+ tl = lp_build_intrinsic(gallivm->builder,
"llvm.amdgcn.ds.bpermute", ctx->i32,
args, 2, LLVMReadNoneAttribute);
- args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
- lp_build_const_int32(gallivm, 4), "");
- trbl = lp_build_intrinsic(gallivm->builder,
- "llvm.amdgcn.ds.bpermute", ctx->i32,
- args, 2, LLVMReadNoneAttribute);
- } else {
- LLVMBuildStore(gallivm->builder, val, store_ptr);
- tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
- trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
- }
- tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
- trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, "");
- result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, "");
+ args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
+ lp_build_const_int32(gallivm, 4), "");
+ trbl = lp_build_intrinsic(gallivm->builder,
+ "llvm.amdgcn.ds.bpermute", ctx->i32,
+ args, 2, LLVMReadNoneAttribute);
+ } else {
+ LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+
+ store_ptr = build_gep0(ctx, ctx->lds, thread_id);
+ load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
+ load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
+
+ LLVMBuildStore(gallivm->builder, val, store_ptr);
+ tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
+ trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
}
- emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
+ tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
+ trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, "");
+
+ emit_data->output[emit_data->chan] =
+ LLVMBuildFSub(gallivm->builder, trbl, tl, "");
}
/*
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMValueRef indices[2];
- LLVMValueRef store_ptr, load_ptr_x, load_ptr_y, load_ptr_ddx, load_ptr_ddy, temp, temp2;
- LLVMValueRef tl, tr, bl, result[4];
- unsigned c;
-
- indices[0] = bld_base->uint_bld.zero;
- indices[1] = get_thread_id(ctx);
- store_ptr = LLVMBuildGEP(gallivm->builder, ctx->lds,
- indices, 2, "");
-
- temp = LLVMBuildAnd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm, TID_MASK_LEFT), "");
-
- temp2 = LLVMBuildAnd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm, TID_MASK_TOP), "");
-
- indices[1] = temp;
- load_ptr_x = LLVMBuildGEP(gallivm->builder, ctx->lds,
- indices, 2, "");
-
- indices[1] = temp2;
- load_ptr_y = LLVMBuildGEP(gallivm->builder, ctx->lds,
- indices, 2, "");
-
- indices[1] = LLVMBuildAdd(gallivm->builder, temp,
- lp_build_const_int32(gallivm, 1), "");
- load_ptr_ddx = LLVMBuildGEP(gallivm->builder, ctx->lds,
- indices, 2, "");
-
- indices[1] = LLVMBuildAdd(gallivm->builder, temp2,
- lp_build_const_int32(gallivm, 2), "");
- load_ptr_ddy = LLVMBuildGEP(gallivm->builder, ctx->lds,
- indices, 2, "");
-
- for (c = 0; c < 2; ++c) {
- LLVMValueRef store_val;
- LLVMValueRef c_ll = lp_build_const_int32(gallivm, c);
-
- store_val = LLVMBuildExtractElement(gallivm->builder,
- interp_ij, c_ll, "");
- LLVMBuildStore(gallivm->builder,
- store_val,
- store_ptr);
-
- tl = LLVMBuildLoad(gallivm->builder, load_ptr_x, "");
- tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
-
- tr = LLVMBuildLoad(gallivm->builder, load_ptr_ddx, "");
- tr = LLVMBuildBitCast(gallivm->builder, tr, ctx->f32, "");
-
- result[c] = LLVMBuildFSub(gallivm->builder, tr, tl, "");
-
- tl = LLVMBuildLoad(gallivm->builder, load_ptr_y, "");
- tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
-
- bl = LLVMBuildLoad(gallivm->builder, load_ptr_ddy, "");
- bl = LLVMBuildBitCast(gallivm->builder, bl, ctx->f32, "");
+ LLVMValueRef result[4], a;
+ unsigned i;
- result[c + 2] = LLVMBuildFSub(gallivm->builder, bl, tl, "");
+ for (i = 0; i < 2; i++) {
+ a = LLVMBuildExtractElement(gallivm->builder, interp_ij,
+ LLVMConstInt(ctx->i32, i, 0), "");
+ result[i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDX, a);
+ result[2+i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDY, a);
}
return lp_build_gather_values(gallivm, result, 4);
}
intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
- for (chan = 0; chan < 2; chan++) {
+ for (chan = 0; chan < 4; chan++) {
LLVMValueRef args[4];
LLVMValueRef llvm_chan;
unsigned schan;
case PIPE_SHADER_COMPUTE:
params[SI_PARAM_GRID_SIZE] = v3i32;
+ params[SI_PARAM_BLOCK_SIZE] = v3i32;
params[SI_PARAM_BLOCK_ID] = v3i32;
last_sgpr = SI_PARAM_BLOCK_ID;
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
- assert(max_work_group_size);
+ if (!max_work_group_size) {
+ /* This is a variable group size compute shader,
+ * compile it for the maximum possible group size.
+ */
+ max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+ }
radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
"amdgpu-max-work-group-size",
for (; i < num_params; ++i)
shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
- if (bld_base->info &&
+ if (!ctx->screen->has_ds_bpermute &&
+ bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
LLVMDumpModule(bld_base->base.gallivm->module);
- radeon_llvm_finalize_module(&ctx->radeon_bld);
+ radeon_llvm_finalize_module(
+ &ctx->radeon_bld,
+ r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_GEOMETRY));
r = si_compile_llvm(sscreen, &ctx->shader->binary,
&ctx->shader->config, ctx->tm,
r600_can_dump_shader(&sscreen->b, ctx.type))
LLVMDumpModule(mod);
- radeon_llvm_finalize_module(&ctx.radeon_bld);
+ radeon_llvm_finalize_module(
+ &ctx.radeon_bld,
+ r600_extra_shader_checks(&sscreen->b, ctx.type));
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
mod, debug, ctx.type, "TGSI shader");
unsigned max_vgprs = 256;
unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
unsigned max_sgprs_per_wave = 128;
- unsigned min_waves_per_cu =
- DIV_ROUND_UP(props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
- props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
- props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH],
- wave_size);
+ unsigned max_block_threads;
+
+ if (props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH])
+ max_block_threads = props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+ props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+ props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
+ else
+ max_block_threads = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+
+ unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
max_vgprs = max_vgprs / min_waves_per_simd;
/* Compile. */
si_llvm_build_ret(&ctx, ret);
- radeon_llvm_finalize_module(&ctx.radeon_bld);
+ radeon_llvm_finalize_module(
+ &ctx.radeon_bld,
+ r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_VERTEX));
if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
gallivm->module, debug, ctx.type,
/* Compile. */
LLVMBuildRetVoid(gallivm->builder);
- radeon_llvm_finalize_module(&ctx.radeon_bld);
+ radeon_llvm_finalize_module(
+ &ctx.radeon_bld,
+ r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_VERTEX));
if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
gallivm->module, debug, ctx.type,
/* Compile. */
LLVMBuildRetVoid(gallivm->builder);
- radeon_llvm_finalize_module(&ctx.radeon_bld);
+ radeon_llvm_finalize_module(
+ &ctx.radeon_bld,
+ r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_TESS_CTRL));
if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
gallivm->module, debug, ctx.type,
}
}
- /* Interpolate colors. */
- for (i = 0; i < 2; i++) {
- unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
- unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
- key->ps_prolog.face_vgpr_index;
- LLVMValueRef interp[2], color[4];
- LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;
-
- if (!writemask)
- continue;
-
- /* If the interpolation qualifier is not CONSTANT (-1). */
- if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
- unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
- key->ps_prolog.color_interp_vgpr_index[i];
-
- /* Get the (i,j) updated by bc_optimize handling. */
- interp[0] = LLVMBuildExtractValue(gallivm->builder, ret,
- interp_vgpr, "");
- interp[1] = LLVMBuildExtractValue(gallivm->builder, ret,
- interp_vgpr + 1, "");
- interp_ij = lp_build_gather_values(gallivm, interp, 2);
- interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
- ctx.v2i32, "");
- }
-
- /* Use the absolute location of the input. */
- prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
-
- if (key->ps_prolog.states.color_two_side) {
- face = LLVMGetParam(func, face_vgpr);
- face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
- }
-
- interp_fs_input(&ctx,
- key->ps_prolog.color_attr_index[i],
- TGSI_SEMANTIC_COLOR, i,
- key->ps_prolog.num_interp_inputs,
- key->ps_prolog.colors_read, interp_ij,
- prim_mask, face, color);
-
- while (writemask) {
- unsigned chan = u_bit_scan(&writemask);
- ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
- num_params++, "");
- }
- }
-
/* Force per-sample interpolation. */
if (key->ps_prolog.states.force_persp_sample_interp) {
unsigned i, base = key->ps_prolog.num_input_sgprs;
linear_center[i], base + 10 + i, "");
}
+ /* Interpolate colors. */
+ for (i = 0; i < 2; i++) {
+ unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
+ unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.face_vgpr_index;
+ LLVMValueRef interp[2], color[4];
+ LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;
+
+ if (!writemask)
+ continue;
+
+ /* If the interpolation qualifier is not CONSTANT (-1). */
+ if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
+ unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.color_interp_vgpr_index[i];
+
+ /* Get the (i,j) updated by bc_optimize handling. */
+ interp[0] = LLVMBuildExtractValue(gallivm->builder, ret,
+ interp_vgpr, "");
+ interp[1] = LLVMBuildExtractValue(gallivm->builder, ret,
+ interp_vgpr + 1, "");
+ interp_ij = lp_build_gather_values(gallivm, interp, 2);
+ interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
+ ctx.v2i32, "");
+ }
+
+ /* Use the absolute location of the input. */
+ prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
+
+ if (key->ps_prolog.states.color_two_side) {
+ face = LLVMGetParam(func, face_vgpr);
+ face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
+ }
+
+ interp_fs_input(&ctx,
+ key->ps_prolog.color_attr_index[i],
+ TGSI_SEMANTIC_COLOR, i,
+ key->ps_prolog.num_interp_inputs,
+ key->ps_prolog.colors_read, interp_ij,
+ prim_mask, face, color);
+
+ while (writemask) {
+ unsigned chan = u_bit_scan(&writemask);
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
+ num_params++, "");
+ }
+ }
+
/* Tell LLVM to insert WQM instruction sequence when needed. */
if (key->ps_prolog.wqm) {
LLVMAddTargetDependentFunctionAttr(func,
/* Compile. */
si_llvm_build_ret(&ctx, ret);
- radeon_llvm_finalize_module(&ctx.radeon_bld);
+ radeon_llvm_finalize_module(
+ &ctx.radeon_bld,
+ r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_FRAGMENT));
if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
gallivm->module, debug, ctx.type,
/* Compile. */
LLVMBuildRetVoid(gallivm->builder);
- radeon_llvm_finalize_module(&ctx.radeon_bld);
+ radeon_llvm_finalize_module(
+ &ctx.radeon_bld,
+ r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_FRAGMENT));
if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
gallivm->module, debug, ctx.type,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
- struct si_shader *mainp = shader->selector->main_shader_part;
+ struct si_shader_selector *sel = shader->selector;
+ struct si_shader *mainp = sel->main_shader_part;
int r;
/* LS, ES, VS are compiled on demand if the main part hasn't been
* compiled for that stage.
*/
if (!mainp ||
- (shader->selector->type == PIPE_SHADER_VERTEX &&
+ (sel->type == PIPE_SHADER_VERTEX &&
(shader->key.vs.as_es != mainp->key.vs.as_es ||
shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
- (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
+ (sel->type == PIPE_SHADER_TESS_EVAL &&
shader->key.tes.as_es != mainp->key.tes.as_es) ||
- (shader->selector->type == PIPE_SHADER_TESS_CTRL &&
+ (sel->type == PIPE_SHADER_TESS_CTRL &&
shader->key.tcs.epilog.inputs_to_copy) ||
- shader->selector->type == PIPE_SHADER_COMPUTE) {
+ sel->type == PIPE_SHADER_COMPUTE) {
/* Monolithic shader (compiled as a whole, has many variants,
* may take a long time to compile).
*/
shader->info.nr_param_exports = mainp->info.nr_param_exports;
/* Select prologs and/or epilogs. */
- switch (shader->selector->type) {
+ switch (sel->type) {
case PIPE_SHADER_VERTEX:
if (!si_shader_select_vs_parts(sscreen, tm, shader, debug))
return -1;
}
si_fix_num_sgprs(shader);
- si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
+ si_shader_dump(sscreen, shader, debug, sel->info.processor,
stderr);
/* Upload. */