LLVMValueRef *ssa_defs;
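+ /* Byte-array alloca backing the NIR load/store_scratch intrinsics
+  * (created in setup_scratch). */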
+ LLVMValueRef scratch;
+
struct hash_table *defs;
struct hash_table *phis;
struct hash_table *vars;
LLVMValueRef compare,
LLVMValueRef exchange)
{
- LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
- LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
- LLVMBasicBlockRef start_block = LLVMGetInsertBlock(ctx->ac.builder);
- ac_build_ifcc(&ctx->ac, cond, -1);
- LLVMBasicBlockRef then_block = LLVMGetInsertBlock(ctx->ac.builder);
+ LLVMBasicBlockRef start_block = NULL, then_block = NULL;
+ if (ctx->abi->robust_buffer_access) {
+    /* Dword 2 of the buffer descriptor holds its size (num_records);
+     * only perform the atomic when the offset is in bounds. */
+    LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
+    LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
+    start_block = LLVMGetInsertBlock(ctx->ac.builder);
+    ac_build_ifcc(&ctx->ac, cond, -1);
+    then_block = LLVMGetInsertBlock(ctx->ac.builder);
+ }
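+ /* Rebuild a 64-bit pointer from the base-address dwords of the
+  * descriptor. */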
LLVMValueRef ptr_parts[2] = {
ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
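+ /* LLVM's cmpxchg returns { old value, i1 success }; keep only the
+  * old value. */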
result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
- ac_build_endif(&ctx->ac, -1);
- LLVMBasicBlockRef incoming_blocks[2] = {
-    start_block,
-    then_block,
- };
- LLVMValueRef incoming_values[2] = {
-    LLVMConstInt(ctx->ac.i64, 0, 0),
-    result,
- };
- LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
- LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
- return ret;
+ if (ctx->abi->robust_buffer_access) {
+    ac_build_endif(&ctx->ac, -1);
+
+    /* The out-of-bounds path falls through from start_block and yields
+     * 0; merge it with the atomic result from then_block via a phi. */
+    LLVMBasicBlockRef incoming_blocks[2] = {
+       start_block,
+       then_block,
+    };
+    LLVMValueRef incoming_values[2] = {
+       LLVMConstInt(ctx->ac.i64, 0, 0),
+       result,
+    };
+    LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
+    LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
+    return ret;
+ } else {
+    return result;
+ }
}
static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
case nir_intrinsic_mbcnt_amd:
result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
break;
+ case nir_intrinsic_load_scratch: {
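+    /* src[0] is the byte offset into the scratch area. */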
+ LLVMValueRef offset = get_src(ctx, instr->src[0]);
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+ offset);
+ LLVMTypeRef comp_type =
+ LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMTypeRef vec_type =
+ instr->dest.ssa.num_components == 1 ? comp_type :
+ LLVMVectorType(comp_type, instr->dest.ssa.num_components);
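+    /* ctx->scratch is an i8 array, so cast the element pointer to the
+     * destination type, preserving the address space. */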
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+ LLVMPointerType(vec_type, addr_space), "");
+ result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ break;
+ }
+ case nir_intrinsic_store_scratch: {
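+    /* src[0] is the value to store, src[1] the byte offset; the
+     * pointer is cast the same way as for load_scratch. */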
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+ offset);
+ LLVMTypeRef comp_type =
+ LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
+ LLVMTypeRef vec_type =
+ instr->src[0].ssa->num_components == 1 ? comp_type :
+ LLVMVectorType(comp_type, instr->src[0].ssa->num_components);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+ LLVMPointerType(vec_type, addr_space), "");
+ LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr);
+ break;
+ }
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
}
}
+static void
+setup_scratch(struct ac_nir_context *ctx,
+ struct nir_shader *shader)
+{
+ if (shader->scratch_size == 0)
+ return;
+
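+ /* A single i8 array alloca backs every scratch access; the byte
+  * offsets produced by nir_lower_vars_to_scratch index into it. */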
+ ctx->scratch = ac_build_alloca_undef(&ctx->ac,
+ LLVMArrayType(ctx->ac.i8, shader->scratch_size),
+ "scratch");
+}
+
static void
setup_shared(struct ac_nir_context *ctx,
struct nir_shader *nir)
ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
setup_locals(&ctx, func);
+ setup_scratch(&ctx, nir);
if (gl_shader_stage_is_compute(nir->info.stage))
setup_shared(&ctx, nir);
void
ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
{
+ /* Lower large variables to scratch first so that we won't bloat the
+ * shader by generating large if ladders for them. We later lower
+ * scratch to allocas, assuming LLVM won't generate VGPR indexing.
+ */
+ NIR_PASS_V(nir, nir_lower_vars_to_scratch,
+ nir_var_function_temp,
+ 256, /* size threshold in bytes */
+ glsl_get_natural_size_align_bytes);
+
/* While it would be nice not to have this flag, we are constrained
* by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
*/