Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
*/
void
ac_llvm_context_init(struct ac_llvm_context *ctx,
*/
void
ac_llvm_context_init(struct ac_llvm_context *ctx,
- enum chip_class chip_class, enum radeon_family family)
+ enum chip_class chip_class, enum radeon_family family,
+ unsigned wave_size)
ctx->chip_class = chip_class;
ctx->family = family;
ctx->chip_class = chip_class;
ctx->family = family;
+ ctx->wave_size = wave_size;
ctx->module = NULL;
ctx->builder = NULL;
ctx->module = NULL;
ctx->builder = NULL;
"llvm.amdgcn.mbcnt.lo", ctx->i32,
tid_args, 2, AC_FUNC_ATTR_READNONE);
"llvm.amdgcn.mbcnt.lo", ctx->i32,
tid_args, 2, AC_FUNC_ATTR_READNONE);
- tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
- ctx->i32, tid_args,
- 2, AC_FUNC_ATTR_READNONE);
- set_range_metadata(ctx, tid, 0, 64);
+ if (ctx->wave_size == 32) {
+ tid = tid_args[1];
+ } else {
+ tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
+ ctx->i32, tid_args,
+ 2, AC_FUNC_ATTR_READNONE);
+ }
+ set_range_metadata(ctx, tid, 0, ctx->wave_size);
get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
LLVMTypeOf(identity), "");
get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
LLVMTypeOf(identity), "");
- result = ac_build_scan(ctx, op, result, identity, 64, true);
+ result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true);
return ac_build_wwm(ctx, result);
}
return ac_build_wwm(ctx, result);
}
get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
LLVMTypeOf(identity), "");
get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
LLVMTypeOf(identity), "");
- result = ac_build_scan(ctx, op, result, identity, 64, false);
+ result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false);
return ac_build_wwm(ctx, result);
}
return ac_build_wwm(ctx, result);
}
if (ws->maxwaves <= 1)
return;
if (ws->maxwaves <= 1)
return;
- const LLVMValueRef i32_63 = LLVMConstInt(ctx->i32, 63, false);
+ const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false);
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef tid = ac_get_thread_id(ctx);
LLVMValueRef tmp;
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef tid = ac_get_thread_id(ctx);
LLVMValueRef tmp;
- tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, i32_63, "");
+ tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, "");
ac_build_ifcc(ctx, tmp, 1000);
LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, ""));
ac_build_endif(ctx, 1000);
ac_build_ifcc(ctx, tmp, 1000);
LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, ""));
ac_build_endif(ctx, 1000);
enum chip_class chip_class;
enum radeon_family family;
enum chip_class chip_class;
enum radeon_family family;
LLVMValueRef lds;
};
void
ac_llvm_context_init(struct ac_llvm_context *ctx,
LLVMValueRef lds;
};
void
ac_llvm_context_init(struct ac_llvm_context *ctx,
- enum chip_class chip_class, enum radeon_family family);
+ enum chip_class chip_class, enum radeon_family family,
+ unsigned wave_size);
void
ac_llvm_context_dispose(struct ac_llvm_context *ctx);
void
ac_llvm_context_dispose(struct ac_llvm_context *ctx);
ctx.options = options;
ctx.shader_info = shader_info;
ctx.options = options;
ctx.shader_info = shader_info;
- ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
+ ac_llvm_context_init(&ctx.ac, options->chip_class, options->family, 64);
ctx.context = ctx.ac.context;
ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
ctx.context = ctx.ac.context;
ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
ctx.options = options;
ctx.shader_info = shader_info;
ctx.options = options;
ctx.shader_info = shader_info;
- ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
+ ac_llvm_context_init(&ctx.ac, options->chip_class, options->family, 64);
ctx.context = ctx.ac.context;
ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
ctx.context = ctx.ac.context;
ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
ctx->screen = sscreen;
ctx->compiler = compiler;
ctx->screen = sscreen;
ctx->compiler = compiler;
- ac_llvm_context_init(&ctx->ac, sscreen->info.chip_class, sscreen->info.family);
+ ac_llvm_context_init(&ctx->ac, sscreen->info.chip_class, sscreen->info.family, 64);
ctx->ac.module = ac_create_module(compiler->tm, ctx->ac.context);
enum ac_float_mode float_mode =
ctx->ac.module = ac_create_module(compiler->tm, ctx->ac.context);
enum ac_float_mode float_mode =