ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
+ ctx->i128 = LLVMIntTypeInContext(ctx->context, 128);
ctx->intptr = ctx->i32;
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
+ ctx->i128_0 = LLVMConstInt(ctx->i128, 0, false);
+ ctx->i128_1 = LLVMConstInt(ctx->i128, 1, false);
ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
switch (bitsize) {
+ case 128:
+ result = ac_build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128,
+ (LLVMValueRef []) { src0 }, 1,
+ AC_FUNC_ATTR_READNONE);
+ result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+ break;
case 64:
result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64,
(LLVMValueRef []) { src0 }, 1,
if (cluster_size == 32) return ac_build_wwm(ctx, result);
if (ctx->chip_class >= GFX8) {
- if (ctx->chip_class >= GFX10)
- swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
- else
- swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
- result = ac_build_alu_op(ctx, result, swap, op);
- result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
+ if (ctx->wave_size == 64) {
+ if (ctx->chip_class >= GFX10)
+ swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
+ else
+ swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
+ result = ac_build_alu_op(ctx, result, swap, op);
+ result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
+ }
+
return ac_build_wwm(ctx, result);
} else {
swap = ac_build_readlane(ctx, result, ctx->i32_0);
ac_build_endif(ctx, 5020);
}
+/* Pack an NGG primitive into the 32-bit primitive export word.
+ *
+ * Word layout:
+ *   - bits  0..8:  index 0      - bit  9: edge flag 0
+ *   - bits 10..18: index 1      - bit 19: edge flag 1
+ *   - bits 20..28: index 2      - bit 29: edge flag 2
+ *   - bit 31:      null primitive (skip)
+ *
+ * Reads prim->isnull, prim->num_vertices, prim->index[] and
+ * prim->edgeflag[]. The indices are shifted into place without being
+ * masked, and isnull/edgeflag are zero-extended to i32 before shifting.
+ * Returns the OR of all shifted fields.
+ */
+LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx,
+				 const struct ac_ngg_prim *prim)
+{
+	LLVMBuilderRef b = ctx->builder;
+
+	/* Start with the null-primitive flag in bit 31. */
+	LLVMValueRef packed = LLVMBuildZExt(b, prim->isnull, ctx->i32, "");
+	packed = LLVMBuildShl(b, packed, LLVMConstInt(ctx->i32, 31, false), "");
+
+	/* Each vertex owns a 10-bit slot: the index first, then its edge flag. */
+	for (unsigned vtx = 0; vtx < prim->num_vertices; ++vtx) {
+		const unsigned slot_lsb = 10 * vtx;
+		LLVMValueRef index_bits, edge_bit;
+
+		index_bits = LLVMBuildShl(b, prim->index[vtx],
+					  LLVMConstInt(ctx->i32, slot_lsb, false), "");
+		packed = LLVMBuildOr(b, packed, index_bits, "");
+
+		edge_bit = LLVMBuildZExt(b, prim->edgeflag[vtx], ctx->i32, "");
+		edge_bit = LLVMBuildShl(b, edge_bit,
+					LLVMConstInt(ctx->i32, slot_lsb + 9, false), "");
+		packed = LLVMBuildOr(b, packed, edge_bit, "");
+	}
+
+	return packed;
+}
+
+/* Emit the primitive export for "prim".
+ *
+ * If prim->passthrough is non-NULL it is exported as-is; otherwise the
+ * export word is built with ac_pack_prim_export(). The word is bitcast to
+ * f32 and placed in out[0]; out[1..3] are undef. The export goes to
+ * V_008DFC_SQ_EXP_PRIM with enabled_channels = 1 and the "done" flag set.
+ */
+void ac_build_export_prim(struct ac_llvm_context *ctx,
+			  const struct ac_ngg_prim *prim)
+{
+	struct ac_export_args exp;
+	LLVMValueRef prim_word;
+
+	prim_word = prim->passthrough ? prim->passthrough
+				      : ac_pack_prim_export(ctx, prim);
+
+	exp.target = V_008DFC_SQ_EXP_PRIM;
+	exp.enabled_channels = 1;
+	exp.done = true;
+	exp.valid_mask = false;
+	exp.compr = false;
+
+	exp.out[0] = LLVMBuildBitCast(ctx->builder, prim_word, ctx->f32, "");
+	for (unsigned chan = 1; chan < 4; ++chan)
+		exp.out[chan] = LLVMGetUndef(ctx->f32);
+
+	ac_build_export(ctx, &exp);
+}
+
static LLVMTypeRef
arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx)
{
return main_function;
}
+/* Emit a call to the inline assembly string "s_endpgm" at the builder's
+ * current position. The asm takes no operands, returns void, has an empty
+ * constraint string, and is created with the side-effects flag set.
+ */
+void ac_build_s_endpgm(struct ac_llvm_context *ctx)
+{
+	LLVMTypeRef void_fn = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+
+	LLVMBuildCall(ctx->builder,
+		      LLVMConstInlineAsm(void_fn, "s_endpgm", "", true, false),
+		      NULL, 0, "");
+}
+
+/* Count the set bits of "mask" at bit positions below "index".
+ *
+ * "index" is zero-extended to the type of "mask", the prefix mask
+ * (1 << index) - 1 is ANDed with "mask", and the result is passed to
+ * ac_build_bit_count().
+ */
+LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx,
+				LLVMValueRef mask, LLVMValueRef index)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMTypeRef mask_type = LLVMTypeOf(mask);
+	LLVMValueRef one = LLVMConstInt(mask_type, 1, 0);
+
+	/* (1 << index) - 1 has exactly the bits below "index" set. */
+	LLVMValueRef wide_index = LLVMBuildZExt(b, index, mask_type, "");
+	LLVMValueRef below_index = LLVMBuildShl(b, one, wide_index, "");
+	below_index = LLVMBuildSub(b, below_index, one, "");
+
+	return ac_build_bit_count(ctx, LLVMBuildAnd(b, mask, below_index, ""));
+}
+
+/* Compute the prefix sum of the "mask" bit array with 128 elements (bits).
+ *
+ * mask[0] holds bits 0..63 and mask[1] holds bits 64..127. The result is the
+ * number of set bits at positions below "index", built as the sum of two
+ * ac_build_bit_count() results. "index" is compared against i32 constants,
+ * so it is expected to be an i32 value (presumably in the range 0..128;
+ * TODO confirm against callers).
+ */
+LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx,
+ LLVMValueRef mask[2], LLVMValueRef index)
+{
+ LLVMBuilderRef builder = ctx->builder;
+#if 0
+ /* Reference version using i128 (disabled by the #if 0 above). */
+ LLVMValueRef input_mask =
+ LLVMBuildBitCast(builder, ac_build_gather_values(ctx, mask, 2), ctx->i128, "");
+
+ return ac_prefix_bitcount(ctx, input_mask, index);
+#else
+ /* Optimized version using 2 64-bit masks. */
+ LLVMValueRef is_hi, is_0, c64, c128, all_bits;
+ LLVMValueRef prefix_mask[2], shift[2], mask_bcnt0, prefix_bcnt[2];
+
+ /* Compute the 128-bit prefix mask. */
+ c64 = LLVMConstInt(ctx->i32, 64, 0);
+ c128 = LLVMConstInt(ctx->i32, 128, 0);
+ all_bits = LLVMConstInt(ctx->i64, UINT64_MAX, 0);
+ /* The first index that can have non-zero high bits in the prefix mask is 65.
+ * is_hi therefore selects between "prefix ends inside mask[0]" (index <= 64)
+ * and "prefix covers all of mask[0] and part or all of mask[1]" (index > 64).
+ */
+ is_hi = LLVMBuildICmp(builder, LLVMIntUGT, index, c64, "");
+ is_0 = LLVMBuildICmp(builder, LLVMIntEQ, index, ctx->i32_0, "");
+ mask_bcnt0 = ac_build_bit_count(ctx, mask[0]); /* bit count of the whole low half */
+
+ for (unsigned i = 0; i < 2; i++) {
+ shift[i] = LLVMBuildSub(builder, i ? c128 : c64, index, "");
+ /* Shifting the all-ones word right by shift[i] leaves its low
+ * (index - 64 * i) bits set, which is the prefix mask for half i.
+ *
+ * For i==0, index==0, the right shift by 64 doesn't give the desired result,
+ * so we handle it by the is_0 select.
+ * For i==1, index==64, same story, so we handle it by the last is_hi select.
+ * For i==0, index==64, we shift by 0, which is what we want.
+ */
+ prefix_mask[i] = LLVMBuildLShr(builder, all_bits,
+ LLVMBuildZExt(builder, shift[i], ctx->i64, ""), "");
+ prefix_mask[i] = LLVMBuildAnd(builder, mask[i], prefix_mask[i], "");
+ prefix_bcnt[i] = ac_build_bit_count(ctx, prefix_mask[i]);
+ }
+
+ prefix_bcnt[0] = LLVMBuildSelect(builder, is_0, ctx->i32_0, prefix_bcnt[0], ""); /* index == 0: empty prefix */
+ prefix_bcnt[0] = LLVMBuildSelect(builder, is_hi, mask_bcnt0, prefix_bcnt[0], ""); /* index > 64: all of mask[0] counts */
+ prefix_bcnt[1] = LLVMBuildSelect(builder, is_hi, prefix_bcnt[1], ctx->i32_0, ""); /* index <= 64: nothing from mask[1] */
+
+ return LLVMBuildAdd(builder, prefix_bcnt[0], prefix_bcnt[1], "");
+#endif
+}