X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_llvm_build.c;h=c74a47a79984737178f29dcc21c9b3d84b84835d;hb=bedfa06eaf242a87c1b590f1504b8082d3d8ac1f;hp=8a329515b57b0877efea2e9c1cc8cf3a8cfbd836;hpb=94736d31c364635a76a11e0bd4f046a42d2221d5;p=mesa.git

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 8a329515b57..c74a47a7998 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -47,11 +47,12 @@
  */
 void
 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
-		     enum chip_class chip_class)
+		     enum chip_class chip_class, enum radeon_family family)
 {
 	LLVMValueRef args[1];
 
 	ctx->chip_class = chip_class;
+	ctx->family = family;
 
 	ctx->context = context;
 	ctx->module = NULL;
@@ -66,7 +67,10 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
 	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
 	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
 	ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
+	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+	ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
@@ -75,6 +79,9 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
 	ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
 	ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
 
+	ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
+	ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
+
 	ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
 						     "range", 5);
@@ -252,6 +259,20 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
 	}
 }
 
+/**
+ * Helper function that builds an LLVM IR PHI node and immediately adds
+ * incoming edges.
+ */
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+	     unsigned count_incoming, LLVMValueRef *values,
+	     LLVMBasicBlockRef *blocks)
+{
+	LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+	LLVMAddIncoming(phi, values, blocks, count_incoming);
+	return phi;
+}
+
 /* Prevent optimizations (at least of memory accesses) across the current
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
@@ -350,6 +371,28 @@ ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
 	return LLVMBuildOr(ctx->builder, all, none, "");
 }
 
+LLVMValueRef
+ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+			       unsigned value_count, unsigned component)
+{
+	LLVMValueRef vec = NULL;
+
+	if (value_count == 1) {
+		return values[component];
+	} else if (!value_count)
+		unreachable("value_count is 0");
+
+	for (unsigned i = component; i < value_count + component; i++) {
+		LLVMValueRef value = values[i];
+
+		if (i == component)
+			vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
+		LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
+		vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
+	}
+	return vec;
+}
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
 				LLVMValueRef *values,
@@ -397,6 +440,7 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
 {
 	LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
 
+	/* Use v_rcp_f32 instead of precise division. */
 	if (!LLVMIsConstant(ret))
 		LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
 	return ret;
 }
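The new ac_build_phi helper above collapses the usual LLVMBuildPhi-then-LLVMAddIncoming pair into one call. What follows is a minimal caller-side sketch, not part of this patch, of merging an if/else diamond; the helper and its block names are hypothetical:

static LLVMValueRef
emit_select_via_phi(struct ac_llvm_context *ctx, LLVMValueRef cond,
		    LLVMValueRef a, LLVMValueRef b)
{
	LLVMValueRef fn =
		LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
	LLVMBasicBlockRef then_bb =
		LLVMAppendBasicBlockInContext(ctx->context, fn, "then");
	LLVMBasicBlockRef else_bb =
		LLVMAppendBasicBlockInContext(ctx->context, fn, "else");
	LLVMBasicBlockRef merge_bb =
		LLVMAppendBasicBlockInContext(ctx->context, fn, "merge");

	/* Both arms only jump to the merge block; they exist to give the
	 * phi its two incoming edges. */
	LLVMBuildCondBr(ctx->builder, cond, then_bb, else_bb);
	LLVMPositionBuilderAtEnd(ctx->builder, then_bb);
	LLVMBuildBr(ctx->builder, merge_bb);
	LLVMPositionBuilderAtEnd(ctx->builder, else_bb);
	LLVMBuildBr(ctx->builder, merge_bb);

	LLVMPositionBuilderAtEnd(ctx->builder, merge_bb);
	LLVMValueRef incoming[2] = { a, b };
	LLVMBasicBlockRef preds[2] = { then_bb, else_bb };
	return ac_build_phi(ctx, LLVMTypeOf(a), 2, incoming, preds);
}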
@@ -438,12 +482,13 @@ build_cube_intrinsic(struct ac_llvm_context *ctx,
  * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
  * the selcoords major axis.
  */
-static void build_cube_select(LLVMBuilderRef builder,
+static void build_cube_select(struct ac_llvm_context *ctx,
 			      const struct cube_selection_coords *selcoords,
 			      const LLVMValueRef *coords,
 			      LLVMValueRef *out_st,
 			      LLVMValueRef *out_ma)
 {
+	LLVMBuilderRef builder = ctx->builder;
 	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
 	LLVMValueRef is_ma_positive;
 	LLVMValueRef sgn_ma;
@@ -465,24 +510,24 @@ static void build_cube_select(LLVMBuilderRef builder,
 	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
 
 	/* Select sc */
-	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+	tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
 	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
-		LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+		LLVMBuildSelect(builder, is_ma_z, sgn_ma,
 			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
 	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
 
 	/* Select tc */
 	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
-	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+	sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
 		LLVMConstReal(f32, -1.0), "");
 	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
 
 	/* Select ma */
 	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
 		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
-	sgn = LLVMBuildSelect(builder, is_ma_positive,
-		LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
-	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+	tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
+				 ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
+	*out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
 }
 
 void
@@ -570,7 +615,7 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 			 * seems awfully quiet about how textureGrad for cube
 			 * maps should be handled.
 			 */
-			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+			build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
 					  deriv_st, &deriv_ma);
 
 			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
@@ -702,32 +747,40 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
  * \param base_ptr Where the array starts.
  * \param index The element index into the array.
  * \param uniform Whether the base_ptr and index can be assumed to be
- *                dynamically uniform
+ *                dynamically uniform (i.e. load to an SGPR)
+ * \param invariant Whether the load is invariant (no other opcodes affect it)
  */
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
-		      LLVMValueRef base_ptr, LLVMValueRef index,
-		      bool uniform)
+static LLVMValueRef
+ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+		     LLVMValueRef index, bool uniform, bool invariant)
 {
-	LLVMValueRef pointer;
+	LLVMValueRef pointer, result;
 
 	pointer = ac_build_gep0(ctx, base_ptr, index);
 	if (uniform)
 		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
-	return LLVMBuildLoad(ctx->builder, pointer, "");
+	result = LLVMBuildLoad(ctx->builder, pointer, "");
+	if (invariant)
+		LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+	return result;
 }
 
-/**
- * Do a load from &base_ptr[index], but also add a flag that it's loading
- * a constant from a dynamically uniform index.
- */
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
-			    LLVMValueRef base_ptr, LLVMValueRef index)
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+			   LLVMValueRef index)
 {
-	LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
-	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
-	return result;
+	return ac_build_load_custom(ctx, base_ptr, index, false, false);
+}
+
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+				     LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	return ac_build_load_custom(ctx, base_ptr, index, false, true);
+}
+
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+				   LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	return ac_build_load_custom(ctx, base_ptr, index, true, true);
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
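The three wrappers above replace ac_build_indexed_load/ac_build_indexed_load_const with names that state the access pattern directly. A sketch of how call sites map over, with hypothetical descriptor-list and scratch-array values (none taken from this patch):

	/* Descriptor fetch: dynamically uniform and never written, so it
	 * may be loaded into an SGPR and hoisted freely. */
	LLVMValueRef desc = ac_build_load_to_sgpr(ctx, desc_list, desc_index);

	/* Read-only data reached through a possibly divergent index. */
	LLVMValueRef cval = ac_build_load_invariant(ctx, const_array, elem);

	/* Memory that may be rewritten between loads. */
	LLVMValueRef tmp = ac_build_load(ctx, scratch_array, elem);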
@@ -745,10 +798,13 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 			    bool glc,
 			    bool slc,
 			    bool writeonly_memory,
-			    bool has_add_tid)
+			    bool swizzle_enable_hint)
 {
-	/* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
-	if (!has_add_tid) {
+	/* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
+	 * (voffset is swizzled, but soffset isn't swizzled).
+	 * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
+	 */
+	if (!swizzle_enable_hint) {
 		/* Split 3 channel stores, because LLVM doesn't support 3-channel
 		 * intrinsics. */
 		if (num_channels == 3) {
@@ -762,11 +818,11 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 
 			ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
 						    soffset, inst_offset, glc, slc,
-						    writeonly_memory, has_add_tid);
+						    writeonly_memory, swizzle_enable_hint);
 			ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
 						    soffset, inst_offset + 8,
 						    glc, slc,
-						    writeonly_memory, has_add_tid);
+						    writeonly_memory, swizzle_enable_hint);
 			return;
 		}
@@ -920,8 +976,8 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
 		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
 		vindex,
 		voffset,
-		LLVMConstInt(ctx->i1, 0, 0), /* glc */
-		LLVMConstInt(ctx->i1, 0, 0), /* slc */
+		ctx->i1false, /* glc */
+		ctx->i1false, /* slc */
 	};
 
 	return ac_build_intrinsic(ctx,
@@ -1033,7 +1089,7 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 					   AC_FUNC_ATTR_READNONE |
 					   AC_FUNC_ATTR_CONVERGENT);
 	} else {
-		uint32_t masks[2];
+		uint32_t masks[2] = {};
 
 		switch (mask) {
 		case AC_TID_MASK_TOP_LEFT:
@@ -1052,6 +1108,8 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 			masks[0] = 0x80a0;
 			masks[1] = 0x80f5;
 			break;
+		default:
+			assert(0);
 		}
 
 		args[0] = val;
@@ -1122,7 +1180,7 @@ ac_build_umsb(struct ac_llvm_context *ctx,
 {
 	LLVMValueRef args[2] = {
 		arg,
-		LLVMConstInt(ctx->i1, 1, 0),
+		ctx->i1true,
 	};
 	LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32",
 					      dst_type, args, ARRAY_SIZE(args),
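At call sites, the renamed swizzle_enable_hint parameter from the store path above reads as follows; a sketch with placeholder resource and offset values, none of them from this patch:

	/* A single-dword store through a swizzled (e.g. tess-factor style)
	 * buffer: keep the hint enabled so soffset is not folded into
	 * voffset. */
	ac_build_buffer_store_dword(ctx, rsrc, value, 1,
				    voffset, soffset, /* inst_offset */ 0,
				    /* glc */ false, /* slc */ false,
				    /* writeonly_memory */ false,
				    /* swizzle_enable_hint */ true);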
@@ -1229,7 +1287,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 	LLVMTypeRef dst_type;
 	LLVMValueRef args[11];
 	unsigned num_args = 0;
-	const char *name;
+	const char *name = NULL;
 	char intr_name[128], type[64];
 
 	if (HAVE_LLVM >= 0x0400) {
@@ -1248,9 +1306,9 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 		args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
 		if (sample)
 			args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
-		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* glc */
-		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* slc */
-		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* lwe */
+		args[num_args++] = ctx->i1false; /* glc */
+		args[num_args++] = ctx->i1false; /* slc */
+		args[num_args++] = ctx->i1false; /* lwe */
 		args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);
 
 		switch (a->opcode) {
@@ -1377,20 +1435,26 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				   AC_FUNC_ATTR_LEGACY);
 }
 
-/**
- * KILL, AKA discard in GLSL.
- *
- * \param value  kill if value < 0.0 or value == NULL.
- */
-void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value)
+LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
 {
-	if (value) {
-		ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
-				   &value, 1, AC_FUNC_ATTR_LEGACY);
-	} else {
-		ac_build_intrinsic(ctx, "llvm.AMDGPU.kilp", ctx->voidt,
-				   NULL, 0, AC_FUNC_ATTR_LEGACY);
+	assert(HAVE_LLVM >= 0x0600);
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
+				  &i1, 1, AC_FUNC_ATTR_READNONE);
+}
+
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
+{
+	if (HAVE_LLVM >= 0x0600) {
+		ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
+				   &i1, 1, 0);
+		return;
 	}
+
+	LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1,
+					     LLVMConstReal(ctx->f32, 1),
+					     LLVMConstReal(ctx->f32, -1), "");
+	ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
+			   &value, 1, AC_FUNC_ATTR_LEGACY);
 }
 
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
@@ -1419,6 +1483,15 @@ LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
 				  AC_FUNC_ATTR_LEGACY);
 }
 
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
+{
+	LLVMValueRef args[1] = {
+		LLVMConstInt(ctx->i32, simm16, false),
+	};
+
+	ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
+			   ctx->voidt, args, 1, 0);
+}
+
 void ac_get_image_intr_name(const char *base_name,
 			    LLVMTypeRef data_type,
 			    LLVMTypeRef coords_type,
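Together, ac_build_wqm_vote and ac_build_kill_if_false express a GLSL discard. A sketch of a hypothetical alpha-test lowering follows; the comparison operands are illustrative, and the vote wrap assumes LLVM >= 6.0 as asserted above:

	/* Kill only quads in which every lane fails the test, so helper
	 * lanes keep producing valid derivatives for the survivors. */
	LLVMValueRef accept = LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
					    alpha, alpha_ref, "");
	if (HAVE_LLVM >= 0x0600)
		accept = ac_build_wqm_vote(ctx, accept);
	ac_build_kill_if_false(ctx, accept);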
+ */ + LLVMConstInt(ctx->i1, 1, false), + }; + + LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, + params, 2, + AC_FUNC_ATTR_READNONE); + + /* TODO: We need an intrinsic to skip this conditional. */ + /* Check for zero: */ + return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, + LLVMIntEQ, src0, + ctx->i32_0, ""), + LLVMConstInt(ctx->i32, -1, 0), lsb, ""); +}