X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fllvm%2Fac_nir_to_llvm.c;h=7643326fde50ec855039b1f8e36c02fea558c5c1;hb=1ccd681109e80516430a3be489dca1be15316d50;hp=eec0a10efc86ab6e83cb444c2f5afb9ef8274ef1;hpb=2361e8e72278cfe256f80946516be7a48534e6d5;p=mesa.git

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index eec0a10efc8..7643326fde5 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -170,6 +170,17 @@ static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
                                  LLVMIntPredicate pred, LLVMValueRef src0,
                                  LLVMValueRef src1)
 {
+	LLVMTypeRef src0_type = LLVMTypeOf(src0);
+	LLVMTypeRef src1_type = LLVMTypeOf(src1);
+
+	if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
+	    LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
+		src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, "");
+	} else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
+		   LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
+		src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, "");
+	}
+
 	LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
 	return LLVMBuildSelect(ctx->builder, result,
 	                       LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
@@ -194,13 +205,13 @@ static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
 					 LLVMTypeRef result_type,
 					 LLVMValueRef src0)
 {
-	char name[64];
+	char name[64], type[64];
 	LLVMValueRef params[] = {
 		ac_to_float(ctx, src0),
 	};
 
-	ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
-						 ac_get_elem_bits(ctx, result_type));
+	ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+	ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
 	assert(length < sizeof(name));
 	return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
 }
@@ -210,14 +221,14 @@ static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
 				       LLVMTypeRef result_type,
 				       LLVMValueRef src0, LLVMValueRef src1)
 {
-	char name[64];
+	char name[64], type[64];
 	LLVMValueRef params[] = {
 		ac_to_float(ctx, src0),
 		ac_to_float(ctx, src1),
 	};
 
-	ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
-						 ac_get_elem_bits(ctx, result_type));
+	ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+	ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
 	assert(length < sizeof(name));
 	return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
 }
@@ -227,15 +238,15 @@ static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
 					 LLVMTypeRef result_type,
 					 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
 {
-	char name[64];
+	char name[64], type[64];
 	LLVMValueRef params[] = {
 		ac_to_float(ctx, src0),
 		ac_to_float(ctx, src1),
 		ac_to_float(ctx, src2),
 	};
 
-	ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
-						 ac_get_elem_bits(ctx, result_type));
+	ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+	ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
 	assert(length < sizeof(name));
 	return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
 }
@@ -246,7 +257,7 @@ static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
 	LLVMTypeRef src1_type = LLVMTypeOf(src1);
 	LLVMTypeRef src2_type = LLVMTypeOf(src2);
 
-	assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMFixedVectorTypeKind);
+	assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
 
 	if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
 	    LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
@@ -693,8 +704,15 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
 		break;
 	case nir_op_frcp:
-		result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
-					      ac_to_float_type(&ctx->ac, def_type), src[0]);
+		/* For doubles, we need precise division to pass GLCTS. */
+		if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
+		    ac_get_type_size(def_type) == 8) {
+			result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1,
+					       ac_to_float(&ctx->ac, src[0]), "");
+		} else {
+			result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
+						      ac_to_float_type(&ctx->ac, def_type), src[0]);
+		}
 		break;
 	case nir_op_iand:
 		result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
@@ -943,15 +961,45 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
 		break;
 	case nir_op_f2f16_rtz:
+	case nir_op_f2f16:
+	case nir_op_f2fmp:
 		src[0] = ac_to_float(&ctx->ac, src[0]);
-		if (LLVMTypeOf(src[0]) == ctx->ac.f64)
-			src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
-		LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
-		result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
-		result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+
+		/* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
+		 * all f32->f16 conversions have to round towards zero, because both scalar
+		 * and vec2 down-conversions have to round equally.
+		 */
+		if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL ||
+		    instr->op == nir_op_f2f16_rtz) {
+			src[0] = ac_to_float(&ctx->ac, src[0]);
+
+			if (LLVMTypeOf(src[0]) == ctx->ac.f64)
+				src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
+
+			/* Fast path conversion. This only applies when the source is v2f32,
+			 * i.e. when NIR has vectorized the conversion to a 16-bit vec2.
+			 */
+			if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
+				LLVMValueRef args[] = {
+					ac_llvm_extract_elem(&ctx->ac, src[0], 0),
+					ac_llvm_extract_elem(&ctx->ac, src[0], 1),
+				};
+				result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
+				break;
+			}
+
+			assert(ac_get_llvm_num_components(src[0]) == 1);
+			LLVMValueRef param[2] = { src[0], LLVMGetUndef(ctx->ac.f32) };
+			result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
+			result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+		} else {
+			if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+				result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+			else
+				result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+		}
 		break;
 	case nir_op_f2f16_rtne:
-	case nir_op_f2f16:
 	case nir_op_f2f32:
 	case nir_op_f2f64:
 		src[0] = ac_to_float(&ctx->ac, src[0]);
@@ -962,6 +1010,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		break;
 	case nir_op_u2u8:
 	case nir_op_u2u16:
+	case nir_op_u2ump:
 	case nir_op_u2u32:
 	case nir_op_u2u64:
 		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
@@ -971,6 +1020,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		break;
 	case nir_op_i2i8:
 	case nir_op_i2i16:
+	case nir_op_i2imp:
 	case nir_op_i2i32:
 	case nir_op_i2i64:
 		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
@@ -1436,12 +1486,14 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
 		unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
 
+		assert(instr->dest.is_ssa);
 		return ac_build_buffer_load_format(&ctx->ac,
 			                           args->resource,
 			                           args->coords[0],
 			                           ctx->ac.i32_0,
 			                           util_last_bit(mask),
-			                           0, true);
+			                           0, true,
+						   instr->dest.ssa.bit_size == 16);
 	}
 
 	args->opcode = ac_image_sample;
@@ -1470,7 +1522,8 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 		break;
 	case nir_texop_tg4:
 		args->opcode = ac_image_gather4;
-		args->level_zero = true;
+                if (!args->lod && !args->bias)
+			args->level_zero = true;
 		break;
 	case nir_texop_lod:
 		args->opcode = ac_image_get_lod;
@@ -1573,13 +1626,13 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
 
 	if (instr->dest.ssa.bit_size == 8) {
 		unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
-		LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
+		LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
 		ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
 		LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
 
 		LLVMValueRef params[3];
 		if (load_dwords > 1) {
-			LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
+			LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
 			params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
 			params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
 		} else {
@@ -1592,11 +1645,11 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
 
 		res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
 		if (instr->dest.ssa.num_components > 1)
-			res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), "");
+			res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
 		return res;
 	} else if (instr->dest.ssa.bit_size == 16) {
 		unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
-		LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
+		LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
 		ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
 		LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
 		res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
@@ -2194,7 +2247,7 @@ static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
 	LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
 
 	LLVMTypeRef src_component_type;
-	if (LLVMGetTypeKind(dest_type) == LLVMFixedVectorTypeKind)
+	if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
 		src_component_type = LLVMGetElementType(dest_type);
 	else
 		src_component_type = dest_type;
@@ -2354,7 +2407,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 		bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
 
 		if (stride != natural_stride || split_loads) {
-			if (LLVMGetTypeKind(result_type) == LLVMFixedVectorTypeKind)
+			if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
 				result_type = LLVMGetElementType(result_type);
 
 			LLVMTypeRef ptr_type = LLVMPointerType(result_type,
@@ -2365,6 +2418,9 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 				LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
 				values[i] = LLVMBuildLoad(ctx->ac.builder,
 				                          ac_build_gep_ptr(&ctx->ac, address, offset), "");
+
+				if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+					LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
 			}
 			return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
 		} else {
@@ -2372,6 +2428,9 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 			                                        LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 			address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
 			LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
+
+			if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+				LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
 			return val;
 		}
 	}
@@ -2527,10 +2586,13 @@ visit_store_var(struct ac_nir_context *ctx,
 
 			val = LLVMBuildBitCast(ctx->ac.builder, val,
 			                       LLVMGetElementType(LLVMTypeOf(address)), "");
-			LLVMBuildStore(ctx->ac.builder, val, address);
+			LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address);
+
+			if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+				LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
 		} else {
 			LLVMTypeRef val_type = LLVMTypeOf(val);
-			if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMFixedVectorTypeKind)
+			if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
 				val_type = LLVMGetElementType(val_type);
 
 			LLVMTypeRef ptr_type = LLVMPointerType(val_type,
@@ -2547,7 +2609,10 @@ visit_store_var(struct ac_nir_context *ctx,
 									chan);
 				src = LLVMBuildBitCast(ctx->ac.builder, src,
 				                       LLVMGetElementType(LLVMTypeOf(ptr)), "");
-				LLVMBuildStore(ctx->ac.builder, src, ptr);
+				LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr);
+
+				if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+					LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
 			}
 		}
 		break;
@@ -2750,18 +2815,17 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 	LLVMValueRef res;
 
 	enum glsl_sampler_dim dim;
-	enum gl_access_qualifier access;
+	enum gl_access_qualifier access = nir_intrinsic_access(instr);
 	bool is_array;
 	if (bindless) {
 		dim = nir_intrinsic_image_dim(instr);
-		access = nir_intrinsic_access(instr);
 		is_array = nir_intrinsic_image_array(instr);
 	} else {
 		const nir_deref_instr *image_deref = get_image_deref(instr);
 		const struct glsl_type *type = image_deref->type;
 		const nir_variable *var = nir_deref_instr_get_variable(image_deref);
 		dim = glsl_get_sampler_dim(type);
-		access = var->data.access;
+		access |= var->data.access;
 		is_array = glsl_sampler_type_is_array(type);
 	}
 
@@ -2781,11 +2845,13 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 		vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 						 ctx->ac.i32_0, "");
 
+		assert(instr->dest.is_ssa);
 		bool can_speculate = access & ACCESS_CAN_REORDER;
 		res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
 						  ctx->ac.i32_0, num_channels,
 						  args.cache_policy,
-						  can_speculate);
+						  can_speculate,
+						  instr->dest.ssa.bit_size == 16);
 		res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
 
 		res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
@@ -2802,6 +2868,9 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 		args.dmask = 15;
 		args.attributes = AC_FUNC_ATTR_READONLY;
 
+		assert(instr->dest.is_ssa);
+		args.d16 = instr->dest.ssa.bit_size == 16;
+
 		res = ac_build_image_opcode(&ctx->ac, &args);
 	}
 	return exit_waterfall(ctx, &wctx, res);
@@ -2818,19 +2887,18 @@ static void visit_image_store(struct ac_nir_context *ctx,
         }
 
 	enum glsl_sampler_dim dim;
-	enum gl_access_qualifier access;
+	enum gl_access_qualifier access = nir_intrinsic_access(instr);
 	bool is_array;
 
 	if (bindless) {
 		dim = nir_intrinsic_image_dim(instr);
-		access = nir_intrinsic_access(instr);
 		is_array = nir_intrinsic_image_array(instr);
 	} else {
 		const nir_deref_instr *image_deref = get_image_deref(instr);
 		const struct glsl_type *type = image_deref->type;
 		const nir_variable *var = nir_deref_instr_get_variable(image_deref);
 		dim = glsl_get_sampler_dim(type);
-		access = var->data.access;
+		access |= var->data.access;
 		is_array = glsl_sampler_type_is_array(type);
 	}
 
@@ -2856,8 +2924,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
 						 ctx->ac.i32_0, "");
 
 		ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
-					     ctx->ac.i32_0, src_channels,
-					     args.cache_policy);
+					     ctx->ac.i32_0, args.cache_policy);
 	} else {
 		bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
 
@@ -2869,6 +2936,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
 		if (!level_zero)
 			args.lod = get_src(ctx, instr->src[4]);
 		args.dmask = 15;
+		args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
 
 		ac_build_image_opcode(&ctx->ac, &args);
 	}
@@ -2974,16 +3042,6 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
 	case nir_intrinsic_image_deref_atomic_inc_wrap: {
 		atomic_name = "inc";
 		atomic_subop = ac_atomic_inc_wrap;
-		/* ATOMIC_INC instruction does:
-		 *      value = (value + 1) % (data + 1)
-		 * but we want:
-		 *      value = (value + 1) % data
-		 * So replace 'data' by 'data - 1'.
-		 */
-		ctx->ssa_defs[instr->src[3].ssa->index] =
-			LLVMBuildSub(ctx->ac.builder,
-				     ctx->ssa_defs[instr->src[3].ssa->index],
-				     ctx->ac.i32_1, "");
 		break;
 	}
 	case nir_intrinsic_bindless_image_atomic_dec_wrap:
@@ -3086,6 +3144,7 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
 		args.dmask = 0xf;
 		args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
 		args.opcode = ac_image_get_resinfo;
+		assert(nir_src_as_uint(instr->src[1]) == 0);
 		args.lod = ctx->ac.i32_0;
 		args.attributes = AC_FUNC_ATTR_READNONE;
 
@@ -3356,11 +3415,26 @@ static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
 		case nir_intrinsic_deref_atomic_exchange:
 			op = LLVMAtomicRMWBinOpXchg;
 			break;
+#if LLVM_VERSION_MAJOR >= 10
+		case nir_intrinsic_shared_atomic_fadd:
+		case nir_intrinsic_deref_atomic_fadd:
+			op = LLVMAtomicRMWBinOpFAdd;
+			break;
+#endif
 		default:
 			return NULL;
 		}
 
-		result = ac_build_atomic_rmw(&ctx->ac, op, ptr, ac_to_integer(&ctx->ac, src), sync_scope);
+		LLVMValueRef val;
+
+		if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||
+		    instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {
+			val = ac_to_float(&ctx->ac, src);
+		} else {
+			val = ac_to_integer(&ctx->ac, src);
+		}
+
+		result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);
 	}
 
 	if (ctx->ac.postponed_kill)
@@ -3864,7 +3938,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 		result = visit_image_size(ctx, instr, false);
 		break;
 	case nir_intrinsic_shader_clock:
-		result = ac_build_shader_clock(&ctx->ac);
+		result = ac_build_shader_clock(&ctx->ac,
+					       nir_intrinsic_memory_scope(instr));
 		break;
 	case nir_intrinsic_discard:
 	case nir_intrinsic_discard_if:
@@ -3881,6 +3956,25 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 	case nir_intrinsic_memory_barrier_shared:
 		emit_membar(&ctx->ac, instr);
 		break;
+	case nir_intrinsic_scoped_barrier: {
+		assert(!(nir_intrinsic_memory_semantics(instr) &
+			 (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
+
+		nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
+
+		unsigned wait_flags = 0;
+		if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
+			wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
+		if (modes & nir_var_mem_shared)
+			wait_flags |= AC_WAIT_LGKM;
+
+		if (wait_flags)
+			ac_build_waitcnt(&ctx->ac, wait_flags);
+
+		if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
+			ac_emit_barrier(&ctx->ac, ctx->stage);
+		break;
+	}
 	case nir_intrinsic_memory_barrier_tcs_patch:
 		break;
 	case nir_intrinsic_control_barrier:
@@ -3895,7 +3989,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 	case nir_intrinsic_shared_atomic_or:
 	case nir_intrinsic_shared_atomic_xor:
 	case nir_intrinsic_shared_atomic_exchange:
-	case nir_intrinsic_shared_atomic_comp_swap: {
+	case nir_intrinsic_shared_atomic_comp_swap:
+	case nir_intrinsic_shared_atomic_fadd: {
 		LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
 						  instr->src[1].ssa->bit_size);
 		result = visit_var_atomic(ctx, instr, ptr, 1);
@@ -3910,7 +4005,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 	case nir_intrinsic_deref_atomic_or:
 	case nir_intrinsic_deref_atomic_xor:
 	case nir_intrinsic_deref_atomic_exchange:
-	case nir_intrinsic_deref_atomic_comp_swap: {
+	case nir_intrinsic_deref_atomic_comp_swap:
+	case nir_intrinsic_deref_atomic_fadd: {
 		LLVMValueRef ptr = get_src(ctx, instr->src[0]);
 		result = visit_var_atomic(ctx, instr, ptr, 1);
 		break;
@@ -4002,7 +4098,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 	case nir_intrinsic_shuffle:
 		if (ctx->ac.chip_class == GFX8 ||
 		    ctx->ac.chip_class == GFX9 ||
-		    (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) {
+		    (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
 			result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
 						  get_src(ctx, instr->src[1]));
 		} else {
@@ -4447,8 +4543,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 			offset_src = i;
 			break;
 		case nir_tex_src_bias:
-			if (instr->op == nir_texop_txb)
-				args.bias = get_src(ctx, instr->src[i].src);
+			args.bias = get_src(ctx, instr->src[i].src);
 			break;
 		case nir_tex_src_lod: {
 			if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
@@ -4732,6 +4827,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 		}
 	}
 
+	assert(instr->dest.is_ssa);
+	args.d16 = instr->dest.ssa.bit_size == 16;
+
 	result = build_tex_intrinsic(ctx, instr, &args);
 
 	if (instr->op == nir_texop_query_levels)
@@ -5022,7 +5120,7 @@ static void visit_deref(struct ac_nir_context *ctx,
 		LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
 
 		if (LLVMTypeOf(result) != type) {
-			if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMFixedVectorTypeKind) {
+			if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
 				result = LLVMBuildBitCast(ctx->ac.builder, result,
 				                          type, "");
 			} else {
@@ -5186,7 +5284,7 @@ setup_locals(struct ac_nir_context *ctx,
 {
 	int i, j;
 	ctx->num_locals = 0;
-	nir_foreach_variable(variable, &func->impl->locals) {
+	nir_foreach_function_temp_variable(variable, func->impl) {
 		unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
 		variable->data.driver_location = ctx->num_locals * 4;
 		variable->data.location_frac = 0;
@@ -5286,7 +5384,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
 
 	ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
 
-	nir_foreach_variable(variable, &nir->outputs)
+	nir_foreach_shader_out_variable(variable, nir)
 		ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
 					     ctx.stage);