X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fllvm%2Fac_nir_to_llvm.c;h=7643326fde50ec855039b1f8e36c02fea558c5c1;hb=1ccd681109e80516430a3be489dca1be15316d50;hp=eec0a10efc86ab6e83cb444c2f5afb9ef8274ef1;hpb=2361e8e72278cfe256f80946516be7a48534e6d5;p=mesa.git diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index eec0a10efc8..7643326fde5 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -170,6 +170,17 @@ static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred, LLVMValueRef src0, LLVMValueRef src1) { + LLVMTypeRef src0_type = LLVMTypeOf(src0); + LLVMTypeRef src1_type = LLVMTypeOf(src1); + + if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind && + LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) { + src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, ""); + } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind && + LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) { + src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, ""); + } + LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, ""); return LLVMBuildSelect(ctx->builder, result, LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), @@ -194,13 +205,13 @@ static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, LLVMTypeRef result_type, LLVMValueRef src0) { - char name[64]; + char name[64], type[64]; LLVMValueRef params[] = { ac_to_float(ctx, src0), }; - ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, - ac_get_elem_bits(ctx, result_type)); + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); assert(length < sizeof(name)); return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); } @@ -210,14 +221,14 @@ static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, LLVMTypeRef result_type, LLVMValueRef src0, LLVMValueRef src1) { - char name[64]; + char name[64], type[64]; LLVMValueRef params[] = { ac_to_float(ctx, src0), ac_to_float(ctx, src1), }; - ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, - ac_get_elem_bits(ctx, result_type)); + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); assert(length < sizeof(name)); return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); } @@ -227,15 +238,15 @@ static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, LLVMTypeRef result_type, LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) { - char name[64]; + char name[64], type[64]; LLVMValueRef params[] = { ac_to_float(ctx, src0), ac_to_float(ctx, src1), ac_to_float(ctx, src2), }; - ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, - ac_get_elem_bits(ctx, result_type)); + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); assert(length < sizeof(name)); return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); } @@ -246,7 +257,7 @@ static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, LLVMTypeRef src1_type = LLVMTypeOf(src1); LLVMTypeRef src2_type = LLVMTypeOf(src2); - assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMFixedVectorTypeKind); + assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind); if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind && LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) { @@ -693,8 +704,15 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_frcp: - result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp", - ac_to_float_type(&ctx->ac, def_type), src[0]); + /* For doubles, we need precise division to pass GLCTS. */ + if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && + ac_get_type_size(def_type) == 8) { + result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, + ac_to_float(&ctx->ac, src[0]), ""); + } else { + result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp", + ac_to_float_type(&ctx->ac, def_type), src[0]); + } break; case nir_op_iand: result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); @@ -943,15 +961,45 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); break; case nir_op_f2f16_rtz: + case nir_op_f2f16: + case nir_op_f2fmp: src[0] = ac_to_float(&ctx->ac, src[0]); - if (LLVMTypeOf(src[0]) == ctx->ac.f64) - src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, ""); - LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 }; - result = ac_build_cvt_pkrtz_f16(&ctx->ac, param); - result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + + /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it, + * all f32->f16 conversions have to round towards zero, because both scalar + * and vec2 down-conversions have to round equally. + */ + if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || + instr->op == nir_op_f2f16_rtz) { + src[0] = ac_to_float(&ctx->ac, src[0]); + + if (LLVMTypeOf(src[0]) == ctx->ac.f64) + src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, ""); + + /* Fast path conversion. This only works if NIR is vectorized + * to vec2 16. + */ + if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) { + LLVMValueRef args[] = { + ac_llvm_extract_elem(&ctx->ac, src[0], 0), + ac_llvm_extract_elem(&ctx->ac, src[0], 1), + }; + result = ac_build_cvt_pkrtz_f16(&ctx->ac, args); + break; + } + + assert(ac_get_llvm_num_components(src[0]) == 1); + LLVMValueRef param[2] = { src[0], LLVMGetUndef(ctx->ac.f32) }; + result = ac_build_cvt_pkrtz_f16(&ctx->ac, param); + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + } else { + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + else + result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + } break; case nir_op_f2f16_rtne: - case nir_op_f2f16: case nir_op_f2f32: case nir_op_f2f64: src[0] = ac_to_float(&ctx->ac, src[0]); @@ -962,6 +1010,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) break; case nir_op_u2u8: case nir_op_u2u16: + case nir_op_u2ump: case nir_op_u2u32: case nir_op_u2u64: if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) @@ -971,6 +1020,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) break; case nir_op_i2i8: case nir_op_i2i16: + case nir_op_i2imp: case nir_op_i2i32: case nir_op_i2i64: if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) @@ -1436,12 +1486,14 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); + assert(instr->dest.is_ssa); return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0, util_last_bit(mask), - 0, true); + 0, true, + instr->dest.ssa.bit_size == 16); } args->opcode = ac_image_sample; @@ -1470,7 +1522,8 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, break; case nir_texop_tg4: args->opcode = ac_image_gather4; - args->level_zero = true; + if (!args->lod && !args->bias) + args->level_zero = true; break; case nir_texop_lod: args->opcode = ac_image_get_lod; @@ -1573,13 +1626,13 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, if (instr->dest.ssa.bit_size == 8) { unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1; - LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords); + LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords); ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); LLVMValueRef params[3]; if (load_dwords > 1) { - LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), ""); + LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, ""); params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), ""); params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), ""); } else { @@ -1592,11 +1645,11 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), ""); if (instr->dest.ssa.num_components > 1) - res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), ""); + res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), ""); return res; } else if (instr->dest.ssa.bit_size == 16) { unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1; - LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords); + LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords); ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, ""); @@ -2194,7 +2247,7 @@ static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx, LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa); LLVMTypeRef src_component_type; - if (LLVMGetTypeKind(dest_type) == LLVMFixedVectorTypeKind) + if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind) src_component_type = LLVMGetElementType(dest_type); else src_component_type = dest_type; @@ -2354,7 +2407,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4; if (stride != natural_stride || split_loads) { - if (LLVMGetTypeKind(result_type) == LLVMFixedVectorTypeKind) + if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind) result_type = LLVMGetElementType(result_type); LLVMTypeRef ptr_type = LLVMPointerType(result_type, @@ -2365,6 +2418,9 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0); values[i] = LLVMBuildLoad(ctx->ac.builder, ac_build_gep_ptr(&ctx->ac, address, offset), ""); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic); } return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components); } else { @@ -2372,6 +2428,9 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, LLVMGetPointerAddressSpace(LLVMTypeOf(address))); address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic); return val; } } @@ -2527,10 +2586,13 @@ visit_store_var(struct ac_nir_context *ctx, val = LLVMBuildBitCast(ctx->ac.builder, val, LLVMGetElementType(LLVMTypeOf(address)), ""); - LLVMBuildStore(ctx->ac.builder, val, address); + LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic); } else { LLVMTypeRef val_type = LLVMTypeOf(val); - if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMFixedVectorTypeKind) + if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind) val_type = LLVMGetElementType(val_type); LLVMTypeRef ptr_type = LLVMPointerType(val_type, @@ -2547,7 +2609,10 @@ visit_store_var(struct ac_nir_context *ctx, chan); src = LLVMBuildBitCast(ctx->ac.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), ""); - LLVMBuildStore(ctx->ac.builder, src, ptr); + LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic); } } break; @@ -2750,18 +2815,17 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, LLVMValueRef res; enum glsl_sampler_dim dim; - enum gl_access_qualifier access; + enum gl_access_qualifier access = nir_intrinsic_access(instr); bool is_array; if (bindless) { dim = nir_intrinsic_image_dim(instr); - access = nir_intrinsic_access(instr); is_array = nir_intrinsic_image_array(instr); } else { const nir_deref_instr *image_deref = get_image_deref(instr); const struct glsl_type *type = image_deref->type; const nir_variable *var = nir_deref_instr_get_variable(image_deref); dim = glsl_get_sampler_dim(type); - access = var->data.access; + access |= var->data.access; is_array = glsl_sampler_type_is_array(type); } @@ -2781,11 +2845,13 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); + assert(instr->dest.is_ssa); bool can_speculate = access & ACCESS_CAN_REORDER; res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels, args.cache_policy, - can_speculate); + can_speculate, + instr->dest.ssa.bit_size == 16); res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); @@ -2802,6 +2868,9 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, args.dmask = 15; args.attributes = AC_FUNC_ATTR_READONLY; + assert(instr->dest.is_ssa); + args.d16 = instr->dest.ssa.bit_size == 16; + res = ac_build_image_opcode(&ctx->ac, &args); } return exit_waterfall(ctx, &wctx, res); @@ -2818,19 +2887,18 @@ static void visit_image_store(struct ac_nir_context *ctx, } enum glsl_sampler_dim dim; - enum gl_access_qualifier access; + enum gl_access_qualifier access = nir_intrinsic_access(instr); bool is_array; if (bindless) { dim = nir_intrinsic_image_dim(instr); - access = nir_intrinsic_access(instr); is_array = nir_intrinsic_image_array(instr); } else { const nir_deref_instr *image_deref = get_image_deref(instr); const struct glsl_type *type = image_deref->type; const nir_variable *var = nir_deref_instr_get_variable(image_deref); dim = glsl_get_sampler_dim(type); - access = var->data.access; + access |= var->data.access; is_array = glsl_sampler_type_is_array(type); } @@ -2856,8 +2924,7 @@ static void visit_image_store(struct ac_nir_context *ctx, ctx->ac.i32_0, ""); ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, - ctx->ac.i32_0, src_channels, - args.cache_policy); + ctx->ac.i32_0, args.cache_policy); } else { bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0; @@ -2869,6 +2936,7 @@ static void visit_image_store(struct ac_nir_context *ctx, if (!level_zero) args.lod = get_src(ctx, instr->src[4]); args.dmask = 15; + args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16; ac_build_image_opcode(&ctx->ac, &args); } @@ -2974,16 +3042,6 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, case nir_intrinsic_image_deref_atomic_inc_wrap: { atomic_name = "inc"; atomic_subop = ac_atomic_inc_wrap; - /* ATOMIC_INC instruction does: - * value = (value + 1) % (data + 1) - * but we want: - * value = (value + 1) % data - * So replace 'data' by 'data - 1'. - */ - ctx->ssa_defs[instr->src[3].ssa->index] = - LLVMBuildSub(ctx->ac.builder, - ctx->ssa_defs[instr->src[3].ssa->index], - ctx->ac.i32_1, ""); break; } case nir_intrinsic_bindless_image_atomic_dec_wrap: @@ -3086,6 +3144,7 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, args.dmask = 0xf; args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false); args.opcode = ac_image_get_resinfo; + assert(nir_src_as_uint(instr->src[1]) == 0); args.lod = ctx->ac.i32_0; args.attributes = AC_FUNC_ATTR_READNONE; @@ -3356,11 +3415,26 @@ static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, case nir_intrinsic_deref_atomic_exchange: op = LLVMAtomicRMWBinOpXchg; break; +#if LLVM_VERSION_MAJOR >= 10 + case nir_intrinsic_shared_atomic_fadd: + case nir_intrinsic_deref_atomic_fadd: + op = LLVMAtomicRMWBinOpFAdd; + break; +#endif default: return NULL; } - result = ac_build_atomic_rmw(&ctx->ac, op, ptr, ac_to_integer(&ctx->ac, src), sync_scope); + LLVMValueRef val; + + if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd || + instr->intrinsic == nir_intrinsic_deref_atomic_fadd) { + val = ac_to_float(&ctx->ac, src); + } else { + val = ac_to_integer(&ctx->ac, src); + } + + result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope); } if (ctx->ac.postponed_kill) @@ -3864,7 +3938,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx, result = visit_image_size(ctx, instr, false); break; case nir_intrinsic_shader_clock: - result = ac_build_shader_clock(&ctx->ac); + result = ac_build_shader_clock(&ctx->ac, + nir_intrinsic_memory_scope(instr)); break; case nir_intrinsic_discard: case nir_intrinsic_discard_if: @@ -3881,6 +3956,25 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_memory_barrier_shared: emit_membar(&ctx->ac, instr); break; + case nir_intrinsic_scoped_barrier: { + assert(!(nir_intrinsic_memory_semantics(instr) & + (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE))); + + nir_variable_mode modes = nir_intrinsic_memory_modes(instr); + + unsigned wait_flags = 0; + if (modes & (nir_var_mem_global | nir_var_mem_ssbo)) + wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE; + if (modes & nir_var_mem_shared) + wait_flags |= AC_WAIT_LGKM; + + if (wait_flags) + ac_build_waitcnt(&ctx->ac, wait_flags); + + if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP) + ac_emit_barrier(&ctx->ac, ctx->stage); + break; + } case nir_intrinsic_memory_barrier_tcs_patch: break; case nir_intrinsic_control_barrier: @@ -3895,7 +3989,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_shared_atomic_or: case nir_intrinsic_shared_atomic_xor: case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: { + case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic_fadd: { LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size); result = visit_var_atomic(ctx, instr, ptr, 1); @@ -3910,7 +4005,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_deref_atomic_or: case nir_intrinsic_deref_atomic_xor: case nir_intrinsic_deref_atomic_exchange: - case nir_intrinsic_deref_atomic_comp_swap: { + case nir_intrinsic_deref_atomic_comp_swap: + case nir_intrinsic_deref_atomic_fadd: { LLVMValueRef ptr = get_src(ctx, instr->src[0]); result = visit_var_atomic(ctx, instr, ptr, 1); break; @@ -4002,7 +4098,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_shuffle: if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 || - (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) { + (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) { result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1])); } else { @@ -4447,8 +4543,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) offset_src = i; break; case nir_tex_src_bias: - if (instr->op == nir_texop_txb) - args.bias = get_src(ctx, instr->src[i].src); + args.bias = get_src(ctx, instr->src[i].src); break; case nir_tex_src_lod: { if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0) @@ -4732,6 +4827,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) } } + assert(instr->dest.is_ssa); + args.d16 = instr->dest.ssa.bit_size == 16; + result = build_tex_intrinsic(ctx, instr, &args); if (instr->op == nir_texop_query_levels) @@ -5022,7 +5120,7 @@ static void visit_deref(struct ac_nir_context *ctx, LLVMTypeRef type = LLVMPointerType(pointee_type, address_space); if (LLVMTypeOf(result) != type) { - if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMFixedVectorTypeKind) { + if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) { result = LLVMBuildBitCast(ctx->ac.builder, result, type, ""); } else { @@ -5186,7 +5284,7 @@ setup_locals(struct ac_nir_context *ctx, { int i, j; ctx->num_locals = 0; - nir_foreach_variable(variable, &func->impl->locals) { + nir_foreach_function_temp_variable(variable, func->impl) { unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); variable->data.driver_location = ctx->num_locals * 4; variable->data.location_frac = 0; @@ -5286,7 +5384,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); - nir_foreach_variable(variable, &nir->outputs) + nir_foreach_shader_out_variable(variable, nir) ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, ctx.stage);