ac/nir: use llvm.amdgcn.rsq for nir_op_frsq
[mesa.git] / src / amd / llvm / ac_nir_to_llvm.c
index fec68c469137bfce31eac300f0e51a76595eda36..d1c333ac73d252004c6a54a003cf9ce3394f77e5 100644 (file)
@@ -688,8 +688,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
                break;
        case nir_op_frcp:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
+               result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
+                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
                break;
        case nir_op_iand:
                result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
@@ -834,9 +834,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                                              ac_to_float_type(&ctx->ac, def_type), src[0]);
                break;
        case nir_op_frsq:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
-               result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
+               result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
+                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
                break;
        case nir_op_frexp_exp:
                src[0] = ac_to_float(&ctx->ac, src[0]);
@@ -2706,7 +2705,11 @@ static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,
                                          struct waterfall_context *wctx,
                                          const nir_intrinsic_instr *instr)
 {
-       nir_deref_instr *deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+       nir_deref_instr *deref_instr = NULL; /* stays NULL when src[0] is not produced by a deref instruction */
+
+       if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) /* check the type before casting the parent instruction */
+               deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr); /* NOTE(review): otherwise get_sampler_desc_index() below receives NULL - confirm it handles that */
+
        LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
        return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
 }
@@ -3946,8 +3949,33 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
                break;
        }
        case nir_intrinsic_shuffle:
-               result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
-                               get_src(ctx, instr->src[1]));
+               if (ctx->ac.chip_class == GFX8 ||
+                   ctx->ac.chip_class == GFX9 ||
+                   (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) {
+                       result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
+                                                 get_src(ctx, instr->src[1]));
+               } else {
+                       LLVMValueRef src = get_src(ctx, instr->src[0]);
+                       LLVMValueRef index = get_src(ctx, instr->src[1]);
+                       LLVMTypeRef type = LLVMTypeOf(src);
+                       struct waterfall_context wctx;
+                       LLVMValueRef index_val;
+
+                       index_val = enter_waterfall(ctx, &wctx, index, true);
+
+                       src = LLVMBuildZExt(ctx->ac.builder, src,
+                                           ctx->ac.i32, "");
+
+                       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
+                                                   ctx->ac.i32,
+                                                   (LLVMValueRef []) { src, index_val }, 2,
+                                                   AC_FUNC_ATTR_READNONE |
+                                                   AC_FUNC_ATTR_CONVERGENT);
+
+                       result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
+
+                       result = exit_waterfall(ctx, &wctx, result);
+               }
                break;
        case nir_intrinsic_reduce:
                result = ac_build_reduce(&ctx->ac,
@@ -4282,20 +4310,18 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx,
                }
        }
 
+       LLVMValueRef texture_dynamic_index = get_sampler_desc_index(ctx, texture_deref_instr,
+                                                                   &instr->instr, false);
        if (!sampler_deref_instr)
                sampler_deref_instr = texture_deref_instr;
 
-       LLVMValueRef texture_dynamic_index = NULL, sampler_dynamic_index = NULL;
-       if (texture_deref_instr) {
-               texture_dynamic_index = get_sampler_desc_index(ctx, texture_deref_instr, &instr->instr, false);
-               texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, instr->texture_non_uniform);
-       }
+        LLVMValueRef sampler_dynamic_index = get_sampler_desc_index(ctx, sampler_deref_instr,
+                                                                   &instr->instr, false);
+       if (instr->texture_non_uniform)
+               texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);
 
-       if (sampler_deref_instr && sampler_deref_instr != texture_deref_instr) {
-               sampler_dynamic_index = get_sampler_desc_index(ctx, sampler_deref_instr, &instr->instr, false);
-               sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, instr->sampler_non_uniform);
-       } else
-               sampler_dynamic_index = texture_dynamic_index;
+       if (instr->sampler_non_uniform)
+               sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);
 
        enum ac_descriptor_type main_descriptor = instr->sampler_dim  == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;