ac/llvm: add better code for fsign

[mesa.git] / src / amd / llvm / ac_nir_to_llvm.c
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c

index 7643326fde50ec855039b1f8e36c02fea558c5c1..4b696f28f124eb774a15483da9c1d4eb84dad17e 100644 (file)
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -216,6 +216,35 @@ static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
         return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
  }
  
+static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx,
+                                               const char *intrin,
+                                               LLVMTypeRef result_type,
+                                               LLVMValueRef src0)
+{ /* Emits the one-operand intrinsic `intrin`; a vector result_type is built per element instead of with one call. */
+       if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
+               return emit_intrin_1f_param(ctx, intrin, result_type, src0); /* non-vector result: single call */
+
+       LLVMTypeRef elem_type = LLVMGetElementType(result_type);
+       LLVMValueRef ret = LLVMGetUndef(result_type); /* every element is overwritten by the loop below */
+
+       /* Scalarize the intrinsic, because vectors are not supported: one call per element of src0, inserted into ret at the same index. */
+       for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
+               char name[64], type[64];
+               LLVMValueRef params[] = {
+                       ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)),
+               };
+
+               ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); /* type suffix appended to the intrinsic name below */
+               ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+               assert(length < sizeof(name)); /* the full "intrin.type" name must fit without truncation */
+               ret = LLVMBuildInsertElement(ctx->builder, ret,
+                                            ac_build_intrinsic(ctx, name, elem_type, params,
+                                                               1, AC_FUNC_ATTR_READNONE),
+                                            LLVMConstInt(ctx->i32, i, 0), "");
+       }
+       return ret;
+}
+
  static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
                                        const char *intrin,
                                        LLVMTypeRef result_type,
@@ -257,8 +286,6 @@ static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
         LLVMTypeRef src1_type = LLVMTypeOf(src1);
         LLVMTypeRef src2_type = LLVMTypeOf(src2);
  
-       assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
-
         if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
             LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
                 src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
@@ -268,7 +295,7 @@ static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
         }
  
         LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
-                                      ctx->i32_0, "");
+                                      LLVMConstNull(LLVMTypeOf(src0)), "");
         return LLVMBuildSelect(ctx->builder, v,
                                ac_to_integer_or_pointer(ctx, src1),
                                ac_to_integer_or_pointer(ctx, src2), "");
@@ -601,10 +628,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
         unsigned num_components = instr->dest.dest.ssa.num_components;
         unsigned src_components;
         LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
-       bool saved_inexact = false;
-
-       if (instr->exact)
-               saved_inexact = ac_disable_inexact_math(ctx->ac.builder);
  
         assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
         switch (instr->op) {
@@ -678,17 +701,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
         case nir_op_umod:
                 result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
                 break;
-       case nir_op_fmod:
-               /* lower_fmod only lower 16-bit and 32-bit fmod */
-               assert(instr->dest.dest.ssa.bit_size == 64);
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               src[1] = ac_to_float(&ctx->ac, src[1]);
-               result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
-                                             ac_to_float_type(&ctx->ac, def_type), result);
-               result = LLVMBuildFMul(ctx->ac.builder, src[1] , result, "");
-               result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
-               break;
         case nir_op_irem:
                 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
                 break;
@@ -710,9 +722,12 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                         result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1,
                                                ac_to_float(&ctx->ac, src[0]), "");
                 } else {
-                       result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
-                                                     ac_to_float_type(&ctx->ac, def_type), src[0]);
+                       result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",
+                                                            ac_to_float_type(&ctx->ac, def_type), src[0]);
                 }
+               if (ctx->abi->clamp_div_by_zero)
+                       result = ac_build_fmin(&ctx->ac, result,
+                                              LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
                 break;
         case nir_op_iand:
                 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
@@ -771,7 +786,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
         case nir_op_feq32:
                 result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
                 break;
-       case nir_op_fne32:
+       case nir_op_fneu32:
                 result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
                 break;
         case nir_op_flt32:
@@ -807,13 +822,11 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                 result = ac_build_umin(&ctx->ac, src[0], src[1]);
                 break;
         case nir_op_isign:
-               result = ac_build_isign(&ctx->ac, src[0],
-                                       instr->dest.dest.ssa.bit_size);
+               result = ac_build_isign(&ctx->ac, src[0]);
                 break;
         case nir_op_fsign:
                 src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = ac_build_fsign(&ctx->ac, src[0],
-                                       instr->dest.dest.ssa.bit_size);
+               result = ac_build_fsign(&ctx->ac, src[0]);
                 break;
         case nir_op_ffloor:
                 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
@@ -832,9 +845,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                                               ac_to_float_type(&ctx->ac, def_type),src[0]);
                 break;
         case nir_op_ffract:
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               result = ac_build_fract(&ctx->ac, src[0],
-                                       instr->dest.dest.ssa.bit_size);
+               result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
+                                                    ac_to_float_type(&ctx->ac, def_type), src[0]);
                 break;
         case nir_op_fsin:
                 result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
@@ -857,8 +869,11 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                                               ac_to_float_type(&ctx->ac, def_type), src[0]);
                 break;
         case nir_op_frsq:
-               result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0]);
+               result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rsq",
+                                                    ac_to_float_type(&ctx->ac, def_type), src[0]);
+               if (ctx->abi->clamp_div_by_zero)
+                       result = ac_build_fmin(&ctx->ac, result,
+                                              LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
                 break;
         case nir_op_frexp_exp:
                 src[0] = ac_to_float(&ctx->ac, src[0]);
@@ -900,7 +915,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
         case nir_op_ffma:
                 /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
                 result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
+                                             ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
                 break;
         case nir_op_ldexp:
                 src[0] = ac_to_float(&ctx->ac, src[0]);
@@ -1174,57 +1189,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                 break;
         }
  
-       case nir_op_fmin3:
-               result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
-                                               ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
-               result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
-                                               ac_to_float_type(&ctx->ac, def_type), result, src[2]);
-               break;
-       case nir_op_umin3:
-               result = ac_build_umin(&ctx->ac, src[0], src[1]);
-               result = ac_build_umin(&ctx->ac, result, src[2]);
-               break;
-       case nir_op_imin3:
-               result = ac_build_imin(&ctx->ac, src[0], src[1]);
-               result = ac_build_imin(&ctx->ac, result, src[2]);
-               break;
-       case nir_op_fmax3:
-               result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
-                                               ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
-               result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
-                                               ac_to_float_type(&ctx->ac, def_type), result, src[2]);
-               break;
-       case nir_op_umax3:
-               result = ac_build_umax(&ctx->ac, src[0], src[1]);
-               result = ac_build_umax(&ctx->ac, result, src[2]);
-               break;
-       case nir_op_imax3:
-               result = ac_build_imax(&ctx->ac, src[0], src[1]);
-               result = ac_build_imax(&ctx->ac, result, src[2]);
-               break;
-       case nir_op_fmed3: {
-               src[0] = ac_to_float(&ctx->ac, src[0]);
-               src[1] = ac_to_float(&ctx->ac, src[1]);
-               src[2] = ac_to_float(&ctx->ac, src[2]);
-               result = ac_build_fmed3(&ctx->ac, src[0], src[1], src[2],
-                                       instr->dest.dest.ssa.bit_size);
-               break;
-       }
-       case nir_op_imed3: {
-               LLVMValueRef tmp1 = ac_build_imin(&ctx->ac, src[0], src[1]);
-               LLVMValueRef tmp2 = ac_build_imax(&ctx->ac, src[0], src[1]);
-               tmp2 = ac_build_imin(&ctx->ac, tmp2, src[2]);
-               result = ac_build_imax(&ctx->ac, tmp1, tmp2);
-               break;
-       }
-       case nir_op_umed3: {
-               LLVMValueRef tmp1 = ac_build_umin(&ctx->ac, src[0], src[1]);
-               LLVMValueRef tmp2 = ac_build_umax(&ctx->ac, src[0], src[1]);
-               tmp2 = ac_build_umin(&ctx->ac, tmp2, src[2]);
-               result = ac_build_umax(&ctx->ac, tmp1, tmp2);
-               break;
-       }
-
         default:
                 fprintf(stderr, "Unknown NIR alu instr: ");
                 nir_print_instr(&instr->instr, stderr);
@@ -1237,9 +1201,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                 result = ac_to_integer_or_pointer(&ctx->ac, result);
                 ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
         }
-
-       if (instr->exact)
-               ac_restore_inexact_math(ctx->ac.builder, saved_inexact);
  }
  
  static void visit_load_const(struct ac_nir_context *ctx,
@@ -2314,6 +2275,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
  
         switch (mode) {
         case nir_var_shader_in:
+               /* TODO: remove this after RADV switches to lowered IO */
                 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
                     ctx->stage == MESA_SHADER_TESS_EVAL) {
                         return load_tess_varyings(ctx, instr, true);
@@ -2369,6 +2331,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
                 }
                 break;
         case nir_var_shader_out:
+               /* TODO: remove this after RADV switches to lowered IO */
                 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
                         return load_tess_varyings(ctx, instr, false);
                 }
@@ -2489,7 +2452,7 @@ visit_store_var(struct ac_nir_context *ctx,
  
         switch (deref->mode) {
         case nir_var_shader_out:
-
+               /* TODO: remove this after RADV switches to lowered IO */
                 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
                         LLVMValueRef vertex_index = NULL;
                         LLVMValueRef indir_index = NULL;
@@ -2504,7 +2467,9 @@ visit_store_var(struct ac_nir_context *ctx,
  
                         ctx->abi->store_tcs_outputs(ctx->abi, var,
                                                     vertex_index, indir_index,
-                                                   const_index, src, writemask);
+                                                   const_index, src, writemask,
+                                                   var->data.location_frac,
+                                                   var->data.driver_location);
                         break;
                 }
  
@@ -2626,6 +2591,71 @@ visit_store_var(struct ac_nir_context *ctx,
                 ac_build_endif(&ctx->ac, 7002);
  }
  
+static void
+visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
+{ /* Emits an output store: TESS_CTRL goes through the store_tcs_outputs ABI callback, other stages store per channel into ctx->abi->outputs. */
+       if (ctx->ac.postponed_kill) {
+               LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
+                                                  ctx->ac.postponed_kill, "");
+               ac_build_ifcc(&ctx->ac, cond, 7002); /* conditional region 7002 on the loaded value; its endif is at the end of this function */
+       }
+
+       unsigned base = nir_intrinsic_base(instr);
+       unsigned writemask = nir_intrinsic_write_mask(instr);
+       unsigned component = nir_intrinsic_component(instr);
+       LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
+       nir_src offset = *nir_get_io_offset_src(instr);
+       LLVMValueRef indir_index = NULL;
+
+       if (nir_src_is_const(offset))
+               assert(nir_src_as_uint(offset) == 0); /* a constant offset is only accepted when it is 0 */
+       else
+               indir_index = get_src(ctx, offset); /* non-constant offset is carried as an indirect index */
+
+       switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
+       case 32:
+               break;
+       case 64:
+               writemask = widen_mask(writemask, 2); /* presumably two mask bits per 64-bit component — confirm against widen_mask() */
+               src = LLVMBuildBitCast(ctx->ac.builder, src,
+                                      LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
+                                      ""); /* reinterpret as an f32 vector with twice as many elements */
+               break;
+       default:
+               unreachable("unhandled store_output bit size");
+               return;
+       }
+
+       writemask <<= component; /* mask bits now index channels from 0 rather than from 'component' */
+
+       if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+               nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
+               LLVMValueRef vertex_index =
+                               vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
+
+               ctx->abi->store_tcs_outputs(ctx->abi, NULL,
+                                           vertex_index, indir_index,
+                                           0, src, writemask,
+                                           component, base * 4);
+               return; /* NOTE(review): skips the ac_build_endif() below; confirm postponed_kill is never set for TESS_CTRL */
+       }
+
+       /* No indirect indexing is allowed after this point. */
+       assert(!indir_index);
+
+       for (unsigned chan = 0; chan < 8; chan++) {
+               if (!(writemask & (1 << chan)))
+                       continue;
+
+               LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); /* undo the component shift to index into src */
+               LLVMBuildStore(ctx->ac.builder, value,
+                              ctx->abi->outputs[base * 4 + chan]);
+       }
+
+       if (ctx->ac.postponed_kill)
+               ac_build_endif(&ctx->ac, 7002); /* closes the conditional region opened at the top */
+}
+
  static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
  {
         switch (dim) {
@@ -3623,18 +3653,82 @@ static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx,
         return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
  }
  
-static LLVMValueRef load_input(struct ac_nir_context *ctx,
-                              nir_intrinsic_instr *instr)
+static LLVMValueRef visit_load(struct ac_nir_context *ctx,
+                              nir_intrinsic_instr *instr, bool is_output)
  {
-       unsigned offset_idx = instr->intrinsic == nir_intrinsic_load_input ? 0 : 1;
+       LLVMValueRef values[8];
+       LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
+       LLVMTypeRef component_type;
+       unsigned base = nir_intrinsic_base(instr);
+       unsigned component = nir_intrinsic_component(instr);
+       unsigned count = instr->dest.ssa.num_components *
+                        (instr->dest.ssa.bit_size == 64 ? 2 : 1);
+       nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
+       LLVMValueRef vertex_index =
+               vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
+       nir_src offset = *nir_get_io_offset_src(instr);
+       LLVMValueRef indir_index = NULL;
  
-       /* We only lower inputs for fragment shaders ATM */
-       ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[offset_idx]);
-       assert(offset);
-       assert(offset[0].i32 == 0);
+       if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
+               component_type = LLVMGetElementType(dest_type);
+       else
+               component_type = dest_type;
  
-       unsigned component = nir_intrinsic_component(instr);
-       unsigned index = nir_intrinsic_base(instr);
+       if (nir_src_is_const(offset))
+               assert(nir_src_as_uint(offset) == 0);
+       else
+               indir_index = get_src(ctx, offset);
+
+       if (ctx->stage == MESA_SHADER_TESS_CTRL ||
+           (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
+               LLVMValueRef result =
+                       ctx->abi->load_tess_varyings(ctx->abi, component_type,
+                                                    vertex_index, indir_index,
+                                                    0, 0, base * 4,
+                                                    component,
+                                                    instr->num_components,
+                                                    false, false, !is_output);
+               if (instr->dest.ssa.bit_size == 16) {
+                       result = ac_to_integer(&ctx->ac, result);
+                       result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
+               }
+               return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+       }
+
+       /* No indirect indexing is allowed after this point. */
+       assert(!indir_index);
+
+       if (ctx->stage == MESA_SHADER_GEOMETRY) {
+               LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+               assert(nir_src_is_const(*vertex_index_src));
+
+               return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component,
+                                            instr->num_components,
+                                            nir_src_as_uint(*vertex_index_src),
+                                            0, type);
+       }
+
+       if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
+           nir_intrinsic_io_semantics(instr).fb_fetch_output)
+               return ctx->abi->emit_fbfetch(ctx->abi);
+
+       /* Other non-fragment cases have inputs and outputs in temporaries. */
+       if (ctx->stage != MESA_SHADER_FRAGMENT) {
+               for (unsigned chan = component; chan < count + component; chan++) {
+                       if (is_output) {
+                               values[chan] = LLVMBuildLoad(ctx->ac.builder,
+                                                            ctx->abi->outputs[base * 4 + chan], "");
+                       } else {
+                               values[chan] = ctx->abi->inputs[base * 4 + chan];
+                               if (!values[chan])
+                                       values[chan] = LLVMGetUndef(ctx->ac.i32);
+                       }
+               }
+               LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
+               return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+       }
+
+       /* Fragment shader inputs. */
         unsigned vertex_id = 2; /* P0 */
  
         if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
@@ -3655,18 +3749,11 @@ static LLVMValueRef load_input(struct ac_nir_context *ctx,
                 }
         }
  
-       LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
-       LLVMValueRef values[8];
-
-       /* Each component of a 64-bit value takes up two GL-level channels. */
-       unsigned num_components = instr->dest.ssa.num_components;
-       unsigned bit_size = instr->dest.ssa.bit_size;
-       unsigned channels =
-               bit_size == 64 ? num_components * 2 : num_components;
+       LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
  
-       for (unsigned chan = 0; chan < channels; chan++) {
+       for (unsigned chan = 0; chan < count; chan++) {
                 if (component + chan > 4)
-                       attr_number = LLVMConstInt(ctx->ac.i32, index + 1, false);
+                       attr_number = LLVMConstInt(ctx->ac.i32, base + 1, false);
                 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
                 values[chan] = ac_build_fs_interp_mov(&ctx->ac,
                                                       LLVMConstInt(ctx->ac.i32, vertex_id, false),
@@ -3675,16 +3762,12 @@ static LLVMValueRef load_input(struct ac_nir_context *ctx,
                                                       ac_get_arg(&ctx->ac, ctx->args->prim_mask));
                 values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
                 values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
-                                                      bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");
+                                                      instr->dest.ssa.bit_size == 16 ? ctx->ac.i16
+                                                                                     : ctx->ac.i32, "");
         }
  
-       LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, channels);
-       if (bit_size == 64) {
-               LLVMTypeRef type = num_components == 1 ? ctx->ac.i64 :
-                       LLVMVectorType(ctx->ac.i64, num_components);
-               result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
-       }
-       return result;
+       LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count);
+       return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
  }
  
  static void visit_intrinsic(struct ac_nir_context *ctx,
@@ -3881,6 +3964,19 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
         case nir_intrinsic_store_deref:
                 visit_store_var(ctx, instr);
                 break;
+       case nir_intrinsic_load_input:
+       case nir_intrinsic_load_input_vertex:
+       case nir_intrinsic_load_per_vertex_input:
+               result = visit_load(ctx, instr, false);
+               break;
+       case nir_intrinsic_load_output:
+       case nir_intrinsic_load_per_vertex_output:
+               result = visit_load(ctx, instr, true);
+               break;
+       case nir_intrinsic_store_output:
+       case nir_intrinsic_store_per_vertex_output:
+               visit_store_output(ctx, instr);
+               break;
         case nir_intrinsic_load_shared:
                 result = visit_load_shared(ctx, instr);
                 break;
@@ -4048,10 +4144,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
                                                  instr->dest.ssa.bit_size);
                 break;
         }
-       case nir_intrinsic_load_input:
-       case nir_intrinsic_load_input_vertex:
-               result = load_input(ctx, instr);
-               break;
         case nir_intrinsic_emit_vertex:
                 ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
                 break;
@@ -5081,7 +5173,7 @@ static void visit_deref(struct ac_nir_context *ctx,
                 break;
         case nir_deref_type_ptr_as_array:
                 if (instr->mode == nir_var_mem_global) {
-                       unsigned stride = nir_deref_instr_ptr_as_array_stride(instr);
+                       unsigned stride = nir_deref_instr_array_stride(instr);
  
                         LLVMValueRef index = get_src(ctx, instr->arr.index);
                         if (LLVMTypeOf(index) != ctx->ac.i64)
@@ -5384,9 +5476,13 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
  
         ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
  
-       nir_foreach_shader_out_variable(variable, nir)
-               ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
-                                            ctx.stage);
+       /* TODO: remove this after RADV switches to lowered IO */
+       if (!nir->info.io_lowered) {
+               nir_foreach_shader_out_variable(variable, nir) {
+                       ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
+                                                    ctx.stage);
+               }
+       }
  
         ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
@@ -5481,33 +5577,26 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
          */
         indirect_mask |= nir_var_function_temp;
  
-       progress |= nir_lower_indirect_derefs(nir, indirect_mask);
+       progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
         return progress;
  }
  
  static unsigned
  get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
  {
-       if (intrin->intrinsic != nir_intrinsic_store_deref)
+       if (intrin->intrinsic != nir_intrinsic_store_output) /* only store_output intrinsics contribute to the mask */
                 return 0;
  
-       nir_variable *var =
-               nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));
+       unsigned writemask = nir_intrinsic_write_mask(intrin) <<
+                            nir_intrinsic_component(intrin); /* write mask positioned at the first written component */
+       unsigned location = nir_intrinsic_io_semantics(intrin).location;
  
-       if (var->data.mode != nir_var_shader_out)
-               return 0;
+       if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
+               return writemask << 4; /* outer levels are reported 4 bits above inner levels */
+       else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
+               return writemask;
  
-       unsigned writemask = 0;
-       const int location = var->data.location;
-       unsigned first_component = var->data.location_frac;
-       unsigned num_comps = intrin->dest.ssa.num_components;
-
-       if (location == VARYING_SLOT_TESS_LEVEL_INNER)
-               writemask = ((1 << (num_comps + 1)) - 1) << first_component;
-       else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
-               writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4;
-
-       return writemask;
+       return 0; /* store to a location that is neither tess level slot */
  }
  
  static void