radeonsi/nir: always lower ballot masks as 64-bit, codegen handles it
[mesa.git] / src / amd / common / ac_nir_to_llvm.c
index 5e25e838f8f3f78c5d7a1590768f95bde4d0a828..d97387ef13d275daafdf9d87eb645e1b792a73c2 100644 (file)
@@ -1549,6 +1549,9 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx,
                cache_policy |= ac_glc;
        }
 
+       if (access & ACCESS_STREAM_CACHE_POLICY)
+               cache_policy |= ac_slc;
+
        return cache_policy;
 }
 
@@ -2438,7 +2441,7 @@ static void get_image_coords(struct ac_nir_context *ctx,
                                                               fmask_load_address[2],
                                                               sample_index,
                                                               get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
-                                                                               AC_DESC_FMASK, &instr->instr, false, false));
+                                                                               AC_DESC_FMASK, &instr->instr, true, false));
        }
        if (count == 1 && !gfx9_1d) {
                if (instr->src[1].ssa->num_components)
@@ -2677,6 +2680,27 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
                atomic_name = "cmpswap";
                atomic_subop = 0; /* not used */
                break;
+       case nir_intrinsic_bindless_image_atomic_inc_wrap:
+       case nir_intrinsic_image_deref_atomic_inc_wrap: {
+               atomic_name = "inc";
+               atomic_subop = ac_atomic_inc_wrap;
+               /* ATOMIC_INC instruction does:
+                *      value = (value + 1) % (data + 1)
+                * but we want:
+                *      value = (value + 1) % data
+                * So replace 'data' by 'data - 1'.
+                */
+               ctx->ssa_defs[instr->src[3].ssa->index] =
+                       LLVMBuildSub(ctx->ac.builder,
+                                    ctx->ssa_defs[instr->src[3].ssa->index],
+                                    ctx->ac.i32_1, "");
+               break;
+       }
+       case nir_intrinsic_bindless_image_atomic_dec_wrap:
+       case nir_intrinsic_image_deref_atomic_dec_wrap:
+               atomic_name = "dec";
+               atomic_subop = ac_atomic_dec_wrap;
+               break;
        default:
                abort();
        }
@@ -3080,6 +3104,9 @@ static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx,
                                          unsigned mode,
                                          LLVMValueRef sample_id)
 {
+       if (ctx->abi->interp_at_sample_force_center)
+               return barycentric_center(ctx, mode);
+
        LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
 
        /* fetch sample ID */
@@ -3178,6 +3205,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        switch (instr->intrinsic) {
        case nir_intrinsic_ballot:
                result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
+               if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
+                       result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
                break;
        case nir_intrinsic_read_invocation:
                result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
@@ -3286,6 +3315,10 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        case nir_intrinsic_load_color1:
                result = ctx->abi->color1;
                break;
+       case nir_intrinsic_load_user_data_amd:
+               assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
+               result = ctx->abi->user_data;
+               break;
        case nir_intrinsic_load_instance_id:
                result = ctx->abi->instance_id;
                break;
@@ -3381,6 +3414,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        case nir_intrinsic_bindless_image_atomic_xor:
        case nir_intrinsic_bindless_image_atomic_exchange:
        case nir_intrinsic_bindless_image_atomic_comp_swap:
+       case nir_intrinsic_bindless_image_atomic_inc_wrap:
+       case nir_intrinsic_bindless_image_atomic_dec_wrap:
                result = visit_image_atomic(ctx, instr, true);
                break;
        case nir_intrinsic_image_deref_atomic_add:
@@ -3391,6 +3426,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        case nir_intrinsic_image_deref_atomic_xor:
        case nir_intrinsic_image_deref_atomic_exchange:
        case nir_intrinsic_image_deref_atomic_comp_swap:
+       case nir_intrinsic_image_deref_atomic_inc_wrap:
+       case nir_intrinsic_image_deref_atomic_dec_wrap:
                result = visit_image_atomic(ctx, instr, false);
                break;
        case nir_intrinsic_bindless_image_size:
@@ -3502,10 +3539,16 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
                result = ctx->abi->load_tess_coord(ctx->abi);
                break;
        case nir_intrinsic_load_tess_level_outer:
-               result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER);
+               result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
                break;
        case nir_intrinsic_load_tess_level_inner:
-               result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER);
+               result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
+               break;
+       case nir_intrinsic_load_tess_level_outer_default:
+               result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
+               break;
+       case nir_intrinsic_load_tess_level_inner_default:
+               result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
                break;
        case nir_intrinsic_load_patch_vertices_in:
                result = ctx->abi->load_patch_vertices_in(ctx->abi);
@@ -3596,13 +3639,27 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
                                                 offset);
                LLVMTypeRef comp_type =
                        LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
-               LLVMTypeRef vec_type =
-                       instr->src[0].ssa->num_components == 1 ? comp_type :
-                       LLVMVectorType(comp_type, instr->src[0].ssa->num_components);
                unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
                ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
-                                      LLVMPointerType(vec_type, addr_space), "");
-               LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr);
+                                      LLVMPointerType(comp_type, addr_space), "");
+               LLVMValueRef src = get_src(ctx, instr->src[0]);
+               unsigned wrmask = nir_intrinsic_write_mask(instr);
+               while (wrmask) {
+                       int start, count;
+                       u_bit_scan_consecutive_range(&wrmask, &start, &count);
+                       
+                       LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
+                       LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
+                       LLVMTypeRef vec_type =
+                               count == 1 ? comp_type : LLVMVectorType(comp_type, count);
+                       offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
+                                                     offset_ptr,
+                                                     LLVMPointerType(vec_type, addr_space),
+                                                     "");
+                       LLVMValueRef offset_src =
+                               ac_extract_components(&ctx->ac, src, start, count);
+                       LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
+               }
                break;
        }
        default: