ac/nir: Fix store_scratch with a non-full writemask
author Connor Abbott <cwabbott0@gmail.com>
Fri, 16 Aug 2019 10:46:27 +0000 (12:46 +0200)
committer Connor Abbott <cwabbott0@gmail.com>
Sun, 18 Aug 2019 13:15:45 +0000 (15:15 +0200)
By adding one more helper to ac_llvm_build, we can also easily keep
vector stores together.

Fixes the
tests/spec/glsl-1.30/execution/fs-large-local-array-vec4.shader_test
piglit test.

Fixes: 74470baebbd ("ac/nir: Lower large indirect variables to scratch")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/amd/common/ac_llvm_build.c
src/amd/common/ac_llvm_build.h
src/amd/common/ac_nir_to_llvm.c

index 24970769b87472d5c542d5f6adbdd56d1b2c06e8..823bf34acdbcf9b67fa3ff511698bb5af09eb94a 100644 (file)
@@ -626,6 +626,22 @@ ac_build_expand(struct ac_llvm_context *ctx,
        return ac_build_gather_values(ctx, chan, dst_channels);
 }
 
+/* Extract components [start, start + channels) from a vector.
+ */
+LLVMValueRef
+ac_extract_components(struct ac_llvm_context *ctx,
+                     LLVMValueRef value,
+                     unsigned start,
+                     unsigned channels)
+{
+       LLVMValueRef chan[channels];
+
+       for (unsigned i = 0; i < channels; i++)
+               chan[i] = ac_llvm_extract_elem(ctx, value, i + start);
+
+       return ac_build_gather_values(ctx, chan, channels);
+}
+
 /* Expand a scalar or vector to <4 x type> by filling the remaining channels
  * with undef. Extract at most num_channels components from the input.
  */
index 082201fb048fd6af60ac3e382fcb6b8b2d3ddd43..6848a7ca082f6cf733be6079363bbfd15b0ffd7b 100644 (file)
@@ -190,6 +190,13 @@ LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
                       LLVMValueRef *values,
                       unsigned value_count);
+
+LLVMValueRef
+ac_extract_components(struct ac_llvm_context *ctx,
+                     LLVMValueRef value,
+                     unsigned start,
+                     unsigned channels);
+
 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
                                     LLVMValueRef value,
                                     unsigned num_channels);
index 9b59c82f385fa80d70d34e817e9e542e733417b6..b981d4cc897d68227c62d8eab0cd5e7fc780d250 100644 (file)
@@ -3637,13 +3637,27 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
                                                 offset);
                LLVMTypeRef comp_type =
                        LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
-               LLVMTypeRef vec_type =
-                       instr->src[0].ssa->num_components == 1 ? comp_type :
-                       LLVMVectorType(comp_type, instr->src[0].ssa->num_components);
                unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
                ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
-                                      LLVMPointerType(vec_type, addr_space), "");
-               LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr);
+                                      LLVMPointerType(comp_type, addr_space), "");
+               LLVMValueRef src = get_src(ctx, instr->src[0]);
+               unsigned wrmask = nir_intrinsic_write_mask(instr);
+               while (wrmask) {
+                       int start, count;
+                       u_bit_scan_consecutive_range(&wrmask, &start, &count);
+                       
+                       LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
+                       LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
+                       LLVMTypeRef vec_type =
+                               count == 1 ? comp_type : LLVMVectorType(comp_type, count);
+                       offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
+                                                     offset_ptr,
+                                                     LLVMPointerType(vec_type, addr_space),
+                                                     "");
+                       LLVMValueRef offset_src =
+                               ac_extract_components(&ctx->ac, src, start, count);
+                       LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
+               }
                break;
        }
        default: