radeonsi/nir: Correctly handle double TCS/TES varyings
author Connor Abbott <cwabbott0@gmail.com>
Wed, 29 May 2019 13:48:06 +0000 (15:48 +0200)
committer Connor Abbott <cwabbott0@gmail.com>
Fri, 31 May 2019 09:02:11 +0000 (11:02 +0200)
ac expands the store to 32-bit components for us, but we still have to
deal with storing up to 8 components, and when a varying is split across
two vec4 slots we have to calculate the address again for the second
slot, since they aren't adjacent in memory. I didn't do this on the ac
level because we should generate better indexing arithmetic for the lds
store, where slots are contiguous.

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
src/gallium/drivers/radeonsi/si_shader.c

index d2927d0254b8bed7b5c74017f6fde8bfb5f258d0..5bd65e0f65c3fe37fd68f9358eb7e1e603e7f3c6 100644 (file)
@@ -1253,8 +1253,20 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
        LLVMValueRef value[4];
        for (unsigned i = 0; i < num_components; i++) {
                unsigned offset = i;
-               if (llvm_type_is_64bit(ctx, type))
+               if (llvm_type_is_64bit(ctx, type)) {
                        offset *= 2;
+                       if (offset == 4) {
+                                addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
+                                                                                       vertex_index,
+                                                                                       param_index,
+                                                                                       driver_location + 1,
+                                                                                       info->input_semantic_name,
+                                                                                       info->input_semantic_index,
+                                                                                       is_patch);
+                       }
+
+                        offset = offset % 4;
+               }
 
                offset += component;
                value[i + component] = buffer_load(&ctx->bld_base, type, offset,
@@ -1376,7 +1388,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
        unsigned driver_location = var->data.driver_location;
        LLVMValueRef dw_addr, stride;
        LLVMValueRef buffer, base, addr;
-       LLVMValueRef values[4];
+       LLVMValueRef values[8];
        bool skip_lds_store;
        bool is_tess_factor = false, is_tess_inner = false;
 
@@ -1438,11 +1450,22 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
                                                               info->output_semantic_index,
                                                               is_patch);
 
-       for (unsigned chan = 0; chan < 4; chan++) {
+       for (unsigned chan = 0; chan < 8; chan++) {
                if (!(writemask & (1 << chan)))
                        continue;
                LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
 
+               unsigned buffer_store_offset = chan % 4;
+               if (chan == 4) {
+                        addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
+                                                                               vertex_index,
+                                                                               param_index,
+                                                                               driver_location + 1,
+                                                                               info->output_semantic_name,
+                                                                               info->output_semantic_index,
+                                                                               is_patch);
+               }
+
                /* Skip LDS stores if there is no LDS read of this output. */
                if (!skip_lds_store)
                        lds_store(ctx, chan, dw_addr, value);
@@ -1453,7 +1476,8 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
                if (writemask != 0xF && !is_tess_factor) {
                        ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
                                                    addr, base,
-                                                   4 * chan, 1, 0, true, false);
+                                                   4 * buffer_store_offset,
+                                                    1, 0, true, false);
                }
 
                /* Write tess factors into VGPRs for the epilog. */