ac: replace llvm.SI.tbuffer.store with llvm.amdgcn.buffer.store if ADD_TID=0
author: Marek Olšák <marek.olsak@amd.com>
Fri, 24 Feb 2017 01:09:47 +0000 (02:09 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Fri, 3 Mar 2017 14:29:30 +0000 (15:29 +0100)
ADD_TID doesn't work with the new intrinsic, so stores that use ADD_TID keep the old code path. Needs more investigation.

v2: remove leftover dead code

Reviewed-by: Dave Airlie <airlied@redhat.com> (v1)
src/amd/common/ac_llvm_build.c
src/amd/common/ac_llvm_build.h
src/amd/common/ac_nir_to_llvm.c
src/gallium/drivers/radeonsi/si_shader.c

index 08fedc7bf415677410f2bd78993e07cbce3979b8..9435b189de4d4b603f19d8809f60f4c94714de38 100644 (file)
@@ -551,8 +551,64 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            LLVMValueRef soffset,
                            unsigned inst_offset,
                            bool glc,
-                           bool slc)
+                           bool slc,
+                           bool writeonly_memory,
+                           bool has_add_tid)
 {
+       /* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
+       if (HAVE_LLVM >= 0x0309 && !has_add_tid) {
+               /* Split 3 channel stores, because LLVM doesn't support 3-channel
+                * intrinsics. */
+               if (num_channels == 3) {
+                       LLVMValueRef v[3], v01;
+
+                       for (int i = 0; i < 3; i++) {
+                               v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
+                                               LLVMConstInt(ctx->i32, i, 0), "");
+                       }
+                       v01 = ac_build_gather_values(ctx, v, 2);
+
+                       ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
+                                                   soffset, inst_offset, glc, slc,
+                                                   writeonly_memory, has_add_tid);
+                       ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
+                                                   soffset, inst_offset + 8,
+                                                   glc, slc,
+                                                   writeonly_memory, has_add_tid);
+                       return;
+               }
+
+               unsigned func = CLAMP(num_channels, 1, 3) - 1;
+               static const char *types[] = {"f32", "v2f32", "v4f32"};
+               char name[256];
+               LLVMValueRef offset = soffset;
+
+               if (inst_offset)
+                       offset = LLVMBuildAdd(ctx->builder, offset,
+                                             LLVMConstInt(ctx->i32, inst_offset, 0), "");
+               if (voffset)
+                       offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+
+               LLVMValueRef args[] = {
+                       bitcast_to_float(ctx, vdata),
+                       LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
+                       LLVMConstInt(ctx->i32, 0, 0),
+                       offset,
+                       LLVMConstInt(ctx->i1, glc, 0),
+                       LLVMConstInt(ctx->i1, slc, 0),
+               };
+
+               snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
+                        types[func]);
+
+               ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
+                                      args, ARRAY_SIZE(args),
+                                      writeonly_memory ?
+                                       AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
+                                       AC_FUNC_ATTR_WRITEONLY);
+               return;
+       }
+
        static unsigned dfmt[] = {
                V_008F0C_BUF_DATA_FORMAT_32,
                V_008F0C_BUF_DATA_FORMAT_32_32,
index 78df441b6dc215f39b053622f8bfc38d06a1bf08..aa99e92e2567c8067895a9f3332b8d24b092c71b 100644 (file)
@@ -130,7 +130,9 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            LLVMValueRef soffset,
                            unsigned inst_offset,
                            bool glc,
-                           bool slc);
+                           bool slc,
+                           bool writeonly_memory,
+                           bool has_add_tid);
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
                     LLVMValueRef rsrc,
index c3634701dadb25a6014d72cdf59cac13d7df9aab..2c9ef4916bacb459ba6fbe9a308517f3d81346cb 100644 (file)
@@ -3159,7 +3159,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
                        ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
                                                    out_val, 1,
                                                    voffset, ctx->gs2vs_offset, 0,
-                                                   1, 1);
+                                                   1, 1, true, true);
                }
                idx += slot_inc;
        }
@@ -4675,7 +4675,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
                                               out_val, 1,
                                               NULL, ctx->es2gs_offset,
                                               (4 * param_index + j + start) * 4,
-                                              1, 1);
+                                              1, 1, true, true);
                }
        }
        ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16;
index c9dab8066cd7cdee37cd078365453a0dd7f37086..9538304562d583a566d0ed9d31ed71cce481dd52 100644 (file)
@@ -1049,7 +1049,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) {
                        ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
                                                    buf_addr, base,
-                                                   4 * chan_index, 1, 0);
+                                                   4 * chan_index, 1, 0, true, false);
                }
        }
 
@@ -1057,7 +1057,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                LLVMValueRef value = lp_build_gather_values(bld_base->base.gallivm,
                                                            values, 4);
                ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
-                                           base, 0, 1, 0);
+                                           base, 0, 1, 0, true, false);
        }
 }
 
@@ -2087,7 +2087,7 @@ static void emit_streamout_output(struct si_shader_context *ctx,
                                    vdata, num_comps,
                                    so_write_offsets[buf_idx],
                                    LLVMConstInt(ctx->i32, 0, 0),
-                                   stream_out->dst_offset * 4, 1, 1);
+                                   stream_out->dst_offset * 4, 1, 1, true, false);
 }
 
 /**
@@ -2412,7 +2412,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
                                              lds_ptr);
 
                ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
-                                           buffer_offset, 0, 1, 0);
+                                           buffer_offset, 0, 1, 0, true, false);
        }
 }
 
@@ -2527,18 +2527,18 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
        ac_build_buffer_store_dword(&ctx->ac, buffer,
                                    lp_build_const_int32(gallivm, 0x80000000),
                                    1, lp_build_const_int32(gallivm, 0), tf_base,
-                                   0, 1, 0);
+                                   0, 1, 0, true, false);
 
        lp_build_endif(&inner_if_ctx);
 
        /* Store the tessellation factors. */
        ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
                                    MIN2(stride, 4), byteoffset, tf_base,
-                                   4, 1, 0);
+                                   4, 1, 0, true, false);
        if (vec1)
                ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
                                            stride - 4, byteoffset, tf_base,
-                                           20, 1, 0);
+                                           20, 1, 0, true, false);
 
        /* Store the tess factors into the offchip buffer if TES reads them. */
        if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
@@ -2560,7 +2560,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 
                ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
                                            outer_comps, tf_outer_offset,
-                                           base, 0, 1, 0);
+                                           base, 0, 1, 0, true, false);
                if (inner_comps) {
                        param_inner = si_shader_io_get_unique_index(
                                              TGSI_SEMANTIC_TESSINNER, 0);
@@ -2571,7 +2571,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
                                    lp_build_gather_values(gallivm, inner, inner_comps);
                        ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
                                                    inner_comps, tf_inner_offset,
-                                                   base, 0, 1, 0);
+                                                   base, 0, 1, 0, true, false);
                }
        }
 
@@ -2695,7 +2695,7 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
                                                    ctx->esgs_ring,
                                                    out_val, 1, NULL, soffset,
                                                    (4 * param_index + chan) * 4,
-                                                   1, 1);
+                                                   1, 1, true, true);
                }
        }
 }
@@ -5063,7 +5063,7 @@ static void si_llvm_emit_vertex(
                                                    ctx->gsvs_ring[stream],
                                                    out_val, 1,
                                                    voffset, soffset, 0,
-                                                   1, 1);
+                                                   1, 1, true, true);
                }
        }