gallivm: allow to pass two swizzles into fetches.
author Dave Airlie <airlied@redhat.com>
Mon, 27 Aug 2018 01:03:41 +0000 (02:03 +0100)
committer Dave Airlie <airlied@redhat.com>
Wed, 29 Aug 2018 23:15:40 +0000 (00:15 +0100)
This hijacks the top 16 bits of the swizzle argument to pass in the
swizzle for the second channel.

This fixes handling .yx swizzles of 64-bit values.

This should fix up radeonsi and llvmpipe.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107524
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

index 64d2cd703be7fc805b2a39d54749103b2ce200c9..2c3be8fb1275ae91f3f4c84e20bd57f7248af904 100644 (file)
@@ -353,6 +353,15 @@ lp_build_emit_fetch_src(
          assert(0 && "invalid swizzle in emit_fetch()");
          return bld_base->base.undef;
       }
+      if (tgsi_type_is_64bit(stype)) {
+        unsigned swizzle2;
+        swizzle2 = tgsi_util_get_full_src_register_swizzle(reg, chan_index + 1);
+        if (swizzle2 > 3) {
+           assert(0 && "invalid swizzle in emit_fetch()");
+           return bld_base->base.undef;
+        }
+        swizzle |= (swizzle2 << 16);
+      }
    }
 
    assert(reg->Register.Index <= bld_base->info->file_max[reg->Register.File]);
index 83d7dbea9a21113f127114a4de45d6b370778397..79ece639e35539eeb897a36530126c663dfa73e6 100644 (file)
@@ -1190,7 +1190,7 @@ emit_fetch_constant(
    struct lp_build_tgsi_context * bld_base,
    const struct tgsi_full_src_register * reg,
    enum tgsi_opcode_type stype,
-   unsigned swizzle)
+   unsigned swizzle_in)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -1200,6 +1200,7 @@ emit_fetch_constant(
    LLVMValueRef consts_ptr;
    LLVMValueRef num_consts;
    LLVMValueRef res;
+   unsigned swizzle = swizzle_in & 0xffff;
 
    /* XXX: Handle fetching xyzw components as a vector */
    assert(swizzle != ~0u);
@@ -1241,7 +1242,7 @@ emit_fetch_constant(
 
       if (tgsi_type_is_64bit(stype)) {
          LLVMValueRef swizzle_vec2;
-         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
+         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
          index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
          index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
       }
@@ -1256,21 +1257,42 @@ emit_fetch_constant(
 
       scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                 &index, 1, "");
-      if (stype == TGSI_TYPE_DOUBLE) {
-         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
-         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
-         bld_broad = &bld_base->dbl_bld;
-      } else if (stype == TGSI_TYPE_UNSIGNED64) {
-         LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
-         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
-         bld_broad = &bld_base->uint64_bld;
-      } else if (stype == TGSI_TYPE_SIGNED64) {
-         LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
-         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
-         bld_broad = &bld_base->int64_bld;
+
+      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
+
+         LLVMValueRef scalar2, scalar2_ptr;
+         LLVMValueRef shuffles[2];
+         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
+
+         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
+                                    &index, 1, "");
+
+         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
+         shuffles[0] = lp_build_const_int32(gallivm, 0);
+         shuffles[1] = lp_build_const_int32(gallivm, 1);
+
+         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
+         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
+         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
+      } else {
+        if (stype == TGSI_TYPE_DOUBLE) {
+           LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
+           scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
+           bld_broad = &bld_base->dbl_bld;
+        } else if (stype == TGSI_TYPE_UNSIGNED64) {
+           LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
+           scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
+           bld_broad = &bld_base->uint64_bld;
+        } else if (stype == TGSI_TYPE_SIGNED64) {
+           LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
+           scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
+           bld_broad = &bld_base->int64_bld;
+        }
+        scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+        res = lp_build_broadcast_scalar(bld_broad, scalar);
       }
-      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
-      res = lp_build_broadcast_scalar(bld_broad, scalar);
+
    }
 
    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
@@ -1319,12 +1341,13 @@ emit_fetch_immediate(
    struct lp_build_tgsi_context * bld_base,
    const struct tgsi_full_src_register * reg,
    enum tgsi_opcode_type stype,
-   unsigned swizzle)
+   unsigned swizzle_in)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef res = NULL;
+   unsigned swizzle = swizzle_in & 0xffff;
 
    if (bld->use_immediates_array || reg->Register.Indirect) {
       LLVMValueRef imms_array;
@@ -1355,7 +1378,7 @@ emit_fetch_immediate(
          if (tgsi_type_is_64bit(stype))
             index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
-                                              swizzle + 1,
+                                              swizzle_in >> 16,
                                               FALSE);
          /* Gather values from the immediate register array */
          res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
@@ -1371,7 +1394,7 @@ emit_fetch_immediate(
             LLVMValueRef imms_ptr2;
             LLVMValueRef res2;
             gep[1] = lp_build_const_int32(gallivm,
-                                          reg->Register.Index * 4 + swizzle + 1);
+                                          reg->Register.Index * 4 + (swizzle_in >> 16));
             imms_ptr2 = LLVMBuildGEP(builder,
                                      bld->imms_array, gep, 2, "");
             res2 = LLVMBuildLoad(builder, imms_ptr2, "");
@@ -1382,7 +1405,7 @@ emit_fetch_immediate(
    else {
       res = bld->immediates[reg->Register.Index][swizzle];
       if (tgsi_type_is_64bit(stype))
-         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
+         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
    }
 
    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
@@ -1397,12 +1420,13 @@ emit_fetch_input(
    struct lp_build_tgsi_context * bld_base,
    const struct tgsi_full_src_register * reg,
    enum tgsi_opcode_type stype,
-   unsigned swizzle)
+   unsigned swizzle_in)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef res;
+   unsigned swizzle = swizzle_in & 0xffff;
 
    if (reg->Register.Indirect) {
       LLVMValueRef indirect_index;
@@ -1423,7 +1447,7 @@ emit_fetch_input(
       if (tgsi_type_is_64bit(stype)) {
          index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
-                                           swizzle + 1,
+                                           swizzle_in >> 16,
                                            TRUE);
       }
       /* cast inputs_array pointer to float* */
@@ -1446,7 +1470,7 @@ emit_fetch_input(
             LLVMValueRef res2;
 
             lindex1 = lp_build_const_int32(gallivm,
-                                           reg->Register.Index * 4 + swizzle + 1);
+                                           reg->Register.Index * 4 + (swizzle_in >> 16));
             input_ptr2 = LLVMBuildGEP(builder,
                                       bld->inputs_array, &lindex1, 1, "");
             res2 = LLVMBuildLoad(builder, input_ptr2, "");
@@ -1456,7 +1480,7 @@ emit_fetch_input(
       else {
          res = bld->inputs[reg->Register.Index][swizzle];
          if (tgsi_type_is_64bit(stype))
-            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
+            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
       }
    }
 
@@ -1476,7 +1500,7 @@ emit_fetch_gs_input(
    struct lp_build_tgsi_context * bld_base,
    const struct tgsi_full_src_register * reg,
    enum tgsi_opcode_type stype,
-   unsigned swizzle)
+   unsigned swizzle_in)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
@@ -1484,6 +1508,7 @@ emit_fetch_gs_input(
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef attrib_index = NULL;
    LLVMValueRef vertex_index = NULL;
+   unsigned swizzle = swizzle_in & 0xffff;
    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
    LLVMValueRef res;
 
@@ -1525,7 +1550,7 @@ emit_fetch_gs_input(
 
    assert(res);
    if (tgsi_type_is_64bit(stype)) {
-      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
+      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
       LLVMValueRef res2;
       res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                         reg->Dimension.Indirect,
@@ -1549,12 +1574,13 @@ emit_fetch_temporary(
    struct lp_build_tgsi_context * bld_base,
    const struct tgsi_full_src_register * reg,
    enum tgsi_opcode_type stype,
-   unsigned swizzle)
+   unsigned swizzle_in)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef res;
+   unsigned swizzle = swizzle_in & 0xffff;
 
    if (reg->Register.Indirect) {
       LLVMValueRef indirect_index;
@@ -1574,7 +1600,7 @@ emit_fetch_temporary(
       if (tgsi_type_is_64bit(stype)) {
                index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                                   indirect_index,
-                                                  swizzle + 1,
+                                                  swizzle_in >> 16,
                                                   TRUE);
       }
 
@@ -1593,7 +1619,7 @@ emit_fetch_temporary(
       if (tgsi_type_is_64bit(stype)) {
          LLVMValueRef temp_ptr2, res2;
 
-         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
+         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
          res2 = LLVMBuildLoad(builder, temp_ptr2, "");
          res = emit_fetch_64bit(bld_base, stype, res, res2);
       }
@@ -1616,7 +1642,7 @@ emit_fetch_system_value(
    struct lp_build_tgsi_context * bld_base,
    const struct tgsi_full_src_register * reg,
    enum tgsi_opcode_type stype,
-   unsigned swizzle)
+   unsigned swizzle_in)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
index c51e91b1d3d58b12b8e62496fc422cdc426a594b..d8930bfd50ef7fbe3adb62ff52525f477a00e6a3 100644 (file)
@@ -2397,16 +2397,17 @@ static LLVMValueRef fetch_constant(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type,
-       unsigned swizzle)
+       unsigned swizzle_in)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader_selector *sel = ctx->shader->selector;
        const struct tgsi_ind_register *ireg = &reg->Indirect;
        unsigned buf, idx;
+       unsigned swizzle = swizzle_in & 0xffff;
 
        LLVMValueRef addr, bufp;
 
-       if (swizzle == LP_CHAN_ALL) {
+       if (swizzle_in == LP_CHAN_ALL) {
                unsigned chan;
                LLVMValueRef values[4];
                for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
@@ -2420,7 +2421,7 @@ static LLVMValueRef fetch_constant(
                LLVMValueRef lo, hi;
 
                lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle);
-               hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle + 1);
+               hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, (swizzle_in >> 16));
                return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                lo, hi);
        }
index 20164939cb762d5ea39468d503d4774920c3ffe8..d48eda1b100deabf546d1e5fdbc4c624c9fe896a 100644 (file)
@@ -445,13 +445,14 @@ get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                                const struct tgsi_full_src_register *reg,
                                enum tgsi_opcode_type type,
-                               unsigned swizzle)
+                               unsigned swizzle_in)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMBuilderRef builder = ctx->ac.builder;
        LLVMValueRef result = NULL, ptr, ptr2;
+       unsigned swizzle = swizzle_in & 0xffff;
 
-       if (swizzle == ~0) {
+       if (swizzle_in == ~0) {
                LLVMValueRef values[TGSI_NUM_CHANNELS];
                unsigned chan;
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
@@ -476,7 +477,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                                                        ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
                                                        ctx->i32_0);
                        result = LLVMConstInsertElement(result,
-                                                       ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
+                                                       ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)],
                                                        ctx->i32_1);
                        return LLVMConstBitCast(result, ctype);
                } else {
@@ -503,7 +504,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 
                if (tgsi_type_is_64bit(type)) {
                        ptr = result;
-                       ptr2 = input[swizzle + 1];
+                       ptr2 = input[swizzle_in >> 16];
                        return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                        ptr, ptr2);
                }
@@ -515,7 +516,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                        return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
                ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
                if (tgsi_type_is_64bit(type)) {
-                       ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
+                       ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)];
                        return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                        LLVMBuildLoad(builder, ptr, ""),
                                                        LLVMBuildLoad(builder, ptr2, ""));
@@ -526,7 +527,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
        case TGSI_FILE_OUTPUT:
                ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
                if (tgsi_type_is_64bit(type)) {
-                       ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
+                       ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16));
                        return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                        LLVMBuildLoad(builder, ptr, ""),
                                                        LLVMBuildLoad(builder, ptr2, ""));
@@ -544,11 +545,12 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
                                       const struct tgsi_full_src_register *reg,
                                       enum tgsi_opcode_type type,
-                                      unsigned swizzle)
+                                      unsigned swizzle_in)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMBuilderRef builder = ctx->ac.builder;
        LLVMValueRef cval = ctx->system_values[reg->Register.Index];
+       unsigned swizzle = swizzle_in & 0xffff;
 
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef lo, hi;
@@ -558,7 +560,7 @@ static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
                lo = LLVMBuildExtractElement(
                        builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
                hi = LLVMBuildExtractElement(
-                       builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
+                       builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), "");
 
                return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                lo, hi);