radeonsi: fix GLSL textureGrad(samplerCube*) functions
authorMarek Olšák <marek.olsak@amd.com>
Thu, 23 Jul 2015 22:54:08 +0000 (00:54 +0200)
committerMarek Olšák <marek.olsak@amd.com>
Sat, 25 Jul 2015 08:38:14 +0000 (10:38 +0200)
+4 piglits

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeon/radeon_llvm.h
src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
src/gallium/drivers/radeonsi/si_shader.c

index ef09ead9774c3f2b1cc74d8b1cd25d85a6229bd6..950a51beff3514a65c38fe748a135abe02454782 100644 (file)
@@ -173,8 +173,9 @@ static inline LLVMValueRef bitcast(
 
 
 void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
-                                          struct lp_build_emit_data * emit_data,
-                                          LLVMValueRef *coords_arg);
+                                         struct lp_build_emit_data * emit_data,
+                                         LLVMValueRef *coords_arg,
+                                         LLVMValueRef *derivs_arg);
 
 void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
 
index 39bbdb67778b3e1c72bd06e9a3eeefc96391837a..2362979cf875a59d2e7c371cf04c63754e02a78e 100644 (file)
@@ -748,32 +748,24 @@ static void kil_emit(
        }
 }
 
-void radeon_llvm_emit_prepare_cube_coords(
-               struct lp_build_tgsi_context * bld_base,
-               struct lp_build_emit_data * emit_data,
-               LLVMValueRef *coords_arg)
+static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
+                                         LLVMValueRef *in, LLVMValueRef *out)
 {
-
-       unsigned target = emit_data->inst->Texture.Texture;
-       unsigned opcode = emit_data->inst->Instruction.Opcode;
        struct gallivm_state * gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMTypeRef type = bld_base->base.elem_type;
        LLVMValueRef coords[4];
        LLVMValueRef mad_args[3];
-       LLVMValueRef idx;
-       struct LLVMOpaqueValue *cube_vec;
-       LLVMValueRef v;
+       LLVMValueRef v, cube_vec;
        unsigned i;
 
-       cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
+       cube_vec = lp_build_gather_values(bld_base->base.gallivm, in, 4);
        v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
                             &cube_vec, 1, LLVMReadNoneAttribute);
 
-       for (i = 0; i < 4; ++i) {
-               idx = lp_build_const_int32(gallivm, i);
-               coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
-       }
+       for (i = 0; i < 4; ++i)
+               coords[i] = LLVMBuildExtractElement(builder, v,
+                                                   lp_build_const_int32(gallivm, i), "");
 
        coords[2] = build_intrinsic(builder, "fabs",
                        type, &coords[2], 1, LLVMReadNoneAttribute);
@@ -791,10 +783,60 @@ void radeon_llvm_emit_prepare_cube_coords(
                        mad_args[0], mad_args[1], mad_args[2]);
 
        /* apply xyz = yxw swizzle to cooords */
-       coords[2] = coords[3];
-       coords[3] = coords[1];
-       coords[1] = coords[0];
-       coords[0] = coords[3];
+       out[0] = coords[1];
+       out[1] = coords[0];
+       out[2] = coords[3];
+}
+
+void radeon_llvm_emit_prepare_cube_coords(
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data,
+               LLVMValueRef *coords_arg,
+               LLVMValueRef *derivs_arg)
+{
+
+       unsigned target = emit_data->inst->Texture.Texture;
+       unsigned opcode = emit_data->inst->Instruction.Opcode;
+       struct gallivm_state * gallivm = bld_base->base.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
+       LLVMValueRef coords[4];
+       unsigned i;
+
+       radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
+
+       if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
+               LLVMValueRef derivs[4];
+               int axis;
+
+               /* Convert cube derivatives to 2D derivatives. */
+               for (axis = 0; axis < 2; axis++) {
+                       LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
+
+                       /* Shift the cube coordinates by the derivatives to get
+                        * the cube coordinates of the "neighboring pixel".
+                        */
+                       for (i = 0; i < 3; i++)
+                               shifted_cube_coords[i] =
+                                       LLVMBuildFAdd(builder, coords_arg[i],
+                                                     derivs_arg[axis*3+i], "");
+                       shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
+
+                       /* Project the shifted cube coordinates onto the face. */
+                       radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
+                                                     shifted_coords);
+
+                       /* Subtract both sets of 2D coordinates to get 2D derivatives.
+                        * This won't work if the shifted coordinates ended up
+                        * in a different face.
+                        */
+                       for (i = 0; i < 2; i++)
+                               derivs[axis * 2 + i] =
+                                       LLVMBuildFSub(builder, shifted_coords[i],
+                                                     coords[i], "");
+               }
+
+               memcpy(derivs_arg, derivs, sizeof(derivs));
+       }
 
        if (target == TGSI_TEXTURE_CUBE_ARRAY ||
            target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
@@ -864,7 +906,7 @@ static void txp_fetch_args(
             inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
            inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
            inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-               radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
+               radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
        }
 
        emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
@@ -909,7 +951,7 @@ static void tex_fetch_args(
             inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
            inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
            inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-               radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
+               radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
        }
 
        emit_data->arg_count = 1;
index 81f7bdb3472c1e09fbc449506968d7da49342319..bc0f2171a524c05c2936a40c34e0d9729a1f79d1 100644 (file)
@@ -2255,7 +2255,7 @@ static void tex_fetch_args(
        const struct tgsi_full_instruction * inst = emit_data->inst;
        unsigned opcode = inst->Instruction.Opcode;
        unsigned target = inst->Texture.Texture;
-       LLVMValueRef coords[5];
+       LLVMValueRef coords[5], derivs[6];
        LLVMValueRef address[16];
        int ref_pos;
        unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
@@ -2263,6 +2263,7 @@ static void tex_fetch_args(
        unsigned chan;
        unsigned sampler_src;
        unsigned sampler_index;
+       unsigned num_deriv_channels = 0;
        bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
        LLVMValueRef res_ptr, samp_ptr;
 
@@ -2361,18 +2362,13 @@ static void tex_fetch_args(
                }
        }
 
-       if (target == TGSI_TEXTURE_CUBE ||
-           target == TGSI_TEXTURE_CUBE_ARRAY ||
-           target == TGSI_TEXTURE_SHADOWCUBE ||
-           target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
-               radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
-
        /* Pack user derivatives */
        if (opcode == TGSI_OPCODE_TXD) {
-               int num_deriv_channels, param;
+               int param, num_src_deriv_channels;
 
                switch (target) {
                case TGSI_TEXTURE_3D:
+                       num_src_deriv_channels = 3;
                        num_deriv_channels = 3;
                        break;
                case TGSI_TEXTURE_2D:
@@ -2381,27 +2377,44 @@ static void tex_fetch_args(
                case TGSI_TEXTURE_SHADOWRECT:
                case TGSI_TEXTURE_2D_ARRAY:
                case TGSI_TEXTURE_SHADOW2D_ARRAY:
+                       num_src_deriv_channels = 2;
+                       num_deriv_channels = 2;
+                       break;
                case TGSI_TEXTURE_CUBE:
                case TGSI_TEXTURE_SHADOWCUBE:
                case TGSI_TEXTURE_CUBE_ARRAY:
                case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+                       /* Cube derivatives will be converted to 2D. */
+                       num_src_deriv_channels = 3;
                        num_deriv_channels = 2;
                        break;
                case TGSI_TEXTURE_1D:
                case TGSI_TEXTURE_SHADOW1D:
                case TGSI_TEXTURE_1D_ARRAY:
                case TGSI_TEXTURE_SHADOW1D_ARRAY:
+                       num_src_deriv_channels = 1;
                        num_deriv_channels = 1;
                        break;
                default:
                        assert(0); /* no other targets are valid here */
                }
 
-               for (param = 1; param <= 2; param++)
-                       for (chan = 0; chan < num_deriv_channels; chan++)
-                               address[count++] = lp_build_emit_fetch(bld_base, inst, param, chan);
+               for (param = 0; param < 2; param++)
+                       for (chan = 0; chan < num_src_deriv_channels; chan++)
+                               derivs[param * num_src_deriv_channels + chan] =
+                                       lp_build_emit_fetch(bld_base, inst, param+1, chan);
        }
 
+       if (target == TGSI_TEXTURE_CUBE ||
+           target == TGSI_TEXTURE_CUBE_ARRAY ||
+           target == TGSI_TEXTURE_SHADOWCUBE ||
+           target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
+               radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, derivs);
+
+       if (opcode == TGSI_OPCODE_TXD)
+               for (int i = 0; i < num_deriv_channels * 2; i++)
+                       address[count++] = derivs[i];
+
        /* Pack texture coordinates */
        address[count++] = coords[0];
        if (num_coords > 1)