radv/ac: gather4 cube workaround integer

author Dave Airlie <airlied@redhat.com>

Mon, 27 Feb 2017 01:30:41 +0000 (11:30 +1000)

committer Dave Airlie <airlied@redhat.com>

Tue, 14 Mar 2017 23:51:53 +0000 (09:51 +1000)
author Dave Airlie <airlied@redhat.com>
Mon, 27 Feb 2017 01:30:41 +0000 (11:30 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 14 Mar 2017 23:51:53 +0000 (09:51 +1000)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c

index 58f512ea99724e9edcc8e2090b21f7f0eb97847d..01346c35f7173625e3719c82534240a091ca26be 100644 (file)
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1636,8 +1636,11 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
                                                struct ac_image_args *args,
                                                nir_tex_instr *instr)
  {
+       enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
         LLVMValueRef coord = args->addr;
         LLVMValueRef half_texel[2];
+       LLVMValueRef compare_cube_wa;
+       LLVMValueRef result;
         int c;
         unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
  
@@ -1662,6 +1665,8 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
                 }
         }
  
+       LLVMValueRef orig_coords = args->addr;
+
         for (c = 0; c < 2; c++) {
                 LLVMValueRef tmp;
                 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
@@ -1672,8 +1677,73 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
                 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
         }
  
+
+       /*
+        * Apparently cube has an issue with integer types that the workaround doesn't solve,
+        * so this tests if the format is 8_8_8_8 and an integer type, and if so does an
+        * alternate workaround by sampling using a scaled type and converting.
+        * This is taken from amdgpu-pro shaders.
+        */
+       /* NOTE: this produces some ugly code compared to amdgpu-pro;
+        * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
+        * and then reads them back. amdgpu-pro generates two selects:
+        * one s_cmp for the descriptor rewriting, and
+        * one v_cmp for the coordinate and result changes.
+        */
+       if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+               LLVMValueRef tmp, tmp2;
+
+               /* workaround 8/8/8/8 uint/sint cube gather bug */
+               /* first detect it then change to a scaled read and f2i */
+               tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32one, "");
+               tmp2 = tmp;
+
+               /* extract the DATA_FORMAT */
+               tmp = ac_build_bfe(&ctx->ac, tmp, LLVMConstInt(ctx->i32, 20, false),
+                                  LLVMConstInt(ctx->i32, 6, false), false);
+
+               /* is the DATA_FORMAT == 8_8_8_8 */
+               compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
+
+               if (stype == GLSL_TYPE_UINT)
+                       /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
+                       tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
+                                             LLVMConstInt(ctx->i32, 0x10000000, false), "");
+               else
+                       /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
+                       tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
+                                             LLVMConstInt(ctx->i32, 0x14000000, false), "");
+
+               /* replace the NUM FORMAT in the descriptor */
+               tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), "");
+               tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
+
+               args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32one, "");
+
+               /* don't modify the coordinates for this case */
+               coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
+       }
         args->addr = coord;
-       return ac_build_image_opcode(&ctx->ac, args);
+       result = ac_build_image_opcode(&ctx->ac, args);
+
+       if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+               LLVMValueRef tmp, tmp2;
+
+               /* if the cube workaround is in place, f2i the result. */
+               for (c = 0; c < 4; c++) {
+                       tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
+                       if (stype == GLSL_TYPE_UINT)
+                               tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
+                       else
+                               tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
+                       tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
+                       tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
+                       tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
+                       tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
+                       result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
+               }
+       }
+       return result;
  }
  
  static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
author	Dave Airlie <airlied@redhat.com>
	Mon, 27 Feb 2017 01:30:41 +0000 (11:30 +1000)
committer	Dave Airlie <airlied@redhat.com>
	Tue, 14 Mar 2017 23:51:53 +0000 (09:51 +1000)