struct ac_image_args *args,
nir_tex_instr *instr)
{
+ enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
LLVMValueRef coord = args->addr;
LLVMValueRef half_texel[2];
+ LLVMValueRef compare_cube_wa;
+ LLVMValueRef result;
int c;
unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
}
}
+ LLVMValueRef orig_coords = args->addr;
+
for (c = 0; c < 2; c++) {
LLVMValueRef tmp;
LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
}
+
+ /*
+ * Apparantly cube has issue with integer types that the workaround doesn't solve,
+ * so this tests if the format is 8_8_8_8 and an integer type do an alternate
+ * workaround by sampling using a scaled type and converting.
+ * This is taken from amdgpu-pro shaders.
+ */
+ /* NOTE this produces some ugly code compared to amdgpu-pro,
+ * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
+ * and then reads them back. -pro generates two selects,
+ * one s_cmp for the descriptor rewriting
+ * one v_cmp for the coordinate and result changes.
+ */
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ LLVMValueRef tmp, tmp2;
+
+ /* workaround 8/8/8/8 uint/sint cube gather bug */
+ /* first detect it then change to a scaled read and f2i */
+ tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32one, "");
+ tmp2 = tmp;
+
+ /* extract the DATA_FORMAT */
+ tmp = ac_build_bfe(&ctx->ac, tmp, LLVMConstInt(ctx->i32, 20, false),
+ LLVMConstInt(ctx->i32, 6, false), false);
+
+ /* is the DATA_FORMAT == 8_8_8_8 */
+ compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
+
+ if (stype == GLSL_TYPE_UINT)
+ /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
+ tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
+ LLVMConstInt(ctx->i32, 0x10000000, false), "");
+ else
+ /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
+ tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
+ LLVMConstInt(ctx->i32, 0x14000000, false), "");
+
+ /* replace the NUM FORMAT in the descriptor */
+ tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), "");
+ tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
+
+ args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32one, "");
+
+ /* don't modify the coordinates for this case */
+ coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
+ }
args->addr = coord;
- return ac_build_image_opcode(&ctx->ac, args);
+ result = ac_build_image_opcode(&ctx->ac, args);
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ LLVMValueRef tmp, tmp2;
+
+ /* if the cube workaround is in place, f2i the result. */
+ for (c = 0; c < 4; c++) {
+ tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
+ if (stype == GLSL_TYPE_UINT)
+ tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
+ else
+ tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
+ tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
+ tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
+ tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
+ tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
+ result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
+ }
+ }
+ return result;
}
static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,