From 8cf552b1823e6c0ea8ce0465937920b4cab83465 Mon Sep 17 00:00:00 2001
From: Vadim Girlin <vadimgirlin@gmail.com>
Date: Tue, 18 Dec 2012 17:39:19 +0400
Subject: [PATCH] radeon/llvm: improve cube map handling
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Add support for TEX2, TXB2, TXL2, fix SHADOWCUBE

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/r600/r600_llvm.c          |  3 +
 src/gallium/drivers/radeon/radeon_llvm.h      |  3 +-
 .../drivers/radeon/radeon_setup_tgsi_llvm.c   | 86 +++++++++++++++----
 .../drivers/radeonsi/radeonsi_shader.c        | 14 ++-
 4 files changed, 84 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 8f1ed26b4c6..17d362c0aaf 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -421,9 +421,12 @@ LLVMModuleRef r600_tgsi_llvm(
 	bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 61f37722f5d..b8dc771226a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -153,7 +153,8 @@ static inline LLVMValueRef bitcast(
 
 
 void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
-                                          LLVMValueRef *arg, unsigned target);
+                                          struct lp_build_emit_data * emit_data,
+                                          unsigned coord_arg);
 
 void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
 
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 647b66566bd..d390f927a62 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -513,29 +513,29 @@ static void kil_emit(
 	}
 }
 
-
+/* coord_arg - index of the source coord vector in the emit_data->args array */
 void radeon_llvm_emit_prepare_cube_coords(
 		struct lp_build_tgsi_context * bld_base,
-		LLVMValueRef *arg,
-                unsigned target)
+		struct lp_build_emit_data * emit_data,
+		unsigned coord_arg)
 {
-	boolean shadowcube = (target == TGSI_TEXTURE_SHADOWCUBE);
+
+	unsigned target = emit_data->inst->Texture.Texture;
+	unsigned opcode = emit_data->inst->Instruction.Opcode;
 	struct gallivm_state * gallivm = bld_base->base.gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMTypeRef type = bld_base->base.elem_type;
 	LLVMValueRef coords[4];
 	LLVMValueRef mad_args[3];
-	unsigned i, cnt;
+	LLVMValueRef idx;
+	unsigned i;
 
 	LLVMValueRef v = build_intrinsic(builder, "llvm.AMDGPU.cube",
 			LLVMVectorType(type, 4),
-			arg, 1, LLVMReadNoneAttribute);
-
-	/* save src.w for shadow cube */
-	cnt = shadowcube ? 3 : 4;
+			&emit_data->args[coord_arg], 1, LLVMReadNoneAttribute);
 
-	for (i = 0; i < cnt; ++i) {
-		LLVMValueRef idx = lp_build_const_int32(gallivm, i);
+	for (i = 0; i < 4; ++i) {
+		idx = lp_build_const_int32(gallivm, i);
 		coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
 	}
 
@@ -554,13 +554,42 @@ void radeon_llvm_emit_prepare_cube_coords(
 	coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
 			mad_args[0], mad_args[1], mad_args[2]);
 
-	/* apply yxwy swizzle to cooords */
+	/* apply xyz = yxw swizzle to coords */
 	coords[2] = coords[3];
 	coords[3] = coords[1];
 	coords[1] = coords[0];
 	coords[0] = coords[3];
 
-	*arg = lp_build_gather_values(bld_base->base.gallivm, coords, 4);
+	/* all cases except simple cube map sampling require special handling
+	 * for coord vector */
+	if (target != TGSI_TEXTURE_CUBE ||
+		opcode != TGSI_OPCODE_TEX) {
+
+		/* load source coord.w component - array_index for cube arrays or
+		 * compare value for SHADOWCUBE */
+		idx = lp_build_const_int32(gallivm, 3);
+		coords[3] = LLVMBuildExtractElement(builder,
+				emit_data->args[coord_arg], idx, "");
+
+		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
+		if (target == TGSI_TEXTURE_CUBE_ARRAY ||
+			target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
+
+			coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
+					coords[3], lp_build_const_float(gallivm, 8.0), coords[2]);
+		}
+
+		/* for instructions that need additional src (compare/lod/bias),
+		 * put it in coord.w */
+		if (opcode == TGSI_OPCODE_TEX2 ||
+			opcode == TGSI_OPCODE_TXB2 ||
+			opcode == TGSI_OPCODE_TXL2) {
+			coords[3] = emit_data->args[coord_arg + 1];
+		}
+	}
+
+	emit_data->args[coord_arg] =
+			lp_build_gather_values(bld_base->base.gallivm, coords, 4);
 }
 
 static void txd_fetch_args(
@@ -607,11 +636,12 @@ static void txp_fetch_args(
 	emit_data->arg_count = 1;
 
 	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
-	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
+	     inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
+	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, &emit_data->args[0],
-                                                     inst->Texture.Texture);
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 0);
 	}
 }
 
@@ -640,12 +670,24 @@ static void tex_fetch_args(
 						coords, 4);
 	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
 
+	if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
+		inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
+		inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
+		/* These instructions have additional operand that should be packed
+		 * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
+		 * That operand should be passed as a float value in the args array
+		 * right after the coord vector. After packing it's not used anymore,
+		 * that's why arg_count is not increased */
+		emit_data->args[1] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+	}
+
 	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
-	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
+	     inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
+	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, &emit_data->args[0],
-                                                     inst->Texture.Texture);
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 0);
 	}
 }
 
@@ -1143,14 +1185,20 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
 	bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
+	bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
 	bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
+	bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
 	bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
 	bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
 	bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
+	bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
 	bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
 	bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index a452d4bbe9b..5dab46e4d6d 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -813,11 +813,21 @@ static void tex_fetch_args(
 		emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
 							 0, LP_CHAN_ALL);
 
+	if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
+		inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
+		inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
+		/* These instructions have additional operand that should be packed
+		 * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
+		 * That operand should be passed as a float value in the args array
+		 * right after the coord vector. After packing it's not used anymore,
+		 * that's why arg_count is not increased */
+		emit_data->args[2] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+	}
+
 	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
 	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, &emit_data->args[1],
-						     inst->Texture.Texture);
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 1);
 	}
 
 	/* Resource */
-- 
2.30.2