From: Marek Olšák
Date: Fri, 6 Jun 2014 01:00:18 +0000 (+0200)
Subject: radeonsi: implement ARB_texture_gather and Gather functions from GLSL 4.00
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c7b5a5c4a31f855dcbef0000fa307a5b37448f83;p=mesa.git
radeonsi: implement ARB_texture_gather and Gather functions from GLSL 4.00
All ARB_texture_gather and gather-related ARB_gpu_shader5 piglit tests pass.
Reviewed-by: Michel Dänzer
---
diff --git a/docs/GL3.txt b/docs/GL3.txt
index a0735625d52..f2fd11469d5 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -119,7 +119,7 @@ GL 4.0:
GL_ARB_tessellation_shader not started
GL_ARB_texture_buffer_object_rgb32 DONE (i965, nvc0, r600, radeonsi, softpipe)
GL_ARB_texture_cube_map_array DONE (i965, nv50, nvc0, r600, radeonsi, softpipe)
- GL_ARB_texture_gather DONE (i965, nv50, nvc0)
+ GL_ARB_texture_gather DONE (i965, nv50, nvc0, radeonsi)
GL_ARB_transform_feedback2 DONE (i965, nv50, nvc0, r600, radeonsi)
GL_ARB_transform_feedback3 DONE (i965, nv50, nvc0, r600, radeonsi)
diff --git a/docs/relnotes/10.3.html b/docs/relnotes/10.3.html
index c9daea54562..825eb84b64c 100644
--- a/docs/relnotes/10.3.html
+++ b/docs/relnotes/10.3.html
@@ -44,11 +44,12 @@ Note: some of the new features are only available with certain drivers.
+- GL_ARB_compressed_texture_pixel_storage on all drivers
+- GL_ARB_explicit_uniform_location (all drivers that support GLSL)
- GL_ARB_sample_shading on radeonsi
- GL_ARB_stencil_texturing on nv50, nvc0, r600, and radeonsi
- GL_ARB_texture_cube_map_array on radeonsi
-- GL_ARB_compressed_texture_pixel_storage on all drivers
-- GL_ARB_explicit_uniform_location (all drivers that support GLSL)
+- GL_ARB_texture_gather on radeonsi
- GL_ARB_viewport_array on nvc0
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4b96f20b511..a3e18468d38 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -234,6 +234,11 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
+ case PIPE_CAP_TEXTURE_GATHER_SM5:
+ return HAVE_LLVM >= 0x0305;
+ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+ return HAVE_LLVM >= 0x0305 ? 4 : 0;
+
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
@@ -242,8 +247,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
- case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_QUERY_LOD:
@@ -294,11 +297,12 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
- return -8;
+ return -32;
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MAX_TEXEL_OFFSET:
- return 7;
+ return 31;
+
case PIPE_CAP_ENDIANNESS:
return PIPE_ENDIAN_LITTLE;
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 58a2725efc6..a5f0c9e7924 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1538,6 +1538,17 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data);
+static bool tgsi_is_shadow_sampler(unsigned target)
+{
+ switch (target) {
+ case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ case TGSI_TEXTURE_SHADOWCUBE: case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+ case TGSI_TEXTURE_SHADOWRECT: return true;
+ default: return false;
+ }
+}
+
static void tex_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
@@ -1592,6 +1603,31 @@ static void tex_fetch_args(
if (opcode == TGSI_OPCODE_TXP)
coords[3] = bld_base->base.one;
+ /* Pack offsets. */
+ if (opcode == TGSI_OPCODE_TG4 &&
+ inst->Texture.NumOffsets) {
+ /* The offsets are six-bit signed integers packed like this:
+ * X=[5:0], Y=[13:8], and Z=[21:16].
+ */
+ LLVMValueRef offset[3], pack;
+
+ assert(inst->Texture.NumOffsets == 1);
+
+ for (chan = 0; chan < 3; chan++) {
+ offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
+ emit_data->inst, 0, chan);
+ offset[chan] = LLVMBuildAnd(gallivm->builder, offset[chan],
+ lp_build_const_int32(gallivm, 0x3f), "");
+ if (chan)
+ offset[chan] = LLVMBuildShl(gallivm->builder, offset[chan],
+ lp_build_const_int32(gallivm, chan*8), "");
+ }
+
+ pack = LLVMBuildOr(gallivm->builder, offset[0], offset[1], "");
+ pack = LLVMBuildOr(gallivm->builder, pack, offset[2], "");
+ address[count++] = pack;
+ }
+
/* Pack LOD bias value */
if (opcode == TGSI_OPCODE_TXB)
address[count++] = coords[3];
@@ -1779,20 +1815,66 @@ static void tex_fetch_args(
}
}
+ emit_data->args[2] = lp_build_const_int32(gallivm, target);
+ emit_data->arg_count = 3;
+
emit_data->dst_type = LLVMVectorType(
LLVMInt32TypeInContext(bld_base->base.gallivm->context),
4);
+ } else if (opcode == TGSI_OPCODE_TG4) {
+ unsigned is_array = target == TGSI_TEXTURE_1D_ARRAY ||
+ target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
+ target == TGSI_TEXTURE_2D_ARRAY ||
+ target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
+ target == TGSI_TEXTURE_CUBE_ARRAY ||
+ target == TGSI_TEXTURE_SHADOWCUBE_ARRAY;
+ unsigned is_rect = target == TGSI_TEXTURE_RECT;
+ unsigned gather_comp = 0;
+
+ /* DMASK was repurposed for GATHER4. 4 components are always
+ * returned and DMASK works like a swizzle - it selects
+ * the component to fetch. The only valid DMASK values are
+ * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+ * (red,red,red,red) etc.) The ISA document doesn't mention
+ * this.
+ */
+
+ /* Get the component index from src1.x for Gather4. */
+ if (!tgsi_is_shadow_sampler(target)) {
+ LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+ LLVMValueRef comp_imm;
+ struct tgsi_src_register src1 = inst->Src[1].Register;
+
+ assert(src1.File == TGSI_FILE_IMMEDIATE);
+
+ comp_imm = imms[src1.Index][src1.SwizzleX];
+ gather_comp = LLVMConstIntGetZExtValue(comp_imm);
+ gather_comp = CLAMP(gather_comp, 0, 3);
+ }
- emit_data->arg_count = 3;
- } else {
- /* Sampler */
emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+ emit_data->args[3] = lp_build_const_int32(gallivm, 1 << gather_comp); /* dmask */
+ emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
+ emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
+ emit_data->args[6] = lp_build_const_int32(gallivm, is_array); /* da */
+ emit_data->args[7] = lp_build_const_int32(gallivm, 0); /* glc */
+ emit_data->args[8] = lp_build_const_int32(gallivm, 0); /* slc */
+ emit_data->args[9] = lp_build_const_int32(gallivm, 0); /* tfe */
+ emit_data->args[10] = lp_build_const_int32(gallivm, 0); /* lwe */
+
+ emit_data->arg_count = 11;
emit_data->dst_type = LLVMVectorType(
LLVMFloatTypeInContext(bld_base->base.gallivm->context),
4);
-
+ } else {
+ emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+ emit_data->args[3] = lp_build_const_int32(gallivm, target);
emit_data->arg_count = 4;
+
+ emit_data->dst_type = LLVMVectorType(
+ LLVMFloatTypeInContext(gallivm->context),
+ 4);
}
/* The fetch opcode has been converted to a 2D array fetch.
@@ -1802,10 +1884,6 @@ static void tex_fetch_args(
else if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
target = TGSI_TEXTURE_SHADOW2D_ARRAY;
- /* Dimensions */
- emit_data->args[emit_data->arg_count - 1] =
- lp_build_const_int32(bld_base->base.gallivm, target);
-
/* Pad to power of two vector */
while (count < util_next_power_of_two(count))
address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
@@ -1838,6 +1916,28 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
}
+/* Emit the intrinsic named action->intr_name plus ".c" (shadow target),
+ * ".o" (texel offsets present) and ".v<N>i32" (N = width of args[0]). */
+static void build_new_tex_intrinsic(const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_context * base = &bld_base->base;
+ char intr_name[127];
+ unsigned target = emit_data->inst->Texture.Texture;
+ bool is_shadow = tgsi_is_shadow_sampler(target);
+
+ /* Bounded write: the composed name can never overrun intr_name. */
+ snprintf(intr_name, sizeof(intr_name), "%s%s%s.v%ui32",
+ action->intr_name, is_shadow ? ".c" : "",
+ emit_data->inst->Texture.NumOffsets ? ".o" : "",
+ LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
+
+ emit_data->output[emit_data->chan] = build_intrinsic(
+ base->gallivm->builder, intr_name, emit_data->dst_type, emit_data->args,
+ emit_data->arg_count, LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+}
+
static void txq_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
@@ -2132,6 +2232,12 @@ static const struct lp_build_tgsi_action txq_action = {
.intr_name = "llvm.SI.resinfo"
};
+static const struct lp_build_tgsi_action new_tex_action = {
+ .fetch_args = tex_fetch_args,
+ .emit = build_new_tex_intrinsic, /* appends .c/.o/.v<N>i32 to intr_name */
+ .intr_name = "llvm.SI.gather4" /* base name only, without suffixes */
+};
+
static void create_meta_data(struct si_shader_context *si_shader_ctx)
{
struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
@@ -2596,6 +2702,7 @@ int si_pipe_shader_create(
bld_base->op_actions[TGSI_OPCODE_TXL2] = txl_action;
bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
+ bld_base->op_actions[TGSI_OPCODE_TG4] = new_tex_action;
#if HAVE_LLVM >= 0x0304
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;