From bb9d59aed5b01133f4c8e9f131a83b45fce91fdc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 17 Jul 2015 05:35:30 +0100 Subject: [PATCH] radeonsi: add fine derivate control (v2.1) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This adds support for fine derivatives and enables ARB_derivative_control on radeonsi. (just fell out of my working out interpolation) v2: cleanup some bits, write a comment v2.1: take Michel's comment from the mailing list Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- docs/GL3.txt | 2 +- docs/relnotes/10.7.0.html | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 52 +++++++++++++++++++++--- 4 files changed, 50 insertions(+), 7 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index e3fa1a1ec1b..15bb57f441a 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -191,7 +191,7 @@ GL 4.5, GLSL 4.50: GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, llvmpipe, softpipe) GL_ARB_cull_distance in progress (Tobias) - GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600) + GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_direct_state_access DONE (all drivers) GL_ARB_get_texture_sub_image DONE (all drivers) GL_ARB_shader_texture_image_samples not started diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html index 26615a83814..afef5256b75 100644 --- a/docs/relnotes/10.7.0.html +++ b/docs/relnotes/10.7.0.html @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_AMD_vertex_shader_viewport_index on radeonsi
  • +
  • GL_ARB_derivative_control on radeonsi
  • GL_ARB_fragment_layer_viewport on radeonsi
  • GL_ARB_framebuffer_no_attachments on i965
  • GL_ARB_get_texture_sub_image for all drivers
  • diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index c2985b8d224..ebe1f5a68f1 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -249,6 +249,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -289,7 +290,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: - case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index bc0f2171a52..27b3c72d1c9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2854,6 +2854,35 @@ static void build_txq_intrinsic(const struct lp_build_tgsi_action * action, } } +/* + * SI implements derivatives using the local data store (LDS) + * All writes to the LDS happen in all executing threads at + * the same time. TID is the Thread ID for the current + * thread and is a value between 0 and 63, representing + * the thread's position in the wavefront. + * + * For the pixel shader threads are grouped into quads of four pixels. + * The TIDs of the pixels of a quad are: + * + * +------+------+ + * |4n + 0|4n + 1| + * +------+------+ + * |4n + 2|4n + 3| + * +------+------+ + * + * So, masking the TID with 0xfffffffc yields the TID of the top left pixel + * of the quad, masking with 0xfffffffd yields the TID of the top pixel of + * the current pixel's column, and masking with 0xfffffffe yields the TID + * of the left pixel of the current pixel's row. + * + * Adding 1 yields the TID of the pixel to the right of the left pixel, and + * adding 2 yields the TID of the pixel below the top pixel. + */ +/* masks for thread ID. */ +#define TID_MASK_TOP_LEFT 0xfffffffc +#define TID_MASK_TOP 0xfffffffd +#define TID_MASK_LEFT 0xfffffffe + static void si_llvm_emit_ddxy( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -2870,6 +2899,8 @@ static void si_llvm_emit_ddxy( LLVMTypeRef i32; unsigned swizzle[4]; unsigned c; + int idx; + unsigned mask; i32 = LLVMInt32TypeInContext(gallivm->context); @@ -2879,15 +2910,22 @@ static void si_llvm_emit_ddxy( store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds, indices, 2, ""); + if (opcode == TGSI_OPCODE_DDX_FINE) + mask = TID_MASK_LEFT; + else if (opcode == TGSI_OPCODE_DDY_FINE) + mask = TID_MASK_TOP; + else + mask = TID_MASK_TOP_LEFT; + indices[1] = LLVMBuildAnd(gallivm->builder, indices[1], - lp_build_const_int32(gallivm, 0xfffffffc), ""); + lp_build_const_int32(gallivm, mask), ""); load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds, indices, 2, ""); + /* for DDX we want to next X pixel, DDY next Y pixel. */ + idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2; indices[1] = LLVMBuildAdd(gallivm->builder, indices[1], - lp_build_const_int32(gallivm, - opcode == TGSI_OPCODE_DDX ? 1 : 2), - ""); + lp_build_const_int32(gallivm, idx), ""); load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds, indices, 2, ""); @@ -3229,7 +3267,9 @@ static void create_function(struct si_shader_context *si_shader_ctx) if (bld_base->info && (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 || - bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0)) + bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 || + bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 || + bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0)) si_shader_ctx->lds = LLVMAddGlobalInAddressSpace(gallivm->module, LLVMArrayType(i32, 64), @@ -3722,6 +3762,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy; bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy; + bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy; + bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy; bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex; bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive; -- 2.30.2