From: Erik Faye-Lund Date: Wed, 5 Jun 2019 13:39:41 +0000 (+0200) Subject: mesa/st: add tgsi-lowering code for depth-clamp X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b048d8bf8f056759d1845a799d4ba2ac84bce30f;p=mesa.git mesa/st: add tgsi-lowering code for depth-clamp This is a TGSI pass that lowers depth-clamping into shader-operations, by replacing the depth-value with 0 (a z-coordinate of zero will always pass the OpenGL depth test conditions), and using a dedicated varying to interpolate the real depth-value instead. Finally we replace the depth-output in the fragment shader. v1 implemented by Erik Faye-Lund v2: Add support for handling depth clip mode, and refactor code v3: - Rename *_vs functions to *_last_vertex_stage (Erik) - Use 0.0 depth to avoid clipping (Erik) v4: Fix inversion of bool value for clip control property Signed-off-by: Gert Wollny Reviewed-by: Reviewed-by: Marek Olšák --- diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index b0b43a78ce9..acb0c2b53cf 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -556,6 +556,8 @@ STATETRACKER_FILES = \ state_tracker/st_shader_cache.h \ state_tracker/st_texture.c \ state_tracker/st_texture.h \ + state_tracker/st_tgsi_lower_depth_clamp.c \ + state_tracker/st_tgsi_lower_depth_clamp.h \ state_tracker/st_tgsi_lower_yuv.c \ state_tracker/st_tgsi_lower_yuv.h \ state_tracker/st_util.h \ diff --git a/src/mesa/meson.build b/src/mesa/meson.build index 4a89358a267..563bdd7c103 100644 --- a/src/mesa/meson.build +++ b/src/mesa/meson.build @@ -600,6 +600,8 @@ files_libmesa_gallium = files( 'state_tracker/st_shader_cache.h', 'state_tracker/st_texture.c', 'state_tracker/st_texture.h', + 'state_tracker/st_tgsi_lower_depth_clamp.c', + 'state_tracker/st_tgsi_lower_depth_clamp.h', 'state_tracker/st_tgsi_lower_yuv.c', 'state_tracker/st_tgsi_lower_yuv.h', 'state_tracker/st_util.h', diff --git a/src/mesa/state_tracker/st_tgsi_lower_depth_clamp.c 
b/src/mesa/state_tracker/st_tgsi_lower_depth_clamp.c new file mode 100644 index 00000000000..b89ee986d33 --- /dev/null +++ b/src/mesa/state_tracker/st_tgsi_lower_depth_clamp.c @@ -0,0 +1,408 @@ +/* + * Copyright © 2018 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "st_tgsi_lower_depth_clamp.h" +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_scan.h" + +struct tgsi_depth_clamp_transform { + struct tgsi_transform_context base; + + struct tgsi_shader_info info; + + int depth_range_const; + int next_generic; + int imm; + int depth_var; + int pos_input; + int pos_output; + int pos_input_temp; + int pos_output_temp; + int depth_range_corrected; + bool depth_clip_minus_one_to_one; +}; + +static inline struct tgsi_depth_clamp_transform * +tgsi_depth_clamp_transform(struct tgsi_transform_context *tctx) +{ + return (struct tgsi_depth_clamp_transform *)tctx; +} + +static void +transform_decl(struct tgsi_transform_context *tctx, + struct tgsi_full_declaration *decl) +{ + struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); + + /* find the next generic index usable for our inserted varying */ + if (ctx->info.processor == PIPE_SHADER_FRAGMENT) { + if (decl->Declaration.File == TGSI_FILE_INPUT && + decl->Semantic.Name == TGSI_SEMANTIC_GENERIC) + ctx->next_generic = MAX2(ctx->next_generic, decl->Semantic.Index + 1); + } else { + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.Name == TGSI_SEMANTIC_GENERIC) + ctx->next_generic = MAX2(ctx->next_generic, decl->Semantic.Index + 1); + } + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { + assert(decl->Semantic.Index == 0); + ctx->pos_output = decl->Range.First; + } else if (decl->Declaration.File == TGSI_FILE_INPUT && + decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { + assert(decl->Semantic.Index == 0); + if (ctx->info.processor == PIPE_SHADER_FRAGMENT) + ctx->pos_input = decl->Range.First; + } + + tctx->emit_declaration(tctx, decl); +} + +static void +prolog_common(struct tgsi_depth_clamp_transform *ctx) +{ + assert(ctx->depth_range_const >= 0); + if (ctx->info.const_file_max[0] < ctx->depth_range_const) + tgsi_transform_const_decl(&ctx->base, ctx->depth_range_const, + 
ctx->depth_range_const); + + /* declare a temp for the position-output */ + ctx->pos_output_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1; + tgsi_transform_temp_decl(&ctx->base, ctx->pos_output_temp); +} + +static void +prolog_last_vertex_stage(struct tgsi_transform_context *tctx) +{ + struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); + + prolog_common(ctx); + + ctx->imm = ctx->info.immediate_count; + tgsi_transform_immediate_decl(tctx, 0.5, 0.0, 0.0, 0.0); + + /* declare the output */ + ctx->depth_var = ctx->info.num_outputs; + tgsi_transform_output_decl(tctx, ctx->depth_var, + TGSI_SEMANTIC_GENERIC, + ctx->next_generic, + TGSI_INTERPOLATE_LINEAR); +} + +static void +epilog_last_vertex_stage(struct tgsi_transform_context *tctx) +{ + struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); + + int mad_dst_file = TGSI_FILE_TEMPORARY; + int mad_dst_index = ctx->pos_output_temp; + + if (!ctx->depth_clip_minus_one_to_one) { + mad_dst_file = TGSI_FILE_OUTPUT; + mad_dst_index = ctx->depth_var; + } + + /* move from temp-register to output */ + tgsi_transform_op1_inst(tctx, TGSI_OPCODE_MOV, + TGSI_FILE_OUTPUT, ctx->pos_output, + TGSI_WRITEMASK_XYZW, + TGSI_FILE_TEMPORARY, ctx->pos_output_temp); + + /* Set gl_Position.z to 0.0 to avoid clipping */ + tgsi_transform_op1_swz_inst(tctx, TGSI_OPCODE_MOV, + TGSI_FILE_OUTPUT, ctx->pos_output, + TGSI_WRITEMASK_Z, + TGSI_FILE_IMMEDIATE, ctx->imm, + TGSI_SWIZZLE_Y); + + /* Evaluate and pass true depth value in depthRange terms */ + /* z = gl_Position.z / gl_Position.w */ + + struct tgsi_full_instruction inst; + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_DIV; + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->pos_output_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; + inst.Instruction.NumSrcRegs = 2; + tgsi_transform_src_reg_xyzw(&inst.Src[0], TGSI_FILE_TEMPORARY, 
ctx->pos_output_temp); + tgsi_transform_src_reg_xyzw(&inst.Src[1], TGSI_FILE_TEMPORARY, ctx->pos_output_temp); + inst.Src[0].Register.SwizzleX = + inst.Src[0].Register.SwizzleY = + inst.Src[0].Register.SwizzleZ = + inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Z; + + inst.Src[1].Register.SwizzleX = + inst.Src[1].Register.SwizzleY = + inst.Src[1].Register.SwizzleZ = + inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; + + tctx->emit_instruction(tctx, &inst); + + + /* OpenGL Core Profile 4.5 - 13.6.1 + * The vertex's window z coordinate zw is given by zw = s * z + b. + * + * * With clip control depth mode ZERO_TO_ONE + * s = f - n, b = n, and hence + * + * zw_0_1 = z * gl_DepthRange.diff + gl_DepthRange.near + */ + tgsi_transform_op3_swz_inst(tctx, TGSI_OPCODE_MAD, + mad_dst_file, mad_dst_index, + TGSI_WRITEMASK_X, + TGSI_FILE_TEMPORARY, ctx->pos_output_temp, + TGSI_SWIZZLE_X, + false, + TGSI_FILE_CONSTANT, ctx->depth_range_const, + TGSI_SWIZZLE_Z, + TGSI_FILE_CONSTANT, ctx->depth_range_const, + TGSI_SWIZZLE_X); + + /* If clip control depth mode is NEGATIVE_ONE_TO_ONE, then + * s = 0.5 * (f - n), b = 0.5 * (n + f), and hence + * + * zw_m1_1 = 0.5 * (zw_0_1 + gl_DepthRange.far) + */ + if (ctx->depth_clip_minus_one_to_one) { + /* z += gl_DepthRange.far */ + tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_ADD, + TGSI_FILE_TEMPORARY, ctx->pos_output_temp, + TGSI_WRITEMASK_X, + TGSI_FILE_TEMPORARY, ctx->pos_output_temp, + TGSI_SWIZZLE_X, + TGSI_FILE_CONSTANT, ctx->depth_range_const, + TGSI_SWIZZLE_Y, false); + /* z *= 0.5 */ + tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MUL, + TGSI_FILE_OUTPUT, ctx->depth_var, + TGSI_WRITEMASK_X, + TGSI_FILE_TEMPORARY, ctx->pos_output_temp, + TGSI_SWIZZLE_X, + TGSI_FILE_IMMEDIATE, ctx->imm, + TGSI_SWIZZLE_X, false); + } +} + + +static void +prolog_fs(struct tgsi_transform_context *tctx) +{ + struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); + + prolog_common(ctx); + + ctx->depth_range_corrected = 
ctx->info.file_max[TGSI_FILE_TEMPORARY] + 2; + tgsi_transform_temp_decl(tctx, ctx->depth_range_corrected); + + /* declare the input */ + ctx->depth_var = ctx->info.num_inputs; + tgsi_transform_input_decl(tctx, ctx->depth_var, + TGSI_SEMANTIC_GENERIC, + ctx->next_generic, + TGSI_INTERPOLATE_LINEAR); + + /* declare the output */ + if (ctx->pos_output < 0) { + ctx->pos_output = ctx->info.num_outputs; + tgsi_transform_output_decl(tctx, ctx->pos_output, + TGSI_SEMANTIC_POSITION, + 0, + TGSI_INTERPOLATE_LINEAR); + } + + if (ctx->info.reads_z) { + ctx->pos_input_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 3; + tgsi_transform_temp_decl(tctx, ctx->pos_input_temp); + + assert(ctx->pos_input_temp >= 0); + /* copy normal position */ + tgsi_transform_op1_inst(tctx, TGSI_OPCODE_MOV, + TGSI_FILE_TEMPORARY, ctx->pos_input_temp, + TGSI_WRITEMASK_XYZW, + TGSI_FILE_INPUT, ctx->pos_input); + /* replace z-component with varying */ + tgsi_transform_op1_swz_inst(tctx, TGSI_OPCODE_MOV, + TGSI_FILE_TEMPORARY, ctx->pos_input_temp, + TGSI_WRITEMASK_Z, + TGSI_FILE_INPUT, ctx->depth_var, + TGSI_SWIZZLE_X); + } +} + +static void +epilog_fs(struct tgsi_transform_context *tctx) +{ + struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); + + unsigned src0_file = TGSI_FILE_INPUT; + unsigned src0_index = ctx->depth_var; + unsigned src0_swizzle = TGSI_SWIZZLE_X; + + if (ctx->info.writes_z) { + src0_file = TGSI_FILE_TEMPORARY; + src0_index = ctx->pos_output_temp; + src0_swizzle = TGSI_SWIZZLE_Z; + } + + /* it is possible to have gl_DepthRange.near > gl_DepthRange.far, so first + * we have to sort the two */ + tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MIN, + TGSI_FILE_TEMPORARY, ctx->depth_range_corrected, + TGSI_WRITEMASK_X, + TGSI_FILE_CONSTANT, ctx->depth_range_const, + TGSI_SWIZZLE_X, + TGSI_FILE_CONSTANT, ctx->depth_range_const, + TGSI_SWIZZLE_Y, + false); + + tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MAX, + TGSI_FILE_TEMPORARY, ctx->depth_range_corrected, + 
TGSI_WRITEMASK_Y, + TGSI_FILE_CONSTANT, ctx->depth_range_const, + TGSI_SWIZZLE_X, + TGSI_FILE_CONSTANT, ctx->depth_range_const, + TGSI_SWIZZLE_Y, + false); + + /* gl_FragDepth = max(gl_FragDepth, min(gl_DepthRange.near, gl_DepthRange.far)) */ + tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MAX, + TGSI_FILE_TEMPORARY, ctx->pos_output_temp, + TGSI_WRITEMASK_X, + src0_file, src0_index, src0_swizzle, + TGSI_FILE_TEMPORARY, ctx->depth_range_corrected, + TGSI_SWIZZLE_X, false); + + /* gl_FragDepth = min(gl_FragDepth, max(gl_DepthRange.near, gl_DepthRange.far)) */ + tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MIN, + TGSI_FILE_OUTPUT, ctx->pos_output, + TGSI_WRITEMASK_Z, + TGSI_FILE_TEMPORARY, ctx->pos_output_temp, + TGSI_SWIZZLE_X, + TGSI_FILE_TEMPORARY, ctx->depth_range_corrected, + TGSI_SWIZZLE_Y, false); +} + +static void +transform_instr(struct tgsi_transform_context *tctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx); + + if (ctx->pos_output >= 0) { + /* replace writes to gl_Position / gl_FragDepth with a temp-variable + */ + for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { + if (inst->Dst[i].Register.File == TGSI_FILE_OUTPUT && + inst->Dst[i].Register.Index == ctx->pos_output) { + inst->Dst[i].Register.File = TGSI_FILE_TEMPORARY; + inst->Dst[i].Register.Index = ctx->pos_output_temp; + } + } + } + + if (ctx->info.reads_z) { + /* replace reads from gl_FragCoord with temp-variable + */ + assert(ctx->pos_input_temp >= 0); + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + if (inst->Src[i].Register.File == TGSI_FILE_INPUT && + inst->Src[i].Register.Index == ctx->pos_input) { + inst->Src[i].Register.File = TGSI_FILE_TEMPORARY; + inst->Src[i].Register.Index = ctx->pos_input_temp; + } + } + } + + /* In a GS we have to add the z-write epilog for each emit + */ + if (ctx->info.processor == PIPE_SHADER_GEOMETRY && + inst->Instruction.Opcode == TGSI_OPCODE_EMIT) + 
epilog_last_vertex_stage(tctx); + + tctx->emit_instruction(tctx, inst); +} + +const struct tgsi_token * +st_tgsi_lower_depth_clamp(const struct tgsi_token *tokens, + int depth_range_const, + bool clip_negative_one_to_one) +{ + struct tgsi_depth_clamp_transform ctx = {}; + struct tgsi_token *newtoks; + int newlen; + + tgsi_scan_shader(tokens, &ctx.info); + + /* we only want to do this for the fragment shader, and the shader-stage + * right before it, but in the first pass there might be no "next" shader + */ + if (ctx.info.processor != PIPE_SHADER_FRAGMENT && + ctx.info.processor != PIPE_SHADER_GEOMETRY && + ctx.info.processor != PIPE_SHADER_VERTEX && + ctx.info.processor != PIPE_SHADER_TESS_EVAL && + (ctx.info.properties[TGSI_PROPERTY_NEXT_SHADER] > PIPE_SHADER_VERTEX && + (ctx.info.properties[TGSI_PROPERTY_NEXT_SHADER] != PIPE_SHADER_FRAGMENT))) { + return tokens; + } + + ctx.base.transform_declaration = transform_decl; + ctx.base.transform_instruction = transform_instr; + + if (ctx.info.processor == PIPE_SHADER_FRAGMENT) { + ctx.base.prolog = prolog_fs; + ctx.base.epilog = epilog_fs; + } else { + ctx.base.prolog = prolog_last_vertex_stage; + ctx.base.epilog = epilog_last_vertex_stage; + } + + ctx.pos_output = ctx.pos_input = -1; + ctx.depth_range_const = depth_range_const; + ctx.depth_clip_minus_one_to_one = clip_negative_one_to_one; + + /* We add approximately 30 tokens per Z write, so add this per vertex in + * a GS and some additional tokens for VS and TES + */ + newlen = tgsi_num_tokens(tokens) + + 30 * ctx.info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + + 120; + + newtoks = tgsi_alloc_tokens(newlen); + if (!newtoks) + return tokens; + + tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); + + return newtoks; +} + +const struct tgsi_token * +st_tgsi_lower_depth_clamp_fs(const struct tgsi_token *tokens, + int depth_range_const) +{ + return st_tgsi_lower_depth_clamp(tokens, depth_range_const, false); +} \ No newline at end of file diff --git 
a/src/mesa/state_tracker/st_tgsi_lower_depth_clamp.h b/src/mesa/state_tracker/st_tgsi_lower_depth_clamp.h new file mode 100644 index 00000000000..acfb97569f6 --- /dev/null +++ b/src/mesa/state_tracker/st_tgsi_lower_depth_clamp.h @@ -0,0 +1,39 @@ +/* + * Copyright © 2018 Collabora Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ST_TGSI_LOWER_DEPTH_CLAMP_H +#define ST_TGSI_LOWER_DEPTH_CLAMP_H + +#include <stdbool.h> +struct tgsi_token; + +const struct tgsi_token * +st_tgsi_lower_depth_clamp(const struct tgsi_token *tokens, + int depth_range_const, + bool clip_negative_one_to_one); + +const struct tgsi_token * +st_tgsi_lower_depth_clamp_fs(const struct tgsi_token *tokens, + int depth_range_const); + +#endif /* ST_TGSI_LOWER_DEPTH_CLAMP_H */