/*
 * Copyright © 2018 Collabora Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "st_tgsi_lower_depth_clamp.h"
#include "tgsi/tgsi_transform.h"
#include "tgsi/tgsi_scan.h"

28 struct tgsi_depth_clamp_transform
{
29 struct tgsi_transform_context base
;
31 struct tgsi_shader_info info
;
33 int depth_range_const
;
41 int depth_range_corrected
;
42 bool depth_clip_minus_one_to_one
;
/**
 * Recover the depth-clamp pass state from the generic transform context.
 *
 * The cast relies on the transform context being the first member (`base`)
 * of struct tgsi_depth_clamp_transform.
 */
static inline struct tgsi_depth_clamp_transform *
tgsi_depth_clamp_transform(struct tgsi_transform_context *tctx)
{
   return (struct tgsi_depth_clamp_transform *)tctx;
}

52 transform_decl(struct tgsi_transform_context
*tctx
,
53 struct tgsi_full_declaration
*decl
)
55 struct tgsi_depth_clamp_transform
*ctx
= tgsi_depth_clamp_transform(tctx
);
57 /* find the next generic index usable for our inserted varying */
58 if (ctx
->info
.processor
== PIPE_SHADER_FRAGMENT
) {
59 if (decl
->Declaration
.File
== TGSI_FILE_INPUT
&&
60 decl
->Semantic
.Name
== TGSI_SEMANTIC_GENERIC
)
61 ctx
->next_generic
= MAX2(ctx
->next_generic
, decl
->Semantic
.Index
+ 1);
63 if (decl
->Declaration
.File
== TGSI_FILE_OUTPUT
&&
64 decl
->Semantic
.Name
== TGSI_SEMANTIC_GENERIC
)
65 ctx
->next_generic
= MAX2(ctx
->next_generic
, decl
->Semantic
.Index
+ 1);
68 if (decl
->Declaration
.File
== TGSI_FILE_OUTPUT
&&
69 decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
) {
70 assert(decl
->Semantic
.Index
== 0);
71 ctx
->pos_output
= decl
->Range
.First
;
72 } else if (decl
->Declaration
.File
== TGSI_FILE_INPUT
&&
73 decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
) {
74 assert(decl
->Semantic
.Index
== 0);
75 if (ctx
->info
.processor
== PIPE_SHADER_FRAGMENT
)
76 ctx
->pos_input
= decl
->Range
.First
;
79 tctx
->emit_declaration(tctx
, decl
);
83 prolog_common(struct tgsi_depth_clamp_transform
*ctx
)
85 assert(ctx
->depth_range_const
>= 0);
86 if (ctx
->info
.const_file_max
[0] < ctx
->depth_range_const
)
87 tgsi_transform_const_decl(&ctx
->base
, ctx
->depth_range_const
,
88 ctx
->depth_range_const
);
90 /* declare a temp for the position-output */
91 ctx
->pos_output_temp
= ctx
->info
.file_max
[TGSI_FILE_TEMPORARY
] + 1;
92 tgsi_transform_temp_decl(&ctx
->base
, ctx
->pos_output_temp
);
96 prolog_last_vertex_stage(struct tgsi_transform_context
*tctx
)
98 struct tgsi_depth_clamp_transform
*ctx
= tgsi_depth_clamp_transform(tctx
);
102 ctx
->imm
= ctx
->info
.immediate_count
;
103 tgsi_transform_immediate_decl(tctx
, 0.5, 0.0, 0.0, 0.0);
105 /* declare the output */
106 ctx
->depth_var
= ctx
->info
.num_outputs
;
107 tgsi_transform_output_decl(tctx
, ctx
->depth_var
,
108 TGSI_SEMANTIC_GENERIC
,
110 TGSI_INTERPOLATE_LINEAR
);
114 epilog_last_vertex_stage(struct tgsi_transform_context
*tctx
)
116 struct tgsi_depth_clamp_transform
*ctx
= tgsi_depth_clamp_transform(tctx
);
118 int mad_dst_file
= TGSI_FILE_TEMPORARY
;
119 int mad_dst_index
= ctx
->pos_output_temp
;
121 if (!ctx
->depth_clip_minus_one_to_one
) {
122 mad_dst_file
= TGSI_FILE_OUTPUT
;
123 mad_dst_index
= ctx
->depth_var
;
126 /* move from temp-register to output */
127 tgsi_transform_op1_inst(tctx
, TGSI_OPCODE_MOV
,
128 TGSI_FILE_OUTPUT
, ctx
->pos_output
,
130 TGSI_FILE_TEMPORARY
, ctx
->pos_output_temp
);
132 /* Set gl_position.z to 0.0 to avoid clipping */
133 tgsi_transform_op1_swz_inst(tctx
, TGSI_OPCODE_MOV
,
134 TGSI_FILE_OUTPUT
, ctx
->pos_output
,
136 TGSI_FILE_IMMEDIATE
, ctx
->imm
,
139 /* Evaluate and pass true depth value in depthRange terms */
140 /* z = gl_Position.z / gl_Position.w */
142 struct tgsi_full_instruction inst
;
144 inst
= tgsi_default_full_instruction();
145 inst
.Instruction
.Opcode
= TGSI_OPCODE_DIV
;
146 inst
.Instruction
.NumDstRegs
= 1;
147 inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
148 inst
.Dst
[0].Register
.Index
= ctx
->pos_output_temp
;
149 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_X
;
150 inst
.Instruction
.NumSrcRegs
= 2;
151 tgsi_transform_src_reg_xyzw(&inst
.Src
[0], TGSI_FILE_TEMPORARY
, ctx
->pos_output_temp
);
152 tgsi_transform_src_reg_xyzw(&inst
.Src
[1], TGSI_FILE_TEMPORARY
, ctx
->pos_output_temp
);
153 inst
.Src
[0].Register
.SwizzleX
=
154 inst
.Src
[0].Register
.SwizzleY
=
155 inst
.Src
[0].Register
.SwizzleZ
=
156 inst
.Src
[0].Register
.SwizzleW
= TGSI_SWIZZLE_Z
;
158 inst
.Src
[1].Register
.SwizzleX
=
159 inst
.Src
[1].Register
.SwizzleY
=
160 inst
.Src
[1].Register
.SwizzleZ
=
161 inst
.Src
[1].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
163 tctx
->emit_instruction(tctx
, &inst
);
166 /* OpenGL Core Profile 4.5 - 13.6.1
167 * The vertex's windows z coordinate zw is given by zw = s * z + b.
169 * * With clip control depth mode ZERO_TO_ONE
170 * s = f - n, b = n, and hence
172 * zw_0_1 = z * gl_DepthRange.diff + gl_DepthRange.near
174 tgsi_transform_op3_swz_inst(tctx
, TGSI_OPCODE_MAD
,
175 mad_dst_file
, mad_dst_index
,
177 TGSI_FILE_TEMPORARY
, ctx
->pos_output_temp
,
180 TGSI_FILE_CONSTANT
, ctx
->depth_range_const
,
182 TGSI_FILE_CONSTANT
, ctx
->depth_range_const
,
185 /* If clip control depth mode is NEGATIVE_ONE_TO_ONE, then
186 * s = 0.5 * (f - n), b = 0.5 * (n + f), and hence
188 * zw_m1_1 = 0.5 * (zw_01 + gl_DepthRange.far)
190 if (ctx
->depth_clip_minus_one_to_one
) {
191 /* z += gl_DepthRange.far */
192 tgsi_transform_op2_swz_inst(tctx
, TGSI_OPCODE_ADD
,
193 TGSI_FILE_TEMPORARY
, ctx
->pos_output_temp
,
195 TGSI_FILE_TEMPORARY
, ctx
->pos_output_temp
,
197 TGSI_FILE_CONSTANT
, ctx
->depth_range_const
,
198 TGSI_SWIZZLE_Y
, false);
200 tgsi_transform_op2_swz_inst(tctx
, TGSI_OPCODE_MUL
,
201 TGSI_FILE_OUTPUT
, ctx
->depth_var
,
203 TGSI_FILE_TEMPORARY
, ctx
->pos_output_temp
,
205 TGSI_FILE_IMMEDIATE
, ctx
->imm
,
206 TGSI_SWIZZLE_X
, false);
212 prolog_fs(struct tgsi_transform_context
*tctx
)
214 struct tgsi_depth_clamp_transform
*ctx
= tgsi_depth_clamp_transform(tctx
);
218 ctx
->depth_range_corrected
= ctx
->info
.file_max
[TGSI_FILE_TEMPORARY
] + 2;
219 tgsi_transform_temp_decl(tctx
, ctx
->depth_range_corrected
);
221 /* declare the input */
222 ctx
->depth_var
= ctx
->info
.num_inputs
;
223 tgsi_transform_input_decl(tctx
, ctx
->depth_var
,
224 TGSI_SEMANTIC_GENERIC
,
226 TGSI_INTERPOLATE_LINEAR
);
228 /* declare the output */
229 if (ctx
->pos_output
< 0) {
230 ctx
->pos_output
= ctx
->info
.num_outputs
;
231 tgsi_transform_output_decl(tctx
, ctx
->pos_output
,
232 TGSI_SEMANTIC_POSITION
,
234 TGSI_INTERPOLATE_LINEAR
);
237 if (ctx
->info
.reads_z
) {
238 ctx
->pos_input_temp
= ctx
->info
.file_max
[TGSI_FILE_TEMPORARY
] + 3;
239 tgsi_transform_temp_decl(tctx
, ctx
->pos_input_temp
);
241 assert(ctx
->pos_input_temp
>= 0);
242 /* copy normal position */
243 tgsi_transform_op1_inst(tctx
, TGSI_OPCODE_MOV
,
244 TGSI_FILE_TEMPORARY
, ctx
->pos_input_temp
,
246 TGSI_FILE_INPUT
, ctx
->pos_input
);
247 /* replace z-component with varying */
248 tgsi_transform_op1_swz_inst(tctx
, TGSI_OPCODE_MOV
,
249 TGSI_FILE_TEMPORARY
, ctx
->pos_input_temp
,
251 TGSI_FILE_INPUT
, ctx
->depth_var
,
257 epilog_fs(struct tgsi_transform_context
*tctx
)
259 struct tgsi_depth_clamp_transform
*ctx
= tgsi_depth_clamp_transform(tctx
);
261 unsigned src0_file
= TGSI_FILE_INPUT
;
262 unsigned src0_index
= ctx
->depth_var
;
263 unsigned src0_swizzle
= TGSI_SWIZZLE_X
;
265 if (ctx
->info
.writes_z
) {
266 src0_file
= TGSI_FILE_TEMPORARY
;
267 src0_index
= ctx
->pos_output_temp
;
268 src0_swizzle
= TGSI_SWIZZLE_Z
;
271 /* it is possible to have gl_DepthRange.near > gl_DepthRange.far, so first
272 * we have to sort the two */
273 tgsi_transform_op2_swz_inst(tctx
, TGSI_OPCODE_MIN
,
274 TGSI_FILE_TEMPORARY
, ctx
->depth_range_corrected
,
276 TGSI_FILE_CONSTANT
, ctx
->depth_range_const
,
278 TGSI_FILE_CONSTANT
, ctx
->depth_range_const
,
282 tgsi_transform_op2_swz_inst(tctx
, TGSI_OPCODE_MAX
,
283 TGSI_FILE_TEMPORARY
, ctx
->depth_range_corrected
,
285 TGSI_FILE_CONSTANT
, ctx
->depth_range_const
,
287 TGSI_FILE_CONSTANT
, ctx
->depth_range_const
,
291 /* gl_FragDepth = max(gl_FragDepth, min(gl_DepthRange.near, gl_DepthRange.far)) */
292 tgsi_transform_op2_swz_inst(tctx
, TGSI_OPCODE_MAX
,
293 TGSI_FILE_TEMPORARY
, ctx
->pos_output_temp
,
295 src0_file
, src0_index
, src0_swizzle
,
296 TGSI_FILE_TEMPORARY
, ctx
->depth_range_corrected
,
297 TGSI_SWIZZLE_X
, false);
299 /* gl_FragDepth = min(gl_FragDepth, max(gl_DepthRange.near, gl_DepthRange.far)) */
300 tgsi_transform_op2_swz_inst(tctx
, TGSI_OPCODE_MIN
,
301 TGSI_FILE_OUTPUT
, ctx
->pos_output
,
303 TGSI_FILE_TEMPORARY
, ctx
->pos_output_temp
,
305 TGSI_FILE_TEMPORARY
, ctx
->depth_range_corrected
,
306 TGSI_SWIZZLE_Y
, false);
310 transform_instr(struct tgsi_transform_context
*tctx
,
311 struct tgsi_full_instruction
*inst
)
313 struct tgsi_depth_clamp_transform
*ctx
= tgsi_depth_clamp_transform(tctx
);
315 if (ctx
->pos_output
>= 0) {
316 /* replace writes to gl_Position / gl_FragDepth with a temp-variable
318 for (int i
= 0; i
< inst
->Instruction
.NumDstRegs
; ++i
) {
319 if (inst
->Dst
[i
].Register
.File
== TGSI_FILE_OUTPUT
&&
320 inst
->Dst
[i
].Register
.Index
== ctx
->pos_output
) {
321 inst
->Dst
[i
].Register
.File
= TGSI_FILE_TEMPORARY
;
322 inst
->Dst
[i
].Register
.Index
= ctx
->pos_output_temp
;
327 if (ctx
->info
.reads_z
) {
328 /* replace reads from gl_FragCoord with temp-variable
330 assert(ctx
->pos_input_temp
>= 0);
331 for (int i
= 0; i
< inst
->Instruction
.NumSrcRegs
; ++i
) {
332 if (inst
->Src
[i
].Register
.File
== TGSI_FILE_INPUT
&&
333 inst
->Src
[i
].Register
.Index
== ctx
->pos_input
) {
334 inst
->Src
[i
].Register
.File
= TGSI_FILE_TEMPORARY
;
335 inst
->Src
[i
].Register
.Index
= ctx
->pos_input_temp
;
340 /* In a GS each we have to add the z-write opilog for each emit
342 if (ctx
->info
.processor
== PIPE_SHADER_GEOMETRY
&&
343 inst
->Instruction
.Opcode
== TGSI_OPCODE_EMIT
)
344 epilog_last_vertex_stage(tctx
);
346 tctx
->emit_instruction(tctx
, inst
);
349 const struct tgsi_token
*
350 st_tgsi_lower_depth_clamp(const struct tgsi_token
*tokens
,
351 int depth_range_const
,
352 bool clip_negative_one_to_one
)
354 struct tgsi_depth_clamp_transform ctx
= {};
355 struct tgsi_token
*newtoks
;
358 tgsi_scan_shader(tokens
, &ctx
.info
);
360 /* we only want to do this for the fragment shader, and the shader-stage
361 * right before it, but in the first pass there might be no "next" shader
363 if (ctx
.info
.processor
!= PIPE_SHADER_FRAGMENT
&&
364 ctx
.info
.processor
!= PIPE_SHADER_GEOMETRY
&&
365 ctx
.info
.processor
!= PIPE_SHADER_VERTEX
&&
366 ctx
.info
.processor
!= PIPE_SHADER_TESS_EVAL
&&
367 (ctx
.info
.properties
[TGSI_PROPERTY_NEXT_SHADER
] > PIPE_SHADER_VERTEX
&&
368 (ctx
.info
.properties
[TGSI_PROPERTY_NEXT_SHADER
] != PIPE_SHADER_FRAGMENT
))) {
372 ctx
.base
.transform_declaration
= transform_decl
;
373 ctx
.base
.transform_instruction
= transform_instr
;
375 if (ctx
.info
.processor
== PIPE_SHADER_FRAGMENT
) {
376 ctx
.base
.prolog
= prolog_fs
;
377 ctx
.base
.epilog
= epilog_fs
;
379 ctx
.base
.prolog
= prolog_last_vertex_stage
;
380 ctx
.base
.epilog
= epilog_last_vertex_stage
;
383 ctx
.pos_output
= ctx
.pos_input
= -1;
384 ctx
.depth_range_const
= depth_range_const
;
385 ctx
.depth_clip_minus_one_to_one
= clip_negative_one_to_one
;
387 /* We add approximately 30 tokens per Z write, so add this per vertex in
388 * a GS and some additional tokes for VS and TES
390 newlen
= tgsi_num_tokens(tokens
) +
391 30 * ctx
.info
.properties
[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
] +
394 newtoks
= tgsi_alloc_tokens(newlen
);
398 tgsi_transform_shader(tokens
, newtoks
, newlen
, &ctx
.base
);
/**
 * Convenience wrapper around st_tgsi_lower_depth_clamp() that always passes
 * false for the clip_negative_one_to_one argument.
 *
 * \param tokens             shader to transform
 * \param depth_range_const  constant-buffer slot holding the depth range
 * \return whatever st_tgsi_lower_depth_clamp() returns
 */
const struct tgsi_token *
st_tgsi_lower_depth_clamp_fs(const struct tgsi_token *tokens,
                             int depth_range_const)
{
   return st_tgsi_lower_depth_clamp(tokens, depth_range_const, false);
}