glsl: lower mediump temporaries to 16 bits except structures (v2)
[mesa.git] / src / mesa / state_tracker / st_tgsi_lower_depth_clamp.c
1 /*
2 * Copyright © 2018 Collabora Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "st_tgsi_lower_depth_clamp.h"
25 #include "tgsi/tgsi_transform.h"
26 #include "tgsi/tgsi_scan.h"
27
28 struct tgsi_depth_clamp_transform {
29 struct tgsi_transform_context base;
30
31 struct tgsi_shader_info info;
32
33 int depth_range_const;
34 int next_generic;
35 int imm;
36 int depth_var;
37 int pos_input;
38 int pos_output;
39 int pos_input_temp;
40 int pos_output_temp;
41 int depth_range_corrected;
42 bool depth_clip_minus_one_to_one;
43 };
44
/**
 * Recover the depth-clamp lowering state from the generic transform context
 * handed to the callbacks. The cast relies on the transform context being
 * the first member of struct tgsi_depth_clamp_transform.
 */
static inline struct tgsi_depth_clamp_transform *
tgsi_depth_clamp_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_depth_clamp_transform *self =
      (struct tgsi_depth_clamp_transform *)tctx;

   return self;
}
50
51 static void
52 transform_decl(struct tgsi_transform_context *tctx,
53 struct tgsi_full_declaration *decl)
54 {
55 struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
56
57 /* find the next generic index usable for our inserted varying */
58 if (ctx->info.processor == PIPE_SHADER_FRAGMENT) {
59 if (decl->Declaration.File == TGSI_FILE_INPUT &&
60 decl->Semantic.Name == TGSI_SEMANTIC_GENERIC)
61 ctx->next_generic = MAX2(ctx->next_generic, decl->Semantic.Index + 1);
62 } else {
63 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
64 decl->Semantic.Name == TGSI_SEMANTIC_GENERIC)
65 ctx->next_generic = MAX2(ctx->next_generic, decl->Semantic.Index + 1);
66 }
67
68 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
69 decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
70 assert(decl->Semantic.Index == 0);
71 ctx->pos_output = decl->Range.First;
72 } else if (decl->Declaration.File == TGSI_FILE_INPUT &&
73 decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
74 assert(decl->Semantic.Index == 0);
75 if (ctx->info.processor == PIPE_SHADER_FRAGMENT)
76 ctx->pos_input = decl->Range.First;
77 }
78
79 tctx->emit_declaration(tctx, decl);
80 }
81
82 static void
83 prolog_common(struct tgsi_depth_clamp_transform *ctx)
84 {
85 assert(ctx->depth_range_const >= 0);
86 if (ctx->info.const_file_max[0] < ctx->depth_range_const)
87 tgsi_transform_const_decl(&ctx->base, ctx->depth_range_const,
88 ctx->depth_range_const);
89
90 /* declare a temp for the position-output */
91 ctx->pos_output_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
92 tgsi_transform_temp_decl(&ctx->base, ctx->pos_output_temp);
93 }
94
95 static void
96 prolog_last_vertex_stage(struct tgsi_transform_context *tctx)
97 {
98 struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
99
100 prolog_common(ctx);
101
102 ctx->imm = ctx->info.immediate_count;
103 tgsi_transform_immediate_decl(tctx, 0.5, 0.0, 0.0, 0.0);
104
105 /* declare the output */
106 ctx->depth_var = ctx->info.num_outputs;
107 tgsi_transform_output_decl(tctx, ctx->depth_var,
108 TGSI_SEMANTIC_GENERIC,
109 ctx->next_generic,
110 TGSI_INTERPOLATE_LINEAR);
111 }
112
113 static void
114 epilog_last_vertex_stage(struct tgsi_transform_context *tctx)
115 {
116 struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
117
118 int mad_dst_file = TGSI_FILE_TEMPORARY;
119 int mad_dst_index = ctx->pos_output_temp;
120
121 if (!ctx->depth_clip_minus_one_to_one) {
122 mad_dst_file = TGSI_FILE_OUTPUT;
123 mad_dst_index = ctx->depth_var;
124 }
125
126 /* move from temp-register to output */
127 tgsi_transform_op1_inst(tctx, TGSI_OPCODE_MOV,
128 TGSI_FILE_OUTPUT, ctx->pos_output,
129 TGSI_WRITEMASK_XYZW,
130 TGSI_FILE_TEMPORARY, ctx->pos_output_temp);
131
132 /* Set gl_position.z to 0.0 to avoid clipping */
133 tgsi_transform_op1_swz_inst(tctx, TGSI_OPCODE_MOV,
134 TGSI_FILE_OUTPUT, ctx->pos_output,
135 TGSI_WRITEMASK_Z,
136 TGSI_FILE_IMMEDIATE, ctx->imm,
137 TGSI_SWIZZLE_Y);
138
139 /* Evaluate and pass true depth value in depthRange terms */
140 /* z = gl_Position.z / gl_Position.w */
141
142 struct tgsi_full_instruction inst;
143
144 inst = tgsi_default_full_instruction();
145 inst.Instruction.Opcode = TGSI_OPCODE_DIV;
146 inst.Instruction.NumDstRegs = 1;
147 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
148 inst.Dst[0].Register.Index = ctx->pos_output_temp;
149 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
150 inst.Instruction.NumSrcRegs = 2;
151 tgsi_transform_src_reg_xyzw(&inst.Src[0], TGSI_FILE_TEMPORARY, ctx->pos_output_temp);
152 tgsi_transform_src_reg_xyzw(&inst.Src[1], TGSI_FILE_TEMPORARY, ctx->pos_output_temp);
153 inst.Src[0].Register.SwizzleX =
154 inst.Src[0].Register.SwizzleY =
155 inst.Src[0].Register.SwizzleZ =
156 inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Z;
157
158 inst.Src[1].Register.SwizzleX =
159 inst.Src[1].Register.SwizzleY =
160 inst.Src[1].Register.SwizzleZ =
161 inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
162
163 tctx->emit_instruction(tctx, &inst);
164
165
166 /* OpenGL Core Profile 4.5 - 13.6.1
167 * The vertex's windows z coordinate zw is given by zw = s * z + b.
168 *
169 * * With clip control depth mode ZERO_TO_ONE
170 * s = f - n, b = n, and hence
171 *
172 * zw_0_1 = z * gl_DepthRange.diff + gl_DepthRange.near
173 */
174 tgsi_transform_op3_swz_inst(tctx, TGSI_OPCODE_MAD,
175 mad_dst_file, mad_dst_index,
176 TGSI_WRITEMASK_X,
177 TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
178 TGSI_SWIZZLE_X,
179 false,
180 TGSI_FILE_CONSTANT, ctx->depth_range_const,
181 TGSI_SWIZZLE_Z,
182 TGSI_FILE_CONSTANT, ctx->depth_range_const,
183 TGSI_SWIZZLE_X);
184
185 /* If clip control depth mode is NEGATIVE_ONE_TO_ONE, then
186 * s = 0.5 * (f - n), b = 0.5 * (n + f), and hence
187 *
188 * zw_m1_1 = 0.5 * (zw_01 + gl_DepthRange.far)
189 */
190 if (ctx->depth_clip_minus_one_to_one) {
191 /* z += gl_DepthRange.far */
192 tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_ADD,
193 TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
194 TGSI_WRITEMASK_X,
195 TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
196 TGSI_SWIZZLE_X,
197 TGSI_FILE_CONSTANT, ctx->depth_range_const,
198 TGSI_SWIZZLE_Y, false);
199 /* z *= 0.5 */
200 tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MUL,
201 TGSI_FILE_OUTPUT, ctx->depth_var,
202 TGSI_WRITEMASK_X,
203 TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
204 TGSI_SWIZZLE_X,
205 TGSI_FILE_IMMEDIATE, ctx->imm,
206 TGSI_SWIZZLE_X, false);
207 }
208 }
209
210
211 static void
212 prolog_fs(struct tgsi_transform_context *tctx)
213 {
214 struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
215
216 prolog_common(ctx);
217
218 ctx->depth_range_corrected = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 2;
219 tgsi_transform_temp_decl(tctx, ctx->depth_range_corrected);
220
221 /* declare the input */
222 ctx->depth_var = ctx->info.num_inputs;
223 tgsi_transform_input_decl(tctx, ctx->depth_var,
224 TGSI_SEMANTIC_GENERIC,
225 ctx->next_generic,
226 TGSI_INTERPOLATE_LINEAR);
227
228 /* declare the output */
229 if (ctx->pos_output < 0) {
230 ctx->pos_output = ctx->info.num_outputs;
231 tgsi_transform_output_decl(tctx, ctx->pos_output,
232 TGSI_SEMANTIC_POSITION,
233 0,
234 TGSI_INTERPOLATE_LINEAR);
235 }
236
237 if (ctx->info.reads_z) {
238 ctx->pos_input_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 3;
239 tgsi_transform_temp_decl(tctx, ctx->pos_input_temp);
240
241 assert(ctx->pos_input_temp >= 0);
242 /* copy normal position */
243 tgsi_transform_op1_inst(tctx, TGSI_OPCODE_MOV,
244 TGSI_FILE_TEMPORARY, ctx->pos_input_temp,
245 TGSI_WRITEMASK_XYZW,
246 TGSI_FILE_INPUT, ctx->pos_input);
247 /* replace z-component with varying */
248 tgsi_transform_op1_swz_inst(tctx, TGSI_OPCODE_MOV,
249 TGSI_FILE_TEMPORARY, ctx->pos_input_temp,
250 TGSI_WRITEMASK_Z,
251 TGSI_FILE_INPUT, ctx->depth_var,
252 TGSI_SWIZZLE_X);
253 }
254 }
255
256 static void
257 epilog_fs(struct tgsi_transform_context *tctx)
258 {
259 struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
260
261 unsigned src0_file = TGSI_FILE_INPUT;
262 unsigned src0_index = ctx->depth_var;
263 unsigned src0_swizzle = TGSI_SWIZZLE_X;
264
265 if (ctx->info.writes_z) {
266 src0_file = TGSI_FILE_TEMPORARY;
267 src0_index = ctx->pos_output_temp;
268 src0_swizzle = TGSI_SWIZZLE_Z;
269 }
270
271 /* it is possible to have gl_DepthRange.near > gl_DepthRange.far, so first
272 * we have to sort the two */
273 tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MIN,
274 TGSI_FILE_TEMPORARY, ctx->depth_range_corrected,
275 TGSI_WRITEMASK_X,
276 TGSI_FILE_CONSTANT, ctx->depth_range_const,
277 TGSI_SWIZZLE_X,
278 TGSI_FILE_CONSTANT, ctx->depth_range_const,
279 TGSI_SWIZZLE_Y,
280 false);
281
282 tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MAX,
283 TGSI_FILE_TEMPORARY, ctx->depth_range_corrected,
284 TGSI_WRITEMASK_Y,
285 TGSI_FILE_CONSTANT, ctx->depth_range_const,
286 TGSI_SWIZZLE_X,
287 TGSI_FILE_CONSTANT, ctx->depth_range_const,
288 TGSI_SWIZZLE_Y,
289 false);
290
291 /* gl_FragDepth = max(gl_FragDepth, min(gl_DepthRange.near, gl_DepthRange.far)) */
292 tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MAX,
293 TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
294 TGSI_WRITEMASK_X,
295 src0_file, src0_index, src0_swizzle,
296 TGSI_FILE_TEMPORARY, ctx->depth_range_corrected,
297 TGSI_SWIZZLE_X, false);
298
299 /* gl_FragDepth = min(gl_FragDepth, max(gl_DepthRange.near, gl_DepthRange.far)) */
300 tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MIN,
301 TGSI_FILE_OUTPUT, ctx->pos_output,
302 TGSI_WRITEMASK_Z,
303 TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
304 TGSI_SWIZZLE_X,
305 TGSI_FILE_TEMPORARY, ctx->depth_range_corrected,
306 TGSI_SWIZZLE_Y, false);
307 }
308
309 static void
310 transform_instr(struct tgsi_transform_context *tctx,
311 struct tgsi_full_instruction *inst)
312 {
313 struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
314
315 if (ctx->pos_output >= 0) {
316 /* replace writes to gl_Position / gl_FragDepth with a temp-variable
317 */
318 for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
319 if (inst->Dst[i].Register.File == TGSI_FILE_OUTPUT &&
320 inst->Dst[i].Register.Index == ctx->pos_output) {
321 inst->Dst[i].Register.File = TGSI_FILE_TEMPORARY;
322 inst->Dst[i].Register.Index = ctx->pos_output_temp;
323 }
324 }
325 }
326
327 if (ctx->info.reads_z) {
328 /* replace reads from gl_FragCoord with temp-variable
329 */
330 assert(ctx->pos_input_temp >= 0);
331 for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
332 if (inst->Src[i].Register.File == TGSI_FILE_INPUT &&
333 inst->Src[i].Register.Index == ctx->pos_input) {
334 inst->Src[i].Register.File = TGSI_FILE_TEMPORARY;
335 inst->Src[i].Register.Index = ctx->pos_input_temp;
336 }
337 }
338 }
339
340 /* In a GS each we have to add the z-write opilog for each emit
341 */
342 if (ctx->info.processor == PIPE_SHADER_GEOMETRY &&
343 inst->Instruction.Opcode == TGSI_OPCODE_EMIT)
344 epilog_last_vertex_stage(tctx);
345
346 tctx->emit_instruction(tctx, inst);
347 }
348
349 const struct tgsi_token *
350 st_tgsi_lower_depth_clamp(const struct tgsi_token *tokens,
351 int depth_range_const,
352 bool clip_negative_one_to_one)
353 {
354 struct tgsi_depth_clamp_transform ctx;
355 struct tgsi_token *newtoks;
356 int newlen;
357
358 memset(&ctx, 0, sizeof(ctx));
359 tgsi_scan_shader(tokens, &ctx.info);
360
361 /* we only want to do this for the fragment shader, and the shader-stage
362 * right before it, but in the first pass there might be no "next" shader
363 */
364 if (ctx.info.processor != PIPE_SHADER_FRAGMENT &&
365 ctx.info.processor != PIPE_SHADER_GEOMETRY &&
366 ctx.info.processor != PIPE_SHADER_VERTEX &&
367 ctx.info.processor != PIPE_SHADER_TESS_EVAL &&
368 (ctx.info.properties[TGSI_PROPERTY_NEXT_SHADER] > PIPE_SHADER_VERTEX &&
369 (ctx.info.properties[TGSI_PROPERTY_NEXT_SHADER] != PIPE_SHADER_FRAGMENT))) {
370 return tokens;
371 }
372
373 ctx.base.transform_declaration = transform_decl;
374 ctx.base.transform_instruction = transform_instr;
375
376 if (ctx.info.processor == PIPE_SHADER_FRAGMENT) {
377 ctx.base.prolog = prolog_fs;
378 ctx.base.epilog = epilog_fs;
379 } else {
380 ctx.base.prolog = prolog_last_vertex_stage;
381 ctx.base.epilog = epilog_last_vertex_stage;
382 }
383
384 ctx.pos_output = ctx.pos_input = -1;
385 ctx.depth_range_const = depth_range_const;
386 ctx.depth_clip_minus_one_to_one = clip_negative_one_to_one;
387
388 /* We add approximately 30 tokens per Z write, so add this per vertex in
389 * a GS and some additional tokes for VS and TES
390 */
391 newlen = tgsi_num_tokens(tokens) +
392 30 * ctx.info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] +
393 120;
394
395 newtoks = tgsi_alloc_tokens(newlen);
396 if (!newtoks)
397 return tokens;
398
399 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
400
401 return newtoks;
402 }
403
/**
 * Convenience entry point for callers that do not supply a clip-space
 * depth convention: forwards to st_tgsi_lower_depth_clamp() with the
 * [-1, 1] clip-range flag cleared.
 *
 * \param tokens             the shader to transform
 * \param depth_range_const  index of the constant holding the depth range
 *
 * \return whatever st_tgsi_lower_depth_clamp() returns for these arguments
 */
const struct tgsi_token *
st_tgsi_lower_depth_clamp_fs(const struct tgsi_token *tokens,
                             int depth_range_const)
{
   const bool clip_negative_one_to_one = false;

   return st_tgsi_lower_depth_clamp(tokens, depth_range_const,
                                    clip_negative_one_to_one);
}