nir: Add nir_lower_tex options to lower sampler return formats.
[mesa.git] / src / compiler / nir / nir_lower_tex.c
1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /*
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 * + texture projector lowering: converts the coordinate division for
28 * texture projection to be done in ALU instructions instead of
29 * asking the texture operation to do so.
30 * + lowering RECT: converts the un-normalized RECT texture coordinates
31 * to normalized coordinates with txs plus ALU instructions
32 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 * inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 * Note that this automatically triggers texture projector lowering if
35 * needed, since clamping must happen after projector lowering.
36 */
37
38 #include "nir.h"
39 #include "nir_builder.h"
40 #include "nir_format_convert.h"
41
/* Lowers texture projection: multiplies the coordinate (and shadow
 * comparator, if any) by the reciprocal of the projector in ALU
 * instructions, then removes the projector source so back-ends never see
 * nir_tex_src_projector.  No-op if the instruction has no projector.
 */
static void
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return;

   b->cursor = nir_before_instr(&tex->instr);

   /* One reciprocal, reused for every projected source. */
   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
}
104
/* Emits a txs (textureSize) query against the same texture as @tex, at
 * LOD 0, inserted before @tex, and returns its result converted to float.
 * Only the sources that identify the texture/sampler are copied over.
 */
static nir_ssa_def *
get_texture_size(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_tex_instr *txs;

   unsigned num_srcs = 1; /* One for the LOD */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      /* Count the sources relevant to a size query. */
      if (tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset)
         num_srcs++;
   }

   txs = nir_tex_instr_create(b->shader, num_srcs);
   txs->op = nir_texop_txs;
   txs->sampler_dim = tex->sampler_dim;
   txs->is_array = tex->is_array;
   txs->is_shadow = tex->is_shadow;
   txs->is_new_style_shadow = tex->is_new_style_shadow;
   txs->texture_index = tex->texture_index;
   txs->sampler_index = tex->sampler_index;
   txs->dest_type = nir_type_int;

   /* Copy the texture/sampler-identifying sources from the original. */
   unsigned idx = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset) {
         nir_src_copy(&txs->src[idx].src, &tex->src[i].src, txs);
         txs->src[idx].src_type = tex->src[i].src_type;
         idx++;
      }
   }
   /* Add in an LOD because some back-ends require it */
   txs->src[idx].src = nir_src_for_ssa(nir_imm_int(b, 0));
   txs->src[idx].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txs->instr, &txs->dest,
                     nir_tex_instr_dest_size(txs), 32, NULL);
   nir_builder_instr_insert(b, &txs->instr);

   /* Callers want float sizes (for frcp etc.), so convert the int result. */
   return nir_i2f32(b, &txs->dest.ssa);
}
152
/* Emits a nir_texop_lod query for the same texture/coordinate as @tex,
 * inserted before @tex, and returns the computed LOD (the y component of
 * the lod opcode's result).
 */
static nir_ssa_def *
get_texture_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_tex_instr *tql;

   unsigned num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      /* The LOD query needs the coordinate plus the texture/sampler refs. */
      if (tex->src[i].src_type == nir_tex_src_coord ||
          tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset)
         num_srcs++;
   }

   tql = nir_tex_instr_create(b->shader, num_srcs);
   tql->op = nir_texop_lod;
   tql->coord_components = tex->coord_components;
   tql->sampler_dim = tex->sampler_dim;
   tql->is_array = tex->is_array;
   tql->is_shadow = tex->is_shadow;
   tql->is_new_style_shadow = tex->is_new_style_shadow;
   tql->texture_index = tex->texture_index;
   tql->sampler_index = tex->sampler_index;
   tql->dest_type = nir_type_float;

   unsigned idx = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_coord ||
          tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset) {
         nir_src_copy(&tql->src[idx].src, &tex->src[i].src, tql);
         tql->src[idx].src_type = tex->src[i].src_type;
         idx++;
      }
   }

   nir_ssa_dest_init(&tql->instr, &tql->dest, 2, 32, NULL);
   nir_builder_instr_insert(b, &tql->instr);

   /* The LOD is the y component of the result */
   return nir_channel(b, &tql->dest.ssa, 1);
}
200
/* Folds a nir_tex_src_offset into the coordinate itself and removes the
 * offset source.  For float (normalized) coordinates the offset is scaled
 * by 1/textureSize first; RECT samplers use un-normalized coords so the
 * offset is added directly, as are integer (txf-style) coords.  The array
 * index channel is never offset.  Returns true if an offset was folded.
 */
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         /* Un-normalized coords: texel offset applies directly. */
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         /* Normalized coords: convert the texel offset to coord space. */
         nir_ssa_def *txs = get_texture_size(b, tex);
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, offset_coord, 1),
                                    nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
256
257 static void
258 lower_rect(nir_builder *b, nir_tex_instr *tex)
259 {
260 nir_ssa_def *txs = get_texture_size(b, tex);
261 nir_ssa_def *scale = nir_frcp(b, txs);
262
263 /* Walk through the sources normalizing the requested arguments. */
264 for (unsigned i = 0; i < tex->num_srcs; i++) {
265 if (tex->src[i].src_type != nir_tex_src_coord)
266 continue;
267
268 nir_ssa_def *coords =
269 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
270 nir_instr_rewrite_src(&tex->instr,
271 &tex->src[i].src,
272 nir_src_for_ssa(nir_fmul(b, coords, scale)));
273 }
274
275 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
276 }
277
/* Converts an implicit-LOD tex/txb into an explicit txl: queries the LOD
 * with nir_texop_lod, folds in any bias and min-LOD clamp, and adds the
 * result as a nir_tex_src_lod source.
 */
static void
lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *lod = get_texture_lod(b, tex);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   if (bias_idx >= 0) {
      /* If we have a bias, add it in */
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
      nir_tex_instr_remove_src(tex, bias_idx);
   }

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
307
/* Emits a clone of @tex that samples plane @plane of a multi-planar
 * (e.g. YUV) texture, by copying all sources and appending a
 * nir_tex_src_plane source.  Returns the vec4 result of the new sample.
 */
static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   /* One extra source slot for the plane index. */
   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src, plane_tex);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float;
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 32, NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   return &plane_tex->dest.ssa;
}
339
/* Converts Y'CbCr samples to RGB and rewrites all uses of @tex's result
 * with the converted vec4 (alpha passed through from @a).  The matrix and
 * the 16/255 and 128/255 offsets match BT.601 limited-range ("studio
 * swing") video coefficients — NOTE(review): presumably chosen for the
 * external-texture (video) use case; confirm against callers.
 */
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a)
{
   /* Rows of the YUV -> RGB conversion matrix (applied via fdot4). */
   nir_const_value m[3] = {
      { .f32 = { 1.0f,  0.0f,         1.59602678f, 0.0f } },
      { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
      { .f32 = { 1.0f,  2.01723214f,  0.0f,        0.0f } }
   };

   /* Bias the inputs: Y by -16/255 (scaled by 255/219), Cb/Cr by -128/255. */
   nir_ssa_def *yuv =
      nir_vec4(b,
               nir_fmul(b, nir_imm_float(b, 1.16438356f),
                        nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),
               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_imm_float(b, 0.0));

   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
   nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));
   nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));

   nir_ssa_def *result = nir_vec4(b, red, green, blue, a);

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
}
367
368 static void
369 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex)
370 {
371 b->cursor = nir_after_instr(&tex->instr);
372
373 nir_ssa_def *y = sample_plane(b, tex, 0);
374 nir_ssa_def *uv = sample_plane(b, tex, 1);
375
376 convert_yuv_to_rgb(b, tex,
377 nir_channel(b, y, 0),
378 nir_channel(b, uv, 0),
379 nir_channel(b, uv, 1),
380 nir_imm_float(b, 1.0f));
381 }
382
383 static void
384 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex)
385 {
386 b->cursor = nir_after_instr(&tex->instr);
387
388 nir_ssa_def *y = sample_plane(b, tex, 0);
389 nir_ssa_def *u = sample_plane(b, tex, 1);
390 nir_ssa_def *v = sample_plane(b, tex, 2);
391
392 convert_yuv_to_rgb(b, tex,
393 nir_channel(b, y, 0),
394 nir_channel(b, u, 0),
395 nir_channel(b, v, 0),
396 nir_imm_float(b, 1.0f));
397 }
398
399 static void
400 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex)
401 {
402 b->cursor = nir_after_instr(&tex->instr);
403
404 nir_ssa_def *y = sample_plane(b, tex, 0);
405 nir_ssa_def *xuxv = sample_plane(b, tex, 1);
406
407 convert_yuv_to_rgb(b, tex,
408 nir_channel(b, y, 0),
409 nir_channel(b, xuxv, 1),
410 nir_channel(b, xuxv, 3),
411 nir_imm_float(b, 1.0f));
412 }
413
414 static void
415 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex)
416 {
417 b->cursor = nir_after_instr(&tex->instr);
418
419 nir_ssa_def *y = sample_plane(b, tex, 0);
420 nir_ssa_def *uxvx = sample_plane(b, tex, 1);
421
422 convert_yuv_to_rgb(b, tex,
423 nir_channel(b, y, 1),
424 nir_channel(b, uxvx, 0),
425 nir_channel(b, uxvx, 2),
426 nir_imm_float(b, 1.0f));
427 }
428
429 static void
430 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex)
431 {
432 b->cursor = nir_after_instr(&tex->instr);
433
434 nir_ssa_def *ayuv = sample_plane(b, tex, 0);
435
436 convert_yuv_to_rgb(b, tex,
437 nir_channel(b, ayuv, 2),
438 nir_channel(b, ayuv, 1),
439 nir_channel(b, ayuv, 0),
440 nir_channel(b, ayuv, 3));
441 }
442
/*
 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
 * computed from the gradients.  The ddx/ddy sources are removed, any
 * min-LOD clamp is folded into @lod, and @lod becomes a nir_tex_src_lod
 * source on the (now txl) instruction.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txd);

   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
465
/* Lowers a cube-map txd to txl by computing the LOD from the gradients.
 * Cube maps need special treatment because the per-face coordinate is a
 * quotient of the input coordinates, so the derivative uses the quotient
 * rule (see the step-by-step derivation below).
 */
static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = get_texture_size(b, tex);

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * that absolute value of the component of largest magnitude. This
    * division requires that the computing of the derivative of the texel
    * coordinate must use the quotient rule. The high level GLSL code is as
    * follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use quotient rule to compute derivative. The normalized to
    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    * only concerned with the magnitudes of the derivatives whose values are
    * not affected by the sign. We drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3, false),
                           nir_swizzle(b, p, yzx, 3, false)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3, false),
                              nir_swizzle(b, dPdx, yzx, 3, false)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3, false),
                              nir_swizzle(b, dPdy, yzx, 3, false)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
612
/* Lowers a txd (explicit-gradient) lookup to txl by computing rho, the
 * maximum scale factor of the gradients in texel space, and taking
 * lod = log2(rho).  Cube maps are dispatched to their own function.
 */
static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   /* Number of size components matching the gradient dimensionality. */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_ssa_def *size =
      nir_channels(b, get_texture_size(b, tex), component_mask);

   /* Scale the gradients by width and height.  Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_ssa_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);

   nir_ssa_def *rho;
   if (dPdx->num_components == 1) {
      /* 1D: rho is just the larger absolute gradient. */
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      /* 2D/3D: rho is the larger gradient vector length. */
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho).  We're ignoring GL state biases for now. */
   nir_ssa_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
670
/* Clamps the coordinate components selected by @sat_mask (bit 0 = s/x,
 * bit 1 = t/y, bit 2 = r/z) to [0, 1] — or to [0, textureSize] for RECT
 * samplers, whose coords are un-normalized.  The array index component is
 * never clamped.  Used to emulate CLAMP-style wrap modes.
 */
static void
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   b->cursor = nir_before_instr(&tex->instr);

   /* Walk through the sources saturating the requested arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type != nir_tex_src_coord)
         continue;

      nir_ssa_def *src =
         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);

      /* split src into components: */
      nir_ssa_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_ssa_def *txs = get_texture_size(b, tex);
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(src));
   }
}
720
721 static nir_ssa_def *
722 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
723 {
724 nir_const_value v;
725
726 memset(&v, 0, sizeof(v));
727
728 if (swizzle_val == 4) {
729 v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0;
730 } else {
731 assert(swizzle_val == 5);
732 if (type == nir_type_float)
733 v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0;
734 else
735 v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1;
736 }
737
738 return nir_build_imm(b, 4, 32, v);
739 }
740
/* Applies the per-texture result swizzle (values 0-3 select a channel,
 * 4 = zero, 5 = one) after the texture instruction.  For tg4 (gather),
 * which returns four copies of one component, a channel swizzle just
 * retargets tex->component; zero/one replace the whole result.
 */
static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0s or 1s, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
      } else {
         /* Mixed channel/constant swizzle: build the vec4 channel by
          * channel, pulling constants from get_zero_or_one().
          */
         nir_ssa_def *srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
            } else {
               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
            }
         }
         swizzled = nir_vec(b, srcs, 4);
      }
   }

   /* Rewrite uses after the swizzle so the swizzle itself still reads the
    * original texture result.
    */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
                                  swizzled->parent_instr);
}
780
781 static void
782 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
783 {
784 assert(tex->dest.is_ssa);
785 assert(nir_tex_instr_dest_size(tex) == 4);
786 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
787
788 b->cursor = nir_after_instr(&tex->instr);
789
790 nir_ssa_def *rgb =
791 nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
792
793 /* alpha is untouched: */
794 nir_ssa_def *result = nir_vec4(b,
795 nir_channel(b, rgb, 0),
796 nir_channel(b, rgb, 1),
797 nir_channel(b, rgb, 2),
798 nir_channel(b, &tex->dest.ssa, 3));
799
800 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
801 result->parent_instr);
802 }
803
/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 */
static void
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_ssa_def *color = &tex->dest.ssa;

   b->cursor = nir_after_instr(&tex->instr);

   switch (options->lower_tex_packing[tex->sampler_index]) {
   case nir_lower_tex_packing_none:
      return;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = {16, 16, 16, 16};

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         if (tex->is_shadow && tex->is_new_style_shadow) {
            /* New-style shadow compares return a single value, so only one
             * f16 needs unpacking.
             */
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
         } else {
            /* Channel 0 holds packed r/g halves, channel 1 holds b/a. */
            nir_ssa_def *rg = nir_channel(b, color, 0);
            nir_ssa_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
         }
         break;

      case nir_type_int:
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(color),
                                  color->parent_instr);
}
866
867 static bool
868 nir_lower_tex_block(nir_block *block, nir_builder *b,
869 const nir_lower_tex_options *options)
870 {
871 bool progress = false;
872
873 nir_foreach_instr_safe(instr, block) {
874 if (instr->type != nir_instr_type_tex)
875 continue;
876
877 nir_tex_instr *tex = nir_instr_as_tex(instr);
878 bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
879
880 /* mask of src coords to saturate (clamp): */
881 unsigned sat_mask = 0;
882
883 if ((1 << tex->sampler_index) & options->saturate_r)
884 sat_mask |= (1 << 2); /* .z */
885 if ((1 << tex->sampler_index) & options->saturate_t)
886 sat_mask |= (1 << 1); /* .y */
887 if ((1 << tex->sampler_index) & options->saturate_s)
888 sat_mask |= (1 << 0); /* .x */
889
890 /* If we are clamping any coords, we must lower projector first
891 * as clamping happens *after* projection:
892 */
893 if (lower_txp || sat_mask) {
894 project_src(b, tex);
895 progress = true;
896 }
897
898 if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
899 (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
900 (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
901 options->lower_rect_offset)) {
902 progress = lower_offset(b, tex) || progress;
903 }
904
905 if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) {
906 lower_rect(b, tex);
907 progress = true;
908 }
909
910 if ((1 << tex->texture_index) & options->lower_y_uv_external) {
911 lower_y_uv_external(b, tex);
912 progress = true;
913 }
914
915 if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
916 lower_y_u_v_external(b, tex);
917 progress = true;
918 }
919
920 if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
921 lower_yx_xuxv_external(b, tex);
922 progress = true;
923 }
924
925 if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
926 lower_xy_uxvx_external(b, tex);
927 progress = true;
928 }
929
930 if ((1 << tex->texture_index) & options->lower_ayuv_external) {
931 lower_ayuv_external(b, tex);
932 progress = true;
933 }
934
935 if (sat_mask) {
936 saturate_src(b, tex, sat_mask);
937 progress = true;
938 }
939
940 if (((1 << tex->texture_index) & options->swizzle_result) &&
941 !nir_tex_instr_is_query(tex) &&
942 !(tex->is_shadow && tex->is_new_style_shadow)) {
943 swizzle_result(b, tex, options->swizzles[tex->texture_index]);
944 progress = true;
945 }
946
947 /* should be after swizzle so we know which channels are rgb: */
948 if (((1 << tex->texture_index) & options->lower_srgb) &&
949 !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
950 linearize_srgb_result(b, tex);
951 progress = true;
952 }
953
954 const bool has_min_lod =
955 nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
956 const bool has_offset =
957 nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
958
959 if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
960 options->lower_txb_shadow_clamp) {
961 lower_implicit_lod(b, tex);
962 progress = true;
963 }
964
965 if (options->lower_tex_packing[tex->sampler_index] !=
966 nir_lower_tex_packing_none &&
967 tex->op != nir_texop_txs &&
968 tex->op != nir_texop_query_levels) {
969 lower_tex_packing(b, tex, options);
970 progress = true;
971 }
972
973 if (tex->op == nir_texop_txd &&
974 (options->lower_txd ||
975 (options->lower_txd_shadow && tex->is_shadow) ||
976 (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
977 (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
978 (options->lower_txd_cube_map &&
979 tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
980 (options->lower_txd_3d &&
981 tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
982 lower_gradient(b, tex);
983 progress = true;
984 continue;
985 }
986
987 /* TXF, TXS and TXL require a LOD but not everything we implement using those
988 * three opcodes provides one. Provide a default LOD of 0.
989 */
990 if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
991 (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
992 tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
993 (tex->op == nir_texop_tex &&
994 b->shader->info.stage != MESA_SHADER_FRAGMENT))) {
995 b->cursor = nir_before_instr(&tex->instr);
996 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
997 progress = true;
998 continue;
999 }
1000 }
1001
1002 return progress;
1003 }
1004
1005 static bool
1006 nir_lower_tex_impl(nir_function_impl *impl,
1007 const nir_lower_tex_options *options)
1008 {
1009 bool progress = false;
1010 nir_builder builder;
1011 nir_builder_init(&builder, impl);
1012
1013 nir_foreach_block(block, impl) {
1014 progress |= nir_lower_tex_block(block, &builder, options);
1015 }
1016
1017 nir_metadata_preserve(impl, nir_metadata_block_index |
1018 nir_metadata_dominance);
1019 return progress;
1020 }
1021
1022 bool
1023 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1024 {
1025 bool progress = false;
1026
1027 nir_foreach_function(function, shader) {
1028 if (function->impl)
1029 progress |= nir_lower_tex_impl(function->impl, options);
1030 }
1031
1032 return progress;
1033 }