46bcc8b6d1fa83718b2b49865e5d604ef6dce978
[mesa.git] / src / compiler / nir / nir_lower_tex.c
1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /*
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 * + texture projector lowering: converts the coordinate division for
28 * texture projection to be done in ALU instructions instead of
29 * asking the texture operation to do so.
30 * + lowering RECT: converts the un-normalized RECT texture coordinates
31 * to normalized coordinates with txs plus ALU instructions
32 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 * inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 * Note that this automatically triggers texture projector lowering if
35 * needed, since clamping must happen after projector lowering.
36 */
37
38 #include "nir.h"
39 #include "nir_builder.h"
40 #include "nir_builtin_builder.h"
41 #include "nir_format_convert.h"
42
static bool
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Lower a texture projector: divide the coordinate (and the shadow
    * comparator, if present) by the projector using ALU instructions,
    * then drop the projector source from the instruction.
    *
    * Returns true if the instruction had a projector and was modified.
    */

   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return false;

   b->cursor = nir_before_instr(&tex->instr);

   /* Multiply by 1/proj rather than emitting a divide per source. */
   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         /* All other sources (lod, bias, offset, ...) are left alone. */
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
   return true;
}
106
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   /* Fold a nir_tex_src_offset source into the coordinate:
    *  - integer coords: simple iadd
    *  - float RECT coords (unnormalized): i2f the offset and fadd
    *  - other float coords (normalized): scale the offset by 1/size first
    * The array index channel is never offset.
    *
    * Returns true if the instruction had an offset and was modified.
    */
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         /* RECT coords are already in texel units. */
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         /* Normalized coords: convert the texel offset into the
          * [0, 1] coordinate space by scaling with 1/size.
          */
         nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, offset_coord, 1),
                                 nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
162
163 static void
164 lower_rect(nir_builder *b, nir_tex_instr *tex)
165 {
166 /* Set the sampler_dim to 2D here so that get_texture_size picks up the
167 * right dimensionality.
168 */
169 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
170
171 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
172 nir_ssa_def *scale = nir_frcp(b, txs);
173
174 /* Walk through the sources normalizing the requested arguments. */
175 for (unsigned i = 0; i < tex->num_srcs; i++) {
176 if (tex->src[i].src_type != nir_tex_src_coord)
177 continue;
178
179 nir_ssa_def *coords =
180 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
181 nir_instr_rewrite_src(&tex->instr,
182 &tex->src[i].src,
183 nir_src_for_ssa(nir_fmul(b, coords, scale)));
184 }
185 }
186
static void
lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
{
   /* Convert an implicit-LOD tex/txb into an explicit-LOD txl by
    * computing the LOD with nir_get_texture_lod(), folding in any
    * bias source, and clamping against any min-LOD source.
    */
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *lod = nir_get_texture_lod(b, tex);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   if (bias_idx >= 0) {
      /* If we have a bias, add it in */
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
      nir_tex_instr_remove_src(tex, bias_idx);
   }

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
216
/* Emit a copy of @tex with an extra nir_tex_src_plane source selecting
 * @plane, and return its vec4 result.  If options->scale_factors has a
 * non-zero entry for this texture, the result is pre-multiplied by it.
 * Used to sample the individual planes of multi-planar YUV textures.
 */
static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   /* Clone the original instruction with room for one extra source. */
   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src, plane_tex);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float;
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
                     nir_dest_bit_size(tex->dest), NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If scaling_factor is set, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->dest.ssa,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->dest.ssa;
}
255
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a)
{
   /* Convert sampled Y'CbCr components to RGBA and rewrite all uses of
    * the original tex result to the converted value.  Computes
    * result = y*m0 + u*m1 + v*m2 + (offset.rgb, a) via an ffma chain.
    *
    * NOTE(review): the coefficients look like BT.601 limited-range
    * YCbCr->RGB constants (1.164, 1.596, ...) -- confirm against the
    * producers of these external/YUV surfaces.
    */
   nir_const_value m[3][4] = {
      { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 0.0f } },
      { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f }, { .f32 = 0.0f } },
      { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f }, { .f32 = 0.0f } },
   };
   unsigned bit_size = nir_dest_bit_size(tex->dest);

   /* Constant offset in rgb; the caller-provided alpha rides along in w. */
   nir_ssa_def *offset =
      nir_vec4(b,
               nir_imm_float(b, -0.874202214f),
               nir_imm_float(b, 0.531667820f),
               nir_imm_float(b, -1.085630787f),
               a);

   /* Convert the 32-bit constants to the destination bit size. */
   offset = nir_f2fN(b, offset, bit_size);

   nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[0]), bit_size);
   nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[1]), bit_size);
   nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[2]), bit_size);

   nir_ssa_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
}
286
287 static void
288 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
289 const nir_lower_tex_options *options)
290 {
291 b->cursor = nir_after_instr(&tex->instr);
292
293 nir_ssa_def *y = sample_plane(b, tex, 0, options);
294 nir_ssa_def *uv = sample_plane(b, tex, 1, options);
295
296 convert_yuv_to_rgb(b, tex,
297 nir_channel(b, y, 0),
298 nir_channel(b, uv, 0),
299 nir_channel(b, uv, 1),
300 nir_imm_float(b, 1.0f));
301 }
302
303 static void
304 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
305 const nir_lower_tex_options *options)
306 {
307 b->cursor = nir_after_instr(&tex->instr);
308
309 nir_ssa_def *y = sample_plane(b, tex, 0, options);
310 nir_ssa_def *u = sample_plane(b, tex, 1, options);
311 nir_ssa_def *v = sample_plane(b, tex, 2, options);
312
313 convert_yuv_to_rgb(b, tex,
314 nir_channel(b, y, 0),
315 nir_channel(b, u, 0),
316 nir_channel(b, v, 0),
317 nir_imm_float(b, 1.0f));
318 }
319
320 static void
321 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
322 const nir_lower_tex_options *options)
323 {
324 b->cursor = nir_after_instr(&tex->instr);
325
326 nir_ssa_def *y = sample_plane(b, tex, 0, options);
327 nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
328
329 convert_yuv_to_rgb(b, tex,
330 nir_channel(b, y, 0),
331 nir_channel(b, xuxv, 1),
332 nir_channel(b, xuxv, 3),
333 nir_imm_float(b, 1.0f));
334 }
335
336 static void
337 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
338 const nir_lower_tex_options *options)
339 {
340 b->cursor = nir_after_instr(&tex->instr);
341
342 nir_ssa_def *y = sample_plane(b, tex, 0, options);
343 nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
344
345 convert_yuv_to_rgb(b, tex,
346 nir_channel(b, y, 1),
347 nir_channel(b, uxvx, 0),
348 nir_channel(b, uxvx, 2),
349 nir_imm_float(b, 1.0f));
350 }
351
352 static void
353 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
354 const nir_lower_tex_options *options)
355 {
356 b->cursor = nir_after_instr(&tex->instr);
357
358 nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
359
360 convert_yuv_to_rgb(b, tex,
361 nir_channel(b, ayuv, 2),
362 nir_channel(b, ayuv, 1),
363 nir_channel(b, ayuv, 0),
364 nir_channel(b, ayuv, 3));
365 }
366
367 static void
368 lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
369 const nir_lower_tex_options *options)
370 {
371 b->cursor = nir_after_instr(&tex->instr);
372
373 nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
374
375 convert_yuv_to_rgb(b, tex,
376 nir_channel(b, xyuv, 2),
377 nir_channel(b, xyuv, 1),
378 nir_channel(b, xyuv, 0),
379 nir_imm_float(b, 1.0f));
380 }
381
/*
 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
 * computed from the gradients.  The ddx/ddy sources are removed, and any
 * min-LOD source is folded into the lod via a clamp before being removed.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txd);

   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
404
/* Lower txd on a cube map to txl, computing the LOD from the coordinate
 * gradients via the face-selection + quotient-rule derivation below.
 */
static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * the absolute value of the component of largest magnitude. This
    * division requires that the computing of the derivative of the texel
    * coordinate must use the quotient rule. The high level GLSL code is as
    * follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use quotient rule to compute derivative. The normalized to
    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    * only concerned with the magnitudes of the derivatives whose values are
    * not affected by the sign. We drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   /* Select the swizzle matching the dominant axis; ties resolve in
    * z, y, x priority order via the nested bcsel.
    */
   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
551
static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Lower txd to txl by computing the LOD from the gradients:
    * scale the gradients to texel space, take rho as the larger
    * gradient magnitude, and use lod = log2(rho).
    */

   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_ssa_def *size =
      nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
                   component_mask);

   /* Scale the gradients by width and height.  Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_ssa_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);

   nir_ssa_def *rho;
   if (dPdx->num_components == 1) {
      /* 1D: rho is just the larger absolute derivative. */
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      /* Multi-component: rho is the larger Euclidean gradient length. */
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho).  We're ignoring GL state biases for now. */
   nir_ssa_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
610
611 static void
612 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
613 {
614 b->cursor = nir_before_instr(&tex->instr);
615
616 /* Walk through the sources saturating the requested arguments. */
617 for (unsigned i = 0; i < tex->num_srcs; i++) {
618 if (tex->src[i].src_type != nir_tex_src_coord)
619 continue;
620
621 nir_ssa_def *src =
622 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
623
624 /* split src into components: */
625 nir_ssa_def *comp[4];
626
627 assume(tex->coord_components >= 1);
628
629 for (unsigned j = 0; j < tex->coord_components; j++)
630 comp[j] = nir_channel(b, src, j);
631
632 /* clamp requested components, array index does not get clamped: */
633 unsigned ncomp = tex->coord_components;
634 if (tex->is_array)
635 ncomp--;
636
637 for (unsigned j = 0; j < ncomp; j++) {
638 if ((1 << j) & sat_mask) {
639 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
640 /* non-normalized texture coords, so clamp to texture
641 * size rather than [0.0, 1.0]
642 */
643 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
644 comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
645 comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
646 } else {
647 comp[j] = nir_fsat(b, comp[j]);
648 }
649 }
650 }
651
652 /* and move the result back into a single vecN: */
653 src = nir_vec(b, comp, tex->coord_components);
654
655 nir_instr_rewrite_src(&tex->instr,
656 &tex->src[i].src,
657 nir_src_for_ssa(src));
658 }
659 }
660
661 static nir_ssa_def *
662 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
663 {
664 nir_const_value v[4];
665
666 memset(&v, 0, sizeof(v));
667
668 if (swizzle_val == 4) {
669 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
670 } else {
671 assert(swizzle_val == 5);
672 if (type == nir_type_float)
673 v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
674 else
675 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
676 }
677
678 return nir_build_imm(b, 4, 32, v);
679 }
680
681 static void
682 swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
683 {
684 assert(tex->dest.is_ssa);
685
686 b->cursor = nir_after_instr(&tex->instr);
687
688 assert(nir_tex_instr_dest_size(tex) == 4);
689 unsigned swiz[4] = { 2, 3, 1, 0 };
690 nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
691
692 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
693 swizzled->parent_instr);
694 }
695
static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   /* Apply a texture-state swizzle (entries 0-3 select a source channel,
    * 4 selects constant zero, 5 selects constant one) to the result of
    * the tex instruction, rewriting all downstream uses.
    */
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      /* tg4 gathers a single component, so the swizzle is applied by
       * remapping which component gets gathered rather than by moving
       * result channels around.
       */
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         /* Gathering a constant 0/1 component: the result is just the
          * constant broadcast to all four channels.
          */
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0s or 1s, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
      } else {
         /* Mixed channel selects and constants: assemble the result
          * channel by channel.
          */
         nir_ssa_def *srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
            } else {
               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
            }
         }
         swizzled = nir_vec(b, srcs, 4);
      }
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
                                  swizzled->parent_instr);
}
735
736 static void
737 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
738 {
739 assert(tex->dest.is_ssa);
740 assert(nir_tex_instr_dest_size(tex) == 4);
741 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
742
743 b->cursor = nir_after_instr(&tex->instr);
744
745 nir_ssa_def *rgb =
746 nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
747
748 /* alpha is untouched: */
749 nir_ssa_def *result = nir_vec4(b,
750 nir_channel(b, rgb, 0),
751 nir_channel(b, rgb, 1),
752 nir_channel(b, rgb, 2),
753 nir_channel(b, &tex->dest.ssa, 3));
754
755 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
756 result->parent_instr);
757 }
758
/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 */
static void
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_ssa_def *color = &tex->dest.ssa;

   b->cursor = nir_after_instr(&tex->instr);

   switch (options->lower_tex_packing[tex->sampler_index]) {
   case nir_lower_tex_packing_none:
      return;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = {16, 16, 16, 16};

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         if (tex->is_shadow && tex->is_new_style_shadow) {
            /* New-style shadow compares produce a single scalar. */
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
         } else {
            /* R and G are packed as halves into channel 0, B and A
             * into channel 1.
             */
            nir_ssa_def *rg = nir_channel(b, color, 0);
            nir_ssa_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
         }
         break;

      case nir_type_int:
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      /* Four unorm8 channels packed into a single 32-bit channel. */
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(color),
                                  color->parent_instr);
}
821
822 static bool
823 sampler_index_lt(nir_tex_instr *tex, unsigned max)
824 {
825 assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
826
827 unsigned sampler_index = tex->sampler_index;
828
829 int sampler_offset_idx =
830 nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
831 if (sampler_offset_idx >= 0) {
832 if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
833 return false;
834
835 sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
836 }
837
838 return sampler_index < max;
839 }
840
static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   /* Lower a tg4 with explicit per-texel offsets (tg4_offsets) into four
    * separate gathers, each carrying one offset as a regular
    * nir_tex_src_offset.  Component 3 of each single-offset gather
    * supplies the texel for that offset; the four results are assembled
    * into a vec4 and the original instruction is removed.
    *
    * Always returns true (the caller only invokes this when lowering is
    * required).
    */
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *dest[4];
   for (unsigned i = 0; i < 4; ++i) {
      /* Clone the instruction with room for the extra offset source. */
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src, tex_copy);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      nir_tex_src src;
      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                              tex->tg4_offsets[i][1]));
      src.src_type = nir_tex_src_offset;
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
                        nir_tex_instr_dest_size(tex), 32, NULL);

      nir_builder_instr_insert(b, &tex_copy->instr);

      dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
   }

   nir_ssa_def *res = nir_vec4(b, dest[0], dest[1], dest[2], dest[3]);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(res));
   nir_instr_remove(&tex->instr);

   return true;
}
887
static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   /* Lower txs with a non-zero LOD into txs at LOD 0 followed by
    * ALU minification: TXS(lod) = max(TXS(0) >> lod, 1).  The array-size
    * component (the last destination component) is not minified.
    *
    * Returns true if the instruction was changed; a missing or
    * constant-zero LOD source needs no lowering.
    */
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   b->cursor = nir_before_instr(&tex->instr);
   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
                         nir_src_for_ssa(nir_imm_int(b, 0)));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1) */
   b->cursor = nir_after_instr(&tex->instr);
   nir_ssa_def *minified = nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
                                    nir_imm_int(b, 1));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_ssa_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(minified),
                                  minified->parent_instr);
   return true;
}
929
/* Per-block worker for nir_lower_tex(): scans @block for tex instructions
 * and applies each lowering enabled in @options.  The lowerings are applied
 * in a deliberate order -- e.g. projector lowering must happen before
 * coordinate saturation, result swizzling before sRGB linearization, and
 * tg4-offset lowering last (see the inline comments) -- so take care when
 * inserting new ones.
 *
 * Returns true if any instruction was changed.
 */
static bool
nir_lower_tex_block(nir_block *block, nir_builder *b,
                    const nir_lower_tex_options *options)
{
   bool progress = false;

   /* _safe variant: several lowerings below rewrite or replace the
    * instruction we are currently visiting.
    */
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_tex)
         continue;

      nir_tex_instr *tex = nir_instr_as_tex(instr);
      /* lower_txp is a bitmask indexed by sampler dimension: */
      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));

      /* mask of src coords to saturate (clamp): */
      unsigned sat_mask = 0;

      /* saturate_{s,t,r} are bitmasks of sampler indices; translate them
       * into a component mask (.x/.y/.z) for this instruction's sampler:
       */
      if ((1 << tex->sampler_index) & options->saturate_r)
         sat_mask |= (1 << 2);    /* .z */
      if ((1 << tex->sampler_index) & options->saturate_t)
         sat_mask |= (1 << 1);    /* .y */
      if ((1 << tex->sampler_index) & options->saturate_s)
         sat_mask |= (1 << 0);    /* .x */

      /* If we are clamping any coords, we must lower projector first
       * as clamping happens *after* projection:
       */
      if (lower_txp || sat_mask) {
         progress |= project_src(b, tex);
      }

      /* Lower an explicit nir_tex_src_offset source where the backend (or a
       * subsequent lowering, e.g. saturation of the coord) can't cope with
       * it.  NOTE(review): lower_offset() is defined earlier in this file.
       */
      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
           options->lower_rect_offset)) {
         progress = lower_offset(b, tex) || progress;
      }

      /* RECT lowering (un-normalized -> normalized coords, see file header)
       * is skipped for txf and query ops:
       */
      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
          tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) {
         lower_rect(b, tex);
         progress = true;
      }

      /* External-image lowerings (presumably YUV->RGB conversion for the
       * various plane layouts -- helpers defined earlier in this file).
       * Each option is a bitmask of texture indices:
       */
      if ((1 << tex->texture_index) & options->lower_y_uv_external) {
         lower_y_uv_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
         lower_y_u_v_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
         lower_yx_xuxv_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
         lower_xy_uxvx_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_ayuv_external) {
         lower_ayuv_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_xyuv_external) {
         lower_xyuv_external(b, tex, options);
         progress = true;
      }

      /* Clamp the coordinate components selected above (after projection
       * and offset lowering, as required):
       */
      if (sat_mask) {
         saturate_src(b, tex, sat_mask);
         progress = true;
      }

      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
         swizzle_tg4_broadcom(b, tex);
         progress = true;
      }

      /* Per-texture result swizzle; queries and new-style (scalar) shadow
       * results don't get swizzled:
       */
      if (((1 << tex->texture_index) & options->swizzle_result) &&
          !nir_tex_instr_is_query(tex) &&
          !(tex->is_shadow && tex->is_new_style_shadow)) {
         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
         progress = true;
      }

      /* should be after swizzle so we know which channels are rgb: */
      if (((1 << tex->texture_index) & options->lower_srgb) &&
          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
         linearize_srgb_result(b, tex);
         progress = true;
      }

      const bool has_min_lod =
         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
      const bool has_offset =
         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;

      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
          options->lower_txb_shadow_clamp) {
         lower_implicit_lod(b, tex);
         progress = true;
      }

      /* Unpack non-native texture formats in the shader; size/level queries
       * are unaffected by packing:
       */
      if (options->lower_tex_packing[tex->sampler_index] !=
          nir_lower_tex_packing_none &&
          tex->op != nir_texop_txs &&
          tex->op != nir_texop_query_levels) {
         lower_tex_packing(b, tex, options);
         progress = true;
      }

      /* txd (explicit gradients) lowering: each option covers a case the
       * backend can't do natively (shadow, min-lod clamp, cube/3D, ...).
       * `continue` because lower_gradient() replaces the instruction.
       */
      if (tex->op == nir_texop_txd &&
          (options->lower_txd ||
           (options->lower_txd_shadow && tex->is_shadow) ||
           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
           (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
            nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
            has_min_lod && !sampler_index_lt(tex, 16)) ||
           (options->lower_txd_cube_map &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
           (options->lower_txd_3d &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
         lower_gradient(b, tex);
         progress = true;
         continue;
      }

      bool shader_supports_implicit_lod =
         b->shader->info.stage == MESA_SHADER_FRAGMENT ||
         (b->shader->info.stage == MESA_SHADER_COMPUTE &&
          b->shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE);

      /* TXF, TXS and TXL require a LOD but not everything we implement using those
       * three opcodes provides one. Provide a default LOD of 0.
       */
      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
           (tex->op == nir_texop_tex && !shader_supports_implicit_lod))) {
         b->cursor = nir_before_instr(&tex->instr);
         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
         /* Stages without implicit derivatives promote plain tex to txl
          * using the LOD 0 just added:
          */
         if (tex->op == nir_texop_tex && options->lower_tex_without_implicit_lod)
            tex->op = nir_texop_txl;
         progress = true;
         continue;
      }

      if (options->lower_txs_lod && tex->op == nir_texop_txs) {
         progress |= nir_lower_txs_lod(b, tex);
         continue;
      }

      /* has to happen after all the other lowerings as the original tg4 gets
       * replaced by 4 tg4 instructions.
       */
      if (tex->op == nir_texop_tg4 &&
          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
          options->lower_tg4_offsets) {
         progress |= lower_tg4_offsets(b, tex);
         continue;
      }
   }

   return progress;
}
1102
1103 static bool
1104 nir_lower_tex_impl(nir_function_impl *impl,
1105 const nir_lower_tex_options *options)
1106 {
1107 bool progress = false;
1108 nir_builder builder;
1109 nir_builder_init(&builder, impl);
1110
1111 nir_foreach_block(block, impl) {
1112 progress |= nir_lower_tex_block(block, &builder, options);
1113 }
1114
1115 nir_metadata_preserve(impl, nir_metadata_block_index |
1116 nir_metadata_dominance);
1117 return progress;
1118 }
1119
1120 bool
1121 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1122 {
1123 bool progress = false;
1124
1125 nir_foreach_function(function, shader) {
1126 if (function->impl)
1127 progress |= nir_lower_tex_impl(function->impl, options);
1128 }
1129
1130 return progress;
1131 }