nir: Add a lowering pass to split 64bit phis
[mesa.git] / src / compiler / nir / nir_lower_tex.c
1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /*
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 * + texture projector lowering: converts the coordinate division for
28 * texture projection to be done in ALU instructions instead of
29 * asking the texture operation to do so.
30 * + lowering RECT: converts the un-normalized RECT texture coordinates
31 * to normalized coordinates with txs plus ALU instructions
32 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 * inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 * Note that this automatically triggers texture projector lowering if
35 * needed, since clamping must happen after projector lowering.
36 */
37
38 #include "nir.h"
39 #include "nir_builder.h"
40 #include "nir_builtin_builder.h"
41 #include "nir_format_convert.h"
42
/* YCbCr -> RGB conversion matrices, stored as three groups of three floats:
 * elements [0..2] are the R/G/B coefficients applied to Y, [3..5] those
 * applied to U (Cb), and [6..8] those applied to V (Cr).
 * NOTE(review): the values look like the limited-range ("studio swing")
 * ITU-R BT.601/709/2020 encodings — confirm against the derivation.
 */
static float bt601_csc_coeffs[9] = {
   1.16438356f, 1.16438356f, 1.16438356f,
   0.0f, -0.39176229f, 2.01723214f,
   1.59602678f, -0.81296764f, 0.0f,
};
static float bt709_csc_coeffs[9] = {
   1.16438356f, 1.16438356f, 1.16438356f,
   0.0f , -0.21324861f, 2.11240179f,
   1.79274107f, -0.53290933f, 0.0f,
};
static float bt2020_csc_coeffs[9] = {
   1.16438356f, 1.16438356f, 1.16438356f,
   0.0f , -0.18732610f, 2.14177232f,
   1.67867411f, -0.65042432f, 0.0f,
};

/* Per-channel (R/G/B) constant offsets added after the matrix multiply —
 * presumably folding in the luma/chroma range biases; verify against the
 * matching coefficient tables above.
 */
static float bt601_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static float bt709_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static float bt2020_csc_offsets[3] = {
   -0.915687932f, 0.347458499f, -1.148145075f
};
68
/* Lowers texture projection (e.g. textureProj()): divides the coordinate
 * and shadow-comparator sources by the projector using ALU instructions and
 * removes the projector source from the texture instruction.
 *
 * Returns true if a projector was present and lowered.
 */
static bool
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return false;

   b->cursor = nir_before_instr(&tex->instr);

   /* Compute the reciprocal once and multiply each source by it, instead of
    * emitting a division per source.
    */
   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         /* Other sources (LOD, bias, offset, ...) are not projected. */
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
   return true;
}
132
/* Folds the nir_tex_src_offset source into the coordinate:
 *  - integer coordinates (e.g. txf): plain integer add,
 *  - float coordinates on RECT samplers: coordinates are unnormalized, so
 *    the offset is added directly,
 *  - float coordinates otherwise: the offset is scaled by 1/textureSize()
 *    before the add.
 * The array index component, if any, is left untouched.
 *
 * Returns true if an offset source was found and removed.
 */
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         /* RECT coordinates are already in texel units. */
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         /* Normalized coordinates: convert the texel offset to the
          * [0, 1] coordinate space via 1/textureSize().
          */
         nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, offset_coord, 1),
                                 nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
188
189 static void
190 lower_rect(nir_builder *b, nir_tex_instr *tex)
191 {
192 /* Set the sampler_dim to 2D here so that get_texture_size picks up the
193 * right dimensionality.
194 */
195 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
196
197 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
198 nir_ssa_def *scale = nir_frcp(b, txs);
199
200 /* Walk through the sources normalizing the requested arguments. */
201 for (unsigned i = 0; i < tex->num_srcs; i++) {
202 if (tex->src[i].src_type != nir_tex_src_coord)
203 continue;
204
205 nir_ssa_def *coords =
206 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
207 nir_instr_rewrite_src(&tex->instr,
208 &tex->src[i].src,
209 nir_src_for_ssa(nir_fmul(b, coords, scale)));
210 }
211 }
212
/* Converts an implicit-derivative tex/txb into an explicit-LOD txl: queries
 * the LOD with nir_get_texture_lod(), then folds any bias and min-LOD
 * sources into the computed LOD before attaching it as a lod source.
 */
static void
lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *lod = nir_get_texture_lod(b, tex);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   if (bias_idx >= 0) {
      /* If we have a bias, add it in */
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
      nir_tex_instr_remove_src(tex, bias_idx);
   }

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
242
/* Emits a copy of the given texture instruction with an extra
 * nir_tex_src_plane source selecting the given YUV plane, and returns the
 * vec4 sample result, optionally scaled by options->scale_factors[].
 */
static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   /* Clone the original instruction with room for one extra source. */
   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src, plane_tex);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   /* The plane selector goes in the extra trailing source slot. */
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float;
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
                     nir_dest_bit_size(tex->dest), NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If scaling_factor is set, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->dest.ssa,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->dest.ssa;
}
281
/* Emits the YCbCr -> RGB conversion (result = y*m0 + u*m1 + v*m2 + offset,
 * with alpha passed through in the offset's .w lane), selecting the BT.709,
 * BT.2020, or default BT.601 coefficient set based on the texture index,
 * and rewrites all uses of the original texture result with the converted
 * value.
 */
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a,
                   const nir_lower_tex_options *options)
{

   float *offset_vals;
   float *m_vals;
   /* A texture may be flagged as at most one of BT.709 / BT.2020. */
   assert((options->bt709_external & options->bt2020_external) == 0);
   if (options->bt709_external & (1 << tex->texture_index)) {
      m_vals = bt709_csc_coeffs;
      offset_vals = bt709_csc_offsets;
   } else if (options->bt2020_external & (1 << tex->texture_index)) {
      m_vals = bt2020_csc_coeffs;
      offset_vals = bt2020_csc_offsets;
   } else {
      m_vals = bt601_csc_coeffs;
      offset_vals = bt601_csc_offsets;
   }

   /* Matrix rows as vec4 immediates; the .w lane is unused (zero). */
   nir_const_value m[3][4] = {
      { { .f32 = m_vals[0] }, { .f32 = m_vals[1] }, { .f32 = m_vals[2] }, { .f32 = 0.0f } },
      { { .f32 = m_vals[3] }, { .f32 = m_vals[4] }, { .f32 = m_vals[5] }, { .f32 = 0.0f } },
      { { .f32 = m_vals[6] }, { .f32 = m_vals[7] }, { .f32 = m_vals[8] }, { .f32 = 0.0f } },
   };
   unsigned bit_size = nir_dest_bit_size(tex->dest);

   nir_ssa_def *offset =
      nir_vec4(b,
               nir_imm_float(b, offset_vals[0]),
               nir_imm_float(b, offset_vals[1]),
               nir_imm_float(b, offset_vals[2]),
               a);

   /* All constants are built at 32 bits, then converted to the texture's
    * destination bit size.
    */
   offset = nir_f2fN(b, offset, bit_size);

   nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[0]), bit_size);
   nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[1]), bit_size);
   nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[2]), bit_size);

   nir_ssa_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
}
328
329 static void
330 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
331 const nir_lower_tex_options *options)
332 {
333 b->cursor = nir_after_instr(&tex->instr);
334
335 nir_ssa_def *y = sample_plane(b, tex, 0, options);
336 nir_ssa_def *uv = sample_plane(b, tex, 1, options);
337
338 convert_yuv_to_rgb(b, tex,
339 nir_channel(b, y, 0),
340 nir_channel(b, uv, 0),
341 nir_channel(b, uv, 1),
342 nir_imm_float(b, 1.0f),
343 options);
344 }
345
346 static void
347 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
348 const nir_lower_tex_options *options)
349 {
350 b->cursor = nir_after_instr(&tex->instr);
351
352 nir_ssa_def *y = sample_plane(b, tex, 0, options);
353 nir_ssa_def *u = sample_plane(b, tex, 1, options);
354 nir_ssa_def *v = sample_plane(b, tex, 2, options);
355
356 convert_yuv_to_rgb(b, tex,
357 nir_channel(b, y, 0),
358 nir_channel(b, u, 0),
359 nir_channel(b, v, 0),
360 nir_imm_float(b, 1.0f),
361 options);
362 }
363
364 static void
365 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
366 const nir_lower_tex_options *options)
367 {
368 b->cursor = nir_after_instr(&tex->instr);
369
370 nir_ssa_def *y = sample_plane(b, tex, 0, options);
371 nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
372
373 convert_yuv_to_rgb(b, tex,
374 nir_channel(b, y, 0),
375 nir_channel(b, xuxv, 1),
376 nir_channel(b, xuxv, 3),
377 nir_imm_float(b, 1.0f),
378 options);
379 }
380
381 static void
382 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
383 const nir_lower_tex_options *options)
384 {
385 b->cursor = nir_after_instr(&tex->instr);
386
387 nir_ssa_def *y = sample_plane(b, tex, 0, options);
388 nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
389
390 convert_yuv_to_rgb(b, tex,
391 nir_channel(b, y, 1),
392 nir_channel(b, uxvx, 0),
393 nir_channel(b, uxvx, 2),
394 nir_imm_float(b, 1.0f),
395 options);
396 }
397
398 static void
399 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
400 const nir_lower_tex_options *options)
401 {
402 b->cursor = nir_after_instr(&tex->instr);
403
404 nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
405
406 convert_yuv_to_rgb(b, tex,
407 nir_channel(b, ayuv, 2),
408 nir_channel(b, ayuv, 1),
409 nir_channel(b, ayuv, 0),
410 nir_channel(b, ayuv, 3),
411 options);
412 }
413
414 static void
415 lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
416 const nir_lower_tex_options *options)
417 {
418 b->cursor = nir_after_instr(&tex->instr);
419
420 nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
421
422 convert_yuv_to_rgb(b, tex,
423 nir_channel(b, xyuv, 2),
424 nir_channel(b, xyuv, 1),
425 nir_channel(b, xyuv, 0),
426 nir_imm_float(b, 1.0f),
427 options);
428 }
429
/*
 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
 * computed from the gradients.  Any min-LOD source is folded into the lod
 * via an fmax before being removed.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txd);

   /* The gradients are no longer needed once the lod has been computed. */
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
452
/* Lowers nir_texop_txd on cube maps to nir_texop_txl by computing an
 * explicit LOD from the gradients; the full derivation is in the comment
 * below.
 */
static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * that absolute value of the component of largest magnitude. This
    * division requires that the computing of the derivative of the texel
    * coordinate must use the quotient rule. The high level GLSL code is as
    * follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use quotient rule to compute derivative. The normalized to
    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    * only concerned with the magnitudes of the derivatives whose values are
    * not affected by the sign. We drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   /* The if-chain above is implemented branchlessly with nested bcsels:
    * prefer z's face, then y's, else x's (yzx swizzle).
    */
   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
599
/* Lowers a non-cube nir_texop_txd to nir_texop_txl: the gradients are
 * scaled by the texture size and the LOD taken as log2 of the maximum
 * scale factor rho.
 */
static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0.
    * component_mask selects as many size components as the sampler has
    * coordinate dimensions.
    */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_ssa_def *size =
      nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
                   component_mask);

   /* Scale the gradients by width and height. Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_ssa_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);

   nir_ssa_def *rho;
   if (dPdx->num_components == 1) {
      /* 1D case: rho is just the larger gradient magnitude. */
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      /* Otherwise rho is the larger of the two gradient vector lengths. */
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho). We're ignoring GL state biases for now. */
   nir_ssa_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
658
/* Clamps the coordinate components selected by sat_mask: to [0, size] for
 * RECT samplers (unnormalized coordinates) or to [0.0, 1.0] otherwise.
 * The array index component is never clamped.
 */
static void
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   b->cursor = nir_before_instr(&tex->instr);

   /* Walk through the sources saturating the requested arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type != nir_tex_src_coord)
         continue;

      nir_ssa_def *src =
         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);

      /* split src into components: */
      nir_ssa_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(src));
   }
}
708
709 static nir_ssa_def *
710 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
711 {
712 nir_const_value v[4];
713
714 memset(&v, 0, sizeof(v));
715
716 if (swizzle_val == 4) {
717 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
718 } else {
719 assert(swizzle_val == 5);
720 if (type == nir_type_float)
721 v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
722 else
723 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
724 }
725
726 return nir_build_imm(b, 4, 32, v);
727 }
728
/* Reorders the components of a tg4 result with the fixed {2, 3, 1, 0}
 * swizzle.  NOTE(review): presumably this undoes a hardware-specific return
 * order on Broadcom GPUs — confirm against the driver that enables this
 * option.
 */
static void
swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   assert(nir_tex_instr_dest_size(tex) == 4);
   unsigned swiz[4] = { 2, 3, 1, 0 };
   nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);

   /* Rewrite uses *after* the swizzle so the swizzle itself still reads the
    * original tex result.
    */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
                                  swizzled->parent_instr);
}
743
/* Applies a GL-style texture swizzle to the result: values 0-3 select a
 * channel of the original result, 4 yields constant zero, 5 constant one.
 * tg4 gathers a single component, so a plain channel swizzle just rewrites
 * tex->component.
 */
static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         /* Gathering a constant: replace the result with 0 or 1. */
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0s or 1s, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
      } else {
         /* Mix of channels and constants: build the result channel by
          * channel.
          */
         nir_ssa_def *srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
            } else {
               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
            }
         }
         swizzled = nir_vec(b, srcs, 4);
      }
   }

   /* Rewrite uses *after* the swizzle so the swizzle itself still reads the
    * original tex result.
    */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
                                  swizzled->parent_instr);
}
783
784 static void
785 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
786 {
787 assert(tex->dest.is_ssa);
788 assert(nir_tex_instr_dest_size(tex) == 4);
789 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
790
791 b->cursor = nir_after_instr(&tex->instr);
792
793 nir_ssa_def *rgb =
794 nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
795
796 /* alpha is untouched: */
797 nir_ssa_def *result = nir_vec4(b,
798 nir_channel(b, rgb, 0),
799 nir_channel(b, rgb, 1),
800 nir_channel(b, rgb, 2),
801 nir_channel(b, &tex->dest.ssa, 3));
802
803 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
804 result->parent_instr);
805 }
806
/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4. The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 */
static void
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_ssa_def *color = &tex->dest.ssa;

   b->cursor = nir_after_instr(&tex->instr);

   switch (options->lower_tex_packing[tex->sampler_index]) {
   case nir_lower_tex_packing_none:
      return;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = {16, 16, 16, 16};

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         /* Each 32-bit channel packs two f16 results; unpack as many as the
          * destination needs.
          */
         switch (nir_tex_instr_dest_size(tex)) {
         case 1:
            /* Single-component float results only occur for new-style
             * shadow comparisons.
             */
            assert(tex->is_shadow && tex->is_new_style_shadow);
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
            break;
         case 2: {
            nir_ssa_def *rg = nir_channel(b, color, 0);
            color = nir_vec2(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg));
            break;
         }
         case 4: {
            nir_ssa_def *rg = nir_channel(b, color, 0);
            nir_ssa_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
            break;
         }
         default:
            unreachable("wrong dest_size");
         }
         break;

      case nir_type_int:
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   /* Rewrite uses after the unpack so it still reads the packed result. */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(color),
                                  color->parent_instr);
}
883
884 static bool
885 sampler_index_lt(nir_tex_instr *tex, unsigned max)
886 {
887 assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
888
889 unsigned sampler_index = tex->sampler_index;
890
891 int sampler_offset_idx =
892 nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
893 if (sampler_offset_idx >= 0) {
894 if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
895 return false;
896
897 sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
898 }
899
900 return sampler_index < max;
901 }
902
/* Lowers a tg4 that uses the explicit per-texel offset array
 * (tex->tg4_offsets) into four separate tg4 instructions, one per offset,
 * taking component 3 of each copy's result and packing them into a vec4.
 * The original instruction is removed.
 *
 * Always returns true (progress).
 */
static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *dest[4];
   for (unsigned i = 0; i < 4; ++i) {
      /* Clone the instruction with room for one extra (offset) source. */
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src, tex_copy);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      /* Turn the i-th explicit tg4 offset into a regular offset source. */
      nir_tex_src src;
      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                              tex->tg4_offsets[i][1]));
      src.src_type = nir_tex_src_offset;
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
                        nir_tex_instr_dest_size(tex), 32, NULL);

      nir_builder_instr_insert(b, &tex_copy->instr);

      dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
   }

   nir_ssa_def *res = nir_vec4(b, dest[0], dest[1], dest[2], dest[3]);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(res));
   nir_instr_remove(&tex->instr);

   return true;
}
949
/* Lowers txs with a non-zero LOD to a 0-LOD txs plus ALU instructions:
 * TXS(lod) = max(TXS(0) >> lod, 1), with the array-size component (if any)
 * left unminified.
 *
 * Returns true if the instruction was changed (i.e. it had a non-constant
 * or non-zero LOD source).
 */
static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   b->cursor = nir_before_instr(&tex->instr);
   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
                         nir_src_for_ssa(nir_imm_int(b, 0)));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1) */
   b->cursor = nir_after_instr(&tex->instr);
   nir_ssa_def *minified = nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
                                    nir_imm_int(b, 1));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_ssa_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   /* Rewrite uses after the minification so it still reads the raw TXS(0)
    * result.
    */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(minified),
                                  minified->parent_instr);
   return true;
}
991
/* Applies every lowering requested in @options to the texture instructions
 * of a single block.  The order of the lowerings below is significant and
 * must not be changed casually: e.g. projection must happen before coord
 * saturation, swizzling before sRGB linearization, and the tg4-offset
 * lowering last because it replaces the instruction with four new ones.
 * Returns true if any instruction was modified.
 */
static bool
nir_lower_tex_block(nir_block *block, nir_builder *b,
                    const nir_lower_tex_options *options)
{
   bool progress = false;

   /* _safe variant: several lowerings remove or replace the current
    * instruction while we iterate.
    */
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_tex)
         continue;

      nir_tex_instr *tex = nir_instr_as_tex(instr);
      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));

      /* mask of src coords to saturate (clamp): */
      unsigned sat_mask = 0;

      /* The saturate_{s,t,r} options are per-sampler bitmasks selecting
       * which samplers need their coordinates clamped to [0, 1].
       */
      if ((1 << tex->sampler_index) & options->saturate_r)
         sat_mask |= (1 << 2);    /* .z */
      if ((1 << tex->sampler_index) & options->saturate_t)
         sat_mask |= (1 << 1);    /* .y */
      if ((1 << tex->sampler_index) & options->saturate_s)
         sat_mask |= (1 << 0);    /* .x */

      /* If we are clamping any coords, we must lower projector first
       * as clamping happens *after* projection:
       */
      if (lower_txp || sat_mask) {
         progress |= project_src(b, tex);
      }

      /* Fold a constant texel offset into the coordinate when the driver
       * cannot handle the offset source for this operation.
       */
      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
           options->lower_rect_offset)) {
         progress = lower_offset(b, tex) || progress;
      }

      /* RECT sampling with normalized coords; txf and size/level queries
       * are excluded since they do not use normalized coordinates.
       */
      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
          tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) {
         lower_rect(b, tex);
         progress = true;
      }

      /* Per-texture YUV-layout lowerings for external (imported) images;
       * each option is a bitmask over texture indices and dispatches to a
       * helper defined earlier in this file.
       */
      if ((1 << tex->texture_index) & options->lower_y_uv_external) {
         lower_y_uv_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
         lower_y_u_v_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
         lower_yx_xuxv_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
         lower_xy_uxvx_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_ayuv_external) {
         lower_ayuv_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_xyuv_external) {
         lower_xyuv_external(b, tex, options);
         progress = true;
      }

      /* Coordinate clamping; safe now that projection (above) is done. */
      if (sat_mask) {
         saturate_src(b, tex, sat_mask);
         progress = true;
      }

      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
         swizzle_tg4_broadcom(b, tex);
         progress = true;
      }

      /* Result swizzling; skipped for queries and for new-style shadow
       * comparisons (the condition excludes is_shadow && is_new_style_shadow).
       */
      if (((1 << tex->texture_index) & options->swizzle_result) &&
          !nir_tex_instr_is_query(tex) &&
          !(tex->is_shadow && tex->is_new_style_shadow)) {
         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
         progress = true;
      }

      /* should be after swizzle so we know which channels are rgb: */
      if (((1 << tex->texture_index) & options->lower_srgb) &&
          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
         linearize_srgb_result(b, tex);
         progress = true;
      }

      const bool has_min_lod =
         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
      const bool has_offset =
         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;

      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
          options->lower_txb_shadow_clamp) {
         lower_implicit_lod(b, tex);
         progress = true;
      }

      /* Per-sampler packed-format lowering; size/level/sample-count queries
       * return metadata, not texels, so they are exempt.
       */
      if (options->lower_tex_packing[tex->sampler_index] !=
          nir_lower_tex_packing_none &&
          tex->op != nir_texop_txs &&
          tex->op != nir_texop_query_levels &&
          tex->op != nir_texop_texture_samples) {
         lower_tex_packing(b, tex, options);
         progress = true;
      }

      /* TXD lowering: any of these option/instruction combinations forces
       * the explicit gradients to be turned into an explicit LOD.
       */
      if (tex->op == nir_texop_txd &&
          (options->lower_txd ||
           (options->lower_txd_shadow && tex->is_shadow) ||
           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
           (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
            nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
            has_min_lod && !sampler_index_lt(tex, 16)) ||
           (options->lower_txd_cube_map &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
           (options->lower_txd_3d &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
         lower_gradient(b, tex);
         progress = true;
         /* The instruction was replaced; don't run further lowerings on it. */
         continue;
      }

      bool shader_supports_implicit_lod =
         b->shader->info.stage == MESA_SHADER_FRAGMENT ||
         (b->shader->info.stage == MESA_SHADER_COMPUTE &&
          b->shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE);

      /* TXF, TXS and TXL require a LOD but not everything we implement using those
       * three opcodes provides one. Provide a default LOD of 0.
       */
      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
           (tex->op == nir_texop_tex && !shader_supports_implicit_lod))) {
         b->cursor = nir_before_instr(&tex->instr);
         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
         if (tex->op == nir_texop_tex && options->lower_tex_without_implicit_lod)
            tex->op = nir_texop_txl;
         progress = true;
         continue;
      }

      if (options->lower_txs_lod && tex->op == nir_texop_txs) {
         progress |= nir_lower_txs_lod(b, tex);
         continue;
      }

      /* has to happen after all the other lowerings as the original tg4 gets
       * replaced by 4 tg4 instructions.
       */
      if (tex->op == nir_texop_tg4 &&
          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
          options->lower_tg4_offsets) {
         progress |= lower_tg4_offsets(b, tex);
         continue;
      }
   }

   return progress;
}
1165
1166 static bool
1167 nir_lower_tex_impl(nir_function_impl *impl,
1168 const nir_lower_tex_options *options)
1169 {
1170 bool progress = false;
1171 nir_builder builder;
1172 nir_builder_init(&builder, impl);
1173
1174 nir_foreach_block(block, impl) {
1175 progress |= nir_lower_tex_block(block, &builder, options);
1176 }
1177
1178 nir_metadata_preserve(impl, nir_metadata_block_index |
1179 nir_metadata_dominance);
1180 return progress;
1181 }
1182
1183 bool
1184 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1185 {
1186 bool progress = false;
1187
1188 nir_foreach_function(function, shader) {
1189 if (function->impl)
1190 progress |= nir_lower_tex_impl(function->impl, options);
1191 }
1192
1193 return progress;
1194 }