/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture-related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + lowering RECT: converts the un-normalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions.
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
 */
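
/*
 * A minimal usage sketch (illustrative only; which options a driver enables
 * depends entirely on its hardware, and the values below are hypothetical):
 *
 *    nir_lower_tex_options opts = {
 *       .lower_txp = ~0u,     // lower projectors for all sampler dims
 *       .lower_rect = true,   // normalize RECT coords using txs + ALU
 *    };
 *    nir_lower_tex(shader, &opts);
 */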

#include "nir.h"
#include "nir_builder.h"
#include "nir_format_convert.h"

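/* Divide the coordinate (and the shadow comparator, if present) through by
 * the projector source, then drop the projector from the instruction.
 */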
static void
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
}

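/* Emit a txs (textureSize) query at LOD 0 for the given instruction's
 * texture, reusing its texture/sampler sources, and return the size
 * converted to float.
 */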
static nir_ssa_def *
get_texture_size(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_tex_instr *txs;

   unsigned num_srcs = 1; /* One for the LOD */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset ||
          tex->src[i].src_type == nir_tex_src_texture_handle ||
          tex->src[i].src_type == nir_tex_src_sampler_handle)
         num_srcs++;
   }

   txs = nir_tex_instr_create(b->shader, num_srcs);
   txs->op = nir_texop_txs;
   txs->sampler_dim = tex->sampler_dim;
   txs->is_array = tex->is_array;
   txs->is_shadow = tex->is_shadow;
   txs->is_new_style_shadow = tex->is_new_style_shadow;
   txs->texture_index = tex->texture_index;
   txs->sampler_index = tex->sampler_index;
   txs->dest_type = nir_type_int;

   unsigned idx = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset ||
          tex->src[i].src_type == nir_tex_src_texture_handle ||
          tex->src[i].src_type == nir_tex_src_sampler_handle) {
         nir_src_copy(&txs->src[idx].src, &tex->src[i].src, txs);
         txs->src[idx].src_type = tex->src[i].src_type;
         idx++;
      }
   }
   /* Add in an LOD because some back-ends require it */
   txs->src[idx].src = nir_src_for_ssa(nir_imm_int(b, 0));
   txs->src[idx].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txs->instr, &txs->dest,
                     nir_tex_instr_dest_size(txs), 32, NULL);
   nir_builder_instr_insert(b, &txs->instr);

   return nir_i2f32(b, &txs->dest.ssa);
}

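/* Emit a nir_texop_lod query with the same coordinate and texture/sampler
 * sources as the given instruction and return the computed LOD, which is the
 * y component of the query result.
 */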
static nir_ssa_def *
get_texture_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_tex_instr *tql;

   unsigned num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_coord ||
          tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset ||
          tex->src[i].src_type == nir_tex_src_texture_handle ||
          tex->src[i].src_type == nir_tex_src_sampler_handle)
         num_srcs++;
   }

   tql = nir_tex_instr_create(b->shader, num_srcs);
   tql->op = nir_texop_lod;
   tql->coord_components = tex->coord_components;
   tql->sampler_dim = tex->sampler_dim;
   tql->is_array = tex->is_array;
   tql->is_shadow = tex->is_shadow;
   tql->is_new_style_shadow = tex->is_new_style_shadow;
   tql->texture_index = tex->texture_index;
   tql->sampler_index = tex->sampler_index;
   tql->dest_type = nir_type_float;

   unsigned idx = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_coord ||
          tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset ||
          tex->src[i].src_type == nir_tex_src_texture_handle ||
          tex->src[i].src_type == nir_tex_src_sampler_handle) {
         nir_src_copy(&tql->src[idx].src, &tex->src[i].src, tql);
         tql->src[idx].src_type = tex->src[i].src_type;
         idx++;
      }
   }

   nir_ssa_dest_init(&tql->instr, &tql->dest, 2, 32, NULL);
   nir_builder_instr_insert(b, &tql->instr);

   /* The LOD is the y component of the result */
   return nir_channel(b, &tql->dest.ssa, 1);
}

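/* Fold a texel-offset source into the coordinate: for float coordinates the
 * offset is scaled by the reciprocal of the texture size first (except for
 * RECT, whose coordinates are already in texels); integer coordinates get a
 * plain add. The array index is left untouched. Returns true if an offset
 * was lowered.
 */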
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         nir_ssa_def *txs = get_texture_size(b, tex);
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, offset_coord, 1),
                                 nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}

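/* Convert the un-normalized coordinates of a RECT sampler to normalized
 * coordinates by multiplying with the reciprocal of the texture size, then
 * retype the instruction as a regular 2D sample.
 */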
static void
lower_rect(nir_builder *b, nir_tex_instr *tex)
{
   nir_ssa_def *txs = get_texture_size(b, tex);
   nir_ssa_def *scale = nir_frcp(b, txs);

   /* Walk through the sources normalizing the requested arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type != nir_tex_src_coord)
         continue;

      nir_ssa_def *coords =
         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
   }

   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
}

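/* Turn an implicit-LOD tex/txb into an explicit-LOD txl: query the LOD that
 * would have been used, fold in any bias and minimum-LOD clamp, and attach
 * the result as an explicit LOD source.
 */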
static void
lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *lod = get_texture_lod(b, tex);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   if (bias_idx >= 0) {
      /* If we have a bias, add it in */
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
      nir_tex_instr_remove_src(tex, bias_idx);
   }

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}

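/* Emit a copy of the given 2D sample that fetches from one specific plane of
 * a multi-planar (e.g. YUV) texture, selected via a nir_tex_src_plane source.
 */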
static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src, plane_tex);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float;
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 32, NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If a scale factor is set for this texture, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->dest.ssa,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->dest.ssa;
}

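/* Convert sampled YUV values to RGB and rewrite the users of the original
 * sample with the result. The coefficients below match the ITU-R BT.601
 * limited-range YCbCr-to-RGB transform.
 */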
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a)
{
   nir_const_value m[3][4] = {
      { { .f32 = 1.0f }, { .f32 = 0.0f }, { .f32 = 1.59602678f }, { .f32 = 0.0f } },
      { { .f32 = 1.0f }, { .f32 = -0.39176229f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } },
      { { .f32 = 1.0f }, { .f32 = 2.01723214f }, { .f32 = 0.0f }, { .f32 = 0.0f } },
   };

   nir_ssa_def *yuv =
      nir_vec4(b,
               nir_fmul(b, nir_imm_float(b, 1.16438356f),
                        nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),
               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_imm_float(b, 0.0));

   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
   nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));
   nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));

   nir_ssa_def *result = nir_vec4(b, red, green, blue, a);

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
}

static void
lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *uv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, uv, 0),
                      nir_channel(b, uv, 1),
                      nir_imm_float(b, 1.0f));
}

static void
lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *u = sample_plane(b, tex, 1, options);
   nir_ssa_def *v = sample_plane(b, tex, 2, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, u, 0),
                      nir_channel(b, v, 0),
                      nir_imm_float(b, 1.0f));
}

static void
lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, xuxv, 1),
                      nir_channel(b, xuxv, 3),
                      nir_imm_float(b, 1.0f));
}

static void
lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 1),
                      nir_channel(b, uxvx, 0),
                      nir_channel(b, uxvx, 2),
                      nir_imm_float(b, 1.0f));
}

static void
lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, ayuv, 2),
                      nir_channel(b, ayuv, 1),
                      nir_channel(b, ayuv, 0),
                      nir_channel(b, ayuv, 3));
}

static void
lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, xyuv, 2),
                      nir_channel(b, xyuv, 1),
                      nir_channel(b, xyuv, 0),
                      nir_imm_float(b, 1.0f));
}

/*
 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
 * computed from the gradients.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txd);

   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}

static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = get_texture_size(b, tex);

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has the largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * the absolute value of the component of largest magnitude. This
    * division requires that computing the derivative of the texel
    * coordinate must use the quotient rule. The high level GLSL code is as
    * follows:
    *
    * Step 1: selection
    *
    *    vec3 abs_p, Q, dQdx, dQdy;
    *    abs_p = abs(ir->coordinate);
    *    if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *       Q = ir->coordinate.yzx;
    *       dQdx = ir->lod_info.grad.dPdx.yzx;
    *       dQdy = ir->lod_info.grad.dPdy.yzx;
    *    }
    *    if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *       Q = ir->coordinate.xzy;
    *       dQdx = ir->lod_info.grad.dPdx.xzy;
    *       dQdy = ir->lod_info.grad.dPdy.xzy;
    *    }
    *    if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *       Q = ir->coordinate;
    *       dQdx = ir->lod_info.grad.dPdx;
    *       dQdy = ir->lod_info.grad.dPdy;
    *    }
    *
    * Step 2: use quotient rule to compute derivative. The normalized to
    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    * only concerned with the magnitudes of the derivatives whose values are
    * not affected by the sign. We drop the sign from the computation.
    *
    *    vec2 dx, dy;
    *    float recip;
    *
    *    recip = 1.0 / Q.z;
    *    dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    *    dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    *    float M, result;
    *    M = max(dot(dx, dx), dot(dy, dy));
    *    L = textureSize(sampler, 0).x;
    *    result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3, false),
                           nir_swizzle(b, p, yzx, 3, false)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3, false),
                              nir_swizzle(b, dPdx, yzx, 3, false)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3, false),
                              nir_swizzle(b, dPdy, yzx, 3, false)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}

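/* Lower a non-cube txd to txl by computing the LOD from the supplied
 * gradients, following the scale-factor (rho) formulation of the GL spec.
 */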
static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_ssa_def *size =
      nir_channels(b, get_texture_size(b, tex), component_mask);

   /* Scale the gradients by width and height. Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_ssa_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);

   nir_ssa_def *rho;
   if (dPdx->num_components == 1) {
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho). We're ignoring GL state biases for now. */
   nir_ssa_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}

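/* Clamp the coordinate components selected by sat_mask: RECT coordinates are
 * clamped to [0, texture size], everything else to [0.0, 1.0]. The array
 * index, if any, is left alone.
 */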
static void
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   b->cursor = nir_before_instr(&tex->instr);

   /* Walk through the sources saturating the requested arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type != nir_tex_src_coord)
         continue;

      nir_ssa_def *src =
         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);

      /* split src into components: */
      nir_ssa_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_ssa_def *txs = get_texture_size(b, tex);
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(src));
   }
}

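/* Build an immediate vec4 of all zeros (swizzle value 4) or all ones
 * (swizzle value 5) of the requested base type.
 */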
static nir_ssa_def *
get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
{
   nir_const_value v[4];

   memset(&v, 0, sizeof(v));

   if (swizzle_val == 4) {
      v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
   } else {
      assert(swizzle_val == 5);
      if (type == nir_type_float)
         v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
      else
         v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
   }

   return nir_build_imm(b, 4, 32, v);
}

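/* Reorder the result of a tg4 with a swizzling MOV; the { 2, 3, 1, 0 }
 * swizzle maps the component ordering returned by Broadcom hardware back to
 * the ordering NIR expects.
 */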
static void
swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   assert(nir_tex_instr_dest_size(tex) == 4);
   unsigned swiz[4] = { 2, 3, 1, 0 };
   nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
                                  swizzled->parent_instr);
}

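/* Apply the given per-channel swizzle (channel indices 0-3, or 4/5 for
 * constant zero/one) to the texture result and rewrite its users.
 */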
static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0s or 1s, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
      } else {
         nir_ssa_def *srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
            } else {
               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
            }
         }
         swizzled = nir_vec(b, srcs, 4);
      }
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
                                  swizzled->parent_instr);
}

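/* Convert the RGB channels of an sRGB-encoded result to linear, leaving
 * alpha untouched.
 */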
static void
linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *rgb =
      nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));

   /* alpha is untouched: */
   nir_ssa_def *result = nir_vec4(b,
                                  nir_channel(b, rgb, 0),
                                  nir_channel(b, rgb, 1),
                                  nir_channel(b, rgb, 2),
                                  nir_channel(b, &tex->dest.ssa, 3));

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
                                  result->parent_instr);
}

/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4. The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 */
static void
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_ssa_def *color = &tex->dest.ssa;

   b->cursor = nir_after_instr(&tex->instr);

   switch (options->lower_tex_packing[tex->sampler_index]) {
   case nir_lower_tex_packing_none:
      return;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = {16, 16, 16, 16};

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         if (tex->is_shadow && tex->is_new_style_shadow) {
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
         } else {
            nir_ssa_def *rg = nir_channel(b, color, 0);
            nir_ssa_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
         }
         break;

      case nir_type_int:
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(color),
                                  color->parent_instr);
}

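/* Return true only if we can prove the instruction's sampler index
 * (including any constant sampler offset) is below max; a non-constant
 * offset means we cannot, so we conservatively return false.
 */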
static bool
sampler_index_lt(nir_tex_instr *tex, unsigned max)
{
   assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);

   unsigned sampler_index = tex->sampler_index;

   int sampler_offset_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
   if (sampler_offset_idx >= 0) {
      if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
         return false;

      sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
   }

   return sampler_index < max;
}

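/* Replace a tg4 that uses explicit per-texel offsets with four single-offset
 * tg4 instructions, assembling component 3 of each partial result into the
 * final vec4.
 */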
static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *dest[4];
   for (unsigned i = 0; i < 4; ++i) {
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src, tex_copy);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      nir_tex_src src;
      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                              tex->tg4_offsets[i][1]));
      src.src_type = nir_tex_src_offset;
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
                        nir_tex_instr_dest_size(tex), 32, NULL);

      nir_builder_instr_insert(b, &tex_copy->instr);

      dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
   }

   nir_ssa_def *res = nir_vec4(b, dest[0], dest[1], dest[2], dest[3]);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(res));
   nir_instr_remove(&tex->instr);

   return true;
}

static bool
nir_lower_tex_block(nir_block *block, nir_builder *b,
                    const nir_lower_tex_options *options)
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_tex)
         continue;

      nir_tex_instr *tex = nir_instr_as_tex(instr);
      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));

      /* mask of src coords to saturate (clamp): */
      unsigned sat_mask = 0;

      if ((1 << tex->sampler_index) & options->saturate_r)
         sat_mask |= (1 << 2);    /* .z */
      if ((1 << tex->sampler_index) & options->saturate_t)
         sat_mask |= (1 << 1);    /* .y */
      if ((1 << tex->sampler_index) & options->saturate_s)
         sat_mask |= (1 << 0);    /* .x */

      /* If we are clamping any coords, we must lower projector first
       * as clamping happens *after* projection:
       */
      if (lower_txp || sat_mask) {
         project_src(b, tex);
         progress = true;
      }

      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
           options->lower_rect_offset)) {
         progress = lower_offset(b, tex) || progress;
      }

      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) {
         lower_rect(b, tex);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_y_uv_external) {
         lower_y_uv_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
         lower_y_u_v_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
         lower_yx_xuxv_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
         lower_xy_uxvx_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_ayuv_external) {
         lower_ayuv_external(b, tex, options);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_xyuv_external) {
         lower_xyuv_external(b, tex, options);
         progress = true;
      }

      if (sat_mask) {
         saturate_src(b, tex, sat_mask);
         progress = true;
      }

      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
         swizzle_tg4_broadcom(b, tex);
         progress = true;
      }

      if (((1 << tex->texture_index) & options->swizzle_result) &&
          !nir_tex_instr_is_query(tex) &&
          !(tex->is_shadow && tex->is_new_style_shadow)) {
         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
         progress = true;
      }

      /* should be after swizzle so we know which channels are rgb: */
      if (((1 << tex->texture_index) & options->lower_srgb) &&
          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
         linearize_srgb_result(b, tex);
         progress = true;
      }

      const bool has_min_lod =
         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
      const bool has_offset =
         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;

      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
          options->lower_txb_shadow_clamp) {
         lower_implicit_lod(b, tex);
         progress = true;
      }

      if (options->lower_tex_packing[tex->sampler_index] !=
          nir_lower_tex_packing_none &&
          tex->op != nir_texop_txs &&
          tex->op != nir_texop_query_levels) {
         lower_tex_packing(b, tex, options);
         progress = true;
      }

      if (tex->op == nir_texop_txd &&
          (options->lower_txd ||
           (options->lower_txd_shadow && tex->is_shadow) ||
           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
            has_min_lod && !sampler_index_lt(tex, 16)) ||
           (options->lower_txd_cube_map &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
           (options->lower_txd_3d &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
         lower_gradient(b, tex);
         progress = true;
         continue;
      }

      bool shader_supports_implicit_lod =
         b->shader->info.stage == MESA_SHADER_FRAGMENT ||
         (b->shader->info.stage == MESA_SHADER_COMPUTE &&
          b->shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE);

      /* TXF, TXS and TXL require a LOD but not everything we implement using
       * those three opcodes provides one. Provide a default LOD of 0.
       */
      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
           (tex->op == nir_texop_tex && !shader_supports_implicit_lod))) {
         b->cursor = nir_before_instr(&tex->instr);
         nir_tex_instr_add_src(tex, nir_tex_src_lod,
                               nir_src_for_ssa(nir_imm_int(b, 0)));
         progress = true;
         continue;
      }

      /* has to happen after all the other lowerings as the original tg4 gets
       * replaced by 4 tg4 instructions.
       */
      if (tex->op == nir_texop_tg4 &&
          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
          options->lower_tg4_offsets) {
         progress |= lower_tg4_offsets(b, tex);
         continue;
      }
   }

   return progress;
}

static bool
nir_lower_tex_impl(nir_function_impl *impl,
                   const nir_lower_tex_options *options)
{
   bool progress = false;
   nir_builder builder;
   nir_builder_init(&builder, impl);

   nir_foreach_block(block, impl) {
      progress |= nir_lower_tex_block(block, &builder, options);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return progress;
}

bool
nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= nir_lower_tex_impl(function->impl, options);
   }

   return progress;
}