nir/lower_tex: Update ->sampler_dim value before calling get_texture_size()
[mesa.git] src/compiler/nir/nir_lower_tex.c
1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /*
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 * + texture projector lowering: performs the coordinate division for
28 * texture projection in ALU instructions instead of asking the
29 * texture operation to do it.
30 * + lowering RECT: converts the un-normalized RECT texture coordinates
31 * to normalized coordinates with txs plus ALU instructions
32 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 * inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 * Note that this automatically triggers texture projector lowering if
35 * needed, since clamping must happen after projector lowering.
36 */
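/*
 * Illustrative sketch only (not part of the original file): roughly how a
 * driver might configure and run this pass. It uses only option fields that
 * are referenced elsewhere in this file; the calling context and the
 * "shader" variable are assumed.
 *
 *    nir_lower_tex_options opts = { 0 };
 *    opts.lower_rect = true;    // normalize RECT coords using txs + ALU
 *    opts.lower_txp = ~0u;      // lower projectors for every sampler dim
 *    bool progress = nir_lower_tex(shader, &opts);
 */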
37
38 #include "nir.h"
39 #include "nir_builder.h"
40 #include "nir_format_convert.h"
41
42 static bool
43 project_src(nir_builder *b, nir_tex_instr *tex)
44 {
45 /* Find the projector in the srcs list, if present. */
46 int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
47 if (proj_index < 0)
48 return false;
49
50 b->cursor = nir_before_instr(&tex->instr);
51
52 nir_ssa_def *inv_proj =
53 nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
54
55 /* Walk through the sources projecting the arguments. */
56 for (unsigned i = 0; i < tex->num_srcs; i++) {
57 switch (tex->src[i].src_type) {
58 case nir_tex_src_coord:
59 case nir_tex_src_comparator:
60 break;
61 default:
62 continue;
63 }
64 nir_ssa_def *unprojected =
65 nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
66 nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
67
68 /* Array indices don't get projected, so make a new vector with the
69 * coordinate's array index untouched.
70 */
71 if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
72 switch (tex->coord_components) {
73 case 4:
74 projected = nir_vec4(b,
75 nir_channel(b, projected, 0),
76 nir_channel(b, projected, 1),
77 nir_channel(b, projected, 2),
78 nir_channel(b, unprojected, 3));
79 break;
80 case 3:
81 projected = nir_vec3(b,
82 nir_channel(b, projected, 0),
83 nir_channel(b, projected, 1),
84 nir_channel(b, unprojected, 2));
85 break;
86 case 2:
87 projected = nir_vec2(b,
88 nir_channel(b, projected, 0),
89 nir_channel(b, unprojected, 1));
90 break;
91 default:
92 unreachable("bad texture coord count for array");
93 break;
94 }
95 }
96
97 nir_instr_rewrite_src(&tex->instr,
98 &tex->src[i].src,
99 nir_src_for_ssa(projected));
100 }
101
102 nir_tex_instr_remove_src(tex, proj_index);
103 return true;
104 }
105
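/* Emits a txs (texture size) query at LOD 0, reusing the texture/sampler
 * sources of the given instruction, and returns the result converted to
 * float.
 */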
106 static nir_ssa_def *
107 get_texture_size(nir_builder *b, nir_tex_instr *tex)
108 {
109 b->cursor = nir_before_instr(&tex->instr);
110
111 nir_tex_instr *txs;
112
113 unsigned num_srcs = 1; /* One for the LOD */
114 for (unsigned i = 0; i < tex->num_srcs; i++) {
115 if (tex->src[i].src_type == nir_tex_src_texture_deref ||
116 tex->src[i].src_type == nir_tex_src_sampler_deref ||
117 tex->src[i].src_type == nir_tex_src_texture_offset ||
118 tex->src[i].src_type == nir_tex_src_sampler_offset ||
119 tex->src[i].src_type == nir_tex_src_texture_handle ||
120 tex->src[i].src_type == nir_tex_src_sampler_handle)
121 num_srcs++;
122 }
123
124 txs = nir_tex_instr_create(b->shader, num_srcs);
125 txs->op = nir_texop_txs;
126 txs->sampler_dim = tex->sampler_dim;
127 txs->is_array = tex->is_array;
128 txs->is_shadow = tex->is_shadow;
129 txs->is_new_style_shadow = tex->is_new_style_shadow;
130 txs->texture_index = tex->texture_index;
131 txs->sampler_index = tex->sampler_index;
132 txs->dest_type = nir_type_int;
133
134 unsigned idx = 0;
135 for (unsigned i = 0; i < tex->num_srcs; i++) {
136 if (tex->src[i].src_type == nir_tex_src_texture_deref ||
137 tex->src[i].src_type == nir_tex_src_sampler_deref ||
138 tex->src[i].src_type == nir_tex_src_texture_offset ||
139 tex->src[i].src_type == nir_tex_src_sampler_offset ||
140 tex->src[i].src_type == nir_tex_src_texture_handle ||
141 tex->src[i].src_type == nir_tex_src_sampler_handle) {
142 nir_src_copy(&txs->src[idx].src, &tex->src[i].src, txs);
143 txs->src[idx].src_type = tex->src[i].src_type;
144 idx++;
145 }
146 }
147 /* Add in an LOD because some back-ends require it */
148 txs->src[idx].src = nir_src_for_ssa(nir_imm_int(b, 0));
149 txs->src[idx].src_type = nir_tex_src_lod;
150
151 nir_ssa_dest_init(&txs->instr, &txs->dest,
152 nir_tex_instr_dest_size(txs), 32, NULL);
153 nir_builder_instr_insert(b, &txs->instr);
154
155 return nir_i2f32(b, &txs->dest.ssa);
156 }
157
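/* Emits a nir_texop_lod query with the same coordinate and texture/sampler
 * sources as the given instruction and returns the computed LOD (the y
 * component of the result).
 */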
158 static nir_ssa_def *
159 get_texture_lod(nir_builder *b, nir_tex_instr *tex)
160 {
161 b->cursor = nir_before_instr(&tex->instr);
162
163 nir_tex_instr *tql;
164
165 unsigned num_srcs = 0;
166 for (unsigned i = 0; i < tex->num_srcs; i++) {
167 if (tex->src[i].src_type == nir_tex_src_coord ||
168 tex->src[i].src_type == nir_tex_src_texture_deref ||
169 tex->src[i].src_type == nir_tex_src_sampler_deref ||
170 tex->src[i].src_type == nir_tex_src_texture_offset ||
171 tex->src[i].src_type == nir_tex_src_sampler_offset ||
172 tex->src[i].src_type == nir_tex_src_texture_handle ||
173 tex->src[i].src_type == nir_tex_src_sampler_handle)
174 num_srcs++;
175 }
176
177 tql = nir_tex_instr_create(b->shader, num_srcs);
178 tql->op = nir_texop_lod;
179 tql->coord_components = tex->coord_components;
180 tql->sampler_dim = tex->sampler_dim;
181 tql->is_array = tex->is_array;
182 tql->is_shadow = tex->is_shadow;
183 tql->is_new_style_shadow = tex->is_new_style_shadow;
184 tql->texture_index = tex->texture_index;
185 tql->sampler_index = tex->sampler_index;
186 tql->dest_type = nir_type_float;
187
188 unsigned idx = 0;
189 for (unsigned i = 0; i < tex->num_srcs; i++) {
190 if (tex->src[i].src_type == nir_tex_src_coord ||
191 tex->src[i].src_type == nir_tex_src_texture_deref ||
192 tex->src[i].src_type == nir_tex_src_sampler_deref ||
193 tex->src[i].src_type == nir_tex_src_texture_offset ||
194 tex->src[i].src_type == nir_tex_src_sampler_offset ||
195 tex->src[i].src_type == nir_tex_src_texture_handle ||
196 tex->src[i].src_type == nir_tex_src_sampler_handle) {
197 nir_src_copy(&tql->src[idx].src, &tex->src[i].src, tql);
198 tql->src[idx].src_type = tex->src[i].src_type;
199 idx++;
200 }
201 }
202
203 nir_ssa_dest_init(&tql->instr, &tql->dest, 2, 32, NULL);
204 nir_builder_instr_insert(b, &tql->instr);
205
206 /* The LOD is the y component of the result */
207 return nir_channel(b, &tql->dest.ssa, 1);
208 }
209
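/* Folds the nir_tex_src_offset source into the coordinate: integer coords
 * get the offset added directly, float coords get it scaled by the
 * reciprocal of the texture size first (RECT coords are already in texels),
 * and array indices are left untouched.
 */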
210 static bool
211 lower_offset(nir_builder *b, nir_tex_instr *tex)
212 {
213 int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
214 if (offset_index < 0)
215 return false;
216
217 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
218 assert(coord_index >= 0);
219
220 assert(tex->src[offset_index].src.is_ssa);
221 assert(tex->src[coord_index].src.is_ssa);
222 nir_ssa_def *offset = tex->src[offset_index].src.ssa;
223 nir_ssa_def *coord = tex->src[coord_index].src.ssa;
224
225 b->cursor = nir_before_instr(&tex->instr);
226
227 nir_ssa_def *offset_coord;
228 if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
229 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
230 offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
231 } else {
232 nir_ssa_def *txs = get_texture_size(b, tex);
233 nir_ssa_def *scale = nir_frcp(b, txs);
234
235 offset_coord = nir_fadd(b, coord,
236 nir_fmul(b,
237 nir_i2f32(b, offset),
238 scale));
239 }
240 } else {
241 offset_coord = nir_iadd(b, coord, offset);
242 }
243
244 if (tex->is_array) {
245 /* The offset is not applied to the array index */
246 if (tex->coord_components == 2) {
247 offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
248 nir_channel(b, coord, 1));
249 } else if (tex->coord_components == 3) {
250 offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
251 nir_channel(b, offset_coord, 1),
252 nir_channel(b, coord, 2));
253 } else {
254 unreachable("Invalid number of components");
255 }
256 }
257
258 nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
259 nir_src_for_ssa(offset_coord));
260
261 nir_tex_instr_remove_src(tex, offset_index);
262
263 return true;
264 }
265
266 static void
267 lower_rect(nir_builder *b, nir_tex_instr *tex)
268 {
269 /* Set the sampler_dim to 2D here so that get_texture_size picks up the
270 * right dimensionality.
271 */
272 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
273
274 nir_ssa_def *txs = get_texture_size(b, tex);
275 nir_ssa_def *scale = nir_frcp(b, txs);
276
277 /* Walk through the sources normalizing the requested arguments. */
278 for (unsigned i = 0; i < tex->num_srcs; i++) {
279 if (tex->src[i].src_type != nir_tex_src_coord)
280 continue;
281
282 nir_ssa_def *coords =
283 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
284 nir_instr_rewrite_src(&tex->instr,
285 &tex->src[i].src,
286 nir_src_for_ssa(nir_fmul(b, coords, scale)));
287 }
288 }
289
290 static void
291 lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
292 {
293 assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
294 assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
295 assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
296 assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
297
298 b->cursor = nir_before_instr(&tex->instr);
299
300 nir_ssa_def *lod = get_texture_lod(b, tex);
301
302 int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
303 if (bias_idx >= 0) {
304 /* If we have a bias, add it in */
305 lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
306 nir_tex_instr_remove_src(tex, bias_idx);
307 }
308
309 int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
310 if (min_lod_idx >= 0) {
311 /* If we have a minimum LOD, clamp LOD accordingly */
312 lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
313 nir_tex_instr_remove_src(tex, min_lod_idx);
314 }
315
316 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
317 tex->op = nir_texop_txl;
318 }
319
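/* Samples a single plane of a multi-planar texture by re-emitting the
 * texture instruction with an extra nir_tex_src_plane source; the result is
 * scaled by the per-texture scale factor if one is set.
 */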
320 static nir_ssa_def *
321 sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
322 const nir_lower_tex_options *options)
323 {
324 assert(tex->dest.is_ssa);
325 assert(nir_tex_instr_dest_size(tex) == 4);
326 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
327 assert(tex->op == nir_texop_tex);
328 assert(tex->coord_components == 2);
329
330 nir_tex_instr *plane_tex =
331 nir_tex_instr_create(b->shader, tex->num_srcs + 1);
332 for (unsigned i = 0; i < tex->num_srcs; i++) {
333 nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src, plane_tex);
334 plane_tex->src[i].src_type = tex->src[i].src_type;
335 }
336 plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
337 plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
338 plane_tex->op = nir_texop_tex;
339 plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
340 plane_tex->dest_type = nir_type_float;
341 plane_tex->coord_components = 2;
342
343 plane_tex->texture_index = tex->texture_index;
344 plane_tex->sampler_index = tex->sampler_index;
345
346 nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 32, NULL);
347
348 nir_builder_instr_insert(b, &plane_tex->instr);
349
350 /* If a scale factor is set for this texture, return a scaled value. */
351 if (options->scale_factors[tex->texture_index])
352 return nir_fmul_imm(b, &plane_tex->dest.ssa,
353 options->scale_factors[tex->texture_index]);
354
355 return &plane_tex->dest.ssa;
356 }
357
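/* Combines the sampled Y/U/V (and alpha) values into RGBA using a fixed
 * BT.601 limited-range conversion matrix and rewrites all uses of the
 * original tex result.
 */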
358 static void
359 convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
360 nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
361 nir_ssa_def *a)
362 {
363 nir_const_value m[3][4] = {
364 { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 0.0f } },
365 { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f }, { .f32 = 0.0f } },
366 { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f }, { .f32 = 0.0f } },
367 };
368
369 nir_ssa_def *offset =
370 nir_vec4(b,
371 nir_imm_float(b, -0.874202214f),
372 nir_imm_float(b, 0.531667820f),
373 nir_imm_float(b, -1.085630787f),
374 a);
375
376 nir_ssa_def *result =
377 nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]),
378 nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]),
379 nir_ffma(b, v, nir_build_imm(b, 4, 32, m[2]),
380 offset)));
381
382 nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
383 }
384
385 static void
386 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
387 const nir_lower_tex_options *options)
388 {
389 b->cursor = nir_after_instr(&tex->instr);
390
391 nir_ssa_def *y = sample_plane(b, tex, 0, options);
392 nir_ssa_def *uv = sample_plane(b, tex, 1, options);
393
394 convert_yuv_to_rgb(b, tex,
395 nir_channel(b, y, 0),
396 nir_channel(b, uv, 0),
397 nir_channel(b, uv, 1),
398 nir_imm_float(b, 1.0f));
399 }
400
401 static void
402 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
403 const nir_lower_tex_options *options)
404 {
405 b->cursor = nir_after_instr(&tex->instr);
406
407 nir_ssa_def *y = sample_plane(b, tex, 0, options);
408 nir_ssa_def *u = sample_plane(b, tex, 1, options);
409 nir_ssa_def *v = sample_plane(b, tex, 2, options);
410
411 convert_yuv_to_rgb(b, tex,
412 nir_channel(b, y, 0),
413 nir_channel(b, u, 0),
414 nir_channel(b, v, 0),
415 nir_imm_float(b, 1.0f));
416 }
417
418 static void
419 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
420 const nir_lower_tex_options *options)
421 {
422 b->cursor = nir_after_instr(&tex->instr);
423
424 nir_ssa_def *y = sample_plane(b, tex, 0, options);
425 nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
426
427 convert_yuv_to_rgb(b, tex,
428 nir_channel(b, y, 0),
429 nir_channel(b, xuxv, 1),
430 nir_channel(b, xuxv, 3),
431 nir_imm_float(b, 1.0f));
432 }
433
434 static void
435 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
436 const nir_lower_tex_options *options)
437 {
438 b->cursor = nir_after_instr(&tex->instr);
439
440 nir_ssa_def *y = sample_plane(b, tex, 0, options);
441 nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
442
443 convert_yuv_to_rgb(b, tex,
444 nir_channel(b, y, 1),
445 nir_channel(b, uxvx, 0),
446 nir_channel(b, uxvx, 2),
447 nir_imm_float(b, 1.0f));
448 }
449
450 static void
451 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
452 const nir_lower_tex_options *options)
453 {
454 b->cursor = nir_after_instr(&tex->instr);
455
456 nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
457
458 convert_yuv_to_rgb(b, tex,
459 nir_channel(b, ayuv, 2),
460 nir_channel(b, ayuv, 1),
461 nir_channel(b, ayuv, 0),
462 nir_channel(b, ayuv, 3));
463 }
464
465 static void
466 lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
467 const nir_lower_tex_options *options)
468 {
469 b->cursor = nir_after_instr(&tex->instr);
470
471 nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
472
473 convert_yuv_to_rgb(b, tex,
474 nir_channel(b, xyuv, 2),
475 nir_channel(b, xyuv, 1),
476 nir_channel(b, xyuv, 0),
477 nir_imm_float(b, 1.0f));
478 }
479
480 /*
481 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
482 * computed from the gradients.
483 */
484 static void
485 replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
486 {
487 assert(tex->op == nir_texop_txd);
488
489 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
490 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
491
492 int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
493 if (min_lod_idx >= 0) {
494 /* If we have a minimum LOD, clamp LOD accordingly */
495 lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
496 nir_tex_instr_remove_src(tex, min_lod_idx);
497 }
498
499 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
500 tex->op = nir_texop_txl;
501 }
502
503 static void
504 lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
505 {
506 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
507 assert(tex->op == nir_texop_txd);
508 assert(tex->dest.is_ssa);
509
510 /* Use textureSize() to get the width and height of LOD 0 */
511 nir_ssa_def *size = get_texture_size(b, tex);
512
513 /* Cubemap texture lookups first generate a texture coordinate normalized
514 * to [-1, 1] on the appropriate face. The appropriate face is determined
515 * by which component has the largest magnitude and by its sign. The texture
516 * coordinate is the quotient of the remaining texture coordinates against
517 * the absolute value of the component of largest magnitude. This
518 * division means that computing the derivative of the texel coordinate
519 * must use the quotient rule. The high-level GLSL code is as
520 * follows:
521 *
522 * Step 1: selection
523 *
524 * vec3 abs_p, Q, dQdx, dQdy;
525 * abs_p = abs(ir->coordinate);
526 * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
527 * Q = ir->coordinate.yzx;
528 * dQdx = ir->lod_info.grad.dPdx.yzx;
529 * dQdy = ir->lod_info.grad.dPdy.yzx;
530 * }
531 * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
532 * Q = ir->coordinate.xzy;
533 * dQdx = ir->lod_info.grad.dPdx.xzy;
534 * dQdy = ir->lod_info.grad.dPdy.xzy;
535 * }
536 * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
537 * Q = ir->coordinate;
538 * dQdx = ir->lod_info.grad.dPdx;
539 * dQdy = ir->lod_info.grad.dPdy;
540 * }
541 *
542 * Step 2: use the quotient rule to compute the derivative. The texel
543 * coordinate, normalized to [-1, 1], is given by Q.xy / (sign(Q.z) * Q.z).
544 * We only care about the magnitudes of the derivatives, which the sign
545 * does not affect, so we drop the sign from the computation.
546 *
547 * vec2 dx, dy;
548 * float recip;
549 *
550 * recip = 1.0 / Q.z;
551 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
552 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
553 *
554 * Step 3: compute LOD. At this point we have the derivatives of the
555 * texture coordinates normalized to [-1,1]. We take the LOD to be
556 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
557 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
558 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy, dy))) * L)
559 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy, dy))))
560 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy, dy)))
561 * where L is the dimension of the cubemap. The code is:
562 *
563 * float M, result;
564 * M = max(dot(dx, dx), dot(dy, dy));
565 * L = textureSize(sampler, 0).x;
566 * result = -1.0 + 0.5 * log2(L * L * M);
567 */
568
569 /* coordinate */
570 nir_ssa_def *p =
571 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;
572
573 /* unmodified dPdx, dPdy values */
574 nir_ssa_def *dPdx =
575 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
576 nir_ssa_def *dPdy =
577 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
578
579 nir_ssa_def *abs_p = nir_fabs(b, p);
580 nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
581 nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
582 nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);
583
584 /* 1. compute selector */
585 nir_ssa_def *Q, *dQdx, *dQdy;
586
587 nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
588 nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
589
590 unsigned yzx[3] = { 1, 2, 0 };
591 unsigned xzy[3] = { 0, 2, 1 };
592
593 Q = nir_bcsel(b, cond_z,
594 p,
595 nir_bcsel(b, cond_y,
596 nir_swizzle(b, p, xzy, 3),
597 nir_swizzle(b, p, yzx, 3)));
598
599 dQdx = nir_bcsel(b, cond_z,
600 dPdx,
601 nir_bcsel(b, cond_y,
602 nir_swizzle(b, dPdx, xzy, 3),
603 nir_swizzle(b, dPdx, yzx, 3)));
604
605 dQdy = nir_bcsel(b, cond_z,
606 dPdy,
607 nir_bcsel(b, cond_y,
608 nir_swizzle(b, dPdy, xzy, 3),
609 nir_swizzle(b, dPdy, yzx, 3)));
610
611 /* 2. quotient rule */
612
613 /* tmp = Q.xy * recip;
614 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
615 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
616 */
617 nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
618
619 nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
620 nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
621
622 nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
623 nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
624 nir_ssa_def *dx =
625 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
626
627 nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
628 nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
629 nir_ssa_def *dy =
630 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
631
632 /* M = max(dot(dx, dx), dot(dy, dy)); */
633 nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
634
635 /* size has textureSize() of LOD 0 */
636 nir_ssa_def *L = nir_channel(b, size, 0);
637
638 /* lod = -1.0 + 0.5 * log2(L * L * M); */
639 nir_ssa_def *lod =
640 nir_fadd(b,
641 nir_imm_float(b, -1.0f),
642 nir_fmul(b,
643 nir_imm_float(b, 0.5f),
644 nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
645
646 /* 3. Replace the gradient instruction with an equivalent lod instruction */
647 replace_gradient_with_lod(b, lod, tex);
648 }
649
650 static void
651 lower_gradient(nir_builder *b, nir_tex_instr *tex)
652 {
653 /* Cubes are more complicated and have their own function */
654 if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
655 lower_gradient_cube_map(b, tex);
656 return;
657 }
658
659 assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
660 assert(tex->op == nir_texop_txd);
661 assert(tex->dest.is_ssa);
662
663 /* Use textureSize() to get the width and height of LOD 0 */
664 unsigned component_mask;
665 switch (tex->sampler_dim) {
666 case GLSL_SAMPLER_DIM_3D:
667 component_mask = 7;
668 break;
669 case GLSL_SAMPLER_DIM_1D:
670 component_mask = 1;
671 break;
672 default:
673 component_mask = 3;
674 break;
675 }
676
677 nir_ssa_def *size =
678 nir_channels(b, get_texture_size(b, tex), component_mask);
679
680 /* Scale the gradients by width and height. Effectively, the incoming
681 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
682 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
683 */
684 nir_ssa_def *ddx =
685 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
686 nir_ssa_def *ddy =
687 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
688
689 nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
690 nir_ssa_def *dPdy = nir_fmul(b, ddy, size);
691
692 nir_ssa_def *rho;
693 if (dPdx->num_components == 1) {
694 rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
695 } else {
696 rho = nir_fmax(b,
697 nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
698 nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
699 }
700
701 /* lod = log2(rho). We're ignoring GL state biases for now. */
702 nir_ssa_def *lod = nir_flog2(b, rho);
703
704 /* Replace the gradient instruction with an equivalent lod instruction */
705 replace_gradient_with_lod(b, lod, tex);
706 }
707
708 static void
709 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
710 {
711 b->cursor = nir_before_instr(&tex->instr);
712
713 /* Walk through the sources saturating the requested arguments. */
714 for (unsigned i = 0; i < tex->num_srcs; i++) {
715 if (tex->src[i].src_type != nir_tex_src_coord)
716 continue;
717
718 nir_ssa_def *src =
719 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
720
721 /* split src into components: */
722 nir_ssa_def *comp[4];
723
724 assume(tex->coord_components >= 1);
725
726 for (unsigned j = 0; j < tex->coord_components; j++)
727 comp[j] = nir_channel(b, src, j);
728
729 /* clamp requested components, array index does not get clamped: */
730 unsigned ncomp = tex->coord_components;
731 if (tex->is_array)
732 ncomp--;
733
734 for (unsigned j = 0; j < ncomp; j++) {
735 if ((1 << j) & sat_mask) {
736 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
737 /* non-normalized texture coords, so clamp to texture
738 * size rather than [0.0, 1.0]
739 */
740 nir_ssa_def *txs = get_texture_size(b, tex);
741 comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
742 comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
743 } else {
744 comp[j] = nir_fsat(b, comp[j]);
745 }
746 }
747 }
748
749 /* and move the result back into a single vecN: */
750 src = nir_vec(b, comp, tex->coord_components);
751
752 nir_instr_rewrite_src(&tex->instr,
753 &tex->src[i].src,
754 nir_src_for_ssa(src));
755 }
756 }
757
758 static nir_ssa_def *
759 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
760 {
761 nir_const_value v[4];
762
763 memset(&v, 0, sizeof(v));
764
765 if (swizzle_val == 4) {
766 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
767 } else {
768 assert(swizzle_val == 5);
769 if (type == nir_type_float)
770 v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
771 else
772 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
773 }
774
775 return nir_build_imm(b, 4, 32, v);
776 }
777
778 static void
779 swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
780 {
781 assert(tex->dest.is_ssa);
782
783 b->cursor = nir_after_instr(&tex->instr);
784
785 assert(nir_tex_instr_dest_size(tex) == 4);
786 unsigned swiz[4] = { 2, 3, 1, 0 };
787 nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
788
789 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
790 swizzled->parent_instr);
791 }
792
793 static void
794 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
795 {
796 assert(tex->dest.is_ssa);
797
798 b->cursor = nir_after_instr(&tex->instr);
799
800 nir_ssa_def *swizzled;
801 if (tex->op == nir_texop_tg4) {
802 if (swizzle[tex->component] < 4) {
803 /* This one's easy */
804 tex->component = swizzle[tex->component];
805 return;
806 } else {
807 swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
808 }
809 } else {
810 assert(nir_tex_instr_dest_size(tex) == 4);
811 if (swizzle[0] < 4 && swizzle[1] < 4 &&
812 swizzle[2] < 4 && swizzle[3] < 4) {
813 unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
814 /* We have no 0s or 1s, just emit a swizzling MOV */
815 swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
816 } else {
817 nir_ssa_def *srcs[4];
818 for (unsigned i = 0; i < 4; i++) {
819 if (swizzle[i] < 4) {
820 srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
821 } else {
822 srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
823 }
824 }
825 swizzled = nir_vec(b, srcs, 4);
826 }
827 }
828
829 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
830 swizzled->parent_instr);
831 }
832
833 static void
834 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
835 {
836 assert(tex->dest.is_ssa);
837 assert(nir_tex_instr_dest_size(tex) == 4);
838 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
839
840 b->cursor = nir_after_instr(&tex->instr);
841
842 nir_ssa_def *rgb =
843 nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
844
845 /* alpha is untouched: */
846 nir_ssa_def *result = nir_vec4(b,
847 nir_channel(b, rgb, 0),
848 nir_channel(b, rgb, 1),
849 nir_channel(b, rgb, 2),
850 nir_channel(b, &tex->dest.ssa, 3));
851
852 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
853 result->parent_instr);
854 }
855
856 /**
857 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
858 * i16, or u16, or a single unorm4x8 value.
859 *
860 * Note that we don't change the destination num_components, because
861 * nir_tex_instr_dest_size() will still return 4. The driver is just expected
862 * to not store the other channels, given that nothing at the NIR level will
863 * read them.
864 */
865 static void
866 lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
867 const nir_lower_tex_options *options)
868 {
869 nir_ssa_def *color = &tex->dest.ssa;
870
871 b->cursor = nir_after_instr(&tex->instr);
872
873 switch (options->lower_tex_packing[tex->sampler_index]) {
874 case nir_lower_tex_packing_none:
875 return;
876
877 case nir_lower_tex_packing_16: {
878 static const unsigned bits[4] = {16, 16, 16, 16};
879
880 switch (nir_alu_type_get_base_type(tex->dest_type)) {
881 case nir_type_float:
882 if (tex->is_shadow && tex->is_new_style_shadow) {
883 color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
884 } else {
885 nir_ssa_def *rg = nir_channel(b, color, 0);
886 nir_ssa_def *ba = nir_channel(b, color, 1);
887 color = nir_vec4(b,
888 nir_unpack_half_2x16_split_x(b, rg),
889 nir_unpack_half_2x16_split_y(b, rg),
890 nir_unpack_half_2x16_split_x(b, ba),
891 nir_unpack_half_2x16_split_y(b, ba));
892 }
893 break;
894
895 case nir_type_int:
896 color = nir_format_unpack_sint(b, color, bits, 4);
897 break;
898
899 case nir_type_uint:
900 color = nir_format_unpack_uint(b, color, bits, 4);
901 break;
902
903 default:
904 unreachable("unknown base type");
905 }
906 break;
907 }
908
909 case nir_lower_tex_packing_8:
910 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
911 color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
912 break;
913 }
914
915 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(color),
916 color->parent_instr);
917 }
918
919 static bool
920 sampler_index_lt(nir_tex_instr *tex, unsigned max)
921 {
922 assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
923
924 unsigned sampler_index = tex->sampler_index;
925
926 int sampler_offset_idx =
927 nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
928 if (sampler_offset_idx >= 0) {
929 if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
930 return false;
931
932 sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
933 }
934
935 return sampler_index < max;
936 }
937
938 static bool
939 lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
940 {
941 assert(tex->op == nir_texop_tg4);
942 assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
943 assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);
944
945 b->cursor = nir_after_instr(&tex->instr);
946
947 nir_ssa_def *dest[4];
948 for (unsigned i = 0; i < 4; ++i) {
949 nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
950 tex_copy->op = tex->op;
951 tex_copy->coord_components = tex->coord_components;
952 tex_copy->sampler_dim = tex->sampler_dim;
953 tex_copy->is_array = tex->is_array;
954 tex_copy->is_shadow = tex->is_shadow;
955 tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
956 tex_copy->component = tex->component;
957 tex_copy->dest_type = tex->dest_type;
958
959 for (unsigned j = 0; j < tex->num_srcs; ++j) {
960 nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src, tex_copy);
961 tex_copy->src[j].src_type = tex->src[j].src_type;
962 }
963
964 nir_tex_src src;
965 src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
966 tex->tg4_offsets[i][1]));
967 src.src_type = nir_tex_src_offset;
968 tex_copy->src[tex_copy->num_srcs - 1] = src;
969
970 nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
971 nir_tex_instr_dest_size(tex), 32, NULL);
972
973 nir_builder_instr_insert(b, &tex_copy->instr);
974
975 dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
976 }
977
978 nir_ssa_def *res = nir_vec4(b, dest[0], dest[1], dest[2], dest[3]);
979 nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(res));
980 nir_instr_remove(&tex->instr);
981
982 return true;
983 }
984
985 static bool
986 nir_lower_tex_block(nir_block *block, nir_builder *b,
987 const nir_lower_tex_options *options)
988 {
989 bool progress = false;
990
991 nir_foreach_instr_safe(instr, block) {
992 if (instr->type != nir_instr_type_tex)
993 continue;
994
995 nir_tex_instr *tex = nir_instr_as_tex(instr);
996 bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
997
998 /* mask of src coords to saturate (clamp): */
999 unsigned sat_mask = 0;
1000
1001 if ((1 << tex->sampler_index) & options->saturate_r)
1002 sat_mask |= (1 << 2); /* .z */
1003 if ((1 << tex->sampler_index) & options->saturate_t)
1004 sat_mask |= (1 << 1); /* .y */
1005 if ((1 << tex->sampler_index) & options->saturate_s)
1006 sat_mask |= (1 << 0); /* .x */
1007
1008 /* If we are clamping any coords, we must lower the projector first,
1009 * as clamping happens *after* projection:
1010 */
1011 if (lower_txp || sat_mask) {
1012 progress |= project_src(b, tex);
1013 }
1014
1015 if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1016 (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1017 (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1018 options->lower_rect_offset)) {
1019 progress = lower_offset(b, tex) || progress;
1020 }
1021
1022 if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) {
1023 lower_rect(b, tex);
1024 progress = true;
1025 }
1026
1027 if ((1 << tex->texture_index) & options->lower_y_uv_external) {
1028 lower_y_uv_external(b, tex, options);
1029 progress = true;
1030 }
1031
1032 if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
1033 lower_y_u_v_external(b, tex, options);
1034 progress = true;
1035 }
1036
1037 if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
1038 lower_yx_xuxv_external(b, tex, options);
1039 progress = true;
1040 }
1041
1042 if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
1043 lower_xy_uxvx_external(b, tex, options);
1044 progress = true;
1045 }
1046
1047 if ((1 << tex->texture_index) & options->lower_ayuv_external) {
1048 lower_ayuv_external(b, tex, options);
1049 progress = true;
1050 }
1051
1052 if ((1 << tex->texture_index) & options->lower_xyuv_external) {
1053 lower_xyuv_external(b, tex, options);
1054 progress = true;
1055 }
1056
1057 if (sat_mask) {
1058 saturate_src(b, tex, sat_mask);
1059 progress = true;
1060 }
1061
1062 if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1063 swizzle_tg4_broadcom(b, tex);
1064 progress = true;
1065 }
1066
1067 if (((1 << tex->texture_index) & options->swizzle_result) &&
1068 !nir_tex_instr_is_query(tex) &&
1069 !(tex->is_shadow && tex->is_new_style_shadow)) {
1070 swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1071 progress = true;
1072 }
1073
1074 /* This should come after swizzling so we know which channels are rgb: */
1075 if (((1 << tex->texture_index) & options->lower_srgb) &&
1076 !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1077 linearize_srgb_result(b, tex);
1078 progress = true;
1079 }
1080
1081 const bool has_min_lod =
1082 nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1083 const bool has_offset =
1084 nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1085
1086 if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1087 options->lower_txb_shadow_clamp) {
1088 lower_implicit_lod(b, tex);
1089 progress = true;
1090 }
1091
1092 if (options->lower_tex_packing[tex->sampler_index] !=
1093 nir_lower_tex_packing_none &&
1094 tex->op != nir_texop_txs &&
1095 tex->op != nir_texop_query_levels) {
1096 lower_tex_packing(b, tex, options);
1097 progress = true;
1098 }
1099
1100 if (tex->op == nir_texop_txd &&
1101 (options->lower_txd ||
1102 (options->lower_txd_shadow && tex->is_shadow) ||
1103 (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1104 (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1105 (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1106 nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1107 (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1108 has_min_lod && !sampler_index_lt(tex, 16)) ||
1109 (options->lower_txd_cube_map &&
1110 tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1111 (options->lower_txd_3d &&
1112 tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
1113 lower_gradient(b, tex);
1114 progress = true;
1115 continue;
1116 }
1117
1118 bool shader_supports_implicit_lod =
1119 b->shader->info.stage == MESA_SHADER_FRAGMENT ||
1120 (b->shader->info.stage == MESA_SHADER_COMPUTE &&
1121 b->shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE);
1122
1123 /* TXF, TXS and TXL require a LOD but not everything we implement using those
1124 * three opcodes provides one. Provide a default LOD of 0.
1125 */
1126 if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1127 (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1128 tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
1129 (tex->op == nir_texop_tex && !shader_supports_implicit_lod))) {
1130 b->cursor = nir_before_instr(&tex->instr);
1131 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
1132 if (tex->op == nir_texop_tex && options->lower_tex_without_implicit_lod)
1133 tex->op = nir_texop_txl;
1134 progress = true;
1135 continue;
1136 }
1137
1138 /* This has to happen after all the other lowerings, as the original tg4
1139 * gets replaced by four tg4 instructions.
1140 */
1141 if (tex->op == nir_texop_tg4 &&
1142 nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1143 options->lower_tg4_offsets) {
1144 progress |= lower_tg4_offsets(b, tex);
1145 continue;
1146 }
1147 }
1148
1149 return progress;
1150 }
1151
1152 static bool
1153 nir_lower_tex_impl(nir_function_impl *impl,
1154 const nir_lower_tex_options *options)
1155 {
1156 bool progress = false;
1157 nir_builder builder;
1158 nir_builder_init(&builder, impl);
1159
1160 nir_foreach_block(block, impl) {
1161 progress |= nir_lower_tex_block(block, &builder, options);
1162 }
1163
1164 nir_metadata_preserve(impl, nir_metadata_block_index |
1165 nir_metadata_dominance);
1166 return progress;
1167 }
1168
1169 bool
1170 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1171 {
1172 bool progress = false;
1173
1174 nir_foreach_function(function, shader) {
1175 if (function->impl)
1176 progress |= nir_lower_tex_impl(function->impl, options);
1177 }
1178
1179 return progress;
1180 }