/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + lowering RECT: converts the un-normalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
 */

#include <assert.h>
#include <string.h>

#include "nir_builder.h"
#include "nir_format_convert.h"

43 project_src(nir_builder
*b
, nir_tex_instr
*tex
)
45 /* Find the projector in the srcs list, if present. */
46 int proj_index
= nir_tex_instr_src_index(tex
, nir_tex_src_projector
);
50 b
->cursor
= nir_before_instr(&tex
->instr
);
52 nir_ssa_def
*inv_proj
=
53 nir_frcp(b
, nir_ssa_for_src(b
, tex
->src
[proj_index
].src
, 1));
55 /* Walk through the sources projecting the arguments. */
56 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
57 switch (tex
->src
[i
].src_type
) {
58 case nir_tex_src_coord
:
59 case nir_tex_src_comparator
:
64 nir_ssa_def
*unprojected
=
65 nir_ssa_for_src(b
, tex
->src
[i
].src
, nir_tex_instr_src_size(tex
, i
));
66 nir_ssa_def
*projected
= nir_fmul(b
, unprojected
, inv_proj
);
68 /* Array indices don't get projected, so make an new vector with the
69 * coordinate's array index untouched.
71 if (tex
->is_array
&& tex
->src
[i
].src_type
== nir_tex_src_coord
) {
72 switch (tex
->coord_components
) {
74 projected
= nir_vec4(b
,
75 nir_channel(b
, projected
, 0),
76 nir_channel(b
, projected
, 1),
77 nir_channel(b
, projected
, 2),
78 nir_channel(b
, unprojected
, 3));
81 projected
= nir_vec3(b
,
82 nir_channel(b
, projected
, 0),
83 nir_channel(b
, projected
, 1),
84 nir_channel(b
, unprojected
, 2));
87 projected
= nir_vec2(b
,
88 nir_channel(b
, projected
, 0),
89 nir_channel(b
, unprojected
, 1));
92 unreachable("bad texture coord count for array");
97 nir_instr_rewrite_src(&tex
->instr
,
99 nir_src_for_ssa(projected
));
102 nir_tex_instr_remove_src(tex
, proj_index
);
106 get_texture_size(nir_builder
*b
, nir_tex_instr
*tex
)
108 b
->cursor
= nir_before_instr(&tex
->instr
);
112 unsigned num_srcs
= 1; /* One for the LOD */
113 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
114 if (tex
->src
[i
].src_type
== nir_tex_src_texture_deref
||
115 tex
->src
[i
].src_type
== nir_tex_src_sampler_deref
||
116 tex
->src
[i
].src_type
== nir_tex_src_texture_offset
||
117 tex
->src
[i
].src_type
== nir_tex_src_sampler_offset
)
121 txs
= nir_tex_instr_create(b
->shader
, num_srcs
);
122 txs
->op
= nir_texop_txs
;
123 txs
->sampler_dim
= tex
->sampler_dim
;
124 txs
->is_array
= tex
->is_array
;
125 txs
->is_shadow
= tex
->is_shadow
;
126 txs
->is_new_style_shadow
= tex
->is_new_style_shadow
;
127 txs
->texture_index
= tex
->texture_index
;
128 txs
->sampler_index
= tex
->sampler_index
;
129 txs
->dest_type
= nir_type_int
;
132 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
133 if (tex
->src
[i
].src_type
== nir_tex_src_texture_deref
||
134 tex
->src
[i
].src_type
== nir_tex_src_sampler_deref
||
135 tex
->src
[i
].src_type
== nir_tex_src_texture_offset
||
136 tex
->src
[i
].src_type
== nir_tex_src_sampler_offset
) {
137 nir_src_copy(&txs
->src
[idx
].src
, &tex
->src
[i
].src
, txs
);
138 txs
->src
[idx
].src_type
= tex
->src
[i
].src_type
;
142 /* Add in an LOD because some back-ends require it */
143 txs
->src
[idx
].src
= nir_src_for_ssa(nir_imm_int(b
, 0));
144 txs
->src
[idx
].src_type
= nir_tex_src_lod
;
146 nir_ssa_dest_init(&txs
->instr
, &txs
->dest
,
147 nir_tex_instr_dest_size(txs
), 32, NULL
);
148 nir_builder_instr_insert(b
, &txs
->instr
);
150 return nir_i2f32(b
, &txs
->dest
.ssa
);
154 get_texture_lod(nir_builder
*b
, nir_tex_instr
*tex
)
156 b
->cursor
= nir_before_instr(&tex
->instr
);
160 unsigned num_srcs
= 0;
161 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
162 if (tex
->src
[i
].src_type
== nir_tex_src_coord
||
163 tex
->src
[i
].src_type
== nir_tex_src_texture_deref
||
164 tex
->src
[i
].src_type
== nir_tex_src_sampler_deref
||
165 tex
->src
[i
].src_type
== nir_tex_src_texture_offset
||
166 tex
->src
[i
].src_type
== nir_tex_src_sampler_offset
)
170 tql
= nir_tex_instr_create(b
->shader
, num_srcs
);
171 tql
->op
= nir_texop_lod
;
172 tql
->coord_components
= tex
->coord_components
;
173 tql
->sampler_dim
= tex
->sampler_dim
;
174 tql
->is_array
= tex
->is_array
;
175 tql
->is_shadow
= tex
->is_shadow
;
176 tql
->is_new_style_shadow
= tex
->is_new_style_shadow
;
177 tql
->texture_index
= tex
->texture_index
;
178 tql
->sampler_index
= tex
->sampler_index
;
179 tql
->dest_type
= nir_type_float
;
182 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
183 if (tex
->src
[i
].src_type
== nir_tex_src_coord
||
184 tex
->src
[i
].src_type
== nir_tex_src_texture_deref
||
185 tex
->src
[i
].src_type
== nir_tex_src_sampler_deref
||
186 tex
->src
[i
].src_type
== nir_tex_src_texture_offset
||
187 tex
->src
[i
].src_type
== nir_tex_src_sampler_offset
) {
188 nir_src_copy(&tql
->src
[idx
].src
, &tex
->src
[i
].src
, tql
);
189 tql
->src
[idx
].src_type
= tex
->src
[i
].src_type
;
194 nir_ssa_dest_init(&tql
->instr
, &tql
->dest
, 2, 32, NULL
);
195 nir_builder_instr_insert(b
, &tql
->instr
);
197 /* The LOD is the y component of the result */
198 return nir_channel(b
, &tql
->dest
.ssa
, 1);
202 lower_offset(nir_builder
*b
, nir_tex_instr
*tex
)
204 int offset_index
= nir_tex_instr_src_index(tex
, nir_tex_src_offset
);
205 if (offset_index
< 0)
208 int coord_index
= nir_tex_instr_src_index(tex
, nir_tex_src_coord
);
209 assert(coord_index
>= 0);
211 assert(tex
->src
[offset_index
].src
.is_ssa
);
212 assert(tex
->src
[coord_index
].src
.is_ssa
);
213 nir_ssa_def
*offset
= tex
->src
[offset_index
].src
.ssa
;
214 nir_ssa_def
*coord
= tex
->src
[coord_index
].src
.ssa
;
216 b
->cursor
= nir_before_instr(&tex
->instr
);
218 nir_ssa_def
*offset_coord
;
219 if (nir_tex_instr_src_type(tex
, coord_index
) == nir_type_float
) {
220 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) {
221 offset_coord
= nir_fadd(b
, coord
, nir_i2f32(b
, offset
));
223 nir_ssa_def
*txs
= get_texture_size(b
, tex
);
224 nir_ssa_def
*scale
= nir_frcp(b
, txs
);
226 offset_coord
= nir_fadd(b
, coord
,
228 nir_i2f32(b
, offset
),
232 offset_coord
= nir_iadd(b
, coord
, offset
);
236 /* The offset is not applied to the array index */
237 if (tex
->coord_components
== 2) {
238 offset_coord
= nir_vec2(b
, nir_channel(b
, offset_coord
, 0),
239 nir_channel(b
, coord
, 1));
240 } else if (tex
->coord_components
== 3) {
241 offset_coord
= nir_vec3(b
, nir_channel(b
, offset_coord
, 0),
242 nir_channel(b
, offset_coord
, 1),
243 nir_channel(b
, coord
, 2));
245 unreachable("Invalid number of components");
249 nir_instr_rewrite_src(&tex
->instr
, &tex
->src
[coord_index
].src
,
250 nir_src_for_ssa(offset_coord
));
252 nir_tex_instr_remove_src(tex
, offset_index
);
258 lower_rect(nir_builder
*b
, nir_tex_instr
*tex
)
260 nir_ssa_def
*txs
= get_texture_size(b
, tex
);
261 nir_ssa_def
*scale
= nir_frcp(b
, txs
);
263 /* Walk through the sources normalizing the requested arguments. */
264 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
265 if (tex
->src
[i
].src_type
!= nir_tex_src_coord
)
268 nir_ssa_def
*coords
=
269 nir_ssa_for_src(b
, tex
->src
[i
].src
, tex
->coord_components
);
270 nir_instr_rewrite_src(&tex
->instr
,
272 nir_src_for_ssa(nir_fmul(b
, coords
, scale
)));
275 tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
279 lower_implicit_lod(nir_builder
*b
, nir_tex_instr
*tex
)
281 assert(tex
->op
== nir_texop_tex
|| tex
->op
== nir_texop_txb
);
282 assert(nir_tex_instr_src_index(tex
, nir_tex_src_lod
) < 0);
283 assert(nir_tex_instr_src_index(tex
, nir_tex_src_ddx
) < 0);
284 assert(nir_tex_instr_src_index(tex
, nir_tex_src_ddy
) < 0);
286 b
->cursor
= nir_before_instr(&tex
->instr
);
288 nir_ssa_def
*lod
= get_texture_lod(b
, tex
);
290 int bias_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_bias
);
292 /* If we have a bias, add it in */
293 lod
= nir_fadd(b
, lod
, nir_ssa_for_src(b
, tex
->src
[bias_idx
].src
, 1));
294 nir_tex_instr_remove_src(tex
, bias_idx
);
297 int min_lod_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_min_lod
);
298 if (min_lod_idx
>= 0) {
299 /* If we have a minimum LOD, clamp LOD accordingly */
300 lod
= nir_fmax(b
, lod
, nir_ssa_for_src(b
, tex
->src
[min_lod_idx
].src
, 1));
301 nir_tex_instr_remove_src(tex
, min_lod_idx
);
304 nir_tex_instr_add_src(tex
, nir_tex_src_lod
, nir_src_for_ssa(lod
));
305 tex
->op
= nir_texop_txl
;
309 sample_plane(nir_builder
*b
, nir_tex_instr
*tex
, int plane
,
310 const nir_lower_tex_options
*options
)
312 assert(tex
->dest
.is_ssa
);
313 assert(nir_tex_instr_dest_size(tex
) == 4);
314 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
315 assert(tex
->op
== nir_texop_tex
);
316 assert(tex
->coord_components
== 2);
318 nir_tex_instr
*plane_tex
=
319 nir_tex_instr_create(b
->shader
, tex
->num_srcs
+ 1);
320 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
321 nir_src_copy(&plane_tex
->src
[i
].src
, &tex
->src
[i
].src
, plane_tex
);
322 plane_tex
->src
[i
].src_type
= tex
->src
[i
].src_type
;
324 plane_tex
->src
[tex
->num_srcs
].src
= nir_src_for_ssa(nir_imm_int(b
, plane
));
325 plane_tex
->src
[tex
->num_srcs
].src_type
= nir_tex_src_plane
;
326 plane_tex
->op
= nir_texop_tex
;
327 plane_tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
328 plane_tex
->dest_type
= nir_type_float
;
329 plane_tex
->coord_components
= 2;
331 plane_tex
->texture_index
= tex
->texture_index
;
332 plane_tex
->sampler_index
= tex
->sampler_index
;
334 nir_ssa_dest_init(&plane_tex
->instr
, &plane_tex
->dest
, 4, 32, NULL
);
336 nir_builder_instr_insert(b
, &plane_tex
->instr
);
338 /* If scaling_factor is set, return a scaled value. */
339 if (options
->scale_factors
[tex
->texture_index
])
340 return nir_fmul_imm(b
, &plane_tex
->dest
.ssa
,
341 options
->scale_factors
[tex
->texture_index
]);
343 return &plane_tex
->dest
.ssa
;
347 convert_yuv_to_rgb(nir_builder
*b
, nir_tex_instr
*tex
,
348 nir_ssa_def
*y
, nir_ssa_def
*u
, nir_ssa_def
*v
,
351 nir_const_value m
[3] = {
352 { .f32
= { 1.0f
, 0.0f
, 1.59602678f
, 0.0f
} },
353 { .f32
= { 1.0f
, -0.39176229f
, -0.81296764f
, 0.0f
} },
354 { .f32
= { 1.0f
, 2.01723214f
, 0.0f
, 0.0f
} }
359 nir_fmul(b
, nir_imm_float(b
, 1.16438356f
),
360 nir_fadd(b
, y
, nir_imm_float(b
, -16.0f
/ 255.0f
))),
361 nir_channel(b
, nir_fadd(b
, u
, nir_imm_float(b
, -128.0f
/ 255.0f
)), 0),
362 nir_channel(b
, nir_fadd(b
, v
, nir_imm_float(b
, -128.0f
/ 255.0f
)), 0),
363 nir_imm_float(b
, 0.0));
365 nir_ssa_def
*red
= nir_fdot4(b
, yuv
, nir_build_imm(b
, 4, 32, m
[0]));
366 nir_ssa_def
*green
= nir_fdot4(b
, yuv
, nir_build_imm(b
, 4, 32, m
[1]));
367 nir_ssa_def
*blue
= nir_fdot4(b
, yuv
, nir_build_imm(b
, 4, 32, m
[2]));
369 nir_ssa_def
*result
= nir_vec4(b
, red
, green
, blue
, a
);
371 nir_ssa_def_rewrite_uses(&tex
->dest
.ssa
, nir_src_for_ssa(result
));
375 lower_y_uv_external(nir_builder
*b
, nir_tex_instr
*tex
,
376 const nir_lower_tex_options
*options
)
378 b
->cursor
= nir_after_instr(&tex
->instr
);
380 nir_ssa_def
*y
= sample_plane(b
, tex
, 0, options
);
381 nir_ssa_def
*uv
= sample_plane(b
, tex
, 1, options
);
383 convert_yuv_to_rgb(b
, tex
,
384 nir_channel(b
, y
, 0),
385 nir_channel(b
, uv
, 0),
386 nir_channel(b
, uv
, 1),
387 nir_imm_float(b
, 1.0f
));
391 lower_y_u_v_external(nir_builder
*b
, nir_tex_instr
*tex
,
392 const nir_lower_tex_options
*options
)
394 b
->cursor
= nir_after_instr(&tex
->instr
);
396 nir_ssa_def
*y
= sample_plane(b
, tex
, 0, options
);
397 nir_ssa_def
*u
= sample_plane(b
, tex
, 1, options
);
398 nir_ssa_def
*v
= sample_plane(b
, tex
, 2, options
);
400 convert_yuv_to_rgb(b
, tex
,
401 nir_channel(b
, y
, 0),
402 nir_channel(b
, u
, 0),
403 nir_channel(b
, v
, 0),
404 nir_imm_float(b
, 1.0f
));
408 lower_yx_xuxv_external(nir_builder
*b
, nir_tex_instr
*tex
,
409 const nir_lower_tex_options
*options
)
411 b
->cursor
= nir_after_instr(&tex
->instr
);
413 nir_ssa_def
*y
= sample_plane(b
, tex
, 0, options
);
414 nir_ssa_def
*xuxv
= sample_plane(b
, tex
, 1, options
);
416 convert_yuv_to_rgb(b
, tex
,
417 nir_channel(b
, y
, 0),
418 nir_channel(b
, xuxv
, 1),
419 nir_channel(b
, xuxv
, 3),
420 nir_imm_float(b
, 1.0f
));
424 lower_xy_uxvx_external(nir_builder
*b
, nir_tex_instr
*tex
,
425 const nir_lower_tex_options
*options
)
427 b
->cursor
= nir_after_instr(&tex
->instr
);
429 nir_ssa_def
*y
= sample_plane(b
, tex
, 0, options
);
430 nir_ssa_def
*uxvx
= sample_plane(b
, tex
, 1, options
);
432 convert_yuv_to_rgb(b
, tex
,
433 nir_channel(b
, y
, 1),
434 nir_channel(b
, uxvx
, 0),
435 nir_channel(b
, uxvx
, 2),
436 nir_imm_float(b
, 1.0f
));
440 lower_ayuv_external(nir_builder
*b
, nir_tex_instr
*tex
,
441 const nir_lower_tex_options
*options
)
443 b
->cursor
= nir_after_instr(&tex
->instr
);
445 nir_ssa_def
*ayuv
= sample_plane(b
, tex
, 0, options
);
447 convert_yuv_to_rgb(b
, tex
,
448 nir_channel(b
, ayuv
, 2),
449 nir_channel(b
, ayuv
, 1),
450 nir_channel(b
, ayuv
, 0),
451 nir_channel(b
, ayuv
, 3));
455 lower_xyuv_external(nir_builder
*b
, nir_tex_instr
*tex
,
456 const nir_lower_tex_options
*options
)
458 b
->cursor
= nir_after_instr(&tex
->instr
);
460 nir_ssa_def
*xyuv
= sample_plane(b
, tex
, 0, options
);
462 convert_yuv_to_rgb(b
, tex
,
463 nir_channel(b
, xyuv
, 2),
464 nir_channel(b
, xyuv
, 1),
465 nir_channel(b
, xyuv
, 0),
466 nir_imm_float(b
, 1.0f
));
470 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
471 * computed from the gradients.
474 replace_gradient_with_lod(nir_builder
*b
, nir_ssa_def
*lod
, nir_tex_instr
*tex
)
476 assert(tex
->op
== nir_texop_txd
);
478 nir_tex_instr_remove_src(tex
, nir_tex_instr_src_index(tex
, nir_tex_src_ddx
));
479 nir_tex_instr_remove_src(tex
, nir_tex_instr_src_index(tex
, nir_tex_src_ddy
));
481 int min_lod_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_min_lod
);
482 if (min_lod_idx
>= 0) {
483 /* If we have a minimum LOD, clamp LOD accordingly */
484 lod
= nir_fmax(b
, lod
, nir_ssa_for_src(b
, tex
->src
[min_lod_idx
].src
, 1));
485 nir_tex_instr_remove_src(tex
, min_lod_idx
);
488 nir_tex_instr_add_src(tex
, nir_tex_src_lod
, nir_src_for_ssa(lod
));
489 tex
->op
= nir_texop_txl
;
493 lower_gradient_cube_map(nir_builder
*b
, nir_tex_instr
*tex
)
495 assert(tex
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
);
496 assert(tex
->op
== nir_texop_txd
);
497 assert(tex
->dest
.is_ssa
);
499 /* Use textureSize() to get the width and height of LOD 0 */
500 nir_ssa_def
*size
= get_texture_size(b
, tex
);
502 /* Cubemap texture lookups first generate a texture coordinate normalized
503 * to [-1, 1] on the appropiate face. The appropiate face is determined
504 * by which component has largest magnitude and its sign. The texture
505 * coordinate is the quotient of the remaining texture coordinates against
506 * that absolute value of the component of largest magnitude. This
507 * division requires that the computing of the derivative of the texel
508 * coordinate must use the quotient rule. The high level GLSL code is as
513 * vec3 abs_p, Q, dQdx, dQdy;
514 * abs_p = abs(ir->coordinate);
515 * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
516 * Q = ir->coordinate.yzx;
517 * dQdx = ir->lod_info.grad.dPdx.yzx;
518 * dQdy = ir->lod_info.grad.dPdy.yzx;
520 * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
521 * Q = ir->coordinate.xzy;
522 * dQdx = ir->lod_info.grad.dPdx.xzy;
523 * dQdy = ir->lod_info.grad.dPdy.xzy;
525 * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
526 * Q = ir->coordinate;
527 * dQdx = ir->lod_info.grad.dPdx;
528 * dQdy = ir->lod_info.grad.dPdy;
531 * Step 2: use quotient rule to compute derivative. The normalized to
532 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
533 * only concerned with the magnitudes of the derivatives whose values are
534 * not affected by the sign. We drop the sign from the computation.
540 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
541 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
543 * Step 3: compute LOD. At this point we have the derivatives of the
544 * texture coordinates normalized to [-1,1]. We take the LOD to be
545 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
546 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
547 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
548 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
549 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
550 * where L is the dimension of the cubemap. The code is:
553 * M = max(dot(dx, dx), dot(dy, dy));
554 * L = textureSize(sampler, 0).x;
555 * result = -1.0 + 0.5 * log2(L * L * M);
560 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_coord
)].src
.ssa
;
562 /* unmodified dPdx, dPdy values */
564 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddx
)].src
.ssa
;
566 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddy
)].src
.ssa
;
568 nir_ssa_def
*abs_p
= nir_fabs(b
, p
);
569 nir_ssa_def
*abs_p_x
= nir_channel(b
, abs_p
, 0);
570 nir_ssa_def
*abs_p_y
= nir_channel(b
, abs_p
, 1);
571 nir_ssa_def
*abs_p_z
= nir_channel(b
, abs_p
, 2);
573 /* 1. compute selector */
574 nir_ssa_def
*Q
, *dQdx
, *dQdy
;
576 nir_ssa_def
*cond_z
= nir_fge(b
, abs_p_z
, nir_fmax(b
, abs_p_x
, abs_p_y
));
577 nir_ssa_def
*cond_y
= nir_fge(b
, abs_p_y
, nir_fmax(b
, abs_p_x
, abs_p_z
));
579 unsigned yzx
[3] = { 1, 2, 0 };
580 unsigned xzy
[3] = { 0, 2, 1 };
582 Q
= nir_bcsel(b
, cond_z
,
585 nir_swizzle(b
, p
, xzy
, 3, false),
586 nir_swizzle(b
, p
, yzx
, 3, false)));
588 dQdx
= nir_bcsel(b
, cond_z
,
591 nir_swizzle(b
, dPdx
, xzy
, 3, false),
592 nir_swizzle(b
, dPdx
, yzx
, 3, false)));
594 dQdy
= nir_bcsel(b
, cond_z
,
597 nir_swizzle(b
, dPdy
, xzy
, 3, false),
598 nir_swizzle(b
, dPdy
, yzx
, 3, false)));
600 /* 2. quotient rule */
602 /* tmp = Q.xy * recip;
603 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
604 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
606 nir_ssa_def
*rcp_Q_z
= nir_frcp(b
, nir_channel(b
, Q
, 2));
608 nir_ssa_def
*Q_xy
= nir_channels(b
, Q
, 0x3);
609 nir_ssa_def
*tmp
= nir_fmul(b
, Q_xy
, rcp_Q_z
);
611 nir_ssa_def
*dQdx_xy
= nir_channels(b
, dQdx
, 0x3);
612 nir_ssa_def
*dQdx_z
= nir_channel(b
, dQdx
, 2);
614 nir_fmul(b
, rcp_Q_z
, nir_fsub(b
, dQdx_xy
, nir_fmul(b
, tmp
, dQdx_z
)));
616 nir_ssa_def
*dQdy_xy
= nir_channels(b
, dQdy
, 0x3);
617 nir_ssa_def
*dQdy_z
= nir_channel(b
, dQdy
, 2);
619 nir_fmul(b
, rcp_Q_z
, nir_fsub(b
, dQdy_xy
, nir_fmul(b
, tmp
, dQdy_z
)));
621 /* M = max(dot(dx, dx), dot(dy, dy)); */
622 nir_ssa_def
*M
= nir_fmax(b
, nir_fdot(b
, dx
, dx
), nir_fdot(b
, dy
, dy
));
624 /* size has textureSize() of LOD 0 */
625 nir_ssa_def
*L
= nir_channel(b
, size
, 0);
627 /* lod = -1.0 + 0.5 * log2(L * L * M); */
630 nir_imm_float(b
, -1.0f
),
632 nir_imm_float(b
, 0.5f
),
633 nir_flog2(b
, nir_fmul(b
, L
, nir_fmul(b
, L
, M
)))));
635 /* 3. Replace the gradient instruction with an equivalent lod instruction */
636 replace_gradient_with_lod(b
, lod
, tex
);
640 lower_gradient(nir_builder
*b
, nir_tex_instr
*tex
)
642 /* Cubes are more complicated and have their own function */
643 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
) {
644 lower_gradient_cube_map(b
, tex
);
648 assert(tex
->sampler_dim
!= GLSL_SAMPLER_DIM_CUBE
);
649 assert(tex
->op
== nir_texop_txd
);
650 assert(tex
->dest
.is_ssa
);
652 /* Use textureSize() to get the width and height of LOD 0 */
653 unsigned component_mask
;
654 switch (tex
->sampler_dim
) {
655 case GLSL_SAMPLER_DIM_3D
:
658 case GLSL_SAMPLER_DIM_1D
:
667 nir_channels(b
, get_texture_size(b
, tex
), component_mask
);
669 /* Scale the gradients by width and height. Effectively, the incoming
670 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
671 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
674 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddx
)].src
.ssa
;
676 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddy
)].src
.ssa
;
678 nir_ssa_def
*dPdx
= nir_fmul(b
, ddx
, size
);
679 nir_ssa_def
*dPdy
= nir_fmul(b
, ddy
, size
);
682 if (dPdx
->num_components
== 1) {
683 rho
= nir_fmax(b
, nir_fabs(b
, dPdx
), nir_fabs(b
, dPdy
));
686 nir_fsqrt(b
, nir_fdot(b
, dPdx
, dPdx
)),
687 nir_fsqrt(b
, nir_fdot(b
, dPdy
, dPdy
)));
690 /* lod = log2(rho). We're ignoring GL state biases for now. */
691 nir_ssa_def
*lod
= nir_flog2(b
, rho
);
693 /* Replace the gradient instruction with an equivalent lod instruction */
694 replace_gradient_with_lod(b
, lod
, tex
);
698 saturate_src(nir_builder
*b
, nir_tex_instr
*tex
, unsigned sat_mask
)
700 b
->cursor
= nir_before_instr(&tex
->instr
);
702 /* Walk through the sources saturating the requested arguments. */
703 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
704 if (tex
->src
[i
].src_type
!= nir_tex_src_coord
)
708 nir_ssa_for_src(b
, tex
->src
[i
].src
, tex
->coord_components
);
710 /* split src into components: */
711 nir_ssa_def
*comp
[4];
713 assume(tex
->coord_components
>= 1);
715 for (unsigned j
= 0; j
< tex
->coord_components
; j
++)
716 comp
[j
] = nir_channel(b
, src
, j
);
718 /* clamp requested components, array index does not get clamped: */
719 unsigned ncomp
= tex
->coord_components
;
723 for (unsigned j
= 0; j
< ncomp
; j
++) {
724 if ((1 << j
) & sat_mask
) {
725 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) {
726 /* non-normalized texture coords, so clamp to texture
727 * size rather than [0.0, 1.0]
729 nir_ssa_def
*txs
= get_texture_size(b
, tex
);
730 comp
[j
] = nir_fmax(b
, comp
[j
], nir_imm_float(b
, 0.0));
731 comp
[j
] = nir_fmin(b
, comp
[j
], nir_channel(b
, txs
, j
));
733 comp
[j
] = nir_fsat(b
, comp
[j
]);
738 /* and move the result back into a single vecN: */
739 src
= nir_vec(b
, comp
, tex
->coord_components
);
741 nir_instr_rewrite_src(&tex
->instr
,
743 nir_src_for_ssa(src
));
748 get_zero_or_one(nir_builder
*b
, nir_alu_type type
, uint8_t swizzle_val
)
752 memset(&v
, 0, sizeof(v
));
754 if (swizzle_val
== 4) {
755 v
.u32
[0] = v
.u32
[1] = v
.u32
[2] = v
.u32
[3] = 0;
757 assert(swizzle_val
== 5);
758 if (type
== nir_type_float
)
759 v
.f32
[0] = v
.f32
[1] = v
.f32
[2] = v
.f32
[3] = 1.0;
761 v
.u32
[0] = v
.u32
[1] = v
.u32
[2] = v
.u32
[3] = 1;
764 return nir_build_imm(b
, 4, 32, v
);
768 swizzle_tg4_broadcom(nir_builder
*b
, nir_tex_instr
*tex
)
770 assert(tex
->dest
.is_ssa
);
772 b
->cursor
= nir_after_instr(&tex
->instr
);
774 assert(nir_tex_instr_dest_size(tex
) == 4);
775 unsigned swiz
[4] = { 2, 3, 1, 0 };
776 nir_ssa_def
*swizzled
= nir_swizzle(b
, &tex
->dest
.ssa
, swiz
, 4, false);
778 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(swizzled
),
779 swizzled
->parent_instr
);
783 swizzle_result(nir_builder
*b
, nir_tex_instr
*tex
, const uint8_t swizzle
[4])
785 assert(tex
->dest
.is_ssa
);
787 b
->cursor
= nir_after_instr(&tex
->instr
);
789 nir_ssa_def
*swizzled
;
790 if (tex
->op
== nir_texop_tg4
) {
791 if (swizzle
[tex
->component
] < 4) {
792 /* This one's easy */
793 tex
->component
= swizzle
[tex
->component
];
796 swizzled
= get_zero_or_one(b
, tex
->dest_type
, swizzle
[tex
->component
]);
799 assert(nir_tex_instr_dest_size(tex
) == 4);
800 if (swizzle
[0] < 4 && swizzle
[1] < 4 &&
801 swizzle
[2] < 4 && swizzle
[3] < 4) {
802 unsigned swiz
[4] = { swizzle
[0], swizzle
[1], swizzle
[2], swizzle
[3] };
803 /* We have no 0s or 1s, just emit a swizzling MOV */
804 swizzled
= nir_swizzle(b
, &tex
->dest
.ssa
, swiz
, 4, false);
806 nir_ssa_def
*srcs
[4];
807 for (unsigned i
= 0; i
< 4; i
++) {
808 if (swizzle
[i
] < 4) {
809 srcs
[i
] = nir_channel(b
, &tex
->dest
.ssa
, swizzle
[i
]);
811 srcs
[i
] = get_zero_or_one(b
, tex
->dest_type
, swizzle
[i
]);
814 swizzled
= nir_vec(b
, srcs
, 4);
818 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(swizzled
),
819 swizzled
->parent_instr
);
823 linearize_srgb_result(nir_builder
*b
, nir_tex_instr
*tex
)
825 assert(tex
->dest
.is_ssa
);
826 assert(nir_tex_instr_dest_size(tex
) == 4);
827 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
829 b
->cursor
= nir_after_instr(&tex
->instr
);
832 nir_format_srgb_to_linear(b
, nir_channels(b
, &tex
->dest
.ssa
, 0x7));
834 /* alpha is untouched: */
835 nir_ssa_def
*result
= nir_vec4(b
,
836 nir_channel(b
, rgb
, 0),
837 nir_channel(b
, rgb
, 1),
838 nir_channel(b
, rgb
, 2),
839 nir_channel(b
, &tex
->dest
.ssa
, 3));
841 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(result
),
842 result
->parent_instr
);
846 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
847 * i16, or u16, or a single unorm4x8 value.
849 * Note that we don't change the destination num_components, because
850 * nir_tex_instr_dest_size() will still return 4. The driver is just expected
851 * to not store the other channels, given that nothing at the NIR level will
855 lower_tex_packing(nir_builder
*b
, nir_tex_instr
*tex
,
856 const nir_lower_tex_options
*options
)
858 nir_ssa_def
*color
= &tex
->dest
.ssa
;
860 b
->cursor
= nir_after_instr(&tex
->instr
);
862 switch (options
->lower_tex_packing
[tex
->sampler_index
]) {
863 case nir_lower_tex_packing_none
:
866 case nir_lower_tex_packing_16
: {
867 static const unsigned bits
[4] = {16, 16, 16, 16};
869 switch (nir_alu_type_get_base_type(tex
->dest_type
)) {
871 if (tex
->is_shadow
&& tex
->is_new_style_shadow
) {
872 color
= nir_unpack_half_2x16_split_x(b
, nir_channel(b
, color
, 0));
874 nir_ssa_def
*rg
= nir_channel(b
, color
, 0);
875 nir_ssa_def
*ba
= nir_channel(b
, color
, 1);
877 nir_unpack_half_2x16_split_x(b
, rg
),
878 nir_unpack_half_2x16_split_y(b
, rg
),
879 nir_unpack_half_2x16_split_x(b
, ba
),
880 nir_unpack_half_2x16_split_y(b
, ba
));
885 color
= nir_format_unpack_sint(b
, color
, bits
, 4);
889 color
= nir_format_unpack_uint(b
, color
, bits
, 4);
893 unreachable("unknown base type");
898 case nir_lower_tex_packing_8
:
899 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
900 color
= nir_unpack_unorm_4x8(b
, nir_channel(b
, color
, 0));
904 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(color
),
905 color
->parent_instr
);
909 sampler_index_lt(nir_tex_instr
*tex
, unsigned max
)
911 assert(nir_tex_instr_src_index(tex
, nir_tex_src_sampler_deref
) == -1);
913 unsigned sampler_index
= tex
->sampler_index
;
915 int sampler_offset_idx
=
916 nir_tex_instr_src_index(tex
, nir_tex_src_sampler_offset
);
917 if (sampler_offset_idx
>= 0) {
918 if (!nir_src_is_const(tex
->src
[sampler_offset_idx
].src
))
921 sampler_index
+= nir_src_as_uint(tex
->src
[sampler_offset_idx
].src
);
924 return sampler_index
< max
;
928 nir_lower_tex_block(nir_block
*block
, nir_builder
*b
,
929 const nir_lower_tex_options
*options
)
931 bool progress
= false;
933 nir_foreach_instr_safe(instr
, block
) {
934 if (instr
->type
!= nir_instr_type_tex
)
937 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
938 bool lower_txp
= !!(options
->lower_txp
& (1 << tex
->sampler_dim
));
940 /* mask of src coords to saturate (clamp): */
941 unsigned sat_mask
= 0;
943 if ((1 << tex
->sampler_index
) & options
->saturate_r
)
944 sat_mask
|= (1 << 2); /* .z */
945 if ((1 << tex
->sampler_index
) & options
->saturate_t
)
946 sat_mask
|= (1 << 1); /* .y */
947 if ((1 << tex
->sampler_index
) & options
->saturate_s
)
948 sat_mask
|= (1 << 0); /* .x */
950 /* If we are clamping any coords, we must lower projector first
951 * as clamping happens *after* projection:
953 if (lower_txp
|| sat_mask
) {
958 if ((tex
->op
== nir_texop_txf
&& options
->lower_txf_offset
) ||
959 (sat_mask
&& nir_tex_instr_src_index(tex
, nir_tex_src_coord
) >= 0) ||
960 (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
&&
961 options
->lower_rect_offset
)) {
962 progress
= lower_offset(b
, tex
) || progress
;
965 if ((tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) && options
->lower_rect
) {
970 if ((1 << tex
->texture_index
) & options
->lower_y_uv_external
) {
971 lower_y_uv_external(b
, tex
, options
);
975 if ((1 << tex
->texture_index
) & options
->lower_y_u_v_external
) {
976 lower_y_u_v_external(b
, tex
, options
);
980 if ((1 << tex
->texture_index
) & options
->lower_yx_xuxv_external
) {
981 lower_yx_xuxv_external(b
, tex
, options
);
985 if ((1 << tex
->texture_index
) & options
->lower_xy_uxvx_external
) {
986 lower_xy_uxvx_external(b
, tex
, options
);
990 if ((1 << tex
->texture_index
) & options
->lower_ayuv_external
) {
991 lower_ayuv_external(b
, tex
, options
);
995 if ((1 << tex
->texture_index
) & options
->lower_xyuv_external
) {
996 lower_xyuv_external(b
, tex
, options
);
1001 saturate_src(b
, tex
, sat_mask
);
1005 if (tex
->op
== nir_texop_tg4
&& options
->lower_tg4_broadcom_swizzle
) {
1006 swizzle_tg4_broadcom(b
, tex
);
1010 if (((1 << tex
->texture_index
) & options
->swizzle_result
) &&
1011 !nir_tex_instr_is_query(tex
) &&
1012 !(tex
->is_shadow
&& tex
->is_new_style_shadow
)) {
1013 swizzle_result(b
, tex
, options
->swizzles
[tex
->texture_index
]);
1017 /* should be after swizzle so we know which channels are rgb: */
1018 if (((1 << tex
->texture_index
) & options
->lower_srgb
) &&
1019 !nir_tex_instr_is_query(tex
) && !tex
->is_shadow
) {
1020 linearize_srgb_result(b
, tex
);
1024 const bool has_min_lod
=
1025 nir_tex_instr_src_index(tex
, nir_tex_src_min_lod
) >= 0;
1026 const bool has_offset
=
1027 nir_tex_instr_src_index(tex
, nir_tex_src_offset
) >= 0;
1029 if (tex
->op
== nir_texop_txb
&& tex
->is_shadow
&& has_min_lod
&&
1030 options
->lower_txb_shadow_clamp
) {
1031 lower_implicit_lod(b
, tex
);
1035 if (options
->lower_tex_packing
[tex
->sampler_index
] !=
1036 nir_lower_tex_packing_none
&&
1037 tex
->op
!= nir_texop_txs
&&
1038 tex
->op
!= nir_texop_query_levels
) {
1039 lower_tex_packing(b
, tex
, options
);
1043 if (tex
->op
== nir_texop_txd
&&
1044 (options
->lower_txd
||
1045 (options
->lower_txd_shadow
&& tex
->is_shadow
) ||
1046 (options
->lower_txd_shadow_clamp
&& tex
->is_shadow
&& has_min_lod
) ||
1047 (options
->lower_txd_offset_clamp
&& has_offset
&& has_min_lod
) ||
1048 (options
->lower_txd_clamp_if_sampler_index_not_lt_16
&&
1049 has_min_lod
&& !sampler_index_lt(tex
, 16)) ||
1050 (options
->lower_txd_cube_map
&&
1051 tex
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
) ||
1052 (options
->lower_txd_3d
&&
1053 tex
->sampler_dim
== GLSL_SAMPLER_DIM_3D
))) {
1054 lower_gradient(b
, tex
);
1059 /* TXF, TXS and TXL require a LOD but not everything we implement using those
1060 * three opcodes provides one. Provide a default LOD of 0.
1062 if ((nir_tex_instr_src_index(tex
, nir_tex_src_lod
) == -1) &&
1063 (tex
->op
== nir_texop_txf
|| tex
->op
== nir_texop_txs
||
1064 tex
->op
== nir_texop_txl
|| tex
->op
== nir_texop_query_levels
||
1065 (tex
->op
== nir_texop_tex
&&
1066 b
->shader
->info
.stage
!= MESA_SHADER_FRAGMENT
))) {
1067 b
->cursor
= nir_before_instr(&tex
->instr
);
1068 nir_tex_instr_add_src(tex
, nir_tex_src_lod
, nir_src_for_ssa(nir_imm_int(b
, 0)));
1078 nir_lower_tex_impl(nir_function_impl
*impl
,
1079 const nir_lower_tex_options
*options
)
1081 bool progress
= false;
1082 nir_builder builder
;
1083 nir_builder_init(&builder
, impl
);
1085 nir_foreach_block(block
, impl
) {
1086 progress
|= nir_lower_tex_block(block
, &builder
, options
);
1089 nir_metadata_preserve(impl
, nir_metadata_block_index
|
1090 nir_metadata_dominance
);
1095 nir_lower_tex(nir_shader
*shader
, const nir_lower_tex_options
*options
)
1097 bool progress
= false;
1099 nir_foreach_function(function
, shader
) {
1101 progress
|= nir_lower_tex_impl(function
->impl
, options
);