2 * Copyright © 2015 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 * + texture projector lowering: converts the coordinate division for
28 * texture projection to be done in ALU instructions instead of
29 * asking the texture operation to do so.
30 * + lowering RECT: converts the un-normalized RECT texture coordinates
31 * to normalized coordinates with txs plus ALU instructions
32 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 * inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 * Note that this automatically triggers texture projector lowering if
35 * needed, since clamping must happen after projector lowering.
39 #include "nir_builder.h"
40 #include "nir_builtin_builder.h"
41 #include "nir_format_convert.h"
/*
 * YUV -> RGB conversion coefficients consumed by convert_yuv_to_rgb().
 *
 * Every coefficient table holds three groups of three values: entries
 * [0..2] are multiplied by the Y sample, [3..5] by the U sample and
 * [6..8] by the V sample.  Within a group the three values feed the
 * first, second and third channel of the result.
 */
static float bt601_csc_coeffs[9] = {
   /* Y */ 1.16438356f,  1.16438356f, 1.16438356f,
   /* U */ 0.0f,        -0.39176229f, 2.01723214f,
   /* V */ 1.59602678f, -0.81296764f, 0.0f,
};

static float bt709_csc_coeffs[9] = {
   /* Y */ 1.16438356f,  1.16438356f, 1.16438356f,
   /* U */ 0.0f,        -0.21324861f, 2.11240179f,
   /* V */ 1.79274107f, -0.53290933f, 0.0f,
};

static float bt2020_csc_coeffs[9] = {
   /* Y */ 1.16438356f,  1.16438356f, 1.16438356f,
   /* U */ 0.0f,        -0.18732610f, 2.14177232f,
   /* V */ 1.67867411f, -0.65042432f, 0.0f,
};

/*
 * Constant terms added to the first three result channels after the
 * multiplication by the matching coefficient table above.
 */
static float bt601_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};

static float bt709_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};

static float bt2020_csc_offsets[3] = {
   -0.915687932f, 0.347458499f, -1.148145075f
};
70 project_src(nir_builder
*b
, nir_tex_instr
*tex
)
72 /* Find the projector in the srcs list, if present. */
73 int proj_index
= nir_tex_instr_src_index(tex
, nir_tex_src_projector
);
77 b
->cursor
= nir_before_instr(&tex
->instr
);
79 nir_ssa_def
*inv_proj
=
80 nir_frcp(b
, nir_ssa_for_src(b
, tex
->src
[proj_index
].src
, 1));
82 /* Walk through the sources projecting the arguments. */
83 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
84 switch (tex
->src
[i
].src_type
) {
85 case nir_tex_src_coord
:
86 case nir_tex_src_comparator
:
91 nir_ssa_def
*unprojected
=
92 nir_ssa_for_src(b
, tex
->src
[i
].src
, nir_tex_instr_src_size(tex
, i
));
93 nir_ssa_def
*projected
= nir_fmul(b
, unprojected
, inv_proj
);
95 /* Array indices don't get projected, so make an new vector with the
96 * coordinate's array index untouched.
98 if (tex
->is_array
&& tex
->src
[i
].src_type
== nir_tex_src_coord
) {
99 switch (tex
->coord_components
) {
101 projected
= nir_vec4(b
,
102 nir_channel(b
, projected
, 0),
103 nir_channel(b
, projected
, 1),
104 nir_channel(b
, projected
, 2),
105 nir_channel(b
, unprojected
, 3));
108 projected
= nir_vec3(b
,
109 nir_channel(b
, projected
, 0),
110 nir_channel(b
, projected
, 1),
111 nir_channel(b
, unprojected
, 2));
114 projected
= nir_vec2(b
,
115 nir_channel(b
, projected
, 0),
116 nir_channel(b
, unprojected
, 1));
119 unreachable("bad texture coord count for array");
124 nir_instr_rewrite_src(&tex
->instr
,
126 nir_src_for_ssa(projected
));
129 nir_tex_instr_remove_src(tex
, proj_index
);
134 lower_offset(nir_builder
*b
, nir_tex_instr
*tex
)
136 int offset_index
= nir_tex_instr_src_index(tex
, nir_tex_src_offset
);
137 if (offset_index
< 0)
140 int coord_index
= nir_tex_instr_src_index(tex
, nir_tex_src_coord
);
141 assert(coord_index
>= 0);
143 assert(tex
->src
[offset_index
].src
.is_ssa
);
144 assert(tex
->src
[coord_index
].src
.is_ssa
);
145 nir_ssa_def
*offset
= tex
->src
[offset_index
].src
.ssa
;
146 nir_ssa_def
*coord
= tex
->src
[coord_index
].src
.ssa
;
148 b
->cursor
= nir_before_instr(&tex
->instr
);
150 nir_ssa_def
*offset_coord
;
151 if (nir_tex_instr_src_type(tex
, coord_index
) == nir_type_float
) {
152 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) {
153 offset_coord
= nir_fadd(b
, coord
, nir_i2f32(b
, offset
));
155 nir_ssa_def
*txs
= nir_i2f32(b
, nir_get_texture_size(b
, tex
));
156 nir_ssa_def
*scale
= nir_frcp(b
, txs
);
158 offset_coord
= nir_fadd(b
, coord
,
160 nir_i2f32(b
, offset
),
164 offset_coord
= nir_iadd(b
, coord
, offset
);
168 /* The offset is not applied to the array index */
169 if (tex
->coord_components
== 2) {
170 offset_coord
= nir_vec2(b
, nir_channel(b
, offset_coord
, 0),
171 nir_channel(b
, coord
, 1));
172 } else if (tex
->coord_components
== 3) {
173 offset_coord
= nir_vec3(b
, nir_channel(b
, offset_coord
, 0),
174 nir_channel(b
, offset_coord
, 1),
175 nir_channel(b
, coord
, 2));
177 unreachable("Invalid number of components");
181 nir_instr_rewrite_src(&tex
->instr
, &tex
->src
[coord_index
].src
,
182 nir_src_for_ssa(offset_coord
));
184 nir_tex_instr_remove_src(tex
, offset_index
);
190 lower_rect(nir_builder
*b
, nir_tex_instr
*tex
)
192 /* Set the sampler_dim to 2D here so that get_texture_size picks up the
193 * right dimensionality.
195 tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
197 nir_ssa_def
*txs
= nir_i2f32(b
, nir_get_texture_size(b
, tex
));
198 nir_ssa_def
*scale
= nir_frcp(b
, txs
);
200 /* Walk through the sources normalizing the requested arguments. */
201 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
202 if (tex
->src
[i
].src_type
!= nir_tex_src_coord
)
205 nir_ssa_def
*coords
=
206 nir_ssa_for_src(b
, tex
->src
[i
].src
, tex
->coord_components
);
207 nir_instr_rewrite_src(&tex
->instr
,
209 nir_src_for_ssa(nir_fmul(b
, coords
, scale
)));
214 lower_implicit_lod(nir_builder
*b
, nir_tex_instr
*tex
)
216 assert(tex
->op
== nir_texop_tex
|| tex
->op
== nir_texop_txb
);
217 assert(nir_tex_instr_src_index(tex
, nir_tex_src_lod
) < 0);
218 assert(nir_tex_instr_src_index(tex
, nir_tex_src_ddx
) < 0);
219 assert(nir_tex_instr_src_index(tex
, nir_tex_src_ddy
) < 0);
221 b
->cursor
= nir_before_instr(&tex
->instr
);
223 nir_ssa_def
*lod
= nir_get_texture_lod(b
, tex
);
225 int bias_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_bias
);
227 /* If we have a bias, add it in */
228 lod
= nir_fadd(b
, lod
, nir_ssa_for_src(b
, tex
->src
[bias_idx
].src
, 1));
229 nir_tex_instr_remove_src(tex
, bias_idx
);
232 int min_lod_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_min_lod
);
233 if (min_lod_idx
>= 0) {
234 /* If we have a minimum LOD, clamp LOD accordingly */
235 lod
= nir_fmax(b
, lod
, nir_ssa_for_src(b
, tex
->src
[min_lod_idx
].src
, 1));
236 nir_tex_instr_remove_src(tex
, min_lod_idx
);
239 nir_tex_instr_add_src(tex
, nir_tex_src_lod
, nir_src_for_ssa(lod
));
240 tex
->op
= nir_texop_txl
;
244 sample_plane(nir_builder
*b
, nir_tex_instr
*tex
, int plane
,
245 const nir_lower_tex_options
*options
)
247 assert(tex
->dest
.is_ssa
);
248 assert(nir_tex_instr_dest_size(tex
) == 4);
249 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
250 assert(tex
->op
== nir_texop_tex
);
251 assert(tex
->coord_components
== 2);
253 nir_tex_instr
*plane_tex
=
254 nir_tex_instr_create(b
->shader
, tex
->num_srcs
+ 1);
255 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
256 nir_src_copy(&plane_tex
->src
[i
].src
, &tex
->src
[i
].src
, plane_tex
);
257 plane_tex
->src
[i
].src_type
= tex
->src
[i
].src_type
;
259 plane_tex
->src
[tex
->num_srcs
].src
= nir_src_for_ssa(nir_imm_int(b
, plane
));
260 plane_tex
->src
[tex
->num_srcs
].src_type
= nir_tex_src_plane
;
261 plane_tex
->op
= nir_texop_tex
;
262 plane_tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
263 plane_tex
->dest_type
= nir_type_float
;
264 plane_tex
->coord_components
= 2;
266 plane_tex
->texture_index
= tex
->texture_index
;
267 plane_tex
->sampler_index
= tex
->sampler_index
;
269 nir_ssa_dest_init(&plane_tex
->instr
, &plane_tex
->dest
, 4,
270 nir_dest_bit_size(tex
->dest
), NULL
);
272 nir_builder_instr_insert(b
, &plane_tex
->instr
);
274 /* If scaling_factor is set, return a scaled value. */
275 if (options
->scale_factors
[tex
->texture_index
])
276 return nir_fmul_imm(b
, &plane_tex
->dest
.ssa
,
277 options
->scale_factors
[tex
->texture_index
]);
279 return &plane_tex
->dest
.ssa
;
283 convert_yuv_to_rgb(nir_builder
*b
, nir_tex_instr
*tex
,
284 nir_ssa_def
*y
, nir_ssa_def
*u
, nir_ssa_def
*v
,
286 const nir_lower_tex_options
*options
)
291 assert((options
->bt709_external
& options
->bt2020_external
) == 0);
292 if (options
->bt709_external
& (1 << tex
->texture_index
)) {
293 m_vals
= bt709_csc_coeffs
;
294 offset_vals
= bt709_csc_offsets
;
295 } else if (options
->bt2020_external
& (1 << tex
->texture_index
)) {
296 m_vals
= bt2020_csc_coeffs
;
297 offset_vals
= bt2020_csc_offsets
;
299 m_vals
= bt601_csc_coeffs
;
300 offset_vals
= bt601_csc_offsets
;
303 nir_const_value m
[3][4] = {
304 { { .f32
= m_vals
[0] }, { .f32
= m_vals
[1] }, { .f32
= m_vals
[2] }, { .f32
= 0.0f
} },
305 { { .f32
= m_vals
[3] }, { .f32
= m_vals
[4] }, { .f32
= m_vals
[5] }, { .f32
= 0.0f
} },
306 { { .f32
= m_vals
[6] }, { .f32
= m_vals
[7] }, { .f32
= m_vals
[8] }, { .f32
= 0.0f
} },
308 unsigned bit_size
= nir_dest_bit_size(tex
->dest
);
310 nir_ssa_def
*offset
=
312 nir_imm_float(b
, offset_vals
[0]),
313 nir_imm_float(b
, offset_vals
[1]),
314 nir_imm_float(b
, offset_vals
[2]),
317 offset
= nir_f2fN(b
, offset
, bit_size
);
319 nir_ssa_def
*m0
= nir_f2fN(b
, nir_build_imm(b
, 4, 32, m
[0]), bit_size
);
320 nir_ssa_def
*m1
= nir_f2fN(b
, nir_build_imm(b
, 4, 32, m
[1]), bit_size
);
321 nir_ssa_def
*m2
= nir_f2fN(b
, nir_build_imm(b
, 4, 32, m
[2]), bit_size
);
323 nir_ssa_def
*result
=
324 nir_ffma(b
, y
, m0
, nir_ffma(b
, u
, m1
, nir_ffma(b
, v
, m2
, offset
)));
326 nir_ssa_def_rewrite_uses(&tex
->dest
.ssa
, nir_src_for_ssa(result
));
330 lower_y_uv_external(nir_builder
*b
, nir_tex_instr
*tex
,
331 const nir_lower_tex_options
*options
)
333 b
->cursor
= nir_after_instr(&tex
->instr
);
335 nir_ssa_def
*y
= sample_plane(b
, tex
, 0, options
);
336 nir_ssa_def
*uv
= sample_plane(b
, tex
, 1, options
);
338 convert_yuv_to_rgb(b
, tex
,
339 nir_channel(b
, y
, 0),
340 nir_channel(b
, uv
, 0),
341 nir_channel(b
, uv
, 1),
342 nir_imm_float(b
, 1.0f
),
347 lower_y_u_v_external(nir_builder
*b
, nir_tex_instr
*tex
,
348 const nir_lower_tex_options
*options
)
350 b
->cursor
= nir_after_instr(&tex
->instr
);
352 nir_ssa_def
*y
= sample_plane(b
, tex
, 0, options
);
353 nir_ssa_def
*u
= sample_plane(b
, tex
, 1, options
);
354 nir_ssa_def
*v
= sample_plane(b
, tex
, 2, options
);
356 convert_yuv_to_rgb(b
, tex
,
357 nir_channel(b
, y
, 0),
358 nir_channel(b
, u
, 0),
359 nir_channel(b
, v
, 0),
360 nir_imm_float(b
, 1.0f
),
365 lower_yx_xuxv_external(nir_builder
*b
, nir_tex_instr
*tex
,
366 const nir_lower_tex_options
*options
)
368 b
->cursor
= nir_after_instr(&tex
->instr
);
370 nir_ssa_def
*y
= sample_plane(b
, tex
, 0, options
);
371 nir_ssa_def
*xuxv
= sample_plane(b
, tex
, 1, options
);
373 convert_yuv_to_rgb(b
, tex
,
374 nir_channel(b
, y
, 0),
375 nir_channel(b
, xuxv
, 1),
376 nir_channel(b
, xuxv
, 3),
377 nir_imm_float(b
, 1.0f
),
382 lower_xy_uxvx_external(nir_builder
*b
, nir_tex_instr
*tex
,
383 const nir_lower_tex_options
*options
)
385 b
->cursor
= nir_after_instr(&tex
->instr
);
387 nir_ssa_def
*y
= sample_plane(b
, tex
, 0, options
);
388 nir_ssa_def
*uxvx
= sample_plane(b
, tex
, 1, options
);
390 convert_yuv_to_rgb(b
, tex
,
391 nir_channel(b
, y
, 1),
392 nir_channel(b
, uxvx
, 0),
393 nir_channel(b
, uxvx
, 2),
394 nir_imm_float(b
, 1.0f
),
399 lower_ayuv_external(nir_builder
*b
, nir_tex_instr
*tex
,
400 const nir_lower_tex_options
*options
)
402 b
->cursor
= nir_after_instr(&tex
->instr
);
404 nir_ssa_def
*ayuv
= sample_plane(b
, tex
, 0, options
);
406 convert_yuv_to_rgb(b
, tex
,
407 nir_channel(b
, ayuv
, 2),
408 nir_channel(b
, ayuv
, 1),
409 nir_channel(b
, ayuv
, 0),
410 nir_channel(b
, ayuv
, 3),
415 lower_xyuv_external(nir_builder
*b
, nir_tex_instr
*tex
,
416 const nir_lower_tex_options
*options
)
418 b
->cursor
= nir_after_instr(&tex
->instr
);
420 nir_ssa_def
*xyuv
= sample_plane(b
, tex
, 0, options
);
422 convert_yuv_to_rgb(b
, tex
,
423 nir_channel(b
, xyuv
, 2),
424 nir_channel(b
, xyuv
, 1),
425 nir_channel(b
, xyuv
, 0),
426 nir_imm_float(b
, 1.0f
),
431 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
432 * computed from the gradients.
435 replace_gradient_with_lod(nir_builder
*b
, nir_ssa_def
*lod
, nir_tex_instr
*tex
)
437 assert(tex
->op
== nir_texop_txd
);
439 nir_tex_instr_remove_src(tex
, nir_tex_instr_src_index(tex
, nir_tex_src_ddx
));
440 nir_tex_instr_remove_src(tex
, nir_tex_instr_src_index(tex
, nir_tex_src_ddy
));
442 int min_lod_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_min_lod
);
443 if (min_lod_idx
>= 0) {
444 /* If we have a minimum LOD, clamp LOD accordingly */
445 lod
= nir_fmax(b
, lod
, nir_ssa_for_src(b
, tex
->src
[min_lod_idx
].src
, 1));
446 nir_tex_instr_remove_src(tex
, min_lod_idx
);
449 nir_tex_instr_add_src(tex
, nir_tex_src_lod
, nir_src_for_ssa(lod
));
450 tex
->op
= nir_texop_txl
;
454 lower_gradient_cube_map(nir_builder
*b
, nir_tex_instr
*tex
)
456 assert(tex
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
);
457 assert(tex
->op
== nir_texop_txd
);
458 assert(tex
->dest
.is_ssa
);
460 /* Use textureSize() to get the width and height of LOD 0 */
461 nir_ssa_def
*size
= nir_i2f32(b
, nir_get_texture_size(b
, tex
));
463 /* Cubemap texture lookups first generate a texture coordinate normalized
464 * to [-1, 1] on the appropiate face. The appropiate face is determined
465 * by which component has largest magnitude and its sign. The texture
466 * coordinate is the quotient of the remaining texture coordinates against
467 * that absolute value of the component of largest magnitude. This
468 * division requires that the computing of the derivative of the texel
469 * coordinate must use the quotient rule. The high level GLSL code is as
474 * vec3 abs_p, Q, dQdx, dQdy;
475 * abs_p = abs(ir->coordinate);
476 * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
477 * Q = ir->coordinate.yzx;
478 * dQdx = ir->lod_info.grad.dPdx.yzx;
479 * dQdy = ir->lod_info.grad.dPdy.yzx;
481 * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
482 * Q = ir->coordinate.xzy;
483 * dQdx = ir->lod_info.grad.dPdx.xzy;
484 * dQdy = ir->lod_info.grad.dPdy.xzy;
486 * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
487 * Q = ir->coordinate;
488 * dQdx = ir->lod_info.grad.dPdx;
489 * dQdy = ir->lod_info.grad.dPdy;
492 * Step 2: use quotient rule to compute derivative. The normalized to
493 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
494 * only concerned with the magnitudes of the derivatives whose values are
495 * not affected by the sign. We drop the sign from the computation.
501 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
502 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
504 * Step 3: compute LOD. At this point we have the derivatives of the
505 * texture coordinates normalized to [-1,1]. We take the LOD to be
506 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
507 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
508 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
509 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
510 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
511 * where L is the dimension of the cubemap. The code is:
514 * M = max(dot(dx, dx), dot(dy, dy));
515 * L = textureSize(sampler, 0).x;
516 * result = -1.0 + 0.5 * log2(L * L * M);
521 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_coord
)].src
.ssa
;
523 /* unmodified dPdx, dPdy values */
525 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddx
)].src
.ssa
;
527 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddy
)].src
.ssa
;
529 nir_ssa_def
*abs_p
= nir_fabs(b
, p
);
530 nir_ssa_def
*abs_p_x
= nir_channel(b
, abs_p
, 0);
531 nir_ssa_def
*abs_p_y
= nir_channel(b
, abs_p
, 1);
532 nir_ssa_def
*abs_p_z
= nir_channel(b
, abs_p
, 2);
534 /* 1. compute selector */
535 nir_ssa_def
*Q
, *dQdx
, *dQdy
;
537 nir_ssa_def
*cond_z
= nir_fge(b
, abs_p_z
, nir_fmax(b
, abs_p_x
, abs_p_y
));
538 nir_ssa_def
*cond_y
= nir_fge(b
, abs_p_y
, nir_fmax(b
, abs_p_x
, abs_p_z
));
540 unsigned yzx
[3] = { 1, 2, 0 };
541 unsigned xzy
[3] = { 0, 2, 1 };
543 Q
= nir_bcsel(b
, cond_z
,
546 nir_swizzle(b
, p
, xzy
, 3),
547 nir_swizzle(b
, p
, yzx
, 3)));
549 dQdx
= nir_bcsel(b
, cond_z
,
552 nir_swizzle(b
, dPdx
, xzy
, 3),
553 nir_swizzle(b
, dPdx
, yzx
, 3)));
555 dQdy
= nir_bcsel(b
, cond_z
,
558 nir_swizzle(b
, dPdy
, xzy
, 3),
559 nir_swizzle(b
, dPdy
, yzx
, 3)));
561 /* 2. quotient rule */
563 /* tmp = Q.xy * recip;
564 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
565 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
567 nir_ssa_def
*rcp_Q_z
= nir_frcp(b
, nir_channel(b
, Q
, 2));
569 nir_ssa_def
*Q_xy
= nir_channels(b
, Q
, 0x3);
570 nir_ssa_def
*tmp
= nir_fmul(b
, Q_xy
, rcp_Q_z
);
572 nir_ssa_def
*dQdx_xy
= nir_channels(b
, dQdx
, 0x3);
573 nir_ssa_def
*dQdx_z
= nir_channel(b
, dQdx
, 2);
575 nir_fmul(b
, rcp_Q_z
, nir_fsub(b
, dQdx_xy
, nir_fmul(b
, tmp
, dQdx_z
)));
577 nir_ssa_def
*dQdy_xy
= nir_channels(b
, dQdy
, 0x3);
578 nir_ssa_def
*dQdy_z
= nir_channel(b
, dQdy
, 2);
580 nir_fmul(b
, rcp_Q_z
, nir_fsub(b
, dQdy_xy
, nir_fmul(b
, tmp
, dQdy_z
)));
582 /* M = max(dot(dx, dx), dot(dy, dy)); */
583 nir_ssa_def
*M
= nir_fmax(b
, nir_fdot(b
, dx
, dx
), nir_fdot(b
, dy
, dy
));
585 /* size has textureSize() of LOD 0 */
586 nir_ssa_def
*L
= nir_channel(b
, size
, 0);
588 /* lod = -1.0 + 0.5 * log2(L * L * M); */
591 nir_imm_float(b
, -1.0f
),
593 nir_imm_float(b
, 0.5f
),
594 nir_flog2(b
, nir_fmul(b
, L
, nir_fmul(b
, L
, M
)))));
596 /* 3. Replace the gradient instruction with an equivalent lod instruction */
597 replace_gradient_with_lod(b
, lod
, tex
);
601 lower_gradient(nir_builder
*b
, nir_tex_instr
*tex
)
603 /* Cubes are more complicated and have their own function */
604 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
) {
605 lower_gradient_cube_map(b
, tex
);
609 assert(tex
->sampler_dim
!= GLSL_SAMPLER_DIM_CUBE
);
610 assert(tex
->op
== nir_texop_txd
);
611 assert(tex
->dest
.is_ssa
);
613 /* Use textureSize() to get the width and height of LOD 0 */
614 unsigned component_mask
;
615 switch (tex
->sampler_dim
) {
616 case GLSL_SAMPLER_DIM_3D
:
619 case GLSL_SAMPLER_DIM_1D
:
628 nir_channels(b
, nir_i2f32(b
, nir_get_texture_size(b
, tex
)),
631 /* Scale the gradients by width and height. Effectively, the incoming
632 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
633 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
636 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddx
)].src
.ssa
;
638 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddy
)].src
.ssa
;
640 nir_ssa_def
*dPdx
= nir_fmul(b
, ddx
, size
);
641 nir_ssa_def
*dPdy
= nir_fmul(b
, ddy
, size
);
644 if (dPdx
->num_components
== 1) {
645 rho
= nir_fmax(b
, nir_fabs(b
, dPdx
), nir_fabs(b
, dPdy
));
648 nir_fsqrt(b
, nir_fdot(b
, dPdx
, dPdx
)),
649 nir_fsqrt(b
, nir_fdot(b
, dPdy
, dPdy
)));
652 /* lod = log2(rho). We're ignoring GL state biases for now. */
653 nir_ssa_def
*lod
= nir_flog2(b
, rho
);
655 /* Replace the gradient instruction with an equivalent lod instruction */
656 replace_gradient_with_lod(b
, lod
, tex
);
660 saturate_src(nir_builder
*b
, nir_tex_instr
*tex
, unsigned sat_mask
)
662 b
->cursor
= nir_before_instr(&tex
->instr
);
664 /* Walk through the sources saturating the requested arguments. */
665 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
666 if (tex
->src
[i
].src_type
!= nir_tex_src_coord
)
670 nir_ssa_for_src(b
, tex
->src
[i
].src
, tex
->coord_components
);
672 /* split src into components: */
673 nir_ssa_def
*comp
[4];
675 assume(tex
->coord_components
>= 1);
677 for (unsigned j
= 0; j
< tex
->coord_components
; j
++)
678 comp
[j
] = nir_channel(b
, src
, j
);
680 /* clamp requested components, array index does not get clamped: */
681 unsigned ncomp
= tex
->coord_components
;
685 for (unsigned j
= 0; j
< ncomp
; j
++) {
686 if ((1 << j
) & sat_mask
) {
687 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) {
688 /* non-normalized texture coords, so clamp to texture
689 * size rather than [0.0, 1.0]
691 nir_ssa_def
*txs
= nir_i2f32(b
, nir_get_texture_size(b
, tex
));
692 comp
[j
] = nir_fmax(b
, comp
[j
], nir_imm_float(b
, 0.0));
693 comp
[j
] = nir_fmin(b
, comp
[j
], nir_channel(b
, txs
, j
));
695 comp
[j
] = nir_fsat(b
, comp
[j
]);
700 /* and move the result back into a single vecN: */
701 src
= nir_vec(b
, comp
, tex
->coord_components
);
703 nir_instr_rewrite_src(&tex
->instr
,
705 nir_src_for_ssa(src
));
710 get_zero_or_one(nir_builder
*b
, nir_alu_type type
, uint8_t swizzle_val
)
712 nir_const_value v
[4];
714 memset(&v
, 0, sizeof(v
));
716 if (swizzle_val
== 4) {
717 v
[0].u32
= v
[1].u32
= v
[2].u32
= v
[3].u32
= 0;
719 assert(swizzle_val
== 5);
720 if (type
== nir_type_float
)
721 v
[0].f32
= v
[1].f32
= v
[2].f32
= v
[3].f32
= 1.0;
723 v
[0].u32
= v
[1].u32
= v
[2].u32
= v
[3].u32
= 1;
726 return nir_build_imm(b
, 4, 32, v
);
730 swizzle_tg4_broadcom(nir_builder
*b
, nir_tex_instr
*tex
)
732 assert(tex
->dest
.is_ssa
);
734 b
->cursor
= nir_after_instr(&tex
->instr
);
736 assert(nir_tex_instr_dest_size(tex
) == 4);
737 unsigned swiz
[4] = { 2, 3, 1, 0 };
738 nir_ssa_def
*swizzled
= nir_swizzle(b
, &tex
->dest
.ssa
, swiz
, 4);
740 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(swizzled
),
741 swizzled
->parent_instr
);
745 swizzle_result(nir_builder
*b
, nir_tex_instr
*tex
, const uint8_t swizzle
[4])
747 assert(tex
->dest
.is_ssa
);
749 b
->cursor
= nir_after_instr(&tex
->instr
);
751 nir_ssa_def
*swizzled
;
752 if (tex
->op
== nir_texop_tg4
) {
753 if (swizzle
[tex
->component
] < 4) {
754 /* This one's easy */
755 tex
->component
= swizzle
[tex
->component
];
758 swizzled
= get_zero_or_one(b
, tex
->dest_type
, swizzle
[tex
->component
]);
761 assert(nir_tex_instr_dest_size(tex
) == 4);
762 if (swizzle
[0] < 4 && swizzle
[1] < 4 &&
763 swizzle
[2] < 4 && swizzle
[3] < 4) {
764 unsigned swiz
[4] = { swizzle
[0], swizzle
[1], swizzle
[2], swizzle
[3] };
765 /* We have no 0s or 1s, just emit a swizzling MOV */
766 swizzled
= nir_swizzle(b
, &tex
->dest
.ssa
, swiz
, 4);
768 nir_ssa_def
*srcs
[4];
769 for (unsigned i
= 0; i
< 4; i
++) {
770 if (swizzle
[i
] < 4) {
771 srcs
[i
] = nir_channel(b
, &tex
->dest
.ssa
, swizzle
[i
]);
773 srcs
[i
] = get_zero_or_one(b
, tex
->dest_type
, swizzle
[i
]);
776 swizzled
= nir_vec(b
, srcs
, 4);
780 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(swizzled
),
781 swizzled
->parent_instr
);
785 linearize_srgb_result(nir_builder
*b
, nir_tex_instr
*tex
)
787 assert(tex
->dest
.is_ssa
);
788 assert(nir_tex_instr_dest_size(tex
) == 4);
789 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
791 b
->cursor
= nir_after_instr(&tex
->instr
);
794 nir_format_srgb_to_linear(b
, nir_channels(b
, &tex
->dest
.ssa
, 0x7));
796 /* alpha is untouched: */
797 nir_ssa_def
*result
= nir_vec4(b
,
798 nir_channel(b
, rgb
, 0),
799 nir_channel(b
, rgb
, 1),
800 nir_channel(b
, rgb
, 2),
801 nir_channel(b
, &tex
->dest
.ssa
, 3));
803 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(result
),
804 result
->parent_instr
);
808 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
809 * i16, or u16, or a single unorm4x8 value.
811 * Note that we don't change the destination num_components, because
812 * nir_tex_instr_dest_size() will still return 4. The driver is just expected
813 * to not store the other channels, given that nothing at the NIR level will
817 lower_tex_packing(nir_builder
*b
, nir_tex_instr
*tex
,
818 const nir_lower_tex_options
*options
)
820 nir_ssa_def
*color
= &tex
->dest
.ssa
;
822 b
->cursor
= nir_after_instr(&tex
->instr
);
824 switch (options
->lower_tex_packing
[tex
->sampler_index
]) {
825 case nir_lower_tex_packing_none
:
828 case nir_lower_tex_packing_16
: {
829 static const unsigned bits
[4] = {16, 16, 16, 16};
831 switch (nir_alu_type_get_base_type(tex
->dest_type
)) {
833 switch (nir_tex_instr_dest_size(tex
)) {
835 assert(tex
->is_shadow
&& tex
->is_new_style_shadow
);
836 color
= nir_unpack_half_2x16_split_x(b
, nir_channel(b
, color
, 0));
839 nir_ssa_def
*rg
= nir_channel(b
, color
, 0);
841 nir_unpack_half_2x16_split_x(b
, rg
),
842 nir_unpack_half_2x16_split_y(b
, rg
));
846 nir_ssa_def
*rg
= nir_channel(b
, color
, 0);
847 nir_ssa_def
*ba
= nir_channel(b
, color
, 1);
849 nir_unpack_half_2x16_split_x(b
, rg
),
850 nir_unpack_half_2x16_split_y(b
, rg
),
851 nir_unpack_half_2x16_split_x(b
, ba
),
852 nir_unpack_half_2x16_split_y(b
, ba
));
856 unreachable("wrong dest_size");
861 color
= nir_format_unpack_sint(b
, color
, bits
, 4);
865 color
= nir_format_unpack_uint(b
, color
, bits
, 4);
869 unreachable("unknown base type");
874 case nir_lower_tex_packing_8
:
875 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
876 color
= nir_unpack_unorm_4x8(b
, nir_channel(b
, color
, 0));
880 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(color
),
881 color
->parent_instr
);
885 sampler_index_lt(nir_tex_instr
*tex
, unsigned max
)
887 assert(nir_tex_instr_src_index(tex
, nir_tex_src_sampler_deref
) == -1);
889 unsigned sampler_index
= tex
->sampler_index
;
891 int sampler_offset_idx
=
892 nir_tex_instr_src_index(tex
, nir_tex_src_sampler_offset
);
893 if (sampler_offset_idx
>= 0) {
894 if (!nir_src_is_const(tex
->src
[sampler_offset_idx
].src
))
897 sampler_index
+= nir_src_as_uint(tex
->src
[sampler_offset_idx
].src
);
900 return sampler_index
< max
;
904 lower_tg4_offsets(nir_builder
*b
, nir_tex_instr
*tex
)
906 assert(tex
->op
== nir_texop_tg4
);
907 assert(nir_tex_instr_has_explicit_tg4_offsets(tex
));
908 assert(nir_tex_instr_src_index(tex
, nir_tex_src_offset
) == -1);
910 b
->cursor
= nir_after_instr(&tex
->instr
);
912 nir_ssa_def
*dest
[4];
913 for (unsigned i
= 0; i
< 4; ++i
) {
914 nir_tex_instr
*tex_copy
= nir_tex_instr_create(b
->shader
, tex
->num_srcs
+ 1);
915 tex_copy
->op
= tex
->op
;
916 tex_copy
->coord_components
= tex
->coord_components
;
917 tex_copy
->sampler_dim
= tex
->sampler_dim
;
918 tex_copy
->is_array
= tex
->is_array
;
919 tex_copy
->is_shadow
= tex
->is_shadow
;
920 tex_copy
->is_new_style_shadow
= tex
->is_new_style_shadow
;
921 tex_copy
->component
= tex
->component
;
922 tex_copy
->dest_type
= tex
->dest_type
;
924 for (unsigned j
= 0; j
< tex
->num_srcs
; ++j
) {
925 nir_src_copy(&tex_copy
->src
[j
].src
, &tex
->src
[j
].src
, tex_copy
);
926 tex_copy
->src
[j
].src_type
= tex
->src
[j
].src_type
;
930 src
.src
= nir_src_for_ssa(nir_imm_ivec2(b
, tex
->tg4_offsets
[i
][0],
931 tex
->tg4_offsets
[i
][1]));
932 src
.src_type
= nir_tex_src_offset
;
933 tex_copy
->src
[tex_copy
->num_srcs
- 1] = src
;
935 nir_ssa_dest_init(&tex_copy
->instr
, &tex_copy
->dest
,
936 nir_tex_instr_dest_size(tex
), 32, NULL
);
938 nir_builder_instr_insert(b
, &tex_copy
->instr
);
940 dest
[i
] = nir_channel(b
, &tex_copy
->dest
.ssa
, 3);
943 nir_ssa_def
*res
= nir_vec4(b
, dest
[0], dest
[1], dest
[2], dest
[3]);
944 nir_ssa_def_rewrite_uses(&tex
->dest
.ssa
, nir_src_for_ssa(res
));
945 nir_instr_remove(&tex
->instr
);
951 nir_lower_txs_lod(nir_builder
*b
, nir_tex_instr
*tex
)
953 int lod_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_lod
);
955 (nir_src_is_const(tex
->src
[lod_idx
].src
) &&
956 nir_src_as_int(tex
->src
[lod_idx
].src
) == 0))
959 unsigned dest_size
= nir_tex_instr_dest_size(tex
);
961 b
->cursor
= nir_before_instr(&tex
->instr
);
962 nir_ssa_def
*lod
= nir_ssa_for_src(b
, tex
->src
[lod_idx
].src
, 1);
964 /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
965 nir_instr_rewrite_src(&tex
->instr
, &tex
->src
[lod_idx
].src
,
966 nir_src_for_ssa(nir_imm_int(b
, 0)));
968 /* TXS(LOD) = max(TXS(0) >> LOD, 1) */
969 b
->cursor
= nir_after_instr(&tex
->instr
);
970 nir_ssa_def
*minified
= nir_imax(b
, nir_ushr(b
, &tex
->dest
.ssa
, lod
),
973 /* Make sure the component encoding the array size (if any) is not
977 nir_ssa_def
*comp
[3];
979 assert(dest_size
<= ARRAY_SIZE(comp
));
980 for (unsigned i
= 0; i
< dest_size
- 1; i
++)
981 comp
[i
] = nir_channel(b
, minified
, i
);
983 comp
[dest_size
- 1] = nir_channel(b
, &tex
->dest
.ssa
, dest_size
- 1);
984 minified
= nir_vec(b
, comp
, dest_size
);
987 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(minified
),
988 minified
->parent_instr
);
993 nir_lower_tex_block(nir_block
*block
, nir_builder
*b
,
994 const nir_lower_tex_options
*options
)
996 bool progress
= false;
998 nir_foreach_instr_safe(instr
, block
) {
999 if (instr
->type
!= nir_instr_type_tex
)
1002 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
1003 bool lower_txp
= !!(options
->lower_txp
& (1 << tex
->sampler_dim
));
1005 /* mask of src coords to saturate (clamp): */
1006 unsigned sat_mask
= 0;
1008 if ((1 << tex
->sampler_index
) & options
->saturate_r
)
1009 sat_mask
|= (1 << 2); /* .z */
1010 if ((1 << tex
->sampler_index
) & options
->saturate_t
)
1011 sat_mask
|= (1 << 1); /* .y */
1012 if ((1 << tex
->sampler_index
) & options
->saturate_s
)
1013 sat_mask
|= (1 << 0); /* .x */
1015 /* If we are clamping any coords, we must lower projector first
1016 * as clamping happens *after* projection:
1018 if (lower_txp
|| sat_mask
) {
1019 progress
|= project_src(b
, tex
);
1022 if ((tex
->op
== nir_texop_txf
&& options
->lower_txf_offset
) ||
1023 (sat_mask
&& nir_tex_instr_src_index(tex
, nir_tex_src_coord
) >= 0) ||
1024 (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
&&
1025 options
->lower_rect_offset
)) {
1026 progress
= lower_offset(b
, tex
) || progress
;
1029 if ((tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) && options
->lower_rect
&&
1030 tex
->op
!= nir_texop_txf
&& !nir_tex_instr_is_query(tex
)) {
1035 if ((1 << tex
->texture_index
) & options
->lower_y_uv_external
) {
1036 lower_y_uv_external(b
, tex
, options
);
1040 if ((1 << tex
->texture_index
) & options
->lower_y_u_v_external
) {
1041 lower_y_u_v_external(b
, tex
, options
);
1045 if ((1 << tex
->texture_index
) & options
->lower_yx_xuxv_external
) {
1046 lower_yx_xuxv_external(b
, tex
, options
);
1050 if ((1 << tex
->texture_index
) & options
->lower_xy_uxvx_external
) {
1051 lower_xy_uxvx_external(b
, tex
, options
);
1055 if ((1 << tex
->texture_index
) & options
->lower_ayuv_external
) {
1056 lower_ayuv_external(b
, tex
, options
);
1060 if ((1 << tex
->texture_index
) & options
->lower_xyuv_external
) {
1061 lower_xyuv_external(b
, tex
, options
);
1066 saturate_src(b
, tex
, sat_mask
);
1070 if (tex
->op
== nir_texop_tg4
&& options
->lower_tg4_broadcom_swizzle
) {
1071 swizzle_tg4_broadcom(b
, tex
);
1075 if (((1 << tex
->texture_index
) & options
->swizzle_result
) &&
1076 !nir_tex_instr_is_query(tex
) &&
1077 !(tex
->is_shadow
&& tex
->is_new_style_shadow
)) {
1078 swizzle_result(b
, tex
, options
->swizzles
[tex
->texture_index
]);
1082 /* should be after swizzle so we know which channels are rgb: */
1083 if (((1 << tex
->texture_index
) & options
->lower_srgb
) &&
1084 !nir_tex_instr_is_query(tex
) && !tex
->is_shadow
) {
1085 linearize_srgb_result(b
, tex
);
1089 const bool has_min_lod
=
1090 nir_tex_instr_src_index(tex
, nir_tex_src_min_lod
) >= 0;
1091 const bool has_offset
=
1092 nir_tex_instr_src_index(tex
, nir_tex_src_offset
) >= 0;
1094 if (tex
->op
== nir_texop_txb
&& tex
->is_shadow
&& has_min_lod
&&
1095 options
->lower_txb_shadow_clamp
) {
1096 lower_implicit_lod(b
, tex
);
1100 if (options
->lower_tex_packing
[tex
->sampler_index
] !=
1101 nir_lower_tex_packing_none
&&
1102 tex
->op
!= nir_texop_txs
&&
1103 tex
->op
!= nir_texop_query_levels
&&
1104 tex
->op
!= nir_texop_texture_samples
) {
1105 lower_tex_packing(b
, tex
, options
);
1109 if (tex
->op
== nir_texop_txd
&&
1110 (options
->lower_txd
||
1111 (options
->lower_txd_shadow
&& tex
->is_shadow
) ||
1112 (options
->lower_txd_shadow_clamp
&& tex
->is_shadow
&& has_min_lod
) ||
1113 (options
->lower_txd_offset_clamp
&& has_offset
&& has_min_lod
) ||
1114 (options
->lower_txd_clamp_bindless_sampler
&& has_min_lod
&&
1115 nir_tex_instr_src_index(tex
, nir_tex_src_sampler_handle
) != -1) ||
1116 (options
->lower_txd_clamp_if_sampler_index_not_lt_16
&&
1117 has_min_lod
&& !sampler_index_lt(tex
, 16)) ||
1118 (options
->lower_txd_cube_map
&&
1119 tex
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
) ||
1120 (options
->lower_txd_3d
&&
1121 tex
->sampler_dim
== GLSL_SAMPLER_DIM_3D
))) {
1122 lower_gradient(b
, tex
);
1127 bool shader_supports_implicit_lod
=
1128 b
->shader
->info
.stage
== MESA_SHADER_FRAGMENT
||
1129 (b
->shader
->info
.stage
== MESA_SHADER_COMPUTE
&&
1130 b
->shader
->info
.cs
.derivative_group
!= DERIVATIVE_GROUP_NONE
);
1132 /* TXF, TXS and TXL require a LOD but not everything we implement using those
1133 * three opcodes provides one. Provide a default LOD of 0.
1135 if ((nir_tex_instr_src_index(tex
, nir_tex_src_lod
) == -1) &&
1136 (tex
->op
== nir_texop_txf
|| tex
->op
== nir_texop_txs
||
1137 tex
->op
== nir_texop_txl
|| tex
->op
== nir_texop_query_levels
||
1138 (tex
->op
== nir_texop_tex
&& !shader_supports_implicit_lod
))) {
1139 b
->cursor
= nir_before_instr(&tex
->instr
);
1140 nir_tex_instr_add_src(tex
, nir_tex_src_lod
, nir_src_for_ssa(nir_imm_int(b
, 0)));
1141 if (tex
->op
== nir_texop_tex
&& options
->lower_tex_without_implicit_lod
)
1142 tex
->op
= nir_texop_txl
;
1147 if (options
->lower_txs_lod
&& tex
->op
== nir_texop_txs
) {
1148 progress
|= nir_lower_txs_lod(b
, tex
);
1152 /* has to happen after all the other lowerings as the original tg4 gets
1153 * replaced by 4 tg4 instructions.
1155 if (tex
->op
== nir_texop_tg4
&&
1156 nir_tex_instr_has_explicit_tg4_offsets(tex
) &&
1157 options
->lower_tg4_offsets
) {
1158 progress
|= lower_tg4_offsets(b
, tex
);
1167 nir_lower_tex_impl(nir_function_impl
*impl
,
1168 const nir_lower_tex_options
*options
)
1170 bool progress
= false;
1171 nir_builder builder
;
1172 nir_builder_init(&builder
, impl
);
1174 nir_foreach_block(block
, impl
) {
1175 progress
|= nir_lower_tex_block(block
, &builder
, options
);
1178 nir_metadata_preserve(impl
, nir_metadata_block_index
|
1179 nir_metadata_dominance
);
1184 nir_lower_tex(nir_shader
*shader
, const nir_lower_tex_options
*options
)
1186 bool progress
= false;
1188 nir_foreach_function(function
, shader
) {
1190 progress
|= nir_lower_tex_impl(function
->impl
, options
);