2 * Copyright © 2015 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 * + texture projector lowering: converts the coordinate division for
28 * texture projection to be done in ALU instructions instead of
29 * asking the texture operation to do so.
30 * + lowering RECT: converts the un-normalized RECT texture coordinates
31 * to normalized coordinates with txs plus ALU instructions
32 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 * inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 * Note that this automatically triggers texture projector lowering if
35 * needed, since clamping must happen after projector lowering.
39 #include "nir_builder.h"
42 project_src(nir_builder
*b
, nir_tex_instr
*tex
)
44 /* Find the projector in the srcs list, if present. */
45 int proj_index
= nir_tex_instr_src_index(tex
, nir_tex_src_projector
);
49 b
->cursor
= nir_before_instr(&tex
->instr
);
51 nir_ssa_def
*inv_proj
=
52 nir_frcp(b
, nir_ssa_for_src(b
, tex
->src
[proj_index
].src
, 1));
54 /* Walk through the sources projecting the arguments. */
55 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
56 switch (tex
->src
[i
].src_type
) {
57 case nir_tex_src_coord
:
58 case nir_tex_src_comparator
:
63 nir_ssa_def
*unprojected
=
64 nir_ssa_for_src(b
, tex
->src
[i
].src
, nir_tex_instr_src_size(tex
, i
));
65 nir_ssa_def
*projected
= nir_fmul(b
, unprojected
, inv_proj
);
67 /* Array indices don't get projected, so make an new vector with the
68 * coordinate's array index untouched.
70 if (tex
->is_array
&& tex
->src
[i
].src_type
== nir_tex_src_coord
) {
71 switch (tex
->coord_components
) {
73 projected
= nir_vec4(b
,
74 nir_channel(b
, projected
, 0),
75 nir_channel(b
, projected
, 1),
76 nir_channel(b
, projected
, 2),
77 nir_channel(b
, unprojected
, 3));
80 projected
= nir_vec3(b
,
81 nir_channel(b
, projected
, 0),
82 nir_channel(b
, projected
, 1),
83 nir_channel(b
, unprojected
, 2));
86 projected
= nir_vec2(b
,
87 nir_channel(b
, projected
, 0),
88 nir_channel(b
, unprojected
, 1));
91 unreachable("bad texture coord count for array");
96 nir_instr_rewrite_src(&tex
->instr
,
98 nir_src_for_ssa(projected
));
101 nir_tex_instr_remove_src(tex
, proj_index
);
105 get_texture_size(nir_builder
*b
, nir_tex_instr
*tex
)
107 b
->cursor
= nir_before_instr(&tex
->instr
);
111 txs
= nir_tex_instr_create(b
->shader
, 1);
112 txs
->op
= nir_texop_txs
;
113 txs
->sampler_dim
= tex
->sampler_dim
;
114 txs
->is_array
= tex
->is_array
;
115 txs
->is_shadow
= tex
->is_shadow
;
116 txs
->is_new_style_shadow
= tex
->is_new_style_shadow
;
117 txs
->texture_index
= tex
->texture_index
;
118 txs
->texture
= nir_deref_var_clone(tex
->texture
, txs
);
119 txs
->sampler_index
= tex
->sampler_index
;
120 txs
->sampler
= nir_deref_var_clone(tex
->sampler
, txs
);
121 txs
->dest_type
= nir_type_int
;
123 /* only single src, the lod: */
124 txs
->src
[0].src
= nir_src_for_ssa(nir_imm_int(b
, 0));
125 txs
->src
[0].src_type
= nir_tex_src_lod
;
127 nir_ssa_dest_init(&txs
->instr
, &txs
->dest
,
128 nir_tex_instr_dest_size(txs
), 32, NULL
);
129 nir_builder_instr_insert(b
, &txs
->instr
);
131 return nir_i2f32(b
, &txs
->dest
.ssa
);
135 lower_offset(nir_builder
*b
, nir_tex_instr
*tex
)
137 int offset_index
= nir_tex_instr_src_index(tex
, nir_tex_src_offset
);
138 if (offset_index
< 0)
141 int coord_index
= nir_tex_instr_src_index(tex
, nir_tex_src_coord
);
142 assert(coord_index
>= 0);
144 assert(tex
->src
[offset_index
].src
.is_ssa
);
145 assert(tex
->src
[coord_index
].src
.is_ssa
);
146 nir_ssa_def
*offset
= tex
->src
[offset_index
].src
.ssa
;
147 nir_ssa_def
*coord
= tex
->src
[coord_index
].src
.ssa
;
149 b
->cursor
= nir_before_instr(&tex
->instr
);
151 nir_ssa_def
*offset_coord
;
152 if (nir_tex_instr_src_type(tex
, coord_index
) == nir_type_float
) {
153 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) {
154 offset_coord
= nir_fadd(b
, coord
, nir_i2f32(b
, offset
));
156 nir_ssa_def
*txs
= get_texture_size(b
, tex
);
157 nir_ssa_def
*scale
= nir_frcp(b
, txs
);
159 offset_coord
= nir_fadd(b
, coord
,
161 nir_i2f32(b
, offset
),
165 offset_coord
= nir_iadd(b
, coord
, offset
);
169 /* The offset is not applied to the array index */
170 if (tex
->coord_components
== 2) {
171 offset_coord
= nir_vec2(b
, nir_channel(b
, offset_coord
, 0),
172 nir_channel(b
, coord
, 1));
173 } else if (tex
->coord_components
== 3) {
174 offset_coord
= nir_vec3(b
, nir_channel(b
, offset_coord
, 0),
175 nir_channel(b
, offset_coord
, 1),
176 nir_channel(b
, coord
, 2));
178 unreachable("Invalid number of components");
182 nir_instr_rewrite_src(&tex
->instr
, &tex
->src
[coord_index
].src
,
183 nir_src_for_ssa(offset_coord
));
185 nir_tex_instr_remove_src(tex
, offset_index
);
191 lower_rect(nir_builder
*b
, nir_tex_instr
*tex
)
193 nir_ssa_def
*txs
= get_texture_size(b
, tex
);
194 nir_ssa_def
*scale
= nir_frcp(b
, txs
);
196 /* Walk through the sources normalizing the requested arguments. */
197 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
198 if (tex
->src
[i
].src_type
!= nir_tex_src_coord
)
201 nir_ssa_def
*coords
=
202 nir_ssa_for_src(b
, tex
->src
[i
].src
, tex
->coord_components
);
203 nir_instr_rewrite_src(&tex
->instr
,
205 nir_src_for_ssa(nir_fmul(b
, coords
, scale
)));
208 tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
212 sample_plane(nir_builder
*b
, nir_tex_instr
*tex
, int plane
)
214 assert(tex
->dest
.is_ssa
);
215 assert(nir_tex_instr_dest_size(tex
) == 4);
216 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
217 assert(tex
->op
== nir_texop_tex
);
218 assert(tex
->coord_components
== 2);
220 nir_tex_instr
*plane_tex
= nir_tex_instr_create(b
->shader
, 2);
221 nir_src_copy(&plane_tex
->src
[0].src
, &tex
->src
[0].src
, plane_tex
);
222 plane_tex
->src
[0].src_type
= nir_tex_src_coord
;
223 plane_tex
->src
[1].src
= nir_src_for_ssa(nir_imm_int(b
, plane
));
224 plane_tex
->src
[1].src_type
= nir_tex_src_plane
;
225 plane_tex
->op
= nir_texop_tex
;
226 plane_tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
227 plane_tex
->dest_type
= nir_type_float
;
228 plane_tex
->coord_components
= 2;
230 plane_tex
->texture_index
= tex
->texture_index
;
231 plane_tex
->texture
= nir_deref_var_clone(tex
->texture
, plane_tex
);
232 plane_tex
->sampler_index
= tex
->sampler_index
;
233 plane_tex
->sampler
= nir_deref_var_clone(tex
->sampler
, plane_tex
);
235 nir_ssa_dest_init(&plane_tex
->instr
, &plane_tex
->dest
, 4, 32, NULL
);
237 nir_builder_instr_insert(b
, &plane_tex
->instr
);
239 return &plane_tex
->dest
.ssa
;
243 convert_yuv_to_rgb(nir_builder
*b
, nir_tex_instr
*tex
,
244 nir_ssa_def
*y
, nir_ssa_def
*u
, nir_ssa_def
*v
)
246 nir_const_value m
[3] = {
247 { .f32
= { 1.0f
, 0.0f
, 1.59602678f
, 0.0f
} },
248 { .f32
= { 1.0f
, -0.39176229f
, -0.81296764f
, 0.0f
} },
249 { .f32
= { 1.0f
, 2.01723214f
, 0.0f
, 0.0f
} }
254 nir_fmul(b
, nir_imm_float(b
, 1.16438356f
),
255 nir_fadd(b
, y
, nir_imm_float(b
, -16.0f
/ 255.0f
))),
256 nir_channel(b
, nir_fadd(b
, u
, nir_imm_float(b
, -128.0f
/ 255.0f
)), 0),
257 nir_channel(b
, nir_fadd(b
, v
, nir_imm_float(b
, -128.0f
/ 255.0f
)), 0),
258 nir_imm_float(b
, 0.0));
260 nir_ssa_def
*red
= nir_fdot4(b
, yuv
, nir_build_imm(b
, 4, 32, m
[0]));
261 nir_ssa_def
*green
= nir_fdot4(b
, yuv
, nir_build_imm(b
, 4, 32, m
[1]));
262 nir_ssa_def
*blue
= nir_fdot4(b
, yuv
, nir_build_imm(b
, 4, 32, m
[2]));
264 nir_ssa_def
*result
= nir_vec4(b
, red
, green
, blue
, nir_imm_float(b
, 1.0f
));
266 nir_ssa_def_rewrite_uses(&tex
->dest
.ssa
, nir_src_for_ssa(result
));
270 lower_y_uv_external(nir_builder
*b
, nir_tex_instr
*tex
)
272 b
->cursor
= nir_after_instr(&tex
->instr
);
274 nir_ssa_def
*y
= sample_plane(b
, tex
, 0);
275 nir_ssa_def
*uv
= sample_plane(b
, tex
, 1);
277 convert_yuv_to_rgb(b
, tex
,
278 nir_channel(b
, y
, 0),
279 nir_channel(b
, uv
, 0),
280 nir_channel(b
, uv
, 1));
284 lower_y_u_v_external(nir_builder
*b
, nir_tex_instr
*tex
)
286 b
->cursor
= nir_after_instr(&tex
->instr
);
288 nir_ssa_def
*y
= sample_plane(b
, tex
, 0);
289 nir_ssa_def
*u
= sample_plane(b
, tex
, 1);
290 nir_ssa_def
*v
= sample_plane(b
, tex
, 2);
292 convert_yuv_to_rgb(b
, tex
,
293 nir_channel(b
, y
, 0),
294 nir_channel(b
, u
, 0),
295 nir_channel(b
, v
, 0));
299 lower_yx_xuxv_external(nir_builder
*b
, nir_tex_instr
*tex
)
301 b
->cursor
= nir_after_instr(&tex
->instr
);
303 nir_ssa_def
*y
= sample_plane(b
, tex
, 0);
304 nir_ssa_def
*xuxv
= sample_plane(b
, tex
, 1);
306 convert_yuv_to_rgb(b
, tex
,
307 nir_channel(b
, y
, 0),
308 nir_channel(b
, xuxv
, 1),
309 nir_channel(b
, xuxv
, 3));
313 lower_xy_uxvx_external(nir_builder
*b
, nir_tex_instr
*tex
)
315 b
->cursor
= nir_after_instr(&tex
->instr
);
317 nir_ssa_def
*y
= sample_plane(b
, tex
, 0);
318 nir_ssa_def
*uxvx
= sample_plane(b
, tex
, 1);
320 convert_yuv_to_rgb(b
, tex
,
321 nir_channel(b
, y
, 1),
322 nir_channel(b
, uxvx
, 0),
323 nir_channel(b
, uxvx
, 2));
327 * Emits a textureLod operation used to replace an existing
328 * textureGrad instruction.
331 replace_gradient_with_lod(nir_builder
*b
, nir_ssa_def
*lod
, nir_tex_instr
*tex
)
333 /* We are going to emit a textureLod() with the same parameters except that
334 * we replace ddx/ddy with lod.
336 int num_srcs
= tex
->num_srcs
- 1;
337 nir_tex_instr
*txl
= nir_tex_instr_create(b
->shader
, num_srcs
);
339 txl
->op
= nir_texop_txl
;
340 txl
->sampler_dim
= tex
->sampler_dim
;
341 txl
->texture_index
= tex
->texture_index
;
342 txl
->dest_type
= tex
->dest_type
;
343 txl
->is_array
= tex
->is_array
;
344 txl
->is_shadow
= tex
->is_shadow
;
345 txl
->is_new_style_shadow
= tex
->is_new_style_shadow
;
346 txl
->sampler_index
= tex
->sampler_index
;
347 txl
->texture
= nir_deref_var_clone(tex
->texture
, txl
);
348 txl
->sampler
= nir_deref_var_clone(tex
->sampler
, txl
);
349 txl
->coord_components
= tex
->coord_components
;
351 nir_ssa_dest_init(&txl
->instr
, &txl
->dest
, 4, 32, NULL
);
354 for (int i
= 0; i
< tex
->num_srcs
; i
++) {
355 if (tex
->src
[i
].src_type
== nir_tex_src_ddx
||
356 tex
->src
[i
].src_type
== nir_tex_src_ddy
)
358 nir_src_copy(&txl
->src
[src_num
].src
, &tex
->src
[i
].src
, txl
);
359 txl
->src
[src_num
].src_type
= tex
->src
[i
].src_type
;
363 txl
->src
[src_num
].src
= nir_src_for_ssa(lod
);
364 txl
->src
[src_num
].src_type
= nir_tex_src_lod
;
367 assert(src_num
== num_srcs
);
369 nir_ssa_dest_init(&txl
->instr
, &txl
->dest
,
370 tex
->dest
.ssa
.num_components
, 32, NULL
);
371 nir_builder_instr_insert(b
, &txl
->instr
);
373 nir_ssa_def_rewrite_uses(&tex
->dest
.ssa
, nir_src_for_ssa(&txl
->dest
.ssa
));
375 nir_instr_remove(&tex
->instr
);
379 lower_gradient_cube_map(nir_builder
*b
, nir_tex_instr
*tex
)
381 assert(tex
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
);
382 assert(tex
->op
== nir_texop_txd
);
383 assert(tex
->dest
.is_ssa
);
385 /* Use textureSize() to get the width and height of LOD 0 */
386 nir_ssa_def
*size
= get_texture_size(b
, tex
);
388 /* Cubemap texture lookups first generate a texture coordinate normalized
389 * to [-1, 1] on the appropiate face. The appropiate face is determined
390 * by which component has largest magnitude and its sign. The texture
391 * coordinate is the quotient of the remaining texture coordinates against
392 * that absolute value of the component of largest magnitude. This
393 * division requires that the computing of the derivative of the texel
394 * coordinate must use the quotient rule. The high level GLSL code is as
399 * vec3 abs_p, Q, dQdx, dQdy;
400 * abs_p = abs(ir->coordinate);
401 * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
402 * Q = ir->coordinate.yzx;
403 * dQdx = ir->lod_info.grad.dPdx.yzx;
404 * dQdy = ir->lod_info.grad.dPdy.yzx;
406 * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
407 * Q = ir->coordinate.xzy;
408 * dQdx = ir->lod_info.grad.dPdx.xzy;
409 * dQdy = ir->lod_info.grad.dPdy.xzy;
411 * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
412 * Q = ir->coordinate;
413 * dQdx = ir->lod_info.grad.dPdx;
414 * dQdy = ir->lod_info.grad.dPdy;
417 * Step 2: use quotient rule to compute derivative. The normalized to
418 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
419 * only concerned with the magnitudes of the derivatives whose values are
420 * not affected by the sign. We drop the sign from the computation.
426 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
427 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
429 * Step 3: compute LOD. At this point we have the derivatives of the
430 * texture coordinates normalized to [-1,1]. We take the LOD to be
431 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
432 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
433 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
434 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
435 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
436 * where L is the dimension of the cubemap. The code is:
439 * M = max(dot(dx, dx), dot(dy, dy));
440 * L = textureSize(sampler, 0).x;
441 * result = -1.0 + 0.5 * log2(L * L * M);
446 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_coord
)].src
.ssa
;
448 /* unmodified dPdx, dPdy values */
450 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddx
)].src
.ssa
;
452 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddy
)].src
.ssa
;
454 nir_ssa_def
*abs_p
= nir_fabs(b
, p
);
455 nir_ssa_def
*abs_p_x
= nir_channel(b
, abs_p
, 0);
456 nir_ssa_def
*abs_p_y
= nir_channel(b
, abs_p
, 1);
457 nir_ssa_def
*abs_p_z
= nir_channel(b
, abs_p
, 2);
459 /* 1. compute selector */
460 nir_ssa_def
*Q
, *dQdx
, *dQdy
;
462 nir_ssa_def
*cond_z
= nir_fge(b
, abs_p_z
, nir_fmax(b
, abs_p_x
, abs_p_y
));
463 nir_ssa_def
*cond_y
= nir_fge(b
, abs_p_y
, nir_fmax(b
, abs_p_x
, abs_p_z
));
465 unsigned yzx
[4] = { 1, 2, 0, 0 };
466 unsigned xzy
[4] = { 0, 2, 1, 0 };
468 Q
= nir_bcsel(b
, cond_z
,
471 nir_swizzle(b
, p
, xzy
, 3, false),
472 nir_swizzle(b
, p
, yzx
, 3, false)));
474 dQdx
= nir_bcsel(b
, cond_z
,
477 nir_swizzle(b
, dPdx
, xzy
, 3, false),
478 nir_swizzle(b
, dPdx
, yzx
, 3, false)));
480 dQdy
= nir_bcsel(b
, cond_z
,
483 nir_swizzle(b
, dPdy
, xzy
, 3, false),
484 nir_swizzle(b
, dPdy
, yzx
, 3, false)));
486 /* 2. quotient rule */
488 /* tmp = Q.xy * recip;
489 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
490 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
492 nir_ssa_def
*rcp_Q_z
= nir_frcp(b
, nir_channel(b
, Q
, 2));
494 unsigned xy
[4] = { 0, 1, 0, 0 };
495 nir_ssa_def
*Q_xy
= nir_swizzle(b
, Q
, xy
, 2, false);
496 nir_ssa_def
*tmp
= nir_fmul(b
, Q_xy
, rcp_Q_z
);
498 nir_ssa_def
*dQdx_xy
= nir_swizzle(b
, dQdx
, xy
, 2, false);
499 nir_ssa_def
*dQdx_z
= nir_channel(b
, dQdx
, 2);
501 nir_fmul(b
, rcp_Q_z
, nir_fsub(b
, dQdx_xy
, nir_fmul(b
, tmp
, dQdx_z
)));
503 nir_ssa_def
*dQdy_xy
= nir_swizzle(b
, dQdy
, xy
, 2, false);
504 nir_ssa_def
*dQdy_z
= nir_channel(b
, dQdy
, 2);
506 nir_fmul(b
, rcp_Q_z
, nir_fsub(b
, dQdy_xy
, nir_fmul(b
, tmp
, dQdy_z
)));
508 /* M = max(dot(dx, dx), dot(dy, dy)); */
509 nir_ssa_def
*M
= nir_fmax(b
, nir_fdot(b
, dx
, dx
), nir_fdot(b
, dy
, dy
));
511 /* size has textureSize() of LOD 0 */
512 nir_ssa_def
*L
= nir_channel(b
, size
, 0);
514 /* lod = -1.0 + 0.5 * log2(L * L * M); */
517 nir_imm_float(b
, -1.0f
),
519 nir_imm_float(b
, 0.5f
),
520 nir_flog2(b
, nir_fmul(b
, L
, nir_fmul(b
, L
, M
)))));
522 /* 3. Replace the gradient instruction with an equivalent lod instruction */
523 replace_gradient_with_lod(b
, lod
, tex
);
527 lower_gradient(nir_builder
*b
, nir_tex_instr
*tex
)
529 assert(tex
->sampler_dim
!= GLSL_SAMPLER_DIM_CUBE
);
530 assert(tex
->op
== nir_texop_txd
);
531 assert(tex
->dest
.is_ssa
);
533 /* Use textureSize() to get the width and height of LOD 0 */
534 unsigned component_mask
;
535 switch (tex
->sampler_dim
) {
536 case GLSL_SAMPLER_DIM_3D
:
539 case GLSL_SAMPLER_DIM_1D
:
548 nir_channels(b
, get_texture_size(b
, tex
), component_mask
);
550 /* Scale the gradients by width and height. Effectively, the incoming
551 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
552 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
555 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddx
)].src
.ssa
;
557 tex
->src
[nir_tex_instr_src_index(tex
, nir_tex_src_ddy
)].src
.ssa
;
559 nir_ssa_def
*dPdx
= nir_fmul(b
, ddx
, size
);
560 nir_ssa_def
*dPdy
= nir_fmul(b
, ddy
, size
);
563 if (dPdx
->num_components
== 1) {
564 rho
= nir_fmax(b
, nir_fabs(b
, dPdx
), nir_fabs(b
, dPdy
));
567 nir_fsqrt(b
, nir_fdot(b
, dPdx
, dPdx
)),
568 nir_fsqrt(b
, nir_fdot(b
, dPdy
, dPdy
)));
571 /* lod = log2(rho). We're ignoring GL state biases for now. */
572 nir_ssa_def
*lod
= nir_flog2(b
, rho
);
574 /* Replace the gradient instruction with an equivalent lod instruction */
575 replace_gradient_with_lod(b
, lod
, tex
);
579 saturate_src(nir_builder
*b
, nir_tex_instr
*tex
, unsigned sat_mask
)
581 b
->cursor
= nir_before_instr(&tex
->instr
);
583 /* Walk through the sources saturating the requested arguments. */
584 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
585 if (tex
->src
[i
].src_type
!= nir_tex_src_coord
)
589 nir_ssa_for_src(b
, tex
->src
[i
].src
, tex
->coord_components
);
591 /* split src into components: */
592 nir_ssa_def
*comp
[4];
594 assume(tex
->coord_components
>= 1);
596 for (unsigned j
= 0; j
< tex
->coord_components
; j
++)
597 comp
[j
] = nir_channel(b
, src
, j
);
599 /* clamp requested components, array index does not get clamped: */
600 unsigned ncomp
= tex
->coord_components
;
604 for (unsigned j
= 0; j
< ncomp
; j
++) {
605 if ((1 << j
) & sat_mask
) {
606 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) {
607 /* non-normalized texture coords, so clamp to texture
608 * size rather than [0.0, 1.0]
610 nir_ssa_def
*txs
= get_texture_size(b
, tex
);
611 comp
[j
] = nir_fmax(b
, comp
[j
], nir_imm_float(b
, 0.0));
612 comp
[j
] = nir_fmin(b
, comp
[j
], nir_channel(b
, txs
, j
));
614 comp
[j
] = nir_fsat(b
, comp
[j
]);
619 /* and move the result back into a single vecN: */
620 src
= nir_vec(b
, comp
, tex
->coord_components
);
622 nir_instr_rewrite_src(&tex
->instr
,
624 nir_src_for_ssa(src
));
629 get_zero_or_one(nir_builder
*b
, nir_alu_type type
, uint8_t swizzle_val
)
633 memset(&v
, 0, sizeof(v
));
635 if (swizzle_val
== 4) {
636 v
.u32
[0] = v
.u32
[1] = v
.u32
[2] = v
.u32
[3] = 0;
638 assert(swizzle_val
== 5);
639 if (type
== nir_type_float
)
640 v
.f32
[0] = v
.f32
[1] = v
.f32
[2] = v
.f32
[3] = 1.0;
642 v
.u32
[0] = v
.u32
[1] = v
.u32
[2] = v
.u32
[3] = 1;
645 return nir_build_imm(b
, 4, 32, v
);
649 swizzle_result(nir_builder
*b
, nir_tex_instr
*tex
, const uint8_t swizzle
[4])
651 assert(tex
->dest
.is_ssa
);
653 b
->cursor
= nir_after_instr(&tex
->instr
);
655 nir_ssa_def
*swizzled
;
656 if (tex
->op
== nir_texop_tg4
) {
657 if (swizzle
[tex
->component
] < 4) {
658 /* This one's easy */
659 tex
->component
= swizzle
[tex
->component
];
662 swizzled
= get_zero_or_one(b
, tex
->dest_type
, swizzle
[tex
->component
]);
665 assert(nir_tex_instr_dest_size(tex
) == 4);
666 if (swizzle
[0] < 4 && swizzle
[1] < 4 &&
667 swizzle
[2] < 4 && swizzle
[3] < 4) {
668 unsigned swiz
[4] = { swizzle
[0], swizzle
[1], swizzle
[2], swizzle
[3] };
669 /* We have no 0s or 1s, just emit a swizzling MOV */
670 swizzled
= nir_swizzle(b
, &tex
->dest
.ssa
, swiz
, 4, false);
672 nir_ssa_def
*srcs
[4];
673 for (unsigned i
= 0; i
< 4; i
++) {
674 if (swizzle
[i
] < 4) {
675 srcs
[i
] = nir_channel(b
, &tex
->dest
.ssa
, swizzle
[i
]);
677 srcs
[i
] = get_zero_or_one(b
, tex
->dest_type
, swizzle
[i
]);
680 swizzled
= nir_vec(b
, srcs
, 4);
684 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(swizzled
),
685 swizzled
->parent_instr
);
689 linearize_srgb_result(nir_builder
*b
, nir_tex_instr
*tex
)
691 assert(tex
->dest
.is_ssa
);
692 assert(nir_tex_instr_dest_size(tex
) == 4);
693 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
695 b
->cursor
= nir_after_instr(&tex
->instr
);
697 static const unsigned swiz
[4] = {0, 1, 2, 0};
698 nir_ssa_def
*comp
= nir_swizzle(b
, &tex
->dest
.ssa
, swiz
, 3, true);
701 * (comp <= 0.04045) ?
703 * pow((comp + 0.055) / 1.055, 2.4)
705 nir_ssa_def
*low
= nir_fmul(b
, comp
, nir_imm_float(b
, 1.0 / 12.92));
706 nir_ssa_def
*high
= nir_fpow(b
,
710 nir_imm_float(b
, 0.055)),
711 nir_imm_float(b
, 1.0 / 1.055)),
712 nir_imm_float(b
, 2.4));
713 nir_ssa_def
*cond
= nir_fge(b
, nir_imm_float(b
, 0.04045), comp
);
714 nir_ssa_def
*rgb
= nir_bcsel(b
, cond
, low
, high
);
716 /* alpha is untouched: */
717 nir_ssa_def
*result
= nir_vec4(b
,
718 nir_channel(b
, rgb
, 0),
719 nir_channel(b
, rgb
, 1),
720 nir_channel(b
, rgb
, 2),
721 nir_channel(b
, &tex
->dest
.ssa
, 3));
723 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(result
),
724 result
->parent_instr
);
728 nir_lower_tex_block(nir_block
*block
, nir_builder
*b
,
729 const nir_lower_tex_options
*options
)
731 bool progress
= false;
733 nir_foreach_instr_safe(instr
, block
) {
734 if (instr
->type
!= nir_instr_type_tex
)
737 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
738 bool lower_txp
= !!(options
->lower_txp
& (1 << tex
->sampler_dim
));
740 /* mask of src coords to saturate (clamp): */
741 unsigned sat_mask
= 0;
743 if ((1 << tex
->sampler_index
) & options
->saturate_r
)
744 sat_mask
|= (1 << 2); /* .z */
745 if ((1 << tex
->sampler_index
) & options
->saturate_t
)
746 sat_mask
|= (1 << 1); /* .y */
747 if ((1 << tex
->sampler_index
) & options
->saturate_s
)
748 sat_mask
|= (1 << 0); /* .x */
750 /* If we are clamping any coords, we must lower projector first
751 * as clamping happens *after* projection:
753 if (lower_txp
|| sat_mask
) {
758 if ((tex
->op
== nir_texop_txf
&& options
->lower_txf_offset
) ||
759 (sat_mask
&& nir_tex_instr_src_index(tex
, nir_tex_src_coord
) >= 0) ||
760 (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
&&
761 options
->lower_rect_offset
)) {
762 progress
= lower_offset(b
, tex
) || progress
;
765 if ((tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) && options
->lower_rect
) {
770 if ((1 << tex
->texture_index
) & options
->lower_y_uv_external
) {
771 lower_y_uv_external(b
, tex
);
775 if ((1 << tex
->texture_index
) & options
->lower_y_u_v_external
) {
776 lower_y_u_v_external(b
, tex
);
780 if ((1 << tex
->texture_index
) & options
->lower_yx_xuxv_external
) {
781 lower_yx_xuxv_external(b
, tex
);
785 if ((1 << tex
->texture_index
) & options
->lower_xy_uxvx_external
) {
786 lower_xy_uxvx_external(b
, tex
);
791 saturate_src(b
, tex
, sat_mask
);
795 if (((1 << tex
->texture_index
) & options
->swizzle_result
) &&
796 !nir_tex_instr_is_query(tex
) &&
797 !(tex
->is_shadow
&& tex
->is_new_style_shadow
)) {
798 swizzle_result(b
, tex
, options
->swizzles
[tex
->texture_index
]);
802 /* should be after swizzle so we know which channels are rgb: */
803 if (((1 << tex
->texture_index
) & options
->lower_srgb
) &&
804 !nir_tex_instr_is_query(tex
) && !tex
->is_shadow
) {
805 linearize_srgb_result(b
, tex
);
809 if (tex
->op
== nir_texop_txd
&&
810 tex
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
&&
811 (options
->lower_txd
||
812 options
->lower_txd_cube_map
||
813 (tex
->is_shadow
&& options
->lower_txd_shadow
))) {
814 lower_gradient_cube_map(b
, tex
);
819 if (tex
->op
== nir_texop_txd
&&
820 (options
->lower_txd
||
821 (options
->lower_txd_shadow
&&
822 tex
->is_shadow
&& tex
->sampler_dim
!= GLSL_SAMPLER_DIM_CUBE
))) {
823 lower_gradient(b
, tex
);
828 /* TXF, TXS and TXL require a LOD but not everything we implement using those
829 * three opcodes provides one. Provide a default LOD of 0.
831 if ((nir_tex_instr_src_index(tex
, nir_tex_src_lod
) == -1) &&
832 (tex
->op
== nir_texop_txf
|| tex
->op
== nir_texop_txs
||
833 tex
->op
== nir_texop_txl
|| tex
->op
== nir_texop_query_levels
||
834 (tex
->op
== nir_texop_tex
&&
835 b
->shader
->info
.stage
!= MESA_SHADER_FRAGMENT
))) {
836 b
->cursor
= nir_before_instr(&tex
->instr
);
837 nir_tex_instr_add_src(tex
, nir_tex_src_lod
, nir_src_for_ssa(nir_imm_int(b
, 0)));
847 nir_lower_tex_impl(nir_function_impl
*impl
,
848 const nir_lower_tex_options
*options
)
850 bool progress
= false;
852 nir_builder_init(&builder
, impl
);
854 nir_foreach_block(block
, impl
) {
855 progress
|= nir_lower_tex_block(block
, &builder
, options
);
858 nir_metadata_preserve(impl
, nir_metadata_block_index
|
859 nir_metadata_dominance
);
864 nir_lower_tex(nir_shader
*shader
, const nir_lower_tex_options
*options
)
866 bool progress
= false;
868 nir_foreach_function(function
, shader
) {
870 progress
|= nir_lower_tex_impl(function
->impl
, options
);