/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + lowering RECT: converts the un-normalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
 */

#include "nir.h"
#include "nir_builder.h"
42 tex_instr_remove_src(nir_tex_instr
*tex
, unsigned src_idx
)
44 assert(src_idx
< tex
->num_srcs
);
46 /* First rewrite the source to NIR_SRC_INIT */
47 nir_instr_rewrite_src(&tex
->instr
, &tex
->src
[src_idx
].src
, NIR_SRC_INIT
);
49 /* Now, move all of the other sources down */
50 for (unsigned i
= src_idx
+ 1; i
< tex
->num_srcs
; i
++) {
51 tex
->src
[i
-1].src_type
= tex
->src
[i
].src_type
;
52 nir_instr_move_src(&tex
->instr
, &tex
->src
[i
-1].src
, &tex
->src
[i
].src
);
58 project_src(nir_builder
*b
, nir_tex_instr
*tex
)
60 /* Find the projector in the srcs list, if present. */
61 int proj_index
= nir_tex_instr_src_index(tex
, nir_tex_src_projector
);
65 b
->cursor
= nir_before_instr(&tex
->instr
);
67 nir_ssa_def
*inv_proj
=
68 nir_frcp(b
, nir_ssa_for_src(b
, tex
->src
[proj_index
].src
, 1));
70 /* Walk through the sources projecting the arguments. */
71 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
72 switch (tex
->src
[i
].src_type
) {
73 case nir_tex_src_coord
:
74 case nir_tex_src_comparitor
:
79 nir_ssa_def
*unprojected
=
80 nir_ssa_for_src(b
, tex
->src
[i
].src
, nir_tex_instr_src_size(tex
, i
));
81 nir_ssa_def
*projected
= nir_fmul(b
, unprojected
, inv_proj
);
83 /* Array indices don't get projected, so make an new vector with the
84 * coordinate's array index untouched.
86 if (tex
->is_array
&& tex
->src
[i
].src_type
== nir_tex_src_coord
) {
87 switch (tex
->coord_components
) {
89 projected
= nir_vec4(b
,
90 nir_channel(b
, projected
, 0),
91 nir_channel(b
, projected
, 1),
92 nir_channel(b
, projected
, 2),
93 nir_channel(b
, unprojected
, 3));
96 projected
= nir_vec3(b
,
97 nir_channel(b
, projected
, 0),
98 nir_channel(b
, projected
, 1),
99 nir_channel(b
, unprojected
, 2));
102 projected
= nir_vec2(b
,
103 nir_channel(b
, projected
, 0),
104 nir_channel(b
, unprojected
, 1));
107 unreachable("bad texture coord count for array");
112 nir_instr_rewrite_src(&tex
->instr
,
114 nir_src_for_ssa(projected
));
117 tex_instr_remove_src(tex
, proj_index
);
121 lower_offset(nir_builder
*b
, nir_tex_instr
*tex
)
123 int offset_index
= nir_tex_instr_src_index(tex
, nir_tex_src_offset
);
124 if (offset_index
< 0)
127 int coord_index
= nir_tex_instr_src_index(tex
, nir_tex_src_coord
);
128 assert(coord_index
>= 0);
130 assert(tex
->src
[offset_index
].src
.is_ssa
);
131 assert(tex
->src
[coord_index
].src
.is_ssa
);
132 nir_ssa_def
*offset
= tex
->src
[offset_index
].src
.ssa
;
133 nir_ssa_def
*coord
= tex
->src
[coord_index
].src
.ssa
;
135 b
->cursor
= nir_before_instr(&tex
->instr
);
137 nir_ssa_def
*offset_coord
;
138 if (nir_tex_instr_src_type(tex
, coord_index
) == nir_type_float
) {
139 assert(tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
);
140 offset_coord
= nir_fadd(b
, coord
, nir_i2f(b
, offset
));
142 offset_coord
= nir_iadd(b
, coord
, offset
);
146 /* The offset is not applied to the array index */
147 if (tex
->coord_components
== 2) {
148 offset_coord
= nir_vec2(b
, nir_channel(b
, offset_coord
, 0),
149 nir_channel(b
, coord
, 1));
150 } else if (tex
->coord_components
== 3) {
151 offset_coord
= nir_vec3(b
, nir_channel(b
, offset_coord
, 0),
152 nir_channel(b
, offset_coord
, 1),
153 nir_channel(b
, coord
, 2));
155 unreachable("Invalid number of components");
159 nir_instr_rewrite_src(&tex
->instr
, &tex
->src
[coord_index
].src
,
160 nir_src_for_ssa(offset_coord
));
162 tex_instr_remove_src(tex
, offset_index
);
169 get_texture_size(nir_builder
*b
, nir_tex_instr
*tex
)
171 b
->cursor
= nir_before_instr(&tex
->instr
);
173 /* RECT textures should not be array: */
174 assert(!tex
->is_array
);
178 txs
= nir_tex_instr_create(b
->shader
, 1);
179 txs
->op
= nir_texop_txs
;
180 txs
->sampler_dim
= GLSL_SAMPLER_DIM_RECT
;
181 txs
->texture_index
= tex
->texture_index
;
182 txs
->dest_type
= nir_type_int
;
184 /* only single src, the lod: */
185 txs
->src
[0].src
= nir_src_for_ssa(nir_imm_int(b
, 0));
186 txs
->src
[0].src_type
= nir_tex_src_lod
;
188 nir_ssa_dest_init(&txs
->instr
, &txs
->dest
, 2, 32, NULL
);
189 nir_builder_instr_insert(b
, &txs
->instr
);
191 return nir_i2f(b
, &txs
->dest
.ssa
);
195 lower_rect(nir_builder
*b
, nir_tex_instr
*tex
)
197 nir_ssa_def
*txs
= get_texture_size(b
, tex
);
198 nir_ssa_def
*scale
= nir_frcp(b
, txs
);
200 /* Walk through the sources normalizing the requested arguments. */
201 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
202 if (tex
->src
[i
].src_type
!= nir_tex_src_coord
)
205 nir_ssa_def
*coords
=
206 nir_ssa_for_src(b
, tex
->src
[i
].src
, tex
->coord_components
);
207 nir_instr_rewrite_src(&tex
->instr
,
209 nir_src_for_ssa(nir_fmul(b
, coords
, scale
)));
212 tex
->sampler_dim
= GLSL_SAMPLER_DIM_2D
;
216 sample_plane(nir_builder
*b
, nir_tex_instr
*tex
, int plane
)
218 assert(tex
->dest
.is_ssa
);
219 assert(nir_tex_instr_dest_size(tex
) == 4);
220 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
221 assert(tex
->op
== nir_texop_tex
);
222 assert(tex
->coord_components
== 2);
224 nir_tex_instr
*plane_tex
= nir_tex_instr_create(b
->shader
, 2);
225 nir_src_copy(&plane_tex
->src
[0].src
, &tex
->src
[0].src
, plane_tex
);
226 plane_tex
->src
[0].src_type
= nir_tex_src_coord
;
227 plane_tex
->src
[1].src
= nir_src_for_ssa(nir_imm_int(b
, plane
));
228 plane_tex
->src
[1].src_type
= nir_tex_src_plane
;
229 plane_tex
->op
= nir_texop_tex
;
230 plane_tex
->sampler_dim
= 2;
231 plane_tex
->dest_type
= nir_type_float
;
232 plane_tex
->coord_components
= 2;
234 plane_tex
->texture_index
= tex
->texture_index
;
235 plane_tex
->texture
= (nir_deref_var
*)
236 nir_copy_deref(plane_tex
, &tex
->texture
->deref
);
237 plane_tex
->sampler_index
= tex
->sampler_index
;
238 plane_tex
->sampler
= (nir_deref_var
*)
239 nir_copy_deref(plane_tex
, &tex
->sampler
->deref
);
241 nir_ssa_dest_init(&plane_tex
->instr
, &plane_tex
->dest
, 4, 32, NULL
);
243 nir_builder_instr_insert(b
, &plane_tex
->instr
);
245 return &plane_tex
->dest
.ssa
;
249 convert_yuv_to_rgb(nir_builder
*b
, nir_tex_instr
*tex
,
250 nir_ssa_def
*y
, nir_ssa_def
*u
, nir_ssa_def
*v
)
252 nir_const_value m
[3] = {
253 { .f32
= { 1.0f
, 0.0f
, 1.59602678f
, 0.0f
} },
254 { .f32
= { 1.0f
, -0.39176229f
, -0.81296764f
, 0.0f
} },
255 { .f32
= { 1.0f
, 2.01723214f
, 0.0f
, 0.0f
} }
260 nir_fmul(b
, nir_imm_float(b
, 1.16438356f
),
261 nir_fadd(b
, y
, nir_imm_float(b
, -0.0625f
))),
262 nir_channel(b
, nir_fadd(b
, u
, nir_imm_float(b
, -0.5f
)), 0),
263 nir_channel(b
, nir_fadd(b
, v
, nir_imm_float(b
, -0.5f
)), 0),
264 nir_imm_float(b
, 0.0));
266 nir_ssa_def
*red
= nir_fdot4(b
, yuv
, nir_build_imm(b
, 4, 32, m
[0]));
267 nir_ssa_def
*green
= nir_fdot4(b
, yuv
, nir_build_imm(b
, 4, 32, m
[1]));
268 nir_ssa_def
*blue
= nir_fdot4(b
, yuv
, nir_build_imm(b
, 4, 32, m
[2]));
270 nir_ssa_def
*result
= nir_vec4(b
, red
, green
, blue
, nir_imm_float(b
, 1.0f
));
272 nir_ssa_def_rewrite_uses(&tex
->dest
.ssa
, nir_src_for_ssa(result
));
276 lower_y_uv_external(nir_builder
*b
, nir_tex_instr
*tex
)
278 b
->cursor
= nir_after_instr(&tex
->instr
);
280 nir_ssa_def
*y
= sample_plane(b
, tex
, 0);
281 nir_ssa_def
*uv
= sample_plane(b
, tex
, 1);
283 convert_yuv_to_rgb(b
, tex
,
284 nir_channel(b
, y
, 0),
285 nir_channel(b
, uv
, 0),
286 nir_channel(b
, uv
, 1));
290 lower_y_u_v_external(nir_builder
*b
, nir_tex_instr
*tex
)
292 b
->cursor
= nir_after_instr(&tex
->instr
);
294 nir_ssa_def
*y
= sample_plane(b
, tex
, 0);
295 nir_ssa_def
*u
= sample_plane(b
, tex
, 1);
296 nir_ssa_def
*v
= sample_plane(b
, tex
, 2);
298 convert_yuv_to_rgb(b
, tex
,
299 nir_channel(b
, y
, 0),
300 nir_channel(b
, u
, 0),
301 nir_channel(b
, v
, 0));
305 lower_yx_xuxv_external(nir_builder
*b
, nir_tex_instr
*tex
)
307 b
->cursor
= nir_after_instr(&tex
->instr
);
309 nir_ssa_def
*y
= sample_plane(b
, tex
, 0);
310 nir_ssa_def
*xuxv
= sample_plane(b
, tex
, 1);
312 convert_yuv_to_rgb(b
, tex
,
313 nir_channel(b
, y
, 0),
314 nir_channel(b
, xuxv
, 1),
315 nir_channel(b
, xuxv
, 3));
319 saturate_src(nir_builder
*b
, nir_tex_instr
*tex
, unsigned sat_mask
)
321 b
->cursor
= nir_before_instr(&tex
->instr
);
323 /* Walk through the sources saturating the requested arguments. */
324 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
325 if (tex
->src
[i
].src_type
!= nir_tex_src_coord
)
329 nir_ssa_for_src(b
, tex
->src
[i
].src
, tex
->coord_components
);
331 /* split src into components: */
332 nir_ssa_def
*comp
[4];
334 assume(tex
->coord_components
>= 1);
336 for (unsigned j
= 0; j
< tex
->coord_components
; j
++)
337 comp
[j
] = nir_channel(b
, src
, j
);
339 /* clamp requested components, array index does not get clamped: */
340 unsigned ncomp
= tex
->coord_components
;
344 for (unsigned j
= 0; j
< ncomp
; j
++) {
345 if ((1 << j
) & sat_mask
) {
346 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) {
347 /* non-normalized texture coords, so clamp to texture
348 * size rather than [0.0, 1.0]
350 nir_ssa_def
*txs
= get_texture_size(b
, tex
);
351 comp
[j
] = nir_fmax(b
, comp
[j
], nir_imm_float(b
, 0.0));
352 comp
[j
] = nir_fmin(b
, comp
[j
], nir_channel(b
, txs
, j
));
354 comp
[j
] = nir_fsat(b
, comp
[j
]);
359 /* and move the result back into a single vecN: */
360 src
= nir_vec(b
, comp
, tex
->coord_components
);
362 nir_instr_rewrite_src(&tex
->instr
,
364 nir_src_for_ssa(src
));
369 get_zero_or_one(nir_builder
*b
, nir_alu_type type
, uint8_t swizzle_val
)
373 memset(&v
, 0, sizeof(v
));
375 if (swizzle_val
== 4) {
376 v
.u32
[0] = v
.u32
[1] = v
.u32
[2] = v
.u32
[3] = 0;
378 assert(swizzle_val
== 5);
379 if (type
== nir_type_float
)
380 v
.f32
[0] = v
.f32
[1] = v
.f32
[2] = v
.f32
[3] = 1.0;
382 v
.u32
[0] = v
.u32
[1] = v
.u32
[2] = v
.u32
[3] = 1;
385 return nir_build_imm(b
, 4, 32, v
);
389 swizzle_result(nir_builder
*b
, nir_tex_instr
*tex
, const uint8_t swizzle
[4])
391 assert(tex
->dest
.is_ssa
);
393 b
->cursor
= nir_after_instr(&tex
->instr
);
395 nir_ssa_def
*swizzled
;
396 if (tex
->op
== nir_texop_tg4
) {
397 if (swizzle
[tex
->component
] < 4) {
398 /* This one's easy */
399 tex
->component
= swizzle
[tex
->component
];
402 swizzled
= get_zero_or_one(b
, tex
->dest_type
, swizzle
[tex
->component
]);
405 assert(nir_tex_instr_dest_size(tex
) == 4);
406 if (swizzle
[0] < 4 && swizzle
[1] < 4 &&
407 swizzle
[2] < 4 && swizzle
[3] < 4) {
408 unsigned swiz
[4] = { swizzle
[0], swizzle
[1], swizzle
[2], swizzle
[3] };
409 /* We have no 0's or 1's, just emit a swizzling MOV */
410 swizzled
= nir_swizzle(b
, &tex
->dest
.ssa
, swiz
, 4, false);
412 nir_ssa_def
*srcs
[4];
413 for (unsigned i
= 0; i
< 4; i
++) {
414 if (swizzle
[i
] < 4) {
415 srcs
[i
] = nir_channel(b
, &tex
->dest
.ssa
, swizzle
[i
]);
417 srcs
[i
] = get_zero_or_one(b
, tex
->dest_type
, swizzle
[i
]);
420 swizzled
= nir_vec(b
, srcs
, 4);
424 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(swizzled
),
425 swizzled
->parent_instr
);
429 linearize_srgb_result(nir_builder
*b
, nir_tex_instr
*tex
)
431 assert(tex
->dest
.is_ssa
);
432 assert(nir_tex_instr_dest_size(tex
) == 4);
433 assert(nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
);
435 b
->cursor
= nir_after_instr(&tex
->instr
);
437 static const unsigned swiz
[4] = {0, 1, 2, 0};
438 nir_ssa_def
*comp
= nir_swizzle(b
, &tex
->dest
.ssa
, swiz
, 3, true);
441 * (comp <= 0.04045) ?
443 * pow((comp + 0.055) / 1.055, 2.4)
445 nir_ssa_def
*low
= nir_fmul(b
, comp
, nir_imm_float(b
, 1.0 / 12.92));
446 nir_ssa_def
*high
= nir_fpow(b
,
450 nir_imm_float(b
, 0.055)),
451 nir_imm_float(b
, 1.0 / 1.055)),
452 nir_imm_float(b
, 2.4));
453 nir_ssa_def
*cond
= nir_fge(b
, nir_imm_float(b
, 0.04045), comp
);
454 nir_ssa_def
*rgb
= nir_bcsel(b
, cond
, low
, high
);
456 /* alpha is untouched: */
457 nir_ssa_def
*result
= nir_vec4(b
,
458 nir_channel(b
, rgb
, 0),
459 nir_channel(b
, rgb
, 1),
460 nir_channel(b
, rgb
, 2),
461 nir_channel(b
, &tex
->dest
.ssa
, 3));
463 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
, nir_src_for_ssa(result
),
464 result
->parent_instr
);
468 nir_lower_tex_block(nir_block
*block
, nir_builder
*b
,
469 const nir_lower_tex_options
*options
)
471 bool progress
= false;
473 nir_foreach_instr_safe(instr
, block
) {
474 if (instr
->type
!= nir_instr_type_tex
)
477 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
478 bool lower_txp
= !!(options
->lower_txp
& (1 << tex
->sampler_dim
));
480 /* mask of src coords to saturate (clamp): */
481 unsigned sat_mask
= 0;
483 if ((1 << tex
->sampler_index
) & options
->saturate_r
)
484 sat_mask
|= (1 << 2); /* .z */
485 if ((1 << tex
->sampler_index
) & options
->saturate_t
)
486 sat_mask
|= (1 << 1); /* .y */
487 if ((1 << tex
->sampler_index
) & options
->saturate_s
)
488 sat_mask
|= (1 << 0); /* .x */
490 /* If we are clamping any coords, we must lower projector first
491 * as clamping happens *after* projection:
493 if (lower_txp
|| sat_mask
) {
498 if ((tex
->op
== nir_texop_txf
&& options
->lower_txf_offset
) ||
499 (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
&&
500 options
->lower_rect_offset
)) {
501 progress
= lower_offset(b
, tex
) || progress
;
504 if ((tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) && options
->lower_rect
) {
509 if ((1 << tex
->texture_index
) & options
->lower_y_uv_external
) {
510 lower_y_uv_external(b
, tex
);
514 if ((1 << tex
->texture_index
) & options
->lower_y_u_v_external
) {
515 lower_y_u_v_external(b
, tex
);
519 if ((1 << tex
->texture_index
) & options
->lower_yx_xuxv_external
) {
520 lower_yx_xuxv_external(b
, tex
);
526 saturate_src(b
, tex
, sat_mask
);
530 if (((1 << tex
->texture_index
) & options
->swizzle_result
) &&
531 !nir_tex_instr_is_query(tex
) &&
532 !(tex
->is_shadow
&& tex
->is_new_style_shadow
)) {
533 swizzle_result(b
, tex
, options
->swizzles
[tex
->texture_index
]);
537 /* should be after swizzle so we know which channels are rgb: */
538 if (((1 << tex
->texture_index
) & options
->lower_srgb
) &&
539 !nir_tex_instr_is_query(tex
) && !tex
->is_shadow
) {
540 linearize_srgb_result(b
, tex
);
549 nir_lower_tex_impl(nir_function_impl
*impl
,
550 const nir_lower_tex_options
*options
)
552 bool progress
= false;
554 nir_builder_init(&builder
, impl
);
556 nir_foreach_block(block
, impl
) {
557 progress
|= nir_lower_tex_block(block
, &builder
, options
);
560 nir_metadata_preserve(impl
, nir_metadata_block_index
|
561 nir_metadata_dominance
);
566 nir_lower_tex(nir_shader
*shader
, const nir_lower_tex_options
*options
)
568 bool progress
= false;
570 nir_foreach_function(function
, shader
) {
572 progress
|= nir_lower_tex_impl(function
->impl
, options
);