/*
 * Copyright © 2019 Igalia S.L.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/**
 * A pass which detects tex instructions that are candidates to be executed
 * prior to FS shader start, and changes them to nir_texop_tex_prefetch.
 */
32 coord_offset(nir_ssa_def
*ssa
)
34 nir_instr
*parent_instr
= ssa
->parent_instr
;
36 /* The coordinate of a texture sampling instruction eligible for
37 * pre-fetch is either going to be a load_interpolated_input/
38 * load_input, or a vec2 assembling non-swizzled components of
39 * a load_interpolated_input/load_input (due to varying packing)
42 if (parent_instr
->type
== nir_instr_type_alu
) {
43 nir_alu_instr
*alu
= nir_instr_as_alu(parent_instr
);
45 if (alu
->op
!= nir_op_vec2
)
48 if (!alu
->src
[0].src
.is_ssa
)
51 int base_offset
= coord_offset(alu
->src
[0].src
.ssa
) +
52 alu
->src
[0].swizzle
[0];
54 /* NOTE it might be possible to support more than 2D? */
55 for (int i
= 1; i
< 2; i
++) {
56 if (!alu
->src
[i
].src
.is_ssa
)
59 int nth_offset
= coord_offset(alu
->src
[i
].src
.ssa
) +
60 alu
->src
[i
].swizzle
[0];
62 if (nth_offset
!= (base_offset
+ i
))
69 if (parent_instr
->type
!= nir_instr_type_intrinsic
)
72 nir_intrinsic_instr
*input
= nir_instr_as_intrinsic(parent_instr
);
74 if (input
->intrinsic
!= nir_intrinsic_load_interpolated_input
)
77 /* limit to load_barycentric_pixel, other interpolation modes don't seem
80 if (!input
->src
[0].is_ssa
)
83 nir_intrinsic_instr
*interp
=
84 nir_instr_as_intrinsic(input
->src
[0].ssa
->parent_instr
);
86 if (interp
->intrinsic
!= nir_intrinsic_load_barycentric_pixel
)
89 /* we also need a const input offset: */
90 if (!nir_src_is_const(input
->src
[1]))
93 unsigned base
= nir_src_as_uint(input
->src
[1]) + nir_intrinsic_base(input
);
94 unsigned comp
= nir_intrinsic_component(input
);
96 return (4 * base
) + comp
;
100 ir3_nir_coord_offset(nir_ssa_def
*ssa
)
103 assert (ssa
->num_components
== 2);
104 return coord_offset(ssa
);
108 has_src(nir_tex_instr
*tex
, nir_tex_src_type type
)
110 return nir_tex_instr_src_index(tex
, type
) > 0;
114 ok_bindless_src(nir_tex_instr
*tex
, nir_tex_src_type type
)
116 int idx
= nir_tex_instr_src_index(tex
, type
);
118 nir_intrinsic_instr
*bindless
= ir3_bindless_resource(tex
->src
[idx
].src
);
120 /* TODO from SP_FS_BINDLESS_PREFETCH[n] it looks like this limit should
123 return nir_src_is_const(bindless
->src
[0]) &&
124 (nir_src_as_uint(bindless
->src
[0]) < (1 << 16));
128 * Check that we will be able to encode the tex/samp parameters
129 * successfully. These limits are based on the layout of
130 * SP_FS_PREFETCH[n] and SP_FS_BINDLESS_PREFETCH[n], so at some
131 * point (if those regs changes) they may become generation
135 ok_tex_samp(nir_tex_instr
*tex
)
137 if (has_src(tex
, nir_tex_src_texture_handle
)) {
140 assert(has_src(tex
, nir_tex_src_sampler_handle
));
142 return ok_bindless_src(tex
, nir_tex_src_texture_handle
) &&
143 ok_bindless_src(tex
, nir_tex_src_sampler_handle
);
145 assert(!has_src(tex
, nir_tex_src_texture_offset
));
146 assert(!has_src(tex
, nir_tex_src_sampler_offset
));
148 return (tex
->texture_index
<= 0x1f) &&
149 (tex
->sampler_index
<= 0xf);
154 lower_tex_prefetch_block(nir_block
*block
)
156 bool progress
= false;
158 nir_foreach_instr_safe (instr
, block
) {
159 if (instr
->type
!= nir_instr_type_tex
)
162 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
163 if (tex
->op
!= nir_texop_tex
)
166 if (has_src(tex
, nir_tex_src_bias
) ||
167 has_src(tex
, nir_tex_src_lod
) ||
168 has_src(tex
, nir_tex_src_comparator
) ||
169 has_src(tex
, nir_tex_src_projector
) ||
170 has_src(tex
, nir_tex_src_offset
) ||
171 has_src(tex
, nir_tex_src_ddx
) ||
172 has_src(tex
, nir_tex_src_ddy
) ||
173 has_src(tex
, nir_tex_src_ms_index
) ||
174 has_src(tex
, nir_tex_src_texture_offset
) ||
175 has_src(tex
, nir_tex_src_sampler_offset
))
178 /* only prefetch for simple 2d tex fetch case */
179 if (tex
->sampler_dim
!= GLSL_SAMPLER_DIM_2D
|| tex
->is_array
)
182 if (!ok_tex_samp(tex
))
185 int idx
= nir_tex_instr_src_index(tex
, nir_tex_src_coord
);
186 /* First source should be the sampling coordinate. */
187 nir_tex_src
*coord
= &tex
->src
[idx
];
188 debug_assert(coord
->src
.is_ssa
);
190 if (ir3_nir_coord_offset(coord
->src
.ssa
) >= 0) {
191 tex
->op
= nir_texop_tex_prefetch
;
201 lower_tex_prefetch_func(nir_function_impl
*impl
)
203 /* Only instructions in the the outer-most block are considered
204 * eligible for pre-dispatch, because they need to be move-able
205 * to the beginning of the shader to avoid locking down the
206 * register holding the pre-fetched result for too long.
208 nir_block
*block
= nir_start_block(impl
);
212 bool progress
= lower_tex_prefetch_block(block
);
215 nir_metadata_preserve(impl
, nir_metadata_block_index
|
216 nir_metadata_dominance
);
223 ir3_nir_lower_tex_prefetch(nir_shader
*shader
)
225 bool progress
= false;
227 assert(shader
->info
.stage
== MESA_SHADER_FRAGMENT
);
229 nir_foreach_function (function
, shader
) {
230 /* Only texture sampling instructions inside the main function
231 * are eligible for pre-dispatch.
233 if (!function
->impl
|| !function
->is_entrypoint
)
236 progress
|= lower_tex_prefetch_func(function
->impl
);