/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

24 #include "compiler/v3d_compiler.h"
25 #include "compiler/nir/nir_builder.h"

/**
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
 * intrinsics into something amenable to the V3D architecture.
 *
 * Most of the work is turning the VS's store_output intrinsics from working
 * on a base representing the gallium-level vec4 driver_location to an offset
 * within the VPM, and emitting the header that's read by the fixed function
 * hardware between the VS and FS.
 *
 * We also adjust the offsets on uniform loads to be in bytes, since that's
 * what we need for indirect addressing with general TMU access.
 */

40 struct v3d_nir_lower_io_state
{
44 int rcp_wc_vpm_offset
;
46 int varyings_vpm_offset
;
48 BITSET_WORD varyings_stored
[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS
)];
54 v3d_nir_store_output(nir_builder
*b
, int base
, nir_ssa_def
*chan
)
56 nir_intrinsic_instr
*intr
=
57 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_output
);
58 nir_ssa_dest_init(&intr
->instr
, &intr
->dest
,
59 1, intr
->dest
.ssa
.bit_size
, NULL
);
60 intr
->num_components
= 1;
62 intr
->src
[0] = nir_src_for_ssa(chan
);
63 intr
->src
[1] = nir_src_for_ssa(nir_imm_int(b
, 0));
65 nir_intrinsic_set_base(intr
, base
);
66 nir_intrinsic_set_write_mask(intr
, 0x1);
67 nir_intrinsic_set_component(intr
, 0);
69 nir_builder_instr_insert(b
, &intr
->instr
);
72 /* Convert the uniform offset to bytes. If it happens to be a constant,
73 * constant-folding will clean up the shift for us.
76 v3d_nir_lower_uniform(struct v3d_compile
*c
, nir_builder
*b
,
77 nir_intrinsic_instr
*intr
)
79 b
->cursor
= nir_before_instr(&intr
->instr
);
81 nir_intrinsic_set_base(intr
, nir_intrinsic_base(intr
) * 16);
83 nir_instr_rewrite_src(&intr
->instr
,
85 nir_src_for_ssa(nir_ishl(b
, intr
->src
[0].ssa
,
90 v3d_varying_slot_vpm_offset(struct v3d_compile
*c
, nir_variable
*var
, int chan
)
92 int component
= var
->data
.location_frac
+ chan
;
94 for (int i
= 0; i
< c
->vs_key
->num_used_outputs
; i
++) {
95 struct v3d_varying_slot slot
= c
->vs_key
->used_outputs
[i
];
97 if (v3d_slot_get_slot(slot
) == var
->data
.location
&&
98 v3d_slot_get_component(slot
) == component
) {
106 /* Lowers a store_output(gallium driver location) to a series of store_outputs
107 * with a driver_location equal to the offset in the VPM.
110 v3d_nir_lower_vpm_output(struct v3d_compile
*c
, nir_builder
*b
,
111 nir_intrinsic_instr
*intr
,
112 struct v3d_nir_lower_io_state
*state
)
114 b
->cursor
= nir_before_instr(&intr
->instr
);
116 int start_comp
= nir_intrinsic_component(intr
);
117 nir_ssa_def
*src
= nir_ssa_for_src(b
, intr
->src
[0],
118 intr
->num_components
);
120 nir_variable
*var
= NULL
;
121 nir_foreach_variable(scan_var
, &c
->s
->outputs
) {
122 if (scan_var
->data
.driver_location
!= nir_intrinsic_base(intr
) ||
123 start_comp
< scan_var
->data
.location_frac
||
124 start_comp
>= scan_var
->data
.location_frac
+
125 glsl_get_components(scan_var
->type
)) {
132 /* Save off the components of the position for the setup of VPM inputs
133 * read by fixed function HW.
135 if (var
->data
.location
== VARYING_SLOT_POS
) {
136 for (int i
= 0; i
< intr
->num_components
; i
++) {
137 state
->pos
[start_comp
+ i
] = nir_channel(b
, src
, i
);
141 /* Just psiz to the position in the FF header right now. */
142 if (var
->data
.location
== VARYING_SLOT_PSIZ
&&
143 state
->psiz_vpm_offset
!= -1) {
144 v3d_nir_store_output(b
, state
->psiz_vpm_offset
, src
);
147 /* Scalarize outputs if it hasn't happened already, since we want to
148 * schedule each VPM write individually. We can skip any outut
149 * components not read by the FS.
151 for (int i
= 0; i
< intr
->num_components
; i
++) {
153 v3d_varying_slot_vpm_offset(c
, var
,
156 var
->data
.location_frac
);
158 if (vpm_offset
== -1)
161 BITSET_SET(state
->varyings_stored
, vpm_offset
);
163 v3d_nir_store_output(b
, state
->varyings_vpm_offset
+ vpm_offset
,
164 nir_channel(b
, src
, i
));
167 nir_instr_remove(&intr
->instr
);
171 v3d_nir_lower_io_instr(struct v3d_compile
*c
, nir_builder
*b
,
172 struct nir_instr
*instr
,
173 struct v3d_nir_lower_io_state
*state
)
175 if (instr
->type
!= nir_instr_type_intrinsic
)
177 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
179 switch (intr
->intrinsic
) {
180 case nir_intrinsic_load_uniform
:
181 v3d_nir_lower_uniform(c
, b
, intr
);
184 case nir_intrinsic_store_output
:
185 if (c
->s
->info
.stage
== MESA_SHADER_VERTEX
)
186 v3d_nir_lower_vpm_output(c
, b
, intr
, state
);
194 /* Remap the output var's .driver_location. This is purely for
195 * nir_print_shader() so that store_output can map back to a variable name.
198 v3d_nir_lower_io_update_output_var_base(struct v3d_compile
*c
,
199 struct v3d_nir_lower_io_state
*state
)
201 nir_foreach_variable_safe(var
, &c
->s
->outputs
) {
202 if (var
->data
.location
== VARYING_SLOT_POS
&&
203 state
->pos_vpm_offset
!= -1) {
204 var
->data
.driver_location
= state
->pos_vpm_offset
;
208 if (var
->data
.location
== VARYING_SLOT_PSIZ
&&
209 state
->psiz_vpm_offset
!= -1) {
210 var
->data
.driver_location
= state
->psiz_vpm_offset
;
214 int vpm_offset
= v3d_varying_slot_vpm_offset(c
, var
, 0);
215 if (vpm_offset
!= -1) {
216 var
->data
.driver_location
=
217 state
->varyings_vpm_offset
+ vpm_offset
;
219 /* If we couldn't find a mapping for the var, delete
220 * it so that its old .driver_location doesn't confuse
221 * nir_print_shader().
223 exec_node_remove(&var
->node
);
229 v3d_nir_setup_vpm_layout(struct v3d_compile
*c
,
230 struct v3d_nir_lower_io_state
*state
)
232 uint32_t vpm_offset
= 0;
234 if (c
->vs_key
->is_coord
) {
235 state
->pos_vpm_offset
= vpm_offset
;
238 state
->pos_vpm_offset
= -1;
241 state
->vp_vpm_offset
= vpm_offset
;
244 if (!c
->vs_key
->is_coord
) {
245 state
->zs_vpm_offset
= vpm_offset
++;
246 state
->rcp_wc_vpm_offset
= vpm_offset
++;
248 state
->zs_vpm_offset
= -1;
249 state
->rcp_wc_vpm_offset
= -1;
252 if (c
->vs_key
->per_vertex_point_size
)
253 state
->psiz_vpm_offset
= vpm_offset
++;
255 state
->psiz_vpm_offset
= -1;
257 state
->varyings_vpm_offset
= vpm_offset
;
259 c
->vpm_output_size
= vpm_offset
+ c
->vs_key
->num_used_outputs
;
263 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile
*c
, nir_builder
*b
,
264 struct v3d_nir_lower_io_state
*state
)
266 for (int i
= 0; i
< 4; i
++) {
268 state
->pos
[i
] = nir_ssa_undef(b
, 1, 32);
271 nir_ssa_def
*rcp_wc
= nir_frcp(b
, state
->pos
[3]);
273 if (state
->pos_vpm_offset
!= -1) {
274 for (int i
= 0; i
< 4; i
++) {
275 v3d_nir_store_output(b
, state
->pos_vpm_offset
+ i
,
280 for (int i
= 0; i
< 2; i
++) {
285 scale
= nir_load_viewport_x_scale(b
);
287 scale
= nir_load_viewport_y_scale(b
);
288 pos
= nir_fmul(b
, pos
, scale
);
289 pos
= nir_fmul(b
, pos
, rcp_wc
);
290 pos
= nir_f2i32(b
, nir_fround_even(b
, pos
));
291 v3d_nir_store_output(b
, state
->vp_vpm_offset
+ i
,
295 if (state
->zs_vpm_offset
!= -1) {
296 nir_ssa_def
*z
= state
->pos
[2];
297 z
= nir_fmul(b
, z
, nir_load_viewport_z_scale(b
));
298 z
= nir_fmul(b
, z
, rcp_wc
);
299 z
= nir_fadd(b
, z
, nir_load_viewport_z_offset(b
));
300 v3d_nir_store_output(b
, state
->zs_vpm_offset
, z
);
303 if (state
->rcp_wc_vpm_offset
!= -1)
304 v3d_nir_store_output(b
, state
->rcp_wc_vpm_offset
, rcp_wc
);
306 /* Store 0 to varyings requested by the FS but not stored in the VS.
307 * This should be undefined behavior, but glsl-routing seems to rely
310 for (int i
= 0; i
< c
->vs_key
->num_used_outputs
; i
++) {
311 if (!BITSET_TEST(state
->varyings_stored
, i
)) {
312 v3d_nir_store_output(b
, state
->varyings_vpm_offset
+ i
,
319 v3d_nir_lower_io(nir_shader
*s
, struct v3d_compile
*c
)
321 struct v3d_nir_lower_io_state state
= { 0 };
323 /* Set up the layout of the VPM outputs. */
324 if (s
->info
.stage
== MESA_SHADER_VERTEX
)
325 v3d_nir_setup_vpm_layout(c
, &state
);
327 nir_foreach_function(function
, s
) {
328 if (function
->impl
) {
330 nir_builder_init(&b
, function
->impl
);
332 nir_foreach_block(block
, function
->impl
) {
333 nir_foreach_instr_safe(instr
, block
)
334 v3d_nir_lower_io_instr(c
, &b
, instr
,
338 nir_block
*last
= nir_impl_last_block(function
->impl
);
339 b
.cursor
= nir_after_block(last
);
340 if (s
->info
.stage
== MESA_SHADER_VERTEX
)
341 v3d_nir_emit_ff_vpm_outputs(c
, &b
, &state
);
343 nir_metadata_preserve(function
->impl
,
344 nir_metadata_block_index
|
345 nir_metadata_dominance
);
349 if (s
->info
.stage
== MESA_SHADER_VERTEX
)
350 v3d_nir_lower_io_update_output_var_base(c
, &state
);