2 * Copyright © 2015 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "compiler/v3d_compiler.h"
25 #include "compiler/nir/nir_builder.h"
28 /* Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
29 * intrinsics into something amenable to the V3D architecture.
31 * Most of the work is turning the VS's store_output intrinsics from working
32 * on a base representing the gallium-level vec4 driver_location to an offset
33 * within the VPM, and emitting the header that's read by the fixed function
34 * hardware between the VS and FS.
36 * We also adjust the offsets on uniform loads to be in bytes, since that's
37 * what we need for indirect addressing with general TMU access. */
40 struct v3d_nir_lower_io_state
{
44 int rcp_wc_vpm_offset
;
46 int varyings_vpm_offset
;
48 BITSET_WORD varyings_stored
[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS
)];
/* Builds one single-component nir_intrinsic_store_output instruction and
 * inserts it through the builder.
 *
 * b:    builder; supplies the shader the instruction is created in and is
 *       the builder the finished instruction is inserted with.
 * base: value installed as the intrinsic's base index.  The callers in this
 *       file pass VPM offsets here.
 * chan: SSA value used as source 0, i.e. the data being stored.
 */
54 v3d_nir_store_output(nir_builder
*b
, int base
, nir_ssa_def
*chan
)
/* Create the bare store_output intrinsic in the builder's shader. */
56 nir_intrinsic_instr
*intr
=
57 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_output
);
/* NOTE(review): the bit size passed here is read back from intr->dest, the
 * very destination being initialized -- confirm a store intrinsic is meant
 * to have its dest initialized at all.
 */
58 nir_ssa_dest_init(&intr
->instr
, &intr
->dest
,
59 1, intr
->dest
.ssa
.bit_size
, NULL
);
/* One component per instruction: each call stores a single value. */
60 intr
->num_components
= 1;
/* Source 0 is the value to store. */
62 intr
->src
[0] = nir_src_for_ssa(chan
);
/* Source 1 is a constant 0 (presumably the indirect offset -- confirm
 * against the store_output intrinsic definition).
 */
63 intr
->src
[1] = nir_src_for_ssa(nir_imm_int(b
, 0));
/* Base selects the destination; write mask 0x1 and component 0 select only
 * the first channel.
 */
65 nir_intrinsic_set_base(intr
, base
);
66 nir_intrinsic_set_write_mask(intr
, 0x1);
67 nir_intrinsic_set_component(intr
, 0);
/* Insert the finished instruction using the builder. */
69 nir_builder_instr_insert(b
, &intr
->instr
);
72 /* Convert the uniform offset to bytes. If it happens to be a constant,
73 * constant-folding will clean up the shift for us. */
76 v3d_nir_lower_uniform(struct v3d_compile
*c
, nir_builder
*b
,
77 nir_intrinsic_instr
*intr
)
79 b
->cursor
= nir_before_instr(&intr
->instr
);
81 nir_intrinsic_set_base(intr
, nir_intrinsic_base(intr
) * 16);
83 nir_instr_rewrite_src(&intr
->instr
,
85 nir_src_for_ssa(nir_ishl(b
, intr
->src
[0].ssa
,
90 v3d_varying_slot_vpm_offset(struct v3d_compile
*c
, nir_variable
*var
, int chan
)
92 int component
= var
->data
.location_frac
+ chan
;
94 for (int i
= 0; i
< c
->vs_key
->num_used_outputs
; i
++) {
95 struct v3d_varying_slot slot
= c
->vs_key
->used_outputs
[i
];
97 if (v3d_slot_get_slot(slot
) == var
->data
.location
&&
98 v3d_slot_get_component(slot
) == component
) {
106 /* Lowers a store_output(gallium driver location) to a series of store_outputs
107 * with a driver_location equal to the offset in the VPM. */
110 v3d_nir_lower_vpm_output(struct v3d_compile
*c
, nir_builder
*b
,
111 nir_intrinsic_instr
*intr
,
112 struct v3d_nir_lower_io_state
*state
)
114 b
->cursor
= nir_before_instr(&intr
->instr
);
116 int start_comp
= nir_intrinsic_component(intr
);
117 nir_ssa_def
*src
= nir_ssa_for_src(b
, intr
->src
[0],
118 intr
->num_components
);
120 nir_variable
*var
= NULL
;
121 nir_foreach_variable(scan_var
, &c
->s
->outputs
) {
122 if (scan_var
->data
.driver_location
!= nir_intrinsic_base(intr
) ||
123 start_comp
< scan_var
->data
.location_frac
||
124 start_comp
>= scan_var
->data
.location_frac
+
125 glsl_get_components(scan_var
->type
)) {
131 /* Save off the components of the position for the setup of VPM inputs
132 * read by fixed function HW. */
134 if (var
->data
.location
== VARYING_SLOT_POS
) {
135 for (int i
= 0; i
< intr
->num_components
; i
++) {
136 state
->pos
[start_comp
+ i
] = nir_channel(b
, src
, i
);
140 /* Just store psiz to its position in the FF header right now. */
141 if (var
->data
.location
== VARYING_SLOT_PSIZ
&&
142 state
->psiz_vpm_offset
!= -1) {
143 v3d_nir_store_output(b
, state
->psiz_vpm_offset
, src
);
146 /* Scalarize outputs if it hasn't happened already, since we want to
147 * schedule each VPM write individually. We can skip any output
148 * components not read by the FS. */
150 for (int i
= 0; i
< intr
->num_components
; i
++) {
152 v3d_varying_slot_vpm_offset(c
, var
,
155 var
->data
.location_frac
);
157 if (vpm_offset
== -1)
160 BITSET_SET(state
->varyings_stored
, vpm_offset
);
162 v3d_nir_store_output(b
, state
->varyings_vpm_offset
+ vpm_offset
,
163 nir_channel(b
, src
, i
));
166 nir_instr_remove(&intr
->instr
);
170 v3d_nir_lower_io_instr(struct v3d_compile
*c
, nir_builder
*b
,
171 struct nir_instr
*instr
,
172 struct v3d_nir_lower_io_state
*state
)
174 if (instr
->type
!= nir_instr_type_intrinsic
)
176 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
178 switch (intr
->intrinsic
) {
179 case nir_intrinsic_load_uniform
:
180 v3d_nir_lower_uniform(c
, b
, intr
);
183 case nir_intrinsic_store_output
:
184 if (c
->s
->info
.stage
== MESA_SHADER_VERTEX
)
185 v3d_nir_lower_vpm_output(c
, b
, intr
, state
);
193 /* Remap the output var's .driver_location. This is purely for
194 * nir_print_shader() so that store_output can map back to a variable name. */
197 v3d_nir_lower_io_update_output_var_base(struct v3d_compile
*c
,
198 struct v3d_nir_lower_io_state
*state
)
200 nir_foreach_variable_safe(var
, &c
->s
->outputs
) {
201 if (var
->data
.location
== VARYING_SLOT_POS
&&
202 state
->pos_vpm_offset
!= -1) {
203 var
->data
.driver_location
= state
->pos_vpm_offset
;
207 if (var
->data
.location
== VARYING_SLOT_PSIZ
&&
208 state
->psiz_vpm_offset
!= -1) {
209 var
->data
.driver_location
= state
->psiz_vpm_offset
;
213 int vpm_offset
= v3d_varying_slot_vpm_offset(c
, var
, 0);
214 if (vpm_offset
!= -1) {
215 var
->data
.driver_location
=
216 state
->varyings_vpm_offset
+ vpm_offset
;
218 /* If we couldn't find a mapping for the var, delete
219 * it so that its old .driver_location doesn't confuse
220 * nir_print_shader(). */
222 exec_node_remove(&var
->node
);
228 v3d_nir_setup_vpm_layout(struct v3d_compile
*c
,
229 struct v3d_nir_lower_io_state
*state
)
231 uint32_t vpm_offset
= 0;
233 if (c
->vs_key
->is_coord
) {
234 state
->pos_vpm_offset
= vpm_offset
;
237 state
->pos_vpm_offset
= -1;
240 state
->vp_vpm_offset
= vpm_offset
;
243 if (!c
->vs_key
->is_coord
) {
244 state
->zs_vpm_offset
= vpm_offset
++;
245 state
->rcp_wc_vpm_offset
= vpm_offset
++;
247 state
->zs_vpm_offset
= -1;
248 state
->rcp_wc_vpm_offset
= -1;
251 if (c
->vs_key
->per_vertex_point_size
)
252 state
->psiz_vpm_offset
= vpm_offset
++;
254 state
->psiz_vpm_offset
= -1;
256 state
->varyings_vpm_offset
= vpm_offset
;
258 c
->vpm_output_size
= vpm_offset
+ c
->vs_key
->num_used_outputs
;
262 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile
*c
, nir_builder
*b
,
263 struct v3d_nir_lower_io_state
*state
)
265 for (int i
= 0; i
< 4; i
++) {
267 state
->pos
[i
] = nir_ssa_undef(b
, 1, 32);
270 nir_ssa_def
*rcp_wc
= nir_frcp(b
, state
->pos
[3]);
272 if (state
->pos_vpm_offset
!= -1) {
273 for (int i
= 0; i
< 4; i
++) {
274 v3d_nir_store_output(b
, state
->pos_vpm_offset
+ i
,
279 for (int i
= 0; i
< 2; i
++) {
284 scale
= nir_load_viewport_x_scale(b
);
286 scale
= nir_load_viewport_y_scale(b
);
287 pos
= nir_fmul(b
, pos
, scale
);
288 pos
= nir_fmul(b
, pos
, rcp_wc
);
289 pos
= nir_f2i32(b
, nir_fround_even(b
, pos
));
290 v3d_nir_store_output(b
, state
->vp_vpm_offset
+ i
,
294 if (state
->zs_vpm_offset
!= -1) {
295 nir_ssa_def
*z
= state
->pos
[2];
296 z
= nir_fmul(b
, z
, nir_load_viewport_z_scale(b
));
297 z
= nir_fmul(b
, z
, rcp_wc
);
298 z
= nir_fadd(b
, z
, nir_load_viewport_z_offset(b
));
299 v3d_nir_store_output(b
, state
->zs_vpm_offset
, z
);
302 if (state
->rcp_wc_vpm_offset
!= -1)
303 v3d_nir_store_output(b
, state
->rcp_wc_vpm_offset
, rcp_wc
);
305 /* Store 0 to varyings requested by the FS but not stored in the VS.
306 * This should be undefined behavior, but glsl-routing seems to rely on it. */
309 for (int i
= 0; i
< c
->vs_key
->num_used_outputs
; i
++) {
310 if (!BITSET_TEST(state
->varyings_stored
, i
)) {
311 v3d_nir_store_output(b
, state
->varyings_vpm_offset
+ i
,
318 v3d_nir_lower_io(nir_shader
*s
, struct v3d_compile
*c
)
320 struct v3d_nir_lower_io_state state
= { 0 };
322 /* Set up the layout of the VPM outputs. */
323 if (s
->info
.stage
== MESA_SHADER_VERTEX
)
324 v3d_nir_setup_vpm_layout(c
, &state
);
326 nir_foreach_function(function
, s
) {
327 if (function
->impl
) {
329 nir_builder_init(&b
, function
->impl
);
331 nir_foreach_block(block
, function
->impl
) {
332 nir_foreach_instr_safe(instr
, block
)
333 v3d_nir_lower_io_instr(c
, &b
, instr
,
337 nir_block
*last
= nir_impl_last_block(function
->impl
);
338 b
.cursor
= nir_after_block(last
);
339 if (s
->info
.stage
== MESA_SHADER_VERTEX
)
340 v3d_nir_emit_ff_vpm_outputs(c
, &b
, &state
);
342 nir_metadata_preserve(function
->impl
,
343 nir_metadata_block_index
|
344 nir_metadata_dominance
);
348 if (s
->info
.stage
== MESA_SHADER_VERTEX
)
349 v3d_nir_lower_io_update_output_var_base(c
, &state
);