2 * Copyright © 2015 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "compiler/v3d_compiler.h"
25 #include "compiler/nir/nir_builder.h"
/**
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
 * intrinsics into something amenable to the V3D architecture.
 *
 * Most of the work is turning the VS's store_output intrinsics from working
 * on a base representing the gallium-level vec4 driver_location to an offset
 * within the VPM, and emitting the header that's read by the fixed function
 * hardware between the VS and FS.
 *
 * We also adjust the offsets on uniform loads to be in bytes, since that's
 * what we need for indirect addressing with general TMU access.
 */
40 struct v3d_nir_lower_io_state
{
44 int rcp_wc_vpm_offset
;
46 int varyings_vpm_offset
;
48 /* Geometry shader state */
50 /* VPM offset for the current vertex data output */
51 nir_variable
*output_offset_var
;
52 /* VPM offset for the current vertex header */
53 nir_variable
*header_offset_var
;
54 /* VPM header for the current vertex */
55 nir_variable
*header_var
;
57 /* Size of the complete VPM output header */
58 uint32_t output_header_size
;
59 /* Size of the output data for a single vertex */
60 uint32_t output_vertex_data_size
;
63 BITSET_WORD varyings_stored
[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS
)];
69 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile
*c
, nir_builder
*b
,
70 struct v3d_nir_lower_io_state
*state
);
73 v3d_nir_store_output(nir_builder
*b
, int base
, nir_ssa_def
*offset
,
76 nir_intrinsic_instr
*intr
=
77 nir_intrinsic_instr_create(b
->shader
,
78 nir_intrinsic_store_output
);
79 nir_ssa_dest_init(&intr
->instr
, &intr
->dest
,
80 1, intr
->dest
.ssa
.bit_size
, NULL
);
81 intr
->num_components
= 1;
83 intr
->src
[0] = nir_src_for_ssa(chan
);
85 intr
->src
[1] = nir_src_for_ssa(offset
);
87 intr
->src
[1] = nir_src_for_ssa(nir_imm_int(b
, 0));
89 nir_intrinsic_set_base(intr
, base
);
90 nir_intrinsic_set_write_mask(intr
, 0x1);
91 nir_intrinsic_set_component(intr
, 0);
93 nir_builder_instr_insert(b
, &intr
->instr
);
96 /* Convert the uniform offset to bytes. If it happens to be a constant,
97 * constant-folding will clean up the shift for us.
100 v3d_nir_lower_uniform(struct v3d_compile
*c
, nir_builder
*b
,
101 nir_intrinsic_instr
*intr
)
103 b
->cursor
= nir_before_instr(&intr
->instr
);
105 nir_intrinsic_set_base(intr
, nir_intrinsic_base(intr
) * 16);
107 nir_instr_rewrite_src(&intr
->instr
,
109 nir_src_for_ssa(nir_ishl(b
, intr
->src
[0].ssa
,
110 nir_imm_int(b
, 4))));
114 v3d_varying_slot_vpm_offset(struct v3d_compile
*c
, nir_variable
*var
, int chan
)
116 int component
= var
->data
.location_frac
+ chan
;
118 uint32_t num_used_outputs
= 0;
119 struct v3d_varying_slot
*used_outputs
= NULL
;
120 switch (c
->s
->info
.stage
) {
121 case MESA_SHADER_VERTEX
:
122 num_used_outputs
= c
->vs_key
->num_used_outputs
;
123 used_outputs
= c
->vs_key
->used_outputs
;
125 case MESA_SHADER_GEOMETRY
:
126 num_used_outputs
= c
->gs_key
->num_used_outputs
;
127 used_outputs
= c
->gs_key
->used_outputs
;
130 unreachable("Unsupported shader stage");
133 for (int i
= 0; i
< num_used_outputs
; i
++) {
134 struct v3d_varying_slot slot
= used_outputs
[i
];
136 if (v3d_slot_get_slot(slot
) == var
->data
.location
&&
137 v3d_slot_get_component(slot
) == component
) {
145 /* Lowers a store_output(gallium driver location) to a series of store_outputs
146 * with a driver_location equal to the offset in the VPM.
148 * For geometry shaders we need to emit multiple vertices so the VPM offsets
149 * need to be computed in the shader code based on the current vertex index.
152 v3d_nir_lower_vpm_output(struct v3d_compile
*c
, nir_builder
*b
,
153 nir_intrinsic_instr
*intr
,
154 struct v3d_nir_lower_io_state
*state
)
156 b
->cursor
= nir_before_instr(&intr
->instr
);
158 /* If this is a geometry shader we need to emit our outputs
159 * to the current vertex offset in the VPM.
161 nir_ssa_def
*offset_reg
=
162 c
->s
->info
.stage
== MESA_SHADER_GEOMETRY
?
163 nir_load_var(b
, state
->gs
.output_offset_var
) : NULL
;
165 int start_comp
= nir_intrinsic_component(intr
);
166 nir_ssa_def
*src
= nir_ssa_for_src(b
, intr
->src
[0],
167 intr
->num_components
);
169 nir_variable
*var
= NULL
;
170 nir_foreach_variable(scan_var
, &c
->s
->outputs
) {
171 if (scan_var
->data
.driver_location
!= nir_intrinsic_base(intr
) ||
172 start_comp
< scan_var
->data
.location_frac
||
173 start_comp
>= scan_var
->data
.location_frac
+
174 glsl_get_components(scan_var
->type
)) {
181 /* Save off the components of the position for the setup of VPM inputs
182 * read by fixed function HW.
184 if (var
->data
.location
== VARYING_SLOT_POS
) {
185 for (int i
= 0; i
< intr
->num_components
; i
++) {
186 state
->pos
[start_comp
+ i
] = nir_channel(b
, src
, i
);
190 /* Just psiz to the position in the FF header right now. */
191 if (var
->data
.location
== VARYING_SLOT_PSIZ
&&
192 state
->psiz_vpm_offset
!= -1) {
193 v3d_nir_store_output(b
, state
->psiz_vpm_offset
, offset_reg
, src
);
196 if (var
->data
.location
== VARYING_SLOT_LAYER
) {
197 assert(c
->s
->info
.stage
== MESA_SHADER_GEOMETRY
);
198 nir_ssa_def
*header
= nir_load_var(b
, state
->gs
.header_var
);
199 header
= nir_iand(b
, header
, nir_imm_int(b
, 0xff00ffff));
201 /* From the GLES 3.2 spec:
203 * "When fragments are written to a layered framebuffer, the
204 * fragment’s layer number selects an image from the array
205 * of images at each attachment (...). If the fragment’s
206 * layer number is negative, or greater than or equal to
207 * the minimum number of layers of any attachment, the
208 * effects of the fragment on the framebuffer contents are
211 * This suggests we can just ignore that situation, however,
212 * for V3D an out-of-bounds layer index means that the binner
213 * might do out-of-bounds writes access to the tile state. The
214 * simulator has an assert to catch this, so we play safe here
215 * and we make sure that doesn't happen by setting gl_Layer
216 * to 0 in that case (we always allocate tile state for at
219 nir_intrinsic_instr
*load
=
220 nir_intrinsic_instr_create(b
->shader
,
221 nir_intrinsic_load_fb_layers_v3d
);
222 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 1, 32, NULL
);
223 nir_builder_instr_insert(b
, &load
->instr
);
224 nir_ssa_def
*fb_layers
= &load
->dest
.ssa
;
226 nir_ssa_def
*cond
= nir_ige(b
, src
, fb_layers
);
227 nir_ssa_def
*layer_id
=
230 nir_ishl(b
, src
, nir_imm_int(b
, 16)));
231 header
= nir_ior(b
, header
, layer_id
);
232 nir_store_var(b
, state
->gs
.header_var
, header
, 0x1);
235 /* Scalarize outputs if it hasn't happened already, since we want to
236 * schedule each VPM write individually. We can skip any outut
237 * components not read by the FS.
239 for (int i
= 0; i
< intr
->num_components
; i
++) {
241 v3d_varying_slot_vpm_offset(c
, var
,
244 var
->data
.location_frac
);
246 if (vpm_offset
== -1)
249 BITSET_SET(state
->varyings_stored
, vpm_offset
);
251 v3d_nir_store_output(b
, state
->varyings_vpm_offset
+ vpm_offset
,
252 offset_reg
, nir_channel(b
, src
, i
));
255 nir_instr_remove(&intr
->instr
);
259 reset_gs_header(nir_builder
*b
, struct v3d_nir_lower_io_state
*state
)
261 const uint8_t NEW_PRIMITIVE_OFFSET
= 0;
262 const uint8_t VERTEX_DATA_LENGTH_OFFSET
= 8;
264 uint32_t vertex_data_size
= state
->gs
.output_vertex_data_size
;
265 assert((vertex_data_size
& 0xffffff00) == 0);
268 header
= 1 << NEW_PRIMITIVE_OFFSET
;
269 header
|= vertex_data_size
<< VERTEX_DATA_LENGTH_OFFSET
;
270 nir_store_var(b
, state
->gs
.header_var
, nir_imm_int(b
, header
), 0x1);
274 v3d_nir_lower_emit_vertex(struct v3d_compile
*c
, nir_builder
*b
,
275 nir_intrinsic_instr
*instr
,
276 struct v3d_nir_lower_io_state
*state
)
278 b
->cursor
= nir_before_instr(&instr
->instr
);
280 nir_ssa_def
*header
= nir_load_var(b
, state
->gs
.header_var
);
281 nir_ssa_def
*header_offset
= nir_load_var(b
, state
->gs
.header_offset_var
);
282 nir_ssa_def
*output_offset
= nir_load_var(b
, state
->gs
.output_offset_var
);
284 /* Emit fixed function outputs */
285 v3d_nir_emit_ff_vpm_outputs(c
, b
, state
);
287 /* Emit vertex header */
288 v3d_nir_store_output(b
, 0, header_offset
, header
);
290 /* Update VPM offset for next vertex output data and header */
292 nir_iadd(b
, output_offset
,
293 nir_imm_int(b
, state
->gs
.output_vertex_data_size
));
295 header_offset
= nir_iadd(b
, header_offset
, nir_imm_int(b
, 1));
297 /* Reset the New Primitive bit */
298 header
= nir_iand(b
, header
, nir_imm_int(b
, 0xfffffffe));
300 nir_store_var(b
, state
->gs
.output_offset_var
, output_offset
, 0x1);
301 nir_store_var(b
, state
->gs
.header_offset_var
, header_offset
, 0x1);
302 nir_store_var(b
, state
->gs
.header_var
, header
, 0x1);
304 nir_instr_remove(&instr
->instr
);
308 v3d_nir_lower_end_primitive(struct v3d_compile
*c
, nir_builder
*b
,
309 nir_intrinsic_instr
*instr
,
310 struct v3d_nir_lower_io_state
*state
)
312 assert(state
->gs
.header_var
);
313 b
->cursor
= nir_before_instr(&instr
->instr
);
314 reset_gs_header(b
, state
);
316 nir_instr_remove(&instr
->instr
);
320 v3d_nir_lower_io_instr(struct v3d_compile
*c
, nir_builder
*b
,
321 struct nir_instr
*instr
,
322 struct v3d_nir_lower_io_state
*state
)
324 if (instr
->type
!= nir_instr_type_intrinsic
)
326 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
328 switch (intr
->intrinsic
) {
329 case nir_intrinsic_load_uniform
:
330 v3d_nir_lower_uniform(c
, b
, intr
);
333 case nir_intrinsic_store_output
:
334 if (c
->s
->info
.stage
== MESA_SHADER_VERTEX
||
335 c
->s
->info
.stage
== MESA_SHADER_GEOMETRY
) {
336 v3d_nir_lower_vpm_output(c
, b
, intr
, state
);
340 case nir_intrinsic_emit_vertex
:
341 v3d_nir_lower_emit_vertex(c
, b
, intr
, state
);
344 case nir_intrinsic_end_primitive
:
345 v3d_nir_lower_end_primitive(c
, b
, intr
, state
);
353 /* Remap the output var's .driver_location. This is purely for
354 * nir_print_shader() so that store_output can map back to a variable name.
357 v3d_nir_lower_io_update_output_var_base(struct v3d_compile
*c
,
358 struct v3d_nir_lower_io_state
*state
)
360 nir_foreach_variable_safe(var
, &c
->s
->outputs
) {
361 if (var
->data
.location
== VARYING_SLOT_POS
&&
362 state
->pos_vpm_offset
!= -1) {
363 var
->data
.driver_location
= state
->pos_vpm_offset
;
367 if (var
->data
.location
== VARYING_SLOT_PSIZ
&&
368 state
->psiz_vpm_offset
!= -1) {
369 var
->data
.driver_location
= state
->psiz_vpm_offset
;
373 int vpm_offset
= v3d_varying_slot_vpm_offset(c
, var
, 0);
374 if (vpm_offset
!= -1) {
375 var
->data
.driver_location
=
376 state
->varyings_vpm_offset
+ vpm_offset
;
378 /* If we couldn't find a mapping for the var, delete
379 * it so that its old .driver_location doesn't confuse
380 * nir_print_shader().
382 exec_node_remove(&var
->node
);
388 v3d_nir_setup_vpm_layout_vs(struct v3d_compile
*c
,
389 struct v3d_nir_lower_io_state
*state
)
391 uint32_t vpm_offset
= 0;
393 state
->pos_vpm_offset
= -1;
394 state
->vp_vpm_offset
= -1;
395 state
->zs_vpm_offset
= -1;
396 state
->rcp_wc_vpm_offset
= -1;
397 state
->psiz_vpm_offset
= -1;
399 bool needs_ff_outputs
= c
->vs_key
->base
.is_last_geometry_stage
;
400 if (needs_ff_outputs
) {
401 if (c
->vs_key
->is_coord
) {
402 state
->pos_vpm_offset
= vpm_offset
;
406 state
->vp_vpm_offset
= vpm_offset
;
409 if (!c
->vs_key
->is_coord
) {
410 state
->zs_vpm_offset
= vpm_offset
++;
411 state
->rcp_wc_vpm_offset
= vpm_offset
++;
414 if (c
->vs_key
->per_vertex_point_size
)
415 state
->psiz_vpm_offset
= vpm_offset
++;
418 state
->varyings_vpm_offset
= vpm_offset
;
420 c
->vpm_output_size
= MAX2(1, vpm_offset
+ c
->vs_key
->num_used_outputs
);
424 v3d_nir_setup_vpm_layout_gs(struct v3d_compile
*c
,
425 struct v3d_nir_lower_io_state
*state
)
427 /* 1 header slot for number of output vertices */
428 uint32_t vpm_offset
= 1;
430 /* 1 header slot per output vertex */
431 const uint32_t num_vertices
= c
->s
->info
.gs
.vertices_out
;
432 vpm_offset
+= num_vertices
;
434 state
->gs
.output_header_size
= vpm_offset
;
436 /* Vertex data: here we only compute offsets into a generic vertex data
437 * elements. When it is time to actually write a particular vertex to
438 * the VPM, we will add the offset for that vertex into the VPM output
441 * If geometry shaders are present, they are always the last shader
442 * stage before rasterization, so we always emit fixed function outputs.
445 if (c
->gs_key
->is_coord
) {
446 state
->pos_vpm_offset
= vpm_offset
;
449 state
->pos_vpm_offset
= -1;
452 state
->vp_vpm_offset
= vpm_offset
;
455 if (!c
->gs_key
->is_coord
) {
456 state
->zs_vpm_offset
= vpm_offset
++;
457 state
->rcp_wc_vpm_offset
= vpm_offset
++;
459 state
->zs_vpm_offset
= -1;
460 state
->rcp_wc_vpm_offset
= -1;
463 /* Mesa enables OES_geometry_shader_point_size automatically with
464 * OES_geometry_shader so we always need to handle point size
467 if (c
->gs_key
->per_vertex_point_size
)
468 state
->psiz_vpm_offset
= vpm_offset
++;
470 state
->varyings_vpm_offset
= vpm_offset
;
472 state
->gs
.output_vertex_data_size
=
473 state
->varyings_vpm_offset
+ c
->gs_key
->num_used_outputs
;
476 state
->gs
.output_header_size
+
477 state
->gs
.output_vertex_data_size
* num_vertices
;
481 v3d_nir_emit_ff_vpm_outputs(struct v3d_compile
*c
, nir_builder
*b
,
482 struct v3d_nir_lower_io_state
*state
)
484 /* If this is a geometry shader we need to emit our fixed function
485 * outputs to the current vertex offset in the VPM.
487 nir_ssa_def
*offset_reg
=
488 c
->s
->info
.stage
== MESA_SHADER_GEOMETRY
?
489 nir_load_var(b
, state
->gs
.output_offset_var
) : NULL
;
491 for (int i
= 0; i
< 4; i
++) {
493 state
->pos
[i
] = nir_ssa_undef(b
, 1, 32);
496 nir_ssa_def
*rcp_wc
= nir_frcp(b
, state
->pos
[3]);
498 if (state
->pos_vpm_offset
!= -1) {
499 for (int i
= 0; i
< 4; i
++) {
500 v3d_nir_store_output(b
, state
->pos_vpm_offset
+ i
,
501 offset_reg
, state
->pos
[i
]);
505 if (state
->vp_vpm_offset
!= -1) {
506 for (int i
= 0; i
< 2; i
++) {
511 scale
= nir_load_viewport_x_scale(b
);
513 scale
= nir_load_viewport_y_scale(b
);
514 pos
= nir_fmul(b
, pos
, scale
);
515 pos
= nir_fmul(b
, pos
, rcp_wc
);
516 pos
= nir_f2i32(b
, nir_fround_even(b
, pos
));
517 v3d_nir_store_output(b
, state
->vp_vpm_offset
+ i
,
522 if (state
->zs_vpm_offset
!= -1) {
523 nir_ssa_def
*z
= state
->pos
[2];
524 z
= nir_fmul(b
, z
, nir_load_viewport_z_scale(b
));
525 z
= nir_fmul(b
, z
, rcp_wc
);
526 z
= nir_fadd(b
, z
, nir_load_viewport_z_offset(b
));
527 v3d_nir_store_output(b
, state
->zs_vpm_offset
, offset_reg
, z
);
530 if (state
->rcp_wc_vpm_offset
!= -1) {
531 v3d_nir_store_output(b
, state
->rcp_wc_vpm_offset
,
535 /* Store 0 to varyings requested by the FS but not stored by the
536 * previous stage. This should be undefined behavior, but
537 * glsl-routing seems to rely on it.
539 uint32_t num_used_outputs
;
540 switch (c
->s
->info
.stage
) {
541 case MESA_SHADER_VERTEX
:
542 num_used_outputs
= c
->vs_key
->num_used_outputs
;
544 case MESA_SHADER_GEOMETRY
:
545 num_used_outputs
= c
->gs_key
->num_used_outputs
;
548 unreachable("Unsupported shader stage");
551 for (int i
= 0; i
< num_used_outputs
; i
++) {
552 if (!BITSET_TEST(state
->varyings_stored
, i
)) {
553 v3d_nir_store_output(b
, state
->varyings_vpm_offset
+ i
,
554 offset_reg
, nir_imm_int(b
, 0));
560 emit_gs_prolog(struct v3d_compile
*c
, nir_builder
*b
,
561 nir_function_impl
*impl
,
562 struct v3d_nir_lower_io_state
*state
)
564 nir_block
*first
= nir_start_block(impl
);
565 b
->cursor
= nir_before_block(first
);
567 const struct glsl_type
*uint_type
= glsl_uint_type();
569 assert(!state
->gs
.output_offset_var
);
570 state
->gs
.output_offset_var
=
571 nir_local_variable_create(impl
, uint_type
, "output_offset");
572 nir_store_var(b
, state
->gs
.output_offset_var
,
573 nir_imm_int(b
, state
->gs
.output_header_size
), 0x1);
575 assert(!state
->gs
.header_offset_var
);
576 state
->gs
.header_offset_var
=
577 nir_local_variable_create(impl
, uint_type
, "header_offset");
578 nir_store_var(b
, state
->gs
.header_offset_var
, nir_imm_int(b
, 1), 0x1);
580 assert(!state
->gs
.header_var
);
581 state
->gs
.header_var
=
582 nir_local_variable_create(impl
, uint_type
, "header");
583 reset_gs_header(b
, state
);
587 emit_gs_vpm_output_header_prolog(struct v3d_compile
*c
, nir_builder
*b
,
588 struct v3d_nir_lower_io_state
*state
)
590 const uint8_t VERTEX_COUNT_OFFSET
= 16;
592 /* Our GS header has 1 generic header slot (at VPM offset 0) and then
593 * one slot per output vertex after it. This means we don't need to
594 * have a variable just to keep track of the number of vertices we
595 * emitted and instead we can just compute it here from the header
596 * offset variable by removing the one generic header slot that always
597 * goes at the begining of out header.
599 nir_ssa_def
*header_offset
=
600 nir_load_var(b
, state
->gs
.header_offset_var
);
601 nir_ssa_def
*vertex_count
=
602 nir_isub(b
, header_offset
, nir_imm_int(b
, 1));
603 nir_ssa_def
*header
=
604 nir_ior(b
, nir_imm_int(b
, state
->gs
.output_header_size
),
605 nir_ishl(b
, vertex_count
,
606 nir_imm_int(b
, VERTEX_COUNT_OFFSET
)));
608 v3d_nir_store_output(b
, 0, NULL
, header
);
612 v3d_nir_lower_io(nir_shader
*s
, struct v3d_compile
*c
)
614 struct v3d_nir_lower_io_state state
= { 0 };
616 /* Set up the layout of the VPM outputs. */
617 switch (s
->info
.stage
) {
618 case MESA_SHADER_VERTEX
:
619 v3d_nir_setup_vpm_layout_vs(c
, &state
);
621 case MESA_SHADER_GEOMETRY
:
622 v3d_nir_setup_vpm_layout_gs(c
, &state
);
624 case MESA_SHADER_FRAGMENT
:
625 case MESA_SHADER_COMPUTE
:
628 unreachable("Unsupported shader stage");
631 nir_foreach_function(function
, s
) {
632 if (function
->impl
) {
634 nir_builder_init(&b
, function
->impl
);
636 if (c
->s
->info
.stage
== MESA_SHADER_GEOMETRY
)
637 emit_gs_prolog(c
, &b
, function
->impl
, &state
);
639 nir_foreach_block(block
, function
->impl
) {
640 nir_foreach_instr_safe(instr
, block
)
641 v3d_nir_lower_io_instr(c
, &b
, instr
,
645 nir_block
*last
= nir_impl_last_block(function
->impl
);
646 b
.cursor
= nir_after_block(last
);
647 if (s
->info
.stage
== MESA_SHADER_VERTEX
) {
648 v3d_nir_emit_ff_vpm_outputs(c
, &b
, &state
);
649 } else if (s
->info
.stage
== MESA_SHADER_GEOMETRY
) {
650 emit_gs_vpm_output_header_prolog(c
, &b
, &state
);
653 nir_metadata_preserve(function
->impl
,
654 nir_metadata_block_index
|
655 nir_metadata_dominance
);
659 if (s
->info
.stage
== MESA_SHADER_VERTEX
||
660 s
->info
.stage
== MESA_SHADER_GEOMETRY
) {
661 v3d_nir_lower_io_update_output_var_base(c
, &state
);