3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "sfn_shader_geometry.h"
28 #include "sfn_instruction_misc.h"
29 #include "sfn_instruction_fetch.h"
30 #include "sfn_shaderio.h"
/* Constructor of the geometry-shader translator.
 *
 * Forwards PIPE_SHADER_GEOMETRY, the selector, sh->shader,
 * sh->scratch_space_needed, the chip class and key.gs.first_atomic_counter
 * to the VertexStage base class.  Of the members visible here it clears
 * m_first_vertex_emitted and m_gs_tri_strip_adj_fix and zeroes
 * m_next_input_ring_offset.  It then stores key.gs.first_atomic_counter
 * in sh_info().atomic_base.
 */
34 GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader
*sh
,
35 r600_pipe_shader_selector
&sel
,
36 const r600_shader_key
&key
,
37 enum chip_class chip_class
):
38 VertexStage(PIPE_SHADER_GEOMETRY
, sel
, sh
->shader
,
39 sh
->scratch_space_needed
, chip_class
, key
.gs
.first_atomic_counter
),
42 m_first_vertex_emitted(false),
44 m_next_input_ring_offset(0),
48 m_gs_tri_strip_adj_fix(false)
/* The same key field that was handed to the base class is also stored in
 * the shader info. */
50 sh_info().atomic_base
= key
.gs
.first_atomic_counter
;
/* load_deref hook of the geometry stage.  Both parameters are marked
 * UNUSED.
 * NOTE(review): the body and the returned value are not visible in this
 * listing - confirm against the full file. */
53 bool GeometryShaderFromNir::do_emit_load_deref(UNUSED
const nir_variable
*in_var
, UNUSED nir_intrinsic_instr
* instr
)
/* Handles a store to the output variable out_var.
 *
 * Builds a MemRingOutIntruction for the stored value and stores it in
 * streamout_data, keyed by the variable's location.  The instruction is
 * not passed to emit_instruction() here; emit_vertex() emits the entries
 * of streamout_data.
 *
 * out_var: output variable being written (driver_location and location
 *          are read).
 * instr:   the store intrinsic; source operand 1 is the value to store.
 */
58 bool GeometryShaderFromNir::do_emit_store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
/* The swizzle is derived from the write mask of the intrinsic. */
60 uint32_t write_mask
= nir_intrinsic_write_mask(instr
);
61 GPRVector::Swizzle swz
= swizzle_from_mask(write_mask
);
62 auto out_value
= vec_from_nir_with_fetch_constant(instr
->src
[1], write_mask
, swz
, true);
/* Record the write mask in the shader info of this output. */
64 sh_info().output
[out_var
->data
.driver_location
].write_mask
= write_mask
;
/* The ring write is given 4 * driver_location, the component count and
 * m_export_base. */
66 auto ir
= new MemRingOutIntruction(cf_mem_ring
, mem_write_ind
, out_value
,
67 4 * out_var
->data
.driver_location
,
68 instr
->num_components
, m_export_base
);
/* A later store to the same location replaces the entry stored before. */
70 streamout_data
[out_var
->data
.location
] = ir
;
/* System-value scan hook of the geometry stage.  The instruction parameter
 * is marked UNUSED.
 * NOTE(review): the body and the returned value are not visible in this
 * listing - confirm against the full file. */
75 bool GeometryShaderFromNir::scan_sysvalue_access(UNUSED nir_instr
*instr
)
/* Registers a geometry shader input variable in the shader info.
 *
 * Accepted locations are POS, PSIZ, FOGC, CLIP_VERTEX, CLIP_DIST0/1,
 * COL0/1, BFC0/1, PNTC, VAR0..VAR31 and TEX0..TEX7.  For these the
 * r600_shader_io entry at the variable's driver_location receives its
 * name/sid pair from r600_get_varying_semantic() and a ring_offset of
 * 16 * driver_location; m_next_input_ring_offset is advanced by 16.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
80 bool GeometryShaderFromNir::do_process_inputs(nir_variable
*input
)
83 if (input
->data
.location
== VARYING_SLOT_POS
||
84 input
->data
.location
== VARYING_SLOT_PSIZ
||
85 input
->data
.location
== VARYING_SLOT_FOGC
||
86 input
->data
.location
== VARYING_SLOT_CLIP_VERTEX
||
87 input
->data
.location
== VARYING_SLOT_CLIP_DIST0
||
88 input
->data
.location
== VARYING_SLOT_CLIP_DIST1
||
89 input
->data
.location
== VARYING_SLOT_COL0
||
90 input
->data
.location
== VARYING_SLOT_COL1
||
91 input
->data
.location
== VARYING_SLOT_BFC0
||
92 input
->data
.location
== VARYING_SLOT_BFC1
||
93 input
->data
.location
== VARYING_SLOT_PNTC
||
94 (input
->data
.location
>= VARYING_SLOT_VAR0
&&
95 input
->data
.location
<= VARYING_SLOT_VAR31
) ||
96 (input
->data
.location
>= VARYING_SLOT_TEX0
&&
97 input
->data
.location
<= VARYING_SLOT_TEX7
)) {
/* The io entry is selected by driver_location, the semantic by location. */
99 r600_shader_io
& io
= sh_info().input
[input
->data
.driver_location
];
100 auto semantic
= r600_get_varying_semantic(input
->data
.location
);
101 io
.name
= semantic
.first
;
102 io
.sid
= semantic
.second
;
104 io
.ring_offset
= 16 * input
->data
.driver_location
;
/* This running total is later stored in ring_item_sizes[0] by
 * do_allocate_reserved_registers(). */
106 m_next_input_ring_offset
+= 16;
/* Registers a geometry shader output variable in the shader info.
 *
 * Accepted locations are COL0/1, VAR0..VAR31, TEX0..TEX7, BFC0/1, PNTC,
 * CLIP_VERTEX, CLIP_DIST0/1, PRIMITIVE_ID, POS, PSIZ, LAYER, VIEWPORT and
 * FOGC.  For these the r600_shader_io entry at the variable's
 * driver_location receives its name/sid pair from
 * r600_get_varying_semantic() and is passed to evaluate_spi_sid().
 * A clip distance slot adds 4 to m_num_clip_dist; the viewport slot sets
 * vs_out_viewport and vs_out_misc_write in the shader info.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
113 bool GeometryShaderFromNir::do_process_outputs(nir_variable
*output
)
115 if (output
->data
.location
== VARYING_SLOT_COL0
||
116 output
->data
.location
== VARYING_SLOT_COL1
||
117 (output
->data
.location
>= VARYING_SLOT_VAR0
&&
118 output
->data
.location
<= VARYING_SLOT_VAR31
) ||
119 (output
->data
.location
>= VARYING_SLOT_TEX0
&&
120 output
->data
.location
<= VARYING_SLOT_TEX7
) ||
121 output
->data
.location
== VARYING_SLOT_BFC0
||
122 output
->data
.location
== VARYING_SLOT_BFC1
||
123 output
->data
.location
== VARYING_SLOT_PNTC
||
124 output
->data
.location
== VARYING_SLOT_CLIP_VERTEX
||
125 output
->data
.location
== VARYING_SLOT_CLIP_DIST0
||
126 output
->data
.location
== VARYING_SLOT_CLIP_DIST1
||
127 output
->data
.location
== VARYING_SLOT_PRIMITIVE_ID
||
128 output
->data
.location
== VARYING_SLOT_POS
||
129 output
->data
.location
== VARYING_SLOT_PSIZ
||
130 output
->data
.location
== VARYING_SLOT_LAYER
||
131 output
->data
.location
== VARYING_SLOT_VIEWPORT
||
132 output
->data
.location
== VARYING_SLOT_FOGC
) {
/* The io entry is selected by driver_location, the semantic by location. */
133 r600_shader_io
& io
= sh_info().output
[output
->data
.driver_location
];
135 auto semantic
= r600_get_varying_semantic(output
->data
.location
);
136 io
.name
= semantic
.first
;
137 io
.sid
= semantic
.second
;
139 evaluate_spi_sid(io
);
/* Each clip distance slot adds four to the count that do_finalize()
 * turns into a bit mask. */
142 if (output
->data
.location
== VARYING_SLOT_CLIP_DIST0
||
143 output
->data
.location
== VARYING_SLOT_CLIP_DIST1
) {
144 m_num_clip_dist
+= 4;
/* Writing the viewport slot sets both flags below. */
147 if (output
->data
.location
== VARYING_SLOT_VIEWPORT
) {
148 sh_info().vs_out_viewport
= 1;
149 sh_info().vs_out_misc_write
= 1;
/* Sets up the registers the geometry stage reads or keeps for itself.
 *
 * - calls increment_reserved_registers() twice,
 * - creates GPRValues for the six (sel, chan) pairs (0,0) (0,1) (0,3)
 *   (1,0) (1,1) (1,2), stores them in m_per_vertex_offsets and injects them,
 * - does the same for m_primitive_id at (0,2) and m_invocation_id at (1,3),
 * - takes a temp register as m_export_base and emits a mov of zero into it,
 * - stores m_next_input_ring_offset in sh_info().ring_item_sizes[0],
 * - tests m_key.gs.tri_strip_adj_fix.
 *
 * NOTE(review): the statement guarded by the final test and the return
 * value are not visible in this listing.
 */
157 bool GeometryShaderFromNir::do_allocate_reserved_registers()
/* Register index and channel of each of the six per-vertex offsets. */
159 const int sel
[6] = {0, 0 ,0, 1, 1, 1};
160 const int chan
[6] = {0, 1 ,3, 0, 1, 2};
162 increment_reserved_registers();
163 increment_reserved_registers();
165 /* Reserve registers used by the shaders (should check how many
166 * components are actually used) */
167 for (int i
= 0; i
< 6; ++i
) {
168 auto reg
= new GPRValue(sel
[i
], chan
[i
]);
170 m_per_vertex_offsets
[i
].reset(reg
);
171 inject_register(sel
[i
], chan
[i
], m_per_vertex_offsets
[i
], false);
/* The two channels skipped by the tables above, (0,2) and (1,3), hold the
 * primitive id and the invocation id. */
173 auto reg
= new GPRValue(0, 2);
175 m_primitive_id
.reset(reg
);
176 inject_register(0, 2, m_primitive_id
, false);
178 reg
= new GPRValue(1, 3);
180 m_invocation_id
.reset(reg
);
181 inject_register(1, 3, m_invocation_id
, false);
/* m_export_base starts at zero; emit_vertex() adds to it. */
183 m_export_base
= get_temp_register();
184 emit_instruction(new AluInstruction(op1_mov
, m_export_base
, Value::zero
, {alu_write
, alu_last_instr
}));
186 sh_info().ring_item_sizes
[0] = m_next_input_ring_offset
;
188 if (m_key
.gs
.tri_strip_adj_fix
)
/* Replaces the six per-vertex offsets with conditionally rotated copies.
 *
 * adjhelp0 (channel 1 of the m_export_base register) receives
 * m_primitive_id AND 1.  For every i in 0..5 an op3_cnde_int instruction
 * with the sources adjhelp0, m_per_vertex_offsets[i] and
 * m_per_vertex_offsets[rotate_indices[i]] writes to adjhelp[i].
 * Afterwards m_per_vertex_offsets[i] is replaced by adjhelp[i].
 *
 * NOTE(review): presumably the unrotated offset is selected when the low
 * primitive-id bit is zero - confirm against the CNDE_INT semantics.
 */
194 void GeometryShaderFromNir::emit_adj_fix()
196 PValue
adjhelp0(new GPRValue(m_export_base
->sel(), 1));
197 emit_instruction(op2_and_int
, adjhelp0
, {m_primitive_id
, Value::one_i
}, {alu_write
, alu_last_instr
});
199 int help2
= allocate_temp_register();
/* Destination channels of the six results and, per index, the source
 * index of the rotated alternative. */
201 int reg_chanels
[6] = {0, 1, 2, 3, 2, 3};
203 int rotate_indices
[6] = {4, 5, 0, 1, 2, 3};
/* Results 0..3 use the temp register help2; results 4 and 5 use channels
 * 2 and 3 of the m_export_base register. */
205 reg_indices
[0] = reg_indices
[1] = reg_indices
[2] = reg_indices
[3] = help2
;
206 reg_indices
[4] = reg_indices
[5] = m_export_base
->sel();
208 std::array
<PValue
, 6> adjhelp
;
210 AluInstruction
*ir
= nullptr;
211 for (int i
= 0; i
< 6; i
++) {
212 adjhelp
[i
].reset(new GPRValue(reg_indices
[i
], reg_chanels
[i
]));
213 ir
= new AluInstruction(op3_cnde_int
, adjhelp
[i
],
214 {adjhelp0
, m_per_vertex_offsets
[i
],
215 m_per_vertex_offsets
[rotate_indices
[i
]]},
/* NOTE(review): the condition guarding this set_flag call is not visible
 * in this listing. */
218 ir
->set_flag(alu_last_instr
);
219 emit_instruction(ir
);
221 ir
->set_flag(alu_last_instr
);
/* From here on the per-vertex offsets refer to the selected values. */
223 for (int i
= 0; i
< 6; i
++)
224 m_per_vertex_offsets
[i
] = adjhelp
[i
];
/* Remembers array derefs so that a later load can be resolved.
 *
 * For a deref of type nir_deref_type_array an ArrayDeref is built from the
 * variable returned by get_deref_location(instr->parent) and a pointer to
 * the array index.  It is stored in m_in_array_deref under the SSA index
 * of the deref's destination; emit_intrinsic_instruction_override() looks
 * it up there.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
227 bool GeometryShaderFromNir::emit_deref_instruction_override(nir_deref_instr
* instr
)
229 if (instr
->deref_type
== nir_deref_type_array
) {
230 auto var
= get_deref_location(instr
->parent
);
231 ArrayDeref ad
= {var
, &instr
->arr
.index
};
232 assert(instr
->dest
.is_ssa
);
233 m_in_array_deref
[instr
->dest
.ssa
.index
] = ad
;
235 /* Problem: nir_intrinsic_load_deref tries to look up the
236 * variable and will not find it, so that needs to be overridden too */
/* Geometry-stage handling of intrinsics.
 *
 * - load_deref: if the SSA index of source 0 has an entry in
 *   m_in_array_deref, the load is handed to emit_load_from_array(),
 * - emit_vertex:   emit_vertex(instr, false),
 * - end_primitive: emit_vertex(instr, true),
 * - load_primitive_id / load_invocation_id: load_preloaded_value() with
 *   m_primitive_id or m_invocation_id.
 *
 * NOTE(review): what follows a failed lookup, and the default case, are
 * not visible in this listing.
 */
242 bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr
* instr
)
244 switch (instr
->intrinsic
) {
245 case nir_intrinsic_load_deref
: {
246 auto& src
= instr
->src
[0];
/* The key was stored by emit_deref_instruction_override(). */
248 auto array
= m_in_array_deref
.find(src
.ssa
->index
);
249 if (array
!= m_in_array_deref
.end())
250 return emit_load_from_array(instr
, array
->second
);
/* Both cases below share emit_vertex(); the flag is its cut parameter. */
252 case nir_intrinsic_emit_vertex
:
253 return emit_vertex(instr
, false);
254 case nir_intrinsic_end_primitive
:
255 return emit_vertex(instr
, true);
/* The two ids are served from the registers set up in
 * do_allocate_reserved_registers(). */
256 case nir_intrinsic_load_primitive_id
:
257 return load_preloaded_value(instr
->dest
, 0, m_primitive_id
);
258 case nir_intrinsic_load_invocation_id
:
259 return load_preloaded_value(instr
->dest
, 0, m_invocation_id
);
/* Emits the stored ring writes followed by an EmitVertex instruction.
 *
 * instr: the emit_vertex or end_primitive intrinsic; its stream id is used.
 * cut:   passed on to EmitVertex (true when called for end_primitive).
 *
 * Every entry of streamout_data is patched with the stream and emitted,
 * except the VARYING_SLOT_POS entry when the stream is not 0.  The map is
 * then cleared, EmitVertex(stream, cut) is emitted and sh_info().noutput
 * is added to m_export_base.
 *
 * NOTE(review): any condition guarding the final add and the return value
 * are not visible in this listing.
 */
266 bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr
* instr
, bool cut
)
268 int stream
= nir_intrinsic_stream_id(instr
);
/* NOTE(review): v is taken by value, so each entry is copied per
 * iteration; a const reference would avoid that. */
271 for(auto v
: streamout_data
) {
272 if (stream
== 0 || v
.first
!= VARYING_SLOT_POS
) {
273 v
.second
->patch_ring(stream
);
274 emit_instruction(v
.second
);
/* The stored writes must be recorded again for the next vertex. */
278 streamout_data
.clear();
279 emit_instruction(new EmitVertex(stream
, cut
));
282 emit_instruction(new AluInstruction(op2_add_int
, m_export_base
, m_export_base
,
283 PValue(new LiteralValue(sh_info().noutput
)),
284 {alu_write
, alu_last_instr
}));
/* Loads an input that is addressed through a recorded array deref.
 *
 * instr:       the load_deref intrinsic; its destination receives the data.
 * array_deref: variable and array index stored by
 *              emit_deref_instruction_override().
 *
 * The index must resolve to a literal constant below 6.  It selects the
 * entry of m_per_vertex_offsets used as fetch address.  The fetch is a
 * vc_fetch from R600_GS_RING_CONST_BUFFER with the offset
 * 16 * driver_location of the variable.
 *
 * NOTE(review): the statement after the error log and the return value
 * are not visible in this listing.
 */
289 bool GeometryShaderFromNir::emit_load_from_array(nir_intrinsic_instr
* instr
,
290 const ArrayDeref
& array_deref
)
292 auto dest
= vec_from_nir(instr
->dest
, instr
->num_components
);
294 const nir_load_const_instr
* literal_index
= nullptr;
/* Only an SSA index is looked up as a literal constant. */
296 if (array_deref
.index
->is_ssa
)
297 literal_index
= get_literal_constant(array_deref
.index
->ssa
->index
);
299 if (!literal_index
) {
300 sfn_log
<< SfnLog::err
<< "GS: Indirect input addressing not (yet) supported\n";
/* m_per_vertex_offsets has six entries; the bound is only checked by this
 * assert. */
303 assert(literal_index
->value
[0].u32
< 6);
304 PValue addr
= m_per_vertex_offsets
[literal_index
->value
[0].u32
];
306 auto fetch
= new FetchInstruction(vc_fetch
, no_index_offset
, dest
, addr
,
307 16 * array_deref
.var
->data
.driver_location
,
308 R600_GS_RING_CONST_BUFFER
, PValue(), bim_none
, true);
309 emit_instruction(fetch
);
/* Final fix-up of the shader info.
 *
 * When m_num_clip_dist is non-zero, cc_dist_mask and clip_dist_write both
 * receive a mask with the lowest m_num_clip_dist bits set.
 * m_num_clip_dist is increased in do_process_outputs().
 */
313 void GeometryShaderFromNir::do_finalize()
315 if (m_num_clip_dist
) {
316 sh_info().cc_dist_mask
= (1 << m_num_clip_dist
) - 1;
317 sh_info().clip_dist_write
= (1 << m_num_clip_dist
) - 1;