nir/opt_vectorize: Add a callback for filtering of vectorizing.
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_shader_geometry.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_shader_geometry.h"
28 #include "sfn_instruction_misc.h"
29 #include "sfn_instruction_fetch.h"
30 #include "sfn_shaderio.h"
31
32 namespace r600 {
33
34 GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader *sh,
35 r600_pipe_shader_selector &sel,
36 const r600_shader_key &key,
37 enum chip_class chip_class):
38 VertexStage(PIPE_SHADER_GEOMETRY, sel, sh->shader,
39 sh->scratch_space_needed, chip_class, key.gs.first_atomic_counter),
40 m_pipe_shader(sh),
41 m_so_info(&sel.so),
42 m_first_vertex_emitted(false),
43 m_offset(0),
44 m_next_input_ring_offset(0),
45 m_key(key),
46 m_num_clip_dist(0),
47 m_cur_ring_output(0),
48 m_gs_tri_strip_adj_fix(false)
49 {
50 sh_info().atomic_base = key.gs.first_atomic_counter;
51 }
52
53 bool GeometryShaderFromNir::do_emit_load_deref(UNUSED const nir_variable *in_var, UNUSED nir_intrinsic_instr* instr)
54 {
55 return false;
56 }
57
58 bool GeometryShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
59 {
60 uint32_t write_mask = nir_intrinsic_write_mask(instr);
61 GPRVector::Swizzle swz = swizzle_from_mask(write_mask);
62 auto out_value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swz, true);
63
64 sh_info().output[out_var->data.driver_location].write_mask = write_mask;
65
66 auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value,
67 4 * out_var->data.driver_location,
68 instr->num_components, m_export_base);
69
70 streamout_data[out_var->data.location] = ir;
71
72 return true;
73 }
74
75 bool GeometryShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr)
76 {
77 return true;
78 }
79
80 bool GeometryShaderFromNir::do_process_inputs(nir_variable *input)
81 {
82
83 if (input->data.location == VARYING_SLOT_POS ||
84 input->data.location == VARYING_SLOT_PSIZ ||
85 input->data.location == VARYING_SLOT_FOGC ||
86 input->data.location == VARYING_SLOT_CLIP_VERTEX ||
87 input->data.location == VARYING_SLOT_CLIP_DIST0 ||
88 input->data.location == VARYING_SLOT_CLIP_DIST1 ||
89 input->data.location == VARYING_SLOT_COL0 ||
90 input->data.location == VARYING_SLOT_COL1 ||
91 input->data.location == VARYING_SLOT_BFC0 ||
92 input->data.location == VARYING_SLOT_BFC1 ||
93 input->data.location == VARYING_SLOT_PNTC ||
94 (input->data.location >= VARYING_SLOT_VAR0 &&
95 input->data.location <= VARYING_SLOT_VAR31) ||
96 (input->data.location >= VARYING_SLOT_TEX0 &&
97 input->data.location <= VARYING_SLOT_TEX7)) {
98
99 r600_shader_io& io = sh_info().input[input->data.driver_location];
100 auto semantic = r600_get_varying_semantic(input->data.location);
101 io.name = semantic.first;
102 io.sid = semantic.second;
103
104 io.ring_offset = 16 * input->data.driver_location;
105 ++sh_info().ninput;
106 m_next_input_ring_offset += 16;
107 return true;
108 }
109
110 return false;
111 }
112
113 bool GeometryShaderFromNir::do_process_outputs(nir_variable *output)
114 {
115 if (output->data.location == VARYING_SLOT_COL0 ||
116 output->data.location == VARYING_SLOT_COL1 ||
117 (output->data.location >= VARYING_SLOT_VAR0 &&
118 output->data.location <= VARYING_SLOT_VAR31) ||
119 (output->data.location >= VARYING_SLOT_TEX0 &&
120 output->data.location <= VARYING_SLOT_TEX7) ||
121 output->data.location == VARYING_SLOT_BFC0 ||
122 output->data.location == VARYING_SLOT_BFC1 ||
123 output->data.location == VARYING_SLOT_PNTC ||
124 output->data.location == VARYING_SLOT_CLIP_VERTEX ||
125 output->data.location == VARYING_SLOT_CLIP_DIST0 ||
126 output->data.location == VARYING_SLOT_CLIP_DIST1 ||
127 output->data.location == VARYING_SLOT_PRIMITIVE_ID ||
128 output->data.location == VARYING_SLOT_POS ||
129 output->data.location == VARYING_SLOT_PSIZ ||
130 output->data.location == VARYING_SLOT_LAYER ||
131 output->data.location == VARYING_SLOT_VIEWPORT ||
132 output->data.location == VARYING_SLOT_FOGC) {
133 r600_shader_io& io = sh_info().output[output->data.driver_location];
134
135 auto semantic = r600_get_varying_semantic(output->data.location);
136 io.name = semantic.first;
137 io.sid = semantic.second;
138
139 evaluate_spi_sid(io);
140 ++sh_info().noutput;
141
142 if (output->data.location == VARYING_SLOT_CLIP_DIST0 ||
143 output->data.location == VARYING_SLOT_CLIP_DIST1) {
144 m_num_clip_dist += 4;
145 }
146
147 if (output->data.location == VARYING_SLOT_VIEWPORT) {
148 sh_info().vs_out_viewport = 1;
149 sh_info().vs_out_misc_write = 1;
150 }
151 return true;
152 }
153 return false;
154 }
155
156
157 bool GeometryShaderFromNir::do_allocate_reserved_registers()
158 {
159 const int sel[6] = {0, 0 ,0, 1, 1, 1};
160 const int chan[6] = {0, 1 ,3, 0, 1, 2};
161
162 increment_reserved_registers();
163 increment_reserved_registers();
164
165 /* Reserve registers used by the shaders (should check how many
166 * components are actually used */
167 for (int i = 0; i < 6; ++i) {
168 auto reg = new GPRValue(sel[i], chan[i]);
169 reg->set_as_input();
170 m_per_vertex_offsets[i].reset(reg);
171 inject_register(sel[i], chan[i], m_per_vertex_offsets[i], false);
172 }
173 auto reg = new GPRValue(0, 2);
174 reg->set_as_input();
175 m_primitive_id.reset(reg);
176 inject_register(0, 2, m_primitive_id, false);
177
178 reg = new GPRValue(1, 3);
179 reg->set_as_input();
180 m_invocation_id.reset(reg);
181 inject_register(1, 3, m_invocation_id, false);
182
183 m_export_base = get_temp_register();
184 emit_instruction(new AluInstruction(op1_mov, m_export_base, Value::zero, {alu_write, alu_last_instr}));
185
186 sh_info().ring_item_sizes[0] = m_next_input_ring_offset;
187
188 if (m_key.gs.tri_strip_adj_fix)
189 emit_adj_fix();
190
191 return true;
192 }
193
194 void GeometryShaderFromNir::emit_adj_fix()
195 {
196 PValue adjhelp0(new GPRValue(m_export_base->sel(), 1));
197 emit_instruction(op2_and_int, adjhelp0, {m_primitive_id, Value::one_i}, {alu_write, alu_last_instr});
198
199 int help2 = allocate_temp_register();
200 int reg_indices[6];
201 int reg_chanels[6] = {0, 1, 2, 3, 2, 3};
202
203 int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
204
205 reg_indices[0] = reg_indices[1] = reg_indices[2] = reg_indices[3] = help2;
206 reg_indices[4] = reg_indices[5] = m_export_base->sel();
207
208 std::array<PValue, 6> adjhelp;
209
210 AluInstruction *ir = nullptr;
211 for (int i = 0; i < 6; i++) {
212 adjhelp[i].reset(new GPRValue(reg_indices[i], reg_chanels[i]));
213 ir = new AluInstruction(op3_cnde_int, adjhelp[i],
214 {adjhelp0, m_per_vertex_offsets[i],
215 m_per_vertex_offsets[rotate_indices[i]]},
216 {alu_write});
217 if (i == 3)
218 ir->set_flag(alu_last_instr);
219 emit_instruction(ir);
220 }
221 ir->set_flag(alu_last_instr);
222
223 for (int i = 0; i < 6; i++)
224 m_per_vertex_offsets[i] = adjhelp[i];
225 }
226
227 bool GeometryShaderFromNir::emit_deref_instruction_override(nir_deref_instr* instr)
228 {
229 if (instr->deref_type == nir_deref_type_array) {
230 auto var = get_deref_location(instr->parent);
231 ArrayDeref ad = {var, &instr->arr.index};
232 assert(instr->dest.is_ssa);
233 m_in_array_deref[instr->dest.ssa.index] = ad;
234
235 /* Problem: nir_intrinsice_load_deref tries to lookup the
236 * variable, and will not find it, need to override that too */
237 return true;
238 }
239 return false;
240 }
241
242 bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
243 {
244 switch (instr->intrinsic) {
245 case nir_intrinsic_load_deref: {
246 auto& src = instr->src[0];
247 assert(src.is_ssa);
248 auto array = m_in_array_deref.find(src.ssa->index);
249 if (array != m_in_array_deref.end())
250 return emit_load_from_array(instr, array->second);
251 } break;
252 case nir_intrinsic_emit_vertex:
253 return emit_vertex(instr, false);
254 case nir_intrinsic_end_primitive:
255 return emit_vertex(instr, true);
256 case nir_intrinsic_load_primitive_id:
257 return load_preloaded_value(instr->dest, 0, m_primitive_id);
258 case nir_intrinsic_load_invocation_id:
259 return load_preloaded_value(instr->dest, 0, m_invocation_id);
260 default:
261 ;
262 }
263 return false;
264 }
265
266 bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr* instr, bool cut)
267 {
268 int stream = nir_intrinsic_stream_id(instr);
269 assert(stream < 4);
270
271 for(auto v: streamout_data) {
272 if (stream == 0 || v.first != VARYING_SLOT_POS) {
273 v.second->patch_ring(stream);
274 emit_instruction(v.second);
275 } else
276 delete v.second;
277 }
278 streamout_data.clear();
279 emit_instruction(new EmitVertex(stream, cut));
280
281 if (!cut)
282 emit_instruction(new AluInstruction(op2_add_int, m_export_base, m_export_base,
283 PValue(new LiteralValue(sh_info().noutput)),
284 {alu_write, alu_last_instr}));
285
286 return true;
287 }
288
289 bool GeometryShaderFromNir::emit_load_from_array(nir_intrinsic_instr* instr,
290 const ArrayDeref& array_deref)
291 {
292 auto dest = vec_from_nir(instr->dest, instr->num_components);
293
294 const nir_load_const_instr* literal_index = nullptr;
295
296 if (array_deref.index->is_ssa)
297 literal_index = get_literal_constant(array_deref.index->ssa->index);
298
299 if (!literal_index) {
300 sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
301 return false;
302 }
303 assert(literal_index->value[0].u32 < 6);
304 PValue addr = m_per_vertex_offsets[literal_index->value[0].u32];
305
306 auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, addr,
307 16 * array_deref.var->data.driver_location,
308 R600_GS_RING_CONST_BUFFER, PValue(), bim_none, true);
309 emit_instruction(fetch);
310 return true;
311 }
312
313 void GeometryShaderFromNir::do_finalize()
314 {
315 if (m_num_clip_dist) {
316 sh_info().cc_dist_mask = (1 << m_num_clip_dist) - 1;
317 sh_info().clip_dist_write = (1 << m_num_clip_dist) - 1;
318 }
319 }
320
321 }