r600/sfn: remove pointless check
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_vertexstageexport.cpp
1 #include "sfn_vertexstageexport.h"
2
3 #include "sfn_shaderio.h"
4
5 namespace r600 {
6
7 using std::priority_queue;
8
9 VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
10 m_proc(proc),
11 m_cur_clip_pos(1),
12 m_cur_param(0)
13 {
14
15 }
16
17 VertexStageExportBase::~VertexStageExportBase()
18 {
19
20 }
21
22 VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
23 const pipe_stream_output_info *so_info,
24 r600_pipe_shader *pipe_shader, const r600_shader_key &key):
25 VertexStageExportBase(proc),
26 m_last_param_export(nullptr),
27 m_last_pos_export(nullptr),
28 m_num_clip_dist(0),
29 m_enabled_stream_buffers_mask(0),
30 m_so_info(so_info),
31 m_pipe_shader(pipe_shader),
32 m_key(key)
33 {
34 }
35
36 void VertexStageExportBase::setup_paramn_map()
37 {
38 priority_queue<int, std::vector<int>, std::greater<int>> q;
39 for (auto a: m_param_map) {
40 q.push(a.first);
41 }
42
43 int next_param = 0;
44 while (!q.empty()) {
45 int loc = q.top();
46 q.pop();
47 m_param_map[loc] = next_param++;
48 }
49 }
50
51 bool VertexStageExportBase::do_process_outputs(nir_variable *output)
52 {
53 if (output->data.location == VARYING_SLOT_COL0 ||
54 output->data.location == VARYING_SLOT_COL1 ||
55 (output->data.location >= VARYING_SLOT_VAR0 &&
56 output->data.location <= VARYING_SLOT_VAR31) ||
57 (output->data.location >= VARYING_SLOT_TEX0 &&
58 output->data.location <= VARYING_SLOT_TEX7) ||
59 output->data.location == VARYING_SLOT_BFC0 ||
60 output->data.location == VARYING_SLOT_BFC1 ||
61 output->data.location == VARYING_SLOT_CLIP_VERTEX ||
62 output->data.location == VARYING_SLOT_CLIP_DIST0 ||
63 output->data.location == VARYING_SLOT_CLIP_DIST1 ||
64 output->data.location == VARYING_SLOT_POS ||
65 output->data.location == VARYING_SLOT_PSIZ ||
66 output->data.location == VARYING_SLOT_FOGC ||
67 output->data.location == VARYING_SLOT_LAYER ||
68 output->data.location == VARYING_SLOT_EDGE ||
69 output->data.location == VARYING_SLOT_VIEWPORT
70 ) {
71
72 r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location];
73 auto semantic = r600_get_varying_semantic(output->data.location);
74 io.name = semantic.first;
75 io.sid = semantic.second;
76
77 m_proc.evaluate_spi_sid(io);
78 io.write_mask = ((1 << glsl_get_components(output->type)) - 1)
79 << output->data.location_frac;
80 ++m_proc.sh_info().noutput;
81
82 if (output->data.location == VARYING_SLOT_PSIZ ||
83 output->data.location == VARYING_SLOT_EDGE ||
84 output->data.location == VARYING_SLOT_LAYER) // VIEWPORT?
85 m_cur_clip_pos = 2;
86
87 if (output->data.location != VARYING_SLOT_POS &&
88 output->data.location != VARYING_SLOT_EDGE &&
89 output->data.location != VARYING_SLOT_PSIZ &&
90 output->data.location != VARYING_SLOT_CLIP_VERTEX)
91 m_param_map[output->data.location] = m_cur_param++;
92
93 return true;
94 }
95 return false;
96 }
97
98
99 bool VertexStageExportForFS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
100 {
101
102 switch (out_var->data.location) {
103 case VARYING_SLOT_PSIZ:
104 m_proc.sh_info().vs_out_point_size = 1;
105 m_proc.sh_info().vs_out_misc_write = 1;
106 /* fallthrough */
107 case VARYING_SLOT_POS:
108 return emit_varying_pos(out_var, instr);
109 case VARYING_SLOT_EDGE: {
110 std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
111 return emit_varying_pos(out_var, instr, &swizzle_override);
112 }
113 case VARYING_SLOT_VIEWPORT: {
114 std::array<uint32_t, 4> swizzle_override = {7, 7, 7, 0};
115 return emit_varying_pos(out_var, instr, &swizzle_override) &&
116 emit_varying_param(out_var, instr);
117 }
118 case VARYING_SLOT_CLIP_VERTEX:
119 return emit_clip_vertices(out_var, instr);
120 case VARYING_SLOT_CLIP_DIST0:
121 case VARYING_SLOT_CLIP_DIST1:
122 m_num_clip_dist += 4;
123 return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
124 case VARYING_SLOT_LAYER: {
125 m_proc.sh_info().vs_out_misc_write = 1;
126 m_proc.sh_info().vs_out_layer = 1;
127 std::array<uint32_t, 4> swz = {7,7,0,7};
128 return emit_varying_pos(out_var, instr, &swz) &&
129 emit_varying_param(out_var, instr);
130 }
131 case VARYING_SLOT_VIEW_INDEX:
132 return emit_varying_pos(out_var, instr) &&
133 emit_varying_param(out_var, instr);
134
135 default:
136 return emit_varying_param(out_var, instr);
137 }
138
139 fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
140 out_var->data.location);
141 return false;
142 }
143
144 bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
145 std::array<uint32_t, 4> *swizzle_override)
146 {
147 std::array<uint32_t,4> swizzle;
148 uint32_t write_mask = 0;
149
150 if (swizzle_override) {
151 swizzle = *swizzle_override;
152 for (int i = 0; i < 4; ++i) {
153 if (swizzle[i] < 6)
154 write_mask |= 1 << i;
155 }
156 } else {
157 write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
158 for (int i = 0; i < 4; ++i)
159 swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
160 }
161
162 m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
163
164 GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
165 m_proc.set_output(out_var->data.driver_location, value.sel());
166
167 int export_slot = 0;
168
169 switch (out_var->data.location) {
170 case VARYING_SLOT_EDGE: {
171 m_proc.sh_info().vs_out_misc_write = 1;
172 m_proc.sh_info().vs_out_edgeflag = 1;
173 m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
174 m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr});
175 m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf;
176 }
177 /* fallthrough */
178 case VARYING_SLOT_PSIZ:
179 case VARYING_SLOT_LAYER:
180 export_slot = 1;
181 break;
182 case VARYING_SLOT_VIEWPORT:
183 m_proc.sh_info().vs_out_misc_write = 1;
184 m_proc.sh_info().vs_out_viewport = 1;
185 export_slot = 1;
186 break;
187 case VARYING_SLOT_POS:
188 break;
189 case VARYING_SLOT_CLIP_DIST0:
190 case VARYING_SLOT_CLIP_DIST1:
191 export_slot = m_cur_clip_pos++;
192 break;
193 default:
194 sfn_log << SfnLog::err << __func__ << "Unsupported location "
195 << out_var->data.location << "\n";
196 return false;
197 }
198
199 m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos);
200 m_proc.emit_export_instruction(m_last_pos_export);
201 m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
202 return true;
203 }
204
205 bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
206 {
207 assert(out_var->data.driver_location < m_proc.sh_info().noutput);
208 sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
209
210 int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
211 std::array<uint32_t,4> swizzle;
212 for (int i = 0; i < 4; ++i)
213 swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
214
215 m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
216
217 GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
218 m_proc.sh_info().output[out_var->data.driver_location].gpr = value.sel();
219
220 /* This should use the registers!! */
221 m_proc.set_output(out_var->data.driver_location, value.sel());
222
223 auto param_loc = m_param_map.find(out_var->data.location);
224 assert(param_loc != m_param_map.end());
225
226 m_last_param_export = new ExportInstruction(param_loc->second, value, ExportInstruction::et_param);
227 m_proc.emit_export_instruction(m_last_param_export);
228 m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
229 return true;
230 }
231
232 bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
233 {
234 m_proc.sh_info().cc_dist_mask = 0xff;
235 m_proc.sh_info().clip_dist_write = 0xff;
236
237 GPRVector clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3});
238
239 for (int i = 0; i < 4; ++i)
240 m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
241
242 GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
243
244 for (int i = 0; i < 8; i++) {
245 int oreg = i >> 2;
246 int ochan = i & 3;
247 AluInstruction *ir = nullptr;
248 for (int j = 0; j < 4; j++) {
249 ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex.reg_i(j),
250 PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
251 (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
252 m_proc.emit_instruction(ir);
253 }
254 ir->set_flag(alu_last_instr);
255 }
256
257 m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
258 m_proc.emit_export_instruction(m_last_pos_export);
259
260 m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
261 m_proc.emit_export_instruction(m_last_pos_export);
262
263 return true;
264 }
265
266 void VertexStageExportForFS::finalize_exports()
267 {
268 if (m_key.vs.as_gs_a) {
269 PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
270 GPRVector primid({m_proc.primitive_id(), o,o,o});
271 m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
272 m_proc.emit_export_instruction(m_last_param_export);
273 int i;
274 i = m_proc.sh_info().noutput++;
275 auto& io = m_proc.sh_info().output[i];
276 io.name = TGSI_SEMANTIC_PRIMID;
277 io.sid = 0;
278 io.gpr = 0;
279 io.interpolate = TGSI_INTERPOLATE_CONSTANT;
280 io.write_mask = 0x1;
281 io.spi_sid = m_key.vs.prim_id_out;
282 m_proc.sh_info().vs_as_gs_a = 1;
283 }
284
285 if (m_so_info && m_so_info->num_outputs)
286 emit_stream(-1);
287
288 m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
289
290 if (!m_last_param_export) {
291 GPRVector value(0,{7,7,7,7});
292 m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
293 m_proc.emit_export_instruction(m_last_param_export);
294 }
295 m_last_param_export->set_last();
296
297 if (!m_last_pos_export) {
298 GPRVector value(0,{7,7,7,7});
299 m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
300 m_proc.emit_export_instruction(m_last_pos_export);
301 }
302 m_last_pos_export->set_last();
303 }
304
305 bool VertexStageExportForFS::emit_stream(int stream)
306 {
307 assert(m_so_info);
308 if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
309 R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
310 return false;
311 }
312 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
313 if (m_so_info->output[i].output_buffer >= 4) {
314 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
315 m_so_info->output[i].output_buffer);
316 return false;
317 }
318 }
319 const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
320 unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
321 std::vector<GPRVector> tmp(m_so_info->num_outputs);
322
323 /* Initialize locations where the outputs are stored. */
324 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
325 if (stream != -1 && stream != m_so_info->output[i].stream)
326 continue;
327
328 sfn_log << SfnLog::instr << "Emit stream " << i
329 << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
330
331
332 so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
333
334 if (!so_gpr[i]) {
335 sfn_log << SfnLog::err << "\nERR: register index "
336 << m_so_info->output[i].register_index
337 << " doesn't correspond to an output register\n";
338 return false;
339 }
340 start_comp[i] = m_so_info->output[i].start_component;
341 /* Lower outputs with dst_offset < start_component.
342 *
343 * We can only output 4D vectors with a write mask, e.g. we can
344 * only output the W component at offset 3, etc. If we want
345 * to store Y, Z, or W at buffer offset 0, we need to use MOV
346 * to move it to X and output X. */
347 if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
348 int tmp_index = m_proc.allocate_temp_register();
349 int sc = m_so_info->output[i].start_component;
350 AluInstruction *alu = nullptr;
351 for (int j = 0; j < m_so_info->output[i].num_components; j++) {
352 PValue dst(new GPRValue(tmp_index, j));
353 alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
354 tmp[i].set_reg_i(j, dst);
355 m_proc.emit_instruction(alu);
356 }
357 if (alu)
358 alu->set_flag(alu_last_instr);
359
360 /* Fill the vector with masked values */
361 PValue dst_blank(new GPRValue(tmp_index, 7));
362 for (int j = m_so_info->output[i].num_components; j < 4; j++)
363 tmp[i].set_reg_i(j, dst_blank);
364
365 start_comp[i] = 0;
366 so_gpr[i] = &tmp[i];
367 }
368 sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
369 }
370
371 /* Write outputs to buffers. */
372 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
373 sfn_log << SfnLog::instr << "Write output buffer " << i
374 << " with register index " << m_so_info->output[i].register_index << "\n";
375
376 StreamOutIntruction *out_stream =
377 new StreamOutIntruction(*so_gpr[i],
378 m_so_info->output[i].num_components,
379 m_so_info->output[i].dst_offset - start_comp[i],
380 ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
381 m_so_info->output[i].output_buffer,
382 m_so_info->output[i].stream);
383 m_proc.emit_export_instruction(out_stream);
384 m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
385 }
386 return true;
387 }
388
389
390 VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
391 const r600_shader *gs_shader):
392 VertexStageExportBase(proc),
393 m_gs_shader(gs_shader)
394 {
395
396 }
397
398 bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
399 {
400
401 int ring_offset = -1;
402 const r600_shader_io& out_io = m_proc.sh_info().output[out_var->data.driver_location];
403
404 sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
405 << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
406 for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
407 auto& in_io = m_gs_shader->input[k];
408 sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
409
410 if (in_io.name == out_io.name &&
411 in_io.sid == out_io.sid) {
412 ring_offset = in_io.ring_offset;
413 break;
414 }
415 }
416
417 if (out_var->data.location == VARYING_SLOT_VIEWPORT) {
418 m_proc.sh_info().vs_out_viewport = 1;
419 m_proc.sh_info().vs_out_misc_write = 1;
420 return true;
421 }
422
423 if (ring_offset == -1) {
424 sfn_log << SfnLog::err << "VS defines output at "
425 << out_var->data.driver_location << "name=" << out_io.name
426 << " sid=" << out_io.sid << " that is not consumed as GS input\n";
427 return true;
428 }
429
430 uint32_t write_mask = (1 << instr->num_components) - 1;
431
432 GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
433 swizzle_from_comps(instr->num_components));
434
435 auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value,
436 ring_offset >> 2, 4, PValue());
437 m_proc.emit_export_instruction(ir);
438
439 m_proc.sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
440 if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
441 out_var->data.location == VARYING_SLOT_CLIP_DIST1)
442 m_num_clip_dist += 4;
443
444 return true;
445 }
446
447 void VertexStageExportForGS::finalize_exports()
448 {
449
450 }
451
452 VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
453 VertexStageExportBase(proc)
454 {
455 }
456
457 bool VertexStageExportForES::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
458 {
459 return true;
460 }
461
462 void VertexStageExportForES::finalize_exports()
463 {
464
465 }
466
467 }