swr: fix build with mingw
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_vertexstageexport.cpp
1 #include "sfn_vertexstageexport.h"
2
3 #include "sfn_shaderio.h"
4
5 namespace r600 {
6
7 using std::priority_queue;
8
9 VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
10 m_proc(proc),
11 m_cur_clip_pos(1),
12 m_cur_param(0)
13 {
14
15 }
16
17 VertexStageExportBase::~VertexStageExportBase()
18 {
19
20 }
21
22 VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
23 const pipe_stream_output_info *so_info,
24 r600_pipe_shader *pipe_shader, const r600_shader_key &key):
25 VertexStageExportBase(proc),
26 m_last_param_export(nullptr),
27 m_last_pos_export(nullptr),
28 m_num_clip_dist(0),
29 m_enabled_stream_buffers_mask(0),
30 m_so_info(so_info),
31 m_pipe_shader(pipe_shader),
32 m_key(key)
33 {
34 }
35
36 bool VertexStageExportBase::do_process_outputs(nir_variable *output)
37 {
38 if (output->data.location == VARYING_SLOT_COL0 ||
39 output->data.location == VARYING_SLOT_COL1 ||
40 (output->data.location >= VARYING_SLOT_VAR0 &&
41 output->data.location <= VARYING_SLOT_VAR31) ||
42 (output->data.location >= VARYING_SLOT_TEX0 &&
43 output->data.location <= VARYING_SLOT_TEX7) ||
44 output->data.location == VARYING_SLOT_BFC0 ||
45 output->data.location == VARYING_SLOT_BFC1 ||
46 output->data.location == VARYING_SLOT_CLIP_VERTEX ||
47 output->data.location == VARYING_SLOT_CLIP_DIST0 ||
48 output->data.location == VARYING_SLOT_CLIP_DIST1 ||
49 output->data.location == VARYING_SLOT_POS ||
50 output->data.location == VARYING_SLOT_PSIZ ||
51 output->data.location == VARYING_SLOT_FOGC ||
52 output->data.location == VARYING_SLOT_LAYER ||
53 output->data.location == VARYING_SLOT_EDGE ||
54 output->data.location == VARYING_SLOT_VIEWPORT
55 ) {
56
57 r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location];
58 auto semantic = r600_get_varying_semantic(output->data.location);
59 io.name = semantic.first;
60 io.sid = semantic.second;
61
62 m_proc.evaluate_spi_sid(io);
63 io.write_mask = ((1 << glsl_get_components(output->type)) - 1)
64 << output->data.location_frac;
65 ++m_proc.sh_info().noutput;
66
67 if (output->data.location == VARYING_SLOT_PSIZ ||
68 output->data.location == VARYING_SLOT_EDGE ||
69 output->data.location == VARYING_SLOT_LAYER) // VIEWPORT?
70 m_cur_clip_pos = 2;
71
72 if (output->data.location != VARYING_SLOT_POS &&
73 output->data.location != VARYING_SLOT_EDGE &&
74 output->data.location != VARYING_SLOT_PSIZ &&
75 output->data.location != VARYING_SLOT_CLIP_VERTEX)
76 m_param_map[output->data.location] = m_cur_param++;
77
78 return true;
79 }
80 return false;
81 }
82
83
84 bool VertexStageExportForFS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
85 {
86
87 switch (out_var->data.location) {
88 case VARYING_SLOT_PSIZ:
89 m_proc.sh_info().vs_out_point_size = 1;
90 m_proc.sh_info().vs_out_misc_write = 1;
91 /* fallthrough */
92 case VARYING_SLOT_POS:
93 return emit_varying_pos(out_var, instr);
94 case VARYING_SLOT_EDGE: {
95 std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
96 return emit_varying_pos(out_var, instr, &swizzle_override);
97 }
98 case VARYING_SLOT_VIEWPORT: {
99 std::array<uint32_t, 4> swizzle_override = {7, 7, 7, 0};
100 return emit_varying_pos(out_var, instr, &swizzle_override) &&
101 emit_varying_param(out_var, instr);
102 }
103 case VARYING_SLOT_CLIP_VERTEX:
104 return emit_clip_vertices(out_var, instr);
105 case VARYING_SLOT_CLIP_DIST0:
106 case VARYING_SLOT_CLIP_DIST1:
107 m_num_clip_dist += 4;
108 return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
109 case VARYING_SLOT_LAYER: {
110 m_proc.sh_info().vs_out_misc_write = 1;
111 m_proc.sh_info().vs_out_layer = 1;
112 std::array<uint32_t, 4> swz = {7,7,0,7};
113 return emit_varying_pos(out_var, instr, &swz) &&
114 emit_varying_param(out_var, instr);
115 }
116 case VARYING_SLOT_VIEW_INDEX:
117 return emit_varying_pos(out_var, instr) &&
118 emit_varying_param(out_var, instr);
119
120 default:
121 return emit_varying_param(out_var, instr);
122 }
123
124 fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
125 out_var->data.location);
126 return false;
127 }
128
129 bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
130 std::array<uint32_t, 4> *swizzle_override)
131 {
132 std::array<uint32_t,4> swizzle;
133 uint32_t write_mask = 0;
134
135 if (swizzle_override) {
136 swizzle = *swizzle_override;
137 for (int i = 0; i < 4; ++i) {
138 if (swizzle[i] < 6)
139 write_mask |= 1 << i;
140 }
141 } else {
142 write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
143 for (int i = 0; i < 4; ++i)
144 swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
145 }
146
147 m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
148
149 GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
150 m_proc.set_output(out_var->data.driver_location, value.sel());
151
152 int export_slot = 0;
153
154 switch (out_var->data.location) {
155 case VARYING_SLOT_EDGE: {
156 m_proc.sh_info().vs_out_misc_write = 1;
157 m_proc.sh_info().vs_out_edgeflag = 1;
158 m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
159 m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr});
160 m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf;
161 }
162 /* fallthrough */
163 case VARYING_SLOT_PSIZ:
164 case VARYING_SLOT_LAYER:
165 export_slot = 1;
166 break;
167 case VARYING_SLOT_VIEWPORT:
168 m_proc.sh_info().vs_out_misc_write = 1;
169 m_proc.sh_info().vs_out_viewport = 1;
170 export_slot = 1;
171 break;
172 case VARYING_SLOT_POS:
173 break;
174 case VARYING_SLOT_CLIP_DIST0:
175 case VARYING_SLOT_CLIP_DIST1:
176 export_slot = m_cur_clip_pos++;
177 break;
178 default:
179 sfn_log << SfnLog::err << __func__ << "Unsupported location "
180 << out_var->data.location << "\n";
181 return false;
182 }
183
184 m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos);
185 m_proc.emit_export_instruction(m_last_pos_export);
186 m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
187 return true;
188 }
189
190 bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
191 {
192 assert(out_var->data.driver_location < m_proc.sh_info().noutput);
193 sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
194
195 int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
196 std::array<uint32_t,4> swizzle;
197 for (int i = 0; i < 4; ++i)
198 swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
199
200 m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
201
202 GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle, true);
203 m_proc.sh_info().output[out_var->data.driver_location].gpr = value.sel();
204
205 /* This should use the registers!! */
206 m_proc.set_output(out_var->data.driver_location, value.sel());
207
208 auto param_loc = m_param_map.find(out_var->data.location);
209 assert(param_loc != m_param_map.end());
210
211 m_last_param_export = new ExportInstruction(param_loc->second, value, ExportInstruction::et_param);
212 m_proc.emit_export_instruction(m_last_param_export);
213 m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
214 return true;
215 }
216
217 bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
218 {
219 m_proc.sh_info().cc_dist_mask = 0xff;
220 m_proc.sh_info().clip_dist_write = 0xff;
221
222 m_clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3});
223 m_proc.add_param_output_reg(out_var->data.driver_location, &m_clip_vertex);
224
225 for (int i = 0; i < 4; ++i)
226 m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
227
228 GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
229
230 for (int i = 0; i < 8; i++) {
231 int oreg = i >> 2;
232 int ochan = i & 3;
233 AluInstruction *ir = nullptr;
234 for (int j = 0; j < 4; j++) {
235 ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), m_clip_vertex.reg_i(j),
236 PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
237 (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
238 m_proc.emit_instruction(ir);
239 }
240 ir->set_flag(alu_last_instr);
241 }
242
243 m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
244 m_proc.emit_export_instruction(m_last_pos_export);
245
246 m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
247 m_proc.emit_export_instruction(m_last_pos_export);
248
249 return true;
250 }
251
252 void VertexStageExportForFS::finalize_exports()
253 {
254 if (m_key.vs.as_gs_a) {
255 PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
256 GPRVector primid({m_proc.primitive_id(), o,o,o});
257 m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
258 m_proc.emit_export_instruction(m_last_param_export);
259 int i;
260 i = m_proc.sh_info().noutput++;
261 auto& io = m_proc.sh_info().output[i];
262 io.name = TGSI_SEMANTIC_PRIMID;
263 io.sid = 0;
264 io.gpr = 0;
265 io.interpolate = TGSI_INTERPOLATE_CONSTANT;
266 io.write_mask = 0x1;
267 io.spi_sid = m_key.vs.prim_id_out;
268 m_proc.sh_info().vs_as_gs_a = 1;
269 }
270
271 if (m_so_info && m_so_info->num_outputs)
272 emit_stream(-1);
273
274 m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
275
276 if (!m_last_param_export) {
277 GPRVector value(0,{7,7,7,7});
278 m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
279 m_proc.emit_export_instruction(m_last_param_export);
280 }
281 m_last_param_export->set_last();
282
283 if (!m_last_pos_export) {
284 GPRVector value(0,{7,7,7,7});
285 m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
286 m_proc.emit_export_instruction(m_last_pos_export);
287 }
288 m_last_pos_export->set_last();
289 }
290
291 bool VertexStageExportForFS::emit_stream(int stream)
292 {
293 assert(m_so_info);
294 if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
295 R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
296 return false;
297 }
298 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
299 if (m_so_info->output[i].output_buffer >= 4) {
300 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
301 m_so_info->output[i].output_buffer);
302 return false;
303 }
304 }
305 const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
306 unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
307 std::vector<GPRVector> tmp(m_so_info->num_outputs);
308
309 /* Initialize locations where the outputs are stored. */
310 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
311 if (stream != -1 && stream != m_so_info->output[i].stream)
312 continue;
313
314 sfn_log << SfnLog::instr << "Emit stream " << i
315 << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
316
317
318 so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
319
320 if (!so_gpr[i]) {
321 sfn_log << SfnLog::err << "\nERR: register index "
322 << m_so_info->output[i].register_index
323 << " doesn't correspond to an output register\n";
324 return false;
325 }
326 start_comp[i] = m_so_info->output[i].start_component;
327 /* Lower outputs with dst_offset < start_component.
328 *
329 * We can only output 4D vectors with a write mask, e.g. we can
330 * only output the W component at offset 3, etc. If we want
331 * to store Y, Z, or W at buffer offset 0, we need to use MOV
332 * to move it to X and output X. */
333 if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
334 int tmp_index = m_proc.allocate_temp_register();
335 int sc = m_so_info->output[i].start_component;
336 AluInstruction *alu = nullptr;
337 for (int j = 0; j < m_so_info->output[i].num_components; j++) {
338 PValue dst(new GPRValue(tmp_index, j));
339 alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
340 tmp[i].set_reg_i(j, dst);
341 m_proc.emit_instruction(alu);
342 }
343 if (alu)
344 alu->set_flag(alu_last_instr);
345
346 /* Fill the vector with masked values */
347 PValue dst_blank(new GPRValue(tmp_index, 7));
348 for (int j = m_so_info->output[i].num_components; j < 4; j++)
349 tmp[i].set_reg_i(j, dst_blank);
350
351 start_comp[i] = 0;
352 so_gpr[i] = &tmp[i];
353 }
354 sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
355 }
356
357 /* Write outputs to buffers. */
358 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
359 sfn_log << SfnLog::instr << "Write output buffer " << i
360 << " with register index " << m_so_info->output[i].register_index << "\n";
361
362 StreamOutIntruction *out_stream =
363 new StreamOutIntruction(*so_gpr[i],
364 m_so_info->output[i].num_components,
365 m_so_info->output[i].dst_offset - start_comp[i],
366 ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
367 m_so_info->output[i].output_buffer,
368 m_so_info->output[i].stream);
369 m_proc.emit_export_instruction(out_stream);
370 m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
371 }
372 return true;
373 }
374
375
376 VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
377 const r600_shader *gs_shader):
378 VertexStageExportBase(proc),
379 m_num_clip_dist(0),
380 m_gs_shader(gs_shader)
381 {
382
383 }
384
385 bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
386 {
387
388 int ring_offset = -1;
389 const r600_shader_io& out_io = m_proc.sh_info().output[out_var->data.driver_location];
390
391 sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
392 << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
393 for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
394 auto& in_io = m_gs_shader->input[k];
395 sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
396
397 if (in_io.name == out_io.name &&
398 in_io.sid == out_io.sid) {
399 ring_offset = in_io.ring_offset;
400 break;
401 }
402 }
403
404 if (out_var->data.location == VARYING_SLOT_VIEWPORT) {
405 m_proc.sh_info().vs_out_viewport = 1;
406 m_proc.sh_info().vs_out_misc_write = 1;
407 return true;
408 }
409
410 if (ring_offset == -1) {
411 sfn_log << SfnLog::err << "VS defines output at "
412 << out_var->data.driver_location << "name=" << out_io.name
413 << " sid=" << out_io.sid << " that is not consumed as GS input\n";
414 return true;
415 }
416
417 uint32_t write_mask = (1 << instr->num_components) - 1;
418
419 GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
420 swizzle_from_comps(instr->num_components), true);
421
422 auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value,
423 ring_offset >> 2, 4, PValue());
424 m_proc.emit_export_instruction(ir);
425
426 m_proc.sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
427 if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
428 out_var->data.location == VARYING_SLOT_CLIP_DIST1)
429 m_num_clip_dist += 4;
430
431 return true;
432 }
433
434 void VertexStageExportForGS::finalize_exports()
435 {
436
437 }
438
439 VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
440 VertexStageExportBase(proc)
441 {
442 }
443
444 bool VertexStageExportForES::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
445 {
446 return true;
447 }
448
449 void VertexStageExportForES::finalize_exports()
450 {
451
452 }
453
454 }