08c778a2e103853cb1f86d62ecd9747eca12a2e2
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_vertexstageexport.cpp
1 #include "sfn_vertexstageexport.h"
2
3 #include "tgsi/tgsi_from_mesa.h"
4
5 namespace r600 {
6
7 using std::priority_queue;
8
9 VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
10 m_proc(proc),
11 m_cur_clip_pos(1),
12 m_cur_param(0)
13 {
14
15 }
16
17 VertexStageExportBase::~VertexStageExportBase()
18 {
19
20 }
21
22 VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
23 const pipe_stream_output_info *so_info,
24 r600_pipe_shader *pipe_shader, const r600_shader_key &key):
25 VertexStageExportBase(proc),
26 m_last_param_export(nullptr),
27 m_last_pos_export(nullptr),
28 m_num_clip_dist(0),
29 m_enabled_stream_buffers_mask(0),
30 m_so_info(so_info),
31 m_pipe_shader(pipe_shader),
32 m_key(key)
33 {
34 }
35
36 void VertexStageExportBase::setup_paramn_map()
37 {
38 priority_queue<int, std::vector<int>, std::greater<int>> q;
39 for (auto a: m_param_map) {
40 q.push(a.first);
41 }
42
43 int next_param = 0;
44 while (!q.empty()) {
45 int loc = q.top();
46 q.pop();
47 m_param_map[loc] = next_param++;
48 }
49 }
50
51 bool VertexStageExportBase::do_process_outputs(nir_variable *output)
52 {
53 if (output->data.location == VARYING_SLOT_COL0 ||
54 output->data.location == VARYING_SLOT_COL1 ||
55 (output->data.location >= VARYING_SLOT_VAR0 &&
56 output->data.location <= VARYING_SLOT_VAR31) ||
57 (output->data.location >= VARYING_SLOT_TEX0 &&
58 output->data.location <= VARYING_SLOT_TEX7) ||
59 output->data.location == VARYING_SLOT_BFC0 ||
60 output->data.location == VARYING_SLOT_BFC1 ||
61 output->data.location == VARYING_SLOT_CLIP_VERTEX ||
62 output->data.location == VARYING_SLOT_CLIP_DIST0 ||
63 output->data.location == VARYING_SLOT_CLIP_DIST1 ||
64 output->data.location == VARYING_SLOT_POS ||
65 output->data.location == VARYING_SLOT_PSIZ ||
66 output->data.location == VARYING_SLOT_FOGC ||
67 output->data.location == VARYING_SLOT_LAYER ||
68 output->data.location == VARYING_SLOT_EDGE ||
69 output->data.location == VARYING_SLOT_VIEWPORT
70 ) {
71
72 r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location];
73 tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>( output->data.location),
74 true, &io.name, &io.sid);
75
76 m_proc.evaluate_spi_sid(io);
77 io.write_mask = ((1 << glsl_get_components(output->type)) - 1)
78 << output->data.location_frac;
79 ++m_proc.sh_info().noutput;
80
81 if (output->data.location == VARYING_SLOT_PSIZ ||
82 output->data.location == VARYING_SLOT_EDGE ||
83 output->data.location == VARYING_SLOT_LAYER)
84 m_cur_clip_pos = 2;
85
86 if (output->data.location != VARYING_SLOT_POS &&
87 output->data.location != VARYING_SLOT_EDGE &&
88 output->data.location != VARYING_SLOT_PSIZ &&
89 output->data.location != VARYING_SLOT_CLIP_VERTEX)
90 m_param_map[output->data.location] = m_cur_param++;
91
92 return true;
93 }
94 return false;
95 }
96
97
98 bool VertexStageExportForFS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
99 {
100
101 switch (out_var->data.location) {
102 case VARYING_SLOT_PSIZ:
103 m_proc.sh_info().vs_out_point_size = 1;
104 m_proc.sh_info().vs_out_misc_write = 1;
105 /* fallthrough */
106 case VARYING_SLOT_POS:
107 return emit_varying_pos(out_var, instr);
108 case VARYING_SLOT_EDGE: {
109 std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
110 return emit_varying_pos(out_var, instr, &swizzle_override);
111 }
112 case VARYING_SLOT_CLIP_VERTEX:
113 return emit_clip_vertices(out_var, instr);
114 case VARYING_SLOT_CLIP_DIST0:
115 case VARYING_SLOT_CLIP_DIST1:
116 m_num_clip_dist += 4;
117 return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
118 case VARYING_SLOT_LAYER: {
119 m_proc.sh_info().vs_out_misc_write = 1;
120 m_proc.sh_info().vs_out_layer = 1;
121 std::array<uint32_t, 4> swz = {7,7,0,7};
122 return emit_varying_pos(out_var, instr, &swz) &&
123 emit_varying_param(out_var, instr);
124 }
125 case VARYING_SLOT_VIEW_INDEX:
126 return emit_varying_pos(out_var, instr) &&
127 emit_varying_param(out_var, instr);
128
129 default:
130 if (out_var->data.location <= VARYING_SLOT_VAR31 ||
131 (out_var->data.location >= VARYING_SLOT_TEX0 &&
132 out_var->data.location <= VARYING_SLOT_TEX7))
133 return emit_varying_param(out_var, instr);
134 }
135
136 fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
137 out_var->data.location);
138 return false;
139 }
140
141 bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
142 std::array<uint32_t, 4> *swizzle_override)
143 {
144 std::array<uint32_t,4> swizzle;
145 uint32_t write_mask = 0;
146
147 if (swizzle_override) {
148 swizzle = *swizzle_override;
149 for (int i = 0; i < 4; ++i) {
150 if (swizzle[i] < 6)
151 write_mask |= 1 << i;
152 }
153 } else {
154 write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
155 for (int i = 0; i < 4; ++i)
156 swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
157 }
158
159 m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
160
161 GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
162 m_proc.set_output(out_var->data.driver_location, PValue(value));
163
164 int export_slot = 0;
165
166 switch (out_var->data.location) {
167 case VARYING_SLOT_EDGE: {
168 m_proc.sh_info().vs_out_misc_write = 1;
169 m_proc.sh_info().vs_out_edgeflag = 1;
170 m_proc.emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
171 m_proc.emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr});
172 m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf;
173 }
174 /* fallthrough */
175 case VARYING_SLOT_PSIZ:
176 case VARYING_SLOT_LAYER:
177 export_slot = 1;
178 break;
179 case VARYING_SLOT_POS:
180 break;
181 case VARYING_SLOT_CLIP_DIST0:
182 case VARYING_SLOT_CLIP_DIST1:
183 export_slot = m_cur_clip_pos++;
184 break;
185 default:
186 sfn_log << SfnLog::err << __func__ << "Unsupported location "
187 << out_var->data.location << "\n";
188 return false;
189 }
190
191 m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos);
192 m_proc.emit_export_instruction(m_last_pos_export);
193 m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
194 return true;
195 }
196
197 bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
198 {
199 assert(out_var->data.driver_location < m_proc.sh_info().noutput);
200 sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
201
202 int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
203 std::array<uint32_t,4> swizzle;
204 for (int i = 0; i < 4; ++i)
205 swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
206
207 m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
208
209 GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
210 m_proc.sh_info().output[out_var->data.driver_location].gpr = value->sel();
211
212 /* This should use the registers!! */
213 m_proc.set_output(out_var->data.driver_location, PValue(value));
214
215 auto param_loc = m_param_map.find(out_var->data.location);
216 assert(param_loc != m_param_map.end());
217
218 m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param);
219 m_proc.emit_export_instruction(m_last_param_export);
220 m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
221 return true;
222 }
223
224 bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
225 {
226 m_proc.sh_info().cc_dist_mask = 0xff;
227 m_proc.sh_info().clip_dist_write = 0xff;
228
229 std::unique_ptr<GPRVector> clip_vertex(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3}));
230
231 for (int i = 0; i < 4; ++i)
232 m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
233
234 GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
235
236 for (int i = 0; i < 8; i++) {
237 int oreg = i >> 2;
238 int ochan = i & 3;
239 AluInstruction *ir = nullptr;
240 for (int j = 0; j < 4; j++) {
241 ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j),
242 PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
243 (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
244 m_proc.emit_instruction(ir);
245 }
246 ir->set_flag(alu_last_instr);
247 }
248
249 m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
250 m_proc.emit_export_instruction(m_last_pos_export);
251
252 m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
253 m_proc.emit_export_instruction(m_last_pos_export);
254
255 return true;
256 }
257
258 void VertexStageExportForFS::finalize_exports()
259 {
260 if (m_key.vs.as_gs_a) {
261 PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
262 GPRVector primid({m_proc.primitive_id(), o,o,o});
263 m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
264 m_proc.emit_export_instruction(m_last_param_export);
265 int i;
266 i = m_proc.sh_info().noutput++;
267 auto& io = m_proc.sh_info().output[i];
268 io.name = TGSI_SEMANTIC_PRIMID;
269 io.sid = 0;
270 io.gpr = 0;
271 io.interpolate = TGSI_INTERPOLATE_CONSTANT;
272 io.write_mask = 0x1;
273 io.spi_sid = m_key.vs.prim_id_out;
274 m_proc.sh_info().vs_as_gs_a = 1;
275 }
276
277 if (m_so_info && m_so_info->num_outputs)
278 emit_stream(-1);
279
280 m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
281
282 if (!m_last_param_export) {
283 GPRVector value(0,{7,7,7,7});
284 m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
285 m_proc.emit_export_instruction(m_last_param_export);
286 }
287 m_last_param_export->set_last();
288
289 if (!m_last_pos_export) {
290 GPRVector value(0,{7,7,7,7});
291 m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
292 m_proc.emit_export_instruction(m_last_pos_export);
293 }
294 m_last_pos_export->set_last();
295 }
296
297 bool VertexStageExportForFS::emit_stream(int stream)
298 {
299 assert(m_so_info);
300 if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
301 R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
302 return false;
303 }
304 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
305 if (m_so_info->output[i].output_buffer >= 4) {
306 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
307 m_so_info->output[i].output_buffer);
308 return false;
309 }
310 }
311 const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
312 unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
313 std::vector<GPRVector> tmp(m_so_info->num_outputs);
314
315 /* Initialize locations where the outputs are stored. */
316 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
317 if (stream != -1 && stream != m_so_info->output[i].stream)
318 continue;
319
320 sfn_log << SfnLog::instr << "Emit stream " << i
321 << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
322
323
324 so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
325
326 if (!so_gpr[i]) {
327 sfn_log << SfnLog::err << "\nERR: register index "
328 << m_so_info->output[i].register_index
329 << " doesn't correspond to an output register\n";
330 return false;
331 }
332 start_comp[i] = m_so_info->output[i].start_component;
333 /* Lower outputs with dst_offset < start_component.
334 *
335 * We can only output 4D vectors with a write mask, e.g. we can
336 * only output the W component at offset 3, etc. If we want
337 * to store Y, Z, or W at buffer offset 0, we need to use MOV
338 * to move it to X and output X. */
339 if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
340 int tmp_index = m_proc.allocate_temp_register();
341 int sc = m_so_info->output[i].start_component;
342 AluInstruction *alu = nullptr;
343 for (int j = 0; j < m_so_info->output[i].num_components; j++) {
344 PValue dst(new GPRValue(tmp_index, j));
345 alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
346 tmp[i].set_reg_i(j, dst);
347 m_proc.emit_instruction(alu);
348 }
349 if (alu)
350 alu->set_flag(alu_last_instr);
351
352 /* Fill the vector with masked values */
353 PValue dst_blank(new GPRValue(tmp_index, 7));
354 for (int j = m_so_info->output[i].num_components; j < 4; j++)
355 tmp[i].set_reg_i(j, dst_blank);
356
357 start_comp[i] = 0;
358 so_gpr[i] = &tmp[i];
359 }
360 sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
361 }
362
363 /* Write outputs to buffers. */
364 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
365 sfn_log << SfnLog::instr << "Write output buffer " << i
366 << " with register index " << m_so_info->output[i].register_index << "\n";
367
368 StreamOutIntruction *out_stream =
369 new StreamOutIntruction(*so_gpr[i],
370 m_so_info->output[i].num_components,
371 m_so_info->output[i].dst_offset - start_comp[i],
372 ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
373 m_so_info->output[i].output_buffer,
374 m_so_info->output[i].stream);
375 m_proc.emit_export_instruction(out_stream);
376 m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
377 }
378 return true;
379 }
380
381
382 VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
383 const r600_shader *gs_shader):
384 VertexStageExportBase(proc),
385 m_gs_shader(gs_shader)
386 {
387
388 }
389
390 bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
391 {
392
393 int ring_offset = -1;
394 const r600_shader_io& out_io = m_proc.sh_info().output[out_var->data.driver_location];
395
396 sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
397 << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
398 for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
399 auto& in_io = m_gs_shader->input[k];
400 sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
401
402 if (in_io.name == out_io.name &&
403 in_io.sid == out_io.sid) {
404 ring_offset = in_io.ring_offset;
405 break;
406 }
407 }
408
409 if (out_var->data.location == VARYING_SLOT_VIEWPORT)
410 return true;
411
412 if (ring_offset == -1) {
413 sfn_log << SfnLog::err << "VS defines output at "
414 << out_var->data.driver_location << "name=" << out_io.name
415 << " sid=" << out_io.sid << " that is not consumed as GS input\n";
416 return true;
417 }
418
419 uint32_t write_mask = (1 << instr->num_components) - 1;
420
421 std::unique_ptr<GPRVector> value(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
422 swizzle_from_mask(instr->num_components)));
423
424 auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, *value,
425 ring_offset >> 2, 4, PValue());
426 m_proc.emit_export_instruction(ir);
427
428 m_proc.sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
429 if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
430 out_var->data.location == VARYING_SLOT_CLIP_DIST1)
431 m_num_clip_dist += 4;
432
433 return true;
434 }
435
436 void VertexStageExportForGS::finalize_exports()
437 {
438
439 }
440
441 VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
442 VertexStageExportBase(proc)
443 {
444 }
445
446 bool VertexStageExportForES::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
447 {
448 return true;
449 }
450
451 void VertexStageExportForES::finalize_exports()
452 {
453
454 }
455
456 }