013603edbb6d9642f218cb949401aaa5c5c19ee2
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_vertexstageexport.cpp
1 #include "sfn_vertexstageexport.h"
2
3 #include "sfn_shaderio.h"
4
5 namespace r600 {
6
7 using std::priority_queue;
8
9 VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
10 m_proc(proc),
11 m_cur_clip_pos(1),
12 m_cur_param(0)
13 {
14
15 }
16
17 VertexStageExportBase::~VertexStageExportBase()
18 {
19
20 }
21
22 VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
23 const pipe_stream_output_info *so_info,
24 r600_pipe_shader *pipe_shader, const r600_shader_key &key):
25 VertexStageExportBase(proc),
26 m_last_param_export(nullptr),
27 m_last_pos_export(nullptr),
28 m_num_clip_dist(0),
29 m_enabled_stream_buffers_mask(0),
30 m_so_info(so_info),
31 m_pipe_shader(pipe_shader),
32 m_key(key)
33 {
34 }
35
36 void VertexStageExportBase::setup_paramn_map()
37 {
38 priority_queue<int, std::vector<int>, std::greater<int>> q;
39 for (auto a: m_param_map) {
40 q.push(a.first);
41 }
42
43 int next_param = 0;
44 while (!q.empty()) {
45 int loc = q.top();
46 q.pop();
47 m_param_map[loc] = next_param++;
48 }
49 }
50
51 bool VertexStageExportBase::do_process_outputs(nir_variable *output)
52 {
53 if (output->data.location == VARYING_SLOT_COL0 ||
54 output->data.location == VARYING_SLOT_COL1 ||
55 (output->data.location >= VARYING_SLOT_VAR0 &&
56 output->data.location <= VARYING_SLOT_VAR31) ||
57 (output->data.location >= VARYING_SLOT_TEX0 &&
58 output->data.location <= VARYING_SLOT_TEX7) ||
59 output->data.location == VARYING_SLOT_BFC0 ||
60 output->data.location == VARYING_SLOT_BFC1 ||
61 output->data.location == VARYING_SLOT_CLIP_VERTEX ||
62 output->data.location == VARYING_SLOT_CLIP_DIST0 ||
63 output->data.location == VARYING_SLOT_CLIP_DIST1 ||
64 output->data.location == VARYING_SLOT_POS ||
65 output->data.location == VARYING_SLOT_PSIZ ||
66 output->data.location == VARYING_SLOT_FOGC ||
67 output->data.location == VARYING_SLOT_LAYER ||
68 output->data.location == VARYING_SLOT_EDGE ||
69 output->data.location == VARYING_SLOT_VIEWPORT
70 ) {
71
72 r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location];
73 auto semantic = r600_get_varying_semantic(output->data.location);
74 io.name = semantic.first;
75 io.sid = semantic.second;
76
77 m_proc.evaluate_spi_sid(io);
78 io.write_mask = ((1 << glsl_get_components(output->type)) - 1)
79 << output->data.location_frac;
80 ++m_proc.sh_info().noutput;
81
82 if (output->data.location == VARYING_SLOT_PSIZ ||
83 output->data.location == VARYING_SLOT_EDGE ||
84 output->data.location == VARYING_SLOT_LAYER)
85 m_cur_clip_pos = 2;
86
87 if (output->data.location != VARYING_SLOT_POS &&
88 output->data.location != VARYING_SLOT_EDGE &&
89 output->data.location != VARYING_SLOT_PSIZ &&
90 output->data.location != VARYING_SLOT_CLIP_VERTEX)
91 m_param_map[output->data.location] = m_cur_param++;
92
93 return true;
94 }
95 return false;
96 }
97
98
99 bool VertexStageExportForFS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
100 {
101
102 switch (out_var->data.location) {
103 case VARYING_SLOT_PSIZ:
104 m_proc.sh_info().vs_out_point_size = 1;
105 m_proc.sh_info().vs_out_misc_write = 1;
106 /* fallthrough */
107 case VARYING_SLOT_POS:
108 return emit_varying_pos(out_var, instr);
109 case VARYING_SLOT_EDGE: {
110 std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
111 return emit_varying_pos(out_var, instr, &swizzle_override);
112 }
113 case VARYING_SLOT_CLIP_VERTEX:
114 return emit_clip_vertices(out_var, instr);
115 case VARYING_SLOT_CLIP_DIST0:
116 case VARYING_SLOT_CLIP_DIST1:
117 m_num_clip_dist += 4;
118 return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
119 case VARYING_SLOT_LAYER: {
120 m_proc.sh_info().vs_out_misc_write = 1;
121 m_proc.sh_info().vs_out_layer = 1;
122 std::array<uint32_t, 4> swz = {7,7,0,7};
123 return emit_varying_pos(out_var, instr, &swz) &&
124 emit_varying_param(out_var, instr);
125 }
126 case VARYING_SLOT_VIEW_INDEX:
127 return emit_varying_pos(out_var, instr) &&
128 emit_varying_param(out_var, instr);
129
130 default:
131 if (out_var->data.location <= VARYING_SLOT_VAR31 ||
132 (out_var->data.location >= VARYING_SLOT_TEX0 &&
133 out_var->data.location <= VARYING_SLOT_TEX7))
134 return emit_varying_param(out_var, instr);
135 }
136
137 fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
138 out_var->data.location);
139 return false;
140 }
141
142 bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
143 std::array<uint32_t, 4> *swizzle_override)
144 {
145 std::array<uint32_t,4> swizzle;
146 uint32_t write_mask = 0;
147
148 if (swizzle_override) {
149 swizzle = *swizzle_override;
150 for (int i = 0; i < 4; ++i) {
151 if (swizzle[i] < 6)
152 write_mask |= 1 << i;
153 }
154 } else {
155 write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
156 for (int i = 0; i < 4; ++i)
157 swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
158 }
159
160 m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
161
162 GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
163 m_proc.set_output(out_var->data.driver_location, PValue(value));
164
165 int export_slot = 0;
166
167 switch (out_var->data.location) {
168 case VARYING_SLOT_EDGE: {
169 m_proc.sh_info().vs_out_misc_write = 1;
170 m_proc.sh_info().vs_out_edgeflag = 1;
171 m_proc.emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
172 m_proc.emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr});
173 m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf;
174 }
175 /* fallthrough */
176 case VARYING_SLOT_PSIZ:
177 case VARYING_SLOT_LAYER:
178 export_slot = 1;
179 break;
180 case VARYING_SLOT_POS:
181 break;
182 case VARYING_SLOT_CLIP_DIST0:
183 case VARYING_SLOT_CLIP_DIST1:
184 export_slot = m_cur_clip_pos++;
185 break;
186 default:
187 sfn_log << SfnLog::err << __func__ << "Unsupported location "
188 << out_var->data.location << "\n";
189 return false;
190 }
191
192 m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos);
193 m_proc.emit_export_instruction(m_last_pos_export);
194 m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
195 return true;
196 }
197
198 bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
199 {
200 assert(out_var->data.driver_location < m_proc.sh_info().noutput);
201 sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
202
203 int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
204 std::array<uint32_t,4> swizzle;
205 for (int i = 0; i < 4; ++i)
206 swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
207
208 m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
209
210 GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
211 m_proc.sh_info().output[out_var->data.driver_location].gpr = value->sel();
212
213 /* This should use the registers!! */
214 m_proc.set_output(out_var->data.driver_location, PValue(value));
215
216 auto param_loc = m_param_map.find(out_var->data.location);
217 assert(param_loc != m_param_map.end());
218
219 m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param);
220 m_proc.emit_export_instruction(m_last_param_export);
221 m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
222 return true;
223 }
224
225 bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
226 {
227 m_proc.sh_info().cc_dist_mask = 0xff;
228 m_proc.sh_info().clip_dist_write = 0xff;
229
230 std::unique_ptr<GPRVector> clip_vertex(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3}));
231
232 for (int i = 0; i < 4; ++i)
233 m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
234
235 GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
236
237 for (int i = 0; i < 8; i++) {
238 int oreg = i >> 2;
239 int ochan = i & 3;
240 AluInstruction *ir = nullptr;
241 for (int j = 0; j < 4; j++) {
242 ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j),
243 PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
244 (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
245 m_proc.emit_instruction(ir);
246 }
247 ir->set_flag(alu_last_instr);
248 }
249
250 m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
251 m_proc.emit_export_instruction(m_last_pos_export);
252
253 m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
254 m_proc.emit_export_instruction(m_last_pos_export);
255
256 return true;
257 }
258
259 void VertexStageExportForFS::finalize_exports()
260 {
261 if (m_key.vs.as_gs_a) {
262 PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
263 GPRVector primid({m_proc.primitive_id(), o,o,o});
264 m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
265 m_proc.emit_export_instruction(m_last_param_export);
266 int i;
267 i = m_proc.sh_info().noutput++;
268 auto& io = m_proc.sh_info().output[i];
269 io.name = TGSI_SEMANTIC_PRIMID;
270 io.sid = 0;
271 io.gpr = 0;
272 io.interpolate = TGSI_INTERPOLATE_CONSTANT;
273 io.write_mask = 0x1;
274 io.spi_sid = m_key.vs.prim_id_out;
275 m_proc.sh_info().vs_as_gs_a = 1;
276 }
277
278 if (m_so_info && m_so_info->num_outputs)
279 emit_stream(-1);
280
281 m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
282
283 if (!m_last_param_export) {
284 GPRVector value(0,{7,7,7,7});
285 m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
286 m_proc.emit_export_instruction(m_last_param_export);
287 }
288 m_last_param_export->set_last();
289
290 if (!m_last_pos_export) {
291 GPRVector value(0,{7,7,7,7});
292 m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
293 m_proc.emit_export_instruction(m_last_pos_export);
294 }
295 m_last_pos_export->set_last();
296 }
297
298 bool VertexStageExportForFS::emit_stream(int stream)
299 {
300 assert(m_so_info);
301 if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
302 R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
303 return false;
304 }
305 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
306 if (m_so_info->output[i].output_buffer >= 4) {
307 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
308 m_so_info->output[i].output_buffer);
309 return false;
310 }
311 }
312 const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
313 unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
314 std::vector<GPRVector> tmp(m_so_info->num_outputs);
315
316 /* Initialize locations where the outputs are stored. */
317 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
318 if (stream != -1 && stream != m_so_info->output[i].stream)
319 continue;
320
321 sfn_log << SfnLog::instr << "Emit stream " << i
322 << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
323
324
325 so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
326
327 if (!so_gpr[i]) {
328 sfn_log << SfnLog::err << "\nERR: register index "
329 << m_so_info->output[i].register_index
330 << " doesn't correspond to an output register\n";
331 return false;
332 }
333 start_comp[i] = m_so_info->output[i].start_component;
334 /* Lower outputs with dst_offset < start_component.
335 *
336 * We can only output 4D vectors with a write mask, e.g. we can
337 * only output the W component at offset 3, etc. If we want
338 * to store Y, Z, or W at buffer offset 0, we need to use MOV
339 * to move it to X and output X. */
340 if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
341 int tmp_index = m_proc.allocate_temp_register();
342 int sc = m_so_info->output[i].start_component;
343 AluInstruction *alu = nullptr;
344 for (int j = 0; j < m_so_info->output[i].num_components; j++) {
345 PValue dst(new GPRValue(tmp_index, j));
346 alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
347 tmp[i].set_reg_i(j, dst);
348 m_proc.emit_instruction(alu);
349 }
350 if (alu)
351 alu->set_flag(alu_last_instr);
352
353 /* Fill the vector with masked values */
354 PValue dst_blank(new GPRValue(tmp_index, 7));
355 for (int j = m_so_info->output[i].num_components; j < 4; j++)
356 tmp[i].set_reg_i(j, dst_blank);
357
358 start_comp[i] = 0;
359 so_gpr[i] = &tmp[i];
360 }
361 sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
362 }
363
364 /* Write outputs to buffers. */
365 for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
366 sfn_log << SfnLog::instr << "Write output buffer " << i
367 << " with register index " << m_so_info->output[i].register_index << "\n";
368
369 StreamOutIntruction *out_stream =
370 new StreamOutIntruction(*so_gpr[i],
371 m_so_info->output[i].num_components,
372 m_so_info->output[i].dst_offset - start_comp[i],
373 ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
374 m_so_info->output[i].output_buffer,
375 m_so_info->output[i].stream);
376 m_proc.emit_export_instruction(out_stream);
377 m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
378 }
379 return true;
380 }
381
382
383 VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
384 const r600_shader *gs_shader):
385 VertexStageExportBase(proc),
386 m_gs_shader(gs_shader)
387 {
388
389 }
390
391 bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
392 {
393
394 int ring_offset = -1;
395 const r600_shader_io& out_io = m_proc.sh_info().output[out_var->data.driver_location];
396
397 sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
398 << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
399 for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
400 auto& in_io = m_gs_shader->input[k];
401 sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
402
403 if (in_io.name == out_io.name &&
404 in_io.sid == out_io.sid) {
405 ring_offset = in_io.ring_offset;
406 break;
407 }
408 }
409
410 if (out_var->data.location == VARYING_SLOT_VIEWPORT)
411 return true;
412
413 if (ring_offset == -1) {
414 sfn_log << SfnLog::err << "VS defines output at "
415 << out_var->data.driver_location << "name=" << out_io.name
416 << " sid=" << out_io.sid << " that is not consumed as GS input\n";
417 return true;
418 }
419
420 uint32_t write_mask = (1 << instr->num_components) - 1;
421
422 std::unique_ptr<GPRVector> value(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
423 swizzle_from_mask(instr->num_components)));
424
425 auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, *value,
426 ring_offset >> 2, 4, PValue());
427 m_proc.emit_export_instruction(ir);
428
429 m_proc.sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
430 if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
431 out_var->data.location == VARYING_SLOT_CLIP_DIST1)
432 m_num_clip_dist += 4;
433
434 return true;
435 }
436
437 void VertexStageExportForGS::finalize_exports()
438 {
439
440 }
441
442 VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
443 VertexStageExportBase(proc)
444 {
445 }
446
447 bool VertexStageExportForES::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
448 {
449 return true;
450 }
451
452 void VertexStageExportForES::finalize_exports()
453 {
454
455 }
456
457 }