1 #include "sfn_vertexstageexport.h"
3 #include "sfn_shaderio.h"
7 using std::priority_queue
;
/* Constructor of the export base class; receives the VertexStage (proc) by
 * reference. The member-initializer list and the body are not part of this
 * listing. */
9 VertexStageExportBase::VertexStageExportBase(VertexStage
& proc
):
/* Destructor of the export base class; its body is not part of this listing. */
17 VertexStageExportBase::~VertexStageExportBase()
/* Constructor of the exporter used by VertexStageExportForFS.
 *
 * proc        - forwarded to the VertexStageExportBase constructor
 * so_info     - stream-output description (const pointer)
 * pipe_shader - stored in m_pipe_shader
 * key         - shader key (const reference)
 *
 * The visible initializers start with no pending param/pos export and an
 * empty stream-buffer mask. Initializers for so_info and key are not visible
 * in this listing. */
22 VertexStageExportForFS::VertexStageExportForFS(VertexStage
& proc
,
23 const pipe_stream_output_info
*so_info
,
24 r600_pipe_shader
*pipe_shader
, const r600_shader_key
&key
):
25 VertexStageExportBase(proc
),
26 m_last_param_export(nullptr),
27 m_last_pos_export(nullptr),
29 m_enabled_stream_buffers_mask(0),
31 m_pipe_shader(pipe_shader
),
/* Rewrites the values stored in m_param_map from a running counter
 * (next_param).
 *
 * NOTE(review): the lines that fill and drain the queue and that declare
 * loc / next_param are not visible in this listing. Presumably the map keys
 * are pushed and popped in ascending order so that parameter indices follow
 * increasing location - confirm against the full source. */
36 void VertexStageExportBase::setup_paramn_map()
/* std::greater makes this a min-heap: the smallest int is on top. */
38 priority_queue
<int, std::vector
<int>, std::greater
<int>> q
;
39 for (auto a
: m_param_map
) {
/* Assign the next consecutive parameter index to location loc. */
47 m_param_map
[loc
] = next_param
++;
/* Registers one vertex-shader output variable in the shader info.
 *
 * For the varying slots listed in the first condition, the r600_shader_io
 * record at output->data.driver_location is filled in (semantic name/sid,
 * spi_sid, write mask) and the output count is incremented. Most slots are
 * also given a parameter index in m_param_map.
 *
 * The return statements are not visible in this listing. */
51 bool VertexStageExportBase::do_process_outputs(nir_variable
*output
)
/* Accepted slots: COL0/1, VAR0..VAR31, TEX0..TEX7, BFC0/1, CLIP_VERTEX,
 * CLIP_DIST0/1, POS, PSIZ, FOGC, LAYER, EDGE and VIEWPORT. */
53 if (output
->data
.location
== VARYING_SLOT_COL0
||
54 output
->data
.location
== VARYING_SLOT_COL1
||
55 (output
->data
.location
>= VARYING_SLOT_VAR0
&&
56 output
->data
.location
<= VARYING_SLOT_VAR31
) ||
57 (output
->data
.location
>= VARYING_SLOT_TEX0
&&
58 output
->data
.location
<= VARYING_SLOT_TEX7
) ||
59 output
->data
.location
== VARYING_SLOT_BFC0
||
60 output
->data
.location
== VARYING_SLOT_BFC1
||
61 output
->data
.location
== VARYING_SLOT_CLIP_VERTEX
||
62 output
->data
.location
== VARYING_SLOT_CLIP_DIST0
||
63 output
->data
.location
== VARYING_SLOT_CLIP_DIST1
||
64 output
->data
.location
== VARYING_SLOT_POS
||
65 output
->data
.location
== VARYING_SLOT_PSIZ
||
66 output
->data
.location
== VARYING_SLOT_FOGC
||
67 output
->data
.location
== VARYING_SLOT_LAYER
||
68 output
->data
.location
== VARYING_SLOT_EDGE
||
69 output
->data
.location
== VARYING_SLOT_VIEWPORT
/* The output record is indexed by the variable's driver_location. */
72 r600_shader_io
& io
= m_proc
.sh_info().output
[output
->data
.driver_location
];
/* r600_get_varying_semantic() returns a pair: .first is stored as the
 * semantic name, .second as the semantic id. */
73 auto semantic
= r600_get_varying_semantic(output
->data
.location
);
74 io
.name
= semantic
.first
;
75 io
.sid
= semantic
.second
;
77 m_proc
.evaluate_spi_sid(io
);
/* One mask bit per component of the variable's type, shifted up by the
 * variable's first component (location_frac). */
78 io
.write_mask
= ((1 << glsl_get_components(output
->type
)) - 1)
79 << output
->data
.location_frac
;
80 ++m_proc
.sh_info().noutput
;
/* PSIZ, EDGE and LAYER are singled out here; the statement guarded by this
 * condition is not visible in this listing. */
82 if (output
->data
.location
== VARYING_SLOT_PSIZ
||
83 output
->data
.location
== VARYING_SLOT_EDGE
||
84 output
->data
.location
== VARYING_SLOT_LAYER
)
/* Every slot except POS, EDGE, PSIZ and CLIP_VERTEX gets the next free
 * parameter index recorded under its location. */
87 if (output
->data
.location
!= VARYING_SLOT_POS
&&
88 output
->data
.location
!= VARYING_SLOT_EDGE
&&
89 output
->data
.location
!= VARYING_SLOT_PSIZ
&&
90 output
->data
.location
!= VARYING_SLOT_CLIP_VERTEX
)
91 m_param_map
[output
->data
.location
] = m_cur_param
++;
/* Handles a store to an output variable by dispatching on the variable's
 * varying slot to the position, parameter or clip-vertex export helper.
 *
 * out_var - the output variable being written
 * instr   - the store intrinsic, passed through to the helpers
 *
 * Locations that match none of the cases below are reported on stderr; the
 * return statement after that message is not visible in this listing. */
99 bool VertexStageExportForFS::store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
102 switch (out_var
->data
.location
) {
/* Point size: record that the shader writes point size / misc data.
 * NOTE(review): no return is visible between this case and
 * VARYING_SLOT_POS, so this presumably falls through to the position
 * export - confirm. */
103 case VARYING_SLOT_PSIZ
:
104 m_proc
.sh_info().vs_out_point_size
= 1;
105 m_proc
.sh_info().vs_out_misc_write
= 1;
107 case VARYING_SLOT_POS
:
108 return emit_varying_pos(out_var
, instr
);
/* Edge flag: position export with a fixed swizzle. 7 is the value the
 * export helpers also use for channels that are not written. */
109 case VARYING_SLOT_EDGE
: {
110 std::array
<uint32_t, 4> swizzle_override
= {7 ,0, 7, 7};
111 return emit_varying_pos(out_var
, instr
, &swizzle_override
);
113 case VARYING_SLOT_CLIP_VERTEX
:
114 return emit_clip_vertices(out_var
, instr
);
/* Each clip-distance slot adds four to the clip-distance count and is
 * exported both as a parameter and as a position. */
115 case VARYING_SLOT_CLIP_DIST0
:
116 case VARYING_SLOT_CLIP_DIST1
:
117 m_num_clip_dist
+= 4;
118 return emit_varying_param(out_var
, instr
) && emit_varying_pos(out_var
, instr
);
/* Layer: flag it in the shader info, then export as position (fixed swizzle)
 * and as parameter. */
119 case VARYING_SLOT_LAYER
: {
120 m_proc
.sh_info().vs_out_misc_write
= 1;
121 m_proc
.sh_info().vs_out_layer
= 1;
122 std::array
<uint32_t, 4> swz
= {7,7,0,7};
123 return emit_varying_pos(out_var
, instr
, &swz
) &&
124 emit_varying_param(out_var
, instr
);
/* NOTE(review): emit_varying_pos shows no case label for
 * VARYING_SLOT_VIEW_INDEX in this listing; verify that this location does
 * not end up in its "Unsupported location" path. */
126 case VARYING_SLOT_VIEW_INDEX
:
127 return emit_varying_pos(out_var
, instr
) &&
128 emit_varying_param(out_var
, instr
);
/* Any location up to VAR31, plus TEX0..TEX7, is exported as a plain
 * parameter. */
131 if (out_var
->data
.location
<= VARYING_SLOT_VAR31
||
132 (out_var
->data
.location
>= VARYING_SLOT_TEX0
&&
133 out_var
->data
.location
<= VARYING_SLOT_TEX7
))
134 return emit_varying_param(out_var
, instr
);
137 fprintf(stderr
, "r600-NIR: Unimplemented store_deref for %d\n",
138 out_var
->data
.location
);
/* Emits a position-type export (ExportInstruction::et_pos) for an output.
 *
 * out_var          - the output variable being stored
 * instr            - the store intrinsic; src[1] provides the value
 * swizzle_override - optional fixed swizzle; when null, the swizzle is
 *                    derived from the intrinsic's write mask
 *
 * The created export is remembered in m_last_pos_export. Return statements,
 * the declaration of export_slot and some case bodies are not visible in
 * this listing. */
142 bool VertexStageExportForFS::emit_varying_pos(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
,
143 std::array
<uint32_t, 4> *swizzle_override
)
145 std::array
<uint32_t,4> swizzle
;
146 uint32_t write_mask
= 0;
/* Fixed swizzle given: copy it and build the write mask from it. The
 * per-channel condition guarding the mask update is not visible here. */
148 if (swizzle_override
) {
149 swizzle
= *swizzle_override
;
150 for (int i
= 0; i
< 4; ++i
) {
152 write_mask
|= 1 << i
;
/* No override: take the intrinsic's write mask shifted by the variable's
 * first component. A written channel i reads source component
 * i - location_frac; an unwritten channel gets swizzle 7. */
155 write_mask
= nir_intrinsic_write_mask(instr
) << out_var
->data
.location_frac
;
156 for (int i
= 0; i
< 4; ++i
)
157 swizzle
[i
] = ((1 << i
) & write_mask
) ? i
- out_var
->data
.location_frac
: 7;
160 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
= write_mask
;
/* Load the stored value as a vector and register it as this output. */
162 GPRVector
*value
= m_proc
.vec_from_nir_with_fetch_constant(instr
->src
[1], write_mask
, swizzle
);
163 m_proc
.set_output(out_var
->data
.driver_location
, PValue(value
));
167 switch (out_var
->data
.location
) {
/* Edge flag: mark it in the shader info, then rewrite channel 1 in place
 * with a clamped move followed by a float-to-int conversion, and force the
 * output write mask to all four channels. */
168 case VARYING_SLOT_EDGE
: {
169 m_proc
.sh_info().vs_out_misc_write
= 1;
170 m_proc
.sh_info().vs_out_edgeflag
= 1;
171 m_proc
.emit_instruction(op1_mov
, value
->reg_i(1), {value
->reg_i(1)}, {alu_write
, alu_dst_clamp
, alu_last_instr
});
172 m_proc
.emit_instruction(op1_flt_to_int
, value
->reg_i(1), {value
->reg_i(1)}, {alu_write
, alu_last_instr
});
173 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
= 0xf;
/* The statements for the PSIZ/LAYER and POS cases are not visible in this
 * listing. */
176 case VARYING_SLOT_PSIZ
:
177 case VARYING_SLOT_LAYER
:
180 case VARYING_SLOT_POS
:
/* Clip distances take the next free clip position slot. */
182 case VARYING_SLOT_CLIP_DIST0
:
183 case VARYING_SLOT_CLIP_DIST1
:
184 export_slot
= m_cur_clip_pos
++;
187 sfn_log
<< SfnLog::err
<< __func__
<< "Unsupported location "
188 << out_var
->data
.location
<< "\n";
/* Create and emit the position export, and record its register for this
 * output's driver_location. */
192 m_last_pos_export
= new ExportInstruction(export_slot
, *value
, ExportInstruction::et_pos
);
193 m_proc
.emit_export_instruction(m_last_pos_export
);
194 m_proc
.add_param_output_reg(out_var
->data
.driver_location
, m_last_pos_export
->gpr_ptr());
/* Emits a parameter-type export (ExportInstruction::et_param) for an output.
 *
 * out_var - the output variable being stored; its location must already
 *           have an entry in m_param_map
 * instr   - the store intrinsic; src[1] provides the value
 *
 * The created export is remembered in m_last_param_export. The return
 * statement is not visible in this listing. */
198 bool VertexStageExportForFS::emit_varying_param(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
200 assert(out_var
->data
.driver_location
< m_proc
.sh_info().noutput
);
201 sfn_log
<< SfnLog::io
<< __func__
<< ": emit DDL: " << out_var
->data
.driver_location
<< "\n";
/* Write mask of the intrinsic shifted by the variable's first component. A
 * written channel i reads source component i - location_frac; an unwritten
 * channel gets swizzle 7.
 * NOTE(review): the mask is an int here but a uint32_t in
 * emit_varying_pos. */
203 int write_mask
= nir_intrinsic_write_mask(instr
) << out_var
->data
.location_frac
;
204 std::array
<uint32_t,4> swizzle
;
205 for (int i
= 0; i
< 4; ++i
)
206 swizzle
[i
] = ((1 << i
) & write_mask
) ? i
- out_var
->data
.location_frac
: 7;
208 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
= write_mask
;
/* Load the value and record its register selector (sel()) in the output
 * record. */
210 GPRVector
*value
= m_proc
.vec_from_nir_with_fetch_constant(instr
->src
[1], write_mask
, swizzle
);
211 m_proc
.sh_info().output
[out_var
->data
.driver_location
].gpr
= value
->sel();
213 /* This should use the registers!! */
214 m_proc
.set_output(out_var
->data
.driver_location
, PValue(value
));
/* Look up the parameter index assigned to this location.
 * NOTE(review): the only guard is an assert; in a build where asserts are
 * compiled out, a missing entry would make param_loc->second dereference
 * end(). */
216 auto param_loc
= m_param_map
.find(out_var
->data
.location
);
217 assert(param_loc
!= m_param_map
.end());
219 m_last_param_export
= new ExportInstruction(param_loc
->second
, *value
, ExportInstruction::et_param
);
220 m_proc
.emit_export_instruction(m_last_param_export
);
221 m_proc
.add_param_output_reg(out_var
->data
.driver_location
, m_last_param_export
->gpr_ptr());
/* Handles a store to the clip-vertex output: computes eight clip distances
 * as four-component dot products of the clip vertex with constants, and
 * emits them as two position exports.
 *
 * out_var - the clip-vertex output variable
 * instr   - the store intrinsic; src[1] provides the clip vertex
 *
 * The declarations of oreg / ochan and the return statement are not visible
 * in this listing. */
225 bool VertexStageExportForFS::emit_clip_vertices(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
/* Mark all eight clip distances as written. */
227 m_proc
.sh_info().cc_dist_mask
= 0xff;
228 m_proc
.sh_info().clip_dist_write
= 0xff;
/* Load all four components of the clip vertex with an identity swizzle. */
230 std::unique_ptr
<GPRVector
> clip_vertex(m_proc
.vec_from_nir_with_fetch_constant(instr
->src
[1], 0xf, {0,1,2,3}));
/* Sets bits 0..3 of the output's write mask. */
232 for (int i
= 0; i
< 4; ++i
)
233 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
|= 1 << i
;
/* Two temporary vec4s hold the eight results. */
235 GPRVector clip_dist
[2] = { m_proc
.get_temp_vec4(), m_proc
.get_temp_vec4()};
/* One dot4 group per clip distance i. Each group emits four ALU
 * instructions (one per channel j); only the instruction for channel ochan
 * carries the write flag. The second operand is UniformValue(512 + i, j)
 * from the R600_BUFFER_INFO_CONST_BUFFER buffer.
 * NOTE(review): presumably oreg/ochan select the vec4 and channel for
 * distance i - their definitions are not visible here. */
237 for (int i
= 0; i
< 8; i
++) {
240 AluInstruction
*ir
= nullptr;
241 for (int j
= 0; j
< 4; j
++) {
242 ir
= new AluInstruction(op2_dot4_ieee
, clip_dist
[oreg
].reg_i(j
), clip_vertex
->reg_i(j
),
243 PValue(new UniformValue(512 + i
, j
, R600_BUFFER_INFO_CONST_BUFFER
)),
244 (j
== ochan
) ? EmitInstruction::write
: EmitInstruction::empty
);
245 m_proc
.emit_instruction(ir
);
/* Flag the most recently created ALU instruction as last in its group. */
247 ir
->set_flag(alu_last_instr
);
/* Export both result vectors as positions. The first export post-increments
 * m_cur_clip_pos; the second uses the incremented value without advancing
 * it further. */
250 m_last_pos_export
= new ExportInstruction(m_cur_clip_pos
++, clip_dist
[0], ExportInstruction::et_pos
);
251 m_proc
.emit_export_instruction(m_last_pos_export
);
253 m_last_pos_export
= new ExportInstruction(m_cur_clip_pos
, clip_dist
[1], ExportInstruction::et_pos
);
254 m_proc
.emit_export_instruction(m_last_pos_export
);
/* Finishes the export sequence: optionally exports the primitive id,
 * publishes the stream-buffer mask, makes sure at least one parameter and
 * one position export exist, and marks the final export of each kind as
 * last. */
259 void VertexStageExportForFS::finalize_exports()
/* When the key's vs.as_gs_a is set, export the primitive id as an extra
 * parameter at index m_cur_param and add an output record for it. The other
 * three channels of the vector all use the value o. */
261 if (m_key
.vs
.as_gs_a
) {
262 PValue
o(new GPRValue(0,PIPE_SWIZZLE_0
));
263 GPRVector
primid({m_proc
.primitive_id(), o
,o
,o
});
264 m_last_param_export
= new ExportInstruction(m_cur_param
, primid
, ExportInstruction::et_param
);
265 m_proc
.emit_export_instruction(m_last_param_export
);
/* Append a new output record (post-increment of noutput); the declaration
 * of i is not visible in this listing. */
267 i
= m_proc
.sh_info().noutput
++;
268 auto& io
= m_proc
.sh_info().output
[i
];
269 io
.name
= TGSI_SEMANTIC_PRIMID
;
272 io
.interpolate
= TGSI_INTERPOLATE_CONSTANT
;
274 io
.spi_sid
= m_key
.vs
.prim_id_out
;
275 m_proc
.sh_info().vs_as_gs_a
= 1;
/* Stream output is configured: the guarded statement is not visible here
 * (presumably the emit_stream call - confirm). */
278 if (m_so_info
&& m_so_info
->num_outputs
)
281 m_pipe_shader
->enabled_stream_buffers_mask
= m_enabled_stream_buffers_mask
;
/* No parameter export was emitted: emit a placeholder at parameter 0 whose
 * channels all use swizzle 7. */
283 if (!m_last_param_export
) {
284 GPRVector
value(0,{7,7,7,7});
285 m_last_param_export
= new ExportInstruction(0, value
, ExportInstruction::et_param
);
286 m_proc
.emit_export_instruction(m_last_param_export
);
288 m_last_param_export
->set_last();
/* Same placeholder handling for position exports. */
290 if (!m_last_pos_export
) {
291 GPRVector
value(0,{7,7,7,7});
292 m_last_pos_export
= new ExportInstruction(0, value
, ExportInstruction::et_pos
);
293 m_proc
.emit_export_instruction(m_last_pos_export
);
295 m_last_pos_export
->set_last();
/* Emits the stream-output writes described by m_so_info.
 *
 * stream - stream to emit; -1 disables the per-stream filter in the first
 *          loop
 *
 * Two passes over the outputs: the first locates (and, where needed, moves)
 * the source registers, the second emits one StreamOutIntruction per output
 * and accumulates m_enabled_stream_buffers_mask. Return and continue
 * statements and a few assignments are not visible in this listing. */
298 bool VertexStageExportForFS::emit_stream(int stream
)
/* Validation: too many outputs, or a buffer index >= 4, is reported via
 * R600_ERR. */
301 if (m_so_info
->num_outputs
> PIPE_MAX_SO_OUTPUTS
) {
302 R600_ERR("Too many stream outputs: %d\n", m_so_info
->num_outputs
);
305 for (unsigned i
= 0; i
< m_so_info
->num_outputs
; i
++) {
306 if (m_so_info
->output
[i
].output_buffer
>= 4) {
307 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
308 m_so_info
->output
[i
].output_buffer
);
/* NOTE(review): these arrays are sized by PIPE_MAX_SHADER_OUTPUTS but
 * indexed by i < num_outputs, which is checked against PIPE_MAX_SO_OUTPUTS
 * above - verify that PIPE_MAX_SO_OUTPUTS <= PIPE_MAX_SHADER_OUTPUTS. */
312 const GPRVector
*so_gpr
[PIPE_MAX_SHADER_OUTPUTS
];
313 unsigned start_comp
[PIPE_MAX_SHADER_OUTPUTS
];
314 std::vector
<GPRVector
> tmp(m_so_info
->num_outputs
);
316 /* Initialize locations where the outputs are stored. */
317 for (unsigned i
= 0; i
< m_so_info
->num_outputs
; i
++) {
/* Filter outputs that belong to a different stream; the guarded statement
 * is not visible in this listing. */
318 if (stream
!= -1 && stream
!= m_so_info
->output
[i
].stream
)
321 sfn_log
<< SfnLog::instr
<< "Emit stream " << i
322 << " with register index " << m_so_info
->output
[i
].register_index
<< " so_gpr:";
325 so_gpr
[i
] = m_proc
.output_register(m_so_info
->output
[i
].register_index
);
328 sfn_log
<< SfnLog::err
<< "\nERR: register index "
329 << m_so_info
->output
[i
].register_index
330 << " doesn't correspond to an output register\n";
333 start_comp
[i
] = m_so_info
->output
[i
].start_component
;
334 /* Lower outputs with dst_offset < start_component.
336 * We can only output 4D vectors with a write mask, e.g. we can
337 * only output the W component at offset 3, etc. If we want
338 * to store Y, Z, or W at buffer offset 0, we need to use MOV
339 * to move it to X and output X. */
340 if (m_so_info
->output
[i
].dst_offset
< m_so_info
->output
[i
].start_component
) {
341 int tmp_index
= m_proc
.allocate_temp_register();
342 int sc
= m_so_info
->output
[i
].start_component
;
343 AluInstruction
*alu
= nullptr;
/* Move component (j + sc) of the source into channel j of the temporary
 * register and record that channel in tmp[i]. */
344 for (int j
= 0; j
< m_so_info
->output
[i
].num_components
; j
++) {
345 PValue
dst(new GPRValue(tmp_index
, j
));
346 alu
= new AluInstruction(op1_mov
, dst
, so_gpr
[i
]->reg_i(j
+ sc
), {alu_write
});
347 tmp
[i
].set_reg_i(j
, dst
);
348 m_proc
.emit_instruction(alu
);
/* NOTE(review): alu is still nullptr here if num_components is 0; a guard
 * may exist on a line that is not visible - confirm. */
351 alu
->set_flag(alu_last_instr
);
353 /* Fill the vector with masked values */
354 PValue
dst_blank(new GPRValue(tmp_index
, 7));
355 for (int j
= m_so_info
->output
[i
].num_components
; j
< 4; j
++)
356 tmp
[i
].set_reg_i(j
, dst_blank
);
361 sfn_log
<< SfnLog::instr
<< *so_gpr
[i
] << "\n";
364 /* Write outputs to buffers. */
/* NOTE(review): no stream filter is visible in this loop, although the
 * first loop filters by stream. If entries skipped there are not skipped
 * here too, so_gpr[i] and start_comp[i] are read uninitialized - confirm
 * against the full source. */
365 for (unsigned i
= 0; i
< m_so_info
->num_outputs
; i
++) {
366 sfn_log
<< SfnLog::instr
<< "Write output buffer " << i
367 << " with register index " << m_so_info
->output
[i
].register_index
<< "\n";
/* Arguments: source vector, component count, buffer offset relative to the
 * start component, component mask shifted to the start component, buffer
 * index, stream. */
369 StreamOutIntruction
*out_stream
=
370 new StreamOutIntruction(*so_gpr
[i
],
371 m_so_info
->output
[i
].num_components
,
372 m_so_info
->output
[i
].dst_offset
- start_comp
[i
],
373 ((1 << m_so_info
->output
[i
].num_components
) - 1) << start_comp
[i
],
374 m_so_info
->output
[i
].output_buffer
,
375 m_so_info
->output
[i
].stream
);
376 m_proc
.emit_export_instruction(out_stream
);
/* '*' binds tighter than '<<', so the buffer bit is shifted by stream * 4,
 * i.e. bit index output_buffer + 4 * stream. */
377 m_enabled_stream_buffers_mask
|= (1 << m_so_info
->output
[i
].output_buffer
) << m_so_info
->output
[i
].stream
* 4;
/* Constructor of VertexStageExportForGS.
 *
 * proc      - forwarded to the VertexStageExportBase constructor
 * gs_shader - stored in m_gs_shader; store_deref() matches outputs against
 *             this shader's inputs
 *
 * The constructor body is not visible in this listing. */
383 VertexStageExportForGS::VertexStageExportForGS(VertexStage
&proc
,
384 const r600_shader
*gs_shader
):
385 VertexStageExportBase(proc
),
386 m_gs_shader(gs_shader
)
/* Handles a store to an output variable by writing the value with a
 * MemRingOutIntruction at the ring offset of the matching input of
 * m_gs_shader.
 *
 * out_var - the output variable being stored
 * instr   - the store intrinsic; src[1] provides the value
 *
 * Return statements and the statement guarded by the VARYING_SLOT_VIEWPORT
 * check are not visible in this listing. */
391 bool VertexStageExportForGS::store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
/* -1 means "no matching input found". */
394 int ring_offset
= -1;
395 const r600_shader_io
& out_io
= m_proc
.sh_info().output
[out_var
->data
.driver_location
];
397 sfn_log
<< SfnLog::io
<< "check output " << out_var
->data
.driver_location
398 << " name=" << out_io
.name
<< " sid=" << out_io
.sid
<< "\n";
/* Search the m_gs_shader inputs for one with the same semantic name and
 * sid, and take its ring offset. */
399 for (unsigned k
= 0; k
< m_gs_shader
->ninput
; ++k
) {
400 auto& in_io
= m_gs_shader
->input
[k
];
401 sfn_log
<< SfnLog::io
<< " against " << k
<< " name=" << in_io
.name
<< " sid=" << in_io
.sid
<< "\n";
403 if (in_io
.name
== out_io
.name
&&
404 in_io
.sid
== out_io
.sid
) {
405 ring_offset
= in_io
.ring_offset
;
410 if (out_var
->data
.location
== VARYING_SLOT_VIEWPORT
)
/* No matching input found: log the mismatch. */
413 if (ring_offset
== -1) {
414 sfn_log
<< SfnLog::err
<< "VS defines output at "
415 << out_var
->data
.driver_location
<< "name=" << out_io
.name
416 << " sid=" << out_io
.sid
<< " that is not consumed as GS input\n";
/* One mask bit per component stored by the intrinsic. */
420 uint32_t write_mask
= (1 << instr
->num_components
) - 1;
422 std::unique_ptr
<GPRVector
> value(m_proc
.vec_from_nir_with_fetch_constant(instr
->src
[1], write_mask
,
423 swizzle_from_mask(instr
->num_components
)));
/* The ring offset is passed on divided by four (>> 2). */
425 auto ir
= new MemRingOutIntruction(cf_mem_ring
, mem_write
, *value
,
426 ring_offset
>> 2, 4, PValue());
427 m_proc
.emit_export_instruction(ir
);
/* Accumulate the written components; each clip-distance slot adds four to
 * the clip-distance count. */
429 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
|= write_mask
;
430 if (out_var
->data
.location
== VARYING_SLOT_CLIP_DIST0
||
431 out_var
->data
.location
== VARYING_SLOT_CLIP_DIST1
)
432 m_num_clip_dist
+= 4;
/* finalize_exports() for VertexStageExportForGS; its body is not visible in
 * this listing. */
437 void VertexStageExportForGS::finalize_exports()
/* Constructor of VertexStageExportForES; only forwards proc to the
 * VertexStageExportBase constructor. The body is not visible in this
 * listing. */
442 VertexStageExportForES::VertexStageExportForES(VertexStage
& proc
):
443 VertexStageExportBase(proc
)
/* store_deref() for VertexStageExportForES; takes the same arguments as the
 * VertexStageExportForFS and VertexStageExportForGS versions. Its body is
 * not visible in this listing. */
447 bool VertexStageExportForES::store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
452 void VertexStageExportForES::finalize_exports()