1 #include "sfn_vertexstageexport.h"
3 #include "tgsi/tgsi_from_mesa.h"
7 using std::priority_queue
;
// Constructor of the common export base class; receives the VertexStage
// processor by reference.
// NOTE(review): the initializer list and body are not visible in this listing.
9 VertexStageExportBase::VertexStageExportBase(VertexStage
& proc
):
// Destructor of the export base class; its body is not visible in this listing.
17 VertexStageExportBase::~VertexStageExportBase()
// Constructor of the exporter used when the vertex stage output is exported
// as position/parameter data (class name suggests: consumed by a fragment
// shader -- TODO confirm).
//
// Parameters:
//   proc        - vertex stage processor, forwarded to the base class
//   so_info     - stream-output description (pointer)
//   pipe_shader - pipe shader object, stored in m_pipe_shader
//   key         - shader key (reference)
//
// The visible initializers start with no pending param/pos export and an
// empty enabled-stream-buffer mask.
// NOTE(review): some initializers (original lines 28, 30, 32+) are not
// visible in this listing.
22 VertexStageExportForFS::VertexStageExportForFS(VertexStage
& proc
,
23 const pipe_stream_output_info
*so_info
,
24 r600_pipe_shader
*pipe_shader
, const r600_shader_key
&key
):
25 VertexStageExportBase(proc
),
26 m_last_param_export(nullptr),
27 m_last_pos_export(nullptr),
29 m_enabled_stream_buffers_mask(0),
31 m_pipe_shader(pipe_shader
),
// Rewrites the values stored in m_param_map, assigning each handled location
// the next value of an incrementing counter (next_param).
// NOTE(review): the lines that fill and drain the queue are not visible in
// this listing; presumably the locations are pushed into the queue and then
// popped in ascending order so parameter slots follow location order -- confirm.
36 void VertexStageExportBase::setup_paramn_map()
// Min-heap of ints: std::greater makes the smallest element the top.
38 priority_queue
<int, std::vector
<int>, std::greater
<int>> q
;
39 for (auto a
: m_param_map
) {
47 m_param_map
[loc
] = next_param
++;
// Registers one NIR output variable in the shader info output table.
//
// For a supported varying slot the code:
//   - looks up the r600_shader_io entry indexed by the variable's
//     driver_location,
//   - fills io.name / io.sid through tgsi_get_gl_varying_semantic(),
//   - lets the processor evaluate the SPI sid,
//   - derives the write mask from the component count and location_frac,
//   - increments the shader's output count,
//   - records a parameter slot in m_param_map for every location other than
//     POS, EDGE, PSIZ and CLIP_VERTEX.
//
// NOTE(review): return statements and the bodies of some conditionals are
// not visible in this listing.
51 bool VertexStageExportBase::do_process_outputs(nir_variable
*output
)
// Accept only the varying slots listed below.
53 if (output
->data
.location
== VARYING_SLOT_COL0
||
54 output
->data
.location
== VARYING_SLOT_COL1
||
55 (output
->data
.location
>= VARYING_SLOT_VAR0
&&
56 output
->data
.location
<= VARYING_SLOT_VAR31
) ||
57 (output
->data
.location
>= VARYING_SLOT_TEX0
&&
58 output
->data
.location
<= VARYING_SLOT_TEX7
) ||
59 output
->data
.location
== VARYING_SLOT_BFC0
||
60 output
->data
.location
== VARYING_SLOT_BFC1
||
61 output
->data
.location
== VARYING_SLOT_CLIP_VERTEX
||
62 output
->data
.location
== VARYING_SLOT_CLIP_DIST0
||
63 output
->data
.location
== VARYING_SLOT_CLIP_DIST1
||
64 output
->data
.location
== VARYING_SLOT_POS
||
65 output
->data
.location
== VARYING_SLOT_PSIZ
||
66 output
->data
.location
== VARYING_SLOT_FOGC
||
67 output
->data
.location
== VARYING_SLOT_LAYER
||
68 output
->data
.location
== VARYING_SLOT_EDGE
||
69 output
->data
.location
== VARYING_SLOT_VIEWPORT
// Output table entry for this variable, indexed by driver_location.
72 r600_shader_io
& io
= m_proc
.sh_info().output
[output
->data
.driver_location
];
73 tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot
>( output
->data
.location
),
74 true, &io
.name
, &io
.sid
);
76 m_proc
.evaluate_spi_sid(io
);
// One bit per component, shifted to the first component used in the slot.
77 io
.write_mask
= ((1 << glsl_get_components(output
->type
)) - 1)
78 << output
->data
.location_frac
;
79 ++m_proc
.sh_info().noutput
;
// NOTE(review): the statement guarded by this condition is not visible here.
81 if (output
->data
.location
== VARYING_SLOT_PSIZ
||
82 output
->data
.location
== VARYING_SLOT_EDGE
||
83 output
->data
.location
== VARYING_SLOT_LAYER
)
// Every location except these four gets the next parameter slot.
86 if (output
->data
.location
!= VARYING_SLOT_POS
&&
87 output
->data
.location
!= VARYING_SLOT_EDGE
&&
88 output
->data
.location
!= VARYING_SLOT_PSIZ
&&
89 output
->data
.location
!= VARYING_SLOT_CLIP_VERTEX
)
90 m_param_map
[output
->data
.location
] = m_cur_param
++;
// Dispatches a store to an output variable to the matching export emitter,
// selected by the variable's varying slot:
//   PSIZ          - flags point size and misc write in the shader info
//   POS           - position export
//   EDGE          - position export with swizzle override {7,0,7,7}
//   CLIP_VERTEX   - emit_clip_vertices()
//   CLIP_DIST0/1  - counts 4 more clip distances, then param and pos export
//   LAYER         - flags misc write and layer, pos export with swizzle
//                   {7,7,0,7}, then param export
//   VIEW_INDEX    - pos export followed by param export
// Locations up to VAR31 or within TEX0..TEX7 go to emit_varying_param().
// Anything else is reported on stderr as unimplemented.
//
// NOTE(review): break/return/default lines are not visible in this listing,
// so whether PSIZ falls through into the POS case cannot be confirmed here.
98 bool VertexStageExportForFS::store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
101 switch (out_var
->data
.location
) {
102 case VARYING_SLOT_PSIZ
:
103 m_proc
.sh_info().vs_out_point_size
= 1;
104 m_proc
.sh_info().vs_out_misc_write
= 1;
106 case VARYING_SLOT_POS
:
107 return emit_varying_pos(out_var
, instr
);
108 case VARYING_SLOT_EDGE
: {
// Only the second channel is taken from the source (component 0); the value 7
// is used for the other channels (7 presumably means "masked" -- confirm).
109 std::array
<uint32_t, 4> swizzle_override
= {7 ,0, 7, 7};
110 return emit_varying_pos(out_var
, instr
, &swizzle_override
);
112 case VARYING_SLOT_CLIP_VERTEX
:
113 return emit_clip_vertices(out_var
, instr
);
114 case VARYING_SLOT_CLIP_DIST0
:
115 case VARYING_SLOT_CLIP_DIST1
:
116 m_num_clip_dist
+= 4;
117 return emit_varying_param(out_var
, instr
) && emit_varying_pos(out_var
, instr
);
118 case VARYING_SLOT_LAYER
: {
119 m_proc
.sh_info().vs_out_misc_write
= 1;
120 m_proc
.sh_info().vs_out_layer
= 1;
// Only the third channel is taken from the source (component 0).
121 std::array
<uint32_t, 4> swz
= {7,7,0,7};
122 return emit_varying_pos(out_var
, instr
, &swz
) &&
123 emit_varying_param(out_var
, instr
);
125 case VARYING_SLOT_VIEW_INDEX
:
126 return emit_varying_pos(out_var
, instr
) &&
127 emit_varying_param(out_var
, instr
);
// Generic varyings and texture coordinates are exported as parameters.
130 if (out_var
->data
.location
<= VARYING_SLOT_VAR31
||
131 (out_var
->data
.location
>= VARYING_SLOT_TEX0
&&
132 out_var
->data
.location
<= VARYING_SLOT_TEX7
))
133 return emit_varying_param(out_var
, instr
);
136 fprintf(stderr
, "r600-NIR: Unimplemented store_deref for %d\n",
137 out_var
->data
.location
);
// Emits a position-type export (ExportInstruction::et_pos) for an output store.
//
// Parameters:
//   out_var          - output variable being written
//   instr            - store intrinsic; src[1] supplies the value
//   swizzle_override - optional explicit channel swizzle; when null the
//                      swizzle is derived from the intrinsic's write mask
//
// The resulting write mask is stored in the output table, the value is
// registered as the output for driver_location, and the export instruction
// is remembered in m_last_pos_export.
//
// NOTE(review): several lines (the condition inside the override loop, the
// export_slot declaration, break/return statements) are not visible here.
141 bool VertexStageExportForFS::emit_varying_pos(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
,
142 std::array
<uint32_t, 4> *swizzle_override
)
144 std::array
<uint32_t,4> swizzle
;
145 uint32_t write_mask
= 0;
147 if (swizzle_override
) {
148 swizzle
= *swizzle_override
;
// Build the write mask from the overridden swizzle, one bit per channel.
149 for (int i
= 0; i
< 4; ++i
) {
151 write_mask
|= 1 << i
;
// No override: take the intrinsic's write mask shifted by location_frac and
// map each enabled channel to source component (i - location_frac); disabled
// channels get the value 7.
154 write_mask
= nir_intrinsic_write_mask(instr
) << out_var
->data
.location_frac
;
155 for (int i
= 0; i
< 4; ++i
)
156 swizzle
[i
] = ((1 << i
) & write_mask
) ? i
- out_var
->data
.location_frac
: 7;
159 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
= write_mask
;
161 GPRVector
*value
= m_proc
.vec_from_nir_with_fetch_constant(instr
->src
[1], write_mask
, swizzle
);
162 m_proc
.set_output(out_var
->data
.driver_location
, PValue(value
));
// Select the export slot and do per-location fixups.
166 switch (out_var
->data
.location
) {
167 case VARYING_SLOT_EDGE
: {
168 m_proc
.sh_info().vs_out_misc_write
= 1;
169 m_proc
.sh_info().vs_out_edgeflag
= 1;
// Edge flag: clamp channel 1 in place with a MOV, then convert it from float
// to integer.
170 m_proc
.emit_instruction(op1_mov
, value
->reg_i(1), {value
->reg_i(1)}, {alu_write
, alu_dst_clamp
, alu_last_instr
});
171 m_proc
.emit_instruction(op1_flt_to_int
, value
->reg_i(1), {value
->reg_i(1)}, {alu_write
, alu_last_instr
});
172 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
= 0xf;
175 case VARYING_SLOT_PSIZ
:
176 case VARYING_SLOT_LAYER
:
179 case VARYING_SLOT_POS
:
181 case VARYING_SLOT_CLIP_DIST0
:
182 case VARYING_SLOT_CLIP_DIST1
:
// Clip distances take consecutive position export slots.
183 export_slot
= m_cur_clip_pos
++;
186 sfn_log
<< SfnLog::err
<< __func__
<< "Unsupported location "
187 << out_var
->data
.location
<< "\n";
191 m_last_pos_export
= new ExportInstruction(export_slot
, *value
, ExportInstruction::et_pos
);
192 m_proc
.emit_export_instruction(m_last_pos_export
);
193 m_proc
.add_param_output_reg(out_var
->data
.driver_location
, m_last_pos_export
->gpr_ptr());
// Emits a parameter-type export (ExportInstruction::et_param) for an output
// store.
//
// Parameters:
//   out_var - output variable being written; its location must already have
//             an entry in m_param_map (asserted)
//   instr   - store intrinsic; src[1] supplies the value
//
// The write mask and GPR selector are stored in the output table, the value
// is registered as the output for driver_location, and the export is
// remembered in m_last_param_export.
// NOTE(review): the return statement is not visible in this listing.
197 bool VertexStageExportForFS::emit_varying_param(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
199 assert(out_var
->data
.driver_location
< m_proc
.sh_info().noutput
);
200 sfn_log
<< SfnLog::io
<< __func__
<< ": emit DDL: " << out_var
->data
.driver_location
<< "\n";
// Enabled channels map to source component (i - location_frac); disabled
// channels get the value 7.
202 int write_mask
= nir_intrinsic_write_mask(instr
) << out_var
->data
.location_frac
;
203 std::array
<uint32_t,4> swizzle
;
204 for (int i
= 0; i
< 4; ++i
)
205 swizzle
[i
] = ((1 << i
) & write_mask
) ? i
- out_var
->data
.location_frac
: 7;
207 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
= write_mask
;
209 GPRVector
*value
= m_proc
.vec_from_nir_with_fetch_constant(instr
->src
[1], write_mask
, swizzle
);
210 m_proc
.sh_info().output
[out_var
->data
.driver_location
].gpr
= value
->sel();
212 /* This should use the registers!! */
213 m_proc
.set_output(out_var
->data
.driver_location
, PValue(value
));
// The export slot comes from the location -> parameter map.
215 auto param_loc
= m_param_map
.find(out_var
->data
.location
);
216 assert(param_loc
!= m_param_map
.end());
218 m_last_param_export
= new ExportInstruction(param_loc
->second
, *value
, ExportInstruction::et_param
);
219 m_proc
.emit_export_instruction(m_last_param_export
);
220 m_proc
.add_param_output_reg(out_var
->data
.driver_location
, m_last_param_export
->gpr_ptr());
// Handles a store to the clip-vertex output: computes eight values as DOT4
// products of the clip vertex with constant-buffer entries and exports them
// as two position exports.
//
// Parameters:
//   out_var - the clip-vertex output variable
//   instr   - store intrinsic; src[1] supplies the clip vertex
//
// NOTE(review): the definitions of oreg and ochan (original lines 237-238)
// and the return statement are not visible in this listing; presumably they
// select the destination vector and channel from i -- confirm.
224 bool VertexStageExportForFS::emit_clip_vertices(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
// Mark all eight clip distances as written.
226 m_proc
.sh_info().cc_dist_mask
= 0xff;
227 m_proc
.sh_info().clip_dist_write
= 0xff;
// Load all four components of the clip vertex; the unique_ptr releases the
// vector when the function exits.
229 std::unique_ptr
<GPRVector
> clip_vertex(m_proc
.vec_from_nir_with_fetch_constant(instr
->src
[1], 0xf, {0,1,2,3}));
// Sets the four low write-mask bits of this output.
231 for (int i
= 0; i
< 4; ++i
)
232 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
|= 1 << i
;
234 GPRVector clip_dist
[2] = { m_proc
.get_temp_vec4(), m_proc
.get_temp_vec4()};
// One four-instruction DOT4 group per iteration; only the instruction whose
// slot equals ochan writes its result.
236 for (int i
= 0; i
< 8; i
++) {
239 AluInstruction
*ir
= nullptr;
240 for (int j
= 0; j
< 4; j
++) {
241 ir
= new AluInstruction(op2_dot4_ieee
, clip_dist
[oreg
].reg_i(j
), clip_vertex
->reg_i(j
),
242 PValue(new UniformValue(512 + i
, j
, R600_BUFFER_INFO_CONST_BUFFER
)),
243 (j
== ochan
) ? EmitInstruction::write
: EmitInstruction::empty
);
244 m_proc
.emit_instruction(ir
);
// Flag the last instruction of the group.
246 ir
->set_flag(alu_last_instr
);
// Export both result vectors; the first export advances m_cur_clip_pos, the
// second uses the advanced value without incrementing it again.
249 m_last_pos_export
= new ExportInstruction(m_cur_clip_pos
++, clip_dist
[0], ExportInstruction::et_pos
);
250 m_proc
.emit_export_instruction(m_last_pos_export
);
252 m_last_pos_export
= new ExportInstruction(m_cur_clip_pos
, clip_dist
[1], ExportInstruction::et_pos
);
253 m_proc
.emit_export_instruction(m_last_pos_export
);
// Finishes the export sequence of the vertex stage:
//   1. if the shader key has vs.as_gs_a set, exports the primitive id as an
//      extra parameter and appends a matching output table entry,
//   2. handles stream output when so_info has outputs (the guarded statement
//      is not visible in this listing -- presumably a call to emit_stream),
//   3. copies the accumulated stream buffer mask to the pipe shader,
//   4. makes sure at least one param export and one pos export exist by
//      emitting dummy exports if needed, then marks the final export of each
//      kind with set_last().
258 void VertexStageExportForFS::finalize_exports()
260 if (m_key
.vs
.as_gs_a
) {
// Vector (primitive_id, 0, 0, 0) exported in the next free parameter slot.
261 PValue
o(new GPRValue(0,PIPE_SWIZZLE_0
));
262 GPRVector
primid({m_proc
.primitive_id(), o
,o
,o
});
263 m_last_param_export
= new ExportInstruction(m_cur_param
, primid
, ExportInstruction::et_param
);
264 m_proc
.emit_export_instruction(m_last_param_export
);
// Append an output table entry describing the primitive id.
266 i
= m_proc
.sh_info().noutput
++;
267 auto& io
= m_proc
.sh_info().output
[i
];
268 io
.name
= TGSI_SEMANTIC_PRIMID
;
271 io
.interpolate
= TGSI_INTERPOLATE_CONSTANT
;
273 io
.spi_sid
= m_key
.vs
.prim_id_out
;
274 m_proc
.sh_info().vs_as_gs_a
= 1;
277 if (m_so_info
&& m_so_info
->num_outputs
)
280 m_pipe_shader
->enabled_stream_buffers_mask
= m_enabled_stream_buffers_mask
;
// No parameter export was emitted: emit a dummy one with all channels set to
// swizzle 7.
282 if (!m_last_param_export
) {
283 GPRVector
value(0,{7,7,7,7});
284 m_last_param_export
= new ExportInstruction(0, value
, ExportInstruction::et_param
);
285 m_proc
.emit_export_instruction(m_last_param_export
);
287 m_last_param_export
->set_last();
// Same fallback for the position export.
289 if (!m_last_pos_export
) {
290 GPRVector
value(0,{7,7,7,7});
291 m_last_pos_export
= new ExportInstruction(0, value
, ExportInstruction::et_pos
);
292 m_proc
.emit_export_instruction(m_last_pos_export
);
294 m_last_pos_export
->set_last();
// Emits the stream-output (transform feedback) writes described by m_so_info.
//
// Parameter:
//   stream - stream to emit; the first loop compares each output's stream
//            against it unless it is -1
//
// Steps visible in this listing:
//   1. validate the number of outputs and the buffer index of each output,
//   2. resolve the source register of each output, moving components into a
//      temporary when dst_offset < start_component,
//   3. emit one StreamOutIntruction per output and accumulate the enabled
//      buffer mask.
//
// NOTE(review): return/continue statements, else branches and the null check
// on the looked-up output register are not visible in this listing.
297 bool VertexStageExportForFS::emit_stream(int stream
)
300 if (m_so_info
->num_outputs
> PIPE_MAX_SO_OUTPUTS
) {
301 R600_ERR("Too many stream outputs: %d\n", m_so_info
->num_outputs
);
304 for (unsigned i
= 0; i
< m_so_info
->num_outputs
; i
++) {
305 if (m_so_info
->output
[i
].output_buffer
>= 4) {
306 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
307 m_so_info
->output
[i
].output_buffer
);
// Per-output source vector and first component; tmp holds the repacked
// vectors for outputs that need lowering.
311 const GPRVector
*so_gpr
[PIPE_MAX_SHADER_OUTPUTS
];
312 unsigned start_comp
[PIPE_MAX_SHADER_OUTPUTS
];
313 std::vector
<GPRVector
> tmp(m_so_info
->num_outputs
);
315 /* Initialize locations where the outputs are stored. */
316 for (unsigned i
= 0; i
< m_so_info
->num_outputs
; i
++) {
// NOTE(review): the statement guarded by this stream filter is not visible.
317 if (stream
!= -1 && stream
!= m_so_info
->output
[i
].stream
)
320 sfn_log
<< SfnLog::instr
<< "Emit stream " << i
321 << " with register index " << m_so_info
->output
[i
].register_index
<< " so_gpr:";
324 so_gpr
[i
] = m_proc
.output_register(m_so_info
->output
[i
].register_index
);
327 sfn_log
<< SfnLog::err
<< "\nERR: register index "
328 << m_so_info
->output
[i
].register_index
329 << " doesn't correspond to an output register\n";
332 start_comp
[i
] = m_so_info
->output
[i
].start_component
;
333 /* Lower outputs with dst_offset < start_component.
335 * We can only output 4D vectors with a write mask, e.g. we can
336 * only output the W component at offset 3, etc. If we want
337 * to store Y, Z, or W at buffer offset 0, we need to use MOV
338 * to move it to X and output X. */
339 if (m_so_info
->output
[i
].dst_offset
< m_so_info
->output
[i
].start_component
) {
340 int tmp_index
= m_proc
.allocate_temp_register();
341 int sc
= m_so_info
->output
[i
].start_component
;
342 AluInstruction
*alu
= nullptr;
// Copy source components sc.. into channels 0.. of the temporary register.
343 for (int j
= 0; j
< m_so_info
->output
[i
].num_components
; j
++) {
344 PValue
dst(new GPRValue(tmp_index
, j
));
345 alu
= new AluInstruction(op1_mov
, dst
, so_gpr
[i
]->reg_i(j
+ sc
), {alu_write
});
346 tmp
[i
].set_reg_i(j
, dst
);
347 m_proc
.emit_instruction(alu
);
350 alu
->set_flag(alu_last_instr
);
352 /* Fill the vector with masked values */
353 PValue
dst_blank(new GPRValue(tmp_index
, 7));
354 for (int j
= m_so_info
->output
[i
].num_components
; j
< 4; j
++)
355 tmp
[i
].set_reg_i(j
, dst_blank
);
360 sfn_log
<< SfnLog::instr
<< *so_gpr
[i
] << "\n";
363 /* Write outputs to buffers. */
364 for (unsigned i
= 0; i
< m_so_info
->num_outputs
; i
++) {
365 sfn_log
<< SfnLog::instr
<< "Write output buffer " << i
366 << " with register index " << m_so_info
->output
[i
].register_index
<< "\n";
// Arguments: source vector, component count, dst_offset - start_comp, a
// component mask shifted to start_comp, output buffer, stream.
368 StreamOutIntruction
*out_stream
=
369 new StreamOutIntruction(*so_gpr
[i
],
370 m_so_info
->output
[i
].num_components
,
371 m_so_info
->output
[i
].dst_offset
- start_comp
[i
],
372 ((1 << m_so_info
->output
[i
].num_components
) - 1) << start_comp
[i
],
373 m_so_info
->output
[i
].output_buffer
,
374 m_so_info
->output
[i
].stream
);
375 m_proc
.emit_export_instruction(out_stream
);
// Each stream owns a group of four bits in the mask, one bit per buffer.
376 m_enabled_stream_buffers_mask
|= (1 << m_so_info
->output
[i
].output_buffer
) << m_so_info
->output
[i
].stream
* 4;
// Constructor of the exporter variant that holds a geometry shader
// description.
//
// Parameters:
//   proc      - vertex stage processor, forwarded to the base class
//   gs_shader - r600_shader pointer stored in m_gs_shader; store_deref()
//               reads its input table
// NOTE(review): the constructor body is not visible in this listing.
382 VertexStageExportForGS::VertexStageExportForGS(VertexStage
&proc
,
383 const r600_shader
*gs_shader
):
384 VertexStageExportBase(proc
),
385 m_gs_shader(gs_shader
)
// Handles a store to an output variable by emitting a memory ring write
// (MemRingOutIntruction).
//
// The ring offset is taken from the m_gs_shader input whose name and sid
// match those of this output. If no input matches, an error is logged.
// Afterwards the output's write mask is extended, and the clip distance
// count grows by 4 for CLIP_DIST0/CLIP_DIST1.
//
// Parameters:
//   out_var - output variable being written
//   instr   - store intrinsic; src[1] supplies the value and num_components
//             gives the number of components
//
// NOTE(review): break/return statements and the body of the VIEWPORT check
// are not visible in this listing.
390 bool VertexStageExportForGS::store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
// -1 means "no matching input found".
393 int ring_offset
= -1;
394 const r600_shader_io
& out_io
= m_proc
.sh_info().output
[out_var
->data
.driver_location
];
396 sfn_log
<< SfnLog::io
<< "check output " << out_var
->data
.driver_location
397 << " name=" << out_io
.name
<< " sid=" << out_io
.sid
<< "\n";
// Look for the input with the same semantic name and sid.
398 for (unsigned k
= 0; k
< m_gs_shader
->ninput
; ++k
) {
399 auto& in_io
= m_gs_shader
->input
[k
];
400 sfn_log
<< SfnLog::io
<< " against " << k
<< " name=" << in_io
.name
<< " sid=" << in_io
.sid
<< "\n";
402 if (in_io
.name
== out_io
.name
&&
403 in_io
.sid
== out_io
.sid
) {
404 ring_offset
= in_io
.ring_offset
;
// NOTE(review): the statement guarded by this check is not visible here.
409 if (out_var
->data
.location
== VARYING_SLOT_VIEWPORT
)
412 if (ring_offset
== -1) {
413 sfn_log
<< SfnLog::err
<< "VS defines output at "
414 << out_var
->data
.driver_location
<< "name=" << out_io
.name
415 << " sid=" << out_io
.sid
<< " that is not consumed as GS input\n";
// One mask bit per stored component.
419 uint32_t write_mask
= (1 << instr
->num_components
) - 1;
421 std::unique_ptr
<GPRVector
> value(m_proc
.vec_from_nir_with_fetch_constant(instr
->src
[1], write_mask
,
422 swizzle_from_mask(instr
->num_components
)));
// The ring offset is passed divided by four.
424 auto ir
= new MemRingOutIntruction(cf_mem_ring
, mem_write
, *value
,
425 ring_offset
>> 2, 4, PValue());
426 m_proc
.emit_export_instruction(ir
);
428 m_proc
.sh_info().output
[out_var
->data
.driver_location
].write_mask
|= write_mask
;
429 if (out_var
->data
.location
== VARYING_SLOT_CLIP_DIST0
||
430 out_var
->data
.location
== VARYING_SLOT_CLIP_DIST1
)
431 m_num_clip_dist
+= 4;
// finalize_exports() for the GS-feeding exporter.
// NOTE(review): the body is not visible in this listing.
436 void VertexStageExportForGS::finalize_exports()
// Constructor of the ES exporter variant; only forwards the VertexStage
// processor to the base class.
// NOTE(review): the constructor body is not visible in this listing.
441 VertexStageExportForES::VertexStageExportForES(VertexStage
& proc
):
442 VertexStageExportBase(proc
)
// store_deref() for the ES exporter variant; takes the output variable and
// the store intrinsic like the other exporters.
// NOTE(review): the body is not visible in this listing.
446 bool VertexStageExportForES::store_deref(const nir_variable
*out_var
, nir_intrinsic_instr
* instr
)
// finalize_exports() for the ES exporter variant.
// NOTE(review): the body is not visible in this listing.
451 void VertexStageExportForES::finalize_exports()