1 #include "sfn_emitssboinstruction.h"
3 #include "sfn_instruction_fetch.h"
4 #include "sfn_instruction_gds.h"
5 #include "sfn_instruction_misc.h"
6 #include "sfn_instruction_tex.h"
7 #include "../r600_pipe.h"
8 #include "../r600_asm.h"
// Base selector for the buffer-info constants: 512 plus R600_BUFFER_INFO_OFFSET / 16.
// In this file it is only used by emit_image_size() when building the
// UniformValue that supplies the layer count of cube-array images.
// NOTE(review): the 512 base and the /16 scaling presumably convert a byte
// offset into a vec4 constant slot -- confirm against r600_pipe.h.
12 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
// Constructor: forwards `processor` to the EmitInstruction base class and
// starts with the "RAT return address required" flag cleared.
14 EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor
& processor
):
15 EmitInstruction(processor
),
16 m_require_rat_return_address(false)
// Requests that a RAT return address be prepared: sets the flag that
// load_rat_return_address() checks (and clears once it has emitted the setup).
21 void EmitSSBOInstruction::set_require_rat_return_address()
23 m_require_rat_return_address
= true;
// Emits the ALU sequence that fills m_rat_return_address, but only when
// set_require_rat_return_address() was called beforehand; the flag is cleared
// afterwards so the sequence is emitted at most once per request.
// NOTE(review): the return statement is not visible in this listing (lines
// holding only braces/returns appear to be missing) -- check the pristine file.
27 EmitSSBOInstruction::load_rat_return_address()
29 if (m_require_rat_return_address
) {
// Fresh four-component temporary that will hold the address pieces.
30 m_rat_return_address
= get_temp_vec4();
// Component 0 <- MBCNT_32LO_ACCUM_PREV_INT applied to literal(-1).
31 emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int
, m_rat_return_address
.reg_i(0), literal(-1), {alu_write
}));
// Component 1 <- MBCNT_32HI_INT applied to literal(-1).
32 emit_instruction(new AluInstruction(op1_mbcnt_32hi_int
, m_rat_return_address
.reg_i(1), literal(-1), {alu_write
}));
// Component 2 <- MULADD_UINT24 with operands (SE_ID, 256, HW_WAVE_ID); this
// instruction closes the ALU group (alu_last_instr).
// NOTE(review): presumably computes SE_ID * 256 + HW_WAVE_ID -- confirm the
// operand order of AluInstruction for op3 opcodes.
33 emit_instruction(new AluInstruction(op3_muladd_uint24
, m_rat_return_address
.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID
, 0)),
34 literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID
, 0)), {alu_write
, alu_last_instr
}));
// Component 1 <- MULADD_UINT24 with operands (component 2, 0x40, component 0),
// overwriting the MBCNT_32HI result written above.
// NOTE(review): presumably component2 * 0x40 + component0 -- confirm.
35 emit_instruction(new AluInstruction(op3_muladd_uint24
, m_rat_return_address
.reg_i(1),
36 m_rat_return_address
.reg_i(2), literal(0x40), m_rat_return_address
.reg_i(0),
37 {alu_write
, alu_last_instr
}));
// Request satisfied; do not emit the sequence again until asked.
38 m_require_rat_return_address
= false;
// Entry point: converts `instr` to an intrinsic and dispatches on its
// intrinsic opcode to the matching emit_* helper, returning that helper's result.
// NOTE(review): nir_instr_as_intrinsic() is applied without a visible type
// check, and the switch's default/fall-out path is not visible in this
// listing -- confirm both against the pristine file.
44 bool EmitSSBOInstruction::do_emit(nir_instr
* instr
)
46 const nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
47 switch (intr
->intrinsic
) {
// Atomic-counter operations that carry a value operand.
48 case nir_intrinsic_atomic_counter_add
:
49 case nir_intrinsic_atomic_counter_and
:
50 case nir_intrinsic_atomic_counter_exchange
:
51 case nir_intrinsic_atomic_counter_max
:
52 case nir_intrinsic_atomic_counter_min
:
53 case nir_intrinsic_atomic_counter_or
:
54 case nir_intrinsic_atomic_counter_xor
:
55 case nir_intrinsic_atomic_counter_comp_swap
:
56 return emit_atomic(intr
);
// Atomic-counter operations without a value operand.
57 case nir_intrinsic_atomic_counter_read
:
58 case nir_intrinsic_atomic_counter_post_dec
:
59 return emit_unary_atomic(intr
);
60 case nir_intrinsic_atomic_counter_inc
:
61 return emit_atomic_inc(intr
);
62 case nir_intrinsic_atomic_counter_pre_dec
:
63 return emit_atomic_pre_dec(intr
);
64 case nir_intrinsic_load_ssbo
:
65 return emit_load_ssbo(intr
);
66 case nir_intrinsic_store_ssbo
:
67 return emit_store_ssbo(intr
);
// NOTE(review): ssbo_atomic_add is the only SSBO atomic dispatched here even
// though get_rat_opcode() also maps and/or/exchange/imin/imax/umin/umax --
// confirm whether the other SSBO atomics are intentionally unhandled.
68 case nir_intrinsic_ssbo_atomic_add
:
69 return emit_ssbo_atomic_op(intr
);
70 case nir_intrinsic_image_store
:
71 return emit_image_store(intr
);
// Image loads and all image atomics share one helper; emit_image_load()
// distinguishes them by comparing against nir_intrinsic_image_load.
72 case nir_intrinsic_image_load
:
73 case nir_intrinsic_image_atomic_add
:
74 case nir_intrinsic_image_atomic_and
:
75 case nir_intrinsic_image_atomic_or
:
76 case nir_intrinsic_image_atomic_xor
:
77 case nir_intrinsic_image_atomic_exchange
:
78 case nir_intrinsic_image_atomic_comp_swap
:
79 case nir_intrinsic_image_atomic_umin
:
80 case nir_intrinsic_image_atomic_umax
:
81 case nir_intrinsic_image_atomic_imin
:
82 case nir_intrinsic_image_atomic_imax
:
83 return emit_image_load(intr
);
84 case nir_intrinsic_image_size
:
85 return emit_image_size(intr
);
// Emits a GDS instruction for an atomic-counter intrinsic that takes a value
// operand. The opcode comes from get_opcode(); the destination from
// make_dest(); the UAV id from src[0] and the operand from src[1].
// NOTE(review): the statement executed when the opcode is DS_OP_INVALID, the
// `else` between the two GDSInstr constructions and the final return are not
// visible in this listing -- confirm against the pristine file.
91 bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr
* instr
)
93 ESDOp op
= get_opcode(instr
->intrinsic
);
// Guard for intrinsics that get_opcode() cannot map.
95 if (DS_OP_INVALID
== op
)
98 GPRVector dest
= make_dest(instr
);
100 int base
= nir_intrinsic_base(instr
);
102 PValue uav_id
= from_nir(instr
->src
[0], 0);
104 PValue value
= from_nir_with_fetch_constant(instr
->src
[1], 0);
106 GDSInstr
*ir
= nullptr;
// Compare-and-swap needs two operands.
// NOTE(review): the second operand is read from component 1 of src[1], not
// from a separate source -- confirm this matches the NIR intrinsic's layout.
107 if (instr
->intrinsic
== nir_intrinsic_atomic_counter_comp_swap
) {
108 PValue value2
= from_nir_with_fetch_constant(instr
->src
[1], 1);
109 ir
= new GDSInstr(op
, dest
, value
, value2
, uav_id
, base
);
// Single-operand form used for every other intrinsic routed here.
111 ir
= new GDSInstr(op
, dest
, value
, uav_id
, base
);
114 emit_instruction(ir
);
// Emits a GDS instruction for an atomic-counter intrinsic that has no value
// operand (do_emit() routes the read and post-decrement intrinsics here).
// Only the UAV id (src[0]) and the intrinsic base are passed to GDSInstr.
// NOTE(review): the statement executed for DS_OP_INVALID and the final return
// are not visible in this listing -- confirm against the pristine file.
118 bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr
* instr
)
120 ESDOp op
= get_opcode(instr
->intrinsic
);
// Guard for intrinsics that get_opcode() cannot map.
122 if (DS_OP_INVALID
== op
)
125 GPRVector dest
= make_dest(instr
);
127 PValue uav_id
= from_nir(instr
->src
[0], 0);
129 auto ir
= new GDSInstr(op
, dest
, uav_id
, nir_intrinsic_base(instr
));
131 emit_instruction(ir
);
// Maps an atomic-counter NIR intrinsic to the GDS opcode (ESDOp) that returns
// a value; DS_OP_INVALID is the result for intrinsics with no mapping.
// NOTE(review): the `switch` header, the return for the `or` case, the return
// for the `pre_dec` case and the `default:` label are not visible in this
// listing -- do not infer fall-through from what is shown here; check the
// pristine file.
135 ESDOp
EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode
)
138 case nir_intrinsic_atomic_counter_add
:
139 return DS_OP_ADD_RET
;
140 case nir_intrinsic_atomic_counter_and
:
141 return DS_OP_AND_RET
;
142 case nir_intrinsic_atomic_counter_exchange
:
143 return DS_OP_XCHG_RET
;
144 case nir_intrinsic_atomic_counter_inc
:
145 return DS_OP_INC_RET
;
146 case nir_intrinsic_atomic_counter_max
:
147 return DS_OP_MAX_UINT_RET
;
148 case nir_intrinsic_atomic_counter_min
:
149 return DS_OP_MIN_UINT_RET
;
150 case nir_intrinsic_atomic_counter_or
:
152 case nir_intrinsic_atomic_counter_read
:
153 return DS_OP_READ_RET
;
154 case nir_intrinsic_atomic_counter_xor
:
155 return DS_OP_XOR_RET
;
156 case nir_intrinsic_atomic_counter_post_dec
:
157 return DS_OP_DEC_RET
;
158 case nir_intrinsic_atomic_counter_comp_swap
:
159 return DS_OP_CMP_XCHG_RET
;
160 case nir_intrinsic_atomic_counter_pre_dec
:
162 return DS_OP_INVALID
;
// Maps an SSBO or image NIR intrinsic to the RAT opcode with return value.
// `format` is only consulted for image compare-and-swap, where a float
// format selects CMPXCHG_FLT_RTN and anything else CMPXCHG_INT_RTN.
// Plain image loads map to NOP_RTN. Unmapped intrinsics reach unreachable().
// NOTE(review): the `switch` header, the `else` before CMPXCHG_INT_RTN and the
// `default:` label are not visible in this listing -- check the pristine file.
166 RatInstruction::ERatOp
167 EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode
, pipe_format format
) const
170 case nir_intrinsic_ssbo_atomic_add
:
171 case nir_intrinsic_image_atomic_add
:
172 return RatInstruction::ADD_RTN
;
173 case nir_intrinsic_ssbo_atomic_and
:
174 case nir_intrinsic_image_atomic_and
:
175 return RatInstruction::AND_RTN
;
176 case nir_intrinsic_ssbo_atomic_exchange
:
177 case nir_intrinsic_image_atomic_exchange
:
178 return RatInstruction::XCHG_RTN
;
179 case nir_intrinsic_ssbo_atomic_or
:
180 case nir_intrinsic_image_atomic_or
:
181 return RatInstruction::OR_RTN
;
182 case nir_intrinsic_ssbo_atomic_imin
:
183 case nir_intrinsic_image_atomic_imin
:
184 return RatInstruction::MIN_INT_RTN
;
185 case nir_intrinsic_ssbo_atomic_imax
:
186 case nir_intrinsic_image_atomic_imax
:
187 return RatInstruction::MAX_INT_RTN
;
188 case nir_intrinsic_ssbo_atomic_umin
:
189 case nir_intrinsic_image_atomic_umin
:
190 return RatInstruction::MIN_UINT_RTN
;
191 case nir_intrinsic_ssbo_atomic_umax
:
192 case nir_intrinsic_image_atomic_umax
:
193 return RatInstruction::MAX_UINT_RTN
;
// NOTE(review): unlike every pairing above, XOR has no ssbo_atomic case, and
// there is no SSBO compare-and-swap case below either -- confirm whether
// these omissions are intentional.
194 case nir_intrinsic_image_atomic_xor
:
195 return RatInstruction::XOR_RTN
;
196 case nir_intrinsic_image_atomic_comp_swap
:
197 if (util_format_is_float(format
))
198 return RatInstruction::CMPXCHG_FLT_RTN
;
200 return RatInstruction::CMPXCHG_INT_RTN
;
201 case nir_intrinsic_image_load
:
202 return RatInstruction::NOP_RTN
;
204 unreachable("Unsupported RAT instruction");
// Emits a GDS DS_OP_ADD_RET for an atomic counter: operand from src[1],
// UAV id from src[0], destination from make_dest().
// NOTE(review): the visible do_emit() sends nir_intrinsic_atomic_counter_add
// to emit_atomic() instead, so this helper has no caller in this listing --
// confirm whether it is still used. The return statement is not visible here.
209 bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr
* instr
)
211 GPRVector dest
= make_dest(instr
);
213 PValue value
= from_nir_with_fetch_constant(instr
->src
[1], 0);
215 PValue uav_id
= from_nir(instr
->src
[0], 0);
217 auto ir
= new GDSInstr(DS_OP_ADD_RET
, dest
, value
, uav_id
,
218 nir_intrinsic_base(instr
));
220 emit_instruction(ir
);
// Allocates a temporary register for m_atomic_update and emits a MOV that
// loads literal 1 into it. emit_atomic_inc() and emit_atomic_pre_dec() use
// m_atomic_update as the operand of their GDS add/sub instructions.
// NOTE(review): the return statement is not visible in this listing.
224 bool EmitSSBOInstruction::load_atomic_inc_limits()
226 m_atomic_update
= get_temp_register();
227 emit_instruction(new AluInstruction(op1_mov
, m_atomic_update
, literal(1),
228 {alu_write
, alu_last_instr
}));
// Emits the atomic-counter increment as a GDS DS_OP_ADD_RET whose operand is
// m_atomic_update (set to literal 1 by load_atomic_inc_limits()).
// NOTE(review): m_atomic_update must have been initialised before this runs;
// nothing in this function checks that -- confirm the caller guarantees it.
// The return statement is not visible in this listing.
232 bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr
* instr
)
234 PValue uav_id
= from_nir(instr
->src
[0], 0);
235 GPRVector dest
= make_dest(instr
);
236 auto ir
= new GDSInstr(DS_OP_ADD_RET
, dest
, m_atomic_update
, uav_id
,
237 nir_intrinsic_base(instr
));
238 emit_instruction(ir
);
// Emits the atomic-counter pre-decrement as a GDS DS_OP_SUB_RET whose operand
// is m_atomic_update (set to literal 1 by load_atomic_inc_limits()).
// NOTE(review): whether the value returned by DS_OP_SUB_RET is the pre- or
// post-decrement value, and whether any fix-up follows, cannot be told from
// this listing (lines after the emit are not visible) -- confirm.
242 bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr
*instr
)
244 GPRVector dest
= make_dest(instr
);
246 PValue uav_id
= from_nir(instr
->src
[0], 0);
248 auto ir
= new GDSInstr(DS_OP_SUB_RET
, dest
, m_atomic_update
, uav_id
,
249 nir_intrinsic_base(instr
));
250 emit_instruction(ir
);
// Emits an SSBO load: shifts the address from src[1] right by two bits into a
// new register, then issues a FetchInstruction whose data format and
// destination swizzle are chosen by the destination's component count.
// NOTE(review): the initialisers of `formats` and `dest_swt` and the return
// statement are not visible in this listing -- check the pristine file.
255 bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr
* instr
)
257 GPRVector dest
= make_dest(instr
);
// NOTE(review): the comment below says src0 is unused, yet src[0] is passed
// to the FetchInstruction further down -- the comment looks stale.
259 /** src0 not used, should be some offset */
260 auto addr
= from_nir_with_fetch_constant(instr
->src
[1], 0);
261 PValue addr_temp
= create_register_from_nir_src(instr
->src
[1], 1);
// addr_temp = addr >> 2.
// NOTE(review): presumably converts a byte offset into a dword index -- confirm.
263 /** Should be lowered in nir */
264 emit_instruction(new AluInstruction(op2_lshr_int
, addr_temp
, {addr
, PValue(new LiteralValue(2))},
265 {alu_write
, alu_last_instr
}));
// Lookup tables indexed by (number of destination components - 1).
267 const EVTXDataFormat formats
[4] = {
274 const std::array
<int,4> dest_swt
[4] = {
281 /* TODO fix resource index */
282 auto ir
= new FetchInstruction(dest
, addr_temp
,
283 R600_IMAGE_REAL_RESOURCE_OFFSET
, from_nir(instr
->src
[0], 0),
284 formats
[nir_dest_num_components(instr
->dest
) - 1], vtx_nf_int
);
285 ir
->set_dest_swizzle(dest_swt
[nir_dest_num_components(instr
->dest
) - 1]);
286 ir
->set_flag(vtx_use_tc
);
288 emit_instruction(ir
);
// Emits an SSBO store. The address from src[2] is shifted right by two bits
// into component 0 of a temporary address vector (components 1 and 2 are
// zeroed); the RAT id comes from src[1] and the data from src[0].
// Two code paths are present: a vector store under WRITE_AS_VECTOR (its
// #define is commented out just above the #ifdef) and a per-component path
// that issues one STORE_TYPED per component, bumping the address by one each
// time.
// NOTE(review): the body of the first loop, the #else/#endif lines, the
// trailing arguments of the RatInstruction calls and the return statement are
// not visible in this listing -- check the pristine file before editing.
292 bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr
* instr
)
295 GPRVector::Swizzle swz
= {7,7,7,7};
296 for (unsigned i
= 0; i
< nir_src_num_components(instr
->src
[0]); ++i
)
299 auto orig_addr
= from_nir(instr
->src
[2], 0);
301 int temp1
= allocate_temp_register();
302 GPRVector
addr_vec(temp1
, {0,1,2,7});
304 auto rat_id
= from_nir(instr
->src
[1], 0);
// addr_vec.x = orig_addr >> 2; addr_vec.y = addr_vec.z = 0.
306 emit_instruction(new AluInstruction(op2_lshr_int
, addr_vec
.reg_i(0), orig_addr
,
307 PValue(new LiteralValue(2)), write
));
308 emit_instruction(new AluInstruction(op1_mov
, addr_vec
.reg_i(1), Value::zero
, write
));
309 emit_instruction(new AluInstruction(op1_mov
, addr_vec
.reg_i(2), Value::zero
, last_write
));
// Vector-store variant; compiled only if WRITE_AS_VECTOR is defined.
312 //#define WRITE_AS_VECTOR
313 #ifdef WRITE_AS_VECTOR
314 std::unique_ptr
<GPRVector
> value(vec_from_nir_with_fetch_constant(instr
->src
[0],
315 (1 << instr
->src
[0].ssa
->num_components
) - 1, swz
));
317 /* TODO fix resource index */
318 int nelements
= instr
->src
[0].ssa
->num_components
- 1;
321 auto ir
= new RatInstruction(cf_mem_rat
, RatInstruction::STORE_TYPED
,
322 *value
, addr_vec
, 0, rat_id
, 11,
323 (1 << instr
->src
[0].ssa
->num_components
) - 1,
325 emit_instruction(ir
);
// Per-component variant: store component 0 first ...
328 auto values
= vec_from_nir_with_fetch_constant(instr
->src
[0],
329 (1 << nir_src_num_components(instr
->src
[0])) - 1, {0,1,2,3}, true);
331 emit_instruction(new RatInstruction(cf_mem_rat
, RatInstruction::STORE_TYPED
,
332 values
, addr_vec
, 0, rat_id
, 1,
// ... then, for each further component i: copy it into values.x, add one to
// the address and store again.
334 for (unsigned i
= 1; i
< nir_src_num_components(instr
->src
[0]); ++i
) {
335 emit_instruction(new AluInstruction(op1_mov
, values
.reg_i(0), from_nir(instr
->src
[0], i
), write
));
336 emit_instruction(new AluInstruction(op2_add_int
, addr_vec
.reg_i(0),
337 {addr_vec
.reg_i(0), Value::one_i
}, last_write
));
338 emit_instruction(new RatInstruction(cf_mem_rat
, RatInstruction::STORE_TYPED
,
339 values
, addr_vec
, 0, rat_id
, 1,
// Emits an image store as a RAT STORE_TYPED. The image index comes from
// src[0]: a constant index is stored in `imageid`, otherwise the value is
// loaded into `image_offset`. Coordinates come from src[1], data from src[3].
// NOTE(review): the declarations of `imageid`/`image_offset`, the `else`
// between the two src[0] assignments and the return statement are not
// visible in this listing -- check the pristine file.
347 EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr
*intrin
)
352 if (nir_src_is_const(intrin
->src
[0]))
353 imageid
= nir_src_as_int(intrin
->src
[0]);
355 image_offset
= from_nir(intrin
->src
[0], 0);
// NOTE(review): `undef` (src[2]) and `unknown` (src[4]) are loaded but not
// used anywhere in the visible code -- confirm they are needed.
357 auto coord
= vec_from_nir_with_fetch_constant(intrin
->src
[1], 0xf, {0,1,2,3});
358 auto undef
= from_nir(intrin
->src
[2], 0);
359 auto value
= vec_from_nir_with_fetch_constant(intrin
->src
[3], 0xf, {0,1,2,3});
360 auto unknown
= from_nir(intrin
->src
[4], 0);
// 1D array images: emit two MOVs in one ALU group, coord.z <- coord.y and
// coord.y <- coord.z.
// NOTE(review): presumably both MOVs read the pre-group values, making this a
// swap of components 1 and 2 -- confirm.
362 if (nir_intrinsic_image_dim(intrin
) == GLSL_SAMPLER_DIM_1D
&&
363 nir_intrinsic_image_array(intrin
)) {
364 emit_instruction(new AluInstruction(op1_mov
, coord
.reg_i(2), coord
.reg_i(1), {alu_write
}));
365 emit_instruction(new AluInstruction(op1_mov
, coord
.reg_i(1), coord
.reg_i(2), {alu_last_instr
, alu_write
}));
368 auto store
= new RatInstruction(cf_mem_rat
, RatInstruction::STORE_TYPED
, value
, coord
, imageid
,
369 image_offset
, 1, 0xf, 0, false);
370 emit_instruction(store
);
// Emits an SSBO atomic as a RAT instruction with return, followed by a
// WaitAck and a fetch that reads the result into the destination.
// The RAT opcode is looked up with a fixed PIPE_FORMAT_R32_UINT format; the
// operand from src[2] is moved into component 0 of m_rat_return_address and
// component 2 is zeroed; the address from src[1] is replicated into all four
// lanes of the index vector.
// NOTE(review): the declarations of `imageid`/`image_offset`, the `else`
// between the two src[0] assignments, most FetchInstruction arguments and the
// return statement are not visible in this listing -- check the pristine file.
// NOTE(review): m_rat_return_address is only set up by
// load_rat_return_address(); nothing here checks that it ran -- confirm.
375 EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr
*intrin
)
380 if (nir_src_is_const(intrin
->src
[0]))
381 imageid
= nir_src_as_int(intrin
->src
[0]);
383 image_offset
= from_nir(intrin
->src
[0], 0);
385 auto opcode
= EmitSSBOInstruction::get_rat_opcode(intrin
->intrinsic
, PIPE_FORMAT_R32_UINT
);
387 auto coord
= from_nir_with_fetch_constant(intrin
->src
[1], 0);
389 emit_instruction(new AluInstruction(op1_mov
, m_rat_return_address
.reg_i(0), from_nir(intrin
->src
[2], 0), write
));
390 emit_instruction(new AluInstruction(op1_mov
, m_rat_return_address
.reg_i(2), Value::zero
, last_write
));
392 GPRVector
out_vec({coord
, coord
, coord
, coord
});
394 auto atomic
= new RatInstruction(cf_mem_rat
, opcode
, m_rat_return_address
, out_vec
, imageid
,
395 image_offset
, 1, 0xf, 0, true);
396 emit_instruction(atomic
);
// Wait for the RAT operation to be acknowledged before fetching its result.
397 emit_instruction(new WaitAck(0));
399 GPRVector dest
= vec_from_nir(intrin
->dest
, intrin
->dest
.ssa
.num_components
);
// Read back the returned value, addressed by component 1 of m_rat_return_address.
400 auto fetch
= new FetchInstruction(vc_fetch
,
405 m_rat_return_address
.reg_i(1),
410 R600_IMAGE_IMMED_RESOURCE_OFFSET
,
420 fetch
->set_flag(vtx_srf_mode
);
421 fetch
->set_flag(vtx_use_tc
);
422 emit_instruction(fetch
);
// Emits an image load or image atomic as a RAT instruction with return and
// then reads the result back via fetch_return_value().
// The RAT opcode comes from get_rat_opcode() using the intrinsic's own
// format; coordinates come from src[1]. For atomics the operand(s) are first
// moved into m_rat_return_address: compare-and-swap writes src[4] to
// component 0 and src[3] to component 3, every other atomic writes src[3] to
// component 0.
// NOTE(review): the declarations of `imageid`/`image_offset`, both `else`
// lines and the closing braces are not visible in this listing -- check the
// pristine file before relying on the exact nesting.
428 EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr
*intrin
)
433 if (nir_src_is_const(intrin
->src
[0]))
434 imageid
= nir_src_as_int(intrin
->src
[0]);
436 image_offset
= from_nir(intrin
->src
[0], 0);
438 auto rat_op
= get_rat_opcode(intrin
->intrinsic
, nir_intrinsic_format(intrin
));
440 GPRVector::Swizzle swz
= {0,1,2,3};
441 auto coord
= vec_from_nir_with_fetch_constant(intrin
->src
[1], 0xf, swz
);
// 1D array images: two MOVs in one ALU group, coord.z <- coord.y and
// coord.y <- coord.z.
// NOTE(review): presumably a swap of components 1 and 2 -- confirm.
443 if (nir_intrinsic_image_dim(intrin
) == GLSL_SAMPLER_DIM_1D
&&
444 nir_intrinsic_image_array(intrin
)) {
445 emit_instruction(new AluInstruction(op1_mov
, coord
.reg_i(2), coord
.reg_i(1), {alu_write
}));
446 emit_instruction(new AluInstruction(op1_mov
, coord
.reg_i(1), coord
.reg_i(2), {alu_last_instr
, alu_write
}));
// Atomics only: stage the operand(s) in m_rat_return_address.
449 if (intrin
->intrinsic
!= nir_intrinsic_image_load
) {
450 if (intrin
->intrinsic
== nir_intrinsic_image_atomic_comp_swap
) {
451 emit_instruction(new AluInstruction(op1_mov
, m_rat_return_address
.reg_i(0),
452 from_nir(intrin
->src
[4], 0), {alu_write
}));
453 emit_instruction(new AluInstruction(op1_mov
, m_rat_return_address
.reg_i(3),
454 from_nir(intrin
->src
[3], 0), {alu_last_instr
, alu_write
}));
456 emit_instruction(new AluInstruction(op1_mov
, m_rat_return_address
.reg_i(0),
457 from_nir(intrin
->src
[3], 0), {alu_last_instr
, alu_write
}));
// The local is called `store`, but it carries rat_op (a load or an atomic).
461 auto store
= new RatInstruction(cf_mem_rat
, rat_op
, m_rat_return_address
, coord
, imageid
,
462 image_offset
, 1, 0xf, 0, true);
463 emit_instruction(store
);
464 return fetch_return_value(intrin
);
// Emits a WaitAck followed by a fetch that reads the value returned by a
// preceding RAT operation into the intrinsic's destination. The fetch's
// data/number format and endian swap are derived from the intrinsic's
// pipe_format via r600_vertex_data_type().
// NOTE(review): the declaration of `endian`, most FetchInstruction arguments,
// the condition guarding vtx_format_comp_signed and the return statement are
// not visible in this listing -- check the pristine file.
467 bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr
*intrin
)
469 emit_instruction(new WaitAck(0));
471 pipe_format format
= nir_intrinsic_format(intrin
);
// Initial values handed to r600_vertex_data_type(), which receives pointers
// to all of them.
472 unsigned fmt
= fmt_32
;
473 unsigned num_format
= 0;
474 unsigned format_comp
= 0;
477 r600_vertex_data_type(format
, &fmt
, &num_format
, &format_comp
, &endian
);
479 GPRVector dest
= vec_from_nir(intrin
->dest
, nir_dest_num_components(intrin
->dest
));
// The fetch is addressed by component 1 of m_rat_return_address.
480 auto fetch
= new FetchInstruction(vc_fetch
,
483 (EVFetchNumFormat
)num_format
,
484 (EVFetchEndianSwap
)endian
,
485 m_rat_return_address
.reg_i(1),
490 R600_IMAGE_IMMED_RESOURCE_OFFSET
,
500 fetch
->set_flag(vtx_srf_mode
);
501 fetch
->set_flag(vtx_use_tc
);
// NOTE(review): presumably set only when format_comp is non-zero -- the
// guarding `if` is not visible here; confirm.
503 fetch
->set_flag(vtx_format_comp_signed
);
505 emit_instruction(fetch
);
// Emits the query for an image's size. Buffer images use a FetchInstruction;
// other images use a TexInstruction get_resinfo. For cube-array images whose
// destination has more than two components, component 2 is overwritten with
// a value read from the buffer-info constant buffer.
// The resource id starts at R600_IMAGE_REAL_RESOURCE_OFFSET and has the
// constant index from src[0] added; otherwise src[0] is loaded as a dynamic
// offset.
// NOTE(review): the `if (const_offset)`/`else` lines around the res_id and
// dyn_offset assignments, the `else` before the TexInstruction, the remaining
// FetchInstruction/TexInstruction arguments and the return statement are not
// visible in this listing -- check the pristine file.
509 bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr
*intrin
)
511 GPRVector dest
= vec_from_nir(intrin
->dest
, nir_dest_num_components(intrin
->dest
));
512 GPRVector src
{0,{4,4,4,4}};
514 auto const_offset
= nir_src_as_const_value(intrin
->src
[0]);
515 auto dyn_offset
= PValue();
516 int res_id
= R600_IMAGE_REAL_RESOURCE_OFFSET
;
518 res_id
+= const_offset
[0].u32
;
520 dyn_offset
= from_nir(intrin
->src
[0], 0);
522 if (nir_intrinsic_image_dim(intrin
) == GLSL_SAMPLER_DIM_BUF
) {
523 emit_instruction(new FetchInstruction(dest
, PValue(new GPRValue(0, 7)),
528 emit_instruction(new TexInstruction(TexInstruction::get_resinfo
, dest
, src
,
530 res_id
, dyn_offset
));
531 if (nir_intrinsic_image_dim(intrin
) == GLSL_SAMPLER_DIM_CUBE
&&
532 nir_intrinsic_image_array(intrin
) && nir_dest_num_components(intrin
->dest
) > 2) {
533 /* Need to load the layers from a const buffer */
// NOTE(review): const_offset is dereferenced here with no visible null check;
// with a non-constant image index this would be invalid -- confirm that a
// guard or assert exists on the line not shown in this listing.
535 unsigned lookup_resid
= const_offset
[0].u32
;
// Uniform selector = lookup_resid / 4 + R600_SHADER_BUFFER_INFO_SEL,
// channel = lookup_resid % 4, read from R600_BUFFER_INFO_CONST_BUFFER.
536 emit_instruction(new AluInstruction(op1_mov
, dest
.reg_i(2),
537 PValue(new UniformValue(lookup_resid
/4 + R600_SHADER_BUFFER_INFO_SEL
, lookup_resid
% 4,
538 R600_BUFFER_INFO_CONST_BUFFER
)),
539 EmitInstruction::last_write
));
// Builds the destination for an intrinsic by filling four entries of `v`
// with from_nir(ir->dest, i) for i = 0..3.
// NOTE(review): the declarations of `v` and `i` and the return statement are
// not visible in this listing, and the function may continue past the last
// line shown -- check the pristine file.
545 GPRVector
EmitSSBOInstruction::make_dest(const nir_intrinsic_instr
* ir
)
549 for (i
= 0; i
< 4; ++i
)
550 v
[i
] = from_nir(ir
->dest
, i
);