r600/sfn: Add support for reading cube image array dim.
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_emitssboinstruction.cpp
1 #include "sfn_emitssboinstruction.h"
2
3 #include "sfn_instruction_fetch.h"
4 #include "sfn_instruction_gds.h"
5 #include "sfn_instruction_misc.h"
6 #include "sfn_instruction_tex.h"
7 #include "../r600_pipe.h"
8 #include "../r600_asm.h"
9
10 namespace r600 {
11
/* Constant selector of the first buffer-info entry: a base of 512 plus
 * R600_BUFFER_INFO_OFFSET divided by 16. emit_image_size() adds resource_id/4
 * to this selector and uses resource_id%4 as the channel, so each selector
 * step covers four values. */
#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
13
14 EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
15 EmitInstruction(processor),
16 m_require_rat_return_address(false)
17 {
18 }
19
20
21 void EmitSSBOInstruction::set_require_rat_return_address()
22 {
23 m_require_rat_return_address = true;
24 }
25
26 bool
27 EmitSSBOInstruction::load_rat_return_address()
28 {
29 if (m_require_rat_return_address) {
30 m_rat_return_address = get_temp_vec4();
31 emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
32 emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
33 emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
34 literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
35 emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
36 m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
37 {alu_write, alu_last_instr}));
38 m_require_rat_return_address = false;
39 }
40 return true;
41 }
42
43
44 bool EmitSSBOInstruction::do_emit(nir_instr* instr)
45 {
46 const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
47 switch (intr->intrinsic) {
48 case nir_intrinsic_atomic_counter_add:
49 case nir_intrinsic_atomic_counter_and:
50 case nir_intrinsic_atomic_counter_exchange:
51 case nir_intrinsic_atomic_counter_max:
52 case nir_intrinsic_atomic_counter_min:
53 case nir_intrinsic_atomic_counter_or:
54 case nir_intrinsic_atomic_counter_xor:
55 case nir_intrinsic_atomic_counter_comp_swap:
56 return emit_atomic(intr);
57 case nir_intrinsic_atomic_counter_read:
58 case nir_intrinsic_atomic_counter_post_dec:
59 return emit_unary_atomic(intr);
60 case nir_intrinsic_atomic_counter_inc:
61 return emit_atomic_inc(intr);
62 case nir_intrinsic_atomic_counter_pre_dec:
63 return emit_atomic_pre_dec(intr);
64 case nir_intrinsic_load_ssbo:
65 return emit_load_ssbo(intr);
66 case nir_intrinsic_store_ssbo:
67 return emit_store_ssbo(intr);
68 case nir_intrinsic_ssbo_atomic_add:
69 return emit_ssbo_atomic_op(intr);
70 case nir_intrinsic_image_store:
71 return emit_image_store(intr);
72 case nir_intrinsic_image_load:
73 case nir_intrinsic_image_atomic_add:
74 case nir_intrinsic_image_atomic_and:
75 case nir_intrinsic_image_atomic_or:
76 case nir_intrinsic_image_atomic_xor:
77 case nir_intrinsic_image_atomic_exchange:
78 case nir_intrinsic_image_atomic_comp_swap:
79 case nir_intrinsic_image_atomic_umin:
80 case nir_intrinsic_image_atomic_umax:
81 case nir_intrinsic_image_atomic_imin:
82 case nir_intrinsic_image_atomic_imax:
83 return emit_image_load(intr);
84 case nir_intrinsic_image_size:
85 return emit_image_size(intr);
86 default:
87 return false;
88 }
89 }
90
91 bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
92 {
93 ESDOp op = get_opcode(instr->intrinsic);
94
95 if (DS_OP_INVALID == op)
96 return false;
97
98 GPRVector dest = make_dest(instr);
99
100 int base = nir_intrinsic_base(instr);
101
102 PValue uav_id = from_nir(instr->src[0], 0);
103
104 PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
105
106 GDSInstr *ir = nullptr;
107 if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) {
108 PValue value2 = from_nir_with_fetch_constant(instr->src[1], 1);
109 ir = new GDSInstr(op, dest, value, value2, uav_id, base);
110 } else {
111 ir = new GDSInstr(op, dest, value, uav_id, base);
112 }
113
114 emit_instruction(ir);
115 return true;
116 }
117
118 bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
119 {
120 ESDOp op = get_opcode(instr->intrinsic);
121
122 if (DS_OP_INVALID == op)
123 return false;
124
125 GPRVector dest = make_dest(instr);
126
127 PValue uav_id = from_nir(instr->src[0], 0);
128
129 auto ir = new GDSInstr(op, dest, uav_id, nir_intrinsic_base(instr));
130
131 emit_instruction(ir);
132 return true;
133 }
134
135 ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode)
136 {
137 switch (opcode) {
138 case nir_intrinsic_atomic_counter_add:
139 return DS_OP_ADD_RET;
140 case nir_intrinsic_atomic_counter_and:
141 return DS_OP_AND_RET;
142 case nir_intrinsic_atomic_counter_exchange:
143 return DS_OP_XCHG_RET;
144 case nir_intrinsic_atomic_counter_inc:
145 return DS_OP_INC_RET;
146 case nir_intrinsic_atomic_counter_max:
147 return DS_OP_MAX_UINT_RET;
148 case nir_intrinsic_atomic_counter_min:
149 return DS_OP_MIN_UINT_RET;
150 case nir_intrinsic_atomic_counter_or:
151 return DS_OP_OR_RET;
152 case nir_intrinsic_atomic_counter_read:
153 return DS_OP_READ_RET;
154 case nir_intrinsic_atomic_counter_xor:
155 return DS_OP_XOR_RET;
156 case nir_intrinsic_atomic_counter_post_dec:
157 return DS_OP_DEC_RET;
158 case nir_intrinsic_atomic_counter_comp_swap:
159 return DS_OP_CMP_XCHG_RET;
160 case nir_intrinsic_atomic_counter_pre_dec:
161 default:
162 return DS_OP_INVALID;
163 }
164 }
165
166 RatInstruction::ERatOp
167 EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
168 {
169 switch (opcode) {
170 case nir_intrinsic_ssbo_atomic_add:
171 case nir_intrinsic_image_atomic_add:
172 return RatInstruction::ADD_RTN;
173 case nir_intrinsic_ssbo_atomic_and:
174 case nir_intrinsic_image_atomic_and:
175 return RatInstruction::AND_RTN;
176 case nir_intrinsic_ssbo_atomic_exchange:
177 case nir_intrinsic_image_atomic_exchange:
178 return RatInstruction::XCHG_RTN;
179 case nir_intrinsic_ssbo_atomic_or:
180 case nir_intrinsic_image_atomic_or:
181 return RatInstruction::OR_RTN;
182 case nir_intrinsic_ssbo_atomic_imin:
183 case nir_intrinsic_image_atomic_imin:
184 return RatInstruction::MIN_INT_RTN;
185 case nir_intrinsic_ssbo_atomic_imax:
186 case nir_intrinsic_image_atomic_imax:
187 return RatInstruction::MAX_INT_RTN;
188 case nir_intrinsic_ssbo_atomic_umin:
189 case nir_intrinsic_image_atomic_umin:
190 return RatInstruction::MIN_UINT_RTN;
191 case nir_intrinsic_ssbo_atomic_umax:
192 case nir_intrinsic_image_atomic_umax:
193 return RatInstruction::MAX_UINT_RTN;
194 case nir_intrinsic_image_atomic_xor:
195 return RatInstruction::XOR_RTN;
196 case nir_intrinsic_image_atomic_comp_swap:
197 if (util_format_is_float(format))
198 return RatInstruction::CMPXCHG_FLT_RTN;
199 else
200 return RatInstruction::CMPXCHG_INT_RTN;
201 case nir_intrinsic_image_load:
202 return RatInstruction::NOP_RTN;
203 default:
204 unreachable("Unsupported RAT instruction");
205 }
206 }
207
208
209 bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr)
210 {
211 GPRVector dest = make_dest(instr);
212
213 PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
214
215 PValue uav_id = from_nir(instr->src[0], 0);
216
217 auto ir = new GDSInstr(DS_OP_ADD_RET, dest, value, uav_id,
218 nir_intrinsic_base(instr));
219
220 emit_instruction(ir);
221 return true;
222 }
223
224 bool EmitSSBOInstruction::load_atomic_inc_limits()
225 {
226 m_atomic_update = get_temp_register();
227 emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
228 {alu_write, alu_last_instr}));
229 return true;
230 }
231
232 bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
233 {
234 PValue uav_id = from_nir(instr->src[0], 0);
235 GPRVector dest = make_dest(instr);
236 auto ir = new GDSInstr(DS_OP_ADD_RET, dest, m_atomic_update, uav_id,
237 nir_intrinsic_base(instr));
238 emit_instruction(ir);
239 return true;
240 }
241
242 bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
243 {
244 GPRVector dest = make_dest(instr);
245
246 PValue uav_id = from_nir(instr->src[0], 0);
247
248 auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
249 nir_intrinsic_base(instr));
250 emit_instruction(ir);
251
252 return true;
253 }
254
255 bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
256 {
257 GPRVector dest = make_dest(instr);
258
259 /** src0 not used, should be some offset */
260 auto addr = from_nir_with_fetch_constant(instr->src[1], 0);
261 PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
262
263 /** Should be lowered in nir */
264 emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
265 {alu_write, alu_last_instr}));
266
267 const EVTXDataFormat formats[4] = {
268 fmt_32,
269 fmt_32_32,
270 fmt_32_32_32,
271 fmt_32_32_32_32
272 };
273
274 const std::array<int,4> dest_swt[4] = {
275 {0,7,7,7},
276 {0,1,7,7},
277 {0,1,2,7},
278 {0,1,2,3}
279 };
280
281 /* TODO fix resource index */
282 auto ir = new FetchInstruction(dest, addr_temp,
283 R600_IMAGE_REAL_RESOURCE_OFFSET, from_nir(instr->src[0], 0),
284 formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
285 ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
286 ir->set_flag(vtx_use_tc);
287
288 emit_instruction(ir);
289 return true;
290 }
291
292 bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
293 {
294
295 GPRVector::Swizzle swz = {7,7,7,7};
296 for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i)
297 swz[i] = i;
298
299 auto orig_addr = from_nir(instr->src[2], 0);
300
301 int temp1 = allocate_temp_register();
302 GPRVector addr_vec(temp1, {0,1,2,7});
303
304 auto rat_id = from_nir(instr->src[1], 0);
305
306 emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
307 PValue(new LiteralValue(2)), write));
308 emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
309 emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
310
311
312 //#define WRITE_AS_VECTOR
313 #ifdef WRITE_AS_VECTOR
314 std::unique_ptr<GPRVector> value(vec_from_nir_with_fetch_constant(instr->src[0],
315 (1 << instr->src[0].ssa->num_components) - 1, swz));
316
317 /* TODO fix resource index */
318 int nelements = instr->src[0].ssa->num_components - 1;
319 if (nelements == 2)
320 nelements = 3;
321 auto ir = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
322 *value, addr_vec, 0, rat_id, 11,
323 (1 << instr->src[0].ssa->num_components) - 1,
324 0, false);
325 emit_instruction(ir);
326 #else
327
328 auto values = vec_from_nir_with_fetch_constant(instr->src[0],
329 (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
330
331 emit_instruction(new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
332 values, addr_vec, 0, rat_id, 1,
333 1, 0, false));
334 for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
335 emit_instruction(new AluInstruction(op1_mov, values.reg_i(0), from_nir(instr->src[0], i), write));
336 emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
337 {addr_vec.reg_i(0), Value::one_i}, last_write));
338 emit_instruction(new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
339 values, addr_vec, 0, rat_id, 1,
340 1, 0, false));
341 }
342 #endif
343 return true;
344 }
345
346 bool
347 EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
348 {
349 int imageid = 0;
350 PValue image_offset;
351
352 if (nir_src_is_const(intrin->src[0]))
353 imageid = nir_src_as_int(intrin->src[0]);
354 else
355 image_offset = from_nir(intrin->src[0], 0);
356
357 auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
358 auto undef = from_nir(intrin->src[2], 0);
359 auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3});
360 auto unknown = from_nir(intrin->src[4], 0);
361
362 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
363 nir_intrinsic_image_array(intrin)) {
364 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
365 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
366 }
367
368 auto store = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED, value, coord, imageid,
369 image_offset, 1, 0xf, 0, false);
370 emit_instruction(store);
371 return true;
372 }
373
374 bool
375 EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
376 {
377 int imageid = 0;
378 PValue image_offset;
379
380 if (nir_src_is_const(intrin->src[0]))
381 imageid = nir_src_as_int(intrin->src[0]);
382 else
383 image_offset = from_nir(intrin->src[0], 0);
384
385 auto opcode = EmitSSBOInstruction::get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
386
387 auto coord = from_nir_with_fetch_constant(intrin->src[1], 0);
388
389 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), from_nir(intrin->src[2], 0), write));
390 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
391
392 GPRVector out_vec({coord, coord, coord, coord});
393
394 auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid,
395 image_offset, 1, 0xf, 0, true);
396 emit_instruction(atomic);
397 emit_instruction(new WaitAck(0));
398
399 GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
400 auto fetch = new FetchInstruction(vc_fetch,
401 no_index_offset,
402 fmt_32,
403 vtx_nf_int,
404 vtx_es_none,
405 m_rat_return_address.reg_i(1),
406 dest,
407 0,
408 false,
409 0xf,
410 R600_IMAGE_IMMED_RESOURCE_OFFSET,
411 0,
412 bim_none,
413 false,
414 false,
415 0,
416 0,
417 0,
418 PValue(),
419 {0,7,7,7});
420 fetch->set_flag(vtx_srf_mode);
421 fetch->set_flag(vtx_use_tc);
422 emit_instruction(fetch);
423 return true;
424
425 }
426
427 bool
428 EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
429 {
430 int imageid = 0;
431 PValue image_offset;
432
433 if (nir_src_is_const(intrin->src[0]))
434 imageid = nir_src_as_int(intrin->src[0]);
435 else
436 image_offset = from_nir(intrin->src[0], 0);
437
438 auto rat_op = get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin));
439
440 GPRVector::Swizzle swz = {0,1,2,3};
441 auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
442
443 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
444 nir_intrinsic_image_array(intrin)) {
445 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
446 emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
447 }
448
449 if (intrin->intrinsic != nir_intrinsic_image_load) {
450 if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
451 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
452 from_nir(intrin->src[4], 0), {alu_write}));
453 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3),
454 from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
455 } else {
456 emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
457 from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
458 }
459 }
460
461 auto store = new RatInstruction(cf_mem_rat, rat_op, m_rat_return_address, coord, imageid,
462 image_offset, 1, 0xf, 0, true);
463 emit_instruction(store);
464 return fetch_return_value(intrin);
465 }
466
467 bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
468 {
469 emit_instruction(new WaitAck(0));
470
471 pipe_format format = nir_intrinsic_format(intrin);
472 unsigned fmt = fmt_32;
473 unsigned num_format = 0;
474 unsigned format_comp = 0;
475 unsigned endian = 0;
476
477 r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
478
479 GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
480 auto fetch = new FetchInstruction(vc_fetch,
481 no_index_offset,
482 (EVTXDataFormat)fmt,
483 (EVFetchNumFormat)num_format,
484 (EVFetchEndianSwap)endian,
485 m_rat_return_address.reg_i(1),
486 dest,
487 0,
488 false,
489 0x3,
490 R600_IMAGE_IMMED_RESOURCE_OFFSET,
491 0,
492 bim_none,
493 false,
494 false,
495 0,
496 0,
497 0,
498 PValue(),
499 {0,1,2,3});
500 fetch->set_flag(vtx_srf_mode);
501 fetch->set_flag(vtx_use_tc);
502 if (format_comp)
503 fetch->set_flag(vtx_format_comp_signed);
504
505 emit_instruction(fetch);
506 return true;
507 }
508
509 bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
510 {
511 GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
512 GPRVector src{0,{4,4,4,4}};
513
514 auto const_offset = nir_src_as_const_value(intrin->src[0]);
515 auto dyn_offset = PValue();
516 int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
517 if (const_offset)
518 res_id += const_offset[0].u32;
519 else
520 dyn_offset = from_nir(intrin->src[0], 0);
521
522 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
523 emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
524 res_id,
525 bim_none));
526 return true;
527 } else {
528 emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
529 0/* ?? */,
530 res_id, dyn_offset));
531 if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
532 nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
533 /* Need to load the layers from a const buffer */
534
535 unsigned lookup_resid = const_offset[0].u32;
536 emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
537 PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
538 R600_BUFFER_INFO_CONST_BUFFER)),
539 EmitInstruction::last_write));
540 }
541 }
542 return true;
543 }
544
545 GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
546 {
547 GPRVector::Values v;
548 int i;
549 for (i = 0; i < 4; ++i)
550 v[i] = from_nir(ir->dest, i);
551 return GPRVector(v);
552 }
553
554 }