nir: Use a single list for all shader variables
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_ir_to_assembly.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_ir_to_assembly.h"
28 #include "sfn_conditionaljumptracker.h"
29 #include "sfn_callstack.h"
30 #include "sfn_instruction_gds.h"
31 #include "sfn_instruction_misc.h"
32 #include "sfn_instruction_fetch.h"
33 #include "sfn_instruction_lds.h"
34
35 #include "../r600_shader.h"
36 #include "../r600_sq.h"
37
38 namespace r600 {
39
40 using std::vector;
41
/* Backend that lowers the sfn IR instruction stream into r600 bytecode
 * through the legacy r600_asm (r600_bytecode_*) interface. */
struct AssemblyFromShaderLegacyImpl {

   AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);
   /* Translate a single IR instruction; returns false on failure. */
   bool emit(const Instruction::Pointer i);
   /* Invalidate the cached knowledge of what the AR register holds. */
   void reset_addr_register() {m_last_addr.reset();}

private:
   bool emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op);
   bool emit_export(const ExportInstruction & exi);
   bool emit_streamout(const StreamOutIntruction& instr);
   bool emit_memringwrite(const MemRingOutIntruction& instr);
   bool emit_tex(const TexInstruction & tex_instr);
   bool emit_vtx(const FetchInstruction& fetch_instr);
   bool emit_if_start(const IfInstruction & if_instr);
   bool emit_else(const ElseInstruction & else_instr);
   bool emit_endif(const IfElseEndInstruction & endif_instr);
   bool emit_emit_vertex(const EmitVertex &instr);

   bool emit_loop_begin(const LoopBeginInstruction& instr);
   bool emit_loop_end(const LoopEndInstruction& instr);
   bool emit_loop_break(const LoopBreakInstruction& instr);
   bool emit_loop_continue(const LoopContInstruction& instr);
   bool emit_wait_ack(const WaitAck& instr);
   bool emit_wr_scratch(const WriteScratchInstruction& instr);
   bool emit_gds(const GDSInstr& instr);
   bool emit_rat(const RatInstruction& instr);
   bool emit_ldswrite(const LDSWriteInstruction& instr);
   bool emit_ldsread(const LDSReadInstruction& instr);
   bool emit_ldsatomic(const LDSAtomicInstruction& instr);
   bool emit_tf_write(const GDSStoreTessFactor& instr);

   /* Record that 'addr' must be moved into the AR register. */
   bool emit_load_addr(PValue addr);
   bool emit_fs_pixel_export(const ExportInstruction & exi);
   bool emit_vs_pos_export(const ExportInstruction & exi);
   bool emit_vs_param_export(const ExportInstruction & exi);
   bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
   bool copy_src(r600_bytecode_alu_src& src, const Value& s);

   ConditionalJumpTracker m_jump_tracker;
   CallStack m_callstack;

public:
   r600_bytecode *m_bc;
   r600_shader *m_shader;
   r600_shader_key *m_key;
   r600_bytecode_output m_output;
   unsigned m_max_color_exports;
   bool has_pos_output;
   bool has_param_output;
   PValue m_last_addr;        /* value last loaded into the AR register */
   int m_loop_nesting;        /* current loop depth; affects index reuse */
   int m_nliterals_in_group;  /* literals accumulated in current ALU group */
   std::set<int> vtx_fetch_results; /* dst GPRs of not-yet-consumed vtx fetches */
};
98
99
AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
                                                   r600_shader_key *key)
{
   /* pimpl: allocated here, released in the destructor below */
   impl = new AssemblyFromShaderLegacyImpl(sh, key);
}
105
AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
{
   /* matches the 'new' in the constructor */
   delete impl;
}
110
111 bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
112 {
113 if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
114 impl->m_shader->ninput > 0)
115 r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);
116
117
118 std::vector<Instruction::Pointer> exports;
119
120 for (const auto& block : ir) {
121 for (const auto& i : block) {
122 if (!impl->emit(i))
123 return false;
124 if (i->type() != Instruction::alu)
125 impl->reset_addr_register();
126 }
127 }
128 /*
129 for (const auto& i : exports) {
130 if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
131 return false;
132 }*/
133
134
135 const struct cf_op_info *last = nullptr;
136 if (impl->m_bc->cf_last)
137 last = r600_isa_cf(impl->m_bc->cf_last->op);
138
139 /* alu clause instructions don't have EOP bit, so add NOP */
140 if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
141 || impl->m_bc->cf_last->op == CF_OP_POP)
142 r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);
143
144 /* A fetch shader only can't be EOP (results in hang), but we can replace it
145 * by a NOP */
146 else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
147 impl->m_bc->cf_last->op = CF_OP_NOP;
148
149 if (impl->m_shader->bc.chip_class != CAYMAN)
150 impl->m_bc->cf_last->end_of_program = 1;
151 else
152 cm_bytecode_add_cf_end(impl->m_bc);
153
154 return true;
155 }
156
157 bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i)
158 {
159 if (i->type() != Instruction::vtx)
160 vtx_fetch_results.clear();
161
162 sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n";
163 switch (i->type()) {
164 case Instruction::alu:
165 return emit_alu(static_cast<const AluInstruction&>(*i), cf_alu_undefined);
166 case Instruction::exprt:
167 return emit_export(static_cast<const ExportInstruction&>(*i));
168 case Instruction::tex:
169 return emit_tex(static_cast<const TexInstruction&>(*i));
170 case Instruction::vtx:
171 return emit_vtx(static_cast<const FetchInstruction&>(*i));
172 case Instruction::cond_if:
173 return emit_if_start(static_cast<const IfInstruction&>(*i));
174 case Instruction::cond_else:
175 return emit_else(static_cast<const ElseInstruction&>(*i));
176 case Instruction::cond_endif:
177 return emit_endif(static_cast<const IfElseEndInstruction&>(*i));
178 case Instruction::loop_begin:
179 return emit_loop_begin(static_cast<const LoopBeginInstruction&>(*i));
180 case Instruction::loop_end:
181 return emit_loop_end(static_cast<const LoopEndInstruction&>(*i));
182 case Instruction::loop_break:
183 return emit_loop_break(static_cast<const LoopBreakInstruction&>(*i));
184 case Instruction::loop_continue:
185 return emit_loop_continue(static_cast<const LoopContInstruction&>(*i));
186 case Instruction::streamout:
187 return emit_streamout(static_cast<const StreamOutIntruction&>(*i));
188 case Instruction::ring:
189 return emit_memringwrite(static_cast<const MemRingOutIntruction&>(*i));
190 case Instruction::emit_vtx:
191 return emit_emit_vertex(static_cast<const EmitVertex&>(*i));
192 case Instruction::wait_ack:
193 return emit_wait_ack(static_cast<const WaitAck&>(*i));
194 case Instruction::mem_wr_scratch:
195 return emit_wr_scratch(static_cast<const WriteScratchInstruction&>(*i));
196 case Instruction::gds:
197 return emit_gds(static_cast<const GDSInstr&>(*i));
198 case Instruction::rat:
199 return emit_rat(static_cast<const RatInstruction&>(*i));
200 case Instruction::lds_write:
201 return emit_ldswrite(static_cast<const LDSWriteInstruction&>(*i));
202 case Instruction::lds_read:
203 return emit_ldsread(static_cast<const LDSReadInstruction&>(*i));
204 case Instruction::lds_atomic:
205 return emit_ldsatomic(static_cast<const LDSAtomicInstruction&>(*i));
206 case Instruction::tf_write:
207 return emit_tf_write(static_cast<const GDSStoreTessFactor&>(*i));
208 default:
209 return false;
210 }
211 }
212
/* Note: the initializer list order must follow the member declaration
 * order in the struct above. */
AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
                                                           r600_shader_key *key):
   m_callstack(sh->bc),
   m_bc(&sh->bc),
   m_shader(sh),
   m_key(key),
   has_pos_output(false),
   has_param_output(false),
   m_loop_nesting(0),
   m_nliterals_in_group(0)
{
   /* Always account for at least one color export */
   m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
}
226
227 extern const std::map<EAluOp, int> opcode_map;
228
229 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
230 {
231 m_bc->ar_reg = addr->sel();
232 m_bc->ar_chan = addr->chan();
233 m_bc->ar_loaded = 0;
234 m_last_addr = addr;
235
236 sfn_log << SfnLog::assembly << " Prepare " << *addr << " to address register\n";
237
238 return true;
239 }
240
/* Translate one ALU IR instruction into an r600_bytecode_alu slot.
 *
 * Handles literal-count limits per instruction group, indirect (AR based)
 * register addressing on both destination and sources, and selects the
 * CF opcode the ALU clause is added under. Returns false when the opcode
 * is unknown or r600_asm rejects the instruction. */
bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op)
{

   struct r600_bytecode_alu alu;
   memset(&alu, 0, sizeof(alu));
   PValue addr_in_use;

   if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
      std::cerr << "Opcode not handled for " << ai <<"\n";
      return false;
   }

   /* Count the literal sources this instruction adds to the group */
   unsigned old_nliterals_in_group = m_nliterals_in_group;
   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);
      if (s.type() == Value::literal)
         ++m_nliterals_in_group;
   }

   /* This instruction group would exceed the limit of literals, so
    * force a new instruction group by adding a NOP as last
    * instruction. This will no longer be needed with a real
    * scheduler */
   if (m_nliterals_in_group > 4) {
      sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group << " inject a last op (nop)\n";
      alu.op = ALU_OP0_NOP;
      alu.last = 1;
      alu.dst.chan = 3;
      int retval = r600_bytecode_add_alu(m_bc, &alu);
      if (retval)
         return false;
      memset(&alu, 0, sizeof(alu));
      /* The new group contains only this instruction's literals */
      m_nliterals_in_group -= old_nliterals_in_group;
   }

   alu.op = opcode_map.at(ai.opcode());

   /* Missing test whether ai actually has a dest */
   auto dst = ai.dest();

   if (dst) {
      if (!copy_dst(alu.dst, *dst))
         return false;

      alu.dst.write = ai.flag(alu_write);
      alu.dst.clamp = ai.flag(alu_dst_clamp);

      /* Indirectly addressed destination: make sure AR holds the index */
      if (dst->type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(*dst);
         PValue addr = v.indirect();
         if (addr) {
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.dst.rel = addr ? 1 : 0;;
         }
      }
   }

   alu.is_op3 = ai.n_sources() == 3;

   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);

      if (!copy_src(alu.src[i], s))
         return false;
      alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);

      /* Indirectly addressed source: only one AR value per group */
      if (s.type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(s);
         PValue addr = v.indirect();
         if (addr) {
            assert(!addr_in_use || (*addr_in_use == *addr));
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.src[i].rel = addr ? 1 : 0;
         }
      }
      /* Three-source ops have no absolute-value source modifier */
      if (!alu.is_op3)
         alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
   }

   if (ai.bank_swizzle() != alu_vec_unknown)
      alu.bank_swizzle_force = ai.bank_swizzle();

   alu.last = ai.flag(alu_last_instr);
   alu.update_pred = ai.flag(alu_update_pred);
   alu.execute_mask = ai.flag(alu_update_exec);

   /* If the destination register is equal to the last loaded address register
    * then clear the latter one, because the values will no longer be identical */
   if (m_last_addr)
      sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n";

   if (dst)
      sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";

   if (dst && m_last_addr)
      if (*dst == *m_last_addr) {
         sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << "\n";
         m_last_addr.reset();
      }

   if (cf_op == cf_alu_undefined)
      cf_op = ai.cf_type();

   /* Map the IR clause type onto the hardware CF opcode */
   unsigned type = 0;
   switch (cf_op) {
   case cf_alu: type = CF_OP_ALU; break;
   case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
   case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
   case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
   case cf_alu_break: type = CF_OP_ALU_BREAK; break;
   case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
   case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
   case cf_alu_extended: type = CF_OP_ALU_EXT; break;
   default:
      assert(0 && "cf_alu_undefined should have been replaced");
   }

   /* A 'last' instruction closes the group, so the literal count restarts */
   if (alu.last)
      m_nliterals_in_group = 0;

   bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);

   /* Track side effects on the AR and CF index registers */
   if (ai.opcode() == op1_mova_int)
      m_bc->ar_loaded = 0;

   if (ai.opcode() == op1_set_cf_idx0)
      m_bc->index_loaded[0] = 1;

   if (ai.opcode() == op1_set_cf_idx1)
      m_bc->index_loaded[1] = 1;

   /* These ops must end the current CF instruction */
   m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
                          ai.opcode() == op2_killne_int ||
                          ai.opcode() == op1_set_cf_idx0 ||
                          ai.opcode() == op1_set_cf_idx1);
   return retval;
}
385
386 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
387 {
388 r600_bytecode_output output;
389 memset(&output, 0, sizeof(output));
390 assert(exi.gpr().type() == Value::gpr_vector);
391 const auto& gpr = exi.gpr();
392 output.gpr = gpr.sel();
393 output.elem_size = 3;
394 output.swizzle_x = gpr.chan_i(0);
395 output.swizzle_y = gpr.chan_i(1);
396 output.swizzle_z = gpr.chan_i(2);
397 output.swizzle_w = gpr.chan_i(3);
398 output.burst_count = 1;
399 output.array_base = 60 + exi.location();
400 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
401 output.type = exi.export_type();
402
403
404 if (r600_bytecode_add_output(m_bc, &output)) {
405 R600_ERR("Error adding pixel export at location %d\n", exi.location());
406 return false;
407 }
408
409 return true;
410 }
411
412
413 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
414 {
415 r600_bytecode_output output;
416 assert(exi.gpr().type() == Value::gpr_vector);
417 const auto& gpr = exi.gpr();
418
419 memset(&output, 0, sizeof(output));
420 output.gpr = gpr.sel();
421 output.elem_size = 3;
422 output.swizzle_x = gpr.chan_i(0);
423 output.swizzle_y = gpr.chan_i(1);
424 output.swizzle_z = gpr.chan_i(2);
425 output.swizzle_w = gpr.chan_i(3);
426 output.burst_count = 1;
427 output.array_base = exi.location();
428 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
429 output.type = exi.export_type();
430
431
432 if (r600_bytecode_add_output(m_bc, &output)) {
433 R600_ERR("Error adding pixel export at location %d\n", exi.location());
434 return false;
435 }
436
437 return true;
438 }
439
440
441 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
442 {
443 if (exi.location() >= m_max_color_exports && exi.location() < 60) {
444 R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
445 exi.location(), m_max_color_exports);
446 return true;
447 }
448
449 assert(exi.gpr().type() == Value::gpr_vector);
450 const auto& gpr = exi.gpr();
451
452 r600_bytecode_output output;
453 memset(&output, 0, sizeof(output));
454
455 output.gpr = gpr.sel();
456 output.elem_size = 3;
457 output.swizzle_x = gpr.chan_i(0);
458 output.swizzle_y = gpr.chan_i(1);
459 output.swizzle_z = gpr.chan_i(2);
460 output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ;
461 output.burst_count = 1;
462 output.array_base = exi.location();
463 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
464 output.type = exi.export_type();
465
466
467 if (r600_bytecode_add_output(m_bc, &output)) {
468 R600_ERR("Error adding pixel export at location %d\n", exi.location());
469 return false;
470 }
471
472 return true;
473 }
474
475
476 bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction & exi)
477 {
478 switch (exi.export_type()) {
479 case ExportInstruction::et_pixel:
480 return emit_fs_pixel_export(exi);
481 case ExportInstruction::et_pos:
482 return emit_vs_pos_export(exi);
483 case ExportInstruction::et_param:
484 return emit_vs_param_export(exi);
485 default:
486 R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
487 return false;
488 }
489 }
490
/* Open a conditional: emit the predicate ALU clause plus the JUMP that the
 * matching else/endif will later be patched against.
 *
 * On some chip families the ALU_PUSH_BEFORE clause cannot be used when the
 * push would land on a stack sub-entry boundary, so an explicit PUSH is
 * emitted instead and the predicate runs as a plain ALU clause. */
bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr)
{
   bool needs_workaround = false;
   int elems = m_callstack.push(FC_PUSH_VPM);

   /* Cayman with nested loops always needs the explicit PUSH */
   if (m_bc->chip_class == CAYMAN && m_bc->stack.loop > 1)
      needs_workaround = true;
   if (m_bc->family != CHIP_HEMLOCK &&
       m_bc->family != CHIP_CYPRESS &&
       m_bc->family != CHIP_JUNIPER) {
      /* workaround triggers when the stack depth sits on (or one before)
       * an entry_size boundary */
      unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
      unsigned dmod2 = (elems) % m_bc->stack.entry_size;

      if (elems && (!dmod1 || !dmod2))
         needs_workaround = true;
   }

   auto& pred = if_instr.pred();
   auto op = cf_alu_push_before;

   if (needs_workaround) {
      r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
      op = cf_alu;
   }
   emit_alu(pred, op);

   /* JUMP target is fixed up later by the jump tracker */
   r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);

   m_jump_tracker.push(m_bc->cf_last, jt_if);
   return true;
}
523
/* Emit the ELSE CF instruction for the innermost open if; the jump tracker
 * records it for later address patching. */
bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED const ElseInstruction & else_instr)
{
   r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
   m_bc->cf_last->pop_count = 1;
   return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
}
530
/* Close a conditional. When possible the required stack pop is folded into
 * the preceding ALU clause (ALU -> ALU_POP_AFTER, ALU_POP_AFTER ->
 * ALU_POP2_AFTER); otherwise an explicit POP CF instruction is emitted. */
bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED const IfElseEndInstruction & endif_instr)
{
   m_callstack.pop(FC_PUSH_VPM);

   unsigned force_pop = m_bc->force_add_cf;
   if (!force_pop) {
      /* alu_pop counts pops already folded into the last clause;
       * 3 means "cannot fold another one" */
      int alu_pop = 3;
      if (m_bc->cf_last) {
         if (m_bc->cf_last->op == CF_OP_ALU)
            alu_pop = 0;
         else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
            alu_pop = 1;
      }
      alu_pop += 1;
      if (alu_pop == 1) {
         m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
         m_bc->force_add_cf = 1;
      } else if (alu_pop == 2) {
         m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
         m_bc->force_add_cf = 1;
      } else {
         force_pop = 1;
      }
   }

   if (force_pop) {
      r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
      m_bc->cf_last->pop_count = 1;
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
   }

   return m_jump_tracker.pop(m_bc->cf_last, jt_if);
}
564
565 bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED const LoopBeginInstruction& instr)
566 {
567 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
568 m_jump_tracker.push(m_bc->cf_last, jt_loop);
569 m_callstack.push(FC_LOOP);
570 ++m_loop_nesting;
571 return true;
572 }
573
574 bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED const LoopEndInstruction& instr)
575 {
576 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
577 m_callstack.pop(FC_LOOP);
578 assert(m_loop_nesting);
579 --m_loop_nesting;
580 return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
581 }
582
/* Emit LOOP_BREAK; its jump target is patched when the loop is closed. */
bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED const LoopBreakInstruction& instr)
{
   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
   return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
}
588
/* Emit LOOP_CONTINUE; its jump target is patched when the loop is closed. */
bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED const LoopContInstruction &instr)
{
   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
   return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
}
594
595 bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction& so_instr)
596 {
597 struct r600_bytecode_output output;
598 memset(&output, 0, sizeof(struct r600_bytecode_output));
599
600 output.gpr = so_instr.gpr().sel();
601 output.elem_size = so_instr.element_size();
602 output.array_base = so_instr.array_base();
603 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
604 output.burst_count = so_instr.burst_count();
605 output.array_size = so_instr.array_size();
606 output.comp_mask = so_instr.comp_mask();
607 output.op = so_instr.op();
608
609 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
610
611
612 if (r600_bytecode_add_output(m_bc, &output)) {
613 R600_ERR("shader_from_nir: Error creating stream output instruction\n");
614 return false;
615 }
616 return true;
617 }
618
619
620 bool AssemblyFromShaderLegacyImpl::emit_memringwrite(const MemRingOutIntruction& instr)
621 {
622 struct r600_bytecode_output output;
623 memset(&output, 0, sizeof(struct r600_bytecode_output));
624
625 output.gpr = instr.gpr().sel();
626 output.type = instr.type();
627 output.elem_size = 3;
628 output.comp_mask = 0xf;
629 output.burst_count = 1;
630 output.op = instr.op();
631 if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
632 output.index_gpr = instr.index_reg();
633 output.array_size = 0xfff;
634 }
635 output.array_base = instr.array_base();
636
637 if (r600_bytecode_add_output(m_bc, &output)) {
638 R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
639 return false;
640 }
641 return true;
642 }
643
644
/* Emit a texture instruction.
 *
 * If the sampler is addressed indirectly, CF index register 1 is loaded
 * first (MOVA_INT followed by SET_CF_IDX1), unless it already holds the
 * same value and we are not inside a loop. */
bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction & tex_instr)
{
   auto addr = tex_instr.sampler_offset();
   /* (Re)load CF_IDX1 if needed; inside loops the cached value is not
    * trusted because the register may change per iteration */
   if (addr && (!m_bc->index_loaded[1] || m_loop_nesting
                || m_bc->index_reg[1] != addr->sel()
                || m_bc->index_reg_chan[1] != addr->chan())) {
      struct r600_bytecode_alu alu;
      memset(&alu, 0, sizeof(alu));
      alu.op = opcode_map.at(op1_mova_int);
      alu.dst.chan = 0;
      alu.src[0].sel = addr->sel();
      alu.src[0].chan = addr->chan();
      alu.last = 1;
      int r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return false;

      /* MOVA invalidates the tracked AR contents */
      m_bc->ar_loaded = 0;

      alu.op = opcode_map.at(op1_set_cf_idx1);
      alu.dst.chan = 0;
      alu.src[0].sel = 0;
      alu.src[0].chan = 0;
      alu.last = 1;

      r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return false;

      /* Remember what CF_IDX1 now holds */
      m_bc->index_reg[1] = addr->sel();
      m_bc->index_reg_chan[1] = addr->chan();
      m_bc->index_loaded[1] = true;
   }

   r600_bytecode_tex tex;
   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
   tex.op = tex_instr.opcode();
   tex.sampler_id = tex_instr.sampler_id();
   tex.sampler_index_mode = 0;
   tex.resource_id = tex_instr.resource_id();;
   tex.resource_index_mode = 0;
   tex.src_gpr = tex_instr.src().sel();
   tex.dst_gpr = tex_instr.dst().sel();
   tex.dst_sel_x = tex_instr.dest_swizzle(0);
   tex.dst_sel_y = tex_instr.dest_swizzle(1);
   tex.dst_sel_z = tex_instr.dest_swizzle(2);
   tex.dst_sel_w = tex_instr.dest_swizzle(3);
   tex.src_sel_x = tex_instr.src().chan_i(0);
   tex.src_sel_y = tex_instr.src().chan_i(1);
   tex.src_sel_z = tex_instr.src().chan_i(2);
   tex.src_sel_w = tex_instr.src().chan_i(3);
   tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
   tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
   tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
   tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
   tex.offset_x = tex_instr.get_offset(0);
   tex.offset_y = tex_instr.get_offset(1);
   tex.offset_z = tex_instr.get_offset(2);
   /* index mode 2 when the sampler/resource is addressed via CF_IDX1 */
   tex.resource_index_mode = (!!addr) ? 2 : 0;
   tex.sampler_index_mode = tex.resource_index_mode;

   /* gradient queries encode fine/coarse in inst_mod */
   if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
       tex_instr.opcode() == TexInstruction::get_gradient_v)
      tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
   else
      tex.inst_mod = tex_instr.inst_mode();
   if (r600_bytecode_add_tex(m_bc, &tex)) {
      R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
      return false;
   }
   return true;
}
717
/* Emit a vertex/buffer fetch instruction.
 *
 * A literal buffer offset is folded into the buffer id; a dynamic offset
 * is loaded into CF index register 0 (MOVA_INT + SET_CF_IDX0) first.
 * Reading a GPR that still has an in-flight fetch result forces a new
 * fetch clause to avoid a read-after-fetch hazard. */
bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr)
{
   int buffer_offset = 0;
   auto addr = fetch_instr.buffer_offset();
   auto index_mode = fetch_instr.buffer_index_mode();

   if (addr) {
      if (addr->type() == Value::literal) {
         /* constant offset: bake it into the buffer id below */
         const auto& boffs = static_cast<const LiteralValue&>(*addr);
         buffer_offset = boffs.value();
      } else {
         index_mode = bim_zero;
         /* (Re)load CF_IDX0 unless it already holds this value and we
          * are outside any loop */
         if ((!m_bc->index_loaded[0] || m_loop_nesting ||
              m_bc->index_reg[0] != addr->sel() ||
              m_bc->index_reg_chan[0] != addr->chan())) {
            struct r600_bytecode_alu alu;
            memset(&alu, 0, sizeof(alu));
            alu.op = opcode_map.at(op1_mova_int);
            alu.dst.chan = 0;
            alu.src[0].sel = addr->sel();
            alu.src[0].chan = addr->chan();
            alu.last = 1;
            int r = r600_bytecode_add_alu(m_bc, &alu);
            if (r)
               return false;

            /* MOVA invalidates the tracked AR contents */
            m_bc->ar_loaded = 0;

            alu.op = opcode_map.at(op1_set_cf_idx0);
            alu.dst.chan = 0;
            alu.src[0].sel = 0;
            alu.src[0].chan = 0;
            alu.last = 1;

            r = r600_bytecode_add_alu(m_bc, &alu);
            if (r)
               return false;

            /* Remember what CF_IDX0 now holds */
            m_bc->index_reg[0] = addr->sel();
            m_bc->index_reg_chan[0] = addr->chan();
            m_bc->index_loaded[0] = true;
         }
      }
   }

   if (fetch_instr.has_prelude()) {
      for(auto &i : fetch_instr.prelude()) {
         if (!emit(i))
            return false;
      }
   }

   /* The fetch source must not be a destination of a fetch still pending
    * in the same clause; if it is, start a new CF instruction */
   if (vtx_fetch_results.find(fetch_instr.src().sel()) !=
       vtx_fetch_results.end()) {
      m_bc->force_add_cf = 1;
      vtx_fetch_results.clear();
   }
   vtx_fetch_results.insert(fetch_instr.dst().sel());

   struct r600_bytecode_vtx vtx;
   memset(&vtx, 0, sizeof(vtx));
   vtx.op = fetch_instr.vc_opcode();
   vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
   vtx.fetch_type = fetch_instr.fetch_type();
   vtx.src_gpr = fetch_instr.src().sel();
   vtx.src_sel_x = fetch_instr.src().chan();
   vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
   vtx.dst_gpr = fetch_instr.dst().sel();
   vtx.dst_sel_x = fetch_instr.swz(0); /* SEL_X */
   vtx.dst_sel_y = fetch_instr.swz(1); /* SEL_Y */
   vtx.dst_sel_z = fetch_instr.swz(2); /* SEL_Z */
   vtx.dst_sel_w = fetch_instr.swz(3); /* SEL_W */
   vtx.use_const_fields = fetch_instr.use_const_fields();
   vtx.data_format = fetch_instr.data_format();
   vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */
   vtx.format_comp_all = fetch_instr.is_signed(); /* FORMAT_COMP_SIGNED */
   vtx.endian = fetch_instr.endian_swap();
   vtx.buffer_index_mode = index_mode;
   vtx.offset = fetch_instr.offset();
   vtx.indexed = fetch_instr.indexed();
   vtx.uncached = fetch_instr.uncached();
   vtx.elem_size = fetch_instr.elm_size();
   vtx.array_base = fetch_instr.array_base();
   vtx.array_size = fetch_instr.array_size();
   vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();

   if (fetch_instr.use_tc()) {
      if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
         return false;
      }

   } else {
      if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
         return false;
      }
   }

   m_bc->cf_last->vpm = fetch_instr.use_vpm();
   m_bc->cf_last->barrier = 1;

   return true;
}
822
823 bool AssemblyFromShaderLegacyImpl::emit_emit_vertex(const EmitVertex &instr)
824 {
825 int r = r600_bytecode_add_cfinst(m_bc, instr.op());
826 if (!r)
827 m_bc->cf_last->count = instr.stream();
828 assert(m_bc->cf_last->count < 4);
829
830 return r == 0;
831 }
832
833 bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck& instr)
834 {
835 int r = r600_bytecode_add_cfinst(m_bc, instr.op());
836 if (!r)
837 m_bc->cf_last->cf_addr = instr.n_ack();
838
839 return r == 0;
840 }
841
842 bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction& instr)
843 {
844 struct r600_bytecode_output cf;
845
846 memset(&cf, 0, sizeof(struct r600_bytecode_output));
847
848 cf.op = CF_OP_MEM_SCRATCH;
849 cf.elem_size = 3;
850 cf.gpr = instr.gpr().sel();
851 cf.mark = 1;
852 cf.comp_mask = instr.write_mask();
853 cf.swizzle_x = 0;
854 cf.swizzle_y = 1;
855 cf.swizzle_z = 2;
856 cf.swizzle_w = 3;
857 cf.burst_count = 1;
858
859 if (instr.indirect()) {
860 cf.type = 3;
861 cf.index_gpr = instr.address();
862
863 /* The docu seems to be wrong here: In indirect addressing the
864 * address_base seems to be the array_size */
865 cf.array_size = instr.array_size();
866 } else {
867 cf.type = 2;
868 cf.array_base = instr.location();
869 }
870 /* This should be 0, but the address calculation is apparently wrong */
871
872
873 if (r600_bytecode_add_output(m_bc, &cf)){
874 R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
875 return false;
876 }
877
878 return true;
879 }
880
881 extern const std::map<ESDOp, int> ds_opcode_map;
882
/* Emit a GDS (global data share) instruction, e.g. for atomic counters.
 *
 * A literal uav id is converted directly to an index (value >> 2);
 * a dynamic uav id is divided by 4 in place and loaded into CF index
 * register 1 via MOVA_INT + SET_CF_IDX1. */
bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr& instr)
{
   struct r600_bytecode_gds gds;

   int uav_idx = -1;
   auto addr = instr.uav_id();
   if (addr->type() != Value::literal) {
      /* (Re)load CF_IDX1 unless it already holds this value and we are
       * outside any loop */
      if (!m_bc->index_loaded[1] || m_loop_nesting ||
          m_bc->index_reg[1] != addr->sel()
          || m_bc->index_reg_chan[1] != addr->chan()) {
         struct r600_bytecode_alu alu;

         /* index = uav_id >> 2, computed in place in the address GPR */
         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op2_lshr_int);
         alu.dst.sel = addr->sel();
         alu.dst.chan = addr->chan();
         alu.src[0].sel = addr->sel();
         alu.src[0].chan = addr->chan();
         alu.src[1].sel = ALU_SRC_LITERAL;
         alu.src[1].value = 2;
         alu.last = 1;
         alu.dst.write = 1;
         int r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return false;

         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op1_mova_int);
         alu.dst.chan = 0;
         alu.src[0].sel = addr->sel();
         alu.src[0].chan = addr->chan();
         alu.last = 1;
         r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return false;

         /* MOVA invalidates the tracked AR contents */
         m_bc->ar_loaded = 0;

         alu.op = opcode_map.at(op1_set_cf_idx1);
         alu.dst.chan = 0;
         alu.src[0].sel = 0;
         alu.src[0].chan = 0;
         alu.last = 1;

         r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return false;

         /* Remember what CF_IDX1 now holds */
         m_bc->index_reg[1] = addr->sel();
         m_bc->index_reg_chan[1] = addr->chan();
         m_bc->index_loaded[1] = true;
      }
   } else {
      const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
      uav_idx = addr_reg.value() >> 2;
   }

   memset(&gds, 0, sizeof(struct r600_bytecode_gds));

   gds.op = ds_opcode_map.at(instr.op());
   gds.dst_gpr = instr.dest_sel();
   gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
   /* bim_one selects indexing through CF_IDX1 */
   gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
   gds.src_gpr = instr.src_sel();

   gds.src_sel_x = instr.src_swizzle(0);
   gds.src_sel_y = instr.src_swizzle(1);
   gds.src_sel_z = instr.src_swizzle(2);

   gds.dst_sel_x = instr.dest_swizzle(0);
   gds.dst_sel_y = 7;
   gds.dst_sel_z = 7;
   gds.dst_sel_w = 7;
   gds.src_gpr2 = 0;
   gds.alloc_consume = 1; // Not Cayman

   int r = r600_bytecode_add_gds(m_bc, &gds);
   if (r)
      return false;
   m_bc->cf_last->vpm = 1;
   m_bc->cf_last->barrier = 1;
   return true;
}
966
967 bool AssemblyFromShaderLegacyImpl::emit_tf_write(const GDSStoreTessFactor& instr)
968 {
969 struct r600_bytecode_gds gds;
970
971 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
972 gds.src_gpr = instr.sel();
973 gds.src_sel_x = instr.chan(0);
974 gds.src_sel_y = instr.chan(1);
975 gds.src_sel_z = 4;
976 gds.dst_sel_x = 7;
977 gds.dst_sel_y = 7;
978 gds.dst_sel_z = 7;
979 gds.dst_sel_w = 7;
980 gds.op = FETCH_OP_TF_WRITE;
981
982 if (r600_bytecode_add_gds(m_bc, &gds) != 0)
983 return false;
984
985 if (instr.chan(2) != 7) {
986 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
987 gds.src_gpr = instr.sel();
988 gds.src_sel_x = instr.chan(2);
989 gds.src_sel_y = instr.chan(3);
990 gds.src_sel_z = 4;
991 gds.dst_sel_x = 7;
992 gds.dst_sel_y = 7;
993 gds.dst_sel_z = 7;
994 gds.dst_sel_w = 7;
995 gds.op = FETCH_OP_TF_WRITE;
996
997 if (r600_bytecode_add_gds(m_bc, &gds))
998 return false;
999 }
1000 return true;
1001 }
1002
1003 bool AssemblyFromShaderLegacyImpl::emit_ldswrite(const LDSWriteInstruction& instr)
1004 {
1005 r600_bytecode_alu alu;
1006 memset(&alu, 0, sizeof(r600_bytecode_alu));
1007
1008 alu.last = true;
1009 alu.is_lds_idx_op = true;
1010 copy_src(alu.src[0], instr.address());
1011 copy_src(alu.src[1], instr.value0());
1012
1013 if (instr.num_components() == 1) {
1014 alu.op = LDS_OP2_LDS_WRITE;
1015 } else {
1016 alu.op = LDS_OP3_LDS_WRITE_REL;
1017 alu.lds_idx = 1;
1018 copy_src(alu.src[2], instr.value1());
1019 }
1020
1021 return r600_bytecode_add_alu(m_bc, &alu) == 0;
1022 }
1023
bool AssemblyFromShaderLegacyImpl::emit_ldsread(const LDSReadInstruction& instr)
{
   /* Emit n_values LDS reads: first all LDS_READ_RET fetches are issued,
    * then the results are popped from LDS output queue A into the
    * destination registers, one MOV per value.
    * Returns false if adding any ALU instruction fails. */
   int r;
   unsigned nread = 0;
   unsigned nfetch = 0;
   unsigned n_values = instr.num_values();

   r600_bytecode_alu alu_fetch;
   r600_bytecode_alu alu_read;

   /* We must add a new ALU clause if the fetch and read op would be split otherwise
    * r600_asm limits at 120 slots = 240 dwords */
   if (m_bc->cf_last->ndw > 240 - 4 * n_values)
      m_bc->force_add_cf = 1;

   /* The loop runs 2 * n_values iterations: while nfetch < n_values only
    * the fetch branch runs; once all fetches are queued only the read
    * branch runs, consuming one queued value per iteration. */
   while (nread < n_values) {
      if (nfetch < n_values) {
         /* Issue the LDS read; the result is placed in output queue A. */
         memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
         alu_fetch.is_lds_idx_op = true;
         alu_fetch.op = LDS_OP1_LDS_READ_RET;

         copy_src(alu_fetch.src[0], instr.address(nfetch));
         alu_fetch.src[1].sel = V_SQ_ALU_SRC_0;
         alu_fetch.src[2].sel = V_SQ_ALU_SRC_0;
         alu_fetch.last = 1;
         r = r600_bytecode_add_alu(m_bc, &alu_fetch);
         m_bc->cf_last->nlds_read++;
         if (r)
            return false;
      }

      if (nfetch >= n_values) {
         /* All fetches issued: pop one result from queue A per iteration. */
         memset(&alu_read, 0, sizeof(r600_bytecode_alu));
         copy_dst(alu_read.dst, instr.dest(nread));
         alu_read.op = ALU_OP1_MOV;
         alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
         alu_read.last = 1;
         alu_read.dst.write = 1;
         r = r600_bytecode_add_alu(m_bc, &alu_read);
         m_bc->cf_last->nqueue_read++;
         if (r)
            return false;
         ++nread;
      }
      ++nfetch;
   }
   /* Every queued fetch must have a matching queue pop. */
   assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read);

   return true;
}
1074
1075 bool AssemblyFromShaderLegacyImpl::emit_ldsatomic(const LDSAtomicInstruction& instr)
1076 {
1077 if (m_bc->cf_last->ndw > 240 - 4)
1078 m_bc->force_add_cf = 1;
1079
1080 r600_bytecode_alu alu_fetch;
1081 r600_bytecode_alu alu_read;
1082
1083 memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
1084 alu_fetch.is_lds_idx_op = true;
1085 alu_fetch.op = instr.op();
1086
1087 copy_src(alu_fetch.src[0], instr.address());
1088 auto& src0 = instr.src0();
1089 alu_fetch.src[1].sel = src0.sel();
1090 alu_fetch.src[1].chan = src0.chan();
1091 if (instr.src1()) {
1092 auto& src1 = *instr.src1();
1093 alu_fetch.src[2].sel = src1.sel();
1094 alu_fetch.src[2].chan = src1.chan();
1095 }
1096 alu_fetch.last = 1;
1097 int r = r600_bytecode_add_alu(m_bc, &alu_fetch);
1098 if (r)
1099 return false;
1100
1101 memset(&alu_read, 0, sizeof(r600_bytecode_alu));
1102 copy_dst(alu_read.dst, instr.dest());
1103 alu_read.op = ALU_OP1_MOV;
1104 alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
1105 alu_read.last = 1;
1106 alu_read.dst.write = 1;
1107 r = r600_bytecode_add_alu(m_bc, &alu_read);
1108 if (r)
1109 return false;
1110 return true;
1111 }
1112
1113 bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
1114 {
1115 struct r600_bytecode_gds gds;
1116
1117 int rat_idx = instr.rat_id();
1118 EBufferIndexMode rat_index_mode = bim_none;
1119 auto addr = instr.rat_id_offset();
1120
1121 if (addr) {
1122 if (addr->type() != Value::literal) {
1123 rat_index_mode = bim_one;
1124 if (!m_bc->index_loaded[1] || m_loop_nesting ||
1125 m_bc->index_reg[1] != addr->sel()
1126 || m_bc->index_reg_chan[1] != addr->chan()) {
1127 struct r600_bytecode_alu alu;
1128
1129 memset(&alu, 0, sizeof(alu));
1130 alu.op = opcode_map.at(op1_mova_int);
1131 alu.dst.chan = 0;
1132 alu.src[0].sel = addr->sel();
1133 alu.src[0].chan = addr->chan();
1134 alu.last = 1;
1135 int r = r600_bytecode_add_alu(m_bc, &alu);
1136 if (r)
1137 return false;
1138
1139 m_bc->ar_loaded = 0;
1140
1141 alu.op = opcode_map.at(op1_set_cf_idx1);
1142 alu.dst.chan = 0;
1143 alu.src[0].sel = 0;
1144 alu.src[0].chan = 0;
1145 alu.last = 1;
1146
1147 r = r600_bytecode_add_alu(m_bc, &alu);
1148 if (r)
1149 return false;
1150
1151 m_bc->index_reg[1] = addr->sel();
1152 m_bc->index_reg_chan[1] = addr->chan();
1153 m_bc->index_loaded[1] = true;
1154
1155 }
1156 } else {
1157 const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
1158 rat_idx += addr_reg.value();
1159 }
1160 }
1161 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
1162
1163 r600_bytecode_add_cfinst(m_bc, CF_OP_MEM_RAT);
1164 auto cf = m_bc->cf_last;
1165 cf->rat.id = rat_idx + m_shader->rat_base;
1166 cf->rat.inst = instr.rat_op();
1167 cf->rat.index_mode = rat_index_mode;
1168 cf->output.type = instr.need_ack() ? 3 : 1;
1169 cf->output.gpr = instr.data_gpr();
1170 cf->output.index_gpr = instr.index_gpr();
1171 cf->output.comp_mask = instr.comp_mask();
1172 cf->output.burst_count = instr.burst_count();
1173 assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
1174 if (cf->rat.inst != RatInstruction::STORE_TYPED) {
1175 assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
1176 instr.data_swz(1) == PIPE_SWIZZLE_MAX) ;
1177 assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
1178 instr.data_swz(2) == PIPE_SWIZZLE_MAX) ;
1179 }
1180
1181 cf->vpm = 1;
1182 cf->barrier = 1;
1183 cf->mark = instr.need_ack();
1184 cf->output.elem_size = instr.elm_size();
1185 return true;
1186 }
1187
1188 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
1189 const Value& d)
1190 {
1191 assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
1192
1193 if (d.sel() > 124) {
1194 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel());
1195 return false;
1196 }
1197
1198 dst.sel = d.sel();
1199 dst.chan = d.chan();
1200
1201 if (m_bc->index_reg[1] == dst.sel &&
1202 m_bc->index_reg_chan[1] == dst.chan)
1203 m_bc->index_loaded[1] = false;
1204
1205 if (m_bc->index_reg[0] == dst.sel &&
1206 m_bc->index_reg_chan[0] == dst.chan)
1207 m_bc->index_loaded[0] = false;
1208
1209 return true;
1210 }
1211
1212 bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
1213 {
1214
1215 if (s.type() == Value::gpr && s.sel() > 124) {
1216 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel());
1217 return false;
1218 }
1219
1220 if (s.type() == Value::lds_direct) {
1221 R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
1222 return false;
1223 }
1224
1225 if (s.type() == Value::kconst && s.sel() < 512) {
1226 R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel());
1227 return false;
1228 }
1229
1230 if (s.type() == Value::literal) {
1231 auto& v = static_cast<const LiteralValue&>(s);
1232 if (v.value() == 0) {
1233 src.sel = ALU_SRC_0;
1234 src.chan = 0;
1235 --m_nliterals_in_group;
1236 return true;
1237 }
1238 if (v.value() == 1) {
1239 src.sel = ALU_SRC_1_INT;
1240 src.chan = 0;
1241 --m_nliterals_in_group;
1242 return true;
1243 }
1244 if (v.value_float() == 1.0f) {
1245 src.sel = ALU_SRC_1;
1246 src.chan = 0;
1247 --m_nliterals_in_group;
1248 return true;
1249 }
1250 if (v.value_float() == 0.5f) {
1251 src.sel = ALU_SRC_0_5;
1252 src.chan = 0;
1253 --m_nliterals_in_group;
1254 return true;
1255 }
1256 if (v.value() == 0xffffffff) {
1257 src.sel = ALU_SRC_M_1_INT;
1258 src.chan = 0;
1259 --m_nliterals_in_group;
1260 return true;
1261 }
1262 src.value = v.value();
1263 }
1264
1265 src.sel = s.sel();
1266 src.chan = s.chan();
1267 if (s.type() == Value::kconst) {
1268 const UniformValue& cv = static_cast<const UniformValue&>(s);
1269 src.kc_bank = cv.kcache_bank();
1270 }
1271
1272 return true;
1273 }
1274
/* Translation table from the backend's internal EAluOp opcodes to the
 * r600 bytecode ALU opcodes used by r600_asm.
 * NOTE(review): op1_set_cf_idx0/1 deliberately(?) map to ALU_OP0_SET_CF_IDX0/1,
 * and several 64-bit op1_* entries map to ALU_OP2_* encodings — these look
 * intentional but should be confirmed against the ISA tables. */
const std::map<EAluOp, int> opcode_map = {

   {op2_add, ALU_OP2_ADD},
   {op2_mul, ALU_OP2_MUL},
   {op2_mul_ieee, ALU_OP2_MUL_IEEE},
   {op2_max, ALU_OP2_MAX},
   {op2_min, ALU_OP2_MIN},
   {op2_max_dx10, ALU_OP2_MAX_DX10},
   {op2_min_dx10, ALU_OP2_MIN_DX10},
   {op2_sete, ALU_OP2_SETE},
   {op2_setgt, ALU_OP2_SETGT},
   {op2_setge, ALU_OP2_SETGE},
   {op2_setne, ALU_OP2_SETNE},
   {op2_sete_dx10, ALU_OP2_SETE_DX10},
   {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
   {op2_setge_dx10, ALU_OP2_SETGE_DX10},
   {op2_setne_dx10, ALU_OP2_SETNE_DX10},
   {op1_fract, ALU_OP1_FRACT},
   {op1_trunc, ALU_OP1_TRUNC},
   {op1_ceil, ALU_OP1_CEIL},
   {op1_rndne, ALU_OP1_RNDNE},
   {op1_floor, ALU_OP1_FLOOR},
   {op2_ashr_int, ALU_OP2_ASHR_INT},
   {op2_lshr_int, ALU_OP2_LSHR_INT},
   {op2_lshl_int, ALU_OP2_LSHL_INT},
   {op1_mov, ALU_OP1_MOV},
   {op0_nop, ALU_OP0_NOP},
   {op2_mul_64, ALU_OP2_MUL_64},
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
   {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
   {op2_pred_sete, ALU_OP2_PRED_SETE},
   {op2_pred_setgt, ALU_OP2_PRED_SETGT},
   {op2_pred_setge, ALU_OP2_PRED_SETGE},
   {op2_pred_setne, ALU_OP2_PRED_SETNE},
   //{op2_pred_set_inv, ALU_OP2_PRED_SET},
   //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
   //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
   {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
   {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
   {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
   {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
   {op2_kille, ALU_OP2_KILLE},
   {op2_killgt, ALU_OP2_KILLGT},
   {op2_killge, ALU_OP2_KILLGE},
   {op2_killne, ALU_OP2_KILLNE},
   {op2_and_int, ALU_OP2_AND_INT},
   {op2_or_int, ALU_OP2_OR_INT},
   {op2_xor_int, ALU_OP2_XOR_INT},
   {op1_not_int, ALU_OP1_NOT_INT},
   {op2_add_int, ALU_OP2_ADD_INT},
   {op2_sub_int, ALU_OP2_SUB_INT},
   {op2_max_int, ALU_OP2_MAX_INT},
   {op2_min_int, ALU_OP2_MIN_INT},
   {op2_max_uint, ALU_OP2_MAX_UINT},
   {op2_min_uint, ALU_OP2_MIN_UINT},
   {op2_sete_int, ALU_OP2_SETE_INT},
   {op2_setgt_int, ALU_OP2_SETGT_INT},
   {op2_setge_int, ALU_OP2_SETGE_INT},
   {op2_setne_int, ALU_OP2_SETNE_INT},
   {op2_setgt_uint, ALU_OP2_SETGT_UINT},
   {op2_setge_uint, ALU_OP2_SETGE_UINT},
   {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
   {op2_killge_uint, ALU_OP2_KILLGE_UINT},
   //p2_prede_int, ALU_OP2_PREDE_INT},
   {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
   {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
   {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
   {op2_kille_int, ALU_OP2_KILLE_INT},
   {op2_killgt_int, ALU_OP2_KILLGT_INT},
   {op2_killge_int, ALU_OP2_KILLGE_INT},
   {op2_killne_int, ALU_OP2_KILLNE_INT},
   {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
   {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
   {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
   {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
   {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
   {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
   {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
   {op1_bfrev_int, ALU_OP1_BFREV_INT},
   {op2_addc_uint, ALU_OP2_ADDC_UINT},
   {op2_subb_uint, ALU_OP2_SUBB_UINT},
   {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
   {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
   {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
   {op2_set_mode, ALU_OP2_SET_MODE},
   {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
   {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
   {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
   {op1_exp_ieee, ALU_OP1_EXP_IEEE},
   {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
   {op1_log_ieee, ALU_OP1_LOG_IEEE},
   {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
   {op1_recip_ff, ALU_OP1_RECIP_FF},
   {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
   {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
   {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
   {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
   {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
   {op1_sin, ALU_OP1_SIN},
   {op1_cos, ALU_OP1_COS},
   {op2_mullo_int, ALU_OP2_MULLO_INT},
   {op2_mulhi_int, ALU_OP2_MULHI_INT},
   {op2_mullo_uint, ALU_OP2_MULLO_UINT},
   {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
   {op1_recip_int, ALU_OP1_RECIP_INT},
   {op1_recip_uint, ALU_OP1_RECIP_UINT},
   {op1_recip_64, ALU_OP2_RECIP_64},
   {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
   {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
   {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
   {op1_sqrt_64, ALU_OP2_SQRT_64},
   {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
   {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
   {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
   {op2_bfm_int, ALU_OP2_BFM_INT},
   {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
   {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
   {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
   {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
   {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
   {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
   {op1_bcnt_int, ALU_OP1_BCNT_INT},
   {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
   {op1_ffbl_int, ALU_OP1_FFBL_INT},
   {op1_ffbh_int, ALU_OP1_FFBH_INT},
   {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
   {op2_dot_ieee, ALU_OP2_DOT_IEEE},
   {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
   {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
   {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
   {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
   {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
   {op2_mul_uint24, ALU_OP2_MUL_UINT24},
   {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
   {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
   {op2_sete_64, ALU_OP2_SETE_64},
   {op2_setne_64, ALU_OP2_SETNE_64},
   {op2_setgt_64, ALU_OP2_SETGT_64},
   {op2_setge_64, ALU_OP2_SETGE_64},
   {op2_min_64, ALU_OP2_MIN_64},
   {op2_max_64, ALU_OP2_MAX_64},
   {op2_dot4, ALU_OP2_DOT4},
   {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
   {op2_cube, ALU_OP2_CUBE},
   {op1_max4, ALU_OP1_MAX4},
   {op1_frexp_64, ALU_OP1_FREXP_64},
   {op1_ldexp_64, ALU_OP2_LDEXP_64},
   {op1_fract_64, ALU_OP1_FRACT_64},
   {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
   {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
   {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
   {op2_add_64, ALU_OP2_ADD_64},
   {op1_mova_int, ALU_OP1_MOVA_INT},
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
   {op2_dot, ALU_OP2_DOT},
   //p2_mul_prev, ALU_OP2_MUL_PREV},
   //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
   //p2_add_prev, ALU_OP2_ADD_PREV},
   {op2_muladd_prev, ALU_OP2_MULADD_PREV},
   {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
   {op2_interp_xy, ALU_OP2_INTERP_XY},
   {op2_interp_zw, ALU_OP2_INTERP_ZW},
   {op2_interp_x, ALU_OP2_INTERP_X},
   {op2_interp_z, ALU_OP2_INTERP_Z},
   {op0_store_flags, ALU_OP1_STORE_FLAGS},
   {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
   {op0_lds_1a, ALU_OP2_LDS_1A},
   {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
   {op0_lds_2a, ALU_OP2_LDS_2A},
   {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
   {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
   {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
   // {op 3 all left shift 6
   {op3_bfe_uint, ALU_OP3_BFE_UINT},
   {op3_bfe_int, ALU_OP3_BFE_INT},
   {op3_bfi_int, ALU_OP3_BFI_INT},
   {op3_fma, ALU_OP3_FMA},
   {op3_cndne_64, ALU_OP3_CNDNE_64},
   {op3_fma_64, ALU_OP3_FMA_64},
   {op3_lerp_uint, ALU_OP3_LERP_UINT},
   {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
   {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
   {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
   {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
   {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
   {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
   {op3_muladd, ALU_OP3_MULADD},
   {op3_muladd_m2, ALU_OP3_MULADD_M2},
   {op3_muladd_m4, ALU_OP3_MULADD_M4},
   {op3_muladd_d2, ALU_OP3_MULADD_D2},
   {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
   {op3_cnde, ALU_OP3_CNDE},
   {op3_cndgt, ALU_OP3_CNDGT},
   {op3_cndge, ALU_OP3_CNDGE},
   {op3_cnde_int, ALU_OP3_CNDE_INT},
   {op3_cndgt_int, ALU_OP3_CNDGT_INT},
   {op3_cndge_int, ALU_OP3_CNDGE_INT},
   {op3_mul_lit, ALU_OP3_MUL_LIT},
};
1478
/* Translation table from the backend's internal GDS/DS opcodes (ESDOp)
 * to the r600 bytecode FETCH_OP_GDS_* opcodes. Used by the GDS emission
 * path above (ds_opcode_map.at(instr.op())). DS_OP_INVALID maps to 0. */
const std::map<ESDOp, int> ds_opcode_map = {
   {DS_OP_ADD, FETCH_OP_GDS_ADD},
   {DS_OP_SUB, FETCH_OP_GDS_SUB},
   {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
   {DS_OP_INC, FETCH_OP_GDS_INC},
   {DS_OP_DEC, FETCH_OP_GDS_DEC},
   {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
   {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
   {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
   {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
   {DS_OP_AND, FETCH_OP_GDS_AND},
   {DS_OP_OR, FETCH_OP_GDS_OR},
   {DS_OP_XOR, FETCH_OP_GDS_XOR},
   {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
   {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
   {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
   {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
   {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
   {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
   {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
   {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
   {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
   {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
   {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
   {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
   {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
   {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
   {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
   {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
   {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
   {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
   {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
   {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
   {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
   {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
   {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
   {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
   {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
   {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
   {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
   {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
   {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
   {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
   {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
   {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
   {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
   {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
   {DS_OP_INVALID, 0},
};
1529
1530 }