r600/sfn: Count only literals that are not inline to split instruction groups
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_ir_to_assembly.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_ir_to_assembly.h"
28 #include "sfn_conditionaljumptracker.h"
29 #include "sfn_callstack.h"
30 #include "sfn_instruction_gds.h"
31 #include "sfn_instruction_misc.h"
32 #include "sfn_instruction_fetch.h"
33
34 #include "../r600_shader.h"
35 #include "../r600_sq.h"
36
37 namespace r600 {
38
39 using std::vector;
40
/* Legacy assembler backend: walks the sfn IR instruction stream and emits
 * r600 bytecode through the r600_bytecode_* C API. One instance is owned
 * (via new/delete) by AssemblyFromShaderLegacy. */
struct AssemblyFromShaderLegacyImpl {

   AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);
   /* Dispatch one IR instruction to the matching emit_* method.
    * Returns false on any bytecode emission error. */
   bool emit(const Instruction::Pointer i);
   /* Drop the cached AR value so the next indirect access reloads it. */
   void reset_addr_register() {m_last_addr.reset();}

private:
   bool emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op);
   bool emit_export(const ExportInstruction & exi);
   bool emit_streamout(const StreamOutIntruction& instr);
   bool emit_memringwrite(const MemRingOutIntruction& instr);
   bool emit_tex(const TexInstruction & tex_instr);
   bool emit_vtx(const FetchInstruction& fetch_instr);
   bool emit_if_start(const IfInstruction & if_instr);
   bool emit_else(const ElseInstruction & else_instr);
   bool emit_endif(const IfElseEndInstruction & endif_instr);
   bool emit_emit_vertex(const EmitVertex &instr);

   bool emit_loop_begin(const LoopBeginInstruction& instr);
   bool emit_loop_end(const LoopEndInstruction& instr);
   bool emit_loop_break(const LoopBreakInstruction& instr);
   bool emit_loop_continue(const LoopContInstruction& instr);
   bool emit_wait_ack(const WaitAck& instr);
   bool emit_wr_scratch(const WriteScratchInstruction& instr);
   bool emit_gds(const GDSInstr& instr);
   bool emit_rat(const RatInstruction& instr);

   /* Load a value into the AR register (clears ar_loaded so the
    * bytecode layer inserts the actual MOVA). */
   bool emit_load_addr(PValue addr);
   bool emit_fs_pixel_export(const ExportInstruction & exi);
   bool emit_vs_pos_export(const ExportInstruction & exi);
   bool emit_vs_param_export(const ExportInstruction & exi);
   bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
   bool copy_src(r600_bytecode_alu_src& src, const Value& s);



   ConditionalJumpTracker m_jump_tracker;
   CallStack m_callstack;

public:
   r600_bytecode *m_bc;
   r600_shader *m_shader;
   r600_shader_key *m_key;
   r600_bytecode_output m_output;
   unsigned m_max_color_exports;
   bool has_pos_output;
   bool has_param_output;
   /* Value currently mirrored in the AR register, if any. */
   PValue m_last_addr;
   /* Depth of nested loops; non-zero disables index-register caching. */
   int m_loop_nesting;
   /* Literal slots used by the current ALU instruction group (max 4);
    * inline constants are discounted again in copy_src. */
   int m_nliterals_in_group;
   /* Destination GPRs written by the fetches of the current VTX clause;
    * used to force a new clause on read-after-write. */
   std::set<int> vtx_fetch_results;
};
93
94
AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
                                                   r600_shader_key *key)
{
   /* pimpl: all emission state and logic lives in the Impl object. */
   impl = new AssemblyFromShaderLegacyImpl(sh, key);
}
100
AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
{
   /* Matches the new in the constructor. */
   delete impl;
}
105
106 bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
107 {
108 if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
109 impl->m_shader->ninput > 0)
110 r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);
111
112
113 std::vector<Instruction::Pointer> exports;
114
115 for (const auto& block : ir) {
116 for (const auto& i : block) {
117 if (!impl->emit(i))
118 return false;
119 if (i->type() != Instruction::alu)
120 impl->reset_addr_register();
121 }
122 }
123 /*
124 for (const auto& i : exports) {
125 if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
126 return false;
127 }*/
128
129
130 const struct cf_op_info *last = nullptr;
131 if (impl->m_bc->cf_last)
132 last = r600_isa_cf(impl->m_bc->cf_last->op);
133
134 /* alu clause instructions don't have EOP bit, so add NOP */
135 if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
136 || impl->m_bc->cf_last->op == CF_OP_POP)
137 r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);
138
139 /* A fetch shader only can't be EOP (results in hang), but we can replace it
140 * by a NOP */
141 else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
142 impl->m_bc->cf_last->op = CF_OP_NOP;
143
144 impl->m_bc->cf_last->end_of_program = 1;
145
146 return true;
147 }
148
149 bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i)
150 {
151 if (i->type() != Instruction::vtx)
152 vtx_fetch_results.clear();
153
154 sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n";
155 switch (i->type()) {
156 case Instruction::alu:
157 return emit_alu(static_cast<const AluInstruction&>(*i), cf_alu_undefined);
158 case Instruction::exprt:
159 return emit_export(static_cast<const ExportInstruction&>(*i));
160 case Instruction::tex:
161 return emit_tex(static_cast<const TexInstruction&>(*i));
162 case Instruction::vtx:
163 return emit_vtx(static_cast<const FetchInstruction&>(*i));
164 case Instruction::cond_if:
165 return emit_if_start(static_cast<const IfInstruction&>(*i));
166 case Instruction::cond_else:
167 return emit_else(static_cast<const ElseInstruction&>(*i));
168 case Instruction::cond_endif:
169 return emit_endif(static_cast<const IfElseEndInstruction&>(*i));
170 case Instruction::loop_begin:
171 return emit_loop_begin(static_cast<const LoopBeginInstruction&>(*i));
172 case Instruction::loop_end:
173 return emit_loop_end(static_cast<const LoopEndInstruction&>(*i));
174 case Instruction::loop_break:
175 return emit_loop_break(static_cast<const LoopBreakInstruction&>(*i));
176 case Instruction::loop_continue:
177 return emit_loop_continue(static_cast<const LoopContInstruction&>(*i));
178 case Instruction::streamout:
179 return emit_streamout(static_cast<const StreamOutIntruction&>(*i));
180 case Instruction::ring:
181 return emit_memringwrite(static_cast<const MemRingOutIntruction&>(*i));
182 case Instruction::emit_vtx:
183 return emit_emit_vertex(static_cast<const EmitVertex&>(*i));
184 case Instruction::wait_ack:
185 return emit_wait_ack(static_cast<const WaitAck&>(*i));
186 case Instruction::mem_wr_scratch:
187 return emit_wr_scratch(static_cast<const WriteScratchInstruction&>(*i));
188 case Instruction::gds:
189 return emit_gds(static_cast<const GDSInstr&>(*i));
190 case Instruction::rat:
191 return emit_rat(static_cast<const RatInstruction&>(*i));
192 default:
193 return false;
194 }
195 }
196
AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
                                                           r600_shader_key *key):
   m_callstack(sh->bc),
   m_bc(&sh->bc),
   m_shader(sh),
   m_key(key),
   has_pos_output(false),
   has_param_output(false),
   m_loop_nesting(0),
   m_nliterals_in_group(0)
{
   /* NOTE(review): reads the fragment-shader part of the key; presumably
    * only meaningful for pixel shaders — confirm for other stages. */
   m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
}
210
211 extern const std::map<EAluOp, int> opcode_map;
212
213 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
214 {
215 m_bc->ar_reg = addr->sel();
216 m_bc->ar_chan = addr->chan();
217 m_bc->ar_loaded = 0;
218 m_last_addr = addr;
219
220 sfn_log << SfnLog::assembly << " Prepare " << *addr << " to address register\n";
221
222 return true;
223 }
224
/* Emit one ALU instruction.
 * @param ai     the IR ALU instruction
 * @param cf_op  the CF clause type to use, or cf_alu_undefined to take it
 *               from the instruction itself
 * @return false on unmapped opcode or bytecode emission failure. */
bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op)
{

   struct r600_bytecode_alu alu;
   memset(&alu, 0, sizeof(alu));
   PValue addr_in_use;

   if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
      std::cerr << "Opcode not handled for " << ai <<"\n";
      return false;
   }

   /* Pre-count the literal sources; copy_src later decrements the counter
    * again for literals that map to inline constants (0, 1, 0.5, ...). */
   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);
      if (s.type() == Value::literal)
         ++m_nliterals_in_group;
   }

   /* This instruction group would exceed the limit of literals, so
    * force a new instruction group by adding a NOP as last
    * instruction. This will no longer be needed with a real
    * scheduler. */
   if (m_nliterals_in_group > 4) {
      sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group << " inject a last op (nop)\n";
      alu.op = ALU_OP0_NOP;
      alu.last = 1;
      int retval = r600_bytecode_add_alu(m_bc, &alu);
      if (retval)
         return false;
      memset(&alu, 0, sizeof(alu));
      m_nliterals_in_group = 0;
   }

   alu.op = opcode_map.at(ai.opcode());

   /* Missing test whether ai actually has a dest */
   auto dst = ai.dest();

   if (dst) {
      if (!copy_dst(alu.dst, *dst))
         return false;

      alu.dst.write = ai.flag(alu_write);
      alu.dst.clamp = ai.flag(alu_dst_clamp);

      /* Indirect destination: make sure AR holds the index value. */
      if (dst->type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(*dst);
         PValue addr = v.indirect();
         if (addr) {
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.dst.rel = addr ? 1 : 0;;
         }
      }
   }

   alu.is_op3 = ai.n_sources() == 3;

   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);

      if (!copy_src(alu.src[i], s))
         return false;
      alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);

      /* Indirect source: only one AR value can be live per instruction. */
      if (s.type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(s);
         PValue addr = v.indirect();
         if (addr) {
            assert(!addr_in_use || (*addr_in_use == *addr));
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.src[i].rel = addr ? 1 : 0;
         }
      }
      /* Three-source ops have no per-source abs modifier. */
      if (!alu.is_op3)
         alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
   }

   if (ai.bank_swizzle() != alu_vec_unknown)
      alu.bank_swizzle_force = ai.bank_swizzle();

   alu.last = ai.flag(alu_last_instr);
   alu.update_pred = ai.flag(alu_update_pred);
   alu.execute_mask = ai.flag(alu_update_exec);

   /* If the destination register is equal to the last loaded address register
    * then clear the latter one, because the values will no longer be identical */
   if (m_last_addr)
      sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n";

   if (dst)
      sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";

   if (dst && m_last_addr)
      if (*dst == *m_last_addr) {
         sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << "\n";
         m_last_addr.reset();
      }

   if (cf_op == cf_alu_undefined)
      cf_op = ai.cf_type();

   /* Map the IR clause type onto the hardware CF opcode. */
   unsigned type = 0;
   switch (cf_op) {
   case cf_alu: type = CF_OP_ALU; break;
   case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
   case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
   case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
   case cf_alu_break: type = CF_OP_ALU_BREAK; break;
   case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
   case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
   case cf_alu_extended: type = CF_OP_ALU_EXT; break;
   default:
      assert(0 && "cf_alu_undefined should have been replaced");
   }

   /* The last instruction closes the group, so the literal count starts
    * over for the next group. */
   if (alu.last)
      m_nliterals_in_group = 0;

   bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);

   /* Track side effects on AR and the CF index registers. */
   if (ai.opcode() == op1_mova_int)
      m_bc->ar_loaded = 0;

   if (ai.opcode() == op1_set_cf_idx0)
      m_bc->index_loaded[0] = 1;

   if (ai.opcode() == op1_set_cf_idx1)
      m_bc->index_loaded[1] = 1;


   /* These opcodes must start a new CF instruction. */
   m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
                          ai.opcode() == op2_killne_int ||
                          ai.opcode() == op1_set_cf_idx0 ||
                          ai.opcode() == op1_set_cf_idx1);
   return retval;
}
367
368 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
369 {
370 r600_bytecode_output output;
371 memset(&output, 0, sizeof(output));
372 assert(exi.gpr().type() == Value::gpr_vector);
373 const auto& gpr = exi.gpr();
374 output.gpr = gpr.sel();
375 output.elem_size = 3;
376 output.swizzle_x = gpr.chan_i(0);
377 output.swizzle_y = gpr.chan_i(1);
378 output.swizzle_z = gpr.chan_i(2);
379 output.swizzle_w = gpr.chan_i(3);
380 output.burst_count = 1;
381 output.array_base = 60 + exi.location();
382 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
383 output.type = exi.export_type();
384
385
386 if (r600_bytecode_add_output(m_bc, &output)) {
387 R600_ERR("Error adding pixel export at location %d\n", exi.location());
388 return false;
389 }
390
391 return true;
392 }
393
394
395 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
396 {
397 r600_bytecode_output output;
398 assert(exi.gpr().type() == Value::gpr_vector);
399 const auto& gpr = exi.gpr();
400
401 memset(&output, 0, sizeof(output));
402 output.gpr = gpr.sel();
403 output.elem_size = 3;
404 output.swizzle_x = gpr.chan_i(0);
405 output.swizzle_y = gpr.chan_i(1);
406 output.swizzle_z = gpr.chan_i(2);
407 output.swizzle_w = gpr.chan_i(3);
408 output.burst_count = 1;
409 output.array_base = exi.location();
410 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
411 output.type = exi.export_type();
412
413
414 if (r600_bytecode_add_output(m_bc, &output)) {
415 R600_ERR("Error adding pixel export at location %d\n", exi.location());
416 return false;
417 }
418
419 return true;
420 }
421
422
423 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
424 {
425 if (exi.location() >= m_max_color_exports && exi.location() < 60) {
426 R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
427 exi.location(), m_max_color_exports);
428 return true;
429 }
430
431 assert(exi.gpr().type() == Value::gpr_vector);
432 const auto& gpr = exi.gpr();
433
434 r600_bytecode_output output;
435 memset(&output, 0, sizeof(output));
436
437 output.gpr = gpr.sel();
438 output.elem_size = 3;
439 output.swizzle_x = gpr.chan_i(0);
440 output.swizzle_y = gpr.chan_i(1);
441 output.swizzle_z = gpr.chan_i(2);
442 output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ;
443 output.burst_count = 1;
444 output.array_base = exi.location();
445 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
446 output.type = exi.export_type();
447
448
449 if (r600_bytecode_add_output(m_bc, &output)) {
450 R600_ERR("Error adding pixel export at location %d\n", exi.location());
451 return false;
452 }
453
454 return true;
455 }
456
457
458 bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction & exi)
459 {
460 switch (exi.export_type()) {
461 case ExportInstruction::et_pixel:
462 return emit_fs_pixel_export(exi);
463 case ExportInstruction::et_pos:
464 return emit_vs_pos_export(exi);
465 case ExportInstruction::et_param:
466 return emit_vs_param_export(exi);
467 default:
468 R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
469 return false;
470 }
471 }
472
/* Open an if-block: emit the predicate ALU clause (with an implicit or
 * explicit stack PUSH) followed by the conditional JUMP. */
bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr)
{
   assert(m_bc->chip_class == EVERGREEN);

   bool needs_workaround = false;
   int elems = m_callstack.push(FC_PUSH_VPM);

   /* NOTE(review): on these chips the combined ALU_PUSH_BEFORE appears to
    * misbehave when the push lands on a stack sub-entry boundary, so a
    * separate PUSH is emitted instead — confirm against hw errata. */
   if (m_bc->family != CHIP_HEMLOCK &&
       m_bc->family != CHIP_CYPRESS &&
       m_bc->family != CHIP_JUNIPER) {
      unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
      unsigned dmod2 = (elems) % m_bc->stack.entry_size;

      if (elems && (!dmod1 || !dmod2))
         needs_workaround = true;
   }

   auto& pred = if_instr.pred();
   auto op = cf_alu_push_before;

   if (needs_workaround) {
      /* Explicit PUSH jumping over the predicate clause; the ALU clause
       * then must not push again. */
      r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
      op = cf_alu;
   }
   emit_alu(pred, op);

   r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);

   /* The JUMP target is patched when the matching else/endif arrives. */
   m_jump_tracker.push(m_bc->cf_last, jt_if);
   return true;
}
505
506 bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED const ElseInstruction & else_instr)
507 {
508 r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
509 m_bc->cf_last->pop_count = 1;
510 return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
511 }
512
/* Close an if-block: fold the needed stack POP into the preceding ALU
 * clause when possible, otherwise emit an explicit POP. */
bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED const IfElseEndInstruction & endif_instr)
{
   m_callstack.pop(FC_PUSH_VPM);

   unsigned force_pop = m_bc->force_add_cf;
   if (!force_pop) {
      /* Start from a value that can only end up as a fold (1 or 2) if the
       * last CF instruction is a plain or single-pop ALU clause. */
      int alu_pop = 3;
      if (m_bc->cf_last) {
         if (m_bc->cf_last->op == CF_OP_ALU)
            alu_pop = 0;
         else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
            alu_pop = 1;
      }
      alu_pop += 1;
      if (alu_pop == 1) {
         m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
         m_bc->force_add_cf = 1;
      } else if (alu_pop == 2) {
         m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
         m_bc->force_add_cf = 1;
      } else {
         force_pop = 1;
      }
   }

   if (force_pop) {
      /* Standalone POP continuing right after itself. */
      r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
      m_bc->cf_last->pop_count = 1;
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
   }

   return m_jump_tracker.pop(m_bc->cf_last, jt_if);
}
546
547 bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED const LoopBeginInstruction& instr)
548 {
549 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
550 m_jump_tracker.push(m_bc->cf_last, jt_loop);
551 m_callstack.push(FC_LOOP);
552 ++m_loop_nesting;
553 return true;
554 }
555
556 bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED const LoopEndInstruction& instr)
557 {
558 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
559 m_callstack.pop(FC_LOOP);
560 assert(m_loop_nesting);
561 --m_loop_nesting;
562 return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
563 }
564
565 bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED const LoopBreakInstruction& instr)
566 {
567 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
568 return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
569 }
570
571 bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED const LoopContInstruction &instr)
572 {
573 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
574 return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
575 }
576
577 bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction& so_instr)
578 {
579 struct r600_bytecode_output output;
580 memset(&output, 0, sizeof(struct r600_bytecode_output));
581
582 output.gpr = so_instr.gpr().sel();
583 output.elem_size = so_instr.element_size();
584 output.array_base = so_instr.array_base();
585 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
586 output.burst_count = so_instr.burst_count();
587 output.array_size = so_instr.array_size();
588 output.comp_mask = so_instr.comp_mask();
589 output.op = so_instr.op();
590
591 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
592
593
594 if (r600_bytecode_add_output(m_bc, &output)) {
595 R600_ERR("shader_from_nir: Error creating stream output instruction\n");
596 return false;
597 }
598 return true;
599 }
600
601
602 bool AssemblyFromShaderLegacyImpl::emit_memringwrite(const MemRingOutIntruction& instr)
603 {
604 struct r600_bytecode_output output;
605 memset(&output, 0, sizeof(struct r600_bytecode_output));
606
607 output.gpr = instr.gpr().sel();
608 output.type = instr.type();
609 output.elem_size = instr.ncomp();
610 output.comp_mask = 0xF;
611 output.burst_count = 1;
612 output.op = instr.op();
613 if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
614 output.index_gpr = instr.index_reg();
615 output.array_size = 0xfff;
616 }
617 output.array_base = instr.array_base();
618
619 if (r600_bytecode_add_output(m_bc, &output)) {
620 R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
621 return false;
622 }
623 return true;
624 }
625
626
/* Emit a texture instruction. A dynamic sampler offset is first routed
 * through AR into CF index register 1 (MOVA + SET_CF_IDX1). */
bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction & tex_instr)
{
   auto addr = tex_instr.sampler_offset();
   /* Reload index reg 1 unless it already holds this value; inside loops
    * the cached value can't be trusted. */
   if (addr && (!m_bc->index_loaded[1] || m_loop_nesting
                || m_bc->index_reg[1] != addr->sel())) {
      struct r600_bytecode_alu alu;
      memset(&alu, 0, sizeof(alu));
      alu.op = opcode_map.at(op1_mova_int);
      alu.dst.chan = 0;
      alu.src[0].sel = addr->sel();
      alu.src[0].chan = addr->chan();
      alu.last = 1;
      int r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return false;

      m_bc->ar_loaded = 0;

      /* Copy AR into CF index register 1. */
      alu.op = opcode_map.at(op1_set_cf_idx1);
      alu.dst.chan = 0;
      alu.src[0].sel = 0;
      alu.src[0].chan = 0;
      alu.last = 1;

      r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return false;

      m_bc->index_reg[1] = addr->sel();
      m_bc->index_loaded[1] = true;
   }

   r600_bytecode_tex tex;
   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
   tex.op = tex_instr.opcode();
   tex.sampler_id = tex_instr.sampler_id();
   tex.sampler_index_mode = 0;
   tex.resource_id = tex_instr.resource_id();;
   tex.resource_index_mode = 0;
   tex.src_gpr = tex_instr.src().sel();
   tex.dst_gpr = tex_instr.dst().sel();
   tex.dst_sel_x = tex_instr.dest_swizzle(0);
   tex.dst_sel_y = tex_instr.dest_swizzle(1);
   tex.dst_sel_z = tex_instr.dest_swizzle(2);
   tex.dst_sel_w = tex_instr.dest_swizzle(3);
   tex.src_sel_x = tex_instr.src().chan_i(0);
   tex.src_sel_y = tex_instr.src().chan_i(1);
   tex.src_sel_z = tex_instr.src().chan_i(2);
   tex.src_sel_w = tex_instr.src().chan_i(3);
   tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
   tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
   tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
   tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
   tex.offset_x = tex_instr.get_offset(0);
   tex.offset_y = tex_instr.get_offset(1);
   tex.offset_z = tex_instr.get_offset(2);
   /* With a dynamic sampler offset, resource and sampler are indexed
    * through CF index register 1 (mode 2). */
   tex.resource_index_mode = (!!addr) ? 2 : 0;
   tex.sampler_index_mode = tex.resource_index_mode;

   /* Gradient queries encode coarse/fine in inst_mod instead of the
    * instruction's own mode value. */
   if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
       tex_instr.opcode() == TexInstruction::get_gradient_v)
      tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
   else
      tex.inst_mod = tex_instr.inst_mode();
   if (r600_bytecode_add_tex(m_bc, &tex)) {
      R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
      return false;
   }
   return true;
}
697
/* Emit a vertex/buffer fetch. A literal buffer offset is folded into the
 * buffer id; a dynamic one is routed through AR into CF index register 0. */
bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr)
{
   int buffer_offset = 0;
   auto addr = fetch_instr.buffer_offset();
   auto index_mode = fetch_instr.buffer_index_mode();

   if (addr) {
      if (addr->type() == Value::literal) {
         const auto& boffs = static_cast<const LiteralValue&>(*addr);
         buffer_offset = boffs.value();
      } else {
         index_mode = bim_zero;
         /* Reload index reg 0 unless it already holds this value; inside
          * loops the cached value can't be trusted. */
         if ((!m_bc->index_loaded[0] || m_loop_nesting || m_bc->index_reg[0] != addr->sel())) {
            struct r600_bytecode_alu alu;
            memset(&alu, 0, sizeof(alu));
            alu.op = opcode_map.at(op1_mova_int);
            alu.dst.chan = 0;
            alu.src[0].sel = addr->sel();
            alu.src[0].chan = addr->chan();
            alu.last = 1;
            int r = r600_bytecode_add_alu(m_bc, &alu);
            if (r)
               return false;

            m_bc->ar_loaded = 0;

            /* Copy AR into CF index register 0. */
            alu.op = opcode_map.at(op1_set_cf_idx0);
            alu.dst.chan = 0;
            alu.src[0].sel = 0;
            alu.src[0].chan = 0;
            alu.last = 1;

            r = r600_bytecode_add_alu(m_bc, &alu);
            if (r)
               return false;

            m_bc->index_reg[0] = addr->sel();
            m_bc->index_loaded[0] = true;
         }
      }
   }

   if (fetch_instr.has_prelude()) {
      for(auto &i : fetch_instr.prelude()) {
         if (!emit(i))
            return false;
      }
   }

   /* Reading a GPR that an earlier fetch of this clause wrote requires
    * starting a new fetch clause. */
   if (vtx_fetch_results.find(fetch_instr.src().sel()) !=
       vtx_fetch_results.end()) {
      m_bc->force_add_cf = 1;
      vtx_fetch_results.clear();
   }
   vtx_fetch_results.insert(fetch_instr.dst().sel());

   struct r600_bytecode_vtx vtx;
   memset(&vtx, 0, sizeof(vtx));
   vtx.op = fetch_instr.vc_opcode();
   vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
   vtx.fetch_type = fetch_instr.fetch_type();
   vtx.src_gpr = fetch_instr.src().sel();
   vtx.src_sel_x = fetch_instr.src().chan();
   vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
   vtx.dst_gpr = fetch_instr.dst().sel();
   vtx.dst_sel_x = fetch_instr.swz(0);		/* SEL_X */
   vtx.dst_sel_y = fetch_instr.swz(1);		/* SEL_Y */
   vtx.dst_sel_z = fetch_instr.swz(2);		/* SEL_Z */
   vtx.dst_sel_w = fetch_instr.swz(3);		/* SEL_W */
   vtx.use_const_fields = fetch_instr.use_const_fields();
   vtx.data_format = fetch_instr.data_format();
   vtx.num_format_all = fetch_instr.num_format();		/* NUM_FORMAT_SCALED */
   vtx.format_comp_all = fetch_instr.is_signed();	/* FORMAT_COMP_SIGNED */
   vtx.endian = fetch_instr.endian_swap();
   vtx.buffer_index_mode = index_mode;
   vtx.offset = fetch_instr.offset();
   vtx.indexed = fetch_instr.indexed();
   vtx.uncached = fetch_instr.uncached();
   vtx.elem_size = fetch_instr.elm_size();
   vtx.array_base = fetch_instr.array_base();
   vtx.array_size = fetch_instr.array_size();
   vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();

   if (fetch_instr.use_tc()) {
      if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
         return false;
      }

   } else {
      if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
         return false;
      }
   }

   m_bc->cf_last->vpm = fetch_instr.use_vpm();
   m_bc->cf_last->barrier = 1;

   return true;
}
799
800 bool AssemblyFromShaderLegacyImpl::emit_emit_vertex(const EmitVertex &instr)
801 {
802 int r = r600_bytecode_add_cfinst(m_bc, instr.op());
803 if (!r)
804 m_bc->cf_last->count = instr.stream();
805 assert(m_bc->cf_last->count < 4);
806
807 return r == 0;
808 }
809
810 bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck& instr)
811 {
812 int r = r600_bytecode_add_cfinst(m_bc, instr.op());
813 if (!r)
814 m_bc->cf_last->cf_addr = instr.n_ack();
815
816 return r == 0;
817 }
818
819 bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction& instr)
820 {
821 struct r600_bytecode_output cf;
822
823 memset(&cf, 0, sizeof(struct r600_bytecode_output));
824
825 cf.op = CF_OP_MEM_SCRATCH;
826 cf.elem_size = 3;
827 cf.gpr = instr.gpr().sel();
828 cf.mark = 1;
829 cf.comp_mask = instr.write_mask();
830 cf.swizzle_x = 0;
831 cf.swizzle_y = 1;
832 cf.swizzle_z = 2;
833 cf.swizzle_w = 3;
834 cf.burst_count = 1;
835
836 if (instr.indirect()) {
837 cf.type = 3;
838 cf.index_gpr = instr.address();
839
840 /* The docu seems to be wrong here: In indirect addressing the
841 * address_base seems to be the array_size */
842 cf.array_size = instr.array_size();
843 } else {
844 cf.type = 2;
845 cf.array_base = instr.location();
846 }
847 /* This should be 0, but the address calculation is apparently wrong */
848
849
850 if (r600_bytecode_add_output(m_bc, &cf)){
851 R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
852 return false;
853 }
854
855 return true;
856 }
857
858 extern const std::map<ESDOp, int> ds_opcode_map;
859
/* Emit a GDS (global data share) instruction. A literal UAV id is used
 * directly; a dynamic one is converted to an index and routed through AR
 * into CF index register 1. */
bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr& instr)
{
   struct r600_bytecode_gds gds;

   int uav_idx = -1;
   auto addr = instr.uav_id();
   if (addr->type() != Value::literal) {
      /* Reload index reg 1 unless it already holds this value; inside
       * loops the cached value can't be trusted. */
      if (!m_bc->index_loaded[1] || m_loop_nesting ||
          m_bc->index_reg[1] != addr->sel()) {
         struct r600_bytecode_alu alu;

         /* Shift right by 2 before loading the index (matches the
          * uav_idx = value >> 2 conversion in the literal path).
          * Note: this clobbers the addr register in place. */
         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op2_lshr_int);
         alu.dst.sel = addr->sel();
         alu.dst.chan = addr->chan();
         alu.src[0].sel = addr->sel();
         alu.src[0].chan = addr->chan();
         alu.src[1].sel = ALU_SRC_LITERAL;
         alu.src[1].value = 2;
         alu.last = 1;
         alu.dst.write = 1;
         int r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return false;

         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op1_mova_int);
         alu.dst.chan = 0;
         alu.src[0].sel = addr->sel();
         alu.src[0].chan = addr->chan();
         alu.last = 1;
         r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return false;

         m_bc->ar_loaded = 0;

         /* Copy AR into CF index register 1. */
         alu.op = opcode_map.at(op1_set_cf_idx1);
         alu.dst.chan = 0;
         alu.src[0].sel = 0;
         alu.src[0].chan = 0;
         alu.last = 1;

         r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return false;

         m_bc->index_reg[1] = addr->sel();
         m_bc->index_loaded[1] = true;
      }
   } else {
      const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
      uav_idx = addr_reg.value() >> 2;
   }

   memset(&gds, 0, sizeof(struct r600_bytecode_gds));

   gds.op = ds_opcode_map.at(instr.op());
   gds.dst_gpr = instr.dest_sel();
   gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
   /* bim_one selects indexing through CF index register 1. */
   gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
   gds.src_gpr = instr.src_sel();

   /* CMP_XCHG needs the compare value from the z component; 7 masks the
    * component out. */
   if (instr.op() == DS_OP_CMP_XCHG_RET) {
      gds.src_sel_z = 1;
   } else {
      gds.src_sel_z = 7;
   }

   gds.src_sel_x = instr.src_swizzle(0);
   gds.src_sel_y = instr.src_swizzle(1);

   gds.dst_sel_x = 0;
   gds.dst_sel_y = 7;
   gds.dst_sel_z = 7;
   gds.dst_sel_w = 7;
   gds.src_gpr2 = 0;
   gds.alloc_consume = 1; // Not Cayman

   int r = r600_bytecode_add_gds(m_bc, &gds);
   if (r)
      return false;
   m_bc->cf_last->vpm = 1;
   return true;
}
945
946
/* Emit a RAT (random access target) memory instruction as a MEM_RAT CF
 * instruction. A literal rat id offset is added directly; a dynamic one
 * is routed through AR into CF index register 1. */
bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
{
   struct r600_bytecode_gds gds;

   int rat_idx = -1;
   EBufferIndexMode rat_index_mode = bim_none;
   auto addr = instr.rat_id_offset();

   if (addr) {
      if (addr->type() != Value::literal) {
         rat_index_mode = bim_one;
         /* Reload index reg 1 unless it already holds this value; inside
          * loops the cached value can't be trusted. */
         if (!m_bc->index_loaded[1] || m_loop_nesting || m_bc->index_reg[1] != addr->sel()) {
            struct r600_bytecode_alu alu;

            memset(&alu, 0, sizeof(alu));
            alu.op = opcode_map.at(op1_mova_int);
            alu.dst.chan = 0;
            alu.src[0].sel = addr->sel();
            alu.src[0].chan = addr->chan();
            alu.last = 1;
            int r = r600_bytecode_add_alu(m_bc, &alu);
            if (r)
               return false;

            m_bc->ar_loaded = 0;

            /* Copy AR into CF index register 1. */
            alu.op = opcode_map.at(op1_set_cf_idx1);
            alu.dst.chan = 0;
            alu.src[0].sel = 0;
            alu.src[0].chan = 0;
            alu.last = 1;

            r = r600_bytecode_add_alu(m_bc, &alu);
            if (r)
               return false;

            m_bc->index_reg[1] = addr->sel();
            m_bc->index_loaded[1] = true;

         }
      } else {
         const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
         rat_idx = addr_reg.value();
      }
   }
   memset(&gds, 0, sizeof(struct r600_bytecode_gds));

   r600_bytecode_add_cfinst(m_bc, CF_OP_MEM_RAT);
   auto cf = m_bc->cf_last;
   /* NOTE(review): in the indexed (non-literal) case rat_idx stays -1, so
    * rat.id becomes rat_base - 1 — verify this is the intended base for
    * index-register addressing. */
   cf->rat.id = rat_idx + m_shader->rat_base;
   cf->rat.inst = instr.rat_op();
   cf->rat.index_mode = rat_index_mode;
   /* Output type 3 requests an ack for the write, type 1 does not. */
   cf->output.type = instr.need_ack() ? 3 : 1;
   cf->output.gpr = instr.data_gpr();
   cf->output.index_gpr = instr.index_gpr();
   cf->output.comp_mask = instr.comp_mask();
   cf->output.burst_count = instr.burst_count();
   cf->output.swizzle_x = instr.data_swz(0);
   cf->output.swizzle_y = instr.data_swz(1);
   cf->output.swizzle_z = instr.data_swz(2);
   cf->output.swizzle_w = instr.data_swz(3);
   cf->vpm = 1;
   cf->barrier = 1;
   cf->mark = instr.need_ack();
   cf->output.elem_size = instr.elm_size();
   return true;
}
1014
1015 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
1016 const Value& d)
1017 {
1018 assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
1019
1020 if (d.sel() > 124) {
1021 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel());
1022 return false;
1023 }
1024
1025 dst.sel = d.sel();
1026 dst.chan = d.chan();
1027
1028 if (m_bc->index_reg[1] == dst.sel)
1029 m_bc->index_loaded[1] = false;
1030
1031 if (m_bc->index_reg[0] == dst.sel)
1032 m_bc->index_loaded[0] = false;
1033
1034 return true;
1035 }
1036
1037 bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
1038 {
1039
1040 if (s.type() == Value::gpr && s.sel() > 124) {
1041 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel());
1042 return false;
1043 }
1044
1045 if (s.type() == Value::lds_direct) {
1046 R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
1047 return false;
1048 }
1049
1050 if (s.type() == Value::kconst && s.sel() < 512) {
1051 R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel());
1052 return false;
1053 }
1054
1055 if (s.type() == Value::literal) {
1056 auto& v = static_cast<const LiteralValue&>(s);
1057 if (v.value() == 0) {
1058 src.sel = ALU_SRC_0;
1059 src.chan = 0;
1060 --m_nliterals_in_group;
1061 return true;
1062 }
1063 if (v.value() == 1) {
1064 src.sel = ALU_SRC_1_INT;
1065 src.chan = 0;
1066 --m_nliterals_in_group;
1067 return true;
1068 }
1069 if (v.value_float() == 1.0f) {
1070 src.sel = ALU_SRC_1;
1071 src.chan = 0;
1072 --m_nliterals_in_group;
1073 return true;
1074 }
1075 if (v.value_float() == 0.5f) {
1076 src.sel = ALU_SRC_0_5;
1077 src.chan = 0;
1078 --m_nliterals_in_group;
1079 return true;
1080 }
1081 if (v.value() == 0xffffffff) {
1082 src.sel = ALU_SRC_M_1_INT;
1083 src.chan = 0;
1084 --m_nliterals_in_group;
1085 return true;
1086 }
1087 src.value = v.value();
1088 }
1089
1090 src.sel = s.sel();
1091 src.chan = s.chan();
1092 if (s.type() == Value::kconst) {
1093 const UniformValue& cv = static_cast<const UniformValue&>(s);
1094 src.kc_bank = cv.kcache_bank();
1095 }
1096
1097 return true;
1098 }
1099
1100 const std::map<EAluOp, int> opcode_map = {
1101
1102 {op2_add, ALU_OP2_ADD},
1103 {op2_mul, ALU_OP2_MUL},
1104 {op2_mul_ieee, ALU_OP2_MUL_IEEE},
1105 {op2_max, ALU_OP2_MAX},
1106 {op2_min, ALU_OP2_MIN},
1107 {op2_max_dx10, ALU_OP2_MAX_DX10},
1108 {op2_min_dx10, ALU_OP2_MIN_DX10},
1109 {op2_sete, ALU_OP2_SETE},
1110 {op2_setgt, ALU_OP2_SETGT},
1111 {op2_setge, ALU_OP2_SETGE},
1112 {op2_setne, ALU_OP2_SETNE},
1113 {op2_sete_dx10, ALU_OP2_SETE_DX10},
1114 {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
1115 {op2_setge_dx10, ALU_OP2_SETGE_DX10},
1116 {op2_setne_dx10, ALU_OP2_SETNE_DX10},
1117 {op1_fract, ALU_OP1_FRACT},
1118 {op1_trunc, ALU_OP1_TRUNC},
1119 {op1_ceil, ALU_OP1_CEIL},
1120 {op1_rndne, ALU_OP1_RNDNE},
1121 {op1_floor, ALU_OP1_FLOOR},
1122 {op2_ashr_int, ALU_OP2_ASHR_INT},
1123 {op2_lshr_int, ALU_OP2_LSHR_INT},
1124 {op2_lshl_int, ALU_OP2_LSHL_INT},
1125 {op1_mov, ALU_OP1_MOV},
1126 {op0_nop, ALU_OP0_NOP},
1127 {op2_mul_64, ALU_OP2_MUL_64},
1128 {op1_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
1129 {op1v_flt64_to_flt32, ALU_OP1_FLT32_TO_FLT64},
1130 {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
1131 {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
1132 {op2_pred_sete, ALU_OP2_PRED_SETE},
1133 {op2_pred_setgt, ALU_OP2_PRED_SETGT},
1134 {op2_pred_setge, ALU_OP2_PRED_SETGE},
1135 {op2_pred_setne, ALU_OP2_PRED_SETNE},
1136 //{op2_pred_set_inv, ALU_OP2_PRED_SET},
1137 //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
1138 //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
1139 {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
1140 {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
1141 {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
1142 {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
1143 {op2_kille, ALU_OP2_KILLE},
1144 {op2_killgt, ALU_OP2_KILLGT},
1145 {op2_killge, ALU_OP2_KILLGE},
1146 {op2_killne, ALU_OP2_KILLNE},
1147 {op2_and_int, ALU_OP2_AND_INT},
1148 {op2_or_int, ALU_OP2_OR_INT},
1149 {op2_xor_int, ALU_OP2_XOR_INT},
1150 {op1_not_int, ALU_OP1_NOT_INT},
1151 {op2_add_int, ALU_OP2_ADD_INT},
1152 {op2_sub_int, ALU_OP2_SUB_INT},
1153 {op2_max_int, ALU_OP2_MAX_INT},
1154 {op2_min_int, ALU_OP2_MIN_INT},
1155 {op2_max_uint, ALU_OP2_MAX_UINT},
1156 {op2_min_uint, ALU_OP2_MIN_UINT},
1157 {op2_sete_int, ALU_OP2_SETE_INT},
1158 {op2_setgt_int, ALU_OP2_SETGT_INT},
1159 {op2_setge_int, ALU_OP2_SETGE_INT},
1160 {op2_setne_int, ALU_OP2_SETNE_INT},
1161 {op2_setgt_uint, ALU_OP2_SETGT_UINT},
1162 {op2_setge_uint, ALU_OP2_SETGE_UINT},
1163 {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
1164 {op2_killge_uint, ALU_OP2_KILLGE_UINT},
1165 //p2_prede_int, ALU_OP2_PREDE_INT},
1166 {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
1167 {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
1168 {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
1169 {op2_kille_int, ALU_OP2_KILLE_INT},
1170 {op2_killgt_int, ALU_OP2_KILLGT_INT},
1171 {op2_killge_int, ALU_OP2_KILLGE_INT},
1172 {op2_killne_int, ALU_OP2_KILLNE_INT},
1173 {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
1174 {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
1175 {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
1176 {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
1177 {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
1178 {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
1179 {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
1180 {op1_bfrev_int, ALU_OP1_BFREV_INT},
1181 {op2_addc_uint, ALU_OP2_ADDC_UINT},
1182 {op2_subb_uint, ALU_OP2_SUBB_UINT},
1183 {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
1184 {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
1185 {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
1186 {op2_set_mode, ALU_OP2_SET_MODE},
1187 {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
1188 {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
1189 {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
1190 {op1_exp_ieee, ALU_OP1_EXP_IEEE},
1191 {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
1192 {op1_log_ieee, ALU_OP1_LOG_IEEE},
1193 {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
1194 {op1_recip_ff, ALU_OP1_RECIP_FF},
1195 {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
1196 {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
1197 {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
1198 {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
1199 {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
1200 {op1_sin, ALU_OP1_SIN},
1201 {op1_cos, ALU_OP1_COS},
1202 {op2_mullo_int, ALU_OP2_MULLO_INT},
1203 {op2_mulhi_int, ALU_OP2_MULHI_INT},
1204 {op2_mullo_uint, ALU_OP2_MULLO_UINT},
1205 {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
1206 {op1_recip_int, ALU_OP1_RECIP_INT},
1207 {op1_recip_uint, ALU_OP1_RECIP_UINT},
1208 {op1_recip_64, ALU_OP2_RECIP_64},
1209 {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
1210 {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
1211 {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
1212 {op1_sqrt_64, ALU_OP2_SQRT_64},
1213 {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
1214 {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
1215 {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
1216 {op2_bfm_int, ALU_OP2_BFM_INT},
1217 {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
1218 {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
1219 {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
1220 {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
1221 {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
1222 {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
1223 {op1_bcnt_int, ALU_OP1_BCNT_INT},
1224 {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
1225 {op1_ffbl_int, ALU_OP1_FFBL_INT},
1226 {op1_ffbh_int, ALU_OP1_FFBH_INT},
1227 {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
1228 {op2_dot_ieee, ALU_OP2_DOT_IEEE},
1229 {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
1230 {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
1231 {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
1232 {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
1233 {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
1234 {op2_mul_uint24, ALU_OP2_MUL_UINT24},
1235 {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
1236 {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
1237 {op2_sete_64, ALU_OP2_SETE_64},
1238 {op2_setne_64, ALU_OP2_SETNE_64},
1239 {op2_setgt_64, ALU_OP2_SETGT_64},
1240 {op2_setge_64, ALU_OP2_SETGE_64},
1241 {op2_min_64, ALU_OP2_MIN_64},
1242 {op2_max_64, ALU_OP2_MAX_64},
1243 {op2_dot4, ALU_OP2_DOT4},
1244 {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
1245 {op2_cube, ALU_OP2_CUBE},
1246 {op1_max4, ALU_OP1_MAX4},
1247 {op1_frexp_64, ALU_OP1_FREXP_64},
1248 {op1_ldexp_64, ALU_OP2_LDEXP_64},
1249 {op1_fract_64, ALU_OP1_FRACT_64},
1250 {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
1251 {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
1252 {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
1253 {op2_add_64, ALU_OP2_ADD_64},
1254 {op1_mova_int, ALU_OP1_MOVA_INT},
1255 {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
1256 {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
1257 {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
1258 {op2_dot, ALU_OP2_DOT},
1259 //p2_mul_prev, ALU_OP2_MUL_PREV},
1260 //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
1261 //p2_add_prev, ALU_OP2_ADD_PREV},
1262 {op2_muladd_prev, ALU_OP2_MULADD_PREV},
1263 {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
1264 {op2_interp_xy, ALU_OP2_INTERP_XY},
1265 {op2_interp_zw, ALU_OP2_INTERP_ZW},
1266 {op2_interp_x, ALU_OP2_INTERP_X},
1267 {op2_interp_z, ALU_OP2_INTERP_Z},
1268 {op0_store_flags, ALU_OP1_STORE_FLAGS},
1269 {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
1270 {op0_lds_1a, ALU_OP2_LDS_1A},
1271 {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
1272 {op0_lds_2a, ALU_OP2_LDS_2A},
1273 {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
1274 {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
1275 {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
1276 // {op 3 all left shift 6
1277 {op3_bfe_uint, ALU_OP3_BFE_UINT},
1278 {op3_bfe_int, ALU_OP3_BFE_INT},
1279 {op3_bfi_int, ALU_OP3_BFI_INT},
1280 {op3_fma, ALU_OP3_FMA},
1281 {op3_cndne_64, ALU_OP3_CNDNE_64},
1282 {op3_fma_64, ALU_OP3_FMA_64},
1283 {op3_lerp_uint, ALU_OP3_LERP_UINT},
1284 {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
1285 {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
1286 {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
1287 {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
1288 {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
1289 {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
1290 {op3_muladd, ALU_OP3_MULADD},
1291 {op3_muladd_m2, ALU_OP3_MULADD_M2},
1292 {op3_muladd_m4, ALU_OP3_MULADD_M4},
1293 {op3_muladd_d2, ALU_OP3_MULADD_D2},
1294 {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
1295 {op3_cnde, ALU_OP3_CNDE},
1296 {op3_cndgt, ALU_OP3_CNDGT},
1297 {op3_cndge, ALU_OP3_CNDGE},
1298 {op3_cnde_int, ALU_OP3_CNDE_INT},
1299 {op3_cndgt_int, ALU_OP3_CNDGT_INT},
1300 {op3_cndge_int, ALU_OP3_CNDGE_INT},
1301 {op3_mul_lit, ALU_OP3_MUL_LIT},
1302 };
1303
/* Translation table from the IR GDS opcodes (ESDOp) to the r600 bytecode
 * FETCH_OP_GDS_* instruction codes. DS_OP_INVALID maps to 0 and is not a
 * valid instruction to emit. */
const std::map<ESDOp, int> ds_opcode_map = {
   {DS_OP_ADD, FETCH_OP_GDS_ADD},
   {DS_OP_SUB, FETCH_OP_GDS_SUB},
   {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
   {DS_OP_INC, FETCH_OP_GDS_INC},
   {DS_OP_DEC, FETCH_OP_GDS_DEC},
   {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
   {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
   {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
   {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
   {DS_OP_AND, FETCH_OP_GDS_AND},
   {DS_OP_OR, FETCH_OP_GDS_OR},
   {DS_OP_XOR, FETCH_OP_GDS_XOR},
   {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
   {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
   {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
   {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
   {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
   {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
   {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
   {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
   {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
   {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
   {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
   {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
   {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
   {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
   {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
   {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
   {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
   {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
   {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
   {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
   {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
   {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
   {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
   {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
   {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
   {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
   {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
   {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
   {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
   {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
   {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
   {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
   {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
   {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
   {DS_OP_INVALID, 0},
};
1354
1355 }