3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "sfn_ir_to_assembly.h"
28 #include "sfn_conditionaljumptracker.h"
29 #include "sfn_callstack.h"
30 #include "sfn_instruction_gds.h"
31 #include "sfn_instruction_misc.h"
32 #include "sfn_instruction_fetch.h"
34 #include "../r600_shader.h"
35 #include "../r600_sq.h"
41 struct AssemblyFromShaderLegacyImpl
{
43 AssemblyFromShaderLegacyImpl(r600_shader
*sh
, r600_shader_key
*key
);
44 bool emit(const Instruction::Pointer i
);
45 void reset_addr_register() {m_last_addr
.reset();}
48 bool emit_alu(const AluInstruction
& ai
, ECFAluOpCode cf_op
);
49 bool emit_export(const ExportInstruction
& exi
);
50 bool emit_streamout(const StreamOutIntruction
& instr
);
51 bool emit_memringwrite(const MemRingOutIntruction
& instr
);
52 bool emit_tex(const TexInstruction
& tex_instr
);
53 bool emit_vtx(const FetchInstruction
& fetch_instr
);
54 bool emit_if_start(const IfInstruction
& if_instr
);
55 bool emit_else(const ElseInstruction
& else_instr
);
56 bool emit_endif(const IfElseEndInstruction
& endif_instr
);
57 bool emit_emit_vertex(const EmitVertex
&instr
);
59 bool emit_loop_begin(const LoopBeginInstruction
& instr
);
60 bool emit_loop_end(const LoopEndInstruction
& instr
);
61 bool emit_loop_break(const LoopBreakInstruction
& instr
);
62 bool emit_loop_continue(const LoopContInstruction
& instr
);
63 bool emit_wait_ack(const WaitAck
& instr
);
64 bool emit_wr_scratch(const WriteScratchInstruction
& instr
);
65 bool emit_gds(const GDSInstr
& instr
);
66 bool emit_rat(const RatInstruction
& instr
);
68 bool emit_load_addr(PValue addr
);
69 bool emit_fs_pixel_export(const ExportInstruction
& exi
);
70 bool emit_vs_pos_export(const ExportInstruction
& exi
);
71 bool emit_vs_param_export(const ExportInstruction
& exi
);
72 bool copy_dst(r600_bytecode_alu_dst
& dst
, const Value
& src
);
73 bool copy_src(r600_bytecode_alu_src
& src
, const Value
& s
);
77 ConditionalJumpTracker m_jump_tracker
;
78 CallStack m_callstack
;
82 r600_shader
*m_shader
;
83 r600_shader_key
*m_key
;
84 r600_bytecode_output m_output
;
85 unsigned m_max_color_exports
;
87 bool has_param_output
;
90 int m_nliterals_in_group
;
91 std::set
<int> vtx_fetch_results
;
95 AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader
*sh
,
98 impl
= new AssemblyFromShaderLegacyImpl(sh
, key
);
101 AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
106 bool AssemblyFromShaderLegacy::do_lower(const std::vector
<InstructionBlock
>& ir
)
108 if (impl
->m_shader
->processor_type
== PIPE_SHADER_VERTEX
&&
109 impl
->m_shader
->ninput
> 0)
110 r600_bytecode_add_cfinst(impl
->m_bc
, CF_OP_CALL_FS
);
113 std::vector
<Instruction::Pointer
> exports
;
115 for (const auto& block
: ir
) {
116 for (const auto& i
: block
) {
119 if (i
->type() != Instruction::alu
)
120 impl
->reset_addr_register();
124 for (const auto& i : exports) {
125 if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
130 const struct cf_op_info
*last
= nullptr;
131 if (impl
->m_bc
->cf_last
)
132 last
= r600_isa_cf(impl
->m_bc
->cf_last
->op
);
134 /* alu clause instructions don't have EOP bit, so add NOP */
135 if (!last
|| last
->flags
& CF_ALU
|| impl
->m_bc
->cf_last
->op
== CF_OP_LOOP_END
136 || impl
->m_bc
->cf_last
->op
== CF_OP_POP
)
137 r600_bytecode_add_cfinst(impl
->m_bc
, CF_OP_NOP
);
139 /* A fetch shader only can't be EOP (results in hang), but we can replace it
141 else if (impl
->m_bc
->cf_last
->op
== CF_OP_CALL_FS
)
142 impl
->m_bc
->cf_last
->op
= CF_OP_NOP
;
144 impl
->m_bc
->cf_last
->end_of_program
= 1;
149 bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i
)
151 if (i
->type() != Instruction::vtx
)
152 vtx_fetch_results
.clear();
154 sfn_log
<< SfnLog::assembly
<< "Emit from '" << *i
<< "\n";
156 case Instruction::alu
:
157 return emit_alu(static_cast<const AluInstruction
&>(*i
), cf_alu_undefined
);
158 case Instruction::exprt
:
159 return emit_export(static_cast<const ExportInstruction
&>(*i
));
160 case Instruction::tex
:
161 return emit_tex(static_cast<const TexInstruction
&>(*i
));
162 case Instruction::vtx
:
163 return emit_vtx(static_cast<const FetchInstruction
&>(*i
));
164 case Instruction::cond_if
:
165 return emit_if_start(static_cast<const IfInstruction
&>(*i
));
166 case Instruction::cond_else
:
167 return emit_else(static_cast<const ElseInstruction
&>(*i
));
168 case Instruction::cond_endif
:
169 return emit_endif(static_cast<const IfElseEndInstruction
&>(*i
));
170 case Instruction::loop_begin
:
171 return emit_loop_begin(static_cast<const LoopBeginInstruction
&>(*i
));
172 case Instruction::loop_end
:
173 return emit_loop_end(static_cast<const LoopEndInstruction
&>(*i
));
174 case Instruction::loop_break
:
175 return emit_loop_break(static_cast<const LoopBreakInstruction
&>(*i
));
176 case Instruction::loop_continue
:
177 return emit_loop_continue(static_cast<const LoopContInstruction
&>(*i
));
178 case Instruction::streamout
:
179 return emit_streamout(static_cast<const StreamOutIntruction
&>(*i
));
180 case Instruction::ring
:
181 return emit_memringwrite(static_cast<const MemRingOutIntruction
&>(*i
));
182 case Instruction::emit_vtx
:
183 return emit_emit_vertex(static_cast<const EmitVertex
&>(*i
));
184 case Instruction::wait_ack
:
185 return emit_wait_ack(static_cast<const WaitAck
&>(*i
));
186 case Instruction::mem_wr_scratch
:
187 return emit_wr_scratch(static_cast<const WriteScratchInstruction
&>(*i
));
188 case Instruction::gds
:
189 return emit_gds(static_cast<const GDSInstr
&>(*i
));
190 case Instruction::rat
:
191 return emit_rat(static_cast<const RatInstruction
&>(*i
));
197 AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader
*sh
,
198 r600_shader_key
*key
):
203 has_pos_output(false),
204 has_param_output(false),
206 m_nliterals_in_group(0)
208 m_max_color_exports
= MAX2(m_key
->ps
.nr_cbufs
, 1);
211 extern const std::map
<EAluOp
, int> opcode_map
;
// Load the given value into the AR (address) register used for relative
// GPR addressing: records the source sel/chan in the bytecode state.
// NOTE(review): this extraction is missing interior lines of the function
// (orig. 214, 217-219, 221-223), likely the ALU MOVA emission and the
// return — confirm against the full source.
213 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr
)
215 m_bc
->ar_reg
= addr
->sel();
216 m_bc
->ar_chan
= addr
->chan();
217 sfn_log
<< SfnLog::assembly
<< " Prepare " << *addr
<< " to address register\n";
225 bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction
& ai
, ECFAluOpCode cf_op
)
228 struct r600_bytecode_alu alu
;
229 memset(&alu
, 0, sizeof(alu
));
232 if (opcode_map
.find(ai
.opcode()) == opcode_map
.end()) {
233 std::cerr
<< "Opcode not handled for " << ai
<<"\n";
237 for (unsigned i
= 0; i
< ai
.n_sources(); ++i
) {
239 if (s
.type() == Value::literal
)
240 ++m_nliterals_in_group
;
243 /* This instruction group would exeed the limit of literals, so
244 * force a new instruction group by adding a NOP as last
245 * instruction. This will no loner be needed with a real
247 if (m_nliterals_in_group
> 4) {
248 sfn_log
<< SfnLog::assembly
<< " Have " << m_nliterals_in_group
<< " inject a last op (nop)\n";
249 alu
.op
= ALU_OP0_NOP
;
251 int retval
= r600_bytecode_add_alu(m_bc
, &alu
);
254 memset(&alu
, 0, sizeof(alu
));
255 m_nliterals_in_group
= 0;
258 alu
.op
= opcode_map
.at(ai
.opcode());
260 /* Missing test whether ai actually has a dest */
261 auto dst
= ai
.dest();
264 if (!copy_dst(alu
.dst
, *dst
))
267 alu
.dst
.write
= ai
.flag(alu_write
);
268 alu
.dst
.clamp
= ai
.flag(alu_dst_clamp
);
270 if (dst
->type() == Value::gpr_array_value
) {
271 auto& v
= static_cast<const GPRArrayValue
&>(*dst
);
272 PValue addr
= v
.indirect();
274 if (!m_last_addr
|| *addr
!= *m_last_addr
) {
275 emit_load_addr(addr
);
278 alu
.dst
.rel
= addr
? 1 : 0;;
283 alu
.is_op3
= ai
.n_sources() == 3;
285 for (unsigned i
= 0; i
< ai
.n_sources(); ++i
) {
288 if (!copy_src(alu
.src
[i
], s
))
290 alu
.src
[i
].neg
= ai
.flag(AluInstruction::src_neg_flags
[i
]);
292 if (s
.type() == Value::gpr_array_value
) {
293 auto& v
= static_cast<const GPRArrayValue
&>(s
);
294 PValue addr
= v
.indirect();
296 assert(!addr_in_use
|| (*addr_in_use
== *addr
));
297 if (!m_last_addr
|| *addr
!= *m_last_addr
) {
298 emit_load_addr(addr
);
301 alu
.src
[i
].rel
= addr
? 1 : 0;
305 alu
.src
[i
].abs
= ai
.flag(AluInstruction::src_abs_flags
[i
]);
308 if (ai
.bank_swizzle() != alu_vec_unknown
)
309 alu
.bank_swizzle_force
= ai
.bank_swizzle();
311 alu
.last
= ai
.flag(alu_last_instr
);
312 alu
.update_pred
= ai
.flag(alu_update_pred
);
313 alu
.execute_mask
= ai
.flag(alu_update_exec
);
315 /* If the destination register is equal to the last loaded address register
316 * then clear the latter one, because the values will no longer be identical */
318 sfn_log
<< SfnLog::assembly
<< " Current address register is " << *m_last_addr
<< "\n";
321 sfn_log
<< SfnLog::assembly
<< " Current dst register is " << *dst
<< "\n";
323 if (dst
&& m_last_addr
)
324 if (*dst
== *m_last_addr
) {
325 sfn_log
<< SfnLog::assembly
<< " Clear address register (was " << *m_last_addr
<< "\n";
329 if (cf_op
== cf_alu_undefined
)
330 cf_op
= ai
.cf_type();
334 case cf_alu
: type
= CF_OP_ALU
; break;
335 case cf_alu_push_before
: type
= CF_OP_ALU_PUSH_BEFORE
; break;
336 case cf_alu_pop_after
: type
= CF_OP_ALU_POP_AFTER
; break;
337 case cf_alu_pop2_after
: type
= CF_OP_ALU_POP2_AFTER
; break;
338 case cf_alu_break
: type
= CF_OP_ALU_BREAK
; break;
339 case cf_alu_else_after
: type
= CF_OP_ALU_ELSE_AFTER
; break;
340 case cf_alu_continue
: type
= CF_OP_ALU_CONTINUE
; break;
341 case cf_alu_extended
: type
= CF_OP_ALU_EXT
; break;
343 assert(0 && "cf_alu_undefined should have been replaced");
347 m_nliterals_in_group
= 0;
349 bool retval
= !r600_bytecode_add_alu_type(m_bc
, &alu
, type
);
351 if (ai
.opcode() == op1_mova_int
)
354 if (ai
.opcode() == op1_set_cf_idx0
)
355 m_bc
->index_loaded
[0] = 1;
357 if (ai
.opcode() == op1_set_cf_idx1
)
358 m_bc
->index_loaded
[1] = 1;
361 m_bc
->force_add_cf
|= (ai
.opcode() == op2_kille
||
362 ai
.opcode() == op2_killne_int
||
363 ai
.opcode() == op1_set_cf_idx0
||
364 ai
.opcode() == op1_set_cf_idx1
);
368 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction
& exi
)
370 r600_bytecode_output output
;
371 memset(&output
, 0, sizeof(output
));
372 assert(exi
.gpr().type() == Value::gpr_vector
);
373 const auto& gpr
= exi
.gpr();
374 output
.gpr
= gpr
.sel();
375 output
.elem_size
= 3;
376 output
.swizzle_x
= gpr
.chan_i(0);
377 output
.swizzle_y
= gpr
.chan_i(1);
378 output
.swizzle_z
= gpr
.chan_i(2);
379 output
.swizzle_w
= gpr
.chan_i(3);
380 output
.burst_count
= 1;
381 output
.array_base
= 60 + exi
.location();
382 output
.op
= exi
.is_last_export() ? CF_OP_EXPORT_DONE
: CF_OP_EXPORT
;
383 output
.type
= exi
.export_type();
386 if (r600_bytecode_add_output(m_bc
, &output
)) {
387 R600_ERR("Error adding pixel export at location %d\n", exi
.location());
395 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction
& exi
)
397 r600_bytecode_output output
;
398 assert(exi
.gpr().type() == Value::gpr_vector
);
399 const auto& gpr
= exi
.gpr();
401 memset(&output
, 0, sizeof(output
));
402 output
.gpr
= gpr
.sel();
403 output
.elem_size
= 3;
404 output
.swizzle_x
= gpr
.chan_i(0);
405 output
.swizzle_y
= gpr
.chan_i(1);
406 output
.swizzle_z
= gpr
.chan_i(2);
407 output
.swizzle_w
= gpr
.chan_i(3);
408 output
.burst_count
= 1;
409 output
.array_base
= exi
.location();
410 output
.op
= exi
.is_last_export() ? CF_OP_EXPORT_DONE
: CF_OP_EXPORT
;
411 output
.type
= exi
.export_type();
414 if (r600_bytecode_add_output(m_bc
, &output
)) {
415 R600_ERR("Error adding pixel export at location %d\n", exi
.location());
423 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction
& exi
)
425 if (exi
.location() >= m_max_color_exports
&& exi
.location() < 60) {
426 R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
427 exi
.location(), m_max_color_exports
);
431 assert(exi
.gpr().type() == Value::gpr_vector
);
432 const auto& gpr
= exi
.gpr();
434 r600_bytecode_output output
;
435 memset(&output
, 0, sizeof(output
));
437 output
.gpr
= gpr
.sel();
438 output
.elem_size
= 3;
439 output
.swizzle_x
= gpr
.chan_i(0);
440 output
.swizzle_y
= gpr
.chan_i(1);
441 output
.swizzle_z
= gpr
.chan_i(2);
442 output
.swizzle_w
= m_key
->ps
.alpha_to_one
? 5 : gpr
.chan_i(3); ;
443 output
.burst_count
= 1;
444 output
.array_base
= exi
.location();
445 output
.op
= exi
.is_last_export() ? CF_OP_EXPORT_DONE
: CF_OP_EXPORT
;
446 output
.type
= exi
.export_type();
449 if (r600_bytecode_add_output(m_bc
, &output
)) {
450 R600_ERR("Error adding pixel export at location %d\n", exi
.location());
// Dispatch an export instruction to the type-specific emitter
// (pixel, position, or parameter export); any other export type is
// reported as unsupported via R600_ERR.
// NOTE(review): the extraction drops the braces, the default label and
// the final return (orig. 459, 467, 469-470) — confirm against the full
// source.
458 bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction
& exi
)
460 switch (exi
.export_type()) {
461 case ExportInstruction::et_pixel
:
462 return emit_fs_pixel_export(exi
);
463 case ExportInstruction::et_pos
:
464 return emit_vs_pos_export(exi
);
465 case ExportInstruction::et_param
:
466 return emit_vs_param_export(exi
);
468 R600_ERR("shader_from_nir: export %d type not yet supported\n", exi
.export_type());
473 bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction
& if_instr
)
475 assert(m_bc
->chip_class
== EVERGREEN
);
477 bool needs_workaround
= false;
478 int elems
= m_callstack
.push(FC_PUSH_VPM
);
480 if (m_bc
->family
!= CHIP_HEMLOCK
&&
481 m_bc
->family
!= CHIP_CYPRESS
&&
482 m_bc
->family
!= CHIP_JUNIPER
) {
483 unsigned dmod1
= (elems
- 1) % m_bc
->stack
.entry_size
;
484 unsigned dmod2
= (elems
) % m_bc
->stack
.entry_size
;
486 if (elems
&& (!dmod1
|| !dmod2
))
487 needs_workaround
= true;
490 auto& pred
= if_instr
.pred();
491 auto op
= cf_alu_push_before
;
493 if (needs_workaround
) {
494 r600_bytecode_add_cfinst(m_bc
, CF_OP_PUSH
);
495 m_bc
->cf_last
->cf_addr
= m_bc
->cf_last
->id
+ 2;
500 r600_bytecode_add_cfinst(m_bc
, CF_OP_JUMP
);
502 m_jump_tracker
.push(m_bc
->cf_last
, jt_if
);
// Emit the CF ELSE instruction for an if/else construct: adds CF_OP_ELSE
// with pop_count = 1 and registers it as the mid-point of the pending
// "if" jump so the jump tracker can patch addresses later.
// NOTE(review): surrounding braces (orig. 507, 511) are missing from
// this extraction.
506 bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED
const ElseInstruction
& else_instr
)
508 r600_bytecode_add_cfinst(m_bc
, CF_OP_ELSE
);
509 m_bc
->cf_last
->pop_count
= 1;
510 return m_jump_tracker
.add_mid(m_bc
->cf_last
, jt_if
);
513 bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED
const IfElseEndInstruction
& endif_instr
)
515 m_callstack
.pop(FC_PUSH_VPM
);
517 unsigned force_pop
= m_bc
->force_add_cf
;
521 if (m_bc
->cf_last
->op
== CF_OP_ALU
)
523 else if (m_bc
->cf_last
->op
== CF_OP_ALU_POP_AFTER
)
528 m_bc
->cf_last
->op
= CF_OP_ALU_POP_AFTER
;
529 m_bc
->force_add_cf
= 1;
530 } else if (alu_pop
== 2) {
531 m_bc
->cf_last
->op
= CF_OP_ALU_POP2_AFTER
;
532 m_bc
->force_add_cf
= 1;
539 r600_bytecode_add_cfinst(m_bc
, CF_OP_POP
);
540 m_bc
->cf_last
->pop_count
= 1;
541 m_bc
->cf_last
->cf_addr
= m_bc
->cf_last
->id
+ 2;
544 return m_jump_tracker
.pop(m_bc
->cf_last
, jt_if
);
// Open a loop: emit CF_OP_LOOP_START_DX10, push it on the jump tracker
// as a loop-type jump (so LOOP_END can be paired with it), and push a
// loop frame on the call stack for stack-depth accounting.
// NOTE(review): braces and the trailing return (orig. 548, 552-553) are
// missing from this extraction; presumably the function also bumps
// m_loop_nesting — confirm against the full source.
547 bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED
const LoopBeginInstruction
& instr
)
549 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_START_DX10
);
550 m_jump_tracker
.push(m_bc
->cf_last
, jt_loop
);
551 m_callstack
.push(FC_LOOP
);
// Close a loop: emit CF_OP_LOOP_END, pop the loop frame from the call
// stack, and resolve the pending loop jump in the jump tracker.
// The assert checks that we are actually inside a loop
// (m_loop_nesting > 0); the matching decrement (orig. 561) is missing
// from this extraction — confirm against the full source.
556 bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED
const LoopEndInstruction
& instr
)
558 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_END
);
559 m_callstack
.pop(FC_LOOP
);
560 assert(m_loop_nesting
);
562 return m_jump_tracker
.pop(m_bc
->cf_last
, jt_loop
);
// Emit CF_OP_LOOP_BREAK and register it as a mid-point of the enclosing
// loop jump so the tracker can patch its target address when the loop
// is closed. Braces (orig. 566, 569) are missing from this extraction.
565 bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED
const LoopBreakInstruction
& instr
)
567 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_BREAK
);
568 return m_jump_tracker
.add_mid(m_bc
->cf_last
, jt_loop
);
// Emit CF_OP_LOOP_CONTINUE and register it as a mid-point of the
// enclosing loop jump, mirroring emit_loop_break. Braces (orig. 572,
// 575) are missing from this extraction.
571 bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED
const LoopContInstruction
&instr
)
573 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_CONTINUE
);
574 return m_jump_tracker
.add_mid(m_bc
->cf_last
, jt_loop
);
577 bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction
& so_instr
)
579 struct r600_bytecode_output output
;
580 memset(&output
, 0, sizeof(struct r600_bytecode_output
));
582 output
.gpr
= so_instr
.gpr().sel();
583 output
.elem_size
= so_instr
.element_size();
584 output
.array_base
= so_instr
.array_base();
585 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE
;
586 output
.burst_count
= so_instr
.burst_count();
587 output
.array_size
= so_instr
.array_size();
588 output
.comp_mask
= so_instr
.comp_mask();
589 output
.op
= so_instr
.op();
591 assert(output
.op
>= CF_OP_MEM_STREAM0_BUF0
&& output
.op
<= CF_OP_MEM_STREAM3_BUF3
);
594 if (r600_bytecode_add_output(m_bc
, &output
)) {
595 R600_ERR("shader_from_nir: Error creating stream output instruction\n");
602 bool AssemblyFromShaderLegacyImpl::emit_memringwrite(const MemRingOutIntruction
& instr
)
604 struct r600_bytecode_output output
;
605 memset(&output
, 0, sizeof(struct r600_bytecode_output
));
607 output
.gpr
= instr
.gpr().sel();
608 output
.type
= instr
.type();
609 output
.elem_size
= instr
.ncomp();
610 output
.comp_mask
= 0xF;
611 output
.burst_count
= 1;
612 output
.op
= instr
.op();
613 if (instr
.type() == mem_write_ind
|| instr
.type() == mem_write_ind_ack
) {
614 output
.index_gpr
= instr
.index_reg();
615 output
.array_size
= 0xfff;
617 output
.array_base
= instr
.array_base();
619 if (r600_bytecode_add_output(m_bc
, &output
)) {
620 R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
627 bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction
& tex_instr
)
629 auto addr
= tex_instr
.sampler_offset();
630 if (addr
&& (!m_bc
->index_loaded
[1] || m_loop_nesting
631 || m_bc
->index_reg
[1] != addr
->sel())) {
632 struct r600_bytecode_alu alu
;
633 memset(&alu
, 0, sizeof(alu
));
634 alu
.op
= opcode_map
.at(op1_mova_int
);
636 alu
.src
[0].sel
= addr
->sel();
637 alu
.src
[0].chan
= addr
->chan();
639 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
645 alu
.op
= opcode_map
.at(op1_set_cf_idx1
);
651 r
= r600_bytecode_add_alu(m_bc
, &alu
);
655 m_bc
->index_reg
[1] = addr
->sel();
656 m_bc
->index_loaded
[1] = true;
659 r600_bytecode_tex tex
;
660 memset(&tex
, 0, sizeof(struct r600_bytecode_tex
));
661 tex
.op
= tex_instr
.opcode();
662 tex
.sampler_id
= tex_instr
.sampler_id();
663 tex
.sampler_index_mode
= 0;
664 tex
.resource_id
= tex_instr
.resource_id();;
665 tex
.resource_index_mode
= 0;
666 tex
.src_gpr
= tex_instr
.src().sel();
667 tex
.dst_gpr
= tex_instr
.dst().sel();
668 tex
.dst_sel_x
= tex_instr
.dest_swizzle(0);
669 tex
.dst_sel_y
= tex_instr
.dest_swizzle(1);
670 tex
.dst_sel_z
= tex_instr
.dest_swizzle(2);
671 tex
.dst_sel_w
= tex_instr
.dest_swizzle(3);
672 tex
.src_sel_x
= tex_instr
.src().chan_i(0);
673 tex
.src_sel_y
= tex_instr
.src().chan_i(1);
674 tex
.src_sel_z
= tex_instr
.src().chan_i(2);
675 tex
.src_sel_w
= tex_instr
.src().chan_i(3);
676 tex
.coord_type_x
= !tex_instr
.has_flag(TexInstruction::x_unnormalized
);
677 tex
.coord_type_y
= !tex_instr
.has_flag(TexInstruction::y_unnormalized
);
678 tex
.coord_type_z
= !tex_instr
.has_flag(TexInstruction::z_unnormalized
);
679 tex
.coord_type_w
= !tex_instr
.has_flag(TexInstruction::w_unnormalized
);
680 tex
.offset_x
= tex_instr
.get_offset(0);
681 tex
.offset_y
= tex_instr
.get_offset(1);
682 tex
.offset_z
= tex_instr
.get_offset(2);
683 tex
.resource_index_mode
= (!!addr
) ? 2 : 0;
684 tex
.sampler_index_mode
= tex
.resource_index_mode
;
686 if (tex_instr
.opcode() == TexInstruction::get_gradient_h
||
687 tex_instr
.opcode() == TexInstruction::get_gradient_v
)
688 tex
.inst_mod
= tex_instr
.has_flag(TexInstruction::grad_fine
) ? 1 : 0;
690 tex
.inst_mod
= tex_instr
.inst_mode();
691 if (r600_bytecode_add_tex(m_bc
, &tex
)) {
692 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
698 bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction
& fetch_instr
)
700 int buffer_offset
= 0;
701 auto addr
= fetch_instr
.buffer_offset();
702 auto index_mode
= fetch_instr
.buffer_index_mode();
705 if (addr
->type() == Value::literal
) {
706 const auto& boffs
= static_cast<const LiteralValue
&>(*addr
);
707 buffer_offset
= boffs
.value();
709 index_mode
= bim_zero
;
710 if ((!m_bc
->index_loaded
[0] || m_loop_nesting
|| m_bc
->index_reg
[0] != addr
->sel())) {
711 struct r600_bytecode_alu alu
;
712 memset(&alu
, 0, sizeof(alu
));
713 alu
.op
= opcode_map
.at(op1_mova_int
);
715 alu
.src
[0].sel
= addr
->sel();
716 alu
.src
[0].chan
= addr
->chan();
718 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
724 alu
.op
= opcode_map
.at(op1_set_cf_idx0
);
730 r
= r600_bytecode_add_alu(m_bc
, &alu
);
734 m_bc
->index_reg
[0] = addr
->sel();
735 m_bc
->index_loaded
[0] = true;
740 if (fetch_instr
.has_prelude()) {
741 for(auto &i
: fetch_instr
.prelude()) {
747 if (vtx_fetch_results
.find(fetch_instr
.src().sel()) !=
748 vtx_fetch_results
.end()) {
749 m_bc
->force_add_cf
= 1;
750 vtx_fetch_results
.clear();
752 vtx_fetch_results
.insert(fetch_instr
.dst().sel());
754 struct r600_bytecode_vtx vtx
;
755 memset(&vtx
, 0, sizeof(vtx
));
756 vtx
.op
= fetch_instr
.vc_opcode();
757 vtx
.buffer_id
= fetch_instr
.buffer_id() + buffer_offset
;
758 vtx
.fetch_type
= fetch_instr
.fetch_type();
759 vtx
.src_gpr
= fetch_instr
.src().sel();
760 vtx
.src_sel_x
= fetch_instr
.src().chan();
761 vtx
.mega_fetch_count
= fetch_instr
.mega_fetch_count();
762 vtx
.dst_gpr
= fetch_instr
.dst().sel();
763 vtx
.dst_sel_x
= fetch_instr
.swz(0); /* SEL_X */
764 vtx
.dst_sel_y
= fetch_instr
.swz(1); /* SEL_Y */
765 vtx
.dst_sel_z
= fetch_instr
.swz(2); /* SEL_Z */
766 vtx
.dst_sel_w
= fetch_instr
.swz(3); /* SEL_W */
767 vtx
.use_const_fields
= fetch_instr
.use_const_fields();
768 vtx
.data_format
= fetch_instr
.data_format();
769 vtx
.num_format_all
= fetch_instr
.num_format(); /* NUM_FORMAT_SCALED */
770 vtx
.format_comp_all
= fetch_instr
.is_signed(); /* FORMAT_COMP_SIGNED */
771 vtx
.endian
= fetch_instr
.endian_swap();
772 vtx
.buffer_index_mode
= index_mode
;
773 vtx
.offset
= fetch_instr
.offset();
774 vtx
.indexed
= fetch_instr
.indexed();
775 vtx
.uncached
= fetch_instr
.uncached();
776 vtx
.elem_size
= fetch_instr
.elm_size();
777 vtx
.array_base
= fetch_instr
.array_base();
778 vtx
.array_size
= fetch_instr
.array_size();
779 vtx
.srf_mode_all
= fetch_instr
.srf_mode_no_zero();
781 if (fetch_instr
.use_tc()) {
782 if ((r600_bytecode_add_vtx_tc(m_bc
, &vtx
))) {
783 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
788 if ((r600_bytecode_add_vtx(m_bc
, &vtx
))) {
789 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
794 m_bc
->cf_last
->vpm
= fetch_instr
.use_vpm();
795 m_bc
->cf_last
->barrier
= 1;
// Emit a geometry-shader EMIT/CUT CF instruction (the concrete opcode
// comes from instr.op()) and store the stream index in the CF count
// field; only streams 0-3 are valid, hence the assert.
// NOTE(review): the error check on r and the return (orig. 803,
// 806-807) are missing from this extraction — confirm against the full
// source.
800 bool AssemblyFromShaderLegacyImpl::emit_emit_vertex(const EmitVertex
&instr
)
802 int r
= r600_bytecode_add_cfinst(m_bc
, instr
.op());
804 m_bc
->cf_last
->count
= instr
.stream();
805 assert(m_bc
->cf_last
->count
< 4);
// Emit a WAIT_ACK-style CF instruction; the number of outstanding acks
// to wait for is encoded in the CF address field (cf_addr).
// NOTE(review): the error check on r and the return (orig. 813,
// 815-817) are missing from this extraction — confirm against the full
// source.
810 bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck
& instr
)
812 int r
= r600_bytecode_add_cfinst(m_bc
, instr
.op());
814 m_bc
->cf_last
->cf_addr
= instr
.n_ack();
819 bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction
& instr
)
821 struct r600_bytecode_output cf
;
823 memset(&cf
, 0, sizeof(struct r600_bytecode_output
));
825 cf
.op
= CF_OP_MEM_SCRATCH
;
827 cf
.gpr
= instr
.gpr().sel();
829 cf
.comp_mask
= instr
.write_mask();
836 if (instr
.indirect()) {
838 cf
.index_gpr
= instr
.address();
840 /* The docu seems to be wrong here: In indirect addressing the
841 * address_base seems to be the array_size */
842 cf
.array_size
= instr
.array_size();
845 cf
.array_base
= instr
.location();
847 /* This should be 0, but the address calculation is apparently wrong */
850 if (r600_bytecode_add_output(m_bc
, &cf
)){
851 R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
858 extern const std::map
<ESDOp
, int> ds_opcode_map
;
860 bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr
& instr
)
862 struct r600_bytecode_gds gds
;
865 auto addr
= instr
.uav_id();
866 if (addr
->type() != Value::literal
) {
867 if (!m_bc
->index_loaded
[1] || m_loop_nesting
||
868 m_bc
->index_reg
[1] != addr
->sel()) {
869 struct r600_bytecode_alu alu
;
871 memset(&alu
, 0, sizeof(alu
));
872 alu
.op
= opcode_map
.at(op2_lshr_int
);
873 alu
.dst
.sel
= addr
->sel();
874 alu
.dst
.chan
= addr
->chan();
875 alu
.src
[0].sel
= addr
->sel();
876 alu
.src
[0].chan
= addr
->chan();
877 alu
.src
[1].sel
= ALU_SRC_LITERAL
;
878 alu
.src
[1].value
= 2;
881 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
885 memset(&alu
, 0, sizeof(alu
));
886 alu
.op
= opcode_map
.at(op1_mova_int
);
888 alu
.src
[0].sel
= addr
->sel();
889 alu
.src
[0].chan
= addr
->chan();
891 r
= r600_bytecode_add_alu(m_bc
, &alu
);
897 alu
.op
= opcode_map
.at(op1_set_cf_idx1
);
903 r
= r600_bytecode_add_alu(m_bc
, &alu
);
907 m_bc
->index_reg
[1] = addr
->sel();
908 m_bc
->index_loaded
[1] = true;
911 const LiteralValue
& addr_reg
= static_cast<const LiteralValue
&>(*addr
);
912 uav_idx
= addr_reg
.value() >> 2;
915 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
917 gds
.op
= ds_opcode_map
.at(instr
.op());
918 gds
.dst_gpr
= instr
.dest_sel();
919 gds
.uav_id
= (uav_idx
>= 0 ? uav_idx
: 0) + instr
.uav_base();
920 gds
.uav_index_mode
= uav_idx
>= 0 ? bim_none
: bim_one
;
921 gds
.src_gpr
= instr
.src_sel();
923 if (instr
.op() == DS_OP_CMP_XCHG_RET
) {
929 gds
.src_sel_x
= instr
.src_swizzle(0);
930 gds
.src_sel_y
= instr
.src_swizzle(1);
937 gds
.alloc_consume
= 1; // Not Cayman
939 int r
= r600_bytecode_add_gds(m_bc
, &gds
);
942 m_bc
->cf_last
->vpm
= 1;
947 bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction
& instr
)
949 struct r600_bytecode_gds gds
;
952 EBufferIndexMode rat_index_mode
= bim_none
;
953 auto addr
= instr
.rat_id_offset();
956 if (addr
->type() != Value::literal
) {
957 rat_index_mode
= bim_one
;
958 if (!m_bc
->index_loaded
[1] || m_loop_nesting
|| m_bc
->index_reg
[1] != addr
->sel()) {
959 struct r600_bytecode_alu alu
;
961 memset(&alu
, 0, sizeof(alu
));
962 alu
.op
= opcode_map
.at(op1_mova_int
);
964 alu
.src
[0].sel
= addr
->sel();
965 alu
.src
[0].chan
= addr
->chan();
967 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
973 alu
.op
= opcode_map
.at(op1_set_cf_idx1
);
979 r
= r600_bytecode_add_alu(m_bc
, &alu
);
983 m_bc
->index_reg
[1] = addr
->sel();
984 m_bc
->index_loaded
[1] = true;
988 const LiteralValue
& addr_reg
= static_cast<const LiteralValue
&>(*addr
);
989 rat_idx
= addr_reg
.value();
992 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
994 r600_bytecode_add_cfinst(m_bc
, CF_OP_MEM_RAT
);
995 auto cf
= m_bc
->cf_last
;
996 cf
->rat
.id
= rat_idx
+ m_shader
->rat_base
;
997 cf
->rat
.inst
= instr
.rat_op();
998 cf
->rat
.index_mode
= rat_index_mode
;
999 cf
->output
.type
= instr
.need_ack() ? 3 : 1;
1000 cf
->output
.gpr
= instr
.data_gpr();
1001 cf
->output
.index_gpr
= instr
.index_gpr();
1002 cf
->output
.comp_mask
= instr
.comp_mask();
1003 cf
->output
.burst_count
= instr
.burst_count();
1004 cf
->output
.swizzle_x
= instr
.data_swz(0);
1005 cf
->output
.swizzle_y
= instr
.data_swz(1);
1006 cf
->output
.swizzle_z
= instr
.data_swz(2);
1007 cf
->output
.swizzle_w
= instr
.data_swz(3);
1010 cf
->mark
= instr
.need_ack();
1011 cf
->output
.elem_size
= instr
.elm_size();
// Translate an ALU destination Value into the r600_bytecode_alu_dst
// fields. Only plain GPRs and GPR-array values are legal destinations;
// sels above 124 are rejected because the higher sels are reserved.
// Writing to a GPR that currently backs index_reg[0]/[1] invalidates
// the corresponding cached CF index load (index_loaded flag), forcing a
// reload before the next indexed access.
// NOTE(review): the second parameter, sel assignment and the returns
// (orig. 1016-1017, 1019, 1022-1025, 1033-1035) are missing from this
// extraction — confirm against the full source.
1015 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst
& dst
,
1018 assert(d
.type() == Value::gpr
|| d
.type() == Value::gpr_array_value
);
1020 if (d
.sel() > 124) {
1021 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d
.sel());
1026 dst
.chan
= d
.chan();
1028 if (m_bc
->index_reg
[1] == dst
.sel
)
1029 m_bc
->index_loaded
[1] = false;
1031 if (m_bc
->index_reg
[0] == dst
.sel
)
1032 m_bc
->index_loaded
[0] = false;
1037 bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src
& src
, const Value
& s
)
1040 if (s
.type() == Value::gpr
&& s
.sel() > 124) {
1041 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s
.sel());
1045 if (s
.type() == Value::lds_direct
) {
1046 R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
1050 if (s
.type() == Value::kconst
&& s
.sel() < 512) {
1051 R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s
.sel());
1055 if (s
.type() == Value::literal
) {
1056 auto& v
= static_cast<const LiteralValue
&>(s
);
1057 if (v
.value() == 0) {
1058 src
.sel
= ALU_SRC_0
;
1060 --m_nliterals_in_group
;
1063 if (v
.value() == 1) {
1064 src
.sel
= ALU_SRC_1_INT
;
1066 --m_nliterals_in_group
;
1069 if (v
.value_float() == 1.0f
) {
1070 src
.sel
= ALU_SRC_1
;
1072 --m_nliterals_in_group
;
1075 if (v
.value_float() == 0.5f
) {
1076 src
.sel
= ALU_SRC_0_5
;
1078 --m_nliterals_in_group
;
1081 if (v
.value() == 0xffffffff) {
1082 src
.sel
= ALU_SRC_M_1_INT
;
1084 --m_nliterals_in_group
;
1087 src
.value
= v
.value();
1091 src
.chan
= s
.chan();
1092 if (s
.type() == Value::kconst
) {
1093 const UniformValue
& cv
= static_cast<const UniformValue
&>(s
);
1094 src
.kc_bank
= cv
.kcache_bank();
1100 const std::map
<EAluOp
, int> opcode_map
= {
1102 {op2_add
, ALU_OP2_ADD
},
1103 {op2_mul
, ALU_OP2_MUL
},
1104 {op2_mul_ieee
, ALU_OP2_MUL_IEEE
},
1105 {op2_max
, ALU_OP2_MAX
},
1106 {op2_min
, ALU_OP2_MIN
},
1107 {op2_max_dx10
, ALU_OP2_MAX_DX10
},
1108 {op2_min_dx10
, ALU_OP2_MIN_DX10
},
1109 {op2_sete
, ALU_OP2_SETE
},
1110 {op2_setgt
, ALU_OP2_SETGT
},
1111 {op2_setge
, ALU_OP2_SETGE
},
1112 {op2_setne
, ALU_OP2_SETNE
},
1113 {op2_sete_dx10
, ALU_OP2_SETE_DX10
},
1114 {op2_setgt_dx10
, ALU_OP2_SETGT_DX10
},
1115 {op2_setge_dx10
, ALU_OP2_SETGE_DX10
},
1116 {op2_setne_dx10
, ALU_OP2_SETNE_DX10
},
1117 {op1_fract
, ALU_OP1_FRACT
},
1118 {op1_trunc
, ALU_OP1_TRUNC
},
1119 {op1_ceil
, ALU_OP1_CEIL
},
1120 {op1_rndne
, ALU_OP1_RNDNE
},
1121 {op1_floor
, ALU_OP1_FLOOR
},
1122 {op2_ashr_int
, ALU_OP2_ASHR_INT
},
1123 {op2_lshr_int
, ALU_OP2_LSHR_INT
},
1124 {op2_lshl_int
, ALU_OP2_LSHL_INT
},
1125 {op1_mov
, ALU_OP1_MOV
},
1126 {op0_nop
, ALU_OP0_NOP
},
1127 {op2_mul_64
, ALU_OP2_MUL_64
},
1128 {op1_flt64_to_flt32
, ALU_OP1_FLT64_TO_FLT32
},
1129 {op1v_flt64_to_flt32
, ALU_OP1_FLT32_TO_FLT64
},
1130 {op2_pred_setgt_uint
, ALU_OP2_PRED_SETGT_UINT
},
1131 {op2_pred_setge_uint
, ALU_OP2_PRED_SETGE_UINT
},
1132 {op2_pred_sete
, ALU_OP2_PRED_SETE
},
1133 {op2_pred_setgt
, ALU_OP2_PRED_SETGT
},
1134 {op2_pred_setge
, ALU_OP2_PRED_SETGE
},
1135 {op2_pred_setne
, ALU_OP2_PRED_SETNE
},
1136 //{op2_pred_set_inv, ALU_OP2_PRED_SET},
1137 //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
1138 //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
1139 {op2_pred_sete_push
, ALU_OP2_PRED_SETE_PUSH
},
1140 {op2_pred_setgt_push
, ALU_OP2_PRED_SETGT_PUSH
},
1141 {op2_pred_setge_push
, ALU_OP2_PRED_SETGE_PUSH
},
1142 {op2_pred_setne_push
, ALU_OP2_PRED_SETNE_PUSH
},
1143 {op2_kille
, ALU_OP2_KILLE
},
1144 {op2_killgt
, ALU_OP2_KILLGT
},
1145 {op2_killge
, ALU_OP2_KILLGE
},
1146 {op2_killne
, ALU_OP2_KILLNE
},
1147 {op2_and_int
, ALU_OP2_AND_INT
},
1148 {op2_or_int
, ALU_OP2_OR_INT
},
1149 {op2_xor_int
, ALU_OP2_XOR_INT
},
1150 {op1_not_int
, ALU_OP1_NOT_INT
},
1151 {op2_add_int
, ALU_OP2_ADD_INT
},
1152 {op2_sub_int
, ALU_OP2_SUB_INT
},
1153 {op2_max_int
, ALU_OP2_MAX_INT
},
1154 {op2_min_int
, ALU_OP2_MIN_INT
},
1155 {op2_max_uint
, ALU_OP2_MAX_UINT
},
1156 {op2_min_uint
, ALU_OP2_MIN_UINT
},
1157 {op2_sete_int
, ALU_OP2_SETE_INT
},
1158 {op2_setgt_int
, ALU_OP2_SETGT_INT
},
1159 {op2_setge_int
, ALU_OP2_SETGE_INT
},
1160 {op2_setne_int
, ALU_OP2_SETNE_INT
},
1161 {op2_setgt_uint
, ALU_OP2_SETGT_UINT
},
1162 {op2_setge_uint
, ALU_OP2_SETGE_UINT
},
1163 {op2_killgt_uint
, ALU_OP2_KILLGT_UINT
},
1164 {op2_killge_uint
, ALU_OP2_KILLGE_UINT
},
1165 //p2_prede_int, ALU_OP2_PREDE_INT},
1166 {op2_pred_setgt_int
, ALU_OP2_PRED_SETGT_INT
},
1167 {op2_pred_setge_int
, ALU_OP2_PRED_SETGE_INT
},
1168 {op2_pred_setne_int
, ALU_OP2_PRED_SETNE_INT
},
1169 {op2_kille_int
, ALU_OP2_KILLE_INT
},
1170 {op2_killgt_int
, ALU_OP2_KILLGT_INT
},
1171 {op2_killge_int
, ALU_OP2_KILLGE_INT
},
1172 {op2_killne_int
, ALU_OP2_KILLNE_INT
},
1173 {op2_pred_sete_push_int
, ALU_OP2_PRED_SETE_PUSH_INT
},
1174 {op2_pred_setgt_push_int
, ALU_OP2_PRED_SETGT_PUSH_INT
},
1175 {op2_pred_setge_push_int
, ALU_OP2_PRED_SETGE_PUSH_INT
},
1176 {op2_pred_setne_push_int
, ALU_OP2_PRED_SETNE_PUSH_INT
},
1177 {op2_pred_setlt_push_int
, ALU_OP2_PRED_SETLT_PUSH_INT
},
1178 {op2_pred_setle_push_int
, ALU_OP2_PRED_SETLE_PUSH_INT
},
1179 {op1_flt_to_int
, ALU_OP1_FLT_TO_INT
},
1180 {op1_bfrev_int
, ALU_OP1_BFREV_INT
},
1181 {op2_addc_uint
, ALU_OP2_ADDC_UINT
},
1182 {op2_subb_uint
, ALU_OP2_SUBB_UINT
},
1183 {op0_group_barrier
, ALU_OP0_GROUP_BARRIER
},
1184 {op0_group_seq_begin
, ALU_OP0_GROUP_SEQ_BEGIN
},
1185 {op0_group_seq_end
, ALU_OP0_GROUP_SEQ_END
},
1186 {op2_set_mode
, ALU_OP2_SET_MODE
},
1187 {op1_set_cf_idx0
, ALU_OP0_SET_CF_IDX0
},
1188 {op1_set_cf_idx1
, ALU_OP0_SET_CF_IDX1
},
1189 {op2_set_lds_size
, ALU_OP2_SET_LDS_SIZE
},
1190 {op1_exp_ieee
, ALU_OP1_EXP_IEEE
},
1191 {op1_log_clamped
, ALU_OP1_LOG_CLAMPED
},
1192 {op1_log_ieee
, ALU_OP1_LOG_IEEE
},
1193 {op1_recip_clamped
, ALU_OP1_RECIP_CLAMPED
},
1194 {op1_recip_ff
, ALU_OP1_RECIP_FF
},
1195 {op1_recip_ieee
, ALU_OP1_RECIP_IEEE
},
1196 {op1_recipsqrt_clamped
, ALU_OP1_RECIPSQRT_CLAMPED
},
1197 {op1_recipsqrt_ff
, ALU_OP1_RECIPSQRT_FF
},
1198 {op1_recipsqrt_ieee1
, ALU_OP1_RECIPSQRT_IEEE
},
1199 {op1_sqrt_ieee
, ALU_OP1_SQRT_IEEE
},
1200 {op1_sin
, ALU_OP1_SIN
},
1201 {op1_cos
, ALU_OP1_COS
},
1202 {op2_mullo_int
, ALU_OP2_MULLO_INT
},
1203 {op2_mulhi_int
, ALU_OP2_MULHI_INT
},
1204 {op2_mullo_uint
, ALU_OP2_MULLO_UINT
},
1205 {op2_mulhi_uint
, ALU_OP2_MULHI_UINT
},
1206 {op1_recip_int
, ALU_OP1_RECIP_INT
},
1207 {op1_recip_uint
, ALU_OP1_RECIP_UINT
},
1208 {op1_recip_64
, ALU_OP2_RECIP_64
},
1209 {op1_recip_clamped_64
, ALU_OP2_RECIP_CLAMPED_64
},
1210 {op1_recipsqrt_64
, ALU_OP2_RECIPSQRT_64
},
1211 {op1_recipsqrt_clamped_64
, ALU_OP2_RECIPSQRT_CLAMPED_64
},
1212 {op1_sqrt_64
, ALU_OP2_SQRT_64
},
1213 {op1_flt_to_uint
, ALU_OP1_FLT_TO_UINT
},
1214 {op1_int_to_flt
, ALU_OP1_INT_TO_FLT
},
1215 {op1_uint_to_flt
, ALU_OP1_UINT_TO_FLT
},
1216 {op2_bfm_int
, ALU_OP2_BFM_INT
},
1217 {op1_flt32_to_flt16
, ALU_OP1_FLT32_TO_FLT16
},
1218 {op1_flt16_to_flt32
, ALU_OP1_FLT16_TO_FLT32
},
1219 {op1_ubyte0_flt
, ALU_OP1_UBYTE0_FLT
},
1220 {op1_ubyte1_flt
, ALU_OP1_UBYTE1_FLT
},
1221 {op1_ubyte2_flt
, ALU_OP1_UBYTE2_FLT
},
1222 {op1_ubyte3_flt
, ALU_OP1_UBYTE3_FLT
},
1223 {op1_bcnt_int
, ALU_OP1_BCNT_INT
},
1224 {op1_ffbh_uint
, ALU_OP1_FFBH_UINT
},
1225 {op1_ffbl_int
, ALU_OP1_FFBL_INT
},
1226 {op1_ffbh_int
, ALU_OP1_FFBH_INT
},
1227 {op1_flt_to_uint4
, ALU_OP1_FLT_TO_UINT4
},
1228 {op2_dot_ieee
, ALU_OP2_DOT_IEEE
},
1229 {op1_flt_to_int_rpi
, ALU_OP1_FLT_TO_INT_RPI
},
1230 {op1_flt_to_int_floor
, ALU_OP1_FLT_TO_INT_FLOOR
},
1231 {op2_mulhi_uint24
, ALU_OP2_MULHI_UINT24
},
1232 {op1_mbcnt_32hi_int
, ALU_OP1_MBCNT_32HI_INT
},
1233 {op1_offset_to_flt
, ALU_OP1_OFFSET_TO_FLT
},
1234 {op2_mul_uint24
, ALU_OP2_MUL_UINT24
},
1235 {op1_bcnt_accum_prev_int
, ALU_OP1_BCNT_ACCUM_PREV_INT
},
1236 {op1_mbcnt_32lo_accum_prev_int
, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT
},
1237 {op2_sete_64
, ALU_OP2_SETE_64
},
1238 {op2_setne_64
, ALU_OP2_SETNE_64
},
1239 {op2_setgt_64
, ALU_OP2_SETGT_64
},
1240 {op2_setge_64
, ALU_OP2_SETGE_64
},
1241 {op2_min_64
, ALU_OP2_MIN_64
},
1242 {op2_max_64
, ALU_OP2_MAX_64
},
1243 {op2_dot4
, ALU_OP2_DOT4
},
1244 {op2_dot4_ieee
, ALU_OP2_DOT4_IEEE
},
1245 {op2_cube
, ALU_OP2_CUBE
},
1246 {op1_max4
, ALU_OP1_MAX4
},
1247 {op1_frexp_64
, ALU_OP1_FREXP_64
},
1248 {op1_ldexp_64
, ALU_OP2_LDEXP_64
},
1249 {op1_fract_64
, ALU_OP1_FRACT_64
},
1250 {op2_pred_setgt_64
, ALU_OP2_PRED_SETGT_64
},
1251 {op2_pred_sete_64
, ALU_OP2_PRED_SETE_64
},
1252 {op2_pred_setge_64
, ALU_OP2_PRED_SETGE_64
},
1253 {op2_add_64
, ALU_OP2_ADD_64
},
1254 {op1_mova_int
, ALU_OP1_MOVA_INT
},
1255 {op1v_flt64_to_flt32
, ALU_OP1_FLT64_TO_FLT32
},
1256 {op1_flt32_to_flt64
, ALU_OP1_FLT32_TO_FLT64
},
1257 {op2_sad_accum_prev_uint
, ALU_OP2_SAD_ACCUM_PREV_UINT
},
1258 {op2_dot
, ALU_OP2_DOT
},
1259 //p2_mul_prev, ALU_OP2_MUL_PREV},
1260 //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
1261 //p2_add_prev, ALU_OP2_ADD_PREV},
1262 {op2_muladd_prev
, ALU_OP2_MULADD_PREV
},
1263 {op2_muladd_ieee_prev
, ALU_OP2_MULADD_IEEE_PREV
},
1264 {op2_interp_xy
, ALU_OP2_INTERP_XY
},
1265 {op2_interp_zw
, ALU_OP2_INTERP_ZW
},
1266 {op2_interp_x
, ALU_OP2_INTERP_X
},
1267 {op2_interp_z
, ALU_OP2_INTERP_Z
},
1268 {op0_store_flags
, ALU_OP1_STORE_FLAGS
},
1269 {op1_load_store_flags
, ALU_OP1_LOAD_STORE_FLAGS
},
1270 {op0_lds_1a
, ALU_OP2_LDS_1A
},
1271 {op0_lds_1a1d
, ALU_OP2_LDS_1A1D
},
1272 {op0_lds_2a
, ALU_OP2_LDS_2A
},
1273 {op1_interp_load_p0
, ALU_OP1_INTERP_LOAD_P0
},
1274 {op1_interp_load_p10
, ALU_OP1_INTERP_LOAD_P10
},
1275 {op1_interp_load_p20
, ALU_OP1_INTERP_LOAD_P20
},
1276 // {op 3 all left shift 6
1277 {op3_bfe_uint
, ALU_OP3_BFE_UINT
},
1278 {op3_bfe_int
, ALU_OP3_BFE_INT
},
1279 {op3_bfi_int
, ALU_OP3_BFI_INT
},
1280 {op3_fma
, ALU_OP3_FMA
},
1281 {op3_cndne_64
, ALU_OP3_CNDNE_64
},
1282 {op3_fma_64
, ALU_OP3_FMA_64
},
1283 {op3_lerp_uint
, ALU_OP3_LERP_UINT
},
1284 {op3_bit_align_int
, ALU_OP3_BIT_ALIGN_INT
},
1285 {op3_byte_align_int
, ALU_OP3_BYTE_ALIGN_INT
},
1286 {op3_sad_accum_uint
, ALU_OP3_SAD_ACCUM_UINT
},
1287 {op3_sad_accum_hi_uint
, ALU_OP3_SAD_ACCUM_HI_UINT
},
1288 {op3_muladd_uint24
, ALU_OP3_MULADD_UINT24
},
1289 {op3_lds_idx_op
, ALU_OP3_LDS_IDX_OP
},
1290 {op3_muladd
, ALU_OP3_MULADD
},
1291 {op3_muladd_m2
, ALU_OP3_MULADD_M2
},
1292 {op3_muladd_m4
, ALU_OP3_MULADD_M4
},
1293 {op3_muladd_d2
, ALU_OP3_MULADD_D2
},
1294 {op3_muladd_ieee
, ALU_OP3_MULADD_IEEE
},
1295 {op3_cnde
, ALU_OP3_CNDE
},
1296 {op3_cndgt
, ALU_OP3_CNDGT
},
1297 {op3_cndge
, ALU_OP3_CNDGE
},
1298 {op3_cnde_int
, ALU_OP3_CNDE_INT
},
1299 {op3_cndgt_int
, ALU_OP3_CNDGT_INT
},
1300 {op3_cndge_int
, ALU_OP3_CNDGE_INT
},
1301 {op3_mul_lit
, ALU_OP3_MUL_LIT
},
1304 const std::map
<ESDOp
, int> ds_opcode_map
= {
1305 {DS_OP_ADD
, FETCH_OP_GDS_ADD
},
1306 {DS_OP_SUB
, FETCH_OP_GDS_SUB
},
1307 {DS_OP_RSUB
, FETCH_OP_GDS_RSUB
},
1308 {DS_OP_INC
, FETCH_OP_GDS_INC
},
1309 {DS_OP_DEC
, FETCH_OP_GDS_DEC
},
1310 {DS_OP_MIN_INT
, FETCH_OP_GDS_MIN_INT
},
1311 {DS_OP_MAX_INT
, FETCH_OP_GDS_MAX_INT
},
1312 {DS_OP_MIN_UINT
, FETCH_OP_GDS_MIN_UINT
},
1313 {DS_OP_MAX_UINT
, FETCH_OP_GDS_MAX_UINT
},
1314 {DS_OP_AND
, FETCH_OP_GDS_AND
},
1315 {DS_OP_OR
, FETCH_OP_GDS_OR
},
1316 {DS_OP_XOR
, FETCH_OP_GDS_XOR
},
1317 {DS_OP_MSKOR
, FETCH_OP_GDS_MSKOR
},
1318 {DS_OP_WRITE
, FETCH_OP_GDS_WRITE
},
1319 {DS_OP_WRITE_REL
, FETCH_OP_GDS_WRITE_REL
},
1320 {DS_OP_WRITE2
, FETCH_OP_GDS_WRITE2
},
1321 {DS_OP_CMP_STORE
, FETCH_OP_GDS_CMP_STORE
},
1322 {DS_OP_CMP_STORE_SPF
, FETCH_OP_GDS_CMP_STORE_SPF
},
1323 {DS_OP_BYTE_WRITE
, FETCH_OP_GDS_BYTE_WRITE
},
1324 {DS_OP_SHORT_WRITE
, FETCH_OP_GDS_SHORT_WRITE
},
1325 {DS_OP_ADD_RET
, FETCH_OP_GDS_ADD_RET
},
1326 {DS_OP_SUB_RET
, FETCH_OP_GDS_SUB_RET
},
1327 {DS_OP_RSUB_RET
, FETCH_OP_GDS_RSUB_RET
},
1328 {DS_OP_INC_RET
, FETCH_OP_GDS_INC_RET
},
1329 {DS_OP_DEC_RET
, FETCH_OP_GDS_DEC_RET
},
1330 {DS_OP_MIN_INT_RET
, FETCH_OP_GDS_MIN_INT_RET
},
1331 {DS_OP_MAX_INT_RET
, FETCH_OP_GDS_MAX_INT_RET
},
1332 {DS_OP_MIN_UINT_RET
, FETCH_OP_GDS_MIN_UINT_RET
},
1333 {DS_OP_MAX_UINT_RET
, FETCH_OP_GDS_MAX_UINT_RET
},
1334 {DS_OP_AND_RET
, FETCH_OP_GDS_AND_RET
},
1335 {DS_OP_OR_RET
, FETCH_OP_GDS_OR_RET
},
1336 {DS_OP_XOR_RET
, FETCH_OP_GDS_XOR_RET
},
1337 {DS_OP_MSKOR_RET
, FETCH_OP_GDS_MSKOR_RET
},
1338 {DS_OP_XCHG_RET
, FETCH_OP_GDS_XCHG_RET
},
1339 {DS_OP_XCHG_REL_RET
, FETCH_OP_GDS_XCHG_REL_RET
},
1340 {DS_OP_XCHG2_RET
, FETCH_OP_GDS_XCHG2_RET
},
1341 {DS_OP_CMP_XCHG_RET
, FETCH_OP_GDS_CMP_XCHG_RET
},
1342 {DS_OP_CMP_XCHG_SPF_RET
, FETCH_OP_GDS_CMP_XCHG_SPF_RET
},
1343 {DS_OP_READ_RET
, FETCH_OP_GDS_READ_RET
},
1344 {DS_OP_READ_REL_RET
, FETCH_OP_GDS_READ_REL_RET
},
1345 {DS_OP_READ2_RET
, FETCH_OP_GDS_READ2_RET
},
1346 {DS_OP_READWRITE_RET
, FETCH_OP_GDS_READWRITE_RET
},
1347 {DS_OP_BYTE_READ_RET
, FETCH_OP_GDS_BYTE_READ_RET
},
1348 {DS_OP_UBYTE_READ_RET
, FETCH_OP_GDS_UBYTE_READ_RET
},
1349 {DS_OP_SHORT_READ_RET
, FETCH_OP_GDS_SHORT_READ_RET
},
1350 {DS_OP_USHORT_READ_RET
, FETCH_OP_GDS_USHORT_READ_RET
},
1351 {DS_OP_ATOMIC_ORDERED_ALLOC_RET
, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC
},