/*
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
27 #include "sfn_ir_to_assembly.h"
28 #include "sfn_conditionaljumptracker.h"
29 #include "sfn_callstack.h"
30 #include "sfn_instruction_gds.h"
31 #include "sfn_instruction_misc.h"
32 #include "sfn_instruction_fetch.h"
34 #include "../r600_shader.h"
35 #include "../r600_sq.h"
41 struct AssemblyFromShaderLegacyImpl
{
43 AssemblyFromShaderLegacyImpl(r600_shader
*sh
, r600_shader_key
*key
);
44 bool emit(const Instruction::Pointer i
);
45 void reset_addr_register() {m_last_addr
.reset();}
48 bool emit_alu(const AluInstruction
& ai
, ECFAluOpCode cf_op
);
49 bool emit_export(const ExportInstruction
& exi
);
50 bool emit_streamout(const StreamOutIntruction
& instr
);
51 bool emit_memringwrite(const MemRingOutIntruction
& instr
);
52 bool emit_tex(const TexInstruction
& tex_instr
);
53 bool emit_vtx(const FetchInstruction
& fetch_instr
);
54 bool emit_if_start(const IfInstruction
& if_instr
);
55 bool emit_else(const ElseInstruction
& else_instr
);
56 bool emit_endif(const IfElseEndInstruction
& endif_instr
);
57 bool emit_emit_vertex(const EmitVertex
&instr
);
59 bool emit_loop_begin(const LoopBeginInstruction
& instr
);
60 bool emit_loop_end(const LoopEndInstruction
& instr
);
61 bool emit_loop_break(const LoopBreakInstruction
& instr
);
62 bool emit_loop_continue(const LoopContInstruction
& instr
);
63 bool emit_wait_ack(const WaitAck
& instr
);
64 bool emit_wr_scratch(const WriteScratchInstruction
& instr
);
65 bool emit_gds(const GDSInstr
& instr
);
66 bool emit_rat(const RatInstruction
& instr
);
68 bool emit_load_addr(PValue addr
);
69 bool emit_fs_pixel_export(const ExportInstruction
& exi
);
70 bool emit_vs_pos_export(const ExportInstruction
& exi
);
71 bool emit_vs_param_export(const ExportInstruction
& exi
);
72 bool copy_dst(r600_bytecode_alu_dst
& dst
, const Value
& src
);
73 bool copy_src(r600_bytecode_alu_src
& src
, const Value
& s
);
77 ConditionalJumpTracker m_jump_tracker
;
78 CallStack m_callstack
;
82 r600_shader
*m_shader
;
83 r600_shader_key
*m_key
;
84 r600_bytecode_output m_output
;
85 unsigned m_max_color_exports
;
87 bool has_param_output
;
90 int m_nliterals_in_group
;
94 AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader
*sh
,
97 impl
= new AssemblyFromShaderLegacyImpl(sh
, key
);
100 AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
105 bool AssemblyFromShaderLegacy::do_lower(const std::vector
<InstructionBlock
>& ir
)
107 if (impl
->m_shader
->processor_type
== PIPE_SHADER_VERTEX
&&
108 impl
->m_shader
->ninput
> 0)
109 r600_bytecode_add_cfinst(impl
->m_bc
, CF_OP_CALL_FS
);
112 std::vector
<Instruction::Pointer
> exports
;
114 for (const auto& block
: ir
) {
115 for (const auto& i
: block
) {
118 if (i
->type() != Instruction::alu
)
119 impl
->reset_addr_register();
123 for (const auto& i : exports) {
124 if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
129 const struct cf_op_info
*last
= nullptr;
130 if (impl
->m_bc
->cf_last
)
131 last
= r600_isa_cf(impl
->m_bc
->cf_last
->op
);
133 /* alu clause instructions don't have EOP bit, so add NOP */
134 if (!last
|| last
->flags
& CF_ALU
|| impl
->m_bc
->cf_last
->op
== CF_OP_LOOP_END
135 || impl
->m_bc
->cf_last
->op
== CF_OP_POP
)
136 r600_bytecode_add_cfinst(impl
->m_bc
, CF_OP_NOP
);
138 /* A fetch shader only can't be EOP (results in hang), but we can replace it
140 else if (impl
->m_bc
->cf_last
->op
== CF_OP_CALL_FS
)
141 impl
->m_bc
->cf_last
->op
= CF_OP_NOP
;
143 impl
->m_bc
->cf_last
->end_of_program
= 1;
148 bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i
)
150 sfn_log
<< SfnLog::assembly
<< "Emit from '" << *i
<< "\n";
152 case Instruction::alu
:
153 return emit_alu(static_cast<const AluInstruction
&>(*i
), cf_alu_undefined
);
154 case Instruction::exprt
:
155 return emit_export(static_cast<const ExportInstruction
&>(*i
));
156 case Instruction::tex
:
157 return emit_tex(static_cast<const TexInstruction
&>(*i
));
158 case Instruction::vtx
:
159 return emit_vtx(static_cast<const FetchInstruction
&>(*i
));
160 case Instruction::cond_if
:
161 return emit_if_start(static_cast<const IfInstruction
&>(*i
));
162 case Instruction::cond_else
:
163 return emit_else(static_cast<const ElseInstruction
&>(*i
));
164 case Instruction::cond_endif
:
165 return emit_endif(static_cast<const IfElseEndInstruction
&>(*i
));
166 case Instruction::loop_begin
:
167 return emit_loop_begin(static_cast<const LoopBeginInstruction
&>(*i
));
168 case Instruction::loop_end
:
169 return emit_loop_end(static_cast<const LoopEndInstruction
&>(*i
));
170 case Instruction::loop_break
:
171 return emit_loop_break(static_cast<const LoopBreakInstruction
&>(*i
));
172 case Instruction::loop_continue
:
173 return emit_loop_continue(static_cast<const LoopContInstruction
&>(*i
));
174 case Instruction::streamout
:
175 return emit_streamout(static_cast<const StreamOutIntruction
&>(*i
));
176 case Instruction::ring
:
177 return emit_memringwrite(static_cast<const MemRingOutIntruction
&>(*i
));
178 case Instruction::emit_vtx
:
179 return emit_emit_vertex(static_cast<const EmitVertex
&>(*i
));
180 case Instruction::wait_ack
:
181 return emit_wait_ack(static_cast<const WaitAck
&>(*i
));
182 case Instruction::mem_wr_scratch
:
183 return emit_wr_scratch(static_cast<const WriteScratchInstruction
&>(*i
));
184 case Instruction::gds
:
185 return emit_gds(static_cast<const GDSInstr
&>(*i
));
186 case Instruction::rat
:
187 return emit_rat(static_cast<const RatInstruction
&>(*i
));
193 AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader
*sh
,
194 r600_shader_key
*key
):
199 has_pos_output(false),
200 has_param_output(false),
202 m_nliterals_in_group(0)
204 m_max_color_exports
= MAX2(m_key
->ps
.nr_cbufs
, 1);
207 extern const std::map
<EAluOp
, int> opcode_map
;
209 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr
)
211 m_bc
->ar_reg
= addr
->sel();
212 m_bc
->ar_chan
= addr
->chan();
216 sfn_log
<< SfnLog::assembly
<< " Prepare " << *addr
<< " to address register\n";
221 bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction
& ai
, ECFAluOpCode cf_op
)
224 struct r600_bytecode_alu alu
;
225 memset(&alu
, 0, sizeof(alu
));
228 if (opcode_map
.find(ai
.opcode()) == opcode_map
.end()) {
229 std::cerr
<< "Opcode not handled for " << ai
<<"\n";
233 for (unsigned i
= 0; i
< ai
.n_sources(); ++i
) {
235 if (s
.type() == Value::literal
)
236 ++m_nliterals_in_group
;
239 /* This instruction group would exeed the limit of literals, so
240 * force a new instruction group by adding a NOP as last
241 * instruction. This will no loner be needed with a real
243 if (m_nliterals_in_group
> 4) {
244 sfn_log
<< SfnLog::assembly
<< " Have " << m_nliterals_in_group
<< " inject a last op (nop)\n";
247 int retval
= r600_bytecode_add_alu(m_bc
, &alu
);
250 memset(&alu
, 0, sizeof(alu
));
251 m_nliterals_in_group
= 0;
254 alu
.op
= opcode_map
.at(ai
.opcode());
256 /* Missing test whether ai actually has a dest */
257 auto dst
= ai
.dest();
260 if (!copy_dst(alu
.dst
, *dst
))
263 alu
.dst
.write
= ai
.flag(alu_write
);
264 alu
.dst
.clamp
= ai
.flag(alu_dst_clamp
);
266 if (dst
->type() == Value::gpr_array_value
) {
267 auto& v
= static_cast<const GPRArrayValue
&>(*dst
);
268 PValue addr
= v
.indirect();
270 if (!m_last_addr
|| *addr
!= *m_last_addr
) {
271 emit_load_addr(addr
);
274 alu
.dst
.rel
= addr
? 1 : 0;;
279 alu
.is_op3
= ai
.n_sources() == 3;
281 for (unsigned i
= 0; i
< ai
.n_sources(); ++i
) {
284 if (!copy_src(alu
.src
[i
], s
))
286 alu
.src
[i
].neg
= ai
.flag(AluInstruction::src_neg_flags
[i
]);
288 if (s
.type() == Value::gpr_array_value
) {
289 auto& v
= static_cast<const GPRArrayValue
&>(s
);
290 PValue addr
= v
.indirect();
292 assert(!addr_in_use
|| (*addr_in_use
== *addr
));
293 if (!m_last_addr
|| *addr
!= *m_last_addr
) {
294 emit_load_addr(addr
);
297 alu
.src
[i
].rel
= addr
? 1 : 0;
301 alu
.src
[i
].abs
= ai
.flag(AluInstruction::src_abs_flags
[i
]);
304 if (ai
.bank_swizzle() != alu_vec_unknown
)
305 alu
.bank_swizzle_force
= ai
.bank_swizzle();
307 alu
.last
= ai
.flag(alu_last_instr
);
308 alu
.update_pred
= ai
.flag(alu_update_pred
);
309 alu
.execute_mask
= ai
.flag(alu_update_exec
);
311 /* If the destination register is equal to the last loaded address register
312 * then clear the latter one, because the values will no longer be identical */
314 sfn_log
<< SfnLog::assembly
<< " Current address register is " << *m_last_addr
<< "\n";
317 sfn_log
<< SfnLog::assembly
<< " Current dst register is " << *dst
<< "\n";
319 if (dst
&& m_last_addr
)
320 if (*dst
== *m_last_addr
) {
321 sfn_log
<< SfnLog::assembly
<< " Clear address register (was " << *m_last_addr
<< "\n";
325 if (cf_op
== cf_alu_undefined
)
326 cf_op
= ai
.cf_type();
330 case cf_alu
: type
= CF_OP_ALU
; break;
331 case cf_alu_push_before
: type
= CF_OP_ALU_PUSH_BEFORE
; break;
332 case cf_alu_pop_after
: type
= CF_OP_ALU_POP_AFTER
; break;
333 case cf_alu_pop2_after
: type
= CF_OP_ALU_POP2_AFTER
; break;
334 case cf_alu_break
: type
= CF_OP_ALU_BREAK
; break;
335 case cf_alu_else_after
: type
= CF_OP_ALU_ELSE_AFTER
; break;
336 case cf_alu_continue
: type
= CF_OP_ALU_CONTINUE
; break;
337 case cf_alu_extended
: type
= CF_OP_ALU_EXT
; break;
339 assert(0 && "cf_alu_undefined should have been replaced");
343 m_nliterals_in_group
= 0;
345 bool retval
= !r600_bytecode_add_alu_type(m_bc
, &alu
, type
);
347 if (ai
.opcode() == op1_mova_int
)
350 if (ai
.opcode() == op1_set_cf_idx0
)
351 m_bc
->index_loaded
[0] = 1;
353 if (ai
.opcode() == op1_set_cf_idx1
)
354 m_bc
->index_loaded
[1] = 1;
357 m_bc
->force_add_cf
|= (ai
.opcode() == op2_kille
||
358 ai
.opcode() == op2_killne_int
||
359 ai
.opcode() == op1_set_cf_idx0
||
360 ai
.opcode() == op1_set_cf_idx1
);
364 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction
& exi
)
366 r600_bytecode_output output
;
367 memset(&output
, 0, sizeof(output
));
368 assert(exi
.gpr().type() == Value::gpr_vector
);
369 const auto& gpr
= exi
.gpr();
370 output
.gpr
= gpr
.sel();
371 output
.elem_size
= 3;
372 output
.swizzle_x
= gpr
.chan_i(0);
373 output
.swizzle_y
= gpr
.chan_i(1);
374 output
.swizzle_z
= gpr
.chan_i(2);
375 output
.swizzle_w
= gpr
.chan_i(3);
376 output
.burst_count
= 1;
377 output
.array_base
= 60 + exi
.location();
378 output
.op
= exi
.is_last_export() ? CF_OP_EXPORT_DONE
: CF_OP_EXPORT
;
379 output
.type
= exi
.export_type();
382 if (r600_bytecode_add_output(m_bc
, &output
)) {
383 R600_ERR("Error adding pixel export at location %d\n", exi
.location());
391 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction
& exi
)
393 r600_bytecode_output output
;
394 assert(exi
.gpr().type() == Value::gpr_vector
);
395 const auto& gpr
= exi
.gpr();
397 memset(&output
, 0, sizeof(output
));
398 output
.gpr
= gpr
.sel();
399 output
.elem_size
= 3;
400 output
.swizzle_x
= gpr
.chan_i(0);
401 output
.swizzle_y
= gpr
.chan_i(1);
402 output
.swizzle_z
= gpr
.chan_i(2);
403 output
.swizzle_w
= gpr
.chan_i(3);
404 output
.burst_count
= 1;
405 output
.array_base
= exi
.location();
406 output
.op
= exi
.is_last_export() ? CF_OP_EXPORT_DONE
: CF_OP_EXPORT
;
407 output
.type
= exi
.export_type();
410 if (r600_bytecode_add_output(m_bc
, &output
)) {
411 R600_ERR("Error adding pixel export at location %d\n", exi
.location());
419 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction
& exi
)
421 if (exi
.location() >= m_max_color_exports
&& exi
.location() < 60) {
422 R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
423 exi
.location(), m_max_color_exports
);
427 assert(exi
.gpr().type() == Value::gpr_vector
);
428 const auto& gpr
= exi
.gpr();
430 r600_bytecode_output output
;
431 memset(&output
, 0, sizeof(output
));
433 output
.gpr
= gpr
.sel();
434 output
.elem_size
= 3;
435 output
.swizzle_x
= gpr
.chan_i(0);
436 output
.swizzle_y
= gpr
.chan_i(1);
437 output
.swizzle_z
= gpr
.chan_i(2);
438 output
.swizzle_w
= m_key
->ps
.alpha_to_one
? 5 : gpr
.chan_i(3); ;
439 output
.burst_count
= 1;
440 output
.array_base
= exi
.location();
441 output
.op
= exi
.is_last_export() ? CF_OP_EXPORT_DONE
: CF_OP_EXPORT
;
442 output
.type
= exi
.export_type();
445 if (r600_bytecode_add_output(m_bc
, &output
)) {
446 R600_ERR("Error adding pixel export at location %d\n", exi
.location());
454 bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction
& exi
)
456 switch (exi
.export_type()) {
457 case ExportInstruction::et_pixel
:
458 return emit_fs_pixel_export(exi
);
459 case ExportInstruction::et_pos
:
460 return emit_vs_pos_export(exi
);
461 case ExportInstruction::et_param
:
462 return emit_vs_param_export(exi
);
464 R600_ERR("shader_from_nir: export %d type not yet supported\n", exi
.export_type());
469 bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction
& if_instr
)
471 assert(m_bc
->chip_class
== EVERGREEN
);
473 bool needs_workaround
= false;
474 int elems
= m_callstack
.push(FC_PUSH_VPM
);
476 if (m_bc
->family
!= CHIP_HEMLOCK
&&
477 m_bc
->family
!= CHIP_CYPRESS
&&
478 m_bc
->family
!= CHIP_JUNIPER
) {
479 unsigned dmod1
= (elems
- 1) % m_bc
->stack
.entry_size
;
480 unsigned dmod2
= (elems
) % m_bc
->stack
.entry_size
;
482 if (elems
&& (!dmod1
|| !dmod2
))
483 needs_workaround
= true;
486 auto& pred
= if_instr
.pred();
487 auto op
= cf_alu_push_before
;
489 if (needs_workaround
) {
490 r600_bytecode_add_cfinst(m_bc
, CF_OP_PUSH
);
491 m_bc
->cf_last
->cf_addr
= m_bc
->cf_last
->id
+ 2;
496 r600_bytecode_add_cfinst(m_bc
, CF_OP_JUMP
);
498 m_jump_tracker
.push(m_bc
->cf_last
, jt_if
);
502 bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED
const ElseInstruction
& else_instr
)
504 r600_bytecode_add_cfinst(m_bc
, CF_OP_ELSE
);
505 m_bc
->cf_last
->pop_count
= 1;
506 return m_jump_tracker
.add_mid(m_bc
->cf_last
, jt_if
);
509 bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED
const IfElseEndInstruction
& endif_instr
)
511 m_callstack
.pop(FC_PUSH_VPM
);
513 unsigned force_pop
= m_bc
->force_add_cf
;
517 if (m_bc
->cf_last
->op
== CF_OP_ALU
)
519 else if (m_bc
->cf_last
->op
== CF_OP_ALU_POP_AFTER
)
524 m_bc
->cf_last
->op
= CF_OP_ALU_POP_AFTER
;
525 m_bc
->force_add_cf
= 1;
526 } else if (alu_pop
== 2) {
527 m_bc
->cf_last
->op
= CF_OP_ALU_POP2_AFTER
;
528 m_bc
->force_add_cf
= 1;
535 r600_bytecode_add_cfinst(m_bc
, CF_OP_POP
);
536 m_bc
->cf_last
->pop_count
= 1;
537 m_bc
->cf_last
->cf_addr
= m_bc
->cf_last
->id
+ 2;
540 return m_jump_tracker
.pop(m_bc
->cf_last
, jt_if
);
543 bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED
const LoopBeginInstruction
& instr
)
545 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_START_DX10
);
546 m_jump_tracker
.push(m_bc
->cf_last
, jt_loop
);
547 m_callstack
.push(FC_LOOP
);
552 bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED
const LoopEndInstruction
& instr
)
554 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_END
);
555 m_callstack
.pop(FC_LOOP
);
556 assert(m_loop_nesting
);
558 return m_jump_tracker
.pop(m_bc
->cf_last
, jt_loop
);
561 bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED
const LoopBreakInstruction
& instr
)
563 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_BREAK
);
564 return m_jump_tracker
.add_mid(m_bc
->cf_last
, jt_loop
);
567 bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED
const LoopContInstruction
&instr
)
569 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_CONTINUE
);
570 return m_jump_tracker
.add_mid(m_bc
->cf_last
, jt_loop
);
573 bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction
& so_instr
)
575 struct r600_bytecode_output output
;
576 memset(&output
, 0, sizeof(struct r600_bytecode_output
));
578 output
.gpr
= so_instr
.gpr().sel();
579 output
.elem_size
= so_instr
.element_size();
580 output
.array_base
= so_instr
.array_base();
581 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE
;
582 output
.burst_count
= so_instr
.burst_count();
583 output
.array_size
= so_instr
.array_size();
584 output
.comp_mask
= so_instr
.comp_mask();
585 output
.op
= so_instr
.op();
587 assert(output
.op
>= CF_OP_MEM_STREAM0_BUF0
&& output
.op
<= CF_OP_MEM_STREAM3_BUF3
);
590 if (r600_bytecode_add_output(m_bc
, &output
)) {
591 R600_ERR("shader_from_nir: Error creating stream output instruction\n");
598 bool AssemblyFromShaderLegacyImpl::emit_memringwrite(const MemRingOutIntruction
& instr
)
600 struct r600_bytecode_output output
;
601 memset(&output
, 0, sizeof(struct r600_bytecode_output
));
603 output
.gpr
= instr
.gpr().sel();
604 output
.type
= instr
.type();
605 output
.elem_size
= instr
.ncomp();
606 output
.comp_mask
= 0xF;
607 output
.burst_count
= 1;
608 output
.op
= instr
.op();
609 if (instr
.type() == mem_write_ind
|| instr
.type() == mem_write_ind_ack
) {
610 output
.index_gpr
= instr
.index_reg();
611 output
.array_size
= 0xfff;
613 output
.array_base
= instr
.array_base();
615 if (r600_bytecode_add_output(m_bc
, &output
)) {
616 R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
623 bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction
& tex_instr
)
625 auto addr
= tex_instr
.sampler_offset();
626 if (addr
&& (!m_bc
->index_loaded
[1] || m_loop_nesting
627 || m_bc
->index_reg
[1] != addr
->sel())) {
628 struct r600_bytecode_alu alu
;
629 memset(&alu
, 0, sizeof(alu
));
630 alu
.op
= opcode_map
.at(op1_mova_int
);
632 alu
.src
[0].sel
= addr
->sel();
633 alu
.src
[0].chan
= addr
->chan();
635 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
641 alu
.op
= opcode_map
.at(op1_set_cf_idx1
);
647 r
= r600_bytecode_add_alu(m_bc
, &alu
);
651 m_bc
->index_reg
[1] = addr
->sel();
652 m_bc
->index_loaded
[1] = true;
655 r600_bytecode_tex tex
;
656 memset(&tex
, 0, sizeof(struct r600_bytecode_tex
));
657 tex
.op
= tex_instr
.opcode();
658 tex
.sampler_id
= tex_instr
.sampler_id();
659 tex
.sampler_index_mode
= 0;
660 tex
.resource_id
= tex_instr
.resource_id();;
661 tex
.resource_index_mode
= 0;
662 tex
.src_gpr
= tex_instr
.src().sel();
663 tex
.dst_gpr
= tex_instr
.dst().sel();
664 tex
.dst_sel_x
= tex_instr
.dest_swizzle(0);
665 tex
.dst_sel_y
= tex_instr
.dest_swizzle(1);
666 tex
.dst_sel_z
= tex_instr
.dest_swizzle(2);
667 tex
.dst_sel_w
= tex_instr
.dest_swizzle(3);
668 tex
.src_sel_x
= tex_instr
.src().chan_i(0);
669 tex
.src_sel_y
= tex_instr
.src().chan_i(1);
670 tex
.src_sel_z
= tex_instr
.src().chan_i(2);
671 tex
.src_sel_w
= tex_instr
.src().chan_i(3);
672 tex
.coord_type_x
= !tex_instr
.has_flag(TexInstruction::x_unnormalized
);
673 tex
.coord_type_y
= !tex_instr
.has_flag(TexInstruction::y_unnormalized
);
674 tex
.coord_type_z
= !tex_instr
.has_flag(TexInstruction::z_unnormalized
);
675 tex
.coord_type_w
= !tex_instr
.has_flag(TexInstruction::w_unnormalized
);
676 tex
.offset_x
= tex_instr
.get_offset(0);
677 tex
.offset_y
= tex_instr
.get_offset(1);
678 tex
.offset_z
= tex_instr
.get_offset(2);
679 tex
.resource_index_mode
= (!!addr
) ? 2 : 0;
680 tex
.sampler_index_mode
= tex
.resource_index_mode
;
682 if (tex_instr
.opcode() == TexInstruction::get_gradient_h
||
683 tex_instr
.opcode() == TexInstruction::get_gradient_v
)
684 tex
.inst_mod
= tex_instr
.has_flag(TexInstruction::grad_fine
) ? 1 : 0;
686 tex
.inst_mod
= tex_instr
.inst_mode();
687 if (r600_bytecode_add_tex(m_bc
, &tex
)) {
688 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
694 bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction
& fetch_instr
)
696 int buffer_offset
= 0;
697 auto addr
= fetch_instr
.buffer_offset();
698 auto index_mode
= fetch_instr
.buffer_index_mode();
701 if (addr
->type() == Value::literal
) {
702 const auto& boffs
= static_cast<const LiteralValue
&>(*addr
);
703 buffer_offset
= boffs
.value();
705 index_mode
= bim_zero
;
706 if ((!m_bc
->index_loaded
[0] || m_loop_nesting
|| m_bc
->index_reg
[0] != addr
->sel())) {
707 struct r600_bytecode_alu alu
;
708 memset(&alu
, 0, sizeof(alu
));
709 alu
.op
= opcode_map
.at(op1_mova_int
);
711 alu
.src
[0].sel
= addr
->sel();
712 alu
.src
[0].chan
= addr
->chan();
714 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
720 alu
.op
= opcode_map
.at(op1_set_cf_idx0
);
726 r
= r600_bytecode_add_alu(m_bc
, &alu
);
730 m_bc
->index_reg
[0] = addr
->sel();
731 m_bc
->index_loaded
[0] = true;
736 if (fetch_instr
.has_prelude()) {
737 for(auto &i
: fetch_instr
.prelude()) {
743 struct r600_bytecode_vtx vtx
;
744 memset(&vtx
, 0, sizeof(vtx
));
745 vtx
.op
= fetch_instr
.vc_opcode();
746 vtx
.buffer_id
= fetch_instr
.buffer_id() + buffer_offset
;
747 vtx
.fetch_type
= fetch_instr
.fetch_type();
748 vtx
.src_gpr
= fetch_instr
.src().sel();
749 vtx
.src_sel_x
= fetch_instr
.src().chan();
750 vtx
.mega_fetch_count
= fetch_instr
.mega_fetch_count();
751 vtx
.dst_gpr
= fetch_instr
.dst().sel();
752 vtx
.dst_sel_x
= fetch_instr
.swz(0); /* SEL_X */
753 vtx
.dst_sel_y
= fetch_instr
.swz(1); /* SEL_Y */
754 vtx
.dst_sel_z
= fetch_instr
.swz(2); /* SEL_Z */
755 vtx
.dst_sel_w
= fetch_instr
.swz(3); /* SEL_W */
756 vtx
.use_const_fields
= fetch_instr
.use_const_fields();
757 vtx
.data_format
= fetch_instr
.data_format();
758 vtx
.num_format_all
= fetch_instr
.num_format(); /* NUM_FORMAT_SCALED */
759 vtx
.format_comp_all
= fetch_instr
.is_signed(); /* FORMAT_COMP_SIGNED */
760 vtx
.endian
= fetch_instr
.endian_swap();
761 vtx
.buffer_index_mode
= index_mode
;
762 vtx
.offset
= fetch_instr
.offset();
763 vtx
.indexed
= fetch_instr
.indexed();
764 vtx
.uncached
= fetch_instr
.uncached();
765 vtx
.elem_size
= fetch_instr
.elm_size();
766 vtx
.array_base
= fetch_instr
.array_base();
767 vtx
.array_size
= fetch_instr
.array_size();
768 vtx
.srf_mode_all
= fetch_instr
.srf_mode_no_zero();
770 if (fetch_instr
.use_tc()) {
771 if ((r600_bytecode_add_vtx_tc(m_bc
, &vtx
))) {
772 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
777 if ((r600_bytecode_add_vtx(m_bc
, &vtx
))) {
778 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
783 m_bc
->cf_last
->vpm
= fetch_instr
.use_vpm();
784 m_bc
->cf_last
->barrier
= 1;
789 bool AssemblyFromShaderLegacyImpl::emit_emit_vertex(const EmitVertex
&instr
)
791 int r
= r600_bytecode_add_cfinst(m_bc
, instr
.op());
793 m_bc
->cf_last
->count
= instr
.stream();
794 assert(m_bc
->cf_last
->count
< 4);
799 bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck
& instr
)
801 int r
= r600_bytecode_add_cfinst(m_bc
, instr
.op());
803 m_bc
->cf_last
->cf_addr
= instr
.n_ack();
808 bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction
& instr
)
810 struct r600_bytecode_output cf
;
812 memset(&cf
, 0, sizeof(struct r600_bytecode_output
));
814 cf
.op
= CF_OP_MEM_SCRATCH
;
816 cf
.gpr
= instr
.gpr().sel();
818 cf
.comp_mask
= instr
.write_mask();
825 if (instr
.indirect()) {
827 cf
.index_gpr
= instr
.address();
829 /* The docu seems to be wrong here: In indirect addressing the
830 * address_base seems to be the array_size */
831 cf
.array_size
= instr
.array_size();
834 cf
.array_base
= instr
.location();
836 /* This should be 0, but the address calculation is apparently wrong */
839 if (r600_bytecode_add_output(m_bc
, &cf
)){
840 R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
847 extern const std::map
<ESDOp
, int> ds_opcode_map
;
849 bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr
& instr
)
851 struct r600_bytecode_gds gds
;
854 auto addr
= instr
.uav_id();
855 if (addr
->type() != Value::literal
) {
856 if (!m_bc
->index_loaded
[1] || m_loop_nesting
||
857 m_bc
->index_reg
[1] != addr
->sel()) {
858 struct r600_bytecode_alu alu
;
860 memset(&alu
, 0, sizeof(alu
));
861 alu
.op
= opcode_map
.at(op2_lshr_int
);
862 alu
.dst
.sel
= addr
->sel();
863 alu
.dst
.chan
= addr
->chan();
864 alu
.src
[0].sel
= addr
->sel();
865 alu
.src
[0].chan
= addr
->chan();
866 alu
.src
[1].sel
= ALU_SRC_LITERAL
;
867 alu
.src
[1].value
= 2;
870 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
874 memset(&alu
, 0, sizeof(alu
));
875 alu
.op
= opcode_map
.at(op1_mova_int
);
877 alu
.src
[0].sel
= addr
->sel();
878 alu
.src
[0].chan
= addr
->chan();
880 r
= r600_bytecode_add_alu(m_bc
, &alu
);
886 alu
.op
= opcode_map
.at(op1_set_cf_idx1
);
892 r
= r600_bytecode_add_alu(m_bc
, &alu
);
896 m_bc
->index_reg
[1] = addr
->sel();
897 m_bc
->index_loaded
[1] = true;
900 const LiteralValue
& addr_reg
= static_cast<const LiteralValue
&>(*addr
);
901 uav_idx
= addr_reg
.value() >> 2;
904 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
906 gds
.op
= ds_opcode_map
.at(instr
.op());
907 gds
.dst_gpr
= instr
.dest_sel();
908 gds
.uav_id
= (uav_idx
>= 0 ? uav_idx
: 0) + instr
.uav_base();
909 gds
.uav_index_mode
= uav_idx
>= 0 ? bim_none
: bim_one
;
910 gds
.src_gpr
= instr
.src_sel();
912 if (instr
.op() == DS_OP_CMP_XCHG_RET
) {
918 gds
.src_sel_x
= instr
.src_swizzle(0);
919 gds
.src_sel_y
= instr
.src_swizzle(1);
926 gds
.alloc_consume
= 1; // Not Cayman
928 int r
= r600_bytecode_add_gds(m_bc
, &gds
);
931 m_bc
->cf_last
->vpm
= 1;
936 bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction
& instr
)
938 struct r600_bytecode_gds gds
;
941 EBufferIndexMode rat_index_mode
= bim_none
;
942 auto addr
= instr
.rat_id_offset();
945 if (addr
->type() != Value::literal
) {
946 rat_index_mode
= bim_one
;
947 if (!m_bc
->index_loaded
[1] || m_loop_nesting
|| m_bc
->index_reg
[1] != addr
->sel()) {
948 struct r600_bytecode_alu alu
;
950 memset(&alu
, 0, sizeof(alu
));
951 alu
.op
= opcode_map
.at(op1_mova_int
);
953 alu
.src
[0].sel
= addr
->sel();
954 alu
.src
[0].chan
= addr
->chan();
956 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
962 alu
.op
= opcode_map
.at(op1_set_cf_idx1
);
968 r
= r600_bytecode_add_alu(m_bc
, &alu
);
972 m_bc
->index_reg
[1] = addr
->sel();
973 m_bc
->index_loaded
[1] = true;
977 const LiteralValue
& addr_reg
= static_cast<const LiteralValue
&>(*addr
);
978 rat_idx
= addr_reg
.value();
981 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
983 r600_bytecode_add_cfinst(m_bc
, CF_OP_MEM_RAT
);
984 auto cf
= m_bc
->cf_last
;
985 cf
->rat
.id
= rat_idx
+ m_shader
->rat_base
;
986 cf
->rat
.inst
= instr
.rat_op();
987 cf
->rat
.index_mode
= rat_index_mode
;
988 cf
->output
.type
= instr
.need_ack() ? 3 : 1;
989 cf
->output
.gpr
= instr
.data_gpr();
990 cf
->output
.index_gpr
= instr
.index_gpr();
991 cf
->output
.comp_mask
= instr
.comp_mask();
992 cf
->output
.burst_count
= instr
.burst_count();
993 cf
->output
.swizzle_x
= instr
.data_swz(0);
994 cf
->output
.swizzle_y
= instr
.data_swz(1);
995 cf
->output
.swizzle_z
= instr
.data_swz(2);
996 cf
->output
.swizzle_w
= instr
.data_swz(3);
999 cf
->mark
= instr
.need_ack();
1000 cf
->output
.elem_size
= instr
.elm_size();
1004 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst
& dst
,
1007 assert(d
.type() == Value::gpr
|| d
.type() == Value::gpr_array_value
);
1009 if (d
.sel() > 124) {
1010 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d
.sel());
1015 dst
.chan
= d
.chan();
1017 if (m_bc
->index_reg
[1] == dst
.sel
)
1018 m_bc
->index_loaded
[1] = false;
1020 if (m_bc
->index_reg
[0] == dst
.sel
)
1021 m_bc
->index_loaded
[0] = false;
1026 bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src
& src
, const Value
& s
)
1029 if (s
.type() == Value::gpr
&& s
.sel() > 124) {
1030 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s
.sel());
1034 if (s
.type() == Value::lds_direct
) {
1035 R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
1039 if (s
.type() == Value::kconst
&& s
.sel() < 512) {
1040 R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s
.sel());
1044 if (s
.type() == Value::literal
) {
1045 auto& v
= static_cast<const LiteralValue
&>(s
);
1046 if (v
.value() == 0) {
1047 src
.sel
= ALU_SRC_0
;
1051 if (v
.value() == 1) {
1052 src
.sel
= ALU_SRC_1_INT
;
1056 if (v
.value_float() == 1.0f
) {
1057 src
.sel
= ALU_SRC_1
;
1061 if (v
.value_float() == 0.5f
) {
1062 src
.sel
= ALU_SRC_0_5
;
1066 src
.value
= v
.value();
1070 src
.chan
= s
.chan();
1071 if (s
.type() == Value::kconst
) {
1072 const UniformValue
& cv
= static_cast<const UniformValue
&>(s
);
1073 src
.kc_bank
= cv
.kcache_bank();
1079 const std::map
<EAluOp
, int> opcode_map
= {
1081 {op2_add
, ALU_OP2_ADD
},
1082 {op2_mul
, ALU_OP2_MUL
},
1083 {op2_mul_ieee
, ALU_OP2_MUL_IEEE
},
1084 {op2_max
, ALU_OP2_MAX
},
1085 {op2_min
, ALU_OP2_MIN
},
1086 {op2_max_dx10
, ALU_OP2_MAX_DX10
},
1087 {op2_min_dx10
, ALU_OP2_MIN_DX10
},
1088 {op2_sete
, ALU_OP2_SETE
},
1089 {op2_setgt
, ALU_OP2_SETGT
},
1090 {op2_setge
, ALU_OP2_SETGE
},
1091 {op2_setne
, ALU_OP2_SETNE
},
1092 {op2_sete_dx10
, ALU_OP2_SETE_DX10
},
1093 {op2_setgt_dx10
, ALU_OP2_SETGT_DX10
},
1094 {op2_setge_dx10
, ALU_OP2_SETGE_DX10
},
1095 {op2_setne_dx10
, ALU_OP2_SETNE_DX10
},
1096 {op1_fract
, ALU_OP1_FRACT
},
1097 {op1_trunc
, ALU_OP1_TRUNC
},
1098 {op1_ceil
, ALU_OP1_CEIL
},
1099 {op1_rndne
, ALU_OP1_RNDNE
},
1100 {op1_floor
, ALU_OP1_FLOOR
},
1101 {op2_ashr_int
, ALU_OP2_ASHR_INT
},
1102 {op2_lshr_int
, ALU_OP2_LSHR_INT
},
1103 {op2_lshl_int
, ALU_OP2_LSHL_INT
},
1104 {op1_mov
, ALU_OP1_MOV
},
1105 {op0_nop
, ALU_OP0_NOP
},
1106 {op2_mul_64
, ALU_OP2_MUL_64
},
1107 {op1_flt64_to_flt32
, ALU_OP1_FLT64_TO_FLT32
},
1108 {op1v_flt64_to_flt32
, ALU_OP1_FLT32_TO_FLT64
},
1109 {op2_pred_setgt_uint
, ALU_OP2_PRED_SETGT_UINT
},
1110 {op2_pred_setge_uint
, ALU_OP2_PRED_SETGE_UINT
},
1111 {op2_pred_sete
, ALU_OP2_PRED_SETE
},
1112 {op2_pred_setgt
, ALU_OP2_PRED_SETGT
},
1113 {op2_pred_setge
, ALU_OP2_PRED_SETGE
},
1114 {op2_pred_setne
, ALU_OP2_PRED_SETNE
},
1115 //{op2_pred_set_inv, ALU_OP2_PRED_SET},
1116 //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
1117 //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
1118 {op2_pred_sete_push
, ALU_OP2_PRED_SETE_PUSH
},
1119 {op2_pred_setgt_push
, ALU_OP2_PRED_SETGT_PUSH
},
1120 {op2_pred_setge_push
, ALU_OP2_PRED_SETGE_PUSH
},
1121 {op2_pred_setne_push
, ALU_OP2_PRED_SETNE_PUSH
},
1122 {op2_kille
, ALU_OP2_KILLE
},
1123 {op2_killgt
, ALU_OP2_KILLGT
},
1124 {op2_killge
, ALU_OP2_KILLGE
},
1125 {op2_killne
, ALU_OP2_KILLNE
},
1126 {op2_and_int
, ALU_OP2_AND_INT
},
1127 {op2_or_int
, ALU_OP2_OR_INT
},
1128 {op2_xor_int
, ALU_OP2_XOR_INT
},
1129 {op1_not_int
, ALU_OP1_NOT_INT
},
1130 {op2_add_int
, ALU_OP2_ADD_INT
},
1131 {op2_sub_int
, ALU_OP2_SUB_INT
},
1132 {op2_max_int
, ALU_OP2_MAX_INT
},
1133 {op2_min_int
, ALU_OP2_MIN_INT
},
1134 {op2_max_uint
, ALU_OP2_MAX_UINT
},
1135 {op2_min_uint
, ALU_OP2_MIN_UINT
},
1136 {op2_sete_int
, ALU_OP2_SETE_INT
},
1137 {op2_setgt_int
, ALU_OP2_SETGT_INT
},
1138 {op2_setge_int
, ALU_OP2_SETGE_INT
},
1139 {op2_setne_int
, ALU_OP2_SETNE_INT
},
1140 {op2_setgt_uint
, ALU_OP2_SETGT_UINT
},
1141 {op2_setge_uint
, ALU_OP2_SETGE_UINT
},
1142 {op2_killgt_uint
, ALU_OP2_KILLGT_UINT
},
1143 {op2_killge_uint
, ALU_OP2_KILLGE_UINT
},
1144 //{op2_prede_int, ALU_OP2_PREDE_INT},
1145 {op2_pred_setgt_int
, ALU_OP2_PRED_SETGT_INT
},
1146 {op2_pred_setge_int
, ALU_OP2_PRED_SETGE_INT
},
1147 {op2_pred_setne_int
, ALU_OP2_PRED_SETNE_INT
},
1148 {op2_kille_int
, ALU_OP2_KILLE_INT
},
1149 {op2_killgt_int
, ALU_OP2_KILLGT_INT
},
1150 {op2_killge_int
, ALU_OP2_KILLGE_INT
},
1151 {op2_killne_int
, ALU_OP2_KILLNE_INT
},
1152 {op2_pred_sete_push_int
, ALU_OP2_PRED_SETE_PUSH_INT
},
1153 {op2_pred_setgt_push_int
, ALU_OP2_PRED_SETGT_PUSH_INT
},
1154 {op2_pred_setge_push_int
, ALU_OP2_PRED_SETGE_PUSH_INT
},
1155 {op2_pred_setne_push_int
, ALU_OP2_PRED_SETNE_PUSH_INT
},
1156 {op2_pred_setlt_push_int
, ALU_OP2_PRED_SETLT_PUSH_INT
},
1157 {op2_pred_setle_push_int
, ALU_OP2_PRED_SETLE_PUSH_INT
},
1158 {op1_flt_to_int
, ALU_OP1_FLT_TO_INT
},
1159 {op1_bfrev_int
, ALU_OP1_BFREV_INT
},
1160 {op2_addc_uint
, ALU_OP2_ADDC_UINT
},
1161 {op2_subb_uint
, ALU_OP2_SUBB_UINT
},
1162 {op0_group_barrier
, ALU_OP0_GROUP_BARRIER
},
1163 {op0_group_seq_begin
, ALU_OP0_GROUP_SEQ_BEGIN
},
1164 {op0_group_seq_end
, ALU_OP0_GROUP_SEQ_END
},
1165 {op2_set_mode
, ALU_OP2_SET_MODE
},
1166 {op1_set_cf_idx0
, ALU_OP0_SET_CF_IDX0
},
1167 {op1_set_cf_idx1
, ALU_OP0_SET_CF_IDX1
},
1168 {op2_set_lds_size
, ALU_OP2_SET_LDS_SIZE
},
1169 {op1_exp_ieee
, ALU_OP1_EXP_IEEE
},
1170 {op1_log_clamped
, ALU_OP1_LOG_CLAMPED
},
1171 {op1_log_ieee
, ALU_OP1_LOG_IEEE
},
1172 {op1_recip_clamped
, ALU_OP1_RECIP_CLAMPED
},
1173 {op1_recip_ff
, ALU_OP1_RECIP_FF
},
1174 {op1_recip_ieee
, ALU_OP1_RECIP_IEEE
},
1175 {op1_recipsqrt_clamped
, ALU_OP1_RECIPSQRT_CLAMPED
},
1176 {op1_recipsqrt_ff
, ALU_OP1_RECIPSQRT_FF
},
1177 {op1_recipsqrt_ieee1
, ALU_OP1_RECIPSQRT_IEEE
},
1178 {op1_sqrt_ieee
, ALU_OP1_SQRT_IEEE
},
1179 {op1_sin
, ALU_OP1_SIN
},
1180 {op1_cos
, ALU_OP1_COS
},
1181 {op2_mullo_int
, ALU_OP2_MULLO_INT
},
1182 {op2_mulhi_int
, ALU_OP2_MULHI_INT
},
1183 {op2_mullo_uint
, ALU_OP2_MULLO_UINT
},
1184 {op2_mulhi_uint
, ALU_OP2_MULHI_UINT
},
1185 {op1_recip_int
, ALU_OP1_RECIP_INT
},
1186 {op1_recip_uint
, ALU_OP1_RECIP_UINT
},
1187 {op1_recip_64
, ALU_OP2_RECIP_64
},
1188 {op1_recip_clamped_64
, ALU_OP2_RECIP_CLAMPED_64
},
1189 {op1_recipsqrt_64
, ALU_OP2_RECIPSQRT_64
},
1190 {op1_recipsqrt_clamped_64
, ALU_OP2_RECIPSQRT_CLAMPED_64
},
1191 {op1_sqrt_64
, ALU_OP2_SQRT_64
},
1192 {op1_flt_to_uint
, ALU_OP1_FLT_TO_UINT
},
1193 {op1_int_to_flt
, ALU_OP1_INT_TO_FLT
},
1194 {op1_uint_to_flt
, ALU_OP1_UINT_TO_FLT
},
1195 {op2_bfm_int
, ALU_OP2_BFM_INT
},
1196 {op1_flt32_to_flt16
, ALU_OP1_FLT32_TO_FLT16
},
1197 {op1_flt16_to_flt32
, ALU_OP1_FLT16_TO_FLT32
},
1198 {op1_ubyte0_flt
, ALU_OP1_UBYTE0_FLT
},
1199 {op1_ubyte1_flt
, ALU_OP1_UBYTE1_FLT
},
1200 {op1_ubyte2_flt
, ALU_OP1_UBYTE2_FLT
},
1201 {op1_ubyte3_flt
, ALU_OP1_UBYTE3_FLT
},
1202 {op1_bcnt_int
, ALU_OP1_BCNT_INT
},
1203 {op1_ffbh_uint
, ALU_OP1_FFBH_UINT
},
1204 {op1_ffbl_int
, ALU_OP1_FFBL_INT
},
1205 {op1_ffbh_int
, ALU_OP1_FFBH_INT
},
1206 {op1_flt_to_uint4
, ALU_OP1_FLT_TO_UINT4
},
1207 {op2_dot_ieee
, ALU_OP2_DOT_IEEE
},
1208 {op1_flt_to_int_rpi
, ALU_OP1_FLT_TO_INT_RPI
},
1209 {op1_flt_to_int_floor
, ALU_OP1_FLT_TO_INT_FLOOR
},
1210 {op2_mulhi_uint24
, ALU_OP2_MULHI_UINT24
},
1211 {op1_mbcnt_32hi_int
, ALU_OP1_MBCNT_32HI_INT
},
1212 {op1_offset_to_flt
, ALU_OP1_OFFSET_TO_FLT
},
1213 {op2_mul_uint24
, ALU_OP2_MUL_UINT24
},
1214 {op1_bcnt_accum_prev_int
, ALU_OP1_BCNT_ACCUM_PREV_INT
},
1215 {op1_mbcnt_32lo_accum_prev_int
, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT
},
1216 {op2_sete_64
, ALU_OP2_SETE_64
},
1217 {op2_setne_64
, ALU_OP2_SETNE_64
},
1218 {op2_setgt_64
, ALU_OP2_SETGT_64
},
1219 {op2_setge_64
, ALU_OP2_SETGE_64
},
1220 {op2_min_64
, ALU_OP2_MIN_64
},
1221 {op2_max_64
, ALU_OP2_MAX_64
},
1222 {op2_dot4
, ALU_OP2_DOT4
},
1223 {op2_dot4_ieee
, ALU_OP2_DOT4_IEEE
},
1224 {op2_cube
, ALU_OP2_CUBE
},
1225 {op1_max4
, ALU_OP1_MAX4
},
1226 {op1_frexp_64
, ALU_OP1_FREXP_64
},
1227 {op1_ldexp_64
, ALU_OP2_LDEXP_64
},
1228 {op1_fract_64
, ALU_OP1_FRACT_64
},
1229 {op2_pred_setgt_64
, ALU_OP2_PRED_SETGT_64
},
1230 {op2_pred_sete_64
, ALU_OP2_PRED_SETE_64
},
1231 {op2_pred_setge_64
, ALU_OP2_PRED_SETGE_64
},
1232 {op2_add_64
, ALU_OP2_ADD_64
},
1233 {op1_mova_int
, ALU_OP1_MOVA_INT
},
1234 {op1v_flt64_to_flt32
, ALU_OP1_FLT64_TO_FLT32
},
1235 {op1_flt32_to_flt64
, ALU_OP1_FLT32_TO_FLT64
},
1236 {op2_sad_accum_prev_uint
, ALU_OP2_SAD_ACCUM_PREV_UINT
},
1237 {op2_dot
, ALU_OP2_DOT
},
1238 //{op2_mul_prev, ALU_OP2_MUL_PREV},
1239 //{op2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
1240 //{op2_add_prev, ALU_OP2_ADD_PREV},
1241 {op2_muladd_prev
, ALU_OP2_MULADD_PREV
},
1242 {op2_muladd_ieee_prev
, ALU_OP2_MULADD_IEEE_PREV
},
1243 {op2_interp_xy
, ALU_OP2_INTERP_XY
},
1244 {op2_interp_zw
, ALU_OP2_INTERP_ZW
},
1245 {op2_interp_x
, ALU_OP2_INTERP_X
},
1246 {op2_interp_z
, ALU_OP2_INTERP_Z
},
1247 {op0_store_flags
, ALU_OP1_STORE_FLAGS
},
1248 {op1_load_store_flags
, ALU_OP1_LOAD_STORE_FLAGS
},
1249 {op0_lds_1a
, ALU_OP2_LDS_1A
},
1250 {op0_lds_1a1d
, ALU_OP2_LDS_1A1D
},
1251 {op0_lds_2a
, ALU_OP2_LDS_2A
},
1252 {op1_interp_load_p0
, ALU_OP1_INTERP_LOAD_P0
},
1253 {op1_interp_load_p10
, ALU_OP1_INTERP_LOAD_P10
},
1254 {op1_interp_load_p20
, ALU_OP1_INTERP_LOAD_P20
},
1255 // op3 (three-source) ALU opcodes below — all left shift 6
1256 {op3_bfe_uint
, ALU_OP3_BFE_UINT
},
1257 {op3_bfe_int
, ALU_OP3_BFE_INT
},
1258 {op3_bfi_int
, ALU_OP3_BFI_INT
},
1259 {op3_fma
, ALU_OP3_FMA
},
1260 {op3_cndne_64
, ALU_OP3_CNDNE_64
},
1261 {op3_fma_64
, ALU_OP3_FMA_64
},
1262 {op3_lerp_uint
, ALU_OP3_LERP_UINT
},
1263 {op3_bit_align_int
, ALU_OP3_BIT_ALIGN_INT
},
1264 {op3_byte_align_int
, ALU_OP3_BYTE_ALIGN_INT
},
1265 {op3_sad_accum_uint
, ALU_OP3_SAD_ACCUM_UINT
},
1266 {op3_sad_accum_hi_uint
, ALU_OP3_SAD_ACCUM_HI_UINT
},
1267 {op3_muladd_uint24
, ALU_OP3_MULADD_UINT24
},
1268 {op3_lds_idx_op
, ALU_OP3_LDS_IDX_OP
},
1269 {op3_muladd
, ALU_OP3_MULADD
},
1270 {op3_muladd_m2
, ALU_OP3_MULADD_M2
},
1271 {op3_muladd_m4
, ALU_OP3_MULADD_M4
},
1272 {op3_muladd_d2
, ALU_OP3_MULADD_D2
},
1273 {op3_muladd_ieee
, ALU_OP3_MULADD_IEEE
},
1274 {op3_cnde
, ALU_OP3_CNDE
},
1275 {op3_cndgt
, ALU_OP3_CNDGT
},
1276 {op3_cndge
, ALU_OP3_CNDGE
},
1277 {op3_cnde_int
, ALU_OP3_CNDE_INT
},
1278 {op3_cndgt_int
, ALU_OP3_CNDGT_INT
},
1279 {op3_cndge_int
, ALU_OP3_CNDGE_INT
},
1280 {op3_mul_lit
, ALU_OP3_MUL_LIT
},
1283 const std::map
<ESDOp
, int> ds_opcode_map
= {
1284 {DS_OP_ADD
, FETCH_OP_GDS_ADD
},
1285 {DS_OP_SUB
, FETCH_OP_GDS_SUB
},
1286 {DS_OP_RSUB
, FETCH_OP_GDS_RSUB
},
1287 {DS_OP_INC
, FETCH_OP_GDS_INC
},
1288 {DS_OP_DEC
, FETCH_OP_GDS_DEC
},
1289 {DS_OP_MIN_INT
, FETCH_OP_GDS_MIN_INT
},
1290 {DS_OP_MAX_INT
, FETCH_OP_GDS_MAX_INT
},
1291 {DS_OP_MIN_UINT
, FETCH_OP_GDS_MIN_UINT
},
1292 {DS_OP_MAX_UINT
, FETCH_OP_GDS_MAX_UINT
},
1293 {DS_OP_AND
, FETCH_OP_GDS_AND
},
1294 {DS_OP_OR
, FETCH_OP_GDS_OR
},
1295 {DS_OP_XOR
, FETCH_OP_GDS_XOR
},
1296 {DS_OP_MSKOR
, FETCH_OP_GDS_MSKOR
},
1297 {DS_OP_WRITE
, FETCH_OP_GDS_WRITE
},
1298 {DS_OP_WRITE_REL
, FETCH_OP_GDS_WRITE_REL
},
1299 {DS_OP_WRITE2
, FETCH_OP_GDS_WRITE2
},
1300 {DS_OP_CMP_STORE
, FETCH_OP_GDS_CMP_STORE
},
1301 {DS_OP_CMP_STORE_SPF
, FETCH_OP_GDS_CMP_STORE_SPF
},
1302 {DS_OP_BYTE_WRITE
, FETCH_OP_GDS_BYTE_WRITE
},
1303 {DS_OP_SHORT_WRITE
, FETCH_OP_GDS_SHORT_WRITE
},
1304 {DS_OP_ADD_RET
, FETCH_OP_GDS_ADD_RET
},
1305 {DS_OP_SUB_RET
, FETCH_OP_GDS_SUB_RET
},
1306 {DS_OP_RSUB_RET
, FETCH_OP_GDS_RSUB_RET
},
1307 {DS_OP_INC_RET
, FETCH_OP_GDS_INC_RET
},
1308 {DS_OP_DEC_RET
, FETCH_OP_GDS_DEC_RET
},
1309 {DS_OP_MIN_INT_RET
, FETCH_OP_GDS_MIN_INT_RET
},
1310 {DS_OP_MAX_INT_RET
, FETCH_OP_GDS_MAX_INT_RET
},
1311 {DS_OP_MIN_UINT_RET
, FETCH_OP_GDS_MIN_UINT_RET
},
1312 {DS_OP_MAX_UINT_RET
, FETCH_OP_GDS_MAX_UINT_RET
},
1313 {DS_OP_AND_RET
, FETCH_OP_GDS_AND_RET
},
1314 {DS_OP_OR_RET
, FETCH_OP_GDS_OR_RET
},
1315 {DS_OP_XOR_RET
, FETCH_OP_GDS_XOR_RET
},
1316 {DS_OP_MSKOR_RET
, FETCH_OP_GDS_MSKOR_RET
},
1317 {DS_OP_XCHG_RET
, FETCH_OP_GDS_XCHG_RET
},
1318 {DS_OP_XCHG_REL_RET
, FETCH_OP_GDS_XCHG_REL_RET
},
1319 {DS_OP_XCHG2_RET
, FETCH_OP_GDS_XCHG2_RET
},
1320 {DS_OP_CMP_XCHG_RET
, FETCH_OP_GDS_CMP_XCHG_RET
},
1321 {DS_OP_CMP_XCHG_SPF_RET
, FETCH_OP_GDS_CMP_XCHG_SPF_RET
},
1322 {DS_OP_READ_RET
, FETCH_OP_GDS_READ_RET
},
1323 {DS_OP_READ_REL_RET
, FETCH_OP_GDS_READ_REL_RET
},
1324 {DS_OP_READ2_RET
, FETCH_OP_GDS_READ2_RET
},
1325 {DS_OP_READWRITE_RET
, FETCH_OP_GDS_READWRITE_RET
},
1326 {DS_OP_BYTE_READ_RET
, FETCH_OP_GDS_BYTE_READ_RET
},
1327 {DS_OP_UBYTE_READ_RET
, FETCH_OP_GDS_UBYTE_READ_RET
},
1328 {DS_OP_SHORT_READ_RET
, FETCH_OP_GDS_SHORT_READ_RET
},
1329 {DS_OP_USHORT_READ_RET
, FETCH_OP_GDS_USHORT_READ_RET
},
1330 {DS_OP_ATOMIC_ORDERED_ALLOC_RET
, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC
},