3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "sfn_ir_to_assembly.h"
28 #include "sfn_conditionaljumptracker.h"
29 #include "sfn_callstack.h"
30 #include "sfn_instruction_gds.h"
31 #include "sfn_instruction_misc.h"
32 #include "sfn_instruction_fetch.h"
33 #include "sfn_instruction_lds.h"
35 #include "../r600_shader.h"
36 #include "../r600_sq.h"
42 struct AssemblyFromShaderLegacyImpl
{
44 AssemblyFromShaderLegacyImpl(r600_shader
*sh
, r600_shader_key
*key
);
45 bool emit(const Instruction::Pointer i
);
46 void reset_addr_register() {m_last_addr
.reset();}
49 bool emit_alu(const AluInstruction
& ai
, ECFAluOpCode cf_op
);
50 bool emit_export(const ExportInstruction
& exi
);
51 bool emit_streamout(const StreamOutIntruction
& instr
);
52 bool emit_memringwrite(const MemRingOutIntruction
& instr
);
53 bool emit_tex(const TexInstruction
& tex_instr
);
54 bool emit_vtx(const FetchInstruction
& fetch_instr
);
55 bool emit_if_start(const IfInstruction
& if_instr
);
56 bool emit_else(const ElseInstruction
& else_instr
);
57 bool emit_endif(const IfElseEndInstruction
& endif_instr
);
58 bool emit_emit_vertex(const EmitVertex
&instr
);
60 bool emit_loop_begin(const LoopBeginInstruction
& instr
);
61 bool emit_loop_end(const LoopEndInstruction
& instr
);
62 bool emit_loop_break(const LoopBreakInstruction
& instr
);
63 bool emit_loop_continue(const LoopContInstruction
& instr
);
64 bool emit_wait_ack(const WaitAck
& instr
);
65 bool emit_wr_scratch(const WriteScratchInstruction
& instr
);
66 bool emit_gds(const GDSInstr
& instr
);
67 bool emit_rat(const RatInstruction
& instr
);
68 bool emit_ldswrite(const LDSWriteInstruction
& instr
);
69 bool emit_ldsread(const LDSReadInstruction
& instr
);
70 bool emit_ldsatomic(const LDSAtomicInstruction
& instr
);
71 bool emit_tf_write(const GDSStoreTessFactor
& instr
);
73 bool emit_load_addr(PValue addr
);
74 bool emit_fs_pixel_export(const ExportInstruction
& exi
);
75 bool emit_vs_pos_export(const ExportInstruction
& exi
);
76 bool emit_vs_param_export(const ExportInstruction
& exi
);
77 bool copy_dst(r600_bytecode_alu_dst
& dst
, const Value
& src
);
78 bool copy_src(r600_bytecode_alu_src
& src
, const Value
& s
);
82 ConditionalJumpTracker m_jump_tracker
;
83 CallStack m_callstack
;
87 r600_shader
*m_shader
;
88 r600_shader_key
*m_key
;
89 r600_bytecode_output m_output
;
90 unsigned m_max_color_exports
;
92 bool has_param_output
;
95 int m_nliterals_in_group
;
96 std::set
<int> vtx_fetch_results
;
/* Public wrapper constructor: forwards the shader and the compile key to the
 * private implementation object (pimpl idiom).
 * NOTE(review): raw `new` with no visible matching delete in this chunk —
 * presumably the destructor frees `impl`; confirm against the full file. */
100 AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader
*sh
,
101 r600_shader_key
*key
)
103 impl
= new AssemblyFromShaderLegacyImpl(sh
, key
);
106 AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
111 bool AssemblyFromShaderLegacy::do_lower(const std::vector
<InstructionBlock
>& ir
)
113 if (impl
->m_shader
->processor_type
== PIPE_SHADER_VERTEX
&&
114 impl
->m_shader
->ninput
> 0)
115 r600_bytecode_add_cfinst(impl
->m_bc
, CF_OP_CALL_FS
);
118 std::vector
<Instruction::Pointer
> exports
;
120 for (const auto& block
: ir
) {
121 for (const auto& i
: block
) {
124 if (i
->type() != Instruction::alu
)
125 impl
->reset_addr_register();
129 for (const auto& i : exports) {
130 if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
135 const struct cf_op_info
*last
= nullptr;
136 if (impl
->m_bc
->cf_last
)
137 last
= r600_isa_cf(impl
->m_bc
->cf_last
->op
);
139 /* alu clause instructions don't have EOP bit, so add NOP */
140 if (!last
|| last
->flags
& CF_ALU
|| impl
->m_bc
->cf_last
->op
== CF_OP_LOOP_END
141 || impl
->m_bc
->cf_last
->op
== CF_OP_POP
)
142 r600_bytecode_add_cfinst(impl
->m_bc
, CF_OP_NOP
);
144 /* A fetch shader only can't be EOP (results in hang), but we can replace it
146 else if (impl
->m_bc
->cf_last
->op
== CF_OP_CALL_FS
)
147 impl
->m_bc
->cf_last
->op
= CF_OP_NOP
;
149 if (impl
->m_shader
->bc
.chip_class
!= CAYMAN
)
150 impl
->m_bc
->cf_last
->end_of_program
= 1;
152 cm_bytecode_add_cf_end(impl
->m_bc
);
157 bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i
)
159 if (i
->type() != Instruction::vtx
)
160 vtx_fetch_results
.clear();
162 sfn_log
<< SfnLog::assembly
<< "Emit from '" << *i
<< "\n";
164 case Instruction::alu
:
165 return emit_alu(static_cast<const AluInstruction
&>(*i
), cf_alu_undefined
);
166 case Instruction::exprt
:
167 return emit_export(static_cast<const ExportInstruction
&>(*i
));
168 case Instruction::tex
:
169 return emit_tex(static_cast<const TexInstruction
&>(*i
));
170 case Instruction::vtx
:
171 return emit_vtx(static_cast<const FetchInstruction
&>(*i
));
172 case Instruction::cond_if
:
173 return emit_if_start(static_cast<const IfInstruction
&>(*i
));
174 case Instruction::cond_else
:
175 return emit_else(static_cast<const ElseInstruction
&>(*i
));
176 case Instruction::cond_endif
:
177 return emit_endif(static_cast<const IfElseEndInstruction
&>(*i
));
178 case Instruction::loop_begin
:
179 return emit_loop_begin(static_cast<const LoopBeginInstruction
&>(*i
));
180 case Instruction::loop_end
:
181 return emit_loop_end(static_cast<const LoopEndInstruction
&>(*i
));
182 case Instruction::loop_break
:
183 return emit_loop_break(static_cast<const LoopBreakInstruction
&>(*i
));
184 case Instruction::loop_continue
:
185 return emit_loop_continue(static_cast<const LoopContInstruction
&>(*i
));
186 case Instruction::streamout
:
187 return emit_streamout(static_cast<const StreamOutIntruction
&>(*i
));
188 case Instruction::ring
:
189 return emit_memringwrite(static_cast<const MemRingOutIntruction
&>(*i
));
190 case Instruction::emit_vtx
:
191 return emit_emit_vertex(static_cast<const EmitVertex
&>(*i
));
192 case Instruction::wait_ack
:
193 return emit_wait_ack(static_cast<const WaitAck
&>(*i
));
194 case Instruction::mem_wr_scratch
:
195 return emit_wr_scratch(static_cast<const WriteScratchInstruction
&>(*i
));
196 case Instruction::gds
:
197 return emit_gds(static_cast<const GDSInstr
&>(*i
));
198 case Instruction::rat
:
199 return emit_rat(static_cast<const RatInstruction
&>(*i
));
200 case Instruction::lds_write
:
201 return emit_ldswrite(static_cast<const LDSWriteInstruction
&>(*i
));
202 case Instruction::lds_read
:
203 return emit_ldsread(static_cast<const LDSReadInstruction
&>(*i
));
204 case Instruction::lds_atomic
:
205 return emit_ldsatomic(static_cast<const LDSAtomicInstruction
&>(*i
));
206 case Instruction::tf_write
:
207 return emit_tf_write(static_cast<const GDSStoreTessFactor
&>(*i
));
213 AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader
*sh
,
214 r600_shader_key
*key
):
219 has_pos_output(false),
220 has_param_output(false),
222 m_nliterals_in_group(0)
224 m_max_color_exports
= MAX2(m_key
->ps
.nr_cbufs
, 1);
227 extern const std::map
<EAluOp
, int> opcode_map
;
/* Load a value into the AR (address) register used for relative GPR
 * addressing: records the source register select and channel in the
 * bytecode state and logs the operation. */
229 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr
)
231 m_bc
->ar_reg
= addr
->sel();
232 m_bc
->ar_chan
= addr
->chan();
// Trace the address-register load for assembly-level debugging.
236 sfn_log
<< SfnLog::assembly
<< " Prepare " << *addr
<< " to address register\n";
241 bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction
& ai
, ECFAluOpCode cf_op
)
244 struct r600_bytecode_alu alu
;
245 memset(&alu
, 0, sizeof(alu
));
248 if (opcode_map
.find(ai
.opcode()) == opcode_map
.end()) {
249 std::cerr
<< "Opcode not handled for " << ai
<<"\n";
253 unsigned old_nliterals_in_group
= m_nliterals_in_group
;
254 for (unsigned i
= 0; i
< ai
.n_sources(); ++i
) {
256 if (s
.type() == Value::literal
)
257 ++m_nliterals_in_group
;
260 /* This instruction group would exeed the limit of literals, so
261 * force a new instruction group by adding a NOP as last
262 * instruction. This will no loner be needed with a real
264 if (m_nliterals_in_group
> 4) {
265 sfn_log
<< SfnLog::assembly
<< " Have " << m_nliterals_in_group
<< " inject a last op (nop)\n";
266 alu
.op
= ALU_OP0_NOP
;
269 int retval
= r600_bytecode_add_alu(m_bc
, &alu
);
272 memset(&alu
, 0, sizeof(alu
));
273 m_nliterals_in_group
-= old_nliterals_in_group
;
276 alu
.op
= opcode_map
.at(ai
.opcode());
278 /* Missing test whether ai actually has a dest */
279 auto dst
= ai
.dest();
282 if (!copy_dst(alu
.dst
, *dst
))
285 alu
.dst
.write
= ai
.flag(alu_write
);
286 alu
.dst
.clamp
= ai
.flag(alu_dst_clamp
);
288 if (dst
->type() == Value::gpr_array_value
) {
289 auto& v
= static_cast<const GPRArrayValue
&>(*dst
);
290 PValue addr
= v
.indirect();
292 if (!m_last_addr
|| *addr
!= *m_last_addr
) {
293 emit_load_addr(addr
);
296 alu
.dst
.rel
= addr
? 1 : 0;;
301 alu
.is_op3
= ai
.n_sources() == 3;
303 for (unsigned i
= 0; i
< ai
.n_sources(); ++i
) {
306 if (!copy_src(alu
.src
[i
], s
))
308 alu
.src
[i
].neg
= ai
.flag(AluInstruction::src_neg_flags
[i
]);
310 if (s
.type() == Value::gpr_array_value
) {
311 auto& v
= static_cast<const GPRArrayValue
&>(s
);
312 PValue addr
= v
.indirect();
314 assert(!addr_in_use
|| (*addr_in_use
== *addr
));
315 if (!m_last_addr
|| *addr
!= *m_last_addr
) {
316 emit_load_addr(addr
);
319 alu
.src
[i
].rel
= addr
? 1 : 0;
323 alu
.src
[i
].abs
= ai
.flag(AluInstruction::src_abs_flags
[i
]);
326 if (ai
.bank_swizzle() != alu_vec_unknown
)
327 alu
.bank_swizzle_force
= ai
.bank_swizzle();
329 alu
.last
= ai
.flag(alu_last_instr
);
330 alu
.update_pred
= ai
.flag(alu_update_pred
);
331 alu
.execute_mask
= ai
.flag(alu_update_exec
);
333 /* If the destination register is equal to the last loaded address register
334 * then clear the latter one, because the values will no longer be identical */
336 sfn_log
<< SfnLog::assembly
<< " Current address register is " << *m_last_addr
<< "\n";
339 sfn_log
<< SfnLog::assembly
<< " Current dst register is " << *dst
<< "\n";
341 if (dst
&& m_last_addr
)
342 if (*dst
== *m_last_addr
) {
343 sfn_log
<< SfnLog::assembly
<< " Clear address register (was " << *m_last_addr
<< "\n";
347 if (cf_op
== cf_alu_undefined
)
348 cf_op
= ai
.cf_type();
352 case cf_alu
: type
= CF_OP_ALU
; break;
353 case cf_alu_push_before
: type
= CF_OP_ALU_PUSH_BEFORE
; break;
354 case cf_alu_pop_after
: type
= CF_OP_ALU_POP_AFTER
; break;
355 case cf_alu_pop2_after
: type
= CF_OP_ALU_POP2_AFTER
; break;
356 case cf_alu_break
: type
= CF_OP_ALU_BREAK
; break;
357 case cf_alu_else_after
: type
= CF_OP_ALU_ELSE_AFTER
; break;
358 case cf_alu_continue
: type
= CF_OP_ALU_CONTINUE
; break;
359 case cf_alu_extended
: type
= CF_OP_ALU_EXT
; break;
361 assert(0 && "cf_alu_undefined should have been replaced");
365 m_nliterals_in_group
= 0;
367 bool retval
= !r600_bytecode_add_alu_type(m_bc
, &alu
, type
);
369 if (ai
.opcode() == op1_mova_int
)
372 if (ai
.opcode() == op1_set_cf_idx0
)
373 m_bc
->index_loaded
[0] = 1;
375 if (ai
.opcode() == op1_set_cf_idx1
)
376 m_bc
->index_loaded
[1] = 1;
379 m_bc
->force_add_cf
|= (ai
.opcode() == op2_kille
||
380 ai
.opcode() == op2_killne_int
||
381 ai
.opcode() == op1_set_cf_idx0
||
382 ai
.opcode() == op1_set_cf_idx1
);
/* Emit a vertex-shader position export.
 * Fills an r600_bytecode_output from the export instruction's GPR vector and
 * adds it to the bytecode stream.  The hardware position export space starts
 * at array_base 60, hence the "60 +" offset below. */
386 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction
& exi
)
388 r600_bytecode_output output
;
389 memset(&output
, 0, sizeof(output
));
390 assert(exi
.gpr().type() == Value::gpr_vector
);
391 const auto& gpr
= exi
.gpr();
392 output
.gpr
= gpr
.sel();
393 output
.elem_size
= 3;
// Per-component swizzles come straight from the source vector's channels.
394 output
.swizzle_x
= gpr
.chan_i(0);
395 output
.swizzle_y
= gpr
.chan_i(1);
396 output
.swizzle_z
= gpr
.chan_i(2);
397 output
.swizzle_w
= gpr
.chan_i(3);
398 output
.burst_count
= 1;
// Position exports live at array_base 60 + location (HW convention).
399 output
.array_base
= 60 + exi
.location();
// The last export of the shader must use EXPORT_DONE.
400 output
.op
= exi
.is_last_export() ? CF_OP_EXPORT_DONE
: CF_OP_EXPORT
;
401 output
.type
= exi
.export_type();
404 if (r600_bytecode_add_output(m_bc
, &output
)) {
// NOTE(review): message says "pixel export" but this is the position-export
// path — likely a copy/paste slip in the original; confirm before changing.
405 R600_ERR("Error adding pixel export at location %d\n", exi
.location());
413 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction
& exi
)
415 r600_bytecode_output output
;
416 assert(exi
.gpr().type() == Value::gpr_vector
);
417 const auto& gpr
= exi
.gpr();
419 memset(&output
, 0, sizeof(output
));
420 output
.gpr
= gpr
.sel();
421 output
.elem_size
= 3;
422 output
.swizzle_x
= gpr
.chan_i(0);
423 output
.swizzle_y
= gpr
.chan_i(1);
424 output
.swizzle_z
= gpr
.chan_i(2);
425 output
.swizzle_w
= gpr
.chan_i(3);
426 output
.burst_count
= 1;
427 output
.array_base
= exi
.location();
428 output
.op
= exi
.is_last_export() ? CF_OP_EXPORT_DONE
: CF_OP_EXPORT
;
429 output
.type
= exi
.export_type();
432 if (r600_bytecode_add_output(m_bc
, &output
)) {
433 R600_ERR("Error adding pixel export at location %d\n", exi
.location());
441 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction
& exi
)
443 if (exi
.location() >= m_max_color_exports
&& exi
.location() < 60) {
444 R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
445 exi
.location(), m_max_color_exports
);
449 assert(exi
.gpr().type() == Value::gpr_vector
);
450 const auto& gpr
= exi
.gpr();
452 r600_bytecode_output output
;
453 memset(&output
, 0, sizeof(output
));
455 output
.gpr
= gpr
.sel();
456 output
.elem_size
= 3;
457 output
.swizzle_x
= gpr
.chan_i(0);
458 output
.swizzle_y
= gpr
.chan_i(1);
459 output
.swizzle_z
= gpr
.chan_i(2);
460 output
.swizzle_w
= m_key
->ps
.alpha_to_one
? 5 : gpr
.chan_i(3); ;
461 output
.burst_count
= 1;
462 output
.array_base
= exi
.location();
463 output
.op
= exi
.is_last_export() ? CF_OP_EXPORT_DONE
: CF_OP_EXPORT
;
464 output
.type
= exi
.export_type();
467 if (r600_bytecode_add_output(m_bc
, &output
)) {
468 R600_ERR("Error adding pixel export at location %d\n", exi
.location());
/* Dispatch an export instruction to the handler matching its type
 * (pixel, position, or parameter).  Unsupported types fall through to the
 * error report below. */
476 bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction
& exi
)
478 switch (exi
.export_type()) {
479 case ExportInstruction::et_pixel
:
480 return emit_fs_pixel_export(exi
);
481 case ExportInstruction::et_pos
:
482 return emit_vs_pos_export(exi
);
483 case ExportInstruction::et_param
:
484 return emit_vs_param_export(exi
);
486 R600_ERR("shader_from_nir: export %d type not yet supported\n", exi
.export_type());
491 bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction
& if_instr
)
493 bool needs_workaround
= false;
494 int elems
= m_callstack
.push(FC_PUSH_VPM
);
496 if (m_bc
->chip_class
== CAYMAN
&& m_bc
->stack
.loop
> 1)
497 needs_workaround
= true;
498 if (m_bc
->family
!= CHIP_HEMLOCK
&&
499 m_bc
->family
!= CHIP_CYPRESS
&&
500 m_bc
->family
!= CHIP_JUNIPER
) {
501 unsigned dmod1
= (elems
- 1) % m_bc
->stack
.entry_size
;
502 unsigned dmod2
= (elems
) % m_bc
->stack
.entry_size
;
504 if (elems
&& (!dmod1
|| !dmod2
))
505 needs_workaround
= true;
508 auto& pred
= if_instr
.pred();
509 auto op
= cf_alu_push_before
;
511 if (needs_workaround
) {
512 r600_bytecode_add_cfinst(m_bc
, CF_OP_PUSH
);
513 m_bc
->cf_last
->cf_addr
= m_bc
->cf_last
->id
+ 2;
518 r600_bytecode_add_cfinst(m_bc
, CF_OP_JUMP
);
520 m_jump_tracker
.push(m_bc
->cf_last
, jt_if
);
/* Emit the ELSE control-flow instruction for an if/else construct.
 * pop_count = 1 pops the predicate pushed by the matching IF; the jump
 * tracker records this CF entry as the middle of the jt_if construct so the
 * branch target can be patched when the ENDIF is reached. */
524 bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED
const ElseInstruction
& else_instr
)
526 r600_bytecode_add_cfinst(m_bc
, CF_OP_ELSE
);
527 m_bc
->cf_last
->pop_count
= 1;
528 return m_jump_tracker
.add_mid(m_bc
->cf_last
, jt_if
);
531 bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED
const IfElseEndInstruction
& endif_instr
)
533 m_callstack
.pop(FC_PUSH_VPM
);
535 unsigned force_pop
= m_bc
->force_add_cf
;
539 if (m_bc
->cf_last
->op
== CF_OP_ALU
)
541 else if (m_bc
->cf_last
->op
== CF_OP_ALU_POP_AFTER
)
546 m_bc
->cf_last
->op
= CF_OP_ALU_POP_AFTER
;
547 m_bc
->force_add_cf
= 1;
548 } else if (alu_pop
== 2) {
549 m_bc
->cf_last
->op
= CF_OP_ALU_POP2_AFTER
;
550 m_bc
->force_add_cf
= 1;
557 r600_bytecode_add_cfinst(m_bc
, CF_OP_POP
);
558 m_bc
->cf_last
->pop_count
= 1;
559 m_bc
->cf_last
->cf_addr
= m_bc
->cf_last
->id
+ 2;
562 return m_jump_tracker
.pop(m_bc
->cf_last
, jt_if
);
/* Open a loop: emit LOOP_START_DX10, register the CF entry with the jump
 * tracker (jt_loop) so LOOP_END can be linked back to it, and push a loop
 * frame on the call-stack tracker for stack-depth accounting. */
565 bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED
const LoopBeginInstruction
& instr
)
567 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_START_DX10
);
568 m_jump_tracker
.push(m_bc
->cf_last
, jt_loop
);
569 m_callstack
.push(FC_LOOP
);
/* Close a loop: emit LOOP_END, pop the loop frame from the call-stack
 * tracker, and resolve the jump targets recorded for this jt_loop construct.
 * The assert guards against unbalanced begin/end pairs (m_loop_nesting is a
 * member not visible in this chunk). */
574 bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED
const LoopEndInstruction
& instr
)
576 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_END
);
577 m_callstack
.pop(FC_LOOP
);
578 assert(m_loop_nesting
);
580 return m_jump_tracker
.pop(m_bc
->cf_last
, jt_loop
);
/* Emit LOOP_BREAK and record it as a mid-construct jump of the enclosing
 * loop so its target is patched when the loop is closed. */
583 bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED
const LoopBreakInstruction
& instr
)
585 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_BREAK
);
586 return m_jump_tracker
.add_mid(m_bc
->cf_last
, jt_loop
);
/* Emit LOOP_CONTINUE; like break, it is tracked as a mid-construct jump of
 * the enclosing jt_loop so the target address is fixed up at loop end. */
589 bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED
const LoopContInstruction
&instr
)
591 r600_bytecode_add_cfinst(m_bc
, CF_OP_LOOP_CONTINUE
);
592 return m_jump_tracker
.add_mid(m_bc
->cf_last
, jt_loop
);
/* Emit a stream-output (transform feedback) memory write.
 * Translates the StreamOutIntruction fields 1:1 into an r600_bytecode_output
 * record; the assert checks the opcode is one of the MEM_STREAMn_BUFm CF
 * opcodes the hardware accepts for streamout. */
595 bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction
& so_instr
)
597 struct r600_bytecode_output output
;
598 memset(&output
, 0, sizeof(struct r600_bytecode_output
));
600 output
.gpr
= so_instr
.gpr().sel();
601 output
.elem_size
= so_instr
.element_size();
602 output
.array_base
= so_instr
.array_base();
603 output
.type
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE
;
604 output
.burst_count
= so_instr
.burst_count();
605 output
.array_size
= so_instr
.array_size();
606 output
.comp_mask
= so_instr
.comp_mask();
607 output
.op
= so_instr
.op();
// Streamout must use one of the MEM_STREAM{0..3}_BUF{0..3} CF opcodes.
609 assert(output
.op
>= CF_OP_MEM_STREAM0_BUF0
&& output
.op
<= CF_OP_MEM_STREAM3_BUF3
);
612 if (r600_bytecode_add_output(m_bc
, &output
)) {
613 R600_ERR("shader_from_nir: Error creating stream output instruction\n");
620 bool AssemblyFromShaderLegacyImpl::emit_memringwrite(const MemRingOutIntruction
& instr
)
622 struct r600_bytecode_output output
;
623 memset(&output
, 0, sizeof(struct r600_bytecode_output
));
625 output
.gpr
= instr
.gpr().sel();
626 output
.type
= instr
.type();
627 output
.elem_size
= 3;
628 output
.comp_mask
= 0xf;
629 output
.burst_count
= 1;
630 output
.op
= instr
.op();
631 if (instr
.type() == mem_write_ind
|| instr
.type() == mem_write_ind_ack
) {
632 output
.index_gpr
= instr
.index_reg();
633 output
.array_size
= 0xfff;
635 output
.array_base
= instr
.array_base();
637 if (r600_bytecode_add_output(m_bc
, &output
)) {
638 R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
645 bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction
& tex_instr
)
647 auto addr
= tex_instr
.sampler_offset();
648 if (addr
&& (!m_bc
->index_loaded
[1] || m_loop_nesting
649 || m_bc
->index_reg
[1] != addr
->sel()
650 || m_bc
->index_reg_chan
[1] != addr
->chan())) {
651 struct r600_bytecode_alu alu
;
652 memset(&alu
, 0, sizeof(alu
));
653 alu
.op
= opcode_map
.at(op1_mova_int
);
655 alu
.src
[0].sel
= addr
->sel();
656 alu
.src
[0].chan
= addr
->chan();
658 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
664 alu
.op
= opcode_map
.at(op1_set_cf_idx1
);
670 r
= r600_bytecode_add_alu(m_bc
, &alu
);
674 m_bc
->index_reg
[1] = addr
->sel();
675 m_bc
->index_reg_chan
[1] = addr
->chan();
676 m_bc
->index_loaded
[1] = true;
679 r600_bytecode_tex tex
;
680 memset(&tex
, 0, sizeof(struct r600_bytecode_tex
));
681 tex
.op
= tex_instr
.opcode();
682 tex
.sampler_id
= tex_instr
.sampler_id();
683 tex
.sampler_index_mode
= 0;
684 tex
.resource_id
= tex_instr
.resource_id();;
685 tex
.resource_index_mode
= 0;
686 tex
.src_gpr
= tex_instr
.src().sel();
687 tex
.dst_gpr
= tex_instr
.dst().sel();
688 tex
.dst_sel_x
= tex_instr
.dest_swizzle(0);
689 tex
.dst_sel_y
= tex_instr
.dest_swizzle(1);
690 tex
.dst_sel_z
= tex_instr
.dest_swizzle(2);
691 tex
.dst_sel_w
= tex_instr
.dest_swizzle(3);
692 tex
.src_sel_x
= tex_instr
.src().chan_i(0);
693 tex
.src_sel_y
= tex_instr
.src().chan_i(1);
694 tex
.src_sel_z
= tex_instr
.src().chan_i(2);
695 tex
.src_sel_w
= tex_instr
.src().chan_i(3);
696 tex
.coord_type_x
= !tex_instr
.has_flag(TexInstruction::x_unnormalized
);
697 tex
.coord_type_y
= !tex_instr
.has_flag(TexInstruction::y_unnormalized
);
698 tex
.coord_type_z
= !tex_instr
.has_flag(TexInstruction::z_unnormalized
);
699 tex
.coord_type_w
= !tex_instr
.has_flag(TexInstruction::w_unnormalized
);
700 tex
.offset_x
= tex_instr
.get_offset(0);
701 tex
.offset_y
= tex_instr
.get_offset(1);
702 tex
.offset_z
= tex_instr
.get_offset(2);
703 tex
.resource_index_mode
= (!!addr
) ? 2 : 0;
704 tex
.sampler_index_mode
= tex
.resource_index_mode
;
706 if (tex_instr
.opcode() == TexInstruction::get_gradient_h
||
707 tex_instr
.opcode() == TexInstruction::get_gradient_v
)
708 tex
.inst_mod
= tex_instr
.has_flag(TexInstruction::grad_fine
) ? 1 : 0;
710 tex
.inst_mod
= tex_instr
.inst_mode();
711 if (r600_bytecode_add_tex(m_bc
, &tex
)) {
712 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
718 bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction
& fetch_instr
)
720 int buffer_offset
= 0;
721 auto addr
= fetch_instr
.buffer_offset();
722 auto index_mode
= fetch_instr
.buffer_index_mode();
725 if (addr
->type() == Value::literal
) {
726 const auto& boffs
= static_cast<const LiteralValue
&>(*addr
);
727 buffer_offset
= boffs
.value();
729 index_mode
= bim_zero
;
730 if ((!m_bc
->index_loaded
[0] || m_loop_nesting
||
731 m_bc
->index_reg
[0] != addr
->sel() ||
732 m_bc
->index_reg_chan
[0] != addr
->chan())) {
733 struct r600_bytecode_alu alu
;
734 memset(&alu
, 0, sizeof(alu
));
735 alu
.op
= opcode_map
.at(op1_mova_int
);
737 alu
.src
[0].sel
= addr
->sel();
738 alu
.src
[0].chan
= addr
->chan();
740 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
746 alu
.op
= opcode_map
.at(op1_set_cf_idx0
);
752 r
= r600_bytecode_add_alu(m_bc
, &alu
);
756 m_bc
->index_reg
[0] = addr
->sel();
757 m_bc
->index_reg_chan
[0] = addr
->chan();
758 m_bc
->index_loaded
[0] = true;
763 if (fetch_instr
.has_prelude()) {
764 for(auto &i
: fetch_instr
.prelude()) {
770 if (vtx_fetch_results
.find(fetch_instr
.src().sel()) !=
771 vtx_fetch_results
.end()) {
772 m_bc
->force_add_cf
= 1;
773 vtx_fetch_results
.clear();
775 vtx_fetch_results
.insert(fetch_instr
.dst().sel());
777 struct r600_bytecode_vtx vtx
;
778 memset(&vtx
, 0, sizeof(vtx
));
779 vtx
.op
= fetch_instr
.vc_opcode();
780 vtx
.buffer_id
= fetch_instr
.buffer_id() + buffer_offset
;
781 vtx
.fetch_type
= fetch_instr
.fetch_type();
782 vtx
.src_gpr
= fetch_instr
.src().sel();
783 vtx
.src_sel_x
= fetch_instr
.src().chan();
784 vtx
.mega_fetch_count
= fetch_instr
.mega_fetch_count();
785 vtx
.dst_gpr
= fetch_instr
.dst().sel();
786 vtx
.dst_sel_x
= fetch_instr
.swz(0); /* SEL_X */
787 vtx
.dst_sel_y
= fetch_instr
.swz(1); /* SEL_Y */
788 vtx
.dst_sel_z
= fetch_instr
.swz(2); /* SEL_Z */
789 vtx
.dst_sel_w
= fetch_instr
.swz(3); /* SEL_W */
790 vtx
.use_const_fields
= fetch_instr
.use_const_fields();
791 vtx
.data_format
= fetch_instr
.data_format();
792 vtx
.num_format_all
= fetch_instr
.num_format(); /* NUM_FORMAT_SCALED */
793 vtx
.format_comp_all
= fetch_instr
.is_signed(); /* FORMAT_COMP_SIGNED */
794 vtx
.endian
= fetch_instr
.endian_swap();
795 vtx
.buffer_index_mode
= index_mode
;
796 vtx
.offset
= fetch_instr
.offset();
797 vtx
.indexed
= fetch_instr
.indexed();
798 vtx
.uncached
= fetch_instr
.uncached();
799 vtx
.elem_size
= fetch_instr
.elm_size();
800 vtx
.array_base
= fetch_instr
.array_base();
801 vtx
.array_size
= fetch_instr
.array_size();
802 vtx
.srf_mode_all
= fetch_instr
.srf_mode_no_zero();
804 if (fetch_instr
.use_tc()) {
805 if ((r600_bytecode_add_vtx_tc(m_bc
, &vtx
))) {
806 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
811 if ((r600_bytecode_add_vtx(m_bc
, &vtx
))) {
812 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
817 m_bc
->cf_last
->vpm
= fetch_instr
.use_vpm();
818 m_bc
->cf_last
->barrier
= 1;
/* Emit the geometry-shader vertex-emit (or cut) CF instruction taken from
 * instr.op().  The stream index is stored in the CF entry's count field;
 * hardware supports at most 4 streams (0..3), hence the assert. */
823 bool AssemblyFromShaderLegacyImpl::emit_emit_vertex(const EmitVertex
&instr
)
825 int r
= r600_bytecode_add_cfinst(m_bc
, instr
.op());
827 m_bc
->cf_last
->count
= instr
.stream();
828 assert(m_bc
->cf_last
->count
< 4);
/* Emit a WAIT_ACK-style CF instruction; the number of outstanding
 * acknowledgements to wait for is carried in the CF entry's cf_addr field. */
833 bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck
& instr
)
835 int r
= r600_bytecode_add_cfinst(m_bc
, instr
.op());
837 m_bc
->cf_last
->cf_addr
= instr
.n_ack();
/* Emit a scratch-memory write (MEM_SCRATCH CF output).
 * Direct writes use array_base = location; indirect writes additionally set
 * index_gpr and (per the comment retained below, contradicting the docs)
 * put the array size where address_base would be expected. */
842 bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction
& instr
)
844 struct r600_bytecode_output cf
;
846 memset(&cf
, 0, sizeof(struct r600_bytecode_output
));
848 cf
.op
= CF_OP_MEM_SCRATCH
;
850 cf
.gpr
= instr
.gpr().sel();
852 cf
.comp_mask
= instr
.write_mask();
859 if (instr
.indirect()) {
861 cf
.index_gpr
= instr
.address();
863 /* The docu seems to be wrong here: In indirect addressing the
864 * address_base seems to be the array_size */
865 cf
.array_size
= instr
.array_size();
868 cf
.array_base
= instr
.location();
870 /* This should be 0, but the address calculation is apparently wrong */
873 if (r600_bytecode_add_output(m_bc
, &cf
)){
874 R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
881 extern const std::map
<ESDOp
, int> ds_opcode_map
;
883 bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr
& instr
)
885 struct r600_bytecode_gds gds
;
888 auto addr
= instr
.uav_id();
889 if (addr
->type() != Value::literal
) {
890 if (!m_bc
->index_loaded
[1] || m_loop_nesting
||
891 m_bc
->index_reg
[1] != addr
->sel()
892 || m_bc
->index_reg_chan
[1] != addr
->chan()) {
893 struct r600_bytecode_alu alu
;
895 memset(&alu
, 0, sizeof(alu
));
896 alu
.op
= opcode_map
.at(op2_lshr_int
);
897 alu
.dst
.sel
= addr
->sel();
898 alu
.dst
.chan
= addr
->chan();
899 alu
.src
[0].sel
= addr
->sel();
900 alu
.src
[0].chan
= addr
->chan();
901 alu
.src
[1].sel
= ALU_SRC_LITERAL
;
902 alu
.src
[1].value
= 2;
905 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
909 memset(&alu
, 0, sizeof(alu
));
910 alu
.op
= opcode_map
.at(op1_mova_int
);
912 alu
.src
[0].sel
= addr
->sel();
913 alu
.src
[0].chan
= addr
->chan();
915 r
= r600_bytecode_add_alu(m_bc
, &alu
);
921 alu
.op
= opcode_map
.at(op1_set_cf_idx1
);
927 r
= r600_bytecode_add_alu(m_bc
, &alu
);
931 m_bc
->index_reg
[1] = addr
->sel();
932 m_bc
->index_reg_chan
[1] = addr
->chan();
933 m_bc
->index_loaded
[1] = true;
936 const LiteralValue
& addr_reg
= static_cast<const LiteralValue
&>(*addr
);
937 uav_idx
= addr_reg
.value() >> 2;
940 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
942 gds
.op
= ds_opcode_map
.at(instr
.op());
943 gds
.dst_gpr
= instr
.dest_sel();
944 gds
.uav_id
= (uav_idx
>= 0 ? uav_idx
: 0) + instr
.uav_base();
945 gds
.uav_index_mode
= uav_idx
>= 0 ? bim_none
: bim_one
;
946 gds
.src_gpr
= instr
.src_sel();
948 gds
.src_sel_x
= instr
.src_swizzle(0);
949 gds
.src_sel_y
= instr
.src_swizzle(1);
950 gds
.src_sel_z
= instr
.src_swizzle(2);
952 gds
.dst_sel_x
= instr
.dest_swizzle(0);
957 gds
.alloc_consume
= 1; // Not Cayman
959 int r
= r600_bytecode_add_gds(m_bc
, &gds
);
962 m_bc
->cf_last
->vpm
= 1;
963 m_bc
->cf_last
->barrier
= 1;
967 bool AssemblyFromShaderLegacyImpl::emit_tf_write(const GDSStoreTessFactor
& instr
)
969 struct r600_bytecode_gds gds
;
971 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
972 gds
.src_gpr
= instr
.sel();
973 gds
.src_sel_x
= instr
.chan(0);
974 gds
.src_sel_y
= instr
.chan(1);
980 gds
.op
= FETCH_OP_TF_WRITE
;
982 if (r600_bytecode_add_gds(m_bc
, &gds
) != 0)
985 if (instr
.chan(2) != 7) {
986 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
987 gds
.src_gpr
= instr
.sel();
988 gds
.src_sel_x
= instr
.chan(2);
989 gds
.src_sel_y
= instr
.chan(3);
995 gds
.op
= FETCH_OP_TF_WRITE
;
997 if (r600_bytecode_add_gds(m_bc
, &gds
))
/* Emit an LDS (local data share) write as an indexed ALU op.
 * src[0] = LDS address, src[1] = first value.  A single-component write uses
 * LDS_WRITE; otherwise LDS_WRITE_REL is used and src[2] carries the second
 * value.  Returns true when r600_bytecode_add_alu succeeds (returns 0). */
1003 bool AssemblyFromShaderLegacyImpl::emit_ldswrite(const LDSWriteInstruction
& instr
)
1005 r600_bytecode_alu alu
;
1006 memset(&alu
, 0, sizeof(r600_bytecode_alu
));
1009 alu
.is_lds_idx_op
= true;
1010 copy_src(alu
.src
[0], instr
.address());
1011 copy_src(alu
.src
[1], instr
.value0());
1013 if (instr
.num_components() == 1) {
1014 alu
.op
= LDS_OP2_LDS_WRITE
;
1016 alu
.op
= LDS_OP3_LDS_WRITE_REL
;
1018 copy_src(alu
.src
[2], instr
.value1());
1021 return r600_bytecode_add_alu(m_bc
, &alu
) == 0;
1024 bool AssemblyFromShaderLegacyImpl::emit_ldsread(const LDSReadInstruction
& instr
)
1028 unsigned nfetch
= 0;
1029 unsigned n_values
= instr
.num_values();
1031 r600_bytecode_alu alu_fetch
;
1032 r600_bytecode_alu alu_read
;
1034 /* We must add a new ALU clause if the fetch and read op would be split otherwise
1035 * r600_asm limites at 120 slots = 240 dwords */
1036 if (m_bc
->cf_last
->ndw
> 240 - 4 * n_values
)
1037 m_bc
->force_add_cf
= 1;
1039 while (nread
< n_values
) {
1040 if (nfetch
< n_values
) {
1041 memset(&alu_fetch
, 0, sizeof(r600_bytecode_alu
));
1042 alu_fetch
.is_lds_idx_op
= true;
1043 alu_fetch
.op
= LDS_OP1_LDS_READ_RET
;
1045 copy_src(alu_fetch
.src
[0], instr
.address(nfetch
));
1046 alu_fetch
.src
[1].sel
= V_SQ_ALU_SRC_0
;
1047 alu_fetch
.src
[2].sel
= V_SQ_ALU_SRC_0
;
1049 r
= r600_bytecode_add_alu(m_bc
, &alu_fetch
);
1050 m_bc
->cf_last
->nlds_read
++;
1055 if (nfetch
>= n_values
) {
1056 memset(&alu_read
, 0, sizeof(r600_bytecode_alu
));
1057 copy_dst(alu_read
.dst
, instr
.dest(nread
));
1058 alu_read
.op
= ALU_OP1_MOV
;
1059 alu_read
.src
[0].sel
= EG_V_SQ_ALU_SRC_LDS_OQ_A_POP
;
1061 alu_read
.dst
.write
= 1;
1062 r
= r600_bytecode_add_alu(m_bc
, &alu_read
);
1063 m_bc
->cf_last
->nqueue_read
++;
1070 assert(m_bc
->cf_last
->nlds_read
== m_bc
->cf_last
->nqueue_read
);
1075 bool AssemblyFromShaderLegacyImpl::emit_ldsatomic(const LDSAtomicInstruction
& instr
)
1077 if (m_bc
->cf_last
->ndw
> 240 - 4)
1078 m_bc
->force_add_cf
= 1;
1080 r600_bytecode_alu alu_fetch
;
1081 r600_bytecode_alu alu_read
;
1083 memset(&alu_fetch
, 0, sizeof(r600_bytecode_alu
));
1084 alu_fetch
.is_lds_idx_op
= true;
1085 alu_fetch
.op
= instr
.op();
1087 copy_src(alu_fetch
.src
[0], instr
.address());
1088 auto& src0
= instr
.src0();
1089 alu_fetch
.src
[1].sel
= src0
.sel();
1090 alu_fetch
.src
[1].chan
= src0
.chan();
1092 auto& src1
= *instr
.src1();
1093 alu_fetch
.src
[2].sel
= src1
.sel();
1094 alu_fetch
.src
[2].chan
= src1
.chan();
1097 int r
= r600_bytecode_add_alu(m_bc
, &alu_fetch
);
1101 memset(&alu_read
, 0, sizeof(r600_bytecode_alu
));
1102 copy_dst(alu_read
.dst
, instr
.dest());
1103 alu_read
.op
= ALU_OP1_MOV
;
1104 alu_read
.src
[0].sel
= EG_V_SQ_ALU_SRC_LDS_OQ_A_POP
;
1106 alu_read
.dst
.write
= 1;
1107 r
= r600_bytecode_add_alu(m_bc
, &alu_read
);
/* NOTE(review): garbled by extraction — leading integers are original line
 * numbers; gaps (1136-1138, 1148-1150, 1179-1182, 1185-1186, ...) indicate
 * lost lines, presumably braces, error checks on r600_bytecode_add_alu(),
 * the literal-index `else` branch header, and the final return. Only
 * comments are added; no code tokens are altered.
 *
 * Emits a MEM_RAT control-flow instruction for a RAT (random access
 * target) operation. If the RAT index offset is not a compile-time
 * literal, the offset is loaded into CF index register 1 via
 * MOVA_INT + SET_CF_IDX1 (skipped when the register already holds the
 * right value and we are not inside a loop); a literal offset is instead
 * folded directly into the RAT id. */
1113 bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction
& instr
)
1115 struct r600_bytecode_gds gds
;
1117 int rat_idx
= instr
.rat_id();
1118 EBufferIndexMode rat_index_mode
= bim_none
;
1119 auto addr
= instr
.rat_id_offset();
// Dynamic (non-literal) offset: index through CF index register 1.
1122 if (addr
->type() != Value::literal
) {
1123 rat_index_mode
= bim_one
;
// Reload the index register if it is stale: never loaded, we are inside a
// loop (the cached value may change per iteration), or it caches a
// different register/channel than the one needed now.
1124 if (!m_bc
->index_loaded
[1] || m_loop_nesting
||
1125 m_bc
->index_reg
[1] != addr
->sel()
1126 || m_bc
->index_reg_chan
[1] != addr
->chan()) {
1127 struct r600_bytecode_alu alu
;
// MOVA_INT moves the offset into the address register...
1129 memset(&alu
, 0, sizeof(alu
));
1130 alu
.op
= opcode_map
.at(op1_mova_int
);
1132 alu
.src
[0].sel
= addr
->sel();
1133 alu
.src
[0].chan
= addr
->chan();
// NOTE(review): no visible use of r — the error check (missing lines
// 1136-1138) was presumably lost in extraction.
1135 int r
= r600_bytecode_add_alu(m_bc
, &alu
);
// MOVA clobbers AR, so mark it as no longer loaded.
1139 m_bc
->ar_loaded
= 0;
// ...then SET_CF_IDX1 latches it into CF index register 1. The `alu`
// struct is reused; only op and src[0].chan are rewritten here.
1141 alu
.op
= opcode_map
.at(op1_set_cf_idx1
);
1144 alu
.src
[0].chan
= 0;
1147 r
= r600_bytecode_add_alu(m_bc
, &alu
);
// Record what CF index register 1 now caches.
1151 m_bc
->index_reg
[1] = addr
->sel();
1152 m_bc
->index_reg_chan
[1] = addr
->chan();
1153 m_bc
->index_loaded
[1] = true;
// Literal offset (missing lines 1154-1156 presumably close the branch above
// and open an `else`): fold the constant directly into the RAT id.
1157 const LiteralValue
& addr_reg
= static_cast<const LiteralValue
&>(*addr
);
1158 rat_idx
+= addr_reg
.value();
// NOTE(review): gds is cleared but no visible code uses it afterwards —
// possibly a leftover, or its use was lost in extraction.
1161 memset(&gds
, 0, sizeof(struct r600_bytecode_gds
));
// Emit the MEM_RAT CF instruction and fill in its RAT/output fields.
1163 r600_bytecode_add_cfinst(m_bc
, CF_OP_MEM_RAT
);
1164 auto cf
= m_bc
->cf_last
;
1165 cf
->rat
.id
= rat_idx
+ m_shader
->rat_base
;
1166 cf
->rat
.inst
= instr
.rat_op();
1167 cf
->rat
.index_mode
= rat_index_mode
;
// presumably 3 = write-with-ACK, 1 = plain write — confirm against the
// Evergreen MEM_RAT encoding.
1168 cf
->output
.type
= instr
.need_ack() ? 3 : 1;
1169 cf
->output
.gpr
= instr
.data_gpr();
1170 cf
->output
.index_gpr
= instr
.index_gpr();
1171 cf
->output
.comp_mask
= instr
.comp_mask();
1172 cf
->output
.burst_count
= instr
.burst_count();
// The data swizzle must be identity (X[, Y, Z]); STORE_TYPED is exempt
// from the Y/Z check. PIPE_SWIZZLE_MAX means "channel unused".
1173 assert(instr
.data_swz(0) == PIPE_SWIZZLE_X
);
1174 if (cf
->rat
.inst
!= RatInstruction::STORE_TYPED
) {
1175 assert(instr
.data_swz(1) == PIPE_SWIZZLE_Y
||
1176 instr
.data_swz(1) == PIPE_SWIZZLE_MAX
) ;
1177 assert(instr
.data_swz(2) == PIPE_SWIZZLE_Z
||
1178 instr
.data_swz(2) == PIPE_SWIZZLE_MAX
) ;
// NOTE(review): lines 1179-1182 (closing brace, etc.) and the final
// return are not visible in this extraction.
1183 cf
->mark
= instr
.need_ack();
1184 cf
->output
.elem_size
= instr
.elm_size();
/* NOTE(review): garbled by extraction — the second parameter (original
 * lines 1189-1190, by the later uses a `const Value& d`), the assignment
 * of dst.sel (around line 1197), the error-path return after R600_ERR,
 * and the final return are all missing from this view. Only comments are
 * added; no code tokens are altered.
 *
 * Translates an IR destination value into a bytecode ALU destination and
 * invalidates the cached CF index registers if this write clobbers the
 * register/channel they were loaded from. */
1188 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst
& dst
,
// Only plain GPRs (or GPR array elements) can be ALU destinations.
1191 assert(d
.type() == Value::gpr
|| d
.type() == Value::gpr_array_value
);
// Registers 125-127 are reserved (e.g. for indirect addressing temps).
// NOTE(review): the `return false;` that presumably followed this error
// is not visible here.
1193 if (d
.sel() > 124) {
1194 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d
.sel());
1199 dst
.chan
= d
.chan();
// Writing the register cached in a CF index register makes that cache
// stale — force a reload on next use.
1201 if (m_bc
->index_reg
[1] == dst
.sel
&&
1202 m_bc
->index_reg_chan
[1] == dst
.chan
)
1203 m_bc
->index_loaded
[1] = false;
1205 if (m_bc
->index_reg
[0] == dst
.sel
&&
1206 m_bc
->index_reg_chan
[0] == dst
.chan
)
1207 m_bc
->index_loaded
[0] = false;
/* NOTE(review): garbled by extraction — leading integers are original line
 * numbers. The gaps (e.g. 1217-1219, 1234, 1236-1237, 1260-1261, 1263-1265)
 * indicate lost lines: the error-path returns after each R600_ERR, the
 * `src.chan = 0; return true;` epilogue of each special-literal case, the
 * `src.sel = ALU_SRC_LITERAL` assignment before src.value, and the final
 * return. Only comments are added; no code tokens are altered.
 *
 * Translates an IR source value into a bytecode ALU source operand.
 * Special literal values (0, 1, 1.0f, 0.5f, -1) are mapped to the
 * hardware's inline constant selectors, releasing the literal slot that
 * was reserved for them (--m_nliterals_in_group); other literals are
 * emitted as literal constants; kcache constants record their bank. */
1212 bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src
& src
, const Value
& s
)
// Registers above 124 are reserved; reject them.
1215 if (s
.type() == Value::gpr
&& s
.sel() > 124) {
1216 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s
.sel());
1220 if (s
.type() == Value::lds_direct
) {
1221 R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
// Uniform (kcache) selectors start at 512; anything below is malformed.
1225 if (s
.type() == Value::kconst
&& s
.sel() < 512) {
1226 R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s
.sel());
// Literals: prefer the hardware inline constants over a literal slot.
// Each matched case decrements the pending-literal counter that was
// presumably incremented when the literal was first scheduled.
1230 if (s
.type() == Value::literal
) {
1231 auto& v
= static_cast<const LiteralValue
&>(s
);
1232 if (v
.value() == 0) {
1233 src
.sel
= ALU_SRC_0
;
1235 --m_nliterals_in_group
;
1238 if (v
.value() == 1) {
1239 src
.sel
= ALU_SRC_1_INT
;
1241 --m_nliterals_in_group
;
1244 if (v
.value_float() == 1.0f
) {
1245 src
.sel
= ALU_SRC_1
;
1247 --m_nliterals_in_group
;
1250 if (v
.value_float() == 0.5f
) {
1251 src
.sel
= ALU_SRC_0_5
;
1253 --m_nliterals_in_group
;
// 0xffffffff == -1 as a signed int inline constant.
1256 if (v
.value() == 0xffffffff) {
1257 src
.sel
= ALU_SRC_M_1_INT
;
1259 --m_nliterals_in_group
;
// Fallback: a real literal constant. NOTE(review): the matching
// `src.sel = ALU_SRC_LITERAL;` assignment is not visible here.
1262 src
.value
= v
.value();
1266 src
.chan
= s
.chan();
// Kcache constants additionally carry their cache bank.
1267 if (s
.type() == Value::kconst
) {
1268 const UniformValue
& cv
= static_cast<const UniformValue
&>(s
);
1269 src
.kc_bank
= cv
.kcache_bank();
1275 const std::map
<EAluOp
, int> opcode_map
= {
1277 {op2_add
, ALU_OP2_ADD
},
1278 {op2_mul
, ALU_OP2_MUL
},
1279 {op2_mul_ieee
, ALU_OP2_MUL_IEEE
},
1280 {op2_max
, ALU_OP2_MAX
},
1281 {op2_min
, ALU_OP2_MIN
},
1282 {op2_max_dx10
, ALU_OP2_MAX_DX10
},
1283 {op2_min_dx10
, ALU_OP2_MIN_DX10
},
1284 {op2_sete
, ALU_OP2_SETE
},
1285 {op2_setgt
, ALU_OP2_SETGT
},
1286 {op2_setge
, ALU_OP2_SETGE
},
1287 {op2_setne
, ALU_OP2_SETNE
},
1288 {op2_sete_dx10
, ALU_OP2_SETE_DX10
},
1289 {op2_setgt_dx10
, ALU_OP2_SETGT_DX10
},
1290 {op2_setge_dx10
, ALU_OP2_SETGE_DX10
},
1291 {op2_setne_dx10
, ALU_OP2_SETNE_DX10
},
1292 {op1_fract
, ALU_OP1_FRACT
},
1293 {op1_trunc
, ALU_OP1_TRUNC
},
1294 {op1_ceil
, ALU_OP1_CEIL
},
1295 {op1_rndne
, ALU_OP1_RNDNE
},
1296 {op1_floor
, ALU_OP1_FLOOR
},
1297 {op2_ashr_int
, ALU_OP2_ASHR_INT
},
1298 {op2_lshr_int
, ALU_OP2_LSHR_INT
},
1299 {op2_lshl_int
, ALU_OP2_LSHL_INT
},
1300 {op1_mov
, ALU_OP1_MOV
},
1301 {op0_nop
, ALU_OP0_NOP
},
1302 {op2_mul_64
, ALU_OP2_MUL_64
},
1303 {op1v_flt64_to_flt32
, ALU_OP1_FLT64_TO_FLT32
},
1304 {op1v_flt32_to_flt64
, ALU_OP1_FLT32_TO_FLT64
},
1305 {op2_pred_setgt_uint
, ALU_OP2_PRED_SETGT_UINT
},
1306 {op2_pred_setge_uint
, ALU_OP2_PRED_SETGE_UINT
},
1307 {op2_pred_sete
, ALU_OP2_PRED_SETE
},
1308 {op2_pred_setgt
, ALU_OP2_PRED_SETGT
},
1309 {op2_pred_setge
, ALU_OP2_PRED_SETGE
},
1310 {op2_pred_setne
, ALU_OP2_PRED_SETNE
},
1311 //{op2_pred_set_inv, ALU_OP2_PRED_SET},
1312 //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
1313 //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
1314 {op2_pred_sete_push
, ALU_OP2_PRED_SETE_PUSH
},
1315 {op2_pred_setgt_push
, ALU_OP2_PRED_SETGT_PUSH
},
1316 {op2_pred_setge_push
, ALU_OP2_PRED_SETGE_PUSH
},
1317 {op2_pred_setne_push
, ALU_OP2_PRED_SETNE_PUSH
},
1318 {op2_kille
, ALU_OP2_KILLE
},
1319 {op2_killgt
, ALU_OP2_KILLGT
},
1320 {op2_killge
, ALU_OP2_KILLGE
},
1321 {op2_killne
, ALU_OP2_KILLNE
},
1322 {op2_and_int
, ALU_OP2_AND_INT
},
1323 {op2_or_int
, ALU_OP2_OR_INT
},
1324 {op2_xor_int
, ALU_OP2_XOR_INT
},
1325 {op1_not_int
, ALU_OP1_NOT_INT
},
1326 {op2_add_int
, ALU_OP2_ADD_INT
},
1327 {op2_sub_int
, ALU_OP2_SUB_INT
},
1328 {op2_max_int
, ALU_OP2_MAX_INT
},
1329 {op2_min_int
, ALU_OP2_MIN_INT
},
1330 {op2_max_uint
, ALU_OP2_MAX_UINT
},
1331 {op2_min_uint
, ALU_OP2_MIN_UINT
},
1332 {op2_sete_int
, ALU_OP2_SETE_INT
},
1333 {op2_setgt_int
, ALU_OP2_SETGT_INT
},
1334 {op2_setge_int
, ALU_OP2_SETGE_INT
},
1335 {op2_setne_int
, ALU_OP2_SETNE_INT
},
1336 {op2_setgt_uint
, ALU_OP2_SETGT_UINT
},
1337 {op2_setge_uint
, ALU_OP2_SETGE_UINT
},
1338 {op2_killgt_uint
, ALU_OP2_KILLGT_UINT
},
1339 {op2_killge_uint
, ALU_OP2_KILLGE_UINT
},
1340 //p2_prede_int, ALU_OP2_PREDE_INT},
1341 {op2_pred_setgt_int
, ALU_OP2_PRED_SETGT_INT
},
1342 {op2_pred_setge_int
, ALU_OP2_PRED_SETGE_INT
},
1343 {op2_pred_setne_int
, ALU_OP2_PRED_SETNE_INT
},
1344 {op2_kille_int
, ALU_OP2_KILLE_INT
},
1345 {op2_killgt_int
, ALU_OP2_KILLGT_INT
},
1346 {op2_killge_int
, ALU_OP2_KILLGE_INT
},
1347 {op2_killne_int
, ALU_OP2_KILLNE_INT
},
1348 {op2_pred_sete_push_int
, ALU_OP2_PRED_SETE_PUSH_INT
},
1349 {op2_pred_setgt_push_int
, ALU_OP2_PRED_SETGT_PUSH_INT
},
1350 {op2_pred_setge_push_int
, ALU_OP2_PRED_SETGE_PUSH_INT
},
1351 {op2_pred_setne_push_int
, ALU_OP2_PRED_SETNE_PUSH_INT
},
1352 {op2_pred_setlt_push_int
, ALU_OP2_PRED_SETLT_PUSH_INT
},
1353 {op2_pred_setle_push_int
, ALU_OP2_PRED_SETLE_PUSH_INT
},
1354 {op1_flt_to_int
, ALU_OP1_FLT_TO_INT
},
1355 {op1_bfrev_int
, ALU_OP1_BFREV_INT
},
1356 {op2_addc_uint
, ALU_OP2_ADDC_UINT
},
1357 {op2_subb_uint
, ALU_OP2_SUBB_UINT
},
1358 {op0_group_barrier
, ALU_OP0_GROUP_BARRIER
},
1359 {op0_group_seq_begin
, ALU_OP0_GROUP_SEQ_BEGIN
},
1360 {op0_group_seq_end
, ALU_OP0_GROUP_SEQ_END
},
1361 {op2_set_mode
, ALU_OP2_SET_MODE
},
1362 {op1_set_cf_idx0
, ALU_OP0_SET_CF_IDX0
},
1363 {op1_set_cf_idx1
, ALU_OP0_SET_CF_IDX1
},
1364 {op2_set_lds_size
, ALU_OP2_SET_LDS_SIZE
},
1365 {op1_exp_ieee
, ALU_OP1_EXP_IEEE
},
1366 {op1_log_clamped
, ALU_OP1_LOG_CLAMPED
},
1367 {op1_log_ieee
, ALU_OP1_LOG_IEEE
},
1368 {op1_recip_clamped
, ALU_OP1_RECIP_CLAMPED
},
1369 {op1_recip_ff
, ALU_OP1_RECIP_FF
},
1370 {op1_recip_ieee
, ALU_OP1_RECIP_IEEE
},
1371 {op1_recipsqrt_clamped
, ALU_OP1_RECIPSQRT_CLAMPED
},
1372 {op1_recipsqrt_ff
, ALU_OP1_RECIPSQRT_FF
},
1373 {op1_recipsqrt_ieee1
, ALU_OP1_RECIPSQRT_IEEE
},
1374 {op1_sqrt_ieee
, ALU_OP1_SQRT_IEEE
},
1375 {op1_sin
, ALU_OP1_SIN
},
1376 {op1_cos
, ALU_OP1_COS
},
1377 {op2_mullo_int
, ALU_OP2_MULLO_INT
},
1378 {op2_mulhi_int
, ALU_OP2_MULHI_INT
},
1379 {op2_mullo_uint
, ALU_OP2_MULLO_UINT
},
1380 {op2_mulhi_uint
, ALU_OP2_MULHI_UINT
},
1381 {op1_recip_int
, ALU_OP1_RECIP_INT
},
1382 {op1_recip_uint
, ALU_OP1_RECIP_UINT
},
1383 {op1_recip_64
, ALU_OP2_RECIP_64
},
1384 {op1_recip_clamped_64
, ALU_OP2_RECIP_CLAMPED_64
},
1385 {op1_recipsqrt_64
, ALU_OP2_RECIPSQRT_64
},
1386 {op1_recipsqrt_clamped_64
, ALU_OP2_RECIPSQRT_CLAMPED_64
},
1387 {op1_sqrt_64
, ALU_OP2_SQRT_64
},
1388 {op1_flt_to_uint
, ALU_OP1_FLT_TO_UINT
},
1389 {op1_int_to_flt
, ALU_OP1_INT_TO_FLT
},
1390 {op1_uint_to_flt
, ALU_OP1_UINT_TO_FLT
},
1391 {op2_bfm_int
, ALU_OP2_BFM_INT
},
1392 {op1_flt32_to_flt16
, ALU_OP1_FLT32_TO_FLT16
},
1393 {op1_flt16_to_flt32
, ALU_OP1_FLT16_TO_FLT32
},
1394 {op1_ubyte0_flt
, ALU_OP1_UBYTE0_FLT
},
1395 {op1_ubyte1_flt
, ALU_OP1_UBYTE1_FLT
},
1396 {op1_ubyte2_flt
, ALU_OP1_UBYTE2_FLT
},
1397 {op1_ubyte3_flt
, ALU_OP1_UBYTE3_FLT
},
1398 {op1_bcnt_int
, ALU_OP1_BCNT_INT
},
1399 {op1_ffbh_uint
, ALU_OP1_FFBH_UINT
},
1400 {op1_ffbl_int
, ALU_OP1_FFBL_INT
},
1401 {op1_ffbh_int
, ALU_OP1_FFBH_INT
},
1402 {op1_flt_to_uint4
, ALU_OP1_FLT_TO_UINT4
},
1403 {op2_dot_ieee
, ALU_OP2_DOT_IEEE
},
1404 {op1_flt_to_int_rpi
, ALU_OP1_FLT_TO_INT_RPI
},
1405 {op1_flt_to_int_floor
, ALU_OP1_FLT_TO_INT_FLOOR
},
1406 {op2_mulhi_uint24
, ALU_OP2_MULHI_UINT24
},
1407 {op1_mbcnt_32hi_int
, ALU_OP1_MBCNT_32HI_INT
},
1408 {op1_offset_to_flt
, ALU_OP1_OFFSET_TO_FLT
},
1409 {op2_mul_uint24
, ALU_OP2_MUL_UINT24
},
1410 {op1_bcnt_accum_prev_int
, ALU_OP1_BCNT_ACCUM_PREV_INT
},
1411 {op1_mbcnt_32lo_accum_prev_int
, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT
},
1412 {op2_sete_64
, ALU_OP2_SETE_64
},
1413 {op2_setne_64
, ALU_OP2_SETNE_64
},
1414 {op2_setgt_64
, ALU_OP2_SETGT_64
},
1415 {op2_setge_64
, ALU_OP2_SETGE_64
},
1416 {op2_min_64
, ALU_OP2_MIN_64
},
1417 {op2_max_64
, ALU_OP2_MAX_64
},
1418 {op2_dot4
, ALU_OP2_DOT4
},
1419 {op2_dot4_ieee
, ALU_OP2_DOT4_IEEE
},
1420 {op2_cube
, ALU_OP2_CUBE
},
1421 {op1_max4
, ALU_OP1_MAX4
},
1422 {op1_frexp_64
, ALU_OP1_FREXP_64
},
1423 {op1_ldexp_64
, ALU_OP2_LDEXP_64
},
1424 {op1_fract_64
, ALU_OP1_FRACT_64
},
1425 {op2_pred_setgt_64
, ALU_OP2_PRED_SETGT_64
},
1426 {op2_pred_sete_64
, ALU_OP2_PRED_SETE_64
},
1427 {op2_pred_setge_64
, ALU_OP2_PRED_SETGE_64
},
1428 {op2_add_64
, ALU_OP2_ADD_64
},
1429 {op1_mova_int
, ALU_OP1_MOVA_INT
},
1430 {op1v_flt64_to_flt32
, ALU_OP1_FLT64_TO_FLT32
},
1431 {op1_flt32_to_flt64
, ALU_OP1_FLT32_TO_FLT64
},
1432 {op2_sad_accum_prev_uint
, ALU_OP2_SAD_ACCUM_PREV_UINT
},
1433 {op2_dot
, ALU_OP2_DOT
},
1434 //p2_mul_prev, ALU_OP2_MUL_PREV},
1435 //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
1436 //p2_add_prev, ALU_OP2_ADD_PREV},
1437 {op2_muladd_prev
, ALU_OP2_MULADD_PREV
},
1438 {op2_muladd_ieee_prev
, ALU_OP2_MULADD_IEEE_PREV
},
1439 {op2_interp_xy
, ALU_OP2_INTERP_XY
},
1440 {op2_interp_zw
, ALU_OP2_INTERP_ZW
},
1441 {op2_interp_x
, ALU_OP2_INTERP_X
},
1442 {op2_interp_z
, ALU_OP2_INTERP_Z
},
1443 {op0_store_flags
, ALU_OP1_STORE_FLAGS
},
1444 {op1_load_store_flags
, ALU_OP1_LOAD_STORE_FLAGS
},
1445 {op0_lds_1a
, ALU_OP2_LDS_1A
},
1446 {op0_lds_1a1d
, ALU_OP2_LDS_1A1D
},
1447 {op0_lds_2a
, ALU_OP2_LDS_2A
},
1448 {op1_interp_load_p0
, ALU_OP1_INTERP_LOAD_P0
},
1449 {op1_interp_load_p10
, ALU_OP1_INTERP_LOAD_P10
},
1450 {op1_interp_load_p20
, ALU_OP1_INTERP_LOAD_P20
},
1451 // {op 3 all left shift 6
1452 {op3_bfe_uint
, ALU_OP3_BFE_UINT
},
1453 {op3_bfe_int
, ALU_OP3_BFE_INT
},
1454 {op3_bfi_int
, ALU_OP3_BFI_INT
},
1455 {op3_fma
, ALU_OP3_FMA
},
1456 {op3_cndne_64
, ALU_OP3_CNDNE_64
},
1457 {op3_fma_64
, ALU_OP3_FMA_64
},
1458 {op3_lerp_uint
, ALU_OP3_LERP_UINT
},
1459 {op3_bit_align_int
, ALU_OP3_BIT_ALIGN_INT
},
1460 {op3_byte_align_int
, ALU_OP3_BYTE_ALIGN_INT
},
1461 {op3_sad_accum_uint
, ALU_OP3_SAD_ACCUM_UINT
},
1462 {op3_sad_accum_hi_uint
, ALU_OP3_SAD_ACCUM_HI_UINT
},
1463 {op3_muladd_uint24
, ALU_OP3_MULADD_UINT24
},
1464 {op3_lds_idx_op
, ALU_OP3_LDS_IDX_OP
},
1465 {op3_muladd
, ALU_OP3_MULADD
},
1466 {op3_muladd_m2
, ALU_OP3_MULADD_M2
},
1467 {op3_muladd_m4
, ALU_OP3_MULADD_M4
},
1468 {op3_muladd_d2
, ALU_OP3_MULADD_D2
},
1469 {op3_muladd_ieee
, ALU_OP3_MULADD_IEEE
},
1470 {op3_cnde
, ALU_OP3_CNDE
},
1471 {op3_cndgt
, ALU_OP3_CNDGT
},
1472 {op3_cndge
, ALU_OP3_CNDGE
},
1473 {op3_cnde_int
, ALU_OP3_CNDE_INT
},
1474 {op3_cndgt_int
, ALU_OP3_CNDGT_INT
},
1475 {op3_cndge_int
, ALU_OP3_CNDGE_INT
},
1476 {op3_mul_lit
, ALU_OP3_MUL_LIT
},
1479 const std::map
<ESDOp
, int> ds_opcode_map
= {
1480 {DS_OP_ADD
, FETCH_OP_GDS_ADD
},
1481 {DS_OP_SUB
, FETCH_OP_GDS_SUB
},
1482 {DS_OP_RSUB
, FETCH_OP_GDS_RSUB
},
1483 {DS_OP_INC
, FETCH_OP_GDS_INC
},
1484 {DS_OP_DEC
, FETCH_OP_GDS_DEC
},
1485 {DS_OP_MIN_INT
, FETCH_OP_GDS_MIN_INT
},
1486 {DS_OP_MAX_INT
, FETCH_OP_GDS_MAX_INT
},
1487 {DS_OP_MIN_UINT
, FETCH_OP_GDS_MIN_UINT
},
1488 {DS_OP_MAX_UINT
, FETCH_OP_GDS_MAX_UINT
},
1489 {DS_OP_AND
, FETCH_OP_GDS_AND
},
1490 {DS_OP_OR
, FETCH_OP_GDS_OR
},
1491 {DS_OP_XOR
, FETCH_OP_GDS_XOR
},
1492 {DS_OP_MSKOR
, FETCH_OP_GDS_MSKOR
},
1493 {DS_OP_WRITE
, FETCH_OP_GDS_WRITE
},
1494 {DS_OP_WRITE_REL
, FETCH_OP_GDS_WRITE_REL
},
1495 {DS_OP_WRITE2
, FETCH_OP_GDS_WRITE2
},
1496 {DS_OP_CMP_STORE
, FETCH_OP_GDS_CMP_STORE
},
1497 {DS_OP_CMP_STORE_SPF
, FETCH_OP_GDS_CMP_STORE_SPF
},
1498 {DS_OP_BYTE_WRITE
, FETCH_OP_GDS_BYTE_WRITE
},
1499 {DS_OP_SHORT_WRITE
, FETCH_OP_GDS_SHORT_WRITE
},
1500 {DS_OP_ADD_RET
, FETCH_OP_GDS_ADD_RET
},
1501 {DS_OP_SUB_RET
, FETCH_OP_GDS_SUB_RET
},
1502 {DS_OP_RSUB_RET
, FETCH_OP_GDS_RSUB_RET
},
1503 {DS_OP_INC_RET
, FETCH_OP_GDS_INC_RET
},
1504 {DS_OP_DEC_RET
, FETCH_OP_GDS_DEC_RET
},
1505 {DS_OP_MIN_INT_RET
, FETCH_OP_GDS_MIN_INT_RET
},
1506 {DS_OP_MAX_INT_RET
, FETCH_OP_GDS_MAX_INT_RET
},
1507 {DS_OP_MIN_UINT_RET
, FETCH_OP_GDS_MIN_UINT_RET
},
1508 {DS_OP_MAX_UINT_RET
, FETCH_OP_GDS_MAX_UINT_RET
},
1509 {DS_OP_AND_RET
, FETCH_OP_GDS_AND_RET
},
1510 {DS_OP_OR_RET
, FETCH_OP_GDS_OR_RET
},
1511 {DS_OP_XOR_RET
, FETCH_OP_GDS_XOR_RET
},
1512 {DS_OP_MSKOR_RET
, FETCH_OP_GDS_MSKOR_RET
},
1513 {DS_OP_XCHG_RET
, FETCH_OP_GDS_XCHG_RET
},
1514 {DS_OP_XCHG_REL_RET
, FETCH_OP_GDS_XCHG_REL_RET
},
1515 {DS_OP_XCHG2_RET
, FETCH_OP_GDS_XCHG2_RET
},
1516 {DS_OP_CMP_XCHG_RET
, FETCH_OP_GDS_CMP_XCHG_RET
},
1517 {DS_OP_CMP_XCHG_SPF_RET
, FETCH_OP_GDS_CMP_XCHG_SPF_RET
},
1518 {DS_OP_READ_RET
, FETCH_OP_GDS_READ_RET
},
1519 {DS_OP_READ_REL_RET
, FETCH_OP_GDS_READ_REL_RET
},
1520 {DS_OP_READ2_RET
, FETCH_OP_GDS_READ2_RET
},
1521 {DS_OP_READWRITE_RET
, FETCH_OP_GDS_READWRITE_RET
},
1522 {DS_OP_BYTE_READ_RET
, FETCH_OP_GDS_BYTE_READ_RET
},
1523 {DS_OP_UBYTE_READ_RET
, FETCH_OP_GDS_UBYTE_READ_RET
},
1524 {DS_OP_SHORT_READ_RET
, FETCH_OP_GDS_SHORT_READ_RET
},
1525 {DS_OP_USHORT_READ_RET
, FETCH_OP_GDS_USHORT_READ_RET
},
1526 {DS_OP_ATOMIC_ORDERED_ALLOC_RET
, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC
},