r600/sfn: Fix GDS assembly emission
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_ir_to_assembly.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_ir_to_assembly.h"
28 #include "sfn_conditionaljumptracker.h"
29 #include "sfn_callstack.h"
30 #include "sfn_instruction_gds.h"
31 #include "sfn_instruction_misc.h"
32 #include "sfn_instruction_fetch.h"
33 #include "sfn_instruction_lds.h"
34
35 #include "../r600_shader.h"
36 #include "../r600_sq.h"
37
38 namespace r600 {
39
40 using std::vector;
41
/* Implementation helper that performs the actual lowering of the sfn IR to
 * r600 bytecode.  Kept separate from AssemblyFromShaderLegacy so that the
 * public interface doesn't expose the emission details. */
struct AssemblyFromShaderLegacyImpl {

   AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);
   /* Emit one IR instruction; returns false on any emission error. */
   bool emit(const Instruction::Pointer i);
   /* Drop the cached AR value so the next indirect access reloads it. */
   void reset_addr_register() {m_last_addr.reset();}

private:
   bool emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op);
   bool emit_export(const ExportInstruction & exi);
   bool emit_streamout(const StreamOutIntruction& instr);
   bool emit_memringwrite(const MemRingOutIntruction& instr);
   bool emit_tex(const TexInstruction & tex_instr);
   bool emit_vtx(const FetchInstruction& fetch_instr);
   bool emit_if_start(const IfInstruction & if_instr);
   bool emit_else(const ElseInstruction & else_instr);
   bool emit_endif(const IfElseEndInstruction & endif_instr);
   bool emit_emit_vertex(const EmitVertex &instr);

   bool emit_loop_begin(const LoopBeginInstruction& instr);
   bool emit_loop_end(const LoopEndInstruction& instr);
   bool emit_loop_break(const LoopBreakInstruction& instr);
   bool emit_loop_continue(const LoopContInstruction& instr);
   bool emit_wait_ack(const WaitAck& instr);
   bool emit_wr_scratch(const WriteScratchInstruction& instr);
   bool emit_gds(const GDSInstr& instr);
   bool emit_rat(const RatInstruction& instr);
   bool emit_ldswrite(const LDSWriteInstruction& instr);
   bool emit_ldsread(const LDSReadInstruction& instr);
   bool emit_tf_write(const GDSStoreTessFactor& instr);

   bool emit_load_addr(PValue addr);
   bool emit_fs_pixel_export(const ExportInstruction & exi);
   bool emit_vs_pos_export(const ExportInstruction & exi);
   bool emit_vs_param_export(const ExportInstruction & exi);
   bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
   bool copy_src(r600_bytecode_alu_src& src, const Value& s);



   ConditionalJumpTracker m_jump_tracker;
   CallStack m_callstack;

public:
   r600_bytecode *m_bc;
   r600_shader *m_shader;
   r600_shader_key *m_key;
   r600_bytecode_output m_output;
   unsigned m_max_color_exports;
   bool has_pos_output;
   bool has_param_output;
   /* Value last loaded into the AR register; empty if AR must be reloaded. */
   PValue m_last_addr;
   int m_loop_nesting;
   /* Literals used in the current ALU instruction group (hw limit is 4). */
   int m_nliterals_in_group;
   /* Destination GPRs of fetches in the current fetch clause, used to detect
    * read-after-write hazards that require a new CF instruction. */
   std::set<int> vtx_fetch_results;
};
97
98
/* Construct the public facade; all the work happens in the pimpl. */
AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
                                                   r600_shader_key *key)
{
   impl = new AssemblyFromShaderLegacyImpl(sh, key);
}
104
/* Release the implementation object. */
AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
{
   delete impl;
}
109
/* Lower the whole IR to bytecode and fix up the end of the program
 * (EOP flag resp. CF_END on Cayman). Returns false on emission errors. */
bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
{
   /* A vertex shader with inputs starts by calling the fetch shader. */
   if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
       impl->m_shader->ninput > 0)
      r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);


   std::vector<Instruction::Pointer> exports;

   for (const auto& block : ir) {
      for (const auto& i : block) {
         if (!impl->emit(i))
            return false;
         /* Any non-ALU instruction clobbers the AR tracking state. */
         if (i->type() != Instruction::alu)
            impl->reset_addr_register();
      }
   }
   /*
   for (const auto& i : exports) {
      if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
          return false;
   }*/


   const struct cf_op_info *last = nullptr;
   if (impl->m_bc->cf_last)
      last = r600_isa_cf(impl->m_bc->cf_last->op);

   /* alu clause instructions don't have EOP bit, so add NOP */
   if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
       || impl->m_bc->cf_last->op == CF_OP_POP)
      r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);

   /* A fetch shader only can't be EOP (results in hang), but we can replace it
    * by a NOP */
   else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
      impl->m_bc->cf_last->op = CF_OP_NOP;

   /* Cayman has no end_of_program bit; it needs an explicit CF_END. */
   if (impl->m_shader->bc.chip_class != CAYMAN)
      impl->m_bc->cf_last->end_of_program = 1;
   else
      cm_bytecode_add_cf_end(impl->m_bc);

   return true;
}
155
/* Dispatch a single IR instruction to the type-specific emitter.
 * Returns false for unknown instruction types or emission failures. */
bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i)
{
   /* Leaving the fetch clause invalidates the hazard tracking set. */
   if (i->type() != Instruction::vtx)
      vtx_fetch_results.clear();

   sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n";
   switch (i->type()) {
   case Instruction::alu:
      return emit_alu(static_cast<const AluInstruction&>(*i), cf_alu_undefined);
   case Instruction::exprt:
      return emit_export(static_cast<const ExportInstruction&>(*i));
   case Instruction::tex:
      return emit_tex(static_cast<const TexInstruction&>(*i));
   case Instruction::vtx:
      return emit_vtx(static_cast<const FetchInstruction&>(*i));
   case Instruction::cond_if:
      return emit_if_start(static_cast<const IfInstruction&>(*i));
   case Instruction::cond_else:
      return emit_else(static_cast<const ElseInstruction&>(*i));
   case Instruction::cond_endif:
      return emit_endif(static_cast<const IfElseEndInstruction&>(*i));
   case Instruction::loop_begin:
      return emit_loop_begin(static_cast<const LoopBeginInstruction&>(*i));
   case Instruction::loop_end:
      return emit_loop_end(static_cast<const LoopEndInstruction&>(*i));
   case Instruction::loop_break:
      return emit_loop_break(static_cast<const LoopBreakInstruction&>(*i));
   case Instruction::loop_continue:
      return emit_loop_continue(static_cast<const LoopContInstruction&>(*i));
   case Instruction::streamout:
      return emit_streamout(static_cast<const StreamOutIntruction&>(*i));
   case Instruction::ring:
      return emit_memringwrite(static_cast<const MemRingOutIntruction&>(*i));
   case Instruction::emit_vtx:
      return emit_emit_vertex(static_cast<const EmitVertex&>(*i));
   case Instruction::wait_ack:
      return emit_wait_ack(static_cast<const WaitAck&>(*i));
   case Instruction::mem_wr_scratch:
      return emit_wr_scratch(static_cast<const WriteScratchInstruction&>(*i));
   case Instruction::gds:
      return emit_gds(static_cast<const GDSInstr&>(*i));
   case Instruction::rat:
      return emit_rat(static_cast<const RatInstruction&>(*i));
   case Instruction::lds_write:
      return emit_ldswrite(static_cast<const LDSWriteInstruction&>(*i));
   case Instruction::lds_read:
      return emit_ldsread(static_cast<const LDSReadInstruction&>(*i));
   case Instruction::tf_write:
      return emit_tf_write(static_cast<const GDSStoreTessFactor&>(*i));
   default:
      return false;
   }
}
209
/* Initialize the emission state for one shader.  The init-list order
 * must match the member declaration order in the struct. */
AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
                                                           r600_shader_key *key):
   m_callstack(sh->bc),
   m_bc(&sh->bc),
   m_shader(sh),
   m_key(key),
   has_pos_output(false),
   has_param_output(false),
   m_loop_nesting(0),
   m_nliterals_in_group(0)
{
   /* At least one color export is always supported. */
   m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
}
223
224 extern const std::map<EAluOp, int> opcode_map;
225
226 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
227 {
228 m_bc->ar_reg = addr->sel();
229 m_bc->ar_chan = addr->chan();
230 m_bc->ar_loaded = 0;
231 m_last_addr = addr;
232
233 sfn_log << SfnLog::assembly << " Prepare " << *addr << " to address register\n";
234
235 return true;
236 }
237
/* Emit one ALU instruction.  Handles literal-count limits per instruction
 * group, indirect (AR relative) source/destination addressing, and the
 * selection of the surrounding CF ALU clause type. */
bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op)
{

   struct r600_bytecode_alu alu;
   memset(&alu, 0, sizeof(alu));
   PValue addr_in_use;

   if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
      std::cerr << "Opcode not handled for " << ai <<"\n";
      return false;
   }

   /* Count the literal sources this instruction contributes to the group. */
   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);
      if (s.type() == Value::literal)
         ++m_nliterals_in_group;
   }

   /* This instruction group would exceed the limit of literals, so
    * force a new instruction group by adding a NOP as last
    * instruction. This will no longer be needed with a real
    * scheduler */
   if (m_nliterals_in_group > 4) {
      sfn_log << SfnLog::assembly << "  Have " << m_nliterals_in_group << " inject a last op (nop)\n";
      alu.op = ALU_OP0_NOP;
      alu.last = 1;
      int retval = r600_bytecode_add_alu(m_bc, &alu);
      if (retval)
         return false;
      memset(&alu, 0, sizeof(alu));
      m_nliterals_in_group = 0;
   }

   alu.op = opcode_map.at(ai.opcode());

   /* Missing test whether ai actually has a dest */
   auto dst = ai.dest();

   if (dst) {
      if (!copy_dst(alu.dst, *dst))
         return false;

      alu.dst.write = ai.flag(alu_write);
      alu.dst.clamp = ai.flag(alu_dst_clamp);

      /* An array destination with an index register needs AR loaded. */
      if (dst->type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(*dst);
         PValue addr = v.indirect();
         if (addr) {
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.dst.rel = addr ? 1 : 0;;
         }
      }
   }

   alu.is_op3 = ai.n_sources() == 3;

   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);

      if (!copy_src(alu.src[i], s))
         return false;
      alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);

      /* Indirect sources must all share a single AR value. */
      if (s.type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(s);
         PValue addr = v.indirect();
         if (addr) {
            assert(!addr_in_use || (*addr_in_use == *addr));
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.src[i].rel = addr ? 1 : 0;
         }
      }
      /* OP3 instructions have no source-absolute modifier bits. */
      if (!alu.is_op3)
         alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
   }

   if (ai.bank_swizzle() != alu_vec_unknown)
      alu.bank_swizzle_force = ai.bank_swizzle();

   alu.last = ai.flag(alu_last_instr);
   alu.update_pred = ai.flag(alu_update_pred);
   alu.execute_mask = ai.flag(alu_update_exec);

   /* If the destination register is equal to the last loaded address register
    * then clear the latter one, because the values will no longer be identical */
   if (m_last_addr)
      sfn_log << SfnLog::assembly << "  Current address register is " << *m_last_addr << "\n";

   if (dst)
      sfn_log << SfnLog::assembly << "  Current dst register is " << *dst << "\n";

   if (dst && m_last_addr)
      if (*dst == *m_last_addr) {
         sfn_log << SfnLog::assembly << "  Clear address register (was " << *m_last_addr << "\n";
         m_last_addr.reset();
      }

   if (cf_op == cf_alu_undefined)
      cf_op = ai.cf_type();

   /* Map the IR clause type to the hardware CF opcode. */
   unsigned type = 0;
   switch (cf_op) {
   case cf_alu: type = CF_OP_ALU; break;
   case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
   case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
   case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
   case cf_alu_break: type = CF_OP_ALU_BREAK; break;
   case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
   case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
   case cf_alu_extended: type = CF_OP_ALU_EXT; break;
   default:
      assert(0 && "cf_alu_undefined should have been replaced");
   }

   /* A new instruction group starts after the "last" slot. */
   if (alu.last)
      m_nliterals_in_group = 0;

   bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);

   /* MOVA invalidates the hardware AR register state. */
   if (ai.opcode() == op1_mova_int)
      m_bc->ar_loaded = 0;

   if (ai.opcode() == op1_set_cf_idx0)
      m_bc->index_loaded[0] = 1;

   if (ai.opcode() == op1_set_cf_idx1)
      m_bc->index_loaded[1] = 1;


   /* These ops must terminate the current CF instruction. */
   m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
                          ai.opcode() == op2_killne_int ||
                          ai.opcode() == op1_set_cf_idx0 ||
                          ai.opcode() == op1_set_cf_idx1);
   return retval;
}
380
381 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
382 {
383 r600_bytecode_output output;
384 memset(&output, 0, sizeof(output));
385 assert(exi.gpr().type() == Value::gpr_vector);
386 const auto& gpr = exi.gpr();
387 output.gpr = gpr.sel();
388 output.elem_size = 3;
389 output.swizzle_x = gpr.chan_i(0);
390 output.swizzle_y = gpr.chan_i(1);
391 output.swizzle_z = gpr.chan_i(2);
392 output.swizzle_w = gpr.chan_i(3);
393 output.burst_count = 1;
394 output.array_base = 60 + exi.location();
395 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
396 output.type = exi.export_type();
397
398
399 if (r600_bytecode_add_output(m_bc, &output)) {
400 R600_ERR("Error adding pixel export at location %d\n", exi.location());
401 return false;
402 }
403
404 return true;
405 }
406
407
408 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
409 {
410 r600_bytecode_output output;
411 assert(exi.gpr().type() == Value::gpr_vector);
412 const auto& gpr = exi.gpr();
413
414 memset(&output, 0, sizeof(output));
415 output.gpr = gpr.sel();
416 output.elem_size = 3;
417 output.swizzle_x = gpr.chan_i(0);
418 output.swizzle_y = gpr.chan_i(1);
419 output.swizzle_z = gpr.chan_i(2);
420 output.swizzle_w = gpr.chan_i(3);
421 output.burst_count = 1;
422 output.array_base = exi.location();
423 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
424 output.type = exi.export_type();
425
426
427 if (r600_bytecode_add_output(m_bc, &output)) {
428 R600_ERR("Error adding pixel export at location %d\n", exi.location());
429 return false;
430 }
431
432 return true;
433 }
434
435
436 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
437 {
438 if (exi.location() >= m_max_color_exports && exi.location() < 60) {
439 R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
440 exi.location(), m_max_color_exports);
441 return true;
442 }
443
444 assert(exi.gpr().type() == Value::gpr_vector);
445 const auto& gpr = exi.gpr();
446
447 r600_bytecode_output output;
448 memset(&output, 0, sizeof(output));
449
450 output.gpr = gpr.sel();
451 output.elem_size = 3;
452 output.swizzle_x = gpr.chan_i(0);
453 output.swizzle_y = gpr.chan_i(1);
454 output.swizzle_z = gpr.chan_i(2);
455 output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ;
456 output.burst_count = 1;
457 output.array_base = exi.location();
458 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
459 output.type = exi.export_type();
460
461
462 if (r600_bytecode_add_output(m_bc, &output)) {
463 R600_ERR("Error adding pixel export at location %d\n", exi.location());
464 return false;
465 }
466
467 return true;
468 }
469
470
471 bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction & exi)
472 {
473 switch (exi.export_type()) {
474 case ExportInstruction::et_pixel:
475 return emit_fs_pixel_export(exi);
476 case ExportInstruction::et_pos:
477 return emit_vs_pos_export(exi);
478 case ExportInstruction::et_param:
479 return emit_vs_param_export(exi);
480 default:
481 R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
482 return false;
483 }
484 }
485
/* Emit the predicate evaluation plus the JUMP that opens an if-block.
 * On some chips the state push cannot be folded into the ALU clause and
 * an explicit PUSH instruction is required instead. */
bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr)
{
   bool needs_workaround = false;
   int elems = m_callstack.push(FC_PUSH_VPM);

   if (m_bc->chip_class == CAYMAN && m_bc->stack.loop > 1)
      needs_workaround = true;
   if (m_bc->family != CHIP_HEMLOCK &&
       m_bc->family != CHIP_CYPRESS &&
       m_bc->family != CHIP_JUNIPER) {
      unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
      unsigned dmod2 = (elems) % m_bc->stack.entry_size;

      /* A push that lands on a stack entry boundary needs the explicit
       * PUSH on these chips. */
      if (elems && (!dmod1 || !dmod2))
         needs_workaround = true;
   }

   auto& pred = if_instr.pred();
   auto op = cf_alu_push_before;

   if (needs_workaround) {
      r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
      op = cf_alu;
   }
   emit_alu(pred, op);

   r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);

   /* The JUMP target is patched later when ELSE/ENDIF is reached. */
   m_jump_tracker.push(m_bc->cf_last, jt_if);
   return true;
}
518
519 bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED const ElseInstruction & else_instr)
520 {
521 r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
522 m_bc->cf_last->pop_count = 1;
523 return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
524 }
525
/* Close an if-block.  If possible the pop is folded into the preceding
 * ALU clause (ALU_POP_AFTER/ALU_POP2_AFTER); otherwise an explicit POP
 * instruction is emitted. */
bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED const IfElseEndInstruction & endif_instr)
{
   m_callstack.pop(FC_PUSH_VPM);

   unsigned force_pop = m_bc->force_add_cf;
   if (!force_pop) {
      int alu_pop = 3;
      if (m_bc->cf_last) {
         if (m_bc->cf_last->op == CF_OP_ALU)
            alu_pop = 0;
         else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
            alu_pop = 1;
      }
      alu_pop += 1;
      if (alu_pop == 1) {
         m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
         m_bc->force_add_cf = 1;
      } else if (alu_pop == 2) {
         m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
         m_bc->force_add_cf = 1;
      } else {
         /* Can't fold more than two pops into the ALU clause. */
         force_pop = 1;
      }
   }

   if (force_pop) {
      r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
      m_bc->cf_last->pop_count = 1;
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
   }

   /* Patch the JUMP/ELSE targets recorded for this if-block. */
   return m_jump_tracker.pop(m_bc->cf_last, jt_if);
}
559
560 bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED const LoopBeginInstruction& instr)
561 {
562 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
563 m_jump_tracker.push(m_bc->cf_last, jt_loop);
564 m_callstack.push(FC_LOOP);
565 ++m_loop_nesting;
566 return true;
567 }
568
569 bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED const LoopEndInstruction& instr)
570 {
571 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
572 m_callstack.pop(FC_LOOP);
573 assert(m_loop_nesting);
574 --m_loop_nesting;
575 return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
576 }
577
578 bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED const LoopBreakInstruction& instr)
579 {
580 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
581 return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
582 }
583
584 bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED const LoopContInstruction &instr)
585 {
586 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
587 return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
588 }
589
590 bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction& so_instr)
591 {
592 struct r600_bytecode_output output;
593 memset(&output, 0, sizeof(struct r600_bytecode_output));
594
595 output.gpr = so_instr.gpr().sel();
596 output.elem_size = so_instr.element_size();
597 output.array_base = so_instr.array_base();
598 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
599 output.burst_count = so_instr.burst_count();
600 output.array_size = so_instr.array_size();
601 output.comp_mask = so_instr.comp_mask();
602 output.op = so_instr.op();
603
604 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
605
606
607 if (r600_bytecode_add_output(m_bc, &output)) {
608 R600_ERR("shader_from_nir: Error creating stream output instruction\n");
609 return false;
610 }
611 return true;
612 }
613
614
615 bool AssemblyFromShaderLegacyImpl::emit_memringwrite(const MemRingOutIntruction& instr)
616 {
617 struct r600_bytecode_output output;
618 memset(&output, 0, sizeof(struct r600_bytecode_output));
619
620 output.gpr = instr.gpr().sel();
621 output.type = instr.type();
622 output.elem_size = 3;
623 output.comp_mask = 0xf;
624 output.burst_count = 1;
625 output.op = instr.op();
626 if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
627 output.index_gpr = instr.index_reg();
628 output.array_size = 0xfff;
629 }
630 output.array_base = instr.array_base();
631
632 if (r600_bytecode_add_output(m_bc, &output)) {
633 R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
634 return false;
635 }
636 return true;
637 }
638
639
/* Emit a texture fetch instruction.  For indirectly addressed samplers a
 * MOVA + SET_CF_IDX1 sequence loads the index into CF index register 1
 * first (skipped if that value is already loaded and we're not inside a
 * loop). */
bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction & tex_instr)
{
   auto addr = tex_instr.sampler_offset();
   if (addr && (!m_bc->index_loaded[1] || m_loop_nesting
                || m_bc->index_reg[1] != addr->sel()
                || m_bc->index_reg_chan[1] != addr->chan())) {
      struct r600_bytecode_alu alu;
      memset(&alu, 0, sizeof(alu));
      alu.op = opcode_map.at(op1_mova_int);
      alu.dst.chan = 0;
      alu.src[0].sel = addr->sel();
      alu.src[0].chan = addr->chan();
      alu.last = 1;
      int r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return false;

      m_bc->ar_loaded = 0;

      /* Copy AR into CF index register 1. */
      alu.op = opcode_map.at(op1_set_cf_idx1);
      alu.dst.chan = 0;
      alu.src[0].sel = 0;
      alu.src[0].chan = 0;
      alu.last = 1;

      r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return false;

      /* Remember what's in the index register to avoid reloading. */
      m_bc->index_reg[1] = addr->sel();
      m_bc->index_reg_chan[1] = addr->chan();
      m_bc->index_loaded[1] = true;
   }

   r600_bytecode_tex tex;
   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
   tex.op = tex_instr.opcode();
   tex.sampler_id = tex_instr.sampler_id();
   tex.sampler_index_mode = 0;
   tex.resource_id = tex_instr.resource_id();;
   tex.resource_index_mode = 0;
   tex.src_gpr = tex_instr.src().sel();
   tex.dst_gpr = tex_instr.dst().sel();
   tex.dst_sel_x = tex_instr.dest_swizzle(0);
   tex.dst_sel_y = tex_instr.dest_swizzle(1);
   tex.dst_sel_z = tex_instr.dest_swizzle(2);
   tex.dst_sel_w = tex_instr.dest_swizzle(3);
   tex.src_sel_x = tex_instr.src().chan_i(0);
   tex.src_sel_y = tex_instr.src().chan_i(1);
   tex.src_sel_z = tex_instr.src().chan_i(2);
   tex.src_sel_w = tex_instr.src().chan_i(3);
   tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
   tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
   tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
   tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
   tex.offset_x = tex_instr.get_offset(0);
   tex.offset_y = tex_instr.get_offset(1);
   tex.offset_z = tex_instr.get_offset(2);
   /* Index mode 2 selects CF index register 1 loaded above. */
   tex.resource_index_mode = (!!addr) ? 2 : 0;
   tex.sampler_index_mode = tex.resource_index_mode;

   if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
       tex_instr.opcode() == TexInstruction::get_gradient_v)
      tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
   else
      tex.inst_mod = tex_instr.inst_mode();
   if (r600_bytecode_add_tex(m_bc, &tex)) {
      R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
      return false;
   }
   return true;
}
712
/* Emit a vertex/buffer fetch.  Handles literal and register-indirect
 * buffer offsets (the latter via CF index register 0), any prelude
 * instructions, and read-after-write hazards within a fetch clause. */
bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr)
{
   int buffer_offset = 0;
   auto addr = fetch_instr.buffer_offset();
   auto index_mode = fetch_instr.buffer_index_mode();

   if (addr) {
      if (addr->type() == Value::literal) {
         const auto& boffs = static_cast<const LiteralValue&>(*addr);
         buffer_offset = boffs.value();
      } else {
         index_mode = bim_zero;
         /* Load the offset into CF index register 0 unless it is
          * already there (reload is forced inside loops). */
         if ((!m_bc->index_loaded[0] || m_loop_nesting ||
              m_bc->index_reg[0] != addr->sel() ||
              m_bc->index_reg_chan[0] != addr->chan())) {
            struct r600_bytecode_alu alu;
            memset(&alu, 0, sizeof(alu));
            alu.op = opcode_map.at(op1_mova_int);
            alu.dst.chan = 0;
            alu.src[0].sel = addr->sel();
            alu.src[0].chan = addr->chan();
            alu.last = 1;
            int r = r600_bytecode_add_alu(m_bc, &alu);
            if (r)
               return false;

            m_bc->ar_loaded = 0;

            /* Copy AR into CF index register 0. */
            alu.op = opcode_map.at(op1_set_cf_idx0);
            alu.dst.chan = 0;
            alu.src[0].sel = 0;
            alu.src[0].chan = 0;
            alu.last = 1;

            r = r600_bytecode_add_alu(m_bc, &alu);
            if (r)
               return false;

            m_bc->index_reg[0] = addr->sel();
            m_bc->index_reg_chan[0] = addr->chan();
            m_bc->index_loaded[0] = true;
         }
      }
   }

   if (fetch_instr.has_prelude()) {
      for(auto &i : fetch_instr.prelude()) {
         if (!emit(i))
            return false;
      }
   }

   /* If this fetch reads a register written by an earlier fetch in the
    * same clause, start a new CF instruction to avoid the hazard. */
   if (vtx_fetch_results.find(fetch_instr.src().sel()) !=
       vtx_fetch_results.end()) {
      m_bc->force_add_cf = 1;
      vtx_fetch_results.clear();
   }
   vtx_fetch_results.insert(fetch_instr.dst().sel());

   struct r600_bytecode_vtx vtx;
   memset(&vtx, 0, sizeof(vtx));
   vtx.op = fetch_instr.vc_opcode();
   vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
   vtx.fetch_type = fetch_instr.fetch_type();
   vtx.src_gpr = fetch_instr.src().sel();
   vtx.src_sel_x = fetch_instr.src().chan();
   vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
   vtx.dst_gpr = fetch_instr.dst().sel();
   vtx.dst_sel_x = fetch_instr.swz(0);        /* SEL_X */
   vtx.dst_sel_y = fetch_instr.swz(1);        /* SEL_Y */
   vtx.dst_sel_z = fetch_instr.swz(2);        /* SEL_Z */
   vtx.dst_sel_w = fetch_instr.swz(3);        /* SEL_W */
   vtx.use_const_fields = fetch_instr.use_const_fields();
   vtx.data_format = fetch_instr.data_format();
   vtx.num_format_all = fetch_instr.num_format();        /* NUM_FORMAT_SCALED */
   vtx.format_comp_all = fetch_instr.is_signed();        /* FORMAT_COMP_SIGNED */
   vtx.endian = fetch_instr.endian_swap();
   vtx.buffer_index_mode = index_mode;
   vtx.offset = fetch_instr.offset();
   vtx.indexed = fetch_instr.indexed();
   vtx.uncached = fetch_instr.uncached();
   vtx.elem_size = fetch_instr.elm_size();
   vtx.array_base = fetch_instr.array_base();
   vtx.array_size = fetch_instr.array_size();
   vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();

   if (fetch_instr.use_tc()) {
      if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
         return false;
      }

   } else {
      if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
         return false;
      }
   }

   m_bc->cf_last->vpm = fetch_instr.use_vpm();
   m_bc->cf_last->barrier = 1;

   return true;
}
817
818 bool AssemblyFromShaderLegacyImpl::emit_emit_vertex(const EmitVertex &instr)
819 {
820 int r = r600_bytecode_add_cfinst(m_bc, instr.op());
821 if (!r)
822 m_bc->cf_last->count = instr.stream();
823 assert(m_bc->cf_last->count < 4);
824
825 return r == 0;
826 }
827
828 bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck& instr)
829 {
830 int r = r600_bytecode_add_cfinst(m_bc, instr.op());
831 if (!r)
832 m_bc->cf_last->cf_addr = instr.n_ack();
833
834 return r == 0;
835 }
836
837 bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction& instr)
838 {
839 struct r600_bytecode_output cf;
840
841 memset(&cf, 0, sizeof(struct r600_bytecode_output));
842
843 cf.op = CF_OP_MEM_SCRATCH;
844 cf.elem_size = 3;
845 cf.gpr = instr.gpr().sel();
846 cf.mark = 1;
847 cf.comp_mask = instr.write_mask();
848 cf.swizzle_x = 0;
849 cf.swizzle_y = 1;
850 cf.swizzle_z = 2;
851 cf.swizzle_w = 3;
852 cf.burst_count = 1;
853
854 if (instr.indirect()) {
855 cf.type = 3;
856 cf.index_gpr = instr.address();
857
858 /* The docu seems to be wrong here: In indirect addressing the
859 * address_base seems to be the array_size */
860 cf.array_size = instr.array_size();
861 } else {
862 cf.type = 2;
863 cf.array_base = instr.location();
864 }
865 /* This should be 0, but the address calculation is apparently wrong */
866
867
868 if (r600_bytecode_add_output(m_bc, &cf)){
869 R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
870 return false;
871 }
872
873 return true;
874 }
875
876 extern const std::map<ESDOp, int> ds_opcode_map;
877
/* Emit a GDS instruction (atomic counter ops).  A literal UAV id is
 * folded directly into uav_id; a register value is divided by 4 and
 * loaded into CF index register 1 via MOVA + SET_CF_IDX1. */
bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr& instr)
{
   struct r600_bytecode_gds gds;

   int uav_idx = -1;
   auto addr = instr.uav_id();
   if (addr->type() != Value::literal) {
      /* Reload the index register unless it already holds this value
       * (always reload inside loops). */
      if (!m_bc->index_loaded[1] || m_loop_nesting ||
          m_bc->index_reg[1] != addr->sel()
          || m_bc->index_reg_chan[1] != addr->chan()) {
         struct r600_bytecode_alu alu;

         /* Byte offset -> dword index: addr >>= 2. */
         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op2_lshr_int);
         alu.dst.sel = addr->sel();
         alu.dst.chan = addr->chan();
         alu.src[0].sel = addr->sel();
         alu.src[0].chan = addr->chan();
         alu.src[1].sel = ALU_SRC_LITERAL;
         alu.src[1].value = 2;
         alu.last = 1;
         alu.dst.write = 1;
         int r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return false;

         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op1_mova_int);
         alu.dst.chan = 0;
         alu.src[0].sel = addr->sel();
         alu.src[0].chan = addr->chan();
         alu.last = 1;
         r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return false;

         m_bc->ar_loaded = 0;

         /* Copy AR into CF index register 1. */
         alu.op = opcode_map.at(op1_set_cf_idx1);
         alu.dst.chan = 0;
         alu.src[0].sel = 0;
         alu.src[0].chan = 0;
         alu.last = 1;

         r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return false;

         m_bc->index_reg[1] = addr->sel();
         m_bc->index_reg_chan[1] = addr->chan();
         m_bc->index_loaded[1] = true;
      }
   } else {
      /* Literal byte offset; convert to a dword index. */
      const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
      uav_idx = addr_reg.value() >> 2;
   }

   memset(&gds, 0, sizeof(struct r600_bytecode_gds));

   gds.op = ds_opcode_map.at(instr.op());
   gds.dst_gpr = instr.dest_sel();
   gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
   /* bim_one selects CF index register 1 loaded above. */
   gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
   gds.src_gpr = instr.src_sel();

   gds.src_sel_x = instr.src_swizzle(0);
   gds.src_sel_y = instr.src_swizzle(1);
   gds.src_sel_z = instr.src_swizzle(2);

   gds.dst_sel_x = instr.dest_swizzle(0);
   gds.dst_sel_y = 7;
   gds.dst_sel_z = 7;
   gds.dst_sel_w = 7;
   gds.src_gpr2 = 0;
   gds.alloc_consume = 1; // Not Cayman

   int r = r600_bytecode_add_gds(m_bc, &gds);
   if (r)
      return false;
   m_bc->cf_last->vpm = 1;
   m_bc->cf_last->barrier = 1;
   return true;
}
961
962 bool AssemblyFromShaderLegacyImpl::emit_tf_write(const GDSStoreTessFactor& instr)
963 {
964 struct r600_bytecode_gds gds;
965
966 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
967 gds.src_gpr = instr.sel();
968 gds.src_sel_x = instr.chan(0);
969 gds.src_sel_y = instr.chan(1);
970 gds.src_sel_z = 4;
971 gds.dst_sel_x = 7;
972 gds.dst_sel_y = 7;
973 gds.dst_sel_z = 7;
974 gds.dst_sel_w = 7;
975 gds.op = FETCH_OP_TF_WRITE;
976
977 if (r600_bytecode_add_gds(m_bc, &gds) != 0)
978 return false;
979
980 if (instr.chan(2) != 7) {
981 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
982 gds.src_gpr = instr.sel();
983 gds.src_sel_x = instr.chan(2);
984 gds.src_sel_y = instr.chan(3);
985 gds.src_sel_z = 4;
986 gds.dst_sel_x = 7;
987 gds.dst_sel_y = 7;
988 gds.dst_sel_z = 7;
989 gds.dst_sel_w = 7;
990 gds.op = FETCH_OP_TF_WRITE;
991
992 if (r600_bytecode_add_gds(m_bc, &gds))
993 return false;
994 }
995 return true;
996 }
997
998 bool AssemblyFromShaderLegacyImpl::emit_ldswrite(const LDSWriteInstruction& instr)
999 {
1000 r600_bytecode_alu alu;
1001 memset(&alu, 0, sizeof(r600_bytecode_alu));
1002
1003 alu.last = true;
1004 alu.is_lds_idx_op = true;
1005 copy_src(alu.src[0], instr.address());
1006 copy_src(alu.src[1], instr.value0());
1007
1008 if (instr.num_components() == 1) {
1009 alu.op = LDS_OP2_LDS_WRITE;
1010 } else {
1011 alu.op = LDS_OP3_LDS_WRITE_REL;
1012 alu.lds_idx = 1;
1013 copy_src(alu.src[2], instr.value1());
1014 }
1015
1016 return r600_bytecode_add_alu(m_bc, &alu) == 0;
1017 }
1018
/* Emit an LDS read: one LDS_READ_RET per address, whose results land on the
 * LDS output queue (OQ_A), followed by one MOV per result that pops the
 * queue into the destination register. All fetches are issued before any
 * pop, so the push and pop counts on the queue stay balanced. Returns false
 * if adding any ALU instruction fails. */
bool AssemblyFromShaderLegacyImpl::emit_ldsread(const LDSReadInstruction& instr)
{
   int r;
   unsigned nread = 0;
   unsigned nfetch = 0;
   unsigned n_values = instr.num_values();

   r600_bytecode_alu alu_fetch;
   r600_bytecode_alu alu_read;

   /* We must add a new ALU clause if the fetch and read ops would otherwise be
    * split across clauses: r600_asm limits a clause to 120 slots = 240 dwords,
    * and each value needs a fetch + read pair (4 dwords) in the same clause. */
   if (m_bc->cf_last->ndw > 240 - 4 * n_values)
      m_bc->force_add_cf = 1;

   while (nread < n_values) {
      /* Phase 1: issue the fetches; each pushes one result onto OQ_A. */
      if (nfetch < n_values) {
         memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
         alu_fetch.is_lds_idx_op = true;
         alu_fetch.op = LDS_OP1_LDS_READ_RET;

         copy_src(alu_fetch.src[0], instr.address(nfetch));
         alu_fetch.src[1].sel = V_SQ_ALU_SRC_0;
         alu_fetch.src[2].sel = V_SQ_ALU_SRC_0;
         alu_fetch.last = 1;
         r = r600_bytecode_add_alu(m_bc, &alu_fetch);
         m_bc->cf_last->nlds_read++;
         if (r)
            return false;
      }

      /* Phase 2 (once all fetches are queued): pop OQ_A into the dests. */
      if (nfetch >= n_values) {
         memset(&alu_read, 0, sizeof(r600_bytecode_alu));
         copy_dst(alu_read.dst, instr.dest(nread));
         alu_read.op = ALU_OP1_MOV;
         alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
         alu_read.last = 1;
         alu_read.dst.write = 1;
         r = r600_bytecode_add_alu(m_bc, &alu_read);
         m_bc->cf_last->nqueue_read++;
         if (r)
            return false;
         ++nread;
      }
      ++nfetch;
   }
   /* Every OQ_A push must be matched by exactly one pop. */
   assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read);

   return true;
}
1069
1070 bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
1071 {
1072 struct r600_bytecode_gds gds;
1073
1074 int rat_idx = -1;
1075 EBufferIndexMode rat_index_mode = bim_none;
1076 auto addr = instr.rat_id_offset();
1077
1078 if (addr) {
1079 if (addr->type() != Value::literal) {
1080 rat_index_mode = bim_one;
1081 if (!m_bc->index_loaded[1] || m_loop_nesting ||
1082 m_bc->index_reg[1] != addr->sel()
1083 || m_bc->index_reg_chan[1] != addr->chan()) {
1084 struct r600_bytecode_alu alu;
1085
1086 memset(&alu, 0, sizeof(alu));
1087 alu.op = opcode_map.at(op1_mova_int);
1088 alu.dst.chan = 0;
1089 alu.src[0].sel = addr->sel();
1090 alu.src[0].chan = addr->chan();
1091 alu.last = 1;
1092 int r = r600_bytecode_add_alu(m_bc, &alu);
1093 if (r)
1094 return false;
1095
1096 m_bc->ar_loaded = 0;
1097
1098 alu.op = opcode_map.at(op1_set_cf_idx1);
1099 alu.dst.chan = 0;
1100 alu.src[0].sel = 0;
1101 alu.src[0].chan = 0;
1102 alu.last = 1;
1103
1104 r = r600_bytecode_add_alu(m_bc, &alu);
1105 if (r)
1106 return false;
1107
1108 m_bc->index_reg[1] = addr->sel();
1109 m_bc->index_reg_chan[1] = addr->chan();
1110 m_bc->index_loaded[1] = true;
1111
1112 }
1113 } else {
1114 const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
1115 rat_idx += addr_reg.value();
1116 }
1117 }
1118 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
1119
1120 r600_bytecode_add_cfinst(m_bc, CF_OP_MEM_RAT);
1121 auto cf = m_bc->cf_last;
1122 cf->rat.id = rat_idx + m_shader->rat_base;
1123 cf->rat.inst = instr.rat_op();
1124 cf->rat.index_mode = rat_index_mode;
1125 cf->output.type = instr.need_ack() ? 3 : 1;
1126 cf->output.gpr = instr.data_gpr();
1127 cf->output.index_gpr = instr.index_gpr();
1128 cf->output.comp_mask = instr.comp_mask();
1129 cf->output.burst_count = instr.burst_count();
1130 cf->output.swizzle_x = instr.data_swz(0);
1131 cf->output.swizzle_y = instr.data_swz(1);
1132 cf->output.swizzle_z = instr.data_swz(2);
1133 cf->output.swizzle_w = instr.data_swz(3);
1134 cf->vpm = 1;
1135 cf->barrier = 1;
1136 cf->mark = instr.need_ack();
1137 cf->output.elem_size = instr.elm_size();
1138 return true;
1139 }
1140
1141 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
1142 const Value& d)
1143 {
1144 assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
1145
1146 if (d.sel() > 124) {
1147 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel());
1148 return false;
1149 }
1150
1151 dst.sel = d.sel();
1152 dst.chan = d.chan();
1153
1154 if (m_bc->index_reg[1] == dst.sel &&
1155 m_bc->index_reg_chan[1] == dst.chan)
1156 m_bc->index_loaded[1] = false;
1157
1158 if (m_bc->index_reg[0] == dst.sel &&
1159 m_bc->index_reg_chan[0] == dst.chan)
1160 m_bc->index_loaded[0] = false;
1161
1162 return true;
1163 }
1164
1165 bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
1166 {
1167
1168 if (s.type() == Value::gpr && s.sel() > 124) {
1169 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel());
1170 return false;
1171 }
1172
1173 if (s.type() == Value::lds_direct) {
1174 R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
1175 return false;
1176 }
1177
1178 if (s.type() == Value::kconst && s.sel() < 512) {
1179 R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel());
1180 return false;
1181 }
1182
1183 if (s.type() == Value::literal) {
1184 auto& v = static_cast<const LiteralValue&>(s);
1185 if (v.value() == 0) {
1186 src.sel = ALU_SRC_0;
1187 src.chan = 0;
1188 --m_nliterals_in_group;
1189 return true;
1190 }
1191 if (v.value() == 1) {
1192 src.sel = ALU_SRC_1_INT;
1193 src.chan = 0;
1194 --m_nliterals_in_group;
1195 return true;
1196 }
1197 if (v.value_float() == 1.0f) {
1198 src.sel = ALU_SRC_1;
1199 src.chan = 0;
1200 --m_nliterals_in_group;
1201 return true;
1202 }
1203 if (v.value_float() == 0.5f) {
1204 src.sel = ALU_SRC_0_5;
1205 src.chan = 0;
1206 --m_nliterals_in_group;
1207 return true;
1208 }
1209 if (v.value() == 0xffffffff) {
1210 src.sel = ALU_SRC_M_1_INT;
1211 src.chan = 0;
1212 --m_nliterals_in_group;
1213 return true;
1214 }
1215 src.value = v.value();
1216 }
1217
1218 src.sel = s.sel();
1219 src.chan = s.chan();
1220 if (s.type() == Value::kconst) {
1221 const UniformValue& cv = static_cast<const UniformValue&>(s);
1222 src.kc_bank = cv.kcache_bank();
1223 }
1224
1225 return true;
1226 }
1227
1228 const std::map<EAluOp, int> opcode_map = {
1229
1230 {op2_add, ALU_OP2_ADD},
1231 {op2_mul, ALU_OP2_MUL},
1232 {op2_mul_ieee, ALU_OP2_MUL_IEEE},
1233 {op2_max, ALU_OP2_MAX},
1234 {op2_min, ALU_OP2_MIN},
1235 {op2_max_dx10, ALU_OP2_MAX_DX10},
1236 {op2_min_dx10, ALU_OP2_MIN_DX10},
1237 {op2_sete, ALU_OP2_SETE},
1238 {op2_setgt, ALU_OP2_SETGT},
1239 {op2_setge, ALU_OP2_SETGE},
1240 {op2_setne, ALU_OP2_SETNE},
1241 {op2_sete_dx10, ALU_OP2_SETE_DX10},
1242 {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
1243 {op2_setge_dx10, ALU_OP2_SETGE_DX10},
1244 {op2_setne_dx10, ALU_OP2_SETNE_DX10},
1245 {op1_fract, ALU_OP1_FRACT},
1246 {op1_trunc, ALU_OP1_TRUNC},
1247 {op1_ceil, ALU_OP1_CEIL},
1248 {op1_rndne, ALU_OP1_RNDNE},
1249 {op1_floor, ALU_OP1_FLOOR},
1250 {op2_ashr_int, ALU_OP2_ASHR_INT},
1251 {op2_lshr_int, ALU_OP2_LSHR_INT},
1252 {op2_lshl_int, ALU_OP2_LSHL_INT},
1253 {op1_mov, ALU_OP1_MOV},
1254 {op0_nop, ALU_OP0_NOP},
1255 {op2_mul_64, ALU_OP2_MUL_64},
1256 {op1_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
1257 {op1v_flt64_to_flt32, ALU_OP1_FLT32_TO_FLT64},
1258 {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
1259 {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
1260 {op2_pred_sete, ALU_OP2_PRED_SETE},
1261 {op2_pred_setgt, ALU_OP2_PRED_SETGT},
1262 {op2_pred_setge, ALU_OP2_PRED_SETGE},
1263 {op2_pred_setne, ALU_OP2_PRED_SETNE},
1264 //{op2_pred_set_inv, ALU_OP2_PRED_SET},
1265 //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
1266 //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
1267 {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
1268 {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
1269 {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
1270 {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
1271 {op2_kille, ALU_OP2_KILLE},
1272 {op2_killgt, ALU_OP2_KILLGT},
1273 {op2_killge, ALU_OP2_KILLGE},
1274 {op2_killne, ALU_OP2_KILLNE},
1275 {op2_and_int, ALU_OP2_AND_INT},
1276 {op2_or_int, ALU_OP2_OR_INT},
1277 {op2_xor_int, ALU_OP2_XOR_INT},
1278 {op1_not_int, ALU_OP1_NOT_INT},
1279 {op2_add_int, ALU_OP2_ADD_INT},
1280 {op2_sub_int, ALU_OP2_SUB_INT},
1281 {op2_max_int, ALU_OP2_MAX_INT},
1282 {op2_min_int, ALU_OP2_MIN_INT},
1283 {op2_max_uint, ALU_OP2_MAX_UINT},
1284 {op2_min_uint, ALU_OP2_MIN_UINT},
1285 {op2_sete_int, ALU_OP2_SETE_INT},
1286 {op2_setgt_int, ALU_OP2_SETGT_INT},
1287 {op2_setge_int, ALU_OP2_SETGE_INT},
1288 {op2_setne_int, ALU_OP2_SETNE_INT},
1289 {op2_setgt_uint, ALU_OP2_SETGT_UINT},
1290 {op2_setge_uint, ALU_OP2_SETGE_UINT},
1291 {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
1292 {op2_killge_uint, ALU_OP2_KILLGE_UINT},
1293 //p2_prede_int, ALU_OP2_PREDE_INT},
1294 {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
1295 {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
1296 {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
1297 {op2_kille_int, ALU_OP2_KILLE_INT},
1298 {op2_killgt_int, ALU_OP2_KILLGT_INT},
1299 {op2_killge_int, ALU_OP2_KILLGE_INT},
1300 {op2_killne_int, ALU_OP2_KILLNE_INT},
1301 {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
1302 {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
1303 {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
1304 {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
1305 {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
1306 {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
1307 {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
1308 {op1_bfrev_int, ALU_OP1_BFREV_INT},
1309 {op2_addc_uint, ALU_OP2_ADDC_UINT},
1310 {op2_subb_uint, ALU_OP2_SUBB_UINT},
1311 {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
1312 {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
1313 {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
1314 {op2_set_mode, ALU_OP2_SET_MODE},
1315 {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
1316 {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
1317 {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
1318 {op1_exp_ieee, ALU_OP1_EXP_IEEE},
1319 {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
1320 {op1_log_ieee, ALU_OP1_LOG_IEEE},
1321 {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
1322 {op1_recip_ff, ALU_OP1_RECIP_FF},
1323 {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
1324 {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
1325 {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
1326 {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
1327 {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
1328 {op1_sin, ALU_OP1_SIN},
1329 {op1_cos, ALU_OP1_COS},
1330 {op2_mullo_int, ALU_OP2_MULLO_INT},
1331 {op2_mulhi_int, ALU_OP2_MULHI_INT},
1332 {op2_mullo_uint, ALU_OP2_MULLO_UINT},
1333 {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
1334 {op1_recip_int, ALU_OP1_RECIP_INT},
1335 {op1_recip_uint, ALU_OP1_RECIP_UINT},
1336 {op1_recip_64, ALU_OP2_RECIP_64},
1337 {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
1338 {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
1339 {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
1340 {op1_sqrt_64, ALU_OP2_SQRT_64},
1341 {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
1342 {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
1343 {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
1344 {op2_bfm_int, ALU_OP2_BFM_INT},
1345 {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
1346 {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
1347 {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
1348 {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
1349 {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
1350 {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
1351 {op1_bcnt_int, ALU_OP1_BCNT_INT},
1352 {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
1353 {op1_ffbl_int, ALU_OP1_FFBL_INT},
1354 {op1_ffbh_int, ALU_OP1_FFBH_INT},
1355 {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
1356 {op2_dot_ieee, ALU_OP2_DOT_IEEE},
1357 {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
1358 {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
1359 {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
1360 {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
1361 {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
1362 {op2_mul_uint24, ALU_OP2_MUL_UINT24},
1363 {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
1364 {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
1365 {op2_sete_64, ALU_OP2_SETE_64},
1366 {op2_setne_64, ALU_OP2_SETNE_64},
1367 {op2_setgt_64, ALU_OP2_SETGT_64},
1368 {op2_setge_64, ALU_OP2_SETGE_64},
1369 {op2_min_64, ALU_OP2_MIN_64},
1370 {op2_max_64, ALU_OP2_MAX_64},
1371 {op2_dot4, ALU_OP2_DOT4},
1372 {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
1373 {op2_cube, ALU_OP2_CUBE},
1374 {op1_max4, ALU_OP1_MAX4},
1375 {op1_frexp_64, ALU_OP1_FREXP_64},
1376 {op1_ldexp_64, ALU_OP2_LDEXP_64},
1377 {op1_fract_64, ALU_OP1_FRACT_64},
1378 {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
1379 {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
1380 {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
1381 {op2_add_64, ALU_OP2_ADD_64},
1382 {op1_mova_int, ALU_OP1_MOVA_INT},
1383 {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
1384 {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
1385 {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
1386 {op2_dot, ALU_OP2_DOT},
1387 //p2_mul_prev, ALU_OP2_MUL_PREV},
1388 //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
1389 //p2_add_prev, ALU_OP2_ADD_PREV},
1390 {op2_muladd_prev, ALU_OP2_MULADD_PREV},
1391 {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
1392 {op2_interp_xy, ALU_OP2_INTERP_XY},
1393 {op2_interp_zw, ALU_OP2_INTERP_ZW},
1394 {op2_interp_x, ALU_OP2_INTERP_X},
1395 {op2_interp_z, ALU_OP2_INTERP_Z},
1396 {op0_store_flags, ALU_OP1_STORE_FLAGS},
1397 {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
1398 {op0_lds_1a, ALU_OP2_LDS_1A},
1399 {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
1400 {op0_lds_2a, ALU_OP2_LDS_2A},
1401 {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
1402 {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
1403 {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
1404 // {op 3 all left shift 6
1405 {op3_bfe_uint, ALU_OP3_BFE_UINT},
1406 {op3_bfe_int, ALU_OP3_BFE_INT},
1407 {op3_bfi_int, ALU_OP3_BFI_INT},
1408 {op3_fma, ALU_OP3_FMA},
1409 {op3_cndne_64, ALU_OP3_CNDNE_64},
1410 {op3_fma_64, ALU_OP3_FMA_64},
1411 {op3_lerp_uint, ALU_OP3_LERP_UINT},
1412 {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
1413 {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
1414 {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
1415 {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
1416 {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
1417 {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
1418 {op3_muladd, ALU_OP3_MULADD},
1419 {op3_muladd_m2, ALU_OP3_MULADD_M2},
1420 {op3_muladd_m4, ALU_OP3_MULADD_M4},
1421 {op3_muladd_d2, ALU_OP3_MULADD_D2},
1422 {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
1423 {op3_cnde, ALU_OP3_CNDE},
1424 {op3_cndgt, ALU_OP3_CNDGT},
1425 {op3_cndge, ALU_OP3_CNDGE},
1426 {op3_cnde_int, ALU_OP3_CNDE_INT},
1427 {op3_cndgt_int, ALU_OP3_CNDGT_INT},
1428 {op3_cndge_int, ALU_OP3_CNDGE_INT},
1429 {op3_mul_lit, ALU_OP3_MUL_LIT},
1430 };
1431
/* Translation table from the IR data-share opcodes (ESDOp) to the
 * hardware GDS fetch opcodes; looked up via ds_opcode_map.at() when
 * emitting GDS instructions. DS_OP_INVALID deliberately maps to 0. */
const std::map<ESDOp, int> ds_opcode_map = {
   {DS_OP_ADD, FETCH_OP_GDS_ADD},
   {DS_OP_SUB, FETCH_OP_GDS_SUB},
   {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
   {DS_OP_INC, FETCH_OP_GDS_INC},
   {DS_OP_DEC, FETCH_OP_GDS_DEC},
   {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
   {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
   {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
   {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
   {DS_OP_AND, FETCH_OP_GDS_AND},
   {DS_OP_OR, FETCH_OP_GDS_OR},
   {DS_OP_XOR, FETCH_OP_GDS_XOR},
   {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
   {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
   {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
   {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
   {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
   {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
   {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
   {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
   {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
   {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
   {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
   {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
   {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
   {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
   {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
   {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
   {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
   {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
   {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
   {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
   {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
   {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
   {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
   {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
   {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
   {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
   {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
   {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
   {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
   {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
   {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
   {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
   {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
   {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
   {DS_OP_INVALID, 0},
};
1482
1483 }