/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X) \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;

ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size,
                                               enum chip_class chip_class,
                                               int atomic_base):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_chip_class(chip_class),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel),
   m_atomic_base(atomic_base),
   m_image_count(0)
{
   m_sh_info.processor_type = ptype;
}

ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

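/* Pre-pass over the NIR instructions: records properties that must be known
 * before code emission starts, e.g. whether texture buffers or the z component
 * of a cube array query are used, and whether the shader writes memory.
 * System-value handling is delegated to the shader-type specific
 * scan_sysvalue_access(). */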
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
      if (t->op == nir_texop_txs &&
          t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
          t->is_array)
         sh_info().has_txq_cube_array_z_comp = true;
      break;
   }
   case nir_instr_type_intrinsic: {
      auto *i = nir_instr_as_intrinsic(instr);
      switch (i->intrinsic) {
      case nir_intrinsic_ssbo_atomic_add:
      case nir_intrinsic_image_atomic_add:
      case nir_intrinsic_ssbo_atomic_and:
      case nir_intrinsic_image_atomic_and:
      case nir_intrinsic_ssbo_atomic_or:
      case nir_intrinsic_image_atomic_or:
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_image_atomic_imin:
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_image_atomic_imax:
      case nir_intrinsic_ssbo_atomic_umin:
      case nir_intrinsic_image_atomic_umin:
      case nir_intrinsic_ssbo_atomic_umax:
      case nir_intrinsic_image_atomic_umax:
      case nir_intrinsic_image_atomic_xor:
      case nir_intrinsic_image_atomic_exchange:
      case nir_intrinsic_image_atomic_comp_swap:
         m_sel.info.writes_memory = 1;
         /* fallthrough */
      case nir_intrinsic_image_load:
         m_ssbo_instr.set_require_rat_return_address();
         break;
      case nir_intrinsic_image_size: {
         if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
             nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
            sh_info().has_txq_cube_array_z_comp = true;
         break;
      }
      default:
         ;
      }
      break;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
{
   return m_chip_class;
}

bool ShaderFromNirProcessor::allocate_reserved_registers()
{
   bool retval = do_allocate_reserved_registers();
   m_ssbo_instr.load_rat_return_address();
   if (sh_info().uses_atomics)
      m_ssbo_instr.load_atomic_inc_limits();
   m_ssbo_instr.set_ssbo_offset(m_image_count);
   return retval;
}

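/* Apply the register renaming map to the GPR indices recorded in the
 * r600_shader inputs and outputs, and mark the (possibly renamed) registers
 * as used so they survive the subsequent compaction pass. */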
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

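/* Renumber the temporary registers in two passes: first merge registers with
 * disjoint live ranges based on the live-range evaluation, then compact the
 * remaining used registers into a dense index range. */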
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

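/* Record the uniform type and account for hardware atomic counters and
 * images; atomic counter ranges are mapped to consecutive hardware slots
 * starting at m_atomic_base. */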
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   if (uniform->type->is_image()) {
      ++m_image_count;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s)
}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

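/* Compute the SPI semantic id that is used to match exports with the inputs
 * of the next stage; system values that are not passed through the SPI get
 * sid 0. A worked example for the default case, assuming
 * TGSI_SEMANTIC_COLOR == 1 and sid == 0:
 * spi_sid = (0x80 | (1 << 3) | 0) + 1 = 0x89. */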
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

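/* Map NIR shared-memory atomics to the corresponding r600 LDS opcodes; the
 * *_RET variants return the value that was stored at the address before the
 * operation. */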
static unsigned
lds_op_from_intrinsic(nir_intrinsic_op op) {
   switch (op) {
   case nir_intrinsic_shared_atomic_add:
      return LDS_OP2_LDS_ADD_RET;
   case nir_intrinsic_shared_atomic_and:
      return LDS_OP2_LDS_AND_RET;
   case nir_intrinsic_shared_atomic_or:
      return LDS_OP2_LDS_OR_RET;
   case nir_intrinsic_shared_atomic_imax:
      return LDS_OP2_LDS_MAX_INT_RET;
   case nir_intrinsic_shared_atomic_umax:
      return LDS_OP2_LDS_MAX_UINT_RET;
   case nir_intrinsic_shared_atomic_imin:
      return LDS_OP2_LDS_MIN_INT_RET;
   case nir_intrinsic_shared_atomic_umin:
      return LDS_OP2_LDS_MIN_UINT_RET;
   case nir_intrinsic_shared_atomic_xor:
      return LDS_OP2_LDS_XOR_RET;
   case nir_intrinsic_shared_atomic_exchange:
      return LDS_OP2_LDS_XCHG_RET;
   case nir_intrinsic_shared_atomic_comp_swap:
      return LDS_OP3_LDS_CMP_XCHG_RET;
   default:
      unreachable("Unsupported shared atomic opcode");
   }
}

bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
{
   auto address = from_nir(instr->src[0], 0);
   auto dest_value = from_nir(instr->dest, 0);
   auto value = from_nir(instr->src[1], 0);
   auto op = lds_op_from_intrinsic(instr->intrinsic);

   if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)) {
      auto value2 = from_nir(instr->src[2], 0);
      emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op));
   } else {
      emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op));
   }
   return true;
}

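/* An LDS write handles at most two dwords. If only the upper half of the
 * writemask is set, source components are read starting at channel 2;
 * folding the mask with "write_mask |= write_mask >> 2" then tells whether
 * a second value has to be written. */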
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   if (m_ssbo_instr.emit(&instr->instr)) {
      m_sel.info.writes_memory = true;
      return true;
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
   case nir_intrinsic_load_shared:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
   case nir_intrinsic_store_shared:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier:
      return emit_barrier(instr);
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      return emit_atomic_local_shared(instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}

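/* For non-SSA destinations emit a real MOV; for SSA destinations it is
 * enough to inject the preloaded value as the register backing the given
 * channel. */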
bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                                 swizzle_from_comps(instr->num_components));

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

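/* Gather the masked source components into one GPR vector. If all components
 * already live in the same GPR (and, with 'match', in the requested
 * channels), that register is reused directly; otherwise the components are
 * copied into a freshly allocated temporary. */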
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                   unsigned mask,
                                                                   const GPRVector::Swizzle& swizzle,
                                                                   bool match)
{
   bool use_same = true;
   GPRVector::Values v;

   for (int i = 0; i < 4 && use_same; ++i) {
      if ((1 << i) & mask) {
         if (swizzle[i] < 4) {
            v[i] = from_nir(src, swizzle[i]);
            assert(v[i]);
            if (v[i]->type() != Value::gpr)
               use_same = false;
            if (match && (v[i]->chan() != swizzle[i]))
               use_same = false;
         }
      }
   }

   if (use_same) {
      int i = 0;
      while (i < 4 && !v[i]) ++i;
      assert(i < 4);

      unsigned sel = v[i]->sel();
      for (i = 0; i < 4 && use_same; ++i) {
         if (!v[i])
            v[i] = PValue(new GPRValue(sel, swizzle[i]));
         else
            use_same &= v[i]->sel() == sel;
      }
   }

   if (!use_same) {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   }
   return GPRVector(v);
}

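/* Three cases for UBO loads: buffer id and offset both literal - the uniform
 * can be accessed lazily; only the buffer id is literal - fall back to an
 * indirect uniform load from that buffer; fully dynamic - emit a
 * vertex-cache fetch with the buffer id taken from a register. */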
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;
      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components ; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }
}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                          {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));
   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                          {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components ; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

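/* Indirectly addressed uniforms are loaded with a vertex-cache fetch; the
 * fetch address must live in a GPR, so non-GPR addresses are first copied
 * into the first target register. */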
bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components ; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

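/* Lower a NIR constant to MOVs. Values that the hardware provides as inline
 * constants (0, 1, 1.0f, 0.5f) are used directly; everything else becomes a
 * literal ALU operand. */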
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components ; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {
         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue(0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

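/* Like from_nir(), but guarantees that the result lives in a GPR;
 * fetch-style instructions cannot read literals or uniforms directly, so
 * such values are copied into a temporary register first. */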
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register();
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry
    * and tessellation shaders need a specialized deref_array; for the other
    * shaders it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] = " << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] = " << sel << "\n";
   m_outputs[pos] = sel;
}

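/* Start a new instruction block, adjusting the nesting depth by the given
 * delta (+1 when entering a loop or branch, -1 when leaving). */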
void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second;

   m_output.push_back(m_export_output);
}

}