r600/sfn: Add support for SSBO load and store
[mesa.git] / src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X) \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;


ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size):
   m_processor_type(ptype),
   m_sh_info(sh_info),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel)
{
   m_sh_info.processor_type = ptype;
}


ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

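/* Update the GPR indices recorded in the shader-info I/O tables after
 * register renumbering: map entries flagged as valid carry the new
 * register index, and the registers referenced by inputs and outputs
 * are marked as used so they stay pinned in later passes. */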
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

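/* Renumber the temporary registers based on their live ranges to lower
 * the shader's GPR demand. The remapping is applied twice: a first pass
 * merges registers with disjoint live ranges, then, after the unused
 * entries have been compacted, a second pass assigns the final
 * consecutive indices. */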
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";


   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto ir: m_output)
      ir->remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto ir: m_output)
      ir->remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

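/* Record the type of each uniform and account for hardware atomic
 * counters: every atomic in the uniform claims a slot in a contiguous
 * hw-atomic range, and images and SSBOs are flagged so the backend sets
 * up the corresponding resources. */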
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   // m_uniform_type_map
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s);


}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

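/* Compute the SPI semantic id the hardware uses to match outputs of one
 * stage with inputs of the next: system values with dedicated hardware
 * channels get 0, generic varyings use their semantic index, and all
 * other semantics pack name and index into eight bits. */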
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %u\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      m_output.push_back(PInstruction(m_pending_else));
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << "  as '" << *ir << "'\n";
   m_output.push_back(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

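/* Open a conditional block: the NIR condition is evaluated with an
 * integer compare against zero that updates both the execution mask and
 * the predicate, and the ALU clause is flagged to push the branch state
 * onto the hardware stack before the condition takes effect. */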
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

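/* Dispatch NIR intrinsics. Derived shader classes get the first chance
 * via the override hook; SSBO loads and stores are forwarded to the
 * SSBO emitter, and every intrinsic that writes memory is flagged in
 * the selector so the state tracker knows about the side effects. */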
bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_store_ssbo:
      m_sel.info.writes_memory = true;
      /* fallthrough */
   case nir_intrinsic_load_ssbo:
      return m_ssbo_instr.emit(&instr->instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

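/* Spill a value to the scratch buffer. If the address is a literal the
 * write can use an immediate offset; otherwise the address is first
 * forced into a GPR and an indirect scratch write is emitted. */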
bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   std::unique_ptr<GPRVector> vec(vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                  swizzle_from_mask(instr->num_components)));
   GPRVector value(*vec);

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = dynamic_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

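/* Obtain a four-component GPR vector for a NIR source. If the source
 * already lives in a GPR whose first component is channel x, that
 * register can be reused directly; otherwise the masked components are
 * copied into a freshly allocated temporary with the requested swizzle. */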
GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                    UNUSED unsigned mask,
                                                                    const GPRVector::Swizzle& swizzle)
{
   GPRVector *result = nullptr;
   int sel = lookup_register_index(src);
   if (sel >= 0 && from_nir(src, 0)->type() == Value::gpr &&
       from_nir(src, 0)->chan() == 0) {
      /* If the x channel really is channel x of a GPR, we can be reasonably
       * sure that the values are laid out as we need them */
      result = new GPRVector(from_nir(src, 0)->sel(), swizzle);
   } else {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      GPRVector::Values v;
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);

      result = new GPRVector(v);
   }
   return result;
}

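/* Load from a uniform buffer. When both the buffer id and the offset
 * are literal constants, the components can be bound lazily as constant
 * reads (the 512 base puts the address into the range the backend
 * reserves for buffer constants); with a run-time buffer id a
 * vertex-cache fetch with the buffer index taken from a register is
 * emitted instead. */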
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;

      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components ; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                       {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components ; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

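/* Load a uniform through a vertex-cache fetch when the address is only
 * known at run time. The address is copied into a GPR first if it is
 * not in one already, since the fetch takes its source from a GPR. */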
bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components ; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

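/* Materialize literal constants. Values matching one of the inline
 * constants (0, 1, 1.0f, 0.5f) are emitted as such, everything else
 * becomes an ALU literal; bit sizes other than 1 and 32 fall back to a
 * 32-bit literal. */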
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components ; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {
         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue( 0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

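/* Return the value of a NIR source, copying it into a temporary GPR if
 * it does not already live in one, so it can be used where a GPR
 * operand is required (e.g. as a fetch address). */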
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      unsigned temp = allocate_temp_register();
      PValue retval(new GPRValue(temp, component));
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry and
    * tessellation shaders need a specialized deref_array; for the other shaders
    * it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << "  as '" << *ir << "'\n";
   m_export_output.push_back(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] = " << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] = " << *var << "\n";
   m_outputs[pos] = var;
}

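/* Final pass after all instructions have been emitted: let the derived
 * shader class finish its bookkeeping, propagate the GPRs that the
 * inputs and outputs ended up in into the shader info, and append the
 * queued export instructions to the instruction stream. */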
void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second->sel();

   m_output.insert(m_output.end(), m_export_output.begin(), m_export_output.end());
   m_export_output.clear();
}

}