r600/sfn: Implementing instruction blocks
src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X) \
   do { \
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;

ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel)
{
   m_sh_info.processor_type = ptype;
}

ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

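/* Pre-scan a NIR instruction before any code is emitted, so that global
 * shader info (like the use of texture buffers) can be recorded up front;
 * system value access is handled by the shader-type specific override. */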
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

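/* After register renumbering, rewrite the GPR indices recorded in the
 * r600_shader input and output tables and mark the resulting registers as
 * used so that a later compaction pass will keep them. */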
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

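/* Register remapping runs in two rounds: first, registers with
 * non-overlapping live ranges are merged, then, with the input registers
 * pinned as used, the surviving registers are renumbered densely from zero.
 * Each round rewrites both the emitted instruction blocks and the shader
 * info tables. */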
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers; these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

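/* Record the uniform type; atomic counter uniforms additionally claim a
 * contiguous range of hardware atomic slots, with consecutive uniforms
 * packed back to back via m_next_hwatomic_loc. */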
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   // m_uniform_type_map
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s);

}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

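/* Evaluate the SPI semantic index used to match the outputs of one shader
 * stage with the inputs of the next: fixed-function semantics get 0,
 * generic varyings use sid + 1, and all other names are packed into an
 * eight-bit value. */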
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

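/* Central emission point: every instruction is appended to the current
 * InstructionBlock. A pending ELSE is flushed here first, wrapped in its
 * own block one nesting level up, so that an empty else branch never emits
 * anything. */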
void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

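/* LOOP_BEGIN opens a new, deeper block; the begin instruction is remembered
 * per loop id so that the matching LOOP_END emitted later can refer back to
 * it. */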
bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

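/* An IF is emitted as an ALU predicate operation (pred_setne_int against
 * zero) that updates the execution mask and pushes the branch state before
 * the jump; the conditionally executed code then lives in a new, deeper
 * block. */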
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

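/* Dispatch NIR intrinsics: shader-type specific handlers get the first
 * chance via the override hook, the generic cases are handled below. */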
bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_store_ssbo:
      m_sel.info.writes_memory = true;
      /* fallthrough */
   case nir_intrinsic_load_ssbo:
      return m_ssbo_instr.emit(&instr->instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

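/* Scratch (private) memory access: with a literal address the direct-offset
 * form of the write instruction is used, otherwise the address is fetched
 * into a GPR and the indirect form bounded by m_scratch_size is used. */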
bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   std::unique_ptr<GPRVector> vec(vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                  swizzle_from_mask(instr->num_components)));
   GPRVector value(*vec);

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

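/* Build a four-component vector from a NIR source. If the source already
 * starts at an x-channel GPR it can be used as-is with the requested
 * swizzle; otherwise the masked components are copied into a freshly
 * allocated temporary register. */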
GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                    UNUSED unsigned mask,
                                                                    const GPRVector::Swizzle& swizzle)
{
   GPRVector *result = nullptr;
   int sel = lookup_register_index(src);
   if (sel >= 0 && from_nir(src, 0)->type() == Value::gpr &&
       from_nir(src, 0)->chan() == 0) {
      /* If the x-channel is really an x-channel register then we are pretty
       * safe that the values come as we need them */
      result = new GPRVector(from_nir(src, 0)->sel(), swizzle);
   } else {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      GPRVector::Values v;
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);

      result = new GPRVector(v);
   }
   return result;
}

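/* UBO loads fall into three cases: buffer id and offset both literal (the
 * uniform values are registered lazily per component), literal buffer id
 * with an indirect offset (indirect uniform load), and a fully indirect
 * access that needs a vertex fetch with the buffer id taken from a
 * register. */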
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;

      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components ; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                       {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components ; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components ; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

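/* Load a NIR literal, mapping the common constants 0, 1 (int and float)
 * and 0.5 to the hardware's inline constants instead of emitting literal
 * values. */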
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components ; ++i) {
      if (writemask & (1 << i)){
         PValue lsrc;
         switch (literal->def.bit_size) {

         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue( 0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      unsigned temp = allocate_temp_register();
      PValue retval(new GPRValue(temp, component));
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry and
    * tessellation shaders need a specialized deref_array, for the other shaders
    * it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << *var << "\n";
   m_outputs[pos] = var;
}

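/* Open a new instruction block at the adjusted nesting depth; blocks are
 * numbered in emission order. */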
void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second->sel();

   m_output.push_back(m_export_output);
}

}