r600/sfn: rework getting a vector and uniforms from the value pool
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_shader_base.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "../r600_pipe.h"
28 #include "../r600_shader.h"
29 #include "sfn_shader_vertex.h"
30
31 #include "sfn_shader_compute.h"
32 #include "sfn_shader_fragment.h"
33 #include "sfn_shader_geometry.h"
34 #include "sfn_liverange.h"
35 #include "sfn_ir_to_assembly.h"
36 #include "sfn_nir.h"
37 #include "sfn_instruction_misc.h"
38 #include "sfn_instruction_fetch.h"
39 #include "sfn_instruction_lds.h"
40
41 #include <iostream>
42
/* Debug switch for this translation unit.  While ENABLE_DEBUG is defined,
 * DEBUG_SFN(X) expands to X wrapped in do { } while (0) so that it can be
 * used like a single statement; otherwise it expands to nothing. */
#define ENABLE_DEBUG 1

#if defined(ENABLE_DEBUG)
#  define DEBUG_SFN(X) do { X; } while (0)
#else
#  define DEBUG_SFN(X)
#endif
53
54 namespace r600 {
55
56 using namespace std;
57
58
59 ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
60 r600_pipe_shader_selector& sel,
61 r600_shader &sh_info, int scratch_size,
62 enum chip_class chip_class):
63 m_processor_type(ptype),
64 m_nesting_depth(0),
65 m_block_number(0),
66 m_export_output(0, -1),
67 m_sh_info(sh_info),
68 m_chip_class(chip_class),
69 m_tex_instr(*this),
70 m_alu_instr(*this),
71 m_ssbo_instr(*this),
72 m_pending_else(nullptr),
73 m_scratch_size(scratch_size),
74 m_next_hwatomic_loc(0),
75 m_sel(sel)
76 {
77 m_sh_info.processor_type = ptype;
78 }
79
80
81 ShaderFromNirProcessor::~ShaderFromNirProcessor()
82 {
83 }
84
85 bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
86 {
87 switch (instr->type) {
88 case nir_instr_type_tex: {
89 nir_tex_instr *t = nir_instr_as_tex(instr);
90 if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
91 sh_info().uses_tex_buffers = true;
92 }
93 default:
94 ;
95 }
96
97 return scan_sysvalue_access(instr);
98 }
99
100 enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
101 {
102 return m_chip_class;
103 }
104
105 static void remap_shader_info(r600_shader& sh_info,
106 std::vector<rename_reg_pair>& map,
107 UNUSED ValueMap& values)
108 {
109 for (unsigned i = 0; i < sh_info.ninput; ++i) {
110 sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
111 << " of map.size()\n";
112
113 assert(sh_info.input[i].gpr < map.size());
114 auto new_index = map[sh_info.input[i].gpr];
115 if (new_index.valid)
116 sh_info.input[i].gpr = new_index.new_reg;
117 map[sh_info.input[i].gpr].used = true;
118 }
119
120 for (unsigned i = 0; i < sh_info.noutput; ++i) {
121 assert(sh_info.output[i].gpr < map.size());
122 auto new_index = map[sh_info.output[i].gpr];
123 if (new_index.valid)
124 sh_info.output[i].gpr = new_index.new_reg;
125 map[sh_info.output[i].gpr].used = true;
126 }
127 }
128
129 void ShaderFromNirProcessor::remap_registers()
130 {
131 // register renumbering
132 auto rc = register_count();
133 if (!rc)
134 return;
135
136 std::vector<register_live_range> register_live_ranges(rc);
137
138 auto temp_register_map = get_temp_registers();
139
140 Shader sh{m_output, temp_register_map};
141 LiverangeEvaluator().run(sh, register_live_ranges);
142 auto register_map = get_temp_registers_remapping(register_live_ranges);
143
144 sfn_log << SfnLog::merge << "=========Mapping===========\n";
145 for (size_t i = 0; i < register_map.size(); ++i)
146 if (register_map[i].valid)
147 sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";
148
149 ValueRemapper vmap0(register_map, temp_register_map);
150 for (auto& block: m_output)
151 block.remap_registers(vmap0);
152
153 remap_shader_info(m_sh_info, register_map, temp_register_map);
154
155 /* Mark inputs as used registers, these registers should no be remapped */
156 for (auto& v: sh.m_temp) {
157 if (v.second->type() == Value::gpr) {
158 const auto& g = static_cast<const GPRValue&>(*v.second);
159 if (g.is_input())
160 register_map[g.sel()].used = true;
161 }
162 }
163
164 int new_index = 0;
165 for (auto& i : register_map) {
166 i.valid = i.used;
167 if (i.used)
168 i.new_reg = new_index++;
169 }
170
171 ValueRemapper vmap1(register_map, temp_register_map);
172 for (auto& ir: m_output)
173 ir.remap_registers(vmap1);
174
175 remap_shader_info(m_sh_info, register_map, temp_register_map);
176 }
177
178 bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
179 {
180 // m_uniform_type_map
181 m_uniform_type_map[uniform->data.location] = uniform->type;
182
183 if (uniform->type->contains_atomic()) {
184 int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
185 sh_info().nhwatomic += natomics;
186
187 if (uniform->type->is_array())
188 sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
189
190 sh_info().uses_atomics = 1;
191
192 struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
193 ++sh_info().nhwatomic_ranges;
194 atom.buffer_id = uniform->data.binding;
195 atom.hw_idx = m_next_hwatomic_loc;
196 atom.start = m_next_hwatomic_loc;
197 atom.end = atom.start + natomics - 1;
198 m_next_hwatomic_loc = atom.end + 1;
199 //atom.array_id = uniform->type->is_array() ? 1 : 0;
200
201 m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;
202
203 sfn_log << SfnLog::io << "HW_ATOMIC file count: "
204 << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
205 }
206
207 if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
208 sh_info().uses_images = 1;
209 }
210
211 return true;
212 }
213
214 bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
215 {
216 return do_process_inputs(input);
217 }
218
219 bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
220 {
221 return do_process_outputs(output);
222 }
223
224 void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
225 {
226 nir_variable *var = nir_deref_instr_get_variable(instr);
227
228 assert(instr->mode == nir_var_function_temp);
229 assert(glsl_type_is_array(var->type));
230
231 // add an alias for the index to the register(s);
232
233
234 }
235
236 void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
237 {
238 auto& dest = instr->dest;
239 unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
240 m_var_mode[instr->var] = instr->mode;
241 m_var_derefs[index] = instr->var;
242
243 sfn_log << SfnLog::io << "Add var deref:" << index
244 << " with DDL:" << instr->var->data.driver_location << "\n";
245 }
246
247 void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
248 {
249 switch (io.name) {
250 case TGSI_SEMANTIC_POSITION:
251 case TGSI_SEMANTIC_PSIZE:
252 case TGSI_SEMANTIC_EDGEFLAG:
253 case TGSI_SEMANTIC_FACE:
254 case TGSI_SEMANTIC_SAMPLEMASK:
255 case TGSI_SEMANTIC_CLIPVERTEX:
256 io.spi_sid = 0;
257 break;
258 case TGSI_SEMANTIC_GENERIC:
259 case TGSI_SEMANTIC_TEXCOORD:
260 case TGSI_SEMANTIC_PCOORD:
261 io.spi_sid = io.sid + 1;
262 break;
263 default:
264 /* For non-generic params - pack name and sid into 8 bits */
265 io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
266 }
267 }
268
269 const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
270 {
271 unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;
272
273 sfn_log << SfnLog::io << "Search for deref:" << index << "\n";
274
275 auto v = m_var_derefs.find(index);
276 if (v != m_var_derefs.end())
277 return v->second;
278
279 fprintf(stderr, "R600: could not find deref with index %d\n", index);
280
281 return nullptr;
282
283 /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
284 return nir_deref_instr_get_variable(deref); */
285 }
286
287 bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
288 {
289 return m_tex_instr.emit(instr);
290 }
291
292 void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
293 {
294 if (m_pending_else) {
295 append_block(-1);
296 m_output.back().emit(PInstruction(m_pending_else));
297 append_block(1);
298 m_pending_else = nullptr;
299 }
300
301 r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
302 if (m_output.empty())
303 append_block(0);
304
305 m_output.back().emit(Instruction::Pointer(ir));
306 }
307
308 void ShaderFromNirProcessor::emit_shader_start()
309 {
310 /* placeholder, may become an abstract method */
311 }
312
313 bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
314 {
315 switch (instr->type) {
316 case nir_jump_break: {
317 auto b = new LoopBreakInstruction();
318 emit_instruction(b);
319 return true;
320 }
321 case nir_jump_continue: {
322 auto b = new LoopContInstruction();
323 emit_instruction(b);
324 return true;
325 }
326 default: {
327 nir_instr *i = reinterpret_cast<nir_instr*>(instr);
328 sfn_log << SfnLog::err << "Jump instrunction " << *i << " not supported\n";
329 return false;
330 }
331 }
332 return true;
333 }
334
335 bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
336 {
337 return m_alu_instr.emit(instr);
338 }
339
340 bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
341 {
342 return false;
343 }
344
345 bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
346 {
347 LoopBeginInstruction *loop = new LoopBeginInstruction();
348 emit_instruction(loop);
349 m_loop_begin_block_map[loop_id] = loop;
350 append_block(1);
351 return true;
352 }
353 bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
354 {
355 auto start = m_loop_begin_block_map.find(loop_id);
356 if (start == m_loop_begin_block_map.end()) {
357 sfn_log << SfnLog::err << "End loop: Loop start for "
358 << loop_id << " not found\n";
359 return false;
360 }
361 m_nesting_depth--;
362 m_block_number++;
363 m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
364 LoopEndInstruction *loop = new LoopEndInstruction(start->second);
365 emit_instruction(loop);
366
367 m_loop_begin_block_map.erase(start);
368 return true;
369 }
370
371 bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
372 {
373
374 auto value = from_nir(if_stmt->condition, 0, 0);
375 AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
376 value, Value::zero, EmitInstruction::last);
377 pred->set_flag(alu_update_exec);
378 pred->set_flag(alu_update_pred);
379 pred->set_cf_type(cf_alu_push_before);
380
381 append_block(1);
382
383 IfInstruction *ir = new IfInstruction(pred);
384 emit_instruction(ir);
385 assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
386 m_if_block_start_map[if_id] = ir;
387 return true;
388 }
389
390 bool ShaderFromNirProcessor::emit_else_start(int if_id)
391 {
392 auto iif = m_if_block_start_map.find(if_id);
393 if (iif == m_if_block_start_map.end()) {
394 std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
395 return false;
396 }
397
398 if (iif->second->type() != Instruction::cond_if) {
399 std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
400 return false;
401 }
402 IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
403 ElseInstruction *ir = new ElseInstruction(if_instr);
404 m_if_block_start_map[if_id] = ir;
405 m_pending_else = ir;
406
407 return true;
408 }
409
410 bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
411 {
412 auto ifelse = m_if_block_start_map.find(if_id);
413 if (ifelse == m_if_block_start_map.end()) {
414 std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
415 return false;
416 }
417
418 if (ifelse->second->type() != Instruction::cond_if &&
419 ifelse->second->type() != Instruction::cond_else) {
420 std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
421 return false;
422 }
423 /* Clear pending else, if the else branch was empty, non will be emitted */
424
425 m_pending_else = nullptr;
426
427 append_block(-1);
428 IfElseEndInstruction *ir = new IfElseEndInstruction();
429 emit_instruction(ir);
430
431 return true;
432 }
433
434 bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
435 {
436 PValue src = get_temp_register();
437 emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));
438
439 GPRVector dest = vec_from_nir(instr->dest, instr->num_components);
440 emit_instruction(new FetchTCSIOParam(dest, src, offset));
441
442 return true;
443
444 }
445
446 bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
447 {
448 auto address = varvec_from_nir(instr->src[0], instr->num_components);
449 auto dest_value = varvec_from_nir(instr->dest, instr->num_components);
450
451 emit_instruction(new LDSReadInstruction(address, dest_value));
452 return true;
453 }
454
455 bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
456 {
457 unsigned write_mask = nir_intrinsic_write_mask(instr);
458
459 auto address = from_nir(instr->src[1], 0);
460 int swizzle_base = (write_mask & 0x3) ? 0 : 2;
461 write_mask |= write_mask >> 2;
462
463 auto value = from_nir(instr->src[0], swizzle_base);
464 if (!(write_mask & 2)) {
465 emit_instruction(new LDSWriteInstruction(address, 0, value));
466 } else {
467 auto value1 = from_nir(instr->src[0], swizzle_base + 1);
468 emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
469 }
470
471 return true;
472 }
473
474 bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
475 {
476 r600::sfn_log << SfnLog::instr << "emit '"
477 << *reinterpret_cast<nir_instr*>(instr)
478 << "' (" << __func__ << ")\n";
479
480 if (emit_intrinsic_instruction_override(instr))
481 return true;
482
483 switch (instr->intrinsic) {
484 case nir_intrinsic_load_deref: {
485 auto var = get_deref_location(instr->src[0]);
486 if (!var)
487 return false;
488 auto mode_helper = m_var_mode.find(var);
489 if (mode_helper == m_var_mode.end()) {
490 cerr << "r600-nir: variable '" << var->name << "' not found\n";
491 return false;
492 }
493 switch (mode_helper->second) {
494 case nir_var_shader_in:
495 return emit_load_input_deref(var, instr);
496 case nir_var_function_temp:
497 return emit_load_function_temp(var, instr);
498 default:
499 cerr << "r600-nir: Unsupported mode" << mode_helper->second
500 << "for src variable\n";
501 return false;
502 }
503 }
504 case nir_intrinsic_store_scratch:
505 return emit_store_scratch(instr);
506 case nir_intrinsic_load_scratch:
507 return emit_load_scratch(instr);
508 case nir_intrinsic_store_deref:
509 return emit_store_deref(instr);
510 case nir_intrinsic_load_uniform:
511 return reserve_uniform(instr);
512 case nir_intrinsic_discard:
513 case nir_intrinsic_discard_if:
514 return emit_discard_if(instr);
515 case nir_intrinsic_load_ubo_r600:
516 return emit_load_ubo(instr);
517 case nir_intrinsic_atomic_counter_add:
518 case nir_intrinsic_atomic_counter_and:
519 case nir_intrinsic_atomic_counter_exchange:
520 case nir_intrinsic_atomic_counter_max:
521 case nir_intrinsic_atomic_counter_min:
522 case nir_intrinsic_atomic_counter_or:
523 case nir_intrinsic_atomic_counter_xor:
524 case nir_intrinsic_atomic_counter_comp_swap:
525 case nir_intrinsic_atomic_counter_read:
526 case nir_intrinsic_atomic_counter_post_dec:
527 case nir_intrinsic_atomic_counter_inc:
528 case nir_intrinsic_atomic_counter_pre_dec:
529 case nir_intrinsic_store_ssbo:
530 m_sel.info.writes_memory = true;
531 /* fallthrough */
532 case nir_intrinsic_load_ssbo:
533 return m_ssbo_instr.emit(&instr->instr);
534 break;
535 case nir_intrinsic_copy_deref:
536 case nir_intrinsic_load_constant:
537 case nir_intrinsic_load_input:
538 case nir_intrinsic_store_output:
539 case nir_intrinsic_load_tcs_in_param_base_r600:
540 return emit_load_tcs_param_base(instr, 0);
541 case nir_intrinsic_load_tcs_out_param_base_r600:
542 return emit_load_tcs_param_base(instr, 16);
543 case nir_intrinsic_load_local_shared_r600:
544 return emit_load_local_shared(instr);
545 case nir_intrinsic_store_local_shared_r600:
546 return emit_store_local_shared(instr);
547 case nir_intrinsic_control_barrier:
548 case nir_intrinsic_memory_barrier_tcs_patch:
549 return emit_barrier(instr);
550
551 default:
552 fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
553 return false;
554 }
555 return false;
556 }
557
558 bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
559 {
560 return false;
561 }
562
563 bool
564 ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
565 {
566 return false;
567 }
568
569 bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
570 {
571 AluInstruction *ir = new AluInstruction(op0_group_barrier);
572 ir->set_flag(alu_last_instr);
573 emit_instruction(ir);
574 return true;
575 }
576
577
578 bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
579 {
580 if (!dest.is_ssa) {
581 auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
582 if (as_last)
583 ir->set_flag(alu_last_instr);
584 emit_instruction(ir);
585 } else {
586 inject_register(dest.ssa.index, chan, value, true);
587 }
588 return true;
589 }
590
591 bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
592 {
593 PValue address = from_nir(instr->src[1], 0, 0);
594
595 auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
596 swizzle_from_comps(instr->num_components));
597
598 int writemask = nir_intrinsic_write_mask(instr);
599 int align = nir_intrinsic_align_mul(instr);
600 int align_offset = nir_intrinsic_align_offset(instr);
601
602 WriteScratchInstruction *ir = nullptr;
603 if (address->type() == Value::literal) {
604 const auto& lv = static_cast<const LiteralValue&>(*address);
605 ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
606 } else {
607 address = from_nir_with_fetch_constant(instr->src[1], 0);
608 ir = new WriteScratchInstruction(address, value, align, align_offset,
609 writemask, m_scratch_size);
610 }
611 emit_instruction(ir);
612 sh_info().needs_scratch_space = 1;
613 return true;
614 }
615
616 bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
617 {
618 PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
619 std::array<PValue, 4> dst_val;
620 for (int i = 0; i < 4; ++i)
621 dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);
622
623 GPRVector dst(dst_val);
624 auto ir = new LoadFromScratch(dst, address, m_scratch_size);
625 ir->prelude_append(new WaitAck(0));
626 emit_instruction(ir);
627 sh_info().needs_scratch_space = 1;
628 return true;
629 }
630
631 GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
632 unsigned mask,
633 const GPRVector::Swizzle& swizzle,
634 bool match)
635 {
636 bool use_same = true;
637 GPRVector::Values v;
638
639 for (int i = 0; i < 4 && use_same; ++i) {
640 if ((1 << i) & mask) {
641 if (swizzle[i] < 4) {
642 v[i] = from_nir(src, swizzle[i]);
643 assert(v[i]);
644 if (v[i]->type() != Value::gpr)
645 use_same = false;
646 if (match && (v[i]->chan() != swizzle[i]))
647 use_same = false;
648 }
649 }
650 }
651
652 if (use_same) {
653 int i = 0;
654 while (!v[i] && i < 4) ++i;
655 assert(i < 4);
656
657 unsigned sel = v[i]->sel();
658 for (i = 0; i < 4 && use_same; ++i) {
659 if (!v[i])
660 v[i] = PValue(new GPRValue(sel, swizzle[i]));
661 else
662 use_same &= v[i]->sel() == sel;
663 }
664 }
665
666 if (!use_same) {
667 AluInstruction *ir = nullptr;
668 int sel = allocate_temp_register();
669 for (int i = 0; i < 4; ++i) {
670 v[i] = PValue(new GPRValue(sel, swizzle[i]));
671 if (swizzle[i] < 4 && (mask & (1 << i))) {
672 ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
673 EmitInstruction::write);
674 emit_instruction(ir);
675 }
676 }
677 if (ir)
678 ir->set_flag(alu_last_instr);
679 }
680 return GPRVector(v);;
681 }
682
683 bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
684 {
685 nir_src& src0 = instr->src[0];
686 nir_src& src1 = instr->src[1];
687
688 int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
689 const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);
690
691 int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
692 const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
693 if (literal0) {
694 if (literal1) {
695 uint bufid = literal0->value[0].u32;
696 uint buf_ofs = literal1->value[0].u32 >> 4;
697 int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
698 AluInstruction *ir = nullptr;
699 for (int i = 0; i < instr->num_components; ++i) {
700 int cmp = buf_cmp + i;
701 assert(cmp < 4);
702 auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
703 if (instr->dest.is_ssa)
704 add_uniform((instr->dest.ssa.index << 2) + i, u);
705 else {
706 ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
707 emit_instruction(ir);
708 }
709 }
710 if (ir)
711 ir->set_flag(alu_last_instr);
712 return true;
713
714 } else {
715 /* literal0 is lost ...*/
716 return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
717 }
718 } else {
719 /* TODO: This can also be solved by using the CF indes on the ALU block, and
720 * this would probably make sense when there are more then one loads with
721 * the same buffer ID. */
722 PValue bufid = from_nir(instr->src[0], 0, 0);
723 PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
724 GPRVector trgt;
725 for (int i = 0; i < 4; ++i)
726 trgt.set_reg_i(i, from_nir(instr->dest, i));
727
728 auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
729 1, bufid, bim_zero);
730
731 emit_instruction(ir);
732 for (int i = 0; i < instr->num_components ; ++i) {
733 add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
734 }
735 m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
736 return true;
737 }
738
739 }
740
741 bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
742 {
743 r600::sfn_log << SfnLog::instr << "emit '"
744 << *reinterpret_cast<nir_instr*>(instr)
745 << "' (" << __func__ << ")\n";
746
747 if (instr->intrinsic == nir_intrinsic_discard_if) {
748 emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
749 {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));
750
751 } else {
752 emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
753 {Value::zero, Value::zero}, {alu_last_instr}));
754 }
755 m_sh_info.uses_kill = 1;
756 return true;
757 }
758
759 bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
760 nir_intrinsic_instr* instr)
761 {
762 return do_emit_load_deref(var, instr);
763 }
764
765 bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
766 {
767 r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
768 << *reinterpret_cast<nir_instr*>(instr)
769 << "'\n";
770
771
772 /* If the target register is a SSA register and the loading is not
773 * indirect then we can do lazy loading, i.e. the uniform value can
774 * be used directly. Otherwise we have to load the data for real
775 * rigt away.
776 */
777
778 /* Try to find the literal that defines the array index */
779 const nir_load_const_instr* literal = nullptr;
780 if (instr->src[0].is_ssa)
781 literal = get_literal_constant(instr->src[0].ssa->index);
782
783 int base = nir_intrinsic_base(instr);
784 if (literal) {
785 AluInstruction *ir = nullptr;
786
787 for (int i = 0; i < instr->num_components ; ++i) {
788 PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
789 sfn_log << SfnLog::io << "uniform "
790 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
791
792 if (instr->dest.is_ssa)
793 add_uniform((instr->dest.ssa.index << 2) + i, u);
794 else {
795 ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
796 u, {alu_write});
797 emit_instruction(ir);
798 }
799 }
800 if (ir)
801 ir->set_flag(alu_last_instr);
802 } else {
803 PValue addr = from_nir(instr->src[0], 0, 0);
804 return load_uniform_indirect(instr, addr, 16 * base, 0);
805 }
806 return true;
807 }
808
809 bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid)
810 {
811 if (!addr) {
812 std::cerr << "r600-nir: don't know how uniform is addressed\n";
813 return false;
814 }
815
816 GPRVector trgt;
817 for (int i = 0; i < 4; ++i)
818 trgt.set_reg_i(i, from_nir(instr->dest, i));
819
820 if (addr->type() != Value::gpr) {
821 emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
822 addr = trgt.reg_i(0);
823 }
824
825 /* FIXME: buffer index and index mode are not set correctly */
826 auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest,
827 bufferid, PValue(), bim_none);
828 emit_instruction(ir);
829 for (int i = 0; i < instr->num_components ; ++i) {
830 add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
831 }
832 m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
833 return true;
834 }
835
836 AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
837 {
838 AluInstruction *ir = nullptr;
839 for (int i = 0; i < literal->def.num_components ; ++i) {
840 if (writemask & (1 << i)){
841 PValue lsrc;
842 switch (literal->def.bit_size) {
843
844 case 1:
845 sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
846 lsrc = literal->value[i].b ?
847 PValue(new LiteralValue( 0xffffffff, i)) :
848 Value::zero;
849 break;
850 case 32:
851 sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
852 if (literal->value[i].u32 == 0)
853 lsrc = Value::zero;
854 else if (literal->value[i].u32 == 1)
855 lsrc = Value::one_i;
856 else if (literal->value[i].f32 == 1.0f)
857 lsrc = Value::one_f;
858 else if (literal->value[i].f32 == 0.5f)
859 lsrc = Value::zero_dot_5;
860 else
861 lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
862 break;
863 default:
864 sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
865 << " falling back to 32 bit\n";
866 lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
867 }
868 ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);
869
870 emit_instruction(ir);
871 }
872 }
873 return ir;
874 }
875
876 PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
877 {
878 PValue value = from_nir(src, component);
879 if (value->type() != Value::gpr &&
880 value->type() != Value::gpr_vector &&
881 value->type() != Value::gpr_array_value) {
882 PValue retval = get_temp_register();
883 emit_instruction(new AluInstruction(op1_mov, retval, value,
884 EmitInstruction::last_write));
885 value = retval;
886 }
887 return value;
888 }
889
890 bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
891 {
892 auto out_var = get_deref_location(instr->src[0]);
893 if (!out_var)
894 return false;
895
896 return do_emit_store_deref(out_var, instr);
897 }
898
899 bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
900 {
901 r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
902 << *reinterpret_cast<nir_instr*>(instr)
903 << "'\n";
904
905 /* Give the specific shader type a chance to process this, i.e. Geometry and
906 * tesselation shaders need specialized deref_array, for the other shaders
907 * it is lowered.
908 */
909 if (emit_deref_instruction_override(instr))
910 return true;
911
912 switch (instr->deref_type) {
913 case nir_deref_type_var:
914 set_var_address(instr);
915 return true;
916 case nir_deref_type_array:
917 case nir_deref_type_array_wildcard:
918 case nir_deref_type_struct:
919 case nir_deref_type_cast:
920 default:
921 fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
922 }
923 return false;
924 }
925
926 void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
927 {
928 AluInstruction *ir = nullptr;
929 PValue sv[4];
930
931 assert(src.src.is_ssa);
932
933 for (int i = 0; i < src.src.ssa->num_components ; ++i) {
934 unsigned uindex = (src.src.ssa->index << 2) + i;
935 sv[i] = uniform(uindex);
936 assert(sv[i]);
937 }
938
939 for (int i = 0; i < src.src.ssa->num_components ; ++i) {
940 ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
941 EmitInstruction::write);
942 emit_instruction(ir);
943 }
944 if (ir)
945 ir->set_flag(alu_last_instr);
946 }
947
948
949
950 bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
951 std::vector<PValue> srcs,
952 const std::set<AluModifiers>& m_flags)
953 {
954 AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
955 emit_instruction(ir);
956 return true;
957 }
958
959 void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
960 {
961 m_output_register_map[loc] = gpr;
962 }
963
964 void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
965 {
966 r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
967 m_export_output.emit(PInstruction(ir));
968 }
969
970 const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
971 {
972 const GPRVector *retval = nullptr;
973 auto val = m_output_register_map.find(location);
974 if (val != m_output_register_map.end())
975 retval = val->second;
976 return retval;
977 }
978
979 void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
980 {
981 r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n";
982 m_inputs[pos] = var;
983 }
984
985 void ShaderFromNirProcessor::set_output(unsigned pos, PValue var)
986 {
987 r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << *var << "\n";
988 m_outputs[pos] = var;
989 }
990
991 void ShaderFromNirProcessor::append_block(int nesting_change)
992 {
993 m_nesting_depth += nesting_change;
994 m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
995 }
996
997 void ShaderFromNirProcessor::finalize()
998 {
999 do_finalize();
1000
1001 for (auto& i : m_inputs)
1002 m_sh_info.input[i.first].gpr = i.second->sel();
1003
1004 for (auto& i : m_outputs)
1005 m_sh_info.output[i.first].gpr = i.second->sel();
1006
1007 m_output.push_back(m_export_output);
1008 }
1009
1010 }