r600/sfn: Make 3vec loads skip possible moves
[mesa.git] src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X)  \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;


ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size,
                                               enum chip_class chip_class):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_chip_class(chip_class),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel)
{
   m_sh_info.processor_type = ptype;
}


ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
      break;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
{
   return m_chip_class;
}

bool ShaderFromNirProcessor::allocate_reserved_registers()
{
   bool retval = do_allocate_reserved_registers();
   return retval;
}

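/* Apply a register renumbering map to the shader I/O info, so that the GPR
 * indices recorded for driver-visible inputs and outputs stay in sync with
 * the renamed temporaries. */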
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

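/* Renumber the temporary registers in two passes: first apply the remapping
 * computed from the live-range analysis, then mark the registers backing
 * shader inputs as used and compact everything still in use into a dense
 * index range, applying that second map to the IR and I/O info as well. */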
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

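/* Record the uniform type and, for atomic counter uniforms, reserve a
 * hardware atomic range: consecutive counters in one binding are packed
 * into a single r600_shader_atomic entry covering [start, end]. */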
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   // m_uniform_type_map
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s)

}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

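/* Compute the SPI semantic ID that matches outputs of one stage with inputs
 * of the next: system values get 0, generic/texcoord params are identified
 * by their sid, and everything else packs name and sid into eight bits.
 * This mirrors what r600_spi_sid() does on the TGSI path. */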
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

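/* Emit one IR instruction into the current block. A pending ELSE is flushed
 * first: nesting is decreased for the ELSE itself and increased again for
 * its body, so an entirely empty else branch never gets emitted. */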
void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

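/* Open an IF block: the condition is compared against zero with
 * PRED_SETNE_INT, updating the execution mask and the predicate, and the
 * 'push before' CF type saves the current state on the stack so the
 * matching POP can restore it. */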
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }

   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, instr->num_components);
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

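/* An LDS write instruction carries at most two values. The write mask is
 * folded into its lower two bits, so a store to .zw becomes a two-component
 * write starting at swizzle base 2, and bit 1 decides whether a second
 * value is appended. */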
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

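/* Dispatch NIR intrinsics: shader-type specific intrinsics get a chance in
 * the override hook first; what remains here is common to all stages. */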
bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_store_ssbo:
      m_sel.info.writes_memory = true;
      /* fallthrough */
   case nir_intrinsic_load_ssbo:
      return m_ssbo_instr.emit(&instr->instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
      return emit_barrier(instr);

   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}


bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

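/* Scratch (private) memory access: a literal address can be encoded in the
 * write instruction directly, anything else has to live in a GPR. Either
 * way the shader is flagged as needing scratch space. */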
bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                                 swizzle_from_comps(instr->num_components));

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

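/* Gather up to four source components into a GPRVector. If all requested
 * components already live in one and the same GPR (and, with 'match' set,
 * in exactly the requested channels), that register is reused directly and
 * no moves are emitted; only otherwise are the components copied into a
 * freshly allocated temporary. This is what lets e.g. three-component
 * loads skip otherwise needless moves. */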
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                   unsigned mask,
                                                                   const GPRVector::Swizzle& swizzle,
                                                                   bool match)
{
   bool use_same = true;
   GPRVector::Values v;

   for (int i = 0; i < 4 && use_same; ++i) {
      if ((1 << i) & mask) {
         if (swizzle[i] < 4) {
            v[i] = from_nir(src, swizzle[i]);
            assert(v[i]);
            if (v[i]->type() != Value::gpr)
               use_same = false;
            if (match && (v[i]->chan() != swizzle[i]))
               use_same = false;
         }
      }
   }

   if (use_same) {
      int i = 0;
      while (i < 4 && !v[i]) ++i;
      assert(i < 4);

      unsigned sel = v[i]->sel();
      for (i = 0; i < 4 && use_same; ++i) {
         if (!v[i])
            v[i] = PValue(new GPRValue(sel, swizzle[i]));
         else
            use_same &= v[i]->sel() == sel;
      }
   }

   if (!use_same) {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   }
   return GPRVector(v);
}

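/* Load from an r600 UBO: with literal buffer id and offset the components
 * are registered as lazily-loaded uniform values; with a literal buffer id
 * but variable offset the load takes the indirect uniform path; a variable
 * buffer id requires a real vertex-cache fetch with the id in a register. */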
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;

      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                       {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect, then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

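/* Materialize NIR load_const values: constants the ALU can encode as inline
 * operands (0, 1, 1.0f, 0.5f) are mapped to the corresponding special
 * values, everything else becomes a literal slot in the instruction group. */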
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {
         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue(0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);
         emit_instruction(ir);
      }
   }
   return ir;
}

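/* Fetch-style instructions can only address GPRs, so anything that is not
 * already GPR-backed is first copied into a temporary register. */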
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register();
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry
    * and tessellation shaders need a specialized deref_array; for the other
    * shaders it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}


bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << sel << "\n";
   m_outputs[pos] = sel;
}

void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second;

   m_output.push_back(m_export_output);
}

}