r600/sfn: Add support for reading cube image array dim.
[mesa.git] src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X)  \
   do {               \
      X;              \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;

ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size,
                                               enum chip_class chip_class,
                                               int atomic_base):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_chip_class(chip_class),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel),
   m_atomic_base(atomic_base)
{
   m_sh_info.processor_type = ptype;
}

ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

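/* Pre-scan the NIR instructions to collect shader info flags that must be
 * known up front: texture buffer use, the TXQ/image-size cube array
 * z-component lowering, and whether atomics write memory. */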
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
      if (t->op == nir_texop_txs &&
          t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
          t->is_array)
         sh_info().has_txq_cube_array_z_comp = true;
      break;
   }
   case nir_instr_type_intrinsic: {
      auto *i = nir_instr_as_intrinsic(instr);
      switch (i->intrinsic) {
      case nir_intrinsic_ssbo_atomic_add:
      case nir_intrinsic_image_atomic_add:
      case nir_intrinsic_ssbo_atomic_and:
      case nir_intrinsic_image_atomic_and:
      case nir_intrinsic_ssbo_atomic_or:
      case nir_intrinsic_image_atomic_or:
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_image_atomic_imin:
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_image_atomic_imax:
      case nir_intrinsic_ssbo_atomic_umin:
      case nir_intrinsic_image_atomic_umin:
      case nir_intrinsic_ssbo_atomic_umax:
      case nir_intrinsic_image_atomic_umax:
      case nir_intrinsic_image_atomic_xor:
      case nir_intrinsic_image_atomic_exchange:
      case nir_intrinsic_image_atomic_comp_swap:
         m_sel.info.writes_memory = 1;
         /* fallthrough */
      case nir_intrinsic_image_load:
         m_ssbo_instr.set_require_rat_return_address();
         break;
      case nir_intrinsic_image_size: {
         if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
             nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
            sh_info().has_txq_cube_array_z_comp = true;
         break;
      }
      default:
         ;
      }
      break;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
{
   return m_chip_class;
}

bool ShaderFromNirProcessor::allocate_reserved_registers()
{
   bool retval = do_allocate_reserved_registers();
   return retval;
}

static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

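/* Remap the temporary registers in two passes: first apply the mapping
 * computed from the live-range analysis, then mark input registers as
 * used and compact all surviving registers into a dense index range. */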
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

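/* Atomic counter uniforms get a range of consecutive hardware counter
 * slots, starting at m_atomic_base and advancing m_next_hwatomic_loc by
 * the number of counters the uniform covers. */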
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s)
}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

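/* Compute the SPI semantic ID used to match the outputs of one stage with
 * the inputs of the next: system values get 0, generic params use sid + 1,
 * and everything else packs name and sid into the value. */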
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

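/* A pending ELSE is only written out together with the next emitted
 * instruction; this way an else branch that turns out empty produces no
 * code at all (emit_ifelse_end simply drops a still-pending else). */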
void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << "  as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

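/* The if condition is evaluated as a predicate: PRED_SETNE_INT compares it
 * with zero, updating the execution mask and pushing the predicate state
 * before the ALU clause. */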
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, instr->num_components);
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

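/* An LDS write stores at most two values: fold the zw half of the write
 * mask onto xy, start reading at swizzle 2 if only the upper components
 * are written, and emit the write with one or two values accordingly. */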
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   if (m_ssbo_instr.emit(&instr->instr)) {
      m_sel.info.writes_memory = true;
      return true;
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
   case nir_intrinsic_load_shared:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
   case nir_intrinsic_store_shared:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
   case nir_intrinsic_memory_barrier_shared:
      return emit_barrier(instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}

bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                                 swizzle_from_comps(instr->num_components));

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

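/* Gather the masked source components into one GPR vector. If all values
 * already live in the same GPR (with matching channels when 'match' is
 * set), that register is reused directly; otherwise everything is copied
 * into a newly allocated temporary. */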
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                   unsigned mask,
                                                                   const GPRVector::Swizzle& swizzle,
                                                                   bool match)
{
   bool use_same = true;
   GPRVector::Values v;

   for (int i = 0; i < 4 && use_same; ++i) {
      if ((1 << i) & mask) {
         if (swizzle[i] < 4) {
            v[i] = from_nir(src, swizzle[i]);
            assert(v[i]);
            if (v[i]->type() != Value::gpr)
               use_same = false;
            if (match && (v[i]->chan() != swizzle[i]))
               use_same = false;
         }
      }
   }

   if (use_same) {
      int i = 0;
      while (i < 4 && !v[i]) ++i;
      assert(i < 4);

      unsigned sel = v[i]->sel();
      for (i = 0; i < 4 && use_same; ++i) {
         if (!v[i])
            v[i] = PValue(new GPRValue(sel, swizzle[i]));
         else
            use_same &= v[i]->sel() == sel;
      }
   }

   if (!use_same) {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   }
   return GPRVector(v);
}

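/* UBO loads take one of three paths: with buffer id and offset both
 * literal the components become lazily resolved uniform values; a literal
 * buffer id with an indirect offset goes through load_uniform_indirect;
 * a non-constant buffer id needs a full vertex-cache fetch. */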
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;

      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }
}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                       {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

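/* The common constants (0, 1, 1.0f, 0.5f) map to the ALU inline constant
 * sources instead of occupying literal slots; everything else becomes a
 * literal value. */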
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {

         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue(0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

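/* Fetch-style consumers need their source in a GPR, so copy the value
 * into a temporary register unless it already is one. */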
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register();
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry
    * and tessellation shaders need a specialized deref_array; for the other
    * shaders it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << "  as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] = " << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] = " << sel << "\n";
   m_outputs[pos] = sel;
}

void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second;

   m_output.push_back(m_export_output);
}

}