3 * Copyright (c) 2018 Collabora LTD
5 * Author: Gert Wollny <gert.wollny@collabora.com>
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "../r600_pipe.h"
28 #include "../r600_shader.h"
29 #include "sfn_shader_vertex.h"
31 #include "sfn_shader_compute.h"
32 #include "sfn_shader_fragment.h"
33 #include "sfn_shader_geometry.h"
34 #include "sfn_liverange.h"
35 #include "sfn_ir_to_assembly.h"
37 #include "sfn_instruction_misc.h"
38 #include "sfn_instruction_fetch.h"
39 #include "sfn_instruction_lds.h"
43 #define ENABLE_DEBUG 1
46 #define DEBUG_SFN(X) \
59 ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype
,
60 r600_pipe_shader_selector
& sel
,
61 r600_shader
&sh_info
, int scratch_size
,
62 enum chip_class chip_class
,
64 m_processor_type(ptype
),
67 m_export_output(0, -1),
69 m_chip_class(chip_class
),
73 m_pending_else(nullptr),
74 m_scratch_size(scratch_size
),
75 m_next_hwatomic_loc(0),
77 m_atomic_base(atomic_base
),
81 m_sh_info
.processor_type
= ptype
;
86 ShaderFromNirProcessor::~ShaderFromNirProcessor()
90 bool ShaderFromNirProcessor::scan_instruction(nir_instr
*instr
)
92 switch (instr
->type
) {
93 case nir_instr_type_tex
: {
94 nir_tex_instr
*t
= nir_instr_as_tex(instr
);
95 if (t
->sampler_dim
== GLSL_SAMPLER_DIM_BUF
)
96 sh_info().uses_tex_buffers
= true;
97 if (t
->op
== nir_texop_txs
&&
98 t
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
&&
100 sh_info().has_txq_cube_array_z_comp
= true;
103 case nir_instr_type_intrinsic
: {
104 auto *i
= nir_instr_as_intrinsic(instr
);
105 switch (i
->intrinsic
) {
106 case nir_intrinsic_ssbo_atomic_add
:
107 case nir_intrinsic_image_atomic_add
:
108 case nir_intrinsic_ssbo_atomic_and
:
109 case nir_intrinsic_image_atomic_and
:
110 case nir_intrinsic_ssbo_atomic_or
:
111 case nir_intrinsic_image_atomic_or
:
112 case nir_intrinsic_ssbo_atomic_imin
:
113 case nir_intrinsic_image_atomic_imin
:
114 case nir_intrinsic_ssbo_atomic_imax
:
115 case nir_intrinsic_image_atomic_imax
:
116 case nir_intrinsic_ssbo_atomic_umin
:
117 case nir_intrinsic_image_atomic_umin
:
118 case nir_intrinsic_ssbo_atomic_umax
:
119 case nir_intrinsic_image_atomic_umax
:
120 case nir_intrinsic_image_atomic_xor
:
121 case nir_intrinsic_image_atomic_exchange
:
122 case nir_intrinsic_image_atomic_comp_swap
:
123 m_sel
.info
.writes_memory
= 1;
125 case nir_intrinsic_image_load
:
126 m_ssbo_instr
.set_require_rat_return_address();
128 case nir_intrinsic_image_size
: {
129 if (nir_intrinsic_image_dim(i
) == GLSL_SAMPLER_DIM_CUBE
&&
130 nir_intrinsic_image_array(i
) && nir_dest_num_components(i
->dest
) > 2)
131 sh_info().has_txq_cube_array_z_comp
= true;
146 return scan_sysvalue_access(instr
);
149 enum chip_class
ShaderFromNirProcessor::get_chip_class(void) const
154 bool ShaderFromNirProcessor::allocate_reserved_registers()
156 bool retval
= do_allocate_reserved_registers();
157 m_ssbo_instr
.load_rat_return_address();
158 if (sh_info().uses_atomics
)
159 m_ssbo_instr
.load_atomic_inc_limits();
160 m_ssbo_instr
.set_ssbo_offset(m_image_count
);
164 static void remap_shader_info(r600_shader
& sh_info
,
165 std::vector
<rename_reg_pair
>& map
,
166 UNUSED ValueMap
& values
)
168 for (unsigned i
= 0; i
< sh_info
.ninput
; ++i
) {
169 sfn_log
<< SfnLog::merge
<< "Input " << i
<< " gpr:" << sh_info
.input
[i
].gpr
170 << " of map.size()\n";
172 assert(sh_info
.input
[i
].gpr
< map
.size());
173 auto new_index
= map
[sh_info
.input
[i
].gpr
];
175 sh_info
.input
[i
].gpr
= new_index
.new_reg
;
176 map
[sh_info
.input
[i
].gpr
].used
= true;
179 for (unsigned i
= 0; i
< sh_info
.noutput
; ++i
) {
180 assert(sh_info
.output
[i
].gpr
< map
.size());
181 auto new_index
= map
[sh_info
.output
[i
].gpr
];
183 sh_info
.output
[i
].gpr
= new_index
.new_reg
;
184 map
[sh_info
.output
[i
].gpr
].used
= true;
188 void ShaderFromNirProcessor::remap_registers()
190 // register renumbering
191 auto rc
= register_count();
195 std::vector
<register_live_range
> register_live_ranges(rc
);
197 auto temp_register_map
= get_temp_registers();
199 Shader sh
{m_output
, temp_register_map
};
200 LiverangeEvaluator().run(sh
, register_live_ranges
);
201 auto register_map
= get_temp_registers_remapping(register_live_ranges
);
203 sfn_log
<< SfnLog::merge
<< "=========Mapping===========\n";
204 for (size_t i
= 0; i
< register_map
.size(); ++i
)
205 if (register_map
[i
].valid
)
206 sfn_log
<< SfnLog::merge
<< "Map:" << i
<< " -> " << register_map
[i
].new_reg
<< "\n";
208 ValueRemapper
vmap0(register_map
, temp_register_map
);
209 for (auto& block
: m_output
)
210 block
.remap_registers(vmap0
);
212 remap_shader_info(m_sh_info
, register_map
, temp_register_map
);
214 /* Mark inputs as used registers, these registers should not be remapped */
215 for (auto& v
: sh
.m_temp
) {
216 if (v
.second
->type() == Value::gpr
) {
217 const auto& g
= static_cast<const GPRValue
&>(*v
.second
);
219 register_map
[g
.sel()].used
= true;
224 for (auto& i
: register_map
) {
227 i
.new_reg
= new_index
++;
230 ValueRemapper
vmap1(register_map
, temp_register_map
);
231 for (auto& ir
: m_output
)
232 ir
.remap_registers(vmap1
);
234 remap_shader_info(m_sh_info
, register_map
, temp_register_map
);
237 bool ShaderFromNirProcessor::process_uniforms(nir_variable
*uniform
)
239 // m_uniform_type_map
240 m_uniform_type_map
[uniform
->data
.location
] = uniform
->type
;
242 if (uniform
->type
->contains_atomic()) {
243 int natomics
= uniform
->type
->atomic_size() / ATOMIC_COUNTER_SIZE
;
244 sh_info().nhwatomic
+= natomics
;
246 if (uniform
->type
->is_array())
247 sh_info().indirect_files
|= 1 << TGSI_FILE_HW_ATOMIC
;
249 sh_info().uses_atomics
= 1;
251 struct r600_shader_atomic
& atom
= sh_info().atomics
[sh_info().nhwatomic_ranges
];
252 ++sh_info().nhwatomic_ranges
;
253 atom
.buffer_id
= uniform
->data
.binding
;
254 atom
.hw_idx
= m_atomic_base
+ m_next_hwatomic_loc
;
255 atom
.start
= m_next_hwatomic_loc
;
256 atom
.end
= atom
.start
+ natomics
- 1;
257 m_next_hwatomic_loc
= atom
.end
+ 1;
258 //atom.array_id = uniform->type->is_array() ? 1 : 0;
260 m_sel
.info
.file_count
[TGSI_FILE_HW_ATOMIC
] += atom
.end
- atom
.start
+ 1;
262 sfn_log
<< SfnLog::io
<< "HW_ATOMIC file count: "
263 << m_sel
.info
.file_count
[TGSI_FILE_HW_ATOMIC
] << "\n";
266 if (uniform
->type
->is_image() || uniform
->data
.mode
== nir_var_mem_ssbo
) {
267 sh_info().uses_images
= 1;
270 if (uniform
->type
->is_image()) {
277 bool ShaderFromNirProcessor::process_inputs(nir_variable
*input
)
279 return do_process_inputs(input
);
282 bool ShaderFromNirProcessor::process_outputs(nir_variable
*output
)
284 return do_process_outputs(output
);
287 void ShaderFromNirProcessor::add_array_deref(nir_deref_instr
*instr
)
289 nir_variable
*var
= nir_deref_instr_get_variable(instr
);
291 assert(instr
->mode
== nir_var_function_temp
);
292 assert(glsl_type_is_array(var
->type
));
294 // add an alias for the index to the register(s);
299 void ShaderFromNirProcessor::set_var_address(nir_deref_instr
*instr
)
301 auto& dest
= instr
->dest
;
302 unsigned index
= dest
.is_ssa
? dest
.ssa
.index
: dest
.reg
.reg
->index
;
303 m_var_mode
[instr
->var
] = instr
->mode
;
304 m_var_derefs
[index
] = instr
->var
;
306 sfn_log
<< SfnLog::io
<< "Add var deref:" << index
307 << " with DDL:" << instr
->var
->data
.driver_location
<< "\n";
310 void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io
& io
)
313 case TGSI_SEMANTIC_POSITION
:
314 case TGSI_SEMANTIC_PSIZE
:
315 case TGSI_SEMANTIC_EDGEFLAG
:
316 case TGSI_SEMANTIC_FACE
:
317 case TGSI_SEMANTIC_SAMPLEMASK
:
318 case TGSI_SEMANTIC_CLIPVERTEX
:
321 case TGSI_SEMANTIC_GENERIC
:
322 case TGSI_SEMANTIC_TEXCOORD
:
323 case TGSI_SEMANTIC_PCOORD
:
324 io
.spi_sid
= io
.sid
+ 1;
327 /* For non-generic params - pack name and sid into 8 bits */
328 io
.spi_sid
= (0x80 | (io
.name
<< 3) | io
.sid
) + 1;
332 const nir_variable
*ShaderFromNirProcessor::get_deref_location(const nir_src
& src
) const
334 unsigned index
= src
.is_ssa
? src
.ssa
->index
: src
.reg
.reg
->index
;
336 sfn_log
<< SfnLog::io
<< "Search for deref:" << index
<< "\n";
338 auto v
= m_var_derefs
.find(index
);
339 if (v
!= m_var_derefs
.end())
342 fprintf(stderr
, "R600: could not find deref with index %d\n", index
);
346 /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
347 return nir_deref_instr_get_variable(deref); */
350 bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr
* instr
)
352 return m_tex_instr
.emit(instr
);
355 void ShaderFromNirProcessor::emit_instruction(Instruction
*ir
)
357 if (m_pending_else
) {
359 m_output
.back().emit(PInstruction(m_pending_else
));
361 m_pending_else
= nullptr;
364 r600::sfn_log
<< SfnLog::instr
<< " as '" << *ir
<< "'\n";
365 if (m_output
.empty())
368 m_output
.back().emit(Instruction::Pointer(ir
));
371 void ShaderFromNirProcessor::emit_shader_start()
373 /* placeholder, may become an abstract method */
376 bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr
*instr
)
378 switch (instr
->type
) {
379 case nir_jump_break
: {
380 auto b
= new LoopBreakInstruction();
384 case nir_jump_continue
: {
385 auto b
= new LoopContInstruction();
390 nir_instr
*i
= reinterpret_cast<nir_instr
*>(instr
);
391 sfn_log
<< SfnLog::err
<< "Jump instrunction " << *i
<< " not supported\n";
398 bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr
* instr
)
400 return m_alu_instr
.emit(instr
);
403 bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr
* instr
)
408 bool ShaderFromNirProcessor::emit_loop_start(int loop_id
)
410 LoopBeginInstruction
*loop
= new LoopBeginInstruction();
411 emit_instruction(loop
);
412 m_loop_begin_block_map
[loop_id
] = loop
;
416 bool ShaderFromNirProcessor::emit_loop_end(int loop_id
)
418 auto start
= m_loop_begin_block_map
.find(loop_id
);
419 if (start
== m_loop_begin_block_map
.end()) {
420 sfn_log
<< SfnLog::err
<< "End loop: Loop start for "
421 << loop_id
<< " not found\n";
426 m_output
.push_back(InstructionBlock(m_nesting_depth
, m_block_number
));
427 LoopEndInstruction
*loop
= new LoopEndInstruction(start
->second
);
428 emit_instruction(loop
);
430 m_loop_begin_block_map
.erase(start
);
434 bool ShaderFromNirProcessor::emit_if_start(int if_id
, nir_if
*if_stmt
)
437 auto value
= from_nir(if_stmt
->condition
, 0, 0);
438 AluInstruction
*pred
= new AluInstruction(op2_pred_setne_int
, PValue(new GPRValue(0,0)),
439 value
, Value::zero
, EmitInstruction::last
);
440 pred
->set_flag(alu_update_exec
);
441 pred
->set_flag(alu_update_pred
);
442 pred
->set_cf_type(cf_alu_push_before
);
446 IfInstruction
*ir
= new IfInstruction(pred
);
447 emit_instruction(ir
);
448 assert(m_if_block_start_map
.find(if_id
) == m_if_block_start_map
.end());
449 m_if_block_start_map
[if_id
] = ir
;
453 bool ShaderFromNirProcessor::emit_else_start(int if_id
)
455 auto iif
= m_if_block_start_map
.find(if_id
);
456 if (iif
== m_if_block_start_map
.end()) {
457 std::cerr
<< "Error: ELSE branch " << if_id
<< " without starting conditional branch\n";
461 if (iif
->second
->type() != Instruction::cond_if
) {
462 std::cerr
<< "Error: ELSE branch " << if_id
<< " not started by an IF branch\n";
465 IfInstruction
*if_instr
= static_cast<IfInstruction
*>(iif
->second
);
466 ElseInstruction
*ir
= new ElseInstruction(if_instr
);
467 m_if_block_start_map
[if_id
] = ir
;
473 bool ShaderFromNirProcessor::emit_ifelse_end(int if_id
)
475 auto ifelse
= m_if_block_start_map
.find(if_id
);
476 if (ifelse
== m_if_block_start_map
.end()) {
477 std::cerr
<< "Error: ENDIF " << if_id
<< " without THEN or ELSE branch\n";
481 if (ifelse
->second
->type() != Instruction::cond_if
&&
482 ifelse
->second
->type() != Instruction::cond_else
) {
483 std::cerr
<< "Error: ENDIF " << if_id
<< " doesn't close an IF or ELSE branch\n";
486 /* Clear pending else; if the else branch was empty, none will be emitted */
488 m_pending_else
= nullptr;
491 IfElseEndInstruction
*ir
= new IfElseEndInstruction();
492 emit_instruction(ir
);
497 bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr
* instr
, int offset
)
499 PValue src
= get_temp_register();
500 emit_instruction(new AluInstruction(op1_mov
, src
, Value::zero
, {alu_write
, alu_last_instr
}));
502 GPRVector dest
= vec_from_nir(instr
->dest
, nir_dest_num_components(instr
->dest
));
503 emit_instruction(new FetchTCSIOParam(dest
, src
, offset
));
509 bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr
* instr
)
511 auto address
= varvec_from_nir(instr
->src
[0], instr
->num_components
);
512 auto dest_value
= varvec_from_nir(instr
->dest
, instr
->num_components
);
514 emit_instruction(new LDSReadInstruction(address
, dest_value
));
519 lds_op_from_intrinsic(nir_intrinsic_op op
) {
521 case nir_intrinsic_shared_atomic_add
:
522 return LDS_OP2_LDS_ADD_RET
;
523 case nir_intrinsic_shared_atomic_and
:
524 return LDS_OP2_LDS_AND_RET
;
525 case nir_intrinsic_shared_atomic_or
:
526 return LDS_OP2_LDS_OR_RET
;
527 case nir_intrinsic_shared_atomic_imax
:
528 return LDS_OP2_LDS_MAX_INT_RET
;
529 case nir_intrinsic_shared_atomic_umax
:
530 return LDS_OP2_LDS_MAX_UINT_RET
;
531 case nir_intrinsic_shared_atomic_imin
:
532 return LDS_OP2_LDS_MIN_INT_RET
;
533 case nir_intrinsic_shared_atomic_umin
:
534 return LDS_OP2_LDS_MIN_UINT_RET
;
535 case nir_intrinsic_shared_atomic_xor
:
536 return LDS_OP2_LDS_XOR_RET
;
537 case nir_intrinsic_shared_atomic_exchange
:
538 return LDS_OP2_LDS_XCHG_RET
;
539 case nir_intrinsic_shared_atomic_comp_swap
:
540 return LDS_OP3_LDS_CMP_XCHG_RET
;
542 unreachable("Unsupported shared atomic opcode");
546 bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr
* instr
)
548 auto address
= from_nir(instr
->src
[0], 0);
549 auto dest_value
= from_nir(instr
->dest
, 0);
550 auto value
= from_nir(instr
->src
[1], 0);
551 auto op
= lds_op_from_intrinsic(instr
->intrinsic
);
553 if (unlikely(instr
->intrinsic
==nir_intrinsic_shared_atomic_comp_swap
)) {
554 auto value2
= from_nir(instr
->src
[2], 0);
555 emit_instruction(new LDSAtomicInstruction(dest_value
, value
, value2
, address
, op
));
557 emit_instruction(new LDSAtomicInstruction(dest_value
, value
, address
, op
));
563 bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr
* instr
)
565 unsigned write_mask
= nir_intrinsic_write_mask(instr
);
567 auto address
= from_nir(instr
->src
[1], 0);
568 int swizzle_base
= (write_mask
& 0x3) ? 0 : 2;
569 write_mask
|= write_mask
>> 2;
571 auto value
= from_nir(instr
->src
[0], swizzle_base
);
572 if (!(write_mask
& 2)) {
573 emit_instruction(new LDSWriteInstruction(address
, 0, value
));
575 auto value1
= from_nir(instr
->src
[0], swizzle_base
+ 1);
576 emit_instruction(new LDSWriteInstruction(address
, 0, value
, value1
));
582 bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr
* instr
)
584 r600::sfn_log
<< SfnLog::instr
<< "emit '"
585 << *reinterpret_cast<nir_instr
*>(instr
)
586 << "' (" << __func__
<< ")\n";
588 if (emit_intrinsic_instruction_override(instr
))
591 if (m_ssbo_instr
.emit(&instr
->instr
)) {
592 m_sel
.info
.writes_memory
= true;
596 switch (instr
->intrinsic
) {
597 case nir_intrinsic_load_deref
: {
598 auto var
= get_deref_location(instr
->src
[0]);
601 auto mode_helper
= m_var_mode
.find(var
);
602 if (mode_helper
== m_var_mode
.end()) {
603 cerr
<< "r600-nir: variable '" << var
->name
<< "' not found\n";
606 switch (mode_helper
->second
) {
607 case nir_var_shader_in
:
608 return emit_load_input_deref(var
, instr
);
609 case nir_var_function_temp
:
610 return emit_load_function_temp(var
, instr
);
612 cerr
<< "r600-nir: Unsupported mode" << mode_helper
->second
613 << "for src variable\n";
617 case nir_intrinsic_store_scratch
:
618 return emit_store_scratch(instr
);
619 case nir_intrinsic_load_scratch
:
620 return emit_load_scratch(instr
);
621 case nir_intrinsic_store_deref
:
622 return emit_store_deref(instr
);
623 case nir_intrinsic_load_uniform
:
624 return reserve_uniform(instr
);
625 case nir_intrinsic_discard
:
626 case nir_intrinsic_discard_if
:
627 return emit_discard_if(instr
);
628 case nir_intrinsic_load_ubo_r600
:
629 return emit_load_ubo(instr
);
630 case nir_intrinsic_load_tcs_in_param_base_r600
:
631 return emit_load_tcs_param_base(instr
, 0);
632 case nir_intrinsic_load_tcs_out_param_base_r600
:
633 return emit_load_tcs_param_base(instr
, 16);
634 case nir_intrinsic_load_local_shared_r600
:
635 case nir_intrinsic_load_shared
:
636 return emit_load_local_shared(instr
);
637 case nir_intrinsic_store_local_shared_r600
:
638 case nir_intrinsic_store_shared
:
639 return emit_store_local_shared(instr
);
640 case nir_intrinsic_control_barrier
:
641 case nir_intrinsic_memory_barrier_tcs_patch
:
642 case nir_intrinsic_memory_barrier_shared
:
643 case nir_intrinsic_memory_barrier
:
644 return emit_barrier(instr
);
645 case nir_intrinsic_shared_atomic_add
:
646 case nir_intrinsic_shared_atomic_and
:
647 case nir_intrinsic_shared_atomic_or
:
648 case nir_intrinsic_shared_atomic_imax
:
649 case nir_intrinsic_shared_atomic_umax
:
650 case nir_intrinsic_shared_atomic_imin
:
651 case nir_intrinsic_shared_atomic_umin
:
652 case nir_intrinsic_shared_atomic_xor
:
653 case nir_intrinsic_shared_atomic_exchange
:
654 case nir_intrinsic_shared_atomic_comp_swap
:
655 return emit_atomic_local_shared(instr
);
656 case nir_intrinsic_copy_deref
:
657 case nir_intrinsic_load_constant
:
658 case nir_intrinsic_load_input
:
659 case nir_intrinsic_store_output
:
662 fprintf(stderr
, "r600-nir: Unsupported intrinsic %d\n", instr
->intrinsic
);
668 bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr
* instr
)
674 ShaderFromNirProcessor::emit_load_function_temp(UNUSED
const nir_variable
*var
, UNUSED nir_intrinsic_instr
*instr
)
679 bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr
* instr
)
681 AluInstruction
*ir
= new AluInstruction(op0_group_barrier
);
682 ir
->set_flag(alu_last_instr
);
683 emit_instruction(ir
);
688 bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest
& dest
, int chan
, PValue value
, bool as_last
)
691 auto ir
= new AluInstruction(op1_mov
, from_nir(dest
, 0), value
, {alu_write
});
693 ir
->set_flag(alu_last_instr
);
694 emit_instruction(ir
);
696 inject_register(dest
.ssa
.index
, chan
, value
, true);
701 bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr
* instr
)
703 PValue address
= from_nir(instr
->src
[1], 0, 0);
705 auto value
= vec_from_nir_with_fetch_constant(instr
->src
[0], (1 << instr
->num_components
) - 1,
706 swizzle_from_comps(instr
->num_components
));
708 int writemask
= nir_intrinsic_write_mask(instr
);
709 int align
= nir_intrinsic_align_mul(instr
);
710 int align_offset
= nir_intrinsic_align_offset(instr
);
712 WriteScratchInstruction
*ir
= nullptr;
713 if (address
->type() == Value::literal
) {
714 const auto& lv
= static_cast<const LiteralValue
&>(*address
);
715 ir
= new WriteScratchInstruction(lv
.value(), value
, align
, align_offset
, writemask
);
717 address
= from_nir_with_fetch_constant(instr
->src
[1], 0);
718 ir
= new WriteScratchInstruction(address
, value
, align
, align_offset
,
719 writemask
, m_scratch_size
);
721 emit_instruction(ir
);
722 sh_info().needs_scratch_space
= 1;
726 bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr
* instr
)
728 PValue address
= from_nir_with_fetch_constant(instr
->src
[0], 0);
729 std::array
<PValue
, 4> dst_val
;
730 for (int i
= 0; i
< 4; ++i
)
731 dst_val
[i
] = from_nir(instr
->dest
, i
< instr
->num_components
? i
: 7);
733 GPRVector
dst(dst_val
);
734 auto ir
= new LoadFromScratch(dst
, address
, m_scratch_size
);
735 ir
->prelude_append(new WaitAck(0));
736 emit_instruction(ir
);
737 sh_info().needs_scratch_space
= 1;
741 GPRVector
ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src
& src
,
743 const GPRVector::Swizzle
& swizzle
,
746 bool use_same
= true;
749 for (int i
= 0; i
< 4 && use_same
; ++i
) {
750 if ((1 << i
) & mask
) {
751 if (swizzle
[i
] < 4) {
752 v
[i
] = from_nir(src
, swizzle
[i
]);
754 if (v
[i
]->type() != Value::gpr
)
756 if (match
&& (v
[i
]->chan() != swizzle
[i
]))
764 while (!v
[i
] && i
< 4) ++i
;
767 unsigned sel
= v
[i
]->sel();
768 for (i
= 0; i
< 4 && use_same
; ++i
) {
770 v
[i
] = PValue(new GPRValue(sel
, swizzle
[i
]));
772 use_same
&= v
[i
]->sel() == sel
;
777 AluInstruction
*ir
= nullptr;
778 int sel
= allocate_temp_register();
779 for (int i
= 0; i
< 4; ++i
) {
780 v
[i
] = PValue(new GPRValue(sel
, swizzle
[i
]));
781 if (swizzle
[i
] < 4 && (mask
& (1 << i
))) {
782 ir
= new AluInstruction(op1_mov
, v
[i
], from_nir(src
, swizzle
[i
]),
783 EmitInstruction::write
);
784 emit_instruction(ir
);
788 ir
->set_flag(alu_last_instr
);
790 return GPRVector(v
);;
793 bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr
* instr
)
795 nir_src
& src0
= instr
->src
[0];
796 nir_src
& src1
= instr
->src
[1];
798 int sel_bufid_reg
= src0
.is_ssa
? src0
.ssa
->index
: src0
.reg
.reg
->index
;
799 const nir_load_const_instr
* literal0
= get_literal_constant(sel_bufid_reg
);
801 int ofs_reg
= src1
.is_ssa
? src1
.ssa
->index
: src1
.reg
.reg
->index
;
802 const nir_load_const_instr
* literal1
= get_literal_constant(ofs_reg
);
805 uint bufid
= literal0
->value
[0].u32
;
806 uint buf_ofs
= literal1
->value
[0].u32
>> 4;
807 int buf_cmp
= ((literal1
->value
[0].u32
>> 2) & 3);
808 AluInstruction
*ir
= nullptr;
809 for (int i
= 0; i
< instr
->num_components
; ++i
) {
810 int cmp
= buf_cmp
+ i
;
812 auto u
= PValue(new UniformValue(512 + buf_ofs
, cmp
, bufid
+ 1));
813 if (instr
->dest
.is_ssa
)
814 add_uniform((instr
->dest
.ssa
.index
<< 2) + i
, u
);
816 ir
= new AluInstruction(op1_mov
, from_nir(instr
->dest
, i
), u
, {alu_write
});
817 emit_instruction(ir
);
821 ir
->set_flag(alu_last_instr
);
825 /* literal0 is lost ...*/
826 return load_uniform_indirect(instr
, from_nir(instr
->src
[1], 0, 0), 0, literal0
->value
[0].u32
+ 1);
829 /* TODO: This can also be solved by using the CF index on the ALU block, and
830 * this would probably make sense when there is more than one load with
831 * the same buffer ID. */
832 PValue bufid
= from_nir(instr
->src
[0], 0, 0);
833 PValue addr
= from_nir_with_fetch_constant(instr
->src
[1], 0);
835 for (int i
= 0; i
< 4; ++i
)
836 trgt
.set_reg_i(i
, from_nir(instr
->dest
, i
));
838 auto ir
= new FetchInstruction(vc_fetch
, no_index_offset
, trgt
, addr
, 0,
841 emit_instruction(ir
);
842 for (int i
= 0; i
< instr
->num_components
; ++i
) {
843 add_uniform((instr
->dest
.ssa
.index
<< 2) + i
, trgt
.reg_i(i
));
845 m_sh_info
.indirect_files
|= 1 << TGSI_FILE_CONSTANT
;
851 bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr
* instr
)
853 r600::sfn_log
<< SfnLog::instr
<< "emit '"
854 << *reinterpret_cast<nir_instr
*>(instr
)
855 << "' (" << __func__
<< ")\n";
857 if (instr
->intrinsic
== nir_intrinsic_discard_if
) {
858 emit_instruction(new AluInstruction(op2_killne_int
, PValue(new GPRValue(0,0)),
859 {from_nir(instr
->src
[0], 0, 0), Value::zero
}, {alu_last_instr
}));
862 emit_instruction(new AluInstruction(op2_kille
, PValue(new GPRValue(0,0)),
863 {Value::zero
, Value::zero
}, {alu_last_instr
}));
865 m_sh_info
.uses_kill
= 1;
869 bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable
*var
,
870 nir_intrinsic_instr
* instr
)
872 return do_emit_load_deref(var
, instr
);
875 bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr
* instr
)
877 r600::sfn_log
<< SfnLog::instr
<< __func__
<< ": emit '"
878 << *reinterpret_cast<nir_instr
*>(instr
)
882 /* If the target register is an SSA register and the loading is not
883 * indirect then we can do lazy loading, i.e. the uniform value can
884 * be used directly. Otherwise we have to load the data for real
888 /* Try to find the literal that defines the array index */
889 const nir_load_const_instr
* literal
= nullptr;
890 if (instr
->src
[0].is_ssa
)
891 literal
= get_literal_constant(instr
->src
[0].ssa
->index
);
893 int base
= nir_intrinsic_base(instr
);
895 AluInstruction
*ir
= nullptr;
897 for (int i
= 0; i
< instr
->num_components
; ++i
) {
898 PValue u
= PValue(new UniformValue(512 + literal
->value
[0].u32
+ base
, i
));
899 sfn_log
<< SfnLog::io
<< "uniform "
900 << instr
->dest
.ssa
.index
<< " const["<< i
<< "]: "<< instr
->const_index
[i
] << "\n";
902 if (instr
->dest
.is_ssa
)
903 add_uniform((instr
->dest
.ssa
.index
<< 2) + i
, u
);
905 ir
= new AluInstruction(op1_mov
, from_nir(instr
->dest
, i
),
907 emit_instruction(ir
);
911 ir
->set_flag(alu_last_instr
);
913 PValue addr
= from_nir(instr
->src
[0], 0, 0);
914 return load_uniform_indirect(instr
, addr
, 16 * base
, 0);
919 bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr
* instr
, PValue addr
, int offest
, int bufferid
)
922 std::cerr
<< "r600-nir: don't know how uniform is addressed\n";
927 for (int i
= 0; i
< 4; ++i
)
928 trgt
.set_reg_i(i
, from_nir(instr
->dest
, i
));
930 if (addr
->type() != Value::gpr
) {
931 emit_instruction(op1_mov
, trgt
.reg_i(0), {addr
}, {alu_write
, alu_last_instr
});
932 addr
= trgt
.reg_i(0);
935 /* FIXME: buffer index and index mode are not set correctly */
936 auto ir
= new FetchInstruction(vc_fetch
, no_index_offset
, trgt
, addr
, offest
,
937 bufferid
, PValue(), bim_none
);
938 emit_instruction(ir
);
939 for (int i
= 0; i
< instr
->num_components
; ++i
) {
940 add_uniform((instr
->dest
.ssa
.index
<< 2) + i
, trgt
.reg_i(i
));
942 m_sh_info
.indirect_files
|= 1 << TGSI_FILE_CONSTANT
;
946 AluInstruction
*ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr
* literal
, const nir_src
& src
, unsigned writemask
)
948 AluInstruction
*ir
= nullptr;
949 for (int i
= 0; i
< literal
->def
.num_components
; ++i
) {
950 if (writemask
& (1 << i
)){
952 switch (literal
->def
.bit_size
) {
955 sfn_log
<< SfnLog::reg
<< "Got literal of bit size 1\n";
956 lsrc
= literal
->value
[i
].b
?
957 PValue(new LiteralValue( 0xffffffff, i
)) :
961 sfn_log
<< SfnLog::reg
<< "Got literal of bit size 32\n";
962 if (literal
->value
[i
].u32
== 0)
964 else if (literal
->value
[i
].u32
== 1)
966 else if (literal
->value
[i
].f32
== 1.0f
)
968 else if (literal
->value
[i
].f32
== 0.5f
)
969 lsrc
= Value::zero_dot_5
;
971 lsrc
= PValue(new LiteralValue(literal
->value
[i
].u32
, i
));
974 sfn_log
<< SfnLog::reg
<< "Got literal of bit size " << literal
->def
.bit_size
975 << " falling back to 32 bit\n";
976 lsrc
= PValue(new LiteralValue(literal
->value
[i
].u32
, i
));
978 ir
= new AluInstruction(op1_mov
, create_register_from_nir_src(src
, i
), lsrc
, EmitInstruction::write
);
980 emit_instruction(ir
);
986 PValue
ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src
& src
, unsigned component
)
988 PValue value
= from_nir(src
, component
);
989 if (value
->type() != Value::gpr
&&
990 value
->type() != Value::gpr_vector
&&
991 value
->type() != Value::gpr_array_value
) {
992 PValue retval
= get_temp_register();
993 emit_instruction(new AluInstruction(op1_mov
, retval
, value
,
994 EmitInstruction::last_write
));
1000 bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr
* instr
)
1002 auto out_var
= get_deref_location(instr
->src
[0]);
1006 return do_emit_store_deref(out_var
, instr
);
1009 bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr
* instr
)
1011 r600::sfn_log
<< SfnLog::instr
<< __func__
<< ": emit '"
1012 << *reinterpret_cast<nir_instr
*>(instr
)
1015 /* Give the specific shader type a chance to process this, i.e. Geometry and
1016 * tessellation shaders need specialized deref_array, for the other shaders
1019 if (emit_deref_instruction_override(instr
))
1022 switch (instr
->deref_type
) {
1023 case nir_deref_type_var
:
1024 set_var_address(instr
);
1026 case nir_deref_type_array
:
1027 case nir_deref_type_array_wildcard
:
1028 case nir_deref_type_struct
:
1029 case nir_deref_type_cast
:
1031 fprintf(stderr
, "R600: deref type %d not supported\n", instr
->deref_type
);
1036 void ShaderFromNirProcessor::load_uniform(const nir_alu_src
&src
)
1038 AluInstruction
*ir
= nullptr;
1041 assert(src
.src
.is_ssa
);
1043 for (int i
= 0; i
< src
.src
.ssa
->num_components
; ++i
) {
1044 unsigned uindex
= (src
.src
.ssa
->index
<< 2) + i
;
1045 sv
[i
] = uniform(uindex
);
1049 for (int i
= 0; i
< src
.src
.ssa
->num_components
; ++i
) {
1050 ir
= new AluInstruction(op1_mov
, create_register_from_nir_src(src
.src
, i
), sv
[i
],
1051 EmitInstruction::write
);
1052 emit_instruction(ir
);
1055 ir
->set_flag(alu_last_instr
);
1060 bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode
, PValue dest
,
1061 std::vector
<PValue
> srcs
,
1062 const std::set
<AluModifiers
>& m_flags
)
1064 AluInstruction
*ir
= new AluInstruction(opcode
, dest
, srcs
, m_flags
);
1065 emit_instruction(ir
);
1069 void ShaderFromNirProcessor::add_param_output_reg(int loc
, const GPRVector
*gpr
)
1071 m_output_register_map
[loc
] = gpr
;
1074 void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction
*ir
)
1076 r600::sfn_log
<< SfnLog::instr
<< " as '" << *ir
<< "'\n";
1077 m_export_output
.emit(PInstruction(ir
));
1080 const GPRVector
* ShaderFromNirProcessor::output_register(unsigned location
) const
1082 const GPRVector
*retval
= nullptr;
1083 auto val
= m_output_register_map
.find(location
);
1084 if (val
!= m_output_register_map
.end())
1085 retval
= val
->second
;
1089 void ShaderFromNirProcessor::set_input(unsigned pos
, PValue var
)
1091 r600::sfn_log
<< SfnLog::io
<< "Set input[" << pos
<< "] =" << *var
<< "\n";
1092 m_inputs
[pos
] = var
;
1095 void ShaderFromNirProcessor::set_output(unsigned pos
, int sel
)
1097 r600::sfn_log
<< SfnLog::io
<< "Set output[" << pos
<< "] =" << sel
<< "\n";
1098 m_outputs
[pos
] = sel
;
1101 void ShaderFromNirProcessor::append_block(int nesting_change
)
1103 m_nesting_depth
+= nesting_change
;
1104 m_output
.push_back(InstructionBlock(m_nesting_depth
, m_block_number
++));
1107 void ShaderFromNirProcessor::finalize()
1111 for (auto& i
: m_inputs
)
1112 m_sh_info
.input
[i
.first
].gpr
= i
.second
->sel();
1114 for (auto& i
: m_outputs
)
1115 m_sh_info
.output
[i
.first
].gpr
= i
.second
;
1117 m_output
.push_back(m_export_output
);