/*
 * Copyright (c) 2019 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "sfn_nir.h"
#include "nir_builder.h"

#include "../r600_pipe.h"
#include "../r600_shader.h"

#include "sfn_instruction_tex.h"

#include "sfn_shader_vertex.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_shader_compute.h"
#include "sfn_shader_tcs.h"
#include "sfn_shader_tess_eval.h"
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_ir_to_assembly.h"

namespace r600 {
ShaderFromNir::ShaderFromNir():sh(nullptr),
   chip_class(CLASS_UNKNOWN),
   m_current_if_id(0),
   m_current_loop_id(0)
{
}
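
/* Translate the NIR shader into the r600 IR. The per-stage work is done by
 * the implementation selected in the switch below; this method drives the
 * common steps: declaration processing, instruction scanning, register
 * allocation, and the final walk over the control flow tree. */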
bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
                          r600_pipe_shader_selector *sel, r600_shader_key& key,
                          struct r600_shader *gs_shader, enum chip_class _chip_class)
{
   sh = shader;
   chip_class = _chip_class;

   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_TESS_CTRL:
      sfn_log << SfnLog::trans << "Start TCS\n";
      impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_TESS_EVAL:
      sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
      impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_GEOMETRY:
      sfn_log << SfnLog::trans << "Start GS\n";
      impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_FRAGMENT:
      sfn_log << SfnLog::trans << "Start FS\n";
      impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_COMPUTE:
      sfn_log << SfnLog::trans << "Start CS\n";
      impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   default:
      return false;
   }

   sfn_log << SfnLog::trans << "Process declarations\n";
   if (!process_declaration())
      return false;

   // at this point all functions should be inlined
   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));

   sfn_log << SfnLog::trans << "Scan shader\n";
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (!impl->scan_instruction(instr)) {
            fprintf(stderr, "Unhandled sysvalue access ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\n");
            return false;
         }
      }
   }

   sfn_log << SfnLog::trans << "Reserve registers\n";
   if (!impl->allocate_reserved_registers()) {
      return false;
   }

   ValuePool::array_list arrays;
   sfn_log << SfnLog::trans << "Allocate local registers\n";
   foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
      impl->allocate_local_register(*reg, arrays);
   }

   sfn_log << SfnLog::trans << "Emit shader start\n";
   impl->allocate_arrays(arrays);

   impl->emit_shader_start();

   sfn_log << SfnLog::trans << "Process shader\n";
   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
      if (!process_cf_node(node))
         return false;
   }

   // Add optimizations here
   sfn_log << SfnLog::trans << "Finalize\n";

   if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
      sfn_log << SfnLog::trans << "Merge registers\n";
      impl->remap_registers();
   }

   sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
   return true;
}

Shader ShaderFromNir::shader() const
{
   return Shader{impl->m_output, impl->get_temp_registers()};
}
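
/* The NIR control flow tree is walked recursively; the structured control
 * flow (blocks, ifs, loops) maps directly to the if/else and loop
 * constructs emitted by the stage implementation. */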
bool ShaderFromNir::process_cf_node(nir_cf_node *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "CF");
   switch (node->type) {
   case nir_cf_node_block:
      return process_block(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return process_if(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return process_loop(nir_cf_node_as_loop(node));
   default:
      return false;
   }
}
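
/* if/else is emitted in source order: the then branch first, then the else
 * branch, and emit_ifelse_end closes the construct. The id of the open if
 * is tracked on m_if_stack. */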
bool ShaderFromNir::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   if (!impl->emit_if_start(m_current_if_id, if_stmt))
      return false;

   int if_id = m_current_if_id++;
   m_if_stack.push(if_id);

   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
      if (!process_cf_node(n)) return false;

   if (!if_stmt->then_list.is_empty()) {
      if (!impl->emit_else_start(if_id))
         return false;

      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
         if (!process_cf_node(n)) return false;
   }

   if (!impl->emit_ifelse_end(if_id))
      return false;

   return true;
}

bool ShaderFromNir::process_loop(nir_loop *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
   int loop_id = m_current_loop_id++;

   if (!impl->emit_loop_start(loop_id))
      return false;

   foreach_list_typed(nir_cf_node, n, node, &node->body)
      if (!process_cf_node(n)) return false;

   if (!impl->emit_loop_end(loop_id))
      return false;

   return true;
}

bool ShaderFromNir::process_block(nir_block *block)
{
   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
   nir_foreach_instr(instr, block) {
      int r = emit_instruction(instr);
      if (!r) {
         sfn_log << SfnLog::err << "R600: Unsupported instruction: "
                 << *instr << "\n";
         return false;
      }
   }
   return true;
}

ShaderFromNir::~ShaderFromNir()
{
}

pipe_shader_type ShaderFromNir::processor_type() const
{
   return impl->m_processor_type;
}

bool ShaderFromNir::emit_instruction(nir_instr *instr)
{
   sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";

   switch (instr->type) {
   case nir_instr_type_alu:
      return impl->emit_alu_instruction(instr);
   case nir_instr_type_deref:
      return impl->emit_deref_instruction(nir_instr_as_deref(instr));
   case nir_instr_type_intrinsic:
      return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const:
      return impl->set_literal_constant(nir_instr_as_load_const(instr));
   case nir_instr_type_tex:
      return impl->emit_tex_instruction(instr);
   case nir_instr_type_jump:
      return impl->emit_jump_instruction(nir_instr_as_jump(instr));
   case nir_instr_type_ssa_undef:
      return impl->create_undef(nir_instr_as_ssa_undef(instr));
   default:
      fprintf(stderr, "R600: %s: ShaderFromNir unsupported instruction: type %d:'", __func__, instr->type);
      nir_print_instr(instr, stderr);
      fprintf(stderr, "'\n");
      return false;
   }
}

bool ShaderFromNir::process_declaration()
{
   nir_foreach_shader_in_variable(variable, sh) {
      if (!impl->process_inputs(variable)) {
         fprintf(stderr, "R600: error parsing input variable %s\n", variable->name);
         return false;
      }
   }

   nir_foreach_shader_out_variable(variable, sh) {
      if (!impl->process_outputs(variable)) {
         fprintf(stderr, "R600: error parsing output variable %s\n", variable->name);
         return false;
      }
   }

   nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
                                                 nir_var_mem_ubo |
                                                 nir_var_mem_ssbo) {
      if (!impl->process_uniforms(variable)) {
         fprintf(stderr, "R600: error parsing uniform variable %s\n", variable->name);
         return false;
      }
   }

   return true;
}

const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
{
   return impl->m_output;
}

AssemblyFromShader::~AssemblyFromShader()
{
}

bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
{
   return do_lower(ir);
}
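
/* The lowering passes below follow the nir_shader_lower_instructions
 * pattern: a filter callback selects the instructions to rewrite, and an
 * impl callback builds the replacement SSA values. Here pack/unpack_half_2x16
 * is split into the per-component *_split operations that are handled by
 * the backend. */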
static nir_ssa_def *
r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   nir_alu_instr *alu = nir_instr_as_alu(instr);

   switch (alu->op) {
   case nir_op_unpack_half_2x16: {
      nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0);
      return nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed),
                         nir_unpack_half_2x16_split_y(b, packed));
   }
   case nir_op_pack_half_2x16: {
      nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0);
      return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
                                         nir_channel(b, src_vec2, 1));
   }
   default:
      return nullptr;
   }
}

bool r600_nir_lower_pack_unpack_2x16_filter(const nir_instr *instr, const void *_options)
{
   return instr->type == nir_instr_type_alu;
}

bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_nir_lower_pack_unpack_2x16_filter,
                                        r600_nir_lower_pack_unpack_2x16_impl,
                                        nullptr);
}
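
/* Scratch access: the byte address NIR produces is shifted right here to
 * obtain the offset the backend expects; the shift amount depends on the
 * number of components of the stored or loaded value. */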
static void
r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
{
   b->cursor = nir_before_instr(&instr->instr);

   int address_index = 0;
   int align;

   if (instr->intrinsic == nir_intrinsic_store_scratch) {
      align = instr->src[0].ssa->num_components;
      address_index = 1;
   } else
      align = instr->dest.ssa.num_components;

   nir_ssa_def *address = instr->src[address_index].ssa;
   nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align));

   nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
                         nir_src_for_ssa(new_address));
}

bool r600_lower_scratch_addresses(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      nir_builder build;
      nir_builder_init(&build, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
            if (op->intrinsic != nir_intrinsic_load_scratch &&
                op->intrinsic != nir_intrinsic_store_scratch)
               continue;
            r600_nir_lower_scratch_address_impl(&build, op);
            progress = true;
         }
      }
   }
   return progress;
}
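
/* UBO loads are rewritten to the r600-specific load_ubo_r600 intrinsic,
 * which fetches vec4-aligned data. With a constant offset the component
 * stays encoded in the offset; with a dynamic offset a full vec4 is loaded
 * and the requested components are selected below with bcsel chains. */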
static nir_ssa_def *
r600_lower_ubo_to_align16_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   b->cursor = nir_before_instr(instr);

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   assert(op->intrinsic == nir_intrinsic_load_ubo);

   bool const_address = (nir_src_is_const(op->src[1]) && nir_src_is_const(op->src[0]));

   nir_ssa_def *offset = op->src[1].ssa;

   /* This is ugly: With const addressing we can actually set a proper fetch target mask,
    * but for this we need the component encoded, we don't shift and do the decoding in the
    * backend. Otherwise we shift by four and resolve the component here
    * (TODO: encode the start component in the intrinsic when the offset base is non-constant
    * but a multiple of 16). */

   nir_ssa_def *new_offset = offset;
   if (!const_address)
      new_offset = nir_ishr(b, offset, nir_imm_int(b, 4));

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo_r600);
   load->num_components = const_address ? op->num_components : 4;
   load->src[0] = op->src[0];
   load->src[1] = nir_src_for_ssa(new_offset);
   nir_intrinsic_set_align(load, nir_intrinsic_align_mul(op), nir_intrinsic_align_offset(op));

   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   nir_builder_instr_insert(b, &load->instr);

   /* when four components are loaded or both the offset and the location
    * are constant, then the backend can deal with it better */
   if (op->num_components == 4 || const_address)
      return &load->dest.ssa;

   /* What comes below is a performance disaster when the offset is not constant
    * because then we have to assume that any component can be the first one and we
    * have to pick the result manually. */
   nir_ssa_def *first_comp = nir_iand(b, nir_ishr(b, offset, nir_imm_int(b, 2)),
                                      nir_imm_int(b, 3));

   const unsigned swz_000[3] = {0, 0, 0};
   nir_ssa_def *component_select = nir_ieq(b, r600_imm_ivec3(b, 0, 1, 2),
                                           nir_swizzle(b, first_comp, swz_000, 3));

   if (op->num_components == 1) {
      nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
                                      nir_channel(b, &load->dest.ssa, 0),
                                      nir_channel(b, &load->dest.ssa, 3));
      nir_ssa_def *check1 = nir_bcsel(b, nir_channel(b, component_select, 1),
                                      nir_channel(b, &load->dest.ssa, 1),
                                      check0);
      return nir_bcsel(b, nir_channel(b, component_select, 2),
                       nir_channel(b, &load->dest.ssa, 2),
                       check1);
   } else if (op->num_components == 2) {
      const unsigned szw_01[2] = {0, 1};
      const unsigned szw_12[2] = {1, 2};
      const unsigned szw_23[2] = {2, 3};

      nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
                                      nir_swizzle(b, &load->dest.ssa, szw_01, 2),
                                      nir_swizzle(b, &load->dest.ssa, szw_23, 2));
      return nir_bcsel(b, nir_channel(b, component_select, 1),
                       nir_swizzle(b, &load->dest.ssa, szw_12, 2),
                       check0);
   } else {
      const unsigned szw_012[3] = {0, 1, 2};
      const unsigned szw_123[3] = {1, 2, 3};
      return nir_bcsel(b, nir_channel(b, component_select, 0),
                       nir_swizzle(b, &load->dest.ssa, szw_012, 3),
                       nir_swizzle(b, &load->dest.ssa, szw_123, 3));
   }
}

bool r600_lower_ubo_to_align16_filter(const nir_instr *instr, const void *_options)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   return op->intrinsic == nir_intrinsic_load_ubo;
}

bool r600_lower_ubo_to_align16(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_lower_ubo_to_align16_filter,
                                        r600_lower_ubo_to_align16_impl,
                                        nullptr);
}
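
/* Keep the uniform list ordered by (binding, offset) so that later passes,
 * in particular the atomic counter re-indexing in r600_nir_lower_atomics,
 * see the counters of one binding in a stable order. */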
static void
insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.binding > new_var->data.binding ||
          (var->data.binding == new_var->data.binding &&
           var->data.offset > new_var->data.offset)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

void sort_uniforms(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_uniform_variable_safe(var, shader) {
      exec_node_remove(&var->node);
      insert_uniform_sorted(&new_list, var);
   }
   exec_list_append(&shader->variables, &new_list);
}

} // namespace r600
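
/* Map the deref-based atomic counter intrinsics to their offset-based
 * counterparts; nir_num_intrinsics is returned for everything that is not
 * an atomic counter operation. */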
static nir_intrinsic_op
r600_map_atomic(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_atomic_counter_read_deref:
      return nir_intrinsic_atomic_counter_read;
   case nir_intrinsic_atomic_counter_inc_deref:
      return nir_intrinsic_atomic_counter_inc;
   case nir_intrinsic_atomic_counter_pre_dec_deref:
      return nir_intrinsic_atomic_counter_pre_dec;
   case nir_intrinsic_atomic_counter_post_dec_deref:
      return nir_intrinsic_atomic_counter_post_dec;
   case nir_intrinsic_atomic_counter_add_deref:
      return nir_intrinsic_atomic_counter_add;
   case nir_intrinsic_atomic_counter_min_deref:
      return nir_intrinsic_atomic_counter_min;
   case nir_intrinsic_atomic_counter_max_deref:
      return nir_intrinsic_atomic_counter_max;
   case nir_intrinsic_atomic_counter_and_deref:
      return nir_intrinsic_atomic_counter_and;
   case nir_intrinsic_atomic_counter_or_deref:
      return nir_intrinsic_atomic_counter_or;
   case nir_intrinsic_atomic_counter_xor_deref:
      return nir_intrinsic_atomic_counter_xor;
   case nir_intrinsic_atomic_counter_exchange_deref:
      return nir_intrinsic_atomic_counter_exchange;
   case nir_intrinsic_atomic_counter_comp_swap_deref:
      return nir_intrinsic_atomic_counter_comp_swap;
   default:
      return nir_num_intrinsics;
   }
}

static bool
r600_lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr,
                       nir_shader *shader)
{
   nir_intrinsic_op op = r600_map_atomic(instr->intrinsic);
   if (nir_num_intrinsics == op)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (var->data.mode != nir_var_uniform &&
       var->data.mode != nir_var_mem_ssbo &&
       var->data.mode != nir_var_mem_shared)
      return false; /* atomics passed as function arguments can't be lowered */

   const unsigned idx = var->data.binding;

   b->cursor = nir_before_instr(&instr->instr);

   nir_ssa_def *offset = nir_imm_int(b, var->data.index);
   for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
        d = nir_deref_instr_parent(d)) {
      assert(d->deref_type == nir_deref_type_array);
      assert(d->arr.index.is_ssa);

      unsigned array_stride = 1;
      if (glsl_type_is_array(d->type))
         array_stride *= glsl_get_aoa_size(d->type);

      offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa,
                                            nir_imm_int(b, array_stride)));
   }

   /* Since the first source is a deref and the first source in the lowered
    * instruction is the offset, we can just swap it out and change the
    * intrinsic. */
   instr->intrinsic = op;
   nir_instr_rewrite_src(&instr->instr, &instr->src[0],
                         nir_src_for_ssa(offset));
   nir_intrinsic_set_base(instr, idx);

   nir_deref_instr_remove_if_unused(deref);

   return true;
}

static bool
r600_nir_lower_atomics(nir_shader *shader)
{
   bool progress = false;

   /* First re-do the offsets: in hardware we start at zero for each new
    * binding, and we use an offset of one per counter */
   int current_binding = -1;
   int current_offset = 0;
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
      if (!var->type->contains_atomic())
         continue;

      if (current_binding == (int)var->data.binding) {
         var->data.index = current_offset;
         current_offset += var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      } else {
         current_binding = var->data.binding;
         var->data.index = 0;
         current_offset = var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      }
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool impl_progress = false;

      nir_builder build;
      nir_builder_init(&build, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            impl_progress |= r600_lower_deref_instr(&build,
                                                    nir_instr_as_intrinsic(instr), shader);
         }
      }

      if (impl_progress) {
         nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
         progress = true;
      }
   }

   return progress;
}

using r600::r600_nir_lower_int_tg4;
using r600::r600_nir_lower_pack_unpack_2x16;
using r600::r600_lower_scratch_addresses;
using r600::r600_lower_fs_out_to_vector;
using r600::r600_lower_ubo_to_align16;

static int
r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
{
   return glsl_count_vec4_slots(type, false, is_bindless);
}

static void
r600_get_natural_size_align_bytes(const struct glsl_type *type,
                                  unsigned *size, unsigned *align)
{
   if (type->base_type != GLSL_TYPE_ARRAY) {
      *size = 1;
      *align = 1;
   } else {
      unsigned elem_size, elem_align;
      glsl_get_natural_size_align_bytes(type->fields.array,
                                        &elem_size, &elem_align);
      *align = 1;
      *size = type->length;
   }
}
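
/* load_shared/store_shared are split into the r600-specific LDS intrinsics:
 * loads get an explicit per-channel address vector, and stores are emitted
 * in chunks of at most two channels, each covering two write-mask bits. */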
static bool
r600_lower_shared_io_impl(nir_function *func)
{
   nir_builder b;
   nir_builder_init(&b, func->impl);

   bool progress = false;
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr_safe(instr, block) {

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
         if (op->intrinsic != nir_intrinsic_load_shared &&
             op->intrinsic != nir_intrinsic_store_shared)
            continue;

         b.cursor = nir_before_instr(instr);

         if (op->intrinsic == nir_intrinsic_load_shared) {
            nir_ssa_def *addr = op->src[0].ssa;

            switch (nir_dest_num_components(op->dest)) {
            case 2: {
               auto addr2 = nir_iadd_imm(&b, addr, 4);
               addr = nir_vec2(&b, addr, addr2);
               break;
            }
            case 3: {
               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
               addr = nir_vec3(&b, addr,
                               nir_channel(&b, addr2, 0),
                               nir_channel(&b, addr2, 1));
               break;
            }
            case 4: {
               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
               break;
            }
            }

            auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
            load->num_components = nir_dest_num_components(op->dest);
            load->src[0] = nir_src_for_ssa(addr);
            nir_ssa_dest_init(&load->instr, &load->dest,
                              load->num_components, 32, NULL);
            nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
            nir_builder_instr_insert(&b, &load->instr);
         } else {
            nir_ssa_def *addr = op->src[1].ssa;
            for (int i = 0; i < 2; ++i) {
               unsigned test_mask = (0x3 << 2 * i);
               if (!(nir_intrinsic_write_mask(op) & test_mask))
                  continue;

               auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
               nir_intrinsic_set_write_mask(store, writemask);
               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
               store->num_components = store->src[0].ssa->num_components;
               bool start_even = (writemask & (1u << (2 * i)));

               auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
               store->src[1] = nir_src_for_ssa(addr2);

               nir_builder_instr_insert(&b, &store->instr);
            }
         }
         nir_instr_remove(instr);
         progress = true;
      }
   }
   return progress;
}

static bool
r600_lower_shared_io(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(function, nir) {
      if (function->impl &&
          r600_lower_shared_io_impl(function))
         progress = true;
   }
   return progress;
}
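
/* One round of the generic NIR cleanup passes; the callers below run this
 * in a loop until no pass makes progress anymore. */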
static bool
optimize_once(nir_shader *shader)
{
   bool progress = false;
   NIR_PASS(progress, shader, nir_copy_prop);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_algebraic);
   NIR_PASS(progress, shader, nir_opt_constant_folding);
   NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
   NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);

   NIR_PASS(progress, shader, nir_opt_remove_phis);

   if (nir_opt_trivial_continues(shader)) {
      progress = true;
      NIR_PASS(progress, shader, nir_copy_prop);
      NIR_PASS(progress, shader, nir_opt_dce);
   }

   NIR_PASS(progress, shader, nir_opt_if, false);
   NIR_PASS(progress, shader, nir_opt_dead_cf);
   NIR_PASS(progress, shader, nir_opt_cse);
   NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);

   NIR_PASS(progress, shader, nir_opt_conditional_discard);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_undef);
   return progress;
}

bool has_saturate(const nir_function *func)
{
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_alu) {
            auto alu = nir_instr_as_alu(instr);
            if (alu->dest.saturate)
               return true;
         }
      }
   }
   return false;
}
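
/* Main entry point of the NIR backend: lower and optimize the NIR shader,
 * translate it to the r600 IR, and finally produce the bytecode with the
 * legacy assembler. */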
int r600_shader_from_nir(struct r600_context *rctx,
                         struct r600_pipe_shader *pipeshader,
                         r600_shader_key *key)
{
   struct r600_pipe_shader_selector *sel = pipeshader->selector;

   r600::ShaderFromNir convert;

   if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
      fprintf(stderr, "PRE-OPT-NIR-----------------------------------------\n");
      nir_print_shader(sel->nir, stderr);
      fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
   }

   r600::sort_uniforms(sel->nir);

   NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);

   NIR_PASS_V(sel->nir, r600_lower_shared_io);
   NIR_PASS_V(sel->nir, r600_nir_lower_atomics);

   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_txp = ~0u,
   };
   NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);

   NIR_PASS_V(sel->nir, r600_nir_lower_int_tg4);
   NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);

   NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, r600_glsl_type_size,
              nir_lower_io_lower_64bit_to_32);
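
   /* The remaining IO lowering is stage specific: vertex inputs and
    * fragment outputs are vectorized, and the IO of the tessellation
    * stages (and of vertex shaders that feed them, i.e. key->vs.as_ls)
    * is rewritten to explicit offsets handled by r600_lower_tess_io. */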
   if (sel->nir->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
       (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_out, r600_glsl_type_size,
                 nir_lower_io_lower_64bit_to_32);
      NIR_PASS_V(sel->nir, r600_lower_tess_io, (pipe_prim_type)key->tcs.prim_mode);
   }

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
       sel->nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, r600_glsl_type_size,
                 nir_lower_io_lower_64bit_to_32);
   }

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
       sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
       (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      auto prim_type = sel->nir->info.stage == MESA_SHADER_TESS_CTRL ?
                          key->tcs.prim_mode : sel->nir->info.tess.primitive_mode;
      NIR_PASS_V(sel->nir, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type));
   }

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(sel->nir, r600_append_tcs_TF_emission,
                 (pipe_prim_type)key->tcs.prim_mode);

   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sel->nir->functions));
   bool optimize = func->impl->registers.length() == 0 && !has_saturate(func);

   if (optimize) {
      optimize_once(sel->nir);
      NIR_PASS_V(sel->nir, r600_lower_ubo_to_align16);
   }
   /* It seems the output of this optimization is cached somewhere, and
    * when there are registers, then we can no longer copy propagate, so
    * skip the optimization then. (There is probably a better way, but yeah)
    */
   if (optimize)
      while (optimize_once(sel->nir));

   NIR_PASS_V(sel->nir, nir_remove_dead_variables, nir_var_shader_in, NULL);
   NIR_PASS_V(sel->nir, nir_remove_dead_variables, nir_var_shader_out, NULL);

   NIR_PASS_V(sel->nir, nir_lower_vars_to_scratch,
              nir_var_function_temp,
              40,
              r600_get_natural_size_align_bytes);

   while (optimize && optimize_once(sel->nir));

   NIR_PASS_V(sel->nir, nir_lower_locals_to_regs);
   //NIR_PASS_V(sel->nir, nir_opt_algebraic);
   //NIR_PASS_V(sel->nir, nir_copy_prop);
   NIR_PASS_V(sel->nir, nir_lower_to_source_mods, nir_lower_float_source_mods);
   NIR_PASS_V(sel->nir, nir_convert_from_ssa, true);
   NIR_PASS_V(sel->nir, nir_opt_dce);

   if ((rctx->screen->b.debug_flags & DBG_NIR) &&
       (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
      fprintf(stderr, "-- NIR --------------------------------------------------------\n");
      struct nir_function *func = (struct nir_function *)exec_list_get_head(&sel->nir->functions);
      nir_index_ssa_defs(func->impl);
      nir_print_shader(sel->nir, stderr);
      fprintf(stderr, "-- END --------------------------------------------------------\n");
   }

   memset(&pipeshader->shader, 0, sizeof(r600_shader));
   pipeshader->scratch_space_needed = sel->nir->scratch_size;

   if (sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
       sel->nir->info.stage == MESA_SHADER_VERTEX ||
       sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
      pipeshader->shader.clip_dist_write |= ((1 << sel->nir->info.clip_distance_array_size) - 1);
      pipeshader->shader.cull_dist_write = ((1 << sel->nir->info.cull_distance_array_size) - 1)
                                           << sel->nir->info.clip_distance_array_size;
      pipeshader->shader.cc_dist_mask = (1 << (sel->nir->info.cull_distance_array_size +
                                               sel->nir->info.clip_distance_array_size)) - 1;
   }

   struct r600_shader *gs_shader = nullptr;
   if (rctx->gs_shader)
      gs_shader = &rctx->gs_shader->current->shader;
   r600_screen *rscreen = rctx->screen;

   bool r = convert.lower(sel->nir, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class);
   if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
      static int shnr = 0;
      char filename[4000];

      snprintf(filename, 4000, "nir-%s_%d.inc", sel->nir->info.name, shnr++);

      if (access(filename, F_OK) == -1) {
         FILE *f = fopen(filename, "w");

         if (f) {
            fprintf(f, "const char *shader_blob_%s = {\nR\"(", sel->nir->info.name);
            nir_print_shader(sel->nir, f);
            fprintf(f, ")\";\n");
            fclose(f);
         }
      }
      if (!r)
         return -2;
   }

   auto shader = convert.shader();

   r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family,
                      rscreen->has_compressed_msaa_texturing);

   r600::sfn_log << r600::SfnLog::shader_info
                 << "pipeshader->shader.processor_type = "
                 << pipeshader->shader.processor_type << "\n";

   pipeshader->shader.bc.type = pipeshader->shader.processor_type;
   pipeshader->shader.bc.isa = rctx->isa;

   r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
   if (!afs.lower(shader.m_ir)) {
      R600_ERR("%s: Lowering to assembly failed\n", __func__);
      return -1;
   }

   if (sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
      r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
      generate_gs_copy_shader(rctx, pipeshader, &sel->so);
      assert(pipeshader->gs_copy_shader);
   } else {
      r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
   }
   if (pipeshader->shader.bc.ngpr < 4)
      pipeshader->shader.bc.ngpr = 4;

   return 0;
}