/*
 * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "compiler/nir/nir_builder.h"
#include "bifrost_compile.h"
#include "bifrost_opts.h"
#include "bifrost_sched.h"
#include "compiler_defines.h"
#include "disassemble.h"
#include "bifrost_print.h"
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}
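
/* NIR lowering and optimisation run before MIR emission: I/O and registers
 * are lowered to SSA, then copy propagation, constant folding, ALU
 * scalarisation and if-optimisation are iterated until they stop making
 * progress, followed by a final copy-prop/DCE cleanup. */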
static void
optimize_nir(nir_shader *nir)
{
        bool progress;

        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);

        do {
                progress = false;

                NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);

                NIR_PASS(progress, nir, nir_lower_var_copies);
                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                NIR_PASS(progress, nir, nir_copy_prop);
                NIR_PASS(progress, nir, nir_opt_constant_folding);

                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
                NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL);
                NIR_PASS(progress, nir, nir_opt_if, true);
        } while (progress);

        NIR_PASS(progress, nir, nir_copy_prop);
        NIR_PASS(progress, nir, nir_opt_dce);
}
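
/* Flat value numbering for MIR: SSA defs keep their NIR indices, while NIR
 * registers are remapped to the range just past ssa_alloc so that both kinds
 * of values share a single index space. */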
static unsigned
nir_src_index(compiler_context *ctx, nir_src *src)
{
        if (src->is_ssa)
                return src->ssa->index;
        else
                return ctx->func->impl->ssa_alloc + src->reg.reg->index;
}
static unsigned
nir_dest_index(compiler_context *ctx, nir_dest *dst)
{
        if (dst->is_ssa)
                return dst->ssa.index;
        else
                return ctx->func->impl->ssa_alloc + dst->reg.reg->index;
}
static unsigned
nir_alu_src_index(compiler_context *ctx, nir_alu_src *src)
{
        return nir_src_index(ctx, &src->src);
}
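
/* MIR instructions are built by value, copied onto the heap, and appended to
 * the instruction list of the block currently being emitted. */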
struct bifrost_instruction *
mir_alloc_ins(struct bifrost_instruction instr)
{
        struct bifrost_instruction *heap_ins = malloc(sizeof(instr));
        memcpy(heap_ins, &instr, sizeof(instr));
        return heap_ins;
}
static void
emit_mir_instruction(struct compiler_context *ctx, struct bifrost_instruction instr)
{
        list_addtail(&(mir_alloc_ins(instr))->link, &ctx->current_block->instructions);
}
static void
bifrost_block_add_successor(bifrost_block *block, bifrost_block *successor)
{
        assert(block->num_successors < ARRAY_SIZE(block->successors));
        block->successors[block->num_successors++] = successor;
}
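
/* Constant values are not emitted directly; they are recorded in the
 * ssa_constants table (keyed by SSA index + 1) for users of the value to
 * pick up. */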
static void
emit_load_const(struct compiler_context *ctx, nir_load_const_instr *instr)
{
        nir_ssa_def def = instr->def;

        float *v = ralloc_array(NULL, float, 1);
        nir_const_value_to_array(v, instr->value, instr->def.num_components, f32);
        _mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v);
}
static uint32_t
alloc_mir_temp(struct compiler_context *ctx)
{
        return SSA_TEMP_VALUE(ctx->mir_temp++);
}
static uint32_t
emit_ld_vary_addr_constant(struct compiler_context *ctx, uint32_t location)
{
        // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12

        // ST_VAR.v4 T1, R12, R13, R14, R4

        // R61-R62 is filled with information needed for varying interpolation
        // This loads a vec3 with the information that ST_VAR needs to work

        uint32_t mir_temp_location = alloc_mir_temp(ctx);
        // This instruction loads a vec3 starting from the initial register
        struct bifrost_instruction instr = {
                .op = op_ld_var_addr,
                .dest_components = 3,
                .ssa_args = {
                        .dest = mir_temp_location,
                        .src0 = SSA_FIXED_REGISTER(61),
                        .src1 = SSA_FIXED_REGISTER(62),
                        .src2 = SSA_INVALID_VALUE,
                        .src3 = SSA_INVALID_VALUE,
                },
                .literal_args[0] = location,
        };
        emit_mir_instruction(ctx, instr);

        return mir_temp_location;
}
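
/* Gather up to four scalar values into one MIR vector value via op_create_vector. */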
// XXX: Doesn't support duplicated values in the components!
static void
emit_create_vector(struct compiler_context *ctx, unsigned dest, unsigned num_comps, uint32_t *comps)
{
        assert(num_comps <= 4 && "Can't make a vector larger than 4 components");

        // This instruction loads a vec3 starting from the initial register
        struct bifrost_instruction instr = {
                .op = op_create_vector,
                .dest_components = num_comps,
                .ssa_args = {
                        .dest = dest,
                },
        };

        uint32_t *srcs[4] = {
                &instr.ssa_args.src0,
                &instr.ssa_args.src1,
                &instr.ssa_args.src2,
                &instr.ssa_args.src3,
        };

        for (unsigned i = 0; i < 4; ++i) {
                if (i < num_comps)
                        *srcs[i] = comps[i];
                else
                        *srcs[i] = SSA_INVALID_VALUE;
        }

        emit_mir_instruction(ctx, instr);
}
static uint32_t
emit_extract_vector_element(struct compiler_context *ctx, unsigned ssa_vector, unsigned element)
{
        uint32_t mir_temp_location = alloc_mir_temp(ctx);
        // This instruction loads a vec3 starting from the initial register
        struct bifrost_instruction instr = {
                .op = op_extract_element,
                .dest_components = 1,
                .ssa_args = {
                        .dest = mir_temp_location,
                        .src0 = ssa_vector,
                        .src1 = SSA_INVALID_VALUE,
                        .src2 = SSA_INVALID_VALUE,
                        .src3 = SSA_INVALID_VALUE,
                },
                .literal_args[0] = element,
        };
        emit_mir_instruction(ctx, instr);

        return mir_temp_location;
}
static uint32_t
emit_movi(struct compiler_context *ctx, uint32_t literal)
{
        uint32_t mir_temp_location = alloc_mir_temp(ctx);
        // This instruction loads a vec3 starting from the initial register
        struct bifrost_instruction instr = {
                /* ... */
                .dest_components = 1,
                .ssa_args = {
                        .dest = mir_temp_location,
                        .src0 = SSA_INVALID_VALUE,
                        .src1 = SSA_INVALID_VALUE,
                        .src2 = SSA_INVALID_VALUE,
                        .src3 = SSA_INVALID_VALUE,
                },
                .literal_args[0] = literal,
        };
        emit_mir_instruction(ctx, instr);

        return mir_temp_location;
}
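
/* Resolve one scalar ALU source: the first channel that the instruction
 * actually reads supplies the swizzle, and an extract_element instruction is
 * emitted to pull that component out of the vector source. */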
static unsigned
nir_alu_src_index_scalar(compiler_context *ctx, nir_alu_instr *nir_instr, unsigned src)
{
        // NIR uses a combination of single channels plus swizzles to determine which component is pulled out of a source
        for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
                if (!nir_alu_instr_channel_used(nir_instr, src, c))
                        continue;

                // Pull the swizzle from this element that is active and use it as the source
                unsigned element = nir_instr->src[src].swizzle[c];

                // Create an op that extracts an element from a vector
                return emit_extract_vector_element(ctx, nir_alu_src_index(ctx, &nir_instr->src[src]), element);
        }

        assert(0);
        return 0;
}
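
/* Translate NIR intrinsics (UBO loads, SSBO stores, uniforms, inputs and
 * outputs) into MIR. Indirect addressing is not handled yet, so bindings and
 * offsets are required to be compile-time constants. */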
static void
emit_intrinsic(struct compiler_context *ctx, nir_intrinsic_instr *nir_instr)
{
        nir_const_value *const_offset;
        unsigned offset, reg;

        switch (nir_instr->intrinsic) {
        case nir_intrinsic_load_ubo: {
                nir_const_value *location = nir_src_as_const_value(nir_instr->src[0]);
                const_offset = nir_src_as_const_value(nir_instr->src[1]);
                assert (location && "no indirect ubo selection");
                assert (const_offset && "no indirect inputs");

                enum bifrost_ir_ops op;

                // load_ubo <UBO binding>, <byte offset>
                // ld_ubo <byte offset>, <UBO binding>
                switch (nir_dest_num_components(nir_instr->dest)) {
                /* ... op selection by component count ... */
                }

                reg = nir_dest_index(ctx, &nir_instr->dest);
                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = nir_dest_num_components(nir_instr->dest),
                        .ssa_args = {
                                .dest = reg,
                                .src0 = SSA_INVALID_VALUE,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .literal_args[0] = nir_src_as_uint(nir_instr->src[1]),
                        .literal_args[1] = nir_src_as_uint(nir_instr->src[0]),
                };

                emit_mir_instruction(ctx, instr);
                break;
        }
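        /* SSBO stores likewise require a constant binding and offset; the
         * value register becomes the instruction source and the byte offset
         * travels in literal_args[0]. */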
        case nir_intrinsic_store_ssbo: {
                nir_const_value *location = nir_src_as_const_value(nir_instr->src[1]);
                const_offset = nir_src_as_const_value(nir_instr->src[2]);
                assert (location && "no indirect ubo selection");
                assert (const_offset && "no indirect inputs");

                // store_ssbo <Value>, <binding>, <offset>
                // store_vN <Addr>, <Value>
                reg = nir_src_index(ctx, &nir_instr->src[0]);

                enum bifrost_ir_ops op;
                switch (nir_src_num_components(nir_instr->src[0])) {
                /* ... op selection by component count ... */
                }

                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = 0,
                        .ssa_args = {
                                .dest = SSA_INVALID_VALUE,
                                .src0 = reg,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .literal_args[0] = nir_src_as_uint(nir_instr->src[2]),
                };

                emit_mir_instruction(ctx, instr);
                break;
        }
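        /* Uniforms are read from uniform registers: a scalar load reads one
         * register directly, while a vector load fetches each component into
         * a temporary and then gathers them with emit_create_vector. */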
        case nir_intrinsic_load_uniform:
                offset = nir_intrinsic_base(nir_instr);

                if (nir_src_is_const(nir_instr->src[0])) {
                        offset += nir_src_as_uint(nir_instr->src[0]);
                } else {
                        assert(0 && "Can't handle indirect load_uniform");
                }

                reg = nir_dest_index(ctx, &nir_instr->dest);

                unsigned num_components = nir_dest_num_components(nir_instr->dest);
                if (num_components == 1) {
                        struct bifrost_instruction instr = {
                                /* ... */
                                .dest_components = 1,
                                .ssa_args = {
                                        .dest = reg,
                                        .src0 = SSA_FIXED_UREGISTER(offset),
                                        .src1 = SSA_INVALID_VALUE,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                },
                        };

                        emit_mir_instruction(ctx, instr);
                } else {
                        uint32_t comps[4];

                        for (unsigned i = 0; i < nir_dest_num_components(nir_instr->dest); ++i) {
                                uint32_t temp_dest = alloc_mir_temp(ctx);
                                comps[i] = temp_dest;
                                struct bifrost_instruction instr = {
                                        /* ... */
                                        .dest_components = 1,
                                        .ssa_args = {
                                                .dest = temp_dest,
                                                .src0 = SSA_FIXED_UREGISTER(offset + (i * 4)),
                                                .src1 = SSA_INVALID_VALUE,
                                                .src2 = SSA_INVALID_VALUE,
                                                .src3 = SSA_INVALID_VALUE,
                                        },
                                };

                                emit_mir_instruction(ctx, instr);
                        }

                        emit_create_vector(ctx, reg, num_components, comps);
                }
                break;
        case nir_intrinsic_load_input: {
                const_offset = nir_src_as_const_value(nir_instr->src[0]);
                assert (const_offset && "no indirect inputs");

                offset = nir_intrinsic_base(nir_instr) + nir_src_as_uint(nir_instr->src[0]);

                reg = nir_dest_index(ctx, &nir_instr->dest);

                enum bifrost_ir_ops op;
                switch (nir_dest_num_components(nir_instr->dest)) {
                /* ... op selection by component count ... */
                }

                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = nir_dest_num_components(nir_instr->dest),
                        .ssa_args = {
                                .dest = reg,
                                /* ... */
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                };

                emit_mir_instruction(ctx, instr);
                break;
        }
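        /* Output stores are stage-specific: fragment shaders build a store
         * address from the output offset (offset * 16) and write the colour
         * value there, while vertex shaders write varyings through
         * LD_VAR_ADDR/ST_VAR. */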
        case nir_intrinsic_store_output: {
                const_offset = nir_src_as_const_value(nir_instr->src[1]);
                assert(const_offset && "no indirect outputs");

                offset = nir_intrinsic_base(nir_instr);
                if (ctx->stage == MESA_SHADER_FRAGMENT) {
                        int comp = nir_intrinsic_component(nir_instr);

                        // XXX: Once we support more than colour output then this will need to change
                        void *entry = _mesa_hash_table_u64_search(ctx->outputs_nir_to_bi, offset + FRAG_RESULT_DATA0 + 1);

                        if (!entry) {
                                printf("WARNING: skipping fragment output\n");
                                break;
                        }

                        offset = (uintptr_t) (entry) - 1;
                        reg = nir_src_index(ctx, &nir_instr->src[0]);

                        enum bifrost_ir_ops op;
                        switch (nir_src_num_components(nir_instr->src[0])) {
                        /* ... op selection by component count ... */
                        }

                        // XXX: All offsets aren't vec4 aligned. Will need to adjust this in the future
                        // XXX: This needs to offset correctly in to memory so the blend step can pick it up
                        uint32_t movi = emit_movi(ctx, offset * 16);
                        uint32_t movi2 = emit_movi(ctx, 0);

                        uint32_t comps[2] = {
                                movi, movi2,
                        };
                        uint32_t offset_val = alloc_mir_temp(ctx);
                        emit_create_vector(ctx, offset_val, 2, comps);

                        struct bifrost_instruction instr = {
                                .op = op,
                                .dest_components = 0,
                                .ssa_args = {
                                        .dest = SSA_INVALID_VALUE,
                                        .src0 = offset_val,
                                        .src1 = reg,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                },
                        };

                        emit_mir_instruction(ctx, instr);
                } else if (ctx->stage == MESA_SHADER_VERTEX) {
                        int comp = nir_intrinsic_component(nir_instr);

                        void *entry = _mesa_hash_table_u64_search(ctx->varying_nir_to_bi, offset + 2);

                        if (!entry) {
                                printf("WARNING: skipping varying\n");
                                break;
                        }

                        offset = (uintptr_t) (entry) - 1;

                        reg = nir_src_index(ctx, &nir_instr->src[0]);
                        // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12
                        // ST_VAR.v4 T1, R12, R13, R14, R4
                        offset = emit_ld_vary_addr_constant(ctx, offset);
                        enum bifrost_ir_ops op;
                        switch (nir_src_num_components(nir_instr->src[0])) {
                        /* ... op selection by component count ... */
                        }

                        struct bifrost_instruction instr = {
                                .op = op,
                                .dest_components = 0,
                                .ssa_args = {
                                        .dest = SSA_INVALID_VALUE,
                                        .src0 = offset,
                                        .src1 = reg,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                },
                        };

                        emit_mir_instruction(ctx, instr);
                } else {
                        assert(0 && "Unknown store_output stage");
                }
                break;
        }
        default:
                printf ("Unhandled intrinsic %s\n", nir_intrinsic_infos[nir_instr->intrinsic].name);
                break;
        }
}
#define ALU_CASE(arguments, nir, name) \
        case nir_op_##nir: \
                argument_count = arguments; \
                op = op_##name; \
                break

#define ALU_CASE_MOD(arguments, nir, name, modifiers) \
        case nir_op_##nir: \
                argument_count = arguments; \
                op = op_##name; \
                src_modifiers = modifiers; \
                break
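
/* Most ALU ops map straight onto one MIR opcode through the ALU_CASE table
 * and share the common emission path at the bottom of this function. Ops that
 * need extra lowering (ineg, vecN construction, fdiv, comparisons, min3/max3)
 * build their MIR inline and return early instead of falling through. */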
static void
emit_alu(struct compiler_context *ctx, nir_alu_instr *nir_instr)
{
        unsigned dest = nir_dest_index(ctx, &nir_instr->dest.dest);
        unsigned op = ~0U, argument_count;
        unsigned src_modifiers = 0;

        switch (nir_instr->op) {
        ALU_CASE(2, fmul, fmul_f32);
        ALU_CASE(2, fadd, fadd_f32);
        ALU_CASE_MOD(2, fsub, fadd_f32, SOURCE_MODIFIER(1, SRC_MOD_NEG));
        ALU_CASE(1, ftrunc, trunc);
        ALU_CASE(1, fceil, ceil);
        ALU_CASE(1, ffloor, floor);
        ALU_CASE(1, fround_even, roundeven);
        ALU_CASE(1, frcp, frcp_fast_f32);
        ALU_CASE(2, fmax, max_f32);
        ALU_CASE(2, fmin, min_f32);
        ALU_CASE(2, iadd, add_i32);
        ALU_CASE(2, isub, sub_i32);
        ALU_CASE(2, imul, mul_i32);
        ALU_CASE(2, iand, and_i32);
        ALU_CASE(2, ior, or_i32);
        ALU_CASE(2, ixor, xor_i32);
        ALU_CASE(2, ishl, lshift_i32);
        ALU_CASE(2, ushr, rshift_i32);
        ALU_CASE(2, ishr, arshift_i32);
        case nir_op_ineg: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                printf("ineg 0x%08x\n", src0);
                struct bifrost_instruction instr = {
                        .op = op_sub_i32,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = SSA_FIXED_CONST_0,
                                .src1 = src0,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                };

                emit_mir_instruction(ctx, instr);
                return;
        }
        case nir_op_vec2: {
                uint32_t comps[3] = {
                        nir_alu_src_index(ctx, &nir_instr->src[0]),
                        nir_alu_src_index(ctx, &nir_instr->src[1]),
                };
                emit_create_vector(ctx, dest, 2, comps);
                return;
        }
        case nir_op_vec3: {
                uint32_t comps[3] = {
                        nir_alu_src_index(ctx, &nir_instr->src[0]),
                        nir_alu_src_index(ctx, &nir_instr->src[1]),
                        nir_alu_src_index(ctx, &nir_instr->src[2]),
                };
                emit_create_vector(ctx, dest, 3, comps);
                return;
        }
        case nir_op_vec4: {
                uint32_t comps[4] = {
                        nir_alu_src_index(ctx, &nir_instr->src[0]),
                        nir_alu_src_index(ctx, &nir_instr->src[1]),
                        nir_alu_src_index(ctx, &nir_instr->src[2]),
                        nir_alu_src_index(ctx, &nir_instr->src[3]),
                };
                emit_create_vector(ctx, dest, 4, comps);
                return;
        }
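        // There is no direct divide: a / b is lowered to a * (1 / b), with the
        // reciprocal computed into a temporary by frcp_fast_f32 first.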
        case nir_op_fdiv: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                uint32_t mir_temp_location = alloc_mir_temp(ctx);
                {
                        struct bifrost_instruction instr = {
                                .op = op_frcp_fast_f32,
                                .dest_components = 1,
                                .ssa_args = {
                                        .dest = mir_temp_location,
                                        .src0 = src1,
                                        .src1 = SSA_INVALID_VALUE,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                },
                        };

                        emit_mir_instruction(ctx, instr);
                }
                {
                        struct bifrost_instruction instr = {
                                .op = op_fmul_f32,
                                .dest_components = 1,
                                .ssa_args = {
                                        .dest = dest,
                                        .src0 = src0,
                                        .src1 = mir_temp_location,
                                        .src2 = SSA_INVALID_VALUE,
                                        .src3 = SSA_INVALID_VALUE,
                                },
                                .src_modifiers = src_modifiers,
                        };

                        emit_mir_instruction(ctx, instr);
                }

                return;
        }
        /* ... */
        {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                struct bifrost_instruction instr = {
                        /* ... */
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = src1,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = src_modifiers,
                        .literal_args[0] = 0, /* XXX: Comparison operator */
                };

                emit_mir_instruction(ctx, instr);
                return;
        }
        case nir_op_umin3:
        case nir_op_imin3:
        case nir_op_umax3:
        case nir_op_imax3: {
                unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                unsigned src2 = nir_alu_src_index_scalar(ctx, nir_instr, 2);

                if (nir_instr->op == nir_op_umin3) {
                        /* ... */
                } else if (nir_instr->op == nir_op_imin3) {
                        /* ... */
                } else if (nir_instr->op == nir_op_umax3) {
                        /* ... */
                } else if (nir_instr->op == nir_op_imax3) {
                        /* ... */
                }

                struct bifrost_instruction instr = {
                        .op = op,
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = src1,
                                .src2 = src2,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .src_modifiers = src_modifiers,
                };

                emit_mir_instruction(ctx, instr);
                return;
        }
        /* ... */
        {
                uint32_t movi = emit_movi(ctx, ~0U);
                unsigned src0 = nir_alu_src_index(ctx, &nir_instr->src[0]);
                unsigned src1 = nir_alu_src_index(ctx, &nir_instr->src[1]);
                struct bifrost_instruction instr = {
                        /* ... */
                        .dest_components = 1,
                        .ssa_args = {
                                .dest = dest,
                                .src0 = src0,
                                .src1 = src1,
                                .src2 = movi,
                                .src3 = SSA_FIXED_CONST_0,
                        },
                        .src_modifiers = src_modifiers,
                        .literal_args[0] = CSEL_IEQ, /* XXX: Comparison operator */
                };

                emit_mir_instruction(ctx, instr);
                return;
        }
        default:
                printf("Unhandled ALU op %s\n", nir_op_infos[nir_instr->op].name);
                return;
        }

        unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
        unsigned src1 = argument_count >= 2 ? nir_alu_src_index_scalar(ctx, nir_instr, 1) : SSA_INVALID_VALUE;
        unsigned src2 = argument_count >= 3 ? nir_alu_src_index_scalar(ctx, nir_instr, 2) : SSA_INVALID_VALUE;
        unsigned src3 = argument_count >= 4 ? nir_alu_src_index_scalar(ctx, nir_instr, 3) : SSA_INVALID_VALUE;

        struct bifrost_instruction instr = {
                .op = op,
                .dest_components = 1,
                .ssa_args = {
                        .dest = dest,
                        .src0 = src0,
                        .src1 = src1,
                        .src2 = src2,
                        .src3 = src3,
                },
                .src_modifiers = src_modifiers,
        };

        emit_mir_instruction(ctx, instr);
}
static void
emit_instr(struct compiler_context *ctx, struct nir_instr *instr)
{
        switch (instr->type) {
        case nir_instr_type_load_const:
                emit_load_const(ctx, nir_instr_as_load_const(instr));
                break;

        case nir_instr_type_intrinsic:
                emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
                break;

        case nir_instr_type_alu:
                emit_alu(ctx, nir_instr_as_alu(instr));
                break;

        case nir_instr_type_tex:
                printf("Unhandled NIR inst tex\n");
                break;

        case nir_instr_type_jump:
                printf("Unhandled NIR inst jump\n");
                break;

        case nir_instr_type_ssa_undef:
                printf("Unhandled NIR inst ssa_undef\n");
                break;

        default:
                printf("Unhandled instruction type\n");
                break;
        }
}
static bifrost_block *
emit_block(struct compiler_context *ctx, nir_block *block)
{
        bifrost_block *this_block = calloc(sizeof(bifrost_block), 1);
        list_addtail(&this_block->link, &ctx->blocks);

        /* Add this block to be a successor to the previous block */
        if (ctx->current_block)
                bifrost_block_add_successor(ctx->current_block, this_block);

        /* Set up current block */
        list_inithead(&this_block->instructions);
        ctx->current_block = this_block;

        nir_foreach_instr(instr, block) {
                emit_instr(ctx, instr);
                ++ctx->instruction_count;
        }

        print_mir_block(this_block, false);

        return this_block;
}
static void
emit_if(struct compiler_context *ctx, nir_if *nir_inst);
static struct bifrost_block *
emit_cf_list(struct compiler_context *ctx, struct exec_list *list)
{
        struct bifrost_block *start_block = NULL;
        foreach_list_typed(nir_cf_node, node, node, list) {
                switch (node->type) {
                case nir_cf_node_block: {
                        bifrost_block *block = emit_block(ctx, nir_cf_node_as_block(node));

                        if (!start_block)
                                start_block = block;

                        break;
                }
                case nir_cf_node_if:
                        emit_if(ctx, nir_cf_node_as_if(node));
                        break;

                case nir_cf_node_loop:
                case nir_cf_node_function:
                        break;
                }
        }

        return start_block;
}
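
/* Lower an if/else: emit a conditional branch on the condition value, emit
 * the then block followed by an unconditional jump over the else block, then
 * patch both branch targets once the block indices are known. If the else
 * list emitted no instructions, the jump is deleted instead. */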
static void
emit_if(struct compiler_context *ctx, nir_if *nir_inst)
{
        // XXX: Conditional branch instruction can do a variety of comparisons with the sources
        // Merge the source instruction `ine` with our conditional branch
        {
                uint32_t movi = emit_movi(ctx, ~0U);
                struct bifrost_instruction instr = {
                        /* ... */
                        .dest_components = 0,
                        .ssa_args = {
                                .dest = SSA_INVALID_VALUE,
                                .src0 = nir_src_index(ctx, &nir_inst->condition),
                                .src1 = movi,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .literal_args[0] = BR_COND_EQ, /* XXX: Comparison Arg type */
                        .literal_args[1] = 0, /* XXX: Branch target */
                };

                emit_mir_instruction(ctx, instr);
        }

        bifrost_instruction *true_branch = mir_last_instr_in_block(ctx->current_block);

        bifrost_block *true_block = emit_cf_list(ctx, &nir_inst->then_list);

        {
                struct bifrost_instruction instr = {
                        /* ... */
                        .dest_components = 0,
                        .ssa_args = {
                                .dest = SSA_INVALID_VALUE,
                                .src0 = SSA_INVALID_VALUE,
                                .src1 = SSA_INVALID_VALUE,
                                .src2 = SSA_INVALID_VALUE,
                                .src3 = SSA_INVALID_VALUE,
                        },
                        .literal_args[0] = BR_ALWAYS, /* XXX: ALWAYS */
                        .literal_args[1] = 0, /* XXX: Branch target */
                };

                emit_mir_instruction(ctx, instr);
        }

        bifrost_instruction *true_exit_branch = mir_last_instr_in_block(ctx->current_block);

        unsigned false_idx = ctx->block_count;
        unsigned inst_count = ctx->instruction_count;

        bifrost_block *false_block = emit_cf_list(ctx, &nir_inst->else_list);

        unsigned if_footer_idx = ctx->block_count;

        if (ctx->instruction_count == inst_count) {
                // If the else branch didn't have anything in it then we can remove the dead jump
                mir_remove_instr(true_exit_branch);
        } else {
                true_exit_branch->literal_args[1] = if_footer_idx;
        }

        true_branch->literal_args[1] = false_idx;
}
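
/* Compiler entry point: build the per-shader context, assign NIR->Bifrost
 * locations for uniforms, varyings and fragment outputs, run the NIR
 * optimisation passes, emit MIR for the shader body, then schedule the
 * program and dump the result through the disassembler. */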
int
bifrost_compile_shader_nir(nir_shader *nir, struct bifrost_program *program)
{
        struct compiler_context ictx = {
                /* ... */
                .stage = nir->info.stage,
        };

        struct compiler_context *ctx = &ictx;

        /* Initialize hash tables at a global (not per-block) level */
        ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
        ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);

        /* Assign actual uniform locations, skipping over samplers */
        ctx->uniform_nir_to_bi = _mesa_hash_table_u64_create(NULL);

        nir_foreach_variable(var, &nir->uniforms) {
                if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;

                for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
                        int id = ctx->uniform_count++;
                        _mesa_hash_table_u64_insert(ctx->uniform_nir_to_bi, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
                }
        }

        if (ctx->stage == MESA_SHADER_VERTEX) {
                ctx->varying_nir_to_bi = _mesa_hash_table_u64_create(NULL);
                nir_foreach_variable(var, &nir->outputs) {
                        if (var->data.location < VARYING_SLOT_VAR0) {
                                if (var->data.location == VARYING_SLOT_POS)
                                        ctx->varying_count++;
                                _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + 1, (void *) ((uintptr_t) (1)));
                                continue;
                        }

                        for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
                                for (int comp = 0; comp < 4; ++comp) {
                                        int id = comp + ctx->varying_count++;
                                        _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + col + comp + 1, (void *) ((uintptr_t) (id + 1)));
                                }
                        }
                }
        } else if (ctx->stage == MESA_SHADER_FRAGMENT) {
                ctx->outputs_nir_to_bi = _mesa_hash_table_u64_create(NULL);
                nir_foreach_variable(var, &nir->outputs) {
                        if (var->data.location >= FRAG_RESULT_DATA0 && var->data.location <= FRAG_RESULT_DATA7) {
                                int id = ctx->outputs_count++;
                                printf("Driver location: %d with id %d\n", var->data.location + 1, id);
                                _mesa_hash_table_u64_insert(ctx->outputs_nir_to_bi, var->data.location + 1, (void *) ((uintptr_t) (id + 1)));
                        }
                }
        }

        /* Optimisation passes */
        optimize_nir(nir);

        nir_print_shader(nir, stdout);

        /* Generate machine IR for shader */
        nir_foreach_function(func, nir) {
                ctx->func = func;

                nir_builder_init(ctx->b, func->impl);

                list_inithead(&ctx->blocks);
                ctx->block_count = 0;

                emit_cf_list(ctx, &func->impl->body);

                break; // XXX: Once we support multi function shaders then implement
        }

        util_dynarray_init(&program->compiled, NULL);

        // MIR pre-RA optimizations

        bool progress = false;

        mir_foreach_block(ctx, block) {
                // XXX: Not yet working
                // progress |= bifrost_opt_branch_fusion(ctx, block);
        }

        schedule_program(ctx);

        nir_print_shader(nir, stdout);
        disassemble_bifrost(program->compiled.data, program->compiled.size, false);

        return 0;
}