2 * Copyright 2017 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Karol Herbst <kherbst@redhat.com>
25 #include "compiler/nir/nir.h"
27 #include "util/u_debug.h"
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33 #include "tgsi/tgsi_from_mesa.h"
35 #if __cplusplus >= 201103L
36 #include <unordered_map>
38 #include <tr1/unordered_map>
46 #if __cplusplus >= 201103L
48 using std::unordered_map
;
51 using std::tr1::unordered_map
;
54 using namespace nv50_ir
;
57 type_size(const struct glsl_type
*type
, bool bindless
)
59 return glsl_count_attribute_slots(type
, false);
62 class Converter
: public ConverterCommon
65 Converter(Program
*, nir_shader
*, nv50_ir_prog_info
*);
69 typedef std::vector
<LValue
*> LValues
;
70 typedef unordered_map
<unsigned, LValues
> NirDefMap
;
71 typedef unordered_map
<unsigned, nir_load_const_instr
*> ImmediateMap
;
72 typedef unordered_map
<unsigned, uint32_t> NirArrayLMemOffsets
;
73 typedef unordered_map
<unsigned, BasicBlock
*> NirBlockMap
;
75 CacheMode
convert(enum gl_access_qualifier
);
76 TexTarget
convert(glsl_sampler_dim
, bool isArray
, bool isShadow
);
77 LValues
& convert(nir_alu_dest
*);
78 BasicBlock
* convert(nir_block
*);
79 LValues
& convert(nir_dest
*);
80 SVSemantic
convert(nir_intrinsic_op
);
81 Value
* convert(nir_load_const_instr
*, uint8_t);
82 LValues
& convert(nir_register
*);
83 LValues
& convert(nir_ssa_def
*);
85 Value
* getSrc(nir_alu_src
*, uint8_t component
= 0);
86 Value
* getSrc(nir_register
*, uint8_t);
87 Value
* getSrc(nir_src
*, uint8_t, bool indirect
= false);
88 Value
* getSrc(nir_ssa_def
*, uint8_t);
90 // returned value is the constant part of the given source (either the
91 // nir_src or the selected source component of an intrinsic). Even though
92 // this is mostly an optimization to be able to skip indirects in a few
93 // cases, sometimes we require immediate values or set some fileds on
94 // instructions (e.g. tex) in order for codegen to consume those.
95 // If the found value has not a constant part, the Value gets returned
96 // through the Value parameter.
97 uint32_t getIndirect(nir_src
*, uint8_t, Value
*&);
98 // isScalar indicates that the addressing is scalar, vec4 addressing is
100 uint32_t getIndirect(nir_intrinsic_instr
*, uint8_t s
, uint8_t c
, Value
*&,
101 bool isScalar
= false);
103 uint32_t getSlotAddress(nir_intrinsic_instr
*, uint8_t idx
, uint8_t slot
);
105 void setInterpolate(nv50_ir_varying
*,
110 Instruction
*loadFrom(DataFile
, uint8_t, DataType
, Value
*def
, uint32_t base
,
111 uint8_t c
, Value
*indirect0
= NULL
,
112 Value
*indirect1
= NULL
, bool patch
= false);
113 void storeTo(nir_intrinsic_instr
*, DataFile
, operation
, DataType
,
114 Value
*src
, uint8_t idx
, uint8_t c
, Value
*indirect0
= NULL
,
115 Value
*indirect1
= NULL
);
117 bool isFloatType(nir_alu_type
);
118 bool isSignedType(nir_alu_type
);
119 bool isResultFloat(nir_op
);
120 bool isResultSigned(nir_op
);
122 DataType
getDType(nir_alu_instr
*);
123 DataType
getDType(nir_intrinsic_instr
*);
124 DataType
getDType(nir_intrinsic_instr
*, bool isSigned
);
125 DataType
getDType(nir_op
, uint8_t);
127 std::vector
<DataType
> getSTypes(nir_alu_instr
*);
128 DataType
getSType(nir_src
&, bool isFloat
, bool isSigned
);
130 operation
getOperation(nir_intrinsic_op
);
131 operation
getOperation(nir_op
);
132 operation
getOperation(nir_texop
);
133 operation
preOperationNeeded(nir_op
);
135 int getSubOp(nir_intrinsic_op
);
136 int getSubOp(nir_op
);
138 CondCode
getCondCode(nir_op
);
143 bool visit(nir_alu_instr
*);
144 bool visit(nir_block
*);
145 bool visit(nir_cf_node
*);
146 bool visit(nir_deref_instr
*);
147 bool visit(nir_function
*);
148 bool visit(nir_if
*);
149 bool visit(nir_instr
*);
150 bool visit(nir_intrinsic_instr
*);
151 bool visit(nir_jump_instr
*);
152 bool visit(nir_load_const_instr
*);
153 bool visit(nir_loop
*);
154 bool visit(nir_ssa_undef_instr
*);
155 bool visit(nir_tex_instr
*);
158 Value
* applyProjection(Value
*src
, Value
*proj
);
159 unsigned int getNIRArgCount(TexInstruction::Target
&);
162 uint16_t handleDeref(nir_deref_instr
*, Value
* & indirect
, const nir_variable
* &);
163 CacheMode
getCacheModeFromVar(const nir_variable
*);
169 ImmediateMap immediates
;
170 NirArrayLMemOffsets regToLmemOffset
;
172 unsigned int curLoopDepth
;
176 Instruction
*immInsertPos
;
178 int clipVertexOutput
;
187 Converter::Converter(Program
*prog
, nir_shader
*nir
, nv50_ir_prog_info
*info
)
188 : ConverterCommon(prog
, info
),
193 zero
= mkImm((uint32_t)0);
197 Converter::convert(nir_block
*block
)
199 NirBlockMap::iterator it
= blocks
.find(block
->index
);
200 if (it
!= blocks
.end())
203 BasicBlock
*bb
= new BasicBlock(func
);
204 blocks
[block
->index
] = bb
;
209 Converter::isFloatType(nir_alu_type type
)
211 return nir_alu_type_get_base_type(type
) == nir_type_float
;
215 Converter::isSignedType(nir_alu_type type
)
217 return nir_alu_type_get_base_type(type
) == nir_type_int
;
221 Converter::isResultFloat(nir_op op
)
223 const nir_op_info
&info
= nir_op_infos
[op
];
224 if (info
.output_type
!= nir_type_invalid
)
225 return isFloatType(info
.output_type
);
227 ERROR("isResultFloat not implemented for %s\n", nir_op_infos
[op
].name
);
233 Converter::isResultSigned(nir_op op
)
236 // there is no umul and we get wrong results if we treat all muls as signed
241 const nir_op_info
&info
= nir_op_infos
[op
];
242 if (info
.output_type
!= nir_type_invalid
)
243 return isSignedType(info
.output_type
);
244 ERROR("isResultSigned not implemented for %s\n", nir_op_infos
[op
].name
);
251 Converter::getDType(nir_alu_instr
*insn
)
253 if (insn
->dest
.dest
.is_ssa
)
254 return getDType(insn
->op
, insn
->dest
.dest
.ssa
.bit_size
);
256 return getDType(insn
->op
, insn
->dest
.dest
.reg
.reg
->bit_size
);
260 Converter::getDType(nir_intrinsic_instr
*insn
)
263 switch (insn
->intrinsic
) {
264 case nir_intrinsic_shared_atomic_imax
:
265 case nir_intrinsic_shared_atomic_imin
:
266 case nir_intrinsic_ssbo_atomic_imax
:
267 case nir_intrinsic_ssbo_atomic_imin
:
275 return getDType(insn
, isSigned
);
279 Converter::getDType(nir_intrinsic_instr
*insn
, bool isSigned
)
281 if (insn
->dest
.is_ssa
)
282 return typeOfSize(insn
->dest
.ssa
.bit_size
/ 8, false, isSigned
);
284 return typeOfSize(insn
->dest
.reg
.reg
->bit_size
/ 8, false, isSigned
);
288 Converter::getDType(nir_op op
, uint8_t bitSize
)
290 DataType ty
= typeOfSize(bitSize
/ 8, isResultFloat(op
), isResultSigned(op
));
291 if (ty
== TYPE_NONE
) {
292 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos
[op
].name
, bitSize
);
298 std::vector
<DataType
>
299 Converter::getSTypes(nir_alu_instr
*insn
)
301 const nir_op_info
&info
= nir_op_infos
[insn
->op
];
302 std::vector
<DataType
> res(info
.num_inputs
);
304 for (uint8_t i
= 0; i
< info
.num_inputs
; ++i
) {
305 if (info
.input_types
[i
] != nir_type_invalid
) {
306 res
[i
] = getSType(insn
->src
[i
].src
, isFloatType(info
.input_types
[i
]), isSignedType(info
.input_types
[i
]));
308 ERROR("getSType not implemented for %s idx %u\n", info
.name
, i
);
319 Converter::getSType(nir_src
&src
, bool isFloat
, bool isSigned
)
323 bitSize
= src
.ssa
->bit_size
;
325 bitSize
= src
.reg
.reg
->bit_size
;
327 DataType ty
= typeOfSize(bitSize
/ 8, isFloat
, isSigned
);
328 if (ty
== TYPE_NONE
) {
336 ERROR("couldn't get Type for %s with bitSize %u\n", str
, bitSize
);
343 Converter::getOperation(nir_op op
)
346 // basic ops with float and int variants
355 case nir_op_ifind_msb
:
356 case nir_op_ufind_msb
:
378 case nir_op_fddx_coarse
:
379 case nir_op_fddx_fine
:
382 case nir_op_fddy_coarse
:
383 case nir_op_fddy_fine
:
401 case nir_op_pack_64_2x32_split
:
415 case nir_op_imul_high
:
416 case nir_op_umul_high
:
458 ERROR("couldn't get operation for op %s\n", nir_op_infos
[op
].name
);
465 Converter::getOperation(nir_texop op
)
477 case nir_texop_txf_ms
:
483 case nir_texop_query_levels
:
484 case nir_texop_texture_samples
:
488 ERROR("couldn't get operation for nir_texop %u\n", op
);
495 Converter::getOperation(nir_intrinsic_op op
)
498 case nir_intrinsic_emit_vertex
:
500 case nir_intrinsic_end_primitive
:
502 case nir_intrinsic_bindless_image_atomic_add
:
503 case nir_intrinsic_image_atomic_add
:
504 case nir_intrinsic_image_deref_atomic_add
:
505 case nir_intrinsic_bindless_image_atomic_and
:
506 case nir_intrinsic_image_atomic_and
:
507 case nir_intrinsic_image_deref_atomic_and
:
508 case nir_intrinsic_bindless_image_atomic_comp_swap
:
509 case nir_intrinsic_image_atomic_comp_swap
:
510 case nir_intrinsic_image_deref_atomic_comp_swap
:
511 case nir_intrinsic_bindless_image_atomic_exchange
:
512 case nir_intrinsic_image_atomic_exchange
:
513 case nir_intrinsic_image_deref_atomic_exchange
:
514 case nir_intrinsic_bindless_image_atomic_imax
:
515 case nir_intrinsic_image_atomic_imax
:
516 case nir_intrinsic_image_deref_atomic_imax
:
517 case nir_intrinsic_bindless_image_atomic_umax
:
518 case nir_intrinsic_image_atomic_umax
:
519 case nir_intrinsic_image_deref_atomic_umax
:
520 case nir_intrinsic_bindless_image_atomic_imin
:
521 case nir_intrinsic_image_atomic_imin
:
522 case nir_intrinsic_image_deref_atomic_imin
:
523 case nir_intrinsic_bindless_image_atomic_umin
:
524 case nir_intrinsic_image_atomic_umin
:
525 case nir_intrinsic_image_deref_atomic_umin
:
526 case nir_intrinsic_bindless_image_atomic_or
:
527 case nir_intrinsic_image_atomic_or
:
528 case nir_intrinsic_image_deref_atomic_or
:
529 case nir_intrinsic_bindless_image_atomic_xor
:
530 case nir_intrinsic_image_atomic_xor
:
531 case nir_intrinsic_image_deref_atomic_xor
:
533 case nir_intrinsic_bindless_image_load
:
534 case nir_intrinsic_image_load
:
535 case nir_intrinsic_image_deref_load
:
537 case nir_intrinsic_bindless_image_samples
:
538 case nir_intrinsic_image_samples
:
539 case nir_intrinsic_image_deref_samples
:
540 case nir_intrinsic_bindless_image_size
:
541 case nir_intrinsic_image_size
:
542 case nir_intrinsic_image_deref_size
:
544 case nir_intrinsic_bindless_image_store
:
545 case nir_intrinsic_image_store
:
546 case nir_intrinsic_image_deref_store
:
549 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op
);
556 Converter::preOperationNeeded(nir_op op
)
568 Converter::getSubOp(nir_op op
)
571 case nir_op_imul_high
:
572 case nir_op_umul_high
:
573 return NV50_IR_SUBOP_MUL_HIGH
;
577 return NV50_IR_SUBOP_SHIFT_WRAP
;
584 Converter::getSubOp(nir_intrinsic_op op
)
587 case nir_intrinsic_bindless_image_atomic_add
:
588 case nir_intrinsic_global_atomic_add
:
589 case nir_intrinsic_image_atomic_add
:
590 case nir_intrinsic_image_deref_atomic_add
:
591 case nir_intrinsic_shared_atomic_add
:
592 case nir_intrinsic_ssbo_atomic_add
:
593 return NV50_IR_SUBOP_ATOM_ADD
;
594 case nir_intrinsic_bindless_image_atomic_and
:
595 case nir_intrinsic_global_atomic_and
:
596 case nir_intrinsic_image_atomic_and
:
597 case nir_intrinsic_image_deref_atomic_and
:
598 case nir_intrinsic_shared_atomic_and
:
599 case nir_intrinsic_ssbo_atomic_and
:
600 return NV50_IR_SUBOP_ATOM_AND
;
601 case nir_intrinsic_bindless_image_atomic_comp_swap
:
602 case nir_intrinsic_global_atomic_comp_swap
:
603 case nir_intrinsic_image_atomic_comp_swap
:
604 case nir_intrinsic_image_deref_atomic_comp_swap
:
605 case nir_intrinsic_shared_atomic_comp_swap
:
606 case nir_intrinsic_ssbo_atomic_comp_swap
:
607 return NV50_IR_SUBOP_ATOM_CAS
;
608 case nir_intrinsic_bindless_image_atomic_exchange
:
609 case nir_intrinsic_global_atomic_exchange
:
610 case nir_intrinsic_image_atomic_exchange
:
611 case nir_intrinsic_image_deref_atomic_exchange
:
612 case nir_intrinsic_shared_atomic_exchange
:
613 case nir_intrinsic_ssbo_atomic_exchange
:
614 return NV50_IR_SUBOP_ATOM_EXCH
;
615 case nir_intrinsic_bindless_image_atomic_or
:
616 case nir_intrinsic_global_atomic_or
:
617 case nir_intrinsic_image_atomic_or
:
618 case nir_intrinsic_image_deref_atomic_or
:
619 case nir_intrinsic_shared_atomic_or
:
620 case nir_intrinsic_ssbo_atomic_or
:
621 return NV50_IR_SUBOP_ATOM_OR
;
622 case nir_intrinsic_bindless_image_atomic_imax
:
623 case nir_intrinsic_bindless_image_atomic_umax
:
624 case nir_intrinsic_global_atomic_imax
:
625 case nir_intrinsic_global_atomic_umax
:
626 case nir_intrinsic_image_atomic_imax
:
627 case nir_intrinsic_image_atomic_umax
:
628 case nir_intrinsic_image_deref_atomic_imax
:
629 case nir_intrinsic_image_deref_atomic_umax
:
630 case nir_intrinsic_shared_atomic_imax
:
631 case nir_intrinsic_shared_atomic_umax
:
632 case nir_intrinsic_ssbo_atomic_imax
:
633 case nir_intrinsic_ssbo_atomic_umax
:
634 return NV50_IR_SUBOP_ATOM_MAX
;
635 case nir_intrinsic_bindless_image_atomic_imin
:
636 case nir_intrinsic_bindless_image_atomic_umin
:
637 case nir_intrinsic_global_atomic_imin
:
638 case nir_intrinsic_global_atomic_umin
:
639 case nir_intrinsic_image_atomic_imin
:
640 case nir_intrinsic_image_atomic_umin
:
641 case nir_intrinsic_image_deref_atomic_imin
:
642 case nir_intrinsic_image_deref_atomic_umin
:
643 case nir_intrinsic_shared_atomic_imin
:
644 case nir_intrinsic_shared_atomic_umin
:
645 case nir_intrinsic_ssbo_atomic_imin
:
646 case nir_intrinsic_ssbo_atomic_umin
:
647 return NV50_IR_SUBOP_ATOM_MIN
;
648 case nir_intrinsic_bindless_image_atomic_xor
:
649 case nir_intrinsic_global_atomic_xor
:
650 case nir_intrinsic_image_atomic_xor
:
651 case nir_intrinsic_image_deref_atomic_xor
:
652 case nir_intrinsic_shared_atomic_xor
:
653 case nir_intrinsic_ssbo_atomic_xor
:
654 return NV50_IR_SUBOP_ATOM_XOR
;
656 case nir_intrinsic_group_memory_barrier
:
657 case nir_intrinsic_memory_barrier
:
658 case nir_intrinsic_memory_barrier_buffer
:
659 case nir_intrinsic_memory_barrier_image
:
660 return NV50_IR_SUBOP_MEMBAR(M
, GL
);
661 case nir_intrinsic_memory_barrier_shared
:
662 return NV50_IR_SUBOP_MEMBAR(M
, CTA
);
664 case nir_intrinsic_vote_all
:
665 return NV50_IR_SUBOP_VOTE_ALL
;
666 case nir_intrinsic_vote_any
:
667 return NV50_IR_SUBOP_VOTE_ANY
;
668 case nir_intrinsic_vote_ieq
:
669 return NV50_IR_SUBOP_VOTE_UNI
;
676 Converter::getCondCode(nir_op op
)
695 ERROR("couldn't get CondCode for op %s\n", nir_op_infos
[op
].name
);
702 Converter::convert(nir_alu_dest
*dest
)
704 return convert(&dest
->dest
);
708 Converter::convert(nir_dest
*dest
)
711 return convert(&dest
->ssa
);
712 if (dest
->reg
.indirect
) {
713 ERROR("no support for indirects.");
716 return convert(dest
->reg
.reg
);
720 Converter::convert(nir_register
*reg
)
722 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
723 if (it
!= regDefs
.end())
726 LValues
newDef(reg
->num_components
);
727 for (uint8_t i
= 0; i
< reg
->num_components
; i
++)
728 newDef
[i
] = getScratch(std::max(4, reg
->bit_size
/ 8));
729 return regDefs
[reg
->index
] = newDef
;
733 Converter::convert(nir_ssa_def
*def
)
735 NirDefMap::iterator it
= ssaDefs
.find(def
->index
);
736 if (it
!= ssaDefs
.end())
739 LValues
newDef(def
->num_components
);
740 for (uint8_t i
= 0; i
< def
->num_components
; i
++)
741 newDef
[i
] = getSSA(std::max(4, def
->bit_size
/ 8));
742 return ssaDefs
[def
->index
] = newDef
;
746 Converter::getSrc(nir_alu_src
*src
, uint8_t component
)
748 if (src
->abs
|| src
->negate
) {
749 ERROR("modifiers currently not supported on nir_alu_src\n");
752 return getSrc(&src
->src
, src
->swizzle
[component
]);
756 Converter::getSrc(nir_register
*reg
, uint8_t idx
)
758 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
759 if (it
== regDefs
.end())
760 return convert(reg
)[idx
];
761 return it
->second
[idx
];
765 Converter::getSrc(nir_src
*src
, uint8_t idx
, bool indirect
)
768 return getSrc(src
->ssa
, idx
);
770 if (src
->reg
.indirect
) {
772 return getSrc(src
->reg
.indirect
, idx
);
773 ERROR("no support for indirects.");
778 return getSrc(src
->reg
.reg
, idx
);
782 Converter::getSrc(nir_ssa_def
*src
, uint8_t idx
)
784 ImmediateMap::iterator iit
= immediates
.find(src
->index
);
785 if (iit
!= immediates
.end())
786 return convert((*iit
).second
, idx
);
788 NirDefMap::iterator it
= ssaDefs
.find(src
->index
);
789 if (it
== ssaDefs
.end()) {
790 ERROR("SSA value %u not found\n", src
->index
);
794 return it
->second
[idx
];
798 Converter::getIndirect(nir_src
*src
, uint8_t idx
, Value
*&indirect
)
800 nir_const_value
*offset
= nir_src_as_const_value(*src
);
804 return offset
[0].u32
;
807 indirect
= getSrc(src
, idx
, true);
812 Converter::getIndirect(nir_intrinsic_instr
*insn
, uint8_t s
, uint8_t c
, Value
*&indirect
, bool isScalar
)
814 int32_t idx
= nir_intrinsic_base(insn
) + getIndirect(&insn
->src
[s
], c
, indirect
);
815 if (indirect
&& !isScalar
)
816 indirect
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), indirect
, loadImm(NULL
, 4));
821 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot
, unsigned *name
, unsigned *index
)
823 assert(name
&& index
);
825 if (slot
>= VERT_ATTRIB_MAX
) {
826 ERROR("invalid varying slot %u\n", slot
);
831 if (slot
>= VERT_ATTRIB_GENERIC0
&&
832 slot
< VERT_ATTRIB_GENERIC0
+ VERT_ATTRIB_GENERIC_MAX
) {
833 *name
= TGSI_SEMANTIC_GENERIC
;
834 *index
= slot
- VERT_ATTRIB_GENERIC0
;
838 if (slot
>= VERT_ATTRIB_TEX0
&&
839 slot
< VERT_ATTRIB_TEX0
+ VERT_ATTRIB_TEX_MAX
) {
840 *name
= TGSI_SEMANTIC_TEXCOORD
;
841 *index
= slot
- VERT_ATTRIB_TEX0
;
846 case VERT_ATTRIB_COLOR0
:
847 *name
= TGSI_SEMANTIC_COLOR
;
850 case VERT_ATTRIB_COLOR1
:
851 *name
= TGSI_SEMANTIC_COLOR
;
854 case VERT_ATTRIB_EDGEFLAG
:
855 *name
= TGSI_SEMANTIC_EDGEFLAG
;
858 case VERT_ATTRIB_FOG
:
859 *name
= TGSI_SEMANTIC_FOG
;
862 case VERT_ATTRIB_NORMAL
:
863 *name
= TGSI_SEMANTIC_NORMAL
;
866 case VERT_ATTRIB_POS
:
867 *name
= TGSI_SEMANTIC_POSITION
;
870 case VERT_ATTRIB_POINT_SIZE
:
871 *name
= TGSI_SEMANTIC_PSIZE
;
875 ERROR("unknown vert attrib slot %u\n", slot
);
882 Converter::setInterpolate(nv50_ir_varying
*var
,
888 case INTERP_MODE_FLAT
:
891 case INTERP_MODE_NONE
:
892 if (semantic
== TGSI_SEMANTIC_COLOR
)
894 else if (semantic
== TGSI_SEMANTIC_POSITION
)
897 case INTERP_MODE_NOPERSPECTIVE
:
900 case INTERP_MODE_SMOOTH
:
903 var
->centroid
= centroid
;
907 calcSlots(const glsl_type
*type
, Program::Type stage
, const shader_info
&info
,
908 bool input
, const nir_variable
*var
)
910 if (!type
->is_array())
911 return type
->count_attribute_slots(false);
915 case Program::TYPE_GEOMETRY
:
916 slots
= type
->uniform_locations();
918 slots
/= info
.gs
.vertices_in
;
920 case Program::TYPE_TESSELLATION_CONTROL
:
921 case Program::TYPE_TESSELLATION_EVAL
:
922 // remove first dimension
923 if (var
->data
.patch
|| (!input
&& stage
== Program::TYPE_TESSELLATION_EVAL
))
924 slots
= type
->uniform_locations();
926 slots
= type
->fields
.array
->uniform_locations();
929 slots
= type
->count_attribute_slots(false);
936 bool Converter::assignSlots() {
940 info
->io
.viewportId
= -1;
942 info
->numOutputs
= 0;
944 // we have to fixup the uniform locations for arrays
945 unsigned numImages
= 0;
946 nir_foreach_variable(var
, &nir
->uniforms
) {
947 const glsl_type
*type
= var
->type
;
948 if (!type
->without_array()->is_image())
950 var
->data
.driver_location
= numImages
;
951 numImages
+= type
->is_array() ? type
->arrays_of_arrays_size() : 1;
954 info
->numSysVals
= 0;
955 for (uint8_t i
= 0; i
< SYSTEM_VALUE_MAX
; ++i
) {
956 if (!(nir
->info
.system_values_read
& 1ull << i
))
959 info
->sv
[info
->numSysVals
].sn
= tgsi_get_sysval_semantic(i
);
960 info
->sv
[info
->numSysVals
].si
= 0;
961 info
->sv
[info
->numSysVals
].input
= 0; // TODO inferSysValDirection(sn);
964 case SYSTEM_VALUE_INSTANCE_ID
:
965 info
->io
.instanceId
= info
->numSysVals
;
967 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
968 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
969 info
->sv
[info
->numSysVals
].patch
= 1;
971 case SYSTEM_VALUE_VERTEX_ID
:
972 info
->io
.vertexId
= info
->numSysVals
;
978 info
->numSysVals
+= 1;
981 if (prog
->getType() == Program::TYPE_COMPUTE
)
984 nir_foreach_variable(var
, &nir
->inputs
) {
985 const glsl_type
*type
= var
->type
;
986 int slot
= var
->data
.location
;
987 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, true, var
);
988 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
989 : type
->component_slots();
990 uint32_t frac
= var
->data
.location_frac
;
991 uint32_t vary
= var
->data
.driver_location
;
993 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
998 assert(vary
+ slots
<= PIPE_MAX_SHADER_INPUTS
);
1000 switch(prog
->getType()) {
1001 case Program::TYPE_FRAGMENT
:
1002 tgsi_get_gl_varying_semantic((gl_varying_slot
)slot
, true,
1004 for (uint16_t i
= 0; i
< slots
; ++i
) {
1005 setInterpolate(&info
->in
[vary
+ i
], var
->data
.interpolation
,
1006 var
->data
.centroid
| var
->data
.sample
, name
);
1009 case Program::TYPE_GEOMETRY
:
1010 tgsi_get_gl_varying_semantic((gl_varying_slot
)slot
, true,
1013 case Program::TYPE_TESSELLATION_CONTROL
:
1014 case Program::TYPE_TESSELLATION_EVAL
:
1015 tgsi_get_gl_varying_semantic((gl_varying_slot
)slot
, true,
1017 if (var
->data
.patch
&& name
== TGSI_SEMANTIC_PATCH
)
1018 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1020 case Program::TYPE_VERTEX
:
1021 vert_attrib_to_tgsi_semantic((gl_vert_attrib
)slot
, &name
, &index
);
1023 case TGSI_SEMANTIC_EDGEFLAG
:
1024 info
->io
.edgeFlagIn
= vary
;
1031 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1035 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1036 info
->in
[vary
].id
= vary
;
1037 info
->in
[vary
].patch
= var
->data
.patch
;
1038 info
->in
[vary
].sn
= name
;
1039 info
->in
[vary
].si
= index
+ i
;
1040 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1042 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1044 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1046 info
->in
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1048 info
->numInputs
= std::max
<uint8_t>(info
->numInputs
, vary
);
1051 nir_foreach_variable(var
, &nir
->outputs
) {
1052 const glsl_type
*type
= var
->type
;
1053 int slot
= var
->data
.location
;
1054 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, false, var
);
1055 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
1056 : type
->component_slots();
1057 uint32_t frac
= var
->data
.location_frac
;
1058 uint32_t vary
= var
->data
.driver_location
;
1060 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
1065 assert(vary
< PIPE_MAX_SHADER_OUTPUTS
);
1067 switch(prog
->getType()) {
1068 case Program::TYPE_FRAGMENT
:
1069 tgsi_get_gl_frag_result_semantic((gl_frag_result
)slot
, &name
, &index
);
1071 case TGSI_SEMANTIC_COLOR
:
1072 if (!var
->data
.fb_fetch_output
)
1073 info
->prop
.fp
.numColourResults
++;
1075 if (var
->data
.location
== FRAG_RESULT_COLOR
&&
1076 nir
->info
.outputs_written
& BITFIELD64_BIT(var
->data
.location
))
1077 info
->prop
.fp
.separateFragData
= true;
1079 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1080 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1081 index
= index
== 0 ? var
->data
.index
: index
;
1083 case TGSI_SEMANTIC_POSITION
:
1084 info
->io
.fragDepth
= vary
;
1085 info
->prop
.fp
.writesDepth
= true;
1087 case TGSI_SEMANTIC_SAMPLEMASK
:
1088 info
->io
.sampleMask
= vary
;
1094 case Program::TYPE_GEOMETRY
:
1095 case Program::TYPE_TESSELLATION_CONTROL
:
1096 case Program::TYPE_TESSELLATION_EVAL
:
1097 case Program::TYPE_VERTEX
:
1098 tgsi_get_gl_varying_semantic((gl_varying_slot
)slot
, true,
1101 if (var
->data
.patch
&& name
!= TGSI_SEMANTIC_TESSINNER
&&
1102 name
!= TGSI_SEMANTIC_TESSOUTER
)
1103 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1106 case TGSI_SEMANTIC_CLIPDIST
:
1107 info
->io
.genUserClip
= -1;
1109 case TGSI_SEMANTIC_CLIPVERTEX
:
1110 clipVertexOutput
= vary
;
1112 case TGSI_SEMANTIC_EDGEFLAG
:
1113 info
->io
.edgeFlagOut
= vary
;
1115 case TGSI_SEMANTIC_POSITION
:
1116 if (clipVertexOutput
< 0)
1117 clipVertexOutput
= vary
;
1124 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1128 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1129 info
->out
[vary
].id
= vary
;
1130 info
->out
[vary
].patch
= var
->data
.patch
;
1131 info
->out
[vary
].sn
= name
;
1132 info
->out
[vary
].si
= index
+ i
;
1133 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1135 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1137 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1139 info
->out
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1141 if (nir
->info
.outputs_read
& 1ull << slot
)
1142 info
->out
[vary
].oread
= 1;
1144 info
->numOutputs
= std::max
<uint8_t>(info
->numOutputs
, vary
);
1147 if (info
->io
.genUserClip
> 0) {
1148 info
->io
.clipDistances
= info
->io
.genUserClip
;
1150 const unsigned int nOut
= (info
->io
.genUserClip
+ 3) / 4;
1152 for (unsigned int n
= 0; n
< nOut
; ++n
) {
1153 unsigned int i
= info
->numOutputs
++;
1154 info
->out
[i
].id
= i
;
1155 info
->out
[i
].sn
= TGSI_SEMANTIC_CLIPDIST
;
1156 info
->out
[i
].si
= n
;
1157 info
->out
[i
].mask
= ((1 << info
->io
.clipDistances
) - 1) >> (n
* 4);
1161 return info
->assignSlots(info
) == 0;
1165 Converter::getSlotAddress(nir_intrinsic_instr
*insn
, uint8_t idx
, uint8_t slot
)
1168 int offset
= nir_intrinsic_component(insn
);
1171 if (nir_intrinsic_infos
[insn
->intrinsic
].has_dest
)
1172 ty
= getDType(insn
);
1174 ty
= getSType(insn
->src
[0], false, false);
1176 switch (insn
->intrinsic
) {
1177 case nir_intrinsic_load_input
:
1178 case nir_intrinsic_load_interpolated_input
:
1179 case nir_intrinsic_load_per_vertex_input
:
1182 case nir_intrinsic_load_output
:
1183 case nir_intrinsic_load_per_vertex_output
:
1184 case nir_intrinsic_store_output
:
1185 case nir_intrinsic_store_per_vertex_output
:
1189 ERROR("unknown intrinsic in getSlotAddress %s",
1190 nir_intrinsic_infos
[insn
->intrinsic
].name
);
1196 if (typeSizeof(ty
) == 8) {
1208 assert(!input
|| idx
< PIPE_MAX_SHADER_INPUTS
);
1209 assert(input
|| idx
< PIPE_MAX_SHADER_OUTPUTS
);
1211 const nv50_ir_varying
*vary
= input
? info
->in
: info
->out
;
1212 return vary
[idx
].slot
[slot
] * 4;
1216 Converter::loadFrom(DataFile file
, uint8_t i
, DataType ty
, Value
*def
,
1217 uint32_t base
, uint8_t c
, Value
*indirect0
,
1218 Value
*indirect1
, bool patch
)
1220 unsigned int tySize
= typeSizeof(ty
);
1223 (file
== FILE_MEMORY_CONST
|| file
== FILE_MEMORY_BUFFER
|| indirect0
)) {
1224 Value
*lo
= getSSA();
1225 Value
*hi
= getSSA();
1228 mkLoad(TYPE_U32
, lo
,
1229 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
),
1231 loi
->setIndirect(0, 1, indirect1
);
1232 loi
->perPatch
= patch
;
1235 mkLoad(TYPE_U32
, hi
,
1236 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
+ 4),
1238 hii
->setIndirect(0, 1, indirect1
);
1239 hii
->perPatch
= patch
;
1241 return mkOp2(OP_MERGE
, ty
, def
, lo
, hi
);
1244 mkLoad(ty
, def
, mkSymbol(file
, i
, ty
, base
+ c
* tySize
), indirect0
);
1245 ld
->setIndirect(0, 1, indirect1
);
1246 ld
->perPatch
= patch
;
1252 Converter::storeTo(nir_intrinsic_instr
*insn
, DataFile file
, operation op
,
1253 DataType ty
, Value
*src
, uint8_t idx
, uint8_t c
,
1254 Value
*indirect0
, Value
*indirect1
)
1256 uint8_t size
= typeSizeof(ty
);
1257 uint32_t address
= getSlotAddress(insn
, idx
, c
);
1259 if (size
== 8 && indirect0
) {
1261 mkSplit(split
, 4, src
);
1263 if (op
== OP_EXPORT
) {
1264 split
[0] = mkMov(getSSA(), split
[0], ty
)->getDef(0);
1265 split
[1] = mkMov(getSSA(), split
[1], ty
)->getDef(0);
1268 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
), indirect0
,
1269 split
[0])->perPatch
= info
->out
[idx
].patch
;
1270 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
+ 4), indirect0
,
1271 split
[1])->perPatch
= info
->out
[idx
].patch
;
1273 if (op
== OP_EXPORT
)
1274 src
= mkMov(getSSA(size
), src
, ty
)->getDef(0);
1275 mkStore(op
, ty
, mkSymbol(file
, 0, ty
, address
), indirect0
,
1276 src
)->perPatch
= info
->out
[idx
].patch
;
1281 Converter::parseNIR()
1283 info
->bin
.tlsSpace
= 0;
1284 info
->io
.clipDistances
= nir
->info
.clip_distance_array_size
;
1285 info
->io
.cullDistances
= nir
->info
.cull_distance_array_size
;
1287 switch(prog
->getType()) {
1288 case Program::TYPE_COMPUTE
:
1289 info
->prop
.cp
.numThreads
[0] = nir
->info
.cs
.local_size
[0];
1290 info
->prop
.cp
.numThreads
[1] = nir
->info
.cs
.local_size
[1];
1291 info
->prop
.cp
.numThreads
[2] = nir
->info
.cs
.local_size
[2];
1292 info
->bin
.smemSize
= nir
->info
.cs
.shared_size
;
1294 case Program::TYPE_FRAGMENT
:
1295 info
->prop
.fp
.earlyFragTests
= nir
->info
.fs
.early_fragment_tests
;
1296 info
->prop
.fp
.persampleInvocation
=
1297 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_ID
) ||
1298 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1299 info
->prop
.fp
.postDepthCoverage
= nir
->info
.fs
.post_depth_coverage
;
1300 info
->prop
.fp
.readsSampleLocations
=
1301 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1302 info
->prop
.fp
.usesDiscard
= nir
->info
.fs
.uses_discard
;
1303 info
->prop
.fp
.usesSampleMaskIn
=
1304 !!(nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_MASK_IN
);
1306 case Program::TYPE_GEOMETRY
:
1307 info
->prop
.gp
.inputPrim
= nir
->info
.gs
.input_primitive
;
1308 info
->prop
.gp
.instanceCount
= nir
->info
.gs
.invocations
;
1309 info
->prop
.gp
.maxVertices
= nir
->info
.gs
.vertices_out
;
1310 info
->prop
.gp
.outputPrim
= nir
->info
.gs
.output_primitive
;
1312 case Program::TYPE_TESSELLATION_CONTROL
:
1313 case Program::TYPE_TESSELLATION_EVAL
:
1314 if (nir
->info
.tess
.primitive_mode
== GL_ISOLINES
)
1315 info
->prop
.tp
.domain
= GL_LINES
;
1317 info
->prop
.tp
.domain
= nir
->info
.tess
.primitive_mode
;
1318 info
->prop
.tp
.outputPatchSize
= nir
->info
.tess
.tcs_vertices_out
;
1319 info
->prop
.tp
.outputPrim
=
1320 nir
->info
.tess
.point_mode
? PIPE_PRIM_POINTS
: PIPE_PRIM_TRIANGLES
;
1321 info
->prop
.tp
.partitioning
= (nir
->info
.tess
.spacing
+ 1) % 3;
1322 info
->prop
.tp
.winding
= !nir
->info
.tess
.ccw
;
1324 case Program::TYPE_VERTEX
:
1325 info
->prop
.vp
.usesDrawParameters
=
1326 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX
)) ||
1327 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE
)) ||
1328 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID
));
1338 Converter::visit(nir_function
*function
)
1340 assert(function
->impl
);
1342 // usually the blocks will set everything up, but main is special
1343 BasicBlock
*entry
= new BasicBlock(prog
->main
);
1344 exit
= new BasicBlock(prog
->main
);
1345 blocks
[nir_start_block(function
->impl
)->index
] = entry
;
1346 prog
->main
->setEntry(entry
);
1347 prog
->main
->setExit(exit
);
1349 setPosition(entry
, true);
1351 if (info
->io
.genUserClip
> 0) {
1352 for (int c
= 0; c
< 4; ++c
)
1353 clipVtx
[c
] = getScratch();
1356 switch (prog
->getType()) {
1357 case Program::TYPE_TESSELLATION_CONTROL
:
1359 OP_SUB
, TYPE_U32
, getSSA(),
1360 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LANEID
, 0)),
1361 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_INVOCATION_ID
, 0)));
1363 case Program::TYPE_FRAGMENT
: {
1364 Symbol
*sv
= mkSysVal(SV_POSITION
, 3);
1365 fragCoord
[3] = mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), sv
);
1366 fp
.position
= mkOp1v(OP_RCP
, TYPE_F32
, fragCoord
[3], fragCoord
[3]);
1373 nir_foreach_register(reg
, &function
->impl
->registers
) {
1374 if (reg
->num_array_elems
) {
1375 // TODO: packed variables would be nice, but MemoryOpt fails
1376 // replace 4 with reg->num_components
1377 uint32_t size
= 4 * reg
->num_array_elems
* (reg
->bit_size
/ 8);
1378 regToLmemOffset
[reg
->index
] = info
->bin
.tlsSpace
;
1379 info
->bin
.tlsSpace
+= size
;
1383 nir_index_ssa_defs(function
->impl
);
1384 foreach_list_typed(nir_cf_node
, node
, node
, &function
->impl
->body
) {
1389 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::TREE
);
1390 setPosition(exit
, true);
1392 if ((prog
->getType() == Program::TYPE_VERTEX
||
1393 prog
->getType() == Program::TYPE_TESSELLATION_EVAL
)
1394 && info
->io
.genUserClip
> 0)
1395 handleUserClipPlanes();
1397 // TODO: for non main function this needs to be a OP_RETURN
1398 mkOp(OP_EXIT
, TYPE_NONE
, NULL
)->terminator
= 1;
1403 Converter::visit(nir_cf_node
*node
)
1405 switch (node
->type
) {
1406 case nir_cf_node_block
:
1407 return visit(nir_cf_node_as_block(node
));
1408 case nir_cf_node_if
:
1409 return visit(nir_cf_node_as_if(node
));
1410 case nir_cf_node_loop
:
1411 return visit(nir_cf_node_as_loop(node
));
1413 ERROR("unknown nir_cf_node type %u\n", node
->type
);
1419 Converter::visit(nir_block
*block
)
1421 if (!block
->predecessors
->entries
&& block
->instr_list
.is_empty())
1424 BasicBlock
*bb
= convert(block
);
1426 setPosition(bb
, true);
1427 nir_foreach_instr(insn
, block
) {
1435 Converter::visit(nir_if
*nif
)
1437 DataType sType
= getSType(nif
->condition
, false, false);
1438 Value
*src
= getSrc(&nif
->condition
, 0);
1440 nir_block
*lastThen
= nir_if_last_then_block(nif
);
1441 nir_block
*lastElse
= nir_if_last_else_block(nif
);
1443 assert(!lastThen
->successors
[1]);
1444 assert(!lastElse
->successors
[1]);
1446 BasicBlock
*ifBB
= convert(nir_if_first_then_block(nif
));
1447 BasicBlock
*elseBB
= convert(nir_if_first_else_block(nif
));
1449 bb
->cfg
.attach(&ifBB
->cfg
, Graph::Edge::TREE
);
1450 bb
->cfg
.attach(&elseBB
->cfg
, Graph::Edge::TREE
);
1452 // we only insert joinats, if both nodes end up at the end of the if again.
1453 // the reason for this to not happens are breaks/continues/ret/... which
1454 // have their own handling
1455 if (lastThen
->successors
[0] == lastElse
->successors
[0])
1456 bb
->joinAt
= mkFlow(OP_JOINAT
, convert(lastThen
->successors
[0]),
1459 mkFlow(OP_BRA
, elseBB
, CC_EQ
, src
)->setType(sType
);
1461 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->then_list
) {
1465 setPosition(convert(lastThen
), true);
1466 if (!bb
->getExit() ||
1467 !bb
->getExit()->asFlow() ||
1468 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1469 BasicBlock
*tailBB
= convert(lastThen
->successors
[0]);
1470 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1471 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1474 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->else_list
) {
1478 setPosition(convert(lastElse
), true);
1479 if (!bb
->getExit() ||
1480 !bb
->getExit()->asFlow() ||
1481 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1482 BasicBlock
*tailBB
= convert(lastElse
->successors
[0]);
1483 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1484 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1487 if (lastThen
->successors
[0] == lastElse
->successors
[0]) {
1488 setPosition(convert(lastThen
->successors
[0]), true);
1489 mkFlow(OP_JOIN
, NULL
, CC_ALWAYS
, NULL
)->fixed
= 1;
1496 Converter::visit(nir_loop
*loop
)
1499 func
->loopNestingBound
= std::max(func
->loopNestingBound
, curLoopDepth
);
1501 BasicBlock
*loopBB
= convert(nir_loop_first_block(loop
));
1502 BasicBlock
*tailBB
=
1503 convert(nir_cf_node_as_block(nir_cf_node_next(&loop
->cf_node
)));
1504 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::TREE
);
1506 mkFlow(OP_PREBREAK
, tailBB
, CC_ALWAYS
, NULL
);
1507 setPosition(loopBB
, false);
1508 mkFlow(OP_PRECONT
, loopBB
, CC_ALWAYS
, NULL
);
1510 foreach_list_typed(nir_cf_node
, node
, node
, &loop
->body
) {
1514 Instruction
*insn
= bb
->getExit();
1515 if (bb
->cfg
.incidentCount() != 0) {
1516 if (!insn
|| !insn
->asFlow()) {
1517 mkFlow(OP_CONT
, loopBB
, CC_ALWAYS
, NULL
);
1518 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::BACK
);
1519 } else if (insn
&& insn
->op
== OP_BRA
&& !insn
->getPredicate() &&
1520 tailBB
->cfg
.incidentCount() == 0) {
1521 // RA doesn't like having blocks around with no incident edge,
1522 // so we create a fake one to make it happy
1523 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::TREE
);
1533 Converter::visit(nir_instr
*insn
)
1535 // we need an insertion point for on the fly generated immediate loads
1536 immInsertPos
= bb
->getExit();
1537 switch (insn
->type
) {
1538 case nir_instr_type_alu
:
1539 return visit(nir_instr_as_alu(insn
));
1540 case nir_instr_type_deref
:
1541 return visit(nir_instr_as_deref(insn
));
1542 case nir_instr_type_intrinsic
:
1543 return visit(nir_instr_as_intrinsic(insn
));
1544 case nir_instr_type_jump
:
1545 return visit(nir_instr_as_jump(insn
));
1546 case nir_instr_type_load_const
:
1547 return visit(nir_instr_as_load_const(insn
));
1548 case nir_instr_type_ssa_undef
:
1549 return visit(nir_instr_as_ssa_undef(insn
));
1550 case nir_instr_type_tex
:
1551 return visit(nir_instr_as_tex(insn
));
1553 ERROR("unknown nir_instr type %u\n", insn
->type
);
1560 Converter::convert(nir_intrinsic_op intr
)
1563 case nir_intrinsic_load_base_vertex
:
1564 return SV_BASEVERTEX
;
1565 case nir_intrinsic_load_base_instance
:
1566 return SV_BASEINSTANCE
;
1567 case nir_intrinsic_load_draw_id
:
1569 case nir_intrinsic_load_front_face
:
1571 case nir_intrinsic_load_helper_invocation
:
1572 return SV_THREAD_KILL
;
1573 case nir_intrinsic_load_instance_id
:
1574 return SV_INSTANCE_ID
;
1575 case nir_intrinsic_load_invocation_id
:
1576 return SV_INVOCATION_ID
;
1577 case nir_intrinsic_load_local_group_size
:
1579 case nir_intrinsic_load_local_invocation_id
:
1581 case nir_intrinsic_load_num_work_groups
:
1583 case nir_intrinsic_load_patch_vertices_in
:
1584 return SV_VERTEX_COUNT
;
1585 case nir_intrinsic_load_primitive_id
:
1586 return SV_PRIMITIVE_ID
;
1587 case nir_intrinsic_load_sample_id
:
1588 return SV_SAMPLE_INDEX
;
1589 case nir_intrinsic_load_sample_mask_in
:
1590 return SV_SAMPLE_MASK
;
1591 case nir_intrinsic_load_sample_pos
:
1592 return SV_SAMPLE_POS
;
1593 case nir_intrinsic_load_subgroup_eq_mask
:
1594 return SV_LANEMASK_EQ
;
1595 case nir_intrinsic_load_subgroup_ge_mask
:
1596 return SV_LANEMASK_GE
;
1597 case nir_intrinsic_load_subgroup_gt_mask
:
1598 return SV_LANEMASK_GT
;
1599 case nir_intrinsic_load_subgroup_le_mask
:
1600 return SV_LANEMASK_LE
;
1601 case nir_intrinsic_load_subgroup_lt_mask
:
1602 return SV_LANEMASK_LT
;
1603 case nir_intrinsic_load_subgroup_invocation
:
1605 case nir_intrinsic_load_tess_coord
:
1606 return SV_TESS_COORD
;
1607 case nir_intrinsic_load_tess_level_inner
:
1608 return SV_TESS_INNER
;
1609 case nir_intrinsic_load_tess_level_outer
:
1610 return SV_TESS_OUTER
;
1611 case nir_intrinsic_load_vertex_id
:
1612 return SV_VERTEX_ID
;
1613 case nir_intrinsic_load_work_group_id
:
1616 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1617 nir_intrinsic_infos
[intr
].name
);
1624 Converter::visit(nir_intrinsic_instr
*insn
)
1626 nir_intrinsic_op op
= insn
->intrinsic
;
1627 const nir_intrinsic_info
&opInfo
= nir_intrinsic_infos
[op
];
1628 unsigned dest_components
= nir_intrinsic_dest_components(insn
);
1631 case nir_intrinsic_load_uniform
: {
1632 LValues
&newDefs
= convert(&insn
->dest
);
1633 const DataType dType
= getDType(insn
);
1635 uint32_t coffset
= getIndirect(insn
, 0, 0, indirect
);
1636 for (uint8_t i
= 0; i
< dest_components
; ++i
) {
1637 loadFrom(FILE_MEMORY_CONST
, 0, dType
, newDefs
[i
], 16 * coffset
, i
, indirect
);
1641 case nir_intrinsic_store_output
:
1642 case nir_intrinsic_store_per_vertex_output
: {
1644 DataType dType
= getSType(insn
->src
[0], false, false);
1645 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_store_output
? 1 : 2, 0, indirect
);
1647 for (uint8_t i
= 0u; i
< nir_intrinsic_src_components(insn
, 0); ++i
) {
1648 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
1652 Value
*src
= getSrc(&insn
->src
[0], i
);
1653 switch (prog
->getType()) {
1654 case Program::TYPE_FRAGMENT
: {
1655 if (info
->out
[idx
].sn
== TGSI_SEMANTIC_POSITION
) {
1656 // TGSI uses a different interface than NIR, TGSI stores that
1657 // value in the z component, NIR in X
1659 src
= mkOp1v(OP_SAT
, TYPE_F32
, getScratch(), src
);
1663 case Program::TYPE_GEOMETRY
:
1664 case Program::TYPE_VERTEX
: {
1665 if (info
->io
.genUserClip
> 0 && idx
== (uint32_t)clipVertexOutput
) {
1666 mkMov(clipVtx
[i
], src
);
1675 storeTo(insn
, FILE_SHADER_OUTPUT
, OP_EXPORT
, dType
, src
, idx
, i
+ offset
, indirect
);
1679 case nir_intrinsic_load_input
:
1680 case nir_intrinsic_load_interpolated_input
:
1681 case nir_intrinsic_load_output
: {
1682 LValues
&newDefs
= convert(&insn
->dest
);
1685 if (prog
->getType() == Program::TYPE_FRAGMENT
&&
1686 op
== nir_intrinsic_load_output
) {
1687 std::vector
<Value
*> defs
, srcs
;
1690 srcs
.push_back(getSSA());
1691 srcs
.push_back(getSSA());
1692 Value
*x
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 0));
1693 Value
*y
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 1));
1694 mkCvt(OP_CVT
, TYPE_U32
, srcs
[0], TYPE_F32
, x
)->rnd
= ROUND_Z
;
1695 mkCvt(OP_CVT
, TYPE_U32
, srcs
[1], TYPE_F32
, y
)->rnd
= ROUND_Z
;
1697 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LAYER
, 0)));
1698 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_SAMPLE_INDEX
, 0)));
1700 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1701 defs
.push_back(newDefs
[i
]);
1705 TexInstruction
*texi
= mkTex(OP_TXF
, TEX_TARGET_2D_MS_ARRAY
, 0, 0, defs
, srcs
);
1706 texi
->tex
.levelZero
= 1;
1707 texi
->tex
.mask
= mask
;
1708 texi
->tex
.useOffsets
= 0;
1709 texi
->tex
.r
= 0xffff;
1710 texi
->tex
.s
= 0xffff;
1712 info
->prop
.fp
.readsFramebuffer
= true;
1716 const DataType dType
= getDType(insn
);
1718 bool input
= op
!= nir_intrinsic_load_output
;
1722 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_load_interpolated_input
? 1 : 0, 0, indirect
);
1723 nv50_ir_varying
& vary
= input
? info
->in
[idx
] : info
->out
[idx
];
1725 // see load_barycentric_* handling
1726 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1727 mode
= translateInterpMode(&vary
, nvirOp
);
1728 if (op
== nir_intrinsic_load_interpolated_input
) {
1729 ImmediateValue immMode
;
1730 if (getSrc(&insn
->src
[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode
))
1731 mode
|= immMode
.reg
.data
.u32
;
1735 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1736 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1737 Symbol
*sym
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
);
1738 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1740 if (typeSizeof(dType
) == 8) {
1741 Value
*lo
= getSSA();
1742 Value
*hi
= getSSA();
1743 Instruction
*interp
;
1745 interp
= mkOp1(nvirOp
, TYPE_U32
, lo
, sym
);
1746 if (nvirOp
== OP_PINTERP
)
1747 interp
->setSrc(s
++, fp
.position
);
1748 if (mode
& NV50_IR_INTERP_OFFSET
)
1749 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1750 interp
->setInterpolate(mode
);
1751 interp
->setIndirect(0, 0, indirect
);
1753 Symbol
*sym1
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
+ 4);
1754 interp
= mkOp1(nvirOp
, TYPE_U32
, hi
, sym1
);
1755 if (nvirOp
== OP_PINTERP
)
1756 interp
->setSrc(s
++, fp
.position
);
1757 if (mode
& NV50_IR_INTERP_OFFSET
)
1758 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1759 interp
->setInterpolate(mode
);
1760 interp
->setIndirect(0, 0, indirect
);
1762 mkOp2(OP_MERGE
, dType
, newDefs
[i
], lo
, hi
);
1764 Instruction
*interp
= mkOp1(nvirOp
, dType
, newDefs
[i
], sym
);
1765 if (nvirOp
== OP_PINTERP
)
1766 interp
->setSrc(s
++, fp
.position
);
1767 if (mode
& NV50_IR_INTERP_OFFSET
)
1768 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1769 interp
->setInterpolate(mode
);
1770 interp
->setIndirect(0, 0, indirect
);
1773 mkLoad(dType
, newDefs
[i
], sym
, indirect
)->perPatch
= vary
.patch
;
1778 case nir_intrinsic_load_kernel_input
: {
1779 assert(prog
->getType() == Program::TYPE_COMPUTE
);
1780 assert(insn
->num_components
== 1);
1782 LValues
&newDefs
= convert(&insn
->dest
);
1783 const DataType dType
= getDType(insn
);
1785 uint32_t idx
= getIndirect(insn
, 0, 0, indirect
, true);
1787 mkLoad(dType
, newDefs
[0], mkSymbol(FILE_SHADER_INPUT
, 0, dType
, idx
), indirect
);
1790 case nir_intrinsic_load_barycentric_at_offset
:
1791 case nir_intrinsic_load_barycentric_at_sample
:
1792 case nir_intrinsic_load_barycentric_centroid
:
1793 case nir_intrinsic_load_barycentric_pixel
:
1794 case nir_intrinsic_load_barycentric_sample
: {
1795 LValues
&newDefs
= convert(&insn
->dest
);
1798 if (op
== nir_intrinsic_load_barycentric_centroid
||
1799 op
== nir_intrinsic_load_barycentric_sample
) {
1800 mode
= NV50_IR_INTERP_CENTROID
;
1801 } else if (op
== nir_intrinsic_load_barycentric_at_offset
) {
1803 for (uint8_t c
= 0; c
< 2; c
++) {
1804 offs
[c
] = getScratch();
1805 mkOp2(OP_MIN
, TYPE_F32
, offs
[c
], getSrc(&insn
->src
[0], c
), loadImm(NULL
, 0.4375f
));
1806 mkOp2(OP_MAX
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, -0.5f
));
1807 mkOp2(OP_MUL
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, 4096.0f
));
1808 mkCvt(OP_CVT
, TYPE_S32
, offs
[c
], TYPE_F32
, offs
[c
]);
1810 mkOp3v(OP_INSBF
, TYPE_U32
, newDefs
[0], offs
[1], mkImm(0x1010), offs
[0]);
1812 mode
= NV50_IR_INTERP_OFFSET
;
1813 } else if (op
== nir_intrinsic_load_barycentric_pixel
) {
1814 mode
= NV50_IR_INTERP_DEFAULT
;
1815 } else if (op
== nir_intrinsic_load_barycentric_at_sample
) {
1816 info
->prop
.fp
.readsSampleLocations
= true;
1817 mkOp1(OP_PIXLD
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0], 0))->subOp
= NV50_IR_SUBOP_PIXLD_OFFSET
;
1818 mode
= NV50_IR_INTERP_OFFSET
;
1820 unreachable("all intrinsics already handled above");
1823 loadImm(newDefs
[1], mode
);
1826 case nir_intrinsic_discard
:
1827 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
);
1829 case nir_intrinsic_discard_if
: {
1830 Value
*pred
= getSSA(1, FILE_PREDICATE
);
1831 if (insn
->num_components
> 1) {
1832 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1836 mkCmp(OP_SET
, CC_NE
, TYPE_U8
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1837 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
)->setPredicate(CC_P
, pred
);
1840 case nir_intrinsic_load_base_vertex
:
1841 case nir_intrinsic_load_base_instance
:
1842 case nir_intrinsic_load_draw_id
:
1843 case nir_intrinsic_load_front_face
:
1844 case nir_intrinsic_load_helper_invocation
:
1845 case nir_intrinsic_load_instance_id
:
1846 case nir_intrinsic_load_invocation_id
:
1847 case nir_intrinsic_load_local_group_size
:
1848 case nir_intrinsic_load_local_invocation_id
:
1849 case nir_intrinsic_load_num_work_groups
:
1850 case nir_intrinsic_load_patch_vertices_in
:
1851 case nir_intrinsic_load_primitive_id
:
1852 case nir_intrinsic_load_sample_id
:
1853 case nir_intrinsic_load_sample_mask_in
:
1854 case nir_intrinsic_load_sample_pos
:
1855 case nir_intrinsic_load_subgroup_eq_mask
:
1856 case nir_intrinsic_load_subgroup_ge_mask
:
1857 case nir_intrinsic_load_subgroup_gt_mask
:
1858 case nir_intrinsic_load_subgroup_le_mask
:
1859 case nir_intrinsic_load_subgroup_lt_mask
:
1860 case nir_intrinsic_load_subgroup_invocation
:
1861 case nir_intrinsic_load_tess_coord
:
1862 case nir_intrinsic_load_tess_level_inner
:
1863 case nir_intrinsic_load_tess_level_outer
:
1864 case nir_intrinsic_load_vertex_id
:
1865 case nir_intrinsic_load_work_group_id
: {
1866 const DataType dType
= getDType(insn
);
1867 SVSemantic sv
= convert(op
);
1868 LValues
&newDefs
= convert(&insn
->dest
);
1870 for (uint8_t i
= 0u; i
< nir_intrinsic_dest_components(insn
); ++i
) {
1872 if (typeSizeof(dType
) == 8)
1877 if (sv
== SV_TID
&& info
->prop
.cp
.numThreads
[i
] == 1) {
1880 Symbol
*sym
= mkSysVal(sv
, i
);
1881 Instruction
*rdsv
= mkOp1(OP_RDSV
, TYPE_U32
, def
, sym
);
1882 if (sv
== SV_TESS_OUTER
|| sv
== SV_TESS_INNER
)
1886 if (typeSizeof(dType
) == 8)
1887 mkOp2(OP_MERGE
, dType
, newDefs
[i
], def
, loadImm(getSSA(), 0u));
1892 case nir_intrinsic_load_subgroup_size
: {
1893 LValues
&newDefs
= convert(&insn
->dest
);
1894 loadImm(newDefs
[0], 32u);
1897 case nir_intrinsic_vote_all
:
1898 case nir_intrinsic_vote_any
:
1899 case nir_intrinsic_vote_ieq
: {
1900 LValues
&newDefs
= convert(&insn
->dest
);
1901 Value
*pred
= getScratch(1, FILE_PREDICATE
);
1902 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1903 mkOp1(OP_VOTE
, TYPE_U32
, pred
, pred
)->subOp
= getSubOp(op
);
1904 mkCvt(OP_CVT
, TYPE_U32
, newDefs
[0], TYPE_U8
, pred
);
1907 case nir_intrinsic_ballot
: {
1908 LValues
&newDefs
= convert(&insn
->dest
);
1909 Value
*pred
= getSSA(1, FILE_PREDICATE
);
1910 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1911 mkOp1(OP_VOTE
, TYPE_U32
, newDefs
[0], pred
)->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
1914 case nir_intrinsic_read_first_invocation
:
1915 case nir_intrinsic_read_invocation
: {
1916 LValues
&newDefs
= convert(&insn
->dest
);
1917 const DataType dType
= getDType(insn
);
1918 Value
*tmp
= getScratch();
1920 if (op
== nir_intrinsic_read_first_invocation
) {
1921 mkOp1(OP_VOTE
, TYPE_U32
, tmp
, mkImm(1))->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
1922 mkOp1(OP_BREV
, TYPE_U32
, tmp
, tmp
);
1923 mkOp1(OP_BFIND
, TYPE_U32
, tmp
, tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
1925 tmp
= getSrc(&insn
->src
[1], 0);
1927 for (uint8_t i
= 0; i
< dest_components
; ++i
) {
1928 mkOp3(OP_SHFL
, dType
, newDefs
[i
], getSrc(&insn
->src
[0], i
), tmp
, mkImm(0x1f))
1929 ->subOp
= NV50_IR_SUBOP_SHFL_IDX
;
1933 case nir_intrinsic_load_per_vertex_input
: {
1934 const DataType dType
= getDType(insn
);
1935 LValues
&newDefs
= convert(&insn
->dest
);
1936 Value
*indirectVertex
;
1937 Value
*indirectOffset
;
1938 uint32_t baseVertex
= getIndirect(&insn
->src
[0], 0, indirectVertex
);
1939 uint32_t idx
= getIndirect(insn
, 1, 0, indirectOffset
);
1941 Value
*vtxBase
= mkOp2v(OP_PFETCH
, TYPE_U32
, getSSA(4, FILE_ADDRESS
),
1942 mkImm(baseVertex
), indirectVertex
);
1943 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1944 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1945 loadFrom(FILE_SHADER_INPUT
, 0, dType
, newDefs
[i
], address
, 0,
1946 indirectOffset
, vtxBase
, info
->in
[idx
].patch
);
1950 case nir_intrinsic_load_per_vertex_output
: {
1951 const DataType dType
= getDType(insn
);
1952 LValues
&newDefs
= convert(&insn
->dest
);
1953 Value
*indirectVertex
;
1954 Value
*indirectOffset
;
1955 uint32_t baseVertex
= getIndirect(&insn
->src
[0], 0, indirectVertex
);
1956 uint32_t idx
= getIndirect(insn
, 1, 0, indirectOffset
);
1957 Value
*vtxBase
= NULL
;
1960 vtxBase
= indirectVertex
;
1962 vtxBase
= loadImm(NULL
, baseVertex
);
1964 vtxBase
= mkOp2v(OP_ADD
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), outBase
, vtxBase
);
1966 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1967 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1968 loadFrom(FILE_SHADER_OUTPUT
, 0, dType
, newDefs
[i
], address
, 0,
1969 indirectOffset
, vtxBase
, info
->in
[idx
].patch
);
1973 case nir_intrinsic_emit_vertex
:
1974 if (info
->io
.genUserClip
> 0)
1975 handleUserClipPlanes();
1977 case nir_intrinsic_end_primitive
: {
1978 uint32_t idx
= nir_intrinsic_stream_id(insn
);
1979 mkOp1(getOperation(op
), TYPE_U32
, NULL
, mkImm(idx
))->fixed
= 1;
1982 case nir_intrinsic_load_ubo
: {
1983 const DataType dType
= getDType(insn
);
1984 LValues
&newDefs
= convert(&insn
->dest
);
1985 Value
*indirectIndex
;
1986 Value
*indirectOffset
;
1987 uint32_t index
= getIndirect(&insn
->src
[0], 0, indirectIndex
) + 1;
1988 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
1990 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1991 loadFrom(FILE_MEMORY_CONST
, index
, dType
, newDefs
[i
], offset
, i
,
1992 indirectOffset
, indirectIndex
);
1996 case nir_intrinsic_get_buffer_size
: {
1997 LValues
&newDefs
= convert(&insn
->dest
);
1998 const DataType dType
= getDType(insn
);
1999 Value
*indirectBuffer
;
2000 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2002 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, 0);
2003 mkOp1(OP_BUFQ
, dType
, newDefs
[0], sym
)->setIndirect(0, 0, indirectBuffer
);
2006 case nir_intrinsic_store_ssbo
: {
2007 DataType sType
= getSType(insn
->src
[0], false, false);
2008 Value
*indirectBuffer
;
2009 Value
*indirectOffset
;
2010 uint32_t buffer
= getIndirect(&insn
->src
[1], 0, indirectBuffer
);
2011 uint32_t offset
= getIndirect(&insn
->src
[2], 0, indirectOffset
);
2013 for (uint8_t i
= 0u; i
< nir_intrinsic_src_components(insn
, 0); ++i
) {
2014 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2016 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, sType
,
2017 offset
+ i
* typeSizeof(sType
));
2018 mkStore(OP_STORE
, sType
, sym
, indirectOffset
, getSrc(&insn
->src
[0], i
))
2019 ->setIndirect(0, 1, indirectBuffer
);
2021 info
->io
.globalAccess
|= 0x2;
2024 case nir_intrinsic_load_ssbo
: {
2025 const DataType dType
= getDType(insn
);
2026 LValues
&newDefs
= convert(&insn
->dest
);
2027 Value
*indirectBuffer
;
2028 Value
*indirectOffset
;
2029 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2030 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2032 for (uint8_t i
= 0u; i
< dest_components
; ++i
)
2033 loadFrom(FILE_MEMORY_BUFFER
, buffer
, dType
, newDefs
[i
], offset
, i
,
2034 indirectOffset
, indirectBuffer
);
2036 info
->io
.globalAccess
|= 0x1;
2039 case nir_intrinsic_shared_atomic_add
:
2040 case nir_intrinsic_shared_atomic_and
:
2041 case nir_intrinsic_shared_atomic_comp_swap
:
2042 case nir_intrinsic_shared_atomic_exchange
:
2043 case nir_intrinsic_shared_atomic_or
:
2044 case nir_intrinsic_shared_atomic_imax
:
2045 case nir_intrinsic_shared_atomic_imin
:
2046 case nir_intrinsic_shared_atomic_umax
:
2047 case nir_intrinsic_shared_atomic_umin
:
2048 case nir_intrinsic_shared_atomic_xor
: {
2049 const DataType dType
= getDType(insn
);
2050 LValues
&newDefs
= convert(&insn
->dest
);
2051 Value
*indirectOffset
;
2052 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2053 Symbol
*sym
= mkSymbol(FILE_MEMORY_SHARED
, 0, dType
, offset
);
2054 Instruction
*atom
= mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
, getSrc(&insn
->src
[1], 0));
2055 if (op
== nir_intrinsic_shared_atomic_comp_swap
)
2056 atom
->setSrc(2, getSrc(&insn
->src
[2], 0));
2057 atom
->setIndirect(0, 0, indirectOffset
);
2058 atom
->subOp
= getSubOp(op
);
2061 case nir_intrinsic_ssbo_atomic_add
:
2062 case nir_intrinsic_ssbo_atomic_and
:
2063 case nir_intrinsic_ssbo_atomic_comp_swap
:
2064 case nir_intrinsic_ssbo_atomic_exchange
:
2065 case nir_intrinsic_ssbo_atomic_or
:
2066 case nir_intrinsic_ssbo_atomic_imax
:
2067 case nir_intrinsic_ssbo_atomic_imin
:
2068 case nir_intrinsic_ssbo_atomic_umax
:
2069 case nir_intrinsic_ssbo_atomic_umin
:
2070 case nir_intrinsic_ssbo_atomic_xor
: {
2071 const DataType dType
= getDType(insn
);
2072 LValues
&newDefs
= convert(&insn
->dest
);
2073 Value
*indirectBuffer
;
2074 Value
*indirectOffset
;
2075 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2076 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2078 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, offset
);
2079 Instruction
*atom
= mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
,
2080 getSrc(&insn
->src
[2], 0));
2081 if (op
== nir_intrinsic_ssbo_atomic_comp_swap
)
2082 atom
->setSrc(2, getSrc(&insn
->src
[3], 0));
2083 atom
->setIndirect(0, 0, indirectOffset
);
2084 atom
->setIndirect(0, 1, indirectBuffer
);
2085 atom
->subOp
= getSubOp(op
);
2087 info
->io
.globalAccess
|= 0x2;
2090 case nir_intrinsic_global_atomic_add
:
2091 case nir_intrinsic_global_atomic_and
:
2092 case nir_intrinsic_global_atomic_comp_swap
:
2093 case nir_intrinsic_global_atomic_exchange
:
2094 case nir_intrinsic_global_atomic_or
:
2095 case nir_intrinsic_global_atomic_imax
:
2096 case nir_intrinsic_global_atomic_imin
:
2097 case nir_intrinsic_global_atomic_umax
:
2098 case nir_intrinsic_global_atomic_umin
:
2099 case nir_intrinsic_global_atomic_xor
: {
2100 const DataType dType
= getDType(insn
);
2101 LValues
&newDefs
= convert(&insn
->dest
);
2103 uint32_t offset
= getIndirect(&insn
->src
[0], 0, address
);
2105 Symbol
*sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, dType
, offset
);
2107 mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
, getSrc(&insn
->src
[1], 0));
2108 atom
->setIndirect(0, 0, address
);
2109 atom
->subOp
= getSubOp(op
);
2111 info
->io
.globalAccess
|= 0x2;
2114 case nir_intrinsic_bindless_image_atomic_add
:
2115 case nir_intrinsic_bindless_image_atomic_and
:
2116 case nir_intrinsic_bindless_image_atomic_comp_swap
:
2117 case nir_intrinsic_bindless_image_atomic_exchange
:
2118 case nir_intrinsic_bindless_image_atomic_imax
:
2119 case nir_intrinsic_bindless_image_atomic_umax
:
2120 case nir_intrinsic_bindless_image_atomic_imin
:
2121 case nir_intrinsic_bindless_image_atomic_umin
:
2122 case nir_intrinsic_bindless_image_atomic_or
:
2123 case nir_intrinsic_bindless_image_atomic_xor
:
2124 case nir_intrinsic_bindless_image_load
:
2125 case nir_intrinsic_bindless_image_samples
:
2126 case nir_intrinsic_bindless_image_size
:
2127 case nir_intrinsic_bindless_image_store
: {
2128 std::vector
<Value
*> srcs
, defs
;
2129 Value
*indirect
= getSrc(&insn
->src
[0], 0);
2133 TexInstruction::Target target
=
2134 convert(nir_intrinsic_image_dim(insn
), !!nir_intrinsic_image_array(insn
), false);
2135 unsigned int argCount
= getNIRArgCount(target
);
2136 uint16_t location
= 0;
2138 if (opInfo
.has_dest
) {
2139 LValues
&newDefs
= convert(&insn
->dest
);
2140 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
) {
2141 defs
.push_back(newDefs
[i
]);
2147 case nir_intrinsic_bindless_image_atomic_add
:
2148 case nir_intrinsic_bindless_image_atomic_and
:
2149 case nir_intrinsic_bindless_image_atomic_comp_swap
:
2150 case nir_intrinsic_bindless_image_atomic_exchange
:
2151 case nir_intrinsic_bindless_image_atomic_imax
:
2152 case nir_intrinsic_bindless_image_atomic_umax
:
2153 case nir_intrinsic_bindless_image_atomic_imin
:
2154 case nir_intrinsic_bindless_image_atomic_umin
:
2155 case nir_intrinsic_bindless_image_atomic_or
:
2156 case nir_intrinsic_bindless_image_atomic_xor
:
2157 ty
= getDType(insn
);
2159 info
->io
.globalAccess
|= 0x2;
2161 case nir_intrinsic_bindless_image_load
:
2163 info
->io
.globalAccess
|= 0x1;
2165 case nir_intrinsic_bindless_image_store
:
2168 info
->io
.globalAccess
|= 0x2;
2170 case nir_intrinsic_bindless_image_samples
:
2174 case nir_intrinsic_bindless_image_size
:
2178 unreachable("unhandled image opcode");
2183 if (opInfo
.num_srcs
>= 2)
2184 for (unsigned int i
= 0u; i
< argCount
; ++i
)
2185 srcs
.push_back(getSrc(&insn
->src
[1], i
));
2187 // the sampler is just another src added after coords
2188 if (opInfo
.num_srcs
>= 3 && target
.isMS())
2189 srcs
.push_back(getSrc(&insn
->src
[2], 0));
2191 if (opInfo
.num_srcs
>= 4) {
2192 unsigned components
= opInfo
.src_components
[3] ? opInfo
.src_components
[3] : insn
->num_components
;
2193 for (uint8_t i
= 0u; i
< components
; ++i
)
2194 srcs
.push_back(getSrc(&insn
->src
[3], i
));
2197 if (opInfo
.num_srcs
>= 5)
2198 // 1 for aotmic swap
2199 for (uint8_t i
= 0u; i
< opInfo
.src_components
[4]; ++i
)
2200 srcs
.push_back(getSrc(&insn
->src
[4], i
));
2202 TexInstruction
*texi
= mkTex(getOperation(op
), target
.getEnum(), location
, 0, defs
, srcs
);
2203 texi
->tex
.bindless
= false;
2204 texi
->tex
.format
= nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn
));
2205 texi
->tex
.mask
= mask
;
2206 texi
->tex
.bindless
= true;
2207 texi
->cache
= convert(nir_intrinsic_access(insn
));
2209 texi
->subOp
= getSubOp(op
);
2212 texi
->setIndirectR(indirect
);
2216 case nir_intrinsic_image_deref_atomic_add
:
2217 case nir_intrinsic_image_deref_atomic_and
:
2218 case nir_intrinsic_image_deref_atomic_comp_swap
:
2219 case nir_intrinsic_image_deref_atomic_exchange
:
2220 case nir_intrinsic_image_deref_atomic_imax
:
2221 case nir_intrinsic_image_deref_atomic_umax
:
2222 case nir_intrinsic_image_deref_atomic_imin
:
2223 case nir_intrinsic_image_deref_atomic_umin
:
2224 case nir_intrinsic_image_deref_atomic_or
:
2225 case nir_intrinsic_image_deref_atomic_xor
:
2226 case nir_intrinsic_image_deref_load
:
2227 case nir_intrinsic_image_deref_samples
:
2228 case nir_intrinsic_image_deref_size
:
2229 case nir_intrinsic_image_deref_store
: {
2230 const nir_variable
*tex
;
2231 std::vector
<Value
*> srcs
, defs
;
2236 nir_deref_instr
*deref
= nir_src_as_deref(insn
->src
[0]);
2237 const glsl_type
*type
= deref
->type
;
2238 TexInstruction::Target target
=
2239 convert((glsl_sampler_dim
)type
->sampler_dimensionality
,
2240 type
->sampler_array
, type
->sampler_shadow
);
2241 unsigned int argCount
= getNIRArgCount(target
);
2242 uint16_t location
= handleDeref(deref
, indirect
, tex
);
2244 if (opInfo
.has_dest
) {
2245 LValues
&newDefs
= convert(&insn
->dest
);
2246 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
) {
2247 defs
.push_back(newDefs
[i
]);
2253 case nir_intrinsic_image_deref_atomic_add
:
2254 case nir_intrinsic_image_deref_atomic_and
:
2255 case nir_intrinsic_image_deref_atomic_comp_swap
:
2256 case nir_intrinsic_image_deref_atomic_exchange
:
2257 case nir_intrinsic_image_deref_atomic_imax
:
2258 case nir_intrinsic_image_deref_atomic_umax
:
2259 case nir_intrinsic_image_deref_atomic_imin
:
2260 case nir_intrinsic_image_deref_atomic_umin
:
2261 case nir_intrinsic_image_deref_atomic_or
:
2262 case nir_intrinsic_image_deref_atomic_xor
:
2263 ty
= getDType(insn
);
2265 info
->io
.globalAccess
|= 0x2;
2267 case nir_intrinsic_image_deref_load
:
2269 info
->io
.globalAccess
|= 0x1;
2271 case nir_intrinsic_image_deref_store
:
2274 info
->io
.globalAccess
|= 0x2;
2276 case nir_intrinsic_image_deref_samples
:
2280 case nir_intrinsic_image_deref_size
:
2284 unreachable("unhandled image opcode");
2289 if (opInfo
.num_srcs
>= 2)
2290 for (unsigned int i
= 0u; i
< argCount
; ++i
)
2291 srcs
.push_back(getSrc(&insn
->src
[1], i
));
2293 // the sampler is just another src added after coords
2294 if (opInfo
.num_srcs
>= 3 && target
.isMS())
2295 srcs
.push_back(getSrc(&insn
->src
[2], 0));
2297 if (opInfo
.num_srcs
>= 4) {
2298 unsigned components
= opInfo
.src_components
[3] ? opInfo
.src_components
[3] : insn
->num_components
;
2299 for (uint8_t i
= 0u; i
< components
; ++i
)
2300 srcs
.push_back(getSrc(&insn
->src
[3], i
));
2303 if (opInfo
.num_srcs
>= 5)
2304 // 1 for aotmic swap
2305 for (uint8_t i
= 0u; i
< opInfo
.src_components
[4]; ++i
)
2306 srcs
.push_back(getSrc(&insn
->src
[4], i
));
2308 TexInstruction
*texi
= mkTex(getOperation(op
), target
.getEnum(), location
, 0, defs
, srcs
);
2309 texi
->tex
.bindless
= false;
2310 texi
->tex
.format
= nv50_ir::TexInstruction::translateImgFormat(tex
->data
.image
.format
);
2311 texi
->tex
.mask
= mask
;
2312 texi
->cache
= getCacheModeFromVar(tex
);
2314 texi
->subOp
= getSubOp(op
);
2317 texi
->setIndirectR(indirect
);
2321 case nir_intrinsic_store_shared
: {
2322 DataType sType
= getSType(insn
->src
[0], false, false);
2323 Value
*indirectOffset
;
2324 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2326 for (uint8_t i
= 0u; i
< nir_intrinsic_src_components(insn
, 0); ++i
) {
2327 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2329 Symbol
*sym
= mkSymbol(FILE_MEMORY_SHARED
, 0, sType
, offset
+ i
* typeSizeof(sType
));
2330 mkStore(OP_STORE
, sType
, sym
, indirectOffset
, getSrc(&insn
->src
[0], i
));
2334 case nir_intrinsic_load_shared
: {
2335 const DataType dType
= getDType(insn
);
2336 LValues
&newDefs
= convert(&insn
->dest
);
2337 Value
*indirectOffset
;
2338 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2340 for (uint8_t i
= 0u; i
< dest_components
; ++i
)
2341 loadFrom(FILE_MEMORY_SHARED
, 0, dType
, newDefs
[i
], offset
, i
, indirectOffset
);
2345 case nir_intrinsic_control_barrier
: {
2346 // TODO: add flag to shader_info
2347 info
->numBarriers
= 1;
2348 Instruction
*bar
= mkOp2(OP_BAR
, TYPE_U32
, NULL
, mkImm(0), mkImm(0));
2350 bar
->subOp
= NV50_IR_SUBOP_BAR_SYNC
;
2353 case nir_intrinsic_group_memory_barrier
:
2354 case nir_intrinsic_memory_barrier
:
2355 case nir_intrinsic_memory_barrier_buffer
:
2356 case nir_intrinsic_memory_barrier_image
:
2357 case nir_intrinsic_memory_barrier_shared
: {
2358 Instruction
*bar
= mkOp(OP_MEMBAR
, TYPE_NONE
, NULL
);
2360 bar
->subOp
= getSubOp(op
);
2363 case nir_intrinsic_memory_barrier_tcs_patch
:
2365 case nir_intrinsic_shader_clock
: {
2366 const DataType dType
= getDType(insn
);
2367 LValues
&newDefs
= convert(&insn
->dest
);
2369 loadImm(newDefs
[0], 0u);
2370 mkOp1(OP_RDSV
, dType
, newDefs
[1], mkSysVal(SV_CLOCK
, 0))->fixed
= 1;
2373 case nir_intrinsic_load_global
: {
2374 const DataType dType
= getDType(insn
);
2375 LValues
&newDefs
= convert(&insn
->dest
);
2376 Value
*indirectOffset
;
2377 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2379 for (auto i
= 0u; i
< dest_components
; ++i
)
2380 loadFrom(FILE_MEMORY_GLOBAL
, 0, dType
, newDefs
[i
], offset
, i
, indirectOffset
);
2382 info
->io
.globalAccess
|= 0x1;
2385 case nir_intrinsic_store_global
: {
2386 DataType sType
= getSType(insn
->src
[0], false, false);
2388 for (auto i
= 0u; i
< nir_intrinsic_src_components(insn
, 0); ++i
) {
2389 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2391 if (typeSizeof(sType
) == 8) {
2393 mkSplit(split
, 4, getSrc(&insn
->src
[0], i
));
2395 Symbol
*sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, TYPE_U32
, i
* typeSizeof(sType
));
2396 mkStore(OP_STORE
, TYPE_U32
, sym
, getSrc(&insn
->src
[1], 0), split
[0]);
2398 sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, TYPE_U32
, i
* typeSizeof(sType
) + 4);
2399 mkStore(OP_STORE
, TYPE_U32
, sym
, getSrc(&insn
->src
[1], 0), split
[1]);
2401 Symbol
*sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, sType
, i
* typeSizeof(sType
));
2402 mkStore(OP_STORE
, sType
, sym
, getSrc(&insn
->src
[1], 0), getSrc(&insn
->src
[0], i
));
2406 info
->io
.globalAccess
|= 0x2;
2410 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos
[op
].name
);
2418 Converter::visit(nir_jump_instr
*insn
)
2420 switch (insn
->type
) {
2421 case nir_jump_return
:
2422 // TODO: this only works in the main function
2423 mkFlow(OP_BRA
, exit
, CC_ALWAYS
, NULL
);
2424 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::CROSS
);
2426 case nir_jump_break
:
2427 case nir_jump_continue
: {
2428 bool isBreak
= insn
->type
== nir_jump_break
;
2429 nir_block
*block
= insn
->instr
.block
;
2430 assert(!block
->successors
[1]);
2431 BasicBlock
*target
= convert(block
->successors
[0]);
2432 mkFlow(isBreak
? OP_BREAK
: OP_CONT
, target
, CC_ALWAYS
, NULL
);
2433 bb
->cfg
.attach(&target
->cfg
, isBreak
? Graph::Edge::CROSS
: Graph::Edge::BACK
);
2437 ERROR("unknown nir_jump_type %u\n", insn
->type
);
2445 Converter::convert(nir_load_const_instr
*insn
, uint8_t idx
)
2450 setPosition(immInsertPos
, true);
2452 setPosition(bb
, false);
2454 switch (insn
->def
.bit_size
) {
2456 val
= loadImm(getSSA(8), insn
->value
[idx
].u64
);
2459 val
= loadImm(getSSA(4), insn
->value
[idx
].u32
);
2462 val
= loadImm(getSSA(2), insn
->value
[idx
].u16
);
2465 val
= loadImm(getSSA(1), insn
->value
[idx
].u8
);
2468 unreachable("unhandled bit size!\n");
2470 setPosition(bb
, true);
2475 Converter::visit(nir_load_const_instr
*insn
)
2477 assert(insn
->def
.bit_size
<= 64);
2478 immediates
[insn
->def
.index
] = insn
;
// Guard used by scalar-only ALU cases: the converter runs after
// nir_lower_alu_to_scalar, so multi-component or masked writes are bugs.
// The `return false;` lines were dropped by the extraction and restored.
#define DEFAULT_CHECKS \
      if (insn->dest.dest.ssa.num_components > 1) { \
         ERROR("nir_alu_instr only supported with 1 component!\n"); \
         return false; \
      } \
      if (insn->dest.write_mask != 1) { \
         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
         return false; \
      }
2492 Converter::visit(nir_alu_instr
*insn
)
2494 const nir_op op
= insn
->op
;
2495 const nir_op_info
&info
= nir_op_infos
[op
];
2496 DataType dType
= getDType(insn
);
2497 const std::vector
<DataType
> sTypes
= getSTypes(insn
);
2499 Instruction
*oldPos
= this->bb
->getExit();
2510 case nir_op_fddx_coarse
:
2511 case nir_op_fddx_fine
:
2513 case nir_op_fddy_coarse
:
2514 case nir_op_fddy_fine
:
2533 case nir_op_imul_high
:
2534 case nir_op_umul_high
:
2539 case nir_op_pack_64_2x32_split
:
2554 LValues
&newDefs
= convert(&insn
->dest
);
2555 operation preOp
= preOperationNeeded(op
);
2556 if (preOp
!= OP_NOP
) {
2557 assert(info
.num_inputs
< 2);
2558 Value
*tmp
= getSSA(typeSizeof(dType
));
2559 Instruction
*i0
= mkOp(preOp
, dType
, tmp
);
2560 Instruction
*i1
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2561 if (info
.num_inputs
) {
2562 i0
->setSrc(0, getSrc(&insn
->src
[0]));
2565 i1
->subOp
= getSubOp(op
);
2567 Instruction
*i
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2568 for (unsigned s
= 0u; s
< info
.num_inputs
; ++s
) {
2569 i
->setSrc(s
, getSrc(&insn
->src
[s
]));
2571 i
->subOp
= getSubOp(op
);
2575 case nir_op_ifind_msb
:
2576 case nir_op_ufind_msb
: {
2578 LValues
&newDefs
= convert(&insn
->dest
);
2580 mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2583 case nir_op_fround_even
: {
2585 LValues
&newDefs
= convert(&insn
->dest
);
2586 mkCvt(OP_CVT
, dType
, newDefs
[0], dType
, getSrc(&insn
->src
[0]))->rnd
= ROUND_NI
;
2589 // convert instructions
2603 case nir_op_u2u64
: {
2605 LValues
&newDefs
= convert(&insn
->dest
);
2606 Instruction
*i
= mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2607 if (op
== nir_op_f2i32
|| op
== nir_op_f2i64
|| op
== nir_op_f2u32
|| op
== nir_op_f2u64
)
2609 i
->sType
= sTypes
[0];
2612 // compare instructions
2622 case nir_op_ine32
: {
2624 LValues
&newDefs
= convert(&insn
->dest
);
2625 Instruction
*i
= mkCmp(getOperation(op
),
2630 getSrc(&insn
->src
[0]),
2631 getSrc(&insn
->src
[1]));
2632 if (info
.num_inputs
== 3)
2633 i
->setSrc(2, getSrc(&insn
->src
[2]));
2634 i
->sType
= sTypes
[0];
2637 // those are weird ALU ops and need special handling, because
2638 // 1. they are always componend based
2639 // 2. they basically just merge multiple values into one data type
2641 if (!insn
->dest
.dest
.is_ssa
&& insn
->dest
.dest
.reg
.reg
->num_array_elems
) {
2642 nir_reg_dest
& reg
= insn
->dest
.dest
.reg
;
2643 uint32_t goffset
= regToLmemOffset
[reg
.reg
->index
];
2644 uint8_t comps
= reg
.reg
->num_components
;
2645 uint8_t size
= reg
.reg
->bit_size
/ 8;
2646 uint8_t csize
= 4 * size
; // TODO after fixing MemoryOpts: comps * size;
2647 uint32_t aoffset
= csize
* reg
.base_offset
;
2648 Value
*indirect
= NULL
;
2651 indirect
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
),
2652 getSrc(reg
.indirect
, 0), mkImm(csize
));
2654 for (uint8_t i
= 0u; i
< comps
; ++i
) {
2655 if (!((1u << i
) & insn
->dest
.write_mask
))
2658 Symbol
*sym
= mkSymbol(FILE_MEMORY_LOCAL
, 0, dType
, goffset
+ aoffset
+ i
* size
);
2659 mkStore(OP_STORE
, dType
, sym
, indirect
, getSrc(&insn
->src
[0], i
));
2662 } else if (!insn
->src
[0].src
.is_ssa
&& insn
->src
[0].src
.reg
.reg
->num_array_elems
) {
2663 LValues
&newDefs
= convert(&insn
->dest
);
2664 nir_reg_src
& reg
= insn
->src
[0].src
.reg
;
2665 uint32_t goffset
= regToLmemOffset
[reg
.reg
->index
];
2666 // uint8_t comps = reg.reg->num_components;
2667 uint8_t size
= reg
.reg
->bit_size
/ 8;
2668 uint8_t csize
= 4 * size
; // TODO after fixing MemoryOpts: comps * size;
2669 uint32_t aoffset
= csize
* reg
.base_offset
;
2670 Value
*indirect
= NULL
;
2673 indirect
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), getSrc(reg
.indirect
, 0), mkImm(csize
));
2675 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
)
2676 loadFrom(FILE_MEMORY_LOCAL
, 0, dType
, newDefs
[i
], goffset
+ aoffset
, i
, indirect
);
2680 LValues
&newDefs
= convert(&insn
->dest
);
2681 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2682 mkMov(newDefs
[c
], getSrc(&insn
->src
[0], c
), dType
);
2690 case nir_op_vec16
: {
2691 LValues
&newDefs
= convert(&insn
->dest
);
2692 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2693 mkMov(newDefs
[c
], getSrc(&insn
->src
[c
]), dType
);
2698 case nir_op_pack_64_2x32
: {
2699 LValues
&newDefs
= convert(&insn
->dest
);
2700 Instruction
*merge
= mkOp(OP_MERGE
, dType
, newDefs
[0]);
2701 merge
->setSrc(0, getSrc(&insn
->src
[0], 0));
2702 merge
->setSrc(1, getSrc(&insn
->src
[0], 1));
2705 case nir_op_pack_half_2x16_split
: {
2706 LValues
&newDefs
= convert(&insn
->dest
);
2707 Value
*tmpH
= getSSA();
2708 Value
*tmpL
= getSSA();
2710 mkCvt(OP_CVT
, TYPE_F16
, tmpL
, TYPE_F32
, getSrc(&insn
->src
[0]));
2711 mkCvt(OP_CVT
, TYPE_F16
, tmpH
, TYPE_F32
, getSrc(&insn
->src
[1]));
2712 mkOp3(OP_INSBF
, TYPE_U32
, newDefs
[0], tmpH
, mkImm(0x1010), tmpL
);
2715 case nir_op_unpack_half_2x16_split_x
:
2716 case nir_op_unpack_half_2x16_split_y
: {
2717 LValues
&newDefs
= convert(&insn
->dest
);
2718 Instruction
*cvt
= mkCvt(OP_CVT
, TYPE_F32
, newDefs
[0], TYPE_F16
, getSrc(&insn
->src
[0]));
2719 if (op
== nir_op_unpack_half_2x16_split_y
)
2723 case nir_op_unpack_64_2x32
: {
2724 LValues
&newDefs
= convert(&insn
->dest
);
2725 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, newDefs
[1]);
2728 case nir_op_unpack_64_2x32_split_x
: {
2729 LValues
&newDefs
= convert(&insn
->dest
);
2730 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, getSSA());
2733 case nir_op_unpack_64_2x32_split_y
: {
2734 LValues
&newDefs
= convert(&insn
->dest
);
2735 mkOp1(OP_SPLIT
, dType
, getSSA(), getSrc(&insn
->src
[0]))->setDef(1, newDefs
[0]);
2738 // special instructions
2740 case nir_op_isign
: {
2743 if (::isFloatType(dType
))
2748 LValues
&newDefs
= convert(&insn
->dest
);
2749 LValue
*val0
= getScratch();
2750 LValue
*val1
= getScratch();
2751 mkCmp(OP_SET
, CC_GT
, iType
, val0
, dType
, getSrc(&insn
->src
[0]), zero
);
2752 mkCmp(OP_SET
, CC_LT
, iType
, val1
, dType
, getSrc(&insn
->src
[0]), zero
);
2754 if (dType
== TYPE_F64
) {
2755 mkOp2(OP_SUB
, iType
, val0
, val0
, val1
);
2756 mkCvt(OP_CVT
, TYPE_F64
, newDefs
[0], iType
, val0
);
2757 } else if (dType
== TYPE_S64
|| dType
== TYPE_U64
) {
2758 mkOp2(OP_SUB
, iType
, val0
, val1
, val0
);
2759 mkOp2(OP_SHR
, iType
, val1
, val0
, loadImm(NULL
, 31));
2760 mkOp2(OP_MERGE
, dType
, newDefs
[0], val0
, val1
);
2761 } else if (::isFloatType(dType
))
2762 mkOp2(OP_SUB
, iType
, newDefs
[0], val0
, val1
);
2764 mkOp2(OP_SUB
, iType
, newDefs
[0], val1
, val0
);
2768 case nir_op_b32csel
: {
2770 LValues
&newDefs
= convert(&insn
->dest
);
2771 mkCmp(OP_SLCT
, CC_NE
, dType
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[1]), getSrc(&insn
->src
[2]), getSrc(&insn
->src
[0]));
2774 case nir_op_ibitfield_extract
:
2775 case nir_op_ubitfield_extract
: {
2777 Value
*tmp
= getSSA();
2778 LValues
&newDefs
= convert(&insn
->dest
);
2779 mkOp3(OP_INSBF
, dType
, tmp
, getSrc(&insn
->src
[2]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2780 mkOp2(OP_EXTBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), tmp
);
2785 LValues
&newDefs
= convert(&insn
->dest
);
2786 mkOp2(OP_BMSK
, dType
, newDefs
[0], getSrc(&insn
->src
[1]), getSrc(&insn
->src
[0]))->subOp
= NV50_IR_SUBOP_BMSK_W
;
2789 case nir_op_bitfield_insert
: {
2791 LValues
&newDefs
= convert(&insn
->dest
);
2792 LValue
*temp
= getSSA();
2793 mkOp3(OP_INSBF
, TYPE_U32
, temp
, getSrc(&insn
->src
[3]), mkImm(0x808), getSrc(&insn
->src
[2]));
2794 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[1]), temp
, getSrc(&insn
->src
[0]));
2797 case nir_op_bit_count
: {
2799 LValues
&newDefs
= convert(&insn
->dest
);
2800 mkOp2(OP_POPCNT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), getSrc(&insn
->src
[0]));
2803 case nir_op_bitfield_reverse
: {
2805 LValues
&newDefs
= convert(&insn
->dest
);
2806 mkOp1(OP_BREV
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]));
2809 case nir_op_find_lsb
: {
2811 LValues
&newDefs
= convert(&insn
->dest
);
2812 Value
*tmp
= getSSA();
2813 mkOp1(OP_BREV
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]));
2814 mkOp1(OP_BFIND
, TYPE_U32
, newDefs
[0], tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
2817 case nir_op_extract_u8
: {
2819 LValues
&newDefs
= convert(&insn
->dest
);
2820 Value
*prmt
= getSSA();
2821 mkOp2(OP_OR
, TYPE_U32
, prmt
, getSrc(&insn
->src
[1]), loadImm(NULL
, 0x4440));
2822 mkOp3(OP_PERMT
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), prmt
, loadImm(NULL
, 0));
2825 case nir_op_extract_i8
: {
2827 LValues
&newDefs
= convert(&insn
->dest
);
2828 Value
*prmt
= getSSA();
2829 mkOp3(OP_MAD
, TYPE_U32
, prmt
, getSrc(&insn
->src
[1]), loadImm(NULL
, 0x1111), loadImm(NULL
, 0x8880));
2830 mkOp3(OP_PERMT
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), prmt
, loadImm(NULL
, 0));
2833 case nir_op_extract_u16
: {
2835 LValues
&newDefs
= convert(&insn
->dest
);
2836 Value
*prmt
= getSSA();
2837 mkOp3(OP_MAD
, TYPE_U32
, prmt
, getSrc(&insn
->src
[1]), loadImm(NULL
, 0x22), loadImm(NULL
, 0x4410));
2838 mkOp3(OP_PERMT
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), prmt
, loadImm(NULL
, 0));
2841 case nir_op_extract_i16
: {
2843 LValues
&newDefs
= convert(&insn
->dest
);
2844 Value
*prmt
= getSSA();
2845 mkOp3(OP_MAD
, TYPE_U32
, prmt
, getSrc(&insn
->src
[1]), loadImm(NULL
, 0x2222), loadImm(NULL
, 0x9910));
2846 mkOp3(OP_PERMT
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), prmt
, loadImm(NULL
, 0));
2851 LValues
&newDefs
= convert(&insn
->dest
);
2852 mkOp3(OP_SHF
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]),
2853 getSrc(&insn
->src
[1]), getSrc(&insn
->src
[0]))
2854 ->subOp
= NV50_IR_SUBOP_SHF_L
|
2855 NV50_IR_SUBOP_SHF_W
|
2856 NV50_IR_SUBOP_SHF_HI
;
2861 LValues
&newDefs
= convert(&insn
->dest
);
2862 mkOp3(OP_SHF
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]),
2863 getSrc(&insn
->src
[1]), getSrc(&insn
->src
[0]))
2864 ->subOp
= NV50_IR_SUBOP_SHF_R
|
2865 NV50_IR_SUBOP_SHF_W
|
2866 NV50_IR_SUBOP_SHF_LO
;
2869 // boolean conversions
2870 case nir_op_b2f32
: {
2872 LValues
&newDefs
= convert(&insn
->dest
);
2873 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1.0f
));
2876 case nir_op_b2f64
: {
2878 LValues
&newDefs
= convert(&insn
->dest
);
2879 Value
*tmp
= getSSA(4);
2880 mkOp2(OP_AND
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), loadImm(NULL
, 0x3ff00000));
2881 mkOp2(OP_MERGE
, TYPE_U64
, newDefs
[0], loadImm(NULL
, 0), tmp
);
2885 case nir_op_i2b32
: {
2887 LValues
&newDefs
= convert(&insn
->dest
);
2889 if (typeSizeof(sTypes
[0]) == 8) {
2890 src1
= loadImm(getSSA(8), 0.0);
2894 CondCode cc
= op
== nir_op_f2b32
? CC_NEU
: CC_NE
;
2895 mkCmp(OP_SET
, cc
, TYPE_U32
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[0]), src1
);
2898 case nir_op_b2i32
: {
2900 LValues
&newDefs
= convert(&insn
->dest
);
2901 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2904 case nir_op_b2i64
: {
2906 LValues
&newDefs
= convert(&insn
->dest
);
2907 LValue
*def
= getScratch();
2908 mkOp2(OP_AND
, TYPE_U32
, def
, getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2909 mkOp2(OP_MERGE
, TYPE_S64
, newDefs
[0], def
, loadImm(NULL
, 0));
2913 ERROR("unknown nir_op %s\n", info
.name
);
2918 oldPos
= this->bb
->getEntry();
2919 oldPos
->precise
= insn
->exact
;
2922 if (unlikely(!oldPos
))
2925 while (oldPos
->next
) {
2926 oldPos
= oldPos
->next
;
2927 oldPos
->precise
= insn
->exact
;
2929 oldPos
->saturate
= insn
->dest
.saturate
;
2933 #undef DEFAULT_CHECKS
2936 Converter::visit(nir_ssa_undef_instr
*insn
)
2938 LValues
&newDefs
= convert(&insn
->def
);
2939 for (uint8_t i
= 0u; i
< insn
->def
.num_components
; ++i
) {
2940 mkOp(OP_NOP
, TYPE_NONE
, newDefs
[i
]);
// Expand one sampler-dim case into the four array/shadow TexTarget variants.
// The trailing `else` line was dropped by the extraction and restored.
#define CASE_SAMPLER(ty) \
   case GLSL_SAMPLER_DIM_ ## ty : \
      if (isArray && !isShadow) \
         return TEX_TARGET_ ## ty ## _ARRAY; \
      else if (!isArray && isShadow) \
         return TEX_TARGET_## ty ## _SHADOW; \
      else if (isArray && isShadow) \
         return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
      else \
         return TEX_TARGET_ ## ty
2957 Converter::convert(glsl_sampler_dim dim
, bool isArray
, bool isShadow
)
2963 case GLSL_SAMPLER_DIM_3D
:
2964 return TEX_TARGET_3D
;
2965 case GLSL_SAMPLER_DIM_MS
:
2967 return TEX_TARGET_2D_MS_ARRAY
;
2968 return TEX_TARGET_2D_MS
;
2969 case GLSL_SAMPLER_DIM_RECT
:
2971 return TEX_TARGET_RECT_SHADOW
;
2972 return TEX_TARGET_RECT
;
2973 case GLSL_SAMPLER_DIM_BUF
:
2974 return TEX_TARGET_BUFFER
;
2975 case GLSL_SAMPLER_DIM_EXTERNAL
:
2976 return TEX_TARGET_2D
;
2978 ERROR("unknown glsl_sampler_dim %u\n", dim
);
2980 return TEX_TARGET_COUNT
;
2986 Converter::applyProjection(Value
*src
, Value
*proj
)
2990 return mkOp2v(OP_MUL
, TYPE_F32
, getScratch(), src
, proj
);
2994 Converter::getNIRArgCount(TexInstruction::Target
& target
)
2996 unsigned int result
= target
.getArgCount();
2997 if (target
.isCube() && target
.isArray())
3005 Converter::handleDeref(nir_deref_instr
*deref
, Value
* &indirect
, const nir_variable
* &tex
)
3007 typedef std::pair
<uint32_t,Value
*> DerefPair
;
3008 std::list
<DerefPair
> derefs
;
3010 uint16_t result
= 0;
3011 while (deref
->deref_type
!= nir_deref_type_var
) {
3012 switch (deref
->deref_type
) {
3013 case nir_deref_type_array
: {
3015 uint8_t size
= type_size(deref
->type
, true);
3016 result
+= size
* getIndirect(&deref
->arr
.index
, 0, indirect
);
3019 derefs
.push_front(std::make_pair(size
, indirect
));
3024 case nir_deref_type_struct
: {
3025 result
+= nir_deref_instr_parent(deref
)->type
->struct_location_offset(deref
->strct
.index
);
3028 case nir_deref_type_var
:
3030 unreachable("nir_deref_type_var reached in handleDeref!");
3033 deref
= nir_deref_instr_parent(deref
);
3037 for (std::list
<DerefPair
>::const_iterator it
= derefs
.begin(); it
!= derefs
.end(); ++it
) {
3038 Value
*offset
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(), loadImm(getSSA(), it
->first
), it
->second
);
3040 indirect
= mkOp2v(OP_ADD
, TYPE_U32
, getSSA(), indirect
, offset
);
3045 tex
= nir_deref_instr_get_variable(deref
);
3048 return result
+ tex
->data
.driver_location
;
3052 Converter::convert(enum gl_access_qualifier access
)
3055 case ACCESS_VOLATILE
:
3057 case ACCESS_COHERENT
:
3065 Converter::getCacheModeFromVar(const nir_variable
*var
)
3067 return convert(var
->data
.access
);
3071 Converter::visit(nir_tex_instr
*insn
)
3075 case nir_texop_query_levels
:
3077 case nir_texop_texture_samples
:
3082 case nir_texop_txf_ms
:
3084 case nir_texop_txs
: {
3085 LValues
&newDefs
= convert(&insn
->dest
);
3086 std::vector
<Value
*> srcs
;
3087 std::vector
<Value
*> defs
;
3088 std::vector
<nir_src
*> offsets
;
3092 TexInstruction::Target target
= convert(insn
->sampler_dim
, insn
->is_array
, insn
->is_shadow
);
3093 operation op
= getOperation(insn
->op
);
3096 int biasIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_bias
);
3097 int compIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_comparator
);
3098 int coordsIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_coord
);
3099 int ddxIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddx
);
3100 int ddyIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddy
);
3101 int msIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ms_index
);
3102 int lodIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_lod
);
3103 int offsetIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_offset
);
3104 int projIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_projector
);
3105 int sampOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_sampler_offset
);
3106 int texOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_texture_offset
);
3107 int sampHandleIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_sampler_handle
);
3108 int texHandleIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_texture_handle
);
3110 bool bindless
= sampHandleIdx
!= -1 || texHandleIdx
!= -1;
3111 assert((sampHandleIdx
!= -1) == (texHandleIdx
!= -1));
3114 proj
= mkOp1v(OP_RCP
, TYPE_F32
, getScratch(), getSrc(&insn
->src
[projIdx
].src
, 0));
3116 srcs
.resize(insn
->coord_components
);
3117 for (uint8_t i
= 0u; i
< insn
->coord_components
; ++i
)
3118 srcs
[i
] = applyProjection(getSrc(&insn
->src
[coordsIdx
].src
, i
), proj
);
3120 // sometimes we get less args than target.getArgCount, but codegen expects the latter
3121 if (insn
->coord_components
) {
3122 uint32_t argCount
= target
.getArgCount();
3127 for (uint32_t i
= 0u; i
< (argCount
- insn
->coord_components
); ++i
)
3128 srcs
.push_back(getSSA());
3131 if (insn
->op
== nir_texop_texture_samples
)
3132 srcs
.push_back(zero
);
3133 else if (!insn
->num_srcs
)
3134 srcs
.push_back(loadImm(NULL
, 0));
3136 srcs
.push_back(getSrc(&insn
->src
[biasIdx
].src
, 0));
3138 srcs
.push_back(getSrc(&insn
->src
[lodIdx
].src
, 0));
3139 else if (op
== OP_TXF
)
3142 srcs
.push_back(getSrc(&insn
->src
[msIdx
].src
, 0));
3143 if (offsetIdx
!= -1)
3144 offsets
.push_back(&insn
->src
[offsetIdx
].src
);
3146 srcs
.push_back(applyProjection(getSrc(&insn
->src
[compIdx
].src
, 0), proj
));
3147 if (texOffIdx
!= -1) {
3148 srcs
.push_back(getSrc(&insn
->src
[texOffIdx
].src
, 0));
3149 texOffIdx
= srcs
.size() - 1;
3151 if (sampOffIdx
!= -1) {
3152 srcs
.push_back(getSrc(&insn
->src
[sampOffIdx
].src
, 0));
3153 sampOffIdx
= srcs
.size() - 1;
3156 // currently we use the lower bits
3158 Value
*handle
= getSrc(&insn
->src
[sampHandleIdx
].src
, 0);
3160 mkSplit(split
, 4, handle
);
3162 srcs
.push_back(split
[0]);
3163 texOffIdx
= srcs
.size() - 1;
3166 r
= bindless
? 0xff : insn
->texture_index
;
3167 s
= bindless
? 0x1f : insn
->sampler_index
;
3169 defs
.resize(newDefs
.size());
3170 for (uint8_t d
= 0u; d
< newDefs
.size(); ++d
) {
3171 defs
[d
] = newDefs
[d
];
3174 if (target
.isMS() || (op
== OP_TEX
&& prog
->getType() != Program::TYPE_FRAGMENT
))
3177 TexInstruction
*texi
= mkTex(op
, target
.getEnum(), r
, s
, defs
, srcs
);
3178 texi
->tex
.levelZero
= lz
;
3179 texi
->tex
.mask
= mask
;
3180 texi
->tex
.bindless
= bindless
;
3182 if (texOffIdx
!= -1)
3183 texi
->tex
.rIndirectSrc
= texOffIdx
;
3184 if (sampOffIdx
!= -1)
3185 texi
->tex
.sIndirectSrc
= sampOffIdx
;
3189 if (!target
.isShadow())
3190 texi
->tex
.gatherComp
= insn
->component
;
3193 texi
->tex
.query
= TXQ_DIMS
;
3195 case nir_texop_texture_samples
:
3196 texi
->tex
.mask
= 0x4;
3197 texi
->tex
.query
= TXQ_TYPE
;
3199 case nir_texop_query_levels
:
3200 texi
->tex
.mask
= 0x8;
3201 texi
->tex
.query
= TXQ_DIMS
;
3207 texi
->tex
.useOffsets
= offsets
.size();
3208 if (texi
->tex
.useOffsets
) {
3209 for (uint8_t s
= 0; s
< texi
->tex
.useOffsets
; ++s
) {
3210 for (uint32_t c
= 0u; c
< 3; ++c
) {
3211 uint8_t s2
= std::min(c
, target
.getDim() - 1);
3212 texi
->offset
[s
][c
].set(getSrc(offsets
[s
], s2
));
3213 texi
->offset
[s
][c
].setInsn(texi
);
3218 if (op
== OP_TXG
&& offsetIdx
== -1) {
3219 if (nir_tex_instr_has_explicit_tg4_offsets(insn
)) {
3220 texi
->tex
.useOffsets
= 4;
3221 setPosition(texi
, false);
3222 for (uint8_t i
= 0; i
< 4; ++i
) {
3223 for (uint8_t j
= 0; j
< 2; ++j
) {
3224 texi
->offset
[i
][j
].set(loadImm(NULL
, insn
->tg4_offsets
[i
][j
]));
3225 texi
->offset
[i
][j
].setInsn(texi
);
3228 setPosition(texi
, true);
3232 if (ddxIdx
!= -1 && ddyIdx
!= -1) {
3233 for (uint8_t c
= 0u; c
< target
.getDim() + target
.isCube(); ++c
) {
3234 texi
->dPdx
[c
].set(getSrc(&insn
->src
[ddxIdx
].src
, c
));
3235 texi
->dPdy
[c
].set(getSrc(&insn
->src
[ddyIdx
].src
, c
));
3242 ERROR("unknown nir_texop %u\n", insn
->op
);
3249 Converter::visit(nir_deref_instr
*deref
)
3251 // we just ignore those, because images intrinsics are the only place where
3252 // we should end up with deref sources and those have to backtrack anyway
3253 // to get the nir_variable. This code just exists to handle some special
3255 switch (deref
->deref_type
) {
3256 case nir_deref_type_array
:
3257 case nir_deref_type_struct
:
3258 case nir_deref_type_var
:
3261 ERROR("unknown nir_deref_instr %u\n", deref
->deref_type
);
3272 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
3273 nir_print_shader(nir
, stderr
);
3275 struct nir_lower_subgroups_options subgroup_options
= {
3276 .subgroup_size
= 32,
3277 .ballot_bit_size
= 32,
3280 NIR_PASS_V(nir
, nir_lower_io
, nir_var_all
, type_size
, (nir_lower_io_options
)0);
3281 NIR_PASS_V(nir
, nir_lower_subgroups
, &subgroup_options
);
3282 NIR_PASS_V(nir
, nir_lower_regs_to_ssa
);
3283 NIR_PASS_V(nir
, nir_lower_load_const_to_scalar
);
3284 NIR_PASS_V(nir
, nir_lower_vars_to_ssa
);
3285 NIR_PASS_V(nir
, nir_lower_alu_to_scalar
, NULL
, NULL
);
3286 NIR_PASS_V(nir
, nir_lower_phis_to_scalar
);
3288 /*TODO: improve this lowering/optimisation loop so that we can use
3289 * nir_opt_idiv_const effectively before this.
3291 NIR_PASS(progress
, nir
, nir_lower_idiv
, nir_lower_idiv_precise
);
3295 NIR_PASS(progress
, nir
, nir_copy_prop
);
3296 NIR_PASS(progress
, nir
, nir_opt_remove_phis
);
3297 NIR_PASS(progress
, nir
, nir_opt_trivial_continues
);
3298 NIR_PASS(progress
, nir
, nir_opt_cse
);
3299 NIR_PASS(progress
, nir
, nir_opt_algebraic
);
3300 NIR_PASS(progress
, nir
, nir_opt_constant_folding
);
3301 NIR_PASS(progress
, nir
, nir_copy_prop
);
3302 NIR_PASS(progress
, nir
, nir_opt_dce
);
3303 NIR_PASS(progress
, nir
, nir_opt_dead_cf
);
3306 NIR_PASS_V(nir
, nir_lower_bool_to_int32
);
3307 NIR_PASS_V(nir
, nir_lower_locals_to_regs
);
3308 NIR_PASS_V(nir
, nir_remove_dead_variables
, nir_var_function_temp
, NULL
);
3309 NIR_PASS_V(nir
, nir_convert_from_ssa
, true);
3311 // Garbage collect dead instructions
3315 ERROR("Couldn't prase NIR!\n");
3319 if (!assignSlots()) {
3320 ERROR("Couldn't assign slots!\n");
3324 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
3325 nir_print_shader(nir
, stderr
);
3327 nir_foreach_function(function
, nir
) {
3328 if (!visit(function
))
3335 } // unnamed namespace
3340 Program::makeFromNIR(struct nv50_ir_prog_info
*info
)
3342 nir_shader
*nir
= (nir_shader
*)info
->bin
.source
;
3343 Converter
converter(this, nir
, info
);
3344 bool result
= converter
.run();
3347 LoweringHelper lowering
;
3349 tlsSize
= info
->bin
.tlsSpace
;
3353 } // namespace nv50_ir
3355 static nir_shader_compiler_options
3356 nvir_nir_shader_compiler_options(int chipset
)
3359 .lower_fdiv
= false,
3360 .lower_ffma
= false,
3361 .fuse_ffma
= false, /* nir doesn't track mad vs fma */
3362 .lower_flrp16
= false,
3363 .lower_flrp32
= true,
3364 .lower_flrp64
= true,
3365 .lower_fpow
= false,
3366 .lower_fsat
= false,
3367 .lower_fsqrt
= false, // TODO: only before gm200
3368 .lower_sincos
= false,
3370 .lower_bitfield_extract
= false,
3371 .lower_bitfield_extract_to_shifts
= false,
3372 .lower_bitfield_insert
= false,
3373 .lower_bitfield_insert_to_shifts
= false,
3374 .lower_bitfield_insert_to_bitfield_select
= false,
3375 .lower_bitfield_reverse
= false,
3376 .lower_bit_count
= false,
3377 .lower_ifind_msb
= false,
3378 .lower_find_lsb
= false,
3379 .lower_uadd_carry
= true, // TODO
3380 .lower_usub_borrow
= true, // TODO
3381 .lower_mul_high
= false,
3382 .lower_negate
= false,
3384 .lower_scmp
= true, // TODO: not implemented yet
3385 .lower_vector_cmp
= false,
3387 .lower_bitops
= false,
3388 .lower_isign
= false, // TODO
3389 .lower_fsign
= false,
3390 .lower_fdph
= false,
3391 .lower_fdot
= false,
3392 .fdot_replicates
= false, // TODO
3393 .lower_ffloor
= false, // TODO
3394 .lower_ffract
= true,
3395 .lower_fceil
= false, // TODO
3396 .lower_ftrunc
= false,
3397 .lower_ldexp
= true,
3398 .lower_pack_half_2x16
= true,
3399 .lower_pack_unorm_2x16
= true,
3400 .lower_pack_snorm_2x16
= true,
3401 .lower_pack_unorm_4x8
= true,
3402 .lower_pack_snorm_4x8
= true,
3403 .lower_unpack_half_2x16
= true,
3404 .lower_unpack_unorm_2x16
= true,
3405 .lower_unpack_snorm_2x16
= true,
3406 .lower_unpack_unorm_4x8
= true,
3407 .lower_unpack_snorm_4x8
= true,
3408 .lower_pack_split
= false,
3409 .lower_extract_byte
= true,
3410 .lower_extract_word
= true,
3411 .lower_all_io_to_temps
= false,
3412 .lower_all_io_to_elements
= false,
3413 .vertex_id_zero_based
= false,
3414 .lower_base_vertex
= false,
3415 .lower_helper_invocation
= false,
3416 .optimize_sample_mask_in
= false,
3417 .lower_cs_local_index_from_id
= true,
3418 .lower_cs_local_id_from_index
= false,
3419 .lower_device_index_to_zero
= false, // TODO
3420 .lower_wpos_pntc
= false, // TODO
3421 .lower_hadd
= true, // TODO
3422 .lower_add_sat
= true, // TODO
3423 .vectorize_io
= false,
3424 .lower_to_scalar
= true,
3425 .unify_interfaces
= false,
3426 .use_interpolated_input_intrinsics
= true,
3427 .lower_mul_2x32_64
= true, // TODO
3428 .lower_rotate
= true,
3429 .has_imul24
= false,
3430 .intel_vec4
= false,
3431 .max_unroll_iterations
= 32,
3432 .lower_int64_options
= (nir_lower_int64_options
) ( // TODO
3433 nir_lower_divmod64
|
3434 nir_lower_ufind_msb64
3436 .lower_doubles_options
= (nir_lower_doubles_options
) ( // TODO
3442 static const nir_shader_compiler_options gf100_nir_shader_compiler_options
=
3443 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET
);
3445 const nir_shader_compiler_options
*
3446 nv50_ir_nir_shader_compiler_options(int chipset
)
3448 return &gf100_nir_shader_compiler_options
;