2 * Copyright 2017 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Karol Herbst <kherbst@redhat.com>
25 #include "compiler/nir/nir.h"
27 #include "util/u_debug.h"
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
37 #include <tr1/unordered_map>
44 #if __cplusplus >= 201103L
46 using std::unordered_map
;
49 using std::tr1::unordered_map
;
52 using namespace nv50_ir
;
// Number of attribute slots occupied by a GLSL type; the `false`
// argument to glsl_count_attribute_slots means the type is not counted
// with vertex-input-specific rules.
// NOTE(review): extraction-garbled fragment — the return-type line and
// the braces of the original function are missing from this chunk.
55 type_size(const struct glsl_type
*type
)
57 return glsl_count_attribute_slots(type
, false);
60 class Converter
: public ConverterCommon
63 Converter(Program
*, nir_shader
*, nv50_ir_prog_info
*);
67 typedef std::vector
<LValue
*> LValues
;
68 typedef unordered_map
<unsigned, LValues
> NirDefMap
;
69 typedef unordered_map
<unsigned, uint32_t> NirArrayLMemOffsets
;
70 typedef unordered_map
<unsigned, BasicBlock
*> NirBlockMap
;
72 TexTarget
convert(glsl_sampler_dim
, bool isArray
, bool isShadow
);
73 LValues
& convert(nir_alu_dest
*);
74 BasicBlock
* convert(nir_block
*);
75 LValues
& convert(nir_dest
*);
76 SVSemantic
convert(nir_intrinsic_op
);
77 LValues
& convert(nir_register
*);
78 LValues
& convert(nir_ssa_def
*);
80 ImgFormat
convertGLImgFormat(GLuint
);
82 Value
* getSrc(nir_alu_src
*, uint8_t component
= 0);
83 Value
* getSrc(nir_register
*, uint8_t);
84 Value
* getSrc(nir_src
*, uint8_t, bool indirect
= false);
85 Value
* getSrc(nir_ssa_def
*, uint8_t);
87 // returned value is the constant part of the given source (either the
88 // nir_src or the selected source component of an intrinsic). Even though
89 // this is mostly an optimization to be able to skip indirects in a few
90 // cases, sometimes we require immediate values or set some fileds on
91 // instructions (e.g. tex) in order for codegen to consume those.
92 // If the found value has not a constant part, the Value gets returned
93 // through the Value parameter.
94 uint32_t getIndirect(nir_src
*, uint8_t, Value
*&);
95 uint32_t getIndirect(nir_intrinsic_instr
*, uint8_t s
, uint8_t c
, Value
*&);
97 uint32_t getSlotAddress(nir_intrinsic_instr
*, uint8_t idx
, uint8_t slot
);
99 void setInterpolate(nv50_ir_varying
*,
104 Instruction
*loadFrom(DataFile
, uint8_t, DataType
, Value
*def
, uint32_t base
,
105 uint8_t c
, Value
*indirect0
= NULL
,
106 Value
*indirect1
= NULL
, bool patch
= false);
107 void storeTo(nir_intrinsic_instr
*, DataFile
, operation
, DataType
,
108 Value
*src
, uint8_t idx
, uint8_t c
, Value
*indirect0
= NULL
,
109 Value
*indirect1
= NULL
);
111 bool isFloatType(nir_alu_type
);
112 bool isSignedType(nir_alu_type
);
113 bool isResultFloat(nir_op
);
114 bool isResultSigned(nir_op
);
116 DataType
getDType(nir_alu_instr
*);
117 DataType
getDType(nir_intrinsic_instr
*);
118 DataType
getDType(nir_intrinsic_instr
*, bool isSigned
);
119 DataType
getDType(nir_op
, uint8_t);
121 std::vector
<DataType
> getSTypes(nir_alu_instr
*);
122 DataType
getSType(nir_src
&, bool isFloat
, bool isSigned
);
124 operation
getOperation(nir_intrinsic_op
);
125 operation
getOperation(nir_op
);
126 operation
getOperation(nir_texop
);
127 operation
preOperationNeeded(nir_op
);
129 int getSubOp(nir_intrinsic_op
);
130 int getSubOp(nir_op
);
132 CondCode
getCondCode(nir_op
);
137 bool visit(nir_alu_instr
*);
138 bool visit(nir_block
*);
139 bool visit(nir_cf_node
*);
140 bool visit(nir_deref_instr
*);
141 bool visit(nir_function
*);
142 bool visit(nir_if
*);
143 bool visit(nir_instr
*);
144 bool visit(nir_intrinsic_instr
*);
145 bool visit(nir_jump_instr
*);
146 bool visit(nir_load_const_instr
*);
147 bool visit(nir_loop
*);
148 bool visit(nir_ssa_undef_instr
*);
149 bool visit(nir_tex_instr
*);
152 Value
* applyProjection(Value
*src
, Value
*proj
);
153 unsigned int getNIRArgCount(TexInstruction::Target
&);
156 uint16_t handleDeref(nir_deref_instr
*, Value
* & indirect
, const nir_variable
* &);
157 CacheMode
getCacheModeFromVar(const nir_variable
*);
163 NirArrayLMemOffsets regToLmemOffset
;
165 unsigned int curLoopDepth
;
170 int clipVertexOutput
;
179 Converter::Converter(Program
*prog
, nir_shader
*nir
, nv50_ir_prog_info
*info
)
180 : ConverterCommon(prog
, info
),
185 zero
= mkImm((uint32_t)0);
// Map a nir_block to its nv50_ir BasicBlock, memoized in `blocks`
// (keyed by block->index); a new BasicBlock is allocated on first use.
// NOTE(review): garbled fragment — the early-return of the cached
// entry and the final return of `bb` are among the missing lines.
189 Converter::convert(nir_block
*block
)
191 NirBlockMap::iterator it
= blocks
.find(block
->index
);
192 if (it
!= blocks
.end())
// cache miss: create a new basic block for the current function
195 BasicBlock
*bb
= new BasicBlock(func
);
196 blocks
[block
->index
] = bb
;
// True when the NIR base type of `type` is floating-point.
201 Converter::isFloatType(nir_alu_type type
)
203 return nir_alu_type_get_base_type(type
) == nir_type_float
;
// True when the NIR base type of `type` is signed integer
// (nir_type_int); unsigned, bool and float all yield false.
207 Converter::isSignedType(nir_alu_type type
)
209 return nir_alu_type_get_base_type(type
) == nir_type_int
;
// Whether the result of `op` is floating-point, derived from the
// nir_op_infos table when the op declares a concrete output type.
// Ops with nir_type_invalid output fall through to an ERROR;
// NOTE(review): the fallback return value after the ERROR is among
// the lines missing from this garbled chunk.
213 Converter::isResultFloat(nir_op op
)
215 const nir_op_info
&info
= nir_op_infos
[op
];
216 if (info
.output_type
!= nir_type_invalid
)
217 return isFloatType(info
.output_type
);
219 ERROR("isResultFloat not implemented for %s\n", nir_op_infos
[op
].name
);
225 Converter::isResultSigned(nir_op op
)
228 // there is no umul and we get wrong results if we treat all muls as signed
233 const nir_op_info
&info
= nir_op_infos
[op
];
234 if (info
.output_type
!= nir_type_invalid
)
235 return isSignedType(info
.output_type
);
236 ERROR("isResultSigned not implemented for %s\n", nir_op_infos
[op
].name
);
243 Converter::getDType(nir_alu_instr
*insn
)
245 if (insn
->dest
.dest
.is_ssa
)
246 return getDType(insn
->op
, insn
->dest
.dest
.ssa
.bit_size
);
248 return getDType(insn
->op
, insn
->dest
.dest
.reg
.reg
->bit_size
);
252 Converter::getDType(nir_intrinsic_instr
*insn
)
255 switch (insn
->intrinsic
) {
256 case nir_intrinsic_shared_atomic_imax
:
257 case nir_intrinsic_shared_atomic_imin
:
258 case nir_intrinsic_ssbo_atomic_imax
:
259 case nir_intrinsic_ssbo_atomic_imin
:
267 return getDType(insn
, isSigned
);
271 Converter::getDType(nir_intrinsic_instr
*insn
, bool isSigned
)
273 if (insn
->dest
.is_ssa
)
274 return typeOfSize(insn
->dest
.ssa
.bit_size
/ 8, false, isSigned
);
276 return typeOfSize(insn
->dest
.reg
.reg
->bit_size
/ 8, false, isSigned
);
280 Converter::getDType(nir_op op
, uint8_t bitSize
)
282 DataType ty
= typeOfSize(bitSize
/ 8, isResultFloat(op
), isResultSigned(op
));
283 if (ty
== TYPE_NONE
) {
284 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos
[op
].name
, bitSize
);
290 std::vector
<DataType
>
291 Converter::getSTypes(nir_alu_instr
*insn
)
293 const nir_op_info
&info
= nir_op_infos
[insn
->op
];
294 std::vector
<DataType
> res(info
.num_inputs
);
296 for (uint8_t i
= 0; i
< info
.num_inputs
; ++i
) {
297 if (info
.input_types
[i
] != nir_type_invalid
) {
298 res
[i
] = getSType(insn
->src
[i
].src
, isFloatType(info
.input_types
[i
]), isSignedType(info
.input_types
[i
]));
300 ERROR("getSType not implemented for %s idx %u\n", info
.name
, i
);
311 Converter::getSType(nir_src
&src
, bool isFloat
, bool isSigned
)
315 bitSize
= src
.ssa
->bit_size
;
317 bitSize
= src
.reg
.reg
->bit_size
;
319 DataType ty
= typeOfSize(bitSize
/ 8, isFloat
, isSigned
);
320 if (ty
== TYPE_NONE
) {
328 ERROR("couldn't get Type for %s with bitSize %u\n", str
, bitSize
);
335 Converter::getOperation(nir_op op
)
338 // basic ops with float and int variants
348 case nir_op_ifind_msb
:
349 case nir_op_ufind_msb
:
371 case nir_op_fddx_coarse
:
372 case nir_op_fddx_fine
:
375 case nir_op_fddy_coarse
:
376 case nir_op_fddy_fine
:
394 case nir_op_pack_64_2x32_split
:
408 case nir_op_imul_high
:
409 case nir_op_umul_high
:
457 ERROR("couldn't get operation for op %s\n", nir_op_infos
[op
].name
);
464 Converter::getOperation(nir_texop op
)
476 case nir_texop_txf_ms
:
482 case nir_texop_query_levels
:
483 case nir_texop_texture_samples
:
487 ERROR("couldn't get operation for nir_texop %u\n", op
);
494 Converter::getOperation(nir_intrinsic_op op
)
497 case nir_intrinsic_emit_vertex
:
499 case nir_intrinsic_end_primitive
:
501 case nir_intrinsic_image_deref_atomic_add
:
502 case nir_intrinsic_image_deref_atomic_and
:
503 case nir_intrinsic_image_deref_atomic_comp_swap
:
504 case nir_intrinsic_image_deref_atomic_exchange
:
505 case nir_intrinsic_image_deref_atomic_max
:
506 case nir_intrinsic_image_deref_atomic_min
:
507 case nir_intrinsic_image_deref_atomic_or
:
508 case nir_intrinsic_image_deref_atomic_xor
:
510 case nir_intrinsic_image_deref_load
:
512 case nir_intrinsic_image_deref_samples
:
513 case nir_intrinsic_image_deref_size
:
515 case nir_intrinsic_image_deref_store
:
518 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op
);
525 Converter::preOperationNeeded(nir_op op
)
537 Converter::getSubOp(nir_op op
)
540 case nir_op_imul_high
:
541 case nir_op_umul_high
:
542 return NV50_IR_SUBOP_MUL_HIGH
;
549 Converter::getSubOp(nir_intrinsic_op op
)
552 case nir_intrinsic_image_deref_atomic_add
:
553 case nir_intrinsic_shared_atomic_add
:
554 case nir_intrinsic_ssbo_atomic_add
:
555 return NV50_IR_SUBOP_ATOM_ADD
;
556 case nir_intrinsic_image_deref_atomic_and
:
557 case nir_intrinsic_shared_atomic_and
:
558 case nir_intrinsic_ssbo_atomic_and
:
559 return NV50_IR_SUBOP_ATOM_AND
;
560 case nir_intrinsic_image_deref_atomic_comp_swap
:
561 case nir_intrinsic_shared_atomic_comp_swap
:
562 case nir_intrinsic_ssbo_atomic_comp_swap
:
563 return NV50_IR_SUBOP_ATOM_CAS
;
564 case nir_intrinsic_image_deref_atomic_exchange
:
565 case nir_intrinsic_shared_atomic_exchange
:
566 case nir_intrinsic_ssbo_atomic_exchange
:
567 return NV50_IR_SUBOP_ATOM_EXCH
;
568 case nir_intrinsic_image_deref_atomic_or
:
569 case nir_intrinsic_shared_atomic_or
:
570 case nir_intrinsic_ssbo_atomic_or
:
571 return NV50_IR_SUBOP_ATOM_OR
;
572 case nir_intrinsic_image_deref_atomic_max
:
573 case nir_intrinsic_shared_atomic_imax
:
574 case nir_intrinsic_shared_atomic_umax
:
575 case nir_intrinsic_ssbo_atomic_imax
:
576 case nir_intrinsic_ssbo_atomic_umax
:
577 return NV50_IR_SUBOP_ATOM_MAX
;
578 case nir_intrinsic_image_deref_atomic_min
:
579 case nir_intrinsic_shared_atomic_imin
:
580 case nir_intrinsic_shared_atomic_umin
:
581 case nir_intrinsic_ssbo_atomic_imin
:
582 case nir_intrinsic_ssbo_atomic_umin
:
583 return NV50_IR_SUBOP_ATOM_MIN
;
584 case nir_intrinsic_image_deref_atomic_xor
:
585 case nir_intrinsic_shared_atomic_xor
:
586 case nir_intrinsic_ssbo_atomic_xor
:
587 return NV50_IR_SUBOP_ATOM_XOR
;
588 case nir_intrinsic_vote_all
:
589 return NV50_IR_SUBOP_VOTE_ALL
;
590 case nir_intrinsic_vote_any
:
591 return NV50_IR_SUBOP_VOTE_ANY
;
592 case nir_intrinsic_vote_ieq
:
593 return NV50_IR_SUBOP_VOTE_UNI
;
600 Converter::getCondCode(nir_op op
)
619 ERROR("couldn't get CondCode for op %s\n", nir_op_infos
[op
].name
);
// ALU destinations simply wrap a nir_dest; forward to the nir_dest
// overload.
626 Converter::convert(nir_alu_dest
*dest
)
628 return convert(&dest
->dest
);
// Dispatch a nir_dest to the SSA or register overload. Indirect
// register destinations are unsupported and reported via ERROR.
// NOTE(review): garbled fragment — the `if (dest->is_ssa)` guard
// before the SSA return is among the missing lines.
632 Converter::convert(nir_dest
*dest
)
635 return convert(&dest
->ssa
);
636 if (dest
->reg
.indirect
) {
637 ERROR("no support for indirects.");
640 return convert(dest
->reg
.reg
);
// Map a nir_register to its per-component LValues, memoized in
// regDefs (keyed by reg->index). On first use, allocate one scratch
// value per component; sub-32-bit registers still get 4-byte scratch
// (std::max(4, bit_size / 8)).
// NOTE(review): garbled fragment — the early return of the cached
// `it->second` is among the missing lines.
644 Converter::convert(nir_register
*reg
)
646 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
647 if (it
!= regDefs
.end())
650 LValues
newDef(reg
->num_components
);
651 for (uint8_t i
= 0; i
< reg
->num_components
; i
++)
652 newDef
[i
] = getScratch(std::max(4, reg
->bit_size
/ 8));
653 return regDefs
[reg
->index
] = newDef
;
// Map a nir_ssa_def to its per-component LValues, memoized in
// ssaDefs (keyed by def->index). On first use, allocate one SSA
// value per component; sub-32-bit defs are rounded up to 4 bytes
// (std::max(4, bit_size / 8)).
// NOTE(review): garbled fragment — the early return of the cached
// `it->second` is among the missing lines.
657 Converter::convert(nir_ssa_def
*def
)
659 NirDefMap::iterator it
= ssaDefs
.find(def
->index
);
660 if (it
!= ssaDefs
.end())
663 LValues
newDef(def
->num_components
);
664 for (uint8_t i
= 0; i
< def
->num_components
; i
++)
665 newDef
[i
] = getSSA(std::max(4, def
->bit_size
/ 8));
666 return ssaDefs
[def
->index
] = newDef
;
670 Converter::getSrc(nir_alu_src
*src
, uint8_t component
)
672 if (src
->abs
|| src
->negate
) {
673 ERROR("modifiers currently not supported on nir_alu_src\n");
676 return getSrc(&src
->src
, src
->swizzle
[component
]);
// Fetch component `idx` of a nir_register's value set; converts
// (allocates) the register's LValues on first access, otherwise
// returns the cached entry.
680 Converter::getSrc(nir_register
*reg
, uint8_t idx
)
682 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
683 if (it
== regDefs
.end())
684 return convert(reg
)[idx
];
685 return it
->second
[idx
];
689 Converter::getSrc(nir_src
*src
, uint8_t idx
, bool indirect
)
692 return getSrc(src
->ssa
, idx
);
694 if (src
->reg
.indirect
) {
696 return getSrc(src
->reg
.indirect
, idx
);
697 ERROR("no support for indirects.");
702 return getSrc(src
->reg
.reg
, idx
);
// Fetch component `idx` of an SSA def. Unlike the register variant,
// an SSA value must already have been converted (defs dominate uses),
// so a missing entry is reported via ERROR.
// NOTE(review): garbled fragment — the return value on the error path
// is among the missing lines.
706 Converter::getSrc(nir_ssa_def
*src
, uint8_t idx
)
708 NirDefMap::iterator it
= ssaDefs
.find(src
->index
);
709 if (it
== ssaDefs
.end()) {
710 ERROR("SSA value %u not found\n", src
->index
);
714 return it
->second
[idx
];
// Resolve the constant part of a source: if the nir_src folds to a
// constant, return its u32 value (no indirect needed); otherwise the
// dynamic part is returned through the `indirect` out-parameter and
// the constant offset is presumably 0.
// NOTE(review): garbled fragment — the NULL-check on `offset` and the
// trailing `return 0;` are among the missing lines; confirm against
// the original nv50_ir_from_nir.cpp.
718 Converter::getIndirect(nir_src
*src
, uint8_t idx
, Value
*&indirect
)
720 nir_const_value
*offset
= nir_src_as_const_value(*src
);
724 return offset
->u32
[0];
727 indirect
= getSrc(src
, idx
, true);
732 Converter::getIndirect(nir_intrinsic_instr
*insn
, uint8_t s
, uint8_t c
, Value
*&indirect
)
734 int32_t idx
= nir_intrinsic_base(insn
) + getIndirect(&insn
->src
[s
], c
, indirect
);
736 indirect
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), indirect
, loadImm(NULL
, 4));
741 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot
, unsigned *name
, unsigned *index
)
743 assert(name
&& index
);
745 if (slot
>= VERT_ATTRIB_MAX
) {
746 ERROR("invalid varying slot %u\n", slot
);
751 if (slot
>= VERT_ATTRIB_GENERIC0
&&
752 slot
< VERT_ATTRIB_GENERIC0
+ VERT_ATTRIB_GENERIC_MAX
) {
753 *name
= TGSI_SEMANTIC_GENERIC
;
754 *index
= slot
- VERT_ATTRIB_GENERIC0
;
758 if (slot
>= VERT_ATTRIB_TEX0
&&
759 slot
< VERT_ATTRIB_TEX0
+ VERT_ATTRIB_TEX_MAX
) {
760 *name
= TGSI_SEMANTIC_TEXCOORD
;
761 *index
= slot
- VERT_ATTRIB_TEX0
;
766 case VERT_ATTRIB_COLOR0
:
767 *name
= TGSI_SEMANTIC_COLOR
;
770 case VERT_ATTRIB_COLOR1
:
771 *name
= TGSI_SEMANTIC_COLOR
;
774 case VERT_ATTRIB_EDGEFLAG
:
775 *name
= TGSI_SEMANTIC_EDGEFLAG
;
778 case VERT_ATTRIB_FOG
:
779 *name
= TGSI_SEMANTIC_FOG
;
782 case VERT_ATTRIB_NORMAL
:
783 *name
= TGSI_SEMANTIC_NORMAL
;
786 case VERT_ATTRIB_POS
:
787 *name
= TGSI_SEMANTIC_POSITION
;
790 case VERT_ATTRIB_POINT_SIZE
:
791 *name
= TGSI_SEMANTIC_PSIZE
;
795 ERROR("unknown vert attrib slot %u\n", slot
);
802 varying_slot_to_tgsi_semantic(gl_varying_slot slot
, unsigned *name
, unsigned *index
)
804 assert(name
&& index
);
806 if (slot
>= VARYING_SLOT_TESS_MAX
) {
807 ERROR("invalid varying slot %u\n", slot
);
812 if (slot
>= VARYING_SLOT_PATCH0
) {
813 *name
= TGSI_SEMANTIC_PATCH
;
814 *index
= slot
- VARYING_SLOT_PATCH0
;
818 if (slot
>= VARYING_SLOT_VAR0
) {
819 *name
= TGSI_SEMANTIC_GENERIC
;
820 *index
= slot
- VARYING_SLOT_VAR0
;
824 if (slot
>= VARYING_SLOT_TEX0
&& slot
<= VARYING_SLOT_TEX7
) {
825 *name
= TGSI_SEMANTIC_TEXCOORD
;
826 *index
= slot
- VARYING_SLOT_TEX0
;
831 case VARYING_SLOT_BFC0
:
832 *name
= TGSI_SEMANTIC_BCOLOR
;
835 case VARYING_SLOT_BFC1
:
836 *name
= TGSI_SEMANTIC_BCOLOR
;
839 case VARYING_SLOT_CLIP_DIST0
:
840 *name
= TGSI_SEMANTIC_CLIPDIST
;
843 case VARYING_SLOT_CLIP_DIST1
:
844 *name
= TGSI_SEMANTIC_CLIPDIST
;
847 case VARYING_SLOT_CLIP_VERTEX
:
848 *name
= TGSI_SEMANTIC_CLIPVERTEX
;
851 case VARYING_SLOT_COL0
:
852 *name
= TGSI_SEMANTIC_COLOR
;
855 case VARYING_SLOT_COL1
:
856 *name
= TGSI_SEMANTIC_COLOR
;
859 case VARYING_SLOT_EDGE
:
860 *name
= TGSI_SEMANTIC_EDGEFLAG
;
863 case VARYING_SLOT_FACE
:
864 *name
= TGSI_SEMANTIC_FACE
;
867 case VARYING_SLOT_FOGC
:
868 *name
= TGSI_SEMANTIC_FOG
;
871 case VARYING_SLOT_LAYER
:
872 *name
= TGSI_SEMANTIC_LAYER
;
875 case VARYING_SLOT_PNTC
:
876 *name
= TGSI_SEMANTIC_PCOORD
;
879 case VARYING_SLOT_POS
:
880 *name
= TGSI_SEMANTIC_POSITION
;
883 case VARYING_SLOT_PRIMITIVE_ID
:
884 *name
= TGSI_SEMANTIC_PRIMID
;
887 case VARYING_SLOT_PSIZ
:
888 *name
= TGSI_SEMANTIC_PSIZE
;
891 case VARYING_SLOT_TESS_LEVEL_INNER
:
892 *name
= TGSI_SEMANTIC_TESSINNER
;
895 case VARYING_SLOT_TESS_LEVEL_OUTER
:
896 *name
= TGSI_SEMANTIC_TESSOUTER
;
899 case VARYING_SLOT_VIEWPORT
:
900 *name
= TGSI_SEMANTIC_VIEWPORT_INDEX
;
904 ERROR("unknown varying slot %u\n", slot
);
911 frag_result_to_tgsi_semantic(unsigned slot
, unsigned *name
, unsigned *index
)
913 if (slot
>= FRAG_RESULT_DATA0
) {
914 *name
= TGSI_SEMANTIC_COLOR
;
915 *index
= slot
- FRAG_RESULT_COLOR
- 2; // intentional
920 case FRAG_RESULT_COLOR
:
921 *name
= TGSI_SEMANTIC_COLOR
;
924 case FRAG_RESULT_DEPTH
:
925 *name
= TGSI_SEMANTIC_POSITION
;
928 case FRAG_RESULT_SAMPLE_MASK
:
929 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
933 ERROR("unknown frag result slot %u\n", slot
);
939 // copy of _mesa_sysval_to_semantic
941 system_val_to_tgsi_semantic(unsigned val
, unsigned *name
, unsigned *index
)
946 case SYSTEM_VALUE_VERTEX_ID
:
947 *name
= TGSI_SEMANTIC_VERTEXID
;
949 case SYSTEM_VALUE_INSTANCE_ID
:
950 *name
= TGSI_SEMANTIC_INSTANCEID
;
952 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
:
953 *name
= TGSI_SEMANTIC_VERTEXID_NOBASE
;
955 case SYSTEM_VALUE_BASE_VERTEX
:
956 *name
= TGSI_SEMANTIC_BASEVERTEX
;
958 case SYSTEM_VALUE_BASE_INSTANCE
:
959 *name
= TGSI_SEMANTIC_BASEINSTANCE
;
961 case SYSTEM_VALUE_DRAW_ID
:
962 *name
= TGSI_SEMANTIC_DRAWID
;
966 case SYSTEM_VALUE_INVOCATION_ID
:
967 *name
= TGSI_SEMANTIC_INVOCATIONID
;
971 case SYSTEM_VALUE_FRAG_COORD
:
972 *name
= TGSI_SEMANTIC_POSITION
;
974 case SYSTEM_VALUE_FRONT_FACE
:
975 *name
= TGSI_SEMANTIC_FACE
;
977 case SYSTEM_VALUE_SAMPLE_ID
:
978 *name
= TGSI_SEMANTIC_SAMPLEID
;
980 case SYSTEM_VALUE_SAMPLE_POS
:
981 *name
= TGSI_SEMANTIC_SAMPLEPOS
;
983 case SYSTEM_VALUE_SAMPLE_MASK_IN
:
984 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
986 case SYSTEM_VALUE_HELPER_INVOCATION
:
987 *name
= TGSI_SEMANTIC_HELPER_INVOCATION
;
990 // Tessellation shader
991 case SYSTEM_VALUE_TESS_COORD
:
992 *name
= TGSI_SEMANTIC_TESSCOORD
;
994 case SYSTEM_VALUE_VERTICES_IN
:
995 *name
= TGSI_SEMANTIC_VERTICESIN
;
997 case SYSTEM_VALUE_PRIMITIVE_ID
:
998 *name
= TGSI_SEMANTIC_PRIMID
;
1000 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
1001 *name
= TGSI_SEMANTIC_TESSOUTER
;
1003 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
1004 *name
= TGSI_SEMANTIC_TESSINNER
;
1008 case SYSTEM_VALUE_LOCAL_INVOCATION_ID
:
1009 *name
= TGSI_SEMANTIC_THREAD_ID
;
1011 case SYSTEM_VALUE_WORK_GROUP_ID
:
1012 *name
= TGSI_SEMANTIC_BLOCK_ID
;
1014 case SYSTEM_VALUE_NUM_WORK_GROUPS
:
1015 *name
= TGSI_SEMANTIC_GRID_SIZE
;
1017 case SYSTEM_VALUE_LOCAL_GROUP_SIZE
:
1018 *name
= TGSI_SEMANTIC_BLOCK_SIZE
;
1021 // ARB_shader_ballot
1022 case SYSTEM_VALUE_SUBGROUP_SIZE
:
1023 *name
= TGSI_SEMANTIC_SUBGROUP_SIZE
;
1025 case SYSTEM_VALUE_SUBGROUP_INVOCATION
:
1026 *name
= TGSI_SEMANTIC_SUBGROUP_INVOCATION
;
1028 case SYSTEM_VALUE_SUBGROUP_EQ_MASK
:
1029 *name
= TGSI_SEMANTIC_SUBGROUP_EQ_MASK
;
1031 case SYSTEM_VALUE_SUBGROUP_GE_MASK
:
1032 *name
= TGSI_SEMANTIC_SUBGROUP_GE_MASK
;
1034 case SYSTEM_VALUE_SUBGROUP_GT_MASK
:
1035 *name
= TGSI_SEMANTIC_SUBGROUP_GT_MASK
;
1037 case SYSTEM_VALUE_SUBGROUP_LE_MASK
:
1038 *name
= TGSI_SEMANTIC_SUBGROUP_LE_MASK
;
1040 case SYSTEM_VALUE_SUBGROUP_LT_MASK
:
1041 *name
= TGSI_SEMANTIC_SUBGROUP_LT_MASK
;
1045 ERROR("unknown system value %u\n", val
);
1052 Converter::setInterpolate(nv50_ir_varying
*var
,
1058 case INTERP_MODE_FLAT
:
1061 case INTERP_MODE_NONE
:
1062 if (semantic
== TGSI_SEMANTIC_COLOR
)
1064 else if (semantic
== TGSI_SEMANTIC_POSITION
)
1067 case INTERP_MODE_NOPERSPECTIVE
:
1070 case INTERP_MODE_SMOOTH
:
1073 var
->centroid
= centroid
;
1077 calcSlots(const glsl_type
*type
, Program::Type stage
, const shader_info
&info
,
1078 bool input
, const nir_variable
*var
)
1080 if (!type
->is_array())
1081 return type
->count_attribute_slots(false);
1085 case Program::TYPE_GEOMETRY
:
1086 slots
= type
->uniform_locations();
1088 slots
/= info
.gs
.vertices_in
;
1090 case Program::TYPE_TESSELLATION_CONTROL
:
1091 case Program::TYPE_TESSELLATION_EVAL
:
1092 // remove first dimension
1093 if (var
->data
.patch
|| (!input
&& stage
== Program::TYPE_TESSELLATION_EVAL
))
1094 slots
= type
->uniform_locations();
1096 slots
= type
->fields
.array
->uniform_locations();
1099 slots
= type
->count_attribute_slots(false);
1106 bool Converter::assignSlots() {
1110 info
->io
.viewportId
= -1;
1111 info
->numInputs
= 0;
1113 // we have to fixup the uniform locations for arrays
1114 unsigned numImages
= 0;
1115 nir_foreach_variable(var
, &nir
->uniforms
) {
1116 const glsl_type
*type
= var
->type
;
1117 if (!type
->without_array()->is_image())
1119 var
->data
.driver_location
= numImages
;
1120 numImages
+= type
->is_array() ? type
->arrays_of_arrays_size() : 1;
1123 nir_foreach_variable(var
, &nir
->inputs
) {
1124 const glsl_type
*type
= var
->type
;
1125 int slot
= var
->data
.location
;
1126 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, true, var
);
1127 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
1128 : type
->component_slots();
1129 uint32_t frac
= var
->data
.location_frac
;
1130 uint32_t vary
= var
->data
.driver_location
;
1132 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
1137 assert(vary
+ slots
<= PIPE_MAX_SHADER_INPUTS
);
1139 switch(prog
->getType()) {
1140 case Program::TYPE_FRAGMENT
:
1141 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1142 for (uint16_t i
= 0; i
< slots
; ++i
) {
1143 setInterpolate(&info
->in
[vary
+ i
], var
->data
.interpolation
,
1144 var
->data
.centroid
| var
->data
.sample
, name
);
1147 case Program::TYPE_GEOMETRY
:
1148 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1150 case Program::TYPE_TESSELLATION_CONTROL
:
1151 case Program::TYPE_TESSELLATION_EVAL
:
1152 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1153 if (var
->data
.patch
&& name
== TGSI_SEMANTIC_PATCH
)
1154 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1156 case Program::TYPE_VERTEX
:
1157 vert_attrib_to_tgsi_semantic((gl_vert_attrib
)slot
, &name
, &index
);
1159 case TGSI_SEMANTIC_EDGEFLAG
:
1160 info
->io
.edgeFlagIn
= vary
;
1167 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1171 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1172 info
->in
[vary
].id
= vary
;
1173 info
->in
[vary
].patch
= var
->data
.patch
;
1174 info
->in
[vary
].sn
= name
;
1175 info
->in
[vary
].si
= index
+ i
;
1176 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1178 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1180 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1182 info
->in
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1184 info
->numInputs
= std::max
<uint8_t>(info
->numInputs
, vary
);
1187 info
->numOutputs
= 0;
1188 nir_foreach_variable(var
, &nir
->outputs
) {
1189 const glsl_type
*type
= var
->type
;
1190 int slot
= var
->data
.location
;
1191 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, false, var
);
1192 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
1193 : type
->component_slots();
1194 uint32_t frac
= var
->data
.location_frac
;
1195 uint32_t vary
= var
->data
.driver_location
;
1197 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
1202 assert(vary
< PIPE_MAX_SHADER_OUTPUTS
);
1204 switch(prog
->getType()) {
1205 case Program::TYPE_FRAGMENT
:
1206 frag_result_to_tgsi_semantic((gl_frag_result
)slot
, &name
, &index
);
1208 case TGSI_SEMANTIC_COLOR
:
1209 if (!var
->data
.fb_fetch_output
)
1210 info
->prop
.fp
.numColourResults
++;
1211 info
->prop
.fp
.separateFragData
= true;
1212 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1213 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1214 index
= index
== 0 ? var
->data
.index
: index
;
1216 case TGSI_SEMANTIC_POSITION
:
1217 info
->io
.fragDepth
= vary
;
1218 info
->prop
.fp
.writesDepth
= true;
1220 case TGSI_SEMANTIC_SAMPLEMASK
:
1221 info
->io
.sampleMask
= vary
;
1227 case Program::TYPE_GEOMETRY
:
1228 case Program::TYPE_TESSELLATION_CONTROL
:
1229 case Program::TYPE_TESSELLATION_EVAL
:
1230 case Program::TYPE_VERTEX
:
1231 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1233 if (var
->data
.patch
&& name
!= TGSI_SEMANTIC_TESSINNER
&&
1234 name
!= TGSI_SEMANTIC_TESSOUTER
)
1235 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1238 case TGSI_SEMANTIC_CLIPDIST
:
1239 info
->io
.genUserClip
= -1;
1241 case TGSI_SEMANTIC_CLIPVERTEX
:
1242 clipVertexOutput
= vary
;
1244 case TGSI_SEMANTIC_EDGEFLAG
:
1245 info
->io
.edgeFlagOut
= vary
;
1247 case TGSI_SEMANTIC_POSITION
:
1248 if (clipVertexOutput
< 0)
1249 clipVertexOutput
= vary
;
1256 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1260 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1261 info
->out
[vary
].id
= vary
;
1262 info
->out
[vary
].patch
= var
->data
.patch
;
1263 info
->out
[vary
].sn
= name
;
1264 info
->out
[vary
].si
= index
+ i
;
1265 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1267 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1269 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1271 info
->out
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1273 if (nir
->info
.outputs_read
& 1ll << slot
)
1274 info
->out
[vary
].oread
= 1;
1276 info
->numOutputs
= std::max
<uint8_t>(info
->numOutputs
, vary
);
1279 info
->numSysVals
= 0;
1280 for (uint8_t i
= 0; i
< 64; ++i
) {
1281 if (!(nir
->info
.system_values_read
& 1ll << i
))
1284 system_val_to_tgsi_semantic(i
, &name
, &index
);
1285 info
->sv
[info
->numSysVals
].sn
= name
;
1286 info
->sv
[info
->numSysVals
].si
= index
;
1287 info
->sv
[info
->numSysVals
].input
= 0; // TODO inferSysValDirection(sn);
1290 case SYSTEM_VALUE_INSTANCE_ID
:
1291 info
->io
.instanceId
= info
->numSysVals
;
1293 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
1294 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
1295 info
->sv
[info
->numSysVals
].patch
= 1;
1297 case SYSTEM_VALUE_VERTEX_ID
:
1298 info
->io
.vertexId
= info
->numSysVals
;
1304 info
->numSysVals
+= 1;
1307 if (info
->io
.genUserClip
> 0) {
1308 info
->io
.clipDistances
= info
->io
.genUserClip
;
1310 const unsigned int nOut
= (info
->io
.genUserClip
+ 3) / 4;
1312 for (unsigned int n
= 0; n
< nOut
; ++n
) {
1313 unsigned int i
= info
->numOutputs
++;
1314 info
->out
[i
].id
= i
;
1315 info
->out
[i
].sn
= TGSI_SEMANTIC_CLIPDIST
;
1316 info
->out
[i
].si
= n
;
1317 info
->out
[i
].mask
= ((1 << info
->io
.clipDistances
) - 1) >> (n
* 4);
1321 return info
->assignSlots(info
) == 0;
1325 Converter::getSlotAddress(nir_intrinsic_instr
*insn
, uint8_t idx
, uint8_t slot
)
1328 int offset
= nir_intrinsic_component(insn
);
1331 if (nir_intrinsic_infos
[insn
->intrinsic
].has_dest
)
1332 ty
= getDType(insn
);
1334 ty
= getSType(insn
->src
[0], false, false);
1336 switch (insn
->intrinsic
) {
1337 case nir_intrinsic_load_input
:
1338 case nir_intrinsic_load_interpolated_input
:
1339 case nir_intrinsic_load_per_vertex_input
:
1342 case nir_intrinsic_load_output
:
1343 case nir_intrinsic_load_per_vertex_output
:
1344 case nir_intrinsic_store_output
:
1345 case nir_intrinsic_store_per_vertex_output
:
1349 ERROR("unknown intrinsic in getSlotAddress %s",
1350 nir_intrinsic_infos
[insn
->intrinsic
].name
);
1356 if (typeSizeof(ty
) == 8) {
1368 assert(!input
|| idx
< PIPE_MAX_SHADER_INPUTS
);
1369 assert(input
|| idx
< PIPE_MAX_SHADER_OUTPUTS
);
1371 const nv50_ir_varying
*vary
= input
? info
->in
: info
->out
;
1372 return vary
[idx
].slot
[slot
] * 4;
1376 Converter::loadFrom(DataFile file
, uint8_t i
, DataType ty
, Value
*def
,
1377 uint32_t base
, uint8_t c
, Value
*indirect0
,
1378 Value
*indirect1
, bool patch
)
1380 unsigned int tySize
= typeSizeof(ty
);
1383 (file
== FILE_MEMORY_CONST
|| file
== FILE_MEMORY_BUFFER
|| indirect0
)) {
1384 Value
*lo
= getSSA();
1385 Value
*hi
= getSSA();
1388 mkLoad(TYPE_U32
, lo
,
1389 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
),
1391 loi
->setIndirect(0, 1, indirect1
);
1392 loi
->perPatch
= patch
;
1395 mkLoad(TYPE_U32
, hi
,
1396 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
+ 4),
1398 hii
->setIndirect(0, 1, indirect1
);
1399 hii
->perPatch
= patch
;
1401 return mkOp2(OP_MERGE
, ty
, def
, lo
, hi
);
1404 mkLoad(ty
, def
, mkSymbol(file
, i
, ty
, base
+ c
* tySize
), indirect0
);
1405 ld
->setIndirect(0, 1, indirect1
);
1406 ld
->perPatch
= patch
;
1412 Converter::storeTo(nir_intrinsic_instr
*insn
, DataFile file
, operation op
,
1413 DataType ty
, Value
*src
, uint8_t idx
, uint8_t c
,
1414 Value
*indirect0
, Value
*indirect1
)
1416 uint8_t size
= typeSizeof(ty
);
1417 uint32_t address
= getSlotAddress(insn
, idx
, c
);
1419 if (size
== 8 && indirect0
) {
1421 mkSplit(split
, 4, src
);
1423 if (op
== OP_EXPORT
) {
1424 split
[0] = mkMov(getSSA(), split
[0], ty
)->getDef(0);
1425 split
[1] = mkMov(getSSA(), split
[1], ty
)->getDef(0);
1428 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
), indirect0
,
1429 split
[0])->perPatch
= info
->out
[idx
].patch
;
1430 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
+ 4), indirect0
,
1431 split
[1])->perPatch
= info
->out
[idx
].patch
;
1433 if (op
== OP_EXPORT
)
1434 src
= mkMov(getSSA(size
), src
, ty
)->getDef(0);
1435 mkStore(op
, ty
, mkSymbol(file
, 0, ty
, address
), indirect0
,
1436 src
)->perPatch
= info
->out
[idx
].patch
;
1441 Converter::parseNIR()
1443 info
->bin
.tlsSpace
= 0;
1444 info
->io
.clipDistances
= nir
->info
.clip_distance_array_size
;
1445 info
->io
.cullDistances
= nir
->info
.cull_distance_array_size
;
1447 switch(prog
->getType()) {
1448 case Program::TYPE_COMPUTE
:
1449 info
->prop
.cp
.numThreads
[0] = nir
->info
.cs
.local_size
[0];
1450 info
->prop
.cp
.numThreads
[1] = nir
->info
.cs
.local_size
[1];
1451 info
->prop
.cp
.numThreads
[2] = nir
->info
.cs
.local_size
[2];
1452 info
->bin
.smemSize
= nir
->info
.cs
.shared_size
;
1454 case Program::TYPE_FRAGMENT
:
1455 info
->prop
.fp
.earlyFragTests
= nir
->info
.fs
.early_fragment_tests
;
1456 info
->prop
.fp
.persampleInvocation
=
1457 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_ID
) ||
1458 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1459 info
->prop
.fp
.postDepthCoverage
= nir
->info
.fs
.post_depth_coverage
;
1460 info
->prop
.fp
.readsSampleLocations
=
1461 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1462 info
->prop
.fp
.usesDiscard
= nir
->info
.fs
.uses_discard
;
1463 info
->prop
.fp
.usesSampleMaskIn
=
1464 !!(nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_MASK_IN
);
1466 case Program::TYPE_GEOMETRY
:
1467 info
->prop
.gp
.inputPrim
= nir
->info
.gs
.input_primitive
;
1468 info
->prop
.gp
.instanceCount
= nir
->info
.gs
.invocations
;
1469 info
->prop
.gp
.maxVertices
= nir
->info
.gs
.vertices_out
;
1470 info
->prop
.gp
.outputPrim
= nir
->info
.gs
.output_primitive
;
1472 case Program::TYPE_TESSELLATION_CONTROL
:
1473 case Program::TYPE_TESSELLATION_EVAL
:
1474 if (nir
->info
.tess
.primitive_mode
== GL_ISOLINES
)
1475 info
->prop
.tp
.domain
= GL_LINES
;
1477 info
->prop
.tp
.domain
= nir
->info
.tess
.primitive_mode
;
1478 info
->prop
.tp
.outputPatchSize
= nir
->info
.tess
.tcs_vertices_out
;
1479 info
->prop
.tp
.outputPrim
=
1480 nir
->info
.tess
.point_mode
? PIPE_PRIM_POINTS
: PIPE_PRIM_TRIANGLES
;
1481 info
->prop
.tp
.partitioning
= (nir
->info
.tess
.spacing
+ 1) % 3;
1482 info
->prop
.tp
.winding
= !nir
->info
.tess
.ccw
;
1484 case Program::TYPE_VERTEX
:
1485 info
->prop
.vp
.usesDrawParameters
=
1486 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX
)) ||
1487 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE
)) ||
1488 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID
));
1498 Converter::visit(nir_function
*function
)
1500 // we only support emiting the main function for now
1501 assert(!strcmp(function
->name
, "main"));
1502 assert(function
->impl
);
1504 // usually the blocks will set everything up, but main is special
1505 BasicBlock
*entry
= new BasicBlock(prog
->main
);
1506 exit
= new BasicBlock(prog
->main
);
1507 blocks
[nir_start_block(function
->impl
)->index
] = entry
;
1508 prog
->main
->setEntry(entry
);
1509 prog
->main
->setExit(exit
);
1511 setPosition(entry
, true);
1513 if (info
->io
.genUserClip
> 0) {
1514 for (int c
= 0; c
< 4; ++c
)
1515 clipVtx
[c
] = getScratch();
1518 switch (prog
->getType()) {
1519 case Program::TYPE_TESSELLATION_CONTROL
:
1521 OP_SUB
, TYPE_U32
, getSSA(),
1522 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LANEID
, 0)),
1523 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_INVOCATION_ID
, 0)));
1525 case Program::TYPE_FRAGMENT
: {
1526 Symbol
*sv
= mkSysVal(SV_POSITION
, 3);
1527 fragCoord
[3] = mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), sv
);
1528 fp
.position
= mkOp1v(OP_RCP
, TYPE_F32
, fragCoord
[3], fragCoord
[3]);
1535 nir_foreach_register(reg
, &function
->impl
->registers
) {
1536 if (reg
->num_array_elems
) {
1537 // TODO: packed variables would be nice, but MemoryOpt fails
1538 // replace 4 with reg->num_components
1539 uint32_t size
= 4 * reg
->num_array_elems
* (reg
->bit_size
/ 8);
1540 regToLmemOffset
[reg
->index
] = info
->bin
.tlsSpace
;
1541 info
->bin
.tlsSpace
+= size
;
1545 nir_index_ssa_defs(function
->impl
);
1546 foreach_list_typed(nir_cf_node
, node
, node
, &function
->impl
->body
) {
1551 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::TREE
);
1552 setPosition(exit
, true);
1554 if (info
->io
.genUserClip
> 0)
1555 handleUserClipPlanes();
1557 // TODO: for non main function this needs to be a OP_RETURN
1558 mkOp(OP_EXIT
, TYPE_NONE
, NULL
)->terminator
= 1;
1563 Converter::visit(nir_cf_node
*node
)
1565 switch (node
->type
) {
1566 case nir_cf_node_block
:
1567 return visit(nir_cf_node_as_block(node
));
1568 case nir_cf_node_if
:
1569 return visit(nir_cf_node_as_if(node
));
1570 case nir_cf_node_loop
:
1571 return visit(nir_cf_node_as_loop(node
));
1573 ERROR("unknown nir_cf_node type %u\n", node
->type
);
1579 Converter::visit(nir_block
*block
)
1581 if (!block
->predecessors
->entries
&& block
->instr_list
.is_empty())
1584 BasicBlock
*bb
= convert(block
);
1586 setPosition(bb
, true);
1587 nir_foreach_instr(insn
, block
) {
1595 Converter::visit(nir_if
*nif
)
1597 DataType sType
= getSType(nif
->condition
, false, false);
1598 Value
*src
= getSrc(&nif
->condition
, 0);
1600 nir_block
*lastThen
= nir_if_last_then_block(nif
);
1601 nir_block
*lastElse
= nir_if_last_else_block(nif
);
1603 assert(!lastThen
->successors
[1]);
1604 assert(!lastElse
->successors
[1]);
1606 BasicBlock
*ifBB
= convert(nir_if_first_then_block(nif
));
1607 BasicBlock
*elseBB
= convert(nir_if_first_else_block(nif
));
1609 bb
->cfg
.attach(&ifBB
->cfg
, Graph::Edge::TREE
);
1610 bb
->cfg
.attach(&elseBB
->cfg
, Graph::Edge::TREE
);
1612 // we only insert joinats, if both nodes end up at the end of the if again.
1613 // the reason for this to not happens are breaks/continues/ret/... which
1614 // have their own handling
1615 if (lastThen
->successors
[0] == lastElse
->successors
[0])
1616 bb
->joinAt
= mkFlow(OP_JOINAT
, convert(lastThen
->successors
[0]),
1619 mkFlow(OP_BRA
, elseBB
, CC_EQ
, src
)->setType(sType
);
1621 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->then_list
) {
1625 setPosition(convert(lastThen
), true);
1626 if (!bb
->getExit() ||
1627 !bb
->getExit()->asFlow() ||
1628 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1629 BasicBlock
*tailBB
= convert(lastThen
->successors
[0]);
1630 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1631 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1634 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->else_list
) {
1638 setPosition(convert(lastElse
), true);
1639 if (!bb
->getExit() ||
1640 !bb
->getExit()->asFlow() ||
1641 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1642 BasicBlock
*tailBB
= convert(lastElse
->successors
[0]);
1643 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1644 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1647 if (lastThen
->successors
[0] == lastElse
->successors
[0]) {
1648 setPosition(convert(lastThen
->successors
[0]), true);
1649 mkFlow(OP_JOIN
, NULL
, CC_ALWAYS
, NULL
)->fixed
= 1;
1656 Converter::visit(nir_loop
*loop
)
1659 func
->loopNestingBound
= std::max(func
->loopNestingBound
, curLoopDepth
);
1661 BasicBlock
*loopBB
= convert(nir_loop_first_block(loop
));
1662 BasicBlock
*tailBB
=
1663 convert(nir_cf_node_as_block(nir_cf_node_next(&loop
->cf_node
)));
1664 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::TREE
);
1666 mkFlow(OP_PREBREAK
, tailBB
, CC_ALWAYS
, NULL
);
1667 setPosition(loopBB
, false);
1668 mkFlow(OP_PRECONT
, loopBB
, CC_ALWAYS
, NULL
);
1670 foreach_list_typed(nir_cf_node
, node
, node
, &loop
->body
) {
1674 Instruction
*insn
= bb
->getExit();
1675 if (bb
->cfg
.incidentCount() != 0) {
1676 if (!insn
|| !insn
->asFlow()) {
1677 mkFlow(OP_CONT
, loopBB
, CC_ALWAYS
, NULL
);
1678 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::BACK
);
1679 } else if (insn
&& insn
->op
== OP_BRA
&& !insn
->getPredicate() &&
1680 tailBB
->cfg
.incidentCount() == 0) {
1681 // RA doesn't like having blocks around with no incident edge,
1682 // so we create a fake one to make it happy
1683 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::TREE
);
1693 Converter::visit(nir_instr
*insn
)
1695 switch (insn
->type
) {
1696 case nir_instr_type_alu
:
1697 return visit(nir_instr_as_alu(insn
));
1698 case nir_instr_type_deref
:
1699 return visit(nir_instr_as_deref(insn
));
1700 case nir_instr_type_intrinsic
:
1701 return visit(nir_instr_as_intrinsic(insn
));
1702 case nir_instr_type_jump
:
1703 return visit(nir_instr_as_jump(insn
));
1704 case nir_instr_type_load_const
:
1705 return visit(nir_instr_as_load_const(insn
));
1706 case nir_instr_type_ssa_undef
:
1707 return visit(nir_instr_as_ssa_undef(insn
));
1708 case nir_instr_type_tex
:
1709 return visit(nir_instr_as_tex(insn
));
1711 ERROR("unknown nir_instr type %u\n", insn
->type
);
1718 Converter::convert(nir_intrinsic_op intr
)
1721 case nir_intrinsic_load_base_vertex
:
1722 return SV_BASEVERTEX
;
1723 case nir_intrinsic_load_base_instance
:
1724 return SV_BASEINSTANCE
;
1725 case nir_intrinsic_load_draw_id
:
1727 case nir_intrinsic_load_front_face
:
1729 case nir_intrinsic_load_helper_invocation
:
1730 return SV_THREAD_KILL
;
1731 case nir_intrinsic_load_instance_id
:
1732 return SV_INSTANCE_ID
;
1733 case nir_intrinsic_load_invocation_id
:
1734 return SV_INVOCATION_ID
;
1735 case nir_intrinsic_load_local_group_size
:
1737 case nir_intrinsic_load_local_invocation_id
:
1739 case nir_intrinsic_load_num_work_groups
:
1741 case nir_intrinsic_load_patch_vertices_in
:
1742 return SV_VERTEX_COUNT
;
1743 case nir_intrinsic_load_primitive_id
:
1744 return SV_PRIMITIVE_ID
;
1745 case nir_intrinsic_load_sample_id
:
1746 return SV_SAMPLE_INDEX
;
1747 case nir_intrinsic_load_sample_mask_in
:
1748 return SV_SAMPLE_MASK
;
1749 case nir_intrinsic_load_sample_pos
:
1750 return SV_SAMPLE_POS
;
1751 case nir_intrinsic_load_subgroup_eq_mask
:
1752 return SV_LANEMASK_EQ
;
1753 case nir_intrinsic_load_subgroup_ge_mask
:
1754 return SV_LANEMASK_GE
;
1755 case nir_intrinsic_load_subgroup_gt_mask
:
1756 return SV_LANEMASK_GT
;
1757 case nir_intrinsic_load_subgroup_le_mask
:
1758 return SV_LANEMASK_LE
;
1759 case nir_intrinsic_load_subgroup_lt_mask
:
1760 return SV_LANEMASK_LT
;
1761 case nir_intrinsic_load_subgroup_invocation
:
1763 case nir_intrinsic_load_tess_coord
:
1764 return SV_TESS_COORD
;
1765 case nir_intrinsic_load_tess_level_inner
:
1766 return SV_TESS_INNER
;
1767 case nir_intrinsic_load_tess_level_outer
:
1768 return SV_TESS_OUTER
;
1769 case nir_intrinsic_load_vertex_id
:
1770 return SV_VERTEX_ID
;
1771 case nir_intrinsic_load_work_group_id
:
1774 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1775 nir_intrinsic_infos
[intr
].name
);
1782 Converter::convertGLImgFormat(GLuint format
)
1784 #define FMT_CASE(a, b) \
1785 case GL_ ## a: return nv50_ir::FMT_ ## b
1788 FMT_CASE(NONE
, NONE
);
1790 FMT_CASE(RGBA32F
, RGBA32F
);
1791 FMT_CASE(RGBA16F
, RGBA16F
);
1792 FMT_CASE(RG32F
, RG32F
);
1793 FMT_CASE(RG16F
, RG16F
);
1794 FMT_CASE(R11F_G11F_B10F
, R11G11B10F
);
1795 FMT_CASE(R32F
, R32F
);
1796 FMT_CASE(R16F
, R16F
);
1798 FMT_CASE(RGBA32UI
, RGBA32UI
);
1799 FMT_CASE(RGBA16UI
, RGBA16UI
);
1800 FMT_CASE(RGB10_A2UI
, RGB10A2UI
);
1801 FMT_CASE(RGBA8UI
, RGBA8UI
);
1802 FMT_CASE(RG32UI
, RG32UI
);
1803 FMT_CASE(RG16UI
, RG16UI
);
1804 FMT_CASE(RG8UI
, RG8UI
);
1805 FMT_CASE(R32UI
, R32UI
);
1806 FMT_CASE(R16UI
, R16UI
);
1807 FMT_CASE(R8UI
, R8UI
);
1809 FMT_CASE(RGBA32I
, RGBA32I
);
1810 FMT_CASE(RGBA16I
, RGBA16I
);
1811 FMT_CASE(RGBA8I
, RGBA8I
);
1812 FMT_CASE(RG32I
, RG32I
);
1813 FMT_CASE(RG16I
, RG16I
);
1814 FMT_CASE(RG8I
, RG8I
);
1815 FMT_CASE(R32I
, R32I
);
1816 FMT_CASE(R16I
, R16I
);
1819 FMT_CASE(RGBA16
, RGBA16
);
1820 FMT_CASE(RGB10_A2
, RGB10A2
);
1821 FMT_CASE(RGBA8
, RGBA8
);
1822 FMT_CASE(RG16
, RG16
);
1827 FMT_CASE(RGBA16_SNORM
, RGBA16_SNORM
);
1828 FMT_CASE(RGBA8_SNORM
, RGBA8_SNORM
);
1829 FMT_CASE(RG16_SNORM
, RG16_SNORM
);
1830 FMT_CASE(RG8_SNORM
, RG8_SNORM
);
1831 FMT_CASE(R16_SNORM
, R16_SNORM
);
1832 FMT_CASE(R8_SNORM
, R8_SNORM
);
1834 FMT_CASE(BGRA_INTEGER
, BGRA8
);
1836 ERROR("unknown format %x\n", format
);
1838 return nv50_ir::FMT_NONE
;
1844 Converter::visit(nir_intrinsic_instr
*insn
)
1846 nir_intrinsic_op op
= insn
->intrinsic
;
1847 const nir_intrinsic_info
&opInfo
= nir_intrinsic_infos
[op
];
1850 case nir_intrinsic_load_uniform
: {
1851 LValues
&newDefs
= convert(&insn
->dest
);
1852 const DataType dType
= getDType(insn
);
1854 uint32_t coffset
= getIndirect(insn
, 0, 0, indirect
);
1855 for (uint8_t i
= 0; i
< insn
->num_components
; ++i
) {
1856 loadFrom(FILE_MEMORY_CONST
, 0, dType
, newDefs
[i
], 16 * coffset
, i
, indirect
);
1860 case nir_intrinsic_store_output
:
1861 case nir_intrinsic_store_per_vertex_output
: {
1863 DataType dType
= getSType(insn
->src
[0], false, false);
1864 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_store_output
? 1 : 2, 0, indirect
);
1866 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1867 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
1871 Value
*src
= getSrc(&insn
->src
[0], i
);
1872 switch (prog
->getType()) {
1873 case Program::TYPE_FRAGMENT
: {
1874 if (info
->out
[idx
].sn
== TGSI_SEMANTIC_POSITION
) {
1875 // TGSI uses a different interface than NIR, TGSI stores that
1876 // value in the z component, NIR in X
1878 src
= mkOp1v(OP_SAT
, TYPE_F32
, getScratch(), src
);
1882 case Program::TYPE_VERTEX
: {
1883 if (info
->io
.genUserClip
> 0 && idx
== clipVertexOutput
) {
1884 mkMov(clipVtx
[i
], src
);
1893 storeTo(insn
, FILE_SHADER_OUTPUT
, OP_EXPORT
, dType
, src
, idx
, i
+ offset
, indirect
);
1897 case nir_intrinsic_load_input
:
1898 case nir_intrinsic_load_interpolated_input
:
1899 case nir_intrinsic_load_output
: {
1900 LValues
&newDefs
= convert(&insn
->dest
);
1903 if (prog
->getType() == Program::TYPE_FRAGMENT
&&
1904 op
== nir_intrinsic_load_output
) {
1905 std::vector
<Value
*> defs
, srcs
;
1908 srcs
.push_back(getSSA());
1909 srcs
.push_back(getSSA());
1910 Value
*x
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 0));
1911 Value
*y
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 1));
1912 mkCvt(OP_CVT
, TYPE_U32
, srcs
[0], TYPE_F32
, x
)->rnd
= ROUND_Z
;
1913 mkCvt(OP_CVT
, TYPE_U32
, srcs
[1], TYPE_F32
, y
)->rnd
= ROUND_Z
;
1915 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LAYER
, 0)));
1916 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_SAMPLE_INDEX
, 0)));
1918 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1919 defs
.push_back(newDefs
[i
]);
1923 TexInstruction
*texi
= mkTex(OP_TXF
, TEX_TARGET_2D_MS_ARRAY
, 0, 0, defs
, srcs
);
1924 texi
->tex
.levelZero
= 1;
1925 texi
->tex
.mask
= mask
;
1926 texi
->tex
.useOffsets
= 0;
1927 texi
->tex
.r
= 0xffff;
1928 texi
->tex
.s
= 0xffff;
1930 info
->prop
.fp
.readsFramebuffer
= true;
1934 const DataType dType
= getDType(insn
);
1936 bool input
= op
!= nir_intrinsic_load_output
;
1940 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_load_interpolated_input
? 1 : 0, 0, indirect
);
1941 nv50_ir_varying
& vary
= input
? info
->in
[idx
] : info
->out
[idx
];
1943 // see load_barycentric_* handling
1944 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1945 mode
= translateInterpMode(&vary
, nvirOp
);
1946 if (op
== nir_intrinsic_load_interpolated_input
) {
1947 ImmediateValue immMode
;
1948 if (getSrc(&insn
->src
[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode
))
1949 mode
|= immMode
.reg
.data
.u32
;
1953 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1954 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1955 Symbol
*sym
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
);
1956 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1958 if (typeSizeof(dType
) == 8) {
1959 Value
*lo
= getSSA();
1960 Value
*hi
= getSSA();
1961 Instruction
*interp
;
1963 interp
= mkOp1(nvirOp
, TYPE_U32
, lo
, sym
);
1964 if (nvirOp
== OP_PINTERP
)
1965 interp
->setSrc(s
++, fp
.position
);
1966 if (mode
& NV50_IR_INTERP_OFFSET
)
1967 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1968 interp
->setInterpolate(mode
);
1969 interp
->setIndirect(0, 0, indirect
);
1971 Symbol
*sym1
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
+ 4);
1972 interp
= mkOp1(nvirOp
, TYPE_U32
, hi
, sym1
);
1973 if (nvirOp
== OP_PINTERP
)
1974 interp
->setSrc(s
++, fp
.position
);
1975 if (mode
& NV50_IR_INTERP_OFFSET
)
1976 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1977 interp
->setInterpolate(mode
);
1978 interp
->setIndirect(0, 0, indirect
);
1980 mkOp2(OP_MERGE
, dType
, newDefs
[i
], lo
, hi
);
1982 Instruction
*interp
= mkOp1(nvirOp
, dType
, newDefs
[i
], sym
);
1983 if (nvirOp
== OP_PINTERP
)
1984 interp
->setSrc(s
++, fp
.position
);
1985 if (mode
& NV50_IR_INTERP_OFFSET
)
1986 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1987 interp
->setInterpolate(mode
);
1988 interp
->setIndirect(0, 0, indirect
);
1991 mkLoad(dType
, newDefs
[i
], sym
, indirect
)->perPatch
= vary
.patch
;
1996 case nir_intrinsic_load_barycentric_at_offset
:
1997 case nir_intrinsic_load_barycentric_at_sample
:
1998 case nir_intrinsic_load_barycentric_centroid
:
1999 case nir_intrinsic_load_barycentric_pixel
:
2000 case nir_intrinsic_load_barycentric_sample
: {
2001 LValues
&newDefs
= convert(&insn
->dest
);
2004 if (op
== nir_intrinsic_load_barycentric_centroid
||
2005 op
== nir_intrinsic_load_barycentric_sample
) {
2006 mode
= NV50_IR_INTERP_CENTROID
;
2007 } else if (op
== nir_intrinsic_load_barycentric_at_offset
) {
2009 for (uint8_t c
= 0; c
< 2; c
++) {
2010 offs
[c
] = getScratch();
2011 mkOp2(OP_MIN
, TYPE_F32
, offs
[c
], getSrc(&insn
->src
[0], c
), loadImm(NULL
, 0.4375f
));
2012 mkOp2(OP_MAX
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, -0.5f
));
2013 mkOp2(OP_MUL
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, 4096.0f
));
2014 mkCvt(OP_CVT
, TYPE_S32
, offs
[c
], TYPE_F32
, offs
[c
]);
2016 mkOp3v(OP_INSBF
, TYPE_U32
, newDefs
[0], offs
[1], mkImm(0x1010), offs
[0]);
2018 mode
= NV50_IR_INTERP_OFFSET
;
2019 } else if (op
== nir_intrinsic_load_barycentric_pixel
) {
2020 mode
= NV50_IR_INTERP_DEFAULT
;
2021 } else if (op
== nir_intrinsic_load_barycentric_at_sample
) {
2022 info
->prop
.fp
.readsSampleLocations
= true;
2023 mkOp1(OP_PIXLD
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0], 0))->subOp
= NV50_IR_SUBOP_PIXLD_OFFSET
;
2024 mode
= NV50_IR_INTERP_OFFSET
;
2026 unreachable("all intrinsics already handled above");
2029 loadImm(newDefs
[1], mode
);
2032 case nir_intrinsic_discard
:
2033 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
);
2035 case nir_intrinsic_discard_if
: {
2036 Value
*pred
= getSSA(1, FILE_PREDICATE
);
2037 if (insn
->num_components
> 1) {
2038 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2042 mkCmp(OP_SET
, CC_NE
, TYPE_U8
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
2043 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
)->setPredicate(CC_P
, pred
);
2046 case nir_intrinsic_load_base_vertex
:
2047 case nir_intrinsic_load_base_instance
:
2048 case nir_intrinsic_load_draw_id
:
2049 case nir_intrinsic_load_front_face
:
2050 case nir_intrinsic_load_helper_invocation
:
2051 case nir_intrinsic_load_instance_id
:
2052 case nir_intrinsic_load_invocation_id
:
2053 case nir_intrinsic_load_local_group_size
:
2054 case nir_intrinsic_load_local_invocation_id
:
2055 case nir_intrinsic_load_num_work_groups
:
2056 case nir_intrinsic_load_patch_vertices_in
:
2057 case nir_intrinsic_load_primitive_id
:
2058 case nir_intrinsic_load_sample_id
:
2059 case nir_intrinsic_load_sample_mask_in
:
2060 case nir_intrinsic_load_sample_pos
:
2061 case nir_intrinsic_load_subgroup_eq_mask
:
2062 case nir_intrinsic_load_subgroup_ge_mask
:
2063 case nir_intrinsic_load_subgroup_gt_mask
:
2064 case nir_intrinsic_load_subgroup_le_mask
:
2065 case nir_intrinsic_load_subgroup_lt_mask
:
2066 case nir_intrinsic_load_subgroup_invocation
:
2067 case nir_intrinsic_load_tess_coord
:
2068 case nir_intrinsic_load_tess_level_inner
:
2069 case nir_intrinsic_load_tess_level_outer
:
2070 case nir_intrinsic_load_vertex_id
:
2071 case nir_intrinsic_load_work_group_id
: {
2072 const DataType dType
= getDType(insn
);
2073 SVSemantic sv
= convert(op
);
2074 LValues
&newDefs
= convert(&insn
->dest
);
2076 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2078 if (typeSizeof(dType
) == 8)
2083 if (sv
== SV_TID
&& info
->prop
.cp
.numThreads
[i
] == 1) {
2086 Symbol
*sym
= mkSysVal(sv
, i
);
2087 Instruction
*rdsv
= mkOp1(OP_RDSV
, TYPE_U32
, def
, sym
);
2088 if (sv
== SV_TESS_OUTER
|| sv
== SV_TESS_INNER
)
2092 if (typeSizeof(dType
) == 8)
2093 mkOp2(OP_MERGE
, dType
, newDefs
[i
], def
, loadImm(getSSA(), 0u));
2098 case nir_intrinsic_load_subgroup_size
: {
2099 LValues
&newDefs
= convert(&insn
->dest
);
2100 loadImm(newDefs
[0], 32u);
2103 case nir_intrinsic_vote_all
:
2104 case nir_intrinsic_vote_any
:
2105 case nir_intrinsic_vote_ieq
: {
2106 LValues
&newDefs
= convert(&insn
->dest
);
2107 Value
*pred
= getScratch(1, FILE_PREDICATE
);
2108 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
2109 mkOp1(OP_VOTE
, TYPE_U32
, pred
, pred
)->subOp
= getSubOp(op
);
2110 mkCvt(OP_CVT
, TYPE_U32
, newDefs
[0], TYPE_U8
, pred
);
2113 case nir_intrinsic_ballot
: {
2114 LValues
&newDefs
= convert(&insn
->dest
);
2115 Value
*pred
= getSSA(1, FILE_PREDICATE
);
2116 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
2117 mkOp1(OP_VOTE
, TYPE_U32
, newDefs
[0], pred
)->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
2120 case nir_intrinsic_read_first_invocation
:
2121 case nir_intrinsic_read_invocation
: {
2122 LValues
&newDefs
= convert(&insn
->dest
);
2123 const DataType dType
= getDType(insn
);
2124 Value
*tmp
= getScratch();
2126 if (op
== nir_intrinsic_read_first_invocation
) {
2127 mkOp1(OP_VOTE
, TYPE_U32
, tmp
, mkImm(1))->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
2128 mkOp2(OP_EXTBF
, TYPE_U32
, tmp
, tmp
, mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2129 mkOp1(OP_BFIND
, TYPE_U32
, tmp
, tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
2131 tmp
= getSrc(&insn
->src
[1], 0);
2133 for (uint8_t i
= 0; i
< insn
->num_components
; ++i
) {
2134 mkOp3(OP_SHFL
, dType
, newDefs
[i
], getSrc(&insn
->src
[0], i
), tmp
, mkImm(0x1f))
2135 ->subOp
= NV50_IR_SUBOP_SHFL_IDX
;
2139 case nir_intrinsic_load_per_vertex_input
: {
2140 const DataType dType
= getDType(insn
);
2141 LValues
&newDefs
= convert(&insn
->dest
);
2142 Value
*indirectVertex
;
2143 Value
*indirectOffset
;
2144 uint32_t baseVertex
= getIndirect(&insn
->src
[0], 0, indirectVertex
);
2145 uint32_t idx
= getIndirect(insn
, 1, 0, indirectOffset
);
2147 Value
*vtxBase
= mkOp2v(OP_PFETCH
, TYPE_U32
, getSSA(4, FILE_ADDRESS
),
2148 mkImm(baseVertex
), indirectVertex
);
2149 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2150 uint32_t address
= getSlotAddress(insn
, idx
, i
);
2151 loadFrom(FILE_SHADER_INPUT
, 0, dType
, newDefs
[i
], address
, 0,
2152 indirectOffset
, vtxBase
, info
->in
[idx
].patch
);
2156 case nir_intrinsic_emit_vertex
:
2157 case nir_intrinsic_end_primitive
: {
2158 uint32_t idx
= nir_intrinsic_stream_id(insn
);
2159 mkOp1(getOperation(op
), TYPE_U32
, NULL
, mkImm(idx
))->fixed
= 1;
2162 case nir_intrinsic_load_ubo
: {
2163 const DataType dType
= getDType(insn
);
2164 LValues
&newDefs
= convert(&insn
->dest
);
2165 Value
*indirectIndex
;
2166 Value
*indirectOffset
;
2167 uint32_t index
= getIndirect(&insn
->src
[0], 0, indirectIndex
) + 1;
2168 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2170 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2171 loadFrom(FILE_MEMORY_CONST
, index
, dType
, newDefs
[i
], offset
, i
,
2172 indirectOffset
, indirectIndex
);
2176 case nir_intrinsic_get_buffer_size
: {
2177 LValues
&newDefs
= convert(&insn
->dest
);
2178 const DataType dType
= getDType(insn
);
2179 Value
*indirectBuffer
;
2180 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2182 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, 0);
2183 mkOp1(OP_BUFQ
, dType
, newDefs
[0], sym
)->setIndirect(0, 0, indirectBuffer
);
2186 case nir_intrinsic_store_ssbo
: {
2187 DataType sType
= getSType(insn
->src
[0], false, false);
2188 Value
*indirectBuffer
;
2189 Value
*indirectOffset
;
2190 uint32_t buffer
= getIndirect(&insn
->src
[1], 0, indirectBuffer
);
2191 uint32_t offset
= getIndirect(&insn
->src
[2], 0, indirectOffset
);
2193 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2194 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2196 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, sType
,
2197 offset
+ i
* typeSizeof(sType
));
2198 mkStore(OP_STORE
, sType
, sym
, indirectOffset
, getSrc(&insn
->src
[0], i
))
2199 ->setIndirect(0, 1, indirectBuffer
);
2201 info
->io
.globalAccess
|= 0x2;
2204 case nir_intrinsic_load_ssbo
: {
2205 const DataType dType
= getDType(insn
);
2206 LValues
&newDefs
= convert(&insn
->dest
);
2207 Value
*indirectBuffer
;
2208 Value
*indirectOffset
;
2209 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2210 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2212 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
)
2213 loadFrom(FILE_MEMORY_BUFFER
, buffer
, dType
, newDefs
[i
], offset
, i
,
2214 indirectOffset
, indirectBuffer
);
2216 info
->io
.globalAccess
|= 0x1;
2219 case nir_intrinsic_shared_atomic_add
:
2220 case nir_intrinsic_shared_atomic_and
:
2221 case nir_intrinsic_shared_atomic_comp_swap
:
2222 case nir_intrinsic_shared_atomic_exchange
:
2223 case nir_intrinsic_shared_atomic_or
:
2224 case nir_intrinsic_shared_atomic_imax
:
2225 case nir_intrinsic_shared_atomic_imin
:
2226 case nir_intrinsic_shared_atomic_umax
:
2227 case nir_intrinsic_shared_atomic_umin
:
2228 case nir_intrinsic_shared_atomic_xor
: {
2229 const DataType dType
= getDType(insn
);
2230 LValues
&newDefs
= convert(&insn
->dest
);
2231 Value
*indirectOffset
;
2232 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2233 Symbol
*sym
= mkSymbol(FILE_MEMORY_SHARED
, 0, dType
, offset
);
2234 Instruction
*atom
= mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
, getSrc(&insn
->src
[1], 0));
2235 if (op
== nir_intrinsic_shared_atomic_comp_swap
)
2236 atom
->setSrc(2, getSrc(&insn
->src
[2], 0));
2237 atom
->setIndirect(0, 0, indirectOffset
);
2238 atom
->subOp
= getSubOp(op
);
2241 case nir_intrinsic_ssbo_atomic_add
:
2242 case nir_intrinsic_ssbo_atomic_and
:
2243 case nir_intrinsic_ssbo_atomic_comp_swap
:
2244 case nir_intrinsic_ssbo_atomic_exchange
:
2245 case nir_intrinsic_ssbo_atomic_or
:
2246 case nir_intrinsic_ssbo_atomic_imax
:
2247 case nir_intrinsic_ssbo_atomic_imin
:
2248 case nir_intrinsic_ssbo_atomic_umax
:
2249 case nir_intrinsic_ssbo_atomic_umin
:
2250 case nir_intrinsic_ssbo_atomic_xor
: {
2251 const DataType dType
= getDType(insn
);
2252 LValues
&newDefs
= convert(&insn
->dest
);
2253 Value
*indirectBuffer
;
2254 Value
*indirectOffset
;
2255 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2256 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2258 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, offset
);
2259 Instruction
*atom
= mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
,
2260 getSrc(&insn
->src
[2], 0));
2261 if (op
== nir_intrinsic_ssbo_atomic_comp_swap
)
2262 atom
->setSrc(2, getSrc(&insn
->src
[3], 0));
2263 atom
->setIndirect(0, 0, indirectOffset
);
2264 atom
->setIndirect(0, 1, indirectBuffer
);
2265 atom
->subOp
= getSubOp(op
);
2267 info
->io
.globalAccess
|= 0x2;
2270 case nir_intrinsic_image_deref_atomic_add
:
2271 case nir_intrinsic_image_deref_atomic_and
:
2272 case nir_intrinsic_image_deref_atomic_comp_swap
:
2273 case nir_intrinsic_image_deref_atomic_exchange
:
2274 case nir_intrinsic_image_deref_atomic_max
:
2275 case nir_intrinsic_image_deref_atomic_min
:
2276 case nir_intrinsic_image_deref_atomic_or
:
2277 case nir_intrinsic_image_deref_atomic_xor
:
2278 case nir_intrinsic_image_deref_load
:
2279 case nir_intrinsic_image_deref_samples
:
2280 case nir_intrinsic_image_deref_size
:
2281 case nir_intrinsic_image_deref_store
: {
2282 const nir_variable
*tex
;
2283 std::vector
<Value
*> srcs
, defs
;
2288 nir_deref_instr
*deref
= nir_src_as_deref(insn
->src
[0]);
2289 const glsl_type
*type
= deref
->type
;
2290 TexInstruction::Target target
=
2291 convert((glsl_sampler_dim
)type
->sampler_dimensionality
,
2292 type
->sampler_array
, type
->sampler_shadow
);
2293 unsigned int argCount
= getNIRArgCount(target
);
2294 uint16_t location
= handleDeref(deref
, indirect
, tex
);
2296 if (opInfo
.has_dest
) {
2297 LValues
&newDefs
= convert(&insn
->dest
);
2298 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
) {
2299 defs
.push_back(newDefs
[i
]);
2305 case nir_intrinsic_image_deref_atomic_add
:
2306 case nir_intrinsic_image_deref_atomic_and
:
2307 case nir_intrinsic_image_deref_atomic_comp_swap
:
2308 case nir_intrinsic_image_deref_atomic_exchange
:
2309 case nir_intrinsic_image_deref_atomic_max
:
2310 case nir_intrinsic_image_deref_atomic_min
:
2311 case nir_intrinsic_image_deref_atomic_or
:
2312 case nir_intrinsic_image_deref_atomic_xor
:
2313 ty
= getDType(insn
);
2315 info
->io
.globalAccess
|= 0x2;
2317 case nir_intrinsic_image_deref_load
:
2319 info
->io
.globalAccess
|= 0x1;
2321 case nir_intrinsic_image_deref_store
:
2324 info
->io
.globalAccess
|= 0x2;
2326 case nir_intrinsic_image_deref_samples
:
2330 case nir_intrinsic_image_deref_size
:
2334 unreachable("unhandled image opcode");
2339 if (opInfo
.num_srcs
>= 2)
2340 for (unsigned int i
= 0u; i
< argCount
; ++i
)
2341 srcs
.push_back(getSrc(&insn
->src
[1], i
));
2343 // the sampler is just another src added after coords
2344 if (opInfo
.num_srcs
>= 3 && target
.isMS())
2345 srcs
.push_back(getSrc(&insn
->src
[2], 0));
2347 if (opInfo
.num_srcs
>= 4) {
2348 unsigned components
= opInfo
.src_components
[3] ? opInfo
.src_components
[3] : insn
->num_components
;
2349 for (uint8_t i
= 0u; i
< components
; ++i
)
2350 srcs
.push_back(getSrc(&insn
->src
[3], i
));
2353 if (opInfo
.num_srcs
>= 5)
2354 // 1 for aotmic swap
2355 for (uint8_t i
= 0u; i
< opInfo
.src_components
[4]; ++i
)
2356 srcs
.push_back(getSrc(&insn
->src
[4], i
));
2358 TexInstruction
*texi
= mkTex(getOperation(op
), target
.getEnum(), location
, 0, defs
, srcs
);
2359 texi
->tex
.bindless
= false;
2360 texi
->tex
.format
= &nv50_ir::TexInstruction::formatTable
[convertGLImgFormat(tex
->data
.image
.format
)];
2361 texi
->tex
.mask
= mask
;
2362 texi
->cache
= getCacheModeFromVar(tex
);
2364 texi
->subOp
= getSubOp(op
);
2367 texi
->setIndirectR(indirect
);
2371 case nir_intrinsic_store_shared
: {
2372 DataType sType
= getSType(insn
->src
[0], false, false);
2373 Value
*indirectOffset
;
2374 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2376 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2377 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2379 Symbol
*sym
= mkSymbol(FILE_MEMORY_SHARED
, 0, sType
, offset
+ i
* typeSizeof(sType
));
2380 mkStore(OP_STORE
, sType
, sym
, indirectOffset
, getSrc(&insn
->src
[0], i
));
2384 case nir_intrinsic_load_shared
: {
2385 const DataType dType
= getDType(insn
);
2386 LValues
&newDefs
= convert(&insn
->dest
);
2387 Value
*indirectOffset
;
2388 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2390 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
)
2391 loadFrom(FILE_MEMORY_SHARED
, 0, dType
, newDefs
[i
], offset
, i
, indirectOffset
);
2395 case nir_intrinsic_barrier
: {
2396 // TODO: add flag to shader_info
2397 info
->numBarriers
= 1;
2398 Instruction
*bar
= mkOp2(OP_BAR
, TYPE_U32
, NULL
, mkImm(0), mkImm(0));
2400 bar
->subOp
= NV50_IR_SUBOP_BAR_SYNC
;
2404 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos
[op
].name
);
2412 Converter::visit(nir_jump_instr
*insn
)
2414 switch (insn
->type
) {
2415 case nir_jump_return
:
2416 // TODO: this only works in the main function
2417 mkFlow(OP_BRA
, exit
, CC_ALWAYS
, NULL
);
2418 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::CROSS
);
2420 case nir_jump_break
:
2421 case nir_jump_continue
: {
2422 bool isBreak
= insn
->type
== nir_jump_break
;
2423 nir_block
*block
= insn
->instr
.block
;
2424 assert(!block
->successors
[1]);
2425 BasicBlock
*target
= convert(block
->successors
[0]);
2426 mkFlow(isBreak
? OP_BREAK
: OP_CONT
, target
, CC_ALWAYS
, NULL
);
2427 bb
->cfg
.attach(&target
->cfg
, isBreak
? Graph::Edge::CROSS
: Graph::Edge::BACK
);
2431 ERROR("unknown nir_jump_type %u\n", insn
->type
);
2439 Converter::visit(nir_load_const_instr
*insn
)
2441 assert(insn
->def
.bit_size
<= 64);
2443 LValues
&newDefs
= convert(&insn
->def
);
2444 for (int i
= 0; i
< insn
->def
.num_components
; i
++) {
2445 switch (insn
->def
.bit_size
) {
2447 loadImm(newDefs
[i
], insn
->value
.u64
[i
]);
2450 loadImm(newDefs
[i
], insn
->value
.u32
[i
]);
2453 loadImm(newDefs
[i
], insn
->value
.u16
[i
]);
2456 loadImm(newDefs
[i
], insn
->value
.u8
[i
]);
2463 #define DEFAULT_CHECKS \
2464 if (insn->dest.dest.ssa.num_components > 1) { \
2465 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2468 if (insn->dest.write_mask != 1) { \
2469 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2473 Converter::visit(nir_alu_instr
*insn
)
2475 const nir_op op
= insn
->op
;
2476 const nir_op_info
&info
= nir_op_infos
[op
];
2477 DataType dType
= getDType(insn
);
2478 const std::vector
<DataType
> sTypes
= getSTypes(insn
);
2480 Instruction
*oldPos
= this->bb
->getExit();
2492 case nir_op_fddx_coarse
:
2493 case nir_op_fddx_fine
:
2495 case nir_op_fddy_coarse
:
2496 case nir_op_fddy_fine
:
2515 case nir_op_imul_high
:
2516 case nir_op_umul_high
:
2523 case nir_op_pack_64_2x32_split
:
2541 LValues
&newDefs
= convert(&insn
->dest
);
2542 operation preOp
= preOperationNeeded(op
);
2543 if (preOp
!= OP_NOP
) {
2544 assert(info
.num_inputs
< 2);
2545 Value
*tmp
= getSSA(typeSizeof(dType
));
2546 Instruction
*i0
= mkOp(preOp
, dType
, tmp
);
2547 Instruction
*i1
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2548 if (info
.num_inputs
) {
2549 i0
->setSrc(0, getSrc(&insn
->src
[0]));
2552 i1
->subOp
= getSubOp(op
);
2554 Instruction
*i
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2555 for (unsigned s
= 0u; s
< info
.num_inputs
; ++s
) {
2556 i
->setSrc(s
, getSrc(&insn
->src
[s
]));
2558 i
->subOp
= getSubOp(op
);
2562 case nir_op_ifind_msb
:
2563 case nir_op_ufind_msb
: {
2565 LValues
&newDefs
= convert(&insn
->dest
);
2567 mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2570 case nir_op_fround_even
: {
2572 LValues
&newDefs
= convert(&insn
->dest
);
2573 mkCvt(OP_CVT
, dType
, newDefs
[0], dType
, getSrc(&insn
->src
[0]))->rnd
= ROUND_NI
;
2576 // convert instructions
2590 case nir_op_u2u64
: {
2592 LValues
&newDefs
= convert(&insn
->dest
);
2593 Instruction
*i
= mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2594 if (op
== nir_op_f2i32
|| op
== nir_op_f2i64
|| op
== nir_op_f2u32
|| op
== nir_op_f2u64
)
2596 i
->sType
= sTypes
[0];
2599 // compare instructions
2609 case nir_op_ine32
: {
2611 LValues
&newDefs
= convert(&insn
->dest
);
2612 Instruction
*i
= mkCmp(getOperation(op
),
2617 getSrc(&insn
->src
[0]),
2618 getSrc(&insn
->src
[1]));
2619 if (info
.num_inputs
== 3)
2620 i
->setSrc(2, getSrc(&insn
->src
[2]));
2621 i
->sType
= sTypes
[0];
2624 // those are weird ALU ops and need special handling, because
2625 // 1. they are always component based
2626 // 2. they basically just merge multiple values into one data type
2629 if (!insn
->dest
.dest
.is_ssa
&& insn
->dest
.dest
.reg
.reg
->num_array_elems
) {
2630 nir_reg_dest
& reg
= insn
->dest
.dest
.reg
;
2631 uint32_t goffset
= regToLmemOffset
[reg
.reg
->index
];
2632 uint8_t comps
= reg
.reg
->num_components
;
2633 uint8_t size
= reg
.reg
->bit_size
/ 8;
2634 uint8_t csize
= 4 * size
; // TODO after fixing MemoryOpts: comps * size;
2635 uint32_t aoffset
= csize
* reg
.base_offset
;
2636 Value
*indirect
= NULL
;
2639 indirect
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
),
2640 getSrc(reg
.indirect
, 0), mkImm(csize
));
2642 for (uint8_t i
= 0u; i
< comps
; ++i
) {
2643 if (!((1u << i
) & insn
->dest
.write_mask
))
2646 Symbol
*sym
= mkSymbol(FILE_MEMORY_LOCAL
, 0, dType
, goffset
+ aoffset
+ i
* size
);
2647 mkStore(OP_STORE
, dType
, sym
, indirect
, getSrc(&insn
->src
[0], i
));
2650 } else if (!insn
->src
[0].src
.is_ssa
&& insn
->src
[0].src
.reg
.reg
->num_array_elems
) {
2651 LValues
&newDefs
= convert(&insn
->dest
);
2652 nir_reg_src
& reg
= insn
->src
[0].src
.reg
;
2653 uint32_t goffset
= regToLmemOffset
[reg
.reg
->index
];
2654 // uint8_t comps = reg.reg->num_components;
2655 uint8_t size
= reg
.reg
->bit_size
/ 8;
2656 uint8_t csize
= 4 * size
; // TODO after fixing MemoryOpts: comps * size;
2657 uint32_t aoffset
= csize
* reg
.base_offset
;
2658 Value
*indirect
= NULL
;
2661 indirect
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), getSrc(reg
.indirect
, 0), mkImm(csize
));
2663 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
)
2664 loadFrom(FILE_MEMORY_LOCAL
, 0, dType
, newDefs
[i
], goffset
+ aoffset
, i
, indirect
);
2668 LValues
&newDefs
= convert(&insn
->dest
);
2669 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2670 mkMov(newDefs
[c
], getSrc(&insn
->src
[0], c
), dType
);
2677 LValues
&newDefs
= convert(&insn
->dest
);
2678 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2679 mkMov(newDefs
[c
], getSrc(&insn
->src
[c
]), dType
);
2684 case nir_op_pack_64_2x32
: {
2685 LValues
&newDefs
= convert(&insn
->dest
);
2686 Instruction
*merge
= mkOp(OP_MERGE
, dType
, newDefs
[0]);
2687 merge
->setSrc(0, getSrc(&insn
->src
[0], 0));
2688 merge
->setSrc(1, getSrc(&insn
->src
[0], 1));
2691 case nir_op_pack_half_2x16_split
: {
2692 LValues
&newDefs
= convert(&insn
->dest
);
2693 Value
*tmpH
= getSSA();
2694 Value
*tmpL
= getSSA();
2696 mkCvt(OP_CVT
, TYPE_F16
, tmpL
, TYPE_F32
, getSrc(&insn
->src
[0]));
2697 mkCvt(OP_CVT
, TYPE_F16
, tmpH
, TYPE_F32
, getSrc(&insn
->src
[1]));
2698 mkOp3(OP_INSBF
, TYPE_U32
, newDefs
[0], tmpH
, mkImm(0x1010), tmpL
);
2701 case nir_op_unpack_half_2x16_split_x
:
2702 case nir_op_unpack_half_2x16_split_y
: {
2703 LValues
&newDefs
= convert(&insn
->dest
);
2704 Instruction
*cvt
= mkCvt(OP_CVT
, TYPE_F32
, newDefs
[0], TYPE_F16
, getSrc(&insn
->src
[0]));
2705 if (op
== nir_op_unpack_half_2x16_split_y
)
2709 case nir_op_unpack_64_2x32
: {
2710 LValues
&newDefs
= convert(&insn
->dest
);
2711 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, newDefs
[1]);
2714 case nir_op_unpack_64_2x32_split_x
: {
2715 LValues
&newDefs
= convert(&insn
->dest
);
2716 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, getSSA());
2719 case nir_op_unpack_64_2x32_split_y
: {
2720 LValues
&newDefs
= convert(&insn
->dest
);
2721 mkOp1(OP_SPLIT
, dType
, getSSA(), getSrc(&insn
->src
[0]))->setDef(1, newDefs
[0]);
2724 // special instructions
2726 case nir_op_isign
: {
2729 if (::isFloatType(dType
))
2734 LValues
&newDefs
= convert(&insn
->dest
);
2735 LValue
*val0
= getScratch();
2736 LValue
*val1
= getScratch();
2737 mkCmp(OP_SET
, CC_GT
, iType
, val0
, dType
, getSrc(&insn
->src
[0]), zero
);
2738 mkCmp(OP_SET
, CC_LT
, iType
, val1
, dType
, getSrc(&insn
->src
[0]), zero
);
2740 if (dType
== TYPE_F64
) {
2741 mkOp2(OP_SUB
, iType
, val0
, val0
, val1
);
2742 mkCvt(OP_CVT
, TYPE_F64
, newDefs
[0], iType
, val0
);
2743 } else if (dType
== TYPE_S64
|| dType
== TYPE_U64
) {
2744 mkOp2(OP_SUB
, iType
, val0
, val1
, val0
);
2745 mkOp2(OP_SHR
, iType
, val1
, val0
, loadImm(NULL
, 31));
2746 mkOp2(OP_MERGE
, dType
, newDefs
[0], val0
, val1
);
2747 } else if (::isFloatType(dType
))
2748 mkOp2(OP_SUB
, iType
, newDefs
[0], val0
, val1
);
2750 mkOp2(OP_SUB
, iType
, newDefs
[0], val1
, val0
);
2754 case nir_op_b32csel
: {
2756 LValues
&newDefs
= convert(&insn
->dest
);
2757 mkCmp(OP_SLCT
, CC_NE
, dType
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[1]), getSrc(&insn
->src
[2]), getSrc(&insn
->src
[0]));
2760 case nir_op_ibitfield_extract
:
2761 case nir_op_ubitfield_extract
: {
2763 Value
*tmp
= getSSA();
2764 LValues
&newDefs
= convert(&insn
->dest
);
2765 mkOp3(OP_INSBF
, dType
, tmp
, getSrc(&insn
->src
[2]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2766 mkOp2(OP_EXTBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), tmp
);
2771 LValues
&newDefs
= convert(&insn
->dest
);
2772 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2775 case nir_op_bitfield_insert
: {
2777 LValues
&newDefs
= convert(&insn
->dest
);
2778 LValue
*temp
= getSSA();
2779 mkOp3(OP_INSBF
, TYPE_U32
, temp
, getSrc(&insn
->src
[3]), mkImm(0x808), getSrc(&insn
->src
[2]));
2780 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[1]), temp
, getSrc(&insn
->src
[0]));
2783 case nir_op_bit_count
: {
2785 LValues
&newDefs
= convert(&insn
->dest
);
2786 mkOp2(OP_POPCNT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), getSrc(&insn
->src
[0]));
2789 case nir_op_bitfield_reverse
: {
2791 LValues
&newDefs
= convert(&insn
->dest
);
2792 mkOp2(OP_EXTBF
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2795 case nir_op_find_lsb
: {
2797 LValues
&newDefs
= convert(&insn
->dest
);
2798 Value
*tmp
= getSSA();
2799 mkOp2(OP_EXTBF
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2800 mkOp1(OP_BFIND
, TYPE_U32
, newDefs
[0], tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
2803 // boolean conversions
2804 case nir_op_b2f32
: {
2806 LValues
&newDefs
= convert(&insn
->dest
);
2807 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1.0f
));
2810 case nir_op_b2f64
: {
2812 LValues
&newDefs
= convert(&insn
->dest
);
2813 Value
*tmp
= getSSA(4);
2814 mkOp2(OP_AND
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), loadImm(NULL
, 0x3ff00000));
2815 mkOp2(OP_MERGE
, TYPE_U64
, newDefs
[0], loadImm(NULL
, 0), tmp
);
2819 case nir_op_i2b32
: {
2821 LValues
&newDefs
= convert(&insn
->dest
);
2823 if (typeSizeof(sTypes
[0]) == 8) {
2824 src1
= loadImm(getSSA(8), 0.0);
2828 CondCode cc
= op
== nir_op_f2b32
? CC_NEU
: CC_NE
;
2829 mkCmp(OP_SET
, cc
, TYPE_U32
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[0]), src1
);
2832 case nir_op_b2i32
: {
2834 LValues
&newDefs
= convert(&insn
->dest
);
2835 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2838 case nir_op_b2i64
: {
2840 LValues
&newDefs
= convert(&insn
->dest
);
2841 LValue
*def
= getScratch();
2842 mkOp2(OP_AND
, TYPE_U32
, def
, getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2843 mkOp2(OP_MERGE
, TYPE_S64
, newDefs
[0], def
, loadImm(NULL
, 0));
2847 ERROR("unknown nir_op %s\n", info
.name
);
2852 oldPos
= this->bb
->getEntry();
2853 oldPos
->precise
= insn
->exact
;
2856 if (unlikely(!oldPos
))
2859 while (oldPos
->next
) {
2860 oldPos
= oldPos
->next
;
2861 oldPos
->precise
= insn
->exact
;
2863 oldPos
->saturate
= insn
->dest
.saturate
;
2867 #undef DEFAULT_CHECKS
2870 Converter::visit(nir_ssa_undef_instr
*insn
)
2872 LValues
&newDefs
= convert(&insn
->def
);
2873 for (uint8_t i
= 0u; i
< insn
->def
.num_components
; ++i
) {
2874 mkOp(OP_NOP
, TYPE_NONE
, newDefs
[i
]);
/* Expand one sampler-dim case into the four array/shadow TexTarget
 * combinations. Only used inside Converter::convert(glsl_sampler_dim, ...). */
#define CASE_SAMPLER(ty) \
   case GLSL_SAMPLER_DIM_ ## ty : \
      if (isArray && !isShadow) \
         return TEX_TARGET_ ## ty ## _ARRAY; \
      else if (!isArray && isShadow) \
         return TEX_TARGET_## ty ## _SHADOW; \
      else if (isArray && isShadow) \
         return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
      else \
         return TEX_TARGET_ ## ty
2891 Converter::convert(glsl_sampler_dim dim
, bool isArray
, bool isShadow
)
2897 case GLSL_SAMPLER_DIM_3D
:
2898 return TEX_TARGET_3D
;
2899 case GLSL_SAMPLER_DIM_MS
:
2901 return TEX_TARGET_2D_MS_ARRAY
;
2902 return TEX_TARGET_2D_MS
;
2903 case GLSL_SAMPLER_DIM_RECT
:
2905 return TEX_TARGET_RECT_SHADOW
;
2906 return TEX_TARGET_RECT
;
2907 case GLSL_SAMPLER_DIM_BUF
:
2908 return TEX_TARGET_BUFFER
;
2909 case GLSL_SAMPLER_DIM_EXTERNAL
:
2910 return TEX_TARGET_2D
;
2912 ERROR("unknown glsl_sampler_dim %u\n", dim
);
2914 return TEX_TARGET_COUNT
;
2920 Converter::applyProjection(Value
*src
, Value
*proj
)
2924 return mkOp2v(OP_MUL
, TYPE_F32
, getScratch(), src
, proj
);
2928 Converter::getNIRArgCount(TexInstruction::Target
& target
)
2930 unsigned int result
= target
.getArgCount();
2931 if (target
.isCube() && target
.isArray())
2939 Converter::handleDeref(nir_deref_instr
*deref
, Value
* &indirect
, const nir_variable
* &tex
)
2941 typedef std::pair
<uint32_t,Value
*> DerefPair
;
2942 std::list
<DerefPair
> derefs
;
2944 uint16_t result
= 0;
2945 while (deref
->deref_type
!= nir_deref_type_var
) {
2946 switch (deref
->deref_type
) {
2947 case nir_deref_type_array
: {
2949 uint8_t size
= type_size(deref
->type
);
2950 result
+= size
* getIndirect(&deref
->arr
.index
, 0, indirect
);
2953 derefs
.push_front(std::make_pair(size
, indirect
));
2958 case nir_deref_type_struct
: {
2959 result
+= nir_deref_instr_parent(deref
)->type
->struct_location_offset(deref
->strct
.index
);
2962 case nir_deref_type_var
:
2964 unreachable("nir_deref_type_var reached in handleDeref!");
2967 deref
= nir_deref_instr_parent(deref
);
2971 for (std::list
<DerefPair
>::const_iterator it
= derefs
.begin(); it
!= derefs
.end(); ++it
) {
2972 Value
*offset
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(), loadImm(getSSA(), it
->first
), it
->second
);
2974 indirect
= mkOp2v(OP_ADD
, TYPE_U32
, getSSA(), indirect
, offset
);
2979 tex
= nir_deref_instr_get_variable(deref
);
2982 return result
+ tex
->data
.driver_location
;
2986 Converter::getCacheModeFromVar(const nir_variable
*var
)
2988 if (var
->data
.image
.access
== ACCESS_VOLATILE
)
2990 if (var
->data
.image
.access
== ACCESS_COHERENT
)
2996 Converter::visit(nir_tex_instr
*insn
)
3000 case nir_texop_query_levels
:
3002 case nir_texop_texture_samples
:
3007 case nir_texop_txf_ms
:
3009 case nir_texop_txs
: {
3010 LValues
&newDefs
= convert(&insn
->dest
);
3011 std::vector
<Value
*> srcs
;
3012 std::vector
<Value
*> defs
;
3013 std::vector
<nir_src
*> offsets
;
3017 TexInstruction::Target target
= convert(insn
->sampler_dim
, insn
->is_array
, insn
->is_shadow
);
3018 operation op
= getOperation(insn
->op
);
3021 int biasIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_bias
);
3022 int compIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_comparator
);
3023 int coordsIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_coord
);
3024 int ddxIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddx
);
3025 int ddyIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddy
);
3026 int msIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ms_index
);
3027 int lodIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_lod
);
3028 int offsetIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_offset
);
3029 int projIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_projector
);
3030 int sampOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_sampler_offset
);
3031 int texOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_texture_offset
);
3034 proj
= mkOp1v(OP_RCP
, TYPE_F32
, getScratch(), getSrc(&insn
->src
[projIdx
].src
, 0));
3036 srcs
.resize(insn
->coord_components
);
3037 for (uint8_t i
= 0u; i
< insn
->coord_components
; ++i
)
3038 srcs
[i
] = applyProjection(getSrc(&insn
->src
[coordsIdx
].src
, i
), proj
);
3040 // sometimes we get less args than target.getArgCount, but codegen expects the latter
3041 if (insn
->coord_components
) {
3042 uint32_t argCount
= target
.getArgCount();
3047 for (uint32_t i
= 0u; i
< (argCount
- insn
->coord_components
); ++i
)
3048 srcs
.push_back(getSSA());
3051 if (insn
->op
== nir_texop_texture_samples
)
3052 srcs
.push_back(zero
);
3053 else if (!insn
->num_srcs
)
3054 srcs
.push_back(loadImm(NULL
, 0));
3056 srcs
.push_back(getSrc(&insn
->src
[biasIdx
].src
, 0));
3058 srcs
.push_back(getSrc(&insn
->src
[lodIdx
].src
, 0));
3059 else if (op
== OP_TXF
)
3062 srcs
.push_back(getSrc(&insn
->src
[msIdx
].src
, 0));
3063 if (offsetIdx
!= -1)
3064 offsets
.push_back(&insn
->src
[offsetIdx
].src
);
3066 srcs
.push_back(applyProjection(getSrc(&insn
->src
[compIdx
].src
, 0), proj
));
3067 if (texOffIdx
!= -1) {
3068 srcs
.push_back(getSrc(&insn
->src
[texOffIdx
].src
, 0));
3069 texOffIdx
= srcs
.size() - 1;
3071 if (sampOffIdx
!= -1) {
3072 srcs
.push_back(getSrc(&insn
->src
[sampOffIdx
].src
, 0));
3073 sampOffIdx
= srcs
.size() - 1;
3076 r
= insn
->texture_index
;
3077 s
= insn
->sampler_index
;
3079 defs
.resize(newDefs
.size());
3080 for (uint8_t d
= 0u; d
< newDefs
.size(); ++d
) {
3081 defs
[d
] = newDefs
[d
];
3084 if (target
.isMS() || (op
== OP_TEX
&& prog
->getType() != Program::TYPE_FRAGMENT
))
3087 TexInstruction
*texi
= mkTex(op
, target
.getEnum(), r
, s
, defs
, srcs
);
3088 texi
->tex
.levelZero
= lz
;
3089 texi
->tex
.mask
= mask
;
3091 if (texOffIdx
!= -1)
3092 texi
->tex
.rIndirectSrc
= texOffIdx
;
3093 if (sampOffIdx
!= -1)
3094 texi
->tex
.sIndirectSrc
= sampOffIdx
;
3098 if (!target
.isShadow())
3099 texi
->tex
.gatherComp
= insn
->component
;
3102 texi
->tex
.query
= TXQ_DIMS
;
3104 case nir_texop_texture_samples
:
3105 texi
->tex
.mask
= 0x4;
3106 texi
->tex
.query
= TXQ_TYPE
;
3108 case nir_texop_query_levels
:
3109 texi
->tex
.mask
= 0x8;
3110 texi
->tex
.query
= TXQ_DIMS
;
3116 texi
->tex
.useOffsets
= offsets
.size();
3117 if (texi
->tex
.useOffsets
) {
3118 for (uint8_t s
= 0; s
< texi
->tex
.useOffsets
; ++s
) {
3119 for (uint32_t c
= 0u; c
< 3; ++c
) {
3120 uint8_t s2
= std::min(c
, target
.getDim() - 1);
3121 texi
->offset
[s
][c
].set(getSrc(offsets
[s
], s2
));
3122 texi
->offset
[s
][c
].setInsn(texi
);
3127 if (ddxIdx
!= -1 && ddyIdx
!= -1) {
3128 for (uint8_t c
= 0u; c
< target
.getDim() + target
.isCube(); ++c
) {
3129 texi
->dPdx
[c
].set(getSrc(&insn
->src
[ddxIdx
].src
, c
));
3130 texi
->dPdy
[c
].set(getSrc(&insn
->src
[ddyIdx
].src
, c
));
3137 ERROR("unknown nir_texop %u\n", insn
->op
);
3144 Converter::visit(nir_deref_instr
*deref
)
3146 // we just ignore those, because images intrinsics are the only place where
3147 // we should end up with deref sources and those have to backtrack anyway
3148 // to get the nir_variable. This code just exists to handle some special
3150 switch (deref
->deref_type
) {
3151 case nir_deref_type_array
:
3152 case nir_deref_type_struct
:
3153 case nir_deref_type_var
:
3156 ERROR("unknown nir_deref_instr %u\n", deref
->deref_type
);
3167 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
3168 nir_print_shader(nir
, stderr
);
3170 struct nir_lower_subgroups_options subgroup_options
= {
3171 .subgroup_size
= 32,
3172 .ballot_bit_size
= 32,
3175 NIR_PASS_V(nir
, nir_lower_io
, nir_var_all
, type_size
, (nir_lower_io_options
)0);
3176 NIR_PASS_V(nir
, nir_lower_subgroups
, &subgroup_options
);
3177 NIR_PASS_V(nir
, nir_lower_regs_to_ssa
);
3178 NIR_PASS_V(nir
, nir_lower_load_const_to_scalar
);
3179 NIR_PASS_V(nir
, nir_lower_vars_to_ssa
);
3180 NIR_PASS_V(nir
, nir_lower_alu_to_scalar
);
3181 NIR_PASS_V(nir
, nir_lower_phis_to_scalar
);
3185 NIR_PASS(progress
, nir
, nir_copy_prop
);
3186 NIR_PASS(progress
, nir
, nir_opt_remove_phis
);
3187 NIR_PASS(progress
, nir
, nir_opt_trivial_continues
);
3188 NIR_PASS(progress
, nir
, nir_opt_cse
);
3189 NIR_PASS(progress
, nir
, nir_opt_algebraic
);
3190 NIR_PASS(progress
, nir
, nir_opt_constant_folding
);
3191 NIR_PASS(progress
, nir
, nir_copy_prop
);
3192 NIR_PASS(progress
, nir
, nir_opt_dce
);
3193 NIR_PASS(progress
, nir
, nir_opt_dead_cf
);
3196 NIR_PASS_V(nir
, nir_lower_bool_to_int32
);
3197 NIR_PASS_V(nir
, nir_lower_locals_to_regs
);
3198 NIR_PASS_V(nir
, nir_remove_dead_variables
, nir_var_function_temp
);
3199 NIR_PASS_V(nir
, nir_convert_from_ssa
, true);
3201 // Garbage collect dead instructions
3205 ERROR("Couldn't prase NIR!\n");
3209 if (!assignSlots()) {
3210 ERROR("Couldn't assign slots!\n");
3214 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
3215 nir_print_shader(nir
, stderr
);
3217 nir_foreach_function(function
, nir
) {
3218 if (!visit(function
))
3225 } // unnamed namespace
3230 Program::makeFromNIR(struct nv50_ir_prog_info
*info
)
3232 nir_shader
*nir
= (nir_shader
*)info
->bin
.source
;
3233 Converter
converter(this, nir
, info
);
3234 bool result
= converter
.run();
3237 LoweringHelper lowering
;
3239 tlsSize
= info
->bin
.tlsSpace
;
3243 } // namespace nv50_ir