2 * Copyright 2017 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Karol Herbst <kherbst@redhat.com>
25 #include "compiler/nir/nir.h"
27 #include "util/u_debug.h"
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
37 #include <tr1/unordered_map>
43 #if __cplusplus >= 201103L
45 using std::unordered_map
;
48 using std::tr1::unordered_map
;
51 using namespace nv50_ir
;
54 type_size(const struct glsl_type
*type
)
56 return glsl_count_attribute_slots(type
, false);
59 class Converter
: public ConverterCommon
62 Converter(Program
*, nir_shader
*, nv50_ir_prog_info
*);
66 typedef std::vector
<LValue
*> LValues
;
67 typedef unordered_map
<unsigned, LValues
> NirDefMap
;
68 typedef unordered_map
<unsigned, uint32_t> NirArrayLMemOffsets
;
69 typedef unordered_map
<unsigned, BasicBlock
*> NirBlockMap
;
71 TexTarget
convert(glsl_sampler_dim
, bool isArray
, bool isShadow
);
72 LValues
& convert(nir_alu_dest
*);
73 BasicBlock
* convert(nir_block
*);
74 LValues
& convert(nir_dest
*);
75 SVSemantic
convert(nir_intrinsic_op
);
76 LValues
& convert(nir_register
*);
77 LValues
& convert(nir_ssa_def
*);
79 Value
* getSrc(nir_alu_src
*, uint8_t component
= 0);
80 Value
* getSrc(nir_register
*, uint8_t);
81 Value
* getSrc(nir_src
*, uint8_t, bool indirect
= false);
82 Value
* getSrc(nir_ssa_def
*, uint8_t);
84 // returned value is the constant part of the given source (either the
85 // nir_src or the selected source component of an intrinsic). Even though
86 // this is mostly an optimization to be able to skip indirects in a few
87 // cases, sometimes we require immediate values or set some fileds on
88 // instructions (e.g. tex) in order for codegen to consume those.
89 // If the found value has not a constant part, the Value gets returned
90 // through the Value parameter.
91 uint32_t getIndirect(nir_src
*, uint8_t, Value
*&);
92 uint32_t getIndirect(nir_intrinsic_instr
*, uint8_t s
, uint8_t c
, Value
*&);
94 uint32_t getSlotAddress(nir_intrinsic_instr
*, uint8_t idx
, uint8_t slot
);
96 void setInterpolate(nv50_ir_varying
*,
101 Instruction
*loadFrom(DataFile
, uint8_t, DataType
, Value
*def
, uint32_t base
,
102 uint8_t c
, Value
*indirect0
= NULL
,
103 Value
*indirect1
= NULL
, bool patch
= false);
104 void storeTo(nir_intrinsic_instr
*, DataFile
, operation
, DataType
,
105 Value
*src
, uint8_t idx
, uint8_t c
, Value
*indirect0
= NULL
,
106 Value
*indirect1
= NULL
);
108 bool isFloatType(nir_alu_type
);
109 bool isSignedType(nir_alu_type
);
110 bool isResultFloat(nir_op
);
111 bool isResultSigned(nir_op
);
113 DataType
getDType(nir_alu_instr
*);
114 DataType
getDType(nir_intrinsic_instr
*);
115 DataType
getDType(nir_op
, uint8_t);
117 std::vector
<DataType
> getSTypes(nir_alu_instr
*);
118 DataType
getSType(nir_src
&, bool isFloat
, bool isSigned
);
120 operation
getOperation(nir_intrinsic_op
);
121 operation
getOperation(nir_op
);
122 operation
getOperation(nir_texop
);
123 operation
preOperationNeeded(nir_op
);
125 int getSubOp(nir_intrinsic_op
);
126 int getSubOp(nir_op
);
128 CondCode
getCondCode(nir_op
);
133 bool visit(nir_alu_instr
*);
134 bool visit(nir_block
*);
135 bool visit(nir_cf_node
*);
136 bool visit(nir_function
*);
137 bool visit(nir_if
*);
138 bool visit(nir_instr
*);
139 bool visit(nir_intrinsic_instr
*);
140 bool visit(nir_jump_instr
*);
141 bool visit(nir_load_const_instr
*);
142 bool visit(nir_loop
*);
143 bool visit(nir_ssa_undef_instr
*);
144 bool visit(nir_tex_instr
*);
147 Value
* applyProjection(Value
*src
, Value
*proj
);
153 NirArrayLMemOffsets regToLmemOffset
;
155 unsigned int curLoopDepth
;
160 int clipVertexOutput
;
169 Converter::Converter(Program
*prog
, nir_shader
*nir
, nv50_ir_prog_info
*info
)
170 : ConverterCommon(prog
, info
),
175 zero
= mkImm((uint32_t)0);
179 Converter::convert(nir_block
*block
)
181 NirBlockMap::iterator it
= blocks
.find(block
->index
);
182 if (it
!= blocks
.end())
185 BasicBlock
*bb
= new BasicBlock(func
);
186 blocks
[block
->index
] = bb
;
191 Converter::isFloatType(nir_alu_type type
)
193 return nir_alu_type_get_base_type(type
) == nir_type_float
;
197 Converter::isSignedType(nir_alu_type type
)
199 return nir_alu_type_get_base_type(type
) == nir_type_int
;
203 Converter::isResultFloat(nir_op op
)
205 const nir_op_info
&info
= nir_op_infos
[op
];
206 if (info
.output_type
!= nir_type_invalid
)
207 return isFloatType(info
.output_type
);
209 ERROR("isResultFloat not implemented for %s\n", nir_op_infos
[op
].name
);
// Whether the result of the given nir_op should be treated as signed.
// NOTE(review): interior lines were dropped by the extraction — the original
// special-cases some ops before falling back to the declared output type
// (the comment below hints at multiply ops); confirm against upstream.
215 Converter::isResultSigned(nir_op op
)
218 // there is no umul and we get wrong results if we treat all muls as signed
// Fallback: use the op's declared output type when it is valid.
223 const nir_op_info
&info
= nir_op_infos
[op
];
224 if (info
.output_type
!= nir_type_invalid
)
225 return isSignedType(info
.output_type
);
// Ops with nir_type_invalid output are not handled — report the op name.
226 ERROR("isResultSigned not implemented for %s\n", nir_op_infos
[op
].name
);
233 Converter::getDType(nir_alu_instr
*insn
)
235 if (insn
->dest
.dest
.is_ssa
)
236 return getDType(insn
->op
, insn
->dest
.dest
.ssa
.bit_size
);
238 return getDType(insn
->op
, insn
->dest
.dest
.reg
.reg
->bit_size
);
242 Converter::getDType(nir_intrinsic_instr
*insn
)
244 if (insn
->dest
.is_ssa
)
245 return typeOfSize(insn
->dest
.ssa
.bit_size
/ 8, false, false);
247 return typeOfSize(insn
->dest
.reg
.reg
->bit_size
/ 8, false, false);
251 Converter::getDType(nir_op op
, uint8_t bitSize
)
253 DataType ty
= typeOfSize(bitSize
/ 8, isResultFloat(op
), isResultSigned(op
));
254 if (ty
== TYPE_NONE
) {
255 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos
[op
].name
, bitSize
);
261 std::vector
<DataType
>
262 Converter::getSTypes(nir_alu_instr
*insn
)
264 const nir_op_info
&info
= nir_op_infos
[insn
->op
];
265 std::vector
<DataType
> res(info
.num_inputs
);
267 for (uint8_t i
= 0; i
< info
.num_inputs
; ++i
) {
268 if (info
.input_types
[i
] != nir_type_invalid
) {
269 res
[i
] = getSType(insn
->src
[i
].src
, isFloatType(info
.input_types
[i
]), isSignedType(info
.input_types
[i
]));
271 ERROR("getSType not implemented for %s idx %u\n", info
.name
, i
);
282 Converter::getSType(nir_src
&src
, bool isFloat
, bool isSigned
)
286 bitSize
= src
.ssa
->bit_size
;
288 bitSize
= src
.reg
.reg
->bit_size
;
290 DataType ty
= typeOfSize(bitSize
/ 8, isFloat
, isSigned
);
291 if (ty
== TYPE_NONE
) {
299 ERROR("couldn't get Type for %s with bitSize %u\n", str
, bitSize
);
306 Converter::getOperation(nir_op op
)
309 // basic ops with float and int variants
319 case nir_op_ifind_msb
:
320 case nir_op_ufind_msb
:
342 case nir_op_fddx_coarse
:
343 case nir_op_fddx_fine
:
346 case nir_op_fddy_coarse
:
347 case nir_op_fddy_fine
:
365 case nir_op_pack_64_2x32_split
:
379 case nir_op_imul_high
:
380 case nir_op_umul_high
:
428 ERROR("couldn't get operation for op %s\n", nir_op_infos
[op
].name
);
435 Converter::getOperation(nir_texop op
)
447 case nir_texop_txf_ms
:
453 case nir_texop_query_levels
:
454 case nir_texop_texture_samples
:
458 ERROR("couldn't get operation for nir_texop %u\n", op
);
465 Converter::getOperation(nir_intrinsic_op op
)
468 case nir_intrinsic_emit_vertex
:
470 case nir_intrinsic_end_primitive
:
473 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op
);
480 Converter::preOperationNeeded(nir_op op
)
492 Converter::getSubOp(nir_op op
)
495 case nir_op_imul_high
:
496 case nir_op_umul_high
:
497 return NV50_IR_SUBOP_MUL_HIGH
;
504 Converter::getSubOp(nir_intrinsic_op op
)
507 case nir_intrinsic_ssbo_atomic_add
:
508 return NV50_IR_SUBOP_ATOM_ADD
;
509 case nir_intrinsic_ssbo_atomic_and
:
510 return NV50_IR_SUBOP_ATOM_AND
;
511 case nir_intrinsic_ssbo_atomic_comp_swap
:
512 return NV50_IR_SUBOP_ATOM_CAS
;
513 case nir_intrinsic_ssbo_atomic_exchange
:
514 return NV50_IR_SUBOP_ATOM_EXCH
;
515 case nir_intrinsic_ssbo_atomic_or
:
516 return NV50_IR_SUBOP_ATOM_OR
;
517 case nir_intrinsic_ssbo_atomic_imax
:
518 case nir_intrinsic_ssbo_atomic_umax
:
519 return NV50_IR_SUBOP_ATOM_MAX
;
520 case nir_intrinsic_ssbo_atomic_imin
:
521 case nir_intrinsic_ssbo_atomic_umin
:
522 return NV50_IR_SUBOP_ATOM_MIN
;
523 case nir_intrinsic_ssbo_atomic_xor
:
524 return NV50_IR_SUBOP_ATOM_XOR
;
525 case nir_intrinsic_vote_all
:
526 return NV50_IR_SUBOP_VOTE_ALL
;
527 case nir_intrinsic_vote_any
:
528 return NV50_IR_SUBOP_VOTE_ANY
;
529 case nir_intrinsic_vote_ieq
:
530 return NV50_IR_SUBOP_VOTE_UNI
;
537 Converter::getCondCode(nir_op op
)
556 ERROR("couldn't get CondCode for op %s\n", nir_op_infos
[op
].name
);
563 Converter::convert(nir_alu_dest
*dest
)
565 return convert(&dest
->dest
);
569 Converter::convert(nir_dest
*dest
)
572 return convert(&dest
->ssa
);
573 if (dest
->reg
.indirect
) {
574 ERROR("no support for indirects.");
577 return convert(dest
->reg
.reg
);
581 Converter::convert(nir_register
*reg
)
583 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
584 if (it
!= regDefs
.end())
587 LValues
newDef(reg
->num_components
);
588 for (uint8_t i
= 0; i
< reg
->num_components
; i
++)
589 newDef
[i
] = getScratch(std::max(4, reg
->bit_size
/ 8));
590 return regDefs
[reg
->index
] = newDef
;
594 Converter::convert(nir_ssa_def
*def
)
596 NirDefMap::iterator it
= ssaDefs
.find(def
->index
);
597 if (it
!= ssaDefs
.end())
600 LValues
newDef(def
->num_components
);
601 for (uint8_t i
= 0; i
< def
->num_components
; i
++)
602 newDef
[i
] = getSSA(std::max(4, def
->bit_size
/ 8));
603 return ssaDefs
[def
->index
] = newDef
;
607 Converter::getSrc(nir_alu_src
*src
, uint8_t component
)
609 if (src
->abs
|| src
->negate
) {
610 ERROR("modifiers currently not supported on nir_alu_src\n");
613 return getSrc(&src
->src
, src
->swizzle
[component
]);
617 Converter::getSrc(nir_register
*reg
, uint8_t idx
)
619 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
620 if (it
== regDefs
.end())
621 return convert(reg
)[idx
];
622 return it
->second
[idx
];
626 Converter::getSrc(nir_src
*src
, uint8_t idx
, bool indirect
)
629 return getSrc(src
->ssa
, idx
);
631 if (src
->reg
.indirect
) {
633 return getSrc(src
->reg
.indirect
, idx
);
634 ERROR("no support for indirects.");
639 return getSrc(src
->reg
.reg
, idx
);
643 Converter::getSrc(nir_ssa_def
*src
, uint8_t idx
)
645 NirDefMap::iterator it
= ssaDefs
.find(src
->index
);
646 if (it
== ssaDefs
.end()) {
647 ERROR("SSA value %u not found\n", src
->index
);
651 return it
->second
[idx
];
// Splits a source into constant and indirect parts: if the source folds to a
// constant, its 32-bit value is returned directly; otherwise the source is
// fetched as a Value through the @indirect out-parameter.
// NOTE(review): lines were dropped by the extraction between the constant hit
// path and the fallthrough (the guard around the return and the final return
// value are not visible) — confirm against upstream before relying on the
// exact control flow here.
655 Converter::getIndirect(nir_src
*src
, uint8_t idx
, Value
*&indirect
)
// Try to fold the source to a compile-time constant first.
657 nir_const_value
*offset
= nir_src_as_const_value(*src
);
// Constant hit: return the scalar u32 value; no indirect needed.
661 return offset
->u32
[0];
// Non-constant: route the value out via the reference parameter.
664 indirect
= getSrc(src
, idx
, true);
669 Converter::getIndirect(nir_intrinsic_instr
*insn
, uint8_t s
, uint8_t c
, Value
*&indirect
)
671 int32_t idx
= nir_intrinsic_base(insn
) + getIndirect(&insn
->src
[s
], c
, indirect
);
673 indirect
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), indirect
, loadImm(NULL
, 4));
678 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot
, unsigned *name
, unsigned *index
)
680 assert(name
&& index
);
682 if (slot
>= VERT_ATTRIB_MAX
) {
683 ERROR("invalid varying slot %u\n", slot
);
688 if (slot
>= VERT_ATTRIB_GENERIC0
&&
689 slot
< VERT_ATTRIB_GENERIC0
+ VERT_ATTRIB_GENERIC_MAX
) {
690 *name
= TGSI_SEMANTIC_GENERIC
;
691 *index
= slot
- VERT_ATTRIB_GENERIC0
;
695 if (slot
>= VERT_ATTRIB_TEX0
&&
696 slot
< VERT_ATTRIB_TEX0
+ VERT_ATTRIB_TEX_MAX
) {
697 *name
= TGSI_SEMANTIC_TEXCOORD
;
698 *index
= slot
- VERT_ATTRIB_TEX0
;
703 case VERT_ATTRIB_COLOR0
:
704 *name
= TGSI_SEMANTIC_COLOR
;
707 case VERT_ATTRIB_COLOR1
:
708 *name
= TGSI_SEMANTIC_COLOR
;
711 case VERT_ATTRIB_EDGEFLAG
:
712 *name
= TGSI_SEMANTIC_EDGEFLAG
;
715 case VERT_ATTRIB_FOG
:
716 *name
= TGSI_SEMANTIC_FOG
;
719 case VERT_ATTRIB_NORMAL
:
720 *name
= TGSI_SEMANTIC_NORMAL
;
723 case VERT_ATTRIB_POS
:
724 *name
= TGSI_SEMANTIC_POSITION
;
727 case VERT_ATTRIB_POINT_SIZE
:
728 *name
= TGSI_SEMANTIC_PSIZE
;
732 ERROR("unknown vert attrib slot %u\n", slot
);
739 varying_slot_to_tgsi_semantic(gl_varying_slot slot
, unsigned *name
, unsigned *index
)
741 assert(name
&& index
);
743 if (slot
>= VARYING_SLOT_TESS_MAX
) {
744 ERROR("invalid varying slot %u\n", slot
);
749 if (slot
>= VARYING_SLOT_PATCH0
) {
750 *name
= TGSI_SEMANTIC_PATCH
;
751 *index
= slot
- VARYING_SLOT_PATCH0
;
755 if (slot
>= VARYING_SLOT_VAR0
) {
756 *name
= TGSI_SEMANTIC_GENERIC
;
757 *index
= slot
- VARYING_SLOT_VAR0
;
761 if (slot
>= VARYING_SLOT_TEX0
&& slot
<= VARYING_SLOT_TEX7
) {
762 *name
= TGSI_SEMANTIC_TEXCOORD
;
763 *index
= slot
- VARYING_SLOT_TEX0
;
768 case VARYING_SLOT_BFC0
:
769 *name
= TGSI_SEMANTIC_BCOLOR
;
772 case VARYING_SLOT_BFC1
:
773 *name
= TGSI_SEMANTIC_BCOLOR
;
776 case VARYING_SLOT_CLIP_DIST0
:
777 *name
= TGSI_SEMANTIC_CLIPDIST
;
780 case VARYING_SLOT_CLIP_DIST1
:
781 *name
= TGSI_SEMANTIC_CLIPDIST
;
784 case VARYING_SLOT_CLIP_VERTEX
:
785 *name
= TGSI_SEMANTIC_CLIPVERTEX
;
788 case VARYING_SLOT_COL0
:
789 *name
= TGSI_SEMANTIC_COLOR
;
792 case VARYING_SLOT_COL1
:
793 *name
= TGSI_SEMANTIC_COLOR
;
796 case VARYING_SLOT_EDGE
:
797 *name
= TGSI_SEMANTIC_EDGEFLAG
;
800 case VARYING_SLOT_FACE
:
801 *name
= TGSI_SEMANTIC_FACE
;
804 case VARYING_SLOT_FOGC
:
805 *name
= TGSI_SEMANTIC_FOG
;
808 case VARYING_SLOT_LAYER
:
809 *name
= TGSI_SEMANTIC_LAYER
;
812 case VARYING_SLOT_PNTC
:
813 *name
= TGSI_SEMANTIC_PCOORD
;
816 case VARYING_SLOT_POS
:
817 *name
= TGSI_SEMANTIC_POSITION
;
820 case VARYING_SLOT_PRIMITIVE_ID
:
821 *name
= TGSI_SEMANTIC_PRIMID
;
824 case VARYING_SLOT_PSIZ
:
825 *name
= TGSI_SEMANTIC_PSIZE
;
828 case VARYING_SLOT_TESS_LEVEL_INNER
:
829 *name
= TGSI_SEMANTIC_TESSINNER
;
832 case VARYING_SLOT_TESS_LEVEL_OUTER
:
833 *name
= TGSI_SEMANTIC_TESSOUTER
;
836 case VARYING_SLOT_VIEWPORT
:
837 *name
= TGSI_SEMANTIC_VIEWPORT_INDEX
;
841 ERROR("unknown varying slot %u\n", slot
);
848 frag_result_to_tgsi_semantic(unsigned slot
, unsigned *name
, unsigned *index
)
850 if (slot
>= FRAG_RESULT_DATA0
) {
851 *name
= TGSI_SEMANTIC_COLOR
;
852 *index
= slot
- FRAG_RESULT_COLOR
- 2; // intentional
857 case FRAG_RESULT_COLOR
:
858 *name
= TGSI_SEMANTIC_COLOR
;
861 case FRAG_RESULT_DEPTH
:
862 *name
= TGSI_SEMANTIC_POSITION
;
865 case FRAG_RESULT_SAMPLE_MASK
:
866 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
870 ERROR("unknown frag result slot %u\n", slot
);
876 // copy of _mesa_sysval_to_semantic
878 system_val_to_tgsi_semantic(unsigned val
, unsigned *name
, unsigned *index
)
883 case SYSTEM_VALUE_VERTEX_ID
:
884 *name
= TGSI_SEMANTIC_VERTEXID
;
886 case SYSTEM_VALUE_INSTANCE_ID
:
887 *name
= TGSI_SEMANTIC_INSTANCEID
;
889 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
:
890 *name
= TGSI_SEMANTIC_VERTEXID_NOBASE
;
892 case SYSTEM_VALUE_BASE_VERTEX
:
893 *name
= TGSI_SEMANTIC_BASEVERTEX
;
895 case SYSTEM_VALUE_BASE_INSTANCE
:
896 *name
= TGSI_SEMANTIC_BASEINSTANCE
;
898 case SYSTEM_VALUE_DRAW_ID
:
899 *name
= TGSI_SEMANTIC_DRAWID
;
903 case SYSTEM_VALUE_INVOCATION_ID
:
904 *name
= TGSI_SEMANTIC_INVOCATIONID
;
908 case SYSTEM_VALUE_FRAG_COORD
:
909 *name
= TGSI_SEMANTIC_POSITION
;
911 case SYSTEM_VALUE_FRONT_FACE
:
912 *name
= TGSI_SEMANTIC_FACE
;
914 case SYSTEM_VALUE_SAMPLE_ID
:
915 *name
= TGSI_SEMANTIC_SAMPLEID
;
917 case SYSTEM_VALUE_SAMPLE_POS
:
918 *name
= TGSI_SEMANTIC_SAMPLEPOS
;
920 case SYSTEM_VALUE_SAMPLE_MASK_IN
:
921 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
923 case SYSTEM_VALUE_HELPER_INVOCATION
:
924 *name
= TGSI_SEMANTIC_HELPER_INVOCATION
;
927 // Tessellation shader
928 case SYSTEM_VALUE_TESS_COORD
:
929 *name
= TGSI_SEMANTIC_TESSCOORD
;
931 case SYSTEM_VALUE_VERTICES_IN
:
932 *name
= TGSI_SEMANTIC_VERTICESIN
;
934 case SYSTEM_VALUE_PRIMITIVE_ID
:
935 *name
= TGSI_SEMANTIC_PRIMID
;
937 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
938 *name
= TGSI_SEMANTIC_TESSOUTER
;
940 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
941 *name
= TGSI_SEMANTIC_TESSINNER
;
945 case SYSTEM_VALUE_LOCAL_INVOCATION_ID
:
946 *name
= TGSI_SEMANTIC_THREAD_ID
;
948 case SYSTEM_VALUE_WORK_GROUP_ID
:
949 *name
= TGSI_SEMANTIC_BLOCK_ID
;
951 case SYSTEM_VALUE_NUM_WORK_GROUPS
:
952 *name
= TGSI_SEMANTIC_GRID_SIZE
;
954 case SYSTEM_VALUE_LOCAL_GROUP_SIZE
:
955 *name
= TGSI_SEMANTIC_BLOCK_SIZE
;
959 case SYSTEM_VALUE_SUBGROUP_SIZE
:
960 *name
= TGSI_SEMANTIC_SUBGROUP_SIZE
;
962 case SYSTEM_VALUE_SUBGROUP_INVOCATION
:
963 *name
= TGSI_SEMANTIC_SUBGROUP_INVOCATION
;
965 case SYSTEM_VALUE_SUBGROUP_EQ_MASK
:
966 *name
= TGSI_SEMANTIC_SUBGROUP_EQ_MASK
;
968 case SYSTEM_VALUE_SUBGROUP_GE_MASK
:
969 *name
= TGSI_SEMANTIC_SUBGROUP_GE_MASK
;
971 case SYSTEM_VALUE_SUBGROUP_GT_MASK
:
972 *name
= TGSI_SEMANTIC_SUBGROUP_GT_MASK
;
974 case SYSTEM_VALUE_SUBGROUP_LE_MASK
:
975 *name
= TGSI_SEMANTIC_SUBGROUP_LE_MASK
;
977 case SYSTEM_VALUE_SUBGROUP_LT_MASK
:
978 *name
= TGSI_SEMANTIC_SUBGROUP_LT_MASK
;
982 ERROR("unknown system value %u\n", val
);
989 Converter::setInterpolate(nv50_ir_varying
*var
,
995 case INTERP_MODE_FLAT
:
998 case INTERP_MODE_NONE
:
999 if (semantic
== TGSI_SEMANTIC_COLOR
)
1001 else if (semantic
== TGSI_SEMANTIC_POSITION
)
1004 case INTERP_MODE_NOPERSPECTIVE
:
1007 case INTERP_MODE_SMOOTH
:
1010 var
->centroid
= centroid
;
1014 calcSlots(const glsl_type
*type
, Program::Type stage
, const shader_info
&info
,
1015 bool input
, const nir_variable
*var
)
1017 if (!type
->is_array())
1018 return type
->count_attribute_slots(false);
1022 case Program::TYPE_GEOMETRY
:
1023 slots
= type
->uniform_locations();
1025 slots
/= info
.gs
.vertices_in
;
1027 case Program::TYPE_TESSELLATION_CONTROL
:
1028 case Program::TYPE_TESSELLATION_EVAL
:
1029 // remove first dimension
1030 if (var
->data
.patch
|| (!input
&& stage
== Program::TYPE_TESSELLATION_EVAL
))
1031 slots
= type
->uniform_locations();
1033 slots
= type
->fields
.array
->uniform_locations();
1036 slots
= type
->count_attribute_slots(false);
1043 bool Converter::assignSlots() {
1047 info
->io
.viewportId
= -1;
1048 info
->numInputs
= 0;
1050 // we have to fixup the uniform locations for arrays
1051 unsigned numImages
= 0;
1052 nir_foreach_variable(var
, &nir
->uniforms
) {
1053 const glsl_type
*type
= var
->type
;
1054 if (!type
->without_array()->is_image())
1056 var
->data
.driver_location
= numImages
;
1057 numImages
+= type
->is_array() ? type
->arrays_of_arrays_size() : 1;
1060 nir_foreach_variable(var
, &nir
->inputs
) {
1061 const glsl_type
*type
= var
->type
;
1062 int slot
= var
->data
.location
;
1063 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, true, var
);
1064 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
1065 : type
->component_slots();
1066 uint32_t frac
= var
->data
.location_frac
;
1067 uint32_t vary
= var
->data
.driver_location
;
1069 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
1074 assert(vary
+ slots
<= PIPE_MAX_SHADER_INPUTS
);
1076 switch(prog
->getType()) {
1077 case Program::TYPE_FRAGMENT
:
1078 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1079 for (uint16_t i
= 0; i
< slots
; ++i
) {
1080 setInterpolate(&info
->in
[vary
+ i
], var
->data
.interpolation
,
1081 var
->data
.centroid
| var
->data
.sample
, name
);
1084 case Program::TYPE_GEOMETRY
:
1085 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1087 case Program::TYPE_TESSELLATION_CONTROL
:
1088 case Program::TYPE_TESSELLATION_EVAL
:
1089 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1090 if (var
->data
.patch
&& name
== TGSI_SEMANTIC_PATCH
)
1091 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1093 case Program::TYPE_VERTEX
:
1094 vert_attrib_to_tgsi_semantic((gl_vert_attrib
)slot
, &name
, &index
);
1096 case TGSI_SEMANTIC_EDGEFLAG
:
1097 info
->io
.edgeFlagIn
= vary
;
1104 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1108 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1109 info
->in
[vary
].id
= vary
;
1110 info
->in
[vary
].patch
= var
->data
.patch
;
1111 info
->in
[vary
].sn
= name
;
1112 info
->in
[vary
].si
= index
+ i
;
1113 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1115 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1117 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1119 info
->in
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1121 info
->numInputs
= std::max
<uint8_t>(info
->numInputs
, vary
);
1124 info
->numOutputs
= 0;
1125 nir_foreach_variable(var
, &nir
->outputs
) {
1126 const glsl_type
*type
= var
->type
;
1127 int slot
= var
->data
.location
;
1128 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, false, var
);
1129 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
1130 : type
->component_slots();
1131 uint32_t frac
= var
->data
.location_frac
;
1132 uint32_t vary
= var
->data
.driver_location
;
1134 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
1139 assert(vary
< PIPE_MAX_SHADER_OUTPUTS
);
1141 switch(prog
->getType()) {
1142 case Program::TYPE_FRAGMENT
:
1143 frag_result_to_tgsi_semantic((gl_frag_result
)slot
, &name
, &index
);
1145 case TGSI_SEMANTIC_COLOR
:
1146 if (!var
->data
.fb_fetch_output
)
1147 info
->prop
.fp
.numColourResults
++;
1148 info
->prop
.fp
.separateFragData
= true;
1149 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1150 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1151 index
= index
== 0 ? var
->data
.index
: index
;
1153 case TGSI_SEMANTIC_POSITION
:
1154 info
->io
.fragDepth
= vary
;
1155 info
->prop
.fp
.writesDepth
= true;
1157 case TGSI_SEMANTIC_SAMPLEMASK
:
1158 info
->io
.sampleMask
= vary
;
1164 case Program::TYPE_GEOMETRY
:
1165 case Program::TYPE_TESSELLATION_CONTROL
:
1166 case Program::TYPE_TESSELLATION_EVAL
:
1167 case Program::TYPE_VERTEX
:
1168 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1170 if (var
->data
.patch
&& name
!= TGSI_SEMANTIC_TESSINNER
&&
1171 name
!= TGSI_SEMANTIC_TESSOUTER
)
1172 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1175 case TGSI_SEMANTIC_CLIPDIST
:
1176 info
->io
.genUserClip
= -1;
1178 case TGSI_SEMANTIC_CLIPVERTEX
:
1179 clipVertexOutput
= vary
;
1181 case TGSI_SEMANTIC_EDGEFLAG
:
1182 info
->io
.edgeFlagOut
= vary
;
1184 case TGSI_SEMANTIC_POSITION
:
1185 if (clipVertexOutput
< 0)
1186 clipVertexOutput
= vary
;
1193 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1197 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1198 info
->out
[vary
].id
= vary
;
1199 info
->out
[vary
].patch
= var
->data
.patch
;
1200 info
->out
[vary
].sn
= name
;
1201 info
->out
[vary
].si
= index
+ i
;
1202 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1204 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1206 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1208 info
->out
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1210 if (nir
->info
.outputs_read
& 1ll << slot
)
1211 info
->out
[vary
].oread
= 1;
1213 info
->numOutputs
= std::max
<uint8_t>(info
->numOutputs
, vary
);
1216 info
->numSysVals
= 0;
1217 for (uint8_t i
= 0; i
< 64; ++i
) {
1218 if (!(nir
->info
.system_values_read
& 1ll << i
))
1221 system_val_to_tgsi_semantic(i
, &name
, &index
);
1222 info
->sv
[info
->numSysVals
].sn
= name
;
1223 info
->sv
[info
->numSysVals
].si
= index
;
1224 info
->sv
[info
->numSysVals
].input
= 0; // TODO inferSysValDirection(sn);
1227 case SYSTEM_VALUE_INSTANCE_ID
:
1228 info
->io
.instanceId
= info
->numSysVals
;
1230 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
1231 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
1232 info
->sv
[info
->numSysVals
].patch
= 1;
1234 case SYSTEM_VALUE_VERTEX_ID
:
1235 info
->io
.vertexId
= info
->numSysVals
;
1241 info
->numSysVals
+= 1;
1244 if (info
->io
.genUserClip
> 0) {
1245 info
->io
.clipDistances
= info
->io
.genUserClip
;
1247 const unsigned int nOut
= (info
->io
.genUserClip
+ 3) / 4;
1249 for (unsigned int n
= 0; n
< nOut
; ++n
) {
1250 unsigned int i
= info
->numOutputs
++;
1251 info
->out
[i
].id
= i
;
1252 info
->out
[i
].sn
= TGSI_SEMANTIC_CLIPDIST
;
1253 info
->out
[i
].si
= n
;
1254 info
->out
[i
].mask
= ((1 << info
->io
.clipDistances
) - 1) >> (n
* 4);
1258 return info
->assignSlots(info
) == 0;
1262 Converter::getSlotAddress(nir_intrinsic_instr
*insn
, uint8_t idx
, uint8_t slot
)
1265 int offset
= nir_intrinsic_component(insn
);
1268 if (nir_intrinsic_infos
[insn
->intrinsic
].has_dest
)
1269 ty
= getDType(insn
);
1271 ty
= getSType(insn
->src
[0], false, false);
1273 switch (insn
->intrinsic
) {
1274 case nir_intrinsic_load_input
:
1275 case nir_intrinsic_load_interpolated_input
:
1276 case nir_intrinsic_load_per_vertex_input
:
1279 case nir_intrinsic_load_output
:
1280 case nir_intrinsic_load_per_vertex_output
:
1281 case nir_intrinsic_store_output
:
1282 case nir_intrinsic_store_per_vertex_output
:
1286 ERROR("unknown intrinsic in getSlotAddress %s",
1287 nir_intrinsic_infos
[insn
->intrinsic
].name
);
1293 if (typeSizeof(ty
) == 8) {
1305 assert(!input
|| idx
< PIPE_MAX_SHADER_INPUTS
);
1306 assert(input
|| idx
< PIPE_MAX_SHADER_OUTPUTS
);
1308 const nv50_ir_varying
*vary
= input
? info
->in
: info
->out
;
1309 return vary
[idx
].slot
[slot
] * 4;
1313 Converter::loadFrom(DataFile file
, uint8_t i
, DataType ty
, Value
*def
,
1314 uint32_t base
, uint8_t c
, Value
*indirect0
,
1315 Value
*indirect1
, bool patch
)
1317 unsigned int tySize
= typeSizeof(ty
);
1320 (file
== FILE_MEMORY_CONST
|| file
== FILE_MEMORY_BUFFER
|| indirect0
)) {
1321 Value
*lo
= getSSA();
1322 Value
*hi
= getSSA();
1325 mkLoad(TYPE_U32
, lo
,
1326 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
),
1328 loi
->setIndirect(0, 1, indirect1
);
1329 loi
->perPatch
= patch
;
1332 mkLoad(TYPE_U32
, hi
,
1333 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
+ 4),
1335 hii
->setIndirect(0, 1, indirect1
);
1336 hii
->perPatch
= patch
;
1338 return mkOp2(OP_MERGE
, ty
, def
, lo
, hi
);
1341 mkLoad(ty
, def
, mkSymbol(file
, i
, ty
, base
+ c
* tySize
), indirect0
);
1342 ld
->setIndirect(0, 1, indirect1
);
1343 ld
->perPatch
= patch
;
1349 Converter::storeTo(nir_intrinsic_instr
*insn
, DataFile file
, operation op
,
1350 DataType ty
, Value
*src
, uint8_t idx
, uint8_t c
,
1351 Value
*indirect0
, Value
*indirect1
)
1353 uint8_t size
= typeSizeof(ty
);
1354 uint32_t address
= getSlotAddress(insn
, idx
, c
);
1356 if (size
== 8 && indirect0
) {
1358 mkSplit(split
, 4, src
);
1360 if (op
== OP_EXPORT
) {
1361 split
[0] = mkMov(getSSA(), split
[0], ty
)->getDef(0);
1362 split
[1] = mkMov(getSSA(), split
[1], ty
)->getDef(0);
1365 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
), indirect0
,
1366 split
[0])->perPatch
= info
->out
[idx
].patch
;
1367 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
+ 4), indirect0
,
1368 split
[1])->perPatch
= info
->out
[idx
].patch
;
1370 if (op
== OP_EXPORT
)
1371 src
= mkMov(getSSA(size
), src
, ty
)->getDef(0);
1372 mkStore(op
, ty
, mkSymbol(file
, 0, ty
, address
), indirect0
,
1373 src
)->perPatch
= info
->out
[idx
].patch
;
1378 Converter::parseNIR()
1380 info
->bin
.tlsSpace
= 0;
1381 info
->io
.clipDistances
= nir
->info
.clip_distance_array_size
;
1382 info
->io
.cullDistances
= nir
->info
.cull_distance_array_size
;
1384 switch(prog
->getType()) {
1385 case Program::TYPE_COMPUTE
:
1386 info
->prop
.cp
.numThreads
[0] = nir
->info
.cs
.local_size
[0];
1387 info
->prop
.cp
.numThreads
[1] = nir
->info
.cs
.local_size
[1];
1388 info
->prop
.cp
.numThreads
[2] = nir
->info
.cs
.local_size
[2];
1389 info
->bin
.smemSize
= nir
->info
.cs
.shared_size
;
1391 case Program::TYPE_FRAGMENT
:
1392 info
->prop
.fp
.earlyFragTests
= nir
->info
.fs
.early_fragment_tests
;
1393 info
->prop
.fp
.persampleInvocation
=
1394 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_ID
) ||
1395 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1396 info
->prop
.fp
.postDepthCoverage
= nir
->info
.fs
.post_depth_coverage
;
1397 info
->prop
.fp
.readsSampleLocations
=
1398 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1399 info
->prop
.fp
.usesDiscard
= nir
->info
.fs
.uses_discard
;
1400 info
->prop
.fp
.usesSampleMaskIn
=
1401 !!(nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_MASK_IN
);
1403 case Program::TYPE_GEOMETRY
:
1404 info
->prop
.gp
.inputPrim
= nir
->info
.gs
.input_primitive
;
1405 info
->prop
.gp
.instanceCount
= nir
->info
.gs
.invocations
;
1406 info
->prop
.gp
.maxVertices
= nir
->info
.gs
.vertices_out
;
1407 info
->prop
.gp
.outputPrim
= nir
->info
.gs
.output_primitive
;
1409 case Program::TYPE_TESSELLATION_CONTROL
:
1410 case Program::TYPE_TESSELLATION_EVAL
:
1411 if (nir
->info
.tess
.primitive_mode
== GL_ISOLINES
)
1412 info
->prop
.tp
.domain
= GL_LINES
;
1414 info
->prop
.tp
.domain
= nir
->info
.tess
.primitive_mode
;
1415 info
->prop
.tp
.outputPatchSize
= nir
->info
.tess
.tcs_vertices_out
;
1416 info
->prop
.tp
.outputPrim
=
1417 nir
->info
.tess
.point_mode
? PIPE_PRIM_POINTS
: PIPE_PRIM_TRIANGLES
;
1418 info
->prop
.tp
.partitioning
= (nir
->info
.tess
.spacing
+ 1) % 3;
1419 info
->prop
.tp
.winding
= !nir
->info
.tess
.ccw
;
1421 case Program::TYPE_VERTEX
:
1422 info
->prop
.vp
.usesDrawParameters
=
1423 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX
)) ||
1424 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE
)) ||
1425 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID
));
1435 Converter::visit(nir_function
*function
)
1437 // we only support emiting the main function for now
1438 assert(!strcmp(function
->name
, "main"));
1439 assert(function
->impl
);
1441 // usually the blocks will set everything up, but main is special
1442 BasicBlock
*entry
= new BasicBlock(prog
->main
);
1443 exit
= new BasicBlock(prog
->main
);
1444 blocks
[nir_start_block(function
->impl
)->index
] = entry
;
1445 prog
->main
->setEntry(entry
);
1446 prog
->main
->setExit(exit
);
1448 setPosition(entry
, true);
1450 if (info
->io
.genUserClip
> 0) {
1451 for (int c
= 0; c
< 4; ++c
)
1452 clipVtx
[c
] = getScratch();
1455 switch (prog
->getType()) {
1456 case Program::TYPE_TESSELLATION_CONTROL
:
1458 OP_SUB
, TYPE_U32
, getSSA(),
1459 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LANEID
, 0)),
1460 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_INVOCATION_ID
, 0)));
1462 case Program::TYPE_FRAGMENT
: {
1463 Symbol
*sv
= mkSysVal(SV_POSITION
, 3);
1464 fragCoord
[3] = mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), sv
);
1465 fp
.position
= mkOp1v(OP_RCP
, TYPE_F32
, fragCoord
[3], fragCoord
[3]);
1472 nir_foreach_register(reg
, &function
->impl
->registers
) {
1473 if (reg
->num_array_elems
) {
1474 // TODO: packed variables would be nice, but MemoryOpt fails
1475 // replace 4 with reg->num_components
1476 uint32_t size
= 4 * reg
->num_array_elems
* (reg
->bit_size
/ 8);
1477 regToLmemOffset
[reg
->index
] = info
->bin
.tlsSpace
;
1478 info
->bin
.tlsSpace
+= size
;
1482 nir_index_ssa_defs(function
->impl
);
1483 foreach_list_typed(nir_cf_node
, node
, node
, &function
->impl
->body
) {
1488 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::TREE
);
1489 setPosition(exit
, true);
1491 if (info
->io
.genUserClip
> 0)
1492 handleUserClipPlanes();
1494 // TODO: for non main function this needs to be a OP_RETURN
1495 mkOp(OP_EXIT
, TYPE_NONE
, NULL
)->terminator
= 1;
1500 Converter::visit(nir_cf_node
*node
)
1502 switch (node
->type
) {
1503 case nir_cf_node_block
:
1504 return visit(nir_cf_node_as_block(node
));
1505 case nir_cf_node_if
:
1506 return visit(nir_cf_node_as_if(node
));
1507 case nir_cf_node_loop
:
1508 return visit(nir_cf_node_as_loop(node
));
1510 ERROR("unknown nir_cf_node type %u\n", node
->type
);
1516 Converter::visit(nir_block
*block
)
1518 if (!block
->predecessors
->entries
&& block
->instr_list
.is_empty())
1521 BasicBlock
*bb
= convert(block
);
1523 setPosition(bb
, true);
1524 nir_foreach_instr(insn
, block
) {
1532 Converter::visit(nir_if
*nif
)
1534 DataType sType
= getSType(nif
->condition
, false, false);
1535 Value
*src
= getSrc(&nif
->condition
, 0);
1537 nir_block
*lastThen
= nir_if_last_then_block(nif
);
1538 nir_block
*lastElse
= nir_if_last_else_block(nif
);
1540 assert(!lastThen
->successors
[1]);
1541 assert(!lastElse
->successors
[1]);
1543 BasicBlock
*ifBB
= convert(nir_if_first_then_block(nif
));
1544 BasicBlock
*elseBB
= convert(nir_if_first_else_block(nif
));
1546 bb
->cfg
.attach(&ifBB
->cfg
, Graph::Edge::TREE
);
1547 bb
->cfg
.attach(&elseBB
->cfg
, Graph::Edge::TREE
);
1549 // we only insert joinats, if both nodes end up at the end of the if again.
1550 // the reason for this to not happens are breaks/continues/ret/... which
1551 // have their own handling
1552 if (lastThen
->successors
[0] == lastElse
->successors
[0])
1553 bb
->joinAt
= mkFlow(OP_JOINAT
, convert(lastThen
->successors
[0]),
1556 mkFlow(OP_BRA
, elseBB
, CC_EQ
, src
)->setType(sType
);
1558 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->then_list
) {
1562 setPosition(convert(lastThen
), true);
1563 if (!bb
->getExit() ||
1564 !bb
->getExit()->asFlow() ||
1565 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1566 BasicBlock
*tailBB
= convert(lastThen
->successors
[0]);
1567 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1568 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1571 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->else_list
) {
1575 setPosition(convert(lastElse
), true);
1576 if (!bb
->getExit() ||
1577 !bb
->getExit()->asFlow() ||
1578 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1579 BasicBlock
*tailBB
= convert(lastElse
->successors
[0]);
1580 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1581 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1584 if (lastThen
->successors
[0] == lastElse
->successors
[0]) {
1585 setPosition(convert(lastThen
->successors
[0]), true);
1586 mkFlow(OP_JOIN
, NULL
, CC_ALWAYS
, NULL
)->fixed
= 1;
1593 Converter::visit(nir_loop
*loop
)
1596 func
->loopNestingBound
= std::max(func
->loopNestingBound
, curLoopDepth
);
1598 BasicBlock
*loopBB
= convert(nir_loop_first_block(loop
));
1599 BasicBlock
*tailBB
=
1600 convert(nir_cf_node_as_block(nir_cf_node_next(&loop
->cf_node
)));
1601 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::TREE
);
1603 mkFlow(OP_PREBREAK
, tailBB
, CC_ALWAYS
, NULL
);
1604 setPosition(loopBB
, false);
1605 mkFlow(OP_PRECONT
, loopBB
, CC_ALWAYS
, NULL
);
1607 foreach_list_typed(nir_cf_node
, node
, node
, &loop
->body
) {
1611 Instruction
*insn
= bb
->getExit();
1612 if (bb
->cfg
.incidentCount() != 0) {
1613 if (!insn
|| !insn
->asFlow()) {
1614 mkFlow(OP_CONT
, loopBB
, CC_ALWAYS
, NULL
);
1615 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::BACK
);
1616 } else if (insn
&& insn
->op
== OP_BRA
&& !insn
->getPredicate() &&
1617 tailBB
->cfg
.incidentCount() == 0) {
1618 // RA doesn't like having blocks around with no incident edge,
1619 // so we create a fake one to make it happy
1620 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::TREE
);
1630 Converter::visit(nir_instr
*insn
)
1632 switch (insn
->type
) {
1633 case nir_instr_type_alu
:
1634 return visit(nir_instr_as_alu(insn
));
1635 case nir_instr_type_intrinsic
:
1636 return visit(nir_instr_as_intrinsic(insn
));
1637 case nir_instr_type_jump
:
1638 return visit(nir_instr_as_jump(insn
));
1639 case nir_instr_type_load_const
:
1640 return visit(nir_instr_as_load_const(insn
));
1641 case nir_instr_type_ssa_undef
:
1642 return visit(nir_instr_as_ssa_undef(insn
));
1643 case nir_instr_type_tex
:
1644 return visit(nir_instr_as_tex(insn
));
1646 ERROR("unknown nir_instr type %u\n", insn
->type
);
1653 Converter::convert(nir_intrinsic_op intr
)
1656 case nir_intrinsic_load_base_vertex
:
1657 return SV_BASEVERTEX
;
1658 case nir_intrinsic_load_base_instance
:
1659 return SV_BASEINSTANCE
;
1660 case nir_intrinsic_load_draw_id
:
1662 case nir_intrinsic_load_front_face
:
1664 case nir_intrinsic_load_helper_invocation
:
1665 return SV_THREAD_KILL
;
1666 case nir_intrinsic_load_instance_id
:
1667 return SV_INSTANCE_ID
;
1668 case nir_intrinsic_load_invocation_id
:
1669 return SV_INVOCATION_ID
;
1670 case nir_intrinsic_load_local_group_size
:
1672 case nir_intrinsic_load_local_invocation_id
:
1674 case nir_intrinsic_load_num_work_groups
:
1676 case nir_intrinsic_load_patch_vertices_in
:
1677 return SV_VERTEX_COUNT
;
1678 case nir_intrinsic_load_primitive_id
:
1679 return SV_PRIMITIVE_ID
;
1680 case nir_intrinsic_load_sample_id
:
1681 return SV_SAMPLE_INDEX
;
1682 case nir_intrinsic_load_sample_mask_in
:
1683 return SV_SAMPLE_MASK
;
1684 case nir_intrinsic_load_sample_pos
:
1685 return SV_SAMPLE_POS
;
1686 case nir_intrinsic_load_subgroup_eq_mask
:
1687 return SV_LANEMASK_EQ
;
1688 case nir_intrinsic_load_subgroup_ge_mask
:
1689 return SV_LANEMASK_GE
;
1690 case nir_intrinsic_load_subgroup_gt_mask
:
1691 return SV_LANEMASK_GT
;
1692 case nir_intrinsic_load_subgroup_le_mask
:
1693 return SV_LANEMASK_LE
;
1694 case nir_intrinsic_load_subgroup_lt_mask
:
1695 return SV_LANEMASK_LT
;
1696 case nir_intrinsic_load_subgroup_invocation
:
1698 case nir_intrinsic_load_tess_coord
:
1699 return SV_TESS_COORD
;
1700 case nir_intrinsic_load_tess_level_inner
:
1701 return SV_TESS_INNER
;
1702 case nir_intrinsic_load_tess_level_outer
:
1703 return SV_TESS_OUTER
;
1704 case nir_intrinsic_load_vertex_id
:
1705 return SV_VERTEX_ID
;
1706 case nir_intrinsic_load_work_group_id
:
1709 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1710 nir_intrinsic_infos
[intr
].name
);
1717 Converter::visit(nir_intrinsic_instr
*insn
)
1719 nir_intrinsic_op op
= insn
->intrinsic
;
1722 case nir_intrinsic_load_uniform
: {
1723 LValues
&newDefs
= convert(&insn
->dest
);
1724 const DataType dType
= getDType(insn
);
1726 uint32_t coffset
= getIndirect(insn
, 0, 0, indirect
);
1727 for (uint8_t i
= 0; i
< insn
->num_components
; ++i
) {
1728 loadFrom(FILE_MEMORY_CONST
, 0, dType
, newDefs
[i
], 16 * coffset
, i
, indirect
);
1732 case nir_intrinsic_store_output
:
1733 case nir_intrinsic_store_per_vertex_output
: {
1735 DataType dType
= getSType(insn
->src
[0], false, false);
1736 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_store_output
? 1 : 2, 0, indirect
);
1738 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1739 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
1743 Value
*src
= getSrc(&insn
->src
[0], i
);
1744 switch (prog
->getType()) {
1745 case Program::TYPE_FRAGMENT
: {
1746 if (info
->out
[idx
].sn
== TGSI_SEMANTIC_POSITION
) {
1747 // TGSI uses a different interface than NIR, TGSI stores that
1748 // value in the z component, NIR in X
1750 src
= mkOp1v(OP_SAT
, TYPE_F32
, getScratch(), src
);
1754 case Program::TYPE_VERTEX
: {
1755 if (info
->io
.genUserClip
> 0 && idx
== clipVertexOutput
) {
1756 mkMov(clipVtx
[i
], src
);
1765 storeTo(insn
, FILE_SHADER_OUTPUT
, OP_EXPORT
, dType
, src
, idx
, i
+ offset
, indirect
);
1769 case nir_intrinsic_load_input
:
1770 case nir_intrinsic_load_interpolated_input
:
1771 case nir_intrinsic_load_output
: {
1772 LValues
&newDefs
= convert(&insn
->dest
);
1775 if (prog
->getType() == Program::TYPE_FRAGMENT
&&
1776 op
== nir_intrinsic_load_output
) {
1777 std::vector
<Value
*> defs
, srcs
;
1780 srcs
.push_back(getSSA());
1781 srcs
.push_back(getSSA());
1782 Value
*x
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 0));
1783 Value
*y
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 1));
1784 mkCvt(OP_CVT
, TYPE_U32
, srcs
[0], TYPE_F32
, x
)->rnd
= ROUND_Z
;
1785 mkCvt(OP_CVT
, TYPE_U32
, srcs
[1], TYPE_F32
, y
)->rnd
= ROUND_Z
;
1787 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LAYER
, 0)));
1788 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_SAMPLE_INDEX
, 0)));
1790 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1791 defs
.push_back(newDefs
[i
]);
1795 TexInstruction
*texi
= mkTex(OP_TXF
, TEX_TARGET_2D_MS_ARRAY
, 0, 0, defs
, srcs
);
1796 texi
->tex
.levelZero
= 1;
1797 texi
->tex
.mask
= mask
;
1798 texi
->tex
.useOffsets
= 0;
1799 texi
->tex
.r
= 0xffff;
1800 texi
->tex
.s
= 0xffff;
1802 info
->prop
.fp
.readsFramebuffer
= true;
1806 const DataType dType
= getDType(insn
);
1808 bool input
= op
!= nir_intrinsic_load_output
;
1812 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_load_interpolated_input
? 1 : 0, 0, indirect
);
1813 nv50_ir_varying
& vary
= input
? info
->in
[idx
] : info
->out
[idx
];
1815 // see load_barycentric_* handling
1816 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1817 mode
= translateInterpMode(&vary
, nvirOp
);
1818 if (op
== nir_intrinsic_load_interpolated_input
) {
1819 ImmediateValue immMode
;
1820 if (getSrc(&insn
->src
[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode
))
1821 mode
|= immMode
.reg
.data
.u32
;
1825 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1826 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1827 Symbol
*sym
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
);
1828 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1830 if (typeSizeof(dType
) == 8) {
1831 Value
*lo
= getSSA();
1832 Value
*hi
= getSSA();
1833 Instruction
*interp
;
1835 interp
= mkOp1(nvirOp
, TYPE_U32
, lo
, sym
);
1836 if (nvirOp
== OP_PINTERP
)
1837 interp
->setSrc(s
++, fp
.position
);
1838 if (mode
& NV50_IR_INTERP_OFFSET
)
1839 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1840 interp
->setInterpolate(mode
);
1841 interp
->setIndirect(0, 0, indirect
);
1843 Symbol
*sym1
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
+ 4);
1844 interp
= mkOp1(nvirOp
, TYPE_U32
, hi
, sym1
);
1845 if (nvirOp
== OP_PINTERP
)
1846 interp
->setSrc(s
++, fp
.position
);
1847 if (mode
& NV50_IR_INTERP_OFFSET
)
1848 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1849 interp
->setInterpolate(mode
);
1850 interp
->setIndirect(0, 0, indirect
);
1852 mkOp2(OP_MERGE
, dType
, newDefs
[i
], lo
, hi
);
1854 Instruction
*interp
= mkOp1(nvirOp
, dType
, newDefs
[i
], sym
);
1855 if (nvirOp
== OP_PINTERP
)
1856 interp
->setSrc(s
++, fp
.position
);
1857 if (mode
& NV50_IR_INTERP_OFFSET
)
1858 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1859 interp
->setInterpolate(mode
);
1860 interp
->setIndirect(0, 0, indirect
);
1863 mkLoad(dType
, newDefs
[i
], sym
, indirect
)->perPatch
= vary
.patch
;
1868 case nir_intrinsic_load_barycentric_at_offset
:
1869 case nir_intrinsic_load_barycentric_at_sample
:
1870 case nir_intrinsic_load_barycentric_centroid
:
1871 case nir_intrinsic_load_barycentric_pixel
:
1872 case nir_intrinsic_load_barycentric_sample
: {
1873 LValues
&newDefs
= convert(&insn
->dest
);
1876 if (op
== nir_intrinsic_load_barycentric_centroid
||
1877 op
== nir_intrinsic_load_barycentric_sample
) {
1878 mode
= NV50_IR_INTERP_CENTROID
;
1879 } else if (op
== nir_intrinsic_load_barycentric_at_offset
) {
1881 for (uint8_t c
= 0; c
< 2; c
++) {
1882 offs
[c
] = getScratch();
1883 mkOp2(OP_MIN
, TYPE_F32
, offs
[c
], getSrc(&insn
->src
[0], c
), loadImm(NULL
, 0.4375f
));
1884 mkOp2(OP_MAX
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, -0.5f
));
1885 mkOp2(OP_MUL
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, 4096.0f
));
1886 mkCvt(OP_CVT
, TYPE_S32
, offs
[c
], TYPE_F32
, offs
[c
]);
1888 mkOp3v(OP_INSBF
, TYPE_U32
, newDefs
[0], offs
[1], mkImm(0x1010), offs
[0]);
1890 mode
= NV50_IR_INTERP_OFFSET
;
1891 } else if (op
== nir_intrinsic_load_barycentric_pixel
) {
1892 mode
= NV50_IR_INTERP_DEFAULT
;
1893 } else if (op
== nir_intrinsic_load_barycentric_at_sample
) {
1894 info
->prop
.fp
.readsSampleLocations
= true;
1895 mkOp1(OP_PIXLD
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0], 0))->subOp
= NV50_IR_SUBOP_PIXLD_OFFSET
;
1896 mode
= NV50_IR_INTERP_OFFSET
;
1898 unreachable("all intrinsics already handled above");
1901 loadImm(newDefs
[1], mode
);
1904 case nir_intrinsic_discard
:
1905 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
);
1907 case nir_intrinsic_discard_if
: {
1908 Value
*pred
= getSSA(1, FILE_PREDICATE
);
1909 if (insn
->num_components
> 1) {
1910 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1914 mkCmp(OP_SET
, CC_NE
, TYPE_U8
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1915 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
)->setPredicate(CC_P
, pred
);
1918 case nir_intrinsic_load_base_vertex
:
1919 case nir_intrinsic_load_base_instance
:
1920 case nir_intrinsic_load_draw_id
:
1921 case nir_intrinsic_load_front_face
:
1922 case nir_intrinsic_load_helper_invocation
:
1923 case nir_intrinsic_load_instance_id
:
1924 case nir_intrinsic_load_invocation_id
:
1925 case nir_intrinsic_load_local_group_size
:
1926 case nir_intrinsic_load_local_invocation_id
:
1927 case nir_intrinsic_load_num_work_groups
:
1928 case nir_intrinsic_load_patch_vertices_in
:
1929 case nir_intrinsic_load_primitive_id
:
1930 case nir_intrinsic_load_sample_id
:
1931 case nir_intrinsic_load_sample_mask_in
:
1932 case nir_intrinsic_load_sample_pos
:
1933 case nir_intrinsic_load_subgroup_eq_mask
:
1934 case nir_intrinsic_load_subgroup_ge_mask
:
1935 case nir_intrinsic_load_subgroup_gt_mask
:
1936 case nir_intrinsic_load_subgroup_le_mask
:
1937 case nir_intrinsic_load_subgroup_lt_mask
:
1938 case nir_intrinsic_load_subgroup_invocation
:
1939 case nir_intrinsic_load_tess_coord
:
1940 case nir_intrinsic_load_tess_level_inner
:
1941 case nir_intrinsic_load_tess_level_outer
:
1942 case nir_intrinsic_load_vertex_id
:
1943 case nir_intrinsic_load_work_group_id
: {
1944 const DataType dType
= getDType(insn
);
1945 SVSemantic sv
= convert(op
);
1946 LValues
&newDefs
= convert(&insn
->dest
);
1948 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1950 if (typeSizeof(dType
) == 8)
1955 if (sv
== SV_TID
&& info
->prop
.cp
.numThreads
[i
] == 1) {
1958 Symbol
*sym
= mkSysVal(sv
, i
);
1959 Instruction
*rdsv
= mkOp1(OP_RDSV
, TYPE_U32
, def
, sym
);
1960 if (sv
== SV_TESS_OUTER
|| sv
== SV_TESS_INNER
)
1964 if (typeSizeof(dType
) == 8)
1965 mkOp2(OP_MERGE
, dType
, newDefs
[i
], def
, loadImm(getSSA(), 0u));
1970 case nir_intrinsic_load_subgroup_size
: {
1971 LValues
&newDefs
= convert(&insn
->dest
);
1972 loadImm(newDefs
[0], 32u);
1975 case nir_intrinsic_vote_all
:
1976 case nir_intrinsic_vote_any
:
1977 case nir_intrinsic_vote_ieq
: {
1978 LValues
&newDefs
= convert(&insn
->dest
);
1979 Value
*pred
= getScratch(1, FILE_PREDICATE
);
1980 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1981 mkOp1(OP_VOTE
, TYPE_U32
, pred
, pred
)->subOp
= getSubOp(op
);
1982 mkCvt(OP_CVT
, TYPE_U32
, newDefs
[0], TYPE_U8
, pred
);
1985 case nir_intrinsic_ballot
: {
1986 LValues
&newDefs
= convert(&insn
->dest
);
1987 Value
*pred
= getSSA(1, FILE_PREDICATE
);
1988 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1989 mkOp1(OP_VOTE
, TYPE_U32
, newDefs
[0], pred
)->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
1992 case nir_intrinsic_read_first_invocation
:
1993 case nir_intrinsic_read_invocation
: {
1994 LValues
&newDefs
= convert(&insn
->dest
);
1995 const DataType dType
= getDType(insn
);
1996 Value
*tmp
= getScratch();
1998 if (op
== nir_intrinsic_read_first_invocation
) {
1999 mkOp1(OP_VOTE
, TYPE_U32
, tmp
, mkImm(1))->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
2000 mkOp2(OP_EXTBF
, TYPE_U32
, tmp
, tmp
, mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2001 mkOp1(OP_BFIND
, TYPE_U32
, tmp
, tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
2003 tmp
= getSrc(&insn
->src
[1], 0);
2005 for (uint8_t i
= 0; i
< insn
->num_components
; ++i
) {
2006 mkOp3(OP_SHFL
, dType
, newDefs
[i
], getSrc(&insn
->src
[0], i
), tmp
, mkImm(0x1f))
2007 ->subOp
= NV50_IR_SUBOP_SHFL_IDX
;
2011 case nir_intrinsic_load_per_vertex_input
: {
2012 const DataType dType
= getDType(insn
);
2013 LValues
&newDefs
= convert(&insn
->dest
);
2014 Value
*indirectVertex
;
2015 Value
*indirectOffset
;
2016 uint32_t baseVertex
= getIndirect(&insn
->src
[0], 0, indirectVertex
);
2017 uint32_t idx
= getIndirect(insn
, 1, 0, indirectOffset
);
2019 Value
*vtxBase
= mkOp2v(OP_PFETCH
, TYPE_U32
, getSSA(4, FILE_ADDRESS
),
2020 mkImm(baseVertex
), indirectVertex
);
2021 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2022 uint32_t address
= getSlotAddress(insn
, idx
, i
);
2023 loadFrom(FILE_SHADER_INPUT
, 0, dType
, newDefs
[i
], address
, 0,
2024 indirectOffset
, vtxBase
, info
->in
[idx
].patch
);
2028 case nir_intrinsic_emit_vertex
:
2029 case nir_intrinsic_end_primitive
: {
2030 uint32_t idx
= nir_intrinsic_stream_id(insn
);
2031 mkOp1(getOperation(op
), TYPE_U32
, NULL
, mkImm(idx
))->fixed
= 1;
2034 case nir_intrinsic_load_ubo
: {
2035 const DataType dType
= getDType(insn
);
2036 LValues
&newDefs
= convert(&insn
->dest
);
2037 Value
*indirectIndex
;
2038 Value
*indirectOffset
;
2039 uint32_t index
= getIndirect(&insn
->src
[0], 0, indirectIndex
) + 1;
2040 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2042 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2043 loadFrom(FILE_MEMORY_CONST
, index
, dType
, newDefs
[i
], offset
, i
,
2044 indirectOffset
, indirectIndex
);
2048 case nir_intrinsic_get_buffer_size
: {
2049 LValues
&newDefs
= convert(&insn
->dest
);
2050 const DataType dType
= getDType(insn
);
2051 Value
*indirectBuffer
;
2052 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2054 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, 0);
2055 mkOp1(OP_BUFQ
, dType
, newDefs
[0], sym
)->setIndirect(0, 0, indirectBuffer
);
2058 case nir_intrinsic_store_ssbo
: {
2059 DataType sType
= getSType(insn
->src
[0], false, false);
2060 Value
*indirectBuffer
;
2061 Value
*indirectOffset
;
2062 uint32_t buffer
= getIndirect(&insn
->src
[1], 0, indirectBuffer
);
2063 uint32_t offset
= getIndirect(&insn
->src
[2], 0, indirectOffset
);
2065 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2066 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2068 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, sType
,
2069 offset
+ i
* typeSizeof(sType
));
2070 mkStore(OP_STORE
, sType
, sym
, indirectOffset
, getSrc(&insn
->src
[0], i
))
2071 ->setIndirect(0, 1, indirectBuffer
);
2073 info
->io
.globalAccess
|= 0x2;
2076 case nir_intrinsic_load_ssbo
: {
2077 const DataType dType
= getDType(insn
);
2078 LValues
&newDefs
= convert(&insn
->dest
);
2079 Value
*indirectBuffer
;
2080 Value
*indirectOffset
;
2081 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2082 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2084 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
)
2085 loadFrom(FILE_MEMORY_BUFFER
, buffer
, dType
, newDefs
[i
], offset
, i
,
2086 indirectOffset
, indirectBuffer
);
2088 info
->io
.globalAccess
|= 0x1;
2091 case nir_intrinsic_ssbo_atomic_add
:
2092 case nir_intrinsic_ssbo_atomic_and
:
2093 case nir_intrinsic_ssbo_atomic_comp_swap
:
2094 case nir_intrinsic_ssbo_atomic_exchange
:
2095 case nir_intrinsic_ssbo_atomic_or
:
2096 case nir_intrinsic_ssbo_atomic_imax
:
2097 case nir_intrinsic_ssbo_atomic_imin
:
2098 case nir_intrinsic_ssbo_atomic_umax
:
2099 case nir_intrinsic_ssbo_atomic_umin
:
2100 case nir_intrinsic_ssbo_atomic_xor
: {
2101 const DataType dType
= getDType(insn
);
2102 LValues
&newDefs
= convert(&insn
->dest
);
2103 Value
*indirectBuffer
;
2104 Value
*indirectOffset
;
2105 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2106 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2108 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, offset
);
2109 Instruction
*atom
= mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
,
2110 getSrc(&insn
->src
[2], 0));
2111 if (op
== nir_intrinsic_ssbo_atomic_comp_swap
)
2112 atom
->setSrc(2, getSrc(&insn
->src
[3], 0));
2113 atom
->setIndirect(0, 0, indirectOffset
);
2114 atom
->setIndirect(0, 1, indirectBuffer
);
2115 atom
->subOp
= getSubOp(op
);
2117 info
->io
.globalAccess
|= 0x2;
2121 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos
[op
].name
);
2129 Converter::visit(nir_jump_instr
*insn
)
2131 switch (insn
->type
) {
2132 case nir_jump_return
:
2133 // TODO: this only works in the main function
2134 mkFlow(OP_BRA
, exit
, CC_ALWAYS
, NULL
);
2135 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::CROSS
);
2137 case nir_jump_break
:
2138 case nir_jump_continue
: {
2139 bool isBreak
= insn
->type
== nir_jump_break
;
2140 nir_block
*block
= insn
->instr
.block
;
2141 assert(!block
->successors
[1]);
2142 BasicBlock
*target
= convert(block
->successors
[0]);
2143 mkFlow(isBreak
? OP_BREAK
: OP_CONT
, target
, CC_ALWAYS
, NULL
);
2144 bb
->cfg
.attach(&target
->cfg
, isBreak
? Graph::Edge::CROSS
: Graph::Edge::BACK
);
2148 ERROR("unknown nir_jump_type %u\n", insn
->type
);
2156 Converter::visit(nir_load_const_instr
*insn
)
2158 assert(insn
->def
.bit_size
<= 64);
2160 LValues
&newDefs
= convert(&insn
->def
);
2161 for (int i
= 0; i
< insn
->def
.num_components
; i
++) {
2162 switch (insn
->def
.bit_size
) {
2164 loadImm(newDefs
[i
], insn
->value
.u64
[i
]);
2167 loadImm(newDefs
[i
], insn
->value
.u32
[i
]);
2170 loadImm(newDefs
[i
], insn
->value
.u16
[i
]);
2173 loadImm(newDefs
[i
], insn
->value
.u8
[i
]);
2180 #define DEFAULT_CHECKS \
2181 if (insn->dest.dest.ssa.num_components > 1) { \
2182 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2185 if (insn->dest.write_mask != 1) { \
2186 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2190 Converter::visit(nir_alu_instr
*insn
)
2192 const nir_op op
= insn
->op
;
2193 const nir_op_info
&info
= nir_op_infos
[op
];
2194 DataType dType
= getDType(insn
);
2195 const std::vector
<DataType
> sTypes
= getSTypes(insn
);
2197 Instruction
*oldPos
= this->bb
->getExit();
2209 case nir_op_fddx_coarse
:
2210 case nir_op_fddx_fine
:
2212 case nir_op_fddy_coarse
:
2213 case nir_op_fddy_fine
:
2232 case nir_op_imul_high
:
2233 case nir_op_umul_high
:
2240 case nir_op_pack_64_2x32_split
:
2258 LValues
&newDefs
= convert(&insn
->dest
);
2259 operation preOp
= preOperationNeeded(op
);
2260 if (preOp
!= OP_NOP
) {
2261 assert(info
.num_inputs
< 2);
2262 Value
*tmp
= getSSA(typeSizeof(dType
));
2263 Instruction
*i0
= mkOp(preOp
, dType
, tmp
);
2264 Instruction
*i1
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2265 if (info
.num_inputs
) {
2266 i0
->setSrc(0, getSrc(&insn
->src
[0]));
2269 i1
->subOp
= getSubOp(op
);
2271 Instruction
*i
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2272 for (unsigned s
= 0u; s
< info
.num_inputs
; ++s
) {
2273 i
->setSrc(s
, getSrc(&insn
->src
[s
]));
2275 i
->subOp
= getSubOp(op
);
2279 case nir_op_ifind_msb
:
2280 case nir_op_ufind_msb
: {
2282 LValues
&newDefs
= convert(&insn
->dest
);
2284 mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2287 case nir_op_fround_even
: {
2289 LValues
&newDefs
= convert(&insn
->dest
);
2290 mkCvt(OP_CVT
, dType
, newDefs
[0], dType
, getSrc(&insn
->src
[0]))->rnd
= ROUND_NI
;
2293 // convert instructions
2307 case nir_op_u2u64
: {
2309 LValues
&newDefs
= convert(&insn
->dest
);
2310 Instruction
*i
= mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2311 if (op
== nir_op_f2i32
|| op
== nir_op_f2i64
|| op
== nir_op_f2u32
|| op
== nir_op_f2u64
)
2313 i
->sType
= sTypes
[0];
2316 // compare instructions
2326 case nir_op_ine32
: {
2328 LValues
&newDefs
= convert(&insn
->dest
);
2329 Instruction
*i
= mkCmp(getOperation(op
),
2334 getSrc(&insn
->src
[0]),
2335 getSrc(&insn
->src
[1]));
2336 if (info
.num_inputs
== 3)
2337 i
->setSrc(2, getSrc(&insn
->src
[2]));
2338 i
->sType
= sTypes
[0];
2341 // those are weird ALU ops and need special handling, because
2342 // 1. they are always componend based
2343 // 2. they basically just merge multiple values into one data type
2346 if (!insn
->dest
.dest
.is_ssa
&& insn
->dest
.dest
.reg
.reg
->num_array_elems
) {
2347 nir_reg_dest
& reg
= insn
->dest
.dest
.reg
;
2348 uint32_t goffset
= regToLmemOffset
[reg
.reg
->index
];
2349 uint8_t comps
= reg
.reg
->num_components
;
2350 uint8_t size
= reg
.reg
->bit_size
/ 8;
2351 uint8_t csize
= 4 * size
; // TODO after fixing MemoryOpts: comps * size;
2352 uint32_t aoffset
= csize
* reg
.base_offset
;
2353 Value
*indirect
= NULL
;
2356 indirect
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
),
2357 getSrc(reg
.indirect
, 0), mkImm(csize
));
2359 for (uint8_t i
= 0u; i
< comps
; ++i
) {
2360 if (!((1u << i
) & insn
->dest
.write_mask
))
2363 Symbol
*sym
= mkSymbol(FILE_MEMORY_LOCAL
, 0, dType
, goffset
+ aoffset
+ i
* size
);
2364 mkStore(OP_STORE
, dType
, sym
, indirect
, getSrc(&insn
->src
[0], i
));
2367 } else if (!insn
->src
[0].src
.is_ssa
&& insn
->src
[0].src
.reg
.reg
->num_array_elems
) {
2368 LValues
&newDefs
= convert(&insn
->dest
);
2369 nir_reg_src
& reg
= insn
->src
[0].src
.reg
;
2370 uint32_t goffset
= regToLmemOffset
[reg
.reg
->index
];
2371 // uint8_t comps = reg.reg->num_components;
2372 uint8_t size
= reg
.reg
->bit_size
/ 8;
2373 uint8_t csize
= 4 * size
; // TODO after fixing MemoryOpts: comps * size;
2374 uint32_t aoffset
= csize
* reg
.base_offset
;
2375 Value
*indirect
= NULL
;
2378 indirect
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), getSrc(reg
.indirect
, 0), mkImm(csize
));
2380 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
)
2381 loadFrom(FILE_MEMORY_LOCAL
, 0, dType
, newDefs
[i
], goffset
+ aoffset
, i
, indirect
);
2385 LValues
&newDefs
= convert(&insn
->dest
);
2386 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2387 mkMov(newDefs
[c
], getSrc(&insn
->src
[0], c
), dType
);
2394 LValues
&newDefs
= convert(&insn
->dest
);
2395 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2396 mkMov(newDefs
[c
], getSrc(&insn
->src
[c
]), dType
);
2401 case nir_op_pack_64_2x32
: {
2402 LValues
&newDefs
= convert(&insn
->dest
);
2403 Instruction
*merge
= mkOp(OP_MERGE
, dType
, newDefs
[0]);
2404 merge
->setSrc(0, getSrc(&insn
->src
[0], 0));
2405 merge
->setSrc(1, getSrc(&insn
->src
[0], 1));
2408 case nir_op_pack_half_2x16_split
: {
2409 LValues
&newDefs
= convert(&insn
->dest
);
2410 Value
*tmpH
= getSSA();
2411 Value
*tmpL
= getSSA();
2413 mkCvt(OP_CVT
, TYPE_F16
, tmpL
, TYPE_F32
, getSrc(&insn
->src
[0]));
2414 mkCvt(OP_CVT
, TYPE_F16
, tmpH
, TYPE_F32
, getSrc(&insn
->src
[1]));
2415 mkOp3(OP_INSBF
, TYPE_U32
, newDefs
[0], tmpH
, mkImm(0x1010), tmpL
);
2418 case nir_op_unpack_half_2x16_split_x
:
2419 case nir_op_unpack_half_2x16_split_y
: {
2420 LValues
&newDefs
= convert(&insn
->dest
);
2421 Instruction
*cvt
= mkCvt(OP_CVT
, TYPE_F32
, newDefs
[0], TYPE_F16
, getSrc(&insn
->src
[0]));
2422 if (op
== nir_op_unpack_half_2x16_split_y
)
2426 case nir_op_unpack_64_2x32
: {
2427 LValues
&newDefs
= convert(&insn
->dest
);
2428 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, newDefs
[1]);
2431 case nir_op_unpack_64_2x32_split_x
: {
2432 LValues
&newDefs
= convert(&insn
->dest
);
2433 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, getSSA());
2436 case nir_op_unpack_64_2x32_split_y
: {
2437 LValues
&newDefs
= convert(&insn
->dest
);
2438 mkOp1(OP_SPLIT
, dType
, getSSA(), getSrc(&insn
->src
[0]))->setDef(1, newDefs
[0]);
2441 // special instructions
2443 case nir_op_isign
: {
2446 if (::isFloatType(dType
))
2451 LValues
&newDefs
= convert(&insn
->dest
);
2452 LValue
*val0
= getScratch();
2453 LValue
*val1
= getScratch();
2454 mkCmp(OP_SET
, CC_GT
, iType
, val0
, dType
, getSrc(&insn
->src
[0]), zero
);
2455 mkCmp(OP_SET
, CC_LT
, iType
, val1
, dType
, getSrc(&insn
->src
[0]), zero
);
2457 if (dType
== TYPE_F64
) {
2458 mkOp2(OP_SUB
, iType
, val0
, val0
, val1
);
2459 mkCvt(OP_CVT
, TYPE_F64
, newDefs
[0], iType
, val0
);
2460 } else if (dType
== TYPE_S64
|| dType
== TYPE_U64
) {
2461 mkOp2(OP_SUB
, iType
, val0
, val1
, val0
);
2462 mkOp2(OP_SHR
, iType
, val1
, val0
, loadImm(NULL
, 31));
2463 mkOp2(OP_MERGE
, dType
, newDefs
[0], val0
, val1
);
2464 } else if (::isFloatType(dType
))
2465 mkOp2(OP_SUB
, iType
, newDefs
[0], val0
, val1
);
2467 mkOp2(OP_SUB
, iType
, newDefs
[0], val1
, val0
);
2471 case nir_op_b32csel
: {
2473 LValues
&newDefs
= convert(&insn
->dest
);
2474 mkCmp(OP_SLCT
, CC_NE
, dType
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[1]), getSrc(&insn
->src
[2]), getSrc(&insn
->src
[0]));
2477 case nir_op_ibitfield_extract
:
2478 case nir_op_ubitfield_extract
: {
2480 Value
*tmp
= getSSA();
2481 LValues
&newDefs
= convert(&insn
->dest
);
2482 mkOp3(OP_INSBF
, dType
, tmp
, getSrc(&insn
->src
[2]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2483 mkOp2(OP_EXTBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), tmp
);
2488 LValues
&newDefs
= convert(&insn
->dest
);
2489 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2492 case nir_op_bitfield_insert
: {
2494 LValues
&newDefs
= convert(&insn
->dest
);
2495 LValue
*temp
= getSSA();
2496 mkOp3(OP_INSBF
, TYPE_U32
, temp
, getSrc(&insn
->src
[3]), mkImm(0x808), getSrc(&insn
->src
[2]));
2497 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[1]), temp
, getSrc(&insn
->src
[0]));
2500 case nir_op_bit_count
: {
2502 LValues
&newDefs
= convert(&insn
->dest
);
2503 mkOp2(OP_POPCNT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), getSrc(&insn
->src
[0]));
2506 case nir_op_bitfield_reverse
: {
2508 LValues
&newDefs
= convert(&insn
->dest
);
2509 mkOp2(OP_EXTBF
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2512 case nir_op_find_lsb
: {
2514 LValues
&newDefs
= convert(&insn
->dest
);
2515 Value
*tmp
= getSSA();
2516 mkOp2(OP_EXTBF
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2517 mkOp1(OP_BFIND
, TYPE_U32
, newDefs
[0], tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
2520 // boolean conversions
2521 case nir_op_b2f32
: {
2523 LValues
&newDefs
= convert(&insn
->dest
);
2524 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1.0f
));
2527 case nir_op_b2f64
: {
2529 LValues
&newDefs
= convert(&insn
->dest
);
2530 Value
*tmp
= getSSA(4);
2531 mkOp2(OP_AND
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), loadImm(NULL
, 0x3ff00000));
2532 mkOp2(OP_MERGE
, TYPE_U64
, newDefs
[0], loadImm(NULL
, 0), tmp
);
2536 case nir_op_i2b32
: {
2538 LValues
&newDefs
= convert(&insn
->dest
);
2540 if (typeSizeof(sTypes
[0]) == 8) {
2541 src1
= loadImm(getSSA(8), 0.0);
2545 CondCode cc
= op
== nir_op_f2b32
? CC_NEU
: CC_NE
;
2546 mkCmp(OP_SET
, cc
, TYPE_U32
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[0]), src1
);
2549 case nir_op_b2i32
: {
2551 LValues
&newDefs
= convert(&insn
->dest
);
2552 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2555 case nir_op_b2i64
: {
2557 LValues
&newDefs
= convert(&insn
->dest
);
2558 LValue
*def
= getScratch();
2559 mkOp2(OP_AND
, TYPE_U32
, def
, getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2560 mkOp2(OP_MERGE
, TYPE_S64
, newDefs
[0], def
, loadImm(NULL
, 0));
2564 ERROR("unknown nir_op %s\n", info
.name
);
2569 oldPos
= this->bb
->getEntry();
2570 oldPos
->precise
= insn
->exact
;
2573 if (unlikely(!oldPos
))
2576 while (oldPos
->next
) {
2577 oldPos
= oldPos
->next
;
2578 oldPos
->precise
= insn
->exact
;
2580 oldPos
->saturate
= insn
->dest
.saturate
;
2584 #undef DEFAULT_CHECKS
2587 Converter::visit(nir_ssa_undef_instr
*insn
)
2589 LValues
&newDefs
= convert(&insn
->def
);
2590 for (uint8_t i
= 0u; i
< insn
->def
.num_components
; ++i
) {
2591 mkOp(OP_NOP
, TYPE_NONE
, newDefs
[i
]);
2596 #define CASE_SAMPLER(ty) \
2597 case GLSL_SAMPLER_DIM_ ## ty : \
2598 if (isArray && !isShadow) \
2599 return TEX_TARGET_ ## ty ## _ARRAY; \
2600 else if (!isArray && isShadow) \
2601 return TEX_TARGET_## ty ## _SHADOW; \
2602 else if (isArray && isShadow) \
2603 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2605 return TEX_TARGET_ ## ty
2608 Converter::convert(glsl_sampler_dim dim
, bool isArray
, bool isShadow
)
2614 case GLSL_SAMPLER_DIM_3D
:
2615 return TEX_TARGET_3D
;
2616 case GLSL_SAMPLER_DIM_MS
:
2618 return TEX_TARGET_2D_MS_ARRAY
;
2619 return TEX_TARGET_2D_MS
;
2620 case GLSL_SAMPLER_DIM_RECT
:
2622 return TEX_TARGET_RECT_SHADOW
;
2623 return TEX_TARGET_RECT
;
2624 case GLSL_SAMPLER_DIM_BUF
:
2625 return TEX_TARGET_BUFFER
;
2626 case GLSL_SAMPLER_DIM_EXTERNAL
:
2627 return TEX_TARGET_2D
;
2629 ERROR("unknown glsl_sampler_dim %u\n", dim
);
2631 return TEX_TARGET_COUNT
;
2637 Converter::applyProjection(Value
*src
, Value
*proj
)
2641 return mkOp2v(OP_MUL
, TYPE_F32
, getScratch(), src
, proj
);
2645 Converter::visit(nir_tex_instr
*insn
)
2649 case nir_texop_query_levels
:
2651 case nir_texop_texture_samples
:
2656 case nir_texop_txf_ms
:
2658 case nir_texop_txs
: {
2659 LValues
&newDefs
= convert(&insn
->dest
);
2660 std::vector
<Value
*> srcs
;
2661 std::vector
<Value
*> defs
;
2662 std::vector
<nir_src
*> offsets
;
2666 TexInstruction::Target target
= convert(insn
->sampler_dim
, insn
->is_array
, insn
->is_shadow
);
2667 operation op
= getOperation(insn
->op
);
2670 int biasIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_bias
);
2671 int compIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_comparator
);
2672 int coordsIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_coord
);
2673 int ddxIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddx
);
2674 int ddyIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddy
);
2675 int msIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ms_index
);
2676 int lodIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_lod
);
2677 int offsetIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_offset
);
2678 int projIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_projector
);
2679 int sampOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_sampler_offset
);
2680 int texOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_texture_offset
);
2683 proj
= mkOp1v(OP_RCP
, TYPE_F32
, getScratch(), getSrc(&insn
->src
[projIdx
].src
, 0));
2685 srcs
.resize(insn
->coord_components
);
2686 for (uint8_t i
= 0u; i
< insn
->coord_components
; ++i
)
2687 srcs
[i
] = applyProjection(getSrc(&insn
->src
[coordsIdx
].src
, i
), proj
);
2689 // sometimes we get less args than target.getArgCount, but codegen expects the latter
2690 if (insn
->coord_components
) {
2691 uint32_t argCount
= target
.getArgCount();
2696 for (uint32_t i
= 0u; i
< (argCount
- insn
->coord_components
); ++i
)
2697 srcs
.push_back(getSSA());
2700 if (insn
->op
== nir_texop_texture_samples
)
2701 srcs
.push_back(zero
);
2702 else if (!insn
->num_srcs
)
2703 srcs
.push_back(loadImm(NULL
, 0));
2705 srcs
.push_back(getSrc(&insn
->src
[biasIdx
].src
, 0));
2707 srcs
.push_back(getSrc(&insn
->src
[lodIdx
].src
, 0));
2708 else if (op
== OP_TXF
)
2711 srcs
.push_back(getSrc(&insn
->src
[msIdx
].src
, 0));
2712 if (offsetIdx
!= -1)
2713 offsets
.push_back(&insn
->src
[offsetIdx
].src
);
2715 srcs
.push_back(applyProjection(getSrc(&insn
->src
[compIdx
].src
, 0), proj
));
2716 if (texOffIdx
!= -1) {
2717 srcs
.push_back(getSrc(&insn
->src
[texOffIdx
].src
, 0));
2718 texOffIdx
= srcs
.size() - 1;
2720 if (sampOffIdx
!= -1) {
2721 srcs
.push_back(getSrc(&insn
->src
[sampOffIdx
].src
, 0));
2722 sampOffIdx
= srcs
.size() - 1;
2725 r
= insn
->texture_index
;
2726 s
= insn
->sampler_index
;
2728 defs
.resize(newDefs
.size());
2729 for (uint8_t d
= 0u; d
< newDefs
.size(); ++d
) {
2730 defs
[d
] = newDefs
[d
];
2733 if (target
.isMS() || (op
== OP_TEX
&& prog
->getType() != Program::TYPE_FRAGMENT
))
2736 TexInstruction
*texi
= mkTex(op
, target
.getEnum(), r
, s
, defs
, srcs
);
2737 texi
->tex
.levelZero
= lz
;
2738 texi
->tex
.mask
= mask
;
2740 if (texOffIdx
!= -1)
2741 texi
->tex
.rIndirectSrc
= texOffIdx
;
2742 if (sampOffIdx
!= -1)
2743 texi
->tex
.sIndirectSrc
= sampOffIdx
;
2747 if (!target
.isShadow())
2748 texi
->tex
.gatherComp
= insn
->component
;
2751 texi
->tex
.query
= TXQ_DIMS
;
2753 case nir_texop_texture_samples
:
2754 texi
->tex
.mask
= 0x4;
2755 texi
->tex
.query
= TXQ_TYPE
;
2757 case nir_texop_query_levels
:
2758 texi
->tex
.mask
= 0x8;
2759 texi
->tex
.query
= TXQ_DIMS
;
2765 texi
->tex
.useOffsets
= offsets
.size();
2766 if (texi
->tex
.useOffsets
) {
2767 for (uint8_t s
= 0; s
< texi
->tex
.useOffsets
; ++s
) {
2768 for (uint32_t c
= 0u; c
< 3; ++c
) {
2769 uint8_t s2
= std::min(c
, target
.getDim() - 1);
2770 texi
->offset
[s
][c
].set(getSrc(offsets
[s
], s2
));
2771 texi
->offset
[s
][c
].setInsn(texi
);
2776 if (ddxIdx
!= -1 && ddyIdx
!= -1) {
2777 for (uint8_t c
= 0u; c
< target
.getDim() + target
.isCube(); ++c
) {
2778 texi
->dPdx
[c
].set(getSrc(&insn
->src
[ddxIdx
].src
, c
));
2779 texi
->dPdy
[c
].set(getSrc(&insn
->src
[ddyIdx
].src
, c
));
2786 ERROR("unknown nir_texop %u\n", insn
->op
);
2797 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
2798 nir_print_shader(nir
, stderr
);
2800 struct nir_lower_subgroups_options subgroup_options
= {
2801 .subgroup_size
= 32,
2802 .ballot_bit_size
= 32,
2805 NIR_PASS_V(nir
, nir_lower_io
, nir_var_all
, type_size
, (nir_lower_io_options
)0);
2806 NIR_PASS_V(nir
, nir_lower_subgroups
, &subgroup_options
);
2807 NIR_PASS_V(nir
, nir_lower_regs_to_ssa
);
2808 NIR_PASS_V(nir
, nir_lower_load_const_to_scalar
);
2809 NIR_PASS_V(nir
, nir_lower_vars_to_ssa
);
2810 NIR_PASS_V(nir
, nir_lower_alu_to_scalar
);
2811 NIR_PASS_V(nir
, nir_lower_phis_to_scalar
);
2815 NIR_PASS(progress
, nir
, nir_copy_prop
);
2816 NIR_PASS(progress
, nir
, nir_opt_remove_phis
);
2817 NIR_PASS(progress
, nir
, nir_opt_trivial_continues
);
2818 NIR_PASS(progress
, nir
, nir_opt_cse
);
2819 NIR_PASS(progress
, nir
, nir_opt_algebraic
);
2820 NIR_PASS(progress
, nir
, nir_opt_constant_folding
);
2821 NIR_PASS(progress
, nir
, nir_copy_prop
);
2822 NIR_PASS(progress
, nir
, nir_opt_dce
);
2823 NIR_PASS(progress
, nir
, nir_opt_dead_cf
);
2826 NIR_PASS_V(nir
, nir_lower_bool_to_int32
);
2827 NIR_PASS_V(nir
, nir_lower_locals_to_regs
);
2828 NIR_PASS_V(nir
, nir_remove_dead_variables
, nir_var_function_temp
);
2829 NIR_PASS_V(nir
, nir_convert_from_ssa
, true);
2831 // Garbage collect dead instructions
2835 ERROR("Couldn't prase NIR!\n");
2839 if (!assignSlots()) {
2840 ERROR("Couldn't assign slots!\n");
2844 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
2845 nir_print_shader(nir
, stderr
);
2847 nir_foreach_function(function
, nir
) {
2848 if (!visit(function
))
2855 } // unnamed namespace
2860 Program::makeFromNIR(struct nv50_ir_prog_info
*info
)
2862 nir_shader
*nir
= (nir_shader
*)info
->bin
.source
;
2863 Converter
converter(this, nir
, info
);
2864 bool result
= converter
.run();
2867 LoweringHelper lowering
;
2869 tlsSize
= info
->bin
.tlsSpace
;
2873 } // namespace nv50_ir