2 * Copyright 2017 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Karol Herbst <kherbst@redhat.com>
25 #include "compiler/nir/nir.h"
27 #include "util/u_debug.h"
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
37 #include <tr1/unordered_map>
43 #if __cplusplus >= 201103L
45 using std::unordered_map
;
48 using std::tr1::unordered_map
;
51 using namespace nv50_ir
;
54 type_size(const struct glsl_type
*type
)
56 return glsl_count_attribute_slots(type
, false);
// NIR -> nv50 IR translator: walks a nir_shader and emits nv50_ir
// instructions through the ConverterCommon builder helpers.
59 class Converter
: public ConverterCommon
62 Converter(Program
*, nir_shader
*, nv50_ir_prog_info
*);
// One LValue per vector component of a NIR def.
66 typedef std::vector
<LValue
*> LValues
;
// nir_register / nir_ssa_def index -> component LValues.
67 typedef unordered_map
<unsigned, LValues
> NirDefMap
;
// nir_block index -> BasicBlock emitted for it.
68 typedef unordered_map
<unsigned, BasicBlock
*> NirBlockMap
;
// convert(): map NIR objects to their nv50 IR counterparts, creating
// and caching them on first use.
70 LValues
& convert(nir_alu_dest
*);
71 BasicBlock
* convert(nir_block
*);
72 LValues
& convert(nir_dest
*);
73 SVSemantic
convert(nir_intrinsic_op
);
74 LValues
& convert(nir_register
*);
75 LValues
& convert(nir_ssa_def
*);
// getSrc(): fetch one component of a NIR source as a Value.
77 Value
* getSrc(nir_alu_src
*, uint8_t component
= 0);
78 Value
* getSrc(nir_register
*, uint8_t);
79 Value
* getSrc(nir_src
*, uint8_t, bool indirect
= false);
80 Value
* getSrc(nir_ssa_def
*, uint8_t);
82 // returned value is the constant part of the given source (either the
83 // nir_src or the selected source component of an intrinsic). Even though
84 // this is mostly an optimization to be able to skip indirects in a few
85 // cases, sometimes we require immediate values or set some fileds on
86 // instructions (e.g. tex) in order for codegen to consume those.
87 // If the found value has not a constant part, the Value gets returned
88 // through the Value parameter.
89 uint32_t getIndirect(nir_src
*, uint8_t, Value
*&);
90 uint32_t getIndirect(nir_intrinsic_instr
*, uint8_t s
, uint8_t c
, Value
*&);
// Byte address of an input/output slot assigned by assignSlots().
92 uint32_t getSlotAddress(nir_intrinsic_instr
*, uint8_t idx
, uint8_t slot
);
94 void setInterpolate(nv50_ir_varying
*,
// Emit a (possibly 64-bit split) load / store with optional indirects.
99 Instruction
*loadFrom(DataFile
, uint8_t, DataType
, Value
*def
, uint32_t base
,
100 uint8_t c
, Value
*indirect0
= NULL
,
101 Value
*indirect1
= NULL
, bool patch
= false);
102 void storeTo(nir_intrinsic_instr
*, DataFile
, operation
, DataType
,
103 Value
*src
, uint8_t idx
, uint8_t c
, Value
*indirect0
= NULL
,
104 Value
*indirect1
= NULL
);
// Type classification helpers driven by the nir_op_infos table.
106 bool isFloatType(nir_alu_type
);
107 bool isSignedType(nir_alu_type
);
108 bool isResultFloat(nir_op
);
109 bool isResultSigned(nir_op
);
111 DataType
getDType(nir_alu_instr
*);
112 DataType
getDType(nir_intrinsic_instr
*);
113 DataType
getDType(nir_op
, uint8_t);
115 std::vector
<DataType
> getSTypes(nir_alu_instr
*);
116 DataType
getSType(nir_src
&, bool isFloat
, bool isSigned
);
// Opcode translation: NIR op -> nv50 operation / sub-op / cond code.
118 operation
getOperation(nir_op
);
119 operation
preOperationNeeded(nir_op
);
121 int getSubOp(nir_op
);
123 CondCode
getCondCode(nir_op
);
// visit(): one overload per NIR IR node kind; each returns false on
// an unsupported construct.
128 bool visit(nir_alu_instr
*);
129 bool visit(nir_block
*);
130 bool visit(nir_cf_node
*);
131 bool visit(nir_function
*);
132 bool visit(nir_if
*);
133 bool visit(nir_instr
*);
134 bool visit(nir_intrinsic_instr
*);
135 bool visit(nir_jump_instr
*);
136 bool visit(nir_load_const_instr
*);
137 bool visit(nir_loop
*);
// Current loop nesting depth while visiting control flow.
144 unsigned int curLoopDepth
;
// Output slot used as the clip-vertex source (-1-ish until assigned).
149 int clipVertexOutput
;
// Construct the translator; ConverterCommon stores the Program and
// prog-info pointers. NOTE(review): intermediate member initializers
// are not visible in this fragment.
158 Converter::Converter(Program
*prog
, nir_shader
*nir
, nv50_ir_prog_info
*info
)
159 : ConverterCommon(prog
, info
),
// Cache an immediate zero, reused throughout translation.
164 zero
= mkImm((uint32_t)0);
// Return the BasicBlock for @block, creating and caching a fresh one
// in 'blocks' on first request.
168 Converter::convert(nir_block
*block
)
170 NirBlockMap::iterator it
= blocks
.find(block
->index
);
// Already converted: reuse the cached BasicBlock.
171 if (it
!= blocks
.end())
174 BasicBlock
*bb
= new BasicBlock(func
);
175 blocks
[block
->index
] = bb
;
180 Converter::isFloatType(nir_alu_type type
)
182 return nir_alu_type_get_base_type(type
) == nir_type_float
;
186 Converter::isSignedType(nir_alu_type type
)
188 return nir_alu_type_get_base_type(type
) == nir_type_int
;
// True when @op produces a floating-point result according to the NIR
// opcode metadata table.
192 Converter::isResultFloat(nir_op op
)
194 const nir_op_info
&info
= nir_op_infos
[op
];
195 if (info
.output_type
!= nir_type_invalid
)
196 return isFloatType(info
.output_type
);
// Opcode has no declared output type; complain.
198 ERROR("isResultFloat not implemented for %s\n", nir_op_infos
[op
].name
);
// True when @op should be treated as producing a signed integer result,
// per the NIR opcode metadata table.
204 Converter::isResultSigned(nir_op op
)
207 // there is no umul and we get wrong results if we treat all muls as signed
212 const nir_op_info
&info
= nir_op_infos
[op
];
213 if (info
.output_type
!= nir_type_invalid
)
214 return isSignedType(info
.output_type
);
// Opcode has no declared output type; complain.
215 ERROR("isResultSigned not implemented for %s\n", nir_op_infos
[op
].name
);
222 Converter::getDType(nir_alu_instr
*insn
)
224 if (insn
->dest
.dest
.is_ssa
)
225 return getDType(insn
->op
, insn
->dest
.dest
.ssa
.bit_size
);
227 return getDType(insn
->op
, insn
->dest
.dest
.reg
.reg
->bit_size
);
231 Converter::getDType(nir_intrinsic_instr
*insn
)
233 if (insn
->dest
.is_ssa
)
234 return typeOfSize(insn
->dest
.ssa
.bit_size
/ 8, false, false);
236 return typeOfSize(insn
->dest
.reg
.reg
->bit_size
/ 8, false, false);
// Map (@op, @bitSize in bits) to an nv50 DataType using the opcode's
// float/signedness classification.
240 Converter::getDType(nir_op op
, uint8_t bitSize
)
242 DataType ty
= typeOfSize(bitSize
/ 8, isResultFloat(op
), isResultSigned(op
));
// No DataType of that size/class exists; complain.
243 if (ty
== TYPE_NONE
) {
244 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos
[op
].name
, bitSize
);
// One source DataType per ALU input, combining the opcode's declared
// input base types with each source's actual bit width.
250 std::vector
<DataType
>
251 Converter::getSTypes(nir_alu_instr
*insn
)
253 const nir_op_info
&info
= nir_op_infos
[insn
->op
];
254 std::vector
<DataType
> res(info
.num_inputs
);
256 for (uint8_t i
= 0; i
< info
.num_inputs
; ++i
) {
257 if (info
.input_types
[i
] != nir_type_invalid
) {
258 res
[i
] = getSType(insn
->src
[i
].src
, isFloatType(info
.input_types
[i
]), isSignedType(info
.input_types
[i
]));
// Input has no declared type; complain.
260 ERROR("getSType not implemented for %s idx %u\n", info
.name
, i
);
// DataType of a single nir_src: bit width from the SSA def or backing
// register, base type from the caller-provided flags.
271 Converter::getSType(nir_src
&src
, bool isFloat
, bool isSigned
)
275 bitSize
= src
.ssa
->bit_size
;
277 bitSize
= src
.reg
.reg
->bit_size
;
279 DataType ty
= typeOfSize(bitSize
/ 8, isFloat
, isSigned
);
// No matching DataType; complain.
280 if (ty
== TYPE_NONE
) {
288 ERROR("couldn't get Type for %s with bitSize %u\n", str
, bitSize
);
// Map a NIR ALU opcode to the corresponding nv50 IR operation.
// NOTE(review): most case bodies of this switch are not visible in this
// fragment — only a subset of the labels survived extraction.
295 Converter::getOperation(nir_op op
)
298 // basic ops with float and int variants
308 case nir_op_ifind_msb
:
309 case nir_op_ufind_msb
:
331 case nir_op_fddx_coarse
:
332 case nir_op_fddx_fine
:
335 case nir_op_fddy_coarse
:
336 case nir_op_fddy_fine
:
354 case nir_op_pack_64_2x32_split
:
368 case nir_op_imul_high
:
369 case nir_op_umul_high
:
// Unhandled opcode.
417 ERROR("couldn't get operation for op %s\n", nir_op_infos
[op
].name
);
// Some NIR ops lower to two nv50 instructions; returns the operation
// that must be emitted before the main one, if any.
424 Converter::preOperationNeeded(nir_op op
)
// Sub-opcode selector for NIR ops that share one nv50 operation.
436 Converter::getSubOp(nir_op op
)
439 case nir_op_imul_high
:
440 case nir_op_umul_high
:
// Both select the high-half multiply variant.
441 return NV50_IR_SUBOP_MUL_HIGH
;
// Map a NIR comparison opcode to an nv50 condition code.
// NOTE(review): the switch body is not visible in this fragment.
448 Converter::getCondCode(nir_op op
)
// Unhandled comparison opcode.
467 ERROR("couldn't get CondCode for op %s\n", nir_op_infos
[op
].name
);
474 Converter::convert(nir_alu_dest
*dest
)
476 return convert(&dest
->dest
);
// Resolve a nir_dest (SSA or register) to its per-component LValues.
480 Converter::convert(nir_dest
*dest
)
483 return convert(&dest
->ssa
);
// Indirectly addressed register destinations are not supported.
484 if (dest
->reg
.indirect
) {
485 ERROR("no support for indirects.");
488 return convert(dest
->reg
.reg
);
// Per-component LValues for @reg: create scratch values (at least
// 32 bit each) on first use and cache them in regDefs.
492 Converter::convert(nir_register
*reg
)
494 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
// Already materialized.
495 if (it
!= regDefs
.end())
498 LValues
newDef(reg
->num_components
);
499 for (uint8_t i
= 0; i
< reg
->num_components
; i
++)
500 newDef
[i
] = getScratch(std::max(4, reg
->bit_size
/ 8));
501 return regDefs
[reg
->index
] = newDef
;
// Per-component LValues for @def: create SSA values (at least 32 bit
// each) on first use and cache them in ssaDefs.
505 Converter::convert(nir_ssa_def
*def
)
507 NirDefMap::iterator it
= ssaDefs
.find(def
->index
);
// Already materialized.
508 if (it
!= ssaDefs
.end())
511 LValues
newDef(def
->num_components
);
512 for (uint8_t i
= 0; i
< def
->num_components
; i
++)
513 newDef
[i
] = getSSA(std::max(4, def
->bit_size
/ 8));
514 return ssaDefs
[def
->index
] = newDef
;
// Fetch one component of an ALU source, honoring its swizzle. abs/neg
// modifiers are expected to have been lowered before reaching here.
518 Converter::getSrc(nir_alu_src
*src
, uint8_t component
)
520 if (src
->abs
|| src
->negate
) {
521 ERROR("modifiers currently not supported on nir_alu_src\n");
524 return getSrc(&src
->src
, src
->swizzle
[component
]);
528 Converter::getSrc(nir_register
*reg
, uint8_t idx
)
530 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
531 if (it
== regDefs
.end())
532 return convert(reg
)[idx
];
533 return it
->second
[idx
];
// Fetch component @idx of a generic nir_src (SSA or register); an
// indirect register access is only honored when @indirect is set by
// the caller (see getIndirect).
537 Converter::getSrc(nir_src
*src
, uint8_t idx
, bool indirect
)
540 return getSrc(src
->ssa
, idx
);
542 if (src
->reg
.indirect
) {
// Caller explicitly asked for the indirect part.
544 return getSrc(src
->reg
.indirect
, idx
);
545 ERROR("no support for indirects.");
550 return getSrc(src
->reg
.reg
, idx
);
// Look up component @idx of an SSA def that must already have been
// converted; complains when the def is unknown.
554 Converter::getSrc(nir_ssa_def
*src
, uint8_t idx
)
556 NirDefMap::iterator it
= ssaDefs
.find(src
->index
);
557 if (it
== ssaDefs
.end()) {
558 ERROR("SSA value %u not found\n", src
->index
);
562 return it
->second
[idx
];
// Constant part of @src; when the source is not a constant, the Value
// is handed back through @indirect instead (see class-decl comment).
566 Converter::getIndirect(nir_src
*src
, uint8_t idx
, Value
*&indirect
)
568 nir_const_value
*offset
= nir_src_as_const_value(*src
);
// Fully constant source: return its 32-bit value directly.
572 return offset
->u32
[0];
575 indirect
= getSrc(src
, idx
, true);
// Intrinsic base plus the constant part of source @s (component @c);
// any non-constant remainder is shifted left by 4 (x16 bytes, i.e. one
// vec4 slot) into an address register and returned through @indirect.
580 Converter::getIndirect(nir_intrinsic_instr
*insn
, uint8_t s
, uint8_t c
, Value
*&indirect
)
582 int32_t idx
= nir_intrinsic_base(insn
) + getIndirect(&insn
->src
[s
], c
, indirect
);
584 indirect
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), indirect
, loadImm(NULL
, 4));
// Translate a gl_vert_attrib slot into a TGSI semantic name/index pair.
589 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot
, unsigned *name
, unsigned *index
)
591 assert(name
&& index
);
593 if (slot
>= VERT_ATTRIB_MAX
) {
594 ERROR("invalid varying slot %u\n", slot
);
// Generic user-defined attributes.
599 if (slot
>= VERT_ATTRIB_GENERIC0
&&
600 slot
< VERT_ATTRIB_GENERIC0
+ VERT_ATTRIB_GENERIC_MAX
) {
601 *name
= TGSI_SEMANTIC_GENERIC
;
602 *index
= slot
- VERT_ATTRIB_GENERIC0
;
// Fixed-function texture coordinates.
606 if (slot
>= VERT_ATTRIB_TEX0
&&
607 slot
< VERT_ATTRIB_TEX0
+ VERT_ATTRIB_TEX_MAX
) {
608 *name
= TGSI_SEMANTIC_TEXCOORD
;
609 *index
= slot
- VERT_ATTRIB_TEX0
;
// Remaining fixed-function attributes.
614 case VERT_ATTRIB_COLOR0
:
615 *name
= TGSI_SEMANTIC_COLOR
;
618 case VERT_ATTRIB_COLOR1
:
619 *name
= TGSI_SEMANTIC_COLOR
;
622 case VERT_ATTRIB_EDGEFLAG
:
623 *name
= TGSI_SEMANTIC_EDGEFLAG
;
626 case VERT_ATTRIB_FOG
:
627 *name
= TGSI_SEMANTIC_FOG
;
630 case VERT_ATTRIB_NORMAL
:
631 *name
= TGSI_SEMANTIC_NORMAL
;
634 case VERT_ATTRIB_POS
:
635 *name
= TGSI_SEMANTIC_POSITION
;
638 case VERT_ATTRIB_POINT_SIZE
:
639 *name
= TGSI_SEMANTIC_PSIZE
;
643 ERROR("unknown vert attrib slot %u\n", slot
);
// Translate a gl_varying_slot into a TGSI semantic name/index pair.
650 varying_slot_to_tgsi_semantic(gl_varying_slot slot
, unsigned *name
, unsigned *index
)
652 assert(name
&& index
);
654 if (slot
>= VARYING_SLOT_TESS_MAX
) {
655 ERROR("invalid varying slot %u\n", slot
);
// Per-patch varyings.
660 if (slot
>= VARYING_SLOT_PATCH0
) {
661 *name
= TGSI_SEMANTIC_PATCH
;
662 *index
= slot
- VARYING_SLOT_PATCH0
;
// Generic user-defined varyings.
666 if (slot
>= VARYING_SLOT_VAR0
) {
667 *name
= TGSI_SEMANTIC_GENERIC
;
668 *index
= slot
- VARYING_SLOT_VAR0
;
// Fixed-function texture coordinates.
672 if (slot
>= VARYING_SLOT_TEX0
&& slot
<= VARYING_SLOT_TEX7
) {
673 *name
= TGSI_SEMANTIC_TEXCOORD
;
674 *index
= slot
- VARYING_SLOT_TEX0
;
// Remaining fixed-function varyings.
679 case VARYING_SLOT_BFC0
:
680 *name
= TGSI_SEMANTIC_BCOLOR
;
683 case VARYING_SLOT_BFC1
:
684 *name
= TGSI_SEMANTIC_BCOLOR
;
687 case VARYING_SLOT_CLIP_DIST0
:
688 *name
= TGSI_SEMANTIC_CLIPDIST
;
691 case VARYING_SLOT_CLIP_DIST1
:
692 *name
= TGSI_SEMANTIC_CLIPDIST
;
695 case VARYING_SLOT_CLIP_VERTEX
:
696 *name
= TGSI_SEMANTIC_CLIPVERTEX
;
699 case VARYING_SLOT_COL0
:
700 *name
= TGSI_SEMANTIC_COLOR
;
703 case VARYING_SLOT_COL1
:
704 *name
= TGSI_SEMANTIC_COLOR
;
707 case VARYING_SLOT_EDGE
:
708 *name
= TGSI_SEMANTIC_EDGEFLAG
;
711 case VARYING_SLOT_FACE
:
712 *name
= TGSI_SEMANTIC_FACE
;
715 case VARYING_SLOT_FOGC
:
716 *name
= TGSI_SEMANTIC_FOG
;
719 case VARYING_SLOT_LAYER
:
720 *name
= TGSI_SEMANTIC_LAYER
;
723 case VARYING_SLOT_PNTC
:
724 *name
= TGSI_SEMANTIC_PCOORD
;
727 case VARYING_SLOT_POS
:
728 *name
= TGSI_SEMANTIC_POSITION
;
731 case VARYING_SLOT_PRIMITIVE_ID
:
732 *name
= TGSI_SEMANTIC_PRIMID
;
735 case VARYING_SLOT_PSIZ
:
736 *name
= TGSI_SEMANTIC_PSIZE
;
739 case VARYING_SLOT_TESS_LEVEL_INNER
:
740 *name
= TGSI_SEMANTIC_TESSINNER
;
743 case VARYING_SLOT_TESS_LEVEL_OUTER
:
744 *name
= TGSI_SEMANTIC_TESSOUTER
;
747 case VARYING_SLOT_VIEWPORT
:
748 *name
= TGSI_SEMANTIC_VIEWPORT_INDEX
;
752 ERROR("unknown varying slot %u\n", slot
);
// Translate a fragment-shader output slot into a TGSI semantic pair.
759 frag_result_to_tgsi_semantic(unsigned slot
, unsigned *name
, unsigned *index
)
// Data outputs all map to COLOR with an adjusted index.
761 if (slot
>= FRAG_RESULT_DATA0
) {
762 *name
= TGSI_SEMANTIC_COLOR
;
763 *index
= slot
- FRAG_RESULT_COLOR
- 2; // intentional
768 case FRAG_RESULT_COLOR
:
769 *name
= TGSI_SEMANTIC_COLOR
;
772 case FRAG_RESULT_DEPTH
:
773 *name
= TGSI_SEMANTIC_POSITION
;
776 case FRAG_RESULT_SAMPLE_MASK
:
777 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
781 ERROR("unknown frag result slot %u\n", slot
);
787 // copy of _mesa_sysval_to_semantic
// Translate a SYSTEM_VALUE_* enum into a TGSI semantic name/index pair.
789 system_val_to_tgsi_semantic(unsigned val
, unsigned *name
, unsigned *index
)
// Vertex shader system values.
794 case SYSTEM_VALUE_VERTEX_ID
:
795 *name
= TGSI_SEMANTIC_VERTEXID
;
797 case SYSTEM_VALUE_INSTANCE_ID
:
798 *name
= TGSI_SEMANTIC_INSTANCEID
;
800 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
:
801 *name
= TGSI_SEMANTIC_VERTEXID_NOBASE
;
803 case SYSTEM_VALUE_BASE_VERTEX
:
804 *name
= TGSI_SEMANTIC_BASEVERTEX
;
806 case SYSTEM_VALUE_BASE_INSTANCE
:
807 *name
= TGSI_SEMANTIC_BASEINSTANCE
;
809 case SYSTEM_VALUE_DRAW_ID
:
810 *name
= TGSI_SEMANTIC_DRAWID
;
// Geometry shader system values.
814 case SYSTEM_VALUE_INVOCATION_ID
:
815 *name
= TGSI_SEMANTIC_INVOCATIONID
;
// Fragment shader system values.
819 case SYSTEM_VALUE_FRAG_COORD
:
820 *name
= TGSI_SEMANTIC_POSITION
;
822 case SYSTEM_VALUE_FRONT_FACE
:
823 *name
= TGSI_SEMANTIC_FACE
;
825 case SYSTEM_VALUE_SAMPLE_ID
:
826 *name
= TGSI_SEMANTIC_SAMPLEID
;
828 case SYSTEM_VALUE_SAMPLE_POS
:
829 *name
= TGSI_SEMANTIC_SAMPLEPOS
;
831 case SYSTEM_VALUE_SAMPLE_MASK_IN
:
832 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
834 case SYSTEM_VALUE_HELPER_INVOCATION
:
835 *name
= TGSI_SEMANTIC_HELPER_INVOCATION
;
838 // Tessellation shader
839 case SYSTEM_VALUE_TESS_COORD
:
840 *name
= TGSI_SEMANTIC_TESSCOORD
;
842 case SYSTEM_VALUE_VERTICES_IN
:
843 *name
= TGSI_SEMANTIC_VERTICESIN
;
845 case SYSTEM_VALUE_PRIMITIVE_ID
:
846 *name
= TGSI_SEMANTIC_PRIMID
;
848 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
849 *name
= TGSI_SEMANTIC_TESSOUTER
;
851 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
852 *name
= TGSI_SEMANTIC_TESSINNER
;
// Compute shader system values.
856 case SYSTEM_VALUE_LOCAL_INVOCATION_ID
:
857 *name
= TGSI_SEMANTIC_THREAD_ID
;
859 case SYSTEM_VALUE_WORK_GROUP_ID
:
860 *name
= TGSI_SEMANTIC_BLOCK_ID
;
862 case SYSTEM_VALUE_NUM_WORK_GROUPS
:
863 *name
= TGSI_SEMANTIC_GRID_SIZE
;
865 case SYSTEM_VALUE_LOCAL_GROUP_SIZE
:
866 *name
= TGSI_SEMANTIC_BLOCK_SIZE
;
// Subgroup / ARB_shader_ballot system values.
870 case SYSTEM_VALUE_SUBGROUP_SIZE
:
871 *name
= TGSI_SEMANTIC_SUBGROUP_SIZE
;
873 case SYSTEM_VALUE_SUBGROUP_INVOCATION
:
874 *name
= TGSI_SEMANTIC_SUBGROUP_INVOCATION
;
876 case SYSTEM_VALUE_SUBGROUP_EQ_MASK
:
877 *name
= TGSI_SEMANTIC_SUBGROUP_EQ_MASK
;
879 case SYSTEM_VALUE_SUBGROUP_GE_MASK
:
880 *name
= TGSI_SEMANTIC_SUBGROUP_GE_MASK
;
882 case SYSTEM_VALUE_SUBGROUP_GT_MASK
:
883 *name
= TGSI_SEMANTIC_SUBGROUP_GT_MASK
;
885 case SYSTEM_VALUE_SUBGROUP_LE_MASK
:
886 *name
= TGSI_SEMANTIC_SUBGROUP_LE_MASK
;
888 case SYSTEM_VALUE_SUBGROUP_LT_MASK
:
889 *name
= TGSI_SEMANTIC_SUBGROUP_LT_MASK
;
893 ERROR("unknown system value %u\n", val
);
// Fill the interpolation fields of @var from a NIR interpolation mode;
// COLOR and POSITION semantics get special handling for INTERP_MODE_NONE.
900 Converter::setInterpolate(nv50_ir_varying
*var
,
906 case INTERP_MODE_FLAT
:
// No explicit qualifier: pick per-semantic defaults.
909 case INTERP_MODE_NONE
:
910 if (semantic
== TGSI_SEMANTIC_COLOR
)
912 else if (semantic
== TGSI_SEMANTIC_POSITION
)
915 case INTERP_MODE_NOPERSPECTIVE
:
918 case INTERP_MODE_SMOOTH
:
921 var
->centroid
= centroid
;
// Number of attribute slots @type occupies for @stage, stripping the
// implicit per-vertex/per-patch array dimension where applicable.
925 calcSlots(const glsl_type
*type
, Program::Type stage
, const shader_info
&info
,
926 bool input
, const nir_variable
*var
)
928 if (!type
->is_array())
929 return type
->count_attribute_slots(false);
933 case Program::TYPE_GEOMETRY
:
934 slots
= type
->uniform_locations();
// GS inputs are arrayed over the incoming vertices.
936 slots
/= info
.gs
.vertices_in
;
938 case Program::TYPE_TESSELLATION_CONTROL
:
939 case Program::TYPE_TESSELLATION_EVAL
:
940 // remove first dimension
941 if (var
->data
.patch
|| (!input
&& stage
== Program::TYPE_TESSELLATION_EVAL
))
942 slots
= type
->uniform_locations();
944 slots
= type
->fields
.array
->uniform_locations();
947 slots
= type
->count_attribute_slots(false);
// Populate nv50_ir_prog_info's input/output/sysval tables from the NIR
// shader's variable lists, then let the driver assign hardware slots.
954 bool Converter::assignSlots() {
958 info
->io
.viewportId
= -1;
961 // we have to fixup the uniform locations for arrays
962 unsigned numImages
= 0;
963 nir_foreach_variable(var
, &nir
->uniforms
) {
964 const glsl_type
*type
= var
->type
;
965 if (!type
->without_array()->is_image())
967 var
->data
.driver_location
= numImages
;
968 numImages
+= type
->is_array() ? type
->arrays_of_arrays_size() : 1;
// --- Shader inputs ---
971 nir_foreach_variable(var
, &nir
->inputs
) {
972 const glsl_type
*type
= var
->type
;
973 int slot
= var
->data
.location
;
974 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, true, var
);
975 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
976 : type
->component_slots();
977 uint32_t frac
= var
->data
.location_frac
;
978 uint32_t vary
= var
->data
.driver_location
;
980 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
985 assert(vary
+ slots
<= PIPE_MAX_SHADER_INPUTS
);
// Per-stage semantic translation of the location.
987 switch(prog
->getType()) {
988 case Program::TYPE_FRAGMENT
:
989 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
990 for (uint16_t i
= 0; i
< slots
; ++i
) {
991 setInterpolate(&info
->in
[vary
+ i
], var
->data
.interpolation
,
992 var
->data
.centroid
| var
->data
.sample
, name
);
995 case Program::TYPE_GEOMETRY
:
996 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
998 case Program::TYPE_TESSELLATION_CONTROL
:
999 case Program::TYPE_TESSELLATION_EVAL
:
1000 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1001 if (var
->data
.patch
&& name
== TGSI_SEMANTIC_PATCH
)
1002 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1004 case Program::TYPE_VERTEX
:
1005 vert_attrib_to_tgsi_semantic((gl_vert_attrib
)slot
, &name
, &index
);
1007 case TGSI_SEMANTIC_EDGEFLAG
:
1008 info
->io
.edgeFlagIn
= vary
;
1015 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
// Record each occupied input slot and its component mask.
1019 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1020 info
->in
[vary
].id
= vary
;
1021 info
->in
[vary
].patch
= var
->data
.patch
;
1022 info
->in
[vary
].sn
= name
;
1023 info
->in
[vary
].si
= index
+ i
;
1024 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
// 64-bit types take two mask bits per component, split over two slots.
1026 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1028 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1030 info
->in
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1032 info
->numInputs
= std::max
<uint8_t>(info
->numInputs
, vary
);
// --- Shader outputs ---
1035 info
->numOutputs
= 0;
1036 nir_foreach_variable(var
, &nir
->outputs
) {
1037 const glsl_type
*type
= var
->type
;
1038 int slot
= var
->data
.location
;
1039 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, false, var
);
1040 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
1041 : type
->component_slots();
1042 uint32_t frac
= var
->data
.location_frac
;
1043 uint32_t vary
= var
->data
.driver_location
;
1045 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
1050 assert(vary
< PIPE_MAX_SHADER_OUTPUTS
);
1052 switch(prog
->getType()) {
1053 case Program::TYPE_FRAGMENT
:
1054 frag_result_to_tgsi_semantic((gl_frag_result
)slot
, &name
, &index
);
1056 case TGSI_SEMANTIC_COLOR
:
1057 if (!var
->data
.fb_fetch_output
)
1058 info
->prop
.fp
.numColourResults
++;
1059 info
->prop
.fp
.separateFragData
= true;
1060 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1061 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1062 index
= index
== 0 ? var
->data
.index
: index
;
1064 case TGSI_SEMANTIC_POSITION
:
1065 info
->io
.fragDepth
= vary
;
1066 info
->prop
.fp
.writesDepth
= true;
1068 case TGSI_SEMANTIC_SAMPLEMASK
:
1069 info
->io
.sampleMask
= vary
;
1075 case Program::TYPE_GEOMETRY
:
1076 case Program::TYPE_TESSELLATION_CONTROL
:
1077 case Program::TYPE_TESSELLATION_EVAL
:
1078 case Program::TYPE_VERTEX
:
1079 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1081 if (var
->data
.patch
&& name
!= TGSI_SEMANTIC_TESSINNER
&&
1082 name
!= TGSI_SEMANTIC_TESSOUTER
)
1083 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1086 case TGSI_SEMANTIC_CLIPDIST
:
1087 info
->io
.genUserClip
= -1;
1089 case TGSI_SEMANTIC_CLIPVERTEX
:
1090 clipVertexOutput
= vary
;
1092 case TGSI_SEMANTIC_EDGEFLAG
:
1093 info
->io
.edgeFlagOut
= vary
;
1095 case TGSI_SEMANTIC_POSITION
:
1096 if (clipVertexOutput
< 0)
1097 clipVertexOutput
= vary
;
1104 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
// Record each occupied output slot and its component mask.
1108 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1109 info
->out
[vary
].id
= vary
;
1110 info
->out
[vary
].patch
= var
->data
.patch
;
1111 info
->out
[vary
].sn
= name
;
1112 info
->out
[vary
].si
= index
+ i
;
1113 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1115 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1117 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1119 info
->out
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1121 if (nir
->info
.outputs_read
& 1ll << slot
)
1122 info
->out
[vary
].oread
= 1;
1124 info
->numOutputs
= std::max
<uint8_t>(info
->numOutputs
, vary
);
// --- System values ---
1127 info
->numSysVals
= 0;
1128 for (uint8_t i
= 0; i
< 64; ++i
) {
1129 if (!(nir
->info
.system_values_read
& 1ll << i
))
1132 system_val_to_tgsi_semantic(i
, &name
, &index
);
1133 info
->sv
[info
->numSysVals
].sn
= name
;
1134 info
->sv
[info
->numSysVals
].si
= index
;
1135 info
->sv
[info
->numSysVals
].input
= 0; // TODO inferSysValDirection(sn);
1138 case SYSTEM_VALUE_INSTANCE_ID
:
1139 info
->io
.instanceId
= info
->numSysVals
;
1141 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
1142 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
1143 info
->sv
[info
->numSysVals
].patch
= 1;
1145 case SYSTEM_VALUE_VERTEX_ID
:
1146 info
->io
.vertexId
= info
->numSysVals
;
1152 info
->numSysVals
+= 1;
// --- Generated user clip planes: append CLIPDIST outputs ---
1155 if (info
->io
.genUserClip
> 0) {
1156 info
->io
.clipDistances
= info
->io
.genUserClip
;
1158 const unsigned int nOut
= (info
->io
.genUserClip
+ 3) / 4;
1160 for (unsigned int n
= 0; n
< nOut
; ++n
) {
1161 unsigned int i
= info
->numOutputs
++;
1162 info
->out
[i
].id
= i
;
1163 info
->out
[i
].sn
= TGSI_SEMANTIC_CLIPDIST
;
1164 info
->out
[i
].si
= n
;
1165 info
->out
[i
].mask
= ((1 << info
->io
.clipDistances
) - 1) >> (n
* 4);
// Let the driver map the collected varyings to hardware slots.
1169 return info
->assignSlots(info
) == 0;
// Byte address of (location @idx, component @slot) in the shader's
// input or output space, as assigned by assignSlots().
1173 Converter::getSlotAddress(nir_intrinsic_instr
*insn
, uint8_t idx
, uint8_t slot
)
1176 int offset
= nir_intrinsic_component(insn
);
// Data type: the dest type for loads, the stored source type otherwise.
1179 if (nir_intrinsic_infos
[insn
->intrinsic
].has_dest
)
1180 ty
= getDType(insn
);
1182 ty
= getSType(insn
->src
[0], false, false);
// Classify the intrinsic as an input or output access.
1184 switch (insn
->intrinsic
) {
1185 case nir_intrinsic_load_input
:
1186 case nir_intrinsic_load_interpolated_input
:
1187 case nir_intrinsic_load_per_vertex_input
:
1190 case nir_intrinsic_load_output
:
1191 case nir_intrinsic_load_per_vertex_output
:
1192 case nir_intrinsic_store_output
:
1193 case nir_intrinsic_store_per_vertex_output
:
1197 ERROR("unknown intrinsic in getSlotAddress %s",
1198 nir_intrinsic_infos
[insn
->intrinsic
].name
);
// 64-bit accesses cover two 32-bit components each.
1204 if (typeSizeof(ty
) == 8) {
1216 assert(!input
|| idx
< PIPE_MAX_SHADER_INPUTS
);
1217 assert(input
|| idx
< PIPE_MAX_SHADER_OUTPUTS
);
1219 const nv50_ir_varying
*vary
= input
? info
->in
: info
->out
;
// Slots are in 32-bit units; scale to bytes.
1220 return vary
[idx
].slot
[slot
] * 4;
// Emit a load of one component from @file at @base + @c * size, with
// optional indirect addresses; 64-bit loads from const/buffer memory
// (or any indirect load) are split into two 32-bit halves and merged.
1224 Converter::loadFrom(DataFile file
, uint8_t i
, DataType ty
, Value
*def
,
1225 uint32_t base
, uint8_t c
, Value
*indirect0
,
1226 Value
*indirect1
, bool patch
)
1228 unsigned int tySize
= typeSizeof(ty
);
1231 (file
== FILE_MEMORY_CONST
|| file
== FILE_MEMORY_BUFFER
|| indirect0
)) {
1232 Value
*lo
= getSSA();
1233 Value
*hi
= getSSA();
// Low 32 bits.
1236 mkLoad(TYPE_U32
, lo
,
1237 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
),
1239 loi
->setIndirect(0, 1, indirect1
);
1240 loi
->perPatch
= patch
;
// High 32 bits, 4 bytes further.
1243 mkLoad(TYPE_U32
, hi
,
1244 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
+ 4),
1246 hii
->setIndirect(0, 1, indirect1
);
1247 hii
->perPatch
= patch
;
// Recombine the halves into the 64-bit destination.
1249 return mkOp2(OP_MERGE
, ty
, def
, lo
, hi
);
// Plain single load.
1252 mkLoad(ty
, def
, mkSymbol(file
, i
, ty
, base
+ c
* tySize
), indirect0
);
1253 ld
->setIndirect(0, 1, indirect1
);
1254 ld
->perPatch
= patch
;
// Emit a store of one component to the slot addressed by (@idx, @c);
// 64-bit indirect stores are split into two 32-bit halves, and exported
// values are copied through a fresh SSA value first.
1260 Converter::storeTo(nir_intrinsic_instr
*insn
, DataFile file
, operation op
,
1261 DataType ty
, Value
*src
, uint8_t idx
, uint8_t c
,
1262 Value
*indirect0
, Value
*indirect1
)
1264 uint8_t size
= typeSizeof(ty
);
1265 uint32_t address
= getSlotAddress(insn
, idx
, c
);
1267 if (size
== 8 && indirect0
) {
1269 mkSplit(split
, 4, src
);
// Exports must come from register values; copy each half.
1271 if (op
== OP_EXPORT
) {
1272 split
[0] = mkMov(getSSA(), split
[0], ty
)->getDef(0);
1273 split
[1] = mkMov(getSSA(), split
[1], ty
)->getDef(0);
// Store the two 32-bit halves at address and address+4.
1276 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
), indirect0
,
1277 split
[0])->perPatch
= info
->out
[idx
].patch
;
1278 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
+ 4), indirect0
,
1279 split
[1])->perPatch
= info
->out
[idx
].patch
;
1281 if (op
== OP_EXPORT
)
1282 src
= mkMov(getSSA(size
), src
, ty
)->getDef(0);
1283 mkStore(op
, ty
, mkSymbol(file
, 0, ty
, address
), indirect0
,
1284 src
)->perPatch
= info
->out
[idx
].patch
;
// Copy stage-wide properties from the nir_shader's shader_info into the
// nv50_ir_prog_info consumed by codegen.
1289 Converter::parseNIR()
1291 info
->io
.clipDistances
= nir
->info
.clip_distance_array_size
;
1292 info
->io
.cullDistances
= nir
->info
.cull_distance_array_size
;
1294 switch(prog
->getType()) {
1295 case Program::TYPE_COMPUTE
:
1296 info
->prop
.cp
.numThreads
[0] = nir
->info
.cs
.local_size
[0];
1297 info
->prop
.cp
.numThreads
[1] = nir
->info
.cs
.local_size
[1];
1298 info
->prop
.cp
.numThreads
[2] = nir
->info
.cs
.local_size
[2];
1299 info
->bin
.smemSize
= nir
->info
.cs
.shared_size
;
1301 case Program::TYPE_FRAGMENT
:
1302 info
->prop
.fp
.earlyFragTests
= nir
->info
.fs
.early_fragment_tests
;
// Per-sample shading is required when sample id/pos are read.
1303 info
->prop
.fp
.persampleInvocation
=
1304 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_ID
) ||
1305 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1306 info
->prop
.fp
.postDepthCoverage
= nir
->info
.fs
.post_depth_coverage
;
1307 info
->prop
.fp
.readsSampleLocations
=
1308 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1309 info
->prop
.fp
.usesDiscard
= nir
->info
.fs
.uses_discard
;
1310 info
->prop
.fp
.usesSampleMaskIn
=
1311 !!(nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_MASK_IN
);
1313 case Program::TYPE_GEOMETRY
:
1314 info
->prop
.gp
.inputPrim
= nir
->info
.gs
.input_primitive
;
1315 info
->prop
.gp
.instanceCount
= nir
->info
.gs
.invocations
;
1316 info
->prop
.gp
.maxVertices
= nir
->info
.gs
.vertices_out
;
1317 info
->prop
.gp
.outputPrim
= nir
->info
.gs
.output_primitive
;
1319 case Program::TYPE_TESSELLATION_CONTROL
:
1320 case Program::TYPE_TESSELLATION_EVAL
:
// Isolines have no dedicated domain enum; report plain lines.
1321 if (nir
->info
.tess
.primitive_mode
== GL_ISOLINES
)
1322 info
->prop
.tp
.domain
= GL_LINES
;
1324 info
->prop
.tp
.domain
= nir
->info
.tess
.primitive_mode
;
1325 info
->prop
.tp
.outputPatchSize
= nir
->info
.tess
.tcs_vertices_out
;
1326 info
->prop
.tp
.outputPrim
=
1327 nir
->info
.tess
.point_mode
? PIPE_PRIM_POINTS
: PIPE_PRIM_TRIANGLES
;
1328 info
->prop
.tp
.partitioning
= (nir
->info
.tess
.spacing
+ 1) % 3;
1329 info
->prop
.tp
.winding
= !nir
->info
.tess
.ccw
;
1331 case Program::TYPE_VERTEX
:
1332 info
->prop
.vp
.usesDrawParameters
=
1333 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX
)) ||
1334 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE
)) ||
1335 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID
));
// Translate the shader's "main" nir_function: build entry/exit blocks,
// emit stage-specific prologue values, then walk the function body.
1345 Converter::visit(nir_function
*function
)
1347 // we only support emiting the main function for now
1348 assert(!strcmp(function
->name
, "main"));
1349 assert(function
->impl
);
1351 // usually the blocks will set everything up, but main is special
1352 BasicBlock
*entry
= new BasicBlock(prog
->main
);
1353 exit
= new BasicBlock(prog
->main
);
1354 blocks
[nir_start_block(function
->impl
)->index
] = entry
;
1355 prog
->main
->setEntry(entry
);
1356 prog
->main
->setExit(exit
);
1358 setPosition(entry
, true);
// Scratch values for generated user-clip-plane handling.
1360 if (info
->io
.genUserClip
> 0) {
1361 for (int c
= 0; c
< 4; ++c
)
1362 clipVtx
[c
] = getScratch();
1365 switch (prog
->getType()) {
1366 case Program::TYPE_TESSELLATION_CONTROL
:
// laneid - invocation id, used by TCS input addressing.
1368 OP_SUB
, TYPE_U32
, getSSA(),
1369 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LANEID
, 0)),
1370 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_INVOCATION_ID
, 0)));
1372 case Program::TYPE_FRAGMENT
: {
// Read position.w and keep 1/w for perspective interpolation.
1373 Symbol
*sv
= mkSysVal(SV_POSITION
, 3);
1374 fragCoord
[3] = mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), sv
);
1375 fp
.position
= mkOp1v(OP_RCP
, TYPE_F32
, fragCoord
[3], fragCoord
[3]);
// Walk the function body's control-flow nodes.
1382 nir_index_ssa_defs(function
->impl
);
1383 foreach_list_typed(nir_cf_node
, node
, node
, &function
->impl
->body
) {
1388 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::TREE
);
1389 setPosition(exit
, true);
1391 if (info
->io
.genUserClip
> 0)
1392 handleUserClipPlanes();
1394 // TODO: for non main function this needs to be a OP_RETURN
1395 mkOp(OP_EXIT
, TYPE_NONE
, NULL
)->terminator
= 1;
// Dispatch a control-flow node to the matching visit() overload.
1400 Converter::visit(nir_cf_node
*node
)
1402 switch (node
->type
) {
1403 case nir_cf_node_block
:
1404 return visit(nir_cf_node_as_block(node
));
1405 case nir_cf_node_if
:
1406 return visit(nir_cf_node_as_if(node
));
1407 case nir_cf_node_loop
:
1408 return visit(nir_cf_node_as_loop(node
));
1410 ERROR("unknown nir_cf_node type %u\n", node
->type
);
// Translate one nir_block: skip dead empty blocks, then emit every
// instruction into the corresponding BasicBlock.
1416 Converter::visit(nir_block
*block
)
1418 if (!block
->predecessors
->entries
&& block
->instr_list
.is_empty())
1421 BasicBlock
*bb
= convert(block
);
1423 setPosition(bb
, true);
1424 nir_foreach_instr(insn
, block
) {
// Translate a nir_if: branch on the condition, emit both arms, and
// join the control flow again after the if where possible.
1432 Converter::visit(nir_if
*nif
)
1434 DataType sType
= getSType(nif
->condition
, false, false);
1435 Value
*src
= getSrc(&nif
->condition
, 0);
1437 nir_block
*lastThen
= nir_if_last_then_block(nif
);
1438 nir_block
*lastElse
= nir_if_last_else_block(nif
);
1440 assert(!lastThen
->successors
[1]);
1441 assert(!lastElse
->successors
[1]);
1443 BasicBlock
*ifBB
= convert(nir_if_first_then_block(nif
));
1444 BasicBlock
*elseBB
= convert(nir_if_first_else_block(nif
));
1446 bb
->cfg
.attach(&ifBB
->cfg
, Graph::Edge::TREE
);
1447 bb
->cfg
.attach(&elseBB
->cfg
, Graph::Edge::TREE
);
1449 // we only insert joinats, if both nodes end up at the end of the if again.
1450 // the reason for this to not happens are breaks/continues/ret/... which
1451 // have their own handling
1452 if (lastThen
->successors
[0] == lastElse
->successors
[0])
1453 bb
->joinAt
= mkFlow(OP_JOINAT
, convert(lastThen
->successors
[0]),
// Branch to the else arm when the condition is zero.
1456 mkFlow(OP_BRA
, elseBB
, CC_EQ
, src
)->setType(sType
);
// Emit the then arm.
1458 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->then_list
) {
1462 setPosition(convert(lastThen
), true);
1463 if (!bb
->getExit() ||
1464 !bb
->getExit()->asFlow() ||
1465 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1466 BasicBlock
*tailBB
= convert(lastThen
->successors
[0]);
1467 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1468 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
// Emit the else arm.
1471 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->else_list
) {
1475 setPosition(convert(lastElse
), true);
1476 if (!bb
->getExit() ||
1477 !bb
->getExit()->asFlow() ||
1478 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1479 BasicBlock
*tailBB
= convert(lastElse
->successors
[0]);
1480 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1481 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
// Both arms reconverge: emit the matching JOIN.
1484 if (lastThen
->successors
[0] == lastElse
->successors
[0]) {
1485 setPosition(convert(lastThen
->successors
[0]), true);
1486 mkFlow(OP_JOIN
, NULL
, CC_ALWAYS
, NULL
)->fixed
= 1;
1493 Converter::visit(nir_loop
*loop
)
1496 func
->loopNestingBound
= std::max(func
->loopNestingBound
, curLoopDepth
);
1498 BasicBlock
*loopBB
= convert(nir_loop_first_block(loop
));
1499 BasicBlock
*tailBB
=
1500 convert(nir_cf_node_as_block(nir_cf_node_next(&loop
->cf_node
)));
1501 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::TREE
);
1503 mkFlow(OP_PREBREAK
, tailBB
, CC_ALWAYS
, NULL
);
1504 setPosition(loopBB
, false);
1505 mkFlow(OP_PRECONT
, loopBB
, CC_ALWAYS
, NULL
);
1507 foreach_list_typed(nir_cf_node
, node
, node
, &loop
->body
) {
1511 Instruction
*insn
= bb
->getExit();
1512 if (bb
->cfg
.incidentCount() != 0) {
1513 if (!insn
|| !insn
->asFlow()) {
1514 mkFlow(OP_CONT
, loopBB
, CC_ALWAYS
, NULL
);
1515 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::BACK
);
1516 } else if (insn
&& insn
->op
== OP_BRA
&& !insn
->getPredicate() &&
1517 tailBB
->cfg
.incidentCount() == 0) {
1518 // RA doesn't like having blocks around with no incident edge,
1519 // so we create a fake one to make it happy
1520 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::TREE
);
1530 Converter::visit(nir_instr
*insn
)
1532 switch (insn
->type
) {
1533 case nir_instr_type_alu
:
1534 return visit(nir_instr_as_alu(insn
));
1535 case nir_instr_type_intrinsic
:
1536 return visit(nir_instr_as_intrinsic(insn
));
1537 case nir_instr_type_jump
:
1538 return visit(nir_instr_as_jump(insn
));
1539 case nir_instr_type_load_const
:
1540 return visit(nir_instr_as_load_const(insn
));
1542 ERROR("unknown nir_instr type %u\n", insn
->type
);
1549 Converter::convert(nir_intrinsic_op intr
)
1552 case nir_intrinsic_load_base_vertex
:
1553 return SV_BASEVERTEX
;
1554 case nir_intrinsic_load_base_instance
:
1555 return SV_BASEINSTANCE
;
1556 case nir_intrinsic_load_draw_id
:
1558 case nir_intrinsic_load_front_face
:
1560 case nir_intrinsic_load_helper_invocation
:
1561 return SV_THREAD_KILL
;
1562 case nir_intrinsic_load_instance_id
:
1563 return SV_INSTANCE_ID
;
1564 case nir_intrinsic_load_invocation_id
:
1565 return SV_INVOCATION_ID
;
1566 case nir_intrinsic_load_local_group_size
:
1568 case nir_intrinsic_load_local_invocation_id
:
1570 case nir_intrinsic_load_num_work_groups
:
1572 case nir_intrinsic_load_patch_vertices_in
:
1573 return SV_VERTEX_COUNT
;
1574 case nir_intrinsic_load_primitive_id
:
1575 return SV_PRIMITIVE_ID
;
1576 case nir_intrinsic_load_sample_id
:
1577 return SV_SAMPLE_INDEX
;
1578 case nir_intrinsic_load_sample_mask_in
:
1579 return SV_SAMPLE_MASK
;
1580 case nir_intrinsic_load_sample_pos
:
1581 return SV_SAMPLE_POS
;
1582 case nir_intrinsic_load_subgroup_eq_mask
:
1583 return SV_LANEMASK_EQ
;
1584 case nir_intrinsic_load_subgroup_ge_mask
:
1585 return SV_LANEMASK_GE
;
1586 case nir_intrinsic_load_subgroup_gt_mask
:
1587 return SV_LANEMASK_GT
;
1588 case nir_intrinsic_load_subgroup_le_mask
:
1589 return SV_LANEMASK_LE
;
1590 case nir_intrinsic_load_subgroup_lt_mask
:
1591 return SV_LANEMASK_LT
;
1592 case nir_intrinsic_load_subgroup_invocation
:
1594 case nir_intrinsic_load_tess_coord
:
1595 return SV_TESS_COORD
;
1596 case nir_intrinsic_load_tess_level_inner
:
1597 return SV_TESS_INNER
;
1598 case nir_intrinsic_load_tess_level_outer
:
1599 return SV_TESS_OUTER
;
1600 case nir_intrinsic_load_vertex_id
:
1601 return SV_VERTEX_ID
;
1602 case nir_intrinsic_load_work_group_id
:
1605 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1606 nir_intrinsic_infos
[intr
].name
);
1613 Converter::visit(nir_intrinsic_instr
*insn
)
1615 nir_intrinsic_op op
= insn
->intrinsic
;
1618 case nir_intrinsic_load_uniform
: {
1619 LValues
&newDefs
= convert(&insn
->dest
);
1620 const DataType dType
= getDType(insn
);
1622 uint32_t coffset
= getIndirect(insn
, 0, 0, indirect
);
1623 for (uint8_t i
= 0; i
< insn
->num_components
; ++i
) {
1624 loadFrom(FILE_MEMORY_CONST
, 0, dType
, newDefs
[i
], 16 * coffset
, i
, indirect
);
1628 case nir_intrinsic_store_output
:
1629 case nir_intrinsic_store_per_vertex_output
: {
1631 DataType dType
= getSType(insn
->src
[0], false, false);
1632 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_store_output
? 1 : 2, 0, indirect
);
1634 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1635 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
1639 Value
*src
= getSrc(&insn
->src
[0], i
);
1640 switch (prog
->getType()) {
1641 case Program::TYPE_FRAGMENT
: {
1642 if (info
->out
[idx
].sn
== TGSI_SEMANTIC_POSITION
) {
1643 // TGSI uses a different interface than NIR, TGSI stores that
1644 // value in the z component, NIR in X
1646 src
= mkOp1v(OP_SAT
, TYPE_F32
, getScratch(), src
);
1650 case Program::TYPE_VERTEX
: {
1651 if (info
->io
.genUserClip
> 0 && idx
== clipVertexOutput
) {
1652 mkMov(clipVtx
[i
], src
);
1661 storeTo(insn
, FILE_SHADER_OUTPUT
, OP_EXPORT
, dType
, src
, idx
, i
+ offset
, indirect
);
1665 case nir_intrinsic_load_input
:
1666 case nir_intrinsic_load_interpolated_input
:
1667 case nir_intrinsic_load_output
: {
1668 LValues
&newDefs
= convert(&insn
->dest
);
1671 if (prog
->getType() == Program::TYPE_FRAGMENT
&&
1672 op
== nir_intrinsic_load_output
) {
1673 std::vector
<Value
*> defs
, srcs
;
1676 srcs
.push_back(getSSA());
1677 srcs
.push_back(getSSA());
1678 Value
*x
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 0));
1679 Value
*y
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 1));
1680 mkCvt(OP_CVT
, TYPE_U32
, srcs
[0], TYPE_F32
, x
)->rnd
= ROUND_Z
;
1681 mkCvt(OP_CVT
, TYPE_U32
, srcs
[1], TYPE_F32
, y
)->rnd
= ROUND_Z
;
1683 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LAYER
, 0)));
1684 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_SAMPLE_INDEX
, 0)));
1686 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1687 defs
.push_back(newDefs
[i
]);
1691 TexInstruction
*texi
= mkTex(OP_TXF
, TEX_TARGET_2D_MS_ARRAY
, 0, 0, defs
, srcs
);
1692 texi
->tex
.levelZero
= 1;
1693 texi
->tex
.mask
= mask
;
1694 texi
->tex
.useOffsets
= 0;
1695 texi
->tex
.r
= 0xffff;
1696 texi
->tex
.s
= 0xffff;
1698 info
->prop
.fp
.readsFramebuffer
= true;
1702 const DataType dType
= getDType(insn
);
1704 bool input
= op
!= nir_intrinsic_load_output
;
1708 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_load_interpolated_input
? 1 : 0, 0, indirect
);
1709 nv50_ir_varying
& vary
= input
? info
->in
[idx
] : info
->out
[idx
];
1711 // see load_barycentric_* handling
1712 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1713 mode
= translateInterpMode(&vary
, nvirOp
);
1714 if (op
== nir_intrinsic_load_interpolated_input
) {
1715 ImmediateValue immMode
;
1716 if (getSrc(&insn
->src
[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode
))
1717 mode
|= immMode
.reg
.data
.u32
;
1721 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1722 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1723 Symbol
*sym
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
);
1724 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1726 if (typeSizeof(dType
) == 8) {
1727 Value
*lo
= getSSA();
1728 Value
*hi
= getSSA();
1729 Instruction
*interp
;
1731 interp
= mkOp1(nvirOp
, TYPE_U32
, lo
, sym
);
1732 if (nvirOp
== OP_PINTERP
)
1733 interp
->setSrc(s
++, fp
.position
);
1734 if (mode
& NV50_IR_INTERP_OFFSET
)
1735 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1736 interp
->setInterpolate(mode
);
1737 interp
->setIndirect(0, 0, indirect
);
1739 Symbol
*sym1
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
+ 4);
1740 interp
= mkOp1(nvirOp
, TYPE_U32
, hi
, sym1
);
1741 if (nvirOp
== OP_PINTERP
)
1742 interp
->setSrc(s
++, fp
.position
);
1743 if (mode
& NV50_IR_INTERP_OFFSET
)
1744 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1745 interp
->setInterpolate(mode
);
1746 interp
->setIndirect(0, 0, indirect
);
1748 mkOp2(OP_MERGE
, dType
, newDefs
[i
], lo
, hi
);
1750 Instruction
*interp
= mkOp1(nvirOp
, dType
, newDefs
[i
], sym
);
1751 if (nvirOp
== OP_PINTERP
)
1752 interp
->setSrc(s
++, fp
.position
);
1753 if (mode
& NV50_IR_INTERP_OFFSET
)
1754 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1755 interp
->setInterpolate(mode
);
1756 interp
->setIndirect(0, 0, indirect
);
1759 mkLoad(dType
, newDefs
[i
], sym
, indirect
)->perPatch
= vary
.patch
;
1764 case nir_intrinsic_load_barycentric_at_offset
:
1765 case nir_intrinsic_load_barycentric_at_sample
:
1766 case nir_intrinsic_load_barycentric_centroid
:
1767 case nir_intrinsic_load_barycentric_pixel
:
1768 case nir_intrinsic_load_barycentric_sample
: {
1769 LValues
&newDefs
= convert(&insn
->dest
);
1772 if (op
== nir_intrinsic_load_barycentric_centroid
||
1773 op
== nir_intrinsic_load_barycentric_sample
) {
1774 mode
= NV50_IR_INTERP_CENTROID
;
1775 } else if (op
== nir_intrinsic_load_barycentric_at_offset
) {
1777 for (uint8_t c
= 0; c
< 2; c
++) {
1778 offs
[c
] = getScratch();
1779 mkOp2(OP_MIN
, TYPE_F32
, offs
[c
], getSrc(&insn
->src
[0], c
), loadImm(NULL
, 0.4375f
));
1780 mkOp2(OP_MAX
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, -0.5f
));
1781 mkOp2(OP_MUL
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, 4096.0f
));
1782 mkCvt(OP_CVT
, TYPE_S32
, offs
[c
], TYPE_F32
, offs
[c
]);
1784 mkOp3v(OP_INSBF
, TYPE_U32
, newDefs
[0], offs
[1], mkImm(0x1010), offs
[0]);
1786 mode
= NV50_IR_INTERP_OFFSET
;
1787 } else if (op
== nir_intrinsic_load_barycentric_pixel
) {
1788 mode
= NV50_IR_INTERP_DEFAULT
;
1789 } else if (op
== nir_intrinsic_load_barycentric_at_sample
) {
1790 info
->prop
.fp
.readsSampleLocations
= true;
1791 mkOp1(OP_PIXLD
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0], 0))->subOp
= NV50_IR_SUBOP_PIXLD_OFFSET
;
1792 mode
= NV50_IR_INTERP_OFFSET
;
1794 unreachable("all intrinsics already handled above");
1797 loadImm(newDefs
[1], mode
);
1800 case nir_intrinsic_discard
:
1801 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
);
1803 case nir_intrinsic_discard_if
: {
1804 Value
*pred
= getSSA(1, FILE_PREDICATE
);
1805 if (insn
->num_components
> 1) {
1806 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1810 mkCmp(OP_SET
, CC_NE
, TYPE_U8
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1811 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
)->setPredicate(CC_P
, pred
);
1814 case nir_intrinsic_load_base_vertex
:
1815 case nir_intrinsic_load_base_instance
:
1816 case nir_intrinsic_load_draw_id
:
1817 case nir_intrinsic_load_front_face
:
1818 case nir_intrinsic_load_helper_invocation
:
1819 case nir_intrinsic_load_instance_id
:
1820 case nir_intrinsic_load_invocation_id
:
1821 case nir_intrinsic_load_local_group_size
:
1822 case nir_intrinsic_load_local_invocation_id
:
1823 case nir_intrinsic_load_num_work_groups
:
1824 case nir_intrinsic_load_patch_vertices_in
:
1825 case nir_intrinsic_load_primitive_id
:
1826 case nir_intrinsic_load_sample_id
:
1827 case nir_intrinsic_load_sample_mask_in
:
1828 case nir_intrinsic_load_sample_pos
:
1829 case nir_intrinsic_load_subgroup_eq_mask
:
1830 case nir_intrinsic_load_subgroup_ge_mask
:
1831 case nir_intrinsic_load_subgroup_gt_mask
:
1832 case nir_intrinsic_load_subgroup_le_mask
:
1833 case nir_intrinsic_load_subgroup_lt_mask
:
1834 case nir_intrinsic_load_subgroup_invocation
:
1835 case nir_intrinsic_load_tess_coord
:
1836 case nir_intrinsic_load_tess_level_inner
:
1837 case nir_intrinsic_load_tess_level_outer
:
1838 case nir_intrinsic_load_vertex_id
:
1839 case nir_intrinsic_load_work_group_id
: {
1840 const DataType dType
= getDType(insn
);
1841 SVSemantic sv
= convert(op
);
1842 LValues
&newDefs
= convert(&insn
->dest
);
1844 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1846 if (typeSizeof(dType
) == 8)
1851 if (sv
== SV_TID
&& info
->prop
.cp
.numThreads
[i
] == 1) {
1854 Symbol
*sym
= mkSysVal(sv
, i
);
1855 Instruction
*rdsv
= mkOp1(OP_RDSV
, TYPE_U32
, def
, sym
);
1856 if (sv
== SV_TESS_OUTER
|| sv
== SV_TESS_INNER
)
1860 if (typeSizeof(dType
) == 8)
1861 mkOp2(OP_MERGE
, dType
, newDefs
[i
], def
, loadImm(getSSA(), 0u));
1866 case nir_intrinsic_load_subgroup_size
: {
1867 LValues
&newDefs
= convert(&insn
->dest
);
1868 loadImm(newDefs
[0], 32u);
1872 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos
[op
].name
);
1880 Converter::visit(nir_jump_instr
*insn
)
1882 switch (insn
->type
) {
1883 case nir_jump_return
:
1884 // TODO: this only works in the main function
1885 mkFlow(OP_BRA
, exit
, CC_ALWAYS
, NULL
);
1886 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::CROSS
);
1888 case nir_jump_break
:
1889 case nir_jump_continue
: {
1890 bool isBreak
= insn
->type
== nir_jump_break
;
1891 nir_block
*block
= insn
->instr
.block
;
1892 assert(!block
->successors
[1]);
1893 BasicBlock
*target
= convert(block
->successors
[0]);
1894 mkFlow(isBreak
? OP_BREAK
: OP_CONT
, target
, CC_ALWAYS
, NULL
);
1895 bb
->cfg
.attach(&target
->cfg
, isBreak
? Graph::Edge::CROSS
: Graph::Edge::BACK
);
1899 ERROR("unknown nir_jump_type %u\n", insn
->type
);
1907 Converter::visit(nir_load_const_instr
*insn
)
1909 assert(insn
->def
.bit_size
<= 64);
1911 LValues
&newDefs
= convert(&insn
->def
);
1912 for (int i
= 0; i
< insn
->def
.num_components
; i
++) {
1913 switch (insn
->def
.bit_size
) {
1915 loadImm(newDefs
[i
], insn
->value
.u64
[i
]);
1918 loadImm(newDefs
[i
], insn
->value
.u32
[i
]);
1921 loadImm(newDefs
[i
], insn
->value
.u16
[i
]);
1924 loadImm(newDefs
[i
], insn
->value
.u8
[i
]);
1931 #define DEFAULT_CHECKS \
1932 if (insn->dest.dest.ssa.num_components > 1) { \
1933 ERROR("nir_alu_instr only supported with 1 component!\n"); \
1936 if (insn->dest.write_mask != 1) { \
1937 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
1941 Converter::visit(nir_alu_instr
*insn
)
1943 const nir_op op
= insn
->op
;
1944 const nir_op_info
&info
= nir_op_infos
[op
];
1945 DataType dType
= getDType(insn
);
1946 const std::vector
<DataType
> sTypes
= getSTypes(insn
);
1948 Instruction
*oldPos
= this->bb
->getExit();
1960 case nir_op_fddx_coarse
:
1961 case nir_op_fddx_fine
:
1963 case nir_op_fddy_coarse
:
1964 case nir_op_fddy_fine
:
1983 case nir_op_imul_high
:
1984 case nir_op_umul_high
:
1991 case nir_op_pack_64_2x32_split
:
2009 LValues
&newDefs
= convert(&insn
->dest
);
2010 operation preOp
= preOperationNeeded(op
);
2011 if (preOp
!= OP_NOP
) {
2012 assert(info
.num_inputs
< 2);
2013 Value
*tmp
= getSSA(typeSizeof(dType
));
2014 Instruction
*i0
= mkOp(preOp
, dType
, tmp
);
2015 Instruction
*i1
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2016 if (info
.num_inputs
) {
2017 i0
->setSrc(0, getSrc(&insn
->src
[0]));
2020 i1
->subOp
= getSubOp(op
);
2022 Instruction
*i
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2023 for (unsigned s
= 0u; s
< info
.num_inputs
; ++s
) {
2024 i
->setSrc(s
, getSrc(&insn
->src
[s
]));
2026 i
->subOp
= getSubOp(op
);
2030 case nir_op_ifind_msb
:
2031 case nir_op_ufind_msb
: {
2033 LValues
&newDefs
= convert(&insn
->dest
);
2035 mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2038 case nir_op_fround_even
: {
2040 LValues
&newDefs
= convert(&insn
->dest
);
2041 mkCvt(OP_CVT
, dType
, newDefs
[0], dType
, getSrc(&insn
->src
[0]))->rnd
= ROUND_NI
;
2044 // convert instructions
2058 case nir_op_u2u64
: {
2060 LValues
&newDefs
= convert(&insn
->dest
);
2061 Instruction
*i
= mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2062 if (op
== nir_op_f2i32
|| op
== nir_op_f2i64
|| op
== nir_op_f2u32
|| op
== nir_op_f2u64
)
2064 i
->sType
= sTypes
[0];
2067 // compare instructions
2077 case nir_op_ine32
: {
2079 LValues
&newDefs
= convert(&insn
->dest
);
2080 Instruction
*i
= mkCmp(getOperation(op
),
2085 getSrc(&insn
->src
[0]),
2086 getSrc(&insn
->src
[1]));
2087 if (info
.num_inputs
== 3)
2088 i
->setSrc(2, getSrc(&insn
->src
[2]));
2089 i
->sType
= sTypes
[0];
2092 // those are weird ALU ops and need special handling, because
2093 // 1. they are always componend based
2094 // 2. they basically just merge multiple values into one data type
2100 LValues
&newDefs
= convert(&insn
->dest
);
2101 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2102 mkMov(newDefs
[c
], getSrc(&insn
->src
[c
]), dType
);
2107 case nir_op_pack_64_2x32
: {
2108 LValues
&newDefs
= convert(&insn
->dest
);
2109 Instruction
*merge
= mkOp(OP_MERGE
, dType
, newDefs
[0]);
2110 merge
->setSrc(0, getSrc(&insn
->src
[0], 0));
2111 merge
->setSrc(1, getSrc(&insn
->src
[0], 1));
2114 case nir_op_pack_half_2x16_split
: {
2115 LValues
&newDefs
= convert(&insn
->dest
);
2116 Value
*tmpH
= getSSA();
2117 Value
*tmpL
= getSSA();
2119 mkCvt(OP_CVT
, TYPE_F16
, tmpL
, TYPE_F32
, getSrc(&insn
->src
[0]));
2120 mkCvt(OP_CVT
, TYPE_F16
, tmpH
, TYPE_F32
, getSrc(&insn
->src
[1]));
2121 mkOp3(OP_INSBF
, TYPE_U32
, newDefs
[0], tmpH
, mkImm(0x1010), tmpL
);
2124 case nir_op_unpack_half_2x16_split_x
:
2125 case nir_op_unpack_half_2x16_split_y
: {
2126 LValues
&newDefs
= convert(&insn
->dest
);
2127 Instruction
*cvt
= mkCvt(OP_CVT
, TYPE_F32
, newDefs
[0], TYPE_F16
, getSrc(&insn
->src
[0]));
2128 if (op
== nir_op_unpack_half_2x16_split_y
)
2132 case nir_op_unpack_64_2x32
: {
2133 LValues
&newDefs
= convert(&insn
->dest
);
2134 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, newDefs
[1]);
2137 case nir_op_unpack_64_2x32_split_x
: {
2138 LValues
&newDefs
= convert(&insn
->dest
);
2139 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, getSSA());
2142 case nir_op_unpack_64_2x32_split_y
: {
2143 LValues
&newDefs
= convert(&insn
->dest
);
2144 mkOp1(OP_SPLIT
, dType
, getSSA(), getSrc(&insn
->src
[0]))->setDef(1, newDefs
[0]);
2147 // special instructions
2149 case nir_op_isign
: {
2152 if (::isFloatType(dType
))
2157 LValues
&newDefs
= convert(&insn
->dest
);
2158 LValue
*val0
= getScratch();
2159 LValue
*val1
= getScratch();
2160 mkCmp(OP_SET
, CC_GT
, iType
, val0
, dType
, getSrc(&insn
->src
[0]), zero
);
2161 mkCmp(OP_SET
, CC_LT
, iType
, val1
, dType
, getSrc(&insn
->src
[0]), zero
);
2163 if (dType
== TYPE_F64
) {
2164 mkOp2(OP_SUB
, iType
, val0
, val0
, val1
);
2165 mkCvt(OP_CVT
, TYPE_F64
, newDefs
[0], iType
, val0
);
2166 } else if (dType
== TYPE_S64
|| dType
== TYPE_U64
) {
2167 mkOp2(OP_SUB
, iType
, val0
, val1
, val0
);
2168 mkOp2(OP_SHR
, iType
, val1
, val0
, loadImm(NULL
, 31));
2169 mkOp2(OP_MERGE
, dType
, newDefs
[0], val0
, val1
);
2170 } else if (::isFloatType(dType
))
2171 mkOp2(OP_SUB
, iType
, newDefs
[0], val0
, val1
);
2173 mkOp2(OP_SUB
, iType
, newDefs
[0], val1
, val0
);
2177 case nir_op_b32csel
: {
2179 LValues
&newDefs
= convert(&insn
->dest
);
2180 mkCmp(OP_SLCT
, CC_NE
, dType
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[1]), getSrc(&insn
->src
[2]), getSrc(&insn
->src
[0]));
2183 case nir_op_ibitfield_extract
:
2184 case nir_op_ubitfield_extract
: {
2186 Value
*tmp
= getSSA();
2187 LValues
&newDefs
= convert(&insn
->dest
);
2188 mkOp3(OP_INSBF
, dType
, tmp
, getSrc(&insn
->src
[2]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2189 mkOp2(OP_EXTBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), tmp
);
2194 LValues
&newDefs
= convert(&insn
->dest
);
2195 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2198 case nir_op_bitfield_insert
: {
2200 LValues
&newDefs
= convert(&insn
->dest
);
2201 LValue
*temp
= getSSA();
2202 mkOp3(OP_INSBF
, TYPE_U32
, temp
, getSrc(&insn
->src
[3]), mkImm(0x808), getSrc(&insn
->src
[2]));
2203 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[1]), temp
, getSrc(&insn
->src
[0]));
2206 case nir_op_bit_count
: {
2208 LValues
&newDefs
= convert(&insn
->dest
);
2209 mkOp2(OP_POPCNT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), getSrc(&insn
->src
[0]));
2212 case nir_op_bitfield_reverse
: {
2214 LValues
&newDefs
= convert(&insn
->dest
);
2215 mkOp2(OP_EXTBF
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2218 case nir_op_find_lsb
: {
2220 LValues
&newDefs
= convert(&insn
->dest
);
2221 Value
*tmp
= getSSA();
2222 mkOp2(OP_EXTBF
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2223 mkOp1(OP_BFIND
, TYPE_U32
, newDefs
[0], tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
2226 // boolean conversions
2227 case nir_op_b2f32
: {
2229 LValues
&newDefs
= convert(&insn
->dest
);
2230 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1.0f
));
2233 case nir_op_b2f64
: {
2235 LValues
&newDefs
= convert(&insn
->dest
);
2236 Value
*tmp
= getSSA(4);
2237 mkOp2(OP_AND
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), loadImm(NULL
, 0x3ff00000));
2238 mkOp2(OP_MERGE
, TYPE_U64
, newDefs
[0], loadImm(NULL
, 0), tmp
);
2242 case nir_op_i2b32
: {
2244 LValues
&newDefs
= convert(&insn
->dest
);
2246 if (typeSizeof(sTypes
[0]) == 8) {
2247 src1
= loadImm(getSSA(8), 0.0);
2251 CondCode cc
= op
== nir_op_f2b32
? CC_NEU
: CC_NE
;
2252 mkCmp(OP_SET
, cc
, TYPE_U32
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[0]), src1
);
2255 case nir_op_b2i32
: {
2257 LValues
&newDefs
= convert(&insn
->dest
);
2258 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2261 case nir_op_b2i64
: {
2263 LValues
&newDefs
= convert(&insn
->dest
);
2264 LValue
*def
= getScratch();
2265 mkOp2(OP_AND
, TYPE_U32
, def
, getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2266 mkOp2(OP_MERGE
, TYPE_S64
, newDefs
[0], def
, loadImm(NULL
, 0));
2270 ERROR("unknown nir_op %s\n", info
.name
);
2275 oldPos
= this->bb
->getEntry();
2276 oldPos
->precise
= insn
->exact
;
2279 if (unlikely(!oldPos
))
2282 while (oldPos
->next
) {
2283 oldPos
= oldPos
->next
;
2284 oldPos
->precise
= insn
->exact
;
2286 oldPos
->saturate
= insn
->dest
.saturate
;
2290 #undef DEFAULT_CHECKS
2297 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
2298 nir_print_shader(nir
, stderr
);
2300 NIR_PASS_V(nir
, nir_lower_io
, nir_var_all
, type_size
, (nir_lower_io_options
)0);
2301 NIR_PASS_V(nir
, nir_lower_regs_to_ssa
);
2302 NIR_PASS_V(nir
, nir_lower_load_const_to_scalar
);
2303 NIR_PASS_V(nir
, nir_lower_vars_to_ssa
);
2304 NIR_PASS_V(nir
, nir_lower_alu_to_scalar
);
2305 NIR_PASS_V(nir
, nir_lower_phis_to_scalar
);
2309 NIR_PASS(progress
, nir
, nir_copy_prop
);
2310 NIR_PASS(progress
, nir
, nir_opt_remove_phis
);
2311 NIR_PASS(progress
, nir
, nir_opt_trivial_continues
);
2312 NIR_PASS(progress
, nir
, nir_opt_cse
);
2313 NIR_PASS(progress
, nir
, nir_opt_algebraic
);
2314 NIR_PASS(progress
, nir
, nir_opt_constant_folding
);
2315 NIR_PASS(progress
, nir
, nir_copy_prop
);
2316 NIR_PASS(progress
, nir
, nir_opt_dce
);
2317 NIR_PASS(progress
, nir
, nir_opt_dead_cf
);
2320 NIR_PASS_V(nir
, nir_lower_bool_to_int32
);
2321 NIR_PASS_V(nir
, nir_lower_locals_to_regs
);
2322 NIR_PASS_V(nir
, nir_remove_dead_variables
, nir_var_function_temp
);
2323 NIR_PASS_V(nir
, nir_convert_from_ssa
, true);
2325 // Garbage collect dead instructions
2329 ERROR("Couldn't prase NIR!\n");
2333 if (!assignSlots()) {
2334 ERROR("Couldn't assign slots!\n");
2338 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
2339 nir_print_shader(nir
, stderr
);
2341 nir_foreach_function(function
, nir
) {
2342 if (!visit(function
))
2349 } // unnamed namespace
2354 Program::makeFromNIR(struct nv50_ir_prog_info
*info
)
2356 nir_shader
*nir
= (nir_shader
*)info
->bin
.source
;
2357 Converter
converter(this, nir
, info
);
2358 bool result
= converter
.run();
2361 LoweringHelper lowering
;
2363 tlsSize
= info
->bin
.tlsSpace
;
2367 } // namespace nv50_ir