2 * Copyright 2017 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Karol Herbst <kherbst@redhat.com>
25 #include "compiler/nir/nir.h"
27 #include "util/u_debug.h"
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
37 #include <tr1/unordered_map>
43 #if __cplusplus >= 201103L
45 using std::unordered_map
;
48 using std::tr1::unordered_map
;
51 using namespace nv50_ir
;
54 type_size(const struct glsl_type
*type
)
56 return glsl_count_attribute_slots(type
, false);
59 class Converter
: public ConverterCommon
62 Converter(Program
*, nir_shader
*, nv50_ir_prog_info
*);
66 typedef std::vector
<LValue
*> LValues
;
67 typedef unordered_map
<unsigned, LValues
> NirDefMap
;
68 typedef unordered_map
<unsigned, BasicBlock
*> NirBlockMap
;
70 LValues
& convert(nir_alu_dest
*);
71 BasicBlock
* convert(nir_block
*);
72 LValues
& convert(nir_dest
*);
73 LValues
& convert(nir_register
*);
74 LValues
& convert(nir_ssa_def
*);
76 Value
* getSrc(nir_alu_src
*, uint8_t component
= 0);
77 Value
* getSrc(nir_register
*, uint8_t);
78 Value
* getSrc(nir_src
*, uint8_t, bool indirect
= false);
79 Value
* getSrc(nir_ssa_def
*, uint8_t);
81 // returned value is the constant part of the given source (either the
82 // nir_src or the selected source component of an intrinsic). Even though
83 // this is mostly an optimization to be able to skip indirects in a few
84 // cases, sometimes we require immediate values or set some fileds on
85 // instructions (e.g. tex) in order for codegen to consume those.
86 // If the found value has not a constant part, the Value gets returned
87 // through the Value parameter.
88 uint32_t getIndirect(nir_src
*, uint8_t, Value
*&);
89 uint32_t getIndirect(nir_intrinsic_instr
*, uint8_t s
, uint8_t c
, Value
*&);
91 uint32_t getSlotAddress(nir_intrinsic_instr
*, uint8_t idx
, uint8_t slot
);
93 void setInterpolate(nv50_ir_varying
*,
98 Instruction
*loadFrom(DataFile
, uint8_t, DataType
, Value
*def
, uint32_t base
,
99 uint8_t c
, Value
*indirect0
= NULL
,
100 Value
*indirect1
= NULL
, bool patch
= false);
101 void storeTo(nir_intrinsic_instr
*, DataFile
, operation
, DataType
,
102 Value
*src
, uint8_t idx
, uint8_t c
, Value
*indirect0
= NULL
,
103 Value
*indirect1
= NULL
);
105 bool isFloatType(nir_alu_type
);
106 bool isSignedType(nir_alu_type
);
107 bool isResultFloat(nir_op
);
108 bool isResultSigned(nir_op
);
110 DataType
getDType(nir_alu_instr
*);
111 DataType
getDType(nir_intrinsic_instr
*);
112 DataType
getDType(nir_op
, uint8_t);
114 std::vector
<DataType
> getSTypes(nir_alu_instr
*);
115 DataType
getSType(nir_src
&, bool isFloat
, bool isSigned
);
117 operation
getOperation(nir_op
);
118 operation
preOperationNeeded(nir_op
);
120 int getSubOp(nir_op
);
122 CondCode
getCondCode(nir_op
);
127 bool visit(nir_alu_instr
*);
128 bool visit(nir_block
*);
129 bool visit(nir_cf_node
*);
130 bool visit(nir_function
*);
131 bool visit(nir_if
*);
132 bool visit(nir_instr
*);
133 bool visit(nir_intrinsic_instr
*);
134 bool visit(nir_jump_instr
*);
135 bool visit(nir_load_const_instr
*);
136 bool visit(nir_loop
*);
143 unsigned int curLoopDepth
;
148 int clipVertexOutput
;
157 Converter::Converter(Program
*prog
, nir_shader
*nir
, nv50_ir_prog_info
*info
)
158 : ConverterCommon(prog
, info
),
163 zero
= mkImm((uint32_t)0);
167 Converter::convert(nir_block
*block
)
169 NirBlockMap::iterator it
= blocks
.find(block
->index
);
170 if (it
!= blocks
.end())
173 BasicBlock
*bb
= new BasicBlock(func
);
174 blocks
[block
->index
] = bb
;
179 Converter::isFloatType(nir_alu_type type
)
181 return nir_alu_type_get_base_type(type
) == nir_type_float
;
185 Converter::isSignedType(nir_alu_type type
)
187 return nir_alu_type_get_base_type(type
) == nir_type_int
;
191 Converter::isResultFloat(nir_op op
)
193 const nir_op_info
&info
= nir_op_infos
[op
];
194 if (info
.output_type
!= nir_type_invalid
)
195 return isFloatType(info
.output_type
);
197 ERROR("isResultFloat not implemented for %s\n", nir_op_infos
[op
].name
);
203 Converter::isResultSigned(nir_op op
)
206 // there is no umul and we get wrong results if we treat all muls as signed
211 const nir_op_info
&info
= nir_op_infos
[op
];
212 if (info
.output_type
!= nir_type_invalid
)
213 return isSignedType(info
.output_type
);
214 ERROR("isResultSigned not implemented for %s\n", nir_op_infos
[op
].name
);
221 Converter::getDType(nir_alu_instr
*insn
)
223 if (insn
->dest
.dest
.is_ssa
)
224 return getDType(insn
->op
, insn
->dest
.dest
.ssa
.bit_size
);
226 return getDType(insn
->op
, insn
->dest
.dest
.reg
.reg
->bit_size
);
230 Converter::getDType(nir_intrinsic_instr
*insn
)
232 if (insn
->dest
.is_ssa
)
233 return typeOfSize(insn
->dest
.ssa
.bit_size
/ 8, false, false);
235 return typeOfSize(insn
->dest
.reg
.reg
->bit_size
/ 8, false, false);
239 Converter::getDType(nir_op op
, uint8_t bitSize
)
241 DataType ty
= typeOfSize(bitSize
/ 8, isResultFloat(op
), isResultSigned(op
));
242 if (ty
== TYPE_NONE
) {
243 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos
[op
].name
, bitSize
);
249 std::vector
<DataType
>
250 Converter::getSTypes(nir_alu_instr
*insn
)
252 const nir_op_info
&info
= nir_op_infos
[insn
->op
];
253 std::vector
<DataType
> res(info
.num_inputs
);
255 for (uint8_t i
= 0; i
< info
.num_inputs
; ++i
) {
256 if (info
.input_types
[i
] != nir_type_invalid
) {
257 res
[i
] = getSType(insn
->src
[i
].src
, isFloatType(info
.input_types
[i
]), isSignedType(info
.input_types
[i
]));
259 ERROR("getSType not implemented for %s idx %u\n", info
.name
, i
);
270 Converter::getSType(nir_src
&src
, bool isFloat
, bool isSigned
)
274 bitSize
= src
.ssa
->bit_size
;
276 bitSize
= src
.reg
.reg
->bit_size
;
278 DataType ty
= typeOfSize(bitSize
/ 8, isFloat
, isSigned
);
279 if (ty
== TYPE_NONE
) {
287 ERROR("couldn't get Type for %s with bitSize %u\n", str
, bitSize
);
294 Converter::getOperation(nir_op op
)
297 // basic ops with float and int variants
307 case nir_op_ifind_msb
:
308 case nir_op_ufind_msb
:
330 case nir_op_fddx_coarse
:
331 case nir_op_fddx_fine
:
334 case nir_op_fddy_coarse
:
335 case nir_op_fddy_fine
:
353 case nir_op_pack_64_2x32_split
:
367 case nir_op_imul_high
:
368 case nir_op_umul_high
:
416 ERROR("couldn't get operation for op %s\n", nir_op_infos
[op
].name
);
423 Converter::preOperationNeeded(nir_op op
)
435 Converter::getSubOp(nir_op op
)
438 case nir_op_imul_high
:
439 case nir_op_umul_high
:
440 return NV50_IR_SUBOP_MUL_HIGH
;
447 Converter::getCondCode(nir_op op
)
466 ERROR("couldn't get CondCode for op %s\n", nir_op_infos
[op
].name
);
473 Converter::convert(nir_alu_dest
*dest
)
475 return convert(&dest
->dest
);
479 Converter::convert(nir_dest
*dest
)
482 return convert(&dest
->ssa
);
483 if (dest
->reg
.indirect
) {
484 ERROR("no support for indirects.");
487 return convert(dest
->reg
.reg
);
491 Converter::convert(nir_register
*reg
)
493 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
494 if (it
!= regDefs
.end())
497 LValues
newDef(reg
->num_components
);
498 for (uint8_t i
= 0; i
< reg
->num_components
; i
++)
499 newDef
[i
] = getScratch(std::max(4, reg
->bit_size
/ 8));
500 return regDefs
[reg
->index
] = newDef
;
504 Converter::convert(nir_ssa_def
*def
)
506 NirDefMap::iterator it
= ssaDefs
.find(def
->index
);
507 if (it
!= ssaDefs
.end())
510 LValues
newDef(def
->num_components
);
511 for (uint8_t i
= 0; i
< def
->num_components
; i
++)
512 newDef
[i
] = getSSA(std::max(4, def
->bit_size
/ 8));
513 return ssaDefs
[def
->index
] = newDef
;
517 Converter::getSrc(nir_alu_src
*src
, uint8_t component
)
519 if (src
->abs
|| src
->negate
) {
520 ERROR("modifiers currently not supported on nir_alu_src\n");
523 return getSrc(&src
->src
, src
->swizzle
[component
]);
527 Converter::getSrc(nir_register
*reg
, uint8_t idx
)
529 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
530 if (it
== regDefs
.end())
531 return convert(reg
)[idx
];
532 return it
->second
[idx
];
536 Converter::getSrc(nir_src
*src
, uint8_t idx
, bool indirect
)
539 return getSrc(src
->ssa
, idx
);
541 if (src
->reg
.indirect
) {
543 return getSrc(src
->reg
.indirect
, idx
);
544 ERROR("no support for indirects.");
549 return getSrc(src
->reg
.reg
, idx
);
553 Converter::getSrc(nir_ssa_def
*src
, uint8_t idx
)
555 NirDefMap::iterator it
= ssaDefs
.find(src
->index
);
556 if (it
== ssaDefs
.end()) {
557 ERROR("SSA value %u not found\n", src
->index
);
561 return it
->second
[idx
];
565 Converter::getIndirect(nir_src
*src
, uint8_t idx
, Value
*&indirect
)
567 nir_const_value
*offset
= nir_src_as_const_value(*src
);
571 return offset
->u32
[0];
574 indirect
= getSrc(src
, idx
, true);
579 Converter::getIndirect(nir_intrinsic_instr
*insn
, uint8_t s
, uint8_t c
, Value
*&indirect
)
581 int32_t idx
= nir_intrinsic_base(insn
) + getIndirect(&insn
->src
[s
], c
, indirect
);
583 indirect
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), indirect
, loadImm(NULL
, 4));
588 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot
, unsigned *name
, unsigned *index
)
590 assert(name
&& index
);
592 if (slot
>= VERT_ATTRIB_MAX
) {
593 ERROR("invalid varying slot %u\n", slot
);
598 if (slot
>= VERT_ATTRIB_GENERIC0
&&
599 slot
< VERT_ATTRIB_GENERIC0
+ VERT_ATTRIB_GENERIC_MAX
) {
600 *name
= TGSI_SEMANTIC_GENERIC
;
601 *index
= slot
- VERT_ATTRIB_GENERIC0
;
605 if (slot
>= VERT_ATTRIB_TEX0
&&
606 slot
< VERT_ATTRIB_TEX0
+ VERT_ATTRIB_TEX_MAX
) {
607 *name
= TGSI_SEMANTIC_TEXCOORD
;
608 *index
= slot
- VERT_ATTRIB_TEX0
;
613 case VERT_ATTRIB_COLOR0
:
614 *name
= TGSI_SEMANTIC_COLOR
;
617 case VERT_ATTRIB_COLOR1
:
618 *name
= TGSI_SEMANTIC_COLOR
;
621 case VERT_ATTRIB_EDGEFLAG
:
622 *name
= TGSI_SEMANTIC_EDGEFLAG
;
625 case VERT_ATTRIB_FOG
:
626 *name
= TGSI_SEMANTIC_FOG
;
629 case VERT_ATTRIB_NORMAL
:
630 *name
= TGSI_SEMANTIC_NORMAL
;
633 case VERT_ATTRIB_POS
:
634 *name
= TGSI_SEMANTIC_POSITION
;
637 case VERT_ATTRIB_POINT_SIZE
:
638 *name
= TGSI_SEMANTIC_PSIZE
;
642 ERROR("unknown vert attrib slot %u\n", slot
);
649 varying_slot_to_tgsi_semantic(gl_varying_slot slot
, unsigned *name
, unsigned *index
)
651 assert(name
&& index
);
653 if (slot
>= VARYING_SLOT_TESS_MAX
) {
654 ERROR("invalid varying slot %u\n", slot
);
659 if (slot
>= VARYING_SLOT_PATCH0
) {
660 *name
= TGSI_SEMANTIC_PATCH
;
661 *index
= slot
- VARYING_SLOT_PATCH0
;
665 if (slot
>= VARYING_SLOT_VAR0
) {
666 *name
= TGSI_SEMANTIC_GENERIC
;
667 *index
= slot
- VARYING_SLOT_VAR0
;
671 if (slot
>= VARYING_SLOT_TEX0
&& slot
<= VARYING_SLOT_TEX7
) {
672 *name
= TGSI_SEMANTIC_TEXCOORD
;
673 *index
= slot
- VARYING_SLOT_TEX0
;
678 case VARYING_SLOT_BFC0
:
679 *name
= TGSI_SEMANTIC_BCOLOR
;
682 case VARYING_SLOT_BFC1
:
683 *name
= TGSI_SEMANTIC_BCOLOR
;
686 case VARYING_SLOT_CLIP_DIST0
:
687 *name
= TGSI_SEMANTIC_CLIPDIST
;
690 case VARYING_SLOT_CLIP_DIST1
:
691 *name
= TGSI_SEMANTIC_CLIPDIST
;
694 case VARYING_SLOT_CLIP_VERTEX
:
695 *name
= TGSI_SEMANTIC_CLIPVERTEX
;
698 case VARYING_SLOT_COL0
:
699 *name
= TGSI_SEMANTIC_COLOR
;
702 case VARYING_SLOT_COL1
:
703 *name
= TGSI_SEMANTIC_COLOR
;
706 case VARYING_SLOT_EDGE
:
707 *name
= TGSI_SEMANTIC_EDGEFLAG
;
710 case VARYING_SLOT_FACE
:
711 *name
= TGSI_SEMANTIC_FACE
;
714 case VARYING_SLOT_FOGC
:
715 *name
= TGSI_SEMANTIC_FOG
;
718 case VARYING_SLOT_LAYER
:
719 *name
= TGSI_SEMANTIC_LAYER
;
722 case VARYING_SLOT_PNTC
:
723 *name
= TGSI_SEMANTIC_PCOORD
;
726 case VARYING_SLOT_POS
:
727 *name
= TGSI_SEMANTIC_POSITION
;
730 case VARYING_SLOT_PRIMITIVE_ID
:
731 *name
= TGSI_SEMANTIC_PRIMID
;
734 case VARYING_SLOT_PSIZ
:
735 *name
= TGSI_SEMANTIC_PSIZE
;
738 case VARYING_SLOT_TESS_LEVEL_INNER
:
739 *name
= TGSI_SEMANTIC_TESSINNER
;
742 case VARYING_SLOT_TESS_LEVEL_OUTER
:
743 *name
= TGSI_SEMANTIC_TESSOUTER
;
746 case VARYING_SLOT_VIEWPORT
:
747 *name
= TGSI_SEMANTIC_VIEWPORT_INDEX
;
751 ERROR("unknown varying slot %u\n", slot
);
758 frag_result_to_tgsi_semantic(unsigned slot
, unsigned *name
, unsigned *index
)
760 if (slot
>= FRAG_RESULT_DATA0
) {
761 *name
= TGSI_SEMANTIC_COLOR
;
762 *index
= slot
- FRAG_RESULT_COLOR
- 2; // intentional
767 case FRAG_RESULT_COLOR
:
768 *name
= TGSI_SEMANTIC_COLOR
;
771 case FRAG_RESULT_DEPTH
:
772 *name
= TGSI_SEMANTIC_POSITION
;
775 case FRAG_RESULT_SAMPLE_MASK
:
776 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
780 ERROR("unknown frag result slot %u\n", slot
);
786 // copy of _mesa_sysval_to_semantic
788 system_val_to_tgsi_semantic(unsigned val
, unsigned *name
, unsigned *index
)
793 case SYSTEM_VALUE_VERTEX_ID
:
794 *name
= TGSI_SEMANTIC_VERTEXID
;
796 case SYSTEM_VALUE_INSTANCE_ID
:
797 *name
= TGSI_SEMANTIC_INSTANCEID
;
799 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
:
800 *name
= TGSI_SEMANTIC_VERTEXID_NOBASE
;
802 case SYSTEM_VALUE_BASE_VERTEX
:
803 *name
= TGSI_SEMANTIC_BASEVERTEX
;
805 case SYSTEM_VALUE_BASE_INSTANCE
:
806 *name
= TGSI_SEMANTIC_BASEINSTANCE
;
808 case SYSTEM_VALUE_DRAW_ID
:
809 *name
= TGSI_SEMANTIC_DRAWID
;
813 case SYSTEM_VALUE_INVOCATION_ID
:
814 *name
= TGSI_SEMANTIC_INVOCATIONID
;
818 case SYSTEM_VALUE_FRAG_COORD
:
819 *name
= TGSI_SEMANTIC_POSITION
;
821 case SYSTEM_VALUE_FRONT_FACE
:
822 *name
= TGSI_SEMANTIC_FACE
;
824 case SYSTEM_VALUE_SAMPLE_ID
:
825 *name
= TGSI_SEMANTIC_SAMPLEID
;
827 case SYSTEM_VALUE_SAMPLE_POS
:
828 *name
= TGSI_SEMANTIC_SAMPLEPOS
;
830 case SYSTEM_VALUE_SAMPLE_MASK_IN
:
831 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
833 case SYSTEM_VALUE_HELPER_INVOCATION
:
834 *name
= TGSI_SEMANTIC_HELPER_INVOCATION
;
837 // Tessellation shader
838 case SYSTEM_VALUE_TESS_COORD
:
839 *name
= TGSI_SEMANTIC_TESSCOORD
;
841 case SYSTEM_VALUE_VERTICES_IN
:
842 *name
= TGSI_SEMANTIC_VERTICESIN
;
844 case SYSTEM_VALUE_PRIMITIVE_ID
:
845 *name
= TGSI_SEMANTIC_PRIMID
;
847 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
848 *name
= TGSI_SEMANTIC_TESSOUTER
;
850 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
851 *name
= TGSI_SEMANTIC_TESSINNER
;
855 case SYSTEM_VALUE_LOCAL_INVOCATION_ID
:
856 *name
= TGSI_SEMANTIC_THREAD_ID
;
858 case SYSTEM_VALUE_WORK_GROUP_ID
:
859 *name
= TGSI_SEMANTIC_BLOCK_ID
;
861 case SYSTEM_VALUE_NUM_WORK_GROUPS
:
862 *name
= TGSI_SEMANTIC_GRID_SIZE
;
864 case SYSTEM_VALUE_LOCAL_GROUP_SIZE
:
865 *name
= TGSI_SEMANTIC_BLOCK_SIZE
;
869 case SYSTEM_VALUE_SUBGROUP_SIZE
:
870 *name
= TGSI_SEMANTIC_SUBGROUP_SIZE
;
872 case SYSTEM_VALUE_SUBGROUP_INVOCATION
:
873 *name
= TGSI_SEMANTIC_SUBGROUP_INVOCATION
;
875 case SYSTEM_VALUE_SUBGROUP_EQ_MASK
:
876 *name
= TGSI_SEMANTIC_SUBGROUP_EQ_MASK
;
878 case SYSTEM_VALUE_SUBGROUP_GE_MASK
:
879 *name
= TGSI_SEMANTIC_SUBGROUP_GE_MASK
;
881 case SYSTEM_VALUE_SUBGROUP_GT_MASK
:
882 *name
= TGSI_SEMANTIC_SUBGROUP_GT_MASK
;
884 case SYSTEM_VALUE_SUBGROUP_LE_MASK
:
885 *name
= TGSI_SEMANTIC_SUBGROUP_LE_MASK
;
887 case SYSTEM_VALUE_SUBGROUP_LT_MASK
:
888 *name
= TGSI_SEMANTIC_SUBGROUP_LT_MASK
;
892 ERROR("unknown system value %u\n", val
);
899 Converter::setInterpolate(nv50_ir_varying
*var
,
905 case INTERP_MODE_FLAT
:
908 case INTERP_MODE_NONE
:
909 if (semantic
== TGSI_SEMANTIC_COLOR
)
911 else if (semantic
== TGSI_SEMANTIC_POSITION
)
914 case INTERP_MODE_NOPERSPECTIVE
:
917 case INTERP_MODE_SMOOTH
:
920 var
->centroid
= centroid
;
924 calcSlots(const glsl_type
*type
, Program::Type stage
, const shader_info
&info
,
925 bool input
, const nir_variable
*var
)
927 if (!type
->is_array())
928 return type
->count_attribute_slots(false);
932 case Program::TYPE_GEOMETRY
:
933 slots
= type
->uniform_locations();
935 slots
/= info
.gs
.vertices_in
;
937 case Program::TYPE_TESSELLATION_CONTROL
:
938 case Program::TYPE_TESSELLATION_EVAL
:
939 // remove first dimension
940 if (var
->data
.patch
|| (!input
&& stage
== Program::TYPE_TESSELLATION_EVAL
))
941 slots
= type
->uniform_locations();
943 slots
= type
->fields
.array
->uniform_locations();
946 slots
= type
->count_attribute_slots(false);
953 bool Converter::assignSlots() {
957 info
->io
.viewportId
= -1;
960 // we have to fixup the uniform locations for arrays
961 unsigned numImages
= 0;
962 nir_foreach_variable(var
, &nir
->uniforms
) {
963 const glsl_type
*type
= var
->type
;
964 if (!type
->without_array()->is_image())
966 var
->data
.driver_location
= numImages
;
967 numImages
+= type
->is_array() ? type
->arrays_of_arrays_size() : 1;
970 nir_foreach_variable(var
, &nir
->inputs
) {
971 const glsl_type
*type
= var
->type
;
972 int slot
= var
->data
.location
;
973 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, true, var
);
974 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
975 : type
->component_slots();
976 uint32_t frac
= var
->data
.location_frac
;
977 uint32_t vary
= var
->data
.driver_location
;
979 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
984 assert(vary
+ slots
<= PIPE_MAX_SHADER_INPUTS
);
986 switch(prog
->getType()) {
987 case Program::TYPE_FRAGMENT
:
988 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
989 for (uint16_t i
= 0; i
< slots
; ++i
) {
990 setInterpolate(&info
->in
[vary
+ i
], var
->data
.interpolation
,
991 var
->data
.centroid
| var
->data
.sample
, name
);
994 case Program::TYPE_GEOMETRY
:
995 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
997 case Program::TYPE_TESSELLATION_CONTROL
:
998 case Program::TYPE_TESSELLATION_EVAL
:
999 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1000 if (var
->data
.patch
&& name
== TGSI_SEMANTIC_PATCH
)
1001 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1003 case Program::TYPE_VERTEX
:
1004 vert_attrib_to_tgsi_semantic((gl_vert_attrib
)slot
, &name
, &index
);
1006 case TGSI_SEMANTIC_EDGEFLAG
:
1007 info
->io
.edgeFlagIn
= vary
;
1014 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1018 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1019 info
->in
[vary
].id
= vary
;
1020 info
->in
[vary
].patch
= var
->data
.patch
;
1021 info
->in
[vary
].sn
= name
;
1022 info
->in
[vary
].si
= index
+ i
;
1023 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1025 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1027 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1029 info
->in
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1031 info
->numInputs
= std::max
<uint8_t>(info
->numInputs
, vary
);
1034 info
->numOutputs
= 0;
1035 nir_foreach_variable(var
, &nir
->outputs
) {
1036 const glsl_type
*type
= var
->type
;
1037 int slot
= var
->data
.location
;
1038 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, false, var
);
1039 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
1040 : type
->component_slots();
1041 uint32_t frac
= var
->data
.location_frac
;
1042 uint32_t vary
= var
->data
.driver_location
;
1044 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
1049 assert(vary
< PIPE_MAX_SHADER_OUTPUTS
);
1051 switch(prog
->getType()) {
1052 case Program::TYPE_FRAGMENT
:
1053 frag_result_to_tgsi_semantic((gl_frag_result
)slot
, &name
, &index
);
1055 case TGSI_SEMANTIC_COLOR
:
1056 if (!var
->data
.fb_fetch_output
)
1057 info
->prop
.fp
.numColourResults
++;
1058 info
->prop
.fp
.separateFragData
= true;
1059 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1060 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1061 index
= index
== 0 ? var
->data
.index
: index
;
1063 case TGSI_SEMANTIC_POSITION
:
1064 info
->io
.fragDepth
= vary
;
1065 info
->prop
.fp
.writesDepth
= true;
1067 case TGSI_SEMANTIC_SAMPLEMASK
:
1068 info
->io
.sampleMask
= vary
;
1074 case Program::TYPE_GEOMETRY
:
1075 case Program::TYPE_TESSELLATION_CONTROL
:
1076 case Program::TYPE_TESSELLATION_EVAL
:
1077 case Program::TYPE_VERTEX
:
1078 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1080 if (var
->data
.patch
&& name
!= TGSI_SEMANTIC_TESSINNER
&&
1081 name
!= TGSI_SEMANTIC_TESSOUTER
)
1082 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1085 case TGSI_SEMANTIC_CLIPDIST
:
1086 info
->io
.genUserClip
= -1;
1088 case TGSI_SEMANTIC_CLIPVERTEX
:
1089 clipVertexOutput
= vary
;
1091 case TGSI_SEMANTIC_EDGEFLAG
:
1092 info
->io
.edgeFlagOut
= vary
;
1094 case TGSI_SEMANTIC_POSITION
:
1095 if (clipVertexOutput
< 0)
1096 clipVertexOutput
= vary
;
1103 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1107 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1108 info
->out
[vary
].id
= vary
;
1109 info
->out
[vary
].patch
= var
->data
.patch
;
1110 info
->out
[vary
].sn
= name
;
1111 info
->out
[vary
].si
= index
+ i
;
1112 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1114 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1116 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1118 info
->out
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1120 if (nir
->info
.outputs_read
& 1ll << slot
)
1121 info
->out
[vary
].oread
= 1;
1123 info
->numOutputs
= std::max
<uint8_t>(info
->numOutputs
, vary
);
1126 info
->numSysVals
= 0;
1127 for (uint8_t i
= 0; i
< 64; ++i
) {
1128 if (!(nir
->info
.system_values_read
& 1ll << i
))
1131 system_val_to_tgsi_semantic(i
, &name
, &index
);
1132 info
->sv
[info
->numSysVals
].sn
= name
;
1133 info
->sv
[info
->numSysVals
].si
= index
;
1134 info
->sv
[info
->numSysVals
].input
= 0; // TODO inferSysValDirection(sn);
1137 case SYSTEM_VALUE_INSTANCE_ID
:
1138 info
->io
.instanceId
= info
->numSysVals
;
1140 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
1141 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
1142 info
->sv
[info
->numSysVals
].patch
= 1;
1144 case SYSTEM_VALUE_VERTEX_ID
:
1145 info
->io
.vertexId
= info
->numSysVals
;
1151 info
->numSysVals
+= 1;
1154 if (info
->io
.genUserClip
> 0) {
1155 info
->io
.clipDistances
= info
->io
.genUserClip
;
1157 const unsigned int nOut
= (info
->io
.genUserClip
+ 3) / 4;
1159 for (unsigned int n
= 0; n
< nOut
; ++n
) {
1160 unsigned int i
= info
->numOutputs
++;
1161 info
->out
[i
].id
= i
;
1162 info
->out
[i
].sn
= TGSI_SEMANTIC_CLIPDIST
;
1163 info
->out
[i
].si
= n
;
1164 info
->out
[i
].mask
= ((1 << info
->io
.clipDistances
) - 1) >> (n
* 4);
1168 return info
->assignSlots(info
) == 0;
1172 Converter::getSlotAddress(nir_intrinsic_instr
*insn
, uint8_t idx
, uint8_t slot
)
1175 int offset
= nir_intrinsic_component(insn
);
1178 if (nir_intrinsic_infos
[insn
->intrinsic
].has_dest
)
1179 ty
= getDType(insn
);
1181 ty
= getSType(insn
->src
[0], false, false);
1183 switch (insn
->intrinsic
) {
1184 case nir_intrinsic_load_input
:
1185 case nir_intrinsic_load_interpolated_input
:
1186 case nir_intrinsic_load_per_vertex_input
:
1189 case nir_intrinsic_load_output
:
1190 case nir_intrinsic_load_per_vertex_output
:
1191 case nir_intrinsic_store_output
:
1192 case nir_intrinsic_store_per_vertex_output
:
1196 ERROR("unknown intrinsic in getSlotAddress %s",
1197 nir_intrinsic_infos
[insn
->intrinsic
].name
);
1203 if (typeSizeof(ty
) == 8) {
1215 assert(!input
|| idx
< PIPE_MAX_SHADER_INPUTS
);
1216 assert(input
|| idx
< PIPE_MAX_SHADER_OUTPUTS
);
1218 const nv50_ir_varying
*vary
= input
? info
->in
: info
->out
;
1219 return vary
[idx
].slot
[slot
] * 4;
1223 Converter::loadFrom(DataFile file
, uint8_t i
, DataType ty
, Value
*def
,
1224 uint32_t base
, uint8_t c
, Value
*indirect0
,
1225 Value
*indirect1
, bool patch
)
1227 unsigned int tySize
= typeSizeof(ty
);
1230 (file
== FILE_MEMORY_CONST
|| file
== FILE_MEMORY_BUFFER
|| indirect0
)) {
1231 Value
*lo
= getSSA();
1232 Value
*hi
= getSSA();
1235 mkLoad(TYPE_U32
, lo
,
1236 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
),
1238 loi
->setIndirect(0, 1, indirect1
);
1239 loi
->perPatch
= patch
;
1242 mkLoad(TYPE_U32
, hi
,
1243 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
+ 4),
1245 hii
->setIndirect(0, 1, indirect1
);
1246 hii
->perPatch
= patch
;
1248 return mkOp2(OP_MERGE
, ty
, def
, lo
, hi
);
1251 mkLoad(ty
, def
, mkSymbol(file
, i
, ty
, base
+ c
* tySize
), indirect0
);
1252 ld
->setIndirect(0, 1, indirect1
);
1253 ld
->perPatch
= patch
;
1259 Converter::storeTo(nir_intrinsic_instr
*insn
, DataFile file
, operation op
,
1260 DataType ty
, Value
*src
, uint8_t idx
, uint8_t c
,
1261 Value
*indirect0
, Value
*indirect1
)
1263 uint8_t size
= typeSizeof(ty
);
1264 uint32_t address
= getSlotAddress(insn
, idx
, c
);
1266 if (size
== 8 && indirect0
) {
1268 mkSplit(split
, 4, src
);
1270 if (op
== OP_EXPORT
) {
1271 split
[0] = mkMov(getSSA(), split
[0], ty
)->getDef(0);
1272 split
[1] = mkMov(getSSA(), split
[1], ty
)->getDef(0);
1275 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
), indirect0
,
1276 split
[0])->perPatch
= info
->out
[idx
].patch
;
1277 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
+ 4), indirect0
,
1278 split
[1])->perPatch
= info
->out
[idx
].patch
;
1280 if (op
== OP_EXPORT
)
1281 src
= mkMov(getSSA(size
), src
, ty
)->getDef(0);
1282 mkStore(op
, ty
, mkSymbol(file
, 0, ty
, address
), indirect0
,
1283 src
)->perPatch
= info
->out
[idx
].patch
;
1288 Converter::parseNIR()
1290 info
->io
.clipDistances
= nir
->info
.clip_distance_array_size
;
1291 info
->io
.cullDistances
= nir
->info
.cull_distance_array_size
;
1293 switch(prog
->getType()) {
1294 case Program::TYPE_COMPUTE
:
1295 info
->prop
.cp
.numThreads
[0] = nir
->info
.cs
.local_size
[0];
1296 info
->prop
.cp
.numThreads
[1] = nir
->info
.cs
.local_size
[1];
1297 info
->prop
.cp
.numThreads
[2] = nir
->info
.cs
.local_size
[2];
1298 info
->bin
.smemSize
= nir
->info
.cs
.shared_size
;
1300 case Program::TYPE_FRAGMENT
:
1301 info
->prop
.fp
.earlyFragTests
= nir
->info
.fs
.early_fragment_tests
;
1302 info
->prop
.fp
.persampleInvocation
=
1303 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_ID
) ||
1304 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1305 info
->prop
.fp
.postDepthCoverage
= nir
->info
.fs
.post_depth_coverage
;
1306 info
->prop
.fp
.readsSampleLocations
=
1307 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1308 info
->prop
.fp
.usesDiscard
= nir
->info
.fs
.uses_discard
;
1309 info
->prop
.fp
.usesSampleMaskIn
=
1310 !!(nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_MASK_IN
);
1312 case Program::TYPE_GEOMETRY
:
1313 info
->prop
.gp
.inputPrim
= nir
->info
.gs
.input_primitive
;
1314 info
->prop
.gp
.instanceCount
= nir
->info
.gs
.invocations
;
1315 info
->prop
.gp
.maxVertices
= nir
->info
.gs
.vertices_out
;
1316 info
->prop
.gp
.outputPrim
= nir
->info
.gs
.output_primitive
;
1318 case Program::TYPE_TESSELLATION_CONTROL
:
1319 case Program::TYPE_TESSELLATION_EVAL
:
1320 if (nir
->info
.tess
.primitive_mode
== GL_ISOLINES
)
1321 info
->prop
.tp
.domain
= GL_LINES
;
1323 info
->prop
.tp
.domain
= nir
->info
.tess
.primitive_mode
;
1324 info
->prop
.tp
.outputPatchSize
= nir
->info
.tess
.tcs_vertices_out
;
1325 info
->prop
.tp
.outputPrim
=
1326 nir
->info
.tess
.point_mode
? PIPE_PRIM_POINTS
: PIPE_PRIM_TRIANGLES
;
1327 info
->prop
.tp
.partitioning
= (nir
->info
.tess
.spacing
+ 1) % 3;
1328 info
->prop
.tp
.winding
= !nir
->info
.tess
.ccw
;
1330 case Program::TYPE_VERTEX
:
1331 info
->prop
.vp
.usesDrawParameters
=
1332 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX
)) ||
1333 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE
)) ||
1334 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID
));
1344 Converter::visit(nir_function
*function
)
1346 // we only support emiting the main function for now
1347 assert(!strcmp(function
->name
, "main"));
1348 assert(function
->impl
);
1350 // usually the blocks will set everything up, but main is special
1351 BasicBlock
*entry
= new BasicBlock(prog
->main
);
1352 exit
= new BasicBlock(prog
->main
);
1353 blocks
[nir_start_block(function
->impl
)->index
] = entry
;
1354 prog
->main
->setEntry(entry
);
1355 prog
->main
->setExit(exit
);
1357 setPosition(entry
, true);
1359 if (info
->io
.genUserClip
> 0) {
1360 for (int c
= 0; c
< 4; ++c
)
1361 clipVtx
[c
] = getScratch();
1364 switch (prog
->getType()) {
1365 case Program::TYPE_TESSELLATION_CONTROL
:
1367 OP_SUB
, TYPE_U32
, getSSA(),
1368 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LANEID
, 0)),
1369 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_INVOCATION_ID
, 0)));
1371 case Program::TYPE_FRAGMENT
: {
1372 Symbol
*sv
= mkSysVal(SV_POSITION
, 3);
1373 fragCoord
[3] = mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), sv
);
1374 fp
.position
= mkOp1v(OP_RCP
, TYPE_F32
, fragCoord
[3], fragCoord
[3]);
1381 nir_index_ssa_defs(function
->impl
);
1382 foreach_list_typed(nir_cf_node
, node
, node
, &function
->impl
->body
) {
1387 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::TREE
);
1388 setPosition(exit
, true);
1390 if (info
->io
.genUserClip
> 0)
1391 handleUserClipPlanes();
1393 // TODO: for non main function this needs to be a OP_RETURN
1394 mkOp(OP_EXIT
, TYPE_NONE
, NULL
)->terminator
= 1;
1399 Converter::visit(nir_cf_node
*node
)
1401 switch (node
->type
) {
1402 case nir_cf_node_block
:
1403 return visit(nir_cf_node_as_block(node
));
1404 case nir_cf_node_if
:
1405 return visit(nir_cf_node_as_if(node
));
1406 case nir_cf_node_loop
:
1407 return visit(nir_cf_node_as_loop(node
));
1409 ERROR("unknown nir_cf_node type %u\n", node
->type
);
1415 Converter::visit(nir_block
*block
)
1417 if (!block
->predecessors
->entries
&& block
->instr_list
.is_empty())
1420 BasicBlock
*bb
= convert(block
);
1422 setPosition(bb
, true);
1423 nir_foreach_instr(insn
, block
) {
1431 Converter::visit(nir_if
*nif
)
1433 DataType sType
= getSType(nif
->condition
, false, false);
1434 Value
*src
= getSrc(&nif
->condition
, 0);
1436 nir_block
*lastThen
= nir_if_last_then_block(nif
);
1437 nir_block
*lastElse
= nir_if_last_else_block(nif
);
1439 assert(!lastThen
->successors
[1]);
1440 assert(!lastElse
->successors
[1]);
1442 BasicBlock
*ifBB
= convert(nir_if_first_then_block(nif
));
1443 BasicBlock
*elseBB
= convert(nir_if_first_else_block(nif
));
1445 bb
->cfg
.attach(&ifBB
->cfg
, Graph::Edge::TREE
);
1446 bb
->cfg
.attach(&elseBB
->cfg
, Graph::Edge::TREE
);
1448 // we only insert joinats, if both nodes end up at the end of the if again.
1449 // the reason for this to not happens are breaks/continues/ret/... which
1450 // have their own handling
1451 if (lastThen
->successors
[0] == lastElse
->successors
[0])
1452 bb
->joinAt
= mkFlow(OP_JOINAT
, convert(lastThen
->successors
[0]),
1455 mkFlow(OP_BRA
, elseBB
, CC_EQ
, src
)->setType(sType
);
1457 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->then_list
) {
1461 setPosition(convert(lastThen
), true);
1462 if (!bb
->getExit() ||
1463 !bb
->getExit()->asFlow() ||
1464 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1465 BasicBlock
*tailBB
= convert(lastThen
->successors
[0]);
1466 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1467 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1470 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->else_list
) {
1474 setPosition(convert(lastElse
), true);
1475 if (!bb
->getExit() ||
1476 !bb
->getExit()->asFlow() ||
1477 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1478 BasicBlock
*tailBB
= convert(lastElse
->successors
[0]);
1479 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1480 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1483 if (lastThen
->successors
[0] == lastElse
->successors
[0]) {
1484 setPosition(convert(lastThen
->successors
[0]), true);
1485 mkFlow(OP_JOIN
, NULL
, CC_ALWAYS
, NULL
)->fixed
= 1;
1492 Converter::visit(nir_loop
*loop
)
1495 func
->loopNestingBound
= std::max(func
->loopNestingBound
, curLoopDepth
);
1497 BasicBlock
*loopBB
= convert(nir_loop_first_block(loop
));
1498 BasicBlock
*tailBB
=
1499 convert(nir_cf_node_as_block(nir_cf_node_next(&loop
->cf_node
)));
1500 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::TREE
);
1502 mkFlow(OP_PREBREAK
, tailBB
, CC_ALWAYS
, NULL
);
1503 setPosition(loopBB
, false);
1504 mkFlow(OP_PRECONT
, loopBB
, CC_ALWAYS
, NULL
);
1506 foreach_list_typed(nir_cf_node
, node
, node
, &loop
->body
) {
1510 Instruction
*insn
= bb
->getExit();
1511 if (bb
->cfg
.incidentCount() != 0) {
1512 if (!insn
|| !insn
->asFlow()) {
1513 mkFlow(OP_CONT
, loopBB
, CC_ALWAYS
, NULL
);
1514 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::BACK
);
1515 } else if (insn
&& insn
->op
== OP_BRA
&& !insn
->getPredicate() &&
1516 tailBB
->cfg
.incidentCount() == 0) {
1517 // RA doesn't like having blocks around with no incident edge,
1518 // so we create a fake one to make it happy
1519 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::TREE
);
1529 Converter::visit(nir_instr
*insn
)
1531 switch (insn
->type
) {
1532 case nir_instr_type_alu
:
1533 return visit(nir_instr_as_alu(insn
));
1534 case nir_instr_type_intrinsic
:
1535 return visit(nir_instr_as_intrinsic(insn
));
1536 case nir_instr_type_jump
:
1537 return visit(nir_instr_as_jump(insn
));
1538 case nir_instr_type_load_const
:
1539 return visit(nir_instr_as_load_const(insn
));
1541 ERROR("unknown nir_instr type %u\n", insn
->type
);
1548 Converter::visit(nir_intrinsic_instr
*insn
)
1550 nir_intrinsic_op op
= insn
->intrinsic
;
1553 case nir_intrinsic_load_uniform
: {
1554 LValues
&newDefs
= convert(&insn
->dest
);
1555 const DataType dType
= getDType(insn
);
1557 uint32_t coffset
= getIndirect(insn
, 0, 0, indirect
);
1558 for (uint8_t i
= 0; i
< insn
->num_components
; ++i
) {
1559 loadFrom(FILE_MEMORY_CONST
, 0, dType
, newDefs
[i
], 16 * coffset
, i
, indirect
);
1563 case nir_intrinsic_store_output
:
1564 case nir_intrinsic_store_per_vertex_output
: {
1566 DataType dType
= getSType(insn
->src
[0], false, false);
1567 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_store_output
? 1 : 2, 0, indirect
);
1569 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1570 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
1574 Value
*src
= getSrc(&insn
->src
[0], i
);
1575 switch (prog
->getType()) {
1576 case Program::TYPE_FRAGMENT
: {
1577 if (info
->out
[idx
].sn
== TGSI_SEMANTIC_POSITION
) {
1578 // TGSI uses a different interface than NIR, TGSI stores that
1579 // value in the z component, NIR in X
1581 src
= mkOp1v(OP_SAT
, TYPE_F32
, getScratch(), src
);
1585 case Program::TYPE_VERTEX
: {
1586 if (info
->io
.genUserClip
> 0 && idx
== clipVertexOutput
) {
1587 mkMov(clipVtx
[i
], src
);
1596 storeTo(insn
, FILE_SHADER_OUTPUT
, OP_EXPORT
, dType
, src
, idx
, i
+ offset
, indirect
);
1600 case nir_intrinsic_load_input
:
1601 case nir_intrinsic_load_interpolated_input
:
1602 case nir_intrinsic_load_output
: {
1603 LValues
&newDefs
= convert(&insn
->dest
);
1606 if (prog
->getType() == Program::TYPE_FRAGMENT
&&
1607 op
== nir_intrinsic_load_output
) {
1608 std::vector
<Value
*> defs
, srcs
;
1611 srcs
.push_back(getSSA());
1612 srcs
.push_back(getSSA());
1613 Value
*x
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 0));
1614 Value
*y
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 1));
1615 mkCvt(OP_CVT
, TYPE_U32
, srcs
[0], TYPE_F32
, x
)->rnd
= ROUND_Z
;
1616 mkCvt(OP_CVT
, TYPE_U32
, srcs
[1], TYPE_F32
, y
)->rnd
= ROUND_Z
;
1618 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LAYER
, 0)));
1619 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_SAMPLE_INDEX
, 0)));
1621 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1622 defs
.push_back(newDefs
[i
]);
1626 TexInstruction
*texi
= mkTex(OP_TXF
, TEX_TARGET_2D_MS_ARRAY
, 0, 0, defs
, srcs
);
1627 texi
->tex
.levelZero
= 1;
1628 texi
->tex
.mask
= mask
;
1629 texi
->tex
.useOffsets
= 0;
1630 texi
->tex
.r
= 0xffff;
1631 texi
->tex
.s
= 0xffff;
1633 info
->prop
.fp
.readsFramebuffer
= true;
1637 const DataType dType
= getDType(insn
);
1639 bool input
= op
!= nir_intrinsic_load_output
;
1643 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_load_interpolated_input
? 1 : 0, 0, indirect
);
1644 nv50_ir_varying
& vary
= input
? info
->in
[idx
] : info
->out
[idx
];
1646 // see load_barycentric_* handling
1647 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1648 mode
= translateInterpMode(&vary
, nvirOp
);
1649 if (op
== nir_intrinsic_load_interpolated_input
) {
1650 ImmediateValue immMode
;
1651 if (getSrc(&insn
->src
[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode
))
1652 mode
|= immMode
.reg
.data
.u32
;
1656 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1657 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1658 Symbol
*sym
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
);
1659 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1661 if (typeSizeof(dType
) == 8) {
1662 Value
*lo
= getSSA();
1663 Value
*hi
= getSSA();
1664 Instruction
*interp
;
1666 interp
= mkOp1(nvirOp
, TYPE_U32
, lo
, sym
);
1667 if (nvirOp
== OP_PINTERP
)
1668 interp
->setSrc(s
++, fp
.position
);
1669 if (mode
& NV50_IR_INTERP_OFFSET
)
1670 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1671 interp
->setInterpolate(mode
);
1672 interp
->setIndirect(0, 0, indirect
);
1674 Symbol
*sym1
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
+ 4);
1675 interp
= mkOp1(nvirOp
, TYPE_U32
, hi
, sym1
);
1676 if (nvirOp
== OP_PINTERP
)
1677 interp
->setSrc(s
++, fp
.position
);
1678 if (mode
& NV50_IR_INTERP_OFFSET
)
1679 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1680 interp
->setInterpolate(mode
);
1681 interp
->setIndirect(0, 0, indirect
);
1683 mkOp2(OP_MERGE
, dType
, newDefs
[i
], lo
, hi
);
1685 Instruction
*interp
= mkOp1(nvirOp
, dType
, newDefs
[i
], sym
);
1686 if (nvirOp
== OP_PINTERP
)
1687 interp
->setSrc(s
++, fp
.position
);
1688 if (mode
& NV50_IR_INTERP_OFFSET
)
1689 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1690 interp
->setInterpolate(mode
);
1691 interp
->setIndirect(0, 0, indirect
);
1694 mkLoad(dType
, newDefs
[i
], sym
, indirect
)->perPatch
= vary
.patch
;
1699 case nir_intrinsic_load_barycentric_at_offset
:
1700 case nir_intrinsic_load_barycentric_at_sample
:
1701 case nir_intrinsic_load_barycentric_centroid
:
1702 case nir_intrinsic_load_barycentric_pixel
:
1703 case nir_intrinsic_load_barycentric_sample
: {
1704 LValues
&newDefs
= convert(&insn
->dest
);
1707 if (op
== nir_intrinsic_load_barycentric_centroid
||
1708 op
== nir_intrinsic_load_barycentric_sample
) {
1709 mode
= NV50_IR_INTERP_CENTROID
;
1710 } else if (op
== nir_intrinsic_load_barycentric_at_offset
) {
1712 for (uint8_t c
= 0; c
< 2; c
++) {
1713 offs
[c
] = getScratch();
1714 mkOp2(OP_MIN
, TYPE_F32
, offs
[c
], getSrc(&insn
->src
[0], c
), loadImm(NULL
, 0.4375f
));
1715 mkOp2(OP_MAX
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, -0.5f
));
1716 mkOp2(OP_MUL
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, 4096.0f
));
1717 mkCvt(OP_CVT
, TYPE_S32
, offs
[c
], TYPE_F32
, offs
[c
]);
1719 mkOp3v(OP_INSBF
, TYPE_U32
, newDefs
[0], offs
[1], mkImm(0x1010), offs
[0]);
1721 mode
= NV50_IR_INTERP_OFFSET
;
1722 } else if (op
== nir_intrinsic_load_barycentric_pixel
) {
1723 mode
= NV50_IR_INTERP_DEFAULT
;
1724 } else if (op
== nir_intrinsic_load_barycentric_at_sample
) {
1725 info
->prop
.fp
.readsSampleLocations
= true;
1726 mkOp1(OP_PIXLD
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0], 0))->subOp
= NV50_IR_SUBOP_PIXLD_OFFSET
;
1727 mode
= NV50_IR_INTERP_OFFSET
;
1729 unreachable("all intrinsics already handled above");
1732 loadImm(newDefs
[1], mode
);
1736 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos
[op
].name
);
1744 Converter::visit(nir_jump_instr
*insn
)
1746 switch (insn
->type
) {
1747 case nir_jump_return
:
1748 // TODO: this only works in the main function
1749 mkFlow(OP_BRA
, exit
, CC_ALWAYS
, NULL
);
1750 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::CROSS
);
1752 case nir_jump_break
:
1753 case nir_jump_continue
: {
1754 bool isBreak
= insn
->type
== nir_jump_break
;
1755 nir_block
*block
= insn
->instr
.block
;
1756 assert(!block
->successors
[1]);
1757 BasicBlock
*target
= convert(block
->successors
[0]);
1758 mkFlow(isBreak
? OP_BREAK
: OP_CONT
, target
, CC_ALWAYS
, NULL
);
1759 bb
->cfg
.attach(&target
->cfg
, isBreak
? Graph::Edge::CROSS
: Graph::Edge::BACK
);
1763 ERROR("unknown nir_jump_type %u\n", insn
->type
);
1771 Converter::visit(nir_load_const_instr
*insn
)
1773 assert(insn
->def
.bit_size
<= 64);
1775 LValues
&newDefs
= convert(&insn
->def
);
1776 for (int i
= 0; i
< insn
->def
.num_components
; i
++) {
1777 switch (insn
->def
.bit_size
) {
1779 loadImm(newDefs
[i
], insn
->value
.u64
[i
]);
1782 loadImm(newDefs
[i
], insn
->value
.u32
[i
]);
1785 loadImm(newDefs
[i
], insn
->value
.u16
[i
]);
1788 loadImm(newDefs
[i
], insn
->value
.u8
[i
]);
// Shared early-outs for ALU conversion: this backend path only handles
// scalarized ALU instructions (one component, write_mask == 1).
#define DEFAULT_CHECKS \
      if (insn->dest.dest.ssa.num_components > 1) { \
         ERROR("nir_alu_instr only supported with 1 component!\n"); \
         return false; \
      } \
      if (insn->dest.write_mask != 1) { \
         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
         return false; \
      }
1805 Converter::visit(nir_alu_instr
*insn
)
1807 const nir_op op
= insn
->op
;
1808 const nir_op_info
&info
= nir_op_infos
[op
];
1809 DataType dType
= getDType(insn
);
1810 const std::vector
<DataType
> sTypes
= getSTypes(insn
);
1812 Instruction
*oldPos
= this->bb
->getExit();
1824 case nir_op_fddx_coarse
:
1825 case nir_op_fddx_fine
:
1827 case nir_op_fddy_coarse
:
1828 case nir_op_fddy_fine
:
1847 case nir_op_imul_high
:
1848 case nir_op_umul_high
:
1855 case nir_op_pack_64_2x32_split
:
1873 LValues
&newDefs
= convert(&insn
->dest
);
1874 operation preOp
= preOperationNeeded(op
);
1875 if (preOp
!= OP_NOP
) {
1876 assert(info
.num_inputs
< 2);
1877 Value
*tmp
= getSSA(typeSizeof(dType
));
1878 Instruction
*i0
= mkOp(preOp
, dType
, tmp
);
1879 Instruction
*i1
= mkOp(getOperation(op
), dType
, newDefs
[0]);
1880 if (info
.num_inputs
) {
1881 i0
->setSrc(0, getSrc(&insn
->src
[0]));
1884 i1
->subOp
= getSubOp(op
);
1886 Instruction
*i
= mkOp(getOperation(op
), dType
, newDefs
[0]);
1887 for (unsigned s
= 0u; s
< info
.num_inputs
; ++s
) {
1888 i
->setSrc(s
, getSrc(&insn
->src
[s
]));
1890 i
->subOp
= getSubOp(op
);
1894 case nir_op_ifind_msb
:
1895 case nir_op_ufind_msb
: {
1897 LValues
&newDefs
= convert(&insn
->dest
);
1899 mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
1902 case nir_op_fround_even
: {
1904 LValues
&newDefs
= convert(&insn
->dest
);
1905 mkCvt(OP_CVT
, dType
, newDefs
[0], dType
, getSrc(&insn
->src
[0]))->rnd
= ROUND_NI
;
1908 // convert instructions
1922 case nir_op_u2u64
: {
1924 LValues
&newDefs
= convert(&insn
->dest
);
1925 Instruction
*i
= mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
1926 if (op
== nir_op_f2i32
|| op
== nir_op_f2i64
|| op
== nir_op_f2u32
|| op
== nir_op_f2u64
)
1928 i
->sType
= sTypes
[0];
1931 // compare instructions
1941 case nir_op_ine32
: {
1943 LValues
&newDefs
= convert(&insn
->dest
);
1944 Instruction
*i
= mkCmp(getOperation(op
),
1949 getSrc(&insn
->src
[0]),
1950 getSrc(&insn
->src
[1]));
1951 if (info
.num_inputs
== 3)
1952 i
->setSrc(2, getSrc(&insn
->src
[2]));
1953 i
->sType
= sTypes
[0];
1956 // those are weird ALU ops and need special handling, because
1957 // 1. they are always componend based
1958 // 2. they basically just merge multiple values into one data type
1964 LValues
&newDefs
= convert(&insn
->dest
);
1965 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
1966 mkMov(newDefs
[c
], getSrc(&insn
->src
[c
]), dType
);
1971 case nir_op_pack_64_2x32
: {
1972 LValues
&newDefs
= convert(&insn
->dest
);
1973 Instruction
*merge
= mkOp(OP_MERGE
, dType
, newDefs
[0]);
1974 merge
->setSrc(0, getSrc(&insn
->src
[0], 0));
1975 merge
->setSrc(1, getSrc(&insn
->src
[0], 1));
1978 case nir_op_pack_half_2x16_split
: {
1979 LValues
&newDefs
= convert(&insn
->dest
);
1980 Value
*tmpH
= getSSA();
1981 Value
*tmpL
= getSSA();
1983 mkCvt(OP_CVT
, TYPE_F16
, tmpL
, TYPE_F32
, getSrc(&insn
->src
[0]));
1984 mkCvt(OP_CVT
, TYPE_F16
, tmpH
, TYPE_F32
, getSrc(&insn
->src
[1]));
1985 mkOp3(OP_INSBF
, TYPE_U32
, newDefs
[0], tmpH
, mkImm(0x1010), tmpL
);
1988 case nir_op_unpack_half_2x16_split_x
:
1989 case nir_op_unpack_half_2x16_split_y
: {
1990 LValues
&newDefs
= convert(&insn
->dest
);
1991 Instruction
*cvt
= mkCvt(OP_CVT
, TYPE_F32
, newDefs
[0], TYPE_F16
, getSrc(&insn
->src
[0]));
1992 if (op
== nir_op_unpack_half_2x16_split_y
)
1996 case nir_op_unpack_64_2x32
: {
1997 LValues
&newDefs
= convert(&insn
->dest
);
1998 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, newDefs
[1]);
2001 case nir_op_unpack_64_2x32_split_x
: {
2002 LValues
&newDefs
= convert(&insn
->dest
);
2003 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, getSSA());
2006 case nir_op_unpack_64_2x32_split_y
: {
2007 LValues
&newDefs
= convert(&insn
->dest
);
2008 mkOp1(OP_SPLIT
, dType
, getSSA(), getSrc(&insn
->src
[0]))->setDef(1, newDefs
[0]);
2011 // special instructions
2013 case nir_op_isign
: {
2016 if (::isFloatType(dType
))
2021 LValues
&newDefs
= convert(&insn
->dest
);
2022 LValue
*val0
= getScratch();
2023 LValue
*val1
= getScratch();
2024 mkCmp(OP_SET
, CC_GT
, iType
, val0
, dType
, getSrc(&insn
->src
[0]), zero
);
2025 mkCmp(OP_SET
, CC_LT
, iType
, val1
, dType
, getSrc(&insn
->src
[0]), zero
);
2027 if (dType
== TYPE_F64
) {
2028 mkOp2(OP_SUB
, iType
, val0
, val0
, val1
);
2029 mkCvt(OP_CVT
, TYPE_F64
, newDefs
[0], iType
, val0
);
2030 } else if (dType
== TYPE_S64
|| dType
== TYPE_U64
) {
2031 mkOp2(OP_SUB
, iType
, val0
, val1
, val0
);
2032 mkOp2(OP_SHR
, iType
, val1
, val0
, loadImm(NULL
, 31));
2033 mkOp2(OP_MERGE
, dType
, newDefs
[0], val0
, val1
);
2034 } else if (::isFloatType(dType
))
2035 mkOp2(OP_SUB
, iType
, newDefs
[0], val0
, val1
);
2037 mkOp2(OP_SUB
, iType
, newDefs
[0], val1
, val0
);
2041 case nir_op_b32csel
: {
2043 LValues
&newDefs
= convert(&insn
->dest
);
2044 mkCmp(OP_SLCT
, CC_NE
, dType
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[1]), getSrc(&insn
->src
[2]), getSrc(&insn
->src
[0]));
2047 case nir_op_ibitfield_extract
:
2048 case nir_op_ubitfield_extract
: {
2050 Value
*tmp
= getSSA();
2051 LValues
&newDefs
= convert(&insn
->dest
);
2052 mkOp3(OP_INSBF
, dType
, tmp
, getSrc(&insn
->src
[2]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2053 mkOp2(OP_EXTBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), tmp
);
2058 LValues
&newDefs
= convert(&insn
->dest
);
2059 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2062 case nir_op_bitfield_insert
: {
2064 LValues
&newDefs
= convert(&insn
->dest
);
2065 LValue
*temp
= getSSA();
2066 mkOp3(OP_INSBF
, TYPE_U32
, temp
, getSrc(&insn
->src
[3]), mkImm(0x808), getSrc(&insn
->src
[2]));
2067 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[1]), temp
, getSrc(&insn
->src
[0]));
2070 case nir_op_bit_count
: {
2072 LValues
&newDefs
= convert(&insn
->dest
);
2073 mkOp2(OP_POPCNT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), getSrc(&insn
->src
[0]));
2076 case nir_op_bitfield_reverse
: {
2078 LValues
&newDefs
= convert(&insn
->dest
);
2079 mkOp2(OP_EXTBF
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2082 case nir_op_find_lsb
: {
2084 LValues
&newDefs
= convert(&insn
->dest
);
2085 Value
*tmp
= getSSA();
2086 mkOp2(OP_EXTBF
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2087 mkOp1(OP_BFIND
, TYPE_U32
, newDefs
[0], tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
2090 // boolean conversions
2091 case nir_op_b2f32
: {
2093 LValues
&newDefs
= convert(&insn
->dest
);
2094 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1.0f
));
2097 case nir_op_b2f64
: {
2099 LValues
&newDefs
= convert(&insn
->dest
);
2100 Value
*tmp
= getSSA(4);
2101 mkOp2(OP_AND
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), loadImm(NULL
, 0x3ff00000));
2102 mkOp2(OP_MERGE
, TYPE_U64
, newDefs
[0], loadImm(NULL
, 0), tmp
);
2106 case nir_op_i2b32
: {
2108 LValues
&newDefs
= convert(&insn
->dest
);
2110 if (typeSizeof(sTypes
[0]) == 8) {
2111 src1
= loadImm(getSSA(8), 0.0);
2115 CondCode cc
= op
== nir_op_f2b32
? CC_NEU
: CC_NE
;
2116 mkCmp(OP_SET
, cc
, TYPE_U32
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[0]), src1
);
2119 case nir_op_b2i32
: {
2121 LValues
&newDefs
= convert(&insn
->dest
);
2122 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2125 case nir_op_b2i64
: {
2127 LValues
&newDefs
= convert(&insn
->dest
);
2128 LValue
*def
= getScratch();
2129 mkOp2(OP_AND
, TYPE_U32
, def
, getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2130 mkOp2(OP_MERGE
, TYPE_S64
, newDefs
[0], def
, loadImm(NULL
, 0));
2134 ERROR("unknown nir_op %s\n", info
.name
);
2139 oldPos
= this->bb
->getEntry();
2140 oldPos
->precise
= insn
->exact
;
2143 if (unlikely(!oldPos
))
2146 while (oldPos
->next
) {
2147 oldPos
= oldPos
->next
;
2148 oldPos
->precise
= insn
->exact
;
2150 oldPos
->saturate
= insn
->dest
.saturate
;
2154 #undef DEFAULT_CHECKS
2161 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
2162 nir_print_shader(nir
, stderr
);
2164 NIR_PASS_V(nir
, nir_lower_io
, nir_var_all
, type_size
, (nir_lower_io_options
)0);
2165 NIR_PASS_V(nir
, nir_lower_regs_to_ssa
);
2166 NIR_PASS_V(nir
, nir_lower_load_const_to_scalar
);
2167 NIR_PASS_V(nir
, nir_lower_vars_to_ssa
);
2168 NIR_PASS_V(nir
, nir_lower_alu_to_scalar
);
2169 NIR_PASS_V(nir
, nir_lower_phis_to_scalar
);
2173 NIR_PASS(progress
, nir
, nir_copy_prop
);
2174 NIR_PASS(progress
, nir
, nir_opt_remove_phis
);
2175 NIR_PASS(progress
, nir
, nir_opt_trivial_continues
);
2176 NIR_PASS(progress
, nir
, nir_opt_cse
);
2177 NIR_PASS(progress
, nir
, nir_opt_algebraic
);
2178 NIR_PASS(progress
, nir
, nir_opt_constant_folding
);
2179 NIR_PASS(progress
, nir
, nir_copy_prop
);
2180 NIR_PASS(progress
, nir
, nir_opt_dce
);
2181 NIR_PASS(progress
, nir
, nir_opt_dead_cf
);
2184 NIR_PASS_V(nir
, nir_lower_bool_to_int32
);
2185 NIR_PASS_V(nir
, nir_lower_locals_to_regs
);
2186 NIR_PASS_V(nir
, nir_remove_dead_variables
, nir_var_function_temp
);
2187 NIR_PASS_V(nir
, nir_convert_from_ssa
, true);
2189 // Garbage collect dead instructions
2193 ERROR("Couldn't prase NIR!\n");
2197 if (!assignSlots()) {
2198 ERROR("Couldn't assign slots!\n");
2202 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
2203 nir_print_shader(nir
, stderr
);
2205 nir_foreach_function(function
, nir
) {
2206 if (!visit(function
))
2213 } // unnamed namespace
2218 Program::makeFromNIR(struct nv50_ir_prog_info
*info
)
2220 nir_shader
*nir
= (nir_shader
*)info
->bin
.source
;
2221 Converter
converter(this, nir
, info
);
2222 bool result
= converter
.run();
2225 LoweringHelper lowering
;
2227 tlsSize
= info
->bin
.tlsSpace
;
2231 } // namespace nv50_ir