2 * Copyright 2017 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Karol Herbst <kherbst@redhat.com>
25 #include "compiler/nir/nir.h"
27 #include "util/u_debug.h"
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33 #include "tgsi/tgsi_from_mesa.h"
35 #if __cplusplus >= 201103L
36 #include <unordered_map>
38 #include <tr1/unordered_map>
46 #if __cplusplus >= 201103L
48 using std::unordered_map
;
51 using std::tr1::unordered_map
;
54 using namespace nv50_ir
;
57 type_size(const struct glsl_type
*type
, bool bindless
)
59 return glsl_count_attribute_slots(type
, false);
63 function_temp_type_info(const struct glsl_type
*type
, unsigned *size
, unsigned *align
)
65 assert(glsl_type_is_vector_or_scalar(type
));
67 unsigned comp_size
= glsl_type_is_boolean(type
) ? 4 : glsl_get_bit_size(type
) / 8;
68 unsigned length
= glsl_get_vector_elements(type
);
70 *size
= comp_size
* length
;
74 class Converter
: public ConverterCommon
77 Converter(Program
*, nir_shader
*, nv50_ir_prog_info
*, nv50_ir_prog_info_out
*);
81 typedef std::vector
<LValue
*> LValues
;
82 typedef unordered_map
<unsigned, LValues
> NirDefMap
;
83 typedef unordered_map
<unsigned, nir_load_const_instr
*> ImmediateMap
;
84 typedef unordered_map
<unsigned, BasicBlock
*> NirBlockMap
;
86 CacheMode
convert(enum gl_access_qualifier
);
87 TexTarget
convert(glsl_sampler_dim
, bool isArray
, bool isShadow
);
88 LValues
& convert(nir_alu_dest
*);
89 BasicBlock
* convert(nir_block
*);
90 LValues
& convert(nir_dest
*);
91 SVSemantic
convert(nir_intrinsic_op
);
92 Value
* convert(nir_load_const_instr
*, uint8_t);
93 LValues
& convert(nir_register
*);
94 LValues
& convert(nir_ssa_def
*);
96 Value
* getSrc(nir_alu_src
*, uint8_t component
= 0);
97 Value
* getSrc(nir_register
*, uint8_t);
98 Value
* getSrc(nir_src
*, uint8_t, bool indirect
= false);
99 Value
* getSrc(nir_ssa_def
*, uint8_t);
101 // returned value is the constant part of the given source (either the
102 // nir_src or the selected source component of an intrinsic). Even though
103 // this is mostly an optimization to be able to skip indirects in a few
104 // cases, sometimes we require immediate values or set some fields on
105 // instructions (e.g. tex) in order for codegen to consume those.
106 // If the found value has no constant part, the Value gets returned
107 // through the Value parameter.
108 uint32_t getIndirect(nir_src
*, uint8_t, Value
*&);
109 // isScalar indicates that the addressing is scalar, vec4 addressing is
111 uint32_t getIndirect(nir_intrinsic_instr
*, uint8_t s
, uint8_t c
, Value
*&,
112 bool isScalar
= false);
114 uint32_t getSlotAddress(nir_intrinsic_instr
*, uint8_t idx
, uint8_t slot
);
116 void setInterpolate(nv50_ir_varying
*,
121 Instruction
*loadFrom(DataFile
, uint8_t, DataType
, Value
*def
, uint32_t base
,
122 uint8_t c
, Value
*indirect0
= NULL
,
123 Value
*indirect1
= NULL
, bool patch
= false);
124 void storeTo(nir_intrinsic_instr
*, DataFile
, operation
, DataType
,
125 Value
*src
, uint8_t idx
, uint8_t c
, Value
*indirect0
= NULL
,
126 Value
*indirect1
= NULL
);
128 bool isFloatType(nir_alu_type
);
129 bool isSignedType(nir_alu_type
);
130 bool isResultFloat(nir_op
);
131 bool isResultSigned(nir_op
);
133 DataType
getDType(nir_alu_instr
*);
134 DataType
getDType(nir_intrinsic_instr
*);
135 DataType
getDType(nir_intrinsic_instr
*, bool isSigned
);
136 DataType
getDType(nir_op
, uint8_t);
138 DataFile
getFile(nir_intrinsic_op
);
140 std::vector
<DataType
> getSTypes(nir_alu_instr
*);
141 DataType
getSType(nir_src
&, bool isFloat
, bool isSigned
);
143 operation
getOperation(nir_intrinsic_op
);
144 operation
getOperation(nir_op
);
145 operation
getOperation(nir_texop
);
146 operation
preOperationNeeded(nir_op
);
148 int getSubOp(nir_intrinsic_op
);
149 int getSubOp(nir_op
);
151 CondCode
getCondCode(nir_op
);
156 bool visit(nir_alu_instr
*);
157 bool visit(nir_block
*);
158 bool visit(nir_cf_node
*);
159 bool visit(nir_function
*);
160 bool visit(nir_if
*);
161 bool visit(nir_instr
*);
162 bool visit(nir_intrinsic_instr
*);
163 bool visit(nir_jump_instr
*);
164 bool visit(nir_load_const_instr
*);
165 bool visit(nir_loop
*);
166 bool visit(nir_ssa_undef_instr
*);
167 bool visit(nir_tex_instr
*);
170 Value
* applyProjection(Value
*src
, Value
*proj
);
171 unsigned int getNIRArgCount(TexInstruction::Target
&);
177 ImmediateMap immediates
;
179 unsigned int curLoopDepth
;
180 unsigned int curIfDepth
;
184 Instruction
*immInsertPos
;
186 int clipVertexOutput
;
195 Converter::Converter(Program
*prog
, nir_shader
*nir
, nv50_ir_prog_info
*info
,
196 nv50_ir_prog_info_out
*info_out
)
197 : ConverterCommon(prog
, info
, info_out
),
203 zero
= mkImm((uint32_t)0);
207 Converter::convert(nir_block
*block
)
209 NirBlockMap::iterator it
= blocks
.find(block
->index
);
210 if (it
!= blocks
.end())
213 BasicBlock
*bb
= new BasicBlock(func
);
214 blocks
[block
->index
] = bb
;
219 Converter::isFloatType(nir_alu_type type
)
221 return nir_alu_type_get_base_type(type
) == nir_type_float
;
225 Converter::isSignedType(nir_alu_type type
)
227 return nir_alu_type_get_base_type(type
) == nir_type_int
;
231 Converter::isResultFloat(nir_op op
)
233 const nir_op_info
&info
= nir_op_infos
[op
];
234 if (info
.output_type
!= nir_type_invalid
)
235 return isFloatType(info
.output_type
);
237 ERROR("isResultFloat not implemented for %s\n", nir_op_infos
[op
].name
);
243 Converter::isResultSigned(nir_op op
)
246 // there is no umul and we get wrong results if we treat all muls as signed
251 const nir_op_info
&info
= nir_op_infos
[op
];
252 if (info
.output_type
!= nir_type_invalid
)
253 return isSignedType(info
.output_type
);
254 ERROR("isResultSigned not implemented for %s\n", nir_op_infos
[op
].name
);
261 Converter::getDType(nir_alu_instr
*insn
)
263 if (insn
->dest
.dest
.is_ssa
)
264 return getDType(insn
->op
, insn
->dest
.dest
.ssa
.bit_size
);
266 return getDType(insn
->op
, insn
->dest
.dest
.reg
.reg
->bit_size
);
270 Converter::getDType(nir_intrinsic_instr
*insn
)
273 switch (insn
->intrinsic
) {
274 case nir_intrinsic_shared_atomic_imax
:
275 case nir_intrinsic_shared_atomic_imin
:
276 case nir_intrinsic_ssbo_atomic_imax
:
277 case nir_intrinsic_ssbo_atomic_imin
:
285 return getDType(insn
, isSigned
);
289 Converter::getDType(nir_intrinsic_instr
*insn
, bool isSigned
)
291 if (insn
->dest
.is_ssa
)
292 return typeOfSize(insn
->dest
.ssa
.bit_size
/ 8, false, isSigned
);
294 return typeOfSize(insn
->dest
.reg
.reg
->bit_size
/ 8, false, isSigned
);
298 Converter::getDType(nir_op op
, uint8_t bitSize
)
300 DataType ty
= typeOfSize(bitSize
/ 8, isResultFloat(op
), isResultSigned(op
));
301 if (ty
== TYPE_NONE
) {
302 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos
[op
].name
, bitSize
);
308 std::vector
<DataType
>
309 Converter::getSTypes(nir_alu_instr
*insn
)
311 const nir_op_info
&info
= nir_op_infos
[insn
->op
];
312 std::vector
<DataType
> res(info
.num_inputs
);
314 for (uint8_t i
= 0; i
< info
.num_inputs
; ++i
) {
315 if (info
.input_types
[i
] != nir_type_invalid
) {
316 res
[i
] = getSType(insn
->src
[i
].src
, isFloatType(info
.input_types
[i
]), isSignedType(info
.input_types
[i
]));
318 ERROR("getSType not implemented for %s idx %u\n", info
.name
, i
);
329 Converter::getSType(nir_src
&src
, bool isFloat
, bool isSigned
)
333 bitSize
= src
.ssa
->bit_size
;
335 bitSize
= src
.reg
.reg
->bit_size
;
337 DataType ty
= typeOfSize(bitSize
/ 8, isFloat
, isSigned
);
338 if (ty
== TYPE_NONE
) {
346 ERROR("couldn't get Type for %s with bitSize %u\n", str
, bitSize
);
353 Converter::getFile(nir_intrinsic_op op
)
356 case nir_intrinsic_load_global
:
357 case nir_intrinsic_store_global
:
358 case nir_intrinsic_load_global_constant
:
359 return FILE_MEMORY_GLOBAL
;
360 case nir_intrinsic_load_scratch
:
361 case nir_intrinsic_store_scratch
:
362 return FILE_MEMORY_LOCAL
;
363 case nir_intrinsic_load_shared
:
364 case nir_intrinsic_store_shared
:
365 return FILE_MEMORY_SHARED
;
366 case nir_intrinsic_load_kernel_input
:
367 return FILE_SHADER_INPUT
;
369 ERROR("couldn't get DateFile for op %s\n", nir_intrinsic_infos
[op
].name
);
376 Converter::getOperation(nir_op op
)
379 // basic ops with float and int variants
388 case nir_op_ifind_msb
:
389 case nir_op_ufind_msb
:
411 case nir_op_fddx_coarse
:
412 case nir_op_fddx_fine
:
415 case nir_op_fddy_coarse
:
416 case nir_op_fddy_fine
:
434 case nir_op_pack_64_2x32_split
:
448 case nir_op_imul_high
:
449 case nir_op_umul_high
:
491 ERROR("couldn't get operation for op %s\n", nir_op_infos
[op
].name
);
498 Converter::getOperation(nir_texop op
)
510 case nir_texop_txf_ms
:
516 case nir_texop_query_levels
:
517 case nir_texop_texture_samples
:
521 ERROR("couldn't get operation for nir_texop %u\n", op
);
528 Converter::getOperation(nir_intrinsic_op op
)
531 case nir_intrinsic_emit_vertex
:
533 case nir_intrinsic_end_primitive
:
535 case nir_intrinsic_bindless_image_atomic_add
:
536 case nir_intrinsic_image_atomic_add
:
537 case nir_intrinsic_bindless_image_atomic_and
:
538 case nir_intrinsic_image_atomic_and
:
539 case nir_intrinsic_bindless_image_atomic_comp_swap
:
540 case nir_intrinsic_image_atomic_comp_swap
:
541 case nir_intrinsic_bindless_image_atomic_exchange
:
542 case nir_intrinsic_image_atomic_exchange
:
543 case nir_intrinsic_bindless_image_atomic_imax
:
544 case nir_intrinsic_image_atomic_imax
:
545 case nir_intrinsic_bindless_image_atomic_umax
:
546 case nir_intrinsic_image_atomic_umax
:
547 case nir_intrinsic_bindless_image_atomic_imin
:
548 case nir_intrinsic_image_atomic_imin
:
549 case nir_intrinsic_bindless_image_atomic_umin
:
550 case nir_intrinsic_image_atomic_umin
:
551 case nir_intrinsic_bindless_image_atomic_or
:
552 case nir_intrinsic_image_atomic_or
:
553 case nir_intrinsic_bindless_image_atomic_xor
:
554 case nir_intrinsic_image_atomic_xor
:
555 case nir_intrinsic_bindless_image_atomic_inc_wrap
:
556 case nir_intrinsic_image_atomic_inc_wrap
:
557 case nir_intrinsic_bindless_image_atomic_dec_wrap
:
558 case nir_intrinsic_image_atomic_dec_wrap
:
560 case nir_intrinsic_bindless_image_load
:
561 case nir_intrinsic_image_load
:
563 case nir_intrinsic_bindless_image_samples
:
564 case nir_intrinsic_image_samples
:
565 case nir_intrinsic_bindless_image_size
:
566 case nir_intrinsic_image_size
:
568 case nir_intrinsic_bindless_image_store
:
569 case nir_intrinsic_image_store
:
572 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op
);
579 Converter::preOperationNeeded(nir_op op
)
591 Converter::getSubOp(nir_op op
)
594 case nir_op_imul_high
:
595 case nir_op_umul_high
:
596 return NV50_IR_SUBOP_MUL_HIGH
;
600 return NV50_IR_SUBOP_SHIFT_WRAP
;
607 Converter::getSubOp(nir_intrinsic_op op
)
610 case nir_intrinsic_bindless_image_atomic_add
:
611 case nir_intrinsic_global_atomic_add
:
612 case nir_intrinsic_image_atomic_add
:
613 case nir_intrinsic_shared_atomic_add
:
614 case nir_intrinsic_ssbo_atomic_add
:
615 return NV50_IR_SUBOP_ATOM_ADD
;
616 case nir_intrinsic_bindless_image_atomic_and
:
617 case nir_intrinsic_global_atomic_and
:
618 case nir_intrinsic_image_atomic_and
:
619 case nir_intrinsic_shared_atomic_and
:
620 case nir_intrinsic_ssbo_atomic_and
:
621 return NV50_IR_SUBOP_ATOM_AND
;
622 case nir_intrinsic_bindless_image_atomic_comp_swap
:
623 case nir_intrinsic_global_atomic_comp_swap
:
624 case nir_intrinsic_image_atomic_comp_swap
:
625 case nir_intrinsic_shared_atomic_comp_swap
:
626 case nir_intrinsic_ssbo_atomic_comp_swap
:
627 return NV50_IR_SUBOP_ATOM_CAS
;
628 case nir_intrinsic_bindless_image_atomic_exchange
:
629 case nir_intrinsic_global_atomic_exchange
:
630 case nir_intrinsic_image_atomic_exchange
:
631 case nir_intrinsic_shared_atomic_exchange
:
632 case nir_intrinsic_ssbo_atomic_exchange
:
633 return NV50_IR_SUBOP_ATOM_EXCH
;
634 case nir_intrinsic_bindless_image_atomic_or
:
635 case nir_intrinsic_global_atomic_or
:
636 case nir_intrinsic_image_atomic_or
:
637 case nir_intrinsic_shared_atomic_or
:
638 case nir_intrinsic_ssbo_atomic_or
:
639 return NV50_IR_SUBOP_ATOM_OR
;
640 case nir_intrinsic_bindless_image_atomic_imax
:
641 case nir_intrinsic_bindless_image_atomic_umax
:
642 case nir_intrinsic_global_atomic_imax
:
643 case nir_intrinsic_global_atomic_umax
:
644 case nir_intrinsic_image_atomic_imax
:
645 case nir_intrinsic_image_atomic_umax
:
646 case nir_intrinsic_shared_atomic_imax
:
647 case nir_intrinsic_shared_atomic_umax
:
648 case nir_intrinsic_ssbo_atomic_imax
:
649 case nir_intrinsic_ssbo_atomic_umax
:
650 return NV50_IR_SUBOP_ATOM_MAX
;
651 case nir_intrinsic_bindless_image_atomic_imin
:
652 case nir_intrinsic_bindless_image_atomic_umin
:
653 case nir_intrinsic_global_atomic_imin
:
654 case nir_intrinsic_global_atomic_umin
:
655 case nir_intrinsic_image_atomic_imin
:
656 case nir_intrinsic_image_atomic_umin
:
657 case nir_intrinsic_shared_atomic_imin
:
658 case nir_intrinsic_shared_atomic_umin
:
659 case nir_intrinsic_ssbo_atomic_imin
:
660 case nir_intrinsic_ssbo_atomic_umin
:
661 return NV50_IR_SUBOP_ATOM_MIN
;
662 case nir_intrinsic_bindless_image_atomic_xor
:
663 case nir_intrinsic_global_atomic_xor
:
664 case nir_intrinsic_image_atomic_xor
:
665 case nir_intrinsic_shared_atomic_xor
:
666 case nir_intrinsic_ssbo_atomic_xor
:
667 return NV50_IR_SUBOP_ATOM_XOR
;
668 case nir_intrinsic_bindless_image_atomic_inc_wrap
:
669 case nir_intrinsic_image_atomic_inc_wrap
:
670 return NV50_IR_SUBOP_ATOM_INC
;
671 case nir_intrinsic_bindless_image_atomic_dec_wrap
:
672 case nir_intrinsic_image_atomic_dec_wrap
:
673 return NV50_IR_SUBOP_ATOM_DEC
;
675 case nir_intrinsic_group_memory_barrier
:
676 case nir_intrinsic_memory_barrier
:
677 case nir_intrinsic_memory_barrier_buffer
:
678 case nir_intrinsic_memory_barrier_image
:
679 return NV50_IR_SUBOP_MEMBAR(M
, GL
);
680 case nir_intrinsic_memory_barrier_shared
:
681 return NV50_IR_SUBOP_MEMBAR(M
, CTA
);
683 case nir_intrinsic_vote_all
:
684 return NV50_IR_SUBOP_VOTE_ALL
;
685 case nir_intrinsic_vote_any
:
686 return NV50_IR_SUBOP_VOTE_ANY
;
687 case nir_intrinsic_vote_ieq
:
688 return NV50_IR_SUBOP_VOTE_UNI
;
695 Converter::getCondCode(nir_op op
)
714 ERROR("couldn't get CondCode for op %s\n", nir_op_infos
[op
].name
);
721 Converter::convert(nir_alu_dest
*dest
)
723 return convert(&dest
->dest
);
727 Converter::convert(nir_dest
*dest
)
730 return convert(&dest
->ssa
);
731 if (dest
->reg
.indirect
) {
732 ERROR("no support for indirects.");
735 return convert(dest
->reg
.reg
);
739 Converter::convert(nir_register
*reg
)
741 assert(!reg
->num_array_elems
);
743 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
744 if (it
!= regDefs
.end())
747 LValues
newDef(reg
->num_components
);
748 for (uint8_t i
= 0; i
< reg
->num_components
; i
++)
749 newDef
[i
] = getScratch(std::max(4, reg
->bit_size
/ 8));
750 return regDefs
[reg
->index
] = newDef
;
754 Converter::convert(nir_ssa_def
*def
)
756 NirDefMap::iterator it
= ssaDefs
.find(def
->index
);
757 if (it
!= ssaDefs
.end())
760 LValues
newDef(def
->num_components
);
761 for (uint8_t i
= 0; i
< def
->num_components
; i
++)
762 newDef
[i
] = getSSA(std::max(4, def
->bit_size
/ 8));
763 return ssaDefs
[def
->index
] = newDef
;
767 Converter::getSrc(nir_alu_src
*src
, uint8_t component
)
769 if (src
->abs
|| src
->negate
) {
770 ERROR("modifiers currently not supported on nir_alu_src\n");
773 return getSrc(&src
->src
, src
->swizzle
[component
]);
777 Converter::getSrc(nir_register
*reg
, uint8_t idx
)
779 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
780 if (it
== regDefs
.end())
781 return convert(reg
)[idx
];
782 return it
->second
[idx
];
786 Converter::getSrc(nir_src
*src
, uint8_t idx
, bool indirect
)
789 return getSrc(src
->ssa
, idx
);
791 if (src
->reg
.indirect
) {
793 return getSrc(src
->reg
.indirect
, idx
);
794 ERROR("no support for indirects.");
799 return getSrc(src
->reg
.reg
, idx
);
803 Converter::getSrc(nir_ssa_def
*src
, uint8_t idx
)
805 ImmediateMap::iterator iit
= immediates
.find(src
->index
);
806 if (iit
!= immediates
.end())
807 return convert((*iit
).second
, idx
);
809 NirDefMap::iterator it
= ssaDefs
.find(src
->index
);
810 if (it
== ssaDefs
.end()) {
811 ERROR("SSA value %u not found\n", src
->index
);
815 return it
->second
[idx
];
819 Converter::getIndirect(nir_src
*src
, uint8_t idx
, Value
*&indirect
)
821 nir_const_value
*offset
= nir_src_as_const_value(*src
);
825 return offset
[0].u32
;
828 indirect
= getSrc(src
, idx
, true);
833 Converter::getIndirect(nir_intrinsic_instr
*insn
, uint8_t s
, uint8_t c
, Value
*&indirect
, bool isScalar
)
835 int32_t idx
= nir_intrinsic_base(insn
) + getIndirect(&insn
->src
[s
], c
, indirect
);
836 if (indirect
&& !isScalar
)
837 indirect
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), indirect
, loadImm(NULL
, 4));
842 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot
, unsigned *name
, unsigned *index
)
844 assert(name
&& index
);
846 if (slot
>= VERT_ATTRIB_MAX
) {
847 ERROR("invalid varying slot %u\n", slot
);
852 if (slot
>= VERT_ATTRIB_GENERIC0
&&
853 slot
< VERT_ATTRIB_GENERIC0
+ VERT_ATTRIB_GENERIC_MAX
) {
854 *name
= TGSI_SEMANTIC_GENERIC
;
855 *index
= slot
- VERT_ATTRIB_GENERIC0
;
859 if (slot
>= VERT_ATTRIB_TEX0
&&
860 slot
< VERT_ATTRIB_TEX0
+ VERT_ATTRIB_TEX_MAX
) {
861 *name
= TGSI_SEMANTIC_TEXCOORD
;
862 *index
= slot
- VERT_ATTRIB_TEX0
;
867 case VERT_ATTRIB_COLOR0
:
868 *name
= TGSI_SEMANTIC_COLOR
;
871 case VERT_ATTRIB_COLOR1
:
872 *name
= TGSI_SEMANTIC_COLOR
;
875 case VERT_ATTRIB_EDGEFLAG
:
876 *name
= TGSI_SEMANTIC_EDGEFLAG
;
879 case VERT_ATTRIB_FOG
:
880 *name
= TGSI_SEMANTIC_FOG
;
883 case VERT_ATTRIB_NORMAL
:
884 *name
= TGSI_SEMANTIC_NORMAL
;
887 case VERT_ATTRIB_POS
:
888 *name
= TGSI_SEMANTIC_POSITION
;
891 case VERT_ATTRIB_POINT_SIZE
:
892 *name
= TGSI_SEMANTIC_PSIZE
;
896 ERROR("unknown vert attrib slot %u\n", slot
);
903 Converter::setInterpolate(nv50_ir_varying
*var
,
909 case INTERP_MODE_FLAT
:
912 case INTERP_MODE_NONE
:
913 if (semantic
== TGSI_SEMANTIC_COLOR
)
915 else if (semantic
== TGSI_SEMANTIC_POSITION
)
918 case INTERP_MODE_NOPERSPECTIVE
:
921 case INTERP_MODE_SMOOTH
:
924 var
->centroid
= centroid
;
928 calcSlots(const glsl_type
*type
, Program::Type stage
, const shader_info
&info
,
929 bool input
, const nir_variable
*var
)
931 if (!type
->is_array())
932 return type
->count_attribute_slots(false);
936 case Program::TYPE_GEOMETRY
:
937 slots
= type
->count_attribute_slots(false);
939 slots
/= info
.gs
.vertices_in
;
941 case Program::TYPE_TESSELLATION_CONTROL
:
942 case Program::TYPE_TESSELLATION_EVAL
:
943 // remove first dimension
944 if (var
->data
.patch
|| (!input
&& stage
== Program::TYPE_TESSELLATION_EVAL
))
945 slots
= type
->count_attribute_slots(false);
947 slots
= type
->fields
.array
->count_attribute_slots(false);
950 slots
= type
->count_attribute_slots(false);
958 getMaskForType(const glsl_type
*type
, uint8_t slot
) {
959 uint16_t comp
= type
->without_array()->components();
960 comp
= comp
? comp
: 4;
962 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
972 return (1 << comp
) - 1;
975 bool Converter::assignSlots() {
979 info
->io
.viewportId
= -1;
980 info_out
->numInputs
= 0;
981 info_out
->numOutputs
= 0;
982 info_out
->numSysVals
= 0;
984 for (uint8_t i
= 0; i
< SYSTEM_VALUE_MAX
; ++i
) {
985 if (!(nir
->info
.system_values_read
& 1ull << i
))
988 info_out
->sv
[info_out
->numSysVals
].sn
= tgsi_get_sysval_semantic(i
);
989 info_out
->sv
[info_out
->numSysVals
].si
= 0;
990 info_out
->sv
[info_out
->numSysVals
].input
= 0; // TODO inferSysValDirection(sn);
993 case SYSTEM_VALUE_INSTANCE_ID
:
994 info_out
->io
.instanceId
= info_out
->numSysVals
;
996 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
997 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
998 info_out
->sv
[info_out
->numSysVals
].patch
= 1;
1000 case SYSTEM_VALUE_VERTEX_ID
:
1001 info_out
->io
.vertexId
= info_out
->numSysVals
;
1007 info_out
->numSysVals
+= 1;
1010 if (prog
->getType() == Program::TYPE_COMPUTE
)
1013 nir_foreach_shader_in_variable(var
, nir
) {
1014 const glsl_type
*type
= var
->type
;
1015 int slot
= var
->data
.location
;
1016 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, true, var
);
1017 uint32_t vary
= var
->data
.driver_location
;
1019 assert(vary
+ slots
<= PIPE_MAX_SHADER_INPUTS
);
1021 switch(prog
->getType()) {
1022 case Program::TYPE_FRAGMENT
:
1023 tgsi_get_gl_varying_semantic((gl_varying_slot
)slot
, true,
1025 for (uint16_t i
= 0; i
< slots
; ++i
) {
1026 setInterpolate(&info_out
->in
[vary
+ i
], var
->data
.interpolation
,
1027 var
->data
.centroid
| var
->data
.sample
, name
);
1030 case Program::TYPE_GEOMETRY
:
1031 tgsi_get_gl_varying_semantic((gl_varying_slot
)slot
, true,
1034 case Program::TYPE_TESSELLATION_CONTROL
:
1035 case Program::TYPE_TESSELLATION_EVAL
:
1036 tgsi_get_gl_varying_semantic((gl_varying_slot
)slot
, true,
1038 if (var
->data
.patch
&& name
== TGSI_SEMANTIC_PATCH
)
1039 info_out
->numPatchConstants
= MAX2(info_out
->numPatchConstants
, index
+ slots
);
1041 case Program::TYPE_VERTEX
:
1042 if (slot
>= VERT_ATTRIB_GENERIC0
)
1043 slot
= VERT_ATTRIB_GENERIC0
+ vary
;
1044 vert_attrib_to_tgsi_semantic((gl_vert_attrib
)slot
, &name
, &index
);
1046 case TGSI_SEMANTIC_EDGEFLAG
:
1047 info_out
->io
.edgeFlagIn
= vary
;
1054 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1058 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1059 nv50_ir_varying
*v
= &info_out
->in
[vary
];
1061 v
->patch
= var
->data
.patch
;
1064 v
->mask
|= getMaskForType(type
, i
) << var
->data
.location_frac
;
1066 info_out
->numInputs
= std::max
<uint8_t>(info_out
->numInputs
, vary
);
1069 nir_foreach_shader_out_variable(var
, nir
) {
1070 const glsl_type
*type
= var
->type
;
1071 int slot
= var
->data
.location
;
1072 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, false, var
);
1073 uint32_t vary
= var
->data
.driver_location
;
1075 assert(vary
< PIPE_MAX_SHADER_OUTPUTS
);
1077 switch(prog
->getType()) {
1078 case Program::TYPE_FRAGMENT
:
1079 tgsi_get_gl_frag_result_semantic((gl_frag_result
)slot
, &name
, &index
);
1081 case TGSI_SEMANTIC_COLOR
:
1082 if (!var
->data
.fb_fetch_output
)
1083 info_out
->prop
.fp
.numColourResults
++;
1084 if (var
->data
.location
== FRAG_RESULT_COLOR
&&
1085 nir
->info
.outputs_written
& BITFIELD64_BIT(var
->data
.location
))
1086 info_out
->prop
.fp
.separateFragData
= true;
1087 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1088 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1089 index
= index
== 0 ? var
->data
.index
: index
;
1091 case TGSI_SEMANTIC_POSITION
:
1092 info_out
->io
.fragDepth
= vary
;
1093 info_out
->prop
.fp
.writesDepth
= true;
1095 case TGSI_SEMANTIC_SAMPLEMASK
:
1096 info_out
->io
.sampleMask
= vary
;
1102 case Program::TYPE_GEOMETRY
:
1103 case Program::TYPE_TESSELLATION_CONTROL
:
1104 case Program::TYPE_TESSELLATION_EVAL
:
1105 case Program::TYPE_VERTEX
:
1106 tgsi_get_gl_varying_semantic((gl_varying_slot
)slot
, true,
1109 if (var
->data
.patch
&& name
!= TGSI_SEMANTIC_TESSINNER
&&
1110 name
!= TGSI_SEMANTIC_TESSOUTER
)
1111 info_out
->numPatchConstants
= MAX2(info_out
->numPatchConstants
, index
+ slots
);
1114 case TGSI_SEMANTIC_CLIPDIST
:
1115 info_out
->io
.genUserClip
= -1;
1117 case TGSI_SEMANTIC_CLIPVERTEX
:
1118 clipVertexOutput
= vary
;
1120 case TGSI_SEMANTIC_EDGEFLAG
:
1121 info_out
->io
.edgeFlagOut
= vary
;
1123 case TGSI_SEMANTIC_POSITION
:
1124 if (clipVertexOutput
< 0)
1125 clipVertexOutput
= vary
;
1132 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1136 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1137 nv50_ir_varying
*v
= &info_out
->out
[vary
];
1138 v
->patch
= var
->data
.patch
;
1141 v
->mask
|= getMaskForType(type
, i
) << var
->data
.location_frac
;
1143 if (nir
->info
.outputs_read
& 1ull << slot
)
1146 info_out
->numOutputs
= std::max
<uint8_t>(info_out
->numOutputs
, vary
);
1149 if (info_out
->io
.genUserClip
> 0) {
1150 info_out
->io
.clipDistances
= info_out
->io
.genUserClip
;
1152 const unsigned int nOut
= (info_out
->io
.genUserClip
+ 3) / 4;
1154 for (unsigned int n
= 0; n
< nOut
; ++n
) {
1155 unsigned int i
= info_out
->numOutputs
++;
1156 info_out
->out
[i
].id
= i
;
1157 info_out
->out
[i
].sn
= TGSI_SEMANTIC_CLIPDIST
;
1158 info_out
->out
[i
].si
= n
;
1159 info_out
->out
[i
].mask
= ((1 << info_out
->io
.clipDistances
) - 1) >> (n
* 4);
1163 return info
->assignSlots(info_out
) == 0;
1167 Converter::getSlotAddress(nir_intrinsic_instr
*insn
, uint8_t idx
, uint8_t slot
)
1170 int offset
= nir_intrinsic_component(insn
);
1173 if (nir_intrinsic_infos
[insn
->intrinsic
].has_dest
)
1174 ty
= getDType(insn
);
1176 ty
= getSType(insn
->src
[0], false, false);
1178 switch (insn
->intrinsic
) {
1179 case nir_intrinsic_load_input
:
1180 case nir_intrinsic_load_interpolated_input
:
1181 case nir_intrinsic_load_per_vertex_input
:
1184 case nir_intrinsic_load_output
:
1185 case nir_intrinsic_load_per_vertex_output
:
1186 case nir_intrinsic_store_output
:
1187 case nir_intrinsic_store_per_vertex_output
:
1191 ERROR("unknown intrinsic in getSlotAddress %s",
1192 nir_intrinsic_infos
[insn
->intrinsic
].name
);
1198 if (typeSizeof(ty
) == 8) {
1210 assert(!input
|| idx
< PIPE_MAX_SHADER_INPUTS
);
1211 assert(input
|| idx
< PIPE_MAX_SHADER_OUTPUTS
);
1213 const nv50_ir_varying
*vary
= input
? info_out
->in
: info_out
->out
;
1214 return vary
[idx
].slot
[slot
] * 4;
1218 Converter::loadFrom(DataFile file
, uint8_t i
, DataType ty
, Value
*def
,
1219 uint32_t base
, uint8_t c
, Value
*indirect0
,
1220 Value
*indirect1
, bool patch
)
1222 unsigned int tySize
= typeSizeof(ty
);
1225 (file
== FILE_MEMORY_CONST
|| file
== FILE_MEMORY_BUFFER
|| indirect0
)) {
1226 Value
*lo
= getSSA();
1227 Value
*hi
= getSSA();
1230 mkLoad(TYPE_U32
, lo
,
1231 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
),
1233 loi
->setIndirect(0, 1, indirect1
);
1234 loi
->perPatch
= patch
;
1237 mkLoad(TYPE_U32
, hi
,
1238 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
+ 4),
1240 hii
->setIndirect(0, 1, indirect1
);
1241 hii
->perPatch
= patch
;
1243 return mkOp2(OP_MERGE
, ty
, def
, lo
, hi
);
1246 mkLoad(ty
, def
, mkSymbol(file
, i
, ty
, base
+ c
* tySize
), indirect0
);
1247 ld
->setIndirect(0, 1, indirect1
);
1248 ld
->perPatch
= patch
;
1254 Converter::storeTo(nir_intrinsic_instr
*insn
, DataFile file
, operation op
,
1255 DataType ty
, Value
*src
, uint8_t idx
, uint8_t c
,
1256 Value
*indirect0
, Value
*indirect1
)
1258 uint8_t size
= typeSizeof(ty
);
1259 uint32_t address
= getSlotAddress(insn
, idx
, c
);
1261 if (size
== 8 && indirect0
) {
1263 mkSplit(split
, 4, src
);
1265 if (op
== OP_EXPORT
) {
1266 split
[0] = mkMov(getSSA(), split
[0], ty
)->getDef(0);
1267 split
[1] = mkMov(getSSA(), split
[1], ty
)->getDef(0);
1270 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
), indirect0
,
1271 split
[0])->perPatch
= info_out
->out
[idx
].patch
;
1272 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
+ 4), indirect0
,
1273 split
[1])->perPatch
= info_out
->out
[idx
].patch
;
1275 if (op
== OP_EXPORT
)
1276 src
= mkMov(getSSA(size
), src
, ty
)->getDef(0);
1277 mkStore(op
, ty
, mkSymbol(file
, 0, ty
, address
), indirect0
,
1278 src
)->perPatch
= info_out
->out
[idx
].patch
;
1283 Converter::parseNIR()
1285 info_out
->bin
.tlsSpace
= nir
->scratch_size
;
1286 info_out
->io
.clipDistances
= nir
->info
.clip_distance_array_size
;
1287 info_out
->io
.cullDistances
= nir
->info
.cull_distance_array_size
;
1288 info_out
->io
.layer_viewport_relative
= nir
->info
.layer_viewport_relative
;
1290 switch(prog
->getType()) {
1291 case Program::TYPE_COMPUTE
:
1292 info
->prop
.cp
.numThreads
[0] = nir
->info
.cs
.local_size
[0];
1293 info
->prop
.cp
.numThreads
[1] = nir
->info
.cs
.local_size
[1];
1294 info
->prop
.cp
.numThreads
[2] = nir
->info
.cs
.local_size
[2];
1295 info_out
->bin
.smemSize
+= nir
->info
.cs
.shared_size
;
1297 case Program::TYPE_FRAGMENT
:
1298 info_out
->prop
.fp
.earlyFragTests
= nir
->info
.fs
.early_fragment_tests
;
1299 prog
->persampleInvocation
=
1300 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_ID
) ||
1301 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1302 info_out
->prop
.fp
.postDepthCoverage
= nir
->info
.fs
.post_depth_coverage
;
1303 info_out
->prop
.fp
.readsSampleLocations
=
1304 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1305 info_out
->prop
.fp
.usesDiscard
= nir
->info
.fs
.uses_discard
|| nir
->info
.fs
.uses_demote
;
1306 info_out
->prop
.fp
.usesSampleMaskIn
=
1307 !!(nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_MASK_IN
);
1309 case Program::TYPE_GEOMETRY
:
1310 info_out
->prop
.gp
.instanceCount
= nir
->info
.gs
.invocations
;
1311 info_out
->prop
.gp
.maxVertices
= nir
->info
.gs
.vertices_out
;
1312 info_out
->prop
.gp
.outputPrim
= nir
->info
.gs
.output_primitive
;
1314 case Program::TYPE_TESSELLATION_CONTROL
:
1315 case Program::TYPE_TESSELLATION_EVAL
:
1316 if (nir
->info
.tess
.primitive_mode
== GL_ISOLINES
)
1317 info_out
->prop
.tp
.domain
= GL_LINES
;
1319 info_out
->prop
.tp
.domain
= nir
->info
.tess
.primitive_mode
;
1320 info_out
->prop
.tp
.outputPatchSize
= nir
->info
.tess
.tcs_vertices_out
;
1321 info_out
->prop
.tp
.outputPrim
=
1322 nir
->info
.tess
.point_mode
? PIPE_PRIM_POINTS
: PIPE_PRIM_TRIANGLES
;
1323 info_out
->prop
.tp
.partitioning
= (nir
->info
.tess
.spacing
+ 1) % 3;
1324 info_out
->prop
.tp
.winding
= !nir
->info
.tess
.ccw
;
1326 case Program::TYPE_VERTEX
:
1327 info_out
->prop
.vp
.usesDrawParameters
=
1328 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX
)) ||
1329 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE
)) ||
1330 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID
));
1340 Converter::visit(nir_function
*function
)
1342 assert(function
->impl
);
1344 // usually the blocks will set everything up, but main is special
1345 BasicBlock
*entry
= new BasicBlock(prog
->main
);
1346 exit
= new BasicBlock(prog
->main
);
1347 blocks
[nir_start_block(function
->impl
)->index
] = entry
;
1348 prog
->main
->setEntry(entry
);
1349 prog
->main
->setExit(exit
);
1351 setPosition(entry
, true);
1353 if (info_out
->io
.genUserClip
> 0) {
1354 for (int c
= 0; c
< 4; ++c
)
1355 clipVtx
[c
] = getScratch();
1358 switch (prog
->getType()) {
1359 case Program::TYPE_TESSELLATION_CONTROL
:
1361 OP_SUB
, TYPE_U32
, getSSA(),
1362 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LANEID
, 0)),
1363 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_INVOCATION_ID
, 0)));
1365 case Program::TYPE_FRAGMENT
: {
1366 Symbol
*sv
= mkSysVal(SV_POSITION
, 3);
1367 fragCoord
[3] = mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), sv
);
1368 fp
.position
= mkOp1v(OP_RCP
, TYPE_F32
, fragCoord
[3], fragCoord
[3]);
1375 nir_index_ssa_defs(function
->impl
);
1376 foreach_list_typed(nir_cf_node
, node
, node
, &function
->impl
->body
) {
1381 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::TREE
);
1382 setPosition(exit
, true);
1384 if ((prog
->getType() == Program::TYPE_VERTEX
||
1385 prog
->getType() == Program::TYPE_TESSELLATION_EVAL
)
1386 && info_out
->io
.genUserClip
> 0)
1387 handleUserClipPlanes();
1389 // TODO: for non main function this needs to be a OP_RETURN
1390 mkOp(OP_EXIT
, TYPE_NONE
, NULL
)->terminator
= 1;
1395 Converter::visit(nir_cf_node
*node
)
1397 switch (node
->type
) {
1398 case nir_cf_node_block
:
1399 return visit(nir_cf_node_as_block(node
));
1400 case nir_cf_node_if
:
1401 return visit(nir_cf_node_as_if(node
));
1402 case nir_cf_node_loop
:
1403 return visit(nir_cf_node_as_loop(node
));
1405 ERROR("unknown nir_cf_node type %u\n", node
->type
);
1411 Converter::visit(nir_block
*block
)
1413 if (!block
->predecessors
->entries
&& block
->instr_list
.is_empty())
1416 BasicBlock
*bb
= convert(block
);
1418 setPosition(bb
, true);
1419 nir_foreach_instr(insn
, block
) {
1427 Converter::visit(nir_if
*nif
)
1431 DataType sType
= getSType(nif
->condition
, false, false);
1432 Value
*src
= getSrc(&nif
->condition
, 0);
1434 nir_block
*lastThen
= nir_if_last_then_block(nif
);
1435 nir_block
*lastElse
= nir_if_last_else_block(nif
);
1437 BasicBlock
*headBB
= bb
;
1438 BasicBlock
*ifBB
= convert(nir_if_first_then_block(nif
));
1439 BasicBlock
*elseBB
= convert(nir_if_first_else_block(nif
));
1441 bb
->cfg
.attach(&ifBB
->cfg
, Graph::Edge::TREE
);
1442 bb
->cfg
.attach(&elseBB
->cfg
, Graph::Edge::TREE
);
1444 bool insertJoins
= lastThen
->successors
[0] == lastElse
->successors
[0];
1445 mkFlow(OP_BRA
, elseBB
, CC_EQ
, src
)->setType(sType
);
1447 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->then_list
) {
1452 setPosition(convert(lastThen
), true);
1453 if (!bb
->isTerminated()) {
1454 BasicBlock
*tailBB
= convert(lastThen
->successors
[0]);
1455 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1456 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1458 insertJoins
= insertJoins
&& bb
->getExit()->op
== OP_BRA
;
1461 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->else_list
) {
1466 setPosition(convert(lastElse
), true);
1467 if (!bb
->isTerminated()) {
1468 BasicBlock
*tailBB
= convert(lastElse
->successors
[0]);
1469 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1470 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1472 insertJoins
= insertJoins
&& bb
->getExit()->op
== OP_BRA
;
1475 /* only insert joins for the most outer if */
1477 insertJoins
= false;
1479 /* we made sure that all threads would converge at the same block */
1481 BasicBlock
*conv
= convert(lastThen
->successors
[0]);
1482 setPosition(headBB
->getExit(), false);
1483 headBB
->joinAt
= mkFlow(OP_JOINAT
, conv
, CC_ALWAYS
, NULL
);
1484 setPosition(conv
, false);
1485 mkFlow(OP_JOIN
, NULL
, CC_ALWAYS
, NULL
)->fixed
= 1;
1491 // TODO: add convergency
1493 Converter::visit(nir_loop
*loop
)
1496 func
->loopNestingBound
= std::max(func
->loopNestingBound
, curLoopDepth
);
1498 BasicBlock
*loopBB
= convert(nir_loop_first_block(loop
));
1499 BasicBlock
*tailBB
= convert(nir_cf_node_as_block(nir_cf_node_next(&loop
->cf_node
)));
1501 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::TREE
);
1503 mkFlow(OP_PREBREAK
, tailBB
, CC_ALWAYS
, NULL
);
1504 setPosition(loopBB
, false);
1505 mkFlow(OP_PRECONT
, loopBB
, CC_ALWAYS
, NULL
);
1507 foreach_list_typed(nir_cf_node
, node
, node
, &loop
->body
) {
1512 if (!bb
->isTerminated()) {
1513 mkFlow(OP_CONT
, loopBB
, CC_ALWAYS
, NULL
);
1514 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::BACK
);
1517 if (tailBB
->cfg
.incidentCount() == 0)
1518 loopBB
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::TREE
);
1526 Converter::visit(nir_instr
*insn
)
1528 // we need an insertion point for on the fly generated immediate loads
1529 immInsertPos
= bb
->getExit();
1530 switch (insn
->type
) {
1531 case nir_instr_type_alu
:
1532 return visit(nir_instr_as_alu(insn
));
1533 case nir_instr_type_intrinsic
:
1534 return visit(nir_instr_as_intrinsic(insn
));
1535 case nir_instr_type_jump
:
1536 return visit(nir_instr_as_jump(insn
));
1537 case nir_instr_type_load_const
:
1538 return visit(nir_instr_as_load_const(insn
));
1539 case nir_instr_type_ssa_undef
:
1540 return visit(nir_instr_as_ssa_undef(insn
));
1541 case nir_instr_type_tex
:
1542 return visit(nir_instr_as_tex(insn
));
1544 ERROR("unknown nir_instr type %u\n", insn
->type
);
1551 Converter::convert(nir_intrinsic_op intr
)
1554 case nir_intrinsic_load_base_vertex
:
1555 return SV_BASEVERTEX
;
1556 case nir_intrinsic_load_base_instance
:
1557 return SV_BASEINSTANCE
;
1558 case nir_intrinsic_load_draw_id
:
1560 case nir_intrinsic_load_front_face
:
1562 case nir_intrinsic_is_helper_invocation
:
1563 case nir_intrinsic_load_helper_invocation
:
1564 return SV_THREAD_KILL
;
1565 case nir_intrinsic_load_instance_id
:
1566 return SV_INSTANCE_ID
;
1567 case nir_intrinsic_load_invocation_id
:
1568 return SV_INVOCATION_ID
;
1569 case nir_intrinsic_load_local_group_size
:
1571 case nir_intrinsic_load_local_invocation_id
:
1573 case nir_intrinsic_load_num_work_groups
:
1575 case nir_intrinsic_load_patch_vertices_in
:
1576 return SV_VERTEX_COUNT
;
1577 case nir_intrinsic_load_primitive_id
:
1578 return SV_PRIMITIVE_ID
;
1579 case nir_intrinsic_load_sample_id
:
1580 return SV_SAMPLE_INDEX
;
1581 case nir_intrinsic_load_sample_mask_in
:
1582 return SV_SAMPLE_MASK
;
1583 case nir_intrinsic_load_sample_pos
:
1584 return SV_SAMPLE_POS
;
1585 case nir_intrinsic_load_subgroup_eq_mask
:
1586 return SV_LANEMASK_EQ
;
1587 case nir_intrinsic_load_subgroup_ge_mask
:
1588 return SV_LANEMASK_GE
;
1589 case nir_intrinsic_load_subgroup_gt_mask
:
1590 return SV_LANEMASK_GT
;
1591 case nir_intrinsic_load_subgroup_le_mask
:
1592 return SV_LANEMASK_LE
;
1593 case nir_intrinsic_load_subgroup_lt_mask
:
1594 return SV_LANEMASK_LT
;
1595 case nir_intrinsic_load_subgroup_invocation
:
1597 case nir_intrinsic_load_tess_coord
:
1598 return SV_TESS_COORD
;
1599 case nir_intrinsic_load_tess_level_inner
:
1600 return SV_TESS_INNER
;
1601 case nir_intrinsic_load_tess_level_outer
:
1602 return SV_TESS_OUTER
;
1603 case nir_intrinsic_load_vertex_id
:
1604 return SV_VERTEX_ID
;
1605 case nir_intrinsic_load_work_group_id
:
1607 case nir_intrinsic_load_work_dim
:
1610 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1611 nir_intrinsic_infos
[intr
].name
);
// Translates a single NIR intrinsic instruction into codegen IR, dispatching
// on insn->intrinsic (op). opInfo is the static NIR description of the
// intrinsic and dest_components the number of destination components.
1618 Converter::visit(nir_intrinsic_instr
*insn
)
1620 nir_intrinsic_op op
= insn
->intrinsic
;
1621 const nir_intrinsic_info
&opInfo
= nir_intrinsic_infos
[op
];
1622 unsigned dest_components
= nir_intrinsic_dest_components(insn
);
// Uniform load: one load per component from FILE_MEMORY_CONST, file index 0.
// The value returned by getIndirect() is scaled by 16 to form the address.
1625 case nir_intrinsic_load_uniform
: {
1626 LValues
&newDefs
= convert(&insn
->dest
);
1627 const DataType dType
= getDType(insn
);
1629 uint32_t coffset
= getIndirect(insn
, 0, 0, indirect
);
1630 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1631 loadFrom(FILE_MEMORY_CONST
, 0, dType
, newDefs
[i
], 16 * coffset
, i
, indirect
);
// Output stores: components are checked against the write mask and stored
// with OP_EXPORT. getIndirect() is called with 1 for store_output and 2 for
// store_per_vertex_output (presumably the index of the offset source).
1635 case nir_intrinsic_store_output
:
1636 case nir_intrinsic_store_per_vertex_output
: {
1638 DataType dType
= getSType(insn
->src
[0], false, false);
1639 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_store_output
? 1 : 2, 0, indirect
);
1641 for (uint8_t i
= 0u; i
< nir_intrinsic_src_components(insn
, 0); ++i
) {
1642 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
1646 Value
*src
= getSrc(&insn
->src
[0], i
);
1647 switch (prog
->getType()) {
// Fragment shaders: a value written to the POSITION output is saturated.
1648 case Program::TYPE_FRAGMENT
: {
1649 if (info_out
->out
[idx
].sn
== TGSI_SEMANTIC_POSITION
) {
1650 // TGSI uses a different interface than NIR, TGSI stores that
1651 // value in the z component, NIR in X
1653 src
= mkOp1v(OP_SAT
, TYPE_F32
, getScratch(), src
);
// Geometry / tess eval / vertex shaders: when user clip planes are generated,
// the clip vertex output is additionally copied into clipVtx[].
1657 case Program::TYPE_GEOMETRY
:
1658 case Program::TYPE_TESSELLATION_EVAL
:
1659 case Program::TYPE_VERTEX
: {
1660 if (info_out
->io
.genUserClip
> 0 && idx
== (uint32_t)clipVertexOutput
) {
1661 mkMov(clipVtx
[i
], src
);
1670 storeTo(insn
, FILE_SHADER_OUTPUT
, OP_EXPORT
, dType
, src
, idx
, i
+ offset
, indirect
);
// Input / interpolated input / output loads.
1674 case nir_intrinsic_load_input
:
1675 case nir_intrinsic_load_interpolated_input
:
1676 case nir_intrinsic_load_output
: {
1677 LValues
&newDefs
= convert(&insn
->dest
);
// A fragment shader reading its own output is turned into an OP_TXF on a
// 2D MS array target. The sources are the x/y position converted to U32
// (rounding towards zero), the layer and the sample index.
1680 if (prog
->getType() == Program::TYPE_FRAGMENT
&&
1681 op
== nir_intrinsic_load_output
) {
1682 std::vector
<Value
*> defs
, srcs
;
1685 srcs
.push_back(getSSA());
1686 srcs
.push_back(getSSA());
1687 Value
*x
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 0));
1688 Value
*y
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 1));
1689 mkCvt(OP_CVT
, TYPE_U32
, srcs
[0], TYPE_F32
, x
)->rnd
= ROUND_Z
;
1690 mkCvt(OP_CVT
, TYPE_U32
, srcs
[1], TYPE_F32
, y
)->rnd
= ROUND_Z
;
1692 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LAYER
, 0)));
1693 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_SAMPLE_INDEX
, 0)));
1695 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1696 defs
.push_back(newDefs
[i
]);
1700 TexInstruction
*texi
= mkTex(OP_TXF
, TEX_TARGET_2D_MS_ARRAY
, 0, 0, defs
, srcs
);
1701 texi
->tex
.levelZero
= 1;
1702 texi
->tex
.mask
= mask
;
1703 texi
->tex
.useOffsets
= 0;
1704 texi
->tex
.r
= 0xffff;
1705 texi
->tex
.s
= 0xffff;
1707 info_out
->prop
.fp
.readsFramebuffer
= true;
// Regular varying access: `input` selects between info_out->in and
// info_out->out as well as between the shader input and output files.
1711 const DataType dType
= getDType(insn
);
1713 bool input
= op
!= nir_intrinsic_load_output
;
1717 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_load_interpolated_input
? 1 : 0, 0, indirect
);
1718 nv50_ir_varying
& vary
= input
? info_out
->in
[idx
] : info_out
->out
[idx
];
// For interpolated inputs the interpolation mode is read back from the
// immediate that the load_barycentric_* handling stored in component 1 of
// its destination.
1720 // see load_barycentric_* handling
1721 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1722 if (op
== nir_intrinsic_load_interpolated_input
) {
1723 ImmediateValue immMode
;
1724 if (getSrc(&insn
->src
[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode
))
1725 mode
= immMode
.reg
.data
.u32
;
1727 if (mode
== NV50_IR_INTERP_DEFAULT
)
1728 mode
|= translateInterpMode(&vary
, nvirOp
);
1731 nvirOp
= OP_LINTERP
;
1732 mode
|= NV50_IR_INTERP_LINEAR
;
1734 nvirOp
= OP_PINTERP
;
1735 mode
|= NV50_IR_INTERP_PERSPECTIVE
;
1740 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1741 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1742 Symbol
*sym
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
);
1743 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
// 8 byte types are interpolated as two U32 halves (at address and
// address + 4) which are merged into the destination afterwards.
// OP_PINTERP gets fp.position as additional source, offset modes the value
// of src[0] component 0.
1745 if (typeSizeof(dType
) == 8) {
1746 Value
*lo
= getSSA();
1747 Value
*hi
= getSSA();
1748 Instruction
*interp
;
1750 interp
= mkOp1(nvirOp
, TYPE_U32
, lo
, sym
);
1751 if (nvirOp
== OP_PINTERP
)
1752 interp
->setSrc(s
++, fp
.position
);
1753 if (mode
& NV50_IR_INTERP_OFFSET
)
1754 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1755 interp
->setInterpolate(mode
);
1756 interp
->setIndirect(0, 0, indirect
);
1758 Symbol
*sym1
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
+ 4);
1759 interp
= mkOp1(nvirOp
, TYPE_U32
, hi
, sym1
);
1760 if (nvirOp
== OP_PINTERP
)
1761 interp
->setSrc(s
++, fp
.position
);
1762 if (mode
& NV50_IR_INTERP_OFFSET
)
1763 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1764 interp
->setInterpolate(mode
);
1765 interp
->setIndirect(0, 0, indirect
);
1767 mkOp2(OP_MERGE
, dType
, newDefs
[i
], lo
, hi
);
1769 Instruction
*interp
= mkOp1(nvirOp
, dType
, newDefs
[i
], sym
);
1770 if (nvirOp
== OP_PINTERP
)
1771 interp
->setSrc(s
++, fp
.position
);
1772 if (mode
& NV50_IR_INTERP_OFFSET
)
1773 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
1774 interp
->setInterpolate(mode
);
1775 interp
->setIndirect(0, 0, indirect
);
// Other shader stages use a plain load; perPatch is taken from the varying.
1778 mkLoad(dType
, newDefs
[i
], sym
, indirect
)->perPatch
= vary
.patch
;
// Barycentric intrinsics only select an interpolation mode: it is stored as
// an immediate in newDefs[1]; newDefs[0] receives the offset where one is
// computed (at_offset, at_sample).
1783 case nir_intrinsic_load_barycentric_at_offset
:
1784 case nir_intrinsic_load_barycentric_at_sample
:
1785 case nir_intrinsic_load_barycentric_centroid
:
1786 case nir_intrinsic_load_barycentric_pixel
:
1787 case nir_intrinsic_load_barycentric_sample
: {
1788 LValues
&newDefs
= convert(&insn
->dest
);
1791 if (op
== nir_intrinsic_load_barycentric_centroid
||
1792 op
== nir_intrinsic_load_barycentric_sample
) {
1793 mode
= NV50_IR_INTERP_CENTROID
;
1794 } else if (op
== nir_intrinsic_load_barycentric_at_offset
) {
// Each of the two offset components is limited to at most 0.4375 and at
// least -0.5, multiplied by 4096 and converted to S32; OP_INSBF then
// combines both into newDefs[0].
1796 for (uint8_t c
= 0; c
< 2; c
++) {
1797 offs
[c
] = getScratch();
1798 mkOp2(OP_MIN
, TYPE_F32
, offs
[c
], getSrc(&insn
->src
[0], c
), loadImm(NULL
, 0.4375f
));
1799 mkOp2(OP_MAX
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, -0.5f
));
1800 mkOp2(OP_MUL
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, 4096.0f
));
1801 mkCvt(OP_CVT
, TYPE_S32
, offs
[c
], TYPE_F32
, offs
[c
]);
1803 mkOp3v(OP_INSBF
, TYPE_U32
, newDefs
[0], offs
[1], mkImm(0x1010), offs
[0]);
1805 mode
= NV50_IR_INTERP_OFFSET
;
1806 } else if (op
== nir_intrinsic_load_barycentric_pixel
) {
1807 mode
= NV50_IR_INTERP_DEFAULT
;
1808 } else if (op
== nir_intrinsic_load_barycentric_at_sample
) {
// The offset for the requested sample is fetched with OP_PIXLD.
1809 info_out
->prop
.fp
.readsSampleLocations
= true;
1810 mkOp1(OP_PIXLD
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0], 0))->subOp
= NV50_IR_SUBOP_PIXLD_OFFSET
;
1811 mode
= NV50_IR_INTERP_OFFSET
;
1813 unreachable("all intrinsics already handled above");
1816 loadImm(newDefs
[1], mode
);
// Unconditional discard / demote.
1819 case nir_intrinsic_demote
:
1820 case nir_intrinsic_discard
:
1821 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
);
// Conditional discard / demote: a predicate is set when the condition is
// non-zero and OP_DISCARD is predicated on it.
1823 case nir_intrinsic_demote_if
:
1824 case nir_intrinsic_discard_if
: {
1825 Value
*pred
= getSSA(1, FILE_PREDICATE
);
1826 if (insn
->num_components
> 1) {
1827 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1831 mkCmp(OP_SET
, CC_NE
, TYPE_U8
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1832 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
)->setPredicate(CC_P
, pred
);
// System value loads: the semantic comes from convert(op) and each component
// is read with OP_RDSV as U32. For 8 byte destination types the value is
// merged with a zero immediate.
1835 case nir_intrinsic_load_base_vertex
:
1836 case nir_intrinsic_load_base_instance
:
1837 case nir_intrinsic_load_draw_id
:
1838 case nir_intrinsic_load_front_face
:
1839 case nir_intrinsic_is_helper_invocation
:
1840 case nir_intrinsic_load_helper_invocation
:
1841 case nir_intrinsic_load_instance_id
:
1842 case nir_intrinsic_load_invocation_id
:
1843 case nir_intrinsic_load_local_group_size
:
1844 case nir_intrinsic_load_local_invocation_id
:
1845 case nir_intrinsic_load_num_work_groups
:
1846 case nir_intrinsic_load_patch_vertices_in
:
1847 case nir_intrinsic_load_primitive_id
:
1848 case nir_intrinsic_load_sample_id
:
1849 case nir_intrinsic_load_sample_mask_in
:
1850 case nir_intrinsic_load_sample_pos
:
1851 case nir_intrinsic_load_subgroup_eq_mask
:
1852 case nir_intrinsic_load_subgroup_ge_mask
:
1853 case nir_intrinsic_load_subgroup_gt_mask
:
1854 case nir_intrinsic_load_subgroup_le_mask
:
1855 case nir_intrinsic_load_subgroup_lt_mask
:
1856 case nir_intrinsic_load_subgroup_invocation
:
1857 case nir_intrinsic_load_tess_coord
:
1858 case nir_intrinsic_load_tess_level_inner
:
1859 case nir_intrinsic_load_tess_level_outer
:
1860 case nir_intrinsic_load_vertex_id
:
1861 case nir_intrinsic_load_work_group_id
:
1862 case nir_intrinsic_load_work_dim
: {
1863 const DataType dType
= getDType(insn
);
1864 SVSemantic sv
= convert(op
);
1865 LValues
&newDefs
= convert(&insn
->dest
);
1867 for (uint8_t i
= 0u; i
< nir_intrinsic_dest_components(insn
); ++i
) {
1869 if (typeSizeof(dType
) == 8)
// Special case: thread id in a dimension whose thread count is 1.
1874 if (sv
== SV_TID
&& info
->prop
.cp
.numThreads
[i
] == 1) {
1877 Symbol
*sym
= mkSysVal(sv
, i
);
1878 Instruction
*rdsv
= mkOp1(OP_RDSV
, TYPE_U32
, def
, sym
);
1879 if (sv
== SV_TESS_OUTER
|| sv
== SV_TESS_INNER
)
1883 if (typeSizeof(dType
) == 8)
1884 mkOp2(OP_MERGE
, dType
, newDefs
[i
], def
, loadImm(getSSA(), 0u));
// The subgroup size is the constant 32.
1889 case nir_intrinsic_load_subgroup_size
: {
1890 LValues
&newDefs
= convert(&insn
->dest
);
1891 loadImm(newDefs
[0], 32u);
// Votes: a predicate is set when the source is non-zero, OP_VOTE (sub-op
// from getSubOp(op)) is applied to it and the result is converted to U32.
1894 case nir_intrinsic_vote_all
:
1895 case nir_intrinsic_vote_any
:
1896 case nir_intrinsic_vote_ieq
: {
1897 LValues
&newDefs
= convert(&insn
->dest
);
1898 Value
*pred
= getScratch(1, FILE_PREDICATE
);
1899 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1900 mkOp1(OP_VOTE
, TYPE_U32
, pred
, pred
)->subOp
= getSubOp(op
);
1901 mkCvt(OP_CVT
, TYPE_U32
, newDefs
[0], TYPE_U8
, pred
);
// Ballot: OP_VOTE with the ANY sub-op writing directly to the U32 result.
1904 case nir_intrinsic_ballot
: {
1905 LValues
&newDefs
= convert(&insn
->dest
);
1906 Value
*pred
= getSSA(1, FILE_PREDICATE
);
1907 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
1908 mkOp1(OP_VOTE
, TYPE_U32
, newDefs
[0], pred
)->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
// Cross-lane reads are implemented with OP_SHFL (IDX sub-op) per component.
// For read_first_invocation the lane is derived from a VOTE_ANY of 1 followed
// by BREV and BFIND (presumably yielding the lowest active lane - TODO
// confirm); for read_invocation it is src[1].
1911 case nir_intrinsic_read_first_invocation
:
1912 case nir_intrinsic_read_invocation
: {
1913 LValues
&newDefs
= convert(&insn
->dest
);
1914 const DataType dType
= getDType(insn
);
1915 Value
*tmp
= getScratch();
1917 if (op
== nir_intrinsic_read_first_invocation
) {
1918 mkOp1(OP_VOTE
, TYPE_U32
, tmp
, mkImm(1))->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
1919 mkOp1(OP_BREV
, TYPE_U32
, tmp
, tmp
);
1920 mkOp1(OP_BFIND
, TYPE_U32
, tmp
, tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
1922 tmp
= getSrc(&insn
->src
[1], 0);
1924 for (uint8_t i
= 0; i
< dest_components
; ++i
) {
1925 mkOp3(OP_SHFL
, dType
, newDefs
[i
], getSrc(&insn
->src
[0], i
), tmp
, mkImm(0x1f))
1926 ->subOp
= NV50_IR_SUBOP_SHFL_IDX
;
// Per-vertex input load: the vertex base address is produced by OP_PFETCH
// from the (possibly indirect) vertex index in src[0].
1930 case nir_intrinsic_load_per_vertex_input
: {
1931 const DataType dType
= getDType(insn
);
1932 LValues
&newDefs
= convert(&insn
->dest
);
1933 Value
*indirectVertex
;
1934 Value
*indirectOffset
;
1935 uint32_t baseVertex
= getIndirect(&insn
->src
[0], 0, indirectVertex
);
1936 uint32_t idx
= getIndirect(insn
, 1, 0, indirectOffset
);
1938 Value
*vtxBase
= mkOp2v(OP_PFETCH
, TYPE_U32
, getSSA(4, FILE_ADDRESS
),
1939 mkImm(baseVertex
), indirectVertex
);
1940 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1941 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1942 loadFrom(FILE_SHADER_INPUT
, 0, dType
, newDefs
[i
], address
, 0,
1943 indirectOffset
, vtxBase
, info_out
->in
[idx
].patch
);
// Per-vertex output load: the vertex index (indirect value or immediate) is
// added to outBase to form the vertex base address.
// NOTE(review): the patch flag is read from info_out->in[idx] although the
// load targets FILE_SHADER_OUTPUT - confirm this is intended.
1947 case nir_intrinsic_load_per_vertex_output
: {
1948 const DataType dType
= getDType(insn
);
1949 LValues
&newDefs
= convert(&insn
->dest
);
1950 Value
*indirectVertex
;
1951 Value
*indirectOffset
;
1952 uint32_t baseVertex
= getIndirect(&insn
->src
[0], 0, indirectVertex
);
1953 uint32_t idx
= getIndirect(insn
, 1, 0, indirectOffset
);
1954 Value
*vtxBase
= NULL
;
1957 vtxBase
= indirectVertex
;
1959 vtxBase
= loadImm(NULL
, baseVertex
);
1961 vtxBase
= mkOp2v(OP_ADD
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), outBase
, vtxBase
);
1963 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1964 uint32_t address
= getSlotAddress(insn
, idx
, i
);
1965 loadFrom(FILE_SHADER_OUTPUT
, 0, dType
, newDefs
[i
], address
, 0,
1966 indirectOffset
, vtxBase
, info_out
->in
[idx
].patch
);
// emit_vertex: user clip planes (if generated) are handled first, then the
// operation from getOperation(op) is emitted with the stream id as immediate.
1970 case nir_intrinsic_emit_vertex
: {
1971 if (info_out
->io
.genUserClip
> 0)
1972 handleUserClipPlanes();
1973 uint32_t idx
= nir_intrinsic_stream_id(insn
);
1974 mkOp1(getOperation(op
), TYPE_U32
, NULL
, mkImm(idx
))->fixed
= 1;
// end_primitive: same emission with the stream id as immediate.
1977 case nir_intrinsic_end_primitive
: {
1978 uint32_t idx
= nir_intrinsic_stream_id(insn
);
1981 mkOp1(getOperation(op
), TYPE_U32
, NULL
, mkImm(idx
))->fixed
= 1;
// UBO load: the constant buffer file index is the UBO index from src[0]
// plus 1; src[1] provides the offset.
1984 case nir_intrinsic_load_ubo
: {
1985 const DataType dType
= getDType(insn
);
1986 LValues
&newDefs
= convert(&insn
->dest
);
1987 Value
*indirectIndex
;
1988 Value
*indirectOffset
;
1989 uint32_t index
= getIndirect(&insn
->src
[0], 0, indirectIndex
) + 1;
1990 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
1992 for (uint8_t i
= 0u; i
< dest_components
; ++i
) {
1993 loadFrom(FILE_MEMORY_CONST
, index
, dType
, newDefs
[i
], offset
, i
,
1994 indirectOffset
, indirectIndex
);
// Buffer size query: OP_BUFQ on the buffer selected by src[0].
1998 case nir_intrinsic_get_buffer_size
: {
1999 LValues
&newDefs
= convert(&insn
->dest
);
2000 const DataType dType
= getDType(insn
);
2001 Value
*indirectBuffer
;
2002 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2004 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, 0);
2005 mkOp1(OP_BUFQ
, dType
, newDefs
[0], sym
)->setIndirect(0, 0, indirectBuffer
);
// SSBO store: src[0] is the value, src[1] the buffer, src[2] the offset.
// Components are checked against the write mask; sets bit 0x2 of
// io.globalAccess.
2008 case nir_intrinsic_store_ssbo
: {
2009 DataType sType
= getSType(insn
->src
[0], false, false);
2010 Value
*indirectBuffer
;
2011 Value
*indirectOffset
;
2012 uint32_t buffer
= getIndirect(&insn
->src
[1], 0, indirectBuffer
);
2013 uint32_t offset
= getIndirect(&insn
->src
[2], 0, indirectOffset
);
2015 for (uint8_t i
= 0u; i
< nir_intrinsic_src_components(insn
, 0); ++i
) {
2016 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2018 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, sType
,
2019 offset
+ i
* typeSizeof(sType
));
2020 mkStore(OP_STORE
, sType
, sym
, indirectOffset
, getSrc(&insn
->src
[0], i
))
2021 ->setIndirect(0, 1, indirectBuffer
);
2023 info_out
->io
.globalAccess
|= 0x2;
// SSBO load: src[0] is the buffer, src[1] the offset; sets bit 0x1 of
// io.globalAccess.
2026 case nir_intrinsic_load_ssbo
: {
2027 const DataType dType
= getDType(insn
);
2028 LValues
&newDefs
= convert(&insn
->dest
);
2029 Value
*indirectBuffer
;
2030 Value
*indirectOffset
;
2031 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2032 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2034 for (uint8_t i
= 0u; i
< dest_components
; ++i
)
2035 loadFrom(FILE_MEMORY_BUFFER
, buffer
, dType
, newDefs
[i
], offset
, i
,
2036 indirectOffset
, indirectBuffer
);
2038 info_out
->io
.globalAccess
|= 0x1;
// Shared memory atomics: OP_ATOM on FILE_MEMORY_SHARED with src[0] as offset
// and src[1] as operand; comp_swap adds src[2] as third source. The sub-op
// comes from getSubOp(op).
2041 case nir_intrinsic_shared_atomic_add
:
2042 case nir_intrinsic_shared_atomic_and
:
2043 case nir_intrinsic_shared_atomic_comp_swap
:
2044 case nir_intrinsic_shared_atomic_exchange
:
2045 case nir_intrinsic_shared_atomic_or
:
2046 case nir_intrinsic_shared_atomic_imax
:
2047 case nir_intrinsic_shared_atomic_imin
:
2048 case nir_intrinsic_shared_atomic_umax
:
2049 case nir_intrinsic_shared_atomic_umin
:
2050 case nir_intrinsic_shared_atomic_xor
: {
2051 const DataType dType
= getDType(insn
);
2052 LValues
&newDefs
= convert(&insn
->dest
);
2053 Value
*indirectOffset
;
2054 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2055 Symbol
*sym
= mkSymbol(FILE_MEMORY_SHARED
, 0, dType
, offset
);
2056 Instruction
*atom
= mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
, getSrc(&insn
->src
[1], 0));
2057 if (op
== nir_intrinsic_shared_atomic_comp_swap
)
2058 atom
->setSrc(2, getSrc(&insn
->src
[2], 0));
2059 atom
->setIndirect(0, 0, indirectOffset
);
2060 atom
->subOp
= getSubOp(op
);
// SSBO atomics: src[0] is the buffer, src[1] the offset, src[2] the operand
// and, for comp_swap, src[3] the third source. Sets bit 0x2 of
// io.globalAccess.
2063 case nir_intrinsic_ssbo_atomic_add
:
2064 case nir_intrinsic_ssbo_atomic_and
:
2065 case nir_intrinsic_ssbo_atomic_comp_swap
:
2066 case nir_intrinsic_ssbo_atomic_exchange
:
2067 case nir_intrinsic_ssbo_atomic_or
:
2068 case nir_intrinsic_ssbo_atomic_imax
:
2069 case nir_intrinsic_ssbo_atomic_imin
:
2070 case nir_intrinsic_ssbo_atomic_umax
:
2071 case nir_intrinsic_ssbo_atomic_umin
:
2072 case nir_intrinsic_ssbo_atomic_xor
: {
2073 const DataType dType
= getDType(insn
);
2074 LValues
&newDefs
= convert(&insn
->dest
);
2075 Value
*indirectBuffer
;
2076 Value
*indirectOffset
;
2077 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2078 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2080 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, offset
);
2081 Instruction
*atom
= mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
,
2082 getSrc(&insn
->src
[2], 0));
2083 if (op
== nir_intrinsic_ssbo_atomic_comp_swap
)
2084 atom
->setSrc(2, getSrc(&insn
->src
[3], 0));
2085 atom
->setIndirect(0, 0, indirectOffset
);
2086 atom
->setIndirect(0, 1, indirectBuffer
);
2087 atom
->subOp
= getSubOp(op
);
2089 info_out
->io
.globalAccess
|= 0x2;
// Global memory atomics: src[0] is the address, src[1] the operand and, for
// comp_swap, src[2] the third source. Sets bit 0x2 of io.globalAccess.
2092 case nir_intrinsic_global_atomic_add
:
2093 case nir_intrinsic_global_atomic_and
:
2094 case nir_intrinsic_global_atomic_comp_swap
:
2095 case nir_intrinsic_global_atomic_exchange
:
2096 case nir_intrinsic_global_atomic_or
:
2097 case nir_intrinsic_global_atomic_imax
:
2098 case nir_intrinsic_global_atomic_imin
:
2099 case nir_intrinsic_global_atomic_umax
:
2100 case nir_intrinsic_global_atomic_umin
:
2101 case nir_intrinsic_global_atomic_xor
: {
2102 const DataType dType
= getDType(insn
);
2103 LValues
&newDefs
= convert(&insn
->dest
);
2105 uint32_t offset
= getIndirect(&insn
->src
[0], 0, address
);
2107 Symbol
*sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, dType
, offset
);
2109 mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
, getSrc(&insn
->src
[1], 0));
2110 if (op
== nir_intrinsic_global_atomic_comp_swap
)
2111 atom
->setSrc(2, getSrc(&insn
->src
[2], 0));
2112 atom
->setIndirect(0, 0, address
);
2113 atom
->subOp
= getSubOp(op
);
2115 info_out
->io
.globalAccess
|= 0x2;
// Image intrinsics (bound and bindless): all of them are emitted as one
// texture instruction whose operation comes from getOperation(op).
2118 case nir_intrinsic_bindless_image_atomic_add
:
2119 case nir_intrinsic_bindless_image_atomic_and
:
2120 case nir_intrinsic_bindless_image_atomic_comp_swap
:
2121 case nir_intrinsic_bindless_image_atomic_exchange
:
2122 case nir_intrinsic_bindless_image_atomic_imax
:
2123 case nir_intrinsic_bindless_image_atomic_umax
:
2124 case nir_intrinsic_bindless_image_atomic_imin
:
2125 case nir_intrinsic_bindless_image_atomic_umin
:
2126 case nir_intrinsic_bindless_image_atomic_or
:
2127 case nir_intrinsic_bindless_image_atomic_xor
:
2128 case nir_intrinsic_bindless_image_atomic_inc_wrap
:
2129 case nir_intrinsic_bindless_image_atomic_dec_wrap
:
2130 case nir_intrinsic_bindless_image_load
:
2131 case nir_intrinsic_bindless_image_samples
:
2132 case nir_intrinsic_bindless_image_size
:
2133 case nir_intrinsic_bindless_image_store
:
2134 case nir_intrinsic_image_atomic_add
:
2135 case nir_intrinsic_image_atomic_and
:
2136 case nir_intrinsic_image_atomic_comp_swap
:
2137 case nir_intrinsic_image_atomic_exchange
:
2138 case nir_intrinsic_image_atomic_imax
:
2139 case nir_intrinsic_image_atomic_umax
:
2140 case nir_intrinsic_image_atomic_imin
:
2141 case nir_intrinsic_image_atomic_umin
:
2142 case nir_intrinsic_image_atomic_or
:
2143 case nir_intrinsic_image_atomic_xor
:
2144 case nir_intrinsic_image_atomic_inc_wrap
:
2145 case nir_intrinsic_image_atomic_dec_wrap
:
2146 case nir_intrinsic_image_load
:
2147 case nir_intrinsic_image_samples
:
2148 case nir_intrinsic_image_size
:
2149 case nir_intrinsic_image_store
: {
2150 std::vector
<Value
*> srcs
, defs
;
// The texture target is derived from the image dimension and array flag.
2155 TexInstruction::Target target
=
2156 convert(nir_intrinsic_image_dim(insn
), !!nir_intrinsic_image_array(insn
), false);
2157 unsigned int argCount
= getNIRArgCount(target
);
2158 uint16_t location
= 0;
2160 if (opInfo
.has_dest
) {
2161 LValues
&newDefs
= convert(&insn
->dest
);
2162 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
) {
2163 defs
.push_back(newDefs
[i
]);
// Per-opcode setup: atomics and stores set bit 0x2 of io.globalAccess, loads
// bit 0x1; the load/store/samples/size variants derive `bindless` from the
// opcode.
2169 bool bindless
= false;
2171 case nir_intrinsic_bindless_image_atomic_add
:
2172 case nir_intrinsic_bindless_image_atomic_and
:
2173 case nir_intrinsic_bindless_image_atomic_comp_swap
:
2174 case nir_intrinsic_bindless_image_atomic_exchange
:
2175 case nir_intrinsic_bindless_image_atomic_imax
:
2176 case nir_intrinsic_bindless_image_atomic_umax
:
2177 case nir_intrinsic_bindless_image_atomic_imin
:
2178 case nir_intrinsic_bindless_image_atomic_umin
:
2179 case nir_intrinsic_bindless_image_atomic_or
:
2180 case nir_intrinsic_bindless_image_atomic_xor
:
2181 case nir_intrinsic_bindless_image_atomic_inc_wrap
:
2182 case nir_intrinsic_bindless_image_atomic_dec_wrap
:
2183 ty
= getDType(insn
);
2185 info_out
->io
.globalAccess
|= 0x2;
2188 case nir_intrinsic_image_atomic_add
:
2189 case nir_intrinsic_image_atomic_and
:
2190 case nir_intrinsic_image_atomic_comp_swap
:
2191 case nir_intrinsic_image_atomic_exchange
:
2192 case nir_intrinsic_image_atomic_imax
:
2193 case nir_intrinsic_image_atomic_umax
:
2194 case nir_intrinsic_image_atomic_imin
:
2195 case nir_intrinsic_image_atomic_umin
:
2196 case nir_intrinsic_image_atomic_or
:
2197 case nir_intrinsic_image_atomic_xor
:
2198 case nir_intrinsic_image_atomic_inc_wrap
:
2199 case nir_intrinsic_image_atomic_dec_wrap
:
2200 ty
= getDType(insn
);
2202 info_out
->io
.globalAccess
|= 0x2;
2205 case nir_intrinsic_bindless_image_load
:
2206 case nir_intrinsic_image_load
:
2208 bindless
= op
== nir_intrinsic_bindless_image_load
;
2209 info_out
->io
.globalAccess
|= 0x1;
2212 case nir_intrinsic_bindless_image_store
:
2213 case nir_intrinsic_image_store
:
2216 bindless
= op
== nir_intrinsic_bindless_image_store
;
2217 info_out
->io
.globalAccess
|= 0x2;
2221 case nir_intrinsic_bindless_image_samples
:
2223 case nir_intrinsic_image_samples
:
2225 bindless
= op
== nir_intrinsic_bindless_image_samples
;
// Size queries require src[1] to be the constant 0.
2228 case nir_intrinsic_bindless_image_size
:
2229 case nir_intrinsic_image_size
:
2230 assert(nir_src_as_uint(insn
->src
[1]) == 0);
2232 bindless
= op
== nir_intrinsic_bindless_image_size
;
2235 unreachable("unhandled image opcode");
// Image handle: either used directly as the indirect value or resolved to a
// location plus indirect value through getIndirect().
2240 indirect
= getSrc(&insn
->src
[0], 0);
2242 location
= getIndirect(&insn
->src
[0], 0, indirect
);
// src[1] holds the coordinates; argCount of its components are used.
2245 if (opInfo
.num_srcs
>= 2)
2246 for (unsigned int i
= 0u; i
< argCount
; ++i
)
2247 srcs
.push_back(getSrc(&insn
->src
[1], i
));
2249 // the sampler is just another src added after coords
2250 if (opInfo
.num_srcs
>= 3 && target
.isMS())
2251 srcs
.push_back(getSrc(&insn
->src
[2], 0));
// src[3] is appended unless it is the source identified by lod_src.
2253 if (opInfo
.num_srcs
>= 4 && lod_src
!= 4) {
2254 unsigned components
= opInfo
.src_components
[3] ? opInfo
.src_components
[3] : insn
->num_components
;
2255 for (uint8_t i
= 0u; i
< components
; ++i
)
2256 srcs
.push_back(getSrc(&insn
->src
[3], i
));
2259 if (opInfo
.num_srcs
>= 5 && lod_src
!= 5)
2260 // 1 for atomic swap
2261 for (uint8_t i
= 0u; i
< opInfo
.src_components
[4]; ++i
)
2262 srcs
.push_back(getSrc(&insn
->src
[4], i
));
2264 TexInstruction
*texi
= mkTex(getOperation(op
), target
.getEnum(), location
, 0, defs
, srcs
);
2265 texi
->tex
.bindless
= bindless
;
2266 texi
->tex
.format
= nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn
));
2267 texi
->tex
.mask
= mask
;
2268 texi
->cache
= convert(nir_intrinsic_access(insn
));
2270 texi
->subOp
= getSubOp(op
);
2273 texi
->setIndirectR(indirect
);
// Scratch / shared stores: the memory file comes from getFile(op); src[0] is
// the value, src[1] the offset. Components are checked against the write
// mask.
2277 case nir_intrinsic_store_scratch
:
2278 case nir_intrinsic_store_shared
: {
2279 DataType sType
= getSType(insn
->src
[0], false, false);
2280 Value
*indirectOffset
;
2281 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2283 for (uint8_t i
= 0u; i
< nir_intrinsic_src_components(insn
, 0); ++i
) {
2284 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2286 Symbol
*sym
= mkSymbol(getFile(op
), 0, sType
, offset
+ i
* typeSizeof(sType
));
2287 mkStore(OP_STORE
, sType
, sym
, indirectOffset
, getSrc(&insn
->src
[0], i
));
// Kernel input / scratch / shared loads: the memory file comes from
// getFile(op); src[0] is the offset.
2291 case nir_intrinsic_load_kernel_input
:
2292 case nir_intrinsic_load_scratch
:
2293 case nir_intrinsic_load_shared
: {
2294 const DataType dType
= getDType(insn
);
2295 LValues
&newDefs
= convert(&insn
->dest
);
2296 Value
*indirectOffset
;
2297 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2299 for (uint8_t i
= 0u; i
< dest_components
; ++i
)
2300 loadFrom(getFile(op
), 0, dType
, newDefs
[i
], offset
, i
, indirectOffset
);
// Control barrier: records one barrier in info_out and emits OP_BAR with the
// SYNC sub-op and two zero immediates.
2304 case nir_intrinsic_control_barrier
: {
2305 // TODO: add flag to shader_info
2306 info_out
->numBarriers
= 1;
2307 Instruction
*bar
= mkOp2(OP_BAR
, TYPE_U32
, NULL
, mkImm(0), mkImm(0));
2309 bar
->subOp
= NV50_IR_SUBOP_BAR_SYNC
;
// Memory barriers: OP_MEMBAR with the sub-op from getSubOp(op).
2312 case nir_intrinsic_group_memory_barrier
:
2313 case nir_intrinsic_memory_barrier
:
2314 case nir_intrinsic_memory_barrier_buffer
:
2315 case nir_intrinsic_memory_barrier_image
:
2316 case nir_intrinsic_memory_barrier_shared
: {
2317 Instruction
*bar
= mkOp(OP_MEMBAR
, TYPE_NONE
, NULL
);
2319 bar
->subOp
= getSubOp(op
);
// No instructions are visible for this barrier in this function.
2322 case nir_intrinsic_memory_barrier_tcs_patch
:
// Shader clock: component 0 is loaded with 0, component 1 reads SV_CLOCK.
2324 case nir_intrinsic_shader_clock
: {
2325 const DataType dType
= getDType(insn
);
2326 LValues
&newDefs
= convert(&insn
->dest
);
2328 loadImm(newDefs
[0], 0u);
2329 mkOp1(OP_RDSV
, dType
, newDefs
[1], mkSysVal(SV_CLOCK
, 0))->fixed
= 1;
// Global memory loads: src[0] is the address; sets bit 0x1 of
// io.globalAccess.
2332 case nir_intrinsic_load_global
:
2333 case nir_intrinsic_load_global_constant
: {
2334 const DataType dType
= getDType(insn
);
2335 LValues
&newDefs
= convert(&insn
->dest
);
2336 Value
*indirectOffset
;
2337 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2339 for (auto i
= 0u; i
< dest_components
; ++i
)
2340 loadFrom(FILE_MEMORY_GLOBAL
, 0, dType
, newDefs
[i
], offset
, i
, indirectOffset
);
2342 info_out
->io
.globalAccess
|= 0x1;
// Global memory stores: src[0] is the value, src[1] the address. 8 byte
// values are split and written as two U32 stores (at offset and offset + 4).
// Sets bit 0x2 of io.globalAccess.
2345 case nir_intrinsic_store_global
: {
2346 DataType sType
= getSType(insn
->src
[0], false, false);
2348 for (auto i
= 0u; i
< nir_intrinsic_src_components(insn
, 0); ++i
) {
2349 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2351 if (typeSizeof(sType
) == 8) {
2353 mkSplit(split
, 4, getSrc(&insn
->src
[0], i
));
2355 Symbol
*sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, TYPE_U32
, i
* typeSizeof(sType
));
2356 mkStore(OP_STORE
, TYPE_U32
, sym
, getSrc(&insn
->src
[1], 0), split
[0]);
2358 sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, TYPE_U32
, i
* typeSizeof(sType
) + 4);
2359 mkStore(OP_STORE
, TYPE_U32
, sym
, getSrc(&insn
->src
[1], 0), split
[1]);
2361 Symbol
*sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, sType
, i
* typeSizeof(sType
));
2362 mkStore(OP_STORE
, sType
, sym
, getSrc(&insn
->src
[1], 0), getSrc(&insn
->src
[0], i
));
2366 info_out
->io
.globalAccess
|= 0x2;
// Any other intrinsic is reported by name.
2370 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos
[op
].name
);
// Converts a NIR jump instruction into nv50 IR control flow.
// Visible cases: nir_jump_return branches to `exit`; nir_jump_break and
// nir_jump_continue emit OP_BREAK / OP_CONT towards the block converted from
// the NIR block's first successor. Other jump types are reported via ERROR().
2378 Converter::visit(nir_jump_instr
*insn
)
2380 switch (insn
->type
) {
2381 case nir_jump_return
:
2382 // TODO: this only works in the main function
// Unconditional branch to `exit`, mirrored in the CFG as a CROSS edge.
2383 mkFlow(OP_BRA
, exit
, CC_ALWAYS
, NULL
);
2384 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::CROSS
);
2386 case nir_jump_break
:
2387 case nir_jump_continue
: {
// Break and continue share one path; only the opcode and the CFG edge kind
// (CROSS for break, BACK for continue) differ.
2388 bool isBreak
= insn
->type
== nir_jump_break
;
2389 nir_block
*block
= insn
->instr
.block
;
// The jump target is the first successor of the NIR block holding the jump.
2390 BasicBlock
*target
= convert(block
->successors
[0]);
2391 mkFlow(isBreak
? OP_BREAK
: OP_CONT
, target
, CC_ALWAYS
, NULL
);
2392 bb
->cfg
.attach(&target
->cfg
, isBreak
? Graph::Edge::CROSS
: Graph::Edge::BACK
);
2396 ERROR("unknown nir_jump_type %u\n", insn
->type
);
// Materializes component `idx` of a NIR load_const as an immediate.
// The switch is on insn->def.bit_size; each visible arm pairs an SSA value
// size in bytes (8/4/2/1) with the matching u64/u32/u16/u8 member of
// insn->value[idx]. Any other bit size reaches unreachable().
2404 Converter::convert(nir_load_const_instr
*insn
, uint8_t idx
)
// NOTE(review): two alternative builder positions are set here (immInsertPos
// or the current bb); confirm the condition that selects between them.
2409 setPosition(immInsertPos
, true);
2411 setPosition(bb
, false);
2413 switch (insn
->def
.bit_size
) {
2415 val
= loadImm(getSSA(8), insn
->value
[idx
].u64
);
2418 val
= loadImm(getSSA(4), insn
->value
[idx
].u32
);
2421 val
= loadImm(getSSA(2), insn
->value
[idx
].u16
);
2424 val
= loadImm(getSSA(1), insn
->value
[idx
].u8
);
2427 unreachable("unhandled bit size!\n");
// Point the builder back at the current basic block once the immediate exists.
2429 setPosition(bb
, true);
// Records a load_const instruction in `immediates`, keyed by the index of its
// SSA definition. No IR is emitted here; the assert rejects constants wider
// than 64 bits.
2434 Converter::visit(nir_load_const_instr
*insn
)
2436 assert(insn
->def
.bit_size
<= 64);
2437 immediates
[insn
->def
.index
] = insn
;
// Guard used by the ALU cases below: reports an error when the instruction has
// more than one destination component or a write mask other than 1.
2441 #define DEFAULT_CHECKS \
2442 if (insn->dest.dest.ssa.num_components > 1) { \
2443 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2446 if (insn->dest.write_mask != 1) { \
2447 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
// Converts a NIR ALU instruction into nv50 IR.
// The cases generally fetch the destination values with convert(&insn->dest),
// read operands with getSrc(&insn->src[n]) and emit one or more operations via
// mkOp*/mkCmp/mkCvt/mkMov. Unknown opcodes are reported through ERROR().
// After the switch, `precise` and `saturate` are set from insn->exact and
// insn->dest.saturate while walking the `next` chain from oldPos.
2451 Converter::visit(nir_alu_instr
*insn
)
2453 const nir_op op
= insn
->op
;
2454 const nir_op_info
&info
= nir_op_infos
[op
];
2455 DataType dType
= getDType(insn
);
2456 const std::vector
<DataType
> sTypes
= getSTypes(insn
);
// Remember the block's exit instruction before emitting anything; it is the
// starting point of the flag-propagation walk at the end of this function.
2458 Instruction
*oldPos
= this->bb
->getExit();
2469 case nir_op_fddx_coarse
:
2470 case nir_op_fddx_fine
:
2472 case nir_op_fddy_coarse
:
2473 case nir_op_fddy_fine
:
2492 case nir_op_imul_high
:
2493 case nir_op_umul_high
:
2498 case nir_op_pack_64_2x32_split
:
2513 LValues
&newDefs
= convert(&insn
->dest
);
// Opcodes that need a preparatory operation emit two instructions: preOp
// writing a temporary, then the main operation writing newDefs[0].
2514 operation preOp
= preOperationNeeded(op
);
2515 if (preOp
!= OP_NOP
) {
2516 assert(info
.num_inputs
< 2);
2517 Value
*tmp
= getSSA(typeSizeof(dType
));
2518 Instruction
*i0
= mkOp(preOp
, dType
, tmp
);
2519 Instruction
*i1
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2520 if (info
.num_inputs
) {
2521 i0
->setSrc(0, getSrc(&insn
->src
[0]));
2524 i1
->subOp
= getSubOp(op
);
// Direct path: one instruction whose sources mirror the NIR sources 1:1.
2526 Instruction
*i
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2527 for (unsigned s
= 0u; s
< info
.num_inputs
; ++s
) {
2528 i
->setSrc(s
, getSrc(&insn
->src
[s
]));
2530 i
->subOp
= getSubOp(op
);
2534 case nir_op_ifind_msb
:
2535 case nir_op_ufind_msb
: {
2537 LValues
&newDefs
= convert(&insn
->dest
);
2539 mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2542 case nir_op_fround_even
: {
2544 LValues
&newDefs
= convert(&insn
->dest
);
// Same-type conversion (dType -> dType) with the rounding mode set to ROUND_NI.
2545 mkCvt(OP_CVT
, dType
, newDefs
[0], dType
, getSrc(&insn
->src
[0]))->rnd
= ROUND_NI
;
2548 // convert instructions
2562 case nir_op_u2u64
: {
2564 LValues
&newDefs
= convert(&insn
->dest
);
2565 Instruction
*i
= mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2566 if (op
== nir_op_f2i32
|| op
== nir_op_f2i64
|| op
== nir_op_f2u32
|| op
== nir_op_f2u64
)
2568 i
->sType
= sTypes
[0];
2571 // compare instructions
2581 case nir_op_ine32
: {
2583 LValues
&newDefs
= convert(&insn
->dest
);
2584 Instruction
*i
= mkCmp(getOperation(op
),
2589 getSrc(&insn
->src
[0]),
2590 getSrc(&insn
->src
[1]));
// A third NIR input, when present, becomes source 2 of the compare.
2591 if (info
.num_inputs
== 3)
2592 i
->setSrc(2, getSrc(&insn
->src
[2]));
2593 i
->sType
= sTypes
[0];
2601 case nir_op_vec16
: {
2602 LValues
&newDefs
= convert(&insn
->dest
);
// One move per destination component, taking NIR source c for component c.
2603 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2604 mkMov(newDefs
[c
], getSrc(&insn
->src
[c
]), dType
);
2609 case nir_op_pack_64_2x32
: {
2610 LValues
&newDefs
= convert(&insn
->dest
);
// Merge components 0 and 1 of the single NIR source into one value.
2611 Instruction
*merge
= mkOp(OP_MERGE
, dType
, newDefs
[0]);
2612 merge
->setSrc(0, getSrc(&insn
->src
[0], 0));
2613 merge
->setSrc(1, getSrc(&insn
->src
[0], 1));
2616 case nir_op_pack_half_2x16_split
: {
2617 LValues
&newDefs
= convert(&insn
->dest
);
2618 Value
*tmpH
= getSSA();
2619 Value
*tmpL
= getSSA();
// Convert both F32 inputs to F16 (src[0] -> tmpL, src[1] -> tmpH), then
// combine them with a bitfield insert.
2621 mkCvt(OP_CVT
, TYPE_F16
, tmpL
, TYPE_F32
, getSrc(&insn
->src
[0]));
2622 mkCvt(OP_CVT
, TYPE_F16
, tmpH
, TYPE_F32
, getSrc(&insn
->src
[1]));
2623 mkOp3(OP_INSBF
, TYPE_U32
, newDefs
[0], tmpH
, mkImm(0x1010), tmpL
);
2626 case nir_op_unpack_half_2x16_split_x
:
2627 case nir_op_unpack_half_2x16_split_y
: {
2628 LValues
&newDefs
= convert(&insn
->dest
);
2629 Instruction
*cvt
= mkCvt(OP_CVT
, TYPE_F32
, newDefs
[0], TYPE_F16
, getSrc(&insn
->src
[0]));
2630 if (op
== nir_op_unpack_half_2x16_split_y
)
2634 case nir_op_unpack_64_2x32
: {
2635 LValues
&newDefs
= convert(&insn
->dest
);
// OP_SPLIT with two definitions: newDefs[0] and newDefs[1].
2636 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, newDefs
[1]);
2639 case nir_op_unpack_64_2x32_split_x
: {
2640 LValues
&newDefs
= convert(&insn
->dest
);
// Only definition 0 of the split is kept; definition 1 is a throwaway SSA value.
2641 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, getSSA());
2644 case nir_op_unpack_64_2x32_split_y
: {
2645 LValues
&newDefs
= convert(&insn
->dest
);
// Mirror of split_x: definition 0 is a throwaway, definition 1 is the result.
2646 mkOp1(OP_SPLIT
, dType
, getSSA(), getSrc(&insn
->src
[0]))->setDef(1, newDefs
[0]);
2649 // special instructions
2651 case nir_op_isign
: {
2654 if (::isFloatType(dType
))
2659 LValues
&newDefs
= convert(&insn
->dest
);
2660 LValue
*val0
= getScratch();
2661 LValue
*val1
= getScratch();
// val0 holds the result of (src > zero), val1 the result of (src < zero);
// the sign value is formed by subtracting one from the other below.
2662 mkCmp(OP_SET
, CC_GT
, iType
, val0
, dType
, getSrc(&insn
->src
[0]), zero
);
2663 mkCmp(OP_SET
, CC_LT
, iType
, val1
, dType
, getSrc(&insn
->src
[0]), zero
);
// The operand order of the subtraction differs between the float branches
// (val0 - val1) and the integer branches (val1 - val0).
2665 if (dType
== TYPE_F64
) {
2666 mkOp2(OP_SUB
, iType
, val0
, val0
, val1
);
2667 mkCvt(OP_CVT
, TYPE_F64
, newDefs
[0], iType
, val0
);
2668 } else if (dType
== TYPE_S64
|| dType
== TYPE_U64
) {
2669 mkOp2(OP_SUB
, iType
, val0
, val1
, val0
);
// Second word of the 64-bit result: the 32-bit difference shifted right by 31.
2670 mkOp2(OP_SHR
, iType
, val1
, val0
, loadImm(NULL
, 31));
2671 mkOp2(OP_MERGE
, dType
, newDefs
[0], val0
, val1
);
2672 } else if (::isFloatType(dType
))
2673 mkOp2(OP_SUB
, iType
, newDefs
[0], val0
, val1
);
2675 mkOp2(OP_SUB
, iType
, newDefs
[0], val1
, val0
);
2679 case nir_op_b32csel
: {
2681 LValues
&newDefs
= convert(&insn
->dest
);
// Operand order is rotated relative to NIR: src[1], src[2], then the
// condition src[0] last.
2682 mkCmp(OP_SLCT
, CC_NE
, dType
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[1]), getSrc(&insn
->src
[2]), getSrc(&insn
->src
[0]));
2685 case nir_op_ibitfield_extract
:
2686 case nir_op_ubitfield_extract
: {
2688 Value
*tmp
= getSSA();
2689 LValues
&newDefs
= convert(&insn
->dest
);
// src[2] and src[1] are first packed into one operand with INSBF (immediate
// 0x808), which EXTBF then consumes together with src[0].
2690 mkOp3(OP_INSBF
, dType
, tmp
, getSrc(&insn
->src
[2]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
2691 mkOp2(OP_EXTBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), tmp
);
2696 LValues
&newDefs
= convert(&insn
->dest
);
// Note the swapped operand order: src[1] first, then src[0].
2697 mkOp2(OP_BMSK
, dType
, newDefs
[0], getSrc(&insn
->src
[1]), getSrc(&insn
->src
[0]))->subOp
= NV50_IR_SUBOP_BMSK_W
;
2700 case nir_op_bitfield_insert
: {
2702 LValues
&newDefs
= convert(&insn
->dest
);
2703 LValue
*temp
= getSSA();
// Two INSBF steps: src[3] and src[2] are packed into `temp` (immediate 0x808),
// which then drives the insert of src[1] into src[0].
2704 mkOp3(OP_INSBF
, TYPE_U32
, temp
, getSrc(&insn
->src
[3]), mkImm(0x808), getSrc(&insn
->src
[2]));
2705 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[1]), temp
, getSrc(&insn
->src
[0]));
2708 case nir_op_bit_count
: {
2710 LValues
&newDefs
= convert(&insn
->dest
);
// OP_POPCNT is emitted with the same source as both operands.
2711 mkOp2(OP_POPCNT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), getSrc(&insn
->src
[0]));
2714 case nir_op_bitfield_reverse
: {
2716 LValues
&newDefs
= convert(&insn
->dest
);
2717 mkOp1(OP_BREV
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]));
2720 case nir_op_find_lsb
: {
2722 LValues
&newDefs
= convert(&insn
->dest
);
2723 Value
*tmp
= getSSA();
// find_lsb is built from a bit reversal followed by OP_BFIND with the SAMT subop.
2724 mkOp1(OP_BREV
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]));
2725 mkOp1(OP_BFIND
, TYPE_U32
, newDefs
[0], tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
2728 case nir_op_extract_u8
: {
2730 LValues
&newDefs
= convert(&insn
->dest
);
2731 Value
*prmt
= getSSA();
// The extract cases compute a PERMT selector from src[1] and a constant
// (OR or MAD), then permute src[0] with it.
2732 mkOp2(OP_OR
, TYPE_U32
, prmt
, getSrc(&insn
->src
[1]), loadImm(NULL
, 0x4440));
2733 mkOp3(OP_PERMT
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), prmt
, loadImm(NULL
, 0));
2736 case nir_op_extract_i8
: {
2738 LValues
&newDefs
= convert(&insn
->dest
);
2739 Value
*prmt
= getSSA();
2740 mkOp3(OP_MAD
, TYPE_U32
, prmt
, getSrc(&insn
->src
[1]), loadImm(NULL
, 0x1111), loadImm(NULL
, 0x8880));
2741 mkOp3(OP_PERMT
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), prmt
, loadImm(NULL
, 0));
2744 case nir_op_extract_u16
: {
2746 LValues
&newDefs
= convert(&insn
->dest
);
2747 Value
*prmt
= getSSA();
2748 mkOp3(OP_MAD
, TYPE_U32
, prmt
, getSrc(&insn
->src
[1]), loadImm(NULL
, 0x22), loadImm(NULL
, 0x4410));
2749 mkOp3(OP_PERMT
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), prmt
, loadImm(NULL
, 0));
2752 case nir_op_extract_i16
: {
2754 LValues
&newDefs
= convert(&insn
->dest
);
2755 Value
*prmt
= getSSA();
2756 mkOp3(OP_MAD
, TYPE_U32
, prmt
, getSrc(&insn
->src
[1]), loadImm(NULL
, 0x2222), loadImm(NULL
, 0x9910));
2757 mkOp3(OP_PERMT
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), prmt
, loadImm(NULL
, 0));
2762 LValues
&newDefs
= convert(&insn
->dest
);
// OP_SHF with src[0] passed as both the first and the third operand and
// src[1] in between; subop is SHF_L | SHF_W | SHF_HI.
2763 mkOp3(OP_SHF
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]),
2764 getSrc(&insn
->src
[1]), getSrc(&insn
->src
[0]))
2765 ->subOp
= NV50_IR_SUBOP_SHF_L
|
2766 NV50_IR_SUBOP_SHF_W
|
2767 NV50_IR_SUBOP_SHF_HI
;
2772 LValues
&newDefs
= convert(&insn
->dest
);
// Same operand pattern as above, with subop SHF_R | SHF_W | SHF_LO.
2773 mkOp3(OP_SHF
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]),
2774 getSrc(&insn
->src
[1]), getSrc(&insn
->src
[0]))
2775 ->subOp
= NV50_IR_SUBOP_SHF_R
|
2776 NV50_IR_SUBOP_SHF_W
|
2777 NV50_IR_SUBOP_SHF_LO
;
2780 // boolean conversions
2781 case nir_op_b2f32
: {
2783 LValues
&newDefs
= convert(&insn
->dest
);
// Masks the boolean source with the immediate 1.0f.
// Presumably booleans are 0 / ~0 at this point, so the result is 0.0f or 1.0f
// -- TODO confirm.
2784 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1.0f
));
2787 case nir_op_b2f64
: {
2789 LValues
&newDefs
= convert(&insn
->dest
);
2790 Value
*tmp
= getSSA(4);
// 0x3ff00000 is the upper 32 bits of the IEEE-754 double 1.0; the masked word
// is merged with a zero word into the 64-bit result.
2791 mkOp2(OP_AND
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), loadImm(NULL
, 0x3ff00000));
2792 mkOp2(OP_MERGE
, TYPE_U64
, newDefs
[0], loadImm(NULL
, 0), tmp
);
2796 case nir_op_i2b32
: {
2798 LValues
&newDefs
= convert(&insn
->dest
);
// 64-bit sources are compared against a 64-bit zero immediate.
2800 if (typeSizeof(sTypes
[0]) == 8) {
2801 src1
= loadImm(getSSA(8), 0.0);
// Condition code is CC_NEU for nir_op_f2b32 and CC_NE for the other opcodes
// handled by this case.
2805 CondCode cc
= op
== nir_op_f2b32
? CC_NEU
: CC_NE
;
2806 mkCmp(OP_SET
, cc
, TYPE_U32
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[0]), src1
);
2809 case nir_op_b2i32
: {
2811 LValues
&newDefs
= convert(&insn
->dest
);
2812 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2815 case nir_op_b2i64
: {
2817 LValues
&newDefs
= convert(&insn
->dest
);
2818 LValue
*def
= getScratch();
// Mask the boolean with 1, then merge that word with a zero word into the
// 64-bit result.
2819 mkOp2(OP_AND
, TYPE_U32
, def
, getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
2820 mkOp2(OP_MERGE
, TYPE_S64
, newDefs
[0], def
, loadImm(NULL
, 0));
2824 ERROR("unknown nir_op %s\n", info
.name
);
// NOTE(review): oldPos is re-assigned from getEntry() and dereferenced on the
// very next line, but the NULL check only comes afterwards. Confirm getEntry()
// cannot return NULL here, or move the check ahead of the dereference.
2830 oldPos
= this->bb
->getEntry();
2831 oldPos
->precise
= insn
->exact
;
2834 if (unlikely(!oldPos
))
// Walk the `next` chain from oldPos, marking each instruction with the NIR
// `exact` flag; the last instruction reached also receives the saturate flag.
2837 while (oldPos
->next
) {
2838 oldPos
= oldPos
->next
;
2839 oldPos
->precise
= insn
->exact
;
2841 oldPos
->saturate
= insn
->dest
.saturate
;
2845 #undef DEFAULT_CHECKS
// Handles a NIR undef: every component of the SSA definition is defined by an
// OP_NOP of TYPE_NONE.
2848 Converter::visit(nir_ssa_undef_instr
*insn
)
2850 LValues
&newDefs
= convert(&insn
->def
);
2851 for (uint8_t i
= 0u; i
< insn
->def
.num_components
; ++i
) {
2852 mkOp(OP_NOP
, TYPE_NONE
, newDefs
[i
]);
// Expands to a switch case for GLSL_SAMPLER_DIM_<ty> that returns the
// TEX_TARGET_<ty> variant selected by the isArray / isShadow flags in scope:
// _ARRAY, _SHADOW, _ARRAY_SHADOW, or the plain target.
2857 #define CASE_SAMPLER(ty) \
2858 case GLSL_SAMPLER_DIM_ ## ty : \
2859 if (isArray && !isShadow) \
2860 return TEX_TARGET_ ## ty ## _ARRAY; \
2861 else if (!isArray && isShadow) \
2862 return TEX_TARGET_## ty ## _SHADOW; \
2863 else if (isArray && isShadow) \
2864 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2866 return TEX_TARGET_ ## ty
// Maps a GLSL sampler dimension plus array/shadow flags to a texture target.
// Visible special cases: 3D, MS (plain or array), RECT (plain or shadow),
// BUF -> TEX_TARGET_BUFFER and EXTERNAL -> TEX_TARGET_2D. Unknown dimensions
// are logged with ERROR(); TEX_TARGET_COUNT is the final return value.
2869 Converter::convert(glsl_sampler_dim dim
, bool isArray
, bool isShadow
)
2875 case GLSL_SAMPLER_DIM_3D
:
2876 return TEX_TARGET_3D
;
2877 case GLSL_SAMPLER_DIM_MS
:
2879 return TEX_TARGET_2D_MS_ARRAY
;
2880 return TEX_TARGET_2D_MS
;
2881 case GLSL_SAMPLER_DIM_RECT
:
2883 return TEX_TARGET_RECT_SHADOW
;
2884 return TEX_TARGET_RECT
;
2885 case GLSL_SAMPLER_DIM_BUF
:
2886 return TEX_TARGET_BUFFER
;
2887 case GLSL_SAMPLER_DIM_EXTERNAL
:
2888 return TEX_TARGET_2D
;
2890 ERROR("unknown glsl_sampler_dim %u\n", dim
);
2892 return TEX_TARGET_COUNT
;
// Applies a projection factor to a value: returns src * proj, computed as an
// F32 OP_MUL into a scratch value.
2898 Converter::applyProjection(Value
*src
, Value
*proj
)
2902 return mkOp2v(OP_MUL
, TYPE_F32
, getScratch(), src
, proj
);
// Computes the argument count for a texture target, starting from
// target.getArgCount().
// NOTE(review): targets that are both cube and array are special-cased;
// confirm the adjustment applied in that branch.
2906 Converter::getNIRArgCount(TexInstruction::Target
& target
)
2908 unsigned int result
= target
.getArgCount();
2909 if (target
.isCube() && target
.isArray())
// Translates GL access qualifier bits.
// ACCESS_VOLATILE is tested before ACCESS_COHERENT, so volatile takes
// precedence when both bits are set (assuming each branch returns -- TODO
// confirm).
2917 Converter::convert(enum gl_access_qualifier access
)
2919 if (access
& ACCESS_VOLATILE
)
2921 if (access
& ACCESS_COHERENT
)
// Converts a NIR texture instruction into a TexInstruction built with mkTex().
// The source vector `srcs` is assembled in order (projected coordinates,
// padding, then the optional sources), the definitions are copied from the
// converted destination, and the resulting instruction is annotated with
// query type, mask, offsets and derivatives. Unknown texops go through ERROR().
2927 Converter::visit(nir_tex_instr
*insn
)
2931 case nir_texop_query_levels
:
2933 case nir_texop_texture_samples
:
2938 case nir_texop_txf_ms
:
2940 case nir_texop_txs
: {
2941 LValues
&newDefs
= convert(&insn
->dest
);
2942 std::vector
<Value
*> srcs
;
2943 std::vector
<Value
*> defs
;
2944 std::vector
<nir_src
*> offsets
;
2948 TexInstruction::Target target
= convert(insn
->sampler_dim
, insn
->is_array
, insn
->is_shadow
);
2949 operation op
= getOperation(insn
->op
);
// Look up the position of each optional source kind; the checks below compare
// these indices against -1 to detect an absent source.
2952 int biasIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_bias
);
2953 int compIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_comparator
);
2954 int coordsIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_coord
);
2955 int ddxIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddx
);
2956 int ddyIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddy
);
2957 int msIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ms_index
);
2958 int lodIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_lod
);
2959 int offsetIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_offset
);
2960 int projIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_projector
);
2961 int sampOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_sampler_offset
);
2962 int texOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_texture_offset
);
2963 int sampHandleIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_sampler_handle
);
2964 int texHandleIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_texture_handle
);
// Bindless when a sampler or texture handle source is present; the assert
// requires that either both handles are present or neither is.
2966 bool bindless
= sampHandleIdx
!= -1 || texHandleIdx
!= -1;
2967 assert((sampHandleIdx
!= -1) == (texHandleIdx
!= -1));
// Reciprocal of the projector; applyProjection() multiplies the coordinates
// (and the comparator further down) by it.
2970 proj
= mkOp1v(OP_RCP
, TYPE_F32
, getScratch(), getSrc(&insn
->src
[projIdx
].src
, 0));
2972 srcs
.resize(insn
->coord_components
);
2973 for (uint8_t i
= 0u; i
< insn
->coord_components
; ++i
)
2974 srcs
[i
] = applyProjection(getSrc(&insn
->src
[coordsIdx
].src
, i
), proj
);
2976 // sometimes we get less args than target.getArgCount, but codegen expects the latter
2977 if (insn
->coord_components
) {
2978 uint32_t argCount
= target
.getArgCount();
// NOTE(review): argCount and the loop index are unsigned; if coord_components
// ever exceeded argCount the subtraction would wrap and the loop would push a
// huge number of values. Confirm this cannot happen.
2983 for (uint32_t i
= 0u; i
< (argCount
- insn
->coord_components
); ++i
)
2984 srcs
.push_back(getSSA());
2987 if (insn
->op
== nir_texop_texture_samples
)
2988 srcs
.push_back(zero
);
2989 else if (!insn
->num_srcs
)
2990 srcs
.push_back(loadImm(NULL
, 0));
2992 srcs
.push_back(getSrc(&insn
->src
[biasIdx
].src
, 0));
2994 srcs
.push_back(getSrc(&insn
->src
[lodIdx
].src
, 0));
2995 else if (op
== OP_TXF
)
2998 srcs
.push_back(getSrc(&insn
->src
[msIdx
].src
, 0));
2999 if (offsetIdx
!= -1)
3000 offsets
.push_back(&insn
->src
[offsetIdx
].src
);
3002 srcs
.push_back(applyProjection(getSrc(&insn
->src
[compIdx
].src
, 0), proj
));
// From here on texOffIdx / sampOffIdx stop being NIR source indices and
// instead hold the position of the pushed value inside `srcs`.
3003 if (texOffIdx
!= -1) {
3004 srcs
.push_back(getSrc(&insn
->src
[texOffIdx
].src
, 0));
3005 texOffIdx
= srcs
.size() - 1;
3007 if (sampOffIdx
!= -1) {
3008 srcs
.push_back(getSrc(&insn
->src
[sampOffIdx
].src
, 0));
3009 sampOffIdx
= srcs
.size() - 1;
3012 // currently we use the lower bits
3014 Value
*handle
= getSrc(&insn
->src
[sampHandleIdx
].src
, 0);
3016 mkSplit(split
, 4, handle
);
// Only split[0] of the handle is pushed; its position in `srcs` is recorded
// in texOffIdx.
3018 srcs
.push_back(split
[0]);
3019 texOffIdx
= srcs
.size() - 1;
// Bindless lookups use the fixed indices 0xff / 0x1f instead of the
// instruction's texture / sampler index.
3022 r
= bindless
? 0xff : insn
->texture_index
;
3023 s
= bindless
? 0x1f : insn
->sampler_index
;
3025 defs
.resize(newDefs
.size());
3026 for (uint8_t d
= 0u; d
< newDefs
.size(); ++d
) {
3027 defs
[d
] = newDefs
[d
];
3030 if (target
.isMS() || (op
== OP_TEX
&& prog
->getType() != Program::TYPE_FRAGMENT
))
3033 TexInstruction
*texi
= mkTex(op
, target
.getEnum(), r
, s
, defs
, srcs
);
3034 texi
->tex
.levelZero
= lz
;
3035 texi
->tex
.mask
= mask
;
3036 texi
->tex
.bindless
= bindless
;
3038 if (texOffIdx
!= -1)
3039 texi
->tex
.rIndirectSrc
= texOffIdx
;
3040 if (sampOffIdx
!= -1)
3041 texi
->tex
.sIndirectSrc
= sampOffIdx
;
3045 if (!target
.isShadow())
3046 texi
->tex
.gatherComp
= insn
->component
;
3049 texi
->tex
.query
= TXQ_DIMS
;
3051 case nir_texop_texture_samples
:
3052 texi
->tex
.mask
= 0x4;
3053 texi
->tex
.query
= TXQ_TYPE
;
3055 case nir_texop_query_levels
:
3056 texi
->tex
.mask
= 0x8;
3057 texi
->tex
.query
= TXQ_DIMS
;
3063 texi
->tex
.useOffsets
= offsets
.size();
3064 if (texi
->tex
.useOffsets
) {
3065 for (uint8_t s
= 0; s
< texi
->tex
.useOffsets
; ++s
) {
// Three offset components are always set; components past the target's
// dimensionality reuse the last valid one (std::min with getDim() - 1).
3066 for (uint32_t c
= 0u; c
< 3; ++c
) {
3067 uint8_t s2
= std::min(c
, target
.getDim() - 1);
3068 texi
->offset
[s
][c
].set(getSrc(offsets
[s
], s2
));
3069 texi
->offset
[s
][c
].setInsn(texi
);
// Gather without an offset source: when explicit tg4 offsets exist, four
// (x, y) pairs from insn->tg4_offsets are loaded as immediates while the
// builder is temporarily repositioned relative to texi.
3074 if (op
== OP_TXG
&& offsetIdx
== -1) {
3075 if (nir_tex_instr_has_explicit_tg4_offsets(insn
)) {
3076 texi
->tex
.useOffsets
= 4;
3077 setPosition(texi
, false);
3078 for (uint8_t i
= 0; i
< 4; ++i
) {
3079 for (uint8_t j
= 0; j
< 2; ++j
) {
3080 texi
->offset
[i
][j
].set(loadImm(NULL
, insn
->tg4_offsets
[i
][j
]));
3081 texi
->offset
[i
][j
].setInsn(texi
);
3084 setPosition(texi
, true);
// Explicit derivatives: one dPdx / dPdy entry per target dimension, plus one
// more when the target is a cube.
3088 if (ddxIdx
!= -1 && ddyIdx
!= -1) {
3089 for (uint8_t c
= 0u; c
< target
.getDim() + target
.isCube(); ++c
) {
3090 texi
->dPdx
[c
].set(getSrc(&insn
->src
[ddxIdx
].src
, c
));
3091 texi
->dPdy
[c
].set(getSrc(&insn
->src
[ddyIdx
].src
, c
));
3098 ERROR("unknown nir_texop %u\n", insn
->op
);
// Driver of the NIR -> nv50 IR conversion (invoked as converter.run() from
// Program::makeFromNIR). It prints the shader when verbose debugging is on,
// runs the NIR lowering and optimisation passes listed below, assigns slots,
// and finally visits every function of the shader. Failures are reported
// through ERROR().
3109 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
3110 nir_print_shader(nir
, stderr
);
// Subgroup lowering is configured for a subgroup size of 32 with 32-bit ballots.
3112 struct nir_lower_subgroups_options subgroup_options
= {
3113 .subgroup_size
= 32,
3114 .ballot_bit_size
= 32,
3117 /* prepare for IO lowering */
3118 NIR_PASS_V(nir
, nir_opt_deref
);
3119 NIR_PASS_V(nir
, nir_lower_regs_to_ssa
);
3120 NIR_PASS_V(nir
, nir_lower_vars_to_ssa
);
3122 /* codegen assumes vec4 alignment for memory */
3123 NIR_PASS_V(nir
, nir_lower_vars_to_explicit_types
, nir_var_function_temp
, function_temp_type_info
);
3124 NIR_PASS_V(nir
, nir_lower_explicit_io
, nir_var_function_temp
, nir_address_format_32bit_offset
);
3125 NIR_PASS_V(nir
, nir_remove_dead_variables
, nir_var_function_temp
, NULL
);
// Shader inputs and outputs are lowered using type_size() to size each type.
3127 NIR_PASS_V(nir
, nir_lower_io
, nir_var_shader_in
| nir_var_shader_out
,
3128 type_size
, (nir_lower_io_options
)0);
3130 NIR_PASS_V(nir
, nir_lower_subgroups
, &subgroup_options
);
// Scalarize constants, ALU instructions and phis.
3132 NIR_PASS_V(nir
, nir_lower_load_const_to_scalar
);
3133 NIR_PASS_V(nir
, nir_lower_alu_to_scalar
, NULL
, NULL
);
3134 NIR_PASS_V(nir
, nir_lower_phis_to_scalar
);
3136 /*TODO: improve this lowering/optimisation loop so that we can use
3137 * nir_opt_idiv_const effectively before this.
3139 NIR_PASS(progress
, nir
, nir_lower_idiv
, nir_lower_idiv_precise
);
// Optimisation passes; each one records in `progress` whether it changed
// anything.
3143 NIR_PASS(progress
, nir
, nir_copy_prop
);
3144 NIR_PASS(progress
, nir
, nir_opt_remove_phis
);
3145 NIR_PASS(progress
, nir
, nir_opt_trivial_continues
);
3146 NIR_PASS(progress
, nir
, nir_opt_cse
);
3147 NIR_PASS(progress
, nir
, nir_opt_algebraic
);
3148 NIR_PASS(progress
, nir
, nir_opt_constant_folding
);
3149 NIR_PASS(progress
, nir
, nir_copy_prop
);
3150 NIR_PASS(progress
, nir
, nir_opt_dce
);
3151 NIR_PASS(progress
, nir
, nir_opt_dead_cf
);
// Booleans become 32-bit integers and the shader leaves SSA form before
// conversion.
3154 NIR_PASS_V(nir
, nir_lower_bool_to_int32
);
3155 NIR_PASS_V(nir
, nir_convert_from_ssa
, true);
3157 // Garbage collect dead instructions
3161 ERROR("Couldn't parse NIR!\n");
3165 if (!assignSlots()) {
3166 ERROR("Couldn't assign slots!\n");
3170 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
3171 nir_print_shader(nir
, stderr
);
// Convert every function of the shader.
3173 nir_foreach_function(function
, nir
) {
3174 if (!visit(function
))
3181 } // unnamed namespace
// Builds this Program from the NIR shader stored in info->bin.source.
// A Converter is constructed over the shader and run; a LoweringHelper is
// declared for post-processing, and tlsSize is taken from
// info_out->bin.tlsSpace.
3186 Program::makeFromNIR(struct nv50_ir_prog_info
*info
,
3187 struct nv50_ir_prog_info_out
*info_out
)
// bin.source is reinterpreted as a nir_shader pointer.
3189 nir_shader
*nir
= (nir_shader
*)info
->bin
.source
;
3190 Converter
converter(this, nir
, info
, info_out
);
3191 bool result
= converter
.run();
3194 LoweringHelper lowering
;
3196 tlsSize
= info_out
->bin
.tlsSpace
;
3200 } // namespace nv50_ir
// Builds the nir_shader_compiler_options for a given chipset.
// The struct is value-initialized and then filled field by field. Several
// lowerings are enabled only for chipset >= NVISA_GV100_CHIPSET, byte/word
// extract lowering only for chipset < NVISA_GM107_CHIPSET, and rotate lowering
// only for chipset < NVISA_GV100_CHIPSET.
3202 static nir_shader_compiler_options
3203 nvir_nir_shader_compiler_options(int chipset
)
3205 nir_shader_compiler_options op
= {};
3206 op
.lower_fdiv
= (chipset
>= NVISA_GV100_CHIPSET
);
3207 op
.lower_ffma
= false;
3208 op
.fuse_ffma
= false; /* nir doesn't track mad vs fma */
3209 op
.lower_flrp16
= (chipset
>= NVISA_GV100_CHIPSET
);
3210 op
.lower_flrp32
= true;
3211 op
.lower_flrp64
= true;
3212 op
.lower_fpow
= false; // TODO: nir's lowering is broken, or we could use it
3213 op
.lower_fsat
= false;
3214 op
.lower_fsqrt
= false; // TODO: only before gm200
3215 op
.lower_sincos
= false;
3216 op
.lower_fmod
= true;
3217 op
.lower_bitfield_extract
= false;
3218 op
.lower_bitfield_extract_to_shifts
= (chipset
>= NVISA_GV100_CHIPSET
);
3219 op
.lower_bitfield_insert
= false;
3220 op
.lower_bitfield_insert_to_shifts
= (chipset
>= NVISA_GV100_CHIPSET
);
3221 op
.lower_bitfield_insert_to_bitfield_select
= false;
3222 op
.lower_bitfield_reverse
= false;
3223 op
.lower_bit_count
= false;
3224 op
.lower_ifind_msb
= false;
3225 op
.lower_find_lsb
= false;
3226 op
.lower_uadd_carry
= true; // TODO
3227 op
.lower_usub_borrow
= true; // TODO
3228 op
.lower_mul_high
= false;
3229 op
.lower_negate
= false;
3230 op
.lower_sub
= true;
3231 op
.lower_scmp
= true; // TODO: not implemented yet
3232 op
.lower_vector_cmp
= false;
3233 op
.lower_idiv
= true;
3234 op
.lower_bitops
= false;
3235 op
.lower_isign
= (chipset
>= NVISA_GV100_CHIPSET
);
3236 op
.lower_fsign
= (chipset
>= NVISA_GV100_CHIPSET
);
3237 op
.lower_fdph
= false;
3238 op
.lower_fdot
= false;
3239 op
.fdot_replicates
= false; // TODO
3240 op
.lower_ffloor
= false; // TODO
3241 op
.lower_ffract
= true;
3242 op
.lower_fceil
= false; // TODO
3243 op
.lower_ftrunc
= false;
3244 op
.lower_ldexp
= true;
3245 op
.lower_pack_half_2x16
= true;
3246 op
.lower_pack_unorm_2x16
= true;
3247 op
.lower_pack_snorm_2x16
= true;
3248 op
.lower_pack_unorm_4x8
= true;
3249 op
.lower_pack_snorm_4x8
= true;
3250 op
.lower_unpack_half_2x16
= true;
3251 op
.lower_unpack_unorm_2x16
= true;
3252 op
.lower_unpack_snorm_2x16
= true;
3253 op
.lower_unpack_unorm_4x8
= true;
3254 op
.lower_unpack_snorm_4x8
= true;
3255 op
.lower_pack_split
= false;
3256 op
.lower_extract_byte
= (chipset
< NVISA_GM107_CHIPSET
);
3257 op
.lower_extract_word
= (chipset
< NVISA_GM107_CHIPSET
);
3258 op
.lower_all_io_to_temps
= false;
3259 op
.lower_all_io_to_elements
= false;
3260 op
.vertex_id_zero_based
= false;
3261 op
.lower_base_vertex
= false;
3262 op
.lower_helper_invocation
= false;
3263 op
.optimize_sample_mask_in
= false;
3264 op
.lower_cs_local_index_from_id
= true;
3265 op
.lower_cs_local_id_from_index
= false;
3266 op
.lower_device_index_to_zero
= false; // TODO
3267 op
.lower_wpos_pntc
= false; // TODO
3268 op
.lower_hadd
= true; // TODO
3269 op
.lower_add_sat
= true; // TODO
3270 op
.vectorize_io
= false;
3271 op
.lower_to_scalar
= false;
3272 op
.unify_interfaces
= false;
3273 op
.use_interpolated_input_intrinsics
= true;
3274 op
.lower_mul_2x32_64
= true; // TODO
3275 op
.lower_rotate
= (chipset
< NVISA_GV100_CHIPSET
);
3276 op
.has_imul24
= false;
3277 op
.intel_vec4
= false;
3278 op
.max_unroll_iterations
= 32;
// 64-bit integer lowering mask: divmod64 and ufind_msb64 are always set,
// extract64 from GM107 on, and every other visible flag from GV100 on.
3279 op
.lower_int64_options
= (nir_lower_int64_options
) (
3280 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_imul64
: 0) |
3281 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_isign64
: 0) |
3282 nir_lower_divmod64
|
3283 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_imul_high64
: 0) |
3284 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_mov64
: 0) |
3285 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_icmp64
: 0) |
3286 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_iabs64
: 0) |
3287 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_ineg64
: 0) |
3288 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_logic64
: 0) |
3289 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_minmax64
: 0) |
3290 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_shift64
: 0) |
3291 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_imul_2x32_64
: 0) |
3292 ((chipset
>= NVISA_GM107_CHIPSET
) ? nir_lower_extract64
: 0) |
3293 nir_lower_ufind_msb64
// Double-precision lowering mask; the flags visible below are all gated on
// chipset >= NVISA_GV100_CHIPSET.
3295 op
.lower_doubles_options
= (nir_lower_doubles_options
) (
3296 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_drcp
: 0) |
3297 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_dsqrt
: 0) |
3298 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_drsq
: 0) |
3299 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_dfract
: 0) |
3301 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_dsub
: 0) |
3302 ((chipset
>= NVISA_GV100_CHIPSET
) ? nir_lower_ddiv
: 0)
// Option tables for the three chipset tiers, each built by calling
// nvir_nir_shader_compiler_options() with the tier's chipset constant.
3307 static const nir_shader_compiler_options gf100_nir_shader_compiler_options
=
3308 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET
);
3309 static const nir_shader_compiler_options gm107_nir_shader_compiler_options
=
3310 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET
);
3311 static const nir_shader_compiler_options gv100_nir_shader_compiler_options
=
3312 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET
);
// Returns a pointer to the static option table for `chipset`: the GV100 table
// when chipset >= NVISA_GV100_CHIPSET, otherwise the GM107 table when
// chipset >= NVISA_GM107_CHIPSET, otherwise the GF100 table.
3314 const nir_shader_compiler_options
*
3315 nv50_ir_nir_shader_compiler_options(int chipset
)
3317 if (chipset
>= NVISA_GV100_CHIPSET
)
3318 return &gv100_nir_shader_compiler_options
;
3319 if (chipset
>= NVISA_GM107_CHIPSET
)
3320 return &gm107_nir_shader_compiler_options
;
3321 return &gf100_nir_shader_compiler_options
;