2 * Copyright 2017 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Karol Herbst <kherbst@redhat.com>
25 #include "compiler/nir/nir.h"
27 #include "util/u_debug.h"
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
37 #include <tr1/unordered_map>
45 #if __cplusplus >= 201103L
47 using std::unordered_map
;
50 using std::tr1::unordered_map
;
53 using namespace nv50_ir
;
56 type_size(const struct glsl_type
*type
, bool bindless
)
58 return glsl_count_attribute_slots(type
, false);
61 class Converter
: public ConverterCommon
64 Converter(Program
*, nir_shader
*, nv50_ir_prog_info
*);
68 typedef std::vector
<LValue
*> LValues
;
69 typedef unordered_map
<unsigned, LValues
> NirDefMap
;
70 typedef unordered_map
<unsigned, nir_load_const_instr
*> ImmediateMap
;
71 typedef unordered_map
<unsigned, uint32_t> NirArrayLMemOffsets
;
72 typedef unordered_map
<unsigned, BasicBlock
*> NirBlockMap
;
74 CacheMode
convert(enum gl_access_qualifier
);
75 TexTarget
convert(glsl_sampler_dim
, bool isArray
, bool isShadow
);
76 LValues
& convert(nir_alu_dest
*);
77 BasicBlock
* convert(nir_block
*);
78 LValues
& convert(nir_dest
*);
79 SVSemantic
convert(nir_intrinsic_op
);
80 Value
* convert(nir_load_const_instr
*, uint8_t);
81 LValues
& convert(nir_register
*);
82 LValues
& convert(nir_ssa_def
*);
84 ImgFormat
convertGLImgFormat(GLuint
);
86 Value
* getSrc(nir_alu_src
*, uint8_t component
= 0);
87 Value
* getSrc(nir_register
*, uint8_t);
88 Value
* getSrc(nir_src
*, uint8_t, bool indirect
= false);
89 Value
* getSrc(nir_ssa_def
*, uint8_t);
91 // returned value is the constant part of the given source (either the
92 // nir_src or the selected source component of an intrinsic). Even though
93 // this is mostly an optimization to be able to skip indirects in a few
94 // cases, sometimes we require immediate values or set some fileds on
95 // instructions (e.g. tex) in order for codegen to consume those.
96 // If the found value has not a constant part, the Value gets returned
97 // through the Value parameter.
98 uint32_t getIndirect(nir_src
*, uint8_t, Value
*&);
99 // isScalar indicates that the addressing is scalar, vec4 addressing is
101 uint32_t getIndirect(nir_intrinsic_instr
*, uint8_t s
, uint8_t c
, Value
*&,
102 bool isScalar
= false);
104 uint32_t getSlotAddress(nir_intrinsic_instr
*, uint8_t idx
, uint8_t slot
);
106 void setInterpolate(nv50_ir_varying
*,
111 Instruction
*loadFrom(DataFile
, uint8_t, DataType
, Value
*def
, uint32_t base
,
112 uint8_t c
, Value
*indirect0
= NULL
,
113 Value
*indirect1
= NULL
, bool patch
= false);
114 void storeTo(nir_intrinsic_instr
*, DataFile
, operation
, DataType
,
115 Value
*src
, uint8_t idx
, uint8_t c
, Value
*indirect0
= NULL
,
116 Value
*indirect1
= NULL
);
118 bool isFloatType(nir_alu_type
);
119 bool isSignedType(nir_alu_type
);
120 bool isResultFloat(nir_op
);
121 bool isResultSigned(nir_op
);
123 DataType
getDType(nir_alu_instr
*);
124 DataType
getDType(nir_intrinsic_instr
*);
125 DataType
getDType(nir_intrinsic_instr
*, bool isSigned
);
126 DataType
getDType(nir_op
, uint8_t);
128 std::vector
<DataType
> getSTypes(nir_alu_instr
*);
129 DataType
getSType(nir_src
&, bool isFloat
, bool isSigned
);
131 operation
getOperation(nir_intrinsic_op
);
132 operation
getOperation(nir_op
);
133 operation
getOperation(nir_texop
);
134 operation
preOperationNeeded(nir_op
);
136 int getSubOp(nir_intrinsic_op
);
137 int getSubOp(nir_op
);
139 CondCode
getCondCode(nir_op
);
144 bool visit(nir_alu_instr
*);
145 bool visit(nir_block
*);
146 bool visit(nir_cf_node
*);
147 bool visit(nir_deref_instr
*);
148 bool visit(nir_function
*);
149 bool visit(nir_if
*);
150 bool visit(nir_instr
*);
151 bool visit(nir_intrinsic_instr
*);
152 bool visit(nir_jump_instr
*);
153 bool visit(nir_load_const_instr
*);
154 bool visit(nir_loop
*);
155 bool visit(nir_ssa_undef_instr
*);
156 bool visit(nir_tex_instr
*);
159 Value
* applyProjection(Value
*src
, Value
*proj
);
160 unsigned int getNIRArgCount(TexInstruction::Target
&);
163 uint16_t handleDeref(nir_deref_instr
*, Value
* & indirect
, const nir_variable
* &);
164 CacheMode
getCacheModeFromVar(const nir_variable
*);
170 ImmediateMap immediates
;
171 NirArrayLMemOffsets regToLmemOffset
;
173 unsigned int curLoopDepth
;
177 Instruction
*immInsertPos
;
179 int clipVertexOutput
;
188 Converter::Converter(Program
*prog
, nir_shader
*nir
, nv50_ir_prog_info
*info
)
189 : ConverterCommon(prog
, info
),
194 zero
= mkImm((uint32_t)0);
198 Converter::convert(nir_block
*block
)
200 NirBlockMap::iterator it
= blocks
.find(block
->index
);
201 if (it
!= blocks
.end())
204 BasicBlock
*bb
= new BasicBlock(func
);
205 blocks
[block
->index
] = bb
;
210 Converter::isFloatType(nir_alu_type type
)
212 return nir_alu_type_get_base_type(type
) == nir_type_float
;
216 Converter::isSignedType(nir_alu_type type
)
218 return nir_alu_type_get_base_type(type
) == nir_type_int
;
222 Converter::isResultFloat(nir_op op
)
224 const nir_op_info
&info
= nir_op_infos
[op
];
225 if (info
.output_type
!= nir_type_invalid
)
226 return isFloatType(info
.output_type
);
228 ERROR("isResultFloat not implemented for %s\n", nir_op_infos
[op
].name
);
234 Converter::isResultSigned(nir_op op
)
237 // there is no umul and we get wrong results if we treat all muls as signed
242 const nir_op_info
&info
= nir_op_infos
[op
];
243 if (info
.output_type
!= nir_type_invalid
)
244 return isSignedType(info
.output_type
);
245 ERROR("isResultSigned not implemented for %s\n", nir_op_infos
[op
].name
);
252 Converter::getDType(nir_alu_instr
*insn
)
254 if (insn
->dest
.dest
.is_ssa
)
255 return getDType(insn
->op
, insn
->dest
.dest
.ssa
.bit_size
);
257 return getDType(insn
->op
, insn
->dest
.dest
.reg
.reg
->bit_size
);
261 Converter::getDType(nir_intrinsic_instr
*insn
)
264 switch (insn
->intrinsic
) {
265 case nir_intrinsic_shared_atomic_imax
:
266 case nir_intrinsic_shared_atomic_imin
:
267 case nir_intrinsic_ssbo_atomic_imax
:
268 case nir_intrinsic_ssbo_atomic_imin
:
276 return getDType(insn
, isSigned
);
280 Converter::getDType(nir_intrinsic_instr
*insn
, bool isSigned
)
282 if (insn
->dest
.is_ssa
)
283 return typeOfSize(insn
->dest
.ssa
.bit_size
/ 8, false, isSigned
);
285 return typeOfSize(insn
->dest
.reg
.reg
->bit_size
/ 8, false, isSigned
);
289 Converter::getDType(nir_op op
, uint8_t bitSize
)
291 DataType ty
= typeOfSize(bitSize
/ 8, isResultFloat(op
), isResultSigned(op
));
292 if (ty
== TYPE_NONE
) {
293 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos
[op
].name
, bitSize
);
299 std::vector
<DataType
>
300 Converter::getSTypes(nir_alu_instr
*insn
)
302 const nir_op_info
&info
= nir_op_infos
[insn
->op
];
303 std::vector
<DataType
> res(info
.num_inputs
);
305 for (uint8_t i
= 0; i
< info
.num_inputs
; ++i
) {
306 if (info
.input_types
[i
] != nir_type_invalid
) {
307 res
[i
] = getSType(insn
->src
[i
].src
, isFloatType(info
.input_types
[i
]), isSignedType(info
.input_types
[i
]));
309 ERROR("getSType not implemented for %s idx %u\n", info
.name
, i
);
320 Converter::getSType(nir_src
&src
, bool isFloat
, bool isSigned
)
324 bitSize
= src
.ssa
->bit_size
;
326 bitSize
= src
.reg
.reg
->bit_size
;
328 DataType ty
= typeOfSize(bitSize
/ 8, isFloat
, isSigned
);
329 if (ty
== TYPE_NONE
) {
337 ERROR("couldn't get Type for %s with bitSize %u\n", str
, bitSize
);
344 Converter::getOperation(nir_op op
)
347 // basic ops with float and int variants
356 case nir_op_ifind_msb
:
357 case nir_op_ufind_msb
:
379 case nir_op_fddx_coarse
:
380 case nir_op_fddx_fine
:
383 case nir_op_fddy_coarse
:
384 case nir_op_fddy_fine
:
402 case nir_op_pack_64_2x32_split
:
416 case nir_op_imul_high
:
417 case nir_op_umul_high
:
459 ERROR("couldn't get operation for op %s\n", nir_op_infos
[op
].name
);
466 Converter::getOperation(nir_texop op
)
478 case nir_texop_txf_ms
:
484 case nir_texop_query_levels
:
485 case nir_texop_texture_samples
:
489 ERROR("couldn't get operation for nir_texop %u\n", op
);
496 Converter::getOperation(nir_intrinsic_op op
)
499 case nir_intrinsic_emit_vertex
:
501 case nir_intrinsic_end_primitive
:
503 case nir_intrinsic_bindless_image_atomic_add
:
504 case nir_intrinsic_image_atomic_add
:
505 case nir_intrinsic_image_deref_atomic_add
:
506 case nir_intrinsic_bindless_image_atomic_and
:
507 case nir_intrinsic_image_atomic_and
:
508 case nir_intrinsic_image_deref_atomic_and
:
509 case nir_intrinsic_bindless_image_atomic_comp_swap
:
510 case nir_intrinsic_image_atomic_comp_swap
:
511 case nir_intrinsic_image_deref_atomic_comp_swap
:
512 case nir_intrinsic_bindless_image_atomic_exchange
:
513 case nir_intrinsic_image_atomic_exchange
:
514 case nir_intrinsic_image_deref_atomic_exchange
:
515 case nir_intrinsic_bindless_image_atomic_imax
:
516 case nir_intrinsic_image_atomic_imax
:
517 case nir_intrinsic_image_deref_atomic_imax
:
518 case nir_intrinsic_bindless_image_atomic_umax
:
519 case nir_intrinsic_image_atomic_umax
:
520 case nir_intrinsic_image_deref_atomic_umax
:
521 case nir_intrinsic_bindless_image_atomic_imin
:
522 case nir_intrinsic_image_atomic_imin
:
523 case nir_intrinsic_image_deref_atomic_imin
:
524 case nir_intrinsic_bindless_image_atomic_umin
:
525 case nir_intrinsic_image_atomic_umin
:
526 case nir_intrinsic_image_deref_atomic_umin
:
527 case nir_intrinsic_bindless_image_atomic_or
:
528 case nir_intrinsic_image_atomic_or
:
529 case nir_intrinsic_image_deref_atomic_or
:
530 case nir_intrinsic_bindless_image_atomic_xor
:
531 case nir_intrinsic_image_atomic_xor
:
532 case nir_intrinsic_image_deref_atomic_xor
:
534 case nir_intrinsic_bindless_image_load
:
535 case nir_intrinsic_image_load
:
536 case nir_intrinsic_image_deref_load
:
538 case nir_intrinsic_bindless_image_samples
:
539 case nir_intrinsic_image_samples
:
540 case nir_intrinsic_image_deref_samples
:
541 case nir_intrinsic_bindless_image_size
:
542 case nir_intrinsic_image_size
:
543 case nir_intrinsic_image_deref_size
:
545 case nir_intrinsic_bindless_image_store
:
546 case nir_intrinsic_image_store
:
547 case nir_intrinsic_image_deref_store
:
550 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op
);
557 Converter::preOperationNeeded(nir_op op
)
569 Converter::getSubOp(nir_op op
)
572 case nir_op_imul_high
:
573 case nir_op_umul_high
:
574 return NV50_IR_SUBOP_MUL_HIGH
;
581 Converter::getSubOp(nir_intrinsic_op op
)
584 case nir_intrinsic_bindless_image_atomic_add
:
585 case nir_intrinsic_global_atomic_add
:
586 case nir_intrinsic_image_atomic_add
:
587 case nir_intrinsic_image_deref_atomic_add
:
588 case nir_intrinsic_shared_atomic_add
:
589 case nir_intrinsic_ssbo_atomic_add
:
590 return NV50_IR_SUBOP_ATOM_ADD
;
591 case nir_intrinsic_bindless_image_atomic_and
:
592 case nir_intrinsic_global_atomic_and
:
593 case nir_intrinsic_image_atomic_and
:
594 case nir_intrinsic_image_deref_atomic_and
:
595 case nir_intrinsic_shared_atomic_and
:
596 case nir_intrinsic_ssbo_atomic_and
:
597 return NV50_IR_SUBOP_ATOM_AND
;
598 case nir_intrinsic_bindless_image_atomic_comp_swap
:
599 case nir_intrinsic_global_atomic_comp_swap
:
600 case nir_intrinsic_image_atomic_comp_swap
:
601 case nir_intrinsic_image_deref_atomic_comp_swap
:
602 case nir_intrinsic_shared_atomic_comp_swap
:
603 case nir_intrinsic_ssbo_atomic_comp_swap
:
604 return NV50_IR_SUBOP_ATOM_CAS
;
605 case nir_intrinsic_bindless_image_atomic_exchange
:
606 case nir_intrinsic_global_atomic_exchange
:
607 case nir_intrinsic_image_atomic_exchange
:
608 case nir_intrinsic_image_deref_atomic_exchange
:
609 case nir_intrinsic_shared_atomic_exchange
:
610 case nir_intrinsic_ssbo_atomic_exchange
:
611 return NV50_IR_SUBOP_ATOM_EXCH
;
612 case nir_intrinsic_bindless_image_atomic_or
:
613 case nir_intrinsic_global_atomic_or
:
614 case nir_intrinsic_image_atomic_or
:
615 case nir_intrinsic_image_deref_atomic_or
:
616 case nir_intrinsic_shared_atomic_or
:
617 case nir_intrinsic_ssbo_atomic_or
:
618 return NV50_IR_SUBOP_ATOM_OR
;
619 case nir_intrinsic_bindless_image_atomic_imax
:
620 case nir_intrinsic_bindless_image_atomic_umax
:
621 case nir_intrinsic_global_atomic_imax
:
622 case nir_intrinsic_global_atomic_umax
:
623 case nir_intrinsic_image_atomic_imax
:
624 case nir_intrinsic_image_atomic_umax
:
625 case nir_intrinsic_image_deref_atomic_imax
:
626 case nir_intrinsic_image_deref_atomic_umax
:
627 case nir_intrinsic_shared_atomic_imax
:
628 case nir_intrinsic_shared_atomic_umax
:
629 case nir_intrinsic_ssbo_atomic_imax
:
630 case nir_intrinsic_ssbo_atomic_umax
:
631 return NV50_IR_SUBOP_ATOM_MAX
;
632 case nir_intrinsic_bindless_image_atomic_imin
:
633 case nir_intrinsic_bindless_image_atomic_umin
:
634 case nir_intrinsic_global_atomic_imin
:
635 case nir_intrinsic_global_atomic_umin
:
636 case nir_intrinsic_image_atomic_imin
:
637 case nir_intrinsic_image_atomic_umin
:
638 case nir_intrinsic_image_deref_atomic_imin
:
639 case nir_intrinsic_image_deref_atomic_umin
:
640 case nir_intrinsic_shared_atomic_imin
:
641 case nir_intrinsic_shared_atomic_umin
:
642 case nir_intrinsic_ssbo_atomic_imin
:
643 case nir_intrinsic_ssbo_atomic_umin
:
644 return NV50_IR_SUBOP_ATOM_MIN
;
645 case nir_intrinsic_bindless_image_atomic_xor
:
646 case nir_intrinsic_global_atomic_xor
:
647 case nir_intrinsic_image_atomic_xor
:
648 case nir_intrinsic_image_deref_atomic_xor
:
649 case nir_intrinsic_shared_atomic_xor
:
650 case nir_intrinsic_ssbo_atomic_xor
:
651 return NV50_IR_SUBOP_ATOM_XOR
;
653 case nir_intrinsic_group_memory_barrier
:
654 case nir_intrinsic_memory_barrier
:
655 case nir_intrinsic_memory_barrier_buffer
:
656 case nir_intrinsic_memory_barrier_image
:
657 return NV50_IR_SUBOP_MEMBAR(M
, GL
);
658 case nir_intrinsic_memory_barrier_shared
:
659 return NV50_IR_SUBOP_MEMBAR(M
, CTA
);
661 case nir_intrinsic_vote_all
:
662 return NV50_IR_SUBOP_VOTE_ALL
;
663 case nir_intrinsic_vote_any
:
664 return NV50_IR_SUBOP_VOTE_ANY
;
665 case nir_intrinsic_vote_ieq
:
666 return NV50_IR_SUBOP_VOTE_UNI
;
673 Converter::getCondCode(nir_op op
)
692 ERROR("couldn't get CondCode for op %s\n", nir_op_infos
[op
].name
);
699 Converter::convert(nir_alu_dest
*dest
)
701 return convert(&dest
->dest
);
705 Converter::convert(nir_dest
*dest
)
708 return convert(&dest
->ssa
);
709 if (dest
->reg
.indirect
) {
710 ERROR("no support for indirects.");
713 return convert(dest
->reg
.reg
);
717 Converter::convert(nir_register
*reg
)
719 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
720 if (it
!= regDefs
.end())
723 LValues
newDef(reg
->num_components
);
724 for (uint8_t i
= 0; i
< reg
->num_components
; i
++)
725 newDef
[i
] = getScratch(std::max(4, reg
->bit_size
/ 8));
726 return regDefs
[reg
->index
] = newDef
;
730 Converter::convert(nir_ssa_def
*def
)
732 NirDefMap::iterator it
= ssaDefs
.find(def
->index
);
733 if (it
!= ssaDefs
.end())
736 LValues
newDef(def
->num_components
);
737 for (uint8_t i
= 0; i
< def
->num_components
; i
++)
738 newDef
[i
] = getSSA(std::max(4, def
->bit_size
/ 8));
739 return ssaDefs
[def
->index
] = newDef
;
743 Converter::getSrc(nir_alu_src
*src
, uint8_t component
)
745 if (src
->abs
|| src
->negate
) {
746 ERROR("modifiers currently not supported on nir_alu_src\n");
749 return getSrc(&src
->src
, src
->swizzle
[component
]);
753 Converter::getSrc(nir_register
*reg
, uint8_t idx
)
755 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
756 if (it
== regDefs
.end())
757 return convert(reg
)[idx
];
758 return it
->second
[idx
];
762 Converter::getSrc(nir_src
*src
, uint8_t idx
, bool indirect
)
765 return getSrc(src
->ssa
, idx
);
767 if (src
->reg
.indirect
) {
769 return getSrc(src
->reg
.indirect
, idx
);
770 ERROR("no support for indirects.");
775 return getSrc(src
->reg
.reg
, idx
);
779 Converter::getSrc(nir_ssa_def
*src
, uint8_t idx
)
781 ImmediateMap::iterator iit
= immediates
.find(src
->index
);
782 if (iit
!= immediates
.end())
783 return convert((*iit
).second
, idx
);
785 NirDefMap::iterator it
= ssaDefs
.find(src
->index
);
786 if (it
== ssaDefs
.end()) {
787 ERROR("SSA value %u not found\n", src
->index
);
791 return it
->second
[idx
];
795 Converter::getIndirect(nir_src
*src
, uint8_t idx
, Value
*&indirect
)
797 nir_const_value
*offset
= nir_src_as_const_value(*src
);
801 return offset
[0].u32
;
804 indirect
= getSrc(src
, idx
, true);
809 Converter::getIndirect(nir_intrinsic_instr
*insn
, uint8_t s
, uint8_t c
, Value
*&indirect
, bool isScalar
)
811 int32_t idx
= nir_intrinsic_base(insn
) + getIndirect(&insn
->src
[s
], c
, indirect
);
812 if (indirect
&& !isScalar
)
813 indirect
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), indirect
, loadImm(NULL
, 4));
818 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot
, unsigned *name
, unsigned *index
)
820 assert(name
&& index
);
822 if (slot
>= VERT_ATTRIB_MAX
) {
823 ERROR("invalid varying slot %u\n", slot
);
828 if (slot
>= VERT_ATTRIB_GENERIC0
&&
829 slot
< VERT_ATTRIB_GENERIC0
+ VERT_ATTRIB_GENERIC_MAX
) {
830 *name
= TGSI_SEMANTIC_GENERIC
;
831 *index
= slot
- VERT_ATTRIB_GENERIC0
;
835 if (slot
>= VERT_ATTRIB_TEX0
&&
836 slot
< VERT_ATTRIB_TEX0
+ VERT_ATTRIB_TEX_MAX
) {
837 *name
= TGSI_SEMANTIC_TEXCOORD
;
838 *index
= slot
- VERT_ATTRIB_TEX0
;
843 case VERT_ATTRIB_COLOR0
:
844 *name
= TGSI_SEMANTIC_COLOR
;
847 case VERT_ATTRIB_COLOR1
:
848 *name
= TGSI_SEMANTIC_COLOR
;
851 case VERT_ATTRIB_EDGEFLAG
:
852 *name
= TGSI_SEMANTIC_EDGEFLAG
;
855 case VERT_ATTRIB_FOG
:
856 *name
= TGSI_SEMANTIC_FOG
;
859 case VERT_ATTRIB_NORMAL
:
860 *name
= TGSI_SEMANTIC_NORMAL
;
863 case VERT_ATTRIB_POS
:
864 *name
= TGSI_SEMANTIC_POSITION
;
867 case VERT_ATTRIB_POINT_SIZE
:
868 *name
= TGSI_SEMANTIC_PSIZE
;
872 ERROR("unknown vert attrib slot %u\n", slot
);
879 varying_slot_to_tgsi_semantic(gl_varying_slot slot
, unsigned *name
, unsigned *index
)
881 assert(name
&& index
);
883 if (slot
>= VARYING_SLOT_TESS_MAX
) {
884 ERROR("invalid varying slot %u\n", slot
);
889 if (slot
>= VARYING_SLOT_PATCH0
) {
890 *name
= TGSI_SEMANTIC_PATCH
;
891 *index
= slot
- VARYING_SLOT_PATCH0
;
895 if (slot
>= VARYING_SLOT_VAR0
) {
896 *name
= TGSI_SEMANTIC_GENERIC
;
897 *index
= slot
- VARYING_SLOT_VAR0
;
901 if (slot
>= VARYING_SLOT_TEX0
&& slot
<= VARYING_SLOT_TEX7
) {
902 *name
= TGSI_SEMANTIC_TEXCOORD
;
903 *index
= slot
- VARYING_SLOT_TEX0
;
908 case VARYING_SLOT_BFC0
:
909 *name
= TGSI_SEMANTIC_BCOLOR
;
912 case VARYING_SLOT_BFC1
:
913 *name
= TGSI_SEMANTIC_BCOLOR
;
916 case VARYING_SLOT_CLIP_DIST0
:
917 *name
= TGSI_SEMANTIC_CLIPDIST
;
920 case VARYING_SLOT_CLIP_DIST1
:
921 *name
= TGSI_SEMANTIC_CLIPDIST
;
924 case VARYING_SLOT_CLIP_VERTEX
:
925 *name
= TGSI_SEMANTIC_CLIPVERTEX
;
928 case VARYING_SLOT_COL0
:
929 *name
= TGSI_SEMANTIC_COLOR
;
932 case VARYING_SLOT_COL1
:
933 *name
= TGSI_SEMANTIC_COLOR
;
936 case VARYING_SLOT_EDGE
:
937 *name
= TGSI_SEMANTIC_EDGEFLAG
;
940 case VARYING_SLOT_FACE
:
941 *name
= TGSI_SEMANTIC_FACE
;
944 case VARYING_SLOT_FOGC
:
945 *name
= TGSI_SEMANTIC_FOG
;
948 case VARYING_SLOT_LAYER
:
949 *name
= TGSI_SEMANTIC_LAYER
;
952 case VARYING_SLOT_PNTC
:
953 *name
= TGSI_SEMANTIC_PCOORD
;
956 case VARYING_SLOT_POS
:
957 *name
= TGSI_SEMANTIC_POSITION
;
960 case VARYING_SLOT_PRIMITIVE_ID
:
961 *name
= TGSI_SEMANTIC_PRIMID
;
964 case VARYING_SLOT_PSIZ
:
965 *name
= TGSI_SEMANTIC_PSIZE
;
968 case VARYING_SLOT_TESS_LEVEL_INNER
:
969 *name
= TGSI_SEMANTIC_TESSINNER
;
972 case VARYING_SLOT_TESS_LEVEL_OUTER
:
973 *name
= TGSI_SEMANTIC_TESSOUTER
;
976 case VARYING_SLOT_VIEWPORT
:
977 *name
= TGSI_SEMANTIC_VIEWPORT_INDEX
;
981 ERROR("unknown varying slot %u\n", slot
);
988 frag_result_to_tgsi_semantic(unsigned slot
, unsigned *name
, unsigned *index
)
990 if (slot
>= FRAG_RESULT_DATA0
) {
991 *name
= TGSI_SEMANTIC_COLOR
;
992 *index
= slot
- FRAG_RESULT_COLOR
- 2; // intentional
997 case FRAG_RESULT_COLOR
:
998 *name
= TGSI_SEMANTIC_COLOR
;
1001 case FRAG_RESULT_DEPTH
:
1002 *name
= TGSI_SEMANTIC_POSITION
;
1005 case FRAG_RESULT_SAMPLE_MASK
:
1006 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
1010 ERROR("unknown frag result slot %u\n", slot
);
1016 // copy of _mesa_sysval_to_semantic
1018 system_val_to_tgsi_semantic(unsigned val
, unsigned *name
, unsigned *index
)
1023 case SYSTEM_VALUE_VERTEX_ID
:
1024 *name
= TGSI_SEMANTIC_VERTEXID
;
1026 case SYSTEM_VALUE_INSTANCE_ID
:
1027 *name
= TGSI_SEMANTIC_INSTANCEID
;
1029 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
:
1030 *name
= TGSI_SEMANTIC_VERTEXID_NOBASE
;
1032 case SYSTEM_VALUE_BASE_VERTEX
:
1033 *name
= TGSI_SEMANTIC_BASEVERTEX
;
1035 case SYSTEM_VALUE_BASE_INSTANCE
:
1036 *name
= TGSI_SEMANTIC_BASEINSTANCE
;
1038 case SYSTEM_VALUE_DRAW_ID
:
1039 *name
= TGSI_SEMANTIC_DRAWID
;
1043 case SYSTEM_VALUE_INVOCATION_ID
:
1044 *name
= TGSI_SEMANTIC_INVOCATIONID
;
1048 case SYSTEM_VALUE_FRAG_COORD
:
1049 *name
= TGSI_SEMANTIC_POSITION
;
1051 case SYSTEM_VALUE_FRONT_FACE
:
1052 *name
= TGSI_SEMANTIC_FACE
;
1054 case SYSTEM_VALUE_SAMPLE_ID
:
1055 *name
= TGSI_SEMANTIC_SAMPLEID
;
1057 case SYSTEM_VALUE_SAMPLE_POS
:
1058 *name
= TGSI_SEMANTIC_SAMPLEPOS
;
1060 case SYSTEM_VALUE_SAMPLE_MASK_IN
:
1061 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
1063 case SYSTEM_VALUE_HELPER_INVOCATION
:
1064 *name
= TGSI_SEMANTIC_HELPER_INVOCATION
;
1067 // Tessellation shader
1068 case SYSTEM_VALUE_TESS_COORD
:
1069 *name
= TGSI_SEMANTIC_TESSCOORD
;
1071 case SYSTEM_VALUE_VERTICES_IN
:
1072 *name
= TGSI_SEMANTIC_VERTICESIN
;
1074 case SYSTEM_VALUE_PRIMITIVE_ID
:
1075 *name
= TGSI_SEMANTIC_PRIMID
;
1077 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
1078 *name
= TGSI_SEMANTIC_TESSOUTER
;
1080 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
1081 *name
= TGSI_SEMANTIC_TESSINNER
;
1085 case SYSTEM_VALUE_LOCAL_INVOCATION_ID
:
1086 *name
= TGSI_SEMANTIC_THREAD_ID
;
1088 case SYSTEM_VALUE_WORK_GROUP_ID
:
1089 *name
= TGSI_SEMANTIC_BLOCK_ID
;
1091 case SYSTEM_VALUE_NUM_WORK_GROUPS
:
1092 *name
= TGSI_SEMANTIC_GRID_SIZE
;
1094 case SYSTEM_VALUE_LOCAL_GROUP_SIZE
:
1095 *name
= TGSI_SEMANTIC_BLOCK_SIZE
;
1098 // ARB_shader_ballot
1099 case SYSTEM_VALUE_SUBGROUP_SIZE
:
1100 *name
= TGSI_SEMANTIC_SUBGROUP_SIZE
;
1102 case SYSTEM_VALUE_SUBGROUP_INVOCATION
:
1103 *name
= TGSI_SEMANTIC_SUBGROUP_INVOCATION
;
1105 case SYSTEM_VALUE_SUBGROUP_EQ_MASK
:
1106 *name
= TGSI_SEMANTIC_SUBGROUP_EQ_MASK
;
1108 case SYSTEM_VALUE_SUBGROUP_GE_MASK
:
1109 *name
= TGSI_SEMANTIC_SUBGROUP_GE_MASK
;
1111 case SYSTEM_VALUE_SUBGROUP_GT_MASK
:
1112 *name
= TGSI_SEMANTIC_SUBGROUP_GT_MASK
;
1114 case SYSTEM_VALUE_SUBGROUP_LE_MASK
:
1115 *name
= TGSI_SEMANTIC_SUBGROUP_LE_MASK
;
1117 case SYSTEM_VALUE_SUBGROUP_LT_MASK
:
1118 *name
= TGSI_SEMANTIC_SUBGROUP_LT_MASK
;
1122 ERROR("unknown system value %u\n", val
);
1129 Converter::setInterpolate(nv50_ir_varying
*var
,
1135 case INTERP_MODE_FLAT
:
1138 case INTERP_MODE_NONE
:
1139 if (semantic
== TGSI_SEMANTIC_COLOR
)
1141 else if (semantic
== TGSI_SEMANTIC_POSITION
)
1144 case INTERP_MODE_NOPERSPECTIVE
:
1147 case INTERP_MODE_SMOOTH
:
1150 var
->centroid
= centroid
;
1154 calcSlots(const glsl_type
*type
, Program::Type stage
, const shader_info
&info
,
1155 bool input
, const nir_variable
*var
)
1157 if (!type
->is_array())
1158 return type
->count_attribute_slots(false);
1162 case Program::TYPE_GEOMETRY
:
1163 slots
= type
->uniform_locations();
1165 slots
/= info
.gs
.vertices_in
;
1167 case Program::TYPE_TESSELLATION_CONTROL
:
1168 case Program::TYPE_TESSELLATION_EVAL
:
1169 // remove first dimension
1170 if (var
->data
.patch
|| (!input
&& stage
== Program::TYPE_TESSELLATION_EVAL
))
1171 slots
= type
->uniform_locations();
1173 slots
= type
->fields
.array
->uniform_locations();
1176 slots
= type
->count_attribute_slots(false);
1183 bool Converter::assignSlots() {
1187 info
->io
.viewportId
= -1;
1188 info
->numInputs
= 0;
1189 info
->numOutputs
= 0;
1191 // we have to fixup the uniform locations for arrays
1192 unsigned numImages
= 0;
1193 nir_foreach_variable(var
, &nir
->uniforms
) {
1194 const glsl_type
*type
= var
->type
;
1195 if (!type
->without_array()->is_image())
1197 var
->data
.driver_location
= numImages
;
1198 numImages
+= type
->is_array() ? type
->arrays_of_arrays_size() : 1;
1201 info
->numSysVals
= 0;
1202 for (uint8_t i
= 0; i
< SYSTEM_VALUE_MAX
; ++i
) {
1203 if (!(nir
->info
.system_values_read
& 1ull << i
))
1206 system_val_to_tgsi_semantic(i
, &name
, &index
);
1207 info
->sv
[info
->numSysVals
].sn
= name
;
1208 info
->sv
[info
->numSysVals
].si
= index
;
1209 info
->sv
[info
->numSysVals
].input
= 0; // TODO inferSysValDirection(sn);
1212 case SYSTEM_VALUE_INSTANCE_ID
:
1213 info
->io
.instanceId
= info
->numSysVals
;
1215 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
1216 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
1217 info
->sv
[info
->numSysVals
].patch
= 1;
1219 case SYSTEM_VALUE_VERTEX_ID
:
1220 info
->io
.vertexId
= info
->numSysVals
;
1226 info
->numSysVals
+= 1;
1229 if (prog
->getType() == Program::TYPE_COMPUTE
)
1232 nir_foreach_variable(var
, &nir
->inputs
) {
1233 const glsl_type
*type
= var
->type
;
1234 int slot
= var
->data
.location
;
1235 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, true, var
);
1236 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
1237 : type
->component_slots();
1238 uint32_t frac
= var
->data
.location_frac
;
1239 uint32_t vary
= var
->data
.driver_location
;
1241 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
1246 assert(vary
+ slots
<= PIPE_MAX_SHADER_INPUTS
);
1248 switch(prog
->getType()) {
1249 case Program::TYPE_FRAGMENT
:
1250 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1251 for (uint16_t i
= 0; i
< slots
; ++i
) {
1252 setInterpolate(&info
->in
[vary
+ i
], var
->data
.interpolation
,
1253 var
->data
.centroid
| var
->data
.sample
, name
);
1256 case Program::TYPE_GEOMETRY
:
1257 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1259 case Program::TYPE_TESSELLATION_CONTROL
:
1260 case Program::TYPE_TESSELLATION_EVAL
:
1261 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1262 if (var
->data
.patch
&& name
== TGSI_SEMANTIC_PATCH
)
1263 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1265 case Program::TYPE_VERTEX
:
1266 vert_attrib_to_tgsi_semantic((gl_vert_attrib
)slot
, &name
, &index
);
1268 case TGSI_SEMANTIC_EDGEFLAG
:
1269 info
->io
.edgeFlagIn
= vary
;
1276 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1280 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1281 info
->in
[vary
].id
= vary
;
1282 info
->in
[vary
].patch
= var
->data
.patch
;
1283 info
->in
[vary
].sn
= name
;
1284 info
->in
[vary
].si
= index
+ i
;
1285 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1287 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1289 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1291 info
->in
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1293 info
->numInputs
= std::max
<uint8_t>(info
->numInputs
, vary
);
1296 nir_foreach_variable(var
, &nir
->outputs
) {
1297 const glsl_type
*type
= var
->type
;
1298 int slot
= var
->data
.location
;
1299 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, false, var
);
1300 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
1301 : type
->component_slots();
1302 uint32_t frac
= var
->data
.location_frac
;
1303 uint32_t vary
= var
->data
.driver_location
;
1305 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
1310 assert(vary
< PIPE_MAX_SHADER_OUTPUTS
);
1312 switch(prog
->getType()) {
1313 case Program::TYPE_FRAGMENT
:
1314 frag_result_to_tgsi_semantic((gl_frag_result
)slot
, &name
, &index
);
1316 case TGSI_SEMANTIC_COLOR
:
1317 if (!var
->data
.fb_fetch_output
)
1318 info
->prop
.fp
.numColourResults
++;
1319 info
->prop
.fp
.separateFragData
= true;
1320 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1321 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1322 index
= index
== 0 ? var
->data
.index
: index
;
1324 case TGSI_SEMANTIC_POSITION
:
1325 info
->io
.fragDepth
= vary
;
1326 info
->prop
.fp
.writesDepth
= true;
1328 case TGSI_SEMANTIC_SAMPLEMASK
:
1329 info
->io
.sampleMask
= vary
;
1335 case Program::TYPE_GEOMETRY
:
1336 case Program::TYPE_TESSELLATION_CONTROL
:
1337 case Program::TYPE_TESSELLATION_EVAL
:
1338 case Program::TYPE_VERTEX
:
1339 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
1341 if (var
->data
.patch
&& name
!= TGSI_SEMANTIC_TESSINNER
&&
1342 name
!= TGSI_SEMANTIC_TESSOUTER
)
1343 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
1346 case TGSI_SEMANTIC_CLIPDIST
:
1347 info
->io
.genUserClip
= -1;
1349 case TGSI_SEMANTIC_CLIPVERTEX
:
1350 clipVertexOutput
= vary
;
1352 case TGSI_SEMANTIC_EDGEFLAG
:
1353 info
->io
.edgeFlagOut
= vary
;
1355 case TGSI_SEMANTIC_POSITION
:
1356 if (clipVertexOutput
< 0)
1357 clipVertexOutput
= vary
;
1364 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
1368 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
1369 info
->out
[vary
].id
= vary
;
1370 info
->out
[vary
].patch
= var
->data
.patch
;
1371 info
->out
[vary
].sn
= name
;
1372 info
->out
[vary
].si
= index
+ i
;
1373 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
1375 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
1377 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
1379 info
->out
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
1381 if (nir
->info
.outputs_read
& 1ull << slot
)
1382 info
->out
[vary
].oread
= 1;
1384 info
->numOutputs
= std::max
<uint8_t>(info
->numOutputs
, vary
);
1387 if (info
->io
.genUserClip
> 0) {
1388 info
->io
.clipDistances
= info
->io
.genUserClip
;
1390 const unsigned int nOut
= (info
->io
.genUserClip
+ 3) / 4;
1392 for (unsigned int n
= 0; n
< nOut
; ++n
) {
1393 unsigned int i
= info
->numOutputs
++;
1394 info
->out
[i
].id
= i
;
1395 info
->out
[i
].sn
= TGSI_SEMANTIC_CLIPDIST
;
1396 info
->out
[i
].si
= n
;
1397 info
->out
[i
].mask
= ((1 << info
->io
.clipDistances
) - 1) >> (n
* 4);
1401 return info
->assignSlots(info
) == 0;
1405 Converter::getSlotAddress(nir_intrinsic_instr
*insn
, uint8_t idx
, uint8_t slot
)
1408 int offset
= nir_intrinsic_component(insn
);
1411 if (nir_intrinsic_infos
[insn
->intrinsic
].has_dest
)
1412 ty
= getDType(insn
);
1414 ty
= getSType(insn
->src
[0], false, false);
1416 switch (insn
->intrinsic
) {
1417 case nir_intrinsic_load_input
:
1418 case nir_intrinsic_load_interpolated_input
:
1419 case nir_intrinsic_load_per_vertex_input
:
1422 case nir_intrinsic_load_output
:
1423 case nir_intrinsic_load_per_vertex_output
:
1424 case nir_intrinsic_store_output
:
1425 case nir_intrinsic_store_per_vertex_output
:
1429 ERROR("unknown intrinsic in getSlotAddress %s",
1430 nir_intrinsic_infos
[insn
->intrinsic
].name
);
1436 if (typeSizeof(ty
) == 8) {
1448 assert(!input
|| idx
< PIPE_MAX_SHADER_INPUTS
);
1449 assert(input
|| idx
< PIPE_MAX_SHADER_OUTPUTS
);
1451 const nv50_ir_varying
*vary
= input
? info
->in
: info
->out
;
1452 return vary
[idx
].slot
[slot
] * 4;
1456 Converter::loadFrom(DataFile file
, uint8_t i
, DataType ty
, Value
*def
,
1457 uint32_t base
, uint8_t c
, Value
*indirect0
,
1458 Value
*indirect1
, bool patch
)
1460 unsigned int tySize
= typeSizeof(ty
);
1463 (file
== FILE_MEMORY_CONST
|| file
== FILE_MEMORY_BUFFER
|| indirect0
)) {
1464 Value
*lo
= getSSA();
1465 Value
*hi
= getSSA();
1468 mkLoad(TYPE_U32
, lo
,
1469 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
),
1471 loi
->setIndirect(0, 1, indirect1
);
1472 loi
->perPatch
= patch
;
1475 mkLoad(TYPE_U32
, hi
,
1476 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
+ 4),
1478 hii
->setIndirect(0, 1, indirect1
);
1479 hii
->perPatch
= patch
;
1481 return mkOp2(OP_MERGE
, ty
, def
, lo
, hi
);
1484 mkLoad(ty
, def
, mkSymbol(file
, i
, ty
, base
+ c
* tySize
), indirect0
);
1485 ld
->setIndirect(0, 1, indirect1
);
1486 ld
->perPatch
= patch
;
1492 Converter::storeTo(nir_intrinsic_instr
*insn
, DataFile file
, operation op
,
1493 DataType ty
, Value
*src
, uint8_t idx
, uint8_t c
,
1494 Value
*indirect0
, Value
*indirect1
)
1496 uint8_t size
= typeSizeof(ty
);
1497 uint32_t address
= getSlotAddress(insn
, idx
, c
);
1499 if (size
== 8 && indirect0
) {
1501 mkSplit(split
, 4, src
);
1503 if (op
== OP_EXPORT
) {
1504 split
[0] = mkMov(getSSA(), split
[0], ty
)->getDef(0);
1505 split
[1] = mkMov(getSSA(), split
[1], ty
)->getDef(0);
1508 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
), indirect0
,
1509 split
[0])->perPatch
= info
->out
[idx
].patch
;
1510 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
+ 4), indirect0
,
1511 split
[1])->perPatch
= info
->out
[idx
].patch
;
1513 if (op
== OP_EXPORT
)
1514 src
= mkMov(getSSA(size
), src
, ty
)->getDef(0);
1515 mkStore(op
, ty
, mkSymbol(file
, 0, ty
, address
), indirect0
,
1516 src
)->perPatch
= info
->out
[idx
].patch
;
1521 Converter::parseNIR()
1523 info
->bin
.tlsSpace
= 0;
1524 info
->io
.clipDistances
= nir
->info
.clip_distance_array_size
;
1525 info
->io
.cullDistances
= nir
->info
.cull_distance_array_size
;
1527 switch(prog
->getType()) {
1528 case Program::TYPE_COMPUTE
:
1529 info
->prop
.cp
.numThreads
[0] = nir
->info
.cs
.local_size
[0];
1530 info
->prop
.cp
.numThreads
[1] = nir
->info
.cs
.local_size
[1];
1531 info
->prop
.cp
.numThreads
[2] = nir
->info
.cs
.local_size
[2];
1532 info
->bin
.smemSize
= nir
->info
.cs
.shared_size
;
1534 case Program::TYPE_FRAGMENT
:
1535 info
->prop
.fp
.earlyFragTests
= nir
->info
.fs
.early_fragment_tests
;
1536 info
->prop
.fp
.persampleInvocation
=
1537 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_ID
) ||
1538 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1539 info
->prop
.fp
.postDepthCoverage
= nir
->info
.fs
.post_depth_coverage
;
1540 info
->prop
.fp
.readsSampleLocations
=
1541 (nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_POS
);
1542 info
->prop
.fp
.usesDiscard
= nir
->info
.fs
.uses_discard
;
1543 info
->prop
.fp
.usesSampleMaskIn
=
1544 !!(nir
->info
.system_values_read
& SYSTEM_BIT_SAMPLE_MASK_IN
);
1546 case Program::TYPE_GEOMETRY
:
1547 info
->prop
.gp
.inputPrim
= nir
->info
.gs
.input_primitive
;
1548 info
->prop
.gp
.instanceCount
= nir
->info
.gs
.invocations
;
1549 info
->prop
.gp
.maxVertices
= nir
->info
.gs
.vertices_out
;
1550 info
->prop
.gp
.outputPrim
= nir
->info
.gs
.output_primitive
;
1552 case Program::TYPE_TESSELLATION_CONTROL
:
1553 case Program::TYPE_TESSELLATION_EVAL
:
1554 if (nir
->info
.tess
.primitive_mode
== GL_ISOLINES
)
1555 info
->prop
.tp
.domain
= GL_LINES
;
1557 info
->prop
.tp
.domain
= nir
->info
.tess
.primitive_mode
;
1558 info
->prop
.tp
.outputPatchSize
= nir
->info
.tess
.tcs_vertices_out
;
1559 info
->prop
.tp
.outputPrim
=
1560 nir
->info
.tess
.point_mode
? PIPE_PRIM_POINTS
: PIPE_PRIM_TRIANGLES
;
1561 info
->prop
.tp
.partitioning
= (nir
->info
.tess
.spacing
+ 1) % 3;
1562 info
->prop
.tp
.winding
= !nir
->info
.tess
.ccw
;
1564 case Program::TYPE_VERTEX
:
1565 info
->prop
.vp
.usesDrawParameters
=
1566 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX
)) ||
1567 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE
)) ||
1568 (nir
->info
.system_values_read
& BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID
));
1578 Converter::visit(nir_function
*function
)
1580 assert(function
->impl
);
1582 // usually the blocks will set everything up, but main is special
1583 BasicBlock
*entry
= new BasicBlock(prog
->main
);
1584 exit
= new BasicBlock(prog
->main
);
1585 blocks
[nir_start_block(function
->impl
)->index
] = entry
;
1586 prog
->main
->setEntry(entry
);
1587 prog
->main
->setExit(exit
);
1589 setPosition(entry
, true);
1591 if (info
->io
.genUserClip
> 0) {
1592 for (int c
= 0; c
< 4; ++c
)
1593 clipVtx
[c
] = getScratch();
1596 switch (prog
->getType()) {
1597 case Program::TYPE_TESSELLATION_CONTROL
:
1599 OP_SUB
, TYPE_U32
, getSSA(),
1600 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LANEID
, 0)),
1601 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_INVOCATION_ID
, 0)));
1603 case Program::TYPE_FRAGMENT
: {
1604 Symbol
*sv
= mkSysVal(SV_POSITION
, 3);
1605 fragCoord
[3] = mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), sv
);
1606 fp
.position
= mkOp1v(OP_RCP
, TYPE_F32
, fragCoord
[3], fragCoord
[3]);
1613 nir_foreach_register(reg
, &function
->impl
->registers
) {
1614 if (reg
->num_array_elems
) {
1615 // TODO: packed variables would be nice, but MemoryOpt fails
1616 // replace 4 with reg->num_components
1617 uint32_t size
= 4 * reg
->num_array_elems
* (reg
->bit_size
/ 8);
1618 regToLmemOffset
[reg
->index
] = info
->bin
.tlsSpace
;
1619 info
->bin
.tlsSpace
+= size
;
1623 nir_index_ssa_defs(function
->impl
);
1624 foreach_list_typed(nir_cf_node
, node
, node
, &function
->impl
->body
) {
1629 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::TREE
);
1630 setPosition(exit
, true);
1632 if ((prog
->getType() == Program::TYPE_VERTEX
||
1633 prog
->getType() == Program::TYPE_TESSELLATION_EVAL
)
1634 && info
->io
.genUserClip
> 0)
1635 handleUserClipPlanes();
1637 // TODO: for non main function this needs to be a OP_RETURN
1638 mkOp(OP_EXIT
, TYPE_NONE
, NULL
)->terminator
= 1;
1643 Converter::visit(nir_cf_node
*node
)
1645 switch (node
->type
) {
1646 case nir_cf_node_block
:
1647 return visit(nir_cf_node_as_block(node
));
1648 case nir_cf_node_if
:
1649 return visit(nir_cf_node_as_if(node
));
1650 case nir_cf_node_loop
:
1651 return visit(nir_cf_node_as_loop(node
));
1653 ERROR("unknown nir_cf_node type %u\n", node
->type
);
1659 Converter::visit(nir_block
*block
)
1661 if (!block
->predecessors
->entries
&& block
->instr_list
.is_empty())
1664 BasicBlock
*bb
= convert(block
);
1666 setPosition(bb
, true);
1667 nir_foreach_instr(insn
, block
) {
1675 Converter::visit(nir_if
*nif
)
1677 DataType sType
= getSType(nif
->condition
, false, false);
1678 Value
*src
= getSrc(&nif
->condition
, 0);
1680 nir_block
*lastThen
= nir_if_last_then_block(nif
);
1681 nir_block
*lastElse
= nir_if_last_else_block(nif
);
1683 assert(!lastThen
->successors
[1]);
1684 assert(!lastElse
->successors
[1]);
1686 BasicBlock
*ifBB
= convert(nir_if_first_then_block(nif
));
1687 BasicBlock
*elseBB
= convert(nir_if_first_else_block(nif
));
1689 bb
->cfg
.attach(&ifBB
->cfg
, Graph::Edge::TREE
);
1690 bb
->cfg
.attach(&elseBB
->cfg
, Graph::Edge::TREE
);
1692 // we only insert joinats, if both nodes end up at the end of the if again.
1693 // the reason for this to not happens are breaks/continues/ret/... which
1694 // have their own handling
1695 if (lastThen
->successors
[0] == lastElse
->successors
[0])
1696 bb
->joinAt
= mkFlow(OP_JOINAT
, convert(lastThen
->successors
[0]),
1699 mkFlow(OP_BRA
, elseBB
, CC_EQ
, src
)->setType(sType
);
1701 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->then_list
) {
1705 setPosition(convert(lastThen
), true);
1706 if (!bb
->getExit() ||
1707 !bb
->getExit()->asFlow() ||
1708 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1709 BasicBlock
*tailBB
= convert(lastThen
->successors
[0]);
1710 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1711 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1714 foreach_list_typed(nir_cf_node
, node
, node
, &nif
->else_list
) {
1718 setPosition(convert(lastElse
), true);
1719 if (!bb
->getExit() ||
1720 !bb
->getExit()->asFlow() ||
1721 bb
->getExit()->asFlow()->op
== OP_JOIN
) {
1722 BasicBlock
*tailBB
= convert(lastElse
->successors
[0]);
1723 mkFlow(OP_BRA
, tailBB
, CC_ALWAYS
, NULL
);
1724 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::FORWARD
);
1727 if (lastThen
->successors
[0] == lastElse
->successors
[0]) {
1728 setPosition(convert(lastThen
->successors
[0]), true);
1729 mkFlow(OP_JOIN
, NULL
, CC_ALWAYS
, NULL
)->fixed
= 1;
1736 Converter::visit(nir_loop
*loop
)
1739 func
->loopNestingBound
= std::max(func
->loopNestingBound
, curLoopDepth
);
1741 BasicBlock
*loopBB
= convert(nir_loop_first_block(loop
));
1742 BasicBlock
*tailBB
=
1743 convert(nir_cf_node_as_block(nir_cf_node_next(&loop
->cf_node
)));
1744 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::TREE
);
1746 mkFlow(OP_PREBREAK
, tailBB
, CC_ALWAYS
, NULL
);
1747 setPosition(loopBB
, false);
1748 mkFlow(OP_PRECONT
, loopBB
, CC_ALWAYS
, NULL
);
1750 foreach_list_typed(nir_cf_node
, node
, node
, &loop
->body
) {
1754 Instruction
*insn
= bb
->getExit();
1755 if (bb
->cfg
.incidentCount() != 0) {
1756 if (!insn
|| !insn
->asFlow()) {
1757 mkFlow(OP_CONT
, loopBB
, CC_ALWAYS
, NULL
);
1758 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::BACK
);
1759 } else if (insn
&& insn
->op
== OP_BRA
&& !insn
->getPredicate() &&
1760 tailBB
->cfg
.incidentCount() == 0) {
1761 // RA doesn't like having blocks around with no incident edge,
1762 // so we create a fake one to make it happy
1763 bb
->cfg
.attach(&tailBB
->cfg
, Graph::Edge::TREE
);
1773 Converter::visit(nir_instr
*insn
)
1775 // we need an insertion point for on the fly generated immediate loads
1776 immInsertPos
= bb
->getExit();
1777 switch (insn
->type
) {
1778 case nir_instr_type_alu
:
1779 return visit(nir_instr_as_alu(insn
));
1780 case nir_instr_type_deref
:
1781 return visit(nir_instr_as_deref(insn
));
1782 case nir_instr_type_intrinsic
:
1783 return visit(nir_instr_as_intrinsic(insn
));
1784 case nir_instr_type_jump
:
1785 return visit(nir_instr_as_jump(insn
));
1786 case nir_instr_type_load_const
:
1787 return visit(nir_instr_as_load_const(insn
));
1788 case nir_instr_type_ssa_undef
:
1789 return visit(nir_instr_as_ssa_undef(insn
));
1790 case nir_instr_type_tex
:
1791 return visit(nir_instr_as_tex(insn
));
1793 ERROR("unknown nir_instr type %u\n", insn
->type
);
1800 Converter::convert(nir_intrinsic_op intr
)
1803 case nir_intrinsic_load_base_vertex
:
1804 return SV_BASEVERTEX
;
1805 case nir_intrinsic_load_base_instance
:
1806 return SV_BASEINSTANCE
;
1807 case nir_intrinsic_load_draw_id
:
1809 case nir_intrinsic_load_front_face
:
1811 case nir_intrinsic_load_helper_invocation
:
1812 return SV_THREAD_KILL
;
1813 case nir_intrinsic_load_instance_id
:
1814 return SV_INSTANCE_ID
;
1815 case nir_intrinsic_load_invocation_id
:
1816 return SV_INVOCATION_ID
;
1817 case nir_intrinsic_load_local_group_size
:
1819 case nir_intrinsic_load_local_invocation_id
:
1821 case nir_intrinsic_load_num_work_groups
:
1823 case nir_intrinsic_load_patch_vertices_in
:
1824 return SV_VERTEX_COUNT
;
1825 case nir_intrinsic_load_primitive_id
:
1826 return SV_PRIMITIVE_ID
;
1827 case nir_intrinsic_load_sample_id
:
1828 return SV_SAMPLE_INDEX
;
1829 case nir_intrinsic_load_sample_mask_in
:
1830 return SV_SAMPLE_MASK
;
1831 case nir_intrinsic_load_sample_pos
:
1832 return SV_SAMPLE_POS
;
1833 case nir_intrinsic_load_subgroup_eq_mask
:
1834 return SV_LANEMASK_EQ
;
1835 case nir_intrinsic_load_subgroup_ge_mask
:
1836 return SV_LANEMASK_GE
;
1837 case nir_intrinsic_load_subgroup_gt_mask
:
1838 return SV_LANEMASK_GT
;
1839 case nir_intrinsic_load_subgroup_le_mask
:
1840 return SV_LANEMASK_LE
;
1841 case nir_intrinsic_load_subgroup_lt_mask
:
1842 return SV_LANEMASK_LT
;
1843 case nir_intrinsic_load_subgroup_invocation
:
1845 case nir_intrinsic_load_tess_coord
:
1846 return SV_TESS_COORD
;
1847 case nir_intrinsic_load_tess_level_inner
:
1848 return SV_TESS_INNER
;
1849 case nir_intrinsic_load_tess_level_outer
:
1850 return SV_TESS_OUTER
;
1851 case nir_intrinsic_load_vertex_id
:
1852 return SV_VERTEX_ID
;
1853 case nir_intrinsic_load_work_group_id
:
1856 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1857 nir_intrinsic_infos
[intr
].name
);
1864 Converter::convertGLImgFormat(GLuint format
)
1866 #define FMT_CASE(a, b) \
1867 case GL_ ## a: return nv50_ir::FMT_ ## b
1870 FMT_CASE(NONE
, NONE
);
1872 FMT_CASE(RGBA32F
, RGBA32F
);
1873 FMT_CASE(RGBA16F
, RGBA16F
);
1874 FMT_CASE(RG32F
, RG32F
);
1875 FMT_CASE(RG16F
, RG16F
);
1876 FMT_CASE(R11F_G11F_B10F
, R11G11B10F
);
1877 FMT_CASE(R32F
, R32F
);
1878 FMT_CASE(R16F
, R16F
);
1880 FMT_CASE(RGBA32UI
, RGBA32UI
);
1881 FMT_CASE(RGBA16UI
, RGBA16UI
);
1882 FMT_CASE(RGB10_A2UI
, RGB10A2UI
);
1883 FMT_CASE(RGBA8UI
, RGBA8UI
);
1884 FMT_CASE(RG32UI
, RG32UI
);
1885 FMT_CASE(RG16UI
, RG16UI
);
1886 FMT_CASE(RG8UI
, RG8UI
);
1887 FMT_CASE(R32UI
, R32UI
);
1888 FMT_CASE(R16UI
, R16UI
);
1889 FMT_CASE(R8UI
, R8UI
);
1891 FMT_CASE(RGBA32I
, RGBA32I
);
1892 FMT_CASE(RGBA16I
, RGBA16I
);
1893 FMT_CASE(RGBA8I
, RGBA8I
);
1894 FMT_CASE(RG32I
, RG32I
);
1895 FMT_CASE(RG16I
, RG16I
);
1896 FMT_CASE(RG8I
, RG8I
);
1897 FMT_CASE(R32I
, R32I
);
1898 FMT_CASE(R16I
, R16I
);
1901 FMT_CASE(RGBA16
, RGBA16
);
1902 FMT_CASE(RGB10_A2
, RGB10A2
);
1903 FMT_CASE(RGBA8
, RGBA8
);
1904 FMT_CASE(RG16
, RG16
);
1909 FMT_CASE(RGBA16_SNORM
, RGBA16_SNORM
);
1910 FMT_CASE(RGBA8_SNORM
, RGBA8_SNORM
);
1911 FMT_CASE(RG16_SNORM
, RG16_SNORM
);
1912 FMT_CASE(RG8_SNORM
, RG8_SNORM
);
1913 FMT_CASE(R16_SNORM
, R16_SNORM
);
1914 FMT_CASE(R8_SNORM
, R8_SNORM
);
1916 FMT_CASE(BGRA_INTEGER
, BGRA8
);
1918 ERROR("unknown format %x\n", format
);
1920 return nv50_ir::FMT_NONE
;
1926 Converter::visit(nir_intrinsic_instr
*insn
)
1928 nir_intrinsic_op op
= insn
->intrinsic
;
1929 const nir_intrinsic_info
&opInfo
= nir_intrinsic_infos
[op
];
1932 case nir_intrinsic_load_uniform
: {
1933 LValues
&newDefs
= convert(&insn
->dest
);
1934 const DataType dType
= getDType(insn
);
1936 uint32_t coffset
= getIndirect(insn
, 0, 0, indirect
);
1937 for (uint8_t i
= 0; i
< insn
->num_components
; ++i
) {
1938 loadFrom(FILE_MEMORY_CONST
, 0, dType
, newDefs
[i
], 16 * coffset
, i
, indirect
);
1942 case nir_intrinsic_store_output
:
1943 case nir_intrinsic_store_per_vertex_output
: {
1945 DataType dType
= getSType(insn
->src
[0], false, false);
1946 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_store_output
? 1 : 2, 0, indirect
);
1948 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
1949 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
1953 Value
*src
= getSrc(&insn
->src
[0], i
);
1954 switch (prog
->getType()) {
1955 case Program::TYPE_FRAGMENT
: {
1956 if (info
->out
[idx
].sn
== TGSI_SEMANTIC_POSITION
) {
1957 // TGSI uses a different interface than NIR, TGSI stores that
1958 // value in the z component, NIR in X
1960 src
= mkOp1v(OP_SAT
, TYPE_F32
, getScratch(), src
);
1964 case Program::TYPE_GEOMETRY
:
1965 case Program::TYPE_VERTEX
: {
1966 if (info
->io
.genUserClip
> 0 && idx
== (uint32_t)clipVertexOutput
) {
1967 mkMov(clipVtx
[i
], src
);
1976 storeTo(insn
, FILE_SHADER_OUTPUT
, OP_EXPORT
, dType
, src
, idx
, i
+ offset
, indirect
);
1980 case nir_intrinsic_load_input
:
1981 case nir_intrinsic_load_interpolated_input
:
1982 case nir_intrinsic_load_output
: {
1983 LValues
&newDefs
= convert(&insn
->dest
);
1986 if (prog
->getType() == Program::TYPE_FRAGMENT
&&
1987 op
== nir_intrinsic_load_output
) {
1988 std::vector
<Value
*> defs
, srcs
;
1991 srcs
.push_back(getSSA());
1992 srcs
.push_back(getSSA());
1993 Value
*x
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 0));
1994 Value
*y
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 1));
1995 mkCvt(OP_CVT
, TYPE_U32
, srcs
[0], TYPE_F32
, x
)->rnd
= ROUND_Z
;
1996 mkCvt(OP_CVT
, TYPE_U32
, srcs
[1], TYPE_F32
, y
)->rnd
= ROUND_Z
;
1998 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_LAYER
, 0)));
1999 srcs
.push_back(mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_SAMPLE_INDEX
, 0)));
2001 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2002 defs
.push_back(newDefs
[i
]);
2006 TexInstruction
*texi
= mkTex(OP_TXF
, TEX_TARGET_2D_MS_ARRAY
, 0, 0, defs
, srcs
);
2007 texi
->tex
.levelZero
= 1;
2008 texi
->tex
.mask
= mask
;
2009 texi
->tex
.useOffsets
= 0;
2010 texi
->tex
.r
= 0xffff;
2011 texi
->tex
.s
= 0xffff;
2013 info
->prop
.fp
.readsFramebuffer
= true;
2017 const DataType dType
= getDType(insn
);
2019 bool input
= op
!= nir_intrinsic_load_output
;
2023 uint32_t idx
= getIndirect(insn
, op
== nir_intrinsic_load_interpolated_input
? 1 : 0, 0, indirect
);
2024 nv50_ir_varying
& vary
= input
? info
->in
[idx
] : info
->out
[idx
];
2026 // see load_barycentric_* handling
2027 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
2028 mode
= translateInterpMode(&vary
, nvirOp
);
2029 if (op
== nir_intrinsic_load_interpolated_input
) {
2030 ImmediateValue immMode
;
2031 if (getSrc(&insn
->src
[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode
))
2032 mode
|= immMode
.reg
.data
.u32
;
2036 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2037 uint32_t address
= getSlotAddress(insn
, idx
, i
);
2038 Symbol
*sym
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
);
2039 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
2041 if (typeSizeof(dType
) == 8) {
2042 Value
*lo
= getSSA();
2043 Value
*hi
= getSSA();
2044 Instruction
*interp
;
2046 interp
= mkOp1(nvirOp
, TYPE_U32
, lo
, sym
);
2047 if (nvirOp
== OP_PINTERP
)
2048 interp
->setSrc(s
++, fp
.position
);
2049 if (mode
& NV50_IR_INTERP_OFFSET
)
2050 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
2051 interp
->setInterpolate(mode
);
2052 interp
->setIndirect(0, 0, indirect
);
2054 Symbol
*sym1
= mkSymbol(input
? FILE_SHADER_INPUT
: FILE_SHADER_OUTPUT
, 0, dType
, address
+ 4);
2055 interp
= mkOp1(nvirOp
, TYPE_U32
, hi
, sym1
);
2056 if (nvirOp
== OP_PINTERP
)
2057 interp
->setSrc(s
++, fp
.position
);
2058 if (mode
& NV50_IR_INTERP_OFFSET
)
2059 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
2060 interp
->setInterpolate(mode
);
2061 interp
->setIndirect(0, 0, indirect
);
2063 mkOp2(OP_MERGE
, dType
, newDefs
[i
], lo
, hi
);
2065 Instruction
*interp
= mkOp1(nvirOp
, dType
, newDefs
[i
], sym
);
2066 if (nvirOp
== OP_PINTERP
)
2067 interp
->setSrc(s
++, fp
.position
);
2068 if (mode
& NV50_IR_INTERP_OFFSET
)
2069 interp
->setSrc(s
++, getSrc(&insn
->src
[0], 0));
2070 interp
->setInterpolate(mode
);
2071 interp
->setIndirect(0, 0, indirect
);
2074 mkLoad(dType
, newDefs
[i
], sym
, indirect
)->perPatch
= vary
.patch
;
2079 case nir_intrinsic_load_kernel_input
: {
2080 assert(prog
->getType() == Program::TYPE_COMPUTE
);
2081 assert(insn
->num_components
== 1);
2083 LValues
&newDefs
= convert(&insn
->dest
);
2084 const DataType dType
= getDType(insn
);
2086 uint32_t idx
= getIndirect(insn
, 0, 0, indirect
, true);
2088 mkLoad(dType
, newDefs
[0], mkSymbol(FILE_SHADER_INPUT
, 0, dType
, idx
), indirect
);
2091 case nir_intrinsic_load_barycentric_at_offset
:
2092 case nir_intrinsic_load_barycentric_at_sample
:
2093 case nir_intrinsic_load_barycentric_centroid
:
2094 case nir_intrinsic_load_barycentric_pixel
:
2095 case nir_intrinsic_load_barycentric_sample
: {
2096 LValues
&newDefs
= convert(&insn
->dest
);
2099 if (op
== nir_intrinsic_load_barycentric_centroid
||
2100 op
== nir_intrinsic_load_barycentric_sample
) {
2101 mode
= NV50_IR_INTERP_CENTROID
;
2102 } else if (op
== nir_intrinsic_load_barycentric_at_offset
) {
2104 for (uint8_t c
= 0; c
< 2; c
++) {
2105 offs
[c
] = getScratch();
2106 mkOp2(OP_MIN
, TYPE_F32
, offs
[c
], getSrc(&insn
->src
[0], c
), loadImm(NULL
, 0.4375f
));
2107 mkOp2(OP_MAX
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, -0.5f
));
2108 mkOp2(OP_MUL
, TYPE_F32
, offs
[c
], offs
[c
], loadImm(NULL
, 4096.0f
));
2109 mkCvt(OP_CVT
, TYPE_S32
, offs
[c
], TYPE_F32
, offs
[c
]);
2111 mkOp3v(OP_INSBF
, TYPE_U32
, newDefs
[0], offs
[1], mkImm(0x1010), offs
[0]);
2113 mode
= NV50_IR_INTERP_OFFSET
;
2114 } else if (op
== nir_intrinsic_load_barycentric_pixel
) {
2115 mode
= NV50_IR_INTERP_DEFAULT
;
2116 } else if (op
== nir_intrinsic_load_barycentric_at_sample
) {
2117 info
->prop
.fp
.readsSampleLocations
= true;
2118 mkOp1(OP_PIXLD
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0], 0))->subOp
= NV50_IR_SUBOP_PIXLD_OFFSET
;
2119 mode
= NV50_IR_INTERP_OFFSET
;
2121 unreachable("all intrinsics already handled above");
2124 loadImm(newDefs
[1], mode
);
2127 case nir_intrinsic_discard
:
2128 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
);
2130 case nir_intrinsic_discard_if
: {
2131 Value
*pred
= getSSA(1, FILE_PREDICATE
);
2132 if (insn
->num_components
> 1) {
2133 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2137 mkCmp(OP_SET
, CC_NE
, TYPE_U8
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
2138 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
)->setPredicate(CC_P
, pred
);
2141 case nir_intrinsic_load_base_vertex
:
2142 case nir_intrinsic_load_base_instance
:
2143 case nir_intrinsic_load_draw_id
:
2144 case nir_intrinsic_load_front_face
:
2145 case nir_intrinsic_load_helper_invocation
:
2146 case nir_intrinsic_load_instance_id
:
2147 case nir_intrinsic_load_invocation_id
:
2148 case nir_intrinsic_load_local_group_size
:
2149 case nir_intrinsic_load_local_invocation_id
:
2150 case nir_intrinsic_load_num_work_groups
:
2151 case nir_intrinsic_load_patch_vertices_in
:
2152 case nir_intrinsic_load_primitive_id
:
2153 case nir_intrinsic_load_sample_id
:
2154 case nir_intrinsic_load_sample_mask_in
:
2155 case nir_intrinsic_load_sample_pos
:
2156 case nir_intrinsic_load_subgroup_eq_mask
:
2157 case nir_intrinsic_load_subgroup_ge_mask
:
2158 case nir_intrinsic_load_subgroup_gt_mask
:
2159 case nir_intrinsic_load_subgroup_le_mask
:
2160 case nir_intrinsic_load_subgroup_lt_mask
:
2161 case nir_intrinsic_load_subgroup_invocation
:
2162 case nir_intrinsic_load_tess_coord
:
2163 case nir_intrinsic_load_tess_level_inner
:
2164 case nir_intrinsic_load_tess_level_outer
:
2165 case nir_intrinsic_load_vertex_id
:
2166 case nir_intrinsic_load_work_group_id
: {
2167 const DataType dType
= getDType(insn
);
2168 SVSemantic sv
= convert(op
);
2169 LValues
&newDefs
= convert(&insn
->dest
);
2171 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2173 if (typeSizeof(dType
) == 8)
2178 if (sv
== SV_TID
&& info
->prop
.cp
.numThreads
[i
] == 1) {
2181 Symbol
*sym
= mkSysVal(sv
, i
);
2182 Instruction
*rdsv
= mkOp1(OP_RDSV
, TYPE_U32
, def
, sym
);
2183 if (sv
== SV_TESS_OUTER
|| sv
== SV_TESS_INNER
)
2187 if (typeSizeof(dType
) == 8)
2188 mkOp2(OP_MERGE
, dType
, newDefs
[i
], def
, loadImm(getSSA(), 0u));
2193 case nir_intrinsic_load_subgroup_size
: {
2194 LValues
&newDefs
= convert(&insn
->dest
);
2195 loadImm(newDefs
[0], 32u);
2198 case nir_intrinsic_vote_all
:
2199 case nir_intrinsic_vote_any
:
2200 case nir_intrinsic_vote_ieq
: {
2201 LValues
&newDefs
= convert(&insn
->dest
);
2202 Value
*pred
= getScratch(1, FILE_PREDICATE
);
2203 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
2204 mkOp1(OP_VOTE
, TYPE_U32
, pred
, pred
)->subOp
= getSubOp(op
);
2205 mkCvt(OP_CVT
, TYPE_U32
, newDefs
[0], TYPE_U8
, pred
);
2208 case nir_intrinsic_ballot
: {
2209 LValues
&newDefs
= convert(&insn
->dest
);
2210 Value
*pred
= getSSA(1, FILE_PREDICATE
);
2211 mkCmp(OP_SET
, CC_NE
, TYPE_U32
, pred
, TYPE_U32
, getSrc(&insn
->src
[0], 0), zero
);
2212 mkOp1(OP_VOTE
, TYPE_U32
, newDefs
[0], pred
)->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
2215 case nir_intrinsic_read_first_invocation
:
2216 case nir_intrinsic_read_invocation
: {
2217 LValues
&newDefs
= convert(&insn
->dest
);
2218 const DataType dType
= getDType(insn
);
2219 Value
*tmp
= getScratch();
2221 if (op
== nir_intrinsic_read_first_invocation
) {
2222 mkOp1(OP_VOTE
, TYPE_U32
, tmp
, mkImm(1))->subOp
= NV50_IR_SUBOP_VOTE_ANY
;
2223 mkOp2(OP_EXTBF
, TYPE_U32
, tmp
, tmp
, mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
2224 mkOp1(OP_BFIND
, TYPE_U32
, tmp
, tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
2226 tmp
= getSrc(&insn
->src
[1], 0);
2228 for (uint8_t i
= 0; i
< insn
->num_components
; ++i
) {
2229 mkOp3(OP_SHFL
, dType
, newDefs
[i
], getSrc(&insn
->src
[0], i
), tmp
, mkImm(0x1f))
2230 ->subOp
= NV50_IR_SUBOP_SHFL_IDX
;
2234 case nir_intrinsic_load_per_vertex_input
: {
2235 const DataType dType
= getDType(insn
);
2236 LValues
&newDefs
= convert(&insn
->dest
);
2237 Value
*indirectVertex
;
2238 Value
*indirectOffset
;
2239 uint32_t baseVertex
= getIndirect(&insn
->src
[0], 0, indirectVertex
);
2240 uint32_t idx
= getIndirect(insn
, 1, 0, indirectOffset
);
2242 Value
*vtxBase
= mkOp2v(OP_PFETCH
, TYPE_U32
, getSSA(4, FILE_ADDRESS
),
2243 mkImm(baseVertex
), indirectVertex
);
2244 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2245 uint32_t address
= getSlotAddress(insn
, idx
, i
);
2246 loadFrom(FILE_SHADER_INPUT
, 0, dType
, newDefs
[i
], address
, 0,
2247 indirectOffset
, vtxBase
, info
->in
[idx
].patch
);
2251 case nir_intrinsic_load_per_vertex_output
: {
2252 const DataType dType
= getDType(insn
);
2253 LValues
&newDefs
= convert(&insn
->dest
);
2254 Value
*indirectVertex
;
2255 Value
*indirectOffset
;
2256 uint32_t baseVertex
= getIndirect(&insn
->src
[0], 0, indirectVertex
);
2257 uint32_t idx
= getIndirect(insn
, 1, 0, indirectOffset
);
2258 Value
*vtxBase
= NULL
;
2261 vtxBase
= indirectVertex
;
2263 vtxBase
= loadImm(NULL
, baseVertex
);
2265 vtxBase
= mkOp2v(OP_ADD
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), outBase
, vtxBase
);
2267 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2268 uint32_t address
= getSlotAddress(insn
, idx
, i
);
2269 loadFrom(FILE_SHADER_OUTPUT
, 0, dType
, newDefs
[i
], address
, 0,
2270 indirectOffset
, vtxBase
, info
->in
[idx
].patch
);
2274 case nir_intrinsic_emit_vertex
:
2275 if (info
->io
.genUserClip
> 0)
2276 handleUserClipPlanes();
2278 case nir_intrinsic_end_primitive
: {
2279 uint32_t idx
= nir_intrinsic_stream_id(insn
);
2280 mkOp1(getOperation(op
), TYPE_U32
, NULL
, mkImm(idx
))->fixed
= 1;
2283 case nir_intrinsic_load_ubo
: {
2284 const DataType dType
= getDType(insn
);
2285 LValues
&newDefs
= convert(&insn
->dest
);
2286 Value
*indirectIndex
;
2287 Value
*indirectOffset
;
2288 uint32_t index
= getIndirect(&insn
->src
[0], 0, indirectIndex
) + 1;
2289 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2291 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2292 loadFrom(FILE_MEMORY_CONST
, index
, dType
, newDefs
[i
], offset
, i
,
2293 indirectOffset
, indirectIndex
);
2297 case nir_intrinsic_get_buffer_size
: {
2298 LValues
&newDefs
= convert(&insn
->dest
);
2299 const DataType dType
= getDType(insn
);
2300 Value
*indirectBuffer
;
2301 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2303 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, 0);
2304 mkOp1(OP_BUFQ
, dType
, newDefs
[0], sym
)->setIndirect(0, 0, indirectBuffer
);
2307 case nir_intrinsic_store_ssbo
: {
2308 DataType sType
= getSType(insn
->src
[0], false, false);
2309 Value
*indirectBuffer
;
2310 Value
*indirectOffset
;
2311 uint32_t buffer
= getIndirect(&insn
->src
[1], 0, indirectBuffer
);
2312 uint32_t offset
= getIndirect(&insn
->src
[2], 0, indirectOffset
);
2314 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2315 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2317 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, sType
,
2318 offset
+ i
* typeSizeof(sType
));
2319 mkStore(OP_STORE
, sType
, sym
, indirectOffset
, getSrc(&insn
->src
[0], i
))
2320 ->setIndirect(0, 1, indirectBuffer
);
2322 info
->io
.globalAccess
|= 0x2;
2325 case nir_intrinsic_load_ssbo
: {
2326 const DataType dType
= getDType(insn
);
2327 LValues
&newDefs
= convert(&insn
->dest
);
2328 Value
*indirectBuffer
;
2329 Value
*indirectOffset
;
2330 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2331 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2333 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
)
2334 loadFrom(FILE_MEMORY_BUFFER
, buffer
, dType
, newDefs
[i
], offset
, i
,
2335 indirectOffset
, indirectBuffer
);
2337 info
->io
.globalAccess
|= 0x1;
2340 case nir_intrinsic_shared_atomic_add
:
2341 case nir_intrinsic_shared_atomic_and
:
2342 case nir_intrinsic_shared_atomic_comp_swap
:
2343 case nir_intrinsic_shared_atomic_exchange
:
2344 case nir_intrinsic_shared_atomic_or
:
2345 case nir_intrinsic_shared_atomic_imax
:
2346 case nir_intrinsic_shared_atomic_imin
:
2347 case nir_intrinsic_shared_atomic_umax
:
2348 case nir_intrinsic_shared_atomic_umin
:
2349 case nir_intrinsic_shared_atomic_xor
: {
2350 const DataType dType
= getDType(insn
);
2351 LValues
&newDefs
= convert(&insn
->dest
);
2352 Value
*indirectOffset
;
2353 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2354 Symbol
*sym
= mkSymbol(FILE_MEMORY_SHARED
, 0, dType
, offset
);
2355 Instruction
*atom
= mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
, getSrc(&insn
->src
[1], 0));
2356 if (op
== nir_intrinsic_shared_atomic_comp_swap
)
2357 atom
->setSrc(2, getSrc(&insn
->src
[2], 0));
2358 atom
->setIndirect(0, 0, indirectOffset
);
2359 atom
->subOp
= getSubOp(op
);
2362 case nir_intrinsic_ssbo_atomic_add
:
2363 case nir_intrinsic_ssbo_atomic_and
:
2364 case nir_intrinsic_ssbo_atomic_comp_swap
:
2365 case nir_intrinsic_ssbo_atomic_exchange
:
2366 case nir_intrinsic_ssbo_atomic_or
:
2367 case nir_intrinsic_ssbo_atomic_imax
:
2368 case nir_intrinsic_ssbo_atomic_imin
:
2369 case nir_intrinsic_ssbo_atomic_umax
:
2370 case nir_intrinsic_ssbo_atomic_umin
:
2371 case nir_intrinsic_ssbo_atomic_xor
: {
2372 const DataType dType
= getDType(insn
);
2373 LValues
&newDefs
= convert(&insn
->dest
);
2374 Value
*indirectBuffer
;
2375 Value
*indirectOffset
;
2376 uint32_t buffer
= getIndirect(&insn
->src
[0], 0, indirectBuffer
);
2377 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2379 Symbol
*sym
= mkSymbol(FILE_MEMORY_BUFFER
, buffer
, dType
, offset
);
2380 Instruction
*atom
= mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
,
2381 getSrc(&insn
->src
[2], 0));
2382 if (op
== nir_intrinsic_ssbo_atomic_comp_swap
)
2383 atom
->setSrc(2, getSrc(&insn
->src
[3], 0));
2384 atom
->setIndirect(0, 0, indirectOffset
);
2385 atom
->setIndirect(0, 1, indirectBuffer
);
2386 atom
->subOp
= getSubOp(op
);
2388 info
->io
.globalAccess
|= 0x2;
2391 case nir_intrinsic_global_atomic_add
:
2392 case nir_intrinsic_global_atomic_and
:
2393 case nir_intrinsic_global_atomic_comp_swap
:
2394 case nir_intrinsic_global_atomic_exchange
:
2395 case nir_intrinsic_global_atomic_or
:
2396 case nir_intrinsic_global_atomic_imax
:
2397 case nir_intrinsic_global_atomic_imin
:
2398 case nir_intrinsic_global_atomic_umax
:
2399 case nir_intrinsic_global_atomic_umin
:
2400 case nir_intrinsic_global_atomic_xor
: {
2401 const DataType dType
= getDType(insn
);
2402 LValues
&newDefs
= convert(&insn
->dest
);
2404 uint32_t offset
= getIndirect(&insn
->src
[0], 0, address
);
2406 Symbol
*sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, dType
, offset
);
2408 mkOp2(OP_ATOM
, dType
, newDefs
[0], sym
, getSrc(&insn
->src
[1], 0));
2409 atom
->setIndirect(0, 0, address
);
2410 atom
->subOp
= getSubOp(op
);
2412 info
->io
.globalAccess
|= 0x2;
2415 case nir_intrinsic_bindless_image_atomic_add
:
2416 case nir_intrinsic_bindless_image_atomic_and
:
2417 case nir_intrinsic_bindless_image_atomic_comp_swap
:
2418 case nir_intrinsic_bindless_image_atomic_exchange
:
2419 case nir_intrinsic_bindless_image_atomic_imax
:
2420 case nir_intrinsic_bindless_image_atomic_umax
:
2421 case nir_intrinsic_bindless_image_atomic_imin
:
2422 case nir_intrinsic_bindless_image_atomic_umin
:
2423 case nir_intrinsic_bindless_image_atomic_or
:
2424 case nir_intrinsic_bindless_image_atomic_xor
:
2425 case nir_intrinsic_bindless_image_load
:
2426 case nir_intrinsic_bindless_image_samples
:
2427 case nir_intrinsic_bindless_image_size
:
2428 case nir_intrinsic_bindless_image_store
: {
2429 std::vector
<Value
*> srcs
, defs
;
2430 Value
*indirect
= getSrc(&insn
->src
[0], 0);
2434 TexInstruction::Target target
=
2435 convert(nir_intrinsic_image_dim(insn
), !!nir_intrinsic_image_array(insn
), false);
2436 unsigned int argCount
= getNIRArgCount(target
);
2437 uint16_t location
= 0;
2439 if (opInfo
.has_dest
) {
2440 LValues
&newDefs
= convert(&insn
->dest
);
2441 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
) {
2442 defs
.push_back(newDefs
[i
]);
2448 case nir_intrinsic_bindless_image_atomic_add
:
2449 case nir_intrinsic_bindless_image_atomic_and
:
2450 case nir_intrinsic_bindless_image_atomic_comp_swap
:
2451 case nir_intrinsic_bindless_image_atomic_exchange
:
2452 case nir_intrinsic_bindless_image_atomic_imax
:
2453 case nir_intrinsic_bindless_image_atomic_umax
:
2454 case nir_intrinsic_bindless_image_atomic_imin
:
2455 case nir_intrinsic_bindless_image_atomic_umin
:
2456 case nir_intrinsic_bindless_image_atomic_or
:
2457 case nir_intrinsic_bindless_image_atomic_xor
:
2458 ty
= getDType(insn
);
2460 info
->io
.globalAccess
|= 0x2;
2462 case nir_intrinsic_bindless_image_load
:
2464 info
->io
.globalAccess
|= 0x1;
2466 case nir_intrinsic_bindless_image_store
:
2469 info
->io
.globalAccess
|= 0x2;
2471 case nir_intrinsic_bindless_image_samples
:
2475 case nir_intrinsic_bindless_image_size
:
2479 unreachable("unhandled image opcode");
2484 if (opInfo
.num_srcs
>= 2)
2485 for (unsigned int i
= 0u; i
< argCount
; ++i
)
2486 srcs
.push_back(getSrc(&insn
->src
[1], i
));
2488 // the sampler is just another src added after coords
2489 if (opInfo
.num_srcs
>= 3 && target
.isMS())
2490 srcs
.push_back(getSrc(&insn
->src
[2], 0));
2492 if (opInfo
.num_srcs
>= 4) {
2493 unsigned components
= opInfo
.src_components
[3] ? opInfo
.src_components
[3] : insn
->num_components
;
2494 for (uint8_t i
= 0u; i
< components
; ++i
)
2495 srcs
.push_back(getSrc(&insn
->src
[3], i
));
2498 if (opInfo
.num_srcs
>= 5)
2499 // 1 for aotmic swap
2500 for (uint8_t i
= 0u; i
< opInfo
.src_components
[4]; ++i
)
2501 srcs
.push_back(getSrc(&insn
->src
[4], i
));
2503 TexInstruction
*texi
= mkTex(getOperation(op
), target
.getEnum(), location
, 0, defs
, srcs
);
2504 texi
->tex
.bindless
= false;
2505 texi
->tex
.format
= &nv50_ir::TexInstruction::formatTable
[convertGLImgFormat(nir_intrinsic_format(insn
))];
2506 texi
->tex
.mask
= mask
;
2507 texi
->tex
.bindless
= true;
2508 texi
->cache
= convert(nir_intrinsic_access(insn
));
2510 texi
->subOp
= getSubOp(op
);
2513 texi
->setIndirectR(indirect
);
2517 case nir_intrinsic_image_deref_atomic_add
:
2518 case nir_intrinsic_image_deref_atomic_and
:
2519 case nir_intrinsic_image_deref_atomic_comp_swap
:
2520 case nir_intrinsic_image_deref_atomic_exchange
:
2521 case nir_intrinsic_image_deref_atomic_imax
:
2522 case nir_intrinsic_image_deref_atomic_umax
:
2523 case nir_intrinsic_image_deref_atomic_imin
:
2524 case nir_intrinsic_image_deref_atomic_umin
:
2525 case nir_intrinsic_image_deref_atomic_or
:
2526 case nir_intrinsic_image_deref_atomic_xor
:
2527 case nir_intrinsic_image_deref_load
:
2528 case nir_intrinsic_image_deref_samples
:
2529 case nir_intrinsic_image_deref_size
:
2530 case nir_intrinsic_image_deref_store
: {
2531 const nir_variable
*tex
;
2532 std::vector
<Value
*> srcs
, defs
;
2537 nir_deref_instr
*deref
= nir_src_as_deref(insn
->src
[0]);
2538 const glsl_type
*type
= deref
->type
;
2539 TexInstruction::Target target
=
2540 convert((glsl_sampler_dim
)type
->sampler_dimensionality
,
2541 type
->sampler_array
, type
->sampler_shadow
);
2542 unsigned int argCount
= getNIRArgCount(target
);
2543 uint16_t location
= handleDeref(deref
, indirect
, tex
);
2545 if (opInfo
.has_dest
) {
2546 LValues
&newDefs
= convert(&insn
->dest
);
2547 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
) {
2548 defs
.push_back(newDefs
[i
]);
2554 case nir_intrinsic_image_deref_atomic_add
:
2555 case nir_intrinsic_image_deref_atomic_and
:
2556 case nir_intrinsic_image_deref_atomic_comp_swap
:
2557 case nir_intrinsic_image_deref_atomic_exchange
:
2558 case nir_intrinsic_image_deref_atomic_imax
:
2559 case nir_intrinsic_image_deref_atomic_umax
:
2560 case nir_intrinsic_image_deref_atomic_imin
:
2561 case nir_intrinsic_image_deref_atomic_umin
:
2562 case nir_intrinsic_image_deref_atomic_or
:
2563 case nir_intrinsic_image_deref_atomic_xor
:
2564 ty
= getDType(insn
);
2566 info
->io
.globalAccess
|= 0x2;
2568 case nir_intrinsic_image_deref_load
:
2570 info
->io
.globalAccess
|= 0x1;
2572 case nir_intrinsic_image_deref_store
:
2575 info
->io
.globalAccess
|= 0x2;
2577 case nir_intrinsic_image_deref_samples
:
2581 case nir_intrinsic_image_deref_size
:
2585 unreachable("unhandled image opcode");
2590 if (opInfo
.num_srcs
>= 2)
2591 for (unsigned int i
= 0u; i
< argCount
; ++i
)
2592 srcs
.push_back(getSrc(&insn
->src
[1], i
));
2594 // the sampler is just another src added after coords
2595 if (opInfo
.num_srcs
>= 3 && target
.isMS())
2596 srcs
.push_back(getSrc(&insn
->src
[2], 0));
2598 if (opInfo
.num_srcs
>= 4) {
2599 unsigned components
= opInfo
.src_components
[3] ? opInfo
.src_components
[3] : insn
->num_components
;
2600 for (uint8_t i
= 0u; i
< components
; ++i
)
2601 srcs
.push_back(getSrc(&insn
->src
[3], i
));
2604 if (opInfo
.num_srcs
>= 5)
2605 // 1 for aotmic swap
2606 for (uint8_t i
= 0u; i
< opInfo
.src_components
[4]; ++i
)
2607 srcs
.push_back(getSrc(&insn
->src
[4], i
));
2609 TexInstruction
*texi
= mkTex(getOperation(op
), target
.getEnum(), location
, 0, defs
, srcs
);
2610 texi
->tex
.bindless
= false;
2611 texi
->tex
.format
= &nv50_ir::TexInstruction::formatTable
[convertGLImgFormat(tex
->data
.image
.format
)];
2612 texi
->tex
.mask
= mask
;
2613 texi
->cache
= getCacheModeFromVar(tex
);
2615 texi
->subOp
= getSubOp(op
);
2618 texi
->setIndirectR(indirect
);
2622 case nir_intrinsic_store_shared
: {
2623 DataType sType
= getSType(insn
->src
[0], false, false);
2624 Value
*indirectOffset
;
2625 uint32_t offset
= getIndirect(&insn
->src
[1], 0, indirectOffset
);
2627 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
) {
2628 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2630 Symbol
*sym
= mkSymbol(FILE_MEMORY_SHARED
, 0, sType
, offset
+ i
* typeSizeof(sType
));
2631 mkStore(OP_STORE
, sType
, sym
, indirectOffset
, getSrc(&insn
->src
[0], i
));
2635 case nir_intrinsic_load_shared
: {
2636 const DataType dType
= getDType(insn
);
2637 LValues
&newDefs
= convert(&insn
->dest
);
2638 Value
*indirectOffset
;
2639 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2641 for (uint8_t i
= 0u; i
< insn
->num_components
; ++i
)
2642 loadFrom(FILE_MEMORY_SHARED
, 0, dType
, newDefs
[i
], offset
, i
, indirectOffset
);
2646 case nir_intrinsic_control_barrier
: {
2647 // TODO: add flag to shader_info
2648 info
->numBarriers
= 1;
2649 Instruction
*bar
= mkOp2(OP_BAR
, TYPE_U32
, NULL
, mkImm(0), mkImm(0));
2651 bar
->subOp
= NV50_IR_SUBOP_BAR_SYNC
;
2654 case nir_intrinsic_group_memory_barrier
:
2655 case nir_intrinsic_memory_barrier
:
2656 case nir_intrinsic_memory_barrier_buffer
:
2657 case nir_intrinsic_memory_barrier_image
:
2658 case nir_intrinsic_memory_barrier_shared
: {
2659 Instruction
*bar
= mkOp(OP_MEMBAR
, TYPE_NONE
, NULL
);
2661 bar
->subOp
= getSubOp(op
);
2664 case nir_intrinsic_memory_barrier_tcs_patch
:
2666 case nir_intrinsic_shader_clock
: {
2667 const DataType dType
= getDType(insn
);
2668 LValues
&newDefs
= convert(&insn
->dest
);
2670 loadImm(newDefs
[0], 0u);
2671 mkOp1(OP_RDSV
, dType
, newDefs
[1], mkSysVal(SV_CLOCK
, 0))->fixed
= 1;
2674 case nir_intrinsic_load_global
: {
2675 const DataType dType
= getDType(insn
);
2676 LValues
&newDefs
= convert(&insn
->dest
);
2677 Value
*indirectOffset
;
2678 uint32_t offset
= getIndirect(&insn
->src
[0], 0, indirectOffset
);
2680 for (auto i
= 0u; i
< insn
->num_components
; ++i
)
2681 loadFrom(FILE_MEMORY_GLOBAL
, 0, dType
, newDefs
[i
], offset
, i
, indirectOffset
);
2683 info
->io
.globalAccess
|= 0x1;
2686 case nir_intrinsic_store_global
: {
2687 DataType sType
= getSType(insn
->src
[0], false, false);
2689 for (auto i
= 0u; i
< insn
->num_components
; ++i
) {
2690 if (!((1u << i
) & nir_intrinsic_write_mask(insn
)))
2692 if (typeSizeof(sType
) == 8) {
2694 mkSplit(split
, 4, getSrc(&insn
->src
[0], i
));
2696 Symbol
*sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, TYPE_U32
, i
* typeSizeof(sType
));
2697 mkStore(OP_STORE
, TYPE_U32
, sym
, getSrc(&insn
->src
[1], 0), split
[0]);
2699 sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, TYPE_U32
, i
* typeSizeof(sType
) + 4);
2700 mkStore(OP_STORE
, TYPE_U32
, sym
, getSrc(&insn
->src
[1], 0), split
[1]);
2702 Symbol
*sym
= mkSymbol(FILE_MEMORY_GLOBAL
, 0, sType
, i
* typeSizeof(sType
));
2703 mkStore(OP_STORE
, sType
, sym
, getSrc(&insn
->src
[1], 0), getSrc(&insn
->src
[0], i
));
2707 info
->io
.globalAccess
|= 0x2;
2711 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos
[op
].name
);
2719 Converter::visit(nir_jump_instr
*insn
)
2721 switch (insn
->type
) {
2722 case nir_jump_return
:
2723 // TODO: this only works in the main function
2724 mkFlow(OP_BRA
, exit
, CC_ALWAYS
, NULL
);
2725 bb
->cfg
.attach(&exit
->cfg
, Graph::Edge::CROSS
);
2727 case nir_jump_break
:
2728 case nir_jump_continue
: {
2729 bool isBreak
= insn
->type
== nir_jump_break
;
2730 nir_block
*block
= insn
->instr
.block
;
2731 assert(!block
->successors
[1]);
2732 BasicBlock
*target
= convert(block
->successors
[0]);
2733 mkFlow(isBreak
? OP_BREAK
: OP_CONT
, target
, CC_ALWAYS
, NULL
);
2734 bb
->cfg
.attach(&target
->cfg
, isBreak
? Graph::Edge::CROSS
: Graph::Edge::BACK
);
2738 ERROR("unknown nir_jump_type %u\n", insn
->type
);
2746 Converter::convert(nir_load_const_instr
*insn
, uint8_t idx
)
2751 setPosition(immInsertPos
, true);
2753 setPosition(bb
, false);
2755 switch (insn
->def
.bit_size
) {
2757 val
= loadImm(getSSA(8), insn
->value
[idx
].u64
);
2760 val
= loadImm(getSSA(4), insn
->value
[idx
].u32
);
2763 val
= loadImm(getSSA(2), insn
->value
[idx
].u16
);
2766 val
= loadImm(getSSA(1), insn
->value
[idx
].u8
);
2769 unreachable("unhandled bit size!\n");
2771 setPosition(bb
, true);
2776 Converter::visit(nir_load_const_instr
*insn
)
2778 assert(insn
->def
.bit_size
<= 64);
2779 immediates
[insn
->def
.index
] = insn
;
// Common sanity checks for scalar-only ALU handling: the converter only
// supports single-component destinations with a write mask of exactly 1.
// NOTE(review): the dropped `return false;` arms were reconstructed — verify against upstream.
#define DEFAULT_CHECKS \
      if (insn->dest.dest.ssa.num_components > 1) { \
         ERROR("nir_alu_instr only supported with 1 component!\n"); \
         return false; \
      } \
      if (insn->dest.write_mask != 1) { \
         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
         return false; \
      }
2793 Converter::visit(nir_alu_instr
*insn
)
2795 const nir_op op
= insn
->op
;
2796 const nir_op_info
&info
= nir_op_infos
[op
];
2797 DataType dType
= getDType(insn
);
2798 const std::vector
<DataType
> sTypes
= getSTypes(insn
);
2800 Instruction
*oldPos
= this->bb
->getExit();
2811 case nir_op_fddx_coarse
:
2812 case nir_op_fddx_fine
:
2814 case nir_op_fddy_coarse
:
2815 case nir_op_fddy_fine
:
2834 case nir_op_imul_high
:
2835 case nir_op_umul_high
:
2840 case nir_op_pack_64_2x32_split
:
2855 LValues
&newDefs
= convert(&insn
->dest
);
2856 operation preOp
= preOperationNeeded(op
);
2857 if (preOp
!= OP_NOP
) {
2858 assert(info
.num_inputs
< 2);
2859 Value
*tmp
= getSSA(typeSizeof(dType
));
2860 Instruction
*i0
= mkOp(preOp
, dType
, tmp
);
2861 Instruction
*i1
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2862 if (info
.num_inputs
) {
2863 i0
->setSrc(0, getSrc(&insn
->src
[0]));
2866 i1
->subOp
= getSubOp(op
);
2868 Instruction
*i
= mkOp(getOperation(op
), dType
, newDefs
[0]);
2869 for (unsigned s
= 0u; s
< info
.num_inputs
; ++s
) {
2870 i
->setSrc(s
, getSrc(&insn
->src
[s
]));
2872 i
->subOp
= getSubOp(op
);
2876 case nir_op_ifind_msb
:
2877 case nir_op_ufind_msb
: {
2879 LValues
&newDefs
= convert(&insn
->dest
);
2881 mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2884 case nir_op_fround_even
: {
2886 LValues
&newDefs
= convert(&insn
->dest
);
2887 mkCvt(OP_CVT
, dType
, newDefs
[0], dType
, getSrc(&insn
->src
[0]))->rnd
= ROUND_NI
;
2890 // convert instructions
2904 case nir_op_u2u64
: {
2906 LValues
&newDefs
= convert(&insn
->dest
);
2907 Instruction
*i
= mkOp1(getOperation(op
), dType
, newDefs
[0], getSrc(&insn
->src
[0]));
2908 if (op
== nir_op_f2i32
|| op
== nir_op_f2i64
|| op
== nir_op_f2u32
|| op
== nir_op_f2u64
)
2910 i
->sType
= sTypes
[0];
2913 // compare instructions
2923 case nir_op_ine32
: {
2925 LValues
&newDefs
= convert(&insn
->dest
);
2926 Instruction
*i
= mkCmp(getOperation(op
),
2931 getSrc(&insn
->src
[0]),
2932 getSrc(&insn
->src
[1]));
2933 if (info
.num_inputs
== 3)
2934 i
->setSrc(2, getSrc(&insn
->src
[2]));
2935 i
->sType
= sTypes
[0];
2938 // those are weird ALU ops and need special handling, because
2939 // 1. they are always componend based
2940 // 2. they basically just merge multiple values into one data type
2942 if (!insn
->dest
.dest
.is_ssa
&& insn
->dest
.dest
.reg
.reg
->num_array_elems
) {
2943 nir_reg_dest
& reg
= insn
->dest
.dest
.reg
;
2944 uint32_t goffset
= regToLmemOffset
[reg
.reg
->index
];
2945 uint8_t comps
= reg
.reg
->num_components
;
2946 uint8_t size
= reg
.reg
->bit_size
/ 8;
2947 uint8_t csize
= 4 * size
; // TODO after fixing MemoryOpts: comps * size;
2948 uint32_t aoffset
= csize
* reg
.base_offset
;
2949 Value
*indirect
= NULL
;
2952 indirect
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
),
2953 getSrc(reg
.indirect
, 0), mkImm(csize
));
2955 for (uint8_t i
= 0u; i
< comps
; ++i
) {
2956 if (!((1u << i
) & insn
->dest
.write_mask
))
2959 Symbol
*sym
= mkSymbol(FILE_MEMORY_LOCAL
, 0, dType
, goffset
+ aoffset
+ i
* size
);
2960 mkStore(OP_STORE
, dType
, sym
, indirect
, getSrc(&insn
->src
[0], i
));
2963 } else if (!insn
->src
[0].src
.is_ssa
&& insn
->src
[0].src
.reg
.reg
->num_array_elems
) {
2964 LValues
&newDefs
= convert(&insn
->dest
);
2965 nir_reg_src
& reg
= insn
->src
[0].src
.reg
;
2966 uint32_t goffset
= regToLmemOffset
[reg
.reg
->index
];
2967 // uint8_t comps = reg.reg->num_components;
2968 uint8_t size
= reg
.reg
->bit_size
/ 8;
2969 uint8_t csize
= 4 * size
; // TODO after fixing MemoryOpts: comps * size;
2970 uint32_t aoffset
= csize
* reg
.base_offset
;
2971 Value
*indirect
= NULL
;
2974 indirect
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), getSrc(reg
.indirect
, 0), mkImm(csize
));
2976 for (uint8_t i
= 0u; i
< newDefs
.size(); ++i
)
2977 loadFrom(FILE_MEMORY_LOCAL
, 0, dType
, newDefs
[i
], goffset
+ aoffset
, i
, indirect
);
2981 LValues
&newDefs
= convert(&insn
->dest
);
2982 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2983 mkMov(newDefs
[c
], getSrc(&insn
->src
[0], c
), dType
);
2991 case nir_op_vec16
: {
2992 LValues
&newDefs
= convert(&insn
->dest
);
2993 for (LValues::size_type c
= 0u; c
< newDefs
.size(); ++c
) {
2994 mkMov(newDefs
[c
], getSrc(&insn
->src
[c
]), dType
);
2999 case nir_op_pack_64_2x32
: {
3000 LValues
&newDefs
= convert(&insn
->dest
);
3001 Instruction
*merge
= mkOp(OP_MERGE
, dType
, newDefs
[0]);
3002 merge
->setSrc(0, getSrc(&insn
->src
[0], 0));
3003 merge
->setSrc(1, getSrc(&insn
->src
[0], 1));
3006 case nir_op_pack_half_2x16_split
: {
3007 LValues
&newDefs
= convert(&insn
->dest
);
3008 Value
*tmpH
= getSSA();
3009 Value
*tmpL
= getSSA();
3011 mkCvt(OP_CVT
, TYPE_F16
, tmpL
, TYPE_F32
, getSrc(&insn
->src
[0]));
3012 mkCvt(OP_CVT
, TYPE_F16
, tmpH
, TYPE_F32
, getSrc(&insn
->src
[1]));
3013 mkOp3(OP_INSBF
, TYPE_U32
, newDefs
[0], tmpH
, mkImm(0x1010), tmpL
);
3016 case nir_op_unpack_half_2x16_split_x
:
3017 case nir_op_unpack_half_2x16_split_y
: {
3018 LValues
&newDefs
= convert(&insn
->dest
);
3019 Instruction
*cvt
= mkCvt(OP_CVT
, TYPE_F32
, newDefs
[0], TYPE_F16
, getSrc(&insn
->src
[0]));
3020 if (op
== nir_op_unpack_half_2x16_split_y
)
3024 case nir_op_unpack_64_2x32
: {
3025 LValues
&newDefs
= convert(&insn
->dest
);
3026 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, newDefs
[1]);
3029 case nir_op_unpack_64_2x32_split_x
: {
3030 LValues
&newDefs
= convert(&insn
->dest
);
3031 mkOp1(OP_SPLIT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]))->setDef(1, getSSA());
3034 case nir_op_unpack_64_2x32_split_y
: {
3035 LValues
&newDefs
= convert(&insn
->dest
);
3036 mkOp1(OP_SPLIT
, dType
, getSSA(), getSrc(&insn
->src
[0]))->setDef(1, newDefs
[0]);
3039 // special instructions
3041 case nir_op_isign
: {
3044 if (::isFloatType(dType
))
3049 LValues
&newDefs
= convert(&insn
->dest
);
3050 LValue
*val0
= getScratch();
3051 LValue
*val1
= getScratch();
3052 mkCmp(OP_SET
, CC_GT
, iType
, val0
, dType
, getSrc(&insn
->src
[0]), zero
);
3053 mkCmp(OP_SET
, CC_LT
, iType
, val1
, dType
, getSrc(&insn
->src
[0]), zero
);
3055 if (dType
== TYPE_F64
) {
3056 mkOp2(OP_SUB
, iType
, val0
, val0
, val1
);
3057 mkCvt(OP_CVT
, TYPE_F64
, newDefs
[0], iType
, val0
);
3058 } else if (dType
== TYPE_S64
|| dType
== TYPE_U64
) {
3059 mkOp2(OP_SUB
, iType
, val0
, val1
, val0
);
3060 mkOp2(OP_SHR
, iType
, val1
, val0
, loadImm(NULL
, 31));
3061 mkOp2(OP_MERGE
, dType
, newDefs
[0], val0
, val1
);
3062 } else if (::isFloatType(dType
))
3063 mkOp2(OP_SUB
, iType
, newDefs
[0], val0
, val1
);
3065 mkOp2(OP_SUB
, iType
, newDefs
[0], val1
, val0
);
3069 case nir_op_b32csel
: {
3071 LValues
&newDefs
= convert(&insn
->dest
);
3072 mkCmp(OP_SLCT
, CC_NE
, dType
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[1]), getSrc(&insn
->src
[2]), getSrc(&insn
->src
[0]));
3075 case nir_op_ibitfield_extract
:
3076 case nir_op_ubitfield_extract
: {
3078 Value
*tmp
= getSSA();
3079 LValues
&newDefs
= convert(&insn
->dest
);
3080 mkOp3(OP_INSBF
, dType
, tmp
, getSrc(&insn
->src
[2]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
3081 mkOp2(OP_EXTBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), tmp
);
3086 LValues
&newDefs
= convert(&insn
->dest
);
3087 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 0x808), getSrc(&insn
->src
[1]));
3090 case nir_op_bitfield_insert
: {
3092 LValues
&newDefs
= convert(&insn
->dest
);
3093 LValue
*temp
= getSSA();
3094 mkOp3(OP_INSBF
, TYPE_U32
, temp
, getSrc(&insn
->src
[3]), mkImm(0x808), getSrc(&insn
->src
[2]));
3095 mkOp3(OP_INSBF
, dType
, newDefs
[0], getSrc(&insn
->src
[1]), temp
, getSrc(&insn
->src
[0]));
3098 case nir_op_bit_count
: {
3100 LValues
&newDefs
= convert(&insn
->dest
);
3101 mkOp2(OP_POPCNT
, dType
, newDefs
[0], getSrc(&insn
->src
[0]), getSrc(&insn
->src
[0]));
3104 case nir_op_bitfield_reverse
: {
3106 LValues
&newDefs
= convert(&insn
->dest
);
3107 mkOp2(OP_EXTBF
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
3110 case nir_op_find_lsb
: {
3112 LValues
&newDefs
= convert(&insn
->dest
);
3113 Value
*tmp
= getSSA();
3114 mkOp2(OP_EXTBF
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), mkImm(0x2000))->subOp
= NV50_IR_SUBOP_EXTBF_REV
;
3115 mkOp1(OP_BFIND
, TYPE_U32
, newDefs
[0], tmp
)->subOp
= NV50_IR_SUBOP_BFIND_SAMT
;
3118 // boolean conversions
3119 case nir_op_b2f32
: {
3121 LValues
&newDefs
= convert(&insn
->dest
);
3122 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1.0f
));
3125 case nir_op_b2f64
: {
3127 LValues
&newDefs
= convert(&insn
->dest
);
3128 Value
*tmp
= getSSA(4);
3129 mkOp2(OP_AND
, TYPE_U32
, tmp
, getSrc(&insn
->src
[0]), loadImm(NULL
, 0x3ff00000));
3130 mkOp2(OP_MERGE
, TYPE_U64
, newDefs
[0], loadImm(NULL
, 0), tmp
);
3134 case nir_op_i2b32
: {
3136 LValues
&newDefs
= convert(&insn
->dest
);
3138 if (typeSizeof(sTypes
[0]) == 8) {
3139 src1
= loadImm(getSSA(8), 0.0);
3143 CondCode cc
= op
== nir_op_f2b32
? CC_NEU
: CC_NE
;
3144 mkCmp(OP_SET
, cc
, TYPE_U32
, newDefs
[0], sTypes
[0], getSrc(&insn
->src
[0]), src1
);
3147 case nir_op_b2i32
: {
3149 LValues
&newDefs
= convert(&insn
->dest
);
3150 mkOp2(OP_AND
, TYPE_U32
, newDefs
[0], getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
3153 case nir_op_b2i64
: {
3155 LValues
&newDefs
= convert(&insn
->dest
);
3156 LValue
*def
= getScratch();
3157 mkOp2(OP_AND
, TYPE_U32
, def
, getSrc(&insn
->src
[0]), loadImm(NULL
, 1));
3158 mkOp2(OP_MERGE
, TYPE_S64
, newDefs
[0], def
, loadImm(NULL
, 0));
3162 ERROR("unknown nir_op %s\n", info
.name
);
3167 oldPos
= this->bb
->getEntry();
3168 oldPos
->precise
= insn
->exact
;
3171 if (unlikely(!oldPos
))
3174 while (oldPos
->next
) {
3175 oldPos
= oldPos
->next
;
3176 oldPos
->precise
= insn
->exact
;
3178 oldPos
->saturate
= insn
->dest
.saturate
;
3182 #undef DEFAULT_CHECKS
3185 Converter::visit(nir_ssa_undef_instr
*insn
)
3187 LValues
&newDefs
= convert(&insn
->def
);
3188 for (uint8_t i
= 0u; i
< insn
->def
.num_components
; ++i
) {
3189 mkOp(OP_NOP
, TYPE_NONE
, newDefs
[i
]);
// Expand one GLSL sampler dimension into the four array/shadow tex-target
// variants. Used inside convert(glsl_sampler_dim, bool, bool).
// NOTE(review): the dropped trailing `else` arm was reconstructed — verify against upstream.
#define CASE_SAMPLER(ty) \
   case GLSL_SAMPLER_DIM_ ## ty : \
      if (isArray && !isShadow) \
         return TEX_TARGET_ ## ty ## _ARRAY; \
      else if (!isArray && isShadow) \
         return TEX_TARGET_## ty ## _SHADOW; \
      else if (isArray && isShadow) \
         return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
      else \
         return TEX_TARGET_ ## ty
3206 Converter::convert(glsl_sampler_dim dim
, bool isArray
, bool isShadow
)
3212 case GLSL_SAMPLER_DIM_3D
:
3213 return TEX_TARGET_3D
;
3214 case GLSL_SAMPLER_DIM_MS
:
3216 return TEX_TARGET_2D_MS_ARRAY
;
3217 return TEX_TARGET_2D_MS
;
3218 case GLSL_SAMPLER_DIM_RECT
:
3220 return TEX_TARGET_RECT_SHADOW
;
3221 return TEX_TARGET_RECT
;
3222 case GLSL_SAMPLER_DIM_BUF
:
3223 return TEX_TARGET_BUFFER
;
3224 case GLSL_SAMPLER_DIM_EXTERNAL
:
3225 return TEX_TARGET_2D
;
3227 ERROR("unknown glsl_sampler_dim %u\n", dim
);
3229 return TEX_TARGET_COUNT
;
3235 Converter::applyProjection(Value
*src
, Value
*proj
)
3239 return mkOp2v(OP_MUL
, TYPE_F32
, getScratch(), src
, proj
);
3243 Converter::getNIRArgCount(TexInstruction::Target
& target
)
3245 unsigned int result
= target
.getArgCount();
3246 if (target
.isCube() && target
.isArray())
3254 Converter::handleDeref(nir_deref_instr
*deref
, Value
* &indirect
, const nir_variable
* &tex
)
3256 typedef std::pair
<uint32_t,Value
*> DerefPair
;
3257 std::list
<DerefPair
> derefs
;
3259 uint16_t result
= 0;
3260 while (deref
->deref_type
!= nir_deref_type_var
) {
3261 switch (deref
->deref_type
) {
3262 case nir_deref_type_array
: {
3264 uint8_t size
= type_size(deref
->type
, true);
3265 result
+= size
* getIndirect(&deref
->arr
.index
, 0, indirect
);
3268 derefs
.push_front(std::make_pair(size
, indirect
));
3273 case nir_deref_type_struct
: {
3274 result
+= nir_deref_instr_parent(deref
)->type
->struct_location_offset(deref
->strct
.index
);
3277 case nir_deref_type_var
:
3279 unreachable("nir_deref_type_var reached in handleDeref!");
3282 deref
= nir_deref_instr_parent(deref
);
3286 for (std::list
<DerefPair
>::const_iterator it
= derefs
.begin(); it
!= derefs
.end(); ++it
) {
3287 Value
*offset
= mkOp2v(OP_MUL
, TYPE_U32
, getSSA(), loadImm(getSSA(), it
->first
), it
->second
);
3289 indirect
= mkOp2v(OP_ADD
, TYPE_U32
, getSSA(), indirect
, offset
);
3294 tex
= nir_deref_instr_get_variable(deref
);
3297 return result
+ tex
->data
.driver_location
;
3301 Converter::convert(enum gl_access_qualifier access
)
3304 case ACCESS_VOLATILE
:
3306 case ACCESS_COHERENT
:
3314 Converter::getCacheModeFromVar(const nir_variable
*var
)
3316 return convert(var
->data
.access
);
3320 Converter::visit(nir_tex_instr
*insn
)
3324 case nir_texop_query_levels
:
3326 case nir_texop_texture_samples
:
3331 case nir_texop_txf_ms
:
3333 case nir_texop_txs
: {
3334 LValues
&newDefs
= convert(&insn
->dest
);
3335 std::vector
<Value
*> srcs
;
3336 std::vector
<Value
*> defs
;
3337 std::vector
<nir_src
*> offsets
;
3341 TexInstruction::Target target
= convert(insn
->sampler_dim
, insn
->is_array
, insn
->is_shadow
);
3342 operation op
= getOperation(insn
->op
);
3345 int biasIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_bias
);
3346 int compIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_comparator
);
3347 int coordsIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_coord
);
3348 int ddxIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddx
);
3349 int ddyIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ddy
);
3350 int msIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_ms_index
);
3351 int lodIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_lod
);
3352 int offsetIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_offset
);
3353 int projIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_projector
);
3354 int sampOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_sampler_offset
);
3355 int texOffIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_texture_offset
);
3356 int sampHandleIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_sampler_handle
);
3357 int texHandleIdx
= nir_tex_instr_src_index(insn
, nir_tex_src_texture_handle
);
3359 bool bindless
= sampHandleIdx
!= -1 || texHandleIdx
!= -1;
3360 assert((sampHandleIdx
!= -1) == (texHandleIdx
!= -1));
3363 proj
= mkOp1v(OP_RCP
, TYPE_F32
, getScratch(), getSrc(&insn
->src
[projIdx
].src
, 0));
3365 srcs
.resize(insn
->coord_components
);
3366 for (uint8_t i
= 0u; i
< insn
->coord_components
; ++i
)
3367 srcs
[i
] = applyProjection(getSrc(&insn
->src
[coordsIdx
].src
, i
), proj
);
3369 // sometimes we get less args than target.getArgCount, but codegen expects the latter
3370 if (insn
->coord_components
) {
3371 uint32_t argCount
= target
.getArgCount();
3376 for (uint32_t i
= 0u; i
< (argCount
- insn
->coord_components
); ++i
)
3377 srcs
.push_back(getSSA());
3380 if (insn
->op
== nir_texop_texture_samples
)
3381 srcs
.push_back(zero
);
3382 else if (!insn
->num_srcs
)
3383 srcs
.push_back(loadImm(NULL
, 0));
3385 srcs
.push_back(getSrc(&insn
->src
[biasIdx
].src
, 0));
3387 srcs
.push_back(getSrc(&insn
->src
[lodIdx
].src
, 0));
3388 else if (op
== OP_TXF
)
3391 srcs
.push_back(getSrc(&insn
->src
[msIdx
].src
, 0));
3392 if (offsetIdx
!= -1)
3393 offsets
.push_back(&insn
->src
[offsetIdx
].src
);
3395 srcs
.push_back(applyProjection(getSrc(&insn
->src
[compIdx
].src
, 0), proj
));
3396 if (texOffIdx
!= -1) {
3397 srcs
.push_back(getSrc(&insn
->src
[texOffIdx
].src
, 0));
3398 texOffIdx
= srcs
.size() - 1;
3400 if (sampOffIdx
!= -1) {
3401 srcs
.push_back(getSrc(&insn
->src
[sampOffIdx
].src
, 0));
3402 sampOffIdx
= srcs
.size() - 1;
3405 // currently we use the lower bits
3407 Value
*handle
= getSrc(&insn
->src
[sampHandleIdx
].src
, 0);
3409 mkSplit(split
, 4, handle
);
3411 srcs
.push_back(split
[0]);
3412 texOffIdx
= srcs
.size() - 1;
3415 r
= bindless
? 0xff : insn
->texture_index
;
3416 s
= bindless
? 0x1f : insn
->sampler_index
;
3418 defs
.resize(newDefs
.size());
3419 for (uint8_t d
= 0u; d
< newDefs
.size(); ++d
) {
3420 defs
[d
] = newDefs
[d
];
3423 if (target
.isMS() || (op
== OP_TEX
&& prog
->getType() != Program::TYPE_FRAGMENT
))
3426 TexInstruction
*texi
= mkTex(op
, target
.getEnum(), r
, s
, defs
, srcs
);
3427 texi
->tex
.levelZero
= lz
;
3428 texi
->tex
.mask
= mask
;
3429 texi
->tex
.bindless
= bindless
;
3431 if (texOffIdx
!= -1)
3432 texi
->tex
.rIndirectSrc
= texOffIdx
;
3433 if (sampOffIdx
!= -1)
3434 texi
->tex
.sIndirectSrc
= sampOffIdx
;
3438 if (!target
.isShadow())
3439 texi
->tex
.gatherComp
= insn
->component
;
3442 texi
->tex
.query
= TXQ_DIMS
;
3444 case nir_texop_texture_samples
:
3445 texi
->tex
.mask
= 0x4;
3446 texi
->tex
.query
= TXQ_TYPE
;
3448 case nir_texop_query_levels
:
3449 texi
->tex
.mask
= 0x8;
3450 texi
->tex
.query
= TXQ_DIMS
;
3456 texi
->tex
.useOffsets
= offsets
.size();
3457 if (texi
->tex
.useOffsets
) {
3458 for (uint8_t s
= 0; s
< texi
->tex
.useOffsets
; ++s
) {
3459 for (uint32_t c
= 0u; c
< 3; ++c
) {
3460 uint8_t s2
= std::min(c
, target
.getDim() - 1);
3461 texi
->offset
[s
][c
].set(getSrc(offsets
[s
], s2
));
3462 texi
->offset
[s
][c
].setInsn(texi
);
3467 if (op
== OP_TXG
&& offsetIdx
== -1) {
3468 if (nir_tex_instr_has_explicit_tg4_offsets(insn
)) {
3469 texi
->tex
.useOffsets
= 4;
3470 setPosition(texi
, false);
3471 for (uint8_t i
= 0; i
< 4; ++i
) {
3472 for (uint8_t j
= 0; j
< 2; ++j
) {
3473 texi
->offset
[i
][j
].set(loadImm(NULL
, insn
->tg4_offsets
[i
][j
]));
3474 texi
->offset
[i
][j
].setInsn(texi
);
3477 setPosition(texi
, true);
3481 if (ddxIdx
!= -1 && ddyIdx
!= -1) {
3482 for (uint8_t c
= 0u; c
< target
.getDim() + target
.isCube(); ++c
) {
3483 texi
->dPdx
[c
].set(getSrc(&insn
->src
[ddxIdx
].src
, c
));
3484 texi
->dPdy
[c
].set(getSrc(&insn
->src
[ddyIdx
].src
, c
));
3491 ERROR("unknown nir_texop %u\n", insn
->op
);
3498 Converter::visit(nir_deref_instr
*deref
)
3500 // we just ignore those, because images intrinsics are the only place where
3501 // we should end up with deref sources and those have to backtrack anyway
3502 // to get the nir_variable. This code just exists to handle some special
3504 switch (deref
->deref_type
) {
3505 case nir_deref_type_array
:
3506 case nir_deref_type_struct
:
3507 case nir_deref_type_var
:
3510 ERROR("unknown nir_deref_instr %u\n", deref
->deref_type
);
3521 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
3522 nir_print_shader(nir
, stderr
);
3524 struct nir_lower_subgroups_options subgroup_options
= {
3525 .subgroup_size
= 32,
3526 .ballot_bit_size
= 32,
3529 NIR_PASS_V(nir
, nir_lower_io
, nir_var_all
, type_size
, (nir_lower_io_options
)0);
3530 NIR_PASS_V(nir
, nir_lower_subgroups
, &subgroup_options
);
3531 NIR_PASS_V(nir
, nir_lower_regs_to_ssa
);
3532 NIR_PASS_V(nir
, nir_lower_load_const_to_scalar
);
3533 NIR_PASS_V(nir
, nir_lower_vars_to_ssa
);
3534 NIR_PASS_V(nir
, nir_lower_alu_to_scalar
, NULL
, NULL
);
3535 NIR_PASS_V(nir
, nir_lower_phis_to_scalar
);
3539 NIR_PASS(progress
, nir
, nir_copy_prop
);
3540 NIR_PASS(progress
, nir
, nir_opt_remove_phis
);
3541 NIR_PASS(progress
, nir
, nir_opt_trivial_continues
);
3542 NIR_PASS(progress
, nir
, nir_opt_cse
);
3543 NIR_PASS(progress
, nir
, nir_opt_algebraic
);
3544 NIR_PASS(progress
, nir
, nir_opt_constant_folding
);
3545 NIR_PASS(progress
, nir
, nir_copy_prop
);
3546 NIR_PASS(progress
, nir
, nir_opt_dce
);
3547 NIR_PASS(progress
, nir
, nir_opt_dead_cf
);
3550 NIR_PASS_V(nir
, nir_lower_bool_to_int32
);
3551 NIR_PASS_V(nir
, nir_lower_locals_to_regs
);
3552 NIR_PASS_V(nir
, nir_remove_dead_variables
, nir_var_function_temp
);
3553 NIR_PASS_V(nir
, nir_convert_from_ssa
, true);
3555 // Garbage collect dead instructions
3559 ERROR("Couldn't prase NIR!\n");
3563 if (!assignSlots()) {
3564 ERROR("Couldn't assign slots!\n");
3568 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
3569 nir_print_shader(nir
, stderr
);
3571 nir_foreach_function(function
, nir
) {
3572 if (!visit(function
))
3579 } // unnamed namespace
3584 Program::makeFromNIR(struct nv50_ir_prog_info
*info
)
3586 nir_shader
*nir
= (nir_shader
*)info
->bin
.source
;
3587 Converter
converter(this, nir
, info
);
3588 bool result
= converter
.run();
3591 LoweringHelper lowering
;
3593 tlsSize
= info
->bin
.tlsSpace
;
3597 } // namespace nv50_ir