2 * Copyright 2017 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Karol Herbst <kherbst@redhat.com>
25 #include "compiler/nir/nir.h"
27 #include "util/u_debug.h"
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
37 #include <tr1/unordered_map>
43 #if __cplusplus >= 201103L
45 using std::unordered_map
;
48 using std::tr1::unordered_map
;
51 using namespace nv50_ir
;
// Size callback for a GLSL type: the result is whatever
// glsl_count_attribute_slots(type, false) reports. It is passed to
// nir_lower_io in the pass list further down in this file.
54 type_size(const struct glsl_type
*type
)
56 return glsl_count_attribute_slots(type
, false);
// Converter derives from ConverterCommon and declares the NIR-facing helpers
// defined below: destination/value lookup (convert, getSrc), constant/indirect
// splitting (getIndirect), slot addressing and type queries.
// NOTE(review): part of the member list is not visible in this excerpt.
59 class Converter
: public ConverterCommon
62 Converter(Program
*, nir_shader
*, nv50_ir_prog_info
*);
66 typedef std::vector
<LValue
*> LValues
;
// Keyed by an unsigned index; the definitions below use reg->index and
// def->index as keys.
67 typedef unordered_map
<unsigned, LValues
> NirDefMap
;
// convert(): obtain the LValues backing a NIR destination/register/SSA def.
69 LValues
& convert(nir_alu_dest
*);
70 LValues
& convert(nir_dest
*);
71 LValues
& convert(nir_register
*);
72 LValues
& convert(nir_ssa_def
*);
// getSrc(): fetch one component of a NIR source as a Value.
74 Value
* getSrc(nir_alu_src
*, uint8_t component
= 0);
75 Value
* getSrc(nir_register
*, uint8_t);
76 Value
* getSrc(nir_src
*, uint8_t, bool indirect
= false);
77 Value
* getSrc(nir_ssa_def
*, uint8_t);
79 // The returned value is the constant part of the given source (either the
80 // nir_src or the selected source component of an intrinsic). Even though
81 // this is mostly an optimization to be able to skip indirects in a few
82 // cases, sometimes we require immediate values or set some fields on
83 // instructions (e.g. tex) in order for codegen to consume those.
84 // If the found value has no constant part, the Value gets returned
85 // through the Value*& parameter.
86 uint32_t getIndirect(nir_src
*, uint8_t, Value
*&);
87 uint32_t getIndirect(nir_intrinsic_instr
*, uint8_t s
, uint8_t c
, Value
*&);
89 uint32_t getSlotAddress(nir_intrinsic_instr
*, uint8_t idx
, uint8_t slot
);
// NOTE(review): the remaining parameters of setInterpolate are cut off here.
91 void setInterpolate(nv50_ir_varying
*,
// Type classification helpers for NIR ALU types and opcodes.
96 bool isFloatType(nir_alu_type
);
97 bool isSignedType(nir_alu_type
);
98 bool isResultFloat(nir_op
);
99 bool isResultSigned(nir_op
);
// Destination (getDType) and source (getSType/getSTypes) DataType queries.
101 DataType
getDType(nir_alu_instr
*);
102 DataType
getDType(nir_intrinsic_instr
*);
103 DataType
getDType(nir_op
, uint8_t);
105 std::vector
<DataType
> getSTypes(nir_alu_instr
*);
106 DataType
getSType(nir_src
&, bool isFloat
, bool isSigned
);
// Constructor: forwards prog and info to the ConverterCommon base.
// NOTE(review): the initializer list continues past the trailing comma, but
// the remaining initializers and the body are not visible in this excerpt.
116 Converter::Converter(Program
*prog
, nir_shader
*nir
, nv50_ir_prog_info
*info
)
117 : ConverterCommon(prog
, info
),
// True when the base type of `type`, as reported by
// nir_alu_type_get_base_type, equals nir_type_float.
121 Converter::isFloatType(nir_alu_type type
)
123 return nir_alu_type_get_base_type(type
) == nir_type_float
;
// True when the base type of `type`, as reported by
// nir_alu_type_get_base_type, equals nir_type_int.
127 Converter::isSignedType(nir_alu_type type
)
129 return nir_alu_type_get_base_type(type
) == nir_type_int
;
// Decides whether `op` produces a float result by looking at the output_type
// recorded for it in nir_op_infos. An op whose output_type is
// nir_type_invalid cannot be classified and is reported through ERROR.
// NOTE(review): the value returned after the error is not visible here.
133 Converter::isResultFloat(nir_op op
)
135 const nir_op_info
&info
= nir_op_infos
[op
];
136 if (info
.output_type
!= nir_type_invalid
)
137 return isFloatType(info
.output_type
);
139 ERROR("isResultFloat not implemented for %s\n", nir_op_infos
[op
].name
);
// Decides whether `op` produces a signed result: when nir_op_infos[op] has a
// valid output_type the answer comes from isSignedType, otherwise an error
// naming the op is logged.
// NOTE(review): the statements the "umul" remark below refers to, and the
// value returned after the error, are not visible in this excerpt.
145 Converter::isResultSigned(nir_op op
)
148 // there is no umul and we get wrong results if we treat all muls as signed
153 const nir_op_info
&info
= nir_op_infos
[op
];
154 if (info
.output_type
!= nir_type_invalid
)
155 return isSignedType(info
.output_type
);
156 ERROR("isResultSigned not implemented for %s\n", nir_op_infos
[op
].name
);
// Destination DataType of an ALU instruction. Delegates to
// getDType(op, bitSize); the bit size is read from the SSA destination when
// dest.dest.is_ssa is set, and from the destination register otherwise.
163 Converter::getDType(nir_alu_instr
*insn
)
165 if (insn
->dest
.dest
.is_ssa
)
166 return getDType(insn
->op
, insn
->dest
.dest
.ssa
.bit_size
);
// Non-SSA destination: use the register's bit size.
168 return getDType(insn
->op
, insn
->dest
.dest
.reg
.reg
->bit_size
);
// Destination DataType of an intrinsic. The bit size (SSA destination or
// destination register, depending on dest.is_ssa) is divided by 8 and passed
// to typeOfSize with both boolean arguments false.
172 Converter::getDType(nir_intrinsic_instr
*insn
)
174 if (insn
->dest
.is_ssa
)
175 return typeOfSize(insn
->dest
.ssa
.bit_size
/ 8, false, false);
// Non-SSA destination: use the register's bit size.
177 return typeOfSize(insn
->dest
.reg
.reg
->bit_size
/ 8, false, false);
// DataType for the result of `op` at the given bit size: typeOfSize is called
// with bitSize / 8 plus the float/signed classification of the op. A
// TYPE_NONE result is reported through ERROR.
// NOTE(review): the rest of the error branch and the return are not visible.
181 Converter::getDType(nir_op op
, uint8_t bitSize
)
183 DataType ty
= typeOfSize(bitSize
/ 8, isResultFloat(op
), isResultSigned(op
));
184 if (ty
== TYPE_NONE
) {
185 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos
[op
].name
, bitSize
);
// Source DataTypes of an ALU instruction, one entry per input listed in
// nir_op_infos[insn->op]. Inputs with a valid input_type are resolved through
// getSType using that type's float/signed classification.
// NOTE(review): the "not implemented" error presumably sits in the else
// branch of the validity check; the branch lines are missing from this excerpt.
191 std::vector
<DataType
>
192 Converter::getSTypes(nir_alu_instr
*insn
)
194 const nir_op_info
&info
= nir_op_infos
[insn
->op
];
// Result vector is sized up front to num_inputs.
195 std::vector
<DataType
> res(info
.num_inputs
);
197 for (uint8_t i
= 0; i
< info
.num_inputs
; ++i
) {
198 if (info
.input_types
[i
] != nir_type_invalid
) {
199 res
[i
] = getSType(insn
->src
[i
].src
, isFloatType(info
.input_types
[i
]), isSignedType(info
.input_types
[i
]));
201 ERROR("getSType not implemented for %s idx %u\n", info
.name
, i
);
// DataType of a single source: its bit size is divided by 8 and handed to
// typeOfSize together with the caller-supplied isFloat/isSigned flags. A
// TYPE_NONE result is reported through ERROR.
// NOTE(review): the condition choosing between the SSA and register bit size
// (presumably src.is_ssa) and the definition of `str` are not visible here.
212 Converter::getSType(nir_src
&src
, bool isFloat
, bool isSigned
)
216 bitSize
= src
.ssa
->bit_size
;
218 bitSize
= src
.reg
.reg
->bit_size
;
220 DataType ty
= typeOfSize(bitSize
/ 8, isFloat
, isSigned
);
221 if (ty
== TYPE_NONE
) {
229 ERROR("couldn't get Type for %s with bitSize %u\n", str
, bitSize
);
// LValues for a generic NIR destination: forwards to the nir_ssa_def overload
// for dest->ssa or to the nir_register overload for dest->reg.reg. An indirect
// register destination is reported as unsupported.
// NOTE(review): the condition guarding the SSA path (presumably dest->is_ssa)
// is not visible in this excerpt.
236 Converter::convert(nir_dest
*dest
)
239 return convert(&dest
->ssa
);
240 if (dest
->reg
.indirect
) {
241 ERROR("no support for indirects.");
244 return convert(dest
->reg
.reg
);
// LValues for a NIR register, cached in regDefs under reg->index. When no
// entry exists yet, one scratch value per component is created and stored.
// NOTE(review): the statement executed on a cache hit is not visible here
// (presumably it returns the existing entry).
248 Converter::convert(nir_register
*reg
)
250 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
251 if (it
!= regDefs
.end())
254 LValues
newDef(reg
->num_components
);
// Each component is at least 4 bytes wide: max(4, bit_size / 8).
255 for (uint8_t i
= 0; i
< reg
->num_components
; i
++)
256 newDef
[i
] = getScratch(std::max(4, reg
->bit_size
/ 8));
257 return regDefs
[reg
->index
] = newDef
;
// LValues for a NIR SSA definition, cached in ssaDefs under def->index. When
// no entry exists yet, one SSA value per component is created and stored.
// NOTE(review): the statement executed on a cache hit is not visible here
// (presumably it returns the existing entry).
261 Converter::convert(nir_ssa_def
*def
)
263 NirDefMap::iterator it
= ssaDefs
.find(def
->index
);
264 if (it
!= ssaDefs
.end())
267 LValues
newDef(def
->num_components
);
// Each component is at least 4 bytes wide: max(4, bit_size / 8).
268 for (uint8_t i
= 0; i
< def
->num_components
; i
++)
269 newDef
[i
] = getSSA(std::max(4, def
->bit_size
/ 8));
270 return ssaDefs
[def
->index
] = newDef
;
// Value for one component of an ALU source. The component is remapped through
// src->swizzle before the lookup on the wrapped nir_src. Sources carrying the
// abs or negate modifier are reported as unsupported.
// NOTE(review): what follows the error inside that branch is not visible.
274 Converter::getSrc(nir_alu_src
*src
, uint8_t component
)
276 if (src
->abs
|| src
->negate
) {
277 ERROR("modifiers currently not supported on nir_alu_src\n");
280 return getSrc(&src
->src
, src
->swizzle
[component
]);
// Value for component `idx` of a NIR register. A register that has no entry
// in regDefs yet is created on demand through convert(reg).
284 Converter::getSrc(nir_register
*reg
, uint8_t idx
)
286 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
287 if (it
== regDefs
.end())
288 return convert(reg
)[idx
];
289 return it
->second
[idx
];
// Value for component `idx` of a generic NIR source: forwards to the SSA
// overload for src->ssa or the register overload for src->reg.reg. For a
// register source with an indirect, the indirect itself can be fetched;
// otherwise indirects are reported as unsupported.
// NOTE(review): the guarding conditions (presumably src->is_ssa and the
// `indirect` flag) are on lines missing from this excerpt.
293 Converter::getSrc(nir_src
*src
, uint8_t idx
, bool indirect
)
296 return getSrc(src
->ssa
, idx
);
298 if (src
->reg
.indirect
) {
300 return getSrc(src
->reg
.indirect
, idx
);
301 ERROR("no support for indirects.");
306 return getSrc(src
->reg
.reg
, idx
);
// Value for component `idx` of an SSA definition, looked up in ssaDefs by
// src->index. A missing entry is reported through ERROR; unlike the register
// overload, nothing visible here creates the entry on demand.
// NOTE(review): the rest of the not-found branch is not visible.
310 Converter::getSrc(nir_ssa_def
*src
, uint8_t idx
)
312 NirDefMap::iterator it
= ssaDefs
.find(src
->index
);
313 if (it
== ssaDefs
.end()) {
314 ERROR("SSA value %u not found\n", src
->index
);
318 return it
->second
[idx
];
// Splits a source into a constant part (returned) and a dynamic part (written
// to `indirect`). nir_src_as_const_value supplies the constant, whose first
// u32 is returned; the dynamic path fetches the source via getSrc with
// indirect = true.
// NOTE(review): the condition separating the two paths, and the value
// returned on the dynamic path, are not visible in this excerpt.
322 Converter::getIndirect(nir_src
*src
, uint8_t idx
, Value
*&indirect
)
324 nir_const_value
*offset
= nir_src_as_const_value(*src
);
328 return offset
->u32
[0];
331 indirect
= getSrc(src
, idx
, true);
// Intrinsic variant: adds nir_intrinsic_base(insn) to the constant part of
// source `s` (component `c`). The dynamic part is rewritten as an OP_SHL of
// type TYPE_U32 by an immediate 4, with the result placed in a FILE_ADDRESS
// value.
// NOTE(review): the condition guarding the shift (presumably "indirect is
// set") and the return statement are not visible in this excerpt.
336 Converter::getIndirect(nir_intrinsic_instr
*insn
, uint8_t s
, uint8_t c
, Value
*&indirect
)
338 int32_t idx
= nir_intrinsic_base(insn
) + getIndirect(&insn
->src
[s
], c
, indirect
);
340 indirect
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), indirect
, loadImm(NULL
, 4));
// Translates a gl_vert_attrib slot into a TGSI semantic, written through the
// `name` and `index` out-pointers (both asserted non-null). Slots at or above
// VERT_ATTRIB_MAX are rejected with an error. The generic and texcoord ranges
// use the offset from the range base as index; the remaining slots are
// handled by the case labels below.
// NOTE(review): the per-case *index assignments and the return statements are
// not visible in this excerpt.
345 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot
, unsigned *name
, unsigned *index
)
347 assert(name
&& index
);
// NOTE(review): the message says "varying slot" although a vertex attribute
// is being validated here.
349 if (slot
>= VERT_ATTRIB_MAX
) {
350 ERROR("invalid varying slot %u\n", slot
);
355 if (slot
>= VERT_ATTRIB_GENERIC0
&&
356 slot
< VERT_ATTRIB_GENERIC0
+ VERT_ATTRIB_GENERIC_MAX
) {
357 *name
= TGSI_SEMANTIC_GENERIC
;
358 *index
= slot
- VERT_ATTRIB_GENERIC0
;
362 if (slot
>= VERT_ATTRIB_TEX0
&&
363 slot
< VERT_ATTRIB_TEX0
+ VERT_ATTRIB_TEX_MAX
) {
364 *name
= TGSI_SEMANTIC_TEXCOORD
;
365 *index
= slot
- VERT_ATTRIB_TEX0
;
370 case VERT_ATTRIB_COLOR0
:
371 *name
= TGSI_SEMANTIC_COLOR
;
374 case VERT_ATTRIB_COLOR1
:
375 *name
= TGSI_SEMANTIC_COLOR
;
378 case VERT_ATTRIB_EDGEFLAG
:
379 *name
= TGSI_SEMANTIC_EDGEFLAG
;
382 case VERT_ATTRIB_FOG
:
383 *name
= TGSI_SEMANTIC_FOG
;
386 case VERT_ATTRIB_NORMAL
:
387 *name
= TGSI_SEMANTIC_NORMAL
;
390 case VERT_ATTRIB_POS
:
391 *name
= TGSI_SEMANTIC_POSITION
;
394 case VERT_ATTRIB_POINT_SIZE
:
395 *name
= TGSI_SEMANTIC_PSIZE
;
399 ERROR("unknown vert attrib slot %u\n", slot
);
// Translates a gl_varying_slot into a TGSI semantic, written through the
// `name` and `index` out-pointers (both asserted non-null). Slots at or above
// VARYING_SLOT_TESS_MAX are rejected with an error. Range checks run from the
// highest base downwards (patch, generic VAR, texcoord) and use the offset
// from the range base as index; the remaining slots are handled by the case
// labels below.
// NOTE(review): the per-case *index assignments and the return statements are
// not visible in this excerpt.
406 varying_slot_to_tgsi_semantic(gl_varying_slot slot
, unsigned *name
, unsigned *index
)
408 assert(name
&& index
);
410 if (slot
>= VARYING_SLOT_TESS_MAX
) {
411 ERROR("invalid varying slot %u\n", slot
);
416 if (slot
>= VARYING_SLOT_PATCH0
) {
417 *name
= TGSI_SEMANTIC_PATCH
;
418 *index
= slot
- VARYING_SLOT_PATCH0
;
422 if (slot
>= VARYING_SLOT_VAR0
) {
423 *name
= TGSI_SEMANTIC_GENERIC
;
424 *index
= slot
- VARYING_SLOT_VAR0
;
428 if (slot
>= VARYING_SLOT_TEX0
&& slot
<= VARYING_SLOT_TEX7
) {
429 *name
= TGSI_SEMANTIC_TEXCOORD
;
430 *index
= slot
- VARYING_SLOT_TEX0
;
435 case VARYING_SLOT_BFC0
:
436 *name
= TGSI_SEMANTIC_BCOLOR
;
439 case VARYING_SLOT_BFC1
:
440 *name
= TGSI_SEMANTIC_BCOLOR
;
443 case VARYING_SLOT_CLIP_DIST0
:
444 *name
= TGSI_SEMANTIC_CLIPDIST
;
447 case VARYING_SLOT_CLIP_DIST1
:
448 *name
= TGSI_SEMANTIC_CLIPDIST
;
451 case VARYING_SLOT_CLIP_VERTEX
:
452 *name
= TGSI_SEMANTIC_CLIPVERTEX
;
455 case VARYING_SLOT_COL0
:
456 *name
= TGSI_SEMANTIC_COLOR
;
459 case VARYING_SLOT_COL1
:
460 *name
= TGSI_SEMANTIC_COLOR
;
463 case VARYING_SLOT_EDGE
:
464 *name
= TGSI_SEMANTIC_EDGEFLAG
;
467 case VARYING_SLOT_FACE
:
468 *name
= TGSI_SEMANTIC_FACE
;
471 case VARYING_SLOT_FOGC
:
472 *name
= TGSI_SEMANTIC_FOG
;
475 case VARYING_SLOT_LAYER
:
476 *name
= TGSI_SEMANTIC_LAYER
;
479 case VARYING_SLOT_PNTC
:
480 *name
= TGSI_SEMANTIC_PCOORD
;
483 case VARYING_SLOT_POS
:
484 *name
= TGSI_SEMANTIC_POSITION
;
487 case VARYING_SLOT_PRIMITIVE_ID
:
488 *name
= TGSI_SEMANTIC_PRIMID
;
491 case VARYING_SLOT_PSIZ
:
492 *name
= TGSI_SEMANTIC_PSIZE
;
495 case VARYING_SLOT_TESS_LEVEL_INNER
:
496 *name
= TGSI_SEMANTIC_TESSINNER
;
499 case VARYING_SLOT_TESS_LEVEL_OUTER
:
500 *name
= TGSI_SEMANTIC_TESSOUTER
;
503 case VARYING_SLOT_VIEWPORT
:
504 *name
= TGSI_SEMANTIC_VIEWPORT_INDEX
;
508 ERROR("unknown varying slot %u\n", slot
);
// Translates a fragment result slot into a TGSI semantic, written through the
// `name` and `index` out-pointers. Slots from FRAG_RESULT_DATA0 upwards become
// TGSI_SEMANTIC_COLOR with index slot - FRAG_RESULT_COLOR - 2 (marked
// "intentional" by the author). The remaining slots are handled by the case
// labels below; anything else is reported as unknown.
// NOTE(review): the per-case *index assignments and the return statements are
// not visible in this excerpt.
515 frag_result_to_tgsi_semantic(unsigned slot
, unsigned *name
, unsigned *index
)
517 if (slot
>= FRAG_RESULT_DATA0
) {
518 *name
= TGSI_SEMANTIC_COLOR
;
519 *index
= slot
- FRAG_RESULT_COLOR
- 2; // intentional
524 case FRAG_RESULT_COLOR
:
525 *name
= TGSI_SEMANTIC_COLOR
;
528 case FRAG_RESULT_DEPTH
:
529 *name
= TGSI_SEMANTIC_POSITION
;
532 case FRAG_RESULT_SAMPLE_MASK
:
533 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
537 ERROR("unknown frag result slot %u\n", slot
);
543 // copy of _mesa_sysval_to_semantic
// Translates a SYSTEM_VALUE_* enumerant into the matching TGSI semantic name,
// written through `name`. Values without a case label are reported as unknown.
// NOTE(review): how `index` is set, the switch header and the break/return
// statements are not visible in this excerpt.
545 system_val_to_tgsi_semantic(unsigned val
, unsigned *name
, unsigned *index
)
550 case SYSTEM_VALUE_VERTEX_ID
:
551 *name
= TGSI_SEMANTIC_VERTEXID
;
553 case SYSTEM_VALUE_INSTANCE_ID
:
554 *name
= TGSI_SEMANTIC_INSTANCEID
;
556 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
:
557 *name
= TGSI_SEMANTIC_VERTEXID_NOBASE
;
559 case SYSTEM_VALUE_BASE_VERTEX
:
560 *name
= TGSI_SEMANTIC_BASEVERTEX
;
562 case SYSTEM_VALUE_BASE_INSTANCE
:
563 *name
= TGSI_SEMANTIC_BASEINSTANCE
;
565 case SYSTEM_VALUE_DRAW_ID
:
566 *name
= TGSI_SEMANTIC_DRAWID
;
570 case SYSTEM_VALUE_INVOCATION_ID
:
571 *name
= TGSI_SEMANTIC_INVOCATIONID
;
575 case SYSTEM_VALUE_FRAG_COORD
:
576 *name
= TGSI_SEMANTIC_POSITION
;
578 case SYSTEM_VALUE_FRONT_FACE
:
579 *name
= TGSI_SEMANTIC_FACE
;
581 case SYSTEM_VALUE_SAMPLE_ID
:
582 *name
= TGSI_SEMANTIC_SAMPLEID
;
584 case SYSTEM_VALUE_SAMPLE_POS
:
585 *name
= TGSI_SEMANTIC_SAMPLEPOS
;
587 case SYSTEM_VALUE_SAMPLE_MASK_IN
:
588 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
590 case SYSTEM_VALUE_HELPER_INVOCATION
:
591 *name
= TGSI_SEMANTIC_HELPER_INVOCATION
;
594 // Tessellation shader
595 case SYSTEM_VALUE_TESS_COORD
:
596 *name
= TGSI_SEMANTIC_TESSCOORD
;
598 case SYSTEM_VALUE_VERTICES_IN
:
599 *name
= TGSI_SEMANTIC_VERTICESIN
;
601 case SYSTEM_VALUE_PRIMITIVE_ID
:
602 *name
= TGSI_SEMANTIC_PRIMID
;
604 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
605 *name
= TGSI_SEMANTIC_TESSOUTER
;
607 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
608 *name
= TGSI_SEMANTIC_TESSINNER
;
612 case SYSTEM_VALUE_LOCAL_INVOCATION_ID
:
613 *name
= TGSI_SEMANTIC_THREAD_ID
;
615 case SYSTEM_VALUE_WORK_GROUP_ID
:
616 *name
= TGSI_SEMANTIC_BLOCK_ID
;
618 case SYSTEM_VALUE_NUM_WORK_GROUPS
:
619 *name
= TGSI_SEMANTIC_GRID_SIZE
;
621 case SYSTEM_VALUE_LOCAL_GROUP_SIZE
:
622 *name
= TGSI_SEMANTIC_BLOCK_SIZE
;
626 case SYSTEM_VALUE_SUBGROUP_SIZE
:
627 *name
= TGSI_SEMANTIC_SUBGROUP_SIZE
;
629 case SYSTEM_VALUE_SUBGROUP_INVOCATION
:
630 *name
= TGSI_SEMANTIC_SUBGROUP_INVOCATION
;
632 case SYSTEM_VALUE_SUBGROUP_EQ_MASK
:
633 *name
= TGSI_SEMANTIC_SUBGROUP_EQ_MASK
;
635 case SYSTEM_VALUE_SUBGROUP_GE_MASK
:
636 *name
= TGSI_SEMANTIC_SUBGROUP_GE_MASK
;
638 case SYSTEM_VALUE_SUBGROUP_GT_MASK
:
639 *name
= TGSI_SEMANTIC_SUBGROUP_GT_MASK
;
641 case SYSTEM_VALUE_SUBGROUP_LE_MASK
:
642 *name
= TGSI_SEMANTIC_SUBGROUP_LE_MASK
;
644 case SYSTEM_VALUE_SUBGROUP_LT_MASK
:
645 *name
= TGSI_SEMANTIC_SUBGROUP_LT_MASK
;
649 ERROR("unknown system value %u\n", val
);
// Configures interpolation settings on `var`. The visible code branches on
// the INTERP_MODE_* value, treats TGSI_SEMANTIC_COLOR and
// TGSI_SEMANTIC_POSITION specially under INTERP_MODE_NONE, and finally copies
// `centroid` into var->centroid.
// NOTE(review): the rest of the parameter list and every per-mode assignment
// are on lines missing from this excerpt. The call site in assignSlots passes
// (varying, interpolation mode, centroid flag, semantic name).
656 Converter::setInterpolate(nv50_ir_varying
*var
,
662 case INTERP_MODE_FLAT
:
665 case INTERP_MODE_NONE
:
666 if (semantic
== TGSI_SEMANTIC_COLOR
)
668 else if (semantic
== TGSI_SEMANTIC_POSITION
)
671 case INTERP_MODE_NOPERSPECTIVE
:
674 case INTERP_MODE_SMOOTH
:
677 var
->centroid
= centroid
;
// Number of slots a shader variable of the given type takes for `stage`.
// Non-array types simply report count_attribute_slots(false). Array types are
// stage dependent: geometry uses uniform_locations() and the visible code
// divides by info.gs.vertices_in; tessellation stages use either the whole
// type or its array element type; the remaining path falls back to
// count_attribute_slots(false).
// NOTE(review): the switch header, the else/default lines, any condition on
// the geometry division and the final return are not visible in this excerpt.
681 calcSlots(const glsl_type
*type
, Program::Type stage
, const shader_info
&info
,
682 bool input
, const nir_variable
*var
)
684 if (!type
->is_array())
685 return type
->count_attribute_slots(false);
689 case Program::TYPE_GEOMETRY
:
690 slots
= type
->uniform_locations();
692 slots
/= info
.gs
.vertices_in
;
694 case Program::TYPE_TESSELLATION_CONTROL
:
695 case Program::TYPE_TESSELLATION_EVAL
:
696 // remove first dimension
697 if (var
->data
.patch
|| (!input
&& stage
== Program::TYPE_TESSELLATION_EVAL
))
698 slots
= type
->uniform_locations();
700 slots
= type
->fields
.array
->uniform_locations();
703 slots
= type
->count_attribute_slots(false);
// Populates the nv50_ir_prog_info I/O tables from the NIR shader: driver
// locations for image uniforms, the input table, the output table, the list
// of system values that are read, and (when genUserClip > 0) extra
// clip-distance outputs. The result is true when info->assignSlots(info)
// returns 0.
// NOTE(review): declarations of `name`/`index`, several switch headers,
// break/continue statements and the 64-bit adjustment bodies are on lines
// missing from this excerpt.
710 bool Converter::assignSlots() {
714 info
->io
.viewportId
= -1;
717 // we have to fixup the uniform locations for arrays
718 unsigned numImages
= 0;
// Image uniforms receive consecutive driver locations; an array of images
// advances the counter by arrays_of_arrays_size(), anything else by 1.
719 nir_foreach_variable(var
, &nir
->uniforms
) {
720 const glsl_type
*type
= var
->type
;
721 if (!type
->without_array()->is_image())
723 var
->data
.driver_location
= numImages
;
724 numImages
+= type
->is_array() ? type
->arrays_of_arrays_size() : 1;
// Inputs: translate each variable's location into a TGSI semantic for the
// current stage, then fill one info->in[] entry per occupied slot.
727 nir_foreach_variable(var
, &nir
->inputs
) {
728 const glsl_type
*type
= var
->type
;
729 int slot
= var
->data
.location
;
730 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, true, var
);
731 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
732 : type
->component_slots();
733 uint32_t frac
= var
->data
.location_frac
;
734 uint32_t vary
= var
->data
.driver_location
;
736 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
741 assert(vary
+ slots
<= PIPE_MAX_SHADER_INPUTS
);
743 switch(prog
->getType()) {
744 case Program::TYPE_FRAGMENT
:
745 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
746 for (uint16_t i
= 0; i
< slots
; ++i
) {
747 setInterpolate(&info
->in
[vary
+ i
], var
->data
.interpolation
,
748 var
->data
.centroid
| var
->data
.sample
, name
);
751 case Program::TYPE_GEOMETRY
:
752 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
754 case Program::TYPE_TESSELLATION_CONTROL
:
755 case Program::TYPE_TESSELLATION_EVAL
:
756 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
757 if (var
->data
.patch
&& name
== TGSI_SEMANTIC_PATCH
)
758 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
760 case Program::TYPE_VERTEX
:
761 vert_attrib_to_tgsi_semantic((gl_vert_attrib
)slot
, &name
, &index
);
763 case TGSI_SEMANTIC_EDGEFLAG
:
764 info
->io
.edgeFlagIn
= vary
;
771 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
// Per-slot bookkeeping; `vary` advances together with `i`.
775 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
776 info
->in
[vary
].id
= vary
;
777 info
->in
[vary
].patch
= var
->data
.patch
;
778 info
->in
[vary
].sn
= name
;
779 info
->in
[vary
].si
= index
+ i
;
// 64-bit types: each component covers two mask bits; the widened mask is
// split into a high part (>> 0x4) and a low part (& 0xf).
// NOTE(review): the condition choosing between the two is not visible.
780 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
782 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
784 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
786 info
->in
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
788 info
->numInputs
= std::max
<uint8_t>(info
->numInputs
, vary
);
// Outputs: same scheme as for inputs, writing info->out[].
791 info
->numOutputs
= 0;
792 nir_foreach_variable(var
, &nir
->outputs
) {
793 const glsl_type
*type
= var
->type
;
794 int slot
= var
->data
.location
;
795 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, false, var
);
796 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
797 : type
->component_slots();
798 uint32_t frac
= var
->data
.location_frac
;
799 uint32_t vary
= var
->data
.driver_location
;
801 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
// NOTE(review): unlike the input loop, this bound check does not add `slots`.
806 assert(vary
< PIPE_MAX_SHADER_OUTPUTS
);
808 switch(prog
->getType()) {
809 case Program::TYPE_FRAGMENT
:
810 frag_result_to_tgsi_semantic((gl_frag_result
)slot
, &name
, &index
);
812 case TGSI_SEMANTIC_COLOR
:
813 if (!var
->data
.fb_fetch_output
)
814 info
->prop
.fp
.numColourResults
++;
815 info
->prop
.fp
.separateFragData
= true;
816 // sometimes we get FRAG_RESULT_DATAX with data.index 0
817 // sometimes we get FRAG_RESULT_DATA0 with data.index X
818 index
= index
== 0 ? var
->data
.index
: index
;
820 case TGSI_SEMANTIC_POSITION
:
821 info
->io
.fragDepth
= vary
;
822 info
->prop
.fp
.writesDepth
= true;
824 case TGSI_SEMANTIC_SAMPLEMASK
:
825 info
->io
.sampleMask
= vary
;
831 case Program::TYPE_GEOMETRY
:
832 case Program::TYPE_TESSELLATION_CONTROL
:
833 case Program::TYPE_TESSELLATION_EVAL
:
834 case Program::TYPE_VERTEX
:
835 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
837 if (var
->data
.patch
&& name
!= TGSI_SEMANTIC_TESSINNER
&&
838 name
!= TGSI_SEMANTIC_TESSOUTER
)
839 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
842 case TGSI_SEMANTIC_CLIPDIST
:
843 info
->io
.genUserClip
= -1;
845 case TGSI_SEMANTIC_EDGEFLAG
:
846 info
->io
.edgeFlagOut
= vary
;
853 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
// Per-slot bookkeeping; `vary` advances together with `i`.
857 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
858 info
->out
[vary
].id
= vary
;
859 info
->out
[vary
].patch
= var
->data
.patch
;
860 info
->out
[vary
].sn
= name
;
861 info
->out
[vary
].si
= index
+ i
;
862 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
864 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
866 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
868 info
->out
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
// Flag outputs whose bit is set in the shader's outputs_read mask.
870 if (nir
->info
.outputs_read
& 1ll << slot
)
871 info
->out
[vary
].oread
= 1;
873 info
->numOutputs
= std::max
<uint8_t>(info
->numOutputs
, vary
);
// System values: one info->sv[] entry per bit set in system_values_read.
876 info
->numSysVals
= 0;
877 for (uint8_t i
= 0; i
< 64; ++i
) {
878 if (!(nir
->info
.system_values_read
& 1ll << i
))
881 system_val_to_tgsi_semantic(i
, &name
, &index
);
882 info
->sv
[info
->numSysVals
].sn
= name
;
883 info
->sv
[info
->numSysVals
].si
= index
;
884 info
->sv
[info
->numSysVals
].input
= 0; // TODO inferSysValDirection(sn);
887 case SYSTEM_VALUE_INSTANCE_ID
:
888 info
->io
.instanceId
= info
->numSysVals
;
890 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
891 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
892 info
->sv
[info
->numSysVals
].patch
= 1;
894 case SYSTEM_VALUE_VERTEX_ID
:
895 info
->io
.vertexId
= info
->numSysVals
;
901 info
->numSysVals
+= 1;
// Generated user clip planes: append (genUserClip + 3) / 4 outputs tagged
// TGSI_SEMANTIC_CLIPDIST, four distances per output.
904 if (info
->io
.genUserClip
> 0) {
905 info
->io
.clipDistances
= info
->io
.genUserClip
;
907 const unsigned int nOut
= (info
->io
.genUserClip
+ 3) / 4;
909 for (unsigned int n
= 0; n
< nOut
; ++n
) {
910 unsigned int i
= info
->numOutputs
++;
912 info
->out
[i
].sn
= TGSI_SEMANTIC_CLIPDIST
;
914 info
->out
[i
].mask
= ((1 << info
->io
.clipDistances
) - 1) >> (n
* 4);
918 return info
->assignSlots(info
) == 0;
// Address of component slot `slot` of varying `idx` for an I/O intrinsic:
// the result is vary[idx].slot[slot] * 4, where `vary` is info->in for inputs
// and info->out for outputs. The intrinsic kind decides which table applies;
// unknown intrinsics are reported through ERROR. `idx` is asserted to stay
// below PIPE_MAX_SHADER_INPUTS / PIPE_MAX_SHADER_OUTPUTS.
// NOTE(review): the assignments to `input`, the has_dest branch body and the
// adjustment made when typeSizeof(ty) == 8 are not visible in this excerpt.
922 Converter::getSlotAddress(nir_intrinsic_instr
*insn
, uint8_t idx
, uint8_t slot
)
925 int offset
= nir_intrinsic_component(insn
);
928 if (nir_intrinsic_infos
[insn
->intrinsic
].has_dest
)
931 ty
= getSType(insn
->src
[0], false, false);
933 switch (insn
->intrinsic
) {
934 case nir_intrinsic_load_input
:
935 case nir_intrinsic_load_interpolated_input
:
936 case nir_intrinsic_load_per_vertex_input
:
939 case nir_intrinsic_load_output
:
940 case nir_intrinsic_load_per_vertex_output
:
941 case nir_intrinsic_store_output
:
942 case nir_intrinsic_store_per_vertex_output
:
946 ERROR("unknown intrinsic in getSlotAddress %s",
947 nir_intrinsic_infos
[insn
->intrinsic
].name
);
953 if (typeSizeof(ty
) == 8) {
965 assert(!input
|| idx
< PIPE_MAX_SHADER_INPUTS
);
966 assert(input
|| idx
< PIPE_MAX_SHADER_OUTPUTS
);
968 const nv50_ir_varying
*vary
= input
? info
->in
: info
->out
;
969 return vary
[idx
].slot
[slot
] * 4;
// NOTE(review): the function header is missing from this excerpt; this is
// presumably the body of Converter::run(), which Program::makeFromNIR calls.
// Visible steps: optional shader dump (verbose debug flag), a fixed sequence
// of NIR lowering passes, a group of optimization passes reporting into
// `progress`, a second set of lowering passes ending with
// nir_convert_from_ssa, slot assignment, and a second optional shader dump
// (basic debug flag).
977 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
978 nir_print_shader(nir
, stderr
);
980 NIR_PASS_V(nir
, nir_lower_io
, nir_var_all
, type_size
, (nir_lower_io_options
)0);
981 NIR_PASS_V(nir
, nir_lower_regs_to_ssa
);
982 NIR_PASS_V(nir
, nir_lower_load_const_to_scalar
);
983 NIR_PASS_V(nir
, nir_lower_vars_to_ssa
);
984 NIR_PASS_V(nir
, nir_lower_alu_to_scalar
);
985 NIR_PASS_V(nir
, nir_lower_phis_to_scalar
);
// NOTE(review): presumably repeated while `progress` is set; the loop
// construct itself is not visible.
989 NIR_PASS(progress
, nir
, nir_copy_prop
);
990 NIR_PASS(progress
, nir
, nir_opt_remove_phis
);
991 NIR_PASS(progress
, nir
, nir_opt_trivial_continues
);
992 NIR_PASS(progress
, nir
, nir_opt_cse
);
993 NIR_PASS(progress
, nir
, nir_opt_algebraic
);
994 NIR_PASS(progress
, nir
, nir_opt_constant_folding
);
995 NIR_PASS(progress
, nir
, nir_copy_prop
);
996 NIR_PASS(progress
, nir
, nir_opt_dce
);
997 NIR_PASS(progress
, nir
, nir_opt_dead_cf
);
1000 NIR_PASS_V(nir
, nir_lower_bool_to_int32
);
1001 NIR_PASS_V(nir
, nir_lower_locals_to_regs
);
1002 NIR_PASS_V(nir
, nir_remove_dead_variables
, nir_var_function_temp
);
1003 NIR_PASS_V(nir
, nir_convert_from_ssa
, true);
1005 // Garbage collect dead instructions
// A failing slot assignment is reported; what follows the error is not visible.
1008 if (!assignSlots()) {
1009 ERROR("Couldn't assign slots!\n");
1013 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
1014 nir_print_shader(nir
, stderr
);
1019 } // unnamed namespace
// Entry point for building this Program from NIR: info->bin.source is cast to
// a nir_shader, a Converter is constructed on it and run, and tlsSize is taken
// from info->bin.tlsSpace.
// NOTE(review): how `result` and the LoweringHelper are used afterwards, and
// the return statement, are not visible in this excerpt.
1024 Program::makeFromNIR(struct nv50_ir_prog_info
*info
)
1026 nir_shader
*nir
= (nir_shader
*)info
->bin
.source
;
1027 Converter
converter(this, nir
, info
);
1028 bool result
= converter
.run();
1031 LoweringHelper lowering
;
1033 tlsSize
= info
->bin
.tlsSpace
;
1037 } // namespace nv50_ir