2 * Copyright 2017 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Karol Herbst <kherbst@redhat.com>
25 #include "compiler/nir/nir.h"
27 #include "util/u_debug.h"
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
37 #include <tr1/unordered_map>
43 #if __cplusplus >= 201103L
45 using std::unordered_map
;
48 using std::tr1::unordered_map
;
51 using namespace nv50_ir
;
// Size callback for a GLSL type: forwards to glsl_count_attribute_slots with
// its second argument set to false and returns that result unchanged.
// This function is handed to nir_lower_io further down in this file.
54 type_size(const struct glsl_type
*type
)
56 return glsl_count_attribute_slots(type
, false);
59 class Converter
: public ConverterCommon
62 Converter(Program
*, nir_shader
*, nv50_ir_prog_info
*);
66 typedef std::vector
<LValue
*> LValues
;
67 typedef unordered_map
<unsigned, LValues
> NirDefMap
;
69 LValues
& convert(nir_alu_dest
*);
70 LValues
& convert(nir_dest
*);
71 LValues
& convert(nir_register
*);
72 LValues
& convert(nir_ssa_def
*);
74 Value
* getSrc(nir_alu_src
*, uint8_t component
= 0);
75 Value
* getSrc(nir_register
*, uint8_t);
76 Value
* getSrc(nir_src
*, uint8_t, bool indirect
= false);
77 Value
* getSrc(nir_ssa_def
*, uint8_t);
79 // returned value is the constant part of the given source (either the
80 // nir_src or the selected source component of an intrinsic). Even though
81 // this is mostly an optimization to be able to skip indirects in a few
82 // cases, sometimes we require immediate values or set some fields on
83 // instructions (e.g. tex) in order for codegen to consume those.
84 // If the source has no constant part, the Value is returned
85 // through the Value parameter.
86 uint32_t getIndirect(nir_src
*, uint8_t, Value
*&);
87 uint32_t getIndirect(nir_intrinsic_instr
*, uint8_t s
, uint8_t c
, Value
*&);
89 uint32_t getSlotAddress(nir_intrinsic_instr
*, uint8_t idx
, uint8_t slot
);
91 void setInterpolate(nv50_ir_varying
*,
96 Instruction
*loadFrom(DataFile
, uint8_t, DataType
, Value
*def
, uint32_t base
,
97 uint8_t c
, Value
*indirect0
= NULL
,
98 Value
*indirect1
= NULL
, bool patch
= false);
99 void storeTo(nir_intrinsic_instr
*, DataFile
, operation
, DataType
,
100 Value
*src
, uint8_t idx
, uint8_t c
, Value
*indirect0
= NULL
,
101 Value
*indirect1
= NULL
);
103 bool isFloatType(nir_alu_type
);
104 bool isSignedType(nir_alu_type
);
105 bool isResultFloat(nir_op
);
106 bool isResultSigned(nir_op
);
108 DataType
getDType(nir_alu_instr
*);
109 DataType
getDType(nir_intrinsic_instr
*);
110 DataType
getDType(nir_op
, uint8_t);
112 std::vector
<DataType
> getSTypes(nir_alu_instr
*);
113 DataType
getSType(nir_src
&, bool isFloat
, bool isSigned
);
123 Converter::Converter(Program
*prog
, nir_shader
*nir
, nv50_ir_prog_info
*info
)
124 : ConverterCommon(prog
, info
),
// True when the base type of the given NIR ALU type (as reported by
// nir_alu_type_get_base_type) is nir_type_float.
128 Converter::isFloatType(nir_alu_type type
)
130 return nir_alu_type_get_base_type(type
) == nir_type_float
;
// True when the base type of the given NIR ALU type (as reported by
// nir_alu_type_get_base_type) is nir_type_int; every other base type
// yields false.
134 Converter::isSignedType(nir_alu_type type
)
136 return nir_alu_type_get_base_type(type
) == nir_type_int
;
// Tells whether the result of the given NIR opcode is a float, based on the
// output_type recorded in the nir_op_infos table entry for that opcode.
// NOTE(review): what is returned after the ERROR below is not visible in
// this excerpt - confirm against the full file.
140 Converter::isResultFloat(nir_op op
)
142 const nir_op_info
&info
= nir_op_infos
[op
];
// Only opcodes with a valid declared output type can be classified.
143 if (info
.output_type
!= nir_type_invalid
)
144 return isFloatType(info
.output_type
);
// Reached when the opcode table has no usable output type for this op.
146 ERROR("isResultFloat not implemented for %s\n", nir_op_infos
[op
].name
);
// Tells whether the result of the given NIR opcode is a signed integer,
// based on the output_type recorded in the nir_op_infos table entry.
// NOTE(review): the special-casing that the "no umul" comment below refers
// to, and the value returned after the ERROR, are not visible in this
// excerpt - confirm against the full file.
152 Converter::isResultSigned(nir_op op
)
155 // there is no umul and we get wrong results if we treat all muls as signed
160 const nir_op_info
&info
= nir_op_infos
[op
];
// Only opcodes with a valid declared output type can be classified.
161 if (info
.output_type
!= nir_type_invalid
)
162 return isSignedType(info
.output_type
);
// Reached when the opcode table has no usable output type for this op.
163 ERROR("isResultSigned not implemented for %s\n", nir_op_infos
[op
].name
);
// Destination data type of an ALU instruction. Picks the destination bit
// size from the SSA def or from the register, depending on how the
// destination is stored, and delegates to getDType(nir_op, uint8_t).
170 Converter::getDType(nir_alu_instr
*insn
)
// SSA destination: the bit size lives on the SSA def.
172 if (insn
->dest
.dest
.is_ssa
)
173 return getDType(insn
->op
, insn
->dest
.dest
.ssa
.bit_size
);
// Register destination: the bit size lives on the nir_register.
175 return getDType(insn
->op
, insn
->dest
.dest
.reg
.reg
->bit_size
);
// Destination data type of an intrinsic instruction. The type is chosen
// purely by size: typeOfSize is called with the destination bit size divided
// by 8 and with both of its boolean arguments false. The other typeOfSize
// calls in this file pass "is float" and "is signed" in those two positions.
179 Converter::getDType(nir_intrinsic_instr
*insn
)
// SSA destination: the bit size lives on the SSA def.
181 if (insn
->dest
.is_ssa
)
182 return typeOfSize(insn
->dest
.ssa
.bit_size
/ 8, false, false);
// Register destination: the bit size lives on the nir_register.
184 return typeOfSize(insn
->dest
.reg
.reg
->bit_size
/ 8, false, false);
// Result data type for a NIR opcode at the given bit size. The size passed
// to typeOfSize is bitSize / 8; floatness and signedness come from
// isResultFloat(op) and isResultSigned(op).
// NOTE(review): the statements following the ERROR (and the final return)
// are not visible in this excerpt.
188 Converter::getDType(nir_op op
, uint8_t bitSize
)
190 DataType ty
= typeOfSize(bitSize
/ 8, isResultFloat(op
), isResultSigned(op
));
// TYPE_NONE means typeOfSize found no matching type; report it.
191 if (ty
== TYPE_NONE
) {
192 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos
[op
].name
, bitSize
);
// Source data types of an ALU instruction, one entry per input of the
// opcode (info.num_inputs entries). Each entry is derived from the source's
// bit size via getSType, with floatness/signedness taken from the opcode's
// declared input type.
198 std::vector
<DataType
>
199 Converter::getSTypes(nir_alu_instr
*insn
)
201 const nir_op_info
&info
= nir_op_infos
[insn
->op
];
// Result vector is pre-sized so entries can be assigned by index.
202 std::vector
<DataType
> res(info
.num_inputs
);
204 for (uint8_t i
= 0; i
< info
.num_inputs
; ++i
) {
// Inputs with a valid declared type are resolved through getSType.
205 if (info
.input_types
[i
] != nir_type_invalid
) {
206 res
[i
] = getSType(insn
->src
[i
].src
, isFloatType(info
.input_types
[i
]), isSignedType(info
.input_types
[i
]));
// NOTE(review): presumably the else branch of the check above; the line
// introducing it is not visible in this excerpt.
208 ERROR("getSType not implemented for %s idx %u\n", info
.name
, i
);
// Data type for a single NIR source. The bit size is read from the source's
// SSA def or from its register, then typeOfSize(bitSize / 8, isFloat,
// isSigned) picks the type.
// NOTE(review): the declaration of bitSize, the condition selecting between
// the two assignments (presumably src.is_ssa), the construction of `str`
// and the final return are not visible in this excerpt.
219 Converter::getSType(nir_src
&src
, bool isFloat
, bool isSigned
)
// Bit size taken from the SSA def.
223 bitSize
= src
.ssa
->bit_size
;
// Bit size taken from the register.
225 bitSize
= src
.reg
.reg
->bit_size
;
227 DataType ty
= typeOfSize(bitSize
/ 8, isFloat
, isSigned
);
// TYPE_NONE means typeOfSize found no matching type; report it.
228 if (ty
== TYPE_NONE
) {
236 ERROR("couldn't get Type for %s with bitSize %u\n", str
, bitSize
);
// Values backing a NIR destination. Forwards to the nir_ssa_def overload
// for dest->ssa or to the nir_register overload for dest->reg.reg.
// Register destinations with an indirect are reported as unsupported.
// NOTE(review): the condition guarding the first return (presumably
// dest->is_ssa) and the statements after the ERROR are not visible in this
// excerpt.
243 Converter::convert(nir_dest
*dest
)
246 return convert(&dest
->ssa
);
// Indirectly addressed register destinations are not handled.
247 if (dest
->reg
.indirect
) {
248 ERROR("no support for indirects.");
251 return convert(dest
->reg
.reg
);
// Values backing a NIR register, keyed by reg->index in regDefs.
// When no entry exists yet, one value per register component is obtained
// from getScratch and the new vector is stored in regDefs; a reference to
// the stored vector is returned.
// NOTE(review): the statement executed when the lookup succeeds is not
// visible in this excerpt (presumably it returns the existing entry).
255 Converter::convert(nir_register
*reg
)
257 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
258 if (it
!= regDefs
.end())
261 LValues
newDef(reg
->num_components
);
262 for (uint8_t i
= 0; i
< reg
->num_components
; i
++)
// The size passed to getScratch is never below 4, even for narrower
// registers.
263 newDef
[i
] = getScratch(std::max(4, reg
->bit_size
/ 8));
// Copies newDef into the map and returns the map's element.
264 return regDefs
[reg
->index
] = newDef
;
// Values backing a NIR SSA def, keyed by def->index in ssaDefs.
// When no entry exists yet, one value per component is obtained from getSSA
// and the new vector is stored in ssaDefs; a reference to the stored vector
// is returned.
// NOTE(review): the statement executed when the lookup succeeds is not
// visible in this excerpt (presumably it returns the existing entry).
268 Converter::convert(nir_ssa_def
*def
)
270 NirDefMap::iterator it
= ssaDefs
.find(def
->index
);
271 if (it
!= ssaDefs
.end())
274 LValues
newDef(def
->num_components
);
275 for (uint8_t i
= 0; i
< def
->num_components
; i
++)
// The size passed to getSSA is never below 4, even for narrower defs.
276 newDef
[i
] = getSSA(std::max(4, def
->bit_size
/ 8));
// Copies newDef into the map and returns the map's element.
277 return ssaDefs
[def
->index
] = newDef
;
// Value for one component of an ALU source. The requested component is
// translated through the source's swizzle before the underlying nir_src is
// looked up. Sources carrying abs or negate modifiers are reported as
// unsupported.
// NOTE(review): the statements following the ERROR inside the modifier
// check are not visible in this excerpt.
281 Converter::getSrc(nir_alu_src
*src
, uint8_t component
)
283 if (src
->abs
|| src
->negate
) {
284 ERROR("modifiers currently not supported on nir_alu_src\n");
// swizzle[component] selects which component of the underlying source to
// read.
287 return getSrc(&src
->src
, src
->swizzle
[component
]);
// Value for component idx of a NIR register. Uses the existing regDefs
// entry for reg->index when there is one; otherwise the entry is created on
// demand through convert(reg).
291 Converter::getSrc(nir_register
*reg
, uint8_t idx
)
293 NirDefMap::iterator it
= regDefs
.find(reg
->index
);
// Register not seen before: create its values first.
294 if (it
== regDefs
.end())
295 return convert(reg
)[idx
];
296 return it
->second
[idx
];
// Value for component idx of a generic NIR source. Forwards to the
// nir_ssa_def overload for src->ssa or to the nir_register overload for
// src->reg.reg. When the register source has an indirect, the value of that
// indirect source may be returned instead, otherwise an error is reported.
// NOTE(review): the conditions guarding the first return (presumably
// src->is_ssa) and the return of the indirect (presumably the `indirect`
// parameter), as well as the statements after the ERROR, are not visible in
// this excerpt.
300 Converter::getSrc(nir_src
*src
, uint8_t idx
, bool indirect
)
303 return getSrc(src
->ssa
, idx
);
305 if (src
->reg
.indirect
) {
// Resolves the indirect source itself rather than the register it indexes.
307 return getSrc(src
->reg
.indirect
, idx
);
308 ERROR("no support for indirects.");
313 return getSrc(src
->reg
.reg
, idx
);
// Value for component idx of a NIR SSA def, looked up by src->index in
// ssaDefs. Unlike the register overload, a missing entry is reported as an
// error instead of being created.
// NOTE(review): the statements following the ERROR inside the "not found"
// branch are not visible in this excerpt.
317 Converter::getSrc(nir_ssa_def
*src
, uint8_t idx
)
319 NirDefMap::iterator it
= ssaDefs
.find(src
->index
);
320 if (it
== ssaDefs
.end()) {
321 ERROR("SSA value %u not found\n", src
->index
);
325 return it
->second
[idx
];
// Splits a NIR source into a constant part (the return value) and a
// non-constant part (handed back through `indirect`).
// The source is queried with nir_src_as_const_value; the first u32 of that
// constant is the value returned. Otherwise `indirect` receives the Value
// from getSrc(src, idx, true).
// NOTE(review): the check on `offset`, what `indirect` is set to in the
// constant case and the value returned in the non-constant case are not
// visible in this excerpt.
329 Converter::getIndirect(nir_src
*src
, uint8_t idx
, Value
*&indirect
)
331 nir_const_value
*offset
= nir_src_as_const_value(*src
);
335 return offset
->u32
[0];
// Non-constant source: pass `true` so getSrc may resolve register indirects.
338 indirect
= getSrc(src
, idx
, true);
// Variant of getIndirect for source s of an intrinsic. The constant part is
// nir_intrinsic_base(insn) plus the constant part of component c of that
// source. The non-constant part, handed back through `indirect`, is
// replaced by an OP_SHL of it with an immediate of 4, placed in a new
// FILE_ADDRESS value.
// NOTE(review): the condition guarding the shift (presumably a non-NULL
// `indirect`) and the return statement are not visible in this excerpt.
343 Converter::getIndirect(nir_intrinsic_instr
*insn
, uint8_t s
, uint8_t c
, Value
*&indirect
)
345 int32_t idx
= nir_intrinsic_base(insn
) + getIndirect(&insn
->src
[s
], c
, indirect
);
// Left shift by 4, i.e. the indirect is scaled by 16.
347 indirect
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(4, FILE_ADDRESS
), indirect
, loadImm(NULL
, 4));
352 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot
, unsigned *name
, unsigned *index
)
354 assert(name
&& index
);
356 if (slot
>= VERT_ATTRIB_MAX
) {
357 ERROR("invalid varying slot %u\n", slot
);
362 if (slot
>= VERT_ATTRIB_GENERIC0
&&
363 slot
< VERT_ATTRIB_GENERIC0
+ VERT_ATTRIB_GENERIC_MAX
) {
364 *name
= TGSI_SEMANTIC_GENERIC
;
365 *index
= slot
- VERT_ATTRIB_GENERIC0
;
369 if (slot
>= VERT_ATTRIB_TEX0
&&
370 slot
< VERT_ATTRIB_TEX0
+ VERT_ATTRIB_TEX_MAX
) {
371 *name
= TGSI_SEMANTIC_TEXCOORD
;
372 *index
= slot
- VERT_ATTRIB_TEX0
;
377 case VERT_ATTRIB_COLOR0
:
378 *name
= TGSI_SEMANTIC_COLOR
;
381 case VERT_ATTRIB_COLOR1
:
382 *name
= TGSI_SEMANTIC_COLOR
;
385 case VERT_ATTRIB_EDGEFLAG
:
386 *name
= TGSI_SEMANTIC_EDGEFLAG
;
389 case VERT_ATTRIB_FOG
:
390 *name
= TGSI_SEMANTIC_FOG
;
393 case VERT_ATTRIB_NORMAL
:
394 *name
= TGSI_SEMANTIC_NORMAL
;
397 case VERT_ATTRIB_POS
:
398 *name
= TGSI_SEMANTIC_POSITION
;
401 case VERT_ATTRIB_POINT_SIZE
:
402 *name
= TGSI_SEMANTIC_PSIZE
;
406 ERROR("unknown vert attrib slot %u\n", slot
);
413 varying_slot_to_tgsi_semantic(gl_varying_slot slot
, unsigned *name
, unsigned *index
)
415 assert(name
&& index
);
417 if (slot
>= VARYING_SLOT_TESS_MAX
) {
418 ERROR("invalid varying slot %u\n", slot
);
423 if (slot
>= VARYING_SLOT_PATCH0
) {
424 *name
= TGSI_SEMANTIC_PATCH
;
425 *index
= slot
- VARYING_SLOT_PATCH0
;
429 if (slot
>= VARYING_SLOT_VAR0
) {
430 *name
= TGSI_SEMANTIC_GENERIC
;
431 *index
= slot
- VARYING_SLOT_VAR0
;
435 if (slot
>= VARYING_SLOT_TEX0
&& slot
<= VARYING_SLOT_TEX7
) {
436 *name
= TGSI_SEMANTIC_TEXCOORD
;
437 *index
= slot
- VARYING_SLOT_TEX0
;
442 case VARYING_SLOT_BFC0
:
443 *name
= TGSI_SEMANTIC_BCOLOR
;
446 case VARYING_SLOT_BFC1
:
447 *name
= TGSI_SEMANTIC_BCOLOR
;
450 case VARYING_SLOT_CLIP_DIST0
:
451 *name
= TGSI_SEMANTIC_CLIPDIST
;
454 case VARYING_SLOT_CLIP_DIST1
:
455 *name
= TGSI_SEMANTIC_CLIPDIST
;
458 case VARYING_SLOT_CLIP_VERTEX
:
459 *name
= TGSI_SEMANTIC_CLIPVERTEX
;
462 case VARYING_SLOT_COL0
:
463 *name
= TGSI_SEMANTIC_COLOR
;
466 case VARYING_SLOT_COL1
:
467 *name
= TGSI_SEMANTIC_COLOR
;
470 case VARYING_SLOT_EDGE
:
471 *name
= TGSI_SEMANTIC_EDGEFLAG
;
474 case VARYING_SLOT_FACE
:
475 *name
= TGSI_SEMANTIC_FACE
;
478 case VARYING_SLOT_FOGC
:
479 *name
= TGSI_SEMANTIC_FOG
;
482 case VARYING_SLOT_LAYER
:
483 *name
= TGSI_SEMANTIC_LAYER
;
486 case VARYING_SLOT_PNTC
:
487 *name
= TGSI_SEMANTIC_PCOORD
;
490 case VARYING_SLOT_POS
:
491 *name
= TGSI_SEMANTIC_POSITION
;
494 case VARYING_SLOT_PRIMITIVE_ID
:
495 *name
= TGSI_SEMANTIC_PRIMID
;
498 case VARYING_SLOT_PSIZ
:
499 *name
= TGSI_SEMANTIC_PSIZE
;
502 case VARYING_SLOT_TESS_LEVEL_INNER
:
503 *name
= TGSI_SEMANTIC_TESSINNER
;
506 case VARYING_SLOT_TESS_LEVEL_OUTER
:
507 *name
= TGSI_SEMANTIC_TESSOUTER
;
510 case VARYING_SLOT_VIEWPORT
:
511 *name
= TGSI_SEMANTIC_VIEWPORT_INDEX
;
515 ERROR("unknown varying slot %u\n", slot
);
522 frag_result_to_tgsi_semantic(unsigned slot
, unsigned *name
, unsigned *index
)
524 if (slot
>= FRAG_RESULT_DATA0
) {
525 *name
= TGSI_SEMANTIC_COLOR
;
526 *index
= slot
- FRAG_RESULT_COLOR
- 2; // intentional
531 case FRAG_RESULT_COLOR
:
532 *name
= TGSI_SEMANTIC_COLOR
;
535 case FRAG_RESULT_DEPTH
:
536 *name
= TGSI_SEMANTIC_POSITION
;
539 case FRAG_RESULT_SAMPLE_MASK
:
540 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
544 ERROR("unknown frag result slot %u\n", slot
);
550 // copy of _mesa_sysval_to_semantic
552 system_val_to_tgsi_semantic(unsigned val
, unsigned *name
, unsigned *index
)
557 case SYSTEM_VALUE_VERTEX_ID
:
558 *name
= TGSI_SEMANTIC_VERTEXID
;
560 case SYSTEM_VALUE_INSTANCE_ID
:
561 *name
= TGSI_SEMANTIC_INSTANCEID
;
563 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
:
564 *name
= TGSI_SEMANTIC_VERTEXID_NOBASE
;
566 case SYSTEM_VALUE_BASE_VERTEX
:
567 *name
= TGSI_SEMANTIC_BASEVERTEX
;
569 case SYSTEM_VALUE_BASE_INSTANCE
:
570 *name
= TGSI_SEMANTIC_BASEINSTANCE
;
572 case SYSTEM_VALUE_DRAW_ID
:
573 *name
= TGSI_SEMANTIC_DRAWID
;
577 case SYSTEM_VALUE_INVOCATION_ID
:
578 *name
= TGSI_SEMANTIC_INVOCATIONID
;
582 case SYSTEM_VALUE_FRAG_COORD
:
583 *name
= TGSI_SEMANTIC_POSITION
;
585 case SYSTEM_VALUE_FRONT_FACE
:
586 *name
= TGSI_SEMANTIC_FACE
;
588 case SYSTEM_VALUE_SAMPLE_ID
:
589 *name
= TGSI_SEMANTIC_SAMPLEID
;
591 case SYSTEM_VALUE_SAMPLE_POS
:
592 *name
= TGSI_SEMANTIC_SAMPLEPOS
;
594 case SYSTEM_VALUE_SAMPLE_MASK_IN
:
595 *name
= TGSI_SEMANTIC_SAMPLEMASK
;
597 case SYSTEM_VALUE_HELPER_INVOCATION
:
598 *name
= TGSI_SEMANTIC_HELPER_INVOCATION
;
601 // Tessellation shader
602 case SYSTEM_VALUE_TESS_COORD
:
603 *name
= TGSI_SEMANTIC_TESSCOORD
;
605 case SYSTEM_VALUE_VERTICES_IN
:
606 *name
= TGSI_SEMANTIC_VERTICESIN
;
608 case SYSTEM_VALUE_PRIMITIVE_ID
:
609 *name
= TGSI_SEMANTIC_PRIMID
;
611 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
612 *name
= TGSI_SEMANTIC_TESSOUTER
;
614 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
615 *name
= TGSI_SEMANTIC_TESSINNER
;
619 case SYSTEM_VALUE_LOCAL_INVOCATION_ID
:
620 *name
= TGSI_SEMANTIC_THREAD_ID
;
622 case SYSTEM_VALUE_WORK_GROUP_ID
:
623 *name
= TGSI_SEMANTIC_BLOCK_ID
;
625 case SYSTEM_VALUE_NUM_WORK_GROUPS
:
626 *name
= TGSI_SEMANTIC_GRID_SIZE
;
628 case SYSTEM_VALUE_LOCAL_GROUP_SIZE
:
629 *name
= TGSI_SEMANTIC_BLOCK_SIZE
;
633 case SYSTEM_VALUE_SUBGROUP_SIZE
:
634 *name
= TGSI_SEMANTIC_SUBGROUP_SIZE
;
636 case SYSTEM_VALUE_SUBGROUP_INVOCATION
:
637 *name
= TGSI_SEMANTIC_SUBGROUP_INVOCATION
;
639 case SYSTEM_VALUE_SUBGROUP_EQ_MASK
:
640 *name
= TGSI_SEMANTIC_SUBGROUP_EQ_MASK
;
642 case SYSTEM_VALUE_SUBGROUP_GE_MASK
:
643 *name
= TGSI_SEMANTIC_SUBGROUP_GE_MASK
;
645 case SYSTEM_VALUE_SUBGROUP_GT_MASK
:
646 *name
= TGSI_SEMANTIC_SUBGROUP_GT_MASK
;
648 case SYSTEM_VALUE_SUBGROUP_LE_MASK
:
649 *name
= TGSI_SEMANTIC_SUBGROUP_LE_MASK
;
651 case SYSTEM_VALUE_SUBGROUP_LT_MASK
:
652 *name
= TGSI_SEMANTIC_SUBGROUP_LT_MASK
;
656 ERROR("unknown system value %u\n", val
);
663 Converter::setInterpolate(nv50_ir_varying
*var
,
669 case INTERP_MODE_FLAT
:
672 case INTERP_MODE_NONE
:
673 if (semantic
== TGSI_SEMANTIC_COLOR
)
675 else if (semantic
== TGSI_SEMANTIC_POSITION
)
678 case INTERP_MODE_NOPERSPECTIVE
:
681 case INTERP_MODE_SMOOTH
:
684 var
->centroid
= centroid
;
// Number of slots occupied by a shader input/output variable of the given
// type in the given shader stage.
//   type  - GLSL type of the variable
//   stage - program type the variable belongs to
//   info  - shader info; gs.vertices_in is read for geometry shaders
//   input - true for inputs, false for outputs
//   var   - the variable itself; data.patch is read for tessellation stages
// Non-array types always use count_attribute_slots(false). Array types are
// handled per stage below.
// NOTE(review): the switch header, the condition guarding the division in
// the geometry case (presumably `input`), the else/break/default lines and
// the final return are not visible in this excerpt.
688 calcSlots(const glsl_type
*type
, Program::Type stage
, const shader_info
&info
,
689 bool input
, const nir_variable
*var
)
691 if (!type
->is_array())
692 return type
->count_attribute_slots(false);
696 case Program::TYPE_GEOMETRY
:
697 slots
= type
->uniform_locations();
// Divides out the per-vertex dimension of the array.
699 slots
/= info
.gs
.vertices_in
;
701 case Program::TYPE_TESSELLATION_CONTROL
:
702 case Program::TYPE_TESSELLATION_EVAL
:
703 // remove first dimension
// Patch variables and tessellation-evaluation outputs keep the full array;
// everything else is counted on the array's element type.
704 if (var
->data
.patch
|| (!input
&& stage
== Program::TYPE_TESSELLATION_EVAL
))
705 slots
= type
->uniform_locations();
707 slots
= type
->fields
.array
->uniform_locations();
710 slots
= type
->count_attribute_slots(false);
717 bool Converter::assignSlots() {
721 info
->io
.viewportId
= -1;
724 // we have to fixup the uniform locations for arrays
725 unsigned numImages
= 0;
726 nir_foreach_variable(var
, &nir
->uniforms
) {
727 const glsl_type
*type
= var
->type
;
728 if (!type
->without_array()->is_image())
730 var
->data
.driver_location
= numImages
;
731 numImages
+= type
->is_array() ? type
->arrays_of_arrays_size() : 1;
734 nir_foreach_variable(var
, &nir
->inputs
) {
735 const glsl_type
*type
= var
->type
;
736 int slot
= var
->data
.location
;
737 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, true, var
);
738 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
739 : type
->component_slots();
740 uint32_t frac
= var
->data
.location_frac
;
741 uint32_t vary
= var
->data
.driver_location
;
743 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
748 assert(vary
+ slots
<= PIPE_MAX_SHADER_INPUTS
);
750 switch(prog
->getType()) {
751 case Program::TYPE_FRAGMENT
:
752 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
753 for (uint16_t i
= 0; i
< slots
; ++i
) {
754 setInterpolate(&info
->in
[vary
+ i
], var
->data
.interpolation
,
755 var
->data
.centroid
| var
->data
.sample
, name
);
758 case Program::TYPE_GEOMETRY
:
759 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
761 case Program::TYPE_TESSELLATION_CONTROL
:
762 case Program::TYPE_TESSELLATION_EVAL
:
763 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
764 if (var
->data
.patch
&& name
== TGSI_SEMANTIC_PATCH
)
765 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
767 case Program::TYPE_VERTEX
:
768 vert_attrib_to_tgsi_semantic((gl_vert_attrib
)slot
, &name
, &index
);
770 case TGSI_SEMANTIC_EDGEFLAG
:
771 info
->io
.edgeFlagIn
= vary
;
778 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
782 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
783 info
->in
[vary
].id
= vary
;
784 info
->in
[vary
].patch
= var
->data
.patch
;
785 info
->in
[vary
].sn
= name
;
786 info
->in
[vary
].si
= index
+ i
;
787 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
789 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
791 info
->in
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
793 info
->in
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
795 info
->numInputs
= std::max
<uint8_t>(info
->numInputs
, vary
);
798 info
->numOutputs
= 0;
799 nir_foreach_variable(var
, &nir
->outputs
) {
800 const glsl_type
*type
= var
->type
;
801 int slot
= var
->data
.location
;
802 uint16_t slots
= calcSlots(type
, prog
->getType(), nir
->info
, false, var
);
803 uint32_t comp
= type
->is_array() ? type
->without_array()->component_slots()
804 : type
->component_slots();
805 uint32_t frac
= var
->data
.location_frac
;
806 uint32_t vary
= var
->data
.driver_location
;
808 if (glsl_base_type_is_64bit(type
->without_array()->base_type
)) {
813 assert(vary
< PIPE_MAX_SHADER_OUTPUTS
);
815 switch(prog
->getType()) {
816 case Program::TYPE_FRAGMENT
:
817 frag_result_to_tgsi_semantic((gl_frag_result
)slot
, &name
, &index
);
819 case TGSI_SEMANTIC_COLOR
:
820 if (!var
->data
.fb_fetch_output
)
821 info
->prop
.fp
.numColourResults
++;
822 info
->prop
.fp
.separateFragData
= true;
823 // sometimes we get FRAG_RESULT_DATAX with data.index 0
824 // sometimes we get FRAG_RESULT_DATA0 with data.index X
825 index
= index
== 0 ? var
->data
.index
: index
;
827 case TGSI_SEMANTIC_POSITION
:
828 info
->io
.fragDepth
= vary
;
829 info
->prop
.fp
.writesDepth
= true;
831 case TGSI_SEMANTIC_SAMPLEMASK
:
832 info
->io
.sampleMask
= vary
;
838 case Program::TYPE_GEOMETRY
:
839 case Program::TYPE_TESSELLATION_CONTROL
:
840 case Program::TYPE_TESSELLATION_EVAL
:
841 case Program::TYPE_VERTEX
:
842 varying_slot_to_tgsi_semantic((gl_varying_slot
)slot
, &name
, &index
);
844 if (var
->data
.patch
&& name
!= TGSI_SEMANTIC_TESSINNER
&&
845 name
!= TGSI_SEMANTIC_TESSOUTER
)
846 info
->numPatchConstants
= MAX2(info
->numPatchConstants
, index
+ slots
);
849 case TGSI_SEMANTIC_CLIPDIST
:
850 info
->io
.genUserClip
= -1;
852 case TGSI_SEMANTIC_EDGEFLAG
:
853 info
->io
.edgeFlagOut
= vary
;
860 ERROR("unknown shader type %u in assignSlots\n", prog
->getType());
864 for (uint16_t i
= 0u; i
< slots
; ++i
, ++vary
) {
865 info
->out
[vary
].id
= vary
;
866 info
->out
[vary
].patch
= var
->data
.patch
;
867 info
->out
[vary
].sn
= name
;
868 info
->out
[vary
].si
= index
+ i
;
869 if (glsl_base_type_is_64bit(type
->without_array()->base_type
))
871 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) >> 0x4);
873 info
->out
[vary
].mask
|= (((1 << (comp
* 2)) - 1) << (frac
* 2) & 0xf);
875 info
->out
[vary
].mask
|= ((1 << comp
) - 1) << frac
;
877 if (nir
->info
.outputs_read
& 1ll << slot
)
878 info
->out
[vary
].oread
= 1;
880 info
->numOutputs
= std::max
<uint8_t>(info
->numOutputs
, vary
);
883 info
->numSysVals
= 0;
884 for (uint8_t i
= 0; i
< 64; ++i
) {
885 if (!(nir
->info
.system_values_read
& 1ll << i
))
888 system_val_to_tgsi_semantic(i
, &name
, &index
);
889 info
->sv
[info
->numSysVals
].sn
= name
;
890 info
->sv
[info
->numSysVals
].si
= index
;
891 info
->sv
[info
->numSysVals
].input
= 0; // TODO inferSysValDirection(sn);
894 case SYSTEM_VALUE_INSTANCE_ID
:
895 info
->io
.instanceId
= info
->numSysVals
;
897 case SYSTEM_VALUE_TESS_LEVEL_INNER
:
898 case SYSTEM_VALUE_TESS_LEVEL_OUTER
:
899 info
->sv
[info
->numSysVals
].patch
= 1;
901 case SYSTEM_VALUE_VERTEX_ID
:
902 info
->io
.vertexId
= info
->numSysVals
;
908 info
->numSysVals
+= 1;
911 if (info
->io
.genUserClip
> 0) {
912 info
->io
.clipDistances
= info
->io
.genUserClip
;
914 const unsigned int nOut
= (info
->io
.genUserClip
+ 3) / 4;
916 for (unsigned int n
= 0; n
< nOut
; ++n
) {
917 unsigned int i
= info
->numOutputs
++;
919 info
->out
[i
].sn
= TGSI_SEMANTIC_CLIPDIST
;
921 info
->out
[i
].mask
= ((1 << info
->io
.clipDistances
) - 1) >> (n
* 4);
925 return info
->assignSlots(info
) == 0;
// Address of one slot of input/output idx for an I/O intrinsic: the value
// returned is slot[slot] of the matching nv50_ir_varying multiplied by 4.
// Whether the input or the output table is consulted depends on the
// intrinsic: the load_*input intrinsics are grouped separately from the
// load/store *output ones. Unknown intrinsics are reported via ERROR.
// NOTE(review): the declarations of `ty` and `input`, the assignments made
// in the switch cases, the use of `offset`, and the body of the 8-byte type
// branch (which presumably adjusts `slot`) are not visible in this excerpt.
929 Converter::getSlotAddress(nir_intrinsic_instr
*insn
, uint8_t idx
, uint8_t slot
)
// Component offset recorded on the intrinsic.
932 int offset
= nir_intrinsic_component(insn
);
// Intrinsics without a destination take their type from source 0.
935 if (nir_intrinsic_infos
[insn
->intrinsic
].has_dest
)
938 ty
= getSType(insn
->src
[0], false, false);
940 switch (insn
->intrinsic
) {
941 case nir_intrinsic_load_input
:
942 case nir_intrinsic_load_interpolated_input
:
943 case nir_intrinsic_load_per_vertex_input
:
946 case nir_intrinsic_load_output
:
947 case nir_intrinsic_load_per_vertex_output
:
948 case nir_intrinsic_store_output
:
949 case nir_intrinsic_store_per_vertex_output
:
953 ERROR("unknown intrinsic in getSlotAddress %s",
954 nir_intrinsic_infos
[insn
->intrinsic
].name
);
// 8-byte types get special treatment here.
960 if (typeSizeof(ty
) == 8) {
// idx must be within the table that is about to be indexed.
972 assert(!input
|| idx
< PIPE_MAX_SHADER_INPUTS
);
973 assert(input
|| idx
< PIPE_MAX_SHADER_OUTPUTS
);
975 const nv50_ir_varying
*vary
= input
? info
->in
: info
->out
;
976 return vary
[idx
].slot
[slot
] * 4;
// Emits a load of component c of type ty from `file` into `def`.
// The symbol offset is base + c * sizeof(ty). indirect0 is passed to the
// load itself, indirect1 is attached afterwards with setIndirect(0, 1, ...),
// and `patch` is copied to the instruction's perPatch flag.
// For constant memory, buffer memory or any load with indirect0, the value
// is loaded as two TYPE_U32 halves (at the offset and at offset + 4) and
// combined with OP_MERGE.
// NOTE(review): the first half of the condition (presumably tySize == 8),
// the mkLoad calls that define `loi`/`hii`/`ld`, and the final return are
// not visible in this excerpt.
980 Converter::loadFrom(DataFile file
, uint8_t i
, DataType ty
, Value
*def
,
981 uint32_t base
, uint8_t c
, Value
*indirect0
,
982 Value
*indirect1
, bool patch
)
984 unsigned int tySize
= typeSizeof(ty
);
987 (file
== FILE_MEMORY_CONST
|| file
== FILE_MEMORY_BUFFER
|| indirect0
)) {
// Temporaries for the low and high 32-bit halves.
988 Value
*lo
= getSSA();
989 Value
*hi
= getSSA();
// Low half: symbol at the component's own offset.
993 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
),
995 loi
->setIndirect(0, 1, indirect1
);
996 loi
->perPatch
= patch
;
// High half: symbol 4 bytes further.
1000 mkSymbol(file
, i
, TYPE_U32
, base
+ c
* tySize
+ 4),
1002 hii
->setIndirect(0, 1, indirect1
);
1003 hii
->perPatch
= patch
;
1005 return mkOp2(OP_MERGE
, ty
, def
, lo
, hi
);
// Plain case: a single load of the full type.
1008 mkLoad(ty
, def
, mkSymbol(file
, i
, ty
, base
+ c
* tySize
), indirect0
);
1009 ld
->setIndirect(0, 1, indirect1
);
1010 ld
->perPatch
= patch
;
// Emits a store of `src` (type ty) to output idx, component c, in `file`
// using operation `op`. The target address comes from
// getSlotAddress(insn, idx, c). Every emitted store has its perPatch flag
// set from info->out[idx].patch.
// 8-byte values stored with indirect0 are split into two halves and written
// as two TYPE_U32 stores at address and address + 4; everything else is
// written with a single store of type ty. For OP_EXPORT the stored values
// are first copied into fresh SSA values with mkMov.
// NOTE(review): the declaration of `split`, the else line separating the
// two paths and any use of indirect1 are not visible in this excerpt.
1016 Converter::storeTo(nir_intrinsic_instr
*insn
, DataFile file
, operation op
,
1017 DataType ty
, Value
*src
, uint8_t idx
, uint8_t c
,
1018 Value
*indirect0
, Value
*indirect1
)
1020 uint8_t size
= typeSizeof(ty
);
1021 uint32_t address
= getSlotAddress(insn
, idx
, c
);
1023 if (size
== 8 && indirect0
) {
// Break the 8-byte source into 4-byte pieces.
1025 mkSplit(split
, 4, src
);
1027 if (op
== OP_EXPORT
) {
1028 split
[0] = mkMov(getSSA(), split
[0], ty
)->getDef(0);
1029 split
[1] = mkMov(getSSA(), split
[1], ty
)->getDef(0);
// First half at `address`, second half 4 bytes further.
1032 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
), indirect0
,
1033 split
[0])->perPatch
= info
->out
[idx
].patch
;
1034 mkStore(op
, TYPE_U32
, mkSymbol(file
, 0, TYPE_U32
, address
+ 4), indirect0
,
1035 split
[1])->perPatch
= info
->out
[idx
].patch
;
// Single-store path.
1037 if (op
== OP_EXPORT
)
1038 src
= mkMov(getSSA(size
), src
, ty
)->getDef(0);
1039 mkStore(op
, ty
, mkSymbol(file
, 0, ty
, address
), indirect0
,
1040 src
)->perPatch
= info
->out
[idx
].patch
;
1049 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
1050 nir_print_shader(nir
, stderr
);
1052 NIR_PASS_V(nir
, nir_lower_io
, nir_var_all
, type_size
, (nir_lower_io_options
)0);
1053 NIR_PASS_V(nir
, nir_lower_regs_to_ssa
);
1054 NIR_PASS_V(nir
, nir_lower_load_const_to_scalar
);
1055 NIR_PASS_V(nir
, nir_lower_vars_to_ssa
);
1056 NIR_PASS_V(nir
, nir_lower_alu_to_scalar
);
1057 NIR_PASS_V(nir
, nir_lower_phis_to_scalar
);
1061 NIR_PASS(progress
, nir
, nir_copy_prop
);
1062 NIR_PASS(progress
, nir
, nir_opt_remove_phis
);
1063 NIR_PASS(progress
, nir
, nir_opt_trivial_continues
);
1064 NIR_PASS(progress
, nir
, nir_opt_cse
);
1065 NIR_PASS(progress
, nir
, nir_opt_algebraic
);
1066 NIR_PASS(progress
, nir
, nir_opt_constant_folding
);
1067 NIR_PASS(progress
, nir
, nir_copy_prop
);
1068 NIR_PASS(progress
, nir
, nir_opt_dce
);
1069 NIR_PASS(progress
, nir
, nir_opt_dead_cf
);
1072 NIR_PASS_V(nir
, nir_lower_bool_to_int32
);
1073 NIR_PASS_V(nir
, nir_lower_locals_to_regs
);
1074 NIR_PASS_V(nir
, nir_remove_dead_variables
, nir_var_function_temp
);
1075 NIR_PASS_V(nir
, nir_convert_from_ssa
, true);
1077 // Garbage collect dead instructions
1080 if (!assignSlots()) {
1081 ERROR("Couldn't assign slots!\n");
1085 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
1086 nir_print_shader(nir
, stderr
);
1091 } // unnamed namespace
// Builds this Program from the NIR shader stored in info->bin.source:
// a Converter is constructed for (this, nir, info) and run, and tlsSize is
// taken from info->bin.tlsSpace.
// NOTE(review): how `result` and `lowering` are used, and what is returned,
// is not visible in this excerpt.
1096 Program::makeFromNIR(struct nv50_ir_prog_info
*info
)
// info->bin.source is reinterpreted as a nir_shader pointer.
1098 nir_shader
*nir
= (nir_shader
*)info
->bin
.source
;
1099 Converter
converter(this, nir
, info
);
1100 bool result
= converter
.run();
1103 LoweringHelper lowering
;
1105 tlsSize
= info
->bin
.tlsSpace
;
1109 } // namespace nv50_ir