nv50/ir/nir: run assignSlots
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <vector>
40
41 namespace {
42
43 #if __cplusplus >= 201103L
44 using std::hash;
45 using std::unordered_map;
46 #else
47 using std::tr1::hash;
48 using std::tr1::unordered_map;
49 #endif
50
51 using namespace nv50_ir;
52
53 int
54 type_size(const struct glsl_type *type)
55 {
56 return glsl_count_attribute_slots(type, false);
57 }
58
59 class Converter : public ConverterCommon
60 {
61 public:
62 Converter(Program *, nir_shader *, nv50_ir_prog_info *);
63
64 bool run();
65 private:
66 typedef std::vector<LValue *> LValues;
67 typedef unordered_map<unsigned, LValues> NirDefMap;
68
69 LValues& convert(nir_alu_dest *);
70 LValues& convert(nir_dest *);
71 LValues& convert(nir_register *);
72 LValues& convert(nir_ssa_def *);
73
74 Value* getSrc(nir_alu_src *, uint8_t component = 0);
75 Value* getSrc(nir_register *, uint8_t);
76 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
77 Value* getSrc(nir_ssa_def *, uint8_t);
78
79 // returned value is the constant part of the given source (either the
80 // nir_src or the selected source component of an intrinsic). Even though
81 // this is mostly an optimization to be able to skip indirects in a few
82 // cases, sometimes we require immediate values or set some fileds on
83 // instructions (e.g. tex) in order for codegen to consume those.
84 // If the found value has not a constant part, the Value gets returned
85 // through the Value parameter.
86 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
87 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
88
89 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
90
91 void setInterpolate(nv50_ir_varying *,
92 uint8_t,
93 bool centroid,
94 unsigned semantics);
95
96 bool isFloatType(nir_alu_type);
97 bool isSignedType(nir_alu_type);
98 bool isResultFloat(nir_op);
99 bool isResultSigned(nir_op);
100
101 DataType getDType(nir_alu_instr *);
102 DataType getDType(nir_intrinsic_instr *);
103 DataType getDType(nir_op, uint8_t);
104
105 std::vector<DataType> getSTypes(nir_alu_instr *);
106 DataType getSType(nir_src &, bool isFloat, bool isSigned);
107
108 bool assignSlots();
109
110 nir_shader *nir;
111
112 NirDefMap ssaDefs;
113 NirDefMap regDefs;
114 };
115
116 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
117 : ConverterCommon(prog, info),
118 nir(nir) {}
119
120 bool
121 Converter::isFloatType(nir_alu_type type)
122 {
123 return nir_alu_type_get_base_type(type) == nir_type_float;
124 }
125
126 bool
127 Converter::isSignedType(nir_alu_type type)
128 {
129 return nir_alu_type_get_base_type(type) == nir_type_int;
130 }
131
132 bool
133 Converter::isResultFloat(nir_op op)
134 {
135 const nir_op_info &info = nir_op_infos[op];
136 if (info.output_type != nir_type_invalid)
137 return isFloatType(info.output_type);
138
139 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
140 assert(false);
141 return true;
142 }
143
144 bool
145 Converter::isResultSigned(nir_op op)
146 {
147 switch (op) {
148 // there is no umul and we get wrong results if we treat all muls as signed
149 case nir_op_imul:
150 case nir_op_inot:
151 return false;
152 default:
153 const nir_op_info &info = nir_op_infos[op];
154 if (info.output_type != nir_type_invalid)
155 return isSignedType(info.output_type);
156 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
157 assert(false);
158 return true;
159 }
160 }
161
162 DataType
163 Converter::getDType(nir_alu_instr *insn)
164 {
165 if (insn->dest.dest.is_ssa)
166 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
167 else
168 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
169 }
170
171 DataType
172 Converter::getDType(nir_intrinsic_instr *insn)
173 {
174 if (insn->dest.is_ssa)
175 return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
176 else
177 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
178 }
179
180 DataType
181 Converter::getDType(nir_op op, uint8_t bitSize)
182 {
183 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
184 if (ty == TYPE_NONE) {
185 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
186 assert(false);
187 }
188 return ty;
189 }
190
191 std::vector<DataType>
192 Converter::getSTypes(nir_alu_instr *insn)
193 {
194 const nir_op_info &info = nir_op_infos[insn->op];
195 std::vector<DataType> res(info.num_inputs);
196
197 for (uint8_t i = 0; i < info.num_inputs; ++i) {
198 if (info.input_types[i] != nir_type_invalid) {
199 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
200 } else {
201 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
202 assert(false);
203 res[i] = TYPE_NONE;
204 break;
205 }
206 }
207
208 return res;
209 }
210
211 DataType
212 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
213 {
214 uint8_t bitSize;
215 if (src.is_ssa)
216 bitSize = src.ssa->bit_size;
217 else
218 bitSize = src.reg.reg->bit_size;
219
220 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
221 if (ty == TYPE_NONE) {
222 const char *str;
223 if (isFloat)
224 str = "float";
225 else if (isSigned)
226 str = "int";
227 else
228 str = "uint";
229 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
230 assert(false);
231 }
232 return ty;
233 }
234
235 Converter::LValues&
236 Converter::convert(nir_dest *dest)
237 {
238 if (dest->is_ssa)
239 return convert(&dest->ssa);
240 if (dest->reg.indirect) {
241 ERROR("no support for indirects.");
242 assert(false);
243 }
244 return convert(dest->reg.reg);
245 }
246
247 Converter::LValues&
248 Converter::convert(nir_register *reg)
249 {
250 NirDefMap::iterator it = regDefs.find(reg->index);
251 if (it != regDefs.end())
252 return it->second;
253
254 LValues newDef(reg->num_components);
255 for (uint8_t i = 0; i < reg->num_components; i++)
256 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
257 return regDefs[reg->index] = newDef;
258 }
259
260 Converter::LValues&
261 Converter::convert(nir_ssa_def *def)
262 {
263 NirDefMap::iterator it = ssaDefs.find(def->index);
264 if (it != ssaDefs.end())
265 return it->second;
266
267 LValues newDef(def->num_components);
268 for (uint8_t i = 0; i < def->num_components; i++)
269 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
270 return ssaDefs[def->index] = newDef;
271 }
272
273 Value*
274 Converter::getSrc(nir_alu_src *src, uint8_t component)
275 {
276 if (src->abs || src->negate) {
277 ERROR("modifiers currently not supported on nir_alu_src\n");
278 assert(false);
279 }
280 return getSrc(&src->src, src->swizzle[component]);
281 }
282
283 Value*
284 Converter::getSrc(nir_register *reg, uint8_t idx)
285 {
286 NirDefMap::iterator it = regDefs.find(reg->index);
287 if (it == regDefs.end())
288 return convert(reg)[idx];
289 return it->second[idx];
290 }
291
292 Value*
293 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
294 {
295 if (src->is_ssa)
296 return getSrc(src->ssa, idx);
297
298 if (src->reg.indirect) {
299 if (indirect)
300 return getSrc(src->reg.indirect, idx);
301 ERROR("no support for indirects.");
302 assert(false);
303 return NULL;
304 }
305
306 return getSrc(src->reg.reg, idx);
307 }
308
309 Value*
310 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
311 {
312 NirDefMap::iterator it = ssaDefs.find(src->index);
313 if (it == ssaDefs.end()) {
314 ERROR("SSA value %u not found\n", src->index);
315 assert(false);
316 return NULL;
317 }
318 return it->second[idx];
319 }
320
321 uint32_t
322 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
323 {
324 nir_const_value *offset = nir_src_as_const_value(*src);
325
326 if (offset) {
327 indirect = NULL;
328 return offset->u32[0];
329 }
330
331 indirect = getSrc(src, idx, true);
332 return 0;
333 }
334
335 uint32_t
336 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
337 {
338 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
339 if (indirect)
340 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
341 return idx;
342 }
343
344 static void
345 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
346 {
347 assert(name && index);
348
349 if (slot >= VERT_ATTRIB_MAX) {
350 ERROR("invalid varying slot %u\n", slot);
351 assert(false);
352 return;
353 }
354
355 if (slot >= VERT_ATTRIB_GENERIC0 &&
356 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
357 *name = TGSI_SEMANTIC_GENERIC;
358 *index = slot - VERT_ATTRIB_GENERIC0;
359 return;
360 }
361
362 if (slot >= VERT_ATTRIB_TEX0 &&
363 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
364 *name = TGSI_SEMANTIC_TEXCOORD;
365 *index = slot - VERT_ATTRIB_TEX0;
366 return;
367 }
368
369 switch (slot) {
370 case VERT_ATTRIB_COLOR0:
371 *name = TGSI_SEMANTIC_COLOR;
372 *index = 0;
373 break;
374 case VERT_ATTRIB_COLOR1:
375 *name = TGSI_SEMANTIC_COLOR;
376 *index = 1;
377 break;
378 case VERT_ATTRIB_EDGEFLAG:
379 *name = TGSI_SEMANTIC_EDGEFLAG;
380 *index = 0;
381 break;
382 case VERT_ATTRIB_FOG:
383 *name = TGSI_SEMANTIC_FOG;
384 *index = 0;
385 break;
386 case VERT_ATTRIB_NORMAL:
387 *name = TGSI_SEMANTIC_NORMAL;
388 *index = 0;
389 break;
390 case VERT_ATTRIB_POS:
391 *name = TGSI_SEMANTIC_POSITION;
392 *index = 0;
393 break;
394 case VERT_ATTRIB_POINT_SIZE:
395 *name = TGSI_SEMANTIC_PSIZE;
396 *index = 0;
397 break;
398 default:
399 ERROR("unknown vert attrib slot %u\n", slot);
400 assert(false);
401 break;
402 }
403 }
404
405 static void
406 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
407 {
408 assert(name && index);
409
410 if (slot >= VARYING_SLOT_TESS_MAX) {
411 ERROR("invalid varying slot %u\n", slot);
412 assert(false);
413 return;
414 }
415
416 if (slot >= VARYING_SLOT_PATCH0) {
417 *name = TGSI_SEMANTIC_PATCH;
418 *index = slot - VARYING_SLOT_PATCH0;
419 return;
420 }
421
422 if (slot >= VARYING_SLOT_VAR0) {
423 *name = TGSI_SEMANTIC_GENERIC;
424 *index = slot - VARYING_SLOT_VAR0;
425 return;
426 }
427
428 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
429 *name = TGSI_SEMANTIC_TEXCOORD;
430 *index = slot - VARYING_SLOT_TEX0;
431 return;
432 }
433
434 switch (slot) {
435 case VARYING_SLOT_BFC0:
436 *name = TGSI_SEMANTIC_BCOLOR;
437 *index = 0;
438 break;
439 case VARYING_SLOT_BFC1:
440 *name = TGSI_SEMANTIC_BCOLOR;
441 *index = 1;
442 break;
443 case VARYING_SLOT_CLIP_DIST0:
444 *name = TGSI_SEMANTIC_CLIPDIST;
445 *index = 0;
446 break;
447 case VARYING_SLOT_CLIP_DIST1:
448 *name = TGSI_SEMANTIC_CLIPDIST;
449 *index = 1;
450 break;
451 case VARYING_SLOT_CLIP_VERTEX:
452 *name = TGSI_SEMANTIC_CLIPVERTEX;
453 *index = 0;
454 break;
455 case VARYING_SLOT_COL0:
456 *name = TGSI_SEMANTIC_COLOR;
457 *index = 0;
458 break;
459 case VARYING_SLOT_COL1:
460 *name = TGSI_SEMANTIC_COLOR;
461 *index = 1;
462 break;
463 case VARYING_SLOT_EDGE:
464 *name = TGSI_SEMANTIC_EDGEFLAG;
465 *index = 0;
466 break;
467 case VARYING_SLOT_FACE:
468 *name = TGSI_SEMANTIC_FACE;
469 *index = 0;
470 break;
471 case VARYING_SLOT_FOGC:
472 *name = TGSI_SEMANTIC_FOG;
473 *index = 0;
474 break;
475 case VARYING_SLOT_LAYER:
476 *name = TGSI_SEMANTIC_LAYER;
477 *index = 0;
478 break;
479 case VARYING_SLOT_PNTC:
480 *name = TGSI_SEMANTIC_PCOORD;
481 *index = 0;
482 break;
483 case VARYING_SLOT_POS:
484 *name = TGSI_SEMANTIC_POSITION;
485 *index = 0;
486 break;
487 case VARYING_SLOT_PRIMITIVE_ID:
488 *name = TGSI_SEMANTIC_PRIMID;
489 *index = 0;
490 break;
491 case VARYING_SLOT_PSIZ:
492 *name = TGSI_SEMANTIC_PSIZE;
493 *index = 0;
494 break;
495 case VARYING_SLOT_TESS_LEVEL_INNER:
496 *name = TGSI_SEMANTIC_TESSINNER;
497 *index = 0;
498 break;
499 case VARYING_SLOT_TESS_LEVEL_OUTER:
500 *name = TGSI_SEMANTIC_TESSOUTER;
501 *index = 0;
502 break;
503 case VARYING_SLOT_VIEWPORT:
504 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
505 *index = 0;
506 break;
507 default:
508 ERROR("unknown varying slot %u\n", slot);
509 assert(false);
510 break;
511 }
512 }
513
514 static void
515 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
516 {
517 if (slot >= FRAG_RESULT_DATA0) {
518 *name = TGSI_SEMANTIC_COLOR;
519 *index = slot - FRAG_RESULT_COLOR - 2; // intentional
520 return;
521 }
522
523 switch (slot) {
524 case FRAG_RESULT_COLOR:
525 *name = TGSI_SEMANTIC_COLOR;
526 *index = 0;
527 break;
528 case FRAG_RESULT_DEPTH:
529 *name = TGSI_SEMANTIC_POSITION;
530 *index = 0;
531 break;
532 case FRAG_RESULT_SAMPLE_MASK:
533 *name = TGSI_SEMANTIC_SAMPLEMASK;
534 *index = 0;
535 break;
536 default:
537 ERROR("unknown frag result slot %u\n", slot);
538 assert(false);
539 break;
540 }
541 }
542
543 // copy of _mesa_sysval_to_semantic
544 static void
545 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
546 {
547 *index = 0;
548 switch (val) {
549 // Vertex shader
550 case SYSTEM_VALUE_VERTEX_ID:
551 *name = TGSI_SEMANTIC_VERTEXID;
552 break;
553 case SYSTEM_VALUE_INSTANCE_ID:
554 *name = TGSI_SEMANTIC_INSTANCEID;
555 break;
556 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
557 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
558 break;
559 case SYSTEM_VALUE_BASE_VERTEX:
560 *name = TGSI_SEMANTIC_BASEVERTEX;
561 break;
562 case SYSTEM_VALUE_BASE_INSTANCE:
563 *name = TGSI_SEMANTIC_BASEINSTANCE;
564 break;
565 case SYSTEM_VALUE_DRAW_ID:
566 *name = TGSI_SEMANTIC_DRAWID;
567 break;
568
569 // Geometry shader
570 case SYSTEM_VALUE_INVOCATION_ID:
571 *name = TGSI_SEMANTIC_INVOCATIONID;
572 break;
573
574 // Fragment shader
575 case SYSTEM_VALUE_FRAG_COORD:
576 *name = TGSI_SEMANTIC_POSITION;
577 break;
578 case SYSTEM_VALUE_FRONT_FACE:
579 *name = TGSI_SEMANTIC_FACE;
580 break;
581 case SYSTEM_VALUE_SAMPLE_ID:
582 *name = TGSI_SEMANTIC_SAMPLEID;
583 break;
584 case SYSTEM_VALUE_SAMPLE_POS:
585 *name = TGSI_SEMANTIC_SAMPLEPOS;
586 break;
587 case SYSTEM_VALUE_SAMPLE_MASK_IN:
588 *name = TGSI_SEMANTIC_SAMPLEMASK;
589 break;
590 case SYSTEM_VALUE_HELPER_INVOCATION:
591 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
592 break;
593
594 // Tessellation shader
595 case SYSTEM_VALUE_TESS_COORD:
596 *name = TGSI_SEMANTIC_TESSCOORD;
597 break;
598 case SYSTEM_VALUE_VERTICES_IN:
599 *name = TGSI_SEMANTIC_VERTICESIN;
600 break;
601 case SYSTEM_VALUE_PRIMITIVE_ID:
602 *name = TGSI_SEMANTIC_PRIMID;
603 break;
604 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
605 *name = TGSI_SEMANTIC_TESSOUTER;
606 break;
607 case SYSTEM_VALUE_TESS_LEVEL_INNER:
608 *name = TGSI_SEMANTIC_TESSINNER;
609 break;
610
611 // Compute shader
612 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
613 *name = TGSI_SEMANTIC_THREAD_ID;
614 break;
615 case SYSTEM_VALUE_WORK_GROUP_ID:
616 *name = TGSI_SEMANTIC_BLOCK_ID;
617 break;
618 case SYSTEM_VALUE_NUM_WORK_GROUPS:
619 *name = TGSI_SEMANTIC_GRID_SIZE;
620 break;
621 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
622 *name = TGSI_SEMANTIC_BLOCK_SIZE;
623 break;
624
625 // ARB_shader_ballot
626 case SYSTEM_VALUE_SUBGROUP_SIZE:
627 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
628 break;
629 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
630 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
631 break;
632 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
633 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
634 break;
635 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
636 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
637 break;
638 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
639 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
640 break;
641 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
642 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
643 break;
644 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
645 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
646 break;
647
648 default:
649 ERROR("unknown system value %u\n", val);
650 assert(false);
651 break;
652 }
653 }
654
655 void
656 Converter::setInterpolate(nv50_ir_varying *var,
657 uint8_t mode,
658 bool centroid,
659 unsigned semantic)
660 {
661 switch (mode) {
662 case INTERP_MODE_FLAT:
663 var->flat = 1;
664 break;
665 case INTERP_MODE_NONE:
666 if (semantic == TGSI_SEMANTIC_COLOR)
667 var->sc = 1;
668 else if (semantic == TGSI_SEMANTIC_POSITION)
669 var->linear = 1;
670 break;
671 case INTERP_MODE_NOPERSPECTIVE:
672 var->linear = 1;
673 break;
674 case INTERP_MODE_SMOOTH:
675 break;
676 }
677 var->centroid = centroid;
678 }
679
680 static uint16_t
681 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
682 bool input, const nir_variable *var)
683 {
684 if (!type->is_array())
685 return type->count_attribute_slots(false);
686
687 uint16_t slots;
688 switch (stage) {
689 case Program::TYPE_GEOMETRY:
690 slots = type->uniform_locations();
691 if (input)
692 slots /= info.gs.vertices_in;
693 break;
694 case Program::TYPE_TESSELLATION_CONTROL:
695 case Program::TYPE_TESSELLATION_EVAL:
696 // remove first dimension
697 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
698 slots = type->uniform_locations();
699 else
700 slots = type->fields.array->uniform_locations();
701 break;
702 default:
703 slots = type->count_attribute_slots(false);
704 break;
705 }
706
707 return slots;
708 }
709
710 bool Converter::assignSlots() {
711 unsigned name;
712 unsigned index;
713
714 info->io.viewportId = -1;
715 info->numInputs = 0;
716
717 // we have to fixup the uniform locations for arrays
718 unsigned numImages = 0;
719 nir_foreach_variable(var, &nir->uniforms) {
720 const glsl_type *type = var->type;
721 if (!type->without_array()->is_image())
722 continue;
723 var->data.driver_location = numImages;
724 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
725 }
726
727 nir_foreach_variable(var, &nir->inputs) {
728 const glsl_type *type = var->type;
729 int slot = var->data.location;
730 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
731 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
732 : type->component_slots();
733 uint32_t frac = var->data.location_frac;
734 uint32_t vary = var->data.driver_location;
735
736 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
737 if (comp > 2)
738 slots *= 2;
739 }
740
741 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
742
743 switch(prog->getType()) {
744 case Program::TYPE_FRAGMENT:
745 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
746 for (uint16_t i = 0; i < slots; ++i) {
747 setInterpolate(&info->in[vary + i], var->data.interpolation,
748 var->data.centroid | var->data.sample, name);
749 }
750 break;
751 case Program::TYPE_GEOMETRY:
752 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
753 break;
754 case Program::TYPE_TESSELLATION_CONTROL:
755 case Program::TYPE_TESSELLATION_EVAL:
756 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
757 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
758 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
759 break;
760 case Program::TYPE_VERTEX:
761 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
762 switch (name) {
763 case TGSI_SEMANTIC_EDGEFLAG:
764 info->io.edgeFlagIn = vary;
765 break;
766 default:
767 break;
768 }
769 break;
770 default:
771 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
772 return false;
773 }
774
775 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
776 info->in[vary].id = vary;
777 info->in[vary].patch = var->data.patch;
778 info->in[vary].sn = name;
779 info->in[vary].si = index + i;
780 if (glsl_base_type_is_64bit(type->without_array()->base_type))
781 if (i & 0x1)
782 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
783 else
784 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
785 else
786 info->in[vary].mask |= ((1 << comp) - 1) << frac;
787 }
788 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
789 }
790
791 info->numOutputs = 0;
792 nir_foreach_variable(var, &nir->outputs) {
793 const glsl_type *type = var->type;
794 int slot = var->data.location;
795 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
796 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
797 : type->component_slots();
798 uint32_t frac = var->data.location_frac;
799 uint32_t vary = var->data.driver_location;
800
801 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
802 if (comp > 2)
803 slots *= 2;
804 }
805
806 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
807
808 switch(prog->getType()) {
809 case Program::TYPE_FRAGMENT:
810 frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
811 switch (name) {
812 case TGSI_SEMANTIC_COLOR:
813 if (!var->data.fb_fetch_output)
814 info->prop.fp.numColourResults++;
815 info->prop.fp.separateFragData = true;
816 // sometimes we get FRAG_RESULT_DATAX with data.index 0
817 // sometimes we get FRAG_RESULT_DATA0 with data.index X
818 index = index == 0 ? var->data.index : index;
819 break;
820 case TGSI_SEMANTIC_POSITION:
821 info->io.fragDepth = vary;
822 info->prop.fp.writesDepth = true;
823 break;
824 case TGSI_SEMANTIC_SAMPLEMASK:
825 info->io.sampleMask = vary;
826 break;
827 default:
828 break;
829 }
830 break;
831 case Program::TYPE_GEOMETRY:
832 case Program::TYPE_TESSELLATION_CONTROL:
833 case Program::TYPE_TESSELLATION_EVAL:
834 case Program::TYPE_VERTEX:
835 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
836
837 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
838 name != TGSI_SEMANTIC_TESSOUTER)
839 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
840
841 switch (name) {
842 case TGSI_SEMANTIC_CLIPDIST:
843 info->io.genUserClip = -1;
844 break;
845 case TGSI_SEMANTIC_EDGEFLAG:
846 info->io.edgeFlagOut = vary;
847 break;
848 default:
849 break;
850 }
851 break;
852 default:
853 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
854 return false;
855 }
856
857 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
858 info->out[vary].id = vary;
859 info->out[vary].patch = var->data.patch;
860 info->out[vary].sn = name;
861 info->out[vary].si = index + i;
862 if (glsl_base_type_is_64bit(type->without_array()->base_type))
863 if (i & 0x1)
864 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
865 else
866 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
867 else
868 info->out[vary].mask |= ((1 << comp) - 1) << frac;
869
870 if (nir->info.outputs_read & 1ll << slot)
871 info->out[vary].oread = 1;
872 }
873 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
874 }
875
876 info->numSysVals = 0;
877 for (uint8_t i = 0; i < 64; ++i) {
878 if (!(nir->info.system_values_read & 1ll << i))
879 continue;
880
881 system_val_to_tgsi_semantic(i, &name, &index);
882 info->sv[info->numSysVals].sn = name;
883 info->sv[info->numSysVals].si = index;
884 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
885
886 switch (i) {
887 case SYSTEM_VALUE_INSTANCE_ID:
888 info->io.instanceId = info->numSysVals;
889 break;
890 case SYSTEM_VALUE_TESS_LEVEL_INNER:
891 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
892 info->sv[info->numSysVals].patch = 1;
893 break;
894 case SYSTEM_VALUE_VERTEX_ID:
895 info->io.vertexId = info->numSysVals;
896 break;
897 default:
898 break;
899 }
900
901 info->numSysVals += 1;
902 }
903
904 if (info->io.genUserClip > 0) {
905 info->io.clipDistances = info->io.genUserClip;
906
907 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
908
909 for (unsigned int n = 0; n < nOut; ++n) {
910 unsigned int i = info->numOutputs++;
911 info->out[i].id = i;
912 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
913 info->out[i].si = n;
914 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
915 }
916 }
917
918 return info->assignSlots(info) == 0;
919 }
920
921 uint32_t
922 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
923 {
924 DataType ty;
925 int offset = nir_intrinsic_component(insn);
926 bool input;
927
928 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
929 ty = getDType(insn);
930 else
931 ty = getSType(insn->src[0], false, false);
932
933 switch (insn->intrinsic) {
934 case nir_intrinsic_load_input:
935 case nir_intrinsic_load_interpolated_input:
936 case nir_intrinsic_load_per_vertex_input:
937 input = true;
938 break;
939 case nir_intrinsic_load_output:
940 case nir_intrinsic_load_per_vertex_output:
941 case nir_intrinsic_store_output:
942 case nir_intrinsic_store_per_vertex_output:
943 input = false;
944 break;
945 default:
946 ERROR("unknown intrinsic in getSlotAddress %s",
947 nir_intrinsic_infos[insn->intrinsic].name);
948 input = false;
949 assert(false);
950 break;
951 }
952
953 if (typeSizeof(ty) == 8) {
954 slot *= 2;
955 slot += offset;
956 if (slot >= 4) {
957 idx += 1;
958 slot -= 4;
959 }
960 } else {
961 slot += offset;
962 }
963
964 assert(slot < 4);
965 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
966 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
967
968 const nv50_ir_varying *vary = input ? info->in : info->out;
969 return vary[idx].slot[slot] * 4;
970 }
971
972 bool
973 Converter::run()
974 {
975 bool progress;
976
977 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
978 nir_print_shader(nir, stderr);
979
980 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
981 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
982 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
983 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
984 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
985 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
986
987 do {
988 progress = false;
989 NIR_PASS(progress, nir, nir_copy_prop);
990 NIR_PASS(progress, nir, nir_opt_remove_phis);
991 NIR_PASS(progress, nir, nir_opt_trivial_continues);
992 NIR_PASS(progress, nir, nir_opt_cse);
993 NIR_PASS(progress, nir, nir_opt_algebraic);
994 NIR_PASS(progress, nir, nir_opt_constant_folding);
995 NIR_PASS(progress, nir, nir_copy_prop);
996 NIR_PASS(progress, nir, nir_opt_dce);
997 NIR_PASS(progress, nir, nir_opt_dead_cf);
998 } while (progress);
999
1000 NIR_PASS_V(nir, nir_lower_bool_to_int32);
1001 NIR_PASS_V(nir, nir_lower_locals_to_regs);
1002 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
1003 NIR_PASS_V(nir, nir_convert_from_ssa, true);
1004
1005 // Garbage collect dead instructions
1006 nir_sweep(nir);
1007
1008 if (!assignSlots()) {
1009 ERROR("Couldn't assign slots!\n");
1010 return false;
1011 }
1012
1013 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1014 nir_print_shader(nir, stderr);
1015
1016 return false;
1017 }
1018
1019 } // unnamed namespace
1020
1021 namespace nv50_ir {
1022
1023 bool
1024 Program::makeFromNIR(struct nv50_ir_prog_info *info)
1025 {
1026 nir_shader *nir = (nir_shader*)info->bin.source;
1027 Converter converter(this, nir, info);
1028 bool result = converter.run();
1029 if (!result)
1030 return result;
1031 LoweringHelper lowering;
1032 lowering.run(this);
1033 tlsSize = info->bin.tlsSpace;
1034 return result;
1035 }
1036
1037 } // namespace nv50_ir