nv50/ir/nir: add loadFrom and storeTo helper
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <vector>
40
41 namespace {
42
43 #if __cplusplus >= 201103L
44 using std::hash;
45 using std::unordered_map;
46 #else
47 using std::tr1::hash;
48 using std::tr1::unordered_map;
49 #endif
50
51 using namespace nv50_ir;
52
53 int
54 type_size(const struct glsl_type *type)
55 {
56 return glsl_count_attribute_slots(type, false);
57 }
58
// Holds the state used while turning a nir_shader into nv50 IR; it is
// instantiated and driven by Program::makeFromNIR().
class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   // runs the NIR lowering/optimization passes and assigns the I/O slots
   bool run();
private:
   // one LValue per component of a NIR register or SSA definition
   typedef std::vector<LValue *> LValues;
   // keyed by the index of the nir_register / nir_ssa_def
   typedef unordered_map<unsigned, LValues> NirDefMap;

   // return the LValues backing a NIR destination, creating them on first use
   LValues& convert(nir_alu_dest *);
   LValues& convert(nir_dest *);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   // return the Value of one component of a NIR source
   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value has not a constant part, the Value gets returned
   // through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);

   // address (slot number * 4) of a component of input/output number idx
   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   // sets the flat/linear/sc/centroid flags of a varying
   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   // emit a load from / a store to the given DataFile; 64 bit accesses are
   // split into two 32 bit ones in some cases (see the definitions)
   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   // classification of NIR ALU types and of the result type of an opcode
   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   // destination DataType of an instruction / of an opcode at a bit size
   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_op, uint8_t);

   // source DataTypes, derived from the bit size of the source
   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   // fills info->in, info->out and info->sv from the shader's variables
   bool assignSlots();

   // the shader being converted
   nir_shader *nir;

   // LValues created so far for SSA definitions and for registers
   NirDefMap ssaDefs;
   NirDefMap regDefs;
};
122
123 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
124 : ConverterCommon(prog, info),
125 nir(nir) {}
126
127 bool
128 Converter::isFloatType(nir_alu_type type)
129 {
130 return nir_alu_type_get_base_type(type) == nir_type_float;
131 }
132
133 bool
134 Converter::isSignedType(nir_alu_type type)
135 {
136 return nir_alu_type_get_base_type(type) == nir_type_int;
137 }
138
139 bool
140 Converter::isResultFloat(nir_op op)
141 {
142 const nir_op_info &info = nir_op_infos[op];
143 if (info.output_type != nir_type_invalid)
144 return isFloatType(info.output_type);
145
146 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
147 assert(false);
148 return true;
149 }
150
151 bool
152 Converter::isResultSigned(nir_op op)
153 {
154 switch (op) {
155 // there is no umul and we get wrong results if we treat all muls as signed
156 case nir_op_imul:
157 case nir_op_inot:
158 return false;
159 default:
160 const nir_op_info &info = nir_op_infos[op];
161 if (info.output_type != nir_type_invalid)
162 return isSignedType(info.output_type);
163 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
164 assert(false);
165 return true;
166 }
167 }
168
169 DataType
170 Converter::getDType(nir_alu_instr *insn)
171 {
172 if (insn->dest.dest.is_ssa)
173 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
174 else
175 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
176 }
177
178 DataType
179 Converter::getDType(nir_intrinsic_instr *insn)
180 {
181 if (insn->dest.is_ssa)
182 return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
183 else
184 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
185 }
186
187 DataType
188 Converter::getDType(nir_op op, uint8_t bitSize)
189 {
190 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
191 if (ty == TYPE_NONE) {
192 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
193 assert(false);
194 }
195 return ty;
196 }
197
198 std::vector<DataType>
199 Converter::getSTypes(nir_alu_instr *insn)
200 {
201 const nir_op_info &info = nir_op_infos[insn->op];
202 std::vector<DataType> res(info.num_inputs);
203
204 for (uint8_t i = 0; i < info.num_inputs; ++i) {
205 if (info.input_types[i] != nir_type_invalid) {
206 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
207 } else {
208 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
209 assert(false);
210 res[i] = TYPE_NONE;
211 break;
212 }
213 }
214
215 return res;
216 }
217
218 DataType
219 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
220 {
221 uint8_t bitSize;
222 if (src.is_ssa)
223 bitSize = src.ssa->bit_size;
224 else
225 bitSize = src.reg.reg->bit_size;
226
227 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
228 if (ty == TYPE_NONE) {
229 const char *str;
230 if (isFloat)
231 str = "float";
232 else if (isSigned)
233 str = "int";
234 else
235 str = "uint";
236 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
237 assert(false);
238 }
239 return ty;
240 }
241
242 Converter::LValues&
243 Converter::convert(nir_dest *dest)
244 {
245 if (dest->is_ssa)
246 return convert(&dest->ssa);
247 if (dest->reg.indirect) {
248 ERROR("no support for indirects.");
249 assert(false);
250 }
251 return convert(dest->reg.reg);
252 }
253
254 Converter::LValues&
255 Converter::convert(nir_register *reg)
256 {
257 NirDefMap::iterator it = regDefs.find(reg->index);
258 if (it != regDefs.end())
259 return it->second;
260
261 LValues newDef(reg->num_components);
262 for (uint8_t i = 0; i < reg->num_components; i++)
263 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
264 return regDefs[reg->index] = newDef;
265 }
266
267 Converter::LValues&
268 Converter::convert(nir_ssa_def *def)
269 {
270 NirDefMap::iterator it = ssaDefs.find(def->index);
271 if (it != ssaDefs.end())
272 return it->second;
273
274 LValues newDef(def->num_components);
275 for (uint8_t i = 0; i < def->num_components; i++)
276 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
277 return ssaDefs[def->index] = newDef;
278 }
279
280 Value*
281 Converter::getSrc(nir_alu_src *src, uint8_t component)
282 {
283 if (src->abs || src->negate) {
284 ERROR("modifiers currently not supported on nir_alu_src\n");
285 assert(false);
286 }
287 return getSrc(&src->src, src->swizzle[component]);
288 }
289
290 Value*
291 Converter::getSrc(nir_register *reg, uint8_t idx)
292 {
293 NirDefMap::iterator it = regDefs.find(reg->index);
294 if (it == regDefs.end())
295 return convert(reg)[idx];
296 return it->second[idx];
297 }
298
299 Value*
300 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
301 {
302 if (src->is_ssa)
303 return getSrc(src->ssa, idx);
304
305 if (src->reg.indirect) {
306 if (indirect)
307 return getSrc(src->reg.indirect, idx);
308 ERROR("no support for indirects.");
309 assert(false);
310 return NULL;
311 }
312
313 return getSrc(src->reg.reg, idx);
314 }
315
316 Value*
317 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
318 {
319 NirDefMap::iterator it = ssaDefs.find(src->index);
320 if (it == ssaDefs.end()) {
321 ERROR("SSA value %u not found\n", src->index);
322 assert(false);
323 return NULL;
324 }
325 return it->second[idx];
326 }
327
328 uint32_t
329 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
330 {
331 nir_const_value *offset = nir_src_as_const_value(*src);
332
333 if (offset) {
334 indirect = NULL;
335 return offset->u32[0];
336 }
337
338 indirect = getSrc(src, idx, true);
339 return 0;
340 }
341
342 uint32_t
343 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
344 {
345 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
346 if (indirect)
347 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
348 return idx;
349 }
350
351 static void
352 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
353 {
354 assert(name && index);
355
356 if (slot >= VERT_ATTRIB_MAX) {
357 ERROR("invalid varying slot %u\n", slot);
358 assert(false);
359 return;
360 }
361
362 if (slot >= VERT_ATTRIB_GENERIC0 &&
363 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
364 *name = TGSI_SEMANTIC_GENERIC;
365 *index = slot - VERT_ATTRIB_GENERIC0;
366 return;
367 }
368
369 if (slot >= VERT_ATTRIB_TEX0 &&
370 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
371 *name = TGSI_SEMANTIC_TEXCOORD;
372 *index = slot - VERT_ATTRIB_TEX0;
373 return;
374 }
375
376 switch (slot) {
377 case VERT_ATTRIB_COLOR0:
378 *name = TGSI_SEMANTIC_COLOR;
379 *index = 0;
380 break;
381 case VERT_ATTRIB_COLOR1:
382 *name = TGSI_SEMANTIC_COLOR;
383 *index = 1;
384 break;
385 case VERT_ATTRIB_EDGEFLAG:
386 *name = TGSI_SEMANTIC_EDGEFLAG;
387 *index = 0;
388 break;
389 case VERT_ATTRIB_FOG:
390 *name = TGSI_SEMANTIC_FOG;
391 *index = 0;
392 break;
393 case VERT_ATTRIB_NORMAL:
394 *name = TGSI_SEMANTIC_NORMAL;
395 *index = 0;
396 break;
397 case VERT_ATTRIB_POS:
398 *name = TGSI_SEMANTIC_POSITION;
399 *index = 0;
400 break;
401 case VERT_ATTRIB_POINT_SIZE:
402 *name = TGSI_SEMANTIC_PSIZE;
403 *index = 0;
404 break;
405 default:
406 ERROR("unknown vert attrib slot %u\n", slot);
407 assert(false);
408 break;
409 }
410 }
411
412 static void
413 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
414 {
415 assert(name && index);
416
417 if (slot >= VARYING_SLOT_TESS_MAX) {
418 ERROR("invalid varying slot %u\n", slot);
419 assert(false);
420 return;
421 }
422
423 if (slot >= VARYING_SLOT_PATCH0) {
424 *name = TGSI_SEMANTIC_PATCH;
425 *index = slot - VARYING_SLOT_PATCH0;
426 return;
427 }
428
429 if (slot >= VARYING_SLOT_VAR0) {
430 *name = TGSI_SEMANTIC_GENERIC;
431 *index = slot - VARYING_SLOT_VAR0;
432 return;
433 }
434
435 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
436 *name = TGSI_SEMANTIC_TEXCOORD;
437 *index = slot - VARYING_SLOT_TEX0;
438 return;
439 }
440
441 switch (slot) {
442 case VARYING_SLOT_BFC0:
443 *name = TGSI_SEMANTIC_BCOLOR;
444 *index = 0;
445 break;
446 case VARYING_SLOT_BFC1:
447 *name = TGSI_SEMANTIC_BCOLOR;
448 *index = 1;
449 break;
450 case VARYING_SLOT_CLIP_DIST0:
451 *name = TGSI_SEMANTIC_CLIPDIST;
452 *index = 0;
453 break;
454 case VARYING_SLOT_CLIP_DIST1:
455 *name = TGSI_SEMANTIC_CLIPDIST;
456 *index = 1;
457 break;
458 case VARYING_SLOT_CLIP_VERTEX:
459 *name = TGSI_SEMANTIC_CLIPVERTEX;
460 *index = 0;
461 break;
462 case VARYING_SLOT_COL0:
463 *name = TGSI_SEMANTIC_COLOR;
464 *index = 0;
465 break;
466 case VARYING_SLOT_COL1:
467 *name = TGSI_SEMANTIC_COLOR;
468 *index = 1;
469 break;
470 case VARYING_SLOT_EDGE:
471 *name = TGSI_SEMANTIC_EDGEFLAG;
472 *index = 0;
473 break;
474 case VARYING_SLOT_FACE:
475 *name = TGSI_SEMANTIC_FACE;
476 *index = 0;
477 break;
478 case VARYING_SLOT_FOGC:
479 *name = TGSI_SEMANTIC_FOG;
480 *index = 0;
481 break;
482 case VARYING_SLOT_LAYER:
483 *name = TGSI_SEMANTIC_LAYER;
484 *index = 0;
485 break;
486 case VARYING_SLOT_PNTC:
487 *name = TGSI_SEMANTIC_PCOORD;
488 *index = 0;
489 break;
490 case VARYING_SLOT_POS:
491 *name = TGSI_SEMANTIC_POSITION;
492 *index = 0;
493 break;
494 case VARYING_SLOT_PRIMITIVE_ID:
495 *name = TGSI_SEMANTIC_PRIMID;
496 *index = 0;
497 break;
498 case VARYING_SLOT_PSIZ:
499 *name = TGSI_SEMANTIC_PSIZE;
500 *index = 0;
501 break;
502 case VARYING_SLOT_TESS_LEVEL_INNER:
503 *name = TGSI_SEMANTIC_TESSINNER;
504 *index = 0;
505 break;
506 case VARYING_SLOT_TESS_LEVEL_OUTER:
507 *name = TGSI_SEMANTIC_TESSOUTER;
508 *index = 0;
509 break;
510 case VARYING_SLOT_VIEWPORT:
511 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
512 *index = 0;
513 break;
514 default:
515 ERROR("unknown varying slot %u\n", slot);
516 assert(false);
517 break;
518 }
519 }
520
521 static void
522 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
523 {
524 if (slot >= FRAG_RESULT_DATA0) {
525 *name = TGSI_SEMANTIC_COLOR;
526 *index = slot - FRAG_RESULT_COLOR - 2; // intentional
527 return;
528 }
529
530 switch (slot) {
531 case FRAG_RESULT_COLOR:
532 *name = TGSI_SEMANTIC_COLOR;
533 *index = 0;
534 break;
535 case FRAG_RESULT_DEPTH:
536 *name = TGSI_SEMANTIC_POSITION;
537 *index = 0;
538 break;
539 case FRAG_RESULT_SAMPLE_MASK:
540 *name = TGSI_SEMANTIC_SAMPLEMASK;
541 *index = 0;
542 break;
543 default:
544 ERROR("unknown frag result slot %u\n", slot);
545 assert(false);
546 break;
547 }
548 }
549
550 // copy of _mesa_sysval_to_semantic
551 static void
552 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
553 {
554 *index = 0;
555 switch (val) {
556 // Vertex shader
557 case SYSTEM_VALUE_VERTEX_ID:
558 *name = TGSI_SEMANTIC_VERTEXID;
559 break;
560 case SYSTEM_VALUE_INSTANCE_ID:
561 *name = TGSI_SEMANTIC_INSTANCEID;
562 break;
563 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
564 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
565 break;
566 case SYSTEM_VALUE_BASE_VERTEX:
567 *name = TGSI_SEMANTIC_BASEVERTEX;
568 break;
569 case SYSTEM_VALUE_BASE_INSTANCE:
570 *name = TGSI_SEMANTIC_BASEINSTANCE;
571 break;
572 case SYSTEM_VALUE_DRAW_ID:
573 *name = TGSI_SEMANTIC_DRAWID;
574 break;
575
576 // Geometry shader
577 case SYSTEM_VALUE_INVOCATION_ID:
578 *name = TGSI_SEMANTIC_INVOCATIONID;
579 break;
580
581 // Fragment shader
582 case SYSTEM_VALUE_FRAG_COORD:
583 *name = TGSI_SEMANTIC_POSITION;
584 break;
585 case SYSTEM_VALUE_FRONT_FACE:
586 *name = TGSI_SEMANTIC_FACE;
587 break;
588 case SYSTEM_VALUE_SAMPLE_ID:
589 *name = TGSI_SEMANTIC_SAMPLEID;
590 break;
591 case SYSTEM_VALUE_SAMPLE_POS:
592 *name = TGSI_SEMANTIC_SAMPLEPOS;
593 break;
594 case SYSTEM_VALUE_SAMPLE_MASK_IN:
595 *name = TGSI_SEMANTIC_SAMPLEMASK;
596 break;
597 case SYSTEM_VALUE_HELPER_INVOCATION:
598 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
599 break;
600
601 // Tessellation shader
602 case SYSTEM_VALUE_TESS_COORD:
603 *name = TGSI_SEMANTIC_TESSCOORD;
604 break;
605 case SYSTEM_VALUE_VERTICES_IN:
606 *name = TGSI_SEMANTIC_VERTICESIN;
607 break;
608 case SYSTEM_VALUE_PRIMITIVE_ID:
609 *name = TGSI_SEMANTIC_PRIMID;
610 break;
611 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
612 *name = TGSI_SEMANTIC_TESSOUTER;
613 break;
614 case SYSTEM_VALUE_TESS_LEVEL_INNER:
615 *name = TGSI_SEMANTIC_TESSINNER;
616 break;
617
618 // Compute shader
619 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
620 *name = TGSI_SEMANTIC_THREAD_ID;
621 break;
622 case SYSTEM_VALUE_WORK_GROUP_ID:
623 *name = TGSI_SEMANTIC_BLOCK_ID;
624 break;
625 case SYSTEM_VALUE_NUM_WORK_GROUPS:
626 *name = TGSI_SEMANTIC_GRID_SIZE;
627 break;
628 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
629 *name = TGSI_SEMANTIC_BLOCK_SIZE;
630 break;
631
632 // ARB_shader_ballot
633 case SYSTEM_VALUE_SUBGROUP_SIZE:
634 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
635 break;
636 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
637 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
638 break;
639 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
640 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
641 break;
642 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
643 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
644 break;
645 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
646 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
647 break;
648 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
649 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
650 break;
651 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
652 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
653 break;
654
655 default:
656 ERROR("unknown system value %u\n", val);
657 assert(false);
658 break;
659 }
660 }
661
662 void
663 Converter::setInterpolate(nv50_ir_varying *var,
664 uint8_t mode,
665 bool centroid,
666 unsigned semantic)
667 {
668 switch (mode) {
669 case INTERP_MODE_FLAT:
670 var->flat = 1;
671 break;
672 case INTERP_MODE_NONE:
673 if (semantic == TGSI_SEMANTIC_COLOR)
674 var->sc = 1;
675 else if (semantic == TGSI_SEMANTIC_POSITION)
676 var->linear = 1;
677 break;
678 case INTERP_MODE_NOPERSPECTIVE:
679 var->linear = 1;
680 break;
681 case INTERP_MODE_SMOOTH:
682 break;
683 }
684 var->centroid = centroid;
685 }
686
687 static uint16_t
688 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
689 bool input, const nir_variable *var)
690 {
691 if (!type->is_array())
692 return type->count_attribute_slots(false);
693
694 uint16_t slots;
695 switch (stage) {
696 case Program::TYPE_GEOMETRY:
697 slots = type->uniform_locations();
698 if (input)
699 slots /= info.gs.vertices_in;
700 break;
701 case Program::TYPE_TESSELLATION_CONTROL:
702 case Program::TYPE_TESSELLATION_EVAL:
703 // remove first dimension
704 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
705 slots = type->uniform_locations();
706 else
707 slots = type->fields.array->uniform_locations();
708 break;
709 default:
710 slots = type->count_attribute_slots(false);
711 break;
712 }
713
714 return slots;
715 }
716
717 bool Converter::assignSlots() {
718 unsigned name;
719 unsigned index;
720
721 info->io.viewportId = -1;
722 info->numInputs = 0;
723
724 // we have to fixup the uniform locations for arrays
725 unsigned numImages = 0;
726 nir_foreach_variable(var, &nir->uniforms) {
727 const glsl_type *type = var->type;
728 if (!type->without_array()->is_image())
729 continue;
730 var->data.driver_location = numImages;
731 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
732 }
733
734 nir_foreach_variable(var, &nir->inputs) {
735 const glsl_type *type = var->type;
736 int slot = var->data.location;
737 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
738 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
739 : type->component_slots();
740 uint32_t frac = var->data.location_frac;
741 uint32_t vary = var->data.driver_location;
742
743 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
744 if (comp > 2)
745 slots *= 2;
746 }
747
748 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
749
750 switch(prog->getType()) {
751 case Program::TYPE_FRAGMENT:
752 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
753 for (uint16_t i = 0; i < slots; ++i) {
754 setInterpolate(&info->in[vary + i], var->data.interpolation,
755 var->data.centroid | var->data.sample, name);
756 }
757 break;
758 case Program::TYPE_GEOMETRY:
759 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
760 break;
761 case Program::TYPE_TESSELLATION_CONTROL:
762 case Program::TYPE_TESSELLATION_EVAL:
763 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
764 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
765 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
766 break;
767 case Program::TYPE_VERTEX:
768 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
769 switch (name) {
770 case TGSI_SEMANTIC_EDGEFLAG:
771 info->io.edgeFlagIn = vary;
772 break;
773 default:
774 break;
775 }
776 break;
777 default:
778 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
779 return false;
780 }
781
782 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
783 info->in[vary].id = vary;
784 info->in[vary].patch = var->data.patch;
785 info->in[vary].sn = name;
786 info->in[vary].si = index + i;
787 if (glsl_base_type_is_64bit(type->without_array()->base_type))
788 if (i & 0x1)
789 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
790 else
791 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
792 else
793 info->in[vary].mask |= ((1 << comp) - 1) << frac;
794 }
795 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
796 }
797
798 info->numOutputs = 0;
799 nir_foreach_variable(var, &nir->outputs) {
800 const glsl_type *type = var->type;
801 int slot = var->data.location;
802 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
803 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
804 : type->component_slots();
805 uint32_t frac = var->data.location_frac;
806 uint32_t vary = var->data.driver_location;
807
808 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
809 if (comp > 2)
810 slots *= 2;
811 }
812
813 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
814
815 switch(prog->getType()) {
816 case Program::TYPE_FRAGMENT:
817 frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
818 switch (name) {
819 case TGSI_SEMANTIC_COLOR:
820 if (!var->data.fb_fetch_output)
821 info->prop.fp.numColourResults++;
822 info->prop.fp.separateFragData = true;
823 // sometimes we get FRAG_RESULT_DATAX with data.index 0
824 // sometimes we get FRAG_RESULT_DATA0 with data.index X
825 index = index == 0 ? var->data.index : index;
826 break;
827 case TGSI_SEMANTIC_POSITION:
828 info->io.fragDepth = vary;
829 info->prop.fp.writesDepth = true;
830 break;
831 case TGSI_SEMANTIC_SAMPLEMASK:
832 info->io.sampleMask = vary;
833 break;
834 default:
835 break;
836 }
837 break;
838 case Program::TYPE_GEOMETRY:
839 case Program::TYPE_TESSELLATION_CONTROL:
840 case Program::TYPE_TESSELLATION_EVAL:
841 case Program::TYPE_VERTEX:
842 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
843
844 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
845 name != TGSI_SEMANTIC_TESSOUTER)
846 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
847
848 switch (name) {
849 case TGSI_SEMANTIC_CLIPDIST:
850 info->io.genUserClip = -1;
851 break;
852 case TGSI_SEMANTIC_EDGEFLAG:
853 info->io.edgeFlagOut = vary;
854 break;
855 default:
856 break;
857 }
858 break;
859 default:
860 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
861 return false;
862 }
863
864 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
865 info->out[vary].id = vary;
866 info->out[vary].patch = var->data.patch;
867 info->out[vary].sn = name;
868 info->out[vary].si = index + i;
869 if (glsl_base_type_is_64bit(type->without_array()->base_type))
870 if (i & 0x1)
871 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
872 else
873 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
874 else
875 info->out[vary].mask |= ((1 << comp) - 1) << frac;
876
877 if (nir->info.outputs_read & 1ll << slot)
878 info->out[vary].oread = 1;
879 }
880 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
881 }
882
883 info->numSysVals = 0;
884 for (uint8_t i = 0; i < 64; ++i) {
885 if (!(nir->info.system_values_read & 1ll << i))
886 continue;
887
888 system_val_to_tgsi_semantic(i, &name, &index);
889 info->sv[info->numSysVals].sn = name;
890 info->sv[info->numSysVals].si = index;
891 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
892
893 switch (i) {
894 case SYSTEM_VALUE_INSTANCE_ID:
895 info->io.instanceId = info->numSysVals;
896 break;
897 case SYSTEM_VALUE_TESS_LEVEL_INNER:
898 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
899 info->sv[info->numSysVals].patch = 1;
900 break;
901 case SYSTEM_VALUE_VERTEX_ID:
902 info->io.vertexId = info->numSysVals;
903 break;
904 default:
905 break;
906 }
907
908 info->numSysVals += 1;
909 }
910
911 if (info->io.genUserClip > 0) {
912 info->io.clipDistances = info->io.genUserClip;
913
914 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
915
916 for (unsigned int n = 0; n < nOut; ++n) {
917 unsigned int i = info->numOutputs++;
918 info->out[i].id = i;
919 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
920 info->out[i].si = n;
921 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
922 }
923 }
924
925 return info->assignSlots(info) == 0;
926 }
927
928 uint32_t
929 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
930 {
931 DataType ty;
932 int offset = nir_intrinsic_component(insn);
933 bool input;
934
935 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
936 ty = getDType(insn);
937 else
938 ty = getSType(insn->src[0], false, false);
939
940 switch (insn->intrinsic) {
941 case nir_intrinsic_load_input:
942 case nir_intrinsic_load_interpolated_input:
943 case nir_intrinsic_load_per_vertex_input:
944 input = true;
945 break;
946 case nir_intrinsic_load_output:
947 case nir_intrinsic_load_per_vertex_output:
948 case nir_intrinsic_store_output:
949 case nir_intrinsic_store_per_vertex_output:
950 input = false;
951 break;
952 default:
953 ERROR("unknown intrinsic in getSlotAddress %s",
954 nir_intrinsic_infos[insn->intrinsic].name);
955 input = false;
956 assert(false);
957 break;
958 }
959
960 if (typeSizeof(ty) == 8) {
961 slot *= 2;
962 slot += offset;
963 if (slot >= 4) {
964 idx += 1;
965 slot -= 4;
966 }
967 } else {
968 slot += offset;
969 }
970
971 assert(slot < 4);
972 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
973 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
974
975 const nv50_ir_varying *vary = input ? info->in : info->out;
976 return vary[idx].slot[slot] * 4;
977 }
978
979 Instruction *
980 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
981 uint32_t base, uint8_t c, Value *indirect0,
982 Value *indirect1, bool patch)
983 {
984 unsigned int tySize = typeSizeof(ty);
985
986 if (tySize == 8 &&
987 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
988 Value *lo = getSSA();
989 Value *hi = getSSA();
990
991 Instruction *loi =
992 mkLoad(TYPE_U32, lo,
993 mkSymbol(file, i, TYPE_U32, base + c * tySize),
994 indirect0);
995 loi->setIndirect(0, 1, indirect1);
996 loi->perPatch = patch;
997
998 Instruction *hii =
999 mkLoad(TYPE_U32, hi,
1000 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1001 indirect0);
1002 hii->setIndirect(0, 1, indirect1);
1003 hii->perPatch = patch;
1004
1005 return mkOp2(OP_MERGE, ty, def, lo, hi);
1006 } else {
1007 Instruction *ld =
1008 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1009 ld->setIndirect(0, 1, indirect1);
1010 ld->perPatch = patch;
1011 return ld;
1012 }
1013 }
1014
1015 void
1016 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1017 DataType ty, Value *src, uint8_t idx, uint8_t c,
1018 Value *indirect0, Value *indirect1)
1019 {
1020 uint8_t size = typeSizeof(ty);
1021 uint32_t address = getSlotAddress(insn, idx, c);
1022
1023 if (size == 8 && indirect0) {
1024 Value *split[2];
1025 mkSplit(split, 4, src);
1026
1027 if (op == OP_EXPORT) {
1028 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1029 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1030 }
1031
1032 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1033 split[0])->perPatch = info->out[idx].patch;
1034 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1035 split[1])->perPatch = info->out[idx].patch;
1036 } else {
1037 if (op == OP_EXPORT)
1038 src = mkMov(getSSA(size), src, ty)->getDef(0);
1039 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1040 src)->perPatch = info->out[idx].patch;
1041 }
1042 }
1043
1044 bool
1045 Converter::run()
1046 {
1047 bool progress;
1048
1049 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1050 nir_print_shader(nir, stderr);
1051
1052 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
1053 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
1054 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
1055 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1056 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
1057 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
1058
1059 do {
1060 progress = false;
1061 NIR_PASS(progress, nir, nir_copy_prop);
1062 NIR_PASS(progress, nir, nir_opt_remove_phis);
1063 NIR_PASS(progress, nir, nir_opt_trivial_continues);
1064 NIR_PASS(progress, nir, nir_opt_cse);
1065 NIR_PASS(progress, nir, nir_opt_algebraic);
1066 NIR_PASS(progress, nir, nir_opt_constant_folding);
1067 NIR_PASS(progress, nir, nir_copy_prop);
1068 NIR_PASS(progress, nir, nir_opt_dce);
1069 NIR_PASS(progress, nir, nir_opt_dead_cf);
1070 } while (progress);
1071
1072 NIR_PASS_V(nir, nir_lower_bool_to_int32);
1073 NIR_PASS_V(nir, nir_lower_locals_to_regs);
1074 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
1075 NIR_PASS_V(nir, nir_convert_from_ssa, true);
1076
1077 // Garbage collect dead instructions
1078 nir_sweep(nir);
1079
1080 if (!assignSlots()) {
1081 ERROR("Couldn't assign slots!\n");
1082 return false;
1083 }
1084
1085 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1086 nir_print_shader(nir, stderr);
1087
1088 return false;
1089 }
1090
1091 } // unnamed namespace
1092
1093 namespace nv50_ir {
1094
1095 bool
1096 Program::makeFromNIR(struct nv50_ir_prog_info *info)
1097 {
1098 nir_shader *nir = (nir_shader*)info->bin.source;
1099 Converter converter(this, nir, info);
1100 bool result = converter.run();
1101 if (!result)
1102 return result;
1103 LoweringHelper lowering;
1104 lowering.run(this);
1105 tlsSize = info->bin.tlsSpace;
1106 return result;
1107 }
1108
1109 } // namespace nv50_ir