nv50/ir/nir: implement loading system values
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <vector>
40
41 namespace {
42
43 #if __cplusplus >= 201103L
44 using std::hash;
45 using std::unordered_map;
46 #else
47 using std::tr1::hash;
48 using std::tr1::unordered_map;
49 #endif
50
51 using namespace nv50_ir;
52
// Number of attribute slots a GLSL type occupies, counting arrays per
// element and without doubling rows for vertex-shader 64-bit inputs
// (second argument is false).
int
type_size(const struct glsl_type *type)
{
   return glsl_count_attribute_slots(type, false);
}
58
// Translates a NIR shader into nv50 IR. One instance per shader; run()
// drives parsing, slot assignment and per-instruction emission.
class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   typedef std::vector<LValue*> LValues;
   // Maps a NIR ssa/register index to its per-component LValues.
   typedef unordered_map<unsigned, LValues> NirDefMap;
   // Maps a nir_block index to the BasicBlock emitted for it.
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value has not a constant part, the Value gets returned
   // through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);

   // Byte address of an input/output varying slot for load/store emission.
   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   // Emit a (possibly 64-bit split) load from the given file; stores go
   // through storeTo, which mirrors the 64-bit splitting.
   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   operation getOperation(nir_op);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);

   nir_shader *nir;

   NirDefMap ssaDefs;
   NirDefMap regDefs;
   NirBlockMap blocks;
   unsigned int curLoopDepth;   // nesting depth while visiting nir_loops

   BasicBlock *exit;            // shader epilogue block
   Value *zero;                 // cached immediate 0 for reuse

   int clipVertexOutput;        // output index used for clip-vertex lowering, -1 if none

   // per-stage scratch state; only the members for the current stage are used
   union {
      struct {
         Value *position;
      } fp;
   };
};
157
158 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
159 : ConverterCommon(prog, info),
160 nir(nir),
161 curLoopDepth(0),
162 clipVertexOutput(-1)
163 {
164 zero = mkImm((uint32_t)0);
165 }
166
167 BasicBlock *
168 Converter::convert(nir_block *block)
169 {
170 NirBlockMap::iterator it = blocks.find(block->index);
171 if (it != blocks.end())
172 return it->second;
173
174 BasicBlock *bb = new BasicBlock(func);
175 blocks[block->index] = bb;
176 return bb;
177 }
178
179 bool
180 Converter::isFloatType(nir_alu_type type)
181 {
182 return nir_alu_type_get_base_type(type) == nir_type_float;
183 }
184
185 bool
186 Converter::isSignedType(nir_alu_type type)
187 {
188 return nir_alu_type_get_base_type(type) == nir_type_int;
189 }
190
191 bool
192 Converter::isResultFloat(nir_op op)
193 {
194 const nir_op_info &info = nir_op_infos[op];
195 if (info.output_type != nir_type_invalid)
196 return isFloatType(info.output_type);
197
198 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
199 assert(false);
200 return true;
201 }
202
203 bool
204 Converter::isResultSigned(nir_op op)
205 {
206 switch (op) {
207 // there is no umul and we get wrong results if we treat all muls as signed
208 case nir_op_imul:
209 case nir_op_inot:
210 return false;
211 default:
212 const nir_op_info &info = nir_op_infos[op];
213 if (info.output_type != nir_type_invalid)
214 return isSignedType(info.output_type);
215 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
216 assert(false);
217 return true;
218 }
219 }
220
221 DataType
222 Converter::getDType(nir_alu_instr *insn)
223 {
224 if (insn->dest.dest.is_ssa)
225 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
226 else
227 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
228 }
229
230 DataType
231 Converter::getDType(nir_intrinsic_instr *insn)
232 {
233 if (insn->dest.is_ssa)
234 return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
235 else
236 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
237 }
238
239 DataType
240 Converter::getDType(nir_op op, uint8_t bitSize)
241 {
242 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
243 if (ty == TYPE_NONE) {
244 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
245 assert(false);
246 }
247 return ty;
248 }
249
250 std::vector<DataType>
251 Converter::getSTypes(nir_alu_instr *insn)
252 {
253 const nir_op_info &info = nir_op_infos[insn->op];
254 std::vector<DataType> res(info.num_inputs);
255
256 for (uint8_t i = 0; i < info.num_inputs; ++i) {
257 if (info.input_types[i] != nir_type_invalid) {
258 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
259 } else {
260 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
261 assert(false);
262 res[i] = TYPE_NONE;
263 break;
264 }
265 }
266
267 return res;
268 }
269
270 DataType
271 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
272 {
273 uint8_t bitSize;
274 if (src.is_ssa)
275 bitSize = src.ssa->bit_size;
276 else
277 bitSize = src.reg.reg->bit_size;
278
279 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
280 if (ty == TYPE_NONE) {
281 const char *str;
282 if (isFloat)
283 str = "float";
284 else if (isSigned)
285 str = "int";
286 else
287 str = "uint";
288 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
289 assert(false);
290 }
291 return ty;
292 }
293
// Map a nir_op onto the nv50 IR operation that implements it. Type
// distinctions (float vs int variants) are carried by the operand
// DataTypes, so several nir ops collapse onto one IR op here; sub-op
// details (e.g. mul_high) are handled separately in getSubOp().
operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_fand:
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   // all numeric conversions become a single CVT; source/dest DataTypes
   // select the concrete conversion
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   // coarse/fine derivative variants are distinguished later via subops
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   // 64-bit value built from two 32-bit halves
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_fnot:
   case nir_op_inot:
      return OP_NOT;
   case nir_op_for:
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   // all comparisons are OP_SET; the condition code comes from getCondCode()
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_fsub:
   case nir_op_isub:
      return OP_SUB;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_fxor:
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}
422
423 operation
424 Converter::preOperationNeeded(nir_op op)
425 {
426 switch (op) {
427 case nir_op_fcos:
428 case nir_op_fsin:
429 return OP_PRESIN;
430 default:
431 return OP_NOP;
432 }
433 }
434
435 int
436 Converter::getSubOp(nir_op op)
437 {
438 switch (op) {
439 case nir_op_imul_high:
440 case nir_op_umul_high:
441 return NV50_IR_SUBOP_MUL_HIGH;
442 default:
443 return 0;
444 }
445 }
446
// Condition code for a nir comparison op (all of which become OP_SET,
// see getOperation). Float "not equal" uses the unordered CC_NEU so NaN
// compares as not-equal; integer uses plain CC_NE.
CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}
472
// An ALU destination just wraps a nir_dest; forward to that overload.
Converter::LValues&
Converter::convert(nir_alu_dest *dest)
{
   return convert(&dest->dest);
}
478
479 Converter::LValues&
480 Converter::convert(nir_dest *dest)
481 {
482 if (dest->is_ssa)
483 return convert(&dest->ssa);
484 if (dest->reg.indirect) {
485 ERROR("no support for indirects.");
486 assert(false);
487 }
488 return convert(dest->reg.reg);
489 }
490
491 Converter::LValues&
492 Converter::convert(nir_register *reg)
493 {
494 NirDefMap::iterator it = regDefs.find(reg->index);
495 if (it != regDefs.end())
496 return it->second;
497
498 LValues newDef(reg->num_components);
499 for (uint8_t i = 0; i < reg->num_components; i++)
500 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
501 return regDefs[reg->index] = newDef;
502 }
503
504 Converter::LValues&
505 Converter::convert(nir_ssa_def *def)
506 {
507 NirDefMap::iterator it = ssaDefs.find(def->index);
508 if (it != ssaDefs.end())
509 return it->second;
510
511 LValues newDef(def->num_components);
512 for (uint8_t i = 0; i < def->num_components; i++)
513 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
514 return ssaDefs[def->index] = newDef;
515 }
516
517 Value*
518 Converter::getSrc(nir_alu_src *src, uint8_t component)
519 {
520 if (src->abs || src->negate) {
521 ERROR("modifiers currently not supported on nir_alu_src\n");
522 assert(false);
523 }
524 return getSrc(&src->src, src->swizzle[component]);
525 }
526
527 Value*
528 Converter::getSrc(nir_register *reg, uint8_t idx)
529 {
530 NirDefMap::iterator it = regDefs.find(reg->index);
531 if (it == regDefs.end())
532 return convert(reg)[idx];
533 return it->second[idx];
534 }
535
536 Value*
537 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
538 {
539 if (src->is_ssa)
540 return getSrc(src->ssa, idx);
541
542 if (src->reg.indirect) {
543 if (indirect)
544 return getSrc(src->reg.indirect, idx);
545 ERROR("no support for indirects.");
546 assert(false);
547 return NULL;
548 }
549
550 return getSrc(src->reg.reg, idx);
551 }
552
553 Value*
554 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
555 {
556 NirDefMap::iterator it = ssaDefs.find(src->index);
557 if (it == ssaDefs.end()) {
558 ERROR("SSA value %u not found\n", src->index);
559 assert(false);
560 return NULL;
561 }
562 return it->second[idx];
563 }
564
565 uint32_t
566 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
567 {
568 nir_const_value *offset = nir_src_as_const_value(*src);
569
570 if (offset) {
571 indirect = NULL;
572 return offset->u32[0];
573 }
574
575 indirect = getSrc(src, idx, true);
576 return 0;
577 }
578
// As above, but for an intrinsic source: folds in the intrinsic's base
// and scales any dynamic part by 16 (shl 4) to convert a slot index
// into a byte address, placing it in the address register file.
uint32_t
Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
{
   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
   if (indirect)
      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
   return idx;
}
587
// Translate a vertex-attribute slot into a TGSI semantic name/index
// pair. Generic and texcoord ranges are handled by offset; the rest go
// through an explicit mapping.
static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}
648
// Translate a varying slot into a TGSI semantic name/index pair. The
// patch/generic/texcoord ranges are checked in decreasing slot order so
// each range test only needs a lower bound; the rest use an explicit
// mapping.
static void
varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VARYING_SLOT_TESS_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VARYING_SLOT_PATCH0) {
      *name = TGSI_SEMANTIC_PATCH;
      *index = slot - VARYING_SLOT_PATCH0;
      return;
   }

   if (slot >= VARYING_SLOT_VAR0) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VARYING_SLOT_VAR0;
      return;
   }

   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VARYING_SLOT_TEX0;
      return;
   }

   switch (slot) {
   case VARYING_SLOT_BFC0:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 0;
      break;
   case VARYING_SLOT_BFC1:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_DIST0:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 0;
      break;
   case VARYING_SLOT_CLIP_DIST1:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_VERTEX:
      *name = TGSI_SEMANTIC_CLIPVERTEX;
      *index = 0;
      break;
   case VARYING_SLOT_COL0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VARYING_SLOT_COL1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VARYING_SLOT_EDGE:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VARYING_SLOT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      *index = 0;
      break;
   case VARYING_SLOT_FOGC:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VARYING_SLOT_LAYER:
      *name = TGSI_SEMANTIC_LAYER;
      *index = 0;
      break;
   case VARYING_SLOT_PNTC:
      *name = TGSI_SEMANTIC_PCOORD;
      *index = 0;
      break;
   case VARYING_SLOT_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VARYING_SLOT_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      *index = 0;
      break;
   case VARYING_SLOT_PSIZ:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      *index = 0;
      break;
   case VARYING_SLOT_VIEWPORT:
      *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
      *index = 0;
      break;
   default:
      ERROR("unknown varying slot %u\n", slot);
      assert(false);
      break;
   }
}
757
// Translate a fragment-shader result slot into a TGSI semantic
// name/index pair.
static void
frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
{
   if (slot >= FRAG_RESULT_DATA0) {
      *name = TGSI_SEMANTIC_COLOR;
      // intentional: "- FRAG_RESULT_COLOR - 2" presumably rebases DATAn
      // slots to a zero-based color index (i.e. slot - FRAG_RESULT_DATA0)
      // — confirm against gl_frag_result enum values.
      *index = slot - FRAG_RESULT_COLOR - 2; // intentional
      return;
   }

   switch (slot) {
   case FRAG_RESULT_COLOR:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case FRAG_RESULT_DEPTH:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      *index = 0;
      break;
   default:
      ERROR("unknown frag result slot %u\n", slot);
      assert(false);
      break;
   }
}
786
// copy of _mesa_sysval_to_semantic
// Translate a gl_system_value into a TGSI semantic name; the index is
// always 0 for system values.
static void
system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
{
   *index = 0;
   switch (val) {
   // Vertex shader
   case SYSTEM_VALUE_VERTEX_ID:
      *name = TGSI_SEMANTIC_VERTEXID;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
      *name = TGSI_SEMANTIC_INSTANCEID;
      break;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
      break;
   case SYSTEM_VALUE_BASE_VERTEX:
      *name = TGSI_SEMANTIC_BASEVERTEX;
      break;
   case SYSTEM_VALUE_BASE_INSTANCE:
      *name = TGSI_SEMANTIC_BASEINSTANCE;
      break;
   case SYSTEM_VALUE_DRAW_ID:
      *name = TGSI_SEMANTIC_DRAWID;
      break;

   // Geometry shader
   case SYSTEM_VALUE_INVOCATION_ID:
      *name = TGSI_SEMANTIC_INVOCATIONID;
      break;

   // Fragment shader
   case SYSTEM_VALUE_FRAG_COORD:
      *name = TGSI_SEMANTIC_POSITION;
      break;
   case SYSTEM_VALUE_FRONT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      break;
   case SYSTEM_VALUE_SAMPLE_ID:
      *name = TGSI_SEMANTIC_SAMPLEID;
      break;
   case SYSTEM_VALUE_SAMPLE_POS:
      *name = TGSI_SEMANTIC_SAMPLEPOS;
      break;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      break;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      *name = TGSI_SEMANTIC_HELPER_INVOCATION;
      break;

   // Tessellation shader
   case SYSTEM_VALUE_TESS_COORD:
      *name = TGSI_SEMANTIC_TESSCOORD;
      break;
   case SYSTEM_VALUE_VERTICES_IN:
      *name = TGSI_SEMANTIC_VERTICESIN;
      break;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      break;

   // Compute shader
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      *name = TGSI_SEMANTIC_THREAD_ID;
      break;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      *name = TGSI_SEMANTIC_BLOCK_ID;
      break;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      *name = TGSI_SEMANTIC_GRID_SIZE;
      break;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      *name = TGSI_SEMANTIC_BLOCK_SIZE;
      break;

   // ARB_shader_ballot
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
      break;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
      break;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
      break;

   default:
      ERROR("unknown system value %u\n", val);
      assert(false);
      break;
   }
}
898
899 void
900 Converter::setInterpolate(nv50_ir_varying *var,
901 uint8_t mode,
902 bool centroid,
903 unsigned semantic)
904 {
905 switch (mode) {
906 case INTERP_MODE_FLAT:
907 var->flat = 1;
908 break;
909 case INTERP_MODE_NONE:
910 if (semantic == TGSI_SEMANTIC_COLOR)
911 var->sc = 1;
912 else if (semantic == TGSI_SEMANTIC_POSITION)
913 var->linear = 1;
914 break;
915 case INTERP_MODE_NOPERSPECTIVE:
916 var->linear = 1;
917 break;
918 case INTERP_MODE_SMOOTH:
919 break;
920 }
921 var->centroid = centroid;
922 }
923
// Number of varying slots a variable occupies in the given stage.
// Arrayed per-vertex IO (GS inputs, most tess IO) has its outer array
// dimension stripped, since that dimension indexes vertices rather
// than slots.
static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      // GS inputs are arrayed over the incoming vertices; divide that
      // dimension out.
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}
953
// Populate info->in/out/sv from the NIR shader's variable lists and
// system-value mask, translating locations to TGSI semantics and
// building per-slot component masks, then let the driver finalize the
// layout via info->assignSlots.
bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;

   // we have to fixup the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      // 64-bit types with more than two components spill into a second slot
      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
         // 64-bit values use two mask bits per component; odd slots take
         // the upper half of the doubled mask
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   info->numOutputs = 0;
   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            // fall back to position for clip-vertex lowering if no
            // explicit clip-vertex output was seen
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ll << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   info->numSysVals = 0;
   for (uint8_t i = 0; i < 64; ++i) {
      if (!(nir->info.system_values_read & 1ll << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   // append generated clip-distance outputs, 4 distances per vec4 slot
   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}
1171
// Byte address of one component of an IO slot. 'idx' selects the
// varying, 'slot' the component; 64-bit types consume two component
// slots each, overflowing into the next varying when needed.
uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   // Loads size by their dest, stores by the value being written.
   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
      ERROR("unknown intrinsic in getSlotAddress %s",
            nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

   if (typeSizeof(ty) == 8) {
      // 64-bit: two component slots per value; carry into the next
      // varying index past component 3
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
   assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);

   // slot[] holds 32-bit word offsets; scale to bytes
   const nv50_ir_varying *vary = input ? info->in : info->out;
   return vary[idx].slot[slot] * 4;
}
1222
1223 Instruction *
1224 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1225 uint32_t base, uint8_t c, Value *indirect0,
1226 Value *indirect1, bool patch)
1227 {
1228 unsigned int tySize = typeSizeof(ty);
1229
1230 if (tySize == 8 &&
1231 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1232 Value *lo = getSSA();
1233 Value *hi = getSSA();
1234
1235 Instruction *loi =
1236 mkLoad(TYPE_U32, lo,
1237 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1238 indirect0);
1239 loi->setIndirect(0, 1, indirect1);
1240 loi->perPatch = patch;
1241
1242 Instruction *hii =
1243 mkLoad(TYPE_U32, hi,
1244 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1245 indirect0);
1246 hii->setIndirect(0, 1, indirect1);
1247 hii->perPatch = patch;
1248
1249 return mkOp2(OP_MERGE, ty, def, lo, hi);
1250 } else {
1251 Instruction *ld =
1252 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1253 ld->setIndirect(0, 1, indirect1);
1254 ld->perPatch = patch;
1255 return ld;
1256 }
1257 }
1258
1259 void
1260 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1261 DataType ty, Value *src, uint8_t idx, uint8_t c,
1262 Value *indirect0, Value *indirect1)
1263 {
1264 uint8_t size = typeSizeof(ty);
1265 uint32_t address = getSlotAddress(insn, idx, c);
1266
1267 if (size == 8 && indirect0) {
1268 Value *split[2];
1269 mkSplit(split, 4, src);
1270
1271 if (op == OP_EXPORT) {
1272 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1273 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1274 }
1275
1276 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1277 split[0])->perPatch = info->out[idx].patch;
1278 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1279 split[1])->perPatch = info->out[idx].patch;
1280 } else {
1281 if (op == OP_EXPORT)
1282 src = mkMov(getSSA(size), src, ty)->getDef(0);
1283 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1284 src)->perPatch = info->out[idx].patch;
1285 }
1286 }
1287
1288 bool
1289 Converter::parseNIR()
1290 {
1291 info->io.clipDistances = nir->info.clip_distance_array_size;
1292 info->io.cullDistances = nir->info.cull_distance_array_size;
1293
1294 switch(prog->getType()) {
1295 case Program::TYPE_COMPUTE:
1296 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1297 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1298 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1299 info->bin.smemSize = nir->info.cs.shared_size;
1300 break;
1301 case Program::TYPE_FRAGMENT:
1302 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1303 info->prop.fp.persampleInvocation =
1304 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1305 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1306 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1307 info->prop.fp.readsSampleLocations =
1308 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1309 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1310 info->prop.fp.usesSampleMaskIn =
1311 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1312 break;
1313 case Program::TYPE_GEOMETRY:
1314 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1315 info->prop.gp.instanceCount = nir->info.gs.invocations;
1316 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1317 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1318 break;
1319 case Program::TYPE_TESSELLATION_CONTROL:
1320 case Program::TYPE_TESSELLATION_EVAL:
1321 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1322 info->prop.tp.domain = GL_LINES;
1323 else
1324 info->prop.tp.domain = nir->info.tess.primitive_mode;
1325 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1326 info->prop.tp.outputPrim =
1327 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1328 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1329 info->prop.tp.winding = !nir->info.tess.ccw;
1330 break;
1331 case Program::TYPE_VERTEX:
1332 info->prop.vp.usesDrawParameters =
1333 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1334 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1335 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1336 break;
1337 default:
1338 break;
1339 }
1340
1341 return true;
1342 }
1343
// Convert the NIR "main" function: set up the program's entry/exit blocks,
// emit per-stage prologue code, walk the function body, and terminate with
// OP_EXIT. Only the main function is supported.
bool
Converter::visit(nir_function *function)
{
   // we only support emiting the main function for now
   assert(!strcmp(function->name, "main"));
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   // scratch registers for the user-clip-plane epilogue; filled by the
   // store_output handling and consumed by handleUserClipPlanes()
   if (info->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      // outBase = laneid - invocation_id (presumably the first lane of this
      // patch's invocation group — TODO confirm against output addressing)
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      // read position.w and keep 1/w around for perspective interpolation
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   // fall through from the last emitted block into the exit block
   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if (info->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non main function this needs to be a OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}
1398
1399 bool
1400 Converter::visit(nir_cf_node *node)
1401 {
1402 switch (node->type) {
1403 case nir_cf_node_block:
1404 return visit(nir_cf_node_as_block(node));
1405 case nir_cf_node_if:
1406 return visit(nir_cf_node_as_if(node));
1407 case nir_cf_node_loop:
1408 return visit(nir_cf_node_as_loop(node));
1409 default:
1410 ERROR("unknown nir_cf_node type %u\n", node->type);
1411 return false;
1412 }
1413 }
1414
1415 bool
1416 Converter::visit(nir_block *block)
1417 {
1418 if (!block->predecessors->entries && block->instr_list.is_empty())
1419 return true;
1420
1421 BasicBlock *bb = convert(block);
1422
1423 setPosition(bb, true);
1424 nir_foreach_instr(insn, block) {
1425 if (!visit(insn))
1426 return false;
1427 }
1428 return true;
1429 }
1430
// Convert a NIR if: branch to the else arm on a false (== 0) condition,
// emit both arms, and rejoin control flow when both arms fall through to
// the same successor block.
bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert joinats, if both nodes end up at the end of the if again.
   // the reason for this to not happens are breaks/continues/ret/... which
   // have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   // conditional branch: taken (condition == 0) goes to the else arm
   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastThen), true);
   // if the then arm does not already end in control flow, branch over the
   // else arm to the common tail block
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastElse), true);
   // same fall-through handling for the else arm
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   // both arms reconverge: emit the matching OP_JOIN in the tail block
   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}
1491
// Convert a NIR loop: bracket the body with PREBREAK/PRECONT markers,
// emit the body, and close the back edge (or a fake edge for RA) as needed.
bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   // the block right after the loop node is the break target
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      // last body block is reachable: close the loop with an explicit
      // continue unless it already ends in control flow
      if (!insn || !insn->asFlow()) {
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}
1528
1529 bool
1530 Converter::visit(nir_instr *insn)
1531 {
1532 switch (insn->type) {
1533 case nir_instr_type_alu:
1534 return visit(nir_instr_as_alu(insn));
1535 case nir_instr_type_intrinsic:
1536 return visit(nir_instr_as_intrinsic(insn));
1537 case nir_instr_type_jump:
1538 return visit(nir_instr_as_jump(insn));
1539 case nir_instr_type_load_const:
1540 return visit(nir_instr_as_load_const(insn));
1541 default:
1542 ERROR("unknown nir_instr type %u\n", insn->type);
1543 return false;
1544 }
1545 return true;
1546 }
1547
1548 SVSemantic
1549 Converter::convert(nir_intrinsic_op intr)
1550 {
1551 switch (intr) {
1552 case nir_intrinsic_load_base_vertex:
1553 return SV_BASEVERTEX;
1554 case nir_intrinsic_load_base_instance:
1555 return SV_BASEINSTANCE;
1556 case nir_intrinsic_load_draw_id:
1557 return SV_DRAWID;
1558 case nir_intrinsic_load_front_face:
1559 return SV_FACE;
1560 case nir_intrinsic_load_helper_invocation:
1561 return SV_THREAD_KILL;
1562 case nir_intrinsic_load_instance_id:
1563 return SV_INSTANCE_ID;
1564 case nir_intrinsic_load_invocation_id:
1565 return SV_INVOCATION_ID;
1566 case nir_intrinsic_load_local_group_size:
1567 return SV_NTID;
1568 case nir_intrinsic_load_local_invocation_id:
1569 return SV_TID;
1570 case nir_intrinsic_load_num_work_groups:
1571 return SV_NCTAID;
1572 case nir_intrinsic_load_patch_vertices_in:
1573 return SV_VERTEX_COUNT;
1574 case nir_intrinsic_load_primitive_id:
1575 return SV_PRIMITIVE_ID;
1576 case nir_intrinsic_load_sample_id:
1577 return SV_SAMPLE_INDEX;
1578 case nir_intrinsic_load_sample_mask_in:
1579 return SV_SAMPLE_MASK;
1580 case nir_intrinsic_load_sample_pos:
1581 return SV_SAMPLE_POS;
1582 case nir_intrinsic_load_subgroup_eq_mask:
1583 return SV_LANEMASK_EQ;
1584 case nir_intrinsic_load_subgroup_ge_mask:
1585 return SV_LANEMASK_GE;
1586 case nir_intrinsic_load_subgroup_gt_mask:
1587 return SV_LANEMASK_GT;
1588 case nir_intrinsic_load_subgroup_le_mask:
1589 return SV_LANEMASK_LE;
1590 case nir_intrinsic_load_subgroup_lt_mask:
1591 return SV_LANEMASK_LT;
1592 case nir_intrinsic_load_subgroup_invocation:
1593 return SV_LANEID;
1594 case nir_intrinsic_load_tess_coord:
1595 return SV_TESS_COORD;
1596 case nir_intrinsic_load_tess_level_inner:
1597 return SV_TESS_INNER;
1598 case nir_intrinsic_load_tess_level_outer:
1599 return SV_TESS_OUTER;
1600 case nir_intrinsic_load_vertex_id:
1601 return SV_VERTEX_ID;
1602 case nir_intrinsic_load_work_group_id:
1603 return SV_CTAID;
1604 default:
1605 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1606 nir_intrinsic_infos[intr].name);
1607 assert(false);
1608 return SV_LAST;
1609 }
1610 }
1611
// Translate one NIR intrinsic into nv50 IR: uniform loads, shader I/O
// loads/stores (including FBFetch via load_output in fragment shaders),
// barycentric interpolation modes, discard, system-value reads, and a few
// compile-time constants. Unknown intrinsics fail the conversion.
bool
Converter::visit(nir_intrinsic_instr *insn)
{
   nir_intrinsic_op op = insn->intrinsic;

   switch (op) {
   case nir_intrinsic_load_uniform: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *indirect;
      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
      for (uint8_t i = 0; i < insn->num_components; ++i) {
         // uniforms are read from constant memory; each slot is 16 bytes
         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      Value *indirect;
      DataType dType = getSType(insn->src[0], false, false);
      // the offset source index differs between the two store intrinsics
      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         // only store components selected by the write mask
         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
            continue;

         uint8_t offset = 0;
         Value *src = getSrc(&insn->src[0], i);
         switch (prog->getType()) {
         case Program::TYPE_FRAGMENT: {
            if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
               // TGSI uses a different interface than NIR, TGSI stores that
               // value in the z component, NIR in X
               offset += 2;
               src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
            }
            break;
         }
         case Program::TYPE_VERTEX: {
            // keep a copy of the clip-vertex output for the user-clip
            // epilogue emitted in handleUserClipPlanes()
            if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
               mkMov(clipVtx[i], src);
               src = clipVtx[i];
            }
            break;
         }
         default:
            break;
         }

         storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
      }
      break;
   }
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_output: {
      LValues &newDefs = convert(&insn->dest);

      // FBFetch
      if (prog->getType() == Program::TYPE_FRAGMENT &&
          op == nir_intrinsic_load_output) {
         std::vector<Value*> defs, srcs;
         uint8_t mask = 0;

         // texel fetch at (x, y, layer, sample) from the bound framebuffer
         srcs.push_back(getSSA());
         srcs.push_back(getSSA());
         Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
         Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
         mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
         mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;

         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));

         for (uint8_t i = 0u; i < insn->num_components; ++i) {
            defs.push_back(newDefs[i]);
            mask |= 1 << i;
         }

         TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
         texi->tex.levelZero = 1;
         texi->tex.mask = mask;
         texi->tex.useOffsets = 0;
         texi->tex.r = 0xffff;
         texi->tex.s = 0xffff;

         info->prop.fp.readsFramebuffer = true;
         break;
      }

      const DataType dType = getDType(insn);
      Value *indirect;
      bool input = op != nir_intrinsic_load_output;
      operation nvirOp;
      uint32_t mode = 0;

      uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
      nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];

      // see load_barycentric_* handling
      if (prog->getType() == Program::TYPE_FRAGMENT) {
         mode = translateInterpMode(&vary, nvirOp);
         if (op == nir_intrinsic_load_interpolated_input) {
            // merge in the interpolation mode produced by the barycentric
            // intrinsic (second component of src[0])
            ImmediateValue immMode;
            if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
               mode |= immMode.reg.data.u32;
         }
      }

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         uint32_t address = getSlotAddress(insn, idx, i);
         Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
         if (prog->getType() == Program::TYPE_FRAGMENT) {
            int s = 1;
            if (typeSizeof(dType) == 8) {
               // 64-bit inputs are interpolated as two 32-bit halves
               Value *lo = getSSA();
               Value *hi = getSSA();
               Instruction *interp;

               interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
               interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
            } else {
               Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);
            }
         } else {
            // non-fragment stages read inputs/outputs with plain loads
            mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
         }
      }
      break;
   }
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_sample: {
      LValues &newDefs = convert(&insn->dest);
      uint32_t mode;

      if (op == nir_intrinsic_load_barycentric_centroid ||
          op == nir_intrinsic_load_barycentric_sample) {
         mode = NV50_IR_INTERP_CENTROID;
      } else if (op == nir_intrinsic_load_barycentric_at_offset) {
         // clamp the offset to [-0.5, 0.4375] and convert to the fixed-point
         // form packed into newDefs[0] (two 16-bit fields via INSBF)
         Value *offs[2];
         for (uint8_t c = 0; c < 2; c++) {
            offs[c] = getScratch();
            mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
            mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
            mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
            mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
         }
         mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);

         mode = NV50_IR_INTERP_OFFSET;
      } else if (op == nir_intrinsic_load_barycentric_pixel) {
         mode = NV50_IR_INTERP_DEFAULT;
      } else if (op == nir_intrinsic_load_barycentric_at_sample) {
         info->prop.fp.readsSampleLocations = true;
         mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
         mode = NV50_IR_INTERP_OFFSET;
      } else {
         unreachable("all intrinsics already handled above");
      }

      // second component carries the interpolation mode, consumed by
      // load_interpolated_input above
      loadImm(newDefs[1], mode);
      break;
   }
   case nir_intrinsic_discard:
      mkOp(OP_DISCARD, TYPE_NONE, NULL);
      break;
   case nir_intrinsic_discard_if: {
      Value *pred = getSSA(1, FILE_PREDICATE);
      if (insn->num_components > 1) {
         ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
         assert(false);
         return false;
      }
      // discard predicated on condition != 0
      mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
      mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
      break;
   }
   // system values, read via OP_RDSV
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_work_group_id: {
      const DataType dType = getDType(insn);
      SVSemantic sv = convert(op);
      LValues &newDefs = convert(&insn->dest);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         Value *def;
         if (typeSizeof(dType) == 8)
            def = getSSA();
         else
            def = newDefs[i];

         if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
            // thread id in a dimension of size 1 is always 0
            loadImm(def, 0u);
         } else {
            Symbol *sym = mkSysVal(sv, i);
            Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
            if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
               rdsv->perPatch = 1;
         }

         // 64-bit destinations get the value zero-extended into the high half
         if (typeSizeof(dType) == 8)
            mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
      }
      break;
   }
   // constants
   case nir_intrinsic_load_subgroup_size: {
      LValues &newDefs = convert(&insn->dest);
      loadImm(newDefs[0], 32u);
      break;
   }
   default:
      ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
      return false;
   }

   return true;
}
1878
1879 bool
1880 Converter::visit(nir_jump_instr *insn)
1881 {
1882 switch (insn->type) {
1883 case nir_jump_return:
1884 // TODO: this only works in the main function
1885 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
1886 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
1887 break;
1888 case nir_jump_break:
1889 case nir_jump_continue: {
1890 bool isBreak = insn->type == nir_jump_break;
1891 nir_block *block = insn->instr.block;
1892 assert(!block->successors[1]);
1893 BasicBlock *target = convert(block->successors[0]);
1894 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
1895 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
1896 break;
1897 }
1898 default:
1899 ERROR("unknown nir_jump_type %u\n", insn->type);
1900 return false;
1901 }
1902
1903 return true;
1904 }
1905
1906 bool
1907 Converter::visit(nir_load_const_instr *insn)
1908 {
1909 assert(insn->def.bit_size <= 64);
1910
1911 LValues &newDefs = convert(&insn->def);
1912 for (int i = 0; i < insn->def.num_components; i++) {
1913 switch (insn->def.bit_size) {
1914 case 64:
1915 loadImm(newDefs[i], insn->value.u64[i]);
1916 break;
1917 case 32:
1918 loadImm(newDefs[i], insn->value.u32[i]);
1919 break;
1920 case 16:
1921 loadImm(newDefs[i], insn->value.u16[i]);
1922 break;
1923 case 8:
1924 loadImm(newDefs[i], insn->value.u8[i]);
1925 break;
1926 }
1927 }
1928 return true;
1929 }
1930
// Shared precondition guard for the nir_alu_instr handlers below: they only
// support a scalar (single-component) destination with write_mask == 1,
// i.e. the NIR is expected to have been scalarized beforehand. Expanded at
// the top of each case; bails out of the enclosing visit() on violation.
#define DEFAULT_CHECKS \
      if (insn->dest.dest.ssa.num_components > 1) { \
         ERROR("nir_alu_instr only supported with 1 component!\n"); \
         return false; \
      } \
      if (insn->dest.write_mask != 1) { \
         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
         return false; \
      }
1940 bool
1941 Converter::visit(nir_alu_instr *insn)
1942 {
1943 const nir_op op = insn->op;
1944 const nir_op_info &info = nir_op_infos[op];
1945 DataType dType = getDType(insn);
1946 const std::vector<DataType> sTypes = getSTypes(insn);
1947
1948 Instruction *oldPos = this->bb->getExit();
1949
1950 switch (op) {
1951 case nir_op_fabs:
1952 case nir_op_iabs:
1953 case nir_op_fadd:
1954 case nir_op_iadd:
1955 case nir_op_fand:
1956 case nir_op_iand:
1957 case nir_op_fceil:
1958 case nir_op_fcos:
1959 case nir_op_fddx:
1960 case nir_op_fddx_coarse:
1961 case nir_op_fddx_fine:
1962 case nir_op_fddy:
1963 case nir_op_fddy_coarse:
1964 case nir_op_fddy_fine:
1965 case nir_op_fdiv:
1966 case nir_op_idiv:
1967 case nir_op_udiv:
1968 case nir_op_fexp2:
1969 case nir_op_ffloor:
1970 case nir_op_ffma:
1971 case nir_op_flog2:
1972 case nir_op_fmax:
1973 case nir_op_imax:
1974 case nir_op_umax:
1975 case nir_op_fmin:
1976 case nir_op_imin:
1977 case nir_op_umin:
1978 case nir_op_fmod:
1979 case nir_op_imod:
1980 case nir_op_umod:
1981 case nir_op_fmul:
1982 case nir_op_imul:
1983 case nir_op_imul_high:
1984 case nir_op_umul_high:
1985 case nir_op_fneg:
1986 case nir_op_ineg:
1987 case nir_op_fnot:
1988 case nir_op_inot:
1989 case nir_op_for:
1990 case nir_op_ior:
1991 case nir_op_pack_64_2x32_split:
1992 case nir_op_fpow:
1993 case nir_op_frcp:
1994 case nir_op_frem:
1995 case nir_op_irem:
1996 case nir_op_frsq:
1997 case nir_op_fsat:
1998 case nir_op_ishr:
1999 case nir_op_ushr:
2000 case nir_op_fsin:
2001 case nir_op_fsqrt:
2002 case nir_op_fsub:
2003 case nir_op_isub:
2004 case nir_op_ftrunc:
2005 case nir_op_ishl:
2006 case nir_op_fxor:
2007 case nir_op_ixor: {
2008 DEFAULT_CHECKS;
2009 LValues &newDefs = convert(&insn->dest);
2010 operation preOp = preOperationNeeded(op);
2011 if (preOp != OP_NOP) {
2012 assert(info.num_inputs < 2);
2013 Value *tmp = getSSA(typeSizeof(dType));
2014 Instruction *i0 = mkOp(preOp, dType, tmp);
2015 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2016 if (info.num_inputs) {
2017 i0->setSrc(0, getSrc(&insn->src[0]));
2018 i1->setSrc(0, tmp);
2019 }
2020 i1->subOp = getSubOp(op);
2021 } else {
2022 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2023 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2024 i->setSrc(s, getSrc(&insn->src[s]));
2025 }
2026 i->subOp = getSubOp(op);
2027 }
2028 break;
2029 }
2030 case nir_op_ifind_msb:
2031 case nir_op_ufind_msb: {
2032 DEFAULT_CHECKS;
2033 LValues &newDefs = convert(&insn->dest);
2034 dType = sTypes[0];
2035 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2036 break;
2037 }
2038 case nir_op_fround_even: {
2039 DEFAULT_CHECKS;
2040 LValues &newDefs = convert(&insn->dest);
2041 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2042 break;
2043 }
2044 // convert instructions
2045 case nir_op_f2f32:
2046 case nir_op_f2i32:
2047 case nir_op_f2u32:
2048 case nir_op_i2f32:
2049 case nir_op_i2i32:
2050 case nir_op_u2f32:
2051 case nir_op_u2u32:
2052 case nir_op_f2f64:
2053 case nir_op_f2i64:
2054 case nir_op_f2u64:
2055 case nir_op_i2f64:
2056 case nir_op_i2i64:
2057 case nir_op_u2f64:
2058 case nir_op_u2u64: {
2059 DEFAULT_CHECKS;
2060 LValues &newDefs = convert(&insn->dest);
2061 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2062 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2063 i->rnd = ROUND_Z;
2064 i->sType = sTypes[0];
2065 break;
2066 }
2067 // compare instructions
2068 case nir_op_feq32:
2069 case nir_op_ieq32:
2070 case nir_op_fge32:
2071 case nir_op_ige32:
2072 case nir_op_uge32:
2073 case nir_op_flt32:
2074 case nir_op_ilt32:
2075 case nir_op_ult32:
2076 case nir_op_fne32:
2077 case nir_op_ine32: {
2078 DEFAULT_CHECKS;
2079 LValues &newDefs = convert(&insn->dest);
2080 Instruction *i = mkCmp(getOperation(op),
2081 getCondCode(op),
2082 dType,
2083 newDefs[0],
2084 dType,
2085 getSrc(&insn->src[0]),
2086 getSrc(&insn->src[1]));
2087 if (info.num_inputs == 3)
2088 i->setSrc(2, getSrc(&insn->src[2]));
2089 i->sType = sTypes[0];
2090 break;
2091 }
2092 // those are weird ALU ops and need special handling, because
2093 // 1. they are always componend based
2094 // 2. they basically just merge multiple values into one data type
2095 case nir_op_imov:
2096 case nir_op_fmov:
2097 case nir_op_vec2:
2098 case nir_op_vec3:
2099 case nir_op_vec4: {
2100 LValues &newDefs = convert(&insn->dest);
2101 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2102 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2103 }
2104 break;
2105 }
2106 // (un)pack
2107 case nir_op_pack_64_2x32: {
2108 LValues &newDefs = convert(&insn->dest);
2109 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2110 merge->setSrc(0, getSrc(&insn->src[0], 0));
2111 merge->setSrc(1, getSrc(&insn->src[0], 1));
2112 break;
2113 }
2114 case nir_op_pack_half_2x16_split: {
2115 LValues &newDefs = convert(&insn->dest);
2116 Value *tmpH = getSSA();
2117 Value *tmpL = getSSA();
2118
2119 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2120 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2121 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2122 break;
2123 }
2124 case nir_op_unpack_half_2x16_split_x:
2125 case nir_op_unpack_half_2x16_split_y: {
2126 LValues &newDefs = convert(&insn->dest);
2127 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2128 if (op == nir_op_unpack_half_2x16_split_y)
2129 cvt->subOp = 1;
2130 break;
2131 }
2132 case nir_op_unpack_64_2x32: {
2133 LValues &newDefs = convert(&insn->dest);
2134 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2135 break;
2136 }
2137 case nir_op_unpack_64_2x32_split_x: {
2138 LValues &newDefs = convert(&insn->dest);
2139 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2140 break;
2141 }
2142 case nir_op_unpack_64_2x32_split_y: {
2143 LValues &newDefs = convert(&insn->dest);
2144 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2145 break;
2146 }
2147 // special instructions
2148 case nir_op_fsign:
2149 case nir_op_isign: {
2150 DEFAULT_CHECKS;
2151 DataType iType;
2152 if (::isFloatType(dType))
2153 iType = TYPE_F32;
2154 else
2155 iType = TYPE_S32;
2156
2157 LValues &newDefs = convert(&insn->dest);
2158 LValue *val0 = getScratch();
2159 LValue *val1 = getScratch();
2160 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2161 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2162
2163 if (dType == TYPE_F64) {
2164 mkOp2(OP_SUB, iType, val0, val0, val1);
2165 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2166 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2167 mkOp2(OP_SUB, iType, val0, val1, val0);
2168 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2169 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2170 } else if (::isFloatType(dType))
2171 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2172 else
2173 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2174 break;
2175 }
2176 case nir_op_fcsel:
2177 case nir_op_b32csel: {
2178 DEFAULT_CHECKS;
2179 LValues &newDefs = convert(&insn->dest);
2180 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2181 break;
2182 }
2183 case nir_op_ibitfield_extract:
2184 case nir_op_ubitfield_extract: {
2185 DEFAULT_CHECKS;
2186 Value *tmp = getSSA();
2187 LValues &newDefs = convert(&insn->dest);
2188 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2189 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2190 break;
2191 }
2192 case nir_op_bfm: {
2193 DEFAULT_CHECKS;
2194 LValues &newDefs = convert(&insn->dest);
2195 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2196 break;
2197 }
2198 case nir_op_bitfield_insert: {
2199 DEFAULT_CHECKS;
2200 LValues &newDefs = convert(&insn->dest);
2201 LValue *temp = getSSA();
2202 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2203 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2204 break;
2205 }
2206 case nir_op_bit_count: {
2207 DEFAULT_CHECKS;
2208 LValues &newDefs = convert(&insn->dest);
2209 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2210 break;
2211 }
2212 case nir_op_bitfield_reverse: {
2213 DEFAULT_CHECKS;
2214 LValues &newDefs = convert(&insn->dest);
2215 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2216 break;
2217 }
2218 case nir_op_find_lsb: {
2219 DEFAULT_CHECKS;
2220 LValues &newDefs = convert(&insn->dest);
2221 Value *tmp = getSSA();
2222 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2223 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2224 break;
2225 }
2226 // boolean conversions
2227 case nir_op_b2f32: {
2228 DEFAULT_CHECKS;
2229 LValues &newDefs = convert(&insn->dest);
2230 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2231 break;
2232 }
2233 case nir_op_b2f64: {
2234 DEFAULT_CHECKS;
2235 LValues &newDefs = convert(&insn->dest);
2236 Value *tmp = getSSA(4);
2237 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2238 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2239 break;
2240 }
2241 case nir_op_f2b32:
2242 case nir_op_i2b32: {
2243 DEFAULT_CHECKS;
2244 LValues &newDefs = convert(&insn->dest);
2245 Value *src1;
2246 if (typeSizeof(sTypes[0]) == 8) {
2247 src1 = loadImm(getSSA(8), 0.0);
2248 } else {
2249 src1 = zero;
2250 }
2251 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2252 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2253 break;
2254 }
2255 case nir_op_b2i32: {
2256 DEFAULT_CHECKS;
2257 LValues &newDefs = convert(&insn->dest);
2258 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2259 break;
2260 }
2261 case nir_op_b2i64: {
2262 DEFAULT_CHECKS;
2263 LValues &newDefs = convert(&insn->dest);
2264 LValue *def = getScratch();
2265 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2266 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2267 break;
2268 }
2269 default:
2270 ERROR("unknown nir_op %s\n", info.name);
2271 return false;
2272 }
2273
2274 if (!oldPos) {
2275 oldPos = this->bb->getEntry();
2276 oldPos->precise = insn->exact;
2277 }
2278
2279 if (unlikely(!oldPos))
2280 return true;
2281
2282 while (oldPos->next) {
2283 oldPos = oldPos->next;
2284 oldPos->precise = insn->exact;
2285 }
2286 oldPos->saturate = insn->dest.saturate;
2287
2288 return true;
2289 }
2290 #undef DEFAULT_CHECKS
2291
2292 bool
2293 Converter::run()
2294 {
2295 bool progress;
2296
2297 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
2298 nir_print_shader(nir, stderr);
2299
2300 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
2301 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2302 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
2303 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2304 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
2305 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2306
2307 do {
2308 progress = false;
2309 NIR_PASS(progress, nir, nir_copy_prop);
2310 NIR_PASS(progress, nir, nir_opt_remove_phis);
2311 NIR_PASS(progress, nir, nir_opt_trivial_continues);
2312 NIR_PASS(progress, nir, nir_opt_cse);
2313 NIR_PASS(progress, nir, nir_opt_algebraic);
2314 NIR_PASS(progress, nir, nir_opt_constant_folding);
2315 NIR_PASS(progress, nir, nir_copy_prop);
2316 NIR_PASS(progress, nir, nir_opt_dce);
2317 NIR_PASS(progress, nir, nir_opt_dead_cf);
2318 } while (progress);
2319
2320 NIR_PASS_V(nir, nir_lower_bool_to_int32);
2321 NIR_PASS_V(nir, nir_lower_locals_to_regs);
2322 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
2323 NIR_PASS_V(nir, nir_convert_from_ssa, true);
2324
2325 // Garbage collect dead instructions
2326 nir_sweep(nir);
2327
2328 if (!parseNIR()) {
2329 ERROR("Couldn't prase NIR!\n");
2330 return false;
2331 }
2332
2333 if (!assignSlots()) {
2334 ERROR("Couldn't assign slots!\n");
2335 return false;
2336 }
2337
2338 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2339 nir_print_shader(nir, stderr);
2340
2341 nir_foreach_function(function, nir) {
2342 if (!visit(function))
2343 return false;
2344 }
2345
2346 return true;
2347 }
2348
2349 } // unnamed namespace
2350
2351 namespace nv50_ir {
2352
2353 bool
2354 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2355 {
2356 nir_shader *nir = (nir_shader*)info->bin.source;
2357 Converter converter(this, nir, info);
2358 bool result = converter.run();
2359 if (!result)
2360 return result;
2361 LoweringHelper lowering;
2362 lowering.run(this);
2363 tlsSize = info->bin.tlsSpace;
2364 return result;
2365 }
2366
2367 } // namespace nv50_ir