nv50/ir/nir: implement nir_alu_instr handling
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <vector>
40
41 namespace {
42
43 #if __cplusplus >= 201103L
44 using std::hash;
45 using std::unordered_map;
46 #else
47 using std::tr1::hash;
48 using std::tr1::unordered_map;
49 #endif
50
51 using namespace nv50_ir;
52
53 int
54 type_size(const struct glsl_type *type)
55 {
56 return glsl_count_attribute_slots(type, false);
57 }
58
// Translates a NIR shader into nv50 IR. One Converter instance handles a
// single shader; state below tracks the mapping from NIR SSA values,
// registers and blocks to their nv50 IR counterparts.
class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   // one LValue per vector component of a NIR def/register
   typedef std::vector<LValue*> LValues;
   typedef unordered_map<unsigned, LValues> NirDefMap;
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   // convert(): look up (or lazily create) the nv50 IR object for a NIR one
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   // getSrc(): fetch the Value backing one component of a NIR source
   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value has not a constant part, the Value gets returned
   // through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   // type classification helpers for picking signed/unsigned/float variants
   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   // destination/source DataType resolution for ALU and intrinsic instrs
   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   // nir_op -> nv50 IR operation / sub-op / condition-code mapping
   operation getOperation(nir_op);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   // fill out nv50_ir_prog_info in/out/sysval slot tables from NIR variables
   bool assignSlots();
   bool parseNIR();

   // per-construct visitors driving the translation
   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);

   nir_shader *nir;

   NirDefMap ssaDefs;
   NirDefMap regDefs;
   NirBlockMap blocks;
   unsigned int curLoopDepth;

   BasicBlock *exit;
   Value *zero;

   // stage-specific scratch state
   union {
      struct {
         Value *position;
      } fp;
   };
};
154
// Sets up common converter state; a reusable immediate zero is created up
// front since many translations need it.
Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
   : ConverterCommon(prog, info),
     nir(nir),
     curLoopDepth(0)
{
   zero = mkImm((uint32_t)0);
}
162
163 BasicBlock *
164 Converter::convert(nir_block *block)
165 {
166 NirBlockMap::iterator it = blocks.find(block->index);
167 if (it != blocks.end())
168 return it->second;
169
170 BasicBlock *bb = new BasicBlock(func);
171 blocks[block->index] = bb;
172 return bb;
173 }
174
175 bool
176 Converter::isFloatType(nir_alu_type type)
177 {
178 return nir_alu_type_get_base_type(type) == nir_type_float;
179 }
180
181 bool
182 Converter::isSignedType(nir_alu_type type)
183 {
184 return nir_alu_type_get_base_type(type) == nir_type_int;
185 }
186
187 bool
188 Converter::isResultFloat(nir_op op)
189 {
190 const nir_op_info &info = nir_op_infos[op];
191 if (info.output_type != nir_type_invalid)
192 return isFloatType(info.output_type);
193
194 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
195 assert(false);
196 return true;
197 }
198
199 bool
200 Converter::isResultSigned(nir_op op)
201 {
202 switch (op) {
203 // there is no umul and we get wrong results if we treat all muls as signed
204 case nir_op_imul:
205 case nir_op_inot:
206 return false;
207 default:
208 const nir_op_info &info = nir_op_infos[op];
209 if (info.output_type != nir_type_invalid)
210 return isSignedType(info.output_type);
211 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
212 assert(false);
213 return true;
214 }
215 }
216
217 DataType
218 Converter::getDType(nir_alu_instr *insn)
219 {
220 if (insn->dest.dest.is_ssa)
221 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
222 else
223 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
224 }
225
226 DataType
227 Converter::getDType(nir_intrinsic_instr *insn)
228 {
229 if (insn->dest.is_ssa)
230 return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
231 else
232 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
233 }
234
235 DataType
236 Converter::getDType(nir_op op, uint8_t bitSize)
237 {
238 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
239 if (ty == TYPE_NONE) {
240 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
241 assert(false);
242 }
243 return ty;
244 }
245
246 std::vector<DataType>
247 Converter::getSTypes(nir_alu_instr *insn)
248 {
249 const nir_op_info &info = nir_op_infos[insn->op];
250 std::vector<DataType> res(info.num_inputs);
251
252 for (uint8_t i = 0; i < info.num_inputs; ++i) {
253 if (info.input_types[i] != nir_type_invalid) {
254 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
255 } else {
256 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
257 assert(false);
258 res[i] = TYPE_NONE;
259 break;
260 }
261 }
262
263 return res;
264 }
265
266 DataType
267 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
268 {
269 uint8_t bitSize;
270 if (src.is_ssa)
271 bitSize = src.ssa->bit_size;
272 else
273 bitSize = src.reg.reg->bit_size;
274
275 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
276 if (ty == TYPE_NONE) {
277 const char *str;
278 if (isFloat)
279 str = "float";
280 else if (isSigned)
281 str = "int";
282 else
283 str = "uint";
284 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
285 assert(false);
286 }
287 return ty;
288 }
289
// Map a nir_op to the corresponding nv50 IR operation. Float/int/unsigned
// variants of the same conceptual op collapse to one IR opcode; the type
// distinction is carried separately (see getDType/getSTypes). Ops with no
// direct mapping are handled specially in the ALU visitor and never reach
// this table's default case.
operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_fand:
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   // all size/type conversions go through a single CVT op; the concrete
   // source/dest types select the actual conversion
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   // coarse/fine derivative variants are distinguished via sub-op elsewhere
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   // imul_high/umul_high get NV50_IR_SUBOP_MUL_HIGH via getSubOp()
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_fnot:
   case nir_op_inot:
      return OP_NOT;
   case nir_op_for:
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   // comparisons all map to SET; the condition code is added via getCondCode()
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_fsub:
   case nir_op_isub:
      return OP_SUB;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_fxor:
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}
418
419 operation
420 Converter::preOperationNeeded(nir_op op)
421 {
422 switch (op) {
423 case nir_op_fcos:
424 case nir_op_fsin:
425 return OP_PRESIN;
426 default:
427 return OP_NOP;
428 }
429 }
430
431 int
432 Converter::getSubOp(nir_op op)
433 {
434 switch (op) {
435 case nir_op_imul_high:
436 case nir_op_umul_high:
437 return NV50_IR_SUBOP_MUL_HIGH;
438 default:
439 return 0;
440 }
441 }
442
// Condition code for NIR comparison ops feeding OP_SET. Note the float
// not-equal uses CC_NEU while the integer one uses CC_NE — presumably the
// unordered variant so NaN operands compare not-equal; verify against the
// nv50 IR condition-code semantics if touching this.
CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}
468
// An ALU dest is just a nir_dest plus modifiers; forward to the nir_dest
// overload (modifiers are checked elsewhere).
Converter::LValues&
Converter::convert(nir_alu_dest *dest)
{
   return convert(&dest->dest);
}
474
475 Converter::LValues&
476 Converter::convert(nir_dest *dest)
477 {
478 if (dest->is_ssa)
479 return convert(&dest->ssa);
480 if (dest->reg.indirect) {
481 ERROR("no support for indirects.");
482 assert(false);
483 }
484 return convert(dest->reg.reg);
485 }
486
487 Converter::LValues&
488 Converter::convert(nir_register *reg)
489 {
490 NirDefMap::iterator it = regDefs.find(reg->index);
491 if (it != regDefs.end())
492 return it->second;
493
494 LValues newDef(reg->num_components);
495 for (uint8_t i = 0; i < reg->num_components; i++)
496 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
497 return regDefs[reg->index] = newDef;
498 }
499
500 Converter::LValues&
501 Converter::convert(nir_ssa_def *def)
502 {
503 NirDefMap::iterator it = ssaDefs.find(def->index);
504 if (it != ssaDefs.end())
505 return it->second;
506
507 LValues newDef(def->num_components);
508 for (uint8_t i = 0; i < def->num_components; i++)
509 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
510 return ssaDefs[def->index] = newDef;
511 }
512
513 Value*
514 Converter::getSrc(nir_alu_src *src, uint8_t component)
515 {
516 if (src->abs || src->negate) {
517 ERROR("modifiers currently not supported on nir_alu_src\n");
518 assert(false);
519 }
520 return getSrc(&src->src, src->swizzle[component]);
521 }
522
523 Value*
524 Converter::getSrc(nir_register *reg, uint8_t idx)
525 {
526 NirDefMap::iterator it = regDefs.find(reg->index);
527 if (it == regDefs.end())
528 return convert(reg)[idx];
529 return it->second[idx];
530 }
531
532 Value*
533 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
534 {
535 if (src->is_ssa)
536 return getSrc(src->ssa, idx);
537
538 if (src->reg.indirect) {
539 if (indirect)
540 return getSrc(src->reg.indirect, idx);
541 ERROR("no support for indirects.");
542 assert(false);
543 return NULL;
544 }
545
546 return getSrc(src->reg.reg, idx);
547 }
548
549 Value*
550 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
551 {
552 NirDefMap::iterator it = ssaDefs.find(src->index);
553 if (it == ssaDefs.end()) {
554 ERROR("SSA value %u not found\n", src->index);
555 assert(false);
556 return NULL;
557 }
558 return it->second[idx];
559 }
560
561 uint32_t
562 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
563 {
564 nir_const_value *offset = nir_src_as_const_value(*src);
565
566 if (offset) {
567 indirect = NULL;
568 return offset->u32[0];
569 }
570
571 indirect = getSrc(src, idx, true);
572 return 0;
573 }
574
575 uint32_t
576 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
577 {
578 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
579 if (indirect)
580 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
581 return idx;
582 }
583
// Translate a vertex-input slot into a TGSI semantic name + index pair,
// which the rest of the nv50 codegen consumes. Generic and texcoord ranges
// are handled as offsets; the remaining fixed-function attributes map 1:1.
static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}
644
// Translate a varying slot (FS inputs and VS/GS/tess outputs) into a TGSI
// semantic name + index. Patch/generic/texcoord ranges are offset-based;
// order of the range checks matters since PATCH0 > VAR0 > TEX0.
static void
varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VARYING_SLOT_TESS_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VARYING_SLOT_PATCH0) {
      *name = TGSI_SEMANTIC_PATCH;
      *index = slot - VARYING_SLOT_PATCH0;
      return;
   }

   if (slot >= VARYING_SLOT_VAR0) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VARYING_SLOT_VAR0;
      return;
   }

   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VARYING_SLOT_TEX0;
      return;
   }

   switch (slot) {
   case VARYING_SLOT_BFC0:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 0;
      break;
   case VARYING_SLOT_BFC1:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_DIST0:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 0;
      break;
   case VARYING_SLOT_CLIP_DIST1:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_VERTEX:
      *name = TGSI_SEMANTIC_CLIPVERTEX;
      *index = 0;
      break;
   case VARYING_SLOT_COL0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VARYING_SLOT_COL1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VARYING_SLOT_EDGE:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VARYING_SLOT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      *index = 0;
      break;
   case VARYING_SLOT_FOGC:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VARYING_SLOT_LAYER:
      *name = TGSI_SEMANTIC_LAYER;
      *index = 0;
      break;
   case VARYING_SLOT_PNTC:
      *name = TGSI_SEMANTIC_PCOORD;
      *index = 0;
      break;
   case VARYING_SLOT_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VARYING_SLOT_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      *index = 0;
      break;
   case VARYING_SLOT_PSIZ:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      *index = 0;
      break;
   case VARYING_SLOT_VIEWPORT:
      *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
      *index = 0;
      break;
   default:
      ERROR("unknown varying slot %u\n", slot);
      assert(false);
      break;
   }
}
753
// Translate a fragment-shader output slot into a TGSI semantic name + index.
static void
frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
{
   if (slot >= FRAG_RESULT_DATA0) {
      *name = TGSI_SEMANTIC_COLOR;
      // equivalent to (slot - FRAG_RESULT_DATA0): DATA0 sits two entries
      // past COLOR in gl_frag_result, hence the "- 2"
      *index = slot - FRAG_RESULT_COLOR - 2; // intentional
      return;
   }

   switch (slot) {
   case FRAG_RESULT_COLOR:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case FRAG_RESULT_DEPTH:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      *index = 0;
      break;
   default:
      ERROR("unknown frag result slot %u\n", slot);
      assert(false);
      break;
   }
}
782
// copy of _mesa_sysval_to_semantic
// Translate a SYSTEM_VALUE_* enum into a TGSI semantic name; the index is
// always 0 for system values. Grouped by the shader stage that typically
// reads each value.
static void
system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
{
   *index = 0;
   switch (val) {
   // Vertex shader
   case SYSTEM_VALUE_VERTEX_ID:
      *name = TGSI_SEMANTIC_VERTEXID;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
      *name = TGSI_SEMANTIC_INSTANCEID;
      break;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
      break;
   case SYSTEM_VALUE_BASE_VERTEX:
      *name = TGSI_SEMANTIC_BASEVERTEX;
      break;
   case SYSTEM_VALUE_BASE_INSTANCE:
      *name = TGSI_SEMANTIC_BASEINSTANCE;
      break;
   case SYSTEM_VALUE_DRAW_ID:
      *name = TGSI_SEMANTIC_DRAWID;
      break;

   // Geometry shader
   case SYSTEM_VALUE_INVOCATION_ID:
      *name = TGSI_SEMANTIC_INVOCATIONID;
      break;

   // Fragment shader
   case SYSTEM_VALUE_FRAG_COORD:
      *name = TGSI_SEMANTIC_POSITION;
      break;
   case SYSTEM_VALUE_FRONT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      break;
   case SYSTEM_VALUE_SAMPLE_ID:
      *name = TGSI_SEMANTIC_SAMPLEID;
      break;
   case SYSTEM_VALUE_SAMPLE_POS:
      *name = TGSI_SEMANTIC_SAMPLEPOS;
      break;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      break;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      *name = TGSI_SEMANTIC_HELPER_INVOCATION;
      break;

   // Tessellation shader
   case SYSTEM_VALUE_TESS_COORD:
      *name = TGSI_SEMANTIC_TESSCOORD;
      break;
   case SYSTEM_VALUE_VERTICES_IN:
      *name = TGSI_SEMANTIC_VERTICESIN;
      break;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      break;

   // Compute shader
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      *name = TGSI_SEMANTIC_THREAD_ID;
      break;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      *name = TGSI_SEMANTIC_BLOCK_ID;
      break;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      *name = TGSI_SEMANTIC_GRID_SIZE;
      break;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      *name = TGSI_SEMANTIC_BLOCK_SIZE;
      break;

   // ARB_shader_ballot
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
      break;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
      break;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
      break;

   default:
      ERROR("unknown system value %u\n", val);
      assert(false);
      break;
   }
}
894
895 void
896 Converter::setInterpolate(nv50_ir_varying *var,
897 uint8_t mode,
898 bool centroid,
899 unsigned semantic)
900 {
901 switch (mode) {
902 case INTERP_MODE_FLAT:
903 var->flat = 1;
904 break;
905 case INTERP_MODE_NONE:
906 if (semantic == TGSI_SEMANTIC_COLOR)
907 var->sc = 1;
908 else if (semantic == TGSI_SEMANTIC_POSITION)
909 var->linear = 1;
910 break;
911 case INTERP_MODE_NOPERSPECTIVE:
912 var->linear = 1;
913 break;
914 case INTERP_MODE_SMOOTH:
915 break;
916 }
917 var->centroid = centroid;
918 }
919
// Number of I/O slots a variable occupies for slot assignment. Non-arrays
// are straightforward; arrays need stage-specific handling because GS inputs
// and tessellation I/O carry an implicit per-vertex outer array dimension
// that must not be counted.
static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      // GS inputs are arrayed per input vertex; divide that dimension out
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}
949
// Walk the NIR variable lists and populate the nv50_ir_prog_info in/out/
// sysval tables (semantic name, index, component mask, patch flag), then
// hand off to the driver's assignSlots callback for the final hardware
// slot layout. Returns false on an unknown shader type or callback failure.
bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;

   // we have to fixup the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   // -------- inputs --------
   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      // 64-bit types with more than two components spill into a second slot
      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
         // 64-bit values use two mask bits per component; the odd slot of a
         // split value takes the upper half of the doubled mask
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   // -------- outputs --------
   info->numOutputs = 0;
   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ll << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   // -------- system values --------
   info->numSysVals = 0;
   for (uint8_t i = 0; i < 64; ++i) {
      if (!(nir->info.system_values_read & 1ll << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   // synthesize clip-distance outputs when the driver asked us to generate
   // user clipping (genUserClip > 0), one vec4 output per 4 distances
   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}
1160
// Compute the byte address of one component of an I/O slot for a load/store
// intrinsic, using the slot table filled in by assignSlots(). 64-bit values
// take two components each, so the component index is rescaled and may
// carry over into the next vec4 slot.
uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   // loads take the type from their dest, stores from the stored source
   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
      ERROR("unknown intrinsic in getSlotAddress %s",
            nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

   if (typeSizeof(ty) == 8) {
      // 64-bit: two 32-bit components per logical component; overflow past
      // component 3 moves to the next slot index
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
   assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);

   const nv50_ir_varying *vary = input ? info->in : info->out;
   return vary[idx].slot[slot] * 4;
}
1211
// Emit a load of one component from a symbol in the given file into "def".
//
// indirect0/indirect1 are optional indirect address sources (may be NULL);
// "patch" marks per-patch tessellation I/O.  Returns the emitted load (or
// the final OP_MERGE for split 64-bit loads).
Instruction *
Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
                    uint32_t base, uint8_t c, Value *indirect0,
                    Value *indirect1, bool patch)
{
   unsigned int tySize = typeSizeof(ty);

   // 64-bit loads from const/buffer memory, or any indirect load, are
   // split into two 32-bit loads whose results get merged into "def".
   if (tySize == 8 &&
       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
      Value *lo = getSSA();
      Value *hi = getSSA();

      // low 32 bits
      Instruction *loi =
         mkLoad(TYPE_U32, lo,
                mkSymbol(file, i, TYPE_U32, base + c * tySize),
                indirect0);
      loi->setIndirect(0, 1, indirect1);
      loi->perPatch = patch;

      // high 32 bits, 4 bytes further
      Instruction *hii =
         mkLoad(TYPE_U32, hi,
                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
                indirect0);
      hii->setIndirect(0, 1, indirect1);
      hii->perPatch = patch;

      return mkOp2(OP_MERGE, ty, def, lo, hi);
   } else {
      // simple case: a single load of the full type
      Instruction *ld =
         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
      ld->setIndirect(0, 1, indirect1);
      ld->perPatch = patch;
      return ld;
   }
}
1247
// Emit a store of "src" to an output varying component.
//
// The byte address is derived from the intrinsic via getSlotAddress().
// 64-bit indirect stores are split into two 32-bit stores.  OP_EXPORT
// sources are copied through a fresh mov first (the export cannot consume
// arbitrary values directly).
void
Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
                   DataType ty, Value *src, uint8_t idx, uint8_t c,
                   Value *indirect0, Value *indirect1)
{
   uint8_t size = typeSizeof(ty);
   uint32_t address = getSlotAddress(insn, idx, c);

   if (size == 8 && indirect0) {
      // split the 64-bit value and store the two halves separately
      Value *split[2];
      mkSplit(split, 4, src);

      if (op == OP_EXPORT) {
         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
      }

      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
              split[0])->perPatch = info->out[idx].patch;
      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
              split[1])->perPatch = info->out[idx].patch;
   } else {
      if (op == OP_EXPORT)
         src = mkMov(getSSA(size), src, ty)->getDef(0);
      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
              src)->perPatch = info->out[idx].patch;
   }
}
1276
1277 bool
1278 Converter::parseNIR()
1279 {
1280 info->io.clipDistances = nir->info.clip_distance_array_size;
1281 info->io.cullDistances = nir->info.cull_distance_array_size;
1282
1283 switch(prog->getType()) {
1284 case Program::TYPE_COMPUTE:
1285 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1286 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1287 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1288 info->bin.smemSize = nir->info.cs.shared_size;
1289 break;
1290 case Program::TYPE_FRAGMENT:
1291 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1292 info->prop.fp.persampleInvocation =
1293 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1294 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1295 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1296 info->prop.fp.readsSampleLocations =
1297 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1298 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1299 info->prop.fp.usesSampleMaskIn =
1300 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1301 break;
1302 case Program::TYPE_GEOMETRY:
1303 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1304 info->prop.gp.instanceCount = nir->info.gs.invocations;
1305 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1306 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1307 break;
1308 case Program::TYPE_TESSELLATION_CONTROL:
1309 case Program::TYPE_TESSELLATION_EVAL:
1310 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1311 info->prop.tp.domain = GL_LINES;
1312 else
1313 info->prop.tp.domain = nir->info.tess.primitive_mode;
1314 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1315 info->prop.tp.outputPrim =
1316 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1317 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1318 info->prop.tp.winding = !nir->info.tess.ccw;
1319 break;
1320 case Program::TYPE_VERTEX:
1321 info->prop.vp.usesDrawParameters =
1322 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1323 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1324 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1325 break;
1326 default:
1327 break;
1328 }
1329
1330 return true;
1331 }
1332
// Convert one NIR function into nv50 IR.  Only "main" is supported:
// entry/exit basic blocks are created here and stage-specific prologue
// code is emitted before the function body is visited.
bool
Converter::visit(nir_function *function)
{
   // we only support emitting the main function for now
   assert(!strcmp(function->name, "main"));
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      // outBase = laneid - invocation_id: base lane of this patch's
      // invocations, used later for TCS output addressing
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      // read position.w and precompute 1/w for perspective correction
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   // visit the body; blocks are created/connected on demand
   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   // TODO: for non main function this needs to be a OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}
1379
1380 bool
1381 Converter::visit(nir_cf_node *node)
1382 {
1383 switch (node->type) {
1384 case nir_cf_node_block:
1385 return visit(nir_cf_node_as_block(node));
1386 case nir_cf_node_if:
1387 return visit(nir_cf_node_as_if(node));
1388 case nir_cf_node_loop:
1389 return visit(nir_cf_node_as_loop(node));
1390 default:
1391 ERROR("unknown nir_cf_node type %u\n", node->type);
1392 return false;
1393 }
1394 }
1395
1396 bool
1397 Converter::visit(nir_block *block)
1398 {
1399 if (!block->predecessors->entries && block->instr_list.is_empty())
1400 return true;
1401
1402 BasicBlock *bb = convert(block);
1403
1404 setPosition(bb, true);
1405 nir_foreach_instr(insn, block) {
1406 if (!visit(insn))
1407 return false;
1408 }
1409 return true;
1410 }
1411
// Convert a NIR if-statement: branch over the then-block when the
// condition is zero, emit both arms, and (when both arms rejoin) wrap
// the construct in JOINAT/JOIN for the hardware's reconvergence handling.
bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   // the last block of each arm must fall through to a single successor
   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert joinats, if both arms end up at the end of the if again.
   // the reason for this not happening is breaks/continues/ret/... which
   // have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   // condition == 0 -> take the else arm
   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   // if the then arm didn't already end in control flow (break/cont/...),
   // fall through to the merge block explicitly
   setPosition(convert(lastThen), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   // same fall-through handling for the else arm
   setPosition(convert(lastElse), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   // matching JOIN at the reconvergence point for the JOINAT above
   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}
1472
// Convert a NIR loop: set up PREBREAK/PRECONT markers for the hardware,
// emit the body, and close the back edge unless the body already ended
// in explicit control flow.
bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   // the block right after the loop is where breaks land
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      // the body fell off the end: close the loop with an explicit CONT
      if (!insn || !insn->asFlow()) {
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}
1509
1510 bool
1511 Converter::visit(nir_instr *insn)
1512 {
1513 switch (insn->type) {
1514 case nir_instr_type_alu:
1515 return visit(nir_instr_as_alu(insn));
1516 case nir_instr_type_intrinsic:
1517 return visit(nir_instr_as_intrinsic(insn));
1518 case nir_instr_type_jump:
1519 return visit(nir_instr_as_jump(insn));
1520 case nir_instr_type_load_const:
1521 return visit(nir_instr_as_load_const(insn));
1522 default:
1523 ERROR("unknown nir_instr type %u\n", insn->type);
1524 return false;
1525 }
1526 return true;
1527 }
1528
1529 bool
1530 Converter::visit(nir_intrinsic_instr *insn)
1531 {
1532 nir_intrinsic_op op = insn->intrinsic;
1533
1534 switch (op) {
1535 default:
1536 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
1537 return false;
1538 }
1539
1540 return true;
1541 }
1542
// Convert a NIR jump (return/break/continue) into the matching nv50 IR
// flow instruction plus a CFG edge of the right kind.
bool
Converter::visit(nir_jump_instr *insn)
{
   switch (insn->type) {
   case nir_jump_return:
      // TODO: this only works in the main function
      mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
      bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
      break;
   case nir_jump_break:
   case nir_jump_continue: {
      bool isBreak = insn->type == nir_jump_break;
      nir_block *block = insn->instr.block;
      // a jump terminates its block, so there is exactly one successor
      assert(!block->successors[1]);
      BasicBlock *target = convert(block->successors[0]);
      mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
      // continue closes the loop: BACK edge; break exits it: CROSS edge
      bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
      break;
   }
   default:
      ERROR("unknown nir_jump_type %u\n", insn->type);
      return false;
   }

   return true;
}
1569
1570 bool
1571 Converter::visit(nir_load_const_instr *insn)
1572 {
1573 assert(insn->def.bit_size <= 64);
1574
1575 LValues &newDefs = convert(&insn->def);
1576 for (int i = 0; i < insn->def.num_components; i++) {
1577 switch (insn->def.bit_size) {
1578 case 64:
1579 loadImm(newDefs[i], insn->value.u64[i]);
1580 break;
1581 case 32:
1582 loadImm(newDefs[i], insn->value.u32[i]);
1583 break;
1584 case 16:
1585 loadImm(newDefs[i], insn->value.u16[i]);
1586 break;
1587 case 8:
1588 loadImm(newDefs[i], insn->value.u8[i]);
1589 break;
1590 }
1591 }
1592 return true;
1593 }
1594
1595 #define DEFAULT_CHECKS \
1596 if (insn->dest.dest.ssa.num_components > 1) { \
1597 ERROR("nir_alu_instr only supported with 1 component!\n"); \
1598 return false; \
1599 } \
1600 if (insn->dest.write_mask != 1) { \
1601 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
1602 return false; \
1603 }
1604 bool
1605 Converter::visit(nir_alu_instr *insn)
1606 {
1607 const nir_op op = insn->op;
1608 const nir_op_info &info = nir_op_infos[op];
1609 DataType dType = getDType(insn);
1610 const std::vector<DataType> sTypes = getSTypes(insn);
1611
1612 Instruction *oldPos = this->bb->getExit();
1613
1614 switch (op) {
1615 case nir_op_fabs:
1616 case nir_op_iabs:
1617 case nir_op_fadd:
1618 case nir_op_iadd:
1619 case nir_op_fand:
1620 case nir_op_iand:
1621 case nir_op_fceil:
1622 case nir_op_fcos:
1623 case nir_op_fddx:
1624 case nir_op_fddx_coarse:
1625 case nir_op_fddx_fine:
1626 case nir_op_fddy:
1627 case nir_op_fddy_coarse:
1628 case nir_op_fddy_fine:
1629 case nir_op_fdiv:
1630 case nir_op_idiv:
1631 case nir_op_udiv:
1632 case nir_op_fexp2:
1633 case nir_op_ffloor:
1634 case nir_op_ffma:
1635 case nir_op_flog2:
1636 case nir_op_fmax:
1637 case nir_op_imax:
1638 case nir_op_umax:
1639 case nir_op_fmin:
1640 case nir_op_imin:
1641 case nir_op_umin:
1642 case nir_op_fmod:
1643 case nir_op_imod:
1644 case nir_op_umod:
1645 case nir_op_fmul:
1646 case nir_op_imul:
1647 case nir_op_imul_high:
1648 case nir_op_umul_high:
1649 case nir_op_fneg:
1650 case nir_op_ineg:
1651 case nir_op_fnot:
1652 case nir_op_inot:
1653 case nir_op_for:
1654 case nir_op_ior:
1655 case nir_op_pack_64_2x32_split:
1656 case nir_op_fpow:
1657 case nir_op_frcp:
1658 case nir_op_frem:
1659 case nir_op_irem:
1660 case nir_op_frsq:
1661 case nir_op_fsat:
1662 case nir_op_ishr:
1663 case nir_op_ushr:
1664 case nir_op_fsin:
1665 case nir_op_fsqrt:
1666 case nir_op_fsub:
1667 case nir_op_isub:
1668 case nir_op_ftrunc:
1669 case nir_op_ishl:
1670 case nir_op_fxor:
1671 case nir_op_ixor: {
1672 DEFAULT_CHECKS;
1673 LValues &newDefs = convert(&insn->dest);
1674 operation preOp = preOperationNeeded(op);
1675 if (preOp != OP_NOP) {
1676 assert(info.num_inputs < 2);
1677 Value *tmp = getSSA(typeSizeof(dType));
1678 Instruction *i0 = mkOp(preOp, dType, tmp);
1679 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
1680 if (info.num_inputs) {
1681 i0->setSrc(0, getSrc(&insn->src[0]));
1682 i1->setSrc(0, tmp);
1683 }
1684 i1->subOp = getSubOp(op);
1685 } else {
1686 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
1687 for (unsigned s = 0u; s < info.num_inputs; ++s) {
1688 i->setSrc(s, getSrc(&insn->src[s]));
1689 }
1690 i->subOp = getSubOp(op);
1691 }
1692 break;
1693 }
1694 case nir_op_ifind_msb:
1695 case nir_op_ufind_msb: {
1696 DEFAULT_CHECKS;
1697 LValues &newDefs = convert(&insn->dest);
1698 dType = sTypes[0];
1699 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
1700 break;
1701 }
1702 case nir_op_fround_even: {
1703 DEFAULT_CHECKS;
1704 LValues &newDefs = convert(&insn->dest);
1705 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
1706 break;
1707 }
1708 // convert instructions
1709 case nir_op_f2f32:
1710 case nir_op_f2i32:
1711 case nir_op_f2u32:
1712 case nir_op_i2f32:
1713 case nir_op_i2i32:
1714 case nir_op_u2f32:
1715 case nir_op_u2u32:
1716 case nir_op_f2f64:
1717 case nir_op_f2i64:
1718 case nir_op_f2u64:
1719 case nir_op_i2f64:
1720 case nir_op_i2i64:
1721 case nir_op_u2f64:
1722 case nir_op_u2u64: {
1723 DEFAULT_CHECKS;
1724 LValues &newDefs = convert(&insn->dest);
1725 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
1726 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
1727 i->rnd = ROUND_Z;
1728 i->sType = sTypes[0];
1729 break;
1730 }
1731 // compare instructions
1732 case nir_op_feq32:
1733 case nir_op_ieq32:
1734 case nir_op_fge32:
1735 case nir_op_ige32:
1736 case nir_op_uge32:
1737 case nir_op_flt32:
1738 case nir_op_ilt32:
1739 case nir_op_ult32:
1740 case nir_op_fne32:
1741 case nir_op_ine32: {
1742 DEFAULT_CHECKS;
1743 LValues &newDefs = convert(&insn->dest);
1744 Instruction *i = mkCmp(getOperation(op),
1745 getCondCode(op),
1746 dType,
1747 newDefs[0],
1748 dType,
1749 getSrc(&insn->src[0]),
1750 getSrc(&insn->src[1]));
1751 if (info.num_inputs == 3)
1752 i->setSrc(2, getSrc(&insn->src[2]));
1753 i->sType = sTypes[0];
1754 break;
1755 }
1756 // those are weird ALU ops and need special handling, because
1757 // 1. they are always componend based
1758 // 2. they basically just merge multiple values into one data type
1759 case nir_op_imov:
1760 case nir_op_fmov:
1761 case nir_op_vec2:
1762 case nir_op_vec3:
1763 case nir_op_vec4: {
1764 LValues &newDefs = convert(&insn->dest);
1765 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
1766 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
1767 }
1768 break;
1769 }
1770 // (un)pack
1771 case nir_op_pack_64_2x32: {
1772 LValues &newDefs = convert(&insn->dest);
1773 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
1774 merge->setSrc(0, getSrc(&insn->src[0], 0));
1775 merge->setSrc(1, getSrc(&insn->src[0], 1));
1776 break;
1777 }
1778 case nir_op_pack_half_2x16_split: {
1779 LValues &newDefs = convert(&insn->dest);
1780 Value *tmpH = getSSA();
1781 Value *tmpL = getSSA();
1782
1783 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
1784 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
1785 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
1786 break;
1787 }
1788 case nir_op_unpack_half_2x16_split_x:
1789 case nir_op_unpack_half_2x16_split_y: {
1790 LValues &newDefs = convert(&insn->dest);
1791 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
1792 if (op == nir_op_unpack_half_2x16_split_y)
1793 cvt->subOp = 1;
1794 break;
1795 }
1796 case nir_op_unpack_64_2x32: {
1797 LValues &newDefs = convert(&insn->dest);
1798 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
1799 break;
1800 }
1801 case nir_op_unpack_64_2x32_split_x: {
1802 LValues &newDefs = convert(&insn->dest);
1803 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
1804 break;
1805 }
1806 case nir_op_unpack_64_2x32_split_y: {
1807 LValues &newDefs = convert(&insn->dest);
1808 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
1809 break;
1810 }
1811 // special instructions
1812 case nir_op_fsign:
1813 case nir_op_isign: {
1814 DEFAULT_CHECKS;
1815 DataType iType;
1816 if (::isFloatType(dType))
1817 iType = TYPE_F32;
1818 else
1819 iType = TYPE_S32;
1820
1821 LValues &newDefs = convert(&insn->dest);
1822 LValue *val0 = getScratch();
1823 LValue *val1 = getScratch();
1824 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
1825 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
1826
1827 if (dType == TYPE_F64) {
1828 mkOp2(OP_SUB, iType, val0, val0, val1);
1829 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
1830 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
1831 mkOp2(OP_SUB, iType, val0, val1, val0);
1832 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
1833 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
1834 } else if (::isFloatType(dType))
1835 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
1836 else
1837 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
1838 break;
1839 }
1840 case nir_op_fcsel:
1841 case nir_op_b32csel: {
1842 DEFAULT_CHECKS;
1843 LValues &newDefs = convert(&insn->dest);
1844 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
1845 break;
1846 }
1847 case nir_op_ibitfield_extract:
1848 case nir_op_ubitfield_extract: {
1849 DEFAULT_CHECKS;
1850 Value *tmp = getSSA();
1851 LValues &newDefs = convert(&insn->dest);
1852 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
1853 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
1854 break;
1855 }
1856 case nir_op_bfm: {
1857 DEFAULT_CHECKS;
1858 LValues &newDefs = convert(&insn->dest);
1859 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
1860 break;
1861 }
1862 case nir_op_bitfield_insert: {
1863 DEFAULT_CHECKS;
1864 LValues &newDefs = convert(&insn->dest);
1865 LValue *temp = getSSA();
1866 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
1867 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
1868 break;
1869 }
1870 case nir_op_bit_count: {
1871 DEFAULT_CHECKS;
1872 LValues &newDefs = convert(&insn->dest);
1873 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
1874 break;
1875 }
1876 case nir_op_bitfield_reverse: {
1877 DEFAULT_CHECKS;
1878 LValues &newDefs = convert(&insn->dest);
1879 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
1880 break;
1881 }
1882 case nir_op_find_lsb: {
1883 DEFAULT_CHECKS;
1884 LValues &newDefs = convert(&insn->dest);
1885 Value *tmp = getSSA();
1886 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
1887 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1888 break;
1889 }
1890 // boolean conversions
1891 case nir_op_b2f32: {
1892 DEFAULT_CHECKS;
1893 LValues &newDefs = convert(&insn->dest);
1894 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
1895 break;
1896 }
1897 case nir_op_b2f64: {
1898 DEFAULT_CHECKS;
1899 LValues &newDefs = convert(&insn->dest);
1900 Value *tmp = getSSA(4);
1901 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
1902 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
1903 break;
1904 }
1905 case nir_op_f2b32:
1906 case nir_op_i2b32: {
1907 DEFAULT_CHECKS;
1908 LValues &newDefs = convert(&insn->dest);
1909 Value *src1;
1910 if (typeSizeof(sTypes[0]) == 8) {
1911 src1 = loadImm(getSSA(8), 0.0);
1912 } else {
1913 src1 = zero;
1914 }
1915 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
1916 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
1917 break;
1918 }
1919 case nir_op_b2i32: {
1920 DEFAULT_CHECKS;
1921 LValues &newDefs = convert(&insn->dest);
1922 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
1923 break;
1924 }
1925 case nir_op_b2i64: {
1926 DEFAULT_CHECKS;
1927 LValues &newDefs = convert(&insn->dest);
1928 LValue *def = getScratch();
1929 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
1930 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
1931 break;
1932 }
1933 default:
1934 ERROR("unknown nir_op %s\n", info.name);
1935 return false;
1936 }
1937
1938 if (!oldPos) {
1939 oldPos = this->bb->getEntry();
1940 oldPos->precise = insn->exact;
1941 }
1942
1943 if (unlikely(!oldPos))
1944 return true;
1945
1946 while (oldPos->next) {
1947 oldPos = oldPos->next;
1948 oldPos->precise = insn->exact;
1949 }
1950 oldPos->saturate = insn->dest.saturate;
1951
1952 return true;
1953 }
1954 #undef DEFAULT_CHECKS
1955
1956 bool
1957 Converter::run()
1958 {
1959 bool progress;
1960
1961 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1962 nir_print_shader(nir, stderr);
1963
1964 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
1965 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
1966 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
1967 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1968 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
1969 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
1970
1971 do {
1972 progress = false;
1973 NIR_PASS(progress, nir, nir_copy_prop);
1974 NIR_PASS(progress, nir, nir_opt_remove_phis);
1975 NIR_PASS(progress, nir, nir_opt_trivial_continues);
1976 NIR_PASS(progress, nir, nir_opt_cse);
1977 NIR_PASS(progress, nir, nir_opt_algebraic);
1978 NIR_PASS(progress, nir, nir_opt_constant_folding);
1979 NIR_PASS(progress, nir, nir_copy_prop);
1980 NIR_PASS(progress, nir, nir_opt_dce);
1981 NIR_PASS(progress, nir, nir_opt_dead_cf);
1982 } while (progress);
1983
1984 NIR_PASS_V(nir, nir_lower_bool_to_int32);
1985 NIR_PASS_V(nir, nir_lower_locals_to_regs);
1986 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
1987 NIR_PASS_V(nir, nir_convert_from_ssa, true);
1988
1989 // Garbage collect dead instructions
1990 nir_sweep(nir);
1991
1992 if (!parseNIR()) {
1993 ERROR("Couldn't prase NIR!\n");
1994 return false;
1995 }
1996
1997 if (!assignSlots()) {
1998 ERROR("Couldn't assign slots!\n");
1999 return false;
2000 }
2001
2002 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2003 nir_print_shader(nir, stderr);
2004
2005 nir_foreach_function(function, nir) {
2006 if (!visit(function))
2007 return false;
2008 }
2009
2010 return true;
2011 }
2012
2013 } // unnamed namespace
2014
2015 namespace nv50_ir {
2016
2017 bool
2018 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2019 {
2020 nir_shader *nir = (nir_shader*)info->bin.source;
2021 Converter converter(this, nir, info);
2022 bool result = converter.run();
2023 if (!result)
2024 return result;
2025 LoweringHelper lowering;
2026 lowering.run(this);
2027 tlsSize = info->bin.tlsSpace;
2028 return result;
2029 }
2030
2031 } // namespace nv50_ir