nv50/ir/nir: implement geometry shader nir_intrinsics
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <vector>
40
41 namespace {
42
43 #if __cplusplus >= 201103L
44 using std::hash;
45 using std::unordered_map;
46 #else
47 using std::tr1::hash;
48 using std::tr1::unordered_map;
49 #endif
50
51 using namespace nv50_ir;
52
53 int
54 type_size(const struct glsl_type *type)
55 {
56 return glsl_count_attribute_slots(type, false);
57 }
58
// Translates a single NIR shader into nv50 IR. Construct one per shader and
// call run() to perform the whole conversion.
class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   // per-def list of nv50 ir values, one entry per vector component
   typedef std::vector<LValue*> LValues;
   typedef unordered_map<unsigned, LValues> NirDefMap;
   typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   // convert(): map NIR objects onto their nv50 ir equivalents
   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   // getSrc(): fetch the nv50 ir value backing one component of a NIR source
   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value has not a constant part, the Value gets returned
   // through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   // fill interpolation qualifiers of an input varying
   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   // type classification helpers for picking signed/float variants
   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   // destination/source DataType deduction
   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   // NIR op -> nv50 ir opcode / sub-op / condition-code mapping
   operation getOperation(nir_intrinsic_op);
   operation getOperation(nir_op);
   operation getOperation(nir_texop);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_intrinsic_op);
   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   // shader in/out/sysval slot assignment and NIR traversal entry points
   bool assignSlots();
   bool parseNIR();

   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);
   bool visit(nir_ssa_undef_instr *);
   bool visit(nir_tex_instr *);

   // tex stuff
   Value* applyProjection(Value *src, Value *proj);

   nir_shader *nir;

   NirDefMap ssaDefs;            // nir_ssa_def index -> LValues
   NirDefMap regDefs;            // nir_register index -> LValues
   NirArrayLMemOffsets regToLmemOffset;
   NirBlockMap blocks;           // nir_block index -> BasicBlock
   unsigned int curLoopDepth;

   BasicBlock *exit;
   Value *zero;                  // shared immediate 0

   int clipVertexOutput;         // -1 until the clip-vertex output is known

   union {
      struct {
         Value *position;
      } fp;
   };
};
168
// Sets up converter state for one shader; clipVertexOutput starts at -1
// meaning "no clip-vertex/position output seen yet" (see assignSlots).
Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
   : ConverterCommon(prog, info),
     nir(nir),
     curLoopDepth(0),
     clipVertexOutput(-1)
{
   // shared immediate zero, used by various emit paths
   zero = mkImm((uint32_t)0);
}
177
178 BasicBlock *
179 Converter::convert(nir_block *block)
180 {
181 NirBlockMap::iterator it = blocks.find(block->index);
182 if (it != blocks.end())
183 return it->second;
184
185 BasicBlock *bb = new BasicBlock(func);
186 blocks[block->index] = bb;
187 return bb;
188 }
189
190 bool
191 Converter::isFloatType(nir_alu_type type)
192 {
193 return nir_alu_type_get_base_type(type) == nir_type_float;
194 }
195
196 bool
197 Converter::isSignedType(nir_alu_type type)
198 {
199 return nir_alu_type_get_base_type(type) == nir_type_int;
200 }
201
202 bool
203 Converter::isResultFloat(nir_op op)
204 {
205 const nir_op_info &info = nir_op_infos[op];
206 if (info.output_type != nir_type_invalid)
207 return isFloatType(info.output_type);
208
209 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
210 assert(false);
211 return true;
212 }
213
214 bool
215 Converter::isResultSigned(nir_op op)
216 {
217 switch (op) {
218 // there is no umul and we get wrong results if we treat all muls as signed
219 case nir_op_imul:
220 case nir_op_inot:
221 return false;
222 default:
223 const nir_op_info &info = nir_op_infos[op];
224 if (info.output_type != nir_type_invalid)
225 return isSignedType(info.output_type);
226 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
227 assert(false);
228 return true;
229 }
230 }
231
232 DataType
233 Converter::getDType(nir_alu_instr *insn)
234 {
235 if (insn->dest.dest.is_ssa)
236 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
237 else
238 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
239 }
240
241 DataType
242 Converter::getDType(nir_intrinsic_instr *insn)
243 {
244 if (insn->dest.is_ssa)
245 return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
246 else
247 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
248 }
249
250 DataType
251 Converter::getDType(nir_op op, uint8_t bitSize)
252 {
253 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
254 if (ty == TYPE_NONE) {
255 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
256 assert(false);
257 }
258 return ty;
259 }
260
261 std::vector<DataType>
262 Converter::getSTypes(nir_alu_instr *insn)
263 {
264 const nir_op_info &info = nir_op_infos[insn->op];
265 std::vector<DataType> res(info.num_inputs);
266
267 for (uint8_t i = 0; i < info.num_inputs; ++i) {
268 if (info.input_types[i] != nir_type_invalid) {
269 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
270 } else {
271 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
272 assert(false);
273 res[i] = TYPE_NONE;
274 break;
275 }
276 }
277
278 return res;
279 }
280
281 DataType
282 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
283 {
284 uint8_t bitSize;
285 if (src.is_ssa)
286 bitSize = src.ssa->bit_size;
287 else
288 bitSize = src.reg.reg->bit_size;
289
290 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
291 if (ty == TYPE_NONE) {
292 const char *str;
293 if (isFloat)
294 str = "float";
295 else if (isSigned)
296 str = "int";
297 else
298 str = "uint";
299 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
300 assert(false);
301 }
302 return ty;
303 }
304
// Maps a nir_op onto the nv50 ir opcode implementing it. Int and float
// variants of an op share one opcode; the operand DataType picked elsewhere
// selects the actual hardware behavior. Unknown ops return OP_NOP.
operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_fand:
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   // all numeric conversions are a single CVT; src/dst types carry the detail
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_fnot:
   case nir_op_inot:
      return OP_NOT;
   case nir_op_for:
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   // all comparisons become OP_SET; getCondCode() supplies the condition
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_fsub:
   case nir_op_isub:
      return OP_SUB;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_fxor:
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}
433
// Maps a nir_texop onto the nv50 ir texture opcode. All query-style ops
// (levels, sample count, size) share OP_TXQ; unknown ops return OP_NOP.
operation
Converter::getOperation(nir_texop op)
{
   switch (op) {
   case nir_texop_tex:
      return OP_TEX;
   case nir_texop_lod:
      return OP_TXLQ;
   case nir_texop_txb:
      return OP_TXB;
   case nir_texop_txd:
      return OP_TXD;
   case nir_texop_txf:
   case nir_texop_txf_ms:
      return OP_TXF;
   case nir_texop_tg4:
      return OP_TXG;
   case nir_texop_txl:
      return OP_TXL;
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_txs:
      return OP_TXQ;
   default:
      ERROR("couldn't get operation for nir_texop %u\n", op);
      assert(false);
      return OP_NOP;
   }
}
463
464 operation
465 Converter::getOperation(nir_intrinsic_op op)
466 {
467 switch (op) {
468 case nir_intrinsic_emit_vertex:
469 return OP_EMIT;
470 case nir_intrinsic_end_primitive:
471 return OP_RESTART;
472 default:
473 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
474 assert(false);
475 return OP_NOP;
476 }
477 }
478
479 operation
480 Converter::preOperationNeeded(nir_op op)
481 {
482 switch (op) {
483 case nir_op_fcos:
484 case nir_op_fsin:
485 return OP_PRESIN;
486 default:
487 return OP_NOP;
488 }
489 }
490
491 int
492 Converter::getSubOp(nir_op op)
493 {
494 switch (op) {
495 case nir_op_imul_high:
496 case nir_op_umul_high:
497 return NV50_IR_SUBOP_MUL_HIGH;
498 default:
499 return 0;
500 }
501 }
502
503 int
504 Converter::getSubOp(nir_intrinsic_op op)
505 {
506 switch (op) {
507 case nir_intrinsic_vote_all:
508 return NV50_IR_SUBOP_VOTE_ALL;
509 case nir_intrinsic_vote_any:
510 return NV50_IR_SUBOP_VOTE_ANY;
511 case nir_intrinsic_vote_ieq:
512 return NV50_IR_SUBOP_VOTE_UNI;
513 default:
514 return 0;
515 }
516 }
517
// Condition code for a NIR comparison op. Note fne32 maps to CC_NEU
// (not-equal-unordered) so NaN operands compare as "not equal", while the
// integer ine32 uses the plain CC_NE.
CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}
543
544 Converter::LValues&
545 Converter::convert(nir_alu_dest *dest)
546 {
547 return convert(&dest->dest);
548 }
549
550 Converter::LValues&
551 Converter::convert(nir_dest *dest)
552 {
553 if (dest->is_ssa)
554 return convert(&dest->ssa);
555 if (dest->reg.indirect) {
556 ERROR("no support for indirects.");
557 assert(false);
558 }
559 return convert(dest->reg.reg);
560 }
561
562 Converter::LValues&
563 Converter::convert(nir_register *reg)
564 {
565 NirDefMap::iterator it = regDefs.find(reg->index);
566 if (it != regDefs.end())
567 return it->second;
568
569 LValues newDef(reg->num_components);
570 for (uint8_t i = 0; i < reg->num_components; i++)
571 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
572 return regDefs[reg->index] = newDef;
573 }
574
575 Converter::LValues&
576 Converter::convert(nir_ssa_def *def)
577 {
578 NirDefMap::iterator it = ssaDefs.find(def->index);
579 if (it != ssaDefs.end())
580 return it->second;
581
582 LValues newDef(def->num_components);
583 for (uint8_t i = 0; i < def->num_components; i++)
584 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
585 return ssaDefs[def->index] = newDef;
586 }
587
588 Value*
589 Converter::getSrc(nir_alu_src *src, uint8_t component)
590 {
591 if (src->abs || src->negate) {
592 ERROR("modifiers currently not supported on nir_alu_src\n");
593 assert(false);
594 }
595 return getSrc(&src->src, src->swizzle[component]);
596 }
597
598 Value*
599 Converter::getSrc(nir_register *reg, uint8_t idx)
600 {
601 NirDefMap::iterator it = regDefs.find(reg->index);
602 if (it == regDefs.end())
603 return convert(reg)[idx];
604 return it->second[idx];
605 }
606
607 Value*
608 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
609 {
610 if (src->is_ssa)
611 return getSrc(src->ssa, idx);
612
613 if (src->reg.indirect) {
614 if (indirect)
615 return getSrc(src->reg.indirect, idx);
616 ERROR("no support for indirects.");
617 assert(false);
618 return NULL;
619 }
620
621 return getSrc(src->reg.reg, idx);
622 }
623
624 Value*
625 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
626 {
627 NirDefMap::iterator it = ssaDefs.find(src->index);
628 if (it == ssaDefs.end()) {
629 ERROR("SSA value %u not found\n", src->index);
630 assert(false);
631 return NULL;
632 }
633 return it->second[idx];
634 }
635
// Splits a source into constant and dynamic parts: if the source folds to a
// constant, return it and set indirect to NULL; otherwise return 0 and hand
// back the dynamic value through 'indirect'.
uint32_t
Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
{
   nir_const_value *offset = nir_src_as_const_value(*src);

   if (offset) {
      indirect = NULL;
      // NOTE(review): always reads component 0 of the constant, ignoring
      // idx — presumably fine because indirect offsets are scalar; confirm.
      return offset->u32[0];
   }

   indirect = getSrc(src, idx, true);
   return 0;
}
649
// Like getIndirect(nir_src) but for intrinsic source s / component c,
// folding in the intrinsic's base offset. A dynamic part is pre-shifted
// left by 4 (scaled by 16) into an address register for the load/store
// emitters.
uint32_t
Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
{
   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
   if (indirect)
      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
   return idx;
}
658
// Translates a vertex-shader input slot (gl_vert_attrib) into a TGSI
// semantic name/index pair. Generic and texcoord ranges are handled by
// offset; the remaining named attributes map individually.
static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}
719
// Translates a varying slot (gl_varying_slot) into a TGSI semantic
// name/index pair. Patch, generic and texcoord ranges are handled by
// offset first (order matters: PATCH0 is checked before VAR0); named
// slots map individually below.
static void
varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VARYING_SLOT_TESS_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VARYING_SLOT_PATCH0) {
      *name = TGSI_SEMANTIC_PATCH;
      *index = slot - VARYING_SLOT_PATCH0;
      return;
   }

   if (slot >= VARYING_SLOT_VAR0) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VARYING_SLOT_VAR0;
      return;
   }

   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VARYING_SLOT_TEX0;
      return;
   }

   switch (slot) {
   case VARYING_SLOT_BFC0:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 0;
      break;
   case VARYING_SLOT_BFC1:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_DIST0:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 0;
      break;
   case VARYING_SLOT_CLIP_DIST1:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_VERTEX:
      *name = TGSI_SEMANTIC_CLIPVERTEX;
      *index = 0;
      break;
   case VARYING_SLOT_COL0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VARYING_SLOT_COL1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VARYING_SLOT_EDGE:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VARYING_SLOT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      *index = 0;
      break;
   case VARYING_SLOT_FOGC:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VARYING_SLOT_LAYER:
      *name = TGSI_SEMANTIC_LAYER;
      *index = 0;
      break;
   case VARYING_SLOT_PNTC:
      *name = TGSI_SEMANTIC_PCOORD;
      *index = 0;
      break;
   case VARYING_SLOT_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VARYING_SLOT_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      *index = 0;
      break;
   case VARYING_SLOT_PSIZ:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      *index = 0;
      break;
   case VARYING_SLOT_VIEWPORT:
      *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
      *index = 0;
      break;
   default:
      ERROR("unknown varying slot %u\n", slot);
      assert(false);
      break;
   }
}
828
// Translates a fragment-shader output slot (gl_frag_result) into a TGSI
// semantic name/index pair.
static void
frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
{
   if (slot >= FRAG_RESULT_DATA0) {
      *name = TGSI_SEMANTIC_COLOR;
      // NOTE(review): "- FRAG_RESULT_COLOR - 2" presumably equals
      // "- FRAG_RESULT_DATA0" with the gl_frag_result enum layout, giving
      // DATA0 index 0 — confirm against the enum definition.
      *index = slot - FRAG_RESULT_COLOR - 2; // intentional
      return;
   }

   switch (slot) {
   case FRAG_RESULT_COLOR:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case FRAG_RESULT_DEPTH:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      *index = 0;
      break;
   default:
      ERROR("unknown frag result slot %u\n", slot);
      assert(false);
      break;
   }
}
857
// copy of _mesa_sysval_to_semantic
// Translates a SYSTEM_VALUE_* enum into a TGSI semantic name; the index is
// always 0 for system values.
static void
system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
{
   *index = 0;
   switch (val) {
   // Vertex shader
   case SYSTEM_VALUE_VERTEX_ID:
      *name = TGSI_SEMANTIC_VERTEXID;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
      *name = TGSI_SEMANTIC_INSTANCEID;
      break;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
      break;
   case SYSTEM_VALUE_BASE_VERTEX:
      *name = TGSI_SEMANTIC_BASEVERTEX;
      break;
   case SYSTEM_VALUE_BASE_INSTANCE:
      *name = TGSI_SEMANTIC_BASEINSTANCE;
      break;
   case SYSTEM_VALUE_DRAW_ID:
      *name = TGSI_SEMANTIC_DRAWID;
      break;

   // Geometry shader
   case SYSTEM_VALUE_INVOCATION_ID:
      *name = TGSI_SEMANTIC_INVOCATIONID;
      break;

   // Fragment shader
   case SYSTEM_VALUE_FRAG_COORD:
      *name = TGSI_SEMANTIC_POSITION;
      break;
   case SYSTEM_VALUE_FRONT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      break;
   case SYSTEM_VALUE_SAMPLE_ID:
      *name = TGSI_SEMANTIC_SAMPLEID;
      break;
   case SYSTEM_VALUE_SAMPLE_POS:
      *name = TGSI_SEMANTIC_SAMPLEPOS;
      break;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      break;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      *name = TGSI_SEMANTIC_HELPER_INVOCATION;
      break;

   // Tessellation shader
   case SYSTEM_VALUE_TESS_COORD:
      *name = TGSI_SEMANTIC_TESSCOORD;
      break;
   case SYSTEM_VALUE_VERTICES_IN:
      *name = TGSI_SEMANTIC_VERTICESIN;
      break;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      break;

   // Compute shader
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      *name = TGSI_SEMANTIC_THREAD_ID;
      break;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      *name = TGSI_SEMANTIC_BLOCK_ID;
      break;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      *name = TGSI_SEMANTIC_GRID_SIZE;
      break;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      *name = TGSI_SEMANTIC_BLOCK_SIZE;
      break;

   // ARB_shader_ballot
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
      break;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
      break;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
      break;

   default:
      ERROR("unknown system value %u\n", val);
      assert(false);
      break;
   }
}
969
// Fills the interpolation qualifier bits of one input varying.
// 'mode' is an INTERP_MODE_* value, 'semantic' a TGSI_SEMANTIC_* name used
// to pick defaults when no explicit qualifier was given.
void
Converter::setInterpolate(nv50_ir_varying *var,
                          uint8_t mode,
                          bool centroid,
                          unsigned semantic)
{
   switch (mode) {
   case INTERP_MODE_FLAT:
      var->flat = 1;
      break;
   case INTERP_MODE_NONE:
      // unqualified: colors get the 'sc' bit (presumably shade-model
      // controlled color — confirm against nv50_ir_varying), position is
      // interpolated linearly
      if (semantic == TGSI_SEMANTIC_COLOR)
         var->sc = 1;
      else if (semantic == TGSI_SEMANTIC_POSITION)
         var->linear = 1;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      var->linear = 1;
      break;
   case INTERP_MODE_SMOOTH:
      // smooth (perspective-correct) is the hardware default; no bits set
      break;
   }
   var->centroid = centroid;
}
994
// Number of IO slots a variable of 'type' occupies in the given shader
// stage. For arrayed per-vertex IO (GS inputs, tess control/eval) the
// outermost per-vertex array dimension must not count towards the slot
// count, so it is divided or stripped off here.
static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      // GS inputs are arrays over the input vertices; divide that out
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}
1024
// Walks the shader's uniform/input/output variables and system values and
// fills the nv50_ir_prog_info slot tables (info->in / info->out / info->sv)
// with TGSI-style semantics, masks and interpolation info, then lets the
// driver finalize hardware slot assignment via info->assignSlots.
bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;

   // we have to fixup the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      // 64-bit types with more than two components straddle two slots
      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      // fill one info->in record per occupied slot; 64-bit types use two
      // mask nibbles spread over the slot pair
      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   info->numOutputs = 0;
   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            // user-written clip distances disable generated user clipping
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            // position doubles as clip vertex if none was declared
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ll << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   // translate the system-values-read bitfield into info->sv records
   info->numSysVals = 0;
   for (uint8_t i = 0; i < 64; ++i) {
      if (!(nir->info.system_values_read & 1ll << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   // append synthesized clip-distance outputs for generated user clipping
   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}
1242
1243 uint32_t
1244 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1245 {
1246 DataType ty;
1247 int offset = nir_intrinsic_component(insn);
1248 bool input;
1249
1250 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1251 ty = getDType(insn);
1252 else
1253 ty = getSType(insn->src[0], false, false);
1254
1255 switch (insn->intrinsic) {
1256 case nir_intrinsic_load_input:
1257 case nir_intrinsic_load_interpolated_input:
1258 case nir_intrinsic_load_per_vertex_input:
1259 input = true;
1260 break;
1261 case nir_intrinsic_load_output:
1262 case nir_intrinsic_load_per_vertex_output:
1263 case nir_intrinsic_store_output:
1264 case nir_intrinsic_store_per_vertex_output:
1265 input = false;
1266 break;
1267 default:
1268 ERROR("unknown intrinsic in getSlotAddress %s",
1269 nir_intrinsic_infos[insn->intrinsic].name);
1270 input = false;
1271 assert(false);
1272 break;
1273 }
1274
1275 if (typeSizeof(ty) == 8) {
1276 slot *= 2;
1277 slot += offset;
1278 if (slot >= 4) {
1279 idx += 1;
1280 slot -= 4;
1281 }
1282 } else {
1283 slot += offset;
1284 }
1285
1286 assert(slot < 4);
1287 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1288 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1289
1290 const nv50_ir_varying *vary = input ? info->in : info->out;
1291 return vary[idx].slot[slot] * 4;
1292 }
1293
1294 Instruction *
1295 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1296 uint32_t base, uint8_t c, Value *indirect0,
1297 Value *indirect1, bool patch)
1298 {
1299 unsigned int tySize = typeSizeof(ty);
1300
1301 if (tySize == 8 &&
1302 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1303 Value *lo = getSSA();
1304 Value *hi = getSSA();
1305
1306 Instruction *loi =
1307 mkLoad(TYPE_U32, lo,
1308 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1309 indirect0);
1310 loi->setIndirect(0, 1, indirect1);
1311 loi->perPatch = patch;
1312
1313 Instruction *hii =
1314 mkLoad(TYPE_U32, hi,
1315 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1316 indirect0);
1317 hii->setIndirect(0, 1, indirect1);
1318 hii->perPatch = patch;
1319
1320 return mkOp2(OP_MERGE, ty, def, lo, hi);
1321 } else {
1322 Instruction *ld =
1323 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1324 ld->setIndirect(0, 1, indirect1);
1325 ld->perPatch = patch;
1326 return ld;
1327 }
1328 }
1329
1330 void
1331 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1332 DataType ty, Value *src, uint8_t idx, uint8_t c,
1333 Value *indirect0, Value *indirect1)
1334 {
1335 uint8_t size = typeSizeof(ty);
1336 uint32_t address = getSlotAddress(insn, idx, c);
1337
1338 if (size == 8 && indirect0) {
1339 Value *split[2];
1340 mkSplit(split, 4, src);
1341
1342 if (op == OP_EXPORT) {
1343 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1344 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1345 }
1346
1347 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1348 split[0])->perPatch = info->out[idx].patch;
1349 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1350 split[1])->perPatch = info->out[idx].patch;
1351 } else {
1352 if (op == OP_EXPORT)
1353 src = mkMov(getSSA(size), src, ty)->getDef(0);
1354 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1355 src)->perPatch = info->out[idx].patch;
1356 }
1357 }
1358
// Copy shader-wide properties from nir_shader_info into the
// nv50_ir_prog_info structure consumed by the codegen backend.
// Always succeeds; unknown shader types simply set no stage properties.
bool
Converter::parseNIR()
{
   info->bin.tlsSpace = 0;
   info->io.clipDistances = nir->info.clip_distance_array_size;
   info->io.cullDistances = nir->info.cull_distance_array_size;

   switch(prog->getType()) {
   case Program::TYPE_COMPUTE:
      // local workgroup dimensions and shared memory size
      info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
      info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
      info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
      info->bin.smemSize = nir->info.cs.shared_size;
      break;
   case Program::TYPE_FRAGMENT:
      info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
      // reading the sample id or position forces per-sample invocation
      info->prop.fp.persampleInvocation =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
      info->prop.fp.readsSampleLocations =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
      info->prop.fp.usesSampleMaskIn =
         !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
      break;
   case Program::TYPE_GEOMETRY:
      info->prop.gp.inputPrim = nir->info.gs.input_primitive;
      info->prop.gp.instanceCount = nir->info.gs.invocations;
      info->prop.gp.maxVertices = nir->info.gs.vertices_out;
      info->prop.gp.outputPrim = nir->info.gs.output_primitive;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // the hardware has no isoline domain; treat it as lines
      if (nir->info.tess.primitive_mode == GL_ISOLINES)
         info->prop.tp.domain = GL_LINES;
      else
         info->prop.tp.domain = nir->info.tess.primitive_mode;
      info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
      info->prop.tp.outputPrim =
         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
      // map NIR spacing enum onto the nv50 partitioning encoding
      info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
      info->prop.tp.winding = !nir->info.tess.ccw;
      break;
   case Program::TYPE_VERTEX:
      info->prop.vp.usesDrawParameters =
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
      break;
   default:
      break;
   }

   return true;
}
1415
// Translate a NIR function into nv50_ir. Sets up the entry/exit basic
// blocks, emits stage-specific prologue code, allocates local memory for
// array registers, then visits the CFG. Returns false on any failed
// sub-visit.
bool
Converter::visit(nir_function *function)
{
   // we only support emitting the main function for now
   assert(!strcmp(function->name, "main"));
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   // scratch registers to capture the clip vertex for generated user
   // clip planes (see handleUserClipPlanes below)
   if (info->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      // base output offset = laneid - invocation id
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      // keep 1/w around for perspective interpolation (PINTERP)
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   // array registers are spilled to local memory (l[]); record per-register
   // offsets and grow the TLS allocation accordingly
   nir_foreach_register(reg, &function->impl->registers) {
      if (reg->num_array_elems) {
         // TODO: packed variables would be nice, but MemoryOpt fails
         // replace 4 with reg->num_components
         uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
         regToLmemOffset[reg->index] = info->bin.tlsSpace;
         info->bin.tlsSpace += size;
      }
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if (info->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non main function this needs to be a OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}
1480
1481 bool
1482 Converter::visit(nir_cf_node *node)
1483 {
1484 switch (node->type) {
1485 case nir_cf_node_block:
1486 return visit(nir_cf_node_as_block(node));
1487 case nir_cf_node_if:
1488 return visit(nir_cf_node_as_if(node));
1489 case nir_cf_node_loop:
1490 return visit(nir_cf_node_as_loop(node));
1491 default:
1492 ERROR("unknown nir_cf_node type %u\n", node->type);
1493 return false;
1494 }
1495 }
1496
1497 bool
1498 Converter::visit(nir_block *block)
1499 {
1500 if (!block->predecessors->entries && block->instr_list.is_empty())
1501 return true;
1502
1503 BasicBlock *bb = convert(block);
1504
1505 setPosition(bb, true);
1506 nir_foreach_instr(insn, block) {
1507 if (!visit(insn))
1508 return false;
1509 }
1510 return true;
1511 }
1512
// Translate a NIR if-statement into nv50_ir control flow: a predicated
// branch to the else block, the two arms, and (when both arms reconverge)
// a JOINAT/JOIN pair around the construct.
bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert joinats, if both nodes end up at the end of the if again.
   // the reason for this to not happens are breaks/continues/ret/... which
   // have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   // branch to the else arm when the condition is zero
   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastThen), true);
   // if the then arm doesn't already end in control flow (other than a
   // join), fall through to the tail block with an explicit branch
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastElse), true);
   // same fall-through handling for the else arm
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   // close the construct with a JOIN at the reconvergence point
   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}
1573
// Translate a NIR loop: mark break/continue targets with PREBREAK/PRECONT,
// visit the body, and close the back edge if the body falls through.
bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   // the block following the loop is where breaks land
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      if (!insn || !insn->asFlow()) {
         // body fell through: emit the back edge explicitly
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}
1610
1611 bool
1612 Converter::visit(nir_instr *insn)
1613 {
1614 switch (insn->type) {
1615 case nir_instr_type_alu:
1616 return visit(nir_instr_as_alu(insn));
1617 case nir_instr_type_intrinsic:
1618 return visit(nir_instr_as_intrinsic(insn));
1619 case nir_instr_type_jump:
1620 return visit(nir_instr_as_jump(insn));
1621 case nir_instr_type_load_const:
1622 return visit(nir_instr_as_load_const(insn));
1623 case nir_instr_type_ssa_undef:
1624 return visit(nir_instr_as_ssa_undef(insn));
1625 case nir_instr_type_tex:
1626 return visit(nir_instr_as_tex(insn));
1627 default:
1628 ERROR("unknown nir_instr type %u\n", insn->type);
1629 return false;
1630 }
1631 return true;
1632 }
1633
// Map a system-value load intrinsic to the corresponding nv50_ir system
// value semantic. Asserts and returns SV_LAST for intrinsics that have
// no system-value mapping.
SVSemantic
Converter::convert(nir_intrinsic_op intr)
{
   switch (intr) {
   case nir_intrinsic_load_base_vertex:
      return SV_BASEVERTEX;
   case nir_intrinsic_load_base_instance:
      return SV_BASEINSTANCE;
   case nir_intrinsic_load_draw_id:
      return SV_DRAWID;
   case nir_intrinsic_load_front_face:
      return SV_FACE;
   case nir_intrinsic_load_helper_invocation:
      return SV_THREAD_KILL;
   case nir_intrinsic_load_instance_id:
      return SV_INSTANCE_ID;
   case nir_intrinsic_load_invocation_id:
      return SV_INVOCATION_ID;
   case nir_intrinsic_load_local_group_size:
      return SV_NTID;
   case nir_intrinsic_load_local_invocation_id:
      return SV_TID;
   case nir_intrinsic_load_num_work_groups:
      return SV_NCTAID;
   case nir_intrinsic_load_patch_vertices_in:
      return SV_VERTEX_COUNT;
   case nir_intrinsic_load_primitive_id:
      return SV_PRIMITIVE_ID;
   case nir_intrinsic_load_sample_id:
      return SV_SAMPLE_INDEX;
   case nir_intrinsic_load_sample_mask_in:
      return SV_SAMPLE_MASK;
   case nir_intrinsic_load_sample_pos:
      return SV_SAMPLE_POS;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SV_LANEMASK_EQ;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SV_LANEMASK_GE;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SV_LANEMASK_GT;
   case nir_intrinsic_load_subgroup_le_mask:
      return SV_LANEMASK_LE;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SV_LANEMASK_LT;
   case nir_intrinsic_load_subgroup_invocation:
      return SV_LANEID;
   case nir_intrinsic_load_tess_coord:
      return SV_TESS_COORD;
   case nir_intrinsic_load_tess_level_inner:
      return SV_TESS_INNER;
   case nir_intrinsic_load_tess_level_outer:
      return SV_TESS_OUTER;
   case nir_intrinsic_load_vertex_id:
      return SV_VERTEX_ID;
   case nir_intrinsic_load_work_group_id:
      return SV_CTAID;
   default:
      ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
            nir_intrinsic_infos[intr].name);
      assert(false);
      return SV_LAST;
   }
}
1697
// Lower a NIR intrinsic to nv50_ir. This covers uniform/input/output
// access, fragment interpolation, system values, subgroup ops, discard,
// and geometry-shader vertex emission. Returns false on unsupported
// intrinsics.
bool
Converter::visit(nir_intrinsic_instr *insn)
{
   nir_intrinsic_op op = insn->intrinsic;

   switch (op) {
   case nir_intrinsic_load_uniform: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *indirect;
      // uniform offsets are in vec4 units, hence the 16-byte scale below
      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
      for (uint8_t i = 0; i < insn->num_components; ++i) {
         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      Value *indirect;
      DataType dType = getSType(insn->src[0], false, false);
      // per-vertex stores carry the vertex index in src[1], so the
      // offset source shifts by one
      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
            continue;

         uint8_t offset = 0;
         Value *src = getSrc(&insn->src[0], i);
         switch (prog->getType()) {
         case Program::TYPE_FRAGMENT: {
            if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
               // TGSI uses a different interface than NIR, TGSI stores that
               // value in the z component, NIR in X
               offset += 2;
               src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
            }
            break;
         }
         case Program::TYPE_VERTEX: {
            // capture the clip vertex for generated user clip planes
            if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
               mkMov(clipVtx[i], src);
               src = clipVtx[i];
            }
            break;
         }
         default:
            break;
         }

         storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
      }
      break;
   }
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_output: {
      LValues &newDefs = convert(&insn->dest);

      // FBFetch: reading an output in a fragment shader is implemented
      // as a texel fetch from the framebuffer
      if (prog->getType() == Program::TYPE_FRAGMENT &&
          op == nir_intrinsic_load_output) {
         std::vector<Value*> defs, srcs;
         uint8_t mask = 0;

         // coordinates: truncated pixel position, layer, sample index
         srcs.push_back(getSSA());
         srcs.push_back(getSSA());
         Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
         Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
         mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
         mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;

         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));

         for (uint8_t i = 0u; i < insn->num_components; ++i) {
            defs.push_back(newDefs[i]);
            mask |= 1 << i;
         }

         TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
         texi->tex.levelZero = 1;
         texi->tex.mask = mask;
         texi->tex.useOffsets = 0;
         texi->tex.r = 0xffff;
         texi->tex.s = 0xffff;

         info->prop.fp.readsFramebuffer = true;
         break;
      }

      const DataType dType = getDType(insn);
      Value *indirect;
      bool input = op != nir_intrinsic_load_output;
      operation nvirOp;
      uint32_t mode = 0;

      uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
      nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];

      // see load_barycentric_* handling
      if (prog->getType() == Program::TYPE_FRAGMENT) {
         mode = translateInterpMode(&vary, nvirOp);
         if (op == nir_intrinsic_load_interpolated_input) {
            ImmediateValue immMode;
            if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
               mode |= immMode.reg.data.u32;
         }
      }

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         uint32_t address = getSlotAddress(insn, idx, i);
         Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
         if (prog->getType() == Program::TYPE_FRAGMENT) {
            int s = 1;
            if (typeSizeof(dType) == 8) {
               // 64-bit inputs are interpolated as two 32-bit halves
               Value *lo = getSSA();
               Value *hi = getSSA();
               Instruction *interp;

               interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
               interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
            } else {
               Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);
            }
         } else {
            // non-fragment stages: plain load from the varying file
            mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
         }
      }
      break;
   }
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_sample: {
      // these produce (packed offset, interpolation mode); the mode is
      // consumed by load_interpolated_input above
      LValues &newDefs = convert(&insn->dest);
      uint32_t mode;

      if (op == nir_intrinsic_load_barycentric_centroid ||
          op == nir_intrinsic_load_barycentric_sample) {
         mode = NV50_IR_INTERP_CENTROID;
      } else if (op == nir_intrinsic_load_barycentric_at_offset) {
         // clamp offsets to the supported range, convert to fixed point
         // and pack x/y into one register
         Value *offs[2];
         for (uint8_t c = 0; c < 2; c++) {
            offs[c] = getScratch();
            mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
            mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
            mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
            mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
         }
         mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);

         mode = NV50_IR_INTERP_OFFSET;
      } else if (op == nir_intrinsic_load_barycentric_pixel) {
         mode = NV50_IR_INTERP_DEFAULT;
      } else if (op == nir_intrinsic_load_barycentric_at_sample) {
         info->prop.fp.readsSampleLocations = true;
         mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
         mode = NV50_IR_INTERP_OFFSET;
      } else {
         unreachable("all intrinsics already handled above");
      }

      loadImm(newDefs[1], mode);
      break;
   }
   case nir_intrinsic_discard:
      mkOp(OP_DISCARD, TYPE_NONE, NULL);
      break;
   case nir_intrinsic_discard_if: {
      // predicate the discard on src[0] != 0
      Value *pred = getSSA(1, FILE_PREDICATE);
      if (insn->num_components > 1) {
         ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
         assert(false);
         return false;
      }
      mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
      mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
      break;
   }
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_work_group_id: {
      // system values: read via OP_RDSV, per component
      const DataType dType = getDType(insn);
      SVSemantic sv = convert(op);
      LValues &newDefs = convert(&insn->dest);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         Value *def;
         if (typeSizeof(dType) == 8)
            def = getSSA();
         else
            def = newDefs[i];

         if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
            // a dimension of size 1 always yields thread id 0
            loadImm(def, 0u);
         } else {
            Symbol *sym = mkSysVal(sv, i);
            Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
            if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
               rdsv->perPatch = 1;
         }

         // system values are 32 bit; zero-extend for 64-bit destinations
         if (typeSizeof(dType) == 8)
            mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
      }
      break;
   }
   // constants
   case nir_intrinsic_load_subgroup_size: {
      LValues &newDefs = convert(&insn->dest);
      loadImm(newDefs[0], 32u);
      break;
   }
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq: {
      LValues &newDefs = convert(&insn->dest);
      Value *pred = getScratch(1, FILE_PREDICATE);
      mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
      mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
      mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
      break;
   }
   case nir_intrinsic_ballot: {
      LValues &newDefs = convert(&insn->dest);
      Value *pred = getSSA(1, FILE_PREDICATE);
      mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
      mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
      break;
   }
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_read_invocation: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *tmp = getScratch();

      if (op == nir_intrinsic_read_first_invocation) {
         // find the lowest active lane: ballot, bit-reverse, then BFIND
         mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
         mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
         mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
      } else
         tmp = getSrc(&insn->src[1], 0);

      for (uint8_t i = 0; i < insn->num_components; ++i) {
         mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
            ->subOp = NV50_IR_SUBOP_SHFL_IDX;
      }
      break;
   }
   case nir_intrinsic_load_per_vertex_input: {
      const DataType dType = getDType(insn);
      LValues &newDefs = convert(&insn->dest);
      Value *indirectVertex;
      Value *indirectOffset;
      uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
      uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);

      // PFETCH resolves the vertex index to an address base
      Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
                              mkImm(baseVertex), indirectVertex);
      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         uint32_t address = getSlotAddress(insn, idx, i);
         loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
                  indirectOffset, vtxBase, info->in[idx].patch);
      }
      break;
   }
   case nir_intrinsic_emit_vertex:
   case nir_intrinsic_end_primitive: {
      // geometry shader vertex/primitive emission on the given stream
      uint32_t idx = nir_intrinsic_stream_id(insn);
      mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
      break;
   }
   default:
      ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
      return false;
   }

   return true;
}
2023
2024 bool
2025 Converter::visit(nir_jump_instr *insn)
2026 {
2027 switch (insn->type) {
2028 case nir_jump_return:
2029 // TODO: this only works in the main function
2030 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2031 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2032 break;
2033 case nir_jump_break:
2034 case nir_jump_continue: {
2035 bool isBreak = insn->type == nir_jump_break;
2036 nir_block *block = insn->instr.block;
2037 assert(!block->successors[1]);
2038 BasicBlock *target = convert(block->successors[0]);
2039 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2040 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2041 break;
2042 }
2043 default:
2044 ERROR("unknown nir_jump_type %u\n", insn->type);
2045 return false;
2046 }
2047
2048 return true;
2049 }
2050
2051 bool
2052 Converter::visit(nir_load_const_instr *insn)
2053 {
2054 assert(insn->def.bit_size <= 64);
2055
2056 LValues &newDefs = convert(&insn->def);
2057 for (int i = 0; i < insn->def.num_components; i++) {
2058 switch (insn->def.bit_size) {
2059 case 64:
2060 loadImm(newDefs[i], insn->value.u64[i]);
2061 break;
2062 case 32:
2063 loadImm(newDefs[i], insn->value.u32[i]);
2064 break;
2065 case 16:
2066 loadImm(newDefs[i], insn->value.u16[i]);
2067 break;
2068 case 8:
2069 loadImm(newDefs[i], insn->value.u8[i]);
2070 break;
2071 }
2072 }
2073 return true;
2074 }
2075
// Guard used at the top of most ALU lowering cases: the converter only
// handles scalar (single-component, write_mask == 1) ALU instructions,
// so bail out with an error otherwise. NIR is expected to be scalarized
// before reaching this pass.
#define DEFAULT_CHECKS \
      if (insn->dest.dest.ssa.num_components > 1) { \
         ERROR("nir_alu_instr only supported with 1 component!\n"); \
         return false; \
      } \
      if (insn->dest.write_mask != 1) { \
         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
         return false; \
      }
2085 bool
2086 Converter::visit(nir_alu_instr *insn)
2087 {
2088 const nir_op op = insn->op;
2089 const nir_op_info &info = nir_op_infos[op];
2090 DataType dType = getDType(insn);
2091 const std::vector<DataType> sTypes = getSTypes(insn);
2092
2093 Instruction *oldPos = this->bb->getExit();
2094
2095 switch (op) {
2096 case nir_op_fabs:
2097 case nir_op_iabs:
2098 case nir_op_fadd:
2099 case nir_op_iadd:
2100 case nir_op_fand:
2101 case nir_op_iand:
2102 case nir_op_fceil:
2103 case nir_op_fcos:
2104 case nir_op_fddx:
2105 case nir_op_fddx_coarse:
2106 case nir_op_fddx_fine:
2107 case nir_op_fddy:
2108 case nir_op_fddy_coarse:
2109 case nir_op_fddy_fine:
2110 case nir_op_fdiv:
2111 case nir_op_idiv:
2112 case nir_op_udiv:
2113 case nir_op_fexp2:
2114 case nir_op_ffloor:
2115 case nir_op_ffma:
2116 case nir_op_flog2:
2117 case nir_op_fmax:
2118 case nir_op_imax:
2119 case nir_op_umax:
2120 case nir_op_fmin:
2121 case nir_op_imin:
2122 case nir_op_umin:
2123 case nir_op_fmod:
2124 case nir_op_imod:
2125 case nir_op_umod:
2126 case nir_op_fmul:
2127 case nir_op_imul:
2128 case nir_op_imul_high:
2129 case nir_op_umul_high:
2130 case nir_op_fneg:
2131 case nir_op_ineg:
2132 case nir_op_fnot:
2133 case nir_op_inot:
2134 case nir_op_for:
2135 case nir_op_ior:
2136 case nir_op_pack_64_2x32_split:
2137 case nir_op_fpow:
2138 case nir_op_frcp:
2139 case nir_op_frem:
2140 case nir_op_irem:
2141 case nir_op_frsq:
2142 case nir_op_fsat:
2143 case nir_op_ishr:
2144 case nir_op_ushr:
2145 case nir_op_fsin:
2146 case nir_op_fsqrt:
2147 case nir_op_fsub:
2148 case nir_op_isub:
2149 case nir_op_ftrunc:
2150 case nir_op_ishl:
2151 case nir_op_fxor:
2152 case nir_op_ixor: {
2153 DEFAULT_CHECKS;
2154 LValues &newDefs = convert(&insn->dest);
2155 operation preOp = preOperationNeeded(op);
2156 if (preOp != OP_NOP) {
2157 assert(info.num_inputs < 2);
2158 Value *tmp = getSSA(typeSizeof(dType));
2159 Instruction *i0 = mkOp(preOp, dType, tmp);
2160 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2161 if (info.num_inputs) {
2162 i0->setSrc(0, getSrc(&insn->src[0]));
2163 i1->setSrc(0, tmp);
2164 }
2165 i1->subOp = getSubOp(op);
2166 } else {
2167 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2168 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2169 i->setSrc(s, getSrc(&insn->src[s]));
2170 }
2171 i->subOp = getSubOp(op);
2172 }
2173 break;
2174 }
2175 case nir_op_ifind_msb:
2176 case nir_op_ufind_msb: {
2177 DEFAULT_CHECKS;
2178 LValues &newDefs = convert(&insn->dest);
2179 dType = sTypes[0];
2180 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2181 break;
2182 }
2183 case nir_op_fround_even: {
2184 DEFAULT_CHECKS;
2185 LValues &newDefs = convert(&insn->dest);
2186 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2187 break;
2188 }
2189 // convert instructions
2190 case nir_op_f2f32:
2191 case nir_op_f2i32:
2192 case nir_op_f2u32:
2193 case nir_op_i2f32:
2194 case nir_op_i2i32:
2195 case nir_op_u2f32:
2196 case nir_op_u2u32:
2197 case nir_op_f2f64:
2198 case nir_op_f2i64:
2199 case nir_op_f2u64:
2200 case nir_op_i2f64:
2201 case nir_op_i2i64:
2202 case nir_op_u2f64:
2203 case nir_op_u2u64: {
2204 DEFAULT_CHECKS;
2205 LValues &newDefs = convert(&insn->dest);
2206 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2207 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2208 i->rnd = ROUND_Z;
2209 i->sType = sTypes[0];
2210 break;
2211 }
2212 // compare instructions
2213 case nir_op_feq32:
2214 case nir_op_ieq32:
2215 case nir_op_fge32:
2216 case nir_op_ige32:
2217 case nir_op_uge32:
2218 case nir_op_flt32:
2219 case nir_op_ilt32:
2220 case nir_op_ult32:
2221 case nir_op_fne32:
2222 case nir_op_ine32: {
2223 DEFAULT_CHECKS;
2224 LValues &newDefs = convert(&insn->dest);
2225 Instruction *i = mkCmp(getOperation(op),
2226 getCondCode(op),
2227 dType,
2228 newDefs[0],
2229 dType,
2230 getSrc(&insn->src[0]),
2231 getSrc(&insn->src[1]));
2232 if (info.num_inputs == 3)
2233 i->setSrc(2, getSrc(&insn->src[2]));
2234 i->sType = sTypes[0];
2235 break;
2236 }
2237 // those are weird ALU ops and need special handling, because
2238 // 1. they are always componend based
2239 // 2. they basically just merge multiple values into one data type
2240 case nir_op_imov:
2241 case nir_op_fmov:
2242 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2243 nir_reg_dest& reg = insn->dest.dest.reg;
2244 uint32_t goffset = regToLmemOffset[reg.reg->index];
2245 uint8_t comps = reg.reg->num_components;
2246 uint8_t size = reg.reg->bit_size / 8;
2247 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2248 uint32_t aoffset = csize * reg.base_offset;
2249 Value *indirect = NULL;
2250
2251 if (reg.indirect)
2252 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2253 getSrc(reg.indirect, 0), mkImm(csize));
2254
2255 for (uint8_t i = 0u; i < comps; ++i) {
2256 if (!((1u << i) & insn->dest.write_mask))
2257 continue;
2258
2259 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2260 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2261 }
2262 break;
2263 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2264 LValues &newDefs = convert(&insn->dest);
2265 nir_reg_src& reg = insn->src[0].src.reg;
2266 uint32_t goffset = regToLmemOffset[reg.reg->index];
2267 // uint8_t comps = reg.reg->num_components;
2268 uint8_t size = reg.reg->bit_size / 8;
2269 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2270 uint32_t aoffset = csize * reg.base_offset;
2271 Value *indirect = NULL;
2272
2273 if (reg.indirect)
2274 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2275
2276 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2277 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2278
2279 break;
2280 } else {
2281 LValues &newDefs = convert(&insn->dest);
2282 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2283 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2284 }
2285 }
2286 break;
2287 case nir_op_vec2:
2288 case nir_op_vec3:
2289 case nir_op_vec4: {
2290 LValues &newDefs = convert(&insn->dest);
2291 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2292 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2293 }
2294 break;
2295 }
2296 // (un)pack
2297 case nir_op_pack_64_2x32: {
2298 LValues &newDefs = convert(&insn->dest);
2299 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2300 merge->setSrc(0, getSrc(&insn->src[0], 0));
2301 merge->setSrc(1, getSrc(&insn->src[0], 1));
2302 break;
2303 }
2304 case nir_op_pack_half_2x16_split: {
2305 LValues &newDefs = convert(&insn->dest);
2306 Value *tmpH = getSSA();
2307 Value *tmpL = getSSA();
2308
2309 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2310 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2311 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2312 break;
2313 }
2314 case nir_op_unpack_half_2x16_split_x:
2315 case nir_op_unpack_half_2x16_split_y: {
2316 LValues &newDefs = convert(&insn->dest);
2317 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2318 if (op == nir_op_unpack_half_2x16_split_y)
2319 cvt->subOp = 1;
2320 break;
2321 }
2322 case nir_op_unpack_64_2x32: {
2323 LValues &newDefs = convert(&insn->dest);
2324 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2325 break;
2326 }
2327 case nir_op_unpack_64_2x32_split_x: {
2328 LValues &newDefs = convert(&insn->dest);
2329 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2330 break;
2331 }
2332 case nir_op_unpack_64_2x32_split_y: {
2333 LValues &newDefs = convert(&insn->dest);
2334 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2335 break;
2336 }
2337 // special instructions
2338 case nir_op_fsign:
2339 case nir_op_isign: {
2340 DEFAULT_CHECKS;
2341 DataType iType;
2342 if (::isFloatType(dType))
2343 iType = TYPE_F32;
2344 else
2345 iType = TYPE_S32;
2346
2347 LValues &newDefs = convert(&insn->dest);
2348 LValue *val0 = getScratch();
2349 LValue *val1 = getScratch();
2350 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2351 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2352
2353 if (dType == TYPE_F64) {
2354 mkOp2(OP_SUB, iType, val0, val0, val1);
2355 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2356 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2357 mkOp2(OP_SUB, iType, val0, val1, val0);
2358 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2359 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2360 } else if (::isFloatType(dType))
2361 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2362 else
2363 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2364 break;
2365 }
2366 case nir_op_fcsel:
2367 case nir_op_b32csel: {
2368 DEFAULT_CHECKS;
2369 LValues &newDefs = convert(&insn->dest);
2370 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2371 break;
2372 }
2373 case nir_op_ibitfield_extract:
2374 case nir_op_ubitfield_extract: {
2375 DEFAULT_CHECKS;
2376 Value *tmp = getSSA();
2377 LValues &newDefs = convert(&insn->dest);
2378 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2379 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2380 break;
2381 }
2382 case nir_op_bfm: {
2383 DEFAULT_CHECKS;
2384 LValues &newDefs = convert(&insn->dest);
2385 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2386 break;
2387 }
2388 case nir_op_bitfield_insert: {
2389 DEFAULT_CHECKS;
2390 LValues &newDefs = convert(&insn->dest);
2391 LValue *temp = getSSA();
2392 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2393 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2394 break;
2395 }
2396 case nir_op_bit_count: {
2397 DEFAULT_CHECKS;
2398 LValues &newDefs = convert(&insn->dest);
2399 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2400 break;
2401 }
2402 case nir_op_bitfield_reverse: {
2403 DEFAULT_CHECKS;
2404 LValues &newDefs = convert(&insn->dest);
2405 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2406 break;
2407 }
2408 case nir_op_find_lsb: {
2409 DEFAULT_CHECKS;
2410 LValues &newDefs = convert(&insn->dest);
2411 Value *tmp = getSSA();
2412 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2413 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2414 break;
2415 }
2416 // boolean conversions
2417 case nir_op_b2f32: {
2418 DEFAULT_CHECKS;
2419 LValues &newDefs = convert(&insn->dest);
2420 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2421 break;
2422 }
2423 case nir_op_b2f64: {
2424 DEFAULT_CHECKS;
2425 LValues &newDefs = convert(&insn->dest);
2426 Value *tmp = getSSA(4);
2427 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2428 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2429 break;
2430 }
2431 case nir_op_f2b32:
2432 case nir_op_i2b32: {
2433 DEFAULT_CHECKS;
2434 LValues &newDefs = convert(&insn->dest);
2435 Value *src1;
2436 if (typeSizeof(sTypes[0]) == 8) {
2437 src1 = loadImm(getSSA(8), 0.0);
2438 } else {
2439 src1 = zero;
2440 }
2441 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2442 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2443 break;
2444 }
2445 case nir_op_b2i32: {
2446 DEFAULT_CHECKS;
2447 LValues &newDefs = convert(&insn->dest);
2448 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2449 break;
2450 }
2451 case nir_op_b2i64: {
2452 DEFAULT_CHECKS;
2453 LValues &newDefs = convert(&insn->dest);
2454 LValue *def = getScratch();
2455 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2456 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2457 break;
2458 }
2459 default:
2460 ERROR("unknown nir_op %s\n", info.name);
2461 return false;
2462 }
2463
2464 if (!oldPos) {
2465 oldPos = this->bb->getEntry();
2466 oldPos->precise = insn->exact;
2467 }
2468
2469 if (unlikely(!oldPos))
2470 return true;
2471
2472 while (oldPos->next) {
2473 oldPos = oldPos->next;
2474 oldPos->precise = insn->exact;
2475 }
2476 oldPos->saturate = insn->dest.saturate;
2477
2478 return true;
2479 }
2480 #undef DEFAULT_CHECKS
2481
2482 bool
2483 Converter::visit(nir_ssa_undef_instr *insn)
2484 {
2485 LValues &newDefs = convert(&insn->def);
2486 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2487 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2488 }
2489 return true;
2490 }
2491
2492 #define CASE_SAMPLER(ty) \
2493 case GLSL_SAMPLER_DIM_ ## ty : \
2494 if (isArray && !isShadow) \
2495 return TEX_TARGET_ ## ty ## _ARRAY; \
2496 else if (!isArray && isShadow) \
2497 return TEX_TARGET_## ty ## _SHADOW; \
2498 else if (isArray && isShadow) \
2499 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2500 else \
2501 return TEX_TARGET_ ## ty
2502
2503 TexTarget
2504 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2505 {
2506 switch (dim) {
2507 CASE_SAMPLER(1D);
2508 CASE_SAMPLER(2D);
2509 CASE_SAMPLER(CUBE);
2510 case GLSL_SAMPLER_DIM_3D:
2511 return TEX_TARGET_3D;
2512 case GLSL_SAMPLER_DIM_MS:
2513 if (isArray)
2514 return TEX_TARGET_2D_MS_ARRAY;
2515 return TEX_TARGET_2D_MS;
2516 case GLSL_SAMPLER_DIM_RECT:
2517 if (isShadow)
2518 return TEX_TARGET_RECT_SHADOW;
2519 return TEX_TARGET_RECT;
2520 case GLSL_SAMPLER_DIM_BUF:
2521 return TEX_TARGET_BUFFER;
2522 case GLSL_SAMPLER_DIM_EXTERNAL:
2523 return TEX_TARGET_2D;
2524 default:
2525 ERROR("unknown glsl_sampler_dim %u\n", dim);
2526 assert(false);
2527 return TEX_TARGET_COUNT;
2528 }
2529 }
2530 #undef CASE_SAMPLER
2531
2532 Value*
2533 Converter::applyProjection(Value *src, Value *proj)
2534 {
2535 if (!proj)
2536 return src;
2537 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
2538 }
2539
// Translate a NIR texture instruction into a codegen TexInstruction.
// NOTE(review): the push order into 'srcs' below encodes the hardware
// operand layout (coords, lod/bias, ms index, compare ref, indirect
// handles) and must not be reordered.
bool
Converter::visit(nir_tex_instr *insn)
{
   switch (insn->op) {
   case nir_texop_lod:
   case nir_texop_query_levels:
   case nir_texop_tex:
   case nir_texop_texture_samples:
   case nir_texop_tg4:
   case nir_texop_txb:
   case nir_texop_txd:
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txl:
   case nir_texop_txs: {
      LValues &newDefs = convert(&insn->dest);
      std::vector<Value*> srcs;
      std::vector<Value*> defs;
      std::vector<nir_src*> offsets;
      uint8_t mask = 0;          // write mask of components actually defined
      bool lz = false;           // force level-zero sampling
      Value *proj = NULL;        // 1/projector, applied to coords and compare ref
      TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
      operation op = getOperation(insn->op);

      int r, s;
      // Locate each optional source by kind; -1 means not present.
      int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
      int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
      int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
      int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
      int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
      int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
      int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
      int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
      int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
      int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
      int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);

      // NIR provides the projector directly; the hardware wants a multiply
      // by its reciprocal, so compute 1/proj once up front.
      if (projIdx != -1)
         proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));

      srcs.resize(insn->coord_components);
      for (uint8_t i = 0u; i < insn->coord_components; ++i)
         srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);

      // sometimes we get less args than target.getArgCount, but codegen expects the latter
      if (insn->coord_components) {
         uint32_t argCount = target.getArgCount();

         if (target.isMS())
            argCount -= 1;

         // pad out missing coordinate slots with fresh (undefined) SSA values
         for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
            srcs.push_back(getSSA());
      }

      if (insn->op == nir_texop_texture_samples)
         srcs.push_back(zero);
      else if (!insn->num_srcs)
         srcs.push_back(loadImm(NULL, 0)); // queries with no sources still need a lod operand
      if (biasIdx != -1)
         srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
      if (lodIdx != -1)
         srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
      else if (op == OP_TXF)
         lz = true; // txf without explicit lod reads level zero
      if (msIdx != -1)
         srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
      if (offsetIdx != -1)
         offsets.push_back(&insn->src[offsetIdx].src);
      if (compIdx != -1)
         srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
      // indirect texture/sampler handles go last; remember where they landed
      if (texOffIdx != -1) {
         srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
         texOffIdx = srcs.size() - 1;
      }
      if (sampOffIdx != -1) {
         srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
         sampOffIdx = srcs.size() - 1;
      }

      r = insn->texture_index;
      s = insn->sampler_index;

      defs.resize(newDefs.size());
      for (uint8_t d = 0u; d < newDefs.size(); ++d) {
         defs[d] = newDefs[d];
         mask |= 1 << d;
      }
      // implicit derivatives are only available in fragment shaders
      if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
         lz = true;

      TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
      texi->tex.levelZero = lz;
      texi->tex.mask = mask;

      if (texOffIdx != -1)
         texi->tex.rIndirectSrc = texOffIdx;
      if (sampOffIdx != -1)
         texi->tex.sIndirectSrc = sampOffIdx;

      // per-op fixups: gather component and query subtypes
      switch (insn->op) {
      case nir_texop_tg4:
         if (!target.isShadow())
            texi->tex.gatherComp = insn->component;
         break;
      case nir_texop_txs:
         texi->tex.query = TXQ_DIMS;
         break;
      case nir_texop_texture_samples:
         // sample count lives in the .z component of the TXQ_TYPE result
         texi->tex.mask = 0x4;
         texi->tex.query = TXQ_TYPE;
         break;
      case nir_texop_query_levels:
         // level count lives in the .w component of the TXQ_DIMS result
         texi->tex.mask = 0x8;
         texi->tex.query = TXQ_DIMS;
         break;
      default:
         break;
      }

      texi->tex.useOffsets = offsets.size();
      if (texi->tex.useOffsets) {
         for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
            for (uint32_t c = 0u; c < 3; ++c) {
               // replicate the last offset component for dims < 3
               uint8_t s2 = std::min(c, target.getDim() - 1);
               texi->offset[s][c].set(getSrc(offsets[s], s2));
               texi->offset[s][c].setInsn(texi);
            }
         }
      }

      // explicit derivatives (txd): one dPdx/dPdy pair per coordinate
      if (ddxIdx != -1 && ddyIdx != -1) {
         for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
            texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
            texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
         }
      }

      break;
   }
   default:
      ERROR("unknown nir_texop %u\n", insn->op);
      return false;
   }
   return true;
}
2687
2688 bool
2689 Converter::run()
2690 {
2691 bool progress;
2692
2693 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
2694 nir_print_shader(nir, stderr);
2695
2696 struct nir_lower_subgroups_options subgroup_options = {
2697 .subgroup_size = 32,
2698 .ballot_bit_size = 32,
2699 };
2700
2701 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
2702 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
2703 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2704 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
2705 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2706 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
2707 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2708
2709 do {
2710 progress = false;
2711 NIR_PASS(progress, nir, nir_copy_prop);
2712 NIR_PASS(progress, nir, nir_opt_remove_phis);
2713 NIR_PASS(progress, nir, nir_opt_trivial_continues);
2714 NIR_PASS(progress, nir, nir_opt_cse);
2715 NIR_PASS(progress, nir, nir_opt_algebraic);
2716 NIR_PASS(progress, nir, nir_opt_constant_folding);
2717 NIR_PASS(progress, nir, nir_copy_prop);
2718 NIR_PASS(progress, nir, nir_opt_dce);
2719 NIR_PASS(progress, nir, nir_opt_dead_cf);
2720 } while (progress);
2721
2722 NIR_PASS_V(nir, nir_lower_bool_to_int32);
2723 NIR_PASS_V(nir, nir_lower_locals_to_regs);
2724 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
2725 NIR_PASS_V(nir, nir_convert_from_ssa, true);
2726
2727 // Garbage collect dead instructions
2728 nir_sweep(nir);
2729
2730 if (!parseNIR()) {
2731 ERROR("Couldn't prase NIR!\n");
2732 return false;
2733 }
2734
2735 if (!assignSlots()) {
2736 ERROR("Couldn't assign slots!\n");
2737 return false;
2738 }
2739
2740 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2741 nir_print_shader(nir, stderr);
2742
2743 nir_foreach_function(function, nir) {
2744 if (!visit(function))
2745 return false;
2746 }
2747
2748 return true;
2749 }
2750
2751 } // unnamed namespace
2752
2753 namespace nv50_ir {
2754
2755 bool
2756 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2757 {
2758 nir_shader *nir = (nir_shader*)info->bin.source;
2759 Converter converter(this, nir, info);
2760 bool result = converter.run();
2761 if (!result)
2762 return result;
2763 LoweringHelper lowering;
2764 lowering.run(this);
2765 tlsSize = info->bin.tlsSpace;
2766 return result;
2767 }
2768
2769 } // namespace nv50_ir