nv50/ir/nir: implement variable indexing
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <vector>
40
41 namespace {
42
43 #if __cplusplus >= 201103L
44 using std::hash;
45 using std::unordered_map;
46 #else
47 using std::tr1::hash;
48 using std::tr1::unordered_map;
49 #endif
50
51 using namespace nv50_ir;
52
53 int
54 type_size(const struct glsl_type *type)
55 {
56 return glsl_count_attribute_slots(type, false);
57 }
58
// Translates a NIR shader into nv50 IR. One instance converts exactly one
// nir_shader; run() drives the whole conversion.
class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   // per-def list of LValues, one entry per vector component
   typedef std::vector<LValue*> LValues;
   typedef unordered_map<unsigned, LValues> NirDefMap;
   typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   // conversion helpers from NIR entities to nv50 IR equivalents
   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value has not a constant part, the Value gets returned
   // through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   // type classification helpers
   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   operation getOperation(nir_intrinsic_op);
   operation getOperation(nir_op);
   operation getOperation(nir_texop);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_intrinsic_op);
   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   // visitors for each NIR IR construct
   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);
   bool visit(nir_ssa_undef_instr *);
   bool visit(nir_tex_instr *);

   // tex stuff
   Value* applyProjection(Value *src, Value *proj);

   nir_shader *nir;

   NirDefMap ssaDefs;       // nir_ssa_def::index -> LValues
   NirDefMap regDefs;       // nir_register::index -> LValues
   NirArrayLMemOffsets regToLmemOffset;
   NirBlockMap blocks;      // nir_block::index -> BasicBlock
   unsigned int curLoopDepth;

   BasicBlock *exit;
   Value *zero;             // shared u32 0 immediate

   int clipVertexOutput;    // output slot used for user clipping, -1 if none

   // per-stage scratch state
   union {
      struct {
         Value *position;
      } fp;
   };
};
168
// Set up conversion state; "zero" is a shared u32 0 immediate reused
// throughout conversion.
Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
   : ConverterCommon(prog, info),
     nir(nir),
     curLoopDepth(0),
     clipVertexOutput(-1)
{
   zero = mkImm((uint32_t)0);
}
177
178 BasicBlock *
179 Converter::convert(nir_block *block)
180 {
181 NirBlockMap::iterator it = blocks.find(block->index);
182 if (it != blocks.end())
183 return it->second;
184
185 BasicBlock *bb = new BasicBlock(func);
186 blocks[block->index] = bb;
187 return bb;
188 }
189
190 bool
191 Converter::isFloatType(nir_alu_type type)
192 {
193 return nir_alu_type_get_base_type(type) == nir_type_float;
194 }
195
196 bool
197 Converter::isSignedType(nir_alu_type type)
198 {
199 return nir_alu_type_get_base_type(type) == nir_type_int;
200 }
201
202 bool
203 Converter::isResultFloat(nir_op op)
204 {
205 const nir_op_info &info = nir_op_infos[op];
206 if (info.output_type != nir_type_invalid)
207 return isFloatType(info.output_type);
208
209 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
210 assert(false);
211 return true;
212 }
213
// Whether a nir_op's result should be treated as signed. A few ops are
// special-cased where the declared output type would give wrong codegen;
// everything else falls back to the op's declared output type.
bool
Converter::isResultSigned(nir_op op)
{
   switch (op) {
   // there is no umul and we get wrong results if we treat all muls as signed
   case nir_op_imul:
   case nir_op_inot:
      return false;
   default:
      const nir_op_info &info = nir_op_infos[op];
      if (info.output_type != nir_type_invalid)
         return isSignedType(info.output_type);
      ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
      assert(false);
      return true;
   }
}
231
232 DataType
233 Converter::getDType(nir_alu_instr *insn)
234 {
235 if (insn->dest.dest.is_ssa)
236 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
237 else
238 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
239 }
240
241 DataType
242 Converter::getDType(nir_intrinsic_instr *insn)
243 {
244 if (insn->dest.is_ssa)
245 return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
246 else
247 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
248 }
249
// Build the DataType from an op's result float-ness/signedness and the
// destination bit size (in bits; typeOfSize takes bytes).
DataType
Converter::getDType(nir_op op, uint8_t bitSize)
{
   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
   if (ty == TYPE_NONE) {
      ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
      assert(false);
   }
   return ty;
}
260
// Determine the source DataType of every input of an ALU instruction,
// using the per-input type metadata from nir_op_infos. Unknown input
// types abort with TYPE_NONE for the remaining entries.
std::vector<DataType>
Converter::getSTypes(nir_alu_instr *insn)
{
   const nir_op_info &info = nir_op_infos[insn->op];
   std::vector<DataType> res(info.num_inputs);

   for (uint8_t i = 0; i < info.num_inputs; ++i) {
      if (info.input_types[i] != nir_type_invalid) {
         res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
      } else {
         ERROR("getSType not implemented for %s idx %u\n", info.name, i);
         assert(false);
         res[i] = TYPE_NONE;
         break;
      }
   }

   return res;
}
280
// DataType of a nir_src for the requested float/signed interpretation;
// the bit size comes from the SSA def or register backing the source.
DataType
Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
{
   uint8_t bitSize;
   if (src.is_ssa)
      bitSize = src.ssa->bit_size;
   else
      bitSize = src.reg.reg->bit_size;

   DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
   if (ty == TYPE_NONE) {
      // pick a printable name purely for the diagnostic below
      const char *str;
      if (isFloat)
         str = "float";
      else if (isSigned)
         str = "int";
      else
         str = "uint";
      ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
      assert(false);
   }
   return ty;
}
304
// Map a nir_op to the corresponding nv50 IR operation. Float/int
// variants of the same operation collapse onto one opcode; the operand
// types are handled separately via getDType/getSTypes. Ops with no
// direct equivalent are handled elsewhere and hit the default case here.
operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_fand:
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   // all numeric conversions become OP_CVT; src/dst types carry the detail
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_fnot:
   case nir_op_inot:
      return OP_NOT;
   case nir_op_for:
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   // comparisons all map to OP_SET; getCondCode() picks the condition
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_fsub:
   case nir_op_isub:
      return OP_SUB;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_fxor:
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}
433
// Map a nir_texop to the nv50 IR texture operation. All query-style ops
// (levels, samples, size) collapse onto OP_TXQ; the exact query is
// distinguished later via sub-ops.
operation
Converter::getOperation(nir_texop op)
{
   switch (op) {
   case nir_texop_tex:
      return OP_TEX;
   case nir_texop_lod:
      return OP_TXLQ;
   case nir_texop_txb:
      return OP_TXB;
   case nir_texop_txd:
      return OP_TXD;
   case nir_texop_txf:
   case nir_texop_txf_ms:
      return OP_TXF;
   case nir_texop_tg4:
      return OP_TXG;
   case nir_texop_txl:
      return OP_TXL;
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_txs:
      return OP_TXQ;
   default:
      ERROR("couldn't get operation for nir_texop %u\n", op);
      assert(false);
      return OP_NOP;
   }
}
463
// Map a nir_intrinsic_op to an nv50 IR operation. Currently a stub: no
// intrinsics are translated through this path yet, so any call is an
// internal error.
operation
Converter::getOperation(nir_intrinsic_op op)
{
   switch (op) {
   default:
      ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
      assert(false);
      return OP_NOP;
   }
}
474
475 operation
476 Converter::preOperationNeeded(nir_op op)
477 {
478 switch (op) {
479 case nir_op_fcos:
480 case nir_op_fsin:
481 return OP_PRESIN;
482 default:
483 return OP_NOP;
484 }
485 }
486
487 int
488 Converter::getSubOp(nir_op op)
489 {
490 switch (op) {
491 case nir_op_imul_high:
492 case nir_op_umul_high:
493 return NV50_IR_SUBOP_MUL_HIGH;
494 default:
495 return 0;
496 }
497 }
498
499 int
500 Converter::getSubOp(nir_intrinsic_op op)
501 {
502 switch (op) {
503 case nir_intrinsic_vote_all:
504 return NV50_IR_SUBOP_VOTE_ALL;
505 case nir_intrinsic_vote_any:
506 return NV50_IR_SUBOP_VOTE_ANY;
507 case nir_intrinsic_vote_ieq:
508 return NV50_IR_SUBOP_VOTE_UNI;
509 default:
510 return 0;
511 }
512 }
513
// Condition code for a NIR 32-bit comparison op (these feed OP_SET).
// Note fne uses CC_NEU while ine uses CC_NE -- presumably the unordered
// variant to honour NaN semantics of float compares; confirm against
// the nv50 IR CondCode definitions.
CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}
539
540 Converter::LValues&
541 Converter::convert(nir_alu_dest *dest)
542 {
543 return convert(&dest->dest);
544 }
545
546 Converter::LValues&
547 Converter::convert(nir_dest *dest)
548 {
549 if (dest->is_ssa)
550 return convert(&dest->ssa);
551 if (dest->reg.indirect) {
552 ERROR("no support for indirects.");
553 assert(false);
554 }
555 return convert(dest->reg.reg);
556 }
557
// Get (or lazily create) the LValues backing a nir_register. Registers
// live in scratch values; each component gets at least 4 bytes of
// storage even for smaller bit sizes.
Converter::LValues&
Converter::convert(nir_register *reg)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it != regDefs.end())
      return it->second;

   LValues newDef(reg->num_components);
   for (uint8_t i = 0; i < reg->num_components; i++)
      newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
   return regDefs[reg->index] = newDef;
}
570
// Get (or lazily create) the LValues backing an SSA def, one SSA value
// per component, each at least 4 bytes wide.
Converter::LValues&
Converter::convert(nir_ssa_def *def)
{
   NirDefMap::iterator it = ssaDefs.find(def->index);
   if (it != ssaDefs.end())
      return it->second;

   LValues newDef(def->num_components);
   for (uint8_t i = 0; i < def->num_components; i++)
      newDef[i] = getSSA(std::max(4, def->bit_size / 8));
   return ssaDefs[def->index] = newDef;
}
583
// Source value for an ALU instruction's given destination component,
// honouring the source swizzle. abs/negate modifiers are expected to
// have been lowered to separate instructions before conversion.
Value*
Converter::getSrc(nir_alu_src *src, uint8_t component)
{
   if (src->abs || src->negate) {
      ERROR("modifiers currently not supported on nir_alu_src\n");
      assert(false);
   }
   return getSrc(&src->src, src->swizzle[component]);
}
593
594 Value*
595 Converter::getSrc(nir_register *reg, uint8_t idx)
596 {
597 NirDefMap::iterator it = regDefs.find(reg->index);
598 if (it == regDefs.end())
599 return convert(reg)[idx];
600 return it->second[idx];
601 }
602
603 Value*
604 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
605 {
606 if (src->is_ssa)
607 return getSrc(src->ssa, idx);
608
609 if (src->reg.indirect) {
610 if (indirect)
611 return getSrc(src->reg.indirect, idx);
612 ERROR("no support for indirects.");
613 assert(false);
614 return NULL;
615 }
616
617 return getSrc(src->reg.reg, idx);
618 }
619
// Component idx of an already-converted SSA def. Unlike registers, SSA
// defs must exist by the time they are used (NIR is in SSA dominance
// order), so a miss here is an internal error.
Value*
Converter::getSrc(nir_ssa_def *src, uint8_t idx)
{
   NirDefMap::iterator it = ssaDefs.find(src->index);
   if (it == ssaDefs.end()) {
      ERROR("SSA value %u not found\n", src->index);
      assert(false);
      return NULL;
   }
   return it->second[idx];
}
631
// Split a source into constant + runtime parts: if the source is a
// constant, return it and leave "indirect" NULL; otherwise return 0 and
// hand the runtime value back through "indirect".
// NOTE(review): the constant path always reads component 0 (u32[0]),
// ignoring idx -- presumably indirect sources are scalar here; confirm.
uint32_t
Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
{
   nir_const_value *offset = nir_src_as_const_value(*src);

   if (offset) {
      indirect = NULL;
      return offset->u32[0];
   }

   indirect = getSrc(src, idx, true);
   return 0;
}
645
// Constant part of intrinsic source s / component c, plus the
// intrinsic's base. Any runtime part is returned through "indirect",
// scaled by 16 (<< 4) into an address register -- presumably bytes per
// vec4 I/O slot; confirm against the loadFrom/storeTo address users.
uint32_t
Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
{
   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
   if (indirect)
      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
   return idx;
}
654
// Translate a gl_vert_attrib slot into a TGSI semantic name/index pair.
// Generic and texcoord attributes map onto indexed semantics; the rest
// are handled individually.
static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}
715
// Translate a gl_varying_slot into a TGSI semantic name/index pair.
// Ranged slots (patch, generic vars, texcoords) are handled first; the
// remaining singleton slots go through the switch.
static void
varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VARYING_SLOT_TESS_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VARYING_SLOT_PATCH0) {
      *name = TGSI_SEMANTIC_PATCH;
      *index = slot - VARYING_SLOT_PATCH0;
      return;
   }

   if (slot >= VARYING_SLOT_VAR0) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VARYING_SLOT_VAR0;
      return;
   }

   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VARYING_SLOT_TEX0;
      return;
   }

   switch (slot) {
   case VARYING_SLOT_BFC0:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 0;
      break;
   case VARYING_SLOT_BFC1:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_DIST0:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 0;
      break;
   case VARYING_SLOT_CLIP_DIST1:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_VERTEX:
      *name = TGSI_SEMANTIC_CLIPVERTEX;
      *index = 0;
      break;
   case VARYING_SLOT_COL0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VARYING_SLOT_COL1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VARYING_SLOT_EDGE:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VARYING_SLOT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      *index = 0;
      break;
   case VARYING_SLOT_FOGC:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VARYING_SLOT_LAYER:
      *name = TGSI_SEMANTIC_LAYER;
      *index = 0;
      break;
   case VARYING_SLOT_PNTC:
      *name = TGSI_SEMANTIC_PCOORD;
      *index = 0;
      break;
   case VARYING_SLOT_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VARYING_SLOT_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      *index = 0;
      break;
   case VARYING_SLOT_PSIZ:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      *index = 0;
      break;
   case VARYING_SLOT_VIEWPORT:
      *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
      *index = 0;
      break;
   default:
      ERROR("unknown varying slot %u\n", slot);
      assert(false);
      break;
   }
}
824
// Translate a gl_frag_result into a TGSI semantic name/index pair.
static void
frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
{
   if (slot >= FRAG_RESULT_DATA0) {
      *name = TGSI_SEMANTIC_COLOR;
      // presumably equivalent to slot - FRAG_RESULT_DATA0 (two enum
      // entries sit between COLOR and DATA0), so DATA0+n -> color index
      // n -- confirm against shader_enums.h
      *index = slot - FRAG_RESULT_COLOR - 2; // intentional
      return;
   }

   switch (slot) {
   case FRAG_RESULT_COLOR:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case FRAG_RESULT_DEPTH:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      *index = 0;
      break;
   default:
      ERROR("unknown frag result slot %u\n", slot);
      assert(false);
      break;
   }
}
853
// copy of _mesa_sysval_to_semantic
// Translate a SYSTEM_VALUE_* enum into a TGSI semantic name; the index
// is always 0 for system values.
static void
system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
{
   *index = 0;
   switch (val) {
   // Vertex shader
   case SYSTEM_VALUE_VERTEX_ID:
      *name = TGSI_SEMANTIC_VERTEXID;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
      *name = TGSI_SEMANTIC_INSTANCEID;
      break;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
      break;
   case SYSTEM_VALUE_BASE_VERTEX:
      *name = TGSI_SEMANTIC_BASEVERTEX;
      break;
   case SYSTEM_VALUE_BASE_INSTANCE:
      *name = TGSI_SEMANTIC_BASEINSTANCE;
      break;
   case SYSTEM_VALUE_DRAW_ID:
      *name = TGSI_SEMANTIC_DRAWID;
      break;

   // Geometry shader
   case SYSTEM_VALUE_INVOCATION_ID:
      *name = TGSI_SEMANTIC_INVOCATIONID;
      break;

   // Fragment shader
   case SYSTEM_VALUE_FRAG_COORD:
      *name = TGSI_SEMANTIC_POSITION;
      break;
   case SYSTEM_VALUE_FRONT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      break;
   case SYSTEM_VALUE_SAMPLE_ID:
      *name = TGSI_SEMANTIC_SAMPLEID;
      break;
   case SYSTEM_VALUE_SAMPLE_POS:
      *name = TGSI_SEMANTIC_SAMPLEPOS;
      break;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      break;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      *name = TGSI_SEMANTIC_HELPER_INVOCATION;
      break;

   // Tessellation shader
   case SYSTEM_VALUE_TESS_COORD:
      *name = TGSI_SEMANTIC_TESSCOORD;
      break;
   case SYSTEM_VALUE_VERTICES_IN:
      *name = TGSI_SEMANTIC_VERTICESIN;
      break;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      break;

   // Compute shader
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      *name = TGSI_SEMANTIC_THREAD_ID;
      break;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      *name = TGSI_SEMANTIC_BLOCK_ID;
      break;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      *name = TGSI_SEMANTIC_GRID_SIZE;
      break;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      *name = TGSI_SEMANTIC_BLOCK_SIZE;
      break;

   // ARB_shader_ballot
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
      break;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
      break;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
      break;

   default:
      ERROR("unknown system value %u\n", val);
      assert(false);
      break;
   }
}
965
// Fill in the interpolation qualifiers of an input varying from NIR's
// interpolation mode.
void
Converter::setInterpolate(nv50_ir_varying *var,
                          uint8_t mode,
                          bool centroid,
                          unsigned semantic)
{
   switch (mode) {
   case INTERP_MODE_FLAT:
      var->flat = 1;
      break;
   case INTERP_MODE_NONE:
      // no explicit qualifier: colors get the sc flag (presumably the
      // driver decides shading per the API shade model -- confirm against
      // nv50_ir_varying), position is forced linear
      if (semantic == TGSI_SEMANTIC_COLOR)
         var->sc = 1;
      else if (semantic == TGSI_SEMANTIC_POSITION)
         var->linear = 1;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      var->linear = 1;
      break;
   case INTERP_MODE_SMOOTH:
      break;
   }
   var->centroid = centroid;
}
990
// Number of I/O slots a variable occupies. For array types the count is
// stage-dependent: GS inputs and non-patch tess I/O carry an extra
// per-vertex array dimension that must not be counted as addressed
// slots.
static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      // GS inputs are arrayed per incoming vertex; divide that out
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}
1020
// Populate info->in/out/sv from the NIR shader's variables and consumed
// system values, translating slots to TGSI semantics and computing
// per-slot component masks, then hand off to the driver's slot
// assignment callback. Order of the loops matters: image uniform
// locations are fixed up first, inputs before outputs, and generated
// user-clip outputs are appended last.
bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;

   // we have to fixup the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   // ---- shader inputs ----
   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      // doubles with more than two components need a second slot
      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
         // 64-bit types use two mask bits per component, split across
         // the two halves of a slot pair
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   // ---- shader outputs ----
   info->numOutputs = 0;
   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            // explicit clip distances written: don't generate user clip
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ll << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   // ---- consumed system values ----
   info->numSysVals = 0;
   for (uint8_t i = 0; i < 64; ++i) {
      if (!(nir->info.system_values_read & 1ll << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   // append outputs for driver-generated user clip planes (4 distances
   // per vec4 output)
   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}
1238
1239 uint32_t
1240 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1241 {
1242 DataType ty;
1243 int offset = nir_intrinsic_component(insn);
1244 bool input;
1245
1246 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1247 ty = getDType(insn);
1248 else
1249 ty = getSType(insn->src[0], false, false);
1250
1251 switch (insn->intrinsic) {
1252 case nir_intrinsic_load_input:
1253 case nir_intrinsic_load_interpolated_input:
1254 case nir_intrinsic_load_per_vertex_input:
1255 input = true;
1256 break;
1257 case nir_intrinsic_load_output:
1258 case nir_intrinsic_load_per_vertex_output:
1259 case nir_intrinsic_store_output:
1260 case nir_intrinsic_store_per_vertex_output:
1261 input = false;
1262 break;
1263 default:
1264 ERROR("unknown intrinsic in getSlotAddress %s",
1265 nir_intrinsic_infos[insn->intrinsic].name);
1266 input = false;
1267 assert(false);
1268 break;
1269 }
1270
1271 if (typeSizeof(ty) == 8) {
1272 slot *= 2;
1273 slot += offset;
1274 if (slot >= 4) {
1275 idx += 1;
1276 slot -= 4;
1277 }
1278 } else {
1279 slot += offset;
1280 }
1281
1282 assert(slot < 4);
1283 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1284 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1285
1286 const nv50_ir_varying *vary = input ? info->in : info->out;
1287 return vary[idx].slot[slot] * 4;
1288 }
1289
// Emit a load of component "c" (of type "ty") from "file" at byte offset
// "base + c * typeSizeof(ty)" into "def". indirect0/indirect1 are optional
// indirect address sources; "patch" marks per-patch tess I/O.
// Returns the last instruction emitted (the MERGE for split 64-bit loads).
Instruction *
Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
                    uint32_t base, uint8_t c, Value *indirect0,
                    Value *indirect1, bool patch)
{
   unsigned int tySize = typeSizeof(ty);

   // 64-bit loads from const/buffer memory, or any indirect 64-bit load,
   // are split into two 32-bit loads whose results are merged.
   if (tySize == 8 &&
       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
      Value *lo = getSSA();
      Value *hi = getSSA();

      // low 32 bits
      Instruction *loi =
         mkLoad(TYPE_U32, lo,
                mkSymbol(file, i, TYPE_U32, base + c * tySize),
                indirect0);
      loi->setIndirect(0, 1, indirect1);
      loi->perPatch = patch;

      // high 32 bits, 4 bytes further
      Instruction *hii =
         mkLoad(TYPE_U32, hi,
                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
                indirect0);
      hii->setIndirect(0, 1, indirect1);
      hii->perPatch = patch;

      return mkOp2(OP_MERGE, ty, def, lo, hi);
   } else {
      Instruction *ld =
         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
      ld->setIndirect(0, 1, indirect1);
      ld->perPatch = patch;
      return ld;
   }
}
1325
// Emit a store of "src" to the output addressed by (idx, c) of the given
// intrinsic. 64-bit values with an indirect address are split into two
// 32-bit stores. OP_EXPORT sources are copied into fresh SSA values first
// (presumably an exporting-instruction operand restriction — see callers).
void
Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
                   DataType ty, Value *src, uint8_t idx, uint8_t c,
                   Value *indirect0, Value *indirect1)
{
   uint8_t size = typeSizeof(ty);
   uint32_t address = getSlotAddress(insn, idx, c);

   if (size == 8 && indirect0) {
      // split the 64-bit value into two 32-bit halves
      Value *split[2];
      mkSplit(split, 4, src);

      if (op == OP_EXPORT) {
         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
      }

      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
              split[0])->perPatch = info->out[idx].patch;
      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
              split[1])->perPatch = info->out[idx].patch;
   } else {
      if (op == OP_EXPORT)
         src = mkMov(getSSA(size), src, ty)->getDef(0);
      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
              src)->perPatch = info->out[idx].patch;
   }
}
1354
// Copy stage-level properties from the NIR shader_info into the nv50_ir
// program info structure. Pure bookkeeping; no IR is emitted here.
bool
Converter::parseNIR()
{
   info->bin.tlsSpace = 0;
   info->io.clipDistances = nir->info.clip_distance_array_size;
   info->io.cullDistances = nir->info.cull_distance_array_size;

   switch(prog->getType()) {
   case Program::TYPE_COMPUTE:
      // workgroup dimensions and shared-memory size
      info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
      info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
      info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
      info->bin.smemSize = nir->info.cs.shared_size;
      break;
   case Program::TYPE_FRAGMENT:
      info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
      // reading the sample id or position forces per-sample invocation
      info->prop.fp.persampleInvocation =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
      info->prop.fp.readsSampleLocations =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
      info->prop.fp.usesSampleMaskIn =
         !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
      break;
   case Program::TYPE_GEOMETRY:
      info->prop.gp.inputPrim = nir->info.gs.input_primitive;
      info->prop.gp.instanceCount = nir->info.gs.invocations;
      info->prop.gp.maxVertices = nir->info.gs.vertices_out;
      info->prop.gp.outputPrim = nir->info.gs.output_primitive;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // isolines are treated as a line domain
      if (nir->info.tess.primitive_mode == GL_ISOLINES)
         info->prop.tp.domain = GL_LINES;
      else
         info->prop.tp.domain = nir->info.tess.primitive_mode;
      info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
      info->prop.tp.outputPrim =
         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
      // remaps the NIR spacing enum onto the PIPE partitioning values
      info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
      info->prop.tp.winding = !nir->info.tess.ccw;
      break;
   case Program::TYPE_VERTEX:
      info->prop.vp.usesDrawParameters =
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
      break;
   default:
      break;
   }

   return true;
}
1411
// Translate a whole NIR function: set up entry/exit blocks, stage-specific
// prologue values, local memory for array registers, then walk the CF tree.
bool
Converter::visit(nir_function *function)
{
   // we only support emitting the main function for now
   assert(!strcmp(function->name, "main"));
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   // scratch values that collect the clip-vertex components, consumed by
   // handleUserClipPlanes() at the end of main
   if (info->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      // outBase = laneid - invocation id (base lane of this patch)
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      // fragCoord.w and its reciprocal, used for perspective interpolation
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   // NIR registers with array elements are spilled to l[] memory; record
   // each register's offset and grow the TLS allocation accordingly
   nir_foreach_register(reg, &function->impl->registers) {
      if (reg->num_array_elems) {
         // TODO: packed variables would be nice, but MemoryOpt fails
         // replace 4 with reg->num_components
         uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
         regToLmemOffset[reg->index] = info->bin.tlsSpace;
         info->bin.tlsSpace += size;
      }
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if (info->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non main function this needs to be a OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}
1476
1477 bool
1478 Converter::visit(nir_cf_node *node)
1479 {
1480 switch (node->type) {
1481 case nir_cf_node_block:
1482 return visit(nir_cf_node_as_block(node));
1483 case nir_cf_node_if:
1484 return visit(nir_cf_node_as_if(node));
1485 case nir_cf_node_loop:
1486 return visit(nir_cf_node_as_loop(node));
1487 default:
1488 ERROR("unknown nir_cf_node type %u\n", node->type);
1489 return false;
1490 }
1491 }
1492
1493 bool
1494 Converter::visit(nir_block *block)
1495 {
1496 if (!block->predecessors->entries && block->instr_list.is_empty())
1497 return true;
1498
1499 BasicBlock *bb = convert(block);
1500
1501 setPosition(bb, true);
1502 nir_foreach_instr(insn, block) {
1503 if (!visit(insn))
1504 return false;
1505 }
1506 return true;
1507 }
1508
// Translate a NIR if: emit a conditional branch to the else block, convert
// both arms, and join them again where the control flow reconverges.
bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   // the last block of each arm must fall through to a single successor
   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert a JOINAT if both arms reconverge at the same block;
   // the cases where they don't are breaks/continues/returns etc., which
   // have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   // branch to the else block when the condition is zero (false)
   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   // if the then arm doesn't already end in a flow instruction (other than
   // a JOIN), branch unconditionally past the else arm
   setPosition(convert(lastThen), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   // same fall-through handling for the else arm
   setPosition(convert(lastElse), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   // close the region opened by the JOINAT above
   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}
1569
// Translate a NIR loop: wrap the body between PREBREAK/PRECONT markers and
// close the back edge to the loop header.
bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   // the block following the loop is the break target
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      if (!insn || !insn->asFlow()) {
         // body falls through: emit the implicit continue / back edge
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}
1606
1607 bool
1608 Converter::visit(nir_instr *insn)
1609 {
1610 switch (insn->type) {
1611 case nir_instr_type_alu:
1612 return visit(nir_instr_as_alu(insn));
1613 case nir_instr_type_intrinsic:
1614 return visit(nir_instr_as_intrinsic(insn));
1615 case nir_instr_type_jump:
1616 return visit(nir_instr_as_jump(insn));
1617 case nir_instr_type_load_const:
1618 return visit(nir_instr_as_load_const(insn));
1619 case nir_instr_type_ssa_undef:
1620 return visit(nir_instr_as_ssa_undef(insn));
1621 case nir_instr_type_tex:
1622 return visit(nir_instr_as_tex(insn));
1623 default:
1624 ERROR("unknown nir_instr type %u\n", insn->type);
1625 return false;
1626 }
1627 return true;
1628 }
1629
// Map a system-value load intrinsic to the corresponding nv50-ir system
// value semantic. Asserts (and returns SV_LAST) for any intrinsic that is
// not a known system-value load.
SVSemantic
Converter::convert(nir_intrinsic_op intr)
{
   switch (intr) {
   case nir_intrinsic_load_base_vertex:
      return SV_BASEVERTEX;
   case nir_intrinsic_load_base_instance:
      return SV_BASEINSTANCE;
   case nir_intrinsic_load_draw_id:
      return SV_DRAWID;
   case nir_intrinsic_load_front_face:
      return SV_FACE;
   case nir_intrinsic_load_helper_invocation:
      return SV_THREAD_KILL;
   case nir_intrinsic_load_instance_id:
      return SV_INSTANCE_ID;
   case nir_intrinsic_load_invocation_id:
      return SV_INVOCATION_ID;
   case nir_intrinsic_load_local_group_size:
      return SV_NTID;
   case nir_intrinsic_load_local_invocation_id:
      return SV_TID;
   case nir_intrinsic_load_num_work_groups:
      return SV_NCTAID;
   case nir_intrinsic_load_patch_vertices_in:
      return SV_VERTEX_COUNT;
   case nir_intrinsic_load_primitive_id:
      return SV_PRIMITIVE_ID;
   case nir_intrinsic_load_sample_id:
      return SV_SAMPLE_INDEX;
   case nir_intrinsic_load_sample_mask_in:
      return SV_SAMPLE_MASK;
   case nir_intrinsic_load_sample_pos:
      return SV_SAMPLE_POS;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SV_LANEMASK_EQ;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SV_LANEMASK_GE;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SV_LANEMASK_GT;
   case nir_intrinsic_load_subgroup_le_mask:
      return SV_LANEMASK_LE;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SV_LANEMASK_LT;
   case nir_intrinsic_load_subgroup_invocation:
      return SV_LANEID;
   case nir_intrinsic_load_tess_coord:
      return SV_TESS_COORD;
   case nir_intrinsic_load_tess_level_inner:
      return SV_TESS_INNER;
   case nir_intrinsic_load_tess_level_outer:
      return SV_TESS_OUTER;
   case nir_intrinsic_load_vertex_id:
      return SV_VERTEX_ID;
   case nir_intrinsic_load_work_group_id:
      return SV_CTAID;
   default:
      ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
            nir_intrinsic_infos[intr].name);
      assert(false);
      return SV_LAST;
   }
}
1693
// Translate a single NIR intrinsic into nv50 IR. Returns false for
// intrinsics that are not (yet) supported.
bool
Converter::visit(nir_intrinsic_instr *insn)
{
   nir_intrinsic_op op = insn->intrinsic;

   switch (op) {
   case nir_intrinsic_load_uniform: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *indirect;
      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
      // uniforms come from constant buffer 0; each slot is 16 bytes wide
      for (uint8_t i = 0; i < insn->num_components; ++i) {
         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      Value *indirect;
      DataType dType = getSType(insn->src[0], false, false);
      // the offset source is src[1] for plain stores, src[2] for
      // per-vertex stores (src[1] is the vertex index there)
      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
            continue;

         uint8_t offset = 0;
         Value *src = getSrc(&insn->src[0], i);
         switch (prog->getType()) {
         case Program::TYPE_FRAGMENT: {
            if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
               // TGSI uses a different interface than NIR, TGSI stores that
               // value in the z component, NIR in X
               offset += 2;
               src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
            }
            break;
         }
         case Program::TYPE_VERTEX: {
            // keep a copy of the clip-vertex value for handleUserClipPlanes()
            if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
               mkMov(clipVtx[i], src);
               src = clipVtx[i];
            }
            break;
         }
         default:
            break;
         }

         storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
      }
      break;
   }
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_output: {
      LValues &newDefs = convert(&insn->dest);

      // FBFetch: reading a fragment output is implemented as a texel fetch
      // from the framebuffer at the current position/layer/sample
      if (prog->getType() == Program::TYPE_FRAGMENT &&
          op == nir_intrinsic_load_output) {
         std::vector<Value*> defs, srcs;
         uint8_t mask = 0;

         // integer x/y coordinates derived from SV_POSITION
         srcs.push_back(getSSA());
         srcs.push_back(getSSA());
         Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
         Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
         mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
         mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;

         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));

         for (uint8_t i = 0u; i < insn->num_components; ++i) {
            defs.push_back(newDefs[i]);
            mask |= 1 << i;
         }

         TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
         texi->tex.levelZero = 1;
         texi->tex.mask = mask;
         texi->tex.useOffsets = 0;
         texi->tex.r = 0xffff;
         texi->tex.s = 0xffff;

         info->prop.fp.readsFramebuffer = true;
         break;
      }

      const DataType dType = getDType(insn);
      Value *indirect;
      bool input = op != nir_intrinsic_load_output;
      operation nvirOp;
      uint32_t mode = 0;

      uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
      nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];

      // see load_barycentric_* handling
      if (prog->getType() == Program::TYPE_FRAGMENT) {
         mode = translateInterpMode(&vary, nvirOp);
         if (op == nir_intrinsic_load_interpolated_input) {
            // the interpolation mode is encoded as an immediate produced by
            // the load_barycentric_* cases below
            ImmediateValue immMode;
            if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
               mode |= immMode.reg.data.u32;
         }
      }

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         uint32_t address = getSlotAddress(insn, idx, i);
         Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
         if (prog->getType() == Program::TYPE_FRAGMENT) {
            int s = 1;
            // 64-bit inputs are interpolated as two 32-bit halves and merged
            if (typeSizeof(dType) == 8) {
               Value *lo = getSSA();
               Value *hi = getSSA();
               Instruction *interp;

               interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
               interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
            } else {
               Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);
            }
         } else {
            // non-fragment stages read inputs/outputs with plain loads
            mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
         }
      }
      break;
   }
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_sample: {
      // produce (packed offset, interpolation mode) consumed by the
      // load_interpolated_input handling above
      LValues &newDefs = convert(&insn->dest);
      uint32_t mode;

      if (op == nir_intrinsic_load_barycentric_centroid ||
          op == nir_intrinsic_load_barycentric_sample) {
         mode = NV50_IR_INTERP_CENTROID;
      } else if (op == nir_intrinsic_load_barycentric_at_offset) {
         // clamp the offset to [-0.5, 0.4375], convert to fixed point and
         // pack x/y into one register
         Value *offs[2];
         for (uint8_t c = 0; c < 2; c++) {
            offs[c] = getScratch();
            mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
            mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
            mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
            mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
         }
         mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);

         mode = NV50_IR_INTERP_OFFSET;
      } else if (op == nir_intrinsic_load_barycentric_pixel) {
         mode = NV50_IR_INTERP_DEFAULT;
      } else if (op == nir_intrinsic_load_barycentric_at_sample) {
         info->prop.fp.readsSampleLocations = true;
         mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
         mode = NV50_IR_INTERP_OFFSET;
      } else {
         unreachable("all intrinsics already handled above");
      }

      loadImm(newDefs[1], mode);
      break;
   }
   case nir_intrinsic_discard:
      mkOp(OP_DISCARD, TYPE_NONE, NULL);
      break;
   case nir_intrinsic_discard_if: {
      // predicate the discard on src[0] != 0
      Value *pred = getSSA(1, FILE_PREDICATE);
      if (insn->num_components > 1) {
         ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
         assert(false);
         return false;
      }
      mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
      mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
      break;
   }
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_work_group_id: {
      // generic system-value reads via OP_RDSV
      const DataType dType = getDType(insn);
      SVSemantic sv = convert(op);
      LValues &newDefs = convert(&insn->dest);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         Value *def;
         if (typeSizeof(dType) == 8)
            def = getSSA();
         else
            def = newDefs[i];

         if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
            // thread id along a 1-wide axis is always 0
            loadImm(def, 0u);
         } else {
            Symbol *sym = mkSysVal(sv, i);
            Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
            if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
               rdsv->perPatch = 1;
         }

         // system values are 32 bit; 64-bit results get a zero high word
         if (typeSizeof(dType) == 8)
            mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
      }
      break;
   }
   // constants
   case nir_intrinsic_load_subgroup_size: {
      LValues &newDefs = convert(&insn->dest);
      loadImm(newDefs[0], 32u);
      break;
   }
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq: {
      LValues &newDefs = convert(&insn->dest);
      Value *pred = getScratch(1, FILE_PREDICATE);
      mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
      mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
      mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
      break;
   }
   case nir_intrinsic_ballot: {
      LValues &newDefs = convert(&insn->dest);
      Value *pred = getSSA(1, FILE_PREDICATE);
      mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
      mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
      break;
   }
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_read_invocation: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *tmp = getScratch();

      if (op == nir_intrinsic_read_first_invocation) {
         // find the lowest active lane id: vote mask -> bit-reverse -> bfind
         mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
         mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
         mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
      } else
         tmp = getSrc(&insn->src[1], 0);

      for (uint8_t i = 0; i < insn->num_components; ++i) {
         mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
         ->subOp = NV50_IR_SUBOP_SHFL_IDX;
      }
      break;
   }
   default:
      ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
      return false;
   }

   return true;
}
1996
// Translate NIR jumps (return/break/continue) into flow instructions and
// attach the matching CFG edge.
bool
Converter::visit(nir_jump_instr *insn)
{
   switch (insn->type) {
   case nir_jump_return:
      // TODO: this only works in the main function
      mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
      bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
      break;
   case nir_jump_break:
   case nir_jump_continue: {
      bool isBreak = insn->type == nir_jump_break;
      nir_block *block = insn->instr.block;
      // a jump must be the block's only exit
      assert(!block->successors[1]);
      BasicBlock *target = convert(block->successors[0]);
      // break edges cross the loop, continue edges go back to the header
      mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
      bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
      break;
   }
   default:
      ERROR("unknown nir_jump_type %u\n", insn->type);
      return false;
   }

   return true;
}
2023
2024 bool
2025 Converter::visit(nir_load_const_instr *insn)
2026 {
2027 assert(insn->def.bit_size <= 64);
2028
2029 LValues &newDefs = convert(&insn->def);
2030 for (int i = 0; i < insn->def.num_components; i++) {
2031 switch (insn->def.bit_size) {
2032 case 64:
2033 loadImm(newDefs[i], insn->value.u64[i]);
2034 break;
2035 case 32:
2036 loadImm(newDefs[i], insn->value.u32[i]);
2037 break;
2038 case 16:
2039 loadImm(newDefs[i], insn->value.u16[i]);
2040 break;
2041 case 8:
2042 loadImm(newDefs[i], insn->value.u8[i]);
2043 break;
2044 }
2045 }
2046 return true;
2047 }
2048
// Shared sanity checks for the ALU visitor below: most handlers only
// support scalar (single component, full write mask) destinations because
// NIR is scalarized before conversion.
#define DEFAULT_CHECKS \
      if (insn->dest.dest.ssa.num_components > 1) { \
         ERROR("nir_alu_instr only supported with 1 component!\n"); \
         return false; \
      } \
      if (insn->dest.write_mask != 1) { \
         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
         return false; \
      }
2058 bool
2059 Converter::visit(nir_alu_instr *insn)
2060 {
2061 const nir_op op = insn->op;
2062 const nir_op_info &info = nir_op_infos[op];
2063 DataType dType = getDType(insn);
2064 const std::vector<DataType> sTypes = getSTypes(insn);
2065
2066 Instruction *oldPos = this->bb->getExit();
2067
2068 switch (op) {
2069 case nir_op_fabs:
2070 case nir_op_iabs:
2071 case nir_op_fadd:
2072 case nir_op_iadd:
2073 case nir_op_fand:
2074 case nir_op_iand:
2075 case nir_op_fceil:
2076 case nir_op_fcos:
2077 case nir_op_fddx:
2078 case nir_op_fddx_coarse:
2079 case nir_op_fddx_fine:
2080 case nir_op_fddy:
2081 case nir_op_fddy_coarse:
2082 case nir_op_fddy_fine:
2083 case nir_op_fdiv:
2084 case nir_op_idiv:
2085 case nir_op_udiv:
2086 case nir_op_fexp2:
2087 case nir_op_ffloor:
2088 case nir_op_ffma:
2089 case nir_op_flog2:
2090 case nir_op_fmax:
2091 case nir_op_imax:
2092 case nir_op_umax:
2093 case nir_op_fmin:
2094 case nir_op_imin:
2095 case nir_op_umin:
2096 case nir_op_fmod:
2097 case nir_op_imod:
2098 case nir_op_umod:
2099 case nir_op_fmul:
2100 case nir_op_imul:
2101 case nir_op_imul_high:
2102 case nir_op_umul_high:
2103 case nir_op_fneg:
2104 case nir_op_ineg:
2105 case nir_op_fnot:
2106 case nir_op_inot:
2107 case nir_op_for:
2108 case nir_op_ior:
2109 case nir_op_pack_64_2x32_split:
2110 case nir_op_fpow:
2111 case nir_op_frcp:
2112 case nir_op_frem:
2113 case nir_op_irem:
2114 case nir_op_frsq:
2115 case nir_op_fsat:
2116 case nir_op_ishr:
2117 case nir_op_ushr:
2118 case nir_op_fsin:
2119 case nir_op_fsqrt:
2120 case nir_op_fsub:
2121 case nir_op_isub:
2122 case nir_op_ftrunc:
2123 case nir_op_ishl:
2124 case nir_op_fxor:
2125 case nir_op_ixor: {
2126 DEFAULT_CHECKS;
2127 LValues &newDefs = convert(&insn->dest);
2128 operation preOp = preOperationNeeded(op);
2129 if (preOp != OP_NOP) {
2130 assert(info.num_inputs < 2);
2131 Value *tmp = getSSA(typeSizeof(dType));
2132 Instruction *i0 = mkOp(preOp, dType, tmp);
2133 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2134 if (info.num_inputs) {
2135 i0->setSrc(0, getSrc(&insn->src[0]));
2136 i1->setSrc(0, tmp);
2137 }
2138 i1->subOp = getSubOp(op);
2139 } else {
2140 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2141 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2142 i->setSrc(s, getSrc(&insn->src[s]));
2143 }
2144 i->subOp = getSubOp(op);
2145 }
2146 break;
2147 }
2148 case nir_op_ifind_msb:
2149 case nir_op_ufind_msb: {
2150 DEFAULT_CHECKS;
2151 LValues &newDefs = convert(&insn->dest);
2152 dType = sTypes[0];
2153 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2154 break;
2155 }
2156 case nir_op_fround_even: {
2157 DEFAULT_CHECKS;
2158 LValues &newDefs = convert(&insn->dest);
2159 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2160 break;
2161 }
2162 // convert instructions
2163 case nir_op_f2f32:
2164 case nir_op_f2i32:
2165 case nir_op_f2u32:
2166 case nir_op_i2f32:
2167 case nir_op_i2i32:
2168 case nir_op_u2f32:
2169 case nir_op_u2u32:
2170 case nir_op_f2f64:
2171 case nir_op_f2i64:
2172 case nir_op_f2u64:
2173 case nir_op_i2f64:
2174 case nir_op_i2i64:
2175 case nir_op_u2f64:
2176 case nir_op_u2u64: {
2177 DEFAULT_CHECKS;
2178 LValues &newDefs = convert(&insn->dest);
2179 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2180 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2181 i->rnd = ROUND_Z;
2182 i->sType = sTypes[0];
2183 break;
2184 }
2185 // compare instructions
2186 case nir_op_feq32:
2187 case nir_op_ieq32:
2188 case nir_op_fge32:
2189 case nir_op_ige32:
2190 case nir_op_uge32:
2191 case nir_op_flt32:
2192 case nir_op_ilt32:
2193 case nir_op_ult32:
2194 case nir_op_fne32:
2195 case nir_op_ine32: {
2196 DEFAULT_CHECKS;
2197 LValues &newDefs = convert(&insn->dest);
2198 Instruction *i = mkCmp(getOperation(op),
2199 getCondCode(op),
2200 dType,
2201 newDefs[0],
2202 dType,
2203 getSrc(&insn->src[0]),
2204 getSrc(&insn->src[1]));
2205 if (info.num_inputs == 3)
2206 i->setSrc(2, getSrc(&insn->src[2]));
2207 i->sType = sTypes[0];
2208 break;
2209 }
2210 // those are weird ALU ops and need special handling, because
2211 // 1. they are always componend based
2212 // 2. they basically just merge multiple values into one data type
2213 case nir_op_imov:
2214 case nir_op_fmov:
2215 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2216 nir_reg_dest& reg = insn->dest.dest.reg;
2217 uint32_t goffset = regToLmemOffset[reg.reg->index];
2218 uint8_t comps = reg.reg->num_components;
2219 uint8_t size = reg.reg->bit_size / 8;
2220 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2221 uint32_t aoffset = csize * reg.base_offset;
2222 Value *indirect = NULL;
2223
2224 if (reg.indirect)
2225 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2226 getSrc(reg.indirect, 0), mkImm(csize));
2227
2228 for (uint8_t i = 0u; i < comps; ++i) {
2229 if (!((1u << i) & insn->dest.write_mask))
2230 continue;
2231
2232 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2233 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2234 }
2235 break;
2236 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2237 LValues &newDefs = convert(&insn->dest);
2238 nir_reg_src& reg = insn->src[0].src.reg;
2239 uint32_t goffset = regToLmemOffset[reg.reg->index];
2240 // uint8_t comps = reg.reg->num_components;
2241 uint8_t size = reg.reg->bit_size / 8;
2242 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2243 uint32_t aoffset = csize * reg.base_offset;
2244 Value *indirect = NULL;
2245
2246 if (reg.indirect)
2247 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2248
2249 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2250 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2251
2252 break;
2253 } else {
2254 LValues &newDefs = convert(&insn->dest);
2255 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2256 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2257 }
2258 }
2259 break;
2260 case nir_op_vec2:
2261 case nir_op_vec3:
2262 case nir_op_vec4: {
2263 LValues &newDefs = convert(&insn->dest);
2264 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2265 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2266 }
2267 break;
2268 }
2269 // (un)pack
2270 case nir_op_pack_64_2x32: {
2271 LValues &newDefs = convert(&insn->dest);
2272 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2273 merge->setSrc(0, getSrc(&insn->src[0], 0));
2274 merge->setSrc(1, getSrc(&insn->src[0], 1));
2275 break;
2276 }
2277 case nir_op_pack_half_2x16_split: {
2278 LValues &newDefs = convert(&insn->dest);
2279 Value *tmpH = getSSA();
2280 Value *tmpL = getSSA();
2281
2282 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2283 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2284 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2285 break;
2286 }
2287 case nir_op_unpack_half_2x16_split_x:
2288 case nir_op_unpack_half_2x16_split_y: {
2289 LValues &newDefs = convert(&insn->dest);
2290 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2291 if (op == nir_op_unpack_half_2x16_split_y)
2292 cvt->subOp = 1;
2293 break;
2294 }
2295 case nir_op_unpack_64_2x32: {
2296 LValues &newDefs = convert(&insn->dest);
2297 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2298 break;
2299 }
2300 case nir_op_unpack_64_2x32_split_x: {
2301 LValues &newDefs = convert(&insn->dest);
2302 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2303 break;
2304 }
2305 case nir_op_unpack_64_2x32_split_y: {
2306 LValues &newDefs = convert(&insn->dest);
2307 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2308 break;
2309 }
2310 // special instructions
2311 case nir_op_fsign:
2312 case nir_op_isign: {
2313 DEFAULT_CHECKS;
2314 DataType iType;
2315 if (::isFloatType(dType))
2316 iType = TYPE_F32;
2317 else
2318 iType = TYPE_S32;
2319
2320 LValues &newDefs = convert(&insn->dest);
2321 LValue *val0 = getScratch();
2322 LValue *val1 = getScratch();
2323 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2324 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2325
2326 if (dType == TYPE_F64) {
2327 mkOp2(OP_SUB, iType, val0, val0, val1);
2328 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2329 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2330 mkOp2(OP_SUB, iType, val0, val1, val0);
2331 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2332 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2333 } else if (::isFloatType(dType))
2334 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2335 else
2336 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2337 break;
2338 }
2339 case nir_op_fcsel:
2340 case nir_op_b32csel: {
2341 DEFAULT_CHECKS;
2342 LValues &newDefs = convert(&insn->dest);
2343 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2344 break;
2345 }
2346 case nir_op_ibitfield_extract:
2347 case nir_op_ubitfield_extract: {
2348 DEFAULT_CHECKS;
2349 Value *tmp = getSSA();
2350 LValues &newDefs = convert(&insn->dest);
2351 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2352 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2353 break;
2354 }
2355 case nir_op_bfm: {
2356 DEFAULT_CHECKS;
2357 LValues &newDefs = convert(&insn->dest);
2358 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2359 break;
2360 }
2361 case nir_op_bitfield_insert: {
2362 DEFAULT_CHECKS;
2363 LValues &newDefs = convert(&insn->dest);
2364 LValue *temp = getSSA();
2365 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2366 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2367 break;
2368 }
2369 case nir_op_bit_count: {
2370 DEFAULT_CHECKS;
2371 LValues &newDefs = convert(&insn->dest);
2372 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2373 break;
2374 }
2375 case nir_op_bitfield_reverse: {
2376 DEFAULT_CHECKS;
2377 LValues &newDefs = convert(&insn->dest);
2378 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2379 break;
2380 }
2381 case nir_op_find_lsb: {
2382 DEFAULT_CHECKS;
2383 LValues &newDefs = convert(&insn->dest);
2384 Value *tmp = getSSA();
2385 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2386 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2387 break;
2388 }
2389 // boolean conversions
2390 case nir_op_b2f32: {
2391 DEFAULT_CHECKS;
2392 LValues &newDefs = convert(&insn->dest);
2393 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2394 break;
2395 }
2396 case nir_op_b2f64: {
2397 DEFAULT_CHECKS;
2398 LValues &newDefs = convert(&insn->dest);
2399 Value *tmp = getSSA(4);
2400 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2401 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2402 break;
2403 }
2404 case nir_op_f2b32:
2405 case nir_op_i2b32: {
2406 DEFAULT_CHECKS;
2407 LValues &newDefs = convert(&insn->dest);
2408 Value *src1;
2409 if (typeSizeof(sTypes[0]) == 8) {
2410 src1 = loadImm(getSSA(8), 0.0);
2411 } else {
2412 src1 = zero;
2413 }
2414 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2415 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2416 break;
2417 }
2418 case nir_op_b2i32: {
2419 DEFAULT_CHECKS;
2420 LValues &newDefs = convert(&insn->dest);
2421 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2422 break;
2423 }
2424 case nir_op_b2i64: {
2425 DEFAULT_CHECKS;
2426 LValues &newDefs = convert(&insn->dest);
2427 LValue *def = getScratch();
2428 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2429 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2430 break;
2431 }
2432 default:
2433 ERROR("unknown nir_op %s\n", info.name);
2434 return false;
2435 }
2436
2437 if (!oldPos) {
2438 oldPos = this->bb->getEntry();
2439 oldPos->precise = insn->exact;
2440 }
2441
2442 if (unlikely(!oldPos))
2443 return true;
2444
2445 while (oldPos->next) {
2446 oldPos = oldPos->next;
2447 oldPos->precise = insn->exact;
2448 }
2449 oldPos->saturate = insn->dest.saturate;
2450
2451 return true;
2452 }
2453 #undef DEFAULT_CHECKS
2454
2455 bool
2456 Converter::visit(nir_ssa_undef_instr *insn)
2457 {
2458 LValues &newDefs = convert(&insn->def);
2459 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2460 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2461 }
2462 return true;
2463 }
2464
2465 #define CASE_SAMPLER(ty) \
2466 case GLSL_SAMPLER_DIM_ ## ty : \
2467 if (isArray && !isShadow) \
2468 return TEX_TARGET_ ## ty ## _ARRAY; \
2469 else if (!isArray && isShadow) \
2470 return TEX_TARGET_## ty ## _SHADOW; \
2471 else if (isArray && isShadow) \
2472 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2473 else \
2474 return TEX_TARGET_ ## ty
2475
2476 TexTarget
2477 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2478 {
2479 switch (dim) {
2480 CASE_SAMPLER(1D);
2481 CASE_SAMPLER(2D);
2482 CASE_SAMPLER(CUBE);
2483 case GLSL_SAMPLER_DIM_3D:
2484 return TEX_TARGET_3D;
2485 case GLSL_SAMPLER_DIM_MS:
2486 if (isArray)
2487 return TEX_TARGET_2D_MS_ARRAY;
2488 return TEX_TARGET_2D_MS;
2489 case GLSL_SAMPLER_DIM_RECT:
2490 if (isShadow)
2491 return TEX_TARGET_RECT_SHADOW;
2492 return TEX_TARGET_RECT;
2493 case GLSL_SAMPLER_DIM_BUF:
2494 return TEX_TARGET_BUFFER;
2495 case GLSL_SAMPLER_DIM_EXTERNAL:
2496 return TEX_TARGET_2D;
2497 default:
2498 ERROR("unknown glsl_sampler_dim %u\n", dim);
2499 assert(false);
2500 return TEX_TARGET_COUNT;
2501 }
2502 }
2503 #undef CASE_SAMPLER
2504
2505 Value*
2506 Converter::applyProjection(Value *src, Value *proj)
2507 {
2508 if (!proj)
2509 return src;
2510 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
2511 }
2512
// Translate a NIR texture instruction into an nv50 IR TexInstruction.
// NIR stores texture sources as a tagged list; codegen wants a fixed source
// layout, so the sources are gathered by tag and pushed in the order the
// hardware expects. Order of the push_backs below is significant.
bool
Converter::visit(nir_tex_instr *insn)
{
   switch (insn->op) {
   case nir_texop_lod:
   case nir_texop_query_levels:
   case nir_texop_tex:
   case nir_texop_texture_samples:
   case nir_texop_tg4:
   case nir_texop_txb:
   case nir_texop_txd:
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txl:
   case nir_texop_txs: {
      LValues &newDefs = convert(&insn->dest);
      std::vector<Value*> srcs;
      std::vector<Value*> defs;
      std::vector<nir_src*> offsets;
      uint8_t mask = 0;
      bool lz = false;   // force level-zero sampling (TXF without LOD, etc.)
      Value *proj = NULL;
      TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
      operation op = getOperation(insn->op);

      int r, s;
      // Locate each optional NIR source by its tag; -1 means absent.
      int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
      int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
      int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
      int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
      int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
      int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
      int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
      int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
      int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
      int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
      int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);

      // The projector q is applied as a multiplication by 1/q to coords
      // (and the shadow comparator) via applyProjection().
      if (projIdx != -1)
         proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));

      srcs.resize(insn->coord_components);
      for (uint8_t i = 0u; i < insn->coord_components; ++i)
         srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);

      // sometimes we get less args than target.getArgCount, but codegen expects the latter
      if (insn->coord_components) {
         uint32_t argCount = target.getArgCount();

         // The MS sample index is appended separately below.
         if (target.isMS())
            argCount -= 1;

         // Pad with dummy SSA values up to the expected argument count.
         for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
            srcs.push_back(getSSA());
      }

      if (insn->op == nir_texop_texture_samples)
         srcs.push_back(zero);
      else if (!insn->num_srcs)
         // Queries without any source still need one operand; use 0.
         srcs.push_back(loadImm(NULL, 0));
      if (biasIdx != -1)
         srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
      if (lodIdx != -1)
         srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
      else if (op == OP_TXF)
         // TXF without an explicit LOD fetches from level zero.
         lz = true;
      if (msIdx != -1)
         srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
      if (offsetIdx != -1)
         offsets.push_back(&insn->src[offsetIdx].src);
      if (compIdx != -1)
         srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
      // For indirect texture/sampler selection, remember the position of the
      // indirect operand within srcs (reused below as rIndirectSrc/sIndirectSrc).
      if (texOffIdx != -1) {
         srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
         texOffIdx = srcs.size() - 1;
      }
      if (sampOffIdx != -1) {
         srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
         sampOffIdx = srcs.size() - 1;
      }

      r = insn->texture_index;
      s = insn->sampler_index;

      // Build the def list and the write mask from the NIR dest components.
      defs.resize(newDefs.size());
      for (uint8_t d = 0u; d < newDefs.size(); ++d) {
         defs[d] = newDefs[d];
         mask |= 1 << d;
      }
      // Implicit derivatives only exist in fragment shaders; plain TEX
      // elsewhere must sample level zero. MS targets never use LOD.
      if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
         lz = true;

      TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
      texi->tex.levelZero = lz;
      texi->tex.mask = mask;

      if (texOffIdx != -1)
         texi->tex.rIndirectSrc = texOffIdx;
      if (sampOffIdx != -1)
         texi->tex.sIndirectSrc = sampOffIdx;

      switch (insn->op) {
      case nir_texop_tg4:
         // Shadow gathers implicitly gather the comparison result.
         if (!target.isShadow())
            texi->tex.gatherComp = insn->component;
         break;
      case nir_texop_txs:
         texi->tex.query = TXQ_DIMS;
         break;
      case nir_texop_texture_samples:
         // Sample count lives in component 2 of the TXQ_TYPE result.
         texi->tex.mask = 0x4;
         texi->tex.query = TXQ_TYPE;
         break;
      case nir_texop_query_levels:
         // Level count lives in component 3 of the TXQ_DIMS result.
         texi->tex.mask = 0x8;
         texi->tex.query = TXQ_DIMS;
         break;
      default:
         break;
      }

      texi->tex.useOffsets = offsets.size();
      if (texi->tex.useOffsets) {
         for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
            for (uint32_t c = 0u; c < 3; ++c) {
               // Clamp the offset component to the target's dimensionality;
               // excess components repeat the last valid one.
               uint8_t s2 = std::min(c, target.getDim() - 1);
               texi->offset[s][c].set(getSrc(offsets[s], s2));
               texi->offset[s][c].setInsn(texi);
            }
         }
      }

      // Explicit derivatives (TXD): one dPdx/dPdy per coordinate dimension
      // (cube maps carry an extra component).
      if (ddxIdx != -1 && ddyIdx != -1) {
         for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
            texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
            texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
         }
      }

      break;
   }
   default:
      ERROR("unknown nir_texop %u\n", insn->op);
      return false;
   }
   return true;
}
2660
2661 bool
2662 Converter::run()
2663 {
2664 bool progress;
2665
2666 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
2667 nir_print_shader(nir, stderr);
2668
2669 struct nir_lower_subgroups_options subgroup_options = {
2670 .subgroup_size = 32,
2671 .ballot_bit_size = 32,
2672 };
2673
2674 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
2675 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
2676 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2677 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
2678 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2679 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
2680 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2681
2682 do {
2683 progress = false;
2684 NIR_PASS(progress, nir, nir_copy_prop);
2685 NIR_PASS(progress, nir, nir_opt_remove_phis);
2686 NIR_PASS(progress, nir, nir_opt_trivial_continues);
2687 NIR_PASS(progress, nir, nir_opt_cse);
2688 NIR_PASS(progress, nir, nir_opt_algebraic);
2689 NIR_PASS(progress, nir, nir_opt_constant_folding);
2690 NIR_PASS(progress, nir, nir_copy_prop);
2691 NIR_PASS(progress, nir, nir_opt_dce);
2692 NIR_PASS(progress, nir, nir_opt_dead_cf);
2693 } while (progress);
2694
2695 NIR_PASS_V(nir, nir_lower_bool_to_int32);
2696 NIR_PASS_V(nir, nir_lower_locals_to_regs);
2697 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
2698 NIR_PASS_V(nir, nir_convert_from_ssa, true);
2699
2700 // Garbage collect dead instructions
2701 nir_sweep(nir);
2702
2703 if (!parseNIR()) {
2704 ERROR("Couldn't prase NIR!\n");
2705 return false;
2706 }
2707
2708 if (!assignSlots()) {
2709 ERROR("Couldn't assign slots!\n");
2710 return false;
2711 }
2712
2713 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2714 nir_print_shader(nir, stderr);
2715
2716 nir_foreach_function(function, nir) {
2717 if (!visit(function))
2718 return false;
2719 }
2720
2721 return true;
2722 }
2723
2724 } // unnamed namespace
2725
2726 namespace nv50_ir {
2727
2728 bool
2729 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2730 {
2731 nir_shader *nir = (nir_shader*)info->bin.source;
2732 Converter converter(this, nir, info);
2733 bool result = converter.run();
2734 if (!result)
2735 return result;
2736 LoweringHelper lowering;
2737 lowering.run(this);
2738 tlsSize = info->bin.tlsSpace;
2739 return result;
2740 }
2741
2742 } // namespace nv50_ir