7a10a408b701e3022f969efdc9e1cfa5a88a35af
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <vector>
40
41 namespace {
42
43 #if __cplusplus >= 201103L
44 using std::hash;
45 using std::unordered_map;
46 #else
47 using std::tr1::hash;
48 using std::tr1::unordered_map;
49 #endif
50
51 using namespace nv50_ir;
52
53 int
54 type_size(const struct glsl_type *type)
55 {
56 return glsl_count_attribute_slots(type, false);
57 }
58
// Translates a nir_shader into the nv50_ir representation of a Program.
// Built on top of ConverterCommon; run() is the public entry point.
class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   // One LValue per component of a NIR value.
   typedef std::vector<LValue*> LValues;
   // Maps a NIR SSA def / register index to the values created for it.
   typedef unordered_map<unsigned, LValues> NirDefMap;
   // Maps a NIR block index to the BasicBlock created for it.
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   // convert() overloads translate a NIR entity into its nv50_ir counterpart;
   // the LValues-returning variants create the values on first use.
   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   // getSrc() overloads look up the value for one component of a source.
   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // The returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the source has no constant part, its Value is returned through the
   // Value parameter instead.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   // Sets the interpolation flags of a varying from the NIR interpolation
   // mode, the centroid flag and the TGSI semantic.
   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   // Type classification helpers based on the NIR ALU base type.
   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   // Destination data type of an instruction / opcode.
   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_op, uint8_t);

   // Source data types of an ALU instruction / a single source.
   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   // NIR opcode to nv50_ir operation mapping.
   operation getOperation(nir_intrinsic_op);
   operation getOperation(nir_op);
   operation getOperation(nir_texop);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_intrinsic_op);
   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   // Per-node visitors used while walking the NIR shader.
   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);
   bool visit(nir_ssa_undef_instr *);
   bool visit(nir_tex_instr *);

   // tex stuff
   Value* applyProjection(Value *src, Value *proj);

   // The shader being converted.
   nir_shader *nir;

   // Values created so far, keyed by NIR SSA def index / register index.
   NirDefMap ssaDefs;
   NirDefMap regDefs;
   // BasicBlocks created so far, keyed by NIR block index.
   NirBlockMap blocks;
   unsigned int curLoopDepth;

   BasicBlock *exit;
   // Immediate 0, created once in the constructor.
   Value *zero;

   // Output slot recorded by assignSlots() for the clip vertex (or position
   // as fallback); -1 until one is seen.
   int clipVertexOutput;

   union {
      struct {
         Value *position;
      } fp;
   };
};
166
167 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
168 : ConverterCommon(prog, info),
169 nir(nir),
170 curLoopDepth(0),
171 clipVertexOutput(-1)
172 {
173 zero = mkImm((uint32_t)0);
174 }
175
176 BasicBlock *
177 Converter::convert(nir_block *block)
178 {
179 NirBlockMap::iterator it = blocks.find(block->index);
180 if (it != blocks.end())
181 return it->second;
182
183 BasicBlock *bb = new BasicBlock(func);
184 blocks[block->index] = bb;
185 return bb;
186 }
187
188 bool
189 Converter::isFloatType(nir_alu_type type)
190 {
191 return nir_alu_type_get_base_type(type) == nir_type_float;
192 }
193
194 bool
195 Converter::isSignedType(nir_alu_type type)
196 {
197 return nir_alu_type_get_base_type(type) == nir_type_int;
198 }
199
200 bool
201 Converter::isResultFloat(nir_op op)
202 {
203 const nir_op_info &info = nir_op_infos[op];
204 if (info.output_type != nir_type_invalid)
205 return isFloatType(info.output_type);
206
207 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
208 assert(false);
209 return true;
210 }
211
212 bool
213 Converter::isResultSigned(nir_op op)
214 {
215 switch (op) {
216 // there is no umul and we get wrong results if we treat all muls as signed
217 case nir_op_imul:
218 case nir_op_inot:
219 return false;
220 default:
221 const nir_op_info &info = nir_op_infos[op];
222 if (info.output_type != nir_type_invalid)
223 return isSignedType(info.output_type);
224 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
225 assert(false);
226 return true;
227 }
228 }
229
230 DataType
231 Converter::getDType(nir_alu_instr *insn)
232 {
233 if (insn->dest.dest.is_ssa)
234 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
235 else
236 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
237 }
238
239 DataType
240 Converter::getDType(nir_intrinsic_instr *insn)
241 {
242 if (insn->dest.is_ssa)
243 return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
244 else
245 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
246 }
247
248 DataType
249 Converter::getDType(nir_op op, uint8_t bitSize)
250 {
251 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
252 if (ty == TYPE_NONE) {
253 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
254 assert(false);
255 }
256 return ty;
257 }
258
259 std::vector<DataType>
260 Converter::getSTypes(nir_alu_instr *insn)
261 {
262 const nir_op_info &info = nir_op_infos[insn->op];
263 std::vector<DataType> res(info.num_inputs);
264
265 for (uint8_t i = 0; i < info.num_inputs; ++i) {
266 if (info.input_types[i] != nir_type_invalid) {
267 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
268 } else {
269 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
270 assert(false);
271 res[i] = TYPE_NONE;
272 break;
273 }
274 }
275
276 return res;
277 }
278
279 DataType
280 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
281 {
282 uint8_t bitSize;
283 if (src.is_ssa)
284 bitSize = src.ssa->bit_size;
285 else
286 bitSize = src.reg.reg->bit_size;
287
288 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
289 if (ty == TYPE_NONE) {
290 const char *str;
291 if (isFloat)
292 str = "float";
293 else if (isSigned)
294 str = "int";
295 else
296 str = "uint";
297 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
298 assert(false);
299 }
300 return ty;
301 }
302
303 operation
304 Converter::getOperation(nir_op op)
305 {
306 switch (op) {
307 // basic ops with float and int variants
308 case nir_op_fabs:
309 case nir_op_iabs:
310 return OP_ABS;
311 case nir_op_fadd:
312 case nir_op_iadd:
313 return OP_ADD;
314 case nir_op_fand:
315 case nir_op_iand:
316 return OP_AND;
317 case nir_op_ifind_msb:
318 case nir_op_ufind_msb:
319 return OP_BFIND;
320 case nir_op_fceil:
321 return OP_CEIL;
322 case nir_op_fcos:
323 return OP_COS;
324 case nir_op_f2f32:
325 case nir_op_f2f64:
326 case nir_op_f2i32:
327 case nir_op_f2i64:
328 case nir_op_f2u32:
329 case nir_op_f2u64:
330 case nir_op_i2f32:
331 case nir_op_i2f64:
332 case nir_op_i2i32:
333 case nir_op_i2i64:
334 case nir_op_u2f32:
335 case nir_op_u2f64:
336 case nir_op_u2u32:
337 case nir_op_u2u64:
338 return OP_CVT;
339 case nir_op_fddx:
340 case nir_op_fddx_coarse:
341 case nir_op_fddx_fine:
342 return OP_DFDX;
343 case nir_op_fddy:
344 case nir_op_fddy_coarse:
345 case nir_op_fddy_fine:
346 return OP_DFDY;
347 case nir_op_fdiv:
348 case nir_op_idiv:
349 case nir_op_udiv:
350 return OP_DIV;
351 case nir_op_fexp2:
352 return OP_EX2;
353 case nir_op_ffloor:
354 return OP_FLOOR;
355 case nir_op_ffma:
356 return OP_FMA;
357 case nir_op_flog2:
358 return OP_LG2;
359 case nir_op_fmax:
360 case nir_op_imax:
361 case nir_op_umax:
362 return OP_MAX;
363 case nir_op_pack_64_2x32_split:
364 return OP_MERGE;
365 case nir_op_fmin:
366 case nir_op_imin:
367 case nir_op_umin:
368 return OP_MIN;
369 case nir_op_fmod:
370 case nir_op_imod:
371 case nir_op_umod:
372 case nir_op_frem:
373 case nir_op_irem:
374 return OP_MOD;
375 case nir_op_fmul:
376 case nir_op_imul:
377 case nir_op_imul_high:
378 case nir_op_umul_high:
379 return OP_MUL;
380 case nir_op_fneg:
381 case nir_op_ineg:
382 return OP_NEG;
383 case nir_op_fnot:
384 case nir_op_inot:
385 return OP_NOT;
386 case nir_op_for:
387 case nir_op_ior:
388 return OP_OR;
389 case nir_op_fpow:
390 return OP_POW;
391 case nir_op_frcp:
392 return OP_RCP;
393 case nir_op_frsq:
394 return OP_RSQ;
395 case nir_op_fsat:
396 return OP_SAT;
397 case nir_op_feq32:
398 case nir_op_ieq32:
399 case nir_op_fge32:
400 case nir_op_ige32:
401 case nir_op_uge32:
402 case nir_op_flt32:
403 case nir_op_ilt32:
404 case nir_op_ult32:
405 case nir_op_fne32:
406 case nir_op_ine32:
407 return OP_SET;
408 case nir_op_ishl:
409 return OP_SHL;
410 case nir_op_ishr:
411 case nir_op_ushr:
412 return OP_SHR;
413 case nir_op_fsin:
414 return OP_SIN;
415 case nir_op_fsqrt:
416 return OP_SQRT;
417 case nir_op_fsub:
418 case nir_op_isub:
419 return OP_SUB;
420 case nir_op_ftrunc:
421 return OP_TRUNC;
422 case nir_op_fxor:
423 case nir_op_ixor:
424 return OP_XOR;
425 default:
426 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
427 assert(false);
428 return OP_NOP;
429 }
430 }
431
432 operation
433 Converter::getOperation(nir_texop op)
434 {
435 switch (op) {
436 case nir_texop_tex:
437 return OP_TEX;
438 case nir_texop_lod:
439 return OP_TXLQ;
440 case nir_texop_txb:
441 return OP_TXB;
442 case nir_texop_txd:
443 return OP_TXD;
444 case nir_texop_txf:
445 case nir_texop_txf_ms:
446 return OP_TXF;
447 case nir_texop_tg4:
448 return OP_TXG;
449 case nir_texop_txl:
450 return OP_TXL;
451 case nir_texop_query_levels:
452 case nir_texop_texture_samples:
453 case nir_texop_txs:
454 return OP_TXQ;
455 default:
456 ERROR("couldn't get operation for nir_texop %u\n", op);
457 assert(false);
458 return OP_NOP;
459 }
460 }
461
462 operation
463 Converter::getOperation(nir_intrinsic_op op)
464 {
465 switch (op) {
466 default:
467 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
468 assert(false);
469 return OP_NOP;
470 }
471 }
472
473 operation
474 Converter::preOperationNeeded(nir_op op)
475 {
476 switch (op) {
477 case nir_op_fcos:
478 case nir_op_fsin:
479 return OP_PRESIN;
480 default:
481 return OP_NOP;
482 }
483 }
484
485 int
486 Converter::getSubOp(nir_op op)
487 {
488 switch (op) {
489 case nir_op_imul_high:
490 case nir_op_umul_high:
491 return NV50_IR_SUBOP_MUL_HIGH;
492 default:
493 return 0;
494 }
495 }
496
497 int
498 Converter::getSubOp(nir_intrinsic_op op)
499 {
500 switch (op) {
501 case nir_intrinsic_vote_all:
502 return NV50_IR_SUBOP_VOTE_ALL;
503 case nir_intrinsic_vote_any:
504 return NV50_IR_SUBOP_VOTE_ANY;
505 case nir_intrinsic_vote_ieq:
506 return NV50_IR_SUBOP_VOTE_UNI;
507 default:
508 return 0;
509 }
510 }
511
512 CondCode
513 Converter::getCondCode(nir_op op)
514 {
515 switch (op) {
516 case nir_op_feq32:
517 case nir_op_ieq32:
518 return CC_EQ;
519 case nir_op_fge32:
520 case nir_op_ige32:
521 case nir_op_uge32:
522 return CC_GE;
523 case nir_op_flt32:
524 case nir_op_ilt32:
525 case nir_op_ult32:
526 return CC_LT;
527 case nir_op_fne32:
528 return CC_NEU;
529 case nir_op_ine32:
530 return CC_NE;
531 default:
532 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
533 assert(false);
534 return CC_FL;
535 }
536 }
537
538 Converter::LValues&
539 Converter::convert(nir_alu_dest *dest)
540 {
541 return convert(&dest->dest);
542 }
543
544 Converter::LValues&
545 Converter::convert(nir_dest *dest)
546 {
547 if (dest->is_ssa)
548 return convert(&dest->ssa);
549 if (dest->reg.indirect) {
550 ERROR("no support for indirects.");
551 assert(false);
552 }
553 return convert(dest->reg.reg);
554 }
555
556 Converter::LValues&
557 Converter::convert(nir_register *reg)
558 {
559 NirDefMap::iterator it = regDefs.find(reg->index);
560 if (it != regDefs.end())
561 return it->second;
562
563 LValues newDef(reg->num_components);
564 for (uint8_t i = 0; i < reg->num_components; i++)
565 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
566 return regDefs[reg->index] = newDef;
567 }
568
569 Converter::LValues&
570 Converter::convert(nir_ssa_def *def)
571 {
572 NirDefMap::iterator it = ssaDefs.find(def->index);
573 if (it != ssaDefs.end())
574 return it->second;
575
576 LValues newDef(def->num_components);
577 for (uint8_t i = 0; i < def->num_components; i++)
578 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
579 return ssaDefs[def->index] = newDef;
580 }
581
582 Value*
583 Converter::getSrc(nir_alu_src *src, uint8_t component)
584 {
585 if (src->abs || src->negate) {
586 ERROR("modifiers currently not supported on nir_alu_src\n");
587 assert(false);
588 }
589 return getSrc(&src->src, src->swizzle[component]);
590 }
591
592 Value*
593 Converter::getSrc(nir_register *reg, uint8_t idx)
594 {
595 NirDefMap::iterator it = regDefs.find(reg->index);
596 if (it == regDefs.end())
597 return convert(reg)[idx];
598 return it->second[idx];
599 }
600
601 Value*
602 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
603 {
604 if (src->is_ssa)
605 return getSrc(src->ssa, idx);
606
607 if (src->reg.indirect) {
608 if (indirect)
609 return getSrc(src->reg.indirect, idx);
610 ERROR("no support for indirects.");
611 assert(false);
612 return NULL;
613 }
614
615 return getSrc(src->reg.reg, idx);
616 }
617
618 Value*
619 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
620 {
621 NirDefMap::iterator it = ssaDefs.find(src->index);
622 if (it == ssaDefs.end()) {
623 ERROR("SSA value %u not found\n", src->index);
624 assert(false);
625 return NULL;
626 }
627 return it->second[idx];
628 }
629
630 uint32_t
631 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
632 {
633 nir_const_value *offset = nir_src_as_const_value(*src);
634
635 if (offset) {
636 indirect = NULL;
637 return offset->u32[0];
638 }
639
640 indirect = getSrc(src, idx, true);
641 return 0;
642 }
643
644 uint32_t
645 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
646 {
647 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
648 if (indirect)
649 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
650 return idx;
651 }
652
653 static void
654 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
655 {
656 assert(name && index);
657
658 if (slot >= VERT_ATTRIB_MAX) {
659 ERROR("invalid varying slot %u\n", slot);
660 assert(false);
661 return;
662 }
663
664 if (slot >= VERT_ATTRIB_GENERIC0 &&
665 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
666 *name = TGSI_SEMANTIC_GENERIC;
667 *index = slot - VERT_ATTRIB_GENERIC0;
668 return;
669 }
670
671 if (slot >= VERT_ATTRIB_TEX0 &&
672 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
673 *name = TGSI_SEMANTIC_TEXCOORD;
674 *index = slot - VERT_ATTRIB_TEX0;
675 return;
676 }
677
678 switch (slot) {
679 case VERT_ATTRIB_COLOR0:
680 *name = TGSI_SEMANTIC_COLOR;
681 *index = 0;
682 break;
683 case VERT_ATTRIB_COLOR1:
684 *name = TGSI_SEMANTIC_COLOR;
685 *index = 1;
686 break;
687 case VERT_ATTRIB_EDGEFLAG:
688 *name = TGSI_SEMANTIC_EDGEFLAG;
689 *index = 0;
690 break;
691 case VERT_ATTRIB_FOG:
692 *name = TGSI_SEMANTIC_FOG;
693 *index = 0;
694 break;
695 case VERT_ATTRIB_NORMAL:
696 *name = TGSI_SEMANTIC_NORMAL;
697 *index = 0;
698 break;
699 case VERT_ATTRIB_POS:
700 *name = TGSI_SEMANTIC_POSITION;
701 *index = 0;
702 break;
703 case VERT_ATTRIB_POINT_SIZE:
704 *name = TGSI_SEMANTIC_PSIZE;
705 *index = 0;
706 break;
707 default:
708 ERROR("unknown vert attrib slot %u\n", slot);
709 assert(false);
710 break;
711 }
712 }
713
714 static void
715 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
716 {
717 assert(name && index);
718
719 if (slot >= VARYING_SLOT_TESS_MAX) {
720 ERROR("invalid varying slot %u\n", slot);
721 assert(false);
722 return;
723 }
724
725 if (slot >= VARYING_SLOT_PATCH0) {
726 *name = TGSI_SEMANTIC_PATCH;
727 *index = slot - VARYING_SLOT_PATCH0;
728 return;
729 }
730
731 if (slot >= VARYING_SLOT_VAR0) {
732 *name = TGSI_SEMANTIC_GENERIC;
733 *index = slot - VARYING_SLOT_VAR0;
734 return;
735 }
736
737 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
738 *name = TGSI_SEMANTIC_TEXCOORD;
739 *index = slot - VARYING_SLOT_TEX0;
740 return;
741 }
742
743 switch (slot) {
744 case VARYING_SLOT_BFC0:
745 *name = TGSI_SEMANTIC_BCOLOR;
746 *index = 0;
747 break;
748 case VARYING_SLOT_BFC1:
749 *name = TGSI_SEMANTIC_BCOLOR;
750 *index = 1;
751 break;
752 case VARYING_SLOT_CLIP_DIST0:
753 *name = TGSI_SEMANTIC_CLIPDIST;
754 *index = 0;
755 break;
756 case VARYING_SLOT_CLIP_DIST1:
757 *name = TGSI_SEMANTIC_CLIPDIST;
758 *index = 1;
759 break;
760 case VARYING_SLOT_CLIP_VERTEX:
761 *name = TGSI_SEMANTIC_CLIPVERTEX;
762 *index = 0;
763 break;
764 case VARYING_SLOT_COL0:
765 *name = TGSI_SEMANTIC_COLOR;
766 *index = 0;
767 break;
768 case VARYING_SLOT_COL1:
769 *name = TGSI_SEMANTIC_COLOR;
770 *index = 1;
771 break;
772 case VARYING_SLOT_EDGE:
773 *name = TGSI_SEMANTIC_EDGEFLAG;
774 *index = 0;
775 break;
776 case VARYING_SLOT_FACE:
777 *name = TGSI_SEMANTIC_FACE;
778 *index = 0;
779 break;
780 case VARYING_SLOT_FOGC:
781 *name = TGSI_SEMANTIC_FOG;
782 *index = 0;
783 break;
784 case VARYING_SLOT_LAYER:
785 *name = TGSI_SEMANTIC_LAYER;
786 *index = 0;
787 break;
788 case VARYING_SLOT_PNTC:
789 *name = TGSI_SEMANTIC_PCOORD;
790 *index = 0;
791 break;
792 case VARYING_SLOT_POS:
793 *name = TGSI_SEMANTIC_POSITION;
794 *index = 0;
795 break;
796 case VARYING_SLOT_PRIMITIVE_ID:
797 *name = TGSI_SEMANTIC_PRIMID;
798 *index = 0;
799 break;
800 case VARYING_SLOT_PSIZ:
801 *name = TGSI_SEMANTIC_PSIZE;
802 *index = 0;
803 break;
804 case VARYING_SLOT_TESS_LEVEL_INNER:
805 *name = TGSI_SEMANTIC_TESSINNER;
806 *index = 0;
807 break;
808 case VARYING_SLOT_TESS_LEVEL_OUTER:
809 *name = TGSI_SEMANTIC_TESSOUTER;
810 *index = 0;
811 break;
812 case VARYING_SLOT_VIEWPORT:
813 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
814 *index = 0;
815 break;
816 default:
817 ERROR("unknown varying slot %u\n", slot);
818 assert(false);
819 break;
820 }
821 }
822
823 static void
824 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
825 {
826 if (slot >= FRAG_RESULT_DATA0) {
827 *name = TGSI_SEMANTIC_COLOR;
828 *index = slot - FRAG_RESULT_COLOR - 2; // intentional
829 return;
830 }
831
832 switch (slot) {
833 case FRAG_RESULT_COLOR:
834 *name = TGSI_SEMANTIC_COLOR;
835 *index = 0;
836 break;
837 case FRAG_RESULT_DEPTH:
838 *name = TGSI_SEMANTIC_POSITION;
839 *index = 0;
840 break;
841 case FRAG_RESULT_SAMPLE_MASK:
842 *name = TGSI_SEMANTIC_SAMPLEMASK;
843 *index = 0;
844 break;
845 default:
846 ERROR("unknown frag result slot %u\n", slot);
847 assert(false);
848 break;
849 }
850 }
851
852 // copy of _mesa_sysval_to_semantic
853 static void
854 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
855 {
856 *index = 0;
857 switch (val) {
858 // Vertex shader
859 case SYSTEM_VALUE_VERTEX_ID:
860 *name = TGSI_SEMANTIC_VERTEXID;
861 break;
862 case SYSTEM_VALUE_INSTANCE_ID:
863 *name = TGSI_SEMANTIC_INSTANCEID;
864 break;
865 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
866 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
867 break;
868 case SYSTEM_VALUE_BASE_VERTEX:
869 *name = TGSI_SEMANTIC_BASEVERTEX;
870 break;
871 case SYSTEM_VALUE_BASE_INSTANCE:
872 *name = TGSI_SEMANTIC_BASEINSTANCE;
873 break;
874 case SYSTEM_VALUE_DRAW_ID:
875 *name = TGSI_SEMANTIC_DRAWID;
876 break;
877
878 // Geometry shader
879 case SYSTEM_VALUE_INVOCATION_ID:
880 *name = TGSI_SEMANTIC_INVOCATIONID;
881 break;
882
883 // Fragment shader
884 case SYSTEM_VALUE_FRAG_COORD:
885 *name = TGSI_SEMANTIC_POSITION;
886 break;
887 case SYSTEM_VALUE_FRONT_FACE:
888 *name = TGSI_SEMANTIC_FACE;
889 break;
890 case SYSTEM_VALUE_SAMPLE_ID:
891 *name = TGSI_SEMANTIC_SAMPLEID;
892 break;
893 case SYSTEM_VALUE_SAMPLE_POS:
894 *name = TGSI_SEMANTIC_SAMPLEPOS;
895 break;
896 case SYSTEM_VALUE_SAMPLE_MASK_IN:
897 *name = TGSI_SEMANTIC_SAMPLEMASK;
898 break;
899 case SYSTEM_VALUE_HELPER_INVOCATION:
900 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
901 break;
902
903 // Tessellation shader
904 case SYSTEM_VALUE_TESS_COORD:
905 *name = TGSI_SEMANTIC_TESSCOORD;
906 break;
907 case SYSTEM_VALUE_VERTICES_IN:
908 *name = TGSI_SEMANTIC_VERTICESIN;
909 break;
910 case SYSTEM_VALUE_PRIMITIVE_ID:
911 *name = TGSI_SEMANTIC_PRIMID;
912 break;
913 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
914 *name = TGSI_SEMANTIC_TESSOUTER;
915 break;
916 case SYSTEM_VALUE_TESS_LEVEL_INNER:
917 *name = TGSI_SEMANTIC_TESSINNER;
918 break;
919
920 // Compute shader
921 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
922 *name = TGSI_SEMANTIC_THREAD_ID;
923 break;
924 case SYSTEM_VALUE_WORK_GROUP_ID:
925 *name = TGSI_SEMANTIC_BLOCK_ID;
926 break;
927 case SYSTEM_VALUE_NUM_WORK_GROUPS:
928 *name = TGSI_SEMANTIC_GRID_SIZE;
929 break;
930 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
931 *name = TGSI_SEMANTIC_BLOCK_SIZE;
932 break;
933
934 // ARB_shader_ballot
935 case SYSTEM_VALUE_SUBGROUP_SIZE:
936 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
937 break;
938 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
939 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
940 break;
941 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
942 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
943 break;
944 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
945 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
946 break;
947 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
948 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
949 break;
950 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
951 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
952 break;
953 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
954 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
955 break;
956
957 default:
958 ERROR("unknown system value %u\n", val);
959 assert(false);
960 break;
961 }
962 }
963
964 void
965 Converter::setInterpolate(nv50_ir_varying *var,
966 uint8_t mode,
967 bool centroid,
968 unsigned semantic)
969 {
970 switch (mode) {
971 case INTERP_MODE_FLAT:
972 var->flat = 1;
973 break;
974 case INTERP_MODE_NONE:
975 if (semantic == TGSI_SEMANTIC_COLOR)
976 var->sc = 1;
977 else if (semantic == TGSI_SEMANTIC_POSITION)
978 var->linear = 1;
979 break;
980 case INTERP_MODE_NOPERSPECTIVE:
981 var->linear = 1;
982 break;
983 case INTERP_MODE_SMOOTH:
984 break;
985 }
986 var->centroid = centroid;
987 }
988
989 static uint16_t
990 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
991 bool input, const nir_variable *var)
992 {
993 if (!type->is_array())
994 return type->count_attribute_slots(false);
995
996 uint16_t slots;
997 switch (stage) {
998 case Program::TYPE_GEOMETRY:
999 slots = type->uniform_locations();
1000 if (input)
1001 slots /= info.gs.vertices_in;
1002 break;
1003 case Program::TYPE_TESSELLATION_CONTROL:
1004 case Program::TYPE_TESSELLATION_EVAL:
1005 // remove first dimension
1006 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1007 slots = type->uniform_locations();
1008 else
1009 slots = type->fields.array->uniform_locations();
1010 break;
1011 default:
1012 slots = type->count_attribute_slots(false);
1013 break;
1014 }
1015
1016 return slots;
1017 }
1018
1019 bool Converter::assignSlots() {
1020 unsigned name;
1021 unsigned index;
1022
1023 info->io.viewportId = -1;
1024 info->numInputs = 0;
1025
1026 // we have to fixup the uniform locations for arrays
1027 unsigned numImages = 0;
1028 nir_foreach_variable(var, &nir->uniforms) {
1029 const glsl_type *type = var->type;
1030 if (!type->without_array()->is_image())
1031 continue;
1032 var->data.driver_location = numImages;
1033 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1034 }
1035
1036 nir_foreach_variable(var, &nir->inputs) {
1037 const glsl_type *type = var->type;
1038 int slot = var->data.location;
1039 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1040 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1041 : type->component_slots();
1042 uint32_t frac = var->data.location_frac;
1043 uint32_t vary = var->data.driver_location;
1044
1045 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1046 if (comp > 2)
1047 slots *= 2;
1048 }
1049
1050 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1051
1052 switch(prog->getType()) {
1053 case Program::TYPE_FRAGMENT:
1054 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1055 for (uint16_t i = 0; i < slots; ++i) {
1056 setInterpolate(&info->in[vary + i], var->data.interpolation,
1057 var->data.centroid | var->data.sample, name);
1058 }
1059 break;
1060 case Program::TYPE_GEOMETRY:
1061 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1062 break;
1063 case Program::TYPE_TESSELLATION_CONTROL:
1064 case Program::TYPE_TESSELLATION_EVAL:
1065 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1066 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1067 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1068 break;
1069 case Program::TYPE_VERTEX:
1070 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1071 switch (name) {
1072 case TGSI_SEMANTIC_EDGEFLAG:
1073 info->io.edgeFlagIn = vary;
1074 break;
1075 default:
1076 break;
1077 }
1078 break;
1079 default:
1080 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1081 return false;
1082 }
1083
1084 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1085 info->in[vary].id = vary;
1086 info->in[vary].patch = var->data.patch;
1087 info->in[vary].sn = name;
1088 info->in[vary].si = index + i;
1089 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1090 if (i & 0x1)
1091 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1092 else
1093 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1094 else
1095 info->in[vary].mask |= ((1 << comp) - 1) << frac;
1096 }
1097 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1098 }
1099
1100 info->numOutputs = 0;
1101 nir_foreach_variable(var, &nir->outputs) {
1102 const glsl_type *type = var->type;
1103 int slot = var->data.location;
1104 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1105 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1106 : type->component_slots();
1107 uint32_t frac = var->data.location_frac;
1108 uint32_t vary = var->data.driver_location;
1109
1110 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1111 if (comp > 2)
1112 slots *= 2;
1113 }
1114
1115 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1116
1117 switch(prog->getType()) {
1118 case Program::TYPE_FRAGMENT:
1119 frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1120 switch (name) {
1121 case TGSI_SEMANTIC_COLOR:
1122 if (!var->data.fb_fetch_output)
1123 info->prop.fp.numColourResults++;
1124 info->prop.fp.separateFragData = true;
1125 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1126 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1127 index = index == 0 ? var->data.index : index;
1128 break;
1129 case TGSI_SEMANTIC_POSITION:
1130 info->io.fragDepth = vary;
1131 info->prop.fp.writesDepth = true;
1132 break;
1133 case TGSI_SEMANTIC_SAMPLEMASK:
1134 info->io.sampleMask = vary;
1135 break;
1136 default:
1137 break;
1138 }
1139 break;
1140 case Program::TYPE_GEOMETRY:
1141 case Program::TYPE_TESSELLATION_CONTROL:
1142 case Program::TYPE_TESSELLATION_EVAL:
1143 case Program::TYPE_VERTEX:
1144 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1145
1146 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1147 name != TGSI_SEMANTIC_TESSOUTER)
1148 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1149
1150 switch (name) {
1151 case TGSI_SEMANTIC_CLIPDIST:
1152 info->io.genUserClip = -1;
1153 break;
1154 case TGSI_SEMANTIC_CLIPVERTEX:
1155 clipVertexOutput = vary;
1156 break;
1157 case TGSI_SEMANTIC_EDGEFLAG:
1158 info->io.edgeFlagOut = vary;
1159 break;
1160 case TGSI_SEMANTIC_POSITION:
1161 if (clipVertexOutput < 0)
1162 clipVertexOutput = vary;
1163 break;
1164 default:
1165 break;
1166 }
1167 break;
1168 default:
1169 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1170 return false;
1171 }
1172
1173 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1174 info->out[vary].id = vary;
1175 info->out[vary].patch = var->data.patch;
1176 info->out[vary].sn = name;
1177 info->out[vary].si = index + i;
1178 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1179 if (i & 0x1)
1180 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1181 else
1182 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1183 else
1184 info->out[vary].mask |= ((1 << comp) - 1) << frac;
1185
1186 if (nir->info.outputs_read & 1ll << slot)
1187 info->out[vary].oread = 1;
1188 }
1189 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1190 }
1191
1192 info->numSysVals = 0;
1193 for (uint8_t i = 0; i < 64; ++i) {
1194 if (!(nir->info.system_values_read & 1ll << i))
1195 continue;
1196
1197 system_val_to_tgsi_semantic(i, &name, &index);
1198 info->sv[info->numSysVals].sn = name;
1199 info->sv[info->numSysVals].si = index;
1200 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1201
1202 switch (i) {
1203 case SYSTEM_VALUE_INSTANCE_ID:
1204 info->io.instanceId = info->numSysVals;
1205 break;
1206 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1207 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1208 info->sv[info->numSysVals].patch = 1;
1209 break;
1210 case SYSTEM_VALUE_VERTEX_ID:
1211 info->io.vertexId = info->numSysVals;
1212 break;
1213 default:
1214 break;
1215 }
1216
1217 info->numSysVals += 1;
1218 }
1219
1220 if (info->io.genUserClip > 0) {
1221 info->io.clipDistances = info->io.genUserClip;
1222
1223 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1224
1225 for (unsigned int n = 0; n < nOut; ++n) {
1226 unsigned int i = info->numOutputs++;
1227 info->out[i].id = i;
1228 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1229 info->out[i].si = n;
1230 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1231 }
1232 }
1233
1234 return info->assignSlots(info) == 0;
1235 }
1236
1237 uint32_t
1238 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1239 {
1240 DataType ty;
1241 int offset = nir_intrinsic_component(insn);
1242 bool input;
1243
1244 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1245 ty = getDType(insn);
1246 else
1247 ty = getSType(insn->src[0], false, false);
1248
1249 switch (insn->intrinsic) {
1250 case nir_intrinsic_load_input:
1251 case nir_intrinsic_load_interpolated_input:
1252 case nir_intrinsic_load_per_vertex_input:
1253 input = true;
1254 break;
1255 case nir_intrinsic_load_output:
1256 case nir_intrinsic_load_per_vertex_output:
1257 case nir_intrinsic_store_output:
1258 case nir_intrinsic_store_per_vertex_output:
1259 input = false;
1260 break;
1261 default:
1262 ERROR("unknown intrinsic in getSlotAddress %s",
1263 nir_intrinsic_infos[insn->intrinsic].name);
1264 input = false;
1265 assert(false);
1266 break;
1267 }
1268
1269 if (typeSizeof(ty) == 8) {
1270 slot *= 2;
1271 slot += offset;
1272 if (slot >= 4) {
1273 idx += 1;
1274 slot -= 4;
1275 }
1276 } else {
1277 slot += offset;
1278 }
1279
1280 assert(slot < 4);
1281 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1282 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1283
1284 const nv50_ir_varying *vary = input ? info->in : info->out;
1285 return vary[idx].slot[slot] * 4;
1286 }
1287
1288 Instruction *
1289 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1290 uint32_t base, uint8_t c, Value *indirect0,
1291 Value *indirect1, bool patch)
1292 {
1293 unsigned int tySize = typeSizeof(ty);
1294
1295 if (tySize == 8 &&
1296 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1297 Value *lo = getSSA();
1298 Value *hi = getSSA();
1299
1300 Instruction *loi =
1301 mkLoad(TYPE_U32, lo,
1302 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1303 indirect0);
1304 loi->setIndirect(0, 1, indirect1);
1305 loi->perPatch = patch;
1306
1307 Instruction *hii =
1308 mkLoad(TYPE_U32, hi,
1309 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1310 indirect0);
1311 hii->setIndirect(0, 1, indirect1);
1312 hii->perPatch = patch;
1313
1314 return mkOp2(OP_MERGE, ty, def, lo, hi);
1315 } else {
1316 Instruction *ld =
1317 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1318 ld->setIndirect(0, 1, indirect1);
1319 ld->perPatch = patch;
1320 return ld;
1321 }
1322 }
1323
1324 void
1325 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1326 DataType ty, Value *src, uint8_t idx, uint8_t c,
1327 Value *indirect0, Value *indirect1)
1328 {
1329 uint8_t size = typeSizeof(ty);
1330 uint32_t address = getSlotAddress(insn, idx, c);
1331
1332 if (size == 8 && indirect0) {
1333 Value *split[2];
1334 mkSplit(split, 4, src);
1335
1336 if (op == OP_EXPORT) {
1337 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1338 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1339 }
1340
1341 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1342 split[0])->perPatch = info->out[idx].patch;
1343 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1344 split[1])->perPatch = info->out[idx].patch;
1345 } else {
1346 if (op == OP_EXPORT)
1347 src = mkMov(getSSA(size), src, ty)->getDef(0);
1348 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1349 src)->perPatch = info->out[idx].patch;
1350 }
1351 }
1352
1353 bool
1354 Converter::parseNIR()
1355 {
1356 info->io.clipDistances = nir->info.clip_distance_array_size;
1357 info->io.cullDistances = nir->info.cull_distance_array_size;
1358
1359 switch(prog->getType()) {
1360 case Program::TYPE_COMPUTE:
1361 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1362 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1363 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1364 info->bin.smemSize = nir->info.cs.shared_size;
1365 break;
1366 case Program::TYPE_FRAGMENT:
1367 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1368 info->prop.fp.persampleInvocation =
1369 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1370 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1371 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1372 info->prop.fp.readsSampleLocations =
1373 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1374 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1375 info->prop.fp.usesSampleMaskIn =
1376 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1377 break;
1378 case Program::TYPE_GEOMETRY:
1379 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1380 info->prop.gp.instanceCount = nir->info.gs.invocations;
1381 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1382 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1383 break;
1384 case Program::TYPE_TESSELLATION_CONTROL:
1385 case Program::TYPE_TESSELLATION_EVAL:
1386 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1387 info->prop.tp.domain = GL_LINES;
1388 else
1389 info->prop.tp.domain = nir->info.tess.primitive_mode;
1390 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1391 info->prop.tp.outputPrim =
1392 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1393 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1394 info->prop.tp.winding = !nir->info.tess.ccw;
1395 break;
1396 case Program::TYPE_VERTEX:
1397 info->prop.vp.usesDrawParameters =
1398 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1399 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1400 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1401 break;
1402 default:
1403 break;
1404 }
1405
1406 return true;
1407 }
1408
1409 bool
1410 Converter::visit(nir_function *function)
1411 {
1412 // we only support emiting the main function for now
1413 assert(!strcmp(function->name, "main"));
1414 assert(function->impl);
1415
1416 // usually the blocks will set everything up, but main is special
1417 BasicBlock *entry = new BasicBlock(prog->main);
1418 exit = new BasicBlock(prog->main);
1419 blocks[nir_start_block(function->impl)->index] = entry;
1420 prog->main->setEntry(entry);
1421 prog->main->setExit(exit);
1422
1423 setPosition(entry, true);
1424
1425 if (info->io.genUserClip > 0) {
1426 for (int c = 0; c < 4; ++c)
1427 clipVtx[c] = getScratch();
1428 }
1429
1430 switch (prog->getType()) {
1431 case Program::TYPE_TESSELLATION_CONTROL:
1432 outBase = mkOp2v(
1433 OP_SUB, TYPE_U32, getSSA(),
1434 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1435 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1436 break;
1437 case Program::TYPE_FRAGMENT: {
1438 Symbol *sv = mkSysVal(SV_POSITION, 3);
1439 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1440 fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1441 break;
1442 }
1443 default:
1444 break;
1445 }
1446
1447 nir_index_ssa_defs(function->impl);
1448 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1449 if (!visit(node))
1450 return false;
1451 }
1452
1453 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1454 setPosition(exit, true);
1455
1456 if (info->io.genUserClip > 0)
1457 handleUserClipPlanes();
1458
1459 // TODO: for non main function this needs to be a OP_RETURN
1460 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1461 return true;
1462 }
1463
1464 bool
1465 Converter::visit(nir_cf_node *node)
1466 {
1467 switch (node->type) {
1468 case nir_cf_node_block:
1469 return visit(nir_cf_node_as_block(node));
1470 case nir_cf_node_if:
1471 return visit(nir_cf_node_as_if(node));
1472 case nir_cf_node_loop:
1473 return visit(nir_cf_node_as_loop(node));
1474 default:
1475 ERROR("unknown nir_cf_node type %u\n", node->type);
1476 return false;
1477 }
1478 }
1479
1480 bool
1481 Converter::visit(nir_block *block)
1482 {
1483 if (!block->predecessors->entries && block->instr_list.is_empty())
1484 return true;
1485
1486 BasicBlock *bb = convert(block);
1487
1488 setPosition(bb, true);
1489 nir_foreach_instr(insn, block) {
1490 if (!visit(insn))
1491 return false;
1492 }
1493 return true;
1494 }
1495
1496 bool
1497 Converter::visit(nir_if *nif)
1498 {
1499 DataType sType = getSType(nif->condition, false, false);
1500 Value *src = getSrc(&nif->condition, 0);
1501
1502 nir_block *lastThen = nir_if_last_then_block(nif);
1503 nir_block *lastElse = nir_if_last_else_block(nif);
1504
1505 assert(!lastThen->successors[1]);
1506 assert(!lastElse->successors[1]);
1507
1508 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1509 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1510
1511 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1512 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1513
1514 // we only insert joinats, if both nodes end up at the end of the if again.
1515 // the reason for this to not happens are breaks/continues/ret/... which
1516 // have their own handling
1517 if (lastThen->successors[0] == lastElse->successors[0])
1518 bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1519 CC_ALWAYS, NULL);
1520
1521 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1522
1523 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1524 if (!visit(node))
1525 return false;
1526 }
1527 setPosition(convert(lastThen), true);
1528 if (!bb->getExit() ||
1529 !bb->getExit()->asFlow() ||
1530 bb->getExit()->asFlow()->op == OP_JOIN) {
1531 BasicBlock *tailBB = convert(lastThen->successors[0]);
1532 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1533 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1534 }
1535
1536 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1537 if (!visit(node))
1538 return false;
1539 }
1540 setPosition(convert(lastElse), true);
1541 if (!bb->getExit() ||
1542 !bb->getExit()->asFlow() ||
1543 bb->getExit()->asFlow()->op == OP_JOIN) {
1544 BasicBlock *tailBB = convert(lastElse->successors[0]);
1545 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1546 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1547 }
1548
1549 if (lastThen->successors[0] == lastElse->successors[0]) {
1550 setPosition(convert(lastThen->successors[0]), true);
1551 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1552 }
1553
1554 return true;
1555 }
1556
1557 bool
1558 Converter::visit(nir_loop *loop)
1559 {
1560 curLoopDepth += 1;
1561 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1562
1563 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1564 BasicBlock *tailBB =
1565 convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1566 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1567
1568 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1569 setPosition(loopBB, false);
1570 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1571
1572 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1573 if (!visit(node))
1574 return false;
1575 }
1576 Instruction *insn = bb->getExit();
1577 if (bb->cfg.incidentCount() != 0) {
1578 if (!insn || !insn->asFlow()) {
1579 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1580 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1581 } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1582 tailBB->cfg.incidentCount() == 0) {
1583 // RA doesn't like having blocks around with no incident edge,
1584 // so we create a fake one to make it happy
1585 bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1586 }
1587 }
1588
1589 curLoopDepth -= 1;
1590
1591 return true;
1592 }
1593
1594 bool
1595 Converter::visit(nir_instr *insn)
1596 {
1597 switch (insn->type) {
1598 case nir_instr_type_alu:
1599 return visit(nir_instr_as_alu(insn));
1600 case nir_instr_type_intrinsic:
1601 return visit(nir_instr_as_intrinsic(insn));
1602 case nir_instr_type_jump:
1603 return visit(nir_instr_as_jump(insn));
1604 case nir_instr_type_load_const:
1605 return visit(nir_instr_as_load_const(insn));
1606 case nir_instr_type_ssa_undef:
1607 return visit(nir_instr_as_ssa_undef(insn));
1608 case nir_instr_type_tex:
1609 return visit(nir_instr_as_tex(insn));
1610 default:
1611 ERROR("unknown nir_instr type %u\n", insn->type);
1612 return false;
1613 }
1614 return true;
1615 }
1616
1617 SVSemantic
1618 Converter::convert(nir_intrinsic_op intr)
1619 {
1620 switch (intr) {
1621 case nir_intrinsic_load_base_vertex:
1622 return SV_BASEVERTEX;
1623 case nir_intrinsic_load_base_instance:
1624 return SV_BASEINSTANCE;
1625 case nir_intrinsic_load_draw_id:
1626 return SV_DRAWID;
1627 case nir_intrinsic_load_front_face:
1628 return SV_FACE;
1629 case nir_intrinsic_load_helper_invocation:
1630 return SV_THREAD_KILL;
1631 case nir_intrinsic_load_instance_id:
1632 return SV_INSTANCE_ID;
1633 case nir_intrinsic_load_invocation_id:
1634 return SV_INVOCATION_ID;
1635 case nir_intrinsic_load_local_group_size:
1636 return SV_NTID;
1637 case nir_intrinsic_load_local_invocation_id:
1638 return SV_TID;
1639 case nir_intrinsic_load_num_work_groups:
1640 return SV_NCTAID;
1641 case nir_intrinsic_load_patch_vertices_in:
1642 return SV_VERTEX_COUNT;
1643 case nir_intrinsic_load_primitive_id:
1644 return SV_PRIMITIVE_ID;
1645 case nir_intrinsic_load_sample_id:
1646 return SV_SAMPLE_INDEX;
1647 case nir_intrinsic_load_sample_mask_in:
1648 return SV_SAMPLE_MASK;
1649 case nir_intrinsic_load_sample_pos:
1650 return SV_SAMPLE_POS;
1651 case nir_intrinsic_load_subgroup_eq_mask:
1652 return SV_LANEMASK_EQ;
1653 case nir_intrinsic_load_subgroup_ge_mask:
1654 return SV_LANEMASK_GE;
1655 case nir_intrinsic_load_subgroup_gt_mask:
1656 return SV_LANEMASK_GT;
1657 case nir_intrinsic_load_subgroup_le_mask:
1658 return SV_LANEMASK_LE;
1659 case nir_intrinsic_load_subgroup_lt_mask:
1660 return SV_LANEMASK_LT;
1661 case nir_intrinsic_load_subgroup_invocation:
1662 return SV_LANEID;
1663 case nir_intrinsic_load_tess_coord:
1664 return SV_TESS_COORD;
1665 case nir_intrinsic_load_tess_level_inner:
1666 return SV_TESS_INNER;
1667 case nir_intrinsic_load_tess_level_outer:
1668 return SV_TESS_OUTER;
1669 case nir_intrinsic_load_vertex_id:
1670 return SV_VERTEX_ID;
1671 case nir_intrinsic_load_work_group_id:
1672 return SV_CTAID;
1673 default:
1674 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1675 nir_intrinsic_infos[intr].name);
1676 assert(false);
1677 return SV_LAST;
1678 }
1679 }
1680
1681 bool
1682 Converter::visit(nir_intrinsic_instr *insn)
1683 {
1684 nir_intrinsic_op op = insn->intrinsic;
1685
1686 switch (op) {
1687 case nir_intrinsic_load_uniform: {
1688 LValues &newDefs = convert(&insn->dest);
1689 const DataType dType = getDType(insn);
1690 Value *indirect;
1691 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1692 for (uint8_t i = 0; i < insn->num_components; ++i) {
1693 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1694 }
1695 break;
1696 }
1697 case nir_intrinsic_store_output:
1698 case nir_intrinsic_store_per_vertex_output: {
1699 Value *indirect;
1700 DataType dType = getSType(insn->src[0], false, false);
1701 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1702
1703 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1704 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1705 continue;
1706
1707 uint8_t offset = 0;
1708 Value *src = getSrc(&insn->src[0], i);
1709 switch (prog->getType()) {
1710 case Program::TYPE_FRAGMENT: {
1711 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1712 // TGSI uses a different interface than NIR, TGSI stores that
1713 // value in the z component, NIR in X
1714 offset += 2;
1715 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1716 }
1717 break;
1718 }
1719 case Program::TYPE_VERTEX: {
1720 if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1721 mkMov(clipVtx[i], src);
1722 src = clipVtx[i];
1723 }
1724 break;
1725 }
1726 default:
1727 break;
1728 }
1729
1730 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1731 }
1732 break;
1733 }
1734 case nir_intrinsic_load_input:
1735 case nir_intrinsic_load_interpolated_input:
1736 case nir_intrinsic_load_output: {
1737 LValues &newDefs = convert(&insn->dest);
1738
1739 // FBFetch
1740 if (prog->getType() == Program::TYPE_FRAGMENT &&
1741 op == nir_intrinsic_load_output) {
1742 std::vector<Value*> defs, srcs;
1743 uint8_t mask = 0;
1744
1745 srcs.push_back(getSSA());
1746 srcs.push_back(getSSA());
1747 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1748 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1749 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1750 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1751
1752 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1753 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1754
1755 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1756 defs.push_back(newDefs[i]);
1757 mask |= 1 << i;
1758 }
1759
1760 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1761 texi->tex.levelZero = 1;
1762 texi->tex.mask = mask;
1763 texi->tex.useOffsets = 0;
1764 texi->tex.r = 0xffff;
1765 texi->tex.s = 0xffff;
1766
1767 info->prop.fp.readsFramebuffer = true;
1768 break;
1769 }
1770
1771 const DataType dType = getDType(insn);
1772 Value *indirect;
1773 bool input = op != nir_intrinsic_load_output;
1774 operation nvirOp;
1775 uint32_t mode = 0;
1776
1777 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1778 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1779
1780 // see load_barycentric_* handling
1781 if (prog->getType() == Program::TYPE_FRAGMENT) {
1782 mode = translateInterpMode(&vary, nvirOp);
1783 if (op == nir_intrinsic_load_interpolated_input) {
1784 ImmediateValue immMode;
1785 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1786 mode |= immMode.reg.data.u32;
1787 }
1788 }
1789
1790 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1791 uint32_t address = getSlotAddress(insn, idx, i);
1792 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1793 if (prog->getType() == Program::TYPE_FRAGMENT) {
1794 int s = 1;
1795 if (typeSizeof(dType) == 8) {
1796 Value *lo = getSSA();
1797 Value *hi = getSSA();
1798 Instruction *interp;
1799
1800 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1801 if (nvirOp == OP_PINTERP)
1802 interp->setSrc(s++, fp.position);
1803 if (mode & NV50_IR_INTERP_OFFSET)
1804 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1805 interp->setInterpolate(mode);
1806 interp->setIndirect(0, 0, indirect);
1807
1808 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1809 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1810 if (nvirOp == OP_PINTERP)
1811 interp->setSrc(s++, fp.position);
1812 if (mode & NV50_IR_INTERP_OFFSET)
1813 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1814 interp->setInterpolate(mode);
1815 interp->setIndirect(0, 0, indirect);
1816
1817 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1818 } else {
1819 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1820 if (nvirOp == OP_PINTERP)
1821 interp->setSrc(s++, fp.position);
1822 if (mode & NV50_IR_INTERP_OFFSET)
1823 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1824 interp->setInterpolate(mode);
1825 interp->setIndirect(0, 0, indirect);
1826 }
1827 } else {
1828 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1829 }
1830 }
1831 break;
1832 }
1833 case nir_intrinsic_load_barycentric_at_offset:
1834 case nir_intrinsic_load_barycentric_at_sample:
1835 case nir_intrinsic_load_barycentric_centroid:
1836 case nir_intrinsic_load_barycentric_pixel:
1837 case nir_intrinsic_load_barycentric_sample: {
1838 LValues &newDefs = convert(&insn->dest);
1839 uint32_t mode;
1840
1841 if (op == nir_intrinsic_load_barycentric_centroid ||
1842 op == nir_intrinsic_load_barycentric_sample) {
1843 mode = NV50_IR_INTERP_CENTROID;
1844 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
1845 Value *offs[2];
1846 for (uint8_t c = 0; c < 2; c++) {
1847 offs[c] = getScratch();
1848 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1849 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1850 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1851 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1852 }
1853 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1854
1855 mode = NV50_IR_INTERP_OFFSET;
1856 } else if (op == nir_intrinsic_load_barycentric_pixel) {
1857 mode = NV50_IR_INTERP_DEFAULT;
1858 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1859 info->prop.fp.readsSampleLocations = true;
1860 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1861 mode = NV50_IR_INTERP_OFFSET;
1862 } else {
1863 unreachable("all intrinsics already handled above");
1864 }
1865
1866 loadImm(newDefs[1], mode);
1867 break;
1868 }
1869 case nir_intrinsic_discard:
1870 mkOp(OP_DISCARD, TYPE_NONE, NULL);
1871 break;
1872 case nir_intrinsic_discard_if: {
1873 Value *pred = getSSA(1, FILE_PREDICATE);
1874 if (insn->num_components > 1) {
1875 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1876 assert(false);
1877 return false;
1878 }
1879 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1880 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1881 break;
1882 }
1883 case nir_intrinsic_load_base_vertex:
1884 case nir_intrinsic_load_base_instance:
1885 case nir_intrinsic_load_draw_id:
1886 case nir_intrinsic_load_front_face:
1887 case nir_intrinsic_load_helper_invocation:
1888 case nir_intrinsic_load_instance_id:
1889 case nir_intrinsic_load_invocation_id:
1890 case nir_intrinsic_load_local_group_size:
1891 case nir_intrinsic_load_local_invocation_id:
1892 case nir_intrinsic_load_num_work_groups:
1893 case nir_intrinsic_load_patch_vertices_in:
1894 case nir_intrinsic_load_primitive_id:
1895 case nir_intrinsic_load_sample_id:
1896 case nir_intrinsic_load_sample_mask_in:
1897 case nir_intrinsic_load_sample_pos:
1898 case nir_intrinsic_load_subgroup_eq_mask:
1899 case nir_intrinsic_load_subgroup_ge_mask:
1900 case nir_intrinsic_load_subgroup_gt_mask:
1901 case nir_intrinsic_load_subgroup_le_mask:
1902 case nir_intrinsic_load_subgroup_lt_mask:
1903 case nir_intrinsic_load_subgroup_invocation:
1904 case nir_intrinsic_load_tess_coord:
1905 case nir_intrinsic_load_tess_level_inner:
1906 case nir_intrinsic_load_tess_level_outer:
1907 case nir_intrinsic_load_vertex_id:
1908 case nir_intrinsic_load_work_group_id: {
1909 const DataType dType = getDType(insn);
1910 SVSemantic sv = convert(op);
1911 LValues &newDefs = convert(&insn->dest);
1912
1913 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1914 Value *def;
1915 if (typeSizeof(dType) == 8)
1916 def = getSSA();
1917 else
1918 def = newDefs[i];
1919
1920 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1921 loadImm(def, 0u);
1922 } else {
1923 Symbol *sym = mkSysVal(sv, i);
1924 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1925 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1926 rdsv->perPatch = 1;
1927 }
1928
1929 if (typeSizeof(dType) == 8)
1930 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1931 }
1932 break;
1933 }
1934 // constants
1935 case nir_intrinsic_load_subgroup_size: {
1936 LValues &newDefs = convert(&insn->dest);
1937 loadImm(newDefs[0], 32u);
1938 break;
1939 }
1940 case nir_intrinsic_vote_all:
1941 case nir_intrinsic_vote_any:
1942 case nir_intrinsic_vote_ieq: {
1943 LValues &newDefs = convert(&insn->dest);
1944 Value *pred = getScratch(1, FILE_PREDICATE);
1945 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1946 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1947 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1948 break;
1949 }
1950 case nir_intrinsic_ballot: {
1951 LValues &newDefs = convert(&insn->dest);
1952 Value *pred = getSSA(1, FILE_PREDICATE);
1953 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1954 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1955 break;
1956 }
1957 case nir_intrinsic_read_first_invocation:
1958 case nir_intrinsic_read_invocation: {
1959 LValues &newDefs = convert(&insn->dest);
1960 const DataType dType = getDType(insn);
1961 Value *tmp = getScratch();
1962
1963 if (op == nir_intrinsic_read_first_invocation) {
1964 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1965 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
1966 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1967 } else
1968 tmp = getSrc(&insn->src[1], 0);
1969
1970 for (uint8_t i = 0; i < insn->num_components; ++i) {
1971 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1972 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1973 }
1974 break;
1975 }
1976 default:
1977 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
1978 return false;
1979 }
1980
1981 return true;
1982 }
1983
1984 bool
1985 Converter::visit(nir_jump_instr *insn)
1986 {
1987 switch (insn->type) {
1988 case nir_jump_return:
1989 // TODO: this only works in the main function
1990 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
1991 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
1992 break;
1993 case nir_jump_break:
1994 case nir_jump_continue: {
1995 bool isBreak = insn->type == nir_jump_break;
1996 nir_block *block = insn->instr.block;
1997 assert(!block->successors[1]);
1998 BasicBlock *target = convert(block->successors[0]);
1999 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2000 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2001 break;
2002 }
2003 default:
2004 ERROR("unknown nir_jump_type %u\n", insn->type);
2005 return false;
2006 }
2007
2008 return true;
2009 }
2010
2011 bool
2012 Converter::visit(nir_load_const_instr *insn)
2013 {
2014 assert(insn->def.bit_size <= 64);
2015
2016 LValues &newDefs = convert(&insn->def);
2017 for (int i = 0; i < insn->def.num_components; i++) {
2018 switch (insn->def.bit_size) {
2019 case 64:
2020 loadImm(newDefs[i], insn->value.u64[i]);
2021 break;
2022 case 32:
2023 loadImm(newDefs[i], insn->value.u32[i]);
2024 break;
2025 case 16:
2026 loadImm(newDefs[i], insn->value.u16[i]);
2027 break;
2028 case 8:
2029 loadImm(newDefs[i], insn->value.u8[i]);
2030 break;
2031 }
2032 }
2033 return true;
2034 }
2035
// Sanity checks shared by the scalar ALU cases: makes the enclosing function
// return false unless the nir_alu_instr named `insn` writes exactly one
// component with a write mask of 1.
//
// Wrapped in do { } while (0) so that `DEFAULT_CHECKS;` is a single statement
// and stays correct under an unbraced if/else.
#define DEFAULT_CHECKS \
   do { \
      if (insn->dest.dest.ssa.num_components > 1) { \
         ERROR("nir_alu_instr only supported with 1 component!\n"); \
         return false; \
      } \
      if (insn->dest.write_mask != 1) { \
         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
         return false; \
      } \
   } while (0)
2045 bool
2046 Converter::visit(nir_alu_instr *insn)
2047 {
2048 const nir_op op = insn->op;
2049 const nir_op_info &info = nir_op_infos[op];
2050 DataType dType = getDType(insn);
2051 const std::vector<DataType> sTypes = getSTypes(insn);
2052
2053 Instruction *oldPos = this->bb->getExit();
2054
2055 switch (op) {
2056 case nir_op_fabs:
2057 case nir_op_iabs:
2058 case nir_op_fadd:
2059 case nir_op_iadd:
2060 case nir_op_fand:
2061 case nir_op_iand:
2062 case nir_op_fceil:
2063 case nir_op_fcos:
2064 case nir_op_fddx:
2065 case nir_op_fddx_coarse:
2066 case nir_op_fddx_fine:
2067 case nir_op_fddy:
2068 case nir_op_fddy_coarse:
2069 case nir_op_fddy_fine:
2070 case nir_op_fdiv:
2071 case nir_op_idiv:
2072 case nir_op_udiv:
2073 case nir_op_fexp2:
2074 case nir_op_ffloor:
2075 case nir_op_ffma:
2076 case nir_op_flog2:
2077 case nir_op_fmax:
2078 case nir_op_imax:
2079 case nir_op_umax:
2080 case nir_op_fmin:
2081 case nir_op_imin:
2082 case nir_op_umin:
2083 case nir_op_fmod:
2084 case nir_op_imod:
2085 case nir_op_umod:
2086 case nir_op_fmul:
2087 case nir_op_imul:
2088 case nir_op_imul_high:
2089 case nir_op_umul_high:
2090 case nir_op_fneg:
2091 case nir_op_ineg:
2092 case nir_op_fnot:
2093 case nir_op_inot:
2094 case nir_op_for:
2095 case nir_op_ior:
2096 case nir_op_pack_64_2x32_split:
2097 case nir_op_fpow:
2098 case nir_op_frcp:
2099 case nir_op_frem:
2100 case nir_op_irem:
2101 case nir_op_frsq:
2102 case nir_op_fsat:
2103 case nir_op_ishr:
2104 case nir_op_ushr:
2105 case nir_op_fsin:
2106 case nir_op_fsqrt:
2107 case nir_op_fsub:
2108 case nir_op_isub:
2109 case nir_op_ftrunc:
2110 case nir_op_ishl:
2111 case nir_op_fxor:
2112 case nir_op_ixor: {
2113 DEFAULT_CHECKS;
2114 LValues &newDefs = convert(&insn->dest);
2115 operation preOp = preOperationNeeded(op);
2116 if (preOp != OP_NOP) {
2117 assert(info.num_inputs < 2);
2118 Value *tmp = getSSA(typeSizeof(dType));
2119 Instruction *i0 = mkOp(preOp, dType, tmp);
2120 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2121 if (info.num_inputs) {
2122 i0->setSrc(0, getSrc(&insn->src[0]));
2123 i1->setSrc(0, tmp);
2124 }
2125 i1->subOp = getSubOp(op);
2126 } else {
2127 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2128 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2129 i->setSrc(s, getSrc(&insn->src[s]));
2130 }
2131 i->subOp = getSubOp(op);
2132 }
2133 break;
2134 }
2135 case nir_op_ifind_msb:
2136 case nir_op_ufind_msb: {
2137 DEFAULT_CHECKS;
2138 LValues &newDefs = convert(&insn->dest);
2139 dType = sTypes[0];
2140 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2141 break;
2142 }
2143 case nir_op_fround_even: {
2144 DEFAULT_CHECKS;
2145 LValues &newDefs = convert(&insn->dest);
2146 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2147 break;
2148 }
2149 // convert instructions
2150 case nir_op_f2f32:
2151 case nir_op_f2i32:
2152 case nir_op_f2u32:
2153 case nir_op_i2f32:
2154 case nir_op_i2i32:
2155 case nir_op_u2f32:
2156 case nir_op_u2u32:
2157 case nir_op_f2f64:
2158 case nir_op_f2i64:
2159 case nir_op_f2u64:
2160 case nir_op_i2f64:
2161 case nir_op_i2i64:
2162 case nir_op_u2f64:
2163 case nir_op_u2u64: {
2164 DEFAULT_CHECKS;
2165 LValues &newDefs = convert(&insn->dest);
2166 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2167 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2168 i->rnd = ROUND_Z;
2169 i->sType = sTypes[0];
2170 break;
2171 }
2172 // compare instructions
2173 case nir_op_feq32:
2174 case nir_op_ieq32:
2175 case nir_op_fge32:
2176 case nir_op_ige32:
2177 case nir_op_uge32:
2178 case nir_op_flt32:
2179 case nir_op_ilt32:
2180 case nir_op_ult32:
2181 case nir_op_fne32:
2182 case nir_op_ine32: {
2183 DEFAULT_CHECKS;
2184 LValues &newDefs = convert(&insn->dest);
2185 Instruction *i = mkCmp(getOperation(op),
2186 getCondCode(op),
2187 dType,
2188 newDefs[0],
2189 dType,
2190 getSrc(&insn->src[0]),
2191 getSrc(&insn->src[1]));
2192 if (info.num_inputs == 3)
2193 i->setSrc(2, getSrc(&insn->src[2]));
2194 i->sType = sTypes[0];
2195 break;
2196 }
2197 // those are weird ALU ops and need special handling, because
2198 // 1. they are always componend based
2199 // 2. they basically just merge multiple values into one data type
2200 case nir_op_imov:
2201 case nir_op_fmov:
2202 case nir_op_vec2:
2203 case nir_op_vec3:
2204 case nir_op_vec4: {
2205 LValues &newDefs = convert(&insn->dest);
2206 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2207 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2208 }
2209 break;
2210 }
2211 // (un)pack
2212 case nir_op_pack_64_2x32: {
2213 LValues &newDefs = convert(&insn->dest);
2214 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2215 merge->setSrc(0, getSrc(&insn->src[0], 0));
2216 merge->setSrc(1, getSrc(&insn->src[0], 1));
2217 break;
2218 }
2219 case nir_op_pack_half_2x16_split: {
2220 LValues &newDefs = convert(&insn->dest);
2221 Value *tmpH = getSSA();
2222 Value *tmpL = getSSA();
2223
2224 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2225 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2226 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2227 break;
2228 }
2229 case nir_op_unpack_half_2x16_split_x:
2230 case nir_op_unpack_half_2x16_split_y: {
2231 LValues &newDefs = convert(&insn->dest);
2232 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2233 if (op == nir_op_unpack_half_2x16_split_y)
2234 cvt->subOp = 1;
2235 break;
2236 }
2237 case nir_op_unpack_64_2x32: {
2238 LValues &newDefs = convert(&insn->dest);
2239 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2240 break;
2241 }
2242 case nir_op_unpack_64_2x32_split_x: {
2243 LValues &newDefs = convert(&insn->dest);
2244 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2245 break;
2246 }
2247 case nir_op_unpack_64_2x32_split_y: {
2248 LValues &newDefs = convert(&insn->dest);
2249 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2250 break;
2251 }
2252 // special instructions
2253 case nir_op_fsign:
2254 case nir_op_isign: {
2255 DEFAULT_CHECKS;
2256 DataType iType;
2257 if (::isFloatType(dType))
2258 iType = TYPE_F32;
2259 else
2260 iType = TYPE_S32;
2261
2262 LValues &newDefs = convert(&insn->dest);
2263 LValue *val0 = getScratch();
2264 LValue *val1 = getScratch();
2265 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2266 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2267
2268 if (dType == TYPE_F64) {
2269 mkOp2(OP_SUB, iType, val0, val0, val1);
2270 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2271 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2272 mkOp2(OP_SUB, iType, val0, val1, val0);
2273 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2274 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2275 } else if (::isFloatType(dType))
2276 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2277 else
2278 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2279 break;
2280 }
2281 case nir_op_fcsel:
2282 case nir_op_b32csel: {
2283 DEFAULT_CHECKS;
2284 LValues &newDefs = convert(&insn->dest);
2285 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2286 break;
2287 }
2288 case nir_op_ibitfield_extract:
2289 case nir_op_ubitfield_extract: {
2290 DEFAULT_CHECKS;
2291 Value *tmp = getSSA();
2292 LValues &newDefs = convert(&insn->dest);
2293 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2294 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2295 break;
2296 }
2297 case nir_op_bfm: {
2298 DEFAULT_CHECKS;
2299 LValues &newDefs = convert(&insn->dest);
2300 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2301 break;
2302 }
2303 case nir_op_bitfield_insert: {
2304 DEFAULT_CHECKS;
2305 LValues &newDefs = convert(&insn->dest);
2306 LValue *temp = getSSA();
2307 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2308 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2309 break;
2310 }
2311 case nir_op_bit_count: {
2312 DEFAULT_CHECKS;
2313 LValues &newDefs = convert(&insn->dest);
2314 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2315 break;
2316 }
2317 case nir_op_bitfield_reverse: {
2318 DEFAULT_CHECKS;
2319 LValues &newDefs = convert(&insn->dest);
2320 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2321 break;
2322 }
2323 case nir_op_find_lsb: {
2324 DEFAULT_CHECKS;
2325 LValues &newDefs = convert(&insn->dest);
2326 Value *tmp = getSSA();
2327 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2328 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2329 break;
2330 }
2331 // boolean conversions
2332 case nir_op_b2f32: {
2333 DEFAULT_CHECKS;
2334 LValues &newDefs = convert(&insn->dest);
2335 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2336 break;
2337 }
2338 case nir_op_b2f64: {
2339 DEFAULT_CHECKS;
2340 LValues &newDefs = convert(&insn->dest);
2341 Value *tmp = getSSA(4);
2342 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2343 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2344 break;
2345 }
2346 case nir_op_f2b32:
2347 case nir_op_i2b32: {
2348 DEFAULT_CHECKS;
2349 LValues &newDefs = convert(&insn->dest);
2350 Value *src1;
2351 if (typeSizeof(sTypes[0]) == 8) {
2352 src1 = loadImm(getSSA(8), 0.0);
2353 } else {
2354 src1 = zero;
2355 }
2356 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2357 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2358 break;
2359 }
2360 case nir_op_b2i32: {
2361 DEFAULT_CHECKS;
2362 LValues &newDefs = convert(&insn->dest);
2363 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2364 break;
2365 }
2366 case nir_op_b2i64: {
2367 DEFAULT_CHECKS;
2368 LValues &newDefs = convert(&insn->dest);
2369 LValue *def = getScratch();
2370 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2371 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2372 break;
2373 }
2374 default:
2375 ERROR("unknown nir_op %s\n", info.name);
2376 return false;
2377 }
2378
2379 if (!oldPos) {
2380 oldPos = this->bb->getEntry();
2381 oldPos->precise = insn->exact;
2382 }
2383
2384 if (unlikely(!oldPos))
2385 return true;
2386
2387 while (oldPos->next) {
2388 oldPos = oldPos->next;
2389 oldPos->precise = insn->exact;
2390 }
2391 oldPos->saturate = insn->dest.saturate;
2392
2393 return true;
2394 }
2395 #undef DEFAULT_CHECKS
2396
2397 bool
2398 Converter::visit(nir_ssa_undef_instr *insn)
2399 {
2400 LValues &newDefs = convert(&insn->def);
2401 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2402 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2403 }
2404 return true;
2405 }
2406
2407 #define CASE_SAMPLER(ty) \
2408 case GLSL_SAMPLER_DIM_ ## ty : \
2409 if (isArray && !isShadow) \
2410 return TEX_TARGET_ ## ty ## _ARRAY; \
2411 else if (!isArray && isShadow) \
2412 return TEX_TARGET_## ty ## _SHADOW; \
2413 else if (isArray && isShadow) \
2414 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2415 else \
2416 return TEX_TARGET_ ## ty
2417
2418 TexTarget
2419 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2420 {
2421 switch (dim) {
2422 CASE_SAMPLER(1D);
2423 CASE_SAMPLER(2D);
2424 CASE_SAMPLER(CUBE);
2425 case GLSL_SAMPLER_DIM_3D:
2426 return TEX_TARGET_3D;
2427 case GLSL_SAMPLER_DIM_MS:
2428 if (isArray)
2429 return TEX_TARGET_2D_MS_ARRAY;
2430 return TEX_TARGET_2D_MS;
2431 case GLSL_SAMPLER_DIM_RECT:
2432 if (isShadow)
2433 return TEX_TARGET_RECT_SHADOW;
2434 return TEX_TARGET_RECT;
2435 case GLSL_SAMPLER_DIM_BUF:
2436 return TEX_TARGET_BUFFER;
2437 case GLSL_SAMPLER_DIM_EXTERNAL:
2438 return TEX_TARGET_2D;
2439 default:
2440 ERROR("unknown glsl_sampler_dim %u\n", dim);
2441 assert(false);
2442 return TEX_TARGET_COUNT;
2443 }
2444 }
2445 #undef CASE_SAMPLER
2446
2447 Value*
2448 Converter::applyProjection(Value *src, Value *proj)
2449 {
2450 if (!proj)
2451 return src;
2452 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
2453 }
2454
2455 bool
2456 Converter::visit(nir_tex_instr *insn)
2457 {
2458 switch (insn->op) {
2459 case nir_texop_lod:
2460 case nir_texop_query_levels:
2461 case nir_texop_tex:
2462 case nir_texop_texture_samples:
2463 case nir_texop_tg4:
2464 case nir_texop_txb:
2465 case nir_texop_txd:
2466 case nir_texop_txf:
2467 case nir_texop_txf_ms:
2468 case nir_texop_txl:
2469 case nir_texop_txs: {
2470 LValues &newDefs = convert(&insn->dest);
2471 std::vector<Value*> srcs;
2472 std::vector<Value*> defs;
2473 std::vector<nir_src*> offsets;
2474 uint8_t mask = 0;
2475 bool lz = false;
2476 Value *proj = NULL;
2477 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
2478 operation op = getOperation(insn->op);
2479
2480 int r, s;
2481 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
2482 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
2483 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
2484 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
2485 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
2486 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
2487 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
2488 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
2489 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
2490 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
2491 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
2492
2493 if (projIdx != -1)
2494 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
2495
2496 srcs.resize(insn->coord_components);
2497 for (uint8_t i = 0u; i < insn->coord_components; ++i)
2498 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
2499
2500 // sometimes we get less args than target.getArgCount, but codegen expects the latter
2501 if (insn->coord_components) {
2502 uint32_t argCount = target.getArgCount();
2503
2504 if (target.isMS())
2505 argCount -= 1;
2506
2507 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
2508 srcs.push_back(getSSA());
2509 }
2510
2511 if (insn->op == nir_texop_texture_samples)
2512 srcs.push_back(zero);
2513 else if (!insn->num_srcs)
2514 srcs.push_back(loadImm(NULL, 0));
2515 if (biasIdx != -1)
2516 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
2517 if (lodIdx != -1)
2518 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
2519 else if (op == OP_TXF)
2520 lz = true;
2521 if (msIdx != -1)
2522 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
2523 if (offsetIdx != -1)
2524 offsets.push_back(&insn->src[offsetIdx].src);
2525 if (compIdx != -1)
2526 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
2527 if (texOffIdx != -1) {
2528 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
2529 texOffIdx = srcs.size() - 1;
2530 }
2531 if (sampOffIdx != -1) {
2532 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
2533 sampOffIdx = srcs.size() - 1;
2534 }
2535
2536 r = insn->texture_index;
2537 s = insn->sampler_index;
2538
2539 defs.resize(newDefs.size());
2540 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
2541 defs[d] = newDefs[d];
2542 mask |= 1 << d;
2543 }
2544 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
2545 lz = true;
2546
2547 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
2548 texi->tex.levelZero = lz;
2549 texi->tex.mask = mask;
2550
2551 if (texOffIdx != -1)
2552 texi->tex.rIndirectSrc = texOffIdx;
2553 if (sampOffIdx != -1)
2554 texi->tex.sIndirectSrc = sampOffIdx;
2555
2556 switch (insn->op) {
2557 case nir_texop_tg4:
2558 if (!target.isShadow())
2559 texi->tex.gatherComp = insn->component;
2560 break;
2561 case nir_texop_txs:
2562 texi->tex.query = TXQ_DIMS;
2563 break;
2564 case nir_texop_texture_samples:
2565 texi->tex.mask = 0x4;
2566 texi->tex.query = TXQ_TYPE;
2567 break;
2568 case nir_texop_query_levels:
2569 texi->tex.mask = 0x8;
2570 texi->tex.query = TXQ_DIMS;
2571 break;
2572 default:
2573 break;
2574 }
2575
2576 texi->tex.useOffsets = offsets.size();
2577 if (texi->tex.useOffsets) {
2578 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
2579 for (uint32_t c = 0u; c < 3; ++c) {
2580 uint8_t s2 = std::min(c, target.getDim() - 1);
2581 texi->offset[s][c].set(getSrc(offsets[s], s2));
2582 texi->offset[s][c].setInsn(texi);
2583 }
2584 }
2585 }
2586
2587 if (ddxIdx != -1 && ddyIdx != -1) {
2588 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
2589 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
2590 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
2591 }
2592 }
2593
2594 break;
2595 }
2596 default:
2597 ERROR("unknown nir_texop %u\n", insn->op);
2598 return false;
2599 }
2600 return true;
2601 }
2602
2603 bool
2604 Converter::run()
2605 {
2606 bool progress;
2607
2608 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
2609 nir_print_shader(nir, stderr);
2610
2611 struct nir_lower_subgroups_options subgroup_options = {
2612 .subgroup_size = 32,
2613 .ballot_bit_size = 32,
2614 };
2615
2616 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
2617 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
2618 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2619 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
2620 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2621 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
2622 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2623
2624 do {
2625 progress = false;
2626 NIR_PASS(progress, nir, nir_copy_prop);
2627 NIR_PASS(progress, nir, nir_opt_remove_phis);
2628 NIR_PASS(progress, nir, nir_opt_trivial_continues);
2629 NIR_PASS(progress, nir, nir_opt_cse);
2630 NIR_PASS(progress, nir, nir_opt_algebraic);
2631 NIR_PASS(progress, nir, nir_opt_constant_folding);
2632 NIR_PASS(progress, nir, nir_copy_prop);
2633 NIR_PASS(progress, nir, nir_opt_dce);
2634 NIR_PASS(progress, nir, nir_opt_dead_cf);
2635 } while (progress);
2636
2637 NIR_PASS_V(nir, nir_lower_bool_to_int32);
2638 NIR_PASS_V(nir, nir_lower_locals_to_regs);
2639 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
2640 NIR_PASS_V(nir, nir_convert_from_ssa, true);
2641
2642 // Garbage collect dead instructions
2643 nir_sweep(nir);
2644
2645 if (!parseNIR()) {
2646 ERROR("Couldn't prase NIR!\n");
2647 return false;
2648 }
2649
2650 if (!assignSlots()) {
2651 ERROR("Couldn't assign slots!\n");
2652 return false;
2653 }
2654
2655 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2656 nir_print_shader(nir, stderr);
2657
2658 nir_foreach_function(function, nir) {
2659 if (!visit(function))
2660 return false;
2661 }
2662
2663 return true;
2664 }
2665
2666 } // unnamed namespace
2667
2668 namespace nv50_ir {
2669
2670 bool
2671 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2672 {
2673 nir_shader *nir = (nir_shader*)info->bin.source;
2674 Converter converter(this, nir, info);
2675 bool result = converter.run();
2676 if (!result)
2677 return result;
2678 LoweringHelper lowering;
2679 lowering.run(this);
2680 tlsSize = info->bin.tlsSpace;
2681 return result;
2682 }
2683
2684 } // namespace nv50_ir