nv50/ir/nir: don't assert on !main
src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp (mesa.git)
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <cstring>
40 #include <list>
41 #include <vector>
42
43 namespace {
44
45 #if __cplusplus >= 201103L
46 using std::hash;
47 using std::unordered_map;
48 #else
49 using std::tr1::hash;
50 using std::tr1::unordered_map;
51 #endif
52
53 using namespace nv50_ir;
54
55 int
56 type_size(const struct glsl_type *type, bool bindless)
57 {
58 return glsl_count_attribute_slots(type, false);
59 }
60
61 class Converter : public ConverterCommon
62 {
63 public:
64 Converter(Program *, nir_shader *, nv50_ir_prog_info *);
65
66 bool run();
67 private:
68 typedef std::vector<LValue*> LValues;
69 typedef unordered_map<unsigned, LValues> NirDefMap;
70 typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
71 typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
72 typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
73
74 CacheMode convert(enum gl_access_qualifier);
75 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
76 LValues& convert(nir_alu_dest *);
77 BasicBlock* convert(nir_block *);
78 LValues& convert(nir_dest *);
79 SVSemantic convert(nir_intrinsic_op);
80 Value* convert(nir_load_const_instr*, uint8_t);
81 LValues& convert(nir_register *);
82 LValues& convert(nir_ssa_def *);
83
84 ImgFormat convertGLImgFormat(GLuint);
85
86 Value* getSrc(nir_alu_src *, uint8_t component = 0);
87 Value* getSrc(nir_register *, uint8_t);
88 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
89 Value* getSrc(nir_ssa_def *, uint8_t);
90
91 // The returned value is the constant part of the given source (either the
92 // nir_src or the selected source component of an intrinsic). Even though
93 // this is mostly an optimization to be able to skip indirects in a few
94 // cases, sometimes we require immediate values or set some fields on
95 // instructions (e.g. tex) in order for codegen to consume those.
96 // If the found value does not have a constant part, it is returned
97 // through the Value parameter instead.
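// e.g. a constant array index yields its value with the indirect Value
// set to NULL, while a dynamic index yields 0 and hands back the SSA
// value instead (scaled to a byte offset by the intrinsic variant).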
98 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
99 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
100
101 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
102
103 void setInterpolate(nv50_ir_varying *,
104 uint8_t,
105 bool centroid,
106 unsigned semantics);
107
108 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
109 uint8_t c, Value *indirect0 = NULL,
110 Value *indirect1 = NULL, bool patch = false);
111 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
112 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
113 Value *indirect1 = NULL);
114
115 bool isFloatType(nir_alu_type);
116 bool isSignedType(nir_alu_type);
117 bool isResultFloat(nir_op);
118 bool isResultSigned(nir_op);
119
120 DataType getDType(nir_alu_instr *);
121 DataType getDType(nir_intrinsic_instr *);
122 DataType getDType(nir_intrinsic_instr *, bool isSigned);
123 DataType getDType(nir_op, uint8_t);
124
125 std::vector<DataType> getSTypes(nir_alu_instr *);
126 DataType getSType(nir_src &, bool isFloat, bool isSigned);
127
128 operation getOperation(nir_intrinsic_op);
129 operation getOperation(nir_op);
130 operation getOperation(nir_texop);
131 operation preOperationNeeded(nir_op);
132
133 int getSubOp(nir_intrinsic_op);
134 int getSubOp(nir_op);
135
136 CondCode getCondCode(nir_op);
137
138 bool assignSlots();
139 bool parseNIR();
140
141 bool visit(nir_alu_instr *);
142 bool visit(nir_block *);
143 bool visit(nir_cf_node *);
144 bool visit(nir_deref_instr *);
145 bool visit(nir_function *);
146 bool visit(nir_if *);
147 bool visit(nir_instr *);
148 bool visit(nir_intrinsic_instr *);
149 bool visit(nir_jump_instr *);
150 bool visit(nir_load_const_instr*);
151 bool visit(nir_loop *);
152 bool visit(nir_ssa_undef_instr *);
153 bool visit(nir_tex_instr *);
154
155 // tex stuff
156 Value* applyProjection(Value *src, Value *proj);
157 unsigned int getNIRArgCount(TexInstruction::Target&);
158
159 // image stuff
160 uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
161 CacheMode getCacheModeFromVar(const nir_variable *);
162
163 nir_shader *nir;
164
165 NirDefMap ssaDefs;
166 NirDefMap regDefs;
167 ImmediateMap immediates;
168 NirArrayLMemOffsets regToLmemOffset;
169 NirBlockMap blocks;
170 unsigned int curLoopDepth;
171
172 BasicBlock *exit;
173 Value *zero;
174 Instruction *immInsertPos;
175
176 int clipVertexOutput;
177
178 union {
179 struct {
180 Value *position;
181 } fp;
182 };
183 };
184
185 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
186 : ConverterCommon(prog, info),
187 nir(nir),
188 curLoopDepth(0),
189 clipVertexOutput(-1)
190 {
191 zero = mkImm((uint32_t)0);
192 }
193
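// basic blocks are created on demand and cached by NIR block index, so
// branch targets can be converted before the block itself is visited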
194 BasicBlock *
195 Converter::convert(nir_block *block)
196 {
197 NirBlockMap::iterator it = blocks.find(block->index);
198 if (it != blocks.end())
199 return it->second;
200
201 BasicBlock *bb = new BasicBlock(func);
202 blocks[block->index] = bb;
203 return bb;
204 }
205
206 bool
207 Converter::isFloatType(nir_alu_type type)
208 {
209 return nir_alu_type_get_base_type(type) == nir_type_float;
210 }
211
212 bool
213 Converter::isSignedType(nir_alu_type type)
214 {
215 return nir_alu_type_get_base_type(type) == nir_type_int;
216 }
217
218 bool
219 Converter::isResultFloat(nir_op op)
220 {
221 const nir_op_info &info = nir_op_infos[op];
222 if (info.output_type != nir_type_invalid)
223 return isFloatType(info.output_type);
224
225 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
226 assert(false);
227 return true;
228 }
229
230 bool
231 Converter::isResultSigned(nir_op op)
232 {
233 switch (op) {
234 // there is no umul and we get wrong results if we treat all muls as signed
235 case nir_op_imul:
236 case nir_op_inot:
237 return false;
238 default:
239 const nir_op_info &info = nir_op_infos[op];
240 if (info.output_type != nir_type_invalid)
241 return isSignedType(info.output_type);
242 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
243 assert(false);
244 return true;
245 }
246 }
247
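// the ALU result type combines the op's result class with the dest bit
// size, e.g. nir_op_fadd on a 32-bit destination yields TYPE_F32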
248 DataType
249 Converter::getDType(nir_alu_instr *insn)
250 {
251 if (insn->dest.dest.is_ssa)
252 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
253 else
254 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
255 }
256
257 DataType
258 Converter::getDType(nir_intrinsic_instr *insn)
259 {
260 bool isSigned;
261 switch (insn->intrinsic) {
262 case nir_intrinsic_shared_atomic_imax:
263 case nir_intrinsic_shared_atomic_imin:
264 case nir_intrinsic_ssbo_atomic_imax:
265 case nir_intrinsic_ssbo_atomic_imin:
266 isSigned = true;
267 break;
268 default:
269 isSigned = false;
270 break;
271 }
272
273 return getDType(insn, isSigned);
274 }
275
276 DataType
277 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
278 {
279 if (insn->dest.is_ssa)
280 return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
281 else
282 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
283 }
284
285 DataType
286 Converter::getDType(nir_op op, uint8_t bitSize)
287 {
288 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
289 if (ty == TYPE_NONE) {
290 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
291 assert(false);
292 }
293 return ty;
294 }
295
296 std::vector<DataType>
297 Converter::getSTypes(nir_alu_instr *insn)
298 {
299 const nir_op_info &info = nir_op_infos[insn->op];
300 std::vector<DataType> res(info.num_inputs);
301
302 for (uint8_t i = 0; i < info.num_inputs; ++i) {
303 if (info.input_types[i] != nir_type_invalid) {
304 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
305 } else {
306 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
307 assert(false);
308 res[i] = TYPE_NONE;
309 break;
310 }
311 }
312
313 return res;
314 }
315
316 DataType
317 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
318 {
319 uint8_t bitSize;
320 if (src.is_ssa)
321 bitSize = src.ssa->bit_size;
322 else
323 bitSize = src.reg.reg->bit_size;
324
325 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
326 if (ty == TYPE_NONE) {
327 const char *str;
328 if (isFloat)
329 str = "float";
330 else if (isSigned)
331 str = "int";
332 else
333 str = "uint";
334 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
335 assert(false);
336 }
337 return ty;
338 }
339
340 operation
341 Converter::getOperation(nir_op op)
342 {
343 switch (op) {
344 // basic ops with float and int variants
345 case nir_op_fabs:
346 case nir_op_iabs:
347 return OP_ABS;
348 case nir_op_fadd:
349 case nir_op_iadd:
350 return OP_ADD;
351 case nir_op_iand:
352 return OP_AND;
353 case nir_op_ifind_msb:
354 case nir_op_ufind_msb:
355 return OP_BFIND;
356 case nir_op_fceil:
357 return OP_CEIL;
358 case nir_op_fcos:
359 return OP_COS;
360 case nir_op_f2f32:
361 case nir_op_f2f64:
362 case nir_op_f2i32:
363 case nir_op_f2i64:
364 case nir_op_f2u32:
365 case nir_op_f2u64:
366 case nir_op_i2f32:
367 case nir_op_i2f64:
368 case nir_op_i2i32:
369 case nir_op_i2i64:
370 case nir_op_u2f32:
371 case nir_op_u2f64:
372 case nir_op_u2u32:
373 case nir_op_u2u64:
374 return OP_CVT;
375 case nir_op_fddx:
376 case nir_op_fddx_coarse:
377 case nir_op_fddx_fine:
378 return OP_DFDX;
379 case nir_op_fddy:
380 case nir_op_fddy_coarse:
381 case nir_op_fddy_fine:
382 return OP_DFDY;
383 case nir_op_fdiv:
384 case nir_op_idiv:
385 case nir_op_udiv:
386 return OP_DIV;
387 case nir_op_fexp2:
388 return OP_EX2;
389 case nir_op_ffloor:
390 return OP_FLOOR;
391 case nir_op_ffma:
392 return OP_FMA;
393 case nir_op_flog2:
394 return OP_LG2;
395 case nir_op_fmax:
396 case nir_op_imax:
397 case nir_op_umax:
398 return OP_MAX;
399 case nir_op_pack_64_2x32_split:
400 return OP_MERGE;
401 case nir_op_fmin:
402 case nir_op_imin:
403 case nir_op_umin:
404 return OP_MIN;
405 case nir_op_fmod:
406 case nir_op_imod:
407 case nir_op_umod:
408 case nir_op_frem:
409 case nir_op_irem:
410 return OP_MOD;
411 case nir_op_fmul:
412 case nir_op_imul:
413 case nir_op_imul_high:
414 case nir_op_umul_high:
415 return OP_MUL;
416 case nir_op_fneg:
417 case nir_op_ineg:
418 return OP_NEG;
419 case nir_op_inot:
420 return OP_NOT;
421 case nir_op_ior:
422 return OP_OR;
423 case nir_op_fpow:
424 return OP_POW;
425 case nir_op_frcp:
426 return OP_RCP;
427 case nir_op_frsq:
428 return OP_RSQ;
429 case nir_op_fsat:
430 return OP_SAT;
431 case nir_op_feq32:
432 case nir_op_ieq32:
433 case nir_op_fge32:
434 case nir_op_ige32:
435 case nir_op_uge32:
436 case nir_op_flt32:
437 case nir_op_ilt32:
438 case nir_op_ult32:
439 case nir_op_fne32:
440 case nir_op_ine32:
441 return OP_SET;
442 case nir_op_ishl:
443 return OP_SHL;
444 case nir_op_ishr:
445 case nir_op_ushr:
446 return OP_SHR;
447 case nir_op_fsin:
448 return OP_SIN;
449 case nir_op_fsqrt:
450 return OP_SQRT;
451 case nir_op_fsub:
452 case nir_op_isub:
453 return OP_SUB;
454 case nir_op_ftrunc:
455 return OP_TRUNC;
456 case nir_op_ixor:
457 return OP_XOR;
458 default:
459 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
460 assert(false);
461 return OP_NOP;
462 }
463 }
464
465 operation
466 Converter::getOperation(nir_texop op)
467 {
468 switch (op) {
469 case nir_texop_tex:
470 return OP_TEX;
471 case nir_texop_lod:
472 return OP_TXLQ;
473 case nir_texop_txb:
474 return OP_TXB;
475 case nir_texop_txd:
476 return OP_TXD;
477 case nir_texop_txf:
478 case nir_texop_txf_ms:
479 return OP_TXF;
480 case nir_texop_tg4:
481 return OP_TXG;
482 case nir_texop_txl:
483 return OP_TXL;
484 case nir_texop_query_levels:
485 case nir_texop_texture_samples:
486 case nir_texop_txs:
487 return OP_TXQ;
488 default:
489 ERROR("couldn't get operation for nir_texop %u\n", op);
490 assert(false);
491 return OP_NOP;
492 }
493 }
494
495 operation
496 Converter::getOperation(nir_intrinsic_op op)
497 {
498 switch (op) {
499 case nir_intrinsic_emit_vertex:
500 return OP_EMIT;
501 case nir_intrinsic_end_primitive:
502 return OP_RESTART;
503 case nir_intrinsic_bindless_image_atomic_add:
504 case nir_intrinsic_image_atomic_add:
505 case nir_intrinsic_image_deref_atomic_add:
506 case nir_intrinsic_bindless_image_atomic_and:
507 case nir_intrinsic_image_atomic_and:
508 case nir_intrinsic_image_deref_atomic_and:
509 case nir_intrinsic_bindless_image_atomic_comp_swap:
510 case nir_intrinsic_image_atomic_comp_swap:
511 case nir_intrinsic_image_deref_atomic_comp_swap:
512 case nir_intrinsic_bindless_image_atomic_exchange:
513 case nir_intrinsic_image_atomic_exchange:
514 case nir_intrinsic_image_deref_atomic_exchange:
515 case nir_intrinsic_bindless_image_atomic_max:
516 case nir_intrinsic_image_atomic_max:
517 case nir_intrinsic_image_deref_atomic_max:
518 case nir_intrinsic_bindless_image_atomic_min:
519 case nir_intrinsic_image_atomic_min:
520 case nir_intrinsic_image_deref_atomic_min:
521 case nir_intrinsic_bindless_image_atomic_or:
522 case nir_intrinsic_image_atomic_or:
523 case nir_intrinsic_image_deref_atomic_or:
524 case nir_intrinsic_bindless_image_atomic_xor:
525 case nir_intrinsic_image_atomic_xor:
526 case nir_intrinsic_image_deref_atomic_xor:
527 return OP_SUREDP;
528 case nir_intrinsic_bindless_image_load:
529 case nir_intrinsic_image_load:
530 case nir_intrinsic_image_deref_load:
531 return OP_SULDP;
532 case nir_intrinsic_bindless_image_samples:
533 case nir_intrinsic_image_samples:
534 case nir_intrinsic_image_deref_samples:
535 case nir_intrinsic_bindless_image_size:
536 case nir_intrinsic_image_size:
537 case nir_intrinsic_image_deref_size:
538 return OP_SUQ;
539 case nir_intrinsic_bindless_image_store:
540 case nir_intrinsic_image_store:
541 case nir_intrinsic_image_deref_store:
542 return OP_SUSTP;
543 default:
544 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
545 assert(false);
546 return OP_NOP;
547 }
548 }
549
550 operation
551 Converter::preOperationNeeded(nir_op op)
552 {
553 switch (op) {
554 case nir_op_fcos:
555 case nir_op_fsin:
556 return OP_PRESIN;
557 default:
558 return OP_NOP;
559 }
560 }
561
562 int
563 Converter::getSubOp(nir_op op)
564 {
565 switch (op) {
566 case nir_op_imul_high:
567 case nir_op_umul_high:
568 return NV50_IR_SUBOP_MUL_HIGH;
569 default:
570 return 0;
571 }
572 }
573
574 int
575 Converter::getSubOp(nir_intrinsic_op op)
576 {
577 switch (op) {
578 case nir_intrinsic_bindless_image_atomic_add:
579 case nir_intrinsic_image_atomic_add:
580 case nir_intrinsic_image_deref_atomic_add:
581 case nir_intrinsic_shared_atomic_add:
582 case nir_intrinsic_ssbo_atomic_add:
583 return NV50_IR_SUBOP_ATOM_ADD;
584 case nir_intrinsic_bindless_image_atomic_and:
585 case nir_intrinsic_image_atomic_and:
586 case nir_intrinsic_image_deref_atomic_and:
587 case nir_intrinsic_shared_atomic_and:
588 case nir_intrinsic_ssbo_atomic_and:
589 return NV50_IR_SUBOP_ATOM_AND;
590 case nir_intrinsic_bindless_image_atomic_comp_swap:
591 case nir_intrinsic_image_atomic_comp_swap:
592 case nir_intrinsic_image_deref_atomic_comp_swap:
593 case nir_intrinsic_shared_atomic_comp_swap:
594 case nir_intrinsic_ssbo_atomic_comp_swap:
595 return NV50_IR_SUBOP_ATOM_CAS;
596 case nir_intrinsic_bindless_image_atomic_exchange:
597 case nir_intrinsic_image_atomic_exchange:
598 case nir_intrinsic_image_deref_atomic_exchange:
599 case nir_intrinsic_shared_atomic_exchange:
600 case nir_intrinsic_ssbo_atomic_exchange:
601 return NV50_IR_SUBOP_ATOM_EXCH;
602 case nir_intrinsic_bindless_image_atomic_or:
603 case nir_intrinsic_image_atomic_or:
604 case nir_intrinsic_image_deref_atomic_or:
605 case nir_intrinsic_shared_atomic_or:
606 case nir_intrinsic_ssbo_atomic_or:
607 return NV50_IR_SUBOP_ATOM_OR;
608 case nir_intrinsic_bindless_image_atomic_max:
609 case nir_intrinsic_image_atomic_max:
610 case nir_intrinsic_image_deref_atomic_max:
611 case nir_intrinsic_shared_atomic_imax:
612 case nir_intrinsic_shared_atomic_umax:
613 case nir_intrinsic_ssbo_atomic_imax:
614 case nir_intrinsic_ssbo_atomic_umax:
615 return NV50_IR_SUBOP_ATOM_MAX;
616 case nir_intrinsic_bindless_image_atomic_min:
617 case nir_intrinsic_image_atomic_min:
618 case nir_intrinsic_image_deref_atomic_min:
619 case nir_intrinsic_shared_atomic_imin:
620 case nir_intrinsic_shared_atomic_umin:
621 case nir_intrinsic_ssbo_atomic_imin:
622 case nir_intrinsic_ssbo_atomic_umin:
623 return NV50_IR_SUBOP_ATOM_MIN;
624 case nir_intrinsic_bindless_image_atomic_xor:
625 case nir_intrinsic_image_atomic_xor:
626 case nir_intrinsic_image_deref_atomic_xor:
627 case nir_intrinsic_shared_atomic_xor:
628 case nir_intrinsic_ssbo_atomic_xor:
629 return NV50_IR_SUBOP_ATOM_XOR;
630
631 case nir_intrinsic_group_memory_barrier:
632 case nir_intrinsic_memory_barrier:
633 case nir_intrinsic_memory_barrier_atomic_counter:
634 case nir_intrinsic_memory_barrier_buffer:
635 case nir_intrinsic_memory_barrier_image:
636 return NV50_IR_SUBOP_MEMBAR(M, GL);
637 case nir_intrinsic_memory_barrier_shared:
638 return NV50_IR_SUBOP_MEMBAR(M, CTA);
639
640 case nir_intrinsic_vote_all:
641 return NV50_IR_SUBOP_VOTE_ALL;
642 case nir_intrinsic_vote_any:
643 return NV50_IR_SUBOP_VOTE_ANY;
644 case nir_intrinsic_vote_ieq:
645 return NV50_IR_SUBOP_VOTE_UNI;
646 default:
647 return 0;
648 }
649 }
650
651 CondCode
652 Converter::getCondCode(nir_op op)
653 {
654 switch (op) {
655 case nir_op_feq32:
656 case nir_op_ieq32:
657 return CC_EQ;
658 case nir_op_fge32:
659 case nir_op_ige32:
660 case nir_op_uge32:
661 return CC_GE;
662 case nir_op_flt32:
663 case nir_op_ilt32:
664 case nir_op_ult32:
665 return CC_LT;
666 case nir_op_fne32:
667 return CC_NEU;
668 case nir_op_ine32:
669 return CC_NE;
670 default:
671 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
672 assert(false);
673 return CC_FL;
674 }
675 }
676
677 Converter::LValues&
678 Converter::convert(nir_alu_dest *dest)
679 {
680 return convert(&dest->dest);
681 }
682
683 Converter::LValues&
684 Converter::convert(nir_dest *dest)
685 {
686 if (dest->is_ssa)
687 return convert(&dest->ssa);
688 if (dest->reg.indirect) {
689 ERROR("no support for indirects.\n");
690 assert(false);
691 }
692 return convert(dest->reg.reg);
693 }
694
695 Converter::LValues&
696 Converter::convert(nir_register *reg)
697 {
698 NirDefMap::iterator it = regDefs.find(reg->index);
699 if (it != regDefs.end())
700 return it->second;
701
702 LValues newDef(reg->num_components);
703 for (uint8_t i = 0; i < reg->num_components; i++)
704 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
705 return regDefs[reg->index] = newDef;
706 }
707
708 Converter::LValues&
709 Converter::convert(nir_ssa_def *def)
710 {
711 NirDefMap::iterator it = ssaDefs.find(def->index);
712 if (it != ssaDefs.end())
713 return it->second;
714
715 LValues newDef(def->num_components);
716 for (uint8_t i = 0; i < def->num_components; i++)
717 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
718 return ssaDefs[def->index] = newDef;
719 }
720
721 Value*
722 Converter::getSrc(nir_alu_src *src, uint8_t component)
723 {
724 if (src->abs || src->negate) {
725 ERROR("modifiers currently not supported on nir_alu_src\n");
726 assert(false);
727 }
728 return getSrc(&src->src, src->swizzle[component]);
729 }
730
731 Value*
732 Converter::getSrc(nir_register *reg, uint8_t idx)
733 {
734 NirDefMap::iterator it = regDefs.find(reg->index);
735 if (it == regDefs.end())
736 return convert(reg)[idx];
737 return it->second[idx];
738 }
739
740 Value*
741 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
742 {
743 if (src->is_ssa)
744 return getSrc(src->ssa, idx);
745
746 if (src->reg.indirect) {
747 if (indirect)
748 return getSrc(src->reg.indirect, idx);
749 ERROR("no support for indirects.\n");
750 assert(false);
751 return NULL;
752 }
753
754 return getSrc(src->reg.reg, idx);
755 }
756
757 Value*
758 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
759 {
760 ImmediateMap::iterator iit = immediates.find(src->index);
761 if (iit != immediates.end())
762 return convert((*iit).second, idx);
763
764 NirDefMap::iterator it = ssaDefs.find(src->index);
765 if (it == ssaDefs.end()) {
766 ERROR("SSA value %u not found\n", src->index);
767 assert(false);
768 return NULL;
769 }
770 return it->second[idx];
771 }
772
773 uint32_t
774 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
775 {
776 nir_const_value *offset = nir_src_as_const_value(*src);
777
778 if (offset) {
779 indirect = NULL;
780 return offset[0].u32;
781 }
782
783 indirect = getSrc(src, idx, true);
784 return 0;
785 }
786
787 uint32_t
788 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
789 {
790 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
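// the dynamic part indexes vec4 slots, so shift it left by 4 to turn
// it into a byte offset (16 bytes per slot)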
791 if (indirect)
792 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
793 return idx;
794 }
795
796 static void
797 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
798 {
799 assert(name && index);
800
801 if (slot >= VERT_ATTRIB_MAX) {
802 ERROR("invalid varying slot %u\n", slot);
803 assert(false);
804 return;
805 }
806
807 if (slot >= VERT_ATTRIB_GENERIC0 &&
808 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
809 *name = TGSI_SEMANTIC_GENERIC;
810 *index = slot - VERT_ATTRIB_GENERIC0;
811 return;
812 }
813
814 if (slot >= VERT_ATTRIB_TEX0 &&
815 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
816 *name = TGSI_SEMANTIC_TEXCOORD;
817 *index = slot - VERT_ATTRIB_TEX0;
818 return;
819 }
820
821 switch (slot) {
822 case VERT_ATTRIB_COLOR0:
823 *name = TGSI_SEMANTIC_COLOR;
824 *index = 0;
825 break;
826 case VERT_ATTRIB_COLOR1:
827 *name = TGSI_SEMANTIC_COLOR;
828 *index = 1;
829 break;
830 case VERT_ATTRIB_EDGEFLAG:
831 *name = TGSI_SEMANTIC_EDGEFLAG;
832 *index = 0;
833 break;
834 case VERT_ATTRIB_FOG:
835 *name = TGSI_SEMANTIC_FOG;
836 *index = 0;
837 break;
838 case VERT_ATTRIB_NORMAL:
839 *name = TGSI_SEMANTIC_NORMAL;
840 *index = 0;
841 break;
842 case VERT_ATTRIB_POS:
843 *name = TGSI_SEMANTIC_POSITION;
844 *index = 0;
845 break;
846 case VERT_ATTRIB_POINT_SIZE:
847 *name = TGSI_SEMANTIC_PSIZE;
848 *index = 0;
849 break;
850 default:
851 ERROR("unknown vert attrib slot %u\n", slot);
852 assert(false);
853 break;
854 }
855 }
856
857 static void
858 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
859 {
860 assert(name && index);
861
862 if (slot >= VARYING_SLOT_TESS_MAX) {
863 ERROR("invalid varying slot %u\n", slot);
864 assert(false);
865 return;
866 }
867
868 if (slot >= VARYING_SLOT_PATCH0) {
869 *name = TGSI_SEMANTIC_PATCH;
870 *index = slot - VARYING_SLOT_PATCH0;
871 return;
872 }
873
874 if (slot >= VARYING_SLOT_VAR0) {
875 *name = TGSI_SEMANTIC_GENERIC;
876 *index = slot - VARYING_SLOT_VAR0;
877 return;
878 }
879
880 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
881 *name = TGSI_SEMANTIC_TEXCOORD;
882 *index = slot - VARYING_SLOT_TEX0;
883 return;
884 }
885
886 switch (slot) {
887 case VARYING_SLOT_BFC0:
888 *name = TGSI_SEMANTIC_BCOLOR;
889 *index = 0;
890 break;
891 case VARYING_SLOT_BFC1:
892 *name = TGSI_SEMANTIC_BCOLOR;
893 *index = 1;
894 break;
895 case VARYING_SLOT_CLIP_DIST0:
896 *name = TGSI_SEMANTIC_CLIPDIST;
897 *index = 0;
898 break;
899 case VARYING_SLOT_CLIP_DIST1:
900 *name = TGSI_SEMANTIC_CLIPDIST;
901 *index = 1;
902 break;
903 case VARYING_SLOT_CLIP_VERTEX:
904 *name = TGSI_SEMANTIC_CLIPVERTEX;
905 *index = 0;
906 break;
907 case VARYING_SLOT_COL0:
908 *name = TGSI_SEMANTIC_COLOR;
909 *index = 0;
910 break;
911 case VARYING_SLOT_COL1:
912 *name = TGSI_SEMANTIC_COLOR;
913 *index = 1;
914 break;
915 case VARYING_SLOT_EDGE:
916 *name = TGSI_SEMANTIC_EDGEFLAG;
917 *index = 0;
918 break;
919 case VARYING_SLOT_FACE:
920 *name = TGSI_SEMANTIC_FACE;
921 *index = 0;
922 break;
923 case VARYING_SLOT_FOGC:
924 *name = TGSI_SEMANTIC_FOG;
925 *index = 0;
926 break;
927 case VARYING_SLOT_LAYER:
928 *name = TGSI_SEMANTIC_LAYER;
929 *index = 0;
930 break;
931 case VARYING_SLOT_PNTC:
932 *name = TGSI_SEMANTIC_PCOORD;
933 *index = 0;
934 break;
935 case VARYING_SLOT_POS:
936 *name = TGSI_SEMANTIC_POSITION;
937 *index = 0;
938 break;
939 case VARYING_SLOT_PRIMITIVE_ID:
940 *name = TGSI_SEMANTIC_PRIMID;
941 *index = 0;
942 break;
943 case VARYING_SLOT_PSIZ:
944 *name = TGSI_SEMANTIC_PSIZE;
945 *index = 0;
946 break;
947 case VARYING_SLOT_TESS_LEVEL_INNER:
948 *name = TGSI_SEMANTIC_TESSINNER;
949 *index = 0;
950 break;
951 case VARYING_SLOT_TESS_LEVEL_OUTER:
952 *name = TGSI_SEMANTIC_TESSOUTER;
953 *index = 0;
954 break;
955 case VARYING_SLOT_VIEWPORT:
956 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
957 *index = 0;
958 break;
959 default:
960 ERROR("unknown varying slot %u\n", slot);
961 assert(false);
962 break;
963 }
964 }
965
966 static void
967 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
968 {
969 if (slot >= FRAG_RESULT_DATA0) {
970 *name = TGSI_SEMANTIC_COLOR;
971 *index = slot - FRAG_RESULT_COLOR - 2; // intentional: equals slot - FRAG_RESULT_DATA0
972 return;
973 }
974
975 switch (slot) {
976 case FRAG_RESULT_COLOR:
977 *name = TGSI_SEMANTIC_COLOR;
978 *index = 0;
979 break;
980 case FRAG_RESULT_DEPTH:
981 *name = TGSI_SEMANTIC_POSITION;
982 *index = 0;
983 break;
984 case FRAG_RESULT_SAMPLE_MASK:
985 *name = TGSI_SEMANTIC_SAMPLEMASK;
986 *index = 0;
987 break;
988 default:
989 ERROR("unknown frag result slot %u\n", slot);
990 assert(false);
991 break;
992 }
993 }
994
995 // copy of _mesa_sysval_to_semantic
996 static void
997 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
998 {
999 *index = 0;
1000 switch (val) {
1001 // Vertex shader
1002 case SYSTEM_VALUE_VERTEX_ID:
1003 *name = TGSI_SEMANTIC_VERTEXID;
1004 break;
1005 case SYSTEM_VALUE_INSTANCE_ID:
1006 *name = TGSI_SEMANTIC_INSTANCEID;
1007 break;
1008 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
1009 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
1010 break;
1011 case SYSTEM_VALUE_BASE_VERTEX:
1012 *name = TGSI_SEMANTIC_BASEVERTEX;
1013 break;
1014 case SYSTEM_VALUE_BASE_INSTANCE:
1015 *name = TGSI_SEMANTIC_BASEINSTANCE;
1016 break;
1017 case SYSTEM_VALUE_DRAW_ID:
1018 *name = TGSI_SEMANTIC_DRAWID;
1019 break;
1020
1021 // Geometry shader
1022 case SYSTEM_VALUE_INVOCATION_ID:
1023 *name = TGSI_SEMANTIC_INVOCATIONID;
1024 break;
1025
1026 // Fragment shader
1027 case SYSTEM_VALUE_FRAG_COORD:
1028 *name = TGSI_SEMANTIC_POSITION;
1029 break;
1030 case SYSTEM_VALUE_FRONT_FACE:
1031 *name = TGSI_SEMANTIC_FACE;
1032 break;
1033 case SYSTEM_VALUE_SAMPLE_ID:
1034 *name = TGSI_SEMANTIC_SAMPLEID;
1035 break;
1036 case SYSTEM_VALUE_SAMPLE_POS:
1037 *name = TGSI_SEMANTIC_SAMPLEPOS;
1038 break;
1039 case SYSTEM_VALUE_SAMPLE_MASK_IN:
1040 *name = TGSI_SEMANTIC_SAMPLEMASK;
1041 break;
1042 case SYSTEM_VALUE_HELPER_INVOCATION:
1043 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
1044 break;
1045
1046 // Tessellation shader
1047 case SYSTEM_VALUE_TESS_COORD:
1048 *name = TGSI_SEMANTIC_TESSCOORD;
1049 break;
1050 case SYSTEM_VALUE_VERTICES_IN:
1051 *name = TGSI_SEMANTIC_VERTICESIN;
1052 break;
1053 case SYSTEM_VALUE_PRIMITIVE_ID:
1054 *name = TGSI_SEMANTIC_PRIMID;
1055 break;
1056 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1057 *name = TGSI_SEMANTIC_TESSOUTER;
1058 break;
1059 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1060 *name = TGSI_SEMANTIC_TESSINNER;
1061 break;
1062
1063 // Compute shader
1064 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1065 *name = TGSI_SEMANTIC_THREAD_ID;
1066 break;
1067 case SYSTEM_VALUE_WORK_GROUP_ID:
1068 *name = TGSI_SEMANTIC_BLOCK_ID;
1069 break;
1070 case SYSTEM_VALUE_NUM_WORK_GROUPS:
1071 *name = TGSI_SEMANTIC_GRID_SIZE;
1072 break;
1073 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1074 *name = TGSI_SEMANTIC_BLOCK_SIZE;
1075 break;
1076
1077 // ARB_shader_ballot
1078 case SYSTEM_VALUE_SUBGROUP_SIZE:
1079 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
1080 break;
1081 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1082 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
1083 break;
1084 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1085 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
1086 break;
1087 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1088 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
1089 break;
1090 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1091 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
1092 break;
1093 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1094 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
1095 break;
1096 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1097 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
1098 break;
1099
1100 default:
1101 ERROR("unknown system value %u\n", val);
1102 assert(false);
1103 break;
1104 }
1105 }
1106
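// translate the NIR interpolation qualifier into nv50_ir_varying flags;
// with INTERP_MODE_NONE colours get the sc bit so the shade model can
// decide later, and position is always interpolated linearly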
1107 void
1108 Converter::setInterpolate(nv50_ir_varying *var,
1109 uint8_t mode,
1110 bool centroid,
1111 unsigned semantic)
1112 {
1113 switch (mode) {
1114 case INTERP_MODE_FLAT:
1115 var->flat = 1;
1116 break;
1117 case INTERP_MODE_NONE:
1118 if (semantic == TGSI_SEMANTIC_COLOR)
1119 var->sc = 1;
1120 else if (semantic == TGSI_SEMANTIC_POSITION)
1121 var->linear = 1;
1122 break;
1123 case INTERP_MODE_NOPERSPECTIVE:
1124 var->linear = 1;
1125 break;
1126 case INTERP_MODE_SMOOTH:
1127 break;
1128 }
1129 var->centroid = centroid;
1130 }
1131
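// count how many IO slots a variable occupies; for GS inputs and
// non-patch TCS/TES varyings the outermost array dimension is the
// vertex index and must not be counted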
1132 static uint16_t
1133 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
1134 bool input, const nir_variable *var)
1135 {
1136 if (!type->is_array())
1137 return type->count_attribute_slots(false);
1138
1139 uint16_t slots;
1140 switch (stage) {
1141 case Program::TYPE_GEOMETRY:
1142 slots = type->uniform_locations();
1143 if (input)
1144 slots /= info.gs.vertices_in;
1145 break;
1146 case Program::TYPE_TESSELLATION_CONTROL:
1147 case Program::TYPE_TESSELLATION_EVAL:
1148 // remove first dimension
1149 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1150 slots = type->uniform_locations();
1151 else
1152 slots = type->fields.array->uniform_locations();
1153 break;
1154 default:
1155 slots = type->count_attribute_slots(false);
1156 break;
1157 }
1158
1159 return slots;
1160 }
1161
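// fill the nv50_ir_prog_info sysval/input/output tables: map each NIR
// location to a TGSI-style semantic, then let the driver's assignSlots
// callback pick the final hardware slots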
1162 bool Converter::assignSlots() {
1163 unsigned name;
1164 unsigned index;
1165
1166 info->io.viewportId = -1;
1167 info->numInputs = 0;
1168 info->numOutputs = 0;
1169
1170 // we have to fix up the uniform locations for arrays
1171 unsigned numImages = 0;
1172 nir_foreach_variable(var, &nir->uniforms) {
1173 const glsl_type *type = var->type;
1174 if (!type->without_array()->is_image())
1175 continue;
1176 var->data.driver_location = numImages;
1177 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1178 }
1179
1180 info->numSysVals = 0;
1181 for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
1182 if (!(nir->info.system_values_read & 1ull << i))
1183 continue;
1184
1185 system_val_to_tgsi_semantic(i, &name, &index);
1186 info->sv[info->numSysVals].sn = name;
1187 info->sv[info->numSysVals].si = index;
1188 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1189
1190 switch (i) {
1191 case SYSTEM_VALUE_INSTANCE_ID:
1192 info->io.instanceId = info->numSysVals;
1193 break;
1194 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1195 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1196 info->sv[info->numSysVals].patch = 1;
1197 break;
1198 case SYSTEM_VALUE_VERTEX_ID:
1199 info->io.vertexId = info->numSysVals;
1200 break;
1201 default:
1202 break;
1203 }
1204
1205 info->numSysVals += 1;
1206 }
1207
1208 if (prog->getType() == Program::TYPE_COMPUTE)
1209 return true;
1210
1211 nir_foreach_variable(var, &nir->inputs) {
1212 const glsl_type *type = var->type;
1213 int slot = var->data.location;
1214 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1215 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1216 : type->component_slots();
1217 uint32_t frac = var->data.location_frac;
1218 uint32_t vary = var->data.driver_location;
1219
1220 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1221 if (comp > 2)
1222 slots *= 2;
1223 }
1224
1225 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1226
1227 switch(prog->getType()) {
1228 case Program::TYPE_FRAGMENT:
1229 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1230 for (uint16_t i = 0; i < slots; ++i) {
1231 setInterpolate(&info->in[vary + i], var->data.interpolation,
1232 var->data.centroid | var->data.sample, name);
1233 }
1234 break;
1235 case Program::TYPE_GEOMETRY:
1236 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1237 break;
1238 case Program::TYPE_TESSELLATION_CONTROL:
1239 case Program::TYPE_TESSELLATION_EVAL:
1240 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1241 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1242 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1243 break;
1244 case Program::TYPE_VERTEX:
1245 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1246 switch (name) {
1247 case TGSI_SEMANTIC_EDGEFLAG:
1248 info->io.edgeFlagIn = vary;
1249 break;
1250 default:
1251 break;
1252 }
1253 break;
1254 default:
1255 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1256 return false;
1257 }
1258
1259 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1260 info->in[vary].id = vary;
1261 info->in[vary].patch = var->data.patch;
1262 info->in[vary].sn = name;
1263 info->in[vary].si = index + i;
1264 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1265 if (i & 0x1)
1266 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1267 else
1268 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1269 else
1270 info->in[vary].mask |= ((1 << comp) - 1) << frac;
1271 }
1272 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1273 }
1274
1275 nir_foreach_variable(var, &nir->outputs) {
1276 const glsl_type *type = var->type;
1277 int slot = var->data.location;
1278 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1279 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1280 : type->component_slots();
1281 uint32_t frac = var->data.location_frac;
1282 uint32_t vary = var->data.driver_location;
1283
1284 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1285 if (comp > 2)
1286 slots *= 2;
1287 }
1288
1289 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1290
1291 switch(prog->getType()) {
1292 case Program::TYPE_FRAGMENT:
1293 frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1294 switch (name) {
1295 case TGSI_SEMANTIC_COLOR:
1296 if (!var->data.fb_fetch_output)
1297 info->prop.fp.numColourResults++;
1298 info->prop.fp.separateFragData = true;
1299 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1300 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1301 index = index == 0 ? var->data.index : index;
1302 break;
1303 case TGSI_SEMANTIC_POSITION:
1304 info->io.fragDepth = vary;
1305 info->prop.fp.writesDepth = true;
1306 break;
1307 case TGSI_SEMANTIC_SAMPLEMASK:
1308 info->io.sampleMask = vary;
1309 break;
1310 default:
1311 break;
1312 }
1313 break;
1314 case Program::TYPE_GEOMETRY:
1315 case Program::TYPE_TESSELLATION_CONTROL:
1316 case Program::TYPE_TESSELLATION_EVAL:
1317 case Program::TYPE_VERTEX:
1318 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1319
1320 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1321 name != TGSI_SEMANTIC_TESSOUTER)
1322 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1323
1324 switch (name) {
1325 case TGSI_SEMANTIC_CLIPDIST:
1326 info->io.genUserClip = -1;
1327 break;
1328 case TGSI_SEMANTIC_CLIPVERTEX:
1329 clipVertexOutput = vary;
1330 break;
1331 case TGSI_SEMANTIC_EDGEFLAG:
1332 info->io.edgeFlagOut = vary;
1333 break;
1334 case TGSI_SEMANTIC_POSITION:
1335 if (clipVertexOutput < 0)
1336 clipVertexOutput = vary;
1337 break;
1338 default:
1339 break;
1340 }
1341 break;
1342 default:
1343 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1344 return false;
1345 }
1346
1347 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1348 info->out[vary].id = vary;
1349 info->out[vary].patch = var->data.patch;
1350 info->out[vary].sn = name;
1351 info->out[vary].si = index + i;
1352 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1353 if (i & 0x1)
1354 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1355 else
1356 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1357 else
1358 info->out[vary].mask |= ((1 << comp) - 1) << frac;
1359
1360 if (nir->info.outputs_read & 1ull << slot)
1361 info->out[vary].oread = 1;
1362 }
1363 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1364 }
1365
1366 if (info->io.genUserClip > 0) {
1367 info->io.clipDistances = info->io.genUserClip;
1368
1369 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1370
1371 for (unsigned int n = 0; n < nOut; ++n) {
1372 unsigned int i = info->numOutputs++;
1373 info->out[i].id = i;
1374 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1375 info->out[i].si = n;
1376 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1377 }
1378 }
1379
1380 return info->assignSlots(info) == 0;
1381 }
1382
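// compute the byte address of a single component within the IO slots
// assigned above; 64-bit values take two 32-bit components, hence the
// doubled slot index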
1383 uint32_t
1384 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1385 {
1386 DataType ty;
1387 int offset = nir_intrinsic_component(insn);
1388 bool input;
1389
1390 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1391 ty = getDType(insn);
1392 else
1393 ty = getSType(insn->src[0], false, false);
1394
1395 switch (insn->intrinsic) {
1396 case nir_intrinsic_load_input:
1397 case nir_intrinsic_load_interpolated_input:
1398 case nir_intrinsic_load_per_vertex_input:
1399 input = true;
1400 break;
1401 case nir_intrinsic_load_output:
1402 case nir_intrinsic_load_per_vertex_output:
1403 case nir_intrinsic_store_output:
1404 case nir_intrinsic_store_per_vertex_output:
1405 input = false;
1406 break;
1407 default:
1408 ERROR("unknown intrinsic in getSlotAddress %s\n",
1409 nir_intrinsic_infos[insn->intrinsic].name);
1410 input = false;
1411 assert(false);
1412 break;
1413 }
1414
1415 if (typeSizeof(ty) == 8) {
1416 slot *= 2;
1417 slot += offset;
1418 if (slot >= 4) {
1419 idx += 1;
1420 slot -= 4;
1421 }
1422 } else {
1423 slot += offset;
1424 }
1425
1426 assert(slot < 4);
1427 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1428 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1429
1430 const nv50_ir_varying *vary = input ? info->in : info->out;
1431 return vary[idx].slot[slot] * 4;
1432 }
1433
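// 64-bit loads from const/buffer space or through an indirect can't be
// done in a single access, so emit two 32-bit loads and merge the
// halves into the 64-bit def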
1434 Instruction *
1435 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1436 uint32_t base, uint8_t c, Value *indirect0,
1437 Value *indirect1, bool patch)
1438 {
1439 unsigned int tySize = typeSizeof(ty);
1440
1441 if (tySize == 8 &&
1442 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1443 Value *lo = getSSA();
1444 Value *hi = getSSA();
1445
1446 Instruction *loi =
1447 mkLoad(TYPE_U32, lo,
1448 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1449 indirect0);
1450 loi->setIndirect(0, 1, indirect1);
1451 loi->perPatch = patch;
1452
1453 Instruction *hii =
1454 mkLoad(TYPE_U32, hi,
1455 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1456 indirect0);
1457 hii->setIndirect(0, 1, indirect1);
1458 hii->perPatch = patch;
1459
1460 return mkOp2(OP_MERGE, ty, def, lo, hi);
1461 } else {
1462 Instruction *ld =
1463 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1464 ld->setIndirect(0, 1, indirect1);
1465 ld->perPatch = patch;
1466 return ld;
1467 }
1468 }
1469
1470 void
1471 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1472 DataType ty, Value *src, uint8_t idx, uint8_t c,
1473 Value *indirect0, Value *indirect1)
1474 {
1475 uint8_t size = typeSizeof(ty);
1476 uint32_t address = getSlotAddress(insn, idx, c);
1477
1478 if (size == 8 && indirect0) {
1479 Value *split[2];
1480 mkSplit(split, 4, src);
1481
1482 if (op == OP_EXPORT) {
1483 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1484 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1485 }
1486
1487 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1488 split[0])->perPatch = info->out[idx].patch;
1489 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1490 split[1])->perPatch = info->out[idx].patch;
1491 } else {
1492 if (op == OP_EXPORT)
1493 src = mkMov(getSSA(size), src, ty)->getDef(0);
1494 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1495 src)->perPatch = info->out[idx].patch;
1496 }
1497 }
1498
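// copy the shader_info bits codegen cares about into nv50_ir_prog_info
// before any code is emitted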
1499 bool
1500 Converter::parseNIR()
1501 {
1502 info->bin.tlsSpace = 0;
1503 info->io.clipDistances = nir->info.clip_distance_array_size;
1504 info->io.cullDistances = nir->info.cull_distance_array_size;
1505
1506 switch(prog->getType()) {
1507 case Program::TYPE_COMPUTE:
1508 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1509 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1510 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1511 info->bin.smemSize = nir->info.cs.shared_size;
1512 break;
1513 case Program::TYPE_FRAGMENT:
1514 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1515 info->prop.fp.persampleInvocation =
1516 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1517 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1518 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1519 info->prop.fp.readsSampleLocations =
1520 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1521 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1522 info->prop.fp.usesSampleMaskIn =
1523 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1524 break;
1525 case Program::TYPE_GEOMETRY:
1526 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1527 info->prop.gp.instanceCount = nir->info.gs.invocations;
1528 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1529 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1530 break;
1531 case Program::TYPE_TESSELLATION_CONTROL:
1532 case Program::TYPE_TESSELLATION_EVAL:
1533 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1534 info->prop.tp.domain = GL_LINES;
1535 else
1536 info->prop.tp.domain = nir->info.tess.primitive_mode;
1537 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1538 info->prop.tp.outputPrim =
1539 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1540 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1541 info->prop.tp.winding = !nir->info.tess.ccw;
1542 break;
1543 case Program::TYPE_VERTEX:
1544 info->prop.vp.usesDrawParameters =
1545 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1546 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1547 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1548 break;
1549 default:
1550 break;
1551 }
1552
1553 return true;
1554 }
1555
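// set up the function skeleton (entry/exit blocks, stage specific
// prologue) and convert the body; after NIR's function inlining we
// normally only see a single main-like entry point here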
1556 bool
1557 Converter::visit(nir_function *function)
1558 {
1559 assert(function->impl);
1560
1561 // usually the blocks will set everything up, but main is special
1562 BasicBlock *entry = new BasicBlock(prog->main);
1563 exit = new BasicBlock(prog->main);
1564 blocks[nir_start_block(function->impl)->index] = entry;
1565 prog->main->setEntry(entry);
1566 prog->main->setExit(exit);
1567
1568 setPosition(entry, true);
1569
1570 if (info->io.genUserClip > 0) {
1571 for (int c = 0; c < 4; ++c)
1572 clipVtx[c] = getScratch();
1573 }
1574
1575 switch (prog->getType()) {
1576 case Program::TYPE_TESSELLATION_CONTROL:
1577 outBase = mkOp2v(
1578 OP_SUB, TYPE_U32, getSSA(),
1579 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1580 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1581 break;
1582 case Program::TYPE_FRAGMENT: {
1583 Symbol *sv = mkSysVal(SV_POSITION, 3);
1584 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1585 fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1586 break;
1587 }
1588 default:
1589 break;
1590 }
1591
1592 nir_foreach_register(reg, &function->impl->registers) {
1593 if (reg->num_array_elems) {
1594 // TODO: packed variables would be nice, but MemoryOpt fails;
1595 // replace 4 with reg->num_components once that is fixed
1596 uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1597 regToLmemOffset[reg->index] = info->bin.tlsSpace;
1598 info->bin.tlsSpace += size;
1599 }
1600 }
1601
1602 nir_index_ssa_defs(function->impl);
1603 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1604 if (!visit(node))
1605 return false;
1606 }
1607
1608 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1609 setPosition(exit, true);
1610
1611 if ((prog->getType() == Program::TYPE_VERTEX ||
1612 prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1613 && info->io.genUserClip > 0)
1614 handleUserClipPlanes();
1615
1616 // TODO: for non-main functions this needs to be an OP_RETURN
1617 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1618 return true;
1619 }
1620
1621 bool
1622 Converter::visit(nir_cf_node *node)
1623 {
1624 switch (node->type) {
1625 case nir_cf_node_block:
1626 return visit(nir_cf_node_as_block(node));
1627 case nir_cf_node_if:
1628 return visit(nir_cf_node_as_if(node));
1629 case nir_cf_node_loop:
1630 return visit(nir_cf_node_as_loop(node));
1631 default:
1632 ERROR("unknown nir_cf_node type %u\n", node->type);
1633 return false;
1634 }
1635 }
1636
1637 bool
1638 Converter::visit(nir_block *block)
1639 {
1640 if (!block->predecessors->entries && block->instr_list.is_empty())
1641 return true;
1642
1643 BasicBlock *bb = convert(block);
1644
1645 setPosition(bb, true);
1646 nir_foreach_instr(insn, block) {
1647 if (!visit(insn))
1648 return false;
1649 }
1650 return true;
1651 }
1652
1653 bool
1654 Converter::visit(nir_if *nif)
1655 {
1656 DataType sType = getSType(nif->condition, false, false);
1657 Value *src = getSrc(&nif->condition, 0);
1658
1659 nir_block *lastThen = nir_if_last_then_block(nif);
1660 nir_block *lastElse = nir_if_last_else_block(nif);
1661
1662 assert(!lastThen->successors[1]);
1663 assert(!lastElse->successors[1]);
1664
1665 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1666 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1667
1668 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1669 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1670
1671 // we only insert joinats if both branches end up at the end of the if
1672 // again. the reasons for this not to happen are breaks/continues/ret/...,
1673 // which have their own handling
1674 if (lastThen->successors[0] == lastElse->successors[0])
1675 bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1676 CC_ALWAYS, NULL);
1677
1678 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1679
1680 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1681 if (!visit(node))
1682 return false;
1683 }
1684 setPosition(convert(lastThen), true);
1685 if (!bb->getExit() ||
1686 !bb->getExit()->asFlow() ||
1687 bb->getExit()->asFlow()->op == OP_JOIN) {
1688 BasicBlock *tailBB = convert(lastThen->successors[0]);
1689 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1690 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1691 }
1692
1693 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1694 if (!visit(node))
1695 return false;
1696 }
1697 setPosition(convert(lastElse), true);
1698 if (!bb->getExit() ||
1699 !bb->getExit()->asFlow() ||
1700 bb->getExit()->asFlow()->op == OP_JOIN) {
1701 BasicBlock *tailBB = convert(lastElse->successors[0]);
1702 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1703 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1704 }
1705
1706 if (lastThen->successors[0] == lastElse->successors[0]) {
1707 setPosition(convert(lastThen->successors[0]), true);
1708 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1709 }
1710
1711 return true;
1712 }
1713
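// loops are bracketed by OP_PREBREAK/OP_PRECONT so the break and
// continue jumps emitted inside the body know their targets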
1714 bool
1715 Converter::visit(nir_loop *loop)
1716 {
1717 curLoopDepth += 1;
1718 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1719
1720 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1721 BasicBlock *tailBB =
1722 convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1723 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1724
1725 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1726 setPosition(loopBB, false);
1727 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1728
1729 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1730 if (!visit(node))
1731 return false;
1732 }
1733 Instruction *insn = bb->getExit();
1734 if (bb->cfg.incidentCount() != 0) {
1735 if (!insn || !insn->asFlow()) {
1736 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1737 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1738 } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1739 tailBB->cfg.incidentCount() == 0) {
1740 // RA doesn't like having blocks around with no incident edge,
1741 // so we create a fake one to make it happy
1742 bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1743 }
1744 }
1745
1746 curLoopDepth -= 1;
1747
1748 return true;
1749 }
1750
1751 bool
1752 Converter::visit(nir_instr *insn)
1753 {
1754 // we need an insertion point for immediate loads generated on the fly
1755 immInsertPos = bb->getExit();
1756 switch (insn->type) {
1757 case nir_instr_type_alu:
1758 return visit(nir_instr_as_alu(insn));
1759 case nir_instr_type_deref:
1760 return visit(nir_instr_as_deref(insn));
1761 case nir_instr_type_intrinsic:
1762 return visit(nir_instr_as_intrinsic(insn));
1763 case nir_instr_type_jump:
1764 return visit(nir_instr_as_jump(insn));
1765 case nir_instr_type_load_const:
1766 return visit(nir_instr_as_load_const(insn));
1767 case nir_instr_type_ssa_undef:
1768 return visit(nir_instr_as_ssa_undef(insn));
1769 case nir_instr_type_tex:
1770 return visit(nir_instr_as_tex(insn));
1771 default:
1772 ERROR("unknown nir_instr type %u\n", insn->type);
1773 return false;
1774 }
1775 return true;
1776 }
1777
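// map system value load intrinsics onto the SV_* semantics read via OP_RDSV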
1778 SVSemantic
1779 Converter::convert(nir_intrinsic_op intr)
1780 {
1781 switch (intr) {
1782 case nir_intrinsic_load_base_vertex:
1783 return SV_BASEVERTEX;
1784 case nir_intrinsic_load_base_instance:
1785 return SV_BASEINSTANCE;
1786 case nir_intrinsic_load_draw_id:
1787 return SV_DRAWID;
1788 case nir_intrinsic_load_front_face:
1789 return SV_FACE;
1790 case nir_intrinsic_load_helper_invocation:
1791 return SV_THREAD_KILL;
1792 case nir_intrinsic_load_instance_id:
1793 return SV_INSTANCE_ID;
1794 case nir_intrinsic_load_invocation_id:
1795 return SV_INVOCATION_ID;
1796 case nir_intrinsic_load_local_group_size:
1797 return SV_NTID;
1798 case nir_intrinsic_load_local_invocation_id:
1799 return SV_TID;
1800 case nir_intrinsic_load_num_work_groups:
1801 return SV_NCTAID;
1802 case nir_intrinsic_load_patch_vertices_in:
1803 return SV_VERTEX_COUNT;
1804 case nir_intrinsic_load_primitive_id:
1805 return SV_PRIMITIVE_ID;
1806 case nir_intrinsic_load_sample_id:
1807 return SV_SAMPLE_INDEX;
1808 case nir_intrinsic_load_sample_mask_in:
1809 return SV_SAMPLE_MASK;
1810 case nir_intrinsic_load_sample_pos:
1811 return SV_SAMPLE_POS;
1812 case nir_intrinsic_load_subgroup_eq_mask:
1813 return SV_LANEMASK_EQ;
1814 case nir_intrinsic_load_subgroup_ge_mask:
1815 return SV_LANEMASK_GE;
1816 case nir_intrinsic_load_subgroup_gt_mask:
1817 return SV_LANEMASK_GT;
1818 case nir_intrinsic_load_subgroup_le_mask:
1819 return SV_LANEMASK_LE;
1820 case nir_intrinsic_load_subgroup_lt_mask:
1821 return SV_LANEMASK_LT;
1822 case nir_intrinsic_load_subgroup_invocation:
1823 return SV_LANEID;
1824 case nir_intrinsic_load_tess_coord:
1825 return SV_TESS_COORD;
1826 case nir_intrinsic_load_tess_level_inner:
1827 return SV_TESS_INNER;
1828 case nir_intrinsic_load_tess_level_outer:
1829 return SV_TESS_OUTER;
1830 case nir_intrinsic_load_vertex_id:
1831 return SV_VERTEX_ID;
1832 case nir_intrinsic_load_work_group_id:
1833 return SV_CTAID;
1834 default:
1835 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1836 nir_intrinsic_infos[intr].name);
1837 assert(false);
1838 return SV_LAST;
1839 }
1840 }
1841
1842 ImgFormat
1843 Converter::convertGLImgFormat(GLuint format)
1844 {
1845 #define FMT_CASE(a, b) \
1846 case GL_ ## a: return nv50_ir::FMT_ ## b
1847
1848 switch (format) {
1849 FMT_CASE(NONE, NONE);
1850
1851 FMT_CASE(RGBA32F, RGBA32F);
1852 FMT_CASE(RGBA16F, RGBA16F);
1853 FMT_CASE(RG32F, RG32F);
1854 FMT_CASE(RG16F, RG16F);
1855 FMT_CASE(R11F_G11F_B10F, R11G11B10F);
1856 FMT_CASE(R32F, R32F);
1857 FMT_CASE(R16F, R16F);
1858
1859 FMT_CASE(RGBA32UI, RGBA32UI);
1860 FMT_CASE(RGBA16UI, RGBA16UI);
1861 FMT_CASE(RGB10_A2UI, RGB10A2UI);
1862 FMT_CASE(RGBA8UI, RGBA8UI);
1863 FMT_CASE(RG32UI, RG32UI);
1864 FMT_CASE(RG16UI, RG16UI);
1865 FMT_CASE(RG8UI, RG8UI);
1866 FMT_CASE(R32UI, R32UI);
1867 FMT_CASE(R16UI, R16UI);
1868 FMT_CASE(R8UI, R8UI);
1869
1870 FMT_CASE(RGBA32I, RGBA32I);
1871 FMT_CASE(RGBA16I, RGBA16I);
1872 FMT_CASE(RGBA8I, RGBA8I);
1873 FMT_CASE(RG32I, RG32I);
1874 FMT_CASE(RG16I, RG16I);
1875 FMT_CASE(RG8I, RG8I);
1876 FMT_CASE(R32I, R32I);
1877 FMT_CASE(R16I, R16I);
1878 FMT_CASE(R8I, R8I);
1879
1880 FMT_CASE(RGBA16, RGBA16);
1881 FMT_CASE(RGB10_A2, RGB10A2);
1882 FMT_CASE(RGBA8, RGBA8);
1883 FMT_CASE(RG16, RG16);
1884 FMT_CASE(RG8, RG8);
1885 FMT_CASE(R16, R16);
1886 FMT_CASE(R8, R8);
1887
1888 FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
1889 FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
1890 FMT_CASE(RG16_SNORM, RG16_SNORM);
1891 FMT_CASE(RG8_SNORM, RG8_SNORM);
1892 FMT_CASE(R16_SNORM, R16_SNORM);
1893 FMT_CASE(R8_SNORM, R8_SNORM);
1894
1895 FMT_CASE(BGRA_INTEGER, BGRA8);
1896 default:
1897 ERROR("unknown format %x\n", format);
1898 assert(false);
1899 return nv50_ir::FMT_NONE;
1900 }
1901 #undef FMT_CASE
1902 }
1903
1904 bool
1905 Converter::visit(nir_intrinsic_instr *insn)
1906 {
1907 nir_intrinsic_op op = insn->intrinsic;
1908 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1909
1910 switch (op) {
1911 case nir_intrinsic_load_uniform: {
1912 LValues &newDefs = convert(&insn->dest);
1913 const DataType dType = getDType(insn);
1914 Value *indirect;
1915 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1916 for (uint8_t i = 0; i < insn->num_components; ++i) {
1917 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1918 }
1919 break;
1920 }
1921 case nir_intrinsic_store_output:
1922 case nir_intrinsic_store_per_vertex_output: {
1923 Value *indirect;
1924 DataType dType = getSType(insn->src[0], false, false);
1925 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1926
1927 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1928 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1929 continue;
1930
1931 uint8_t offset = 0;
1932 Value *src = getSrc(&insn->src[0], i);
1933 switch (prog->getType()) {
1934 case Program::TYPE_FRAGMENT: {
1935 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1936 // TGSI uses a different interface than NIR: TGSI stores the
1937 // value in the z component, NIR in the x component
1938 offset += 2;
1939 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1940 }
1941 break;
1942 }
1943 case Program::TYPE_GEOMETRY:
1944 case Program::TYPE_VERTEX: {
1945 if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1946 mkMov(clipVtx[i], src);
1947 src = clipVtx[i];
1948 }
1949 break;
1950 }
1951 default:
1952 break;
1953 }
1954
1955 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1956 }
1957 break;
1958 }
1959 case nir_intrinsic_load_input:
1960 case nir_intrinsic_load_interpolated_input:
1961 case nir_intrinsic_load_output: {
1962 LValues &newDefs = convert(&insn->dest);
1963
1964 // FBFetch
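      // Emulated as a texel fetch from the bound render target: the TXF
      // source vector is built as (x, y, layer, sample) from SV_POSITION,
      // SV_LAYER and SV_SAMPLE_INDEX, using a 2D MS array target.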
1965 if (prog->getType() == Program::TYPE_FRAGMENT &&
1966 op == nir_intrinsic_load_output) {
1967 std::vector<Value*> defs, srcs;
1968 uint8_t mask = 0;
1969
1970 srcs.push_back(getSSA());
1971 srcs.push_back(getSSA());
1972 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1973 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1974 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1975 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1976
1977 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1978 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1979
1980 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1981 defs.push_back(newDefs[i]);
1982 mask |= 1 << i;
1983 }
1984
1985 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1986 texi->tex.levelZero = 1;
1987 texi->tex.mask = mask;
1988 texi->tex.useOffsets = 0;
1989 texi->tex.r = 0xffff;
1990 texi->tex.s = 0xffff;
1991
1992 info->prop.fp.readsFramebuffer = true;
1993 break;
1994 }
1995
1996 const DataType dType = getDType(insn);
1997 Value *indirect;
1998 bool input = op != nir_intrinsic_load_output;
1999 operation nvirOp;
2000 uint32_t mode = 0;
2001
2002 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
2003 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
2004
2005 // see load_barycentric_* handling
2006 if (prog->getType() == Program::TYPE_FRAGMENT) {
2007 mode = translateInterpMode(&vary, nvirOp);
2008 if (op == nir_intrinsic_load_interpolated_input) {
2009 ImmediateValue immMode;
2010 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
2011 mode |= immMode.reg.data.u32;
2012 }
2013 }
2014
2015 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2016 uint32_t address = getSlotAddress(insn, idx, i);
2017 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
2018 if (prog->getType() == Program::TYPE_FRAGMENT) {
2019 int s = 1;
2020 if (typeSizeof(dType) == 8) {
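            // 64-bit varyings are interpolated as two 32-bit halves (at
            // address and address + 4) and merged into one value afterwards.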
2021 Value *lo = getSSA();
2022 Value *hi = getSSA();
2023 Instruction *interp;
2024
2025 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
2026 if (nvirOp == OP_PINTERP)
2027 interp->setSrc(s++, fp.position);
2028 if (mode & NV50_IR_INTERP_OFFSET)
2029 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2030 interp->setInterpolate(mode);
2031 interp->setIndirect(0, 0, indirect);
2032
2033 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
2034 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
2035 if (nvirOp == OP_PINTERP)
2036 interp->setSrc(s++, fp.position);
2037 if (mode & NV50_IR_INTERP_OFFSET)
2038 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2039 interp->setInterpolate(mode);
2040 interp->setIndirect(0, 0, indirect);
2041
2042 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2043 } else {
2044 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2045 if (nvirOp == OP_PINTERP)
2046 interp->setSrc(s++, fp.position);
2047 if (mode & NV50_IR_INTERP_OFFSET)
2048 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2049 interp->setInterpolate(mode);
2050 interp->setIndirect(0, 0, indirect);
2051 }
2052 } else {
2053 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2054 }
2055 }
2056 break;
2057 }
2058 case nir_intrinsic_load_barycentric_at_offset:
2059 case nir_intrinsic_load_barycentric_at_sample:
2060 case nir_intrinsic_load_barycentric_centroid:
2061 case nir_intrinsic_load_barycentric_pixel:
2062 case nir_intrinsic_load_barycentric_sample: {
2063 LValues &newDefs = convert(&insn->dest);
2064 uint32_t mode;
2065
2066 if (op == nir_intrinsic_load_barycentric_centroid ||
2067 op == nir_intrinsic_load_barycentric_sample) {
2068 mode = NV50_IR_INTERP_CENTROID;
2069 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
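         // Clamp each offset to [-0.5, 0.4375], scale by 4096 into signed
         // fixed point, then pack both halves with INSBF 0x1010 (insert 16
         // bits at bit 16): result = (y << 16) | (x & 0xffff). E.g. an x
         // offset of 0.25 becomes 0.25 * 4096 = 1024 = 0x400.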
2070 Value *offs[2];
2071 for (uint8_t c = 0; c < 2; c++) {
2072 offs[c] = getScratch();
2073 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2074 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2075 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2076 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2077 }
2078 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2079
2080 mode = NV50_IR_INTERP_OFFSET;
2081 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2082 mode = NV50_IR_INTERP_DEFAULT;
2083 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2084 info->prop.fp.readsSampleLocations = true;
2085 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2086 mode = NV50_IR_INTERP_OFFSET;
2087 } else {
2088 unreachable("all intrinsics already handled above");
2089 }
2090
2091 loadImm(newDefs[1], mode);
2092 break;
2093 }
2094 case nir_intrinsic_discard:
2095 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2096 break;
2097 case nir_intrinsic_discard_if: {
2098 Value *pred = getSSA(1, FILE_PREDICATE);
2099 if (insn->num_components > 1) {
2100          ERROR("nir_intrinsic_discard_if is only supported with 1 component!\n");
2101 assert(false);
2102 return false;
2103 }
2104 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2105 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2106 break;
2107 }
2108 case nir_intrinsic_load_base_vertex:
2109 case nir_intrinsic_load_base_instance:
2110 case nir_intrinsic_load_draw_id:
2111 case nir_intrinsic_load_front_face:
2112 case nir_intrinsic_load_helper_invocation:
2113 case nir_intrinsic_load_instance_id:
2114 case nir_intrinsic_load_invocation_id:
2115 case nir_intrinsic_load_local_group_size:
2116 case nir_intrinsic_load_local_invocation_id:
2117 case nir_intrinsic_load_num_work_groups:
2118 case nir_intrinsic_load_patch_vertices_in:
2119 case nir_intrinsic_load_primitive_id:
2120 case nir_intrinsic_load_sample_id:
2121 case nir_intrinsic_load_sample_mask_in:
2122 case nir_intrinsic_load_sample_pos:
2123 case nir_intrinsic_load_subgroup_eq_mask:
2124 case nir_intrinsic_load_subgroup_ge_mask:
2125 case nir_intrinsic_load_subgroup_gt_mask:
2126 case nir_intrinsic_load_subgroup_le_mask:
2127 case nir_intrinsic_load_subgroup_lt_mask:
2128 case nir_intrinsic_load_subgroup_invocation:
2129 case nir_intrinsic_load_tess_coord:
2130 case nir_intrinsic_load_tess_level_inner:
2131 case nir_intrinsic_load_tess_level_outer:
2132 case nir_intrinsic_load_vertex_id:
2133 case nir_intrinsic_load_work_group_id: {
2134 const DataType dType = getDType(insn);
2135 SVSemantic sv = convert(op);
2136 LValues &newDefs = convert(&insn->dest);
2137
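      // System values are read as 32 bits; a 64-bit destination is built by
      // merging the RDSV result with a zero high word below.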
2138 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2139 Value *def;
2140 if (typeSizeof(dType) == 8)
2141 def = getSSA();
2142 else
2143 def = newDefs[i];
2144
2145 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2146 loadImm(def, 0u);
2147 } else {
2148 Symbol *sym = mkSysVal(sv, i);
2149 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2150 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2151 rdsv->perPatch = 1;
2152 }
2153
2154 if (typeSizeof(dType) == 8)
2155 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2156 }
2157 break;
2158 }
2159 // constants
2160 case nir_intrinsic_load_subgroup_size: {
2161 LValues &newDefs = convert(&insn->dest);
2162 loadImm(newDefs[0], 32u);
2163 break;
2164 }
2165 case nir_intrinsic_vote_all:
2166 case nir_intrinsic_vote_any:
2167 case nir_intrinsic_vote_ieq: {
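      // Compare the source against 0 into a predicate, run the vote across
      // the subgroup, then widen the resulting predicate back to a 32-bit
      // boolean.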
2168 LValues &newDefs = convert(&insn->dest);
2169 Value *pred = getScratch(1, FILE_PREDICATE);
2170 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2171 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2172 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2173 break;
2174 }
2175 case nir_intrinsic_ballot: {
2176 LValues &newDefs = convert(&insn->dest);
2177 Value *pred = getSSA(1, FILE_PREDICATE);
2178 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2179 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2180 break;
2181 }
2182 case nir_intrinsic_read_first_invocation:
2183 case nir_intrinsic_read_invocation: {
2184 LValues &newDefs = convert(&insn->dest);
2185 const DataType dType = getDType(insn);
2186 Value *tmp = getScratch();
2187
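      // read_first_invocation computes the lowest active lane id: OP_VOTE
      // (any, 1) yields the mask of active lanes, EXTBF with 0x2000 (REV
      // subop) bit-reverses it, and BFIND then returns the position of what
      // was originally the least significant set bit.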
2188 if (op == nir_intrinsic_read_first_invocation) {
2189 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2190 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2191 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2192 } else
2193 tmp = getSrc(&insn->src[1], 0);
2194
2195 for (uint8_t i = 0; i < insn->num_components; ++i) {
2196 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2197 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2198 }
2199 break;
2200 }
2201 case nir_intrinsic_load_per_vertex_input: {
2202 const DataType dType = getDType(insn);
2203 LValues &newDefs = convert(&insn->dest);
2204 Value *indirectVertex;
2205 Value *indirectOffset;
2206 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2207 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2208
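      // PFETCH resolves the base address of the selected input vertex from
      // baseVertex plus the optional indirect vertex index.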
2209 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2210 mkImm(baseVertex), indirectVertex);
2211 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2212 uint32_t address = getSlotAddress(insn, idx, i);
2213 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2214 indirectOffset, vtxBase, info->in[idx].patch);
2215 }
2216 break;
2217 }
2218 case nir_intrinsic_load_per_vertex_output: {
2219 const DataType dType = getDType(insn);
2220 LValues &newDefs = convert(&insn->dest);
2221 Value *indirectVertex;
2222 Value *indirectOffset;
2223 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2224 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2225 Value *vtxBase = NULL;
2226
2227 if (indirectVertex)
2228 vtxBase = indirectVertex;
2229 else
2230 vtxBase = loadImm(NULL, baseVertex);
2231
2232 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2233
2234 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2235 uint32_t address = getSlotAddress(insn, idx, i);
2236 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2237                   indirectOffset, vtxBase, info->out[idx].patch);
2238 }
2239 break;
2240 }
2241 case nir_intrinsic_emit_vertex:
2242 if (info->io.genUserClip > 0)
2243 handleUserClipPlanes();
2244 // fallthrough
2245 case nir_intrinsic_end_primitive: {
2246 uint32_t idx = nir_intrinsic_stream_id(insn);
2247 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2248 break;
2249 }
2250 case nir_intrinsic_load_ubo: {
2251 const DataType dType = getDType(insn);
2252 LValues &newDefs = convert(&insn->dest);
2253 Value *indirectIndex;
2254 Value *indirectOffset;
2255 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2256 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2257
2258 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2259 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2260 indirectOffset, indirectIndex);
2261 }
2262 break;
2263 }
2264 case nir_intrinsic_get_buffer_size: {
2265 LValues &newDefs = convert(&insn->dest);
2266 const DataType dType = getDType(insn);
2267 Value *indirectBuffer;
2268 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2269
2270 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2271 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2272 break;
2273 }
2274 case nir_intrinsic_store_ssbo: {
2275 DataType sType = getSType(insn->src[0], false, false);
2276 Value *indirectBuffer;
2277 Value *indirectOffset;
2278 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2279 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2280
2281 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2282 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2283 continue;
2284 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2285 offset + i * typeSizeof(sType));
2286 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2287 ->setIndirect(0, 1, indirectBuffer);
2288 }
2289 info->io.globalAccess |= 0x2;
2290 break;
2291 }
2292 case nir_intrinsic_load_ssbo: {
2293 const DataType dType = getDType(insn);
2294 LValues &newDefs = convert(&insn->dest);
2295 Value *indirectBuffer;
2296 Value *indirectOffset;
2297 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2298 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2299
2300 for (uint8_t i = 0u; i < insn->num_components; ++i)
2301 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2302 indirectOffset, indirectBuffer);
2303
2304 info->io.globalAccess |= 0x1;
2305 break;
2306 }
2307 case nir_intrinsic_shared_atomic_add:
2308 case nir_intrinsic_shared_atomic_and:
2309 case nir_intrinsic_shared_atomic_comp_swap:
2310 case nir_intrinsic_shared_atomic_exchange:
2311 case nir_intrinsic_shared_atomic_or:
2312 case nir_intrinsic_shared_atomic_imax:
2313 case nir_intrinsic_shared_atomic_imin:
2314 case nir_intrinsic_shared_atomic_umax:
2315 case nir_intrinsic_shared_atomic_umin:
2316 case nir_intrinsic_shared_atomic_xor: {
2317 const DataType dType = getDType(insn);
2318 LValues &newDefs = convert(&insn->dest);
2319 Value *indirectOffset;
2320 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2321 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2322 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2323 if (op == nir_intrinsic_shared_atomic_comp_swap)
2324 atom->setSrc(2, getSrc(&insn->src[2], 0));
2325 atom->setIndirect(0, 0, indirectOffset);
2326 atom->subOp = getSubOp(op);
2327 break;
2328 }
2329 case nir_intrinsic_ssbo_atomic_add:
2330 case nir_intrinsic_ssbo_atomic_and:
2331 case nir_intrinsic_ssbo_atomic_comp_swap:
2332 case nir_intrinsic_ssbo_atomic_exchange:
2333 case nir_intrinsic_ssbo_atomic_or:
2334 case nir_intrinsic_ssbo_atomic_imax:
2335 case nir_intrinsic_ssbo_atomic_imin:
2336 case nir_intrinsic_ssbo_atomic_umax:
2337 case nir_intrinsic_ssbo_atomic_umin:
2338 case nir_intrinsic_ssbo_atomic_xor: {
2339 const DataType dType = getDType(insn);
2340 LValues &newDefs = convert(&insn->dest);
2341 Value *indirectBuffer;
2342 Value *indirectOffset;
2343 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2344 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2345
2346 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2347 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2348 getSrc(&insn->src[2], 0));
2349 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2350 atom->setSrc(2, getSrc(&insn->src[3], 0));
2351 atom->setIndirect(0, 0, indirectOffset);
2352 atom->setIndirect(0, 1, indirectBuffer);
2353 atom->subOp = getSubOp(op);
2354
2355 info->io.globalAccess |= 0x2;
2356 break;
2357 }
2358 case nir_intrinsic_bindless_image_atomic_add:
2359 case nir_intrinsic_bindless_image_atomic_and:
2360 case nir_intrinsic_bindless_image_atomic_comp_swap:
2361 case nir_intrinsic_bindless_image_atomic_exchange:
2362 case nir_intrinsic_bindless_image_atomic_max:
2363 case nir_intrinsic_bindless_image_atomic_min:
2364 case nir_intrinsic_bindless_image_atomic_or:
2365 case nir_intrinsic_bindless_image_atomic_xor:
2366 case nir_intrinsic_bindless_image_load:
2367 case nir_intrinsic_bindless_image_samples:
2368 case nir_intrinsic_bindless_image_size:
2369 case nir_intrinsic_bindless_image_store: {
2370 std::vector<Value*> srcs, defs;
2371 Value *indirect = getSrc(&insn->src[0], 0);
2372 DataType ty;
2373
2374 uint32_t mask = 0;
2375 TexInstruction::Target target =
2376 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2377 unsigned int argCount = getNIRArgCount(target);
2378 uint16_t location = 0;
2379
2380 if (opInfo.has_dest) {
2381 LValues &newDefs = convert(&insn->dest);
2382 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2383 defs.push_back(newDefs[i]);
2384 mask |= 1 << i;
2385 }
2386 }
2387
2388 switch (op) {
2389 case nir_intrinsic_bindless_image_atomic_add:
2390 case nir_intrinsic_bindless_image_atomic_and:
2391 case nir_intrinsic_bindless_image_atomic_comp_swap:
2392 case nir_intrinsic_bindless_image_atomic_exchange:
2393 case nir_intrinsic_bindless_image_atomic_max:
2394 case nir_intrinsic_bindless_image_atomic_min:
2395 case nir_intrinsic_bindless_image_atomic_or:
2396 case nir_intrinsic_bindless_image_atomic_xor:
2397 ty = getDType(insn);
2398 mask = 0x1;
2399 info->io.globalAccess |= 0x2;
2400 break;
2401 case nir_intrinsic_bindless_image_load:
2402 ty = TYPE_U32;
2403 info->io.globalAccess |= 0x1;
2404 break;
2405 case nir_intrinsic_bindless_image_store:
2406 ty = TYPE_U32;
2407 mask = 0xf;
2408 info->io.globalAccess |= 0x2;
2409 break;
2410 case nir_intrinsic_bindless_image_samples:
2411 mask = 0x8;
2412 ty = TYPE_U32;
2413 break;
2414 case nir_intrinsic_bindless_image_size:
2415 ty = TYPE_U32;
2416 break;
2417 default:
2418 unreachable("unhandled image opcode");
2419 break;
2420 }
2421
2422 // coords
2423 if (opInfo.num_srcs >= 2)
2424 for (unsigned int i = 0u; i < argCount; ++i)
2425 srcs.push_back(getSrc(&insn->src[1], i));
2426
2427       // the sample index is just another src added after the coords
2428 if (opInfo.num_srcs >= 3 && target.isMS())
2429 srcs.push_back(getSrc(&insn->src[2], 0));
2430
2431 if (opInfo.num_srcs >= 4) {
2432 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2433 for (uint8_t i = 0u; i < components; ++i)
2434 srcs.push_back(getSrc(&insn->src[3], i));
2435 }
2436
2437 if (opInfo.num_srcs >= 5)
2438          // 1 for atomic swap
2439 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2440 srcs.push_back(getSrc(&insn->src[4], i));
2441
2442 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2444 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(nir_intrinsic_format(insn))];
2445 texi->tex.mask = mask;
2446 texi->tex.bindless = true;
2447 texi->cache = convert(nir_intrinsic_access(insn));
2448 texi->setType(ty);
2449 texi->subOp = getSubOp(op);
2450
2451 if (indirect)
2452 texi->setIndirectR(indirect);
2453
2454 break;
2455 }
2456 case nir_intrinsic_image_deref_atomic_add:
2457 case nir_intrinsic_image_deref_atomic_and:
2458 case nir_intrinsic_image_deref_atomic_comp_swap:
2459 case nir_intrinsic_image_deref_atomic_exchange:
2460 case nir_intrinsic_image_deref_atomic_max:
2461 case nir_intrinsic_image_deref_atomic_min:
2462 case nir_intrinsic_image_deref_atomic_or:
2463 case nir_intrinsic_image_deref_atomic_xor:
2464 case nir_intrinsic_image_deref_load:
2465 case nir_intrinsic_image_deref_samples:
2466 case nir_intrinsic_image_deref_size:
2467 case nir_intrinsic_image_deref_store: {
2468 const nir_variable *tex;
2469 std::vector<Value*> srcs, defs;
2470 Value *indirect;
2471 DataType ty;
2472
2473 uint32_t mask = 0;
2474 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2475 const glsl_type *type = deref->type;
2476 TexInstruction::Target target =
2477 convert((glsl_sampler_dim)type->sampler_dimensionality,
2478 type->sampler_array, type->sampler_shadow);
2479 unsigned int argCount = getNIRArgCount(target);
2480 uint16_t location = handleDeref(deref, indirect, tex);
2481
2482 if (opInfo.has_dest) {
2483 LValues &newDefs = convert(&insn->dest);
2484 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2485 defs.push_back(newDefs[i]);
2486 mask |= 1 << i;
2487 }
2488 }
2489
2490 switch (op) {
2491 case nir_intrinsic_image_deref_atomic_add:
2492 case nir_intrinsic_image_deref_atomic_and:
2493 case nir_intrinsic_image_deref_atomic_comp_swap:
2494 case nir_intrinsic_image_deref_atomic_exchange:
2495 case nir_intrinsic_image_deref_atomic_max:
2496 case nir_intrinsic_image_deref_atomic_min:
2497 case nir_intrinsic_image_deref_atomic_or:
2498 case nir_intrinsic_image_deref_atomic_xor:
2499 ty = getDType(insn);
2500 mask = 0x1;
2501 info->io.globalAccess |= 0x2;
2502 break;
2503 case nir_intrinsic_image_deref_load:
2504 ty = TYPE_U32;
2505 info->io.globalAccess |= 0x1;
2506 break;
2507 case nir_intrinsic_image_deref_store:
2508 ty = TYPE_U32;
2509 mask = 0xf;
2510 info->io.globalAccess |= 0x2;
2511 break;
2512 case nir_intrinsic_image_deref_samples:
2513 mask = 0x8;
2514 ty = TYPE_U32;
2515 break;
2516 case nir_intrinsic_image_deref_size:
2517 ty = TYPE_U32;
2518 break;
2519 default:
2520 unreachable("unhandled image opcode");
2521 break;
2522 }
2523
2524 // coords
2525 if (opInfo.num_srcs >= 2)
2526 for (unsigned int i = 0u; i < argCount; ++i)
2527 srcs.push_back(getSrc(&insn->src[1], i));
2528
2529       // the sample index is just another src added after the coords
2530 if (opInfo.num_srcs >= 3 && target.isMS())
2531 srcs.push_back(getSrc(&insn->src[2], 0));
2532
2533 if (opInfo.num_srcs >= 4) {
2534 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2535 for (uint8_t i = 0u; i < components; ++i)
2536 srcs.push_back(getSrc(&insn->src[3], i));
2537 }
2538
2539 if (opInfo.num_srcs >= 5)
2540          // 1 for atomic swap
2541 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2542 srcs.push_back(getSrc(&insn->src[4], i));
2543
2544 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2545 texi->tex.bindless = false;
2546 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2547 texi->tex.mask = mask;
2548 texi->cache = getCacheModeFromVar(tex);
2549 texi->setType(ty);
2550 texi->subOp = getSubOp(op);
2551
2552 if (indirect)
2553 texi->setIndirectR(indirect);
2554
2555 break;
2556 }
2557 case nir_intrinsic_store_shared: {
2558 DataType sType = getSType(insn->src[0], false, false);
2559 Value *indirectOffset;
2560 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2561
2562 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2563 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2564 continue;
2565 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2566 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2567 }
2568 break;
2569 }
2570 case nir_intrinsic_load_shared: {
2571 const DataType dType = getDType(insn);
2572 LValues &newDefs = convert(&insn->dest);
2573 Value *indirectOffset;
2574 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2575
2576 for (uint8_t i = 0u; i < insn->num_components; ++i)
2577 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2578
2579 break;
2580 }
2581 case nir_intrinsic_barrier: {
2582 // TODO: add flag to shader_info
2583 info->numBarriers = 1;
2584 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2585 bar->fixed = 1;
2586 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2587 break;
2588 }
2589 case nir_intrinsic_group_memory_barrier:
2590 case nir_intrinsic_memory_barrier:
2591 case nir_intrinsic_memory_barrier_atomic_counter:
2592 case nir_intrinsic_memory_barrier_buffer:
2593 case nir_intrinsic_memory_barrier_image:
2594 case nir_intrinsic_memory_barrier_shared: {
2595 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2596 bar->fixed = 1;
2597 bar->subOp = getSubOp(op);
2598 break;
2599 }
2600 case nir_intrinsic_shader_clock: {
2601 const DataType dType = getDType(insn);
2602 LValues &newDefs = convert(&insn->dest);
2603
2604 loadImm(newDefs[0], 0u);
2605 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2606 break;
2607 }
2608 default:
2609 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2610 return false;
2611 }
2612
2613 return true;
2614 }
2615
2616 bool
2617 Converter::visit(nir_jump_instr *insn)
2618 {
2619 switch (insn->type) {
2620 case nir_jump_return:
2621 // TODO: this only works in the main function
2622 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2623 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2624 break;
2625 case nir_jump_break:
2626 case nir_jump_continue: {
2627 bool isBreak = insn->type == nir_jump_break;
2628 nir_block *block = insn->instr.block;
2629 assert(!block->successors[1]);
2630 BasicBlock *target = convert(block->successors[0]);
2631 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2632 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2633 break;
2634 }
2635 default:
2636 ERROR("unknown nir_jump_type %u\n", insn->type);
2637 return false;
2638 }
2639
2640 return true;
2641 }
2642
2643 Value*
2644 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2645 {
2646 Value *val;
2647
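   // Immediate loads are emitted lazily; if immInsertPos is set, insert
   // there so the load dominates all of its uses, and restore the insertion
   // point to the end of the current block afterwards.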
2648 if (immInsertPos)
2649 setPosition(immInsertPos, true);
2650 else
2651 setPosition(bb, false);
2652
2653 switch (insn->def.bit_size) {
2654 case 64:
2655 val = loadImm(getSSA(8), insn->value[idx].u64);
2656 break;
2657 case 32:
2658 val = loadImm(getSSA(4), insn->value[idx].u32);
2659 break;
2660 case 16:
2661 val = loadImm(getSSA(2), insn->value[idx].u16);
2662 break;
2663 case 8:
2664 val = loadImm(getSSA(1), insn->value[idx].u8);
2665 break;
2666 default:
2667 unreachable("unhandled bit size!\n");
2668 }
2669 setPosition(bb, true);
2670 return val;
2671 }
2672
2673 bool
2674 Converter::visit(nir_load_const_instr *insn)
2675 {
2676 assert(insn->def.bit_size <= 64);
2677 immediates[insn->def.index] = insn;
2678 return true;
2679 }
2680
2681 #define DEFAULT_CHECKS \
2682 if (insn->dest.dest.ssa.num_components > 1) { \
2683 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2684 return false; \
2685 } \
2686 if (insn->dest.write_mask != 1) { \
2687       ERROR("nir_alu_instr only supported with a write_mask of 1!\n"); \
2688 return false; \
2689 }
2690 bool
2691 Converter::visit(nir_alu_instr *insn)
2692 {
2693 const nir_op op = insn->op;
2694 const nir_op_info &info = nir_op_infos[op];
2695 DataType dType = getDType(insn);
2696 const std::vector<DataType> sTypes = getSTypes(insn);
2697
2698 Instruction *oldPos = this->bb->getExit();
2699
2700 switch (op) {
2701 case nir_op_fabs:
2702 case nir_op_iabs:
2703 case nir_op_fadd:
2704 case nir_op_iadd:
2705 case nir_op_iand:
2706 case nir_op_fceil:
2707 case nir_op_fcos:
2708 case nir_op_fddx:
2709 case nir_op_fddx_coarse:
2710 case nir_op_fddx_fine:
2711 case nir_op_fddy:
2712 case nir_op_fddy_coarse:
2713 case nir_op_fddy_fine:
2714 case nir_op_fdiv:
2715 case nir_op_idiv:
2716 case nir_op_udiv:
2717 case nir_op_fexp2:
2718 case nir_op_ffloor:
2719 case nir_op_ffma:
2720 case nir_op_flog2:
2721 case nir_op_fmax:
2722 case nir_op_imax:
2723 case nir_op_umax:
2724 case nir_op_fmin:
2725 case nir_op_imin:
2726 case nir_op_umin:
2727 case nir_op_fmod:
2728 case nir_op_imod:
2729 case nir_op_umod:
2730 case nir_op_fmul:
2731 case nir_op_imul:
2732 case nir_op_imul_high:
2733 case nir_op_umul_high:
2734 case nir_op_fneg:
2735 case nir_op_ineg:
2736 case nir_op_inot:
2737 case nir_op_ior:
2738 case nir_op_pack_64_2x32_split:
2739 case nir_op_fpow:
2740 case nir_op_frcp:
2741 case nir_op_frem:
2742 case nir_op_irem:
2743 case nir_op_frsq:
2744 case nir_op_fsat:
2745 case nir_op_ishr:
2746 case nir_op_ushr:
2747 case nir_op_fsin:
2748 case nir_op_fsqrt:
2749 case nir_op_fsub:
2750 case nir_op_isub:
2751 case nir_op_ftrunc:
2752 case nir_op_ishl:
2753 case nir_op_ixor: {
2754 DEFAULT_CHECKS;
2755 LValues &newDefs = convert(&insn->dest);
2756 operation preOp = preOperationNeeded(op);
2757 if (preOp != OP_NOP) {
2758 assert(info.num_inputs < 2);
2759 Value *tmp = getSSA(typeSizeof(dType));
2760 Instruction *i0 = mkOp(preOp, dType, tmp);
2761 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2762 if (info.num_inputs) {
2763 i0->setSrc(0, getSrc(&insn->src[0]));
2764 i1->setSrc(0, tmp);
2765 }
2766 i1->subOp = getSubOp(op);
2767 } else {
2768 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2769 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2770 i->setSrc(s, getSrc(&insn->src[s]));
2771 }
2772 i->subOp = getSubOp(op);
2773 }
2774 break;
2775 }
2776 case nir_op_ifind_msb:
2777 case nir_op_ufind_msb: {
2778 DEFAULT_CHECKS;
2779 LValues &newDefs = convert(&insn->dest);
2780 dType = sTypes[0];
2781 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2782 break;
2783 }
2784 case nir_op_fround_even: {
2785 DEFAULT_CHECKS;
2786 LValues &newDefs = convert(&insn->dest);
2787 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2788 break;
2789 }
2790 // convert instructions
2791 case nir_op_f2f32:
2792 case nir_op_f2i32:
2793 case nir_op_f2u32:
2794 case nir_op_i2f32:
2795 case nir_op_i2i32:
2796 case nir_op_u2f32:
2797 case nir_op_u2u32:
2798 case nir_op_f2f64:
2799 case nir_op_f2i64:
2800 case nir_op_f2u64:
2801 case nir_op_i2f64:
2802 case nir_op_i2i64:
2803 case nir_op_u2f64:
2804 case nir_op_u2u64: {
2805 DEFAULT_CHECKS;
2806 LValues &newDefs = convert(&insn->dest);
2807 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2808 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2809 i->rnd = ROUND_Z;
2810 i->sType = sTypes[0];
2811 break;
2812 }
2813 // compare instructions
2814 case nir_op_feq32:
2815 case nir_op_ieq32:
2816 case nir_op_fge32:
2817 case nir_op_ige32:
2818 case nir_op_uge32:
2819 case nir_op_flt32:
2820 case nir_op_ilt32:
2821 case nir_op_ult32:
2822 case nir_op_fne32:
2823 case nir_op_ine32: {
2824 DEFAULT_CHECKS;
2825 LValues &newDefs = convert(&insn->dest);
2826 Instruction *i = mkCmp(getOperation(op),
2827 getCondCode(op),
2828 dType,
2829 newDefs[0],
2830 dType,
2831 getSrc(&insn->src[0]),
2832 getSrc(&insn->src[1]));
2833 if (info.num_inputs == 3)
2834 i->setSrc(2, getSrc(&insn->src[2]));
2835 i->sType = sTypes[0];
2836 break;
2837 }
2838 // those are weird ALU ops and need special handling, because
2839    // 1. they are always component based
2840 // 2. they basically just merge multiple values into one data type
2841 case nir_op_mov:
2842 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2843 nir_reg_dest& reg = insn->dest.dest.reg;
2844 uint32_t goffset = regToLmemOffset[reg.reg->index];
2845 uint8_t comps = reg.reg->num_components;
2846 uint8_t size = reg.reg->bit_size / 8;
2847 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2848 uint32_t aoffset = csize * reg.base_offset;
2849 Value *indirect = NULL;
2850
2851 if (reg.indirect)
2852 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2853 getSrc(reg.indirect, 0), mkImm(csize));
2854
2855 for (uint8_t i = 0u; i < comps; ++i) {
2856 if (!((1u << i) & insn->dest.write_mask))
2857 continue;
2858
2859 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2860 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2861 }
2862 break;
2863 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2864 LValues &newDefs = convert(&insn->dest);
2865 nir_reg_src& reg = insn->src[0].src.reg;
2866 uint32_t goffset = regToLmemOffset[reg.reg->index];
2867 // uint8_t comps = reg.reg->num_components;
2868 uint8_t size = reg.reg->bit_size / 8;
2869 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2870 uint32_t aoffset = csize * reg.base_offset;
2871 Value *indirect = NULL;
2872
2873 if (reg.indirect)
2874 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2875
2876 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2877 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2878
2879 break;
2880 } else {
2881 LValues &newDefs = convert(&insn->dest);
2882 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2883 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2884 }
2885 }
2886 break;
2887 case nir_op_vec2:
2888 case nir_op_vec3:
2889 case nir_op_vec4: {
2890 LValues &newDefs = convert(&insn->dest);
2891 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2892 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2893 }
2894 break;
2895 }
2896 // (un)pack
2897 case nir_op_pack_64_2x32: {
2898 LValues &newDefs = convert(&insn->dest);
2899 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2900 merge->setSrc(0, getSrc(&insn->src[0], 0));
2901 merge->setSrc(1, getSrc(&insn->src[0], 1));
2902 break;
2903 }
2904 case nir_op_pack_half_2x16_split: {
2905 LValues &newDefs = convert(&insn->dest);
2906 Value *tmpH = getSSA();
2907 Value *tmpL = getSSA();
2908
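      // Convert both f32 sources to f16 and pack them with INSBF 0x1010
      // (insert 16 bits at bit 16): result = (f16(src1) << 16) | f16(src0).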
2909 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2910 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2911 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2912 break;
2913 }
2914 case nir_op_unpack_half_2x16_split_x:
2915 case nir_op_unpack_half_2x16_split_y: {
2916 LValues &newDefs = convert(&insn->dest);
2917 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2918 if (op == nir_op_unpack_half_2x16_split_y)
2919 cvt->subOp = 1;
2920 break;
2921 }
2922 case nir_op_unpack_64_2x32: {
2923 LValues &newDefs = convert(&insn->dest);
2924 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2925 break;
2926 }
2927 case nir_op_unpack_64_2x32_split_x: {
2928 LValues &newDefs = convert(&insn->dest);
2929 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2930 break;
2931 }
2932 case nir_op_unpack_64_2x32_split_y: {
2933 LValues &newDefs = convert(&insn->dest);
2934 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2935 break;
2936 }
2937 // special instructions
2938 case nir_op_fsign:
2939 case nir_op_isign: {
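      // Compute sign(x) as (x > 0) - (x < 0). OP_SET yields 1.0f for float
      // comparisons but ~0 (i.e. -1) for integer ones, hence the swapped
      // subtraction order on the integer paths below.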
2940 DEFAULT_CHECKS;
2941 DataType iType;
2942 if (::isFloatType(dType))
2943 iType = TYPE_F32;
2944 else
2945 iType = TYPE_S32;
2946
2947 LValues &newDefs = convert(&insn->dest);
2948 LValue *val0 = getScratch();
2949 LValue *val1 = getScratch();
2950 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2951 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2952
2953 if (dType == TYPE_F64) {
2954 mkOp2(OP_SUB, iType, val0, val0, val1);
2955 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2956 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2957 mkOp2(OP_SUB, iType, val0, val1, val0);
2958 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2959 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2960 } else if (::isFloatType(dType))
2961 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2962 else
2963 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2964 break;
2965 }
2966 case nir_op_fcsel:
2967 case nir_op_b32csel: {
2968 DEFAULT_CHECKS;
2969 LValues &newDefs = convert(&insn->dest);
2970 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2971 break;
2972 }
2973 case nir_op_ibitfield_extract:
2974 case nir_op_ubitfield_extract: {
2975 DEFAULT_CHECKS;
2976 Value *tmp = getSSA();
2977 LValues &newDefs = convert(&insn->dest);
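      // EXTBF takes (offset, bits) packed into one operand; INSBF with
      // 0x808 inserts the low 8 bits of src2 (bits) at bit 8 of src1
      // (offset), i.e. tmp = (bits << 8) | offset.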
2978 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2979 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2980 break;
2981 }
2982 case nir_op_bfm: {
2983 DEFAULT_CHECKS;
2984 LValues &newDefs = convert(&insn->dest);
2985 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2986 break;
2987 }
2988 case nir_op_bitfield_insert: {
2989 DEFAULT_CHECKS;
2990 LValues &newDefs = convert(&insn->dest);
2991 LValue *temp = getSSA();
2992 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2993 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2994 break;
2995 }
2996 case nir_op_bit_count: {
2997 DEFAULT_CHECKS;
2998 LValues &newDefs = convert(&insn->dest);
2999 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
3000 break;
3001 }
3002 case nir_op_bitfield_reverse: {
3003 DEFAULT_CHECKS;
3004 LValues &newDefs = convert(&insn->dest);
3005 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3006 break;
3007 }
3008 case nir_op_find_lsb: {
3009 DEFAULT_CHECKS;
3010 LValues &newDefs = convert(&insn->dest);
3011 Value *tmp = getSSA();
3012 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3013 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3014 break;
3015 }
3016 // boolean conversions
3017 case nir_op_b2f32: {
3018 DEFAULT_CHECKS;
3019 LValues &newDefs = convert(&insn->dest);
3020 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
3021 break;
3022 }
3023 case nir_op_b2f64: {
3024 DEFAULT_CHECKS;
3025 LValues &newDefs = convert(&insn->dest);
3026 Value *tmp = getSSA(4);
3027 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
3028 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
3029 break;
3030 }
3031 case nir_op_f2b32:
3032 case nir_op_i2b32: {
3033 DEFAULT_CHECKS;
3034 LValues &newDefs = convert(&insn->dest);
3035 Value *src1;
3036 if (typeSizeof(sTypes[0]) == 8) {
3037 src1 = loadImm(getSSA(8), 0.0);
3038 } else {
3039 src1 = zero;
3040 }
3041 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
3042 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
3043 break;
3044 }
3045 case nir_op_b2i32: {
3046 DEFAULT_CHECKS;
3047 LValues &newDefs = convert(&insn->dest);
3048 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
3049 break;
3050 }
3051 case nir_op_b2i64: {
3052 DEFAULT_CHECKS;
3053 LValues &newDefs = convert(&insn->dest);
3054 LValue *def = getScratch();
3055 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
3056 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
3057 break;
3058 }
3059 default:
3060 ERROR("unknown nir_op %s\n", info.name);
3061 return false;
3062 }
3063
3064    if (!oldPos) {
3065       oldPos = this->bb->getEntry();
3066       if (oldPos) oldPos->precise = insn->exact;
3067    }
3068
3069 if (unlikely(!oldPos))
3070 return true;
3071
3072 while (oldPos->next) {
3073 oldPos = oldPos->next;
3074 oldPos->precise = insn->exact;
3075 }
3076 oldPos->saturate = insn->dest.saturate;
3077
3078 return true;
3079 }
3080 #undef DEFAULT_CHECKS
3081
3082 bool
3083 Converter::visit(nir_ssa_undef_instr *insn)
3084 {
3085 LValues &newDefs = convert(&insn->def);
3086 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3087 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3088 }
3089 return true;
3090 }
3091
3092 #define CASE_SAMPLER(ty) \
3093 case GLSL_SAMPLER_DIM_ ## ty : \
3094 if (isArray && !isShadow) \
3095 return TEX_TARGET_ ## ty ## _ARRAY; \
3096 else if (!isArray && isShadow) \
3097          return TEX_TARGET_ ## ty ## _SHADOW; \
3098 else if (isArray && isShadow) \
3099          return TEX_TARGET_ ## ty ## _ARRAY_SHADOW; \
3100 else \
3101 return TEX_TARGET_ ## ty
3102
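// e.g. CASE_SAMPLER(2D) selects between TEX_TARGET_2D, TEX_TARGET_2D_ARRAY,
// TEX_TARGET_2D_SHADOW and TEX_TARGET_2D_ARRAY_SHADOW based on the flags.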
3103 TexTarget
3104 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3105 {
3106 switch (dim) {
3107 CASE_SAMPLER(1D);
3108 CASE_SAMPLER(2D);
3109 CASE_SAMPLER(CUBE);
3110 case GLSL_SAMPLER_DIM_3D:
3111 return TEX_TARGET_3D;
3112 case GLSL_SAMPLER_DIM_MS:
3113 if (isArray)
3114 return TEX_TARGET_2D_MS_ARRAY;
3115 return TEX_TARGET_2D_MS;
3116 case GLSL_SAMPLER_DIM_RECT:
3117 if (isShadow)
3118 return TEX_TARGET_RECT_SHADOW;
3119 return TEX_TARGET_RECT;
3120 case GLSL_SAMPLER_DIM_BUF:
3121 return TEX_TARGET_BUFFER;
3122 case GLSL_SAMPLER_DIM_EXTERNAL:
3123 return TEX_TARGET_2D;
3124 default:
3125 ERROR("unknown glsl_sampler_dim %u\n", dim);
3126 assert(false);
3127 return TEX_TARGET_COUNT;
3128 }
3129 }
3130 #undef CASE_SAMPLER
3131
3132 Value*
3133 Converter::applyProjection(Value *src, Value *proj)
3134 {
3135 if (!proj)
3136 return src;
3137 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3138 }
3139
3140 unsigned int
3141 Converter::getNIRArgCount(TexInstruction::Target& target)
3142 {
3143 unsigned int result = target.getArgCount();
3144 if (target.isCube() && target.isArray())
3145 result--;
3146 if (target.isMS())
3147 result--;
3148 return result;
3149 }
3150
3151 uint16_t
3152 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3153 {
3154 typedef std::pair<uint32_t,Value*> DerefPair;
3155 std::list<DerefPair> derefs;
3156
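   // Walk the deref chain from its tail up to the variable: constant array
   // and struct offsets accumulate in 'result' (counted in attribute
   // slots), while each indirect array index is queued as an
   // (elementSize, index) pair and folded into a single 'indirect' below.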
3157 uint16_t result = 0;
3158 while (deref->deref_type != nir_deref_type_var) {
3159 switch (deref->deref_type) {
3160 case nir_deref_type_array: {
3161 Value *indirect;
3162 uint8_t size = type_size(deref->type, true);
3163 result += size * getIndirect(&deref->arr.index, 0, indirect);
3164
3165 if (indirect) {
3166 derefs.push_front(std::make_pair(size, indirect));
3167 }
3168
3169 break;
3170 }
3171 case nir_deref_type_struct: {
3172 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3173 break;
3174 }
3175 case nir_deref_type_var:
3176 default:
3177 unreachable("nir_deref_type_var reached in handleDeref!");
3178 break;
3179 }
3180 deref = nir_deref_instr_parent(deref);
3181 }
3182
3183 indirect = NULL;
3184 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3185 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3186 if (indirect)
3187 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3188 else
3189 indirect = offset;
3190 }
3191
3192 tex = nir_deref_instr_get_variable(deref);
3193 assert(tex);
3194
3195 return result + tex->data.driver_location;
3196 }
3197
3198 CacheMode
3199 Converter::convert(enum gl_access_qualifier access)
3200 {
3201 switch (access) {
3202 case ACCESS_VOLATILE:
3203 return CACHE_CV;
3204 case ACCESS_COHERENT:
3205 return CACHE_CG;
3206 default:
3207 return CACHE_CA;
3208 }
3209 }
3210
3211 CacheMode
3212 Converter::getCacheModeFromVar(const nir_variable *var)
3213 {
3214 return convert(var->data.image.access);
3215 }
3216
3217 bool
3218 Converter::visit(nir_tex_instr *insn)
3219 {
3220 switch (insn->op) {
3221 case nir_texop_lod:
3222 case nir_texop_query_levels:
3223 case nir_texop_tex:
3224 case nir_texop_texture_samples:
3225 case nir_texop_tg4:
3226 case nir_texop_txb:
3227 case nir_texop_txd:
3228 case nir_texop_txf:
3229 case nir_texop_txf_ms:
3230 case nir_texop_txl:
3231 case nir_texop_txs: {
3232 LValues &newDefs = convert(&insn->dest);
3233 std::vector<Value*> srcs;
3234 std::vector<Value*> defs;
3235 std::vector<nir_src*> offsets;
3236 uint8_t mask = 0;
3237 bool lz = false;
3238 Value *proj = NULL;
3239 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3240 operation op = getOperation(insn->op);
3241
3242 int r, s;
3243 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3244 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3245 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3246 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3247 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3248 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3249 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3250 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3251 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3252 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3253 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3254 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3255 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3256
3257 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3258 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3259
3260 if (projIdx != -1)
3261 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3262
3263 srcs.resize(insn->coord_components);
3264 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3265 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3266
3267       // sometimes we get fewer args than target.getArgCount(), but codegen expects the latter
3268 if (insn->coord_components) {
3269 uint32_t argCount = target.getArgCount();
3270
3271 if (target.isMS())
3272 argCount -= 1;
3273
3274 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3275 srcs.push_back(getSSA());
3276 }
3277
3278 if (insn->op == nir_texop_texture_samples)
3279 srcs.push_back(zero);
3280 else if (!insn->num_srcs)
3281 srcs.push_back(loadImm(NULL, 0));
3282 if (biasIdx != -1)
3283 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3284 if (lodIdx != -1)
3285 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3286 else if (op == OP_TXF)
3287 lz = true;
3288 if (msIdx != -1)
3289 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3290 if (offsetIdx != -1)
3291 offsets.push_back(&insn->src[offsetIdx].src);
3292 if (compIdx != -1)
3293 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3294 if (texOffIdx != -1) {
3295 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3296 texOffIdx = srcs.size() - 1;
3297 }
3298 if (sampOffIdx != -1) {
3299 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3300 sampOffIdx = srcs.size() - 1;
3301 }
3302 if (bindless) {
3303          // currently we only use the lower 32 bits of the 64-bit handle
3304 Value *split[2];
3305 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3306
3307 mkSplit(split, 4, handle);
3308
3309 srcs.push_back(split[0]);
3310 texOffIdx = srcs.size() - 1;
3311 }
3312
3313 r = bindless ? 0xff : insn->texture_index;
3314 s = bindless ? 0x1f : insn->sampler_index;
3315
3316 defs.resize(newDefs.size());
3317 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3318 defs[d] = newDefs[d];
3319 mask |= 1 << d;
3320 }
3321 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3322 lz = true;
3323
3324 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3325 texi->tex.levelZero = lz;
3326 texi->tex.mask = mask;
3327 texi->tex.bindless = bindless;
3328
3329 if (texOffIdx != -1)
3330 texi->tex.rIndirectSrc = texOffIdx;
3331 if (sampOffIdx != -1)
3332 texi->tex.sIndirectSrc = sampOffIdx;
3333
3334 switch (insn->op) {
3335 case nir_texop_tg4:
3336 if (!target.isShadow())
3337 texi->tex.gatherComp = insn->component;
3338 break;
3339 case nir_texop_txs:
3340 texi->tex.query = TXQ_DIMS;
3341 break;
3342 case nir_texop_texture_samples:
3343 texi->tex.mask = 0x4;
3344 texi->tex.query = TXQ_TYPE;
3345 break;
3346 case nir_texop_query_levels:
3347 texi->tex.mask = 0x8;
3348 texi->tex.query = TXQ_DIMS;
3349 break;
3350 default:
3351 break;
3352 }
3353
3354 texi->tex.useOffsets = offsets.size();
3355 if (texi->tex.useOffsets) {
3356 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3357 for (uint32_t c = 0u; c < 3; ++c) {
3358 uint8_t s2 = std::min(c, target.getDim() - 1);
3359 texi->offset[s][c].set(getSrc(offsets[s], s2));
3360 texi->offset[s][c].setInsn(texi);
3361 }
3362 }
3363 }
3364
3365 if (op == OP_TXG && offsetIdx == -1) {
3366 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3367 texi->tex.useOffsets = 4;
3368 setPosition(texi, false);
3369 for (uint8_t i = 0; i < 4; ++i) {
3370 for (uint8_t j = 0; j < 2; ++j) {
3371 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3372 texi->offset[i][j].setInsn(texi);
3373 }
3374 }
3375 setPosition(texi, true);
3376 }
3377 }
3378
3379 if (ddxIdx != -1 && ddyIdx != -1) {
3380 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3381 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3382 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3383 }
3384 }
3385
3386 break;
3387 }
3388 default:
3389 ERROR("unknown nir_texop %u\n", insn->op);
3390 return false;
3391 }
3392 return true;
3393 }
3394
3395 bool
3396 Converter::visit(nir_deref_instr *deref)
3397 {
3398    // we just ignore those, because image intrinsics are the only place where
3399    // we should end up with deref sources, and those have to backtrack anyway
3400 // to get the nir_variable. This code just exists to handle some special
3401 // cases.
3402 switch (deref->deref_type) {
3403 case nir_deref_type_array:
3404 case nir_deref_type_struct:
3405 case nir_deref_type_var:
3406 break;
3407 default:
3408 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3409 return false;
3410 }
3411 return true;
3412 }
3413
3414 bool
3415 Converter::run()
3416 {
3417 bool progress;
3418
3419 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3420 nir_print_shader(nir, stderr);
3421
3422 struct nir_lower_subgroups_options subgroup_options = {
3423 .subgroup_size = 32,
3424 .ballot_bit_size = 32,
3425 };
3426
3427 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3428 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3429 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3430 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3431 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3432 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
3433 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3434
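   // Run the usual NIR optimization loop to a fixed point: each pass sets
   // 'progress' and the loop repeats as long as anything changed.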
3435 do {
3436 progress = false;
3437 NIR_PASS(progress, nir, nir_copy_prop);
3438 NIR_PASS(progress, nir, nir_opt_remove_phis);
3439 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3440 NIR_PASS(progress, nir, nir_opt_cse);
3441 NIR_PASS(progress, nir, nir_opt_algebraic);
3442 NIR_PASS(progress, nir, nir_opt_constant_folding);
3443 NIR_PASS(progress, nir, nir_copy_prop);
3444 NIR_PASS(progress, nir, nir_opt_dce);
3445 NIR_PASS(progress, nir, nir_opt_dead_cf);
3446 } while (progress);
3447
3448 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3449 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3450 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3451 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3452
3453 // Garbage collect dead instructions
3454 nir_sweep(nir);
3455
3456 if (!parseNIR()) {
3457       ERROR("Couldn't parse NIR!\n");
3458 return false;
3459 }
3460
3461 if (!assignSlots()) {
3462 ERROR("Couldn't assign slots!\n");
3463 return false;
3464 }
3465
3466 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3467 nir_print_shader(nir, stderr);
3468
3469 nir_foreach_function(function, nir) {
3470 if (!visit(function))
3471 return false;
3472 }
3473
3474 return true;
3475 }
3476
3477 } // unnamed namespace
3478
3479 namespace nv50_ir {
3480
3481 bool
3482 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3483 {
3484 nir_shader *nir = (nir_shader*)info->bin.source;
3485 Converter converter(this, nir, info);
3486 bool result = converter.run();
3487 if (!result)
3488 return result;
3489 LoweringHelper lowering;
3490 lowering.run(this);
3491 tlsSize = info->bin.tlsSpace;
3492 return result;
3493 }
3494
3495 } // namespace nv50_ir