nv50/ir/nir: handle kernel inputs
[mesa.git] src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
/*
 * Copyright 2017 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Karol Herbst <kherbst@redhat.com>
 */

#include "compiler/nir/nir.h"

#include "util/u_debug.h"

#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_from_common.h"
#include "codegen/nv50_ir_lowering_helper.h"
#include "codegen/nv50_ir_util.h"

#if __cplusplus >= 201103L
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include <cstring>
#include <list>
#include <vector>

namespace {

#if __cplusplus >= 201103L
using std::hash;
using std::unordered_map;
#else
using std::tr1::hash;
using std::tr1::unordered_map;
#endif

using namespace nv50_ir;

int
type_size(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}
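
// (type_size above is presumably the slot-counting callback handed to NIR's
// IO lowering passes; note that the bindless parameter is ignored and every
// type is counted in plain attribute slots.)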

class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   typedef std::vector<LValue*> LValues;
   typedef unordered_map<unsigned, LValues> NirDefMap;
   typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
   typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   CacheMode convert(enum gl_access_qualifier);
   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   Value* convert(nir_load_const_instr*, uint8_t);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   ImgFormat convertGLImgFormat(GLuint);

   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // The returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization that lets us skip indirects in a few
   // cases, sometimes we require immediate values or have to set some fields
   // on instructions (e.g. tex) in order for codegen to consume those.
   // If the source has no constant part, the value is returned through the
   // Value reference parameter instead.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   // isScalar indicates that the addressing is scalar; otherwise vec4
   // addressing is assumed.
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
                        bool isScalar = false);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_intrinsic_instr *, bool isSigned);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   operation getOperation(nir_intrinsic_op);
   operation getOperation(nir_op);
   operation getOperation(nir_texop);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_intrinsic_op);
   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_deref_instr *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);
   bool visit(nir_ssa_undef_instr *);
   bool visit(nir_tex_instr *);

   // tex stuff
   Value* applyProjection(Value *src, Value *proj);
   unsigned int getNIRArgCount(TexInstruction::Target&);

   // image stuff
   uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
   CacheMode getCacheModeFromVar(const nir_variable *);

   nir_shader *nir;

   NirDefMap ssaDefs;
   NirDefMap regDefs;
   ImmediateMap immediates;
   NirArrayLMemOffsets regToLmemOffset;
   NirBlockMap blocks;
   unsigned int curLoopDepth;

   BasicBlock *exit;
   Value *zero;
   Instruction *immInsertPos;

   int clipVertexOutput;

   union {
      struct {
         Value *position;
      } fp;
   };
};

Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
   : ConverterCommon(prog, info),
     nir(nir),
     curLoopDepth(0),
     clipVertexOutput(-1)
{
   zero = mkImm((uint32_t)0);
}

BasicBlock *
Converter::convert(nir_block *block)
{
   NirBlockMap::iterator it = blocks.find(block->index);
   if (it != blocks.end())
      return it->second;

   BasicBlock *bb = new BasicBlock(func);
   blocks[block->index] = bb;
   return bb;
}

bool
Converter::isFloatType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_float;
}

bool
Converter::isSignedType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_int;
}

bool
Converter::isResultFloat(nir_op op)
{
   const nir_op_info &info = nir_op_infos[op];
   if (info.output_type != nir_type_invalid)
      return isFloatType(info.output_type);

   ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
   assert(false);
   return true;
}

bool
Converter::isResultSigned(nir_op op)
{
   switch (op) {
   // there is no umul and we get wrong results if we treat all muls as signed
   case nir_op_imul:
   case nir_op_inot:
      return false;
   default:
      const nir_op_info &info = nir_op_infos[op];
      if (info.output_type != nir_type_invalid)
         return isSignedType(info.output_type);
      ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
      assert(false);
      return true;
   }
}

DataType
Converter::getDType(nir_alu_instr *insn)
{
   if (insn->dest.dest.is_ssa)
      return getDType(insn->op, insn->dest.dest.ssa.bit_size);
   else
      return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
}

DataType
Converter::getDType(nir_intrinsic_instr *insn)
{
   bool isSigned;
   switch (insn->intrinsic) {
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imin:
      isSigned = true;
      break;
   default:
      isSigned = false;
      break;
   }

   return getDType(insn, isSigned);
}

DataType
Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
{
   if (insn->dest.is_ssa)
      return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
   else
      return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
}

DataType
Converter::getDType(nir_op op, uint8_t bitSize)
{
   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
   if (ty == TYPE_NONE) {
      ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
      assert(false);
   }
   return ty;
}

std::vector<DataType>
Converter::getSTypes(nir_alu_instr *insn)
{
   const nir_op_info &info = nir_op_infos[insn->op];
   std::vector<DataType> res(info.num_inputs);

   for (uint8_t i = 0; i < info.num_inputs; ++i) {
      if (info.input_types[i] != nir_type_invalid) {
         res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
      } else {
         ERROR("getSType not implemented for %s idx %u\n", info.name, i);
         assert(false);
         res[i] = TYPE_NONE;
         break;
      }
   }

   return res;
}

DataType
Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
{
   uint8_t bitSize;
   if (src.is_ssa)
      bitSize = src.ssa->bit_size;
   else
      bitSize = src.reg.reg->bit_size;

   DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
   if (ty == TYPE_NONE) {
      const char *str;
      if (isFloat)
         str = "float";
      else if (isSigned)
         str = "int";
      else
         str = "uint";
      ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
      assert(false);
   }
   return ty;
}

operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_inot:
      return OP_NOT;
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_fsub:
   case nir_op_isub:
      return OP_SUB;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}
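
// note that the float/int distinction is not encoded in the returned opcode:
// many NIR ops map to the same nv50_ir operation, and the source/destination
// DataTypes computed above carry the type information instead.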

operation
Converter::getOperation(nir_texop op)
{
   switch (op) {
   case nir_texop_tex:
      return OP_TEX;
   case nir_texop_lod:
      return OP_TXLQ;
   case nir_texop_txb:
      return OP_TXB;
   case nir_texop_txd:
      return OP_TXD;
   case nir_texop_txf:
   case nir_texop_txf_ms:
      return OP_TXF;
   case nir_texop_tg4:
      return OP_TXG;
   case nir_texop_txl:
      return OP_TXL;
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_txs:
      return OP_TXQ;
   default:
      ERROR("couldn't get operation for nir_texop %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::getOperation(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_emit_vertex:
      return OP_EMIT;
   case nir_intrinsic_end_primitive:
      return OP_RESTART;
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_max:
   case nir_intrinsic_image_atomic_max:
   case nir_intrinsic_image_deref_atomic_max:
   case nir_intrinsic_bindless_image_atomic_min:
   case nir_intrinsic_image_atomic_min:
   case nir_intrinsic_image_deref_atomic_min:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_xor:
      return OP_SUREDP;
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
      return OP_SULDP;
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
      return OP_SUQ;
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
      return OP_SUSTP;
   default:
      ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::preOperationNeeded(nir_op op)
{
   switch (op) {
   case nir_op_fcos:
   case nir_op_fsin:
      return OP_PRESIN;
   default:
      return OP_NOP;
   }
}

int
Converter::getSubOp(nir_op op)
{
   switch (op) {
   case nir_op_imul_high:
   case nir_op_umul_high:
      return NV50_IR_SUBOP_MUL_HIGH;
   default:
      return 0;
   }
}

int
Converter::getSubOp(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_ssbo_atomic_add:
      return NV50_IR_SUBOP_ATOM_ADD;
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_ssbo_atomic_and:
      return NV50_IR_SUBOP_ATOM_AND;
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return NV50_IR_SUBOP_ATOM_CAS;
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange:
      return NV50_IR_SUBOP_ATOM_EXCH;
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_ssbo_atomic_or:
      return NV50_IR_SUBOP_ATOM_OR;
   case nir_intrinsic_bindless_image_atomic_max:
   case nir_intrinsic_image_atomic_max:
   case nir_intrinsic_image_deref_atomic_max:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
      return NV50_IR_SUBOP_ATOM_MAX;
   case nir_intrinsic_bindless_image_atomic_min:
   case nir_intrinsic_image_atomic_min:
   case nir_intrinsic_image_deref_atomic_min:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
      return NV50_IR_SUBOP_ATOM_MIN;
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor:
      return NV50_IR_SUBOP_ATOM_XOR;

   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_atomic_counter:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
      return NV50_IR_SUBOP_MEMBAR(M, GL);
   case nir_intrinsic_memory_barrier_shared:
      return NV50_IR_SUBOP_MEMBAR(M, CTA);

   case nir_intrinsic_vote_all:
      return NV50_IR_SUBOP_VOTE_ALL;
   case nir_intrinsic_vote_any:
      return NV50_IR_SUBOP_VOTE_ANY;
   case nir_intrinsic_vote_ieq:
      return NV50_IR_SUBOP_VOTE_UNI;
   default:
      return 0;
   }
}
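
// nir_intrinsic_vote_ieq maps to VOTE_UNI: checking that a value is equal
// across all active invocations is the same as checking it is uniform.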

CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}

Converter::LValues&
Converter::convert(nir_alu_dest *dest)
{
   return convert(&dest->dest);
}

Converter::LValues&
Converter::convert(nir_dest *dest)
{
   if (dest->is_ssa)
      return convert(&dest->ssa);
   if (dest->reg.indirect) {
692 ERROR("no support for indirects.");
      assert(false);
   }
   return convert(dest->reg.reg);
}

Converter::LValues&
Converter::convert(nir_register *reg)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it != regDefs.end())
      return it->second;

   LValues newDef(reg->num_components);
   for (uint8_t i = 0; i < reg->num_components; i++)
      newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
   return regDefs[reg->index] = newDef;
}

Converter::LValues&
Converter::convert(nir_ssa_def *def)
{
   NirDefMap::iterator it = ssaDefs.find(def->index);
   if (it != ssaDefs.end())
      return it->second;

   LValues newDef(def->num_components);
   for (uint8_t i = 0; i < def->num_components; i++)
      newDef[i] = getSSA(std::max(4, def->bit_size / 8));
   return ssaDefs[def->index] = newDef;
}
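
// per-component LValues are created lazily and memoized in the maps above;
// note the std::max(4, ...): sub-32-bit values still get a full 32-bit
// value, since everything is kept in (at least) 32-bit registers.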

Value*
Converter::getSrc(nir_alu_src *src, uint8_t component)
{
   if (src->abs || src->negate) {
      ERROR("modifiers currently not supported on nir_alu_src\n");
      assert(false);
   }
   return getSrc(&src->src, src->swizzle[component]);
}

Value*
Converter::getSrc(nir_register *reg, uint8_t idx)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it == regDefs.end())
      return convert(reg)[idx];
   return it->second[idx];
}

Value*
Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
{
   if (src->is_ssa)
      return getSrc(src->ssa, idx);

   if (src->reg.indirect) {
      if (indirect)
         return getSrc(src->reg.indirect, idx);
752 ERROR("no support for indirects.");
      assert(false);
      return NULL;
   }

   return getSrc(src->reg.reg, idx);
}

Value*
Converter::getSrc(nir_ssa_def *src, uint8_t idx)
{
   ImmediateMap::iterator iit = immediates.find(src->index);
   if (iit != immediates.end())
      return convert((*iit).second, idx);

   NirDefMap::iterator it = ssaDefs.find(src->index);
   if (it == ssaDefs.end()) {
      ERROR("SSA value %u not found\n", src->index);
      assert(false);
      return NULL;
   }
   return it->second[idx];
}

uint32_t
Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
{
   nir_const_value *offset = nir_src_as_const_value(*src);

   if (offset) {
      indirect = NULL;
      return offset[0].u32;
   }

   indirect = getSrc(src, idx, true);
   return 0;
}

uint32_t
Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
{
   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
   if (indirect && !isScalar)
      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
   return idx;
}
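
// the SHL by 4 above scales a vec4 slot index to a byte offset: one slot is
// four 32-bit components, i.e. 16 bytes.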

static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}

static void
varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VARYING_SLOT_TESS_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VARYING_SLOT_PATCH0) {
      *name = TGSI_SEMANTIC_PATCH;
      *index = slot - VARYING_SLOT_PATCH0;
      return;
   }

   if (slot >= VARYING_SLOT_VAR0) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VARYING_SLOT_VAR0;
      return;
   }

   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VARYING_SLOT_TEX0;
      return;
   }

   switch (slot) {
   case VARYING_SLOT_BFC0:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 0;
      break;
   case VARYING_SLOT_BFC1:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_DIST0:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 0;
      break;
   case VARYING_SLOT_CLIP_DIST1:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_VERTEX:
      *name = TGSI_SEMANTIC_CLIPVERTEX;
      *index = 0;
      break;
   case VARYING_SLOT_COL0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VARYING_SLOT_COL1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VARYING_SLOT_EDGE:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VARYING_SLOT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      *index = 0;
      break;
   case VARYING_SLOT_FOGC:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VARYING_SLOT_LAYER:
      *name = TGSI_SEMANTIC_LAYER;
      *index = 0;
      break;
   case VARYING_SLOT_PNTC:
      *name = TGSI_SEMANTIC_PCOORD;
      *index = 0;
      break;
   case VARYING_SLOT_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VARYING_SLOT_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      *index = 0;
      break;
   case VARYING_SLOT_PSIZ:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      *index = 0;
      break;
   case VARYING_SLOT_VIEWPORT:
      *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
      *index = 0;
      break;
   default:
      ERROR("unknown varying slot %u\n", slot);
      assert(false);
      break;
   }
}

static void
frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
{
   if (slot >= FRAG_RESULT_DATA0) {
      *name = TGSI_SEMANTIC_COLOR;
      // intentional: maps FRAG_RESULT_DATAn to color index n
      // (FRAG_RESULT_DATA0 == FRAG_RESULT_COLOR + 2 in gl_frag_result)
      *index = slot - FRAG_RESULT_COLOR - 2;
      return;
   }

   switch (slot) {
   case FRAG_RESULT_COLOR:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case FRAG_RESULT_DEPTH:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      *index = 0;
      break;
   default:
      ERROR("unknown frag result slot %u\n", slot);
      assert(false);
      break;
   }
}

// copy of _mesa_sysval_to_semantic
static void
system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
{
   *index = 0;
   switch (val) {
   // Vertex shader
   case SYSTEM_VALUE_VERTEX_ID:
      *name = TGSI_SEMANTIC_VERTEXID;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
      *name = TGSI_SEMANTIC_INSTANCEID;
      break;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
      break;
   case SYSTEM_VALUE_BASE_VERTEX:
      *name = TGSI_SEMANTIC_BASEVERTEX;
      break;
   case SYSTEM_VALUE_BASE_INSTANCE:
      *name = TGSI_SEMANTIC_BASEINSTANCE;
      break;
   case SYSTEM_VALUE_DRAW_ID:
      *name = TGSI_SEMANTIC_DRAWID;
      break;

   // Geometry shader
   case SYSTEM_VALUE_INVOCATION_ID:
      *name = TGSI_SEMANTIC_INVOCATIONID;
      break;

   // Fragment shader
   case SYSTEM_VALUE_FRAG_COORD:
      *name = TGSI_SEMANTIC_POSITION;
      break;
   case SYSTEM_VALUE_FRONT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      break;
   case SYSTEM_VALUE_SAMPLE_ID:
      *name = TGSI_SEMANTIC_SAMPLEID;
      break;
   case SYSTEM_VALUE_SAMPLE_POS:
      *name = TGSI_SEMANTIC_SAMPLEPOS;
      break;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      break;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      *name = TGSI_SEMANTIC_HELPER_INVOCATION;
      break;

   // Tessellation shader
   case SYSTEM_VALUE_TESS_COORD:
      *name = TGSI_SEMANTIC_TESSCOORD;
      break;
   case SYSTEM_VALUE_VERTICES_IN:
      *name = TGSI_SEMANTIC_VERTICESIN;
      break;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      break;

   // Compute shader
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      *name = TGSI_SEMANTIC_THREAD_ID;
      break;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      *name = TGSI_SEMANTIC_BLOCK_ID;
      break;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      *name = TGSI_SEMANTIC_GRID_SIZE;
      break;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      *name = TGSI_SEMANTIC_BLOCK_SIZE;
      break;

   // ARB_shader_ballot
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
      break;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
      break;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
      break;

   default:
      ERROR("unknown system value %u\n", val);
      assert(false);
      break;
   }
}

void
Converter::setInterpolate(nv50_ir_varying *var,
                          uint8_t mode,
                          bool centroid,
                          unsigned semantic)
{
   switch (mode) {
   case INTERP_MODE_FLAT:
      var->flat = 1;
      break;
   case INTERP_MODE_NONE:
      if (semantic == TGSI_SEMANTIC_COLOR)
         var->sc = 1;
      else if (semantic == TGSI_SEMANTIC_POSITION)
         var->linear = 1;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      var->linear = 1;
      break;
   case INTERP_MODE_SMOOTH:
      break;
   }
   var->centroid = centroid;
}
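
// for INTERP_MODE_NONE the API defaults apply: color varyings are flagged
// sc (presumably "shade-model controlled", i.e. flat vs. smooth is decided
// by the bound rasterizer state) and position is forced linear.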

static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}
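
// GS inputs and non-patch tessellation IO are implicitly arrayed (one copy
// per vertex); dividing by gs.vertices_in resp. dropping the first array
// dimension yields the per-vertex slot count.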

bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;
   info->numOutputs = 0;

   // we have to fix up the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   info->numSysVals = 0;
   for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
      if (!(nir->info.system_values_read & 1ull << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   if (prog->getType() == Program::TYPE_COMPUTE)
      return true;

   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ull << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}

uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
1411 ERROR("unknown intrinsic in getSlotAddress %s",
1412 nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

   if (typeSizeof(ty) == 8) {
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
   assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);

   const nv50_ir_varying *vary = input ? info->in : info->out;
   return vary[idx].slot[slot] * 4;
}
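
// a 64-bit value occupies two 32-bit components, so the component offset is
// doubled above and may carry over into the next vec4 slot (idx + 1).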

Instruction *
Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
                    uint32_t base, uint8_t c, Value *indirect0,
                    Value *indirect1, bool patch)
{
   unsigned int tySize = typeSizeof(ty);

   if (tySize == 8 &&
       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
      Value *lo = getSSA();
      Value *hi = getSSA();

      Instruction *loi =
         mkLoad(TYPE_U32, lo,
                mkSymbol(file, i, TYPE_U32, base + c * tySize),
                indirect0);
      loi->setIndirect(0, 1, indirect1);
      loi->perPatch = patch;

      Instruction *hii =
         mkLoad(TYPE_U32, hi,
                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
                indirect0);
      hii->setIndirect(0, 1, indirect1);
      hii->perPatch = patch;

      return mkOp2(OP_MERGE, ty, def, lo, hi);
   } else {
      Instruction *ld =
         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
      ld->setIndirect(0, 1, indirect1);
      ld->perPatch = patch;
      return ld;
   }
}
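
// 64-bit loads from const/buffer space (or any indirect access) are split
// into two 32-bit loads whose results are merged back into a single value.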

void
Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
                   DataType ty, Value *src, uint8_t idx, uint8_t c,
                   Value *indirect0, Value *indirect1)
{
   uint8_t size = typeSizeof(ty);
   uint32_t address = getSlotAddress(insn, idx, c);

   if (size == 8 && indirect0) {
      Value *split[2];
      mkSplit(split, 4, src);

      if (op == OP_EXPORT) {
         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
      }

      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
              split[0])->perPatch = info->out[idx].patch;
      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
              split[1])->perPatch = info->out[idx].patch;
   } else {
      if (op == OP_EXPORT)
         src = mkMov(getSSA(size), src, ty)->getDef(0);
      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
              src)->perPatch = info->out[idx].patch;
   }
}
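
// the extra OP_MOV in the OP_EXPORT paths presumably ensures the exported
// value sits in a freshly allocated register, side-stepping constraints on
// what exports may reference directly.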

bool
Converter::parseNIR()
{
   info->bin.tlsSpace = 0;
   info->io.clipDistances = nir->info.clip_distance_array_size;
   info->io.cullDistances = nir->info.cull_distance_array_size;

   switch(prog->getType()) {
   case Program::TYPE_COMPUTE:
      info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
      info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
      info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
      info->bin.smemSize = nir->info.cs.shared_size;
      break;
   case Program::TYPE_FRAGMENT:
      info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
      info->prop.fp.persampleInvocation =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
      info->prop.fp.readsSampleLocations =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
      info->prop.fp.usesSampleMaskIn =
         !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
      break;
   case Program::TYPE_GEOMETRY:
      info->prop.gp.inputPrim = nir->info.gs.input_primitive;
      info->prop.gp.instanceCount = nir->info.gs.invocations;
      info->prop.gp.maxVertices = nir->info.gs.vertices_out;
      info->prop.gp.outputPrim = nir->info.gs.output_primitive;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      if (nir->info.tess.primitive_mode == GL_ISOLINES)
         info->prop.tp.domain = GL_LINES;
      else
         info->prop.tp.domain = nir->info.tess.primitive_mode;
      info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
      info->prop.tp.outputPrim =
         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
      info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
      info->prop.tp.winding = !nir->info.tess.ccw;
      break;
   case Program::TYPE_VERTEX:
      info->prop.vp.usesDrawParameters =
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
      break;
   default:
      break;
   }

   return true;
}
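
// parseNIR only extracts shader_info metadata into the prog info; no
// instructions are emitted here. note that isoline tessellation is reported
// as a GL_LINES domain.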

bool
Converter::visit(nir_function *function)
{
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   if (info->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   nir_foreach_register(reg, &function->impl->registers) {
      if (reg->num_array_elems) {
         // TODO: packed variables would be nice, but MemoryOpt fails
         // replace 4 with reg->num_components
         uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
         regToLmemOffset[reg->index] = info->bin.tlsSpace;
         info->bin.tlsSpace += size;
      }
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if ((prog->getType() == Program::TYPE_VERTEX ||
        prog->getType() == Program::TYPE_TESSELLATION_EVAL)
       && info->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non-main functions this needs to be an OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}

bool
Converter::visit(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_block:
      return visit(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return visit(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return visit(nir_cf_node_as_loop(node));
   default:
      ERROR("unknown nir_cf_node type %u\n", node->type);
      return false;
   }
}

bool
Converter::visit(nir_block *block)
{
   if (!block->predecessors->entries && block->instr_list.is_empty())
      return true;

   BasicBlock *bb = convert(block);

   setPosition(bb, true);
   nir_foreach_instr(insn, block) {
      if (!visit(insn))
         return false;
   }
   return true;
}

bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert JOINATs if both branches end up at the end of the if
   // again; the reasons for this not happening are breaks/continues/returns,
   // which come with their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastThen), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastElse), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}

bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      if (!insn || !insn->asFlow()) {
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}
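
// OP_PREBREAK/OP_PRECONT declare the break and continue targets before the
// loop body is emitted; OP_CONT closes the backedge when the body simply
// falls through.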

bool
Converter::visit(nir_instr *insn)
{
   // we need an insertion point for immediate loads generated on the fly
   immInsertPos = bb->getExit();
   switch (insn->type) {
   case nir_instr_type_alu:
      return visit(nir_instr_as_alu(insn));
   case nir_instr_type_deref:
      return visit(nir_instr_as_deref(insn));
   case nir_instr_type_intrinsic:
      return visit(nir_instr_as_intrinsic(insn));
   case nir_instr_type_jump:
      return visit(nir_instr_as_jump(insn));
   case nir_instr_type_load_const:
      return visit(nir_instr_as_load_const(insn));
   case nir_instr_type_ssa_undef:
      return visit(nir_instr_as_ssa_undef(insn));
   case nir_instr_type_tex:
      return visit(nir_instr_as_tex(insn));
   default:
      ERROR("unknown nir_instr type %u\n", insn->type);
      return false;
   }
   return true;
}

SVSemantic
Converter::convert(nir_intrinsic_op intr)
{
   switch (intr) {
   case nir_intrinsic_load_base_vertex:
      return SV_BASEVERTEX;
   case nir_intrinsic_load_base_instance:
      return SV_BASEINSTANCE;
   case nir_intrinsic_load_draw_id:
      return SV_DRAWID;
   case nir_intrinsic_load_front_face:
      return SV_FACE;
   case nir_intrinsic_load_helper_invocation:
      return SV_THREAD_KILL;
   case nir_intrinsic_load_instance_id:
      return SV_INSTANCE_ID;
   case nir_intrinsic_load_invocation_id:
      return SV_INVOCATION_ID;
   case nir_intrinsic_load_local_group_size:
      return SV_NTID;
   case nir_intrinsic_load_local_invocation_id:
      return SV_TID;
   case nir_intrinsic_load_num_work_groups:
      return SV_NCTAID;
   case nir_intrinsic_load_patch_vertices_in:
      return SV_VERTEX_COUNT;
   case nir_intrinsic_load_primitive_id:
      return SV_PRIMITIVE_ID;
   case nir_intrinsic_load_sample_id:
      return SV_SAMPLE_INDEX;
   case nir_intrinsic_load_sample_mask_in:
      return SV_SAMPLE_MASK;
   case nir_intrinsic_load_sample_pos:
      return SV_SAMPLE_POS;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SV_LANEMASK_EQ;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SV_LANEMASK_GE;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SV_LANEMASK_GT;
   case nir_intrinsic_load_subgroup_le_mask:
      return SV_LANEMASK_LE;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SV_LANEMASK_LT;
   case nir_intrinsic_load_subgroup_invocation:
      return SV_LANEID;
   case nir_intrinsic_load_tess_coord:
      return SV_TESS_COORD;
   case nir_intrinsic_load_tess_level_inner:
      return SV_TESS_INNER;
   case nir_intrinsic_load_tess_level_outer:
      return SV_TESS_OUTER;
   case nir_intrinsic_load_vertex_id:
      return SV_VERTEX_ID;
   case nir_intrinsic_load_work_group_id:
      return SV_CTAID;
   default:
      ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
            nir_intrinsic_infos[intr].name);
      assert(false);
      return SV_LAST;
   }
}

ImgFormat
Converter::convertGLImgFormat(GLuint format)
{
#define FMT_CASE(a, b) \
  case GL_ ## a: return nv50_ir::FMT_ ## b

   switch (format) {
   FMT_CASE(NONE, NONE);

   FMT_CASE(RGBA32F, RGBA32F);
   FMT_CASE(RGBA16F, RGBA16F);
   FMT_CASE(RG32F, RG32F);
   FMT_CASE(RG16F, RG16F);
   FMT_CASE(R11F_G11F_B10F, R11G11B10F);
   FMT_CASE(R32F, R32F);
   FMT_CASE(R16F, R16F);

   FMT_CASE(RGBA32UI, RGBA32UI);
   FMT_CASE(RGBA16UI, RGBA16UI);
   FMT_CASE(RGB10_A2UI, RGB10A2UI);
   FMT_CASE(RGBA8UI, RGBA8UI);
   FMT_CASE(RG32UI, RG32UI);
   FMT_CASE(RG16UI, RG16UI);
   FMT_CASE(RG8UI, RG8UI);
   FMT_CASE(R32UI, R32UI);
   FMT_CASE(R16UI, R16UI);
   FMT_CASE(R8UI, R8UI);

   FMT_CASE(RGBA32I, RGBA32I);
   FMT_CASE(RGBA16I, RGBA16I);
   FMT_CASE(RGBA8I, RGBA8I);
   FMT_CASE(RG32I, RG32I);
   FMT_CASE(RG16I, RG16I);
   FMT_CASE(RG8I, RG8I);
   FMT_CASE(R32I, R32I);
   FMT_CASE(R16I, R16I);
   FMT_CASE(R8I, R8I);

   FMT_CASE(RGBA16, RGBA16);
   FMT_CASE(RGB10_A2, RGB10A2);
   FMT_CASE(RGBA8, RGBA8);
   FMT_CASE(RG16, RG16);
   FMT_CASE(RG8, RG8);
   FMT_CASE(R16, R16);
   FMT_CASE(R8, R8);

   FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
   FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
   FMT_CASE(RG16_SNORM, RG16_SNORM);
   FMT_CASE(RG8_SNORM, RG8_SNORM);
   FMT_CASE(R16_SNORM, R16_SNORM);
   FMT_CASE(R8_SNORM, R8_SNORM);

   FMT_CASE(BGRA_INTEGER, BGRA8);
   default:
      ERROR("unknown format %x\n", format);
      assert(false);
      return nv50_ir::FMT_NONE;
   }
#undef FMT_CASE
}

bool
Converter::visit(nir_intrinsic_instr *insn)
{
   nir_intrinsic_op op = insn->intrinsic;
   const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];

   switch (op) {
   case nir_intrinsic_load_uniform: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *indirect;
      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
      for (uint8_t i = 0; i < insn->num_components; ++i) {
         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      Value *indirect;
      DataType dType = getSType(insn->src[0], false, false);
      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
            continue;

         uint8_t offset = 0;
         Value *src = getSrc(&insn->src[0], i);
         switch (prog->getType()) {
         case Program::TYPE_FRAGMENT: {
            if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
               // TGSI uses a different convention than NIR: TGSI stores the
               // depth value in the z component, NIR in x
1941 offset += 2;
1942 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1943 }
1944 break;
1945 }
1946 case Program::TYPE_GEOMETRY:
1947 case Program::TYPE_VERTEX: {
1948 if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1949 mkMov(clipVtx[i], src);
1950 src = clipVtx[i];
1951 }
1952 break;
1953 }
1954 default:
1955 break;
1956 }
1957
1958 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1959 }
1960 break;
1961 }
1962 case nir_intrinsic_load_input:
1963 case nir_intrinsic_load_interpolated_input:
1964 case nir_intrinsic_load_output: {
1965 LValues &newDefs = convert(&insn->dest);
1966
1967 // FBFetch
1968 if (prog->getType() == Program::TYPE_FRAGMENT &&
1969 op == nir_intrinsic_load_output) {
1970 std::vector<Value*> defs, srcs;
1971 uint8_t mask = 0;
1972
1973 srcs.push_back(getSSA());
1974 srcs.push_back(getSSA());
1975 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1976 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1977 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1978 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1979
1980 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1981 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1982
1983 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1984 defs.push_back(newDefs[i]);
1985 mask |= 1 << i;
1986 }
1987
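// FB fetch is implemented as a TXF from a 2D MS array view of the bound
// render target, with the integer (x, y, layer, sample) coords gathered
// above; the r/s values of 0xffff appear to act as placeholders that the
// driver resolves to the actual surface.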
1988 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1989 texi->tex.levelZero = 1;
1990 texi->tex.mask = mask;
1991 texi->tex.useOffsets = 0;
1992 texi->tex.r = 0xffff;
1993 texi->tex.s = 0xffff;
1994
1995 info->prop.fp.readsFramebuffer = true;
1996 break;
1997 }
1998
1999 const DataType dType = getDType(insn);
2000 Value *indirect;
2001 bool input = op != nir_intrinsic_load_output;
2002 operation nvirOp;
2003 uint32_t mode = 0;
2004
2005 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
2006 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
2007
2008 // see load_barycentric_* handling
2009 if (prog->getType() == Program::TYPE_FRAGMENT) {
2010 mode = translateInterpMode(&vary, nvirOp);
2011 if (op == nir_intrinsic_load_interpolated_input) {
2012 ImmediateValue immMode;
2013 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
2014 mode |= immMode.reg.data.u32;
2015 }
2016 }
2017
2018 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2019 uint32_t address = getSlotAddress(insn, idx, i);
2020 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
2021 if (prog->getType() == Program::TYPE_FRAGMENT) {
2022 int s = 1;
2023 if (typeSizeof(dType) == 8) {
2024 Value *lo = getSSA();
2025 Value *hi = getSSA();
2026 Instruction *interp;
2027
2028 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
2029 if (nvirOp == OP_PINTERP)
2030 interp->setSrc(s++, fp.position);
2031 if (mode & NV50_IR_INTERP_OFFSET)
2032 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2033 interp->setInterpolate(mode);
2034 interp->setIndirect(0, 0, indirect);
2035
2036 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
2037 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
2038 if (nvirOp == OP_PINTERP)
2039 interp->setSrc(s++, fp.position);
2040 if (mode & NV50_IR_INTERP_OFFSET)
2041 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2042 interp->setInterpolate(mode);
2043 interp->setIndirect(0, 0, indirect);
2044
2045 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
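// there is no native 64-bit interpolation path, so the two 32-bit
// halves at address and address + 4 were fetched separately and are
// merged back into a single value here.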
2046 } else {
2047 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2048 if (nvirOp == OP_PINTERP)
2049 interp->setSrc(s++, fp.position);
2050 if (mode & NV50_IR_INTERP_OFFSET)
2051 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2052 interp->setInterpolate(mode);
2053 interp->setIndirect(0, 0, indirect);
2054 }
2055 } else {
2056 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2057 }
2058 }
2059 break;
2060 }
2061 case nir_intrinsic_load_kernel_input: {
2062 assert(prog->getType() == Program::TYPE_COMPUTE);
2063 assert(insn->num_components == 1);
2064
2065 LValues &newDefs = convert(&insn->dest);
2066 const DataType dType = getDType(insn);
2067 Value *indirect;
2068 uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
2069
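// kernel parameters are fetched from the shader input space at a plain
// byte offset (note the extra flag passed to getIndirect above), unlike
// the vec4-slot addressing used for graphics shader inputs.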
2070 mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
2071 break;
2072 }
2073 case nir_intrinsic_load_barycentric_at_offset:
2074 case nir_intrinsic_load_barycentric_at_sample:
2075 case nir_intrinsic_load_barycentric_centroid:
2076 case nir_intrinsic_load_barycentric_pixel:
2077 case nir_intrinsic_load_barycentric_sample: {
2078 LValues &newDefs = convert(&insn->dest);
2079 uint32_t mode;
2080
2081 if (op == nir_intrinsic_load_barycentric_centroid ||
2082 op == nir_intrinsic_load_barycentric_sample) {
2083 mode = NV50_IR_INTERP_CENTROID;
2084 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
2085 Value *offs[2];
2086 for (uint8_t c = 0; c < 2; c++) {
2087 offs[c] = getScratch();
2088 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2089 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2090 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2091 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2092 }
2093 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
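// worked example of the packing above (assuming the usual INSBF immediate
// encoding, width in bits 15:8 and offset in bits 7:0): the offsets are
// clamped to [-0.5, 0.4375] and scaled by 4096, i.e. converted to signed
// 4.12 fixed point, so an offset of (0.25, -0.25) becomes 0x0400 and
// 0xfc00; INSBF with 0x1010 then inserts 16 bits of offs[1] at bit 16,
// producing 0xfc000400 with x in the low and y in the high half.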
2094
2095 mode = NV50_IR_INTERP_OFFSET;
2096 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2097 mode = NV50_IR_INTERP_DEFAULT;
2098 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2099 info->prop.fp.readsSampleLocations = true;
2100 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2101 mode = NV50_IR_INTERP_OFFSET;
2102 } else {
2103 unreachable("all intrinsics already handled above");
2104 }
2105
2106 loadImm(newDefs[1], mode);
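// these intrinsics yield a vec2 here: component 0 carries the packed
// offset (where applicable), component 1 the NV50_IR_INTERP_* mode that
// load_interpolated_input reads back as an immediate from its first source.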
2107 break;
2108 }
2109 case nir_intrinsic_discard:
2110 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2111 break;
2112 case nir_intrinsic_discard_if: {
2113 Value *pred = getSSA(1, FILE_PREDICATE);
2114 if (insn->num_components > 1) {
2115 ERROR("nir_intrinsic_discard_if is only supported with 1 component!\n");
2116 assert(false);
2117 return false;
2118 }
2119 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2120 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2121 break;
2122 }
2123 case nir_intrinsic_load_base_vertex:
2124 case nir_intrinsic_load_base_instance:
2125 case nir_intrinsic_load_draw_id:
2126 case nir_intrinsic_load_front_face:
2127 case nir_intrinsic_load_helper_invocation:
2128 case nir_intrinsic_load_instance_id:
2129 case nir_intrinsic_load_invocation_id:
2130 case nir_intrinsic_load_local_group_size:
2131 case nir_intrinsic_load_local_invocation_id:
2132 case nir_intrinsic_load_num_work_groups:
2133 case nir_intrinsic_load_patch_vertices_in:
2134 case nir_intrinsic_load_primitive_id:
2135 case nir_intrinsic_load_sample_id:
2136 case nir_intrinsic_load_sample_mask_in:
2137 case nir_intrinsic_load_sample_pos:
2138 case nir_intrinsic_load_subgroup_eq_mask:
2139 case nir_intrinsic_load_subgroup_ge_mask:
2140 case nir_intrinsic_load_subgroup_gt_mask:
2141 case nir_intrinsic_load_subgroup_le_mask:
2142 case nir_intrinsic_load_subgroup_lt_mask:
2143 case nir_intrinsic_load_subgroup_invocation:
2144 case nir_intrinsic_load_tess_coord:
2145 case nir_intrinsic_load_tess_level_inner:
2146 case nir_intrinsic_load_tess_level_outer:
2147 case nir_intrinsic_load_vertex_id:
2148 case nir_intrinsic_load_work_group_id: {
2149 const DataType dType = getDType(insn);
2150 SVSemantic sv = convert(op);
2151 LValues &newDefs = convert(&insn->dest);
2152
2153 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2154 Value *def;
2155 if (typeSizeof(dType) == 8)
2156 def = getSSA();
2157 else
2158 def = newDefs[i];
2159
2160 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2161 loadImm(def, 0u);
2162 } else {
2163 Symbol *sym = mkSysVal(sv, i);
2164 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2165 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2166 rdsv->perPatch = 1;
2167 }
2168
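// system values are at most 32 bits wide, so 64-bit destinations are
// zero-extended by merging in a zero high word below.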
2169 if (typeSizeof(dType) == 8)
2170 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2171 }
2172 break;
2173 }
2174 // constants
2175 case nir_intrinsic_load_subgroup_size: {
2176 LValues &newDefs = convert(&insn->dest);
2177 loadImm(newDefs[0], 32u);
2178 break;
2179 }
2180 case nir_intrinsic_vote_all:
2181 case nir_intrinsic_vote_any:
2182 case nir_intrinsic_vote_ieq: {
2183 LValues &newDefs = convert(&insn->dest);
2184 Value *pred = getScratch(1, FILE_PREDICATE);
2185 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2186 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2187 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2188 break;
2189 }
2190 case nir_intrinsic_ballot: {
2191 LValues &newDefs = convert(&insn->dest);
2192 Value *pred = getSSA(1, FILE_PREDICATE);
2193 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2194 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2195 break;
2196 }
2197 case nir_intrinsic_read_first_invocation:
2198 case nir_intrinsic_read_invocation: {
2199 LValues &newDefs = convert(&insn->dest);
2200 const DataType dType = getDType(insn);
2201 Value *tmp = getScratch();
2202
2203 if (op == nir_intrinsic_read_first_invocation) {
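// a sketch of the lowest-active-lane computation below (assuming the
// usual nv50 EXTBF/BFIND encodings): OP_VOTE/ANY on an all-true source
// yields the active-lane mask, EXTBF with 0x2000 (width 32, offset 0)
// and SUBOP_EXTBF_REV bit-reverses it, and BFIND with SUBOP_BFIND_SAMT
// returns 31 - msb, which after the reversal is the index of the lowest
// set bit, i.e. the first active lane. e.g. a mask of 0b...0100 reverses
// so its MSB lands at bit 29, and 31 - 29 = 2.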
2204 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2205 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2206 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2207 } else
2208 tmp = getSrc(&insn->src[1], 0);
2209
2210 for (uint8_t i = 0; i < insn->num_components; ++i) {
2211 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2212 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2213 }
2214 break;
2215 }
2216 case nir_intrinsic_load_per_vertex_input: {
2217 const DataType dType = getDType(insn);
2218 LValues &newDefs = convert(&insn->dest);
2219 Value *indirectVertex;
2220 Value *indirectOffset;
2221 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2222 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2223
2224 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2225 mkImm(baseVertex), indirectVertex);
2226 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2227 uint32_t address = getSlotAddress(insn, idx, i);
2228 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2229 indirectOffset, vtxBase, info->in[idx].patch);
2230 }
2231 break;
2232 }
2233 case nir_intrinsic_load_per_vertex_output: {
2234 const DataType dType = getDType(insn);
2235 LValues &newDefs = convert(&insn->dest);
2236 Value *indirectVertex;
2237 Value *indirectOffset;
2238 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2239 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2240 Value *vtxBase = NULL;
2241
2242 if (indirectVertex)
2243 vtxBase = indirectVertex;
2244 else
2245 vtxBase = loadImm(NULL, baseVertex);
2246
2247 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2248
2249 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2250 uint32_t address = getSlotAddress(insn, idx, i);
2251 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2252 indirectOffset, vtxBase, info->in[idx].patch);
2253 }
2254 break;
2255 }
2256 case nir_intrinsic_emit_vertex:
2257 if (info->io.genUserClip > 0)
2258 handleUserClipPlanes();
2259 // fallthrough
2260 case nir_intrinsic_end_primitive: {
2261 uint32_t idx = nir_intrinsic_stream_id(insn);
2262 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2263 break;
2264 }
2265 case nir_intrinsic_load_ubo: {
2266 const DataType dType = getDType(insn);
2267 LValues &newDefs = convert(&insn->dest);
2268 Value *indirectIndex;
2269 Value *indirectOffset;
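// the UBO index is biased by 1 below: const buffer 0 holds the default
// uniform block (see load_uniform above), so UBO bindings start at c1[].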
2270 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2271 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2272
2273 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2274 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2275 indirectOffset, indirectIndex);
2276 }
2277 break;
2278 }
2279 case nir_intrinsic_get_buffer_size: {
2280 LValues &newDefs = convert(&insn->dest);
2281 const DataType dType = getDType(insn);
2282 Value *indirectBuffer;
2283 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2284
2285 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2286 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2287 break;
2288 }
2289 case nir_intrinsic_store_ssbo: {
2290 DataType sType = getSType(insn->src[0], false, false);
2291 Value *indirectBuffer;
2292 Value *indirectOffset;
2293 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2294 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2295
2296 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2297 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2298 continue;
2299 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2300 offset + i * typeSizeof(sType));
2301 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2302 ->setIndirect(0, 1, indirectBuffer);
2303 }
2304 info->io.globalAccess |= 0x2;
2305 break;
2306 }
2307 case nir_intrinsic_load_ssbo: {
2308 const DataType dType = getDType(insn);
2309 LValues &newDefs = convert(&insn->dest);
2310 Value *indirectBuffer;
2311 Value *indirectOffset;
2312 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2313 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2314
2315 for (uint8_t i = 0u; i < insn->num_components; ++i)
2316 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2317 indirectOffset, indirectBuffer);
2318
2319 info->io.globalAccess |= 0x1;
2320 break;
2321 }
2322 case nir_intrinsic_shared_atomic_add:
2323 case nir_intrinsic_shared_atomic_and:
2324 case nir_intrinsic_shared_atomic_comp_swap:
2325 case nir_intrinsic_shared_atomic_exchange:
2326 case nir_intrinsic_shared_atomic_or:
2327 case nir_intrinsic_shared_atomic_imax:
2328 case nir_intrinsic_shared_atomic_imin:
2329 case nir_intrinsic_shared_atomic_umax:
2330 case nir_intrinsic_shared_atomic_umin:
2331 case nir_intrinsic_shared_atomic_xor: {
2332 const DataType dType = getDType(insn);
2333 LValues &newDefs = convert(&insn->dest);
2334 Value *indirectOffset;
2335 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2336 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2337 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2338 if (op == nir_intrinsic_shared_atomic_comp_swap)
2339 atom->setSrc(2, getSrc(&insn->src[2], 0));
2340 atom->setIndirect(0, 0, indirectOffset);
2341 atom->subOp = getSubOp(op);
2342 break;
2343 }
2344 case nir_intrinsic_ssbo_atomic_add:
2345 case nir_intrinsic_ssbo_atomic_and:
2346 case nir_intrinsic_ssbo_atomic_comp_swap:
2347 case nir_intrinsic_ssbo_atomic_exchange:
2348 case nir_intrinsic_ssbo_atomic_or:
2349 case nir_intrinsic_ssbo_atomic_imax:
2350 case nir_intrinsic_ssbo_atomic_imin:
2351 case nir_intrinsic_ssbo_atomic_umax:
2352 case nir_intrinsic_ssbo_atomic_umin:
2353 case nir_intrinsic_ssbo_atomic_xor: {
2354 const DataType dType = getDType(insn);
2355 LValues &newDefs = convert(&insn->dest);
2356 Value *indirectBuffer;
2357 Value *indirectOffset;
2358 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2359 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2360
2361 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2362 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2363 getSrc(&insn->src[2], 0));
2364 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2365 atom->setSrc(2, getSrc(&insn->src[3], 0));
2366 atom->setIndirect(0, 0, indirectOffset);
2367 atom->setIndirect(0, 1, indirectBuffer);
2368 atom->subOp = getSubOp(op);
2369
2370 info->io.globalAccess |= 0x2;
2371 break;
2372 }
2373 case nir_intrinsic_bindless_image_atomic_add:
2374 case nir_intrinsic_bindless_image_atomic_and:
2375 case nir_intrinsic_bindless_image_atomic_comp_swap:
2376 case nir_intrinsic_bindless_image_atomic_exchange:
2377 case nir_intrinsic_bindless_image_atomic_max:
2378 case nir_intrinsic_bindless_image_atomic_min:
2379 case nir_intrinsic_bindless_image_atomic_or:
2380 case nir_intrinsic_bindless_image_atomic_xor:
2381 case nir_intrinsic_bindless_image_load:
2382 case nir_intrinsic_bindless_image_samples:
2383 case nir_intrinsic_bindless_image_size:
2384 case nir_intrinsic_bindless_image_store: {
2385 std::vector<Value*> srcs, defs;
2386 Value *indirect = getSrc(&insn->src[0], 0);
2387 DataType ty;
2388
2389 uint32_t mask = 0;
2390 TexInstruction::Target target =
2391 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2392 unsigned int argCount = getNIRArgCount(target);
2393 uint16_t location = 0;
2394
2395 if (opInfo.has_dest) {
2396 LValues &newDefs = convert(&insn->dest);
2397 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2398 defs.push_back(newDefs[i]);
2399 mask |= 1 << i;
2400 }
2401 }
2402
2403 switch (op) {
2404 case nir_intrinsic_bindless_image_atomic_add:
2405 case nir_intrinsic_bindless_image_atomic_and:
2406 case nir_intrinsic_bindless_image_atomic_comp_swap:
2407 case nir_intrinsic_bindless_image_atomic_exchange:
2408 case nir_intrinsic_bindless_image_atomic_max:
2409 case nir_intrinsic_bindless_image_atomic_min:
2410 case nir_intrinsic_bindless_image_atomic_or:
2411 case nir_intrinsic_bindless_image_atomic_xor:
2412 ty = getDType(insn);
2413 mask = 0x1;
2414 info->io.globalAccess |= 0x2;
2415 break;
2416 case nir_intrinsic_bindless_image_load:
2417 ty = TYPE_U32;
2418 info->io.globalAccess |= 0x1;
2419 break;
2420 case nir_intrinsic_bindless_image_store:
2421 ty = TYPE_U32;
2422 mask = 0xf;
2423 info->io.globalAccess |= 0x2;
2424 break;
2425 case nir_intrinsic_bindless_image_samples:
2426 mask = 0x8;
2427 ty = TYPE_U32;
2428 break;
2429 case nir_intrinsic_bindless_image_size:
2430 ty = TYPE_U32;
2431 break;
2432 default:
2433 unreachable("unhandled image opcode");
2434 break;
2435 }
2436
2437 // coords
2438 if (opInfo.num_srcs >= 2)
2439 for (unsigned int i = 0u; i < argCount; ++i)
2440 srcs.push_back(getSrc(&insn->src[1], i));
2441
2442 // the MS sample index is just another src appended after the coords
2443 if (opInfo.num_srcs >= 3 && target.isMS())
2444 srcs.push_back(getSrc(&insn->src[2], 0));
2445
2446 if (opInfo.num_srcs >= 4) {
2447 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2448 for (uint8_t i = 0u; i < components; ++i)
2449 srcs.push_back(getSrc(&insn->src[3], i));
2450 }
2451
2452 if (opInfo.num_srcs >= 5)
2453 // one extra source for the atomic compare-and-swap value
2454 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2455 srcs.push_back(getSrc(&insn->src[4], i));
2456
2457 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2458 // the bindless handle travels through an extra indirect source, see below
2459 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(nir_intrinsic_format(insn))];
2460 texi->tex.mask = mask;
2461 texi->tex.bindless = true;
2462 texi->cache = convert(nir_intrinsic_access(insn));
2463 texi->setType(ty);
2464 texi->subOp = getSubOp(op);
2465
2466 if (indirect)
2467 texi->setIndirectR(indirect);
2468
2469 break;
2470 }
2471 case nir_intrinsic_image_deref_atomic_add:
2472 case nir_intrinsic_image_deref_atomic_and:
2473 case nir_intrinsic_image_deref_atomic_comp_swap:
2474 case nir_intrinsic_image_deref_atomic_exchange:
2475 case nir_intrinsic_image_deref_atomic_max:
2476 case nir_intrinsic_image_deref_atomic_min:
2477 case nir_intrinsic_image_deref_atomic_or:
2478 case nir_intrinsic_image_deref_atomic_xor:
2479 case nir_intrinsic_image_deref_load:
2480 case nir_intrinsic_image_deref_samples:
2481 case nir_intrinsic_image_deref_size:
2482 case nir_intrinsic_image_deref_store: {
2483 const nir_variable *tex;
2484 std::vector<Value*> srcs, defs;
2485 Value *indirect;
2486 DataType ty;
2487
2488 uint32_t mask = 0;
2489 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2490 const glsl_type *type = deref->type;
2491 TexInstruction::Target target =
2492 convert((glsl_sampler_dim)type->sampler_dimensionality,
2493 type->sampler_array, type->sampler_shadow);
2494 unsigned int argCount = getNIRArgCount(target);
2495 uint16_t location = handleDeref(deref, indirect, tex);
2496
2497 if (opInfo.has_dest) {
2498 LValues &newDefs = convert(&insn->dest);
2499 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2500 defs.push_back(newDefs[i]);
2501 mask |= 1 << i;
2502 }
2503 }
2504
2505 switch (op) {
2506 case nir_intrinsic_image_deref_atomic_add:
2507 case nir_intrinsic_image_deref_atomic_and:
2508 case nir_intrinsic_image_deref_atomic_comp_swap:
2509 case nir_intrinsic_image_deref_atomic_exchange:
2510 case nir_intrinsic_image_deref_atomic_max:
2511 case nir_intrinsic_image_deref_atomic_min:
2512 case nir_intrinsic_image_deref_atomic_or:
2513 case nir_intrinsic_image_deref_atomic_xor:
2514 ty = getDType(insn);
2515 mask = 0x1;
2516 info->io.globalAccess |= 0x2;
2517 break;
2518 case nir_intrinsic_image_deref_load:
2519 ty = TYPE_U32;
2520 info->io.globalAccess |= 0x1;
2521 break;
2522 case nir_intrinsic_image_deref_store:
2523 ty = TYPE_U32;
2524 mask = 0xf;
2525 info->io.globalAccess |= 0x2;
2526 break;
2527 case nir_intrinsic_image_deref_samples:
2528 mask = 0x8;
2529 ty = TYPE_U32;
2530 break;
2531 case nir_intrinsic_image_deref_size:
2532 ty = TYPE_U32;
2533 break;
2534 default:
2535 unreachable("unhandled image opcode");
2536 break;
2537 }
2538
2539 // coords
2540 if (opInfo.num_srcs >= 2)
2541 for (unsigned int i = 0u; i < argCount; ++i)
2542 srcs.push_back(getSrc(&insn->src[1], i));
2543
2544 // the MS sample index is just another src appended after the coords
2545 if (opInfo.num_srcs >= 3 && target.isMS())
2546 srcs.push_back(getSrc(&insn->src[2], 0));
2547
2548 if (opInfo.num_srcs >= 4) {
2549 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2550 for (uint8_t i = 0u; i < components; ++i)
2551 srcs.push_back(getSrc(&insn->src[3], i));
2552 }
2553
2554 if (opInfo.num_srcs >= 5)
2555 // one extra source for the atomic compare-and-swap value
2556 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2557 srcs.push_back(getSrc(&insn->src[4], i));
2558
2559 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2560 texi->tex.bindless = false;
2561 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2562 texi->tex.mask = mask;
2563 texi->cache = getCacheModeFromVar(tex);
2564 texi->setType(ty);
2565 texi->subOp = getSubOp(op);
2566
2567 if (indirect)
2568 texi->setIndirectR(indirect);
2569
2570 break;
2571 }
2572 case nir_intrinsic_store_shared: {
2573 DataType sType = getSType(insn->src[0], false, false);
2574 Value *indirectOffset;
2575 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2576
2577 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2578 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2579 continue;
2580 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2581 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2582 }
2583 break;
2584 }
2585 case nir_intrinsic_load_shared: {
2586 const DataType dType = getDType(insn);
2587 LValues &newDefs = convert(&insn->dest);
2588 Value *indirectOffset;
2589 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2590
2591 for (uint8_t i = 0u; i < insn->num_components; ++i)
2592 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2593
2594 break;
2595 }
2596 case nir_intrinsic_barrier: {
2597 // TODO: add flag to shader_info
2598 info->numBarriers = 1;
2599 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2600 bar->fixed = 1;
2601 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2602 break;
2603 }
2604 case nir_intrinsic_group_memory_barrier:
2605 case nir_intrinsic_memory_barrier:
2606 case nir_intrinsic_memory_barrier_atomic_counter:
2607 case nir_intrinsic_memory_barrier_buffer:
2608 case nir_intrinsic_memory_barrier_image:
2609 case nir_intrinsic_memory_barrier_shared: {
2610 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2611 bar->fixed = 1;
2612 bar->subOp = getSubOp(op);
2613 break;
2614 }
2615 case nir_intrinsic_shader_clock: {
2616 const DataType dType = getDType(insn);
2617 LValues &newDefs = convert(&insn->dest);
2618
2619 loadImm(newDefs[0], 0u);
2620 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2621 break;
2622 }
2623 default:
2624 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2625 return false;
2626 }
2627
2628 return true;
2629 }
2630
2631 bool
2632 Converter::visit(nir_jump_instr *insn)
2633 {
2634 switch (insn->type) {
2635 case nir_jump_return:
2636 // TODO: this only works in the main function
2637 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2638 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2639 break;
2640 case nir_jump_break:
2641 case nir_jump_continue: {
2642 bool isBreak = insn->type == nir_jump_break;
2643 nir_block *block = insn->instr.block;
2644 assert(!block->successors[1]);
2645 BasicBlock *target = convert(block->successors[0]);
2646 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2647 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2648 break;
2649 }
2650 default:
2651 ERROR("unknown nir_jump_type %u\n", insn->type);
2652 return false;
2653 }
2654
2655 return true;
2656 }
2657
2658 Value*
2659 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2660 {
2661 Value *val;
2662
2663 if (immInsertPos)
2664 setPosition(immInsertPos, true);
2665 else
2666 setPosition(bb, false);
2667
2668 switch (insn->def.bit_size) {
2669 case 64:
2670 val = loadImm(getSSA(8), insn->value[idx].u64);
2671 break;
2672 case 32:
2673 val = loadImm(getSSA(4), insn->value[idx].u32);
2674 break;
2675 case 16:
2676 val = loadImm(getSSA(2), insn->value[idx].u16);
2677 break;
2678 case 8:
2679 val = loadImm(getSSA(1), insn->value[idx].u8);
2680 break;
2681 default:
2682 unreachable("unhandled bit size!\n");
2683 }
2684 setPosition(bb, true);
2685 return val;
2686 }
2687
2688 bool
2689 Converter::visit(nir_load_const_instr *insn)
2690 {
2691 assert(insn->def.bit_size <= 64);
2692 immediates[insn->def.index] = insn;
2693 return true;
2694 }
2695
2696 #define DEFAULT_CHECKS \
2697 if (insn->dest.dest.ssa.num_components > 1) { \
2698 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2699 return false; \
2700 } \
2701 if (insn->dest.write_mask != 1) { \
2702 ERROR("nir_alu_instr only supported with a write_mask of 1!\n"); \
2703 return false; \
2704 }
2705 bool
2706 Converter::visit(nir_alu_instr *insn)
2707 {
2708 const nir_op op = insn->op;
2709 const nir_op_info &info = nir_op_infos[op];
2710 DataType dType = getDType(insn);
2711 const std::vector<DataType> sTypes = getSTypes(insn);
2712
2713 Instruction *oldPos = this->bb->getExit();
2714
2715 switch (op) {
2716 case nir_op_fabs:
2717 case nir_op_iabs:
2718 case nir_op_fadd:
2719 case nir_op_iadd:
2720 case nir_op_iand:
2721 case nir_op_fceil:
2722 case nir_op_fcos:
2723 case nir_op_fddx:
2724 case nir_op_fddx_coarse:
2725 case nir_op_fddx_fine:
2726 case nir_op_fddy:
2727 case nir_op_fddy_coarse:
2728 case nir_op_fddy_fine:
2729 case nir_op_fdiv:
2730 case nir_op_idiv:
2731 case nir_op_udiv:
2732 case nir_op_fexp2:
2733 case nir_op_ffloor:
2734 case nir_op_ffma:
2735 case nir_op_flog2:
2736 case nir_op_fmax:
2737 case nir_op_imax:
2738 case nir_op_umax:
2739 case nir_op_fmin:
2740 case nir_op_imin:
2741 case nir_op_umin:
2742 case nir_op_fmod:
2743 case nir_op_imod:
2744 case nir_op_umod:
2745 case nir_op_fmul:
2746 case nir_op_imul:
2747 case nir_op_imul_high:
2748 case nir_op_umul_high:
2749 case nir_op_fneg:
2750 case nir_op_ineg:
2751 case nir_op_inot:
2752 case nir_op_ior:
2753 case nir_op_pack_64_2x32_split:
2754 case nir_op_fpow:
2755 case nir_op_frcp:
2756 case nir_op_frem:
2757 case nir_op_irem:
2758 case nir_op_frsq:
2759 case nir_op_fsat:
2760 case nir_op_ishr:
2761 case nir_op_ushr:
2762 case nir_op_fsin:
2763 case nir_op_fsqrt:
2764 case nir_op_fsub:
2765 case nir_op_isub:
2766 case nir_op_ftrunc:
2767 case nir_op_ishl:
2768 case nir_op_ixor: {
2769 DEFAULT_CHECKS;
2770 LValues &newDefs = convert(&insn->dest);
2771 operation preOp = preOperationNeeded(op);
2772 if (preOp != OP_NOP) {
2773 assert(info.num_inputs < 2);
2774 Value *tmp = getSSA(typeSizeof(dType));
2775 Instruction *i0 = mkOp(preOp, dType, tmp);
2776 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2777 if (info.num_inputs) {
2778 i0->setSrc(0, getSrc(&insn->src[0]));
2779 i1->setSrc(0, tmp);
2780 }
2781 i1->subOp = getSubOp(op);
2782 } else {
2783 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2784 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2785 i->setSrc(s, getSrc(&insn->src[s]));
2786 }
2787 i->subOp = getSubOp(op);
2788 }
2789 break;
2790 }
2791 case nir_op_ifind_msb:
2792 case nir_op_ufind_msb: {
2793 DEFAULT_CHECKS;
2794 LValues &newDefs = convert(&insn->dest);
2795 dType = sTypes[0];
2796 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2797 break;
2798 }
2799 case nir_op_fround_even: {
2800 DEFAULT_CHECKS;
2801 LValues &newDefs = convert(&insn->dest);
2802 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2803 break;
2804 }
2805 // convert instructions
2806 case nir_op_f2f32:
2807 case nir_op_f2i32:
2808 case nir_op_f2u32:
2809 case nir_op_i2f32:
2810 case nir_op_i2i32:
2811 case nir_op_u2f32:
2812 case nir_op_u2u32:
2813 case nir_op_f2f64:
2814 case nir_op_f2i64:
2815 case nir_op_f2u64:
2816 case nir_op_i2f64:
2817 case nir_op_i2i64:
2818 case nir_op_u2f64:
2819 case nir_op_u2u64: {
2820 DEFAULT_CHECKS;
2821 LValues &newDefs = convert(&insn->dest);
2822 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2823 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2824 i->rnd = ROUND_Z;
2825 i->sType = sTypes[0];
2826 break;
2827 }
2828 // compare instructions
2829 case nir_op_feq32:
2830 case nir_op_ieq32:
2831 case nir_op_fge32:
2832 case nir_op_ige32:
2833 case nir_op_uge32:
2834 case nir_op_flt32:
2835 case nir_op_ilt32:
2836 case nir_op_ult32:
2837 case nir_op_fne32:
2838 case nir_op_ine32: {
2839 DEFAULT_CHECKS;
2840 LValues &newDefs = convert(&insn->dest);
2841 Instruction *i = mkCmp(getOperation(op),
2842 getCondCode(op),
2843 dType,
2844 newDefs[0],
2845 dType,
2846 getSrc(&insn->src[0]),
2847 getSrc(&insn->src[1]));
2848 if (info.num_inputs == 3)
2849 i->setSrc(2, getSrc(&insn->src[2]));
2850 i->sType = sTypes[0];
2851 break;
2852 }
2853 // these ALU ops are odd and need special handling, because
2854 // 1. they are always component-based
2855 // 2. they basically just merge multiple values into one data type
2856 case nir_op_mov:
2857 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2858 nir_reg_dest& reg = insn->dest.dest.reg;
2859 uint32_t goffset = regToLmemOffset[reg.reg->index];
2860 uint8_t comps = reg.reg->num_components;
2861 uint8_t size = reg.reg->bit_size / 8;
2862 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2863 uint32_t aoffset = csize * reg.base_offset;
2864 Value *indirect = NULL;
2865
2866 if (reg.indirect)
2867 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2868 getSrc(reg.indirect, 0), mkImm(csize));
2869
2870 for (uint8_t i = 0u; i < comps; ++i) {
2871 if (!((1u << i) & insn->dest.write_mask))
2872 continue;
2873
2874 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2875 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2876 }
2877 break;
2878 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2879 LValues &newDefs = convert(&insn->dest);
2880 nir_reg_src& reg = insn->src[0].src.reg;
2881 uint32_t goffset = regToLmemOffset[reg.reg->index];
2882 // uint8_t comps = reg.reg->num_components;
2883 uint8_t size = reg.reg->bit_size / 8;
2884 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2885 uint32_t aoffset = csize * reg.base_offset;
2886 Value *indirect = NULL;
2887
2888 if (reg.indirect)
2889 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2890
2891 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2892 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2893
2894 break;
2895 } else {
2896 LValues &newDefs = convert(&insn->dest);
2897 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2898 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2899 }
2900 }
2901 break;
2902 case nir_op_vec2:
2903 case nir_op_vec3:
2904 case nir_op_vec4: {
2905 LValues &newDefs = convert(&insn->dest);
2906 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2907 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2908 }
2909 break;
2910 }
2911 // (un)pack
2912 case nir_op_pack_64_2x32: {
2913 LValues &newDefs = convert(&insn->dest);
2914 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2915 merge->setSrc(0, getSrc(&insn->src[0], 0));
2916 merge->setSrc(1, getSrc(&insn->src[0], 1));
2917 break;
2918 }
2919 case nir_op_pack_half_2x16_split: {
2920 LValues &newDefs = convert(&insn->dest);
2921 Value *tmpH = getSSA();
2922 Value *tmpL = getSSA();
2923
2924 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2925 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2926 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2927 break;
2928 }
2929 case nir_op_unpack_half_2x16_split_x:
2930 case nir_op_unpack_half_2x16_split_y: {
2931 LValues &newDefs = convert(&insn->dest);
2932 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2933 if (op == nir_op_unpack_half_2x16_split_y)
2934 cvt->subOp = 1;
2935 break;
2936 }
2937 case nir_op_unpack_64_2x32: {
2938 LValues &newDefs = convert(&insn->dest);
2939 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2940 break;
2941 }
2942 case nir_op_unpack_64_2x32_split_x: {
2943 LValues &newDefs = convert(&insn->dest);
2944 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2945 break;
2946 }
2947 case nir_op_unpack_64_2x32_split_y: {
2948 LValues &newDefs = convert(&insn->dest);
2949 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2950 break;
2951 }
2952 // special instructions
2953 case nir_op_fsign:
2954 case nir_op_isign: {
2955 DEFAULT_CHECKS;
2956 DataType iType;
2957 if (::isFloatType(dType))
2958 iType = TYPE_F32;
2959 else
2960 iType = TYPE_S32;
2961
2962 LValues &newDefs = convert(&insn->dest);
2963 LValue *val0 = getScratch();
2964 LValue *val1 = getScratch();
2965 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2966 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2967
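// classic sign(x) = (x > 0) - (x < 0): the float-typed SETs above yield
// 1.0f/0.0f and are combined as val0 - val1, while the integer-typed
// SETs yield -1/0, so the operands are swapped (val1 - val0) to still
// produce -1, 0 or 1.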
2968 if (dType == TYPE_F64) {
2969 mkOp2(OP_SUB, iType, val0, val0, val1);
2970 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2971 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2972 mkOp2(OP_SUB, iType, val0, val1, val0);
2973 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2974 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2975 } else if (::isFloatType(dType))
2976 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2977 else
2978 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2979 break;
2980 }
2981 case nir_op_fcsel:
2982 case nir_op_b32csel: {
2983 DEFAULT_CHECKS;
2984 LValues &newDefs = convert(&insn->dest);
2985 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2986 break;
2987 }
2988 case nir_op_ibitfield_extract:
2989 case nir_op_ubitfield_extract: {
2990 DEFAULT_CHECKS;
2991 Value *tmp = getSSA();
2992 LValues &newDefs = convert(&insn->dest);
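// the INSBF below assembles the bitfield descriptor EXTBF consumes
// (assuming the usual encoding, width in bits 15:8 and offset in bits
// 7:0): 0x808 inserts 8 bits of the width operand at bit 8 on top of
// the offset, e.g. offset 4 and width 8 yield 0x0804.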
2993 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2994 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2995 break;
2996 }
2997 case nir_op_bfm: {
2998 DEFAULT_CHECKS;
2999 LValues &newDefs = convert(&insn->dest);
3000 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3001 break;
3002 }
3003 case nir_op_bitfield_insert: {
3004 DEFAULT_CHECKS;
3005 LValues &newDefs = convert(&insn->dest);
3006 LValue *temp = getSSA();
3007 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
3008 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
3009 break;
3010 }
3011 case nir_op_bit_count: {
3012 DEFAULT_CHECKS;
3013 LValues &newDefs = convert(&insn->dest);
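// the hardware popcount counts the set bits of src0 & src1, so the same
// value is passed twice to obtain a plain bit count.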
3014 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
3015 break;
3016 }
3017 case nir_op_bitfield_reverse: {
3018 DEFAULT_CHECKS;
3019 LValues &newDefs = convert(&insn->dest);
3020 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3021 break;
3022 }
3023 case nir_op_find_lsb: {
3024 DEFAULT_CHECKS;
3025 LValues &newDefs = convert(&insn->dest);
3026 Value *tmp = getSSA();
3027 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3028 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3029 break;
3030 }
3031 // boolean conversions
3032 case nir_op_b2f32: {
3033 DEFAULT_CHECKS;
3034 LValues &newDefs = convert(&insn->dest);
3035 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
3036 break;
3037 }
3038 case nir_op_b2f64: {
3039 DEFAULT_CHECKS;
3040 LValues &newDefs = convert(&insn->dest);
3041 Value *tmp = getSSA(4);
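// 0x3ff00000 is the high dword of the IEEE-754 double 1.0
// (0x3ff0000000000000): ANDed with the all-ones b32 true value it
// yields either that high word or 0, and merging with a zero low dword
// produces exactly 1.0 or 0.0.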
3042 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
3043 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
3044 break;
3045 }
3046 case nir_op_f2b32:
3047 case nir_op_i2b32: {
3048 DEFAULT_CHECKS;
3049 LValues &newDefs = convert(&insn->dest);
3050 Value *src1;
3051 if (typeSizeof(sTypes[0]) == 8) {
3052 src1 = loadImm(getSSA(8), 0.0);
3053 } else {
3054 src1 = zero;
3055 }
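// floats use the unordered compare CC_NEU so that NaN sources (which
// compare unequal to zero) also map to true, matching f2b semantics.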
3056 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
3057 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
3058 break;
3059 }
3060 case nir_op_b2i32: {
3061 DEFAULT_CHECKS;
3062 LValues &newDefs = convert(&insn->dest);
3063 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
3064 break;
3065 }
3066 case nir_op_b2i64: {
3067 DEFAULT_CHECKS;
3068 LValues &newDefs = convert(&insn->dest);
3069 LValue *def = getScratch();
3070 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
3071 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
3072 break;
3073 }
3074 default:
3075 ERROR("unknown nir_op %s\n", info.name);
3076 return false;
3077 }
3078
3079 if (!oldPos) {
3080 oldPos = this->bb->getEntry();
3081 // guard against an empty BB before touching the entry instruction
3082 if (unlikely(!oldPos))
3083 return true;
3084 oldPos->precise = insn->exact;
3085 }
3086 
3087 while (oldPos->next) {
3088 oldPos = oldPos->next;
3089 oldPos->precise = insn->exact;
3090 }
3091 oldPos->saturate = insn->dest.saturate;
3092
3093 return true;
3094 }
3095 #undef DEFAULT_CHECKS
3096
3097 bool
3098 Converter::visit(nir_ssa_undef_instr *insn)
3099 {
3100 LValues &newDefs = convert(&insn->def);
3101 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3102 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3103 }
3104 return true;
3105 }
3106
3107 #define CASE_SAMPLER(ty) \
3108 case GLSL_SAMPLER_DIM_ ## ty : \
3109 if (isArray && !isShadow) \
3110 return TEX_TARGET_ ## ty ## _ARRAY; \
3111 else if (!isArray && isShadow) \
3112 return TEX_TARGET_ ## ty ## _SHADOW; \
3113 else if (isArray && isShadow) \
3114 return TEX_TARGET_ ## ty ## _ARRAY_SHADOW; \
3115 else \
3116 return TEX_TARGET_ ## ty
3117
3118 TexTarget
3119 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3120 {
3121 switch (dim) {
3122 CASE_SAMPLER(1D);
3123 CASE_SAMPLER(2D);
3124 CASE_SAMPLER(CUBE);
3125 case GLSL_SAMPLER_DIM_3D:
3126 return TEX_TARGET_3D;
3127 case GLSL_SAMPLER_DIM_MS:
3128 if (isArray)
3129 return TEX_TARGET_2D_MS_ARRAY;
3130 return TEX_TARGET_2D_MS;
3131 case GLSL_SAMPLER_DIM_RECT:
3132 if (isShadow)
3133 return TEX_TARGET_RECT_SHADOW;
3134 return TEX_TARGET_RECT;
3135 case GLSL_SAMPLER_DIM_BUF:
3136 return TEX_TARGET_BUFFER;
3137 case GLSL_SAMPLER_DIM_EXTERNAL:
3138 return TEX_TARGET_2D;
3139 default:
3140 ERROR("unknown glsl_sampler_dim %u\n", dim);
3141 assert(false);
3142 return TEX_TARGET_COUNT;
3143 }
3144 }
3145 #undef CASE_SAMPLER
3146
3147 Value*
3148 Converter::applyProjection(Value *src, Value *proj)
3149 {
3150 if (!proj)
3151 return src;
3152 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3153 }
3154
3155 unsigned int
3156 Converter::getNIRArgCount(TexInstruction::Target& target)
3157 {
3158 unsigned int result = target.getArgCount();
3159 if (target.isCube() && target.isArray())
3160 result--;
3161 if (target.isMS())
3162 result--;
3163 return result;
3164 }
3165
3166 uint16_t
3167 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3168 {
3169 typedef std::pair<uint32_t,Value*> DerefPair;
3170 std::list<DerefPair> derefs;
3171
3172 uint16_t result = 0;
3173 while (deref->deref_type != nir_deref_type_var) {
3174 switch (deref->deref_type) {
3175 case nir_deref_type_array: {
3176 Value *indirect;
3177 uint8_t size = type_size(deref->type, true);
3178 result += size * getIndirect(&deref->arr.index, 0, indirect);
3179
3180 if (indirect) {
3181 derefs.push_front(std::make_pair(size, indirect));
3182 }
3183
3184 break;
3185 }
3186 case nir_deref_type_struct: {
3187 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3188 break;
3189 }
3190 case nir_deref_type_var:
3191 default:
3192 unreachable("nir_deref_type_var reached in handleDeref!");
3193 break;
3194 }
3195 deref = nir_deref_instr_parent(deref);
3196 }
3197
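// fold the collected per-level indirects into a single offset: each
// array level contributes index * element_size, summed into one value.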
3198 indirect = NULL;
3199 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3200 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3201 if (indirect)
3202 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3203 else
3204 indirect = offset;
3205 }
3206
3207 tex = nir_deref_instr_get_variable(deref);
3208 assert(tex);
3209
3210 return result + tex->data.driver_location;
3211 }
3212
3213 CacheMode
3214 Converter::convert(enum gl_access_qualifier access)
3215 {
3216 switch (access) {
3217 case ACCESS_VOLATILE:
3218 return CACHE_CV;
3219 case ACCESS_COHERENT:
3220 return CACHE_CG;
3221 default:
3222 return CACHE_CA;
3223 }
3224 }
3225
3226 CacheMode
3227 Converter::getCacheModeFromVar(const nir_variable *var)
3228 {
3229 return convert(var->data.image.access);
3230 }
3231
3232 bool
3233 Converter::visit(nir_tex_instr *insn)
3234 {
3235 switch (insn->op) {
3236 case nir_texop_lod:
3237 case nir_texop_query_levels:
3238 case nir_texop_tex:
3239 case nir_texop_texture_samples:
3240 case nir_texop_tg4:
3241 case nir_texop_txb:
3242 case nir_texop_txd:
3243 case nir_texop_txf:
3244 case nir_texop_txf_ms:
3245 case nir_texop_txl:
3246 case nir_texop_txs: {
3247 LValues &newDefs = convert(&insn->dest);
3248 std::vector<Value*> srcs;
3249 std::vector<Value*> defs;
3250 std::vector<nir_src*> offsets;
3251 uint8_t mask = 0;
3252 bool lz = false;
3253 Value *proj = NULL;
3254 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3255 operation op = getOperation(insn->op);
3256
3257 int r, s;
3258 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3259 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3260 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3261 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3262 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3263 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3264 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3265 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3266 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3267 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3268 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3269 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3270 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3271
3272 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3273 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3274
3275 if (projIdx != -1)
3276 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3277
3278 srcs.resize(insn->coord_components);
3279 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3280 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3281
3282 // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3283 if (insn->coord_components) {
3284 uint32_t argCount = target.getArgCount();
3285
3286 if (target.isMS())
3287 argCount -= 1;
3288
3289 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3290 srcs.push_back(getSSA());
3291 }
3292
3293 if (insn->op == nir_texop_texture_samples)
3294 srcs.push_back(zero);
3295 else if (!insn->num_srcs)
3296 srcs.push_back(loadImm(NULL, 0));
3297 if (biasIdx != -1)
3298 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3299 if (lodIdx != -1)
3300 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3301 else if (op == OP_TXF)
3302 lz = true;
3303 if (msIdx != -1)
3304 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3305 if (offsetIdx != -1)
3306 offsets.push_back(&insn->src[offsetIdx].src);
3307 if (compIdx != -1)
3308 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3309 if (texOffIdx != -1) {
3310 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3311 texOffIdx = srcs.size() - 1;
3312 }
3313 if (sampOffIdx != -1) {
3314 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3315 sampOffIdx = srcs.size() - 1;
3316 }
3317 if (bindless) {
3318 // currently we only use the lower 32 bits of the 64-bit handle
3319 Value *split[2];
3320 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3321
3322 mkSplit(split, 4, handle);
3323
3324 srcs.push_back(split[0]);
3325 texOffIdx = srcs.size() - 1;
3326 }
3327
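// for bindless the r/s indices are mere placeholders (0xff/0x1f); the
// real handle is supplied through the indirect source set up above.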
3328 r = bindless ? 0xff : insn->texture_index;
3329 s = bindless ? 0x1f : insn->sampler_index;
3330
3331 defs.resize(newDefs.size());
3332 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3333 defs[d] = newDefs[d];
3334 mask |= 1 << d;
3335 }
3336 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3337 lz = true;
3338
3339 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3340 texi->tex.levelZero = lz;
3341 texi->tex.mask = mask;
3342 texi->tex.bindless = bindless;
3343
3344 if (texOffIdx != -1)
3345 texi->tex.rIndirectSrc = texOffIdx;
3346 if (sampOffIdx != -1)
3347 texi->tex.sIndirectSrc = sampOffIdx;
3348
3349 switch (insn->op) {
3350 case nir_texop_tg4:
3351 if (!target.isShadow())
3352 texi->tex.gatherComp = insn->component;
3353 break;
3354 case nir_texop_txs:
3355 texi->tex.query = TXQ_DIMS;
3356 break;
3357 case nir_texop_texture_samples:
3358 texi->tex.mask = 0x4;
3359 texi->tex.query = TXQ_TYPE;
3360 break;
3361 case nir_texop_query_levels:
3362 texi->tex.mask = 0x8;
3363 texi->tex.query = TXQ_DIMS;
3364 break;
3365 default:
3366 break;
3367 }
3368
3369 texi->tex.useOffsets = offsets.size();
3370 if (texi->tex.useOffsets) {
3371 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3372 for (uint32_t c = 0u; c < 3; ++c) {
3373 uint8_t s2 = std::min(c, target.getDim() - 1);
3374 texi->offset[s][c].set(getSrc(offsets[s], s2));
3375 texi->offset[s][c].setInsn(texi);
3376 }
3377 }
3378 }
3379
3380 if (op == OP_TXG && offsetIdx == -1) {
3381 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3382 texi->tex.useOffsets = 4;
3383 setPosition(texi, false);
3384 for (uint8_t i = 0; i < 4; ++i) {
3385 for (uint8_t j = 0; j < 2; ++j) {
3386 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3387 texi->offset[i][j].setInsn(texi);
3388 }
3389 }
3390 setPosition(texi, true);
3391 }
3392 }
3393
3394 if (ddxIdx != -1 && ddyIdx != -1) {
3395 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3396 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3397 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3398 }
3399 }
3400
3401 break;
3402 }
3403 default:
3404 ERROR("unknown nir_texop %u\n", insn->op);
3405 return false;
3406 }
3407 return true;
3408 }
3409
3410 bool
3411 Converter::visit(nir_deref_instr *deref)
3412 {
3413 // we just ignore these, because image intrinsics are the only place where
3414 // we should end up with deref sources and those have to backtrack anyway
3415 // to get the nir_variable. This code just exists to handle some special
3416 // cases.
3417 switch (deref->deref_type) {
3418 case nir_deref_type_array:
3419 case nir_deref_type_struct:
3420 case nir_deref_type_var:
3421 break;
3422 default:
3423 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3424 return false;
3425 }
3426 return true;
3427 }
3428
3429 bool
3430 Converter::run()
3431 {
3432 bool progress;
3433
3434 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3435 nir_print_shader(nir, stderr);
3436
3437 struct nir_lower_subgroups_options subgroup_options = {
3438 .subgroup_size = 32,
3439 .ballot_bit_size = 32,
3440 };
3441
3442 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3443 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3444 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3445 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3446 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3447 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
3448 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3449
3450 do {
3451 progress = false;
3452 NIR_PASS(progress, nir, nir_copy_prop);
3453 NIR_PASS(progress, nir, nir_opt_remove_phis);
3454 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3455 NIR_PASS(progress, nir, nir_opt_cse);
3456 NIR_PASS(progress, nir, nir_opt_algebraic);
3457 NIR_PASS(progress, nir, nir_opt_constant_folding);
3458 NIR_PASS(progress, nir, nir_copy_prop);
3459 NIR_PASS(progress, nir, nir_opt_dce);
3460 NIR_PASS(progress, nir, nir_opt_dead_cf);
3461 } while (progress);
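// the loop above runs the cheap cleanup passes to a fixed point, so that
// e.g. constant folding keeps exposing new copy-propagation and DCE
// opportunities until nothing changes.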
3462
3463 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3464 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3465 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3466 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3467
3468 // Garbage collect dead instructions
3469 nir_sweep(nir);
3470
3471 if (!parseNIR()) {
3472 ERROR("Couldn't parse NIR!\n");
3473 return false;
3474 }
3475
3476 if (!assignSlots()) {
3477 ERROR("Couldn't assign slots!\n");
3478 return false;
3479 }
3480
3481 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3482 nir_print_shader(nir, stderr);
3483
3484 nir_foreach_function(function, nir) {
3485 if (!visit(function))
3486 return false;
3487 }
3488
3489 return true;
3490 }
3491
3492 } // unnamed namespace
3493
3494 namespace nv50_ir {
3495
3496 bool
3497 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3498 {
3499 nir_shader *nir = (nir_shader*)info->bin.source;
3500 Converter converter(this, nir, info);
3501 bool result = converter.run();
3502 if (!result)
3503 return result;
3504 LoweringHelper lowering;
3505 lowering.run(this);
3506 tlsSize = info->bin.tlsSpace;
3507 return result;
3508 }
3509
3510 } // namespace nv50_ir