nv50/ir/nir: parse system values first and stop for compute shaders
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <cstring>
40 #include <list>
41 #include <vector>
42
43 namespace {
44
45 #if __cplusplus >= 201103L
46 using std::hash;
47 using std::unordered_map;
48 #else
49 using std::tr1::hash;
50 using std::tr1::unordered_map;
51 #endif
52
53 using namespace nv50_ir;
54
55 int
56 type_size(const struct glsl_type *type, bool bindless)
57 {
58 return glsl_count_attribute_slots(type, false);
59 }
60
61 class Converter : public ConverterCommon
62 {
63 public:
64 Converter(Program *, nir_shader *, nv50_ir_prog_info *);
65
66 bool run();
67 private:
68 typedef std::vector<LValue*> LValues;
69 typedef unordered_map<unsigned, LValues> NirDefMap;
70 typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
71 typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
72 typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
73
74 CacheMode convert(enum gl_access_qualifier);
75 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
76 LValues& convert(nir_alu_dest *);
77 BasicBlock* convert(nir_block *);
78 LValues& convert(nir_dest *);
79 SVSemantic convert(nir_intrinsic_op);
80 Value* convert(nir_load_const_instr*, uint8_t);
81 LValues& convert(nir_register *);
82 LValues& convert(nir_ssa_def *);
83
84 ImgFormat convertGLImgFormat(GLuint);
85
86 Value* getSrc(nir_alu_src *, uint8_t component = 0);
87 Value* getSrc(nir_register *, uint8_t);
88 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
89 Value* getSrc(nir_ssa_def *, uint8_t);
90
91 // returned value is the constant part of the given source (either the
92 // nir_src or the selected source component of an intrinsic). Even though
93 // this is mostly an optimization to be able to skip indirects in a few
94 // cases, sometimes we require immediate values or set some fileds on
95 // instructions (e.g. tex) in order for codegen to consume those.
96 // If the found value has not a constant part, the Value gets returned
97 // through the Value parameter.
98 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
99 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
100
101 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
102
103 void setInterpolate(nv50_ir_varying *,
104 uint8_t,
105 bool centroid,
106 unsigned semantics);
107
108 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
109 uint8_t c, Value *indirect0 = NULL,
110 Value *indirect1 = NULL, bool patch = false);
111 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
112 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
113 Value *indirect1 = NULL);
114
115 bool isFloatType(nir_alu_type);
116 bool isSignedType(nir_alu_type);
117 bool isResultFloat(nir_op);
118 bool isResultSigned(nir_op);
119
120 DataType getDType(nir_alu_instr *);
121 DataType getDType(nir_intrinsic_instr *);
122 DataType getDType(nir_intrinsic_instr *, bool isSigned);
123 DataType getDType(nir_op, uint8_t);
124
125 std::vector<DataType> getSTypes(nir_alu_instr *);
126 DataType getSType(nir_src &, bool isFloat, bool isSigned);
127
128 operation getOperation(nir_intrinsic_op);
129 operation getOperation(nir_op);
130 operation getOperation(nir_texop);
131 operation preOperationNeeded(nir_op);
132
133 int getSubOp(nir_intrinsic_op);
134 int getSubOp(nir_op);
135
136 CondCode getCondCode(nir_op);
137
138 bool assignSlots();
139 bool parseNIR();
140
141 bool visit(nir_alu_instr *);
142 bool visit(nir_block *);
143 bool visit(nir_cf_node *);
144 bool visit(nir_deref_instr *);
145 bool visit(nir_function *);
146 bool visit(nir_if *);
147 bool visit(nir_instr *);
148 bool visit(nir_intrinsic_instr *);
149 bool visit(nir_jump_instr *);
150 bool visit(nir_load_const_instr*);
151 bool visit(nir_loop *);
152 bool visit(nir_ssa_undef_instr *);
153 bool visit(nir_tex_instr *);
154
155 // tex stuff
156 Value* applyProjection(Value *src, Value *proj);
157 unsigned int getNIRArgCount(TexInstruction::Target&);
158
159 // image stuff
160 uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
161 CacheMode getCacheModeFromVar(const nir_variable *);
162
163 nir_shader *nir;
164
165 NirDefMap ssaDefs;
166 NirDefMap regDefs;
167 ImmediateMap immediates;
168 NirArrayLMemOffsets regToLmemOffset;
169 NirBlockMap blocks;
170 unsigned int curLoopDepth;
171
172 BasicBlock *exit;
173 Value *zero;
174 Instruction *immInsertPos;
175
176 int clipVertexOutput;
177
178 union {
179 struct {
180 Value *position;
181 } fp;
182 };
183 };
184
185 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
186 : ConverterCommon(prog, info),
187 nir(nir),
188 curLoopDepth(0),
189 clipVertexOutput(-1)
190 {
191 zero = mkImm((uint32_t)0);
192 }
193
194 BasicBlock *
195 Converter::convert(nir_block *block)
196 {
197 NirBlockMap::iterator it = blocks.find(block->index);
198 if (it != blocks.end())
199 return it->second;
200
201 BasicBlock *bb = new BasicBlock(func);
202 blocks[block->index] = bb;
203 return bb;
204 }
205
206 bool
207 Converter::isFloatType(nir_alu_type type)
208 {
209 return nir_alu_type_get_base_type(type) == nir_type_float;
210 }
211
212 bool
213 Converter::isSignedType(nir_alu_type type)
214 {
215 return nir_alu_type_get_base_type(type) == nir_type_int;
216 }
217
218 bool
219 Converter::isResultFloat(nir_op op)
220 {
221 const nir_op_info &info = nir_op_infos[op];
222 if (info.output_type != nir_type_invalid)
223 return isFloatType(info.output_type);
224
225 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
226 assert(false);
227 return true;
228 }
229
230 bool
231 Converter::isResultSigned(nir_op op)
232 {
233 switch (op) {
234 // there is no umul and we get wrong results if we treat all muls as signed
235 case nir_op_imul:
236 case nir_op_inot:
237 return false;
238 default:
239 const nir_op_info &info = nir_op_infos[op];
240 if (info.output_type != nir_type_invalid)
241 return isSignedType(info.output_type);
242 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
243 assert(false);
244 return true;
245 }
246 }
247
248 DataType
249 Converter::getDType(nir_alu_instr *insn)
250 {
251 if (insn->dest.dest.is_ssa)
252 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
253 else
254 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
255 }
256
257 DataType
258 Converter::getDType(nir_intrinsic_instr *insn)
259 {
260 bool isSigned;
261 switch (insn->intrinsic) {
262 case nir_intrinsic_shared_atomic_imax:
263 case nir_intrinsic_shared_atomic_imin:
264 case nir_intrinsic_ssbo_atomic_imax:
265 case nir_intrinsic_ssbo_atomic_imin:
266 isSigned = true;
267 break;
268 default:
269 isSigned = false;
270 break;
271 }
272
273 return getDType(insn, isSigned);
274 }
275
276 DataType
277 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
278 {
279 if (insn->dest.is_ssa)
280 return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
281 else
282 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
283 }
284
285 DataType
286 Converter::getDType(nir_op op, uint8_t bitSize)
287 {
288 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
289 if (ty == TYPE_NONE) {
290 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
291 assert(false);
292 }
293 return ty;
294 }
295
296 std::vector<DataType>
297 Converter::getSTypes(nir_alu_instr *insn)
298 {
299 const nir_op_info &info = nir_op_infos[insn->op];
300 std::vector<DataType> res(info.num_inputs);
301
302 for (uint8_t i = 0; i < info.num_inputs; ++i) {
303 if (info.input_types[i] != nir_type_invalid) {
304 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
305 } else {
306 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
307 assert(false);
308 res[i] = TYPE_NONE;
309 break;
310 }
311 }
312
313 return res;
314 }
315
316 DataType
317 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
318 {
319 uint8_t bitSize;
320 if (src.is_ssa)
321 bitSize = src.ssa->bit_size;
322 else
323 bitSize = src.reg.reg->bit_size;
324
325 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
326 if (ty == TYPE_NONE) {
327 const char *str;
328 if (isFloat)
329 str = "float";
330 else if (isSigned)
331 str = "int";
332 else
333 str = "uint";
334 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
335 assert(false);
336 }
337 return ty;
338 }
339
340 operation
341 Converter::getOperation(nir_op op)
342 {
343 switch (op) {
344 // basic ops with float and int variants
345 case nir_op_fabs:
346 case nir_op_iabs:
347 return OP_ABS;
348 case nir_op_fadd:
349 case nir_op_iadd:
350 return OP_ADD;
351 case nir_op_iand:
352 return OP_AND;
353 case nir_op_ifind_msb:
354 case nir_op_ufind_msb:
355 return OP_BFIND;
356 case nir_op_fceil:
357 return OP_CEIL;
358 case nir_op_fcos:
359 return OP_COS;
360 case nir_op_f2f32:
361 case nir_op_f2f64:
362 case nir_op_f2i32:
363 case nir_op_f2i64:
364 case nir_op_f2u32:
365 case nir_op_f2u64:
366 case nir_op_i2f32:
367 case nir_op_i2f64:
368 case nir_op_i2i32:
369 case nir_op_i2i64:
370 case nir_op_u2f32:
371 case nir_op_u2f64:
372 case nir_op_u2u32:
373 case nir_op_u2u64:
374 return OP_CVT;
375 case nir_op_fddx:
376 case nir_op_fddx_coarse:
377 case nir_op_fddx_fine:
378 return OP_DFDX;
379 case nir_op_fddy:
380 case nir_op_fddy_coarse:
381 case nir_op_fddy_fine:
382 return OP_DFDY;
383 case nir_op_fdiv:
384 case nir_op_idiv:
385 case nir_op_udiv:
386 return OP_DIV;
387 case nir_op_fexp2:
388 return OP_EX2;
389 case nir_op_ffloor:
390 return OP_FLOOR;
391 case nir_op_ffma:
392 return OP_FMA;
393 case nir_op_flog2:
394 return OP_LG2;
395 case nir_op_fmax:
396 case nir_op_imax:
397 case nir_op_umax:
398 return OP_MAX;
399 case nir_op_pack_64_2x32_split:
400 return OP_MERGE;
401 case nir_op_fmin:
402 case nir_op_imin:
403 case nir_op_umin:
404 return OP_MIN;
405 case nir_op_fmod:
406 case nir_op_imod:
407 case nir_op_umod:
408 case nir_op_frem:
409 case nir_op_irem:
410 return OP_MOD;
411 case nir_op_fmul:
412 case nir_op_imul:
413 case nir_op_imul_high:
414 case nir_op_umul_high:
415 return OP_MUL;
416 case nir_op_fneg:
417 case nir_op_ineg:
418 return OP_NEG;
419 case nir_op_inot:
420 return OP_NOT;
421 case nir_op_ior:
422 return OP_OR;
423 case nir_op_fpow:
424 return OP_POW;
425 case nir_op_frcp:
426 return OP_RCP;
427 case nir_op_frsq:
428 return OP_RSQ;
429 case nir_op_fsat:
430 return OP_SAT;
431 case nir_op_feq32:
432 case nir_op_ieq32:
433 case nir_op_fge32:
434 case nir_op_ige32:
435 case nir_op_uge32:
436 case nir_op_flt32:
437 case nir_op_ilt32:
438 case nir_op_ult32:
439 case nir_op_fne32:
440 case nir_op_ine32:
441 return OP_SET;
442 case nir_op_ishl:
443 return OP_SHL;
444 case nir_op_ishr:
445 case nir_op_ushr:
446 return OP_SHR;
447 case nir_op_fsin:
448 return OP_SIN;
449 case nir_op_fsqrt:
450 return OP_SQRT;
451 case nir_op_fsub:
452 case nir_op_isub:
453 return OP_SUB;
454 case nir_op_ftrunc:
455 return OP_TRUNC;
456 case nir_op_ixor:
457 return OP_XOR;
458 default:
459 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
460 assert(false);
461 return OP_NOP;
462 }
463 }
464
465 operation
466 Converter::getOperation(nir_texop op)
467 {
468 switch (op) {
469 case nir_texop_tex:
470 return OP_TEX;
471 case nir_texop_lod:
472 return OP_TXLQ;
473 case nir_texop_txb:
474 return OP_TXB;
475 case nir_texop_txd:
476 return OP_TXD;
477 case nir_texop_txf:
478 case nir_texop_txf_ms:
479 return OP_TXF;
480 case nir_texop_tg4:
481 return OP_TXG;
482 case nir_texop_txl:
483 return OP_TXL;
484 case nir_texop_query_levels:
485 case nir_texop_texture_samples:
486 case nir_texop_txs:
487 return OP_TXQ;
488 default:
489 ERROR("couldn't get operation for nir_texop %u\n", op);
490 assert(false);
491 return OP_NOP;
492 }
493 }
494
495 operation
496 Converter::getOperation(nir_intrinsic_op op)
497 {
498 switch (op) {
499 case nir_intrinsic_emit_vertex:
500 return OP_EMIT;
501 case nir_intrinsic_end_primitive:
502 return OP_RESTART;
503 case nir_intrinsic_bindless_image_atomic_add:
504 case nir_intrinsic_image_atomic_add:
505 case nir_intrinsic_image_deref_atomic_add:
506 case nir_intrinsic_bindless_image_atomic_and:
507 case nir_intrinsic_image_atomic_and:
508 case nir_intrinsic_image_deref_atomic_and:
509 case nir_intrinsic_bindless_image_atomic_comp_swap:
510 case nir_intrinsic_image_atomic_comp_swap:
511 case nir_intrinsic_image_deref_atomic_comp_swap:
512 case nir_intrinsic_bindless_image_atomic_exchange:
513 case nir_intrinsic_image_atomic_exchange:
514 case nir_intrinsic_image_deref_atomic_exchange:
515 case nir_intrinsic_bindless_image_atomic_max:
516 case nir_intrinsic_image_atomic_max:
517 case nir_intrinsic_image_deref_atomic_max:
518 case nir_intrinsic_bindless_image_atomic_min:
519 case nir_intrinsic_image_atomic_min:
520 case nir_intrinsic_image_deref_atomic_min:
521 case nir_intrinsic_bindless_image_atomic_or:
522 case nir_intrinsic_image_atomic_or:
523 case nir_intrinsic_image_deref_atomic_or:
524 case nir_intrinsic_bindless_image_atomic_xor:
525 case nir_intrinsic_image_atomic_xor:
526 case nir_intrinsic_image_deref_atomic_xor:
527 return OP_SUREDP;
528 case nir_intrinsic_bindless_image_load:
529 case nir_intrinsic_image_load:
530 case nir_intrinsic_image_deref_load:
531 return OP_SULDP;
532 case nir_intrinsic_bindless_image_samples:
533 case nir_intrinsic_image_samples:
534 case nir_intrinsic_image_deref_samples:
535 case nir_intrinsic_bindless_image_size:
536 case nir_intrinsic_image_size:
537 case nir_intrinsic_image_deref_size:
538 return OP_SUQ;
539 case nir_intrinsic_bindless_image_store:
540 case nir_intrinsic_image_store:
541 case nir_intrinsic_image_deref_store:
542 return OP_SUSTP;
543 default:
544 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
545 assert(false);
546 return OP_NOP;
547 }
548 }
549
550 operation
551 Converter::preOperationNeeded(nir_op op)
552 {
553 switch (op) {
554 case nir_op_fcos:
555 case nir_op_fsin:
556 return OP_PRESIN;
557 default:
558 return OP_NOP;
559 }
560 }
561
562 int
563 Converter::getSubOp(nir_op op)
564 {
565 switch (op) {
566 case nir_op_imul_high:
567 case nir_op_umul_high:
568 return NV50_IR_SUBOP_MUL_HIGH;
569 default:
570 return 0;
571 }
572 }
573
574 int
575 Converter::getSubOp(nir_intrinsic_op op)
576 {
577 switch (op) {
578 case nir_intrinsic_bindless_image_atomic_add:
579 case nir_intrinsic_image_atomic_add:
580 case nir_intrinsic_image_deref_atomic_add:
581 case nir_intrinsic_shared_atomic_add:
582 case nir_intrinsic_ssbo_atomic_add:
583 return NV50_IR_SUBOP_ATOM_ADD;
584 case nir_intrinsic_bindless_image_atomic_and:
585 case nir_intrinsic_image_atomic_and:
586 case nir_intrinsic_image_deref_atomic_and:
587 case nir_intrinsic_shared_atomic_and:
588 case nir_intrinsic_ssbo_atomic_and:
589 return NV50_IR_SUBOP_ATOM_AND;
590 case nir_intrinsic_bindless_image_atomic_comp_swap:
591 case nir_intrinsic_image_atomic_comp_swap:
592 case nir_intrinsic_image_deref_atomic_comp_swap:
593 case nir_intrinsic_shared_atomic_comp_swap:
594 case nir_intrinsic_ssbo_atomic_comp_swap:
595 return NV50_IR_SUBOP_ATOM_CAS;
596 case nir_intrinsic_bindless_image_atomic_exchange:
597 case nir_intrinsic_image_atomic_exchange:
598 case nir_intrinsic_image_deref_atomic_exchange:
599 case nir_intrinsic_shared_atomic_exchange:
600 case nir_intrinsic_ssbo_atomic_exchange:
601 return NV50_IR_SUBOP_ATOM_EXCH;
602 case nir_intrinsic_bindless_image_atomic_or:
603 case nir_intrinsic_image_atomic_or:
604 case nir_intrinsic_image_deref_atomic_or:
605 case nir_intrinsic_shared_atomic_or:
606 case nir_intrinsic_ssbo_atomic_or:
607 return NV50_IR_SUBOP_ATOM_OR;
608 case nir_intrinsic_bindless_image_atomic_max:
609 case nir_intrinsic_image_atomic_max:
610 case nir_intrinsic_image_deref_atomic_max:
611 case nir_intrinsic_shared_atomic_imax:
612 case nir_intrinsic_shared_atomic_umax:
613 case nir_intrinsic_ssbo_atomic_imax:
614 case nir_intrinsic_ssbo_atomic_umax:
615 return NV50_IR_SUBOP_ATOM_MAX;
616 case nir_intrinsic_bindless_image_atomic_min:
617 case nir_intrinsic_image_atomic_min:
618 case nir_intrinsic_image_deref_atomic_min:
619 case nir_intrinsic_shared_atomic_imin:
620 case nir_intrinsic_shared_atomic_umin:
621 case nir_intrinsic_ssbo_atomic_imin:
622 case nir_intrinsic_ssbo_atomic_umin:
623 return NV50_IR_SUBOP_ATOM_MIN;
624 case nir_intrinsic_bindless_image_atomic_xor:
625 case nir_intrinsic_image_atomic_xor:
626 case nir_intrinsic_image_deref_atomic_xor:
627 case nir_intrinsic_shared_atomic_xor:
628 case nir_intrinsic_ssbo_atomic_xor:
629 return NV50_IR_SUBOP_ATOM_XOR;
630
631 case nir_intrinsic_group_memory_barrier:
632 case nir_intrinsic_memory_barrier:
633 case nir_intrinsic_memory_barrier_atomic_counter:
634 case nir_intrinsic_memory_barrier_buffer:
635 case nir_intrinsic_memory_barrier_image:
636 return NV50_IR_SUBOP_MEMBAR(M, GL);
637 case nir_intrinsic_memory_barrier_shared:
638 return NV50_IR_SUBOP_MEMBAR(M, CTA);
639
640 case nir_intrinsic_vote_all:
641 return NV50_IR_SUBOP_VOTE_ALL;
642 case nir_intrinsic_vote_any:
643 return NV50_IR_SUBOP_VOTE_ANY;
644 case nir_intrinsic_vote_ieq:
645 return NV50_IR_SUBOP_VOTE_UNI;
646 default:
647 return 0;
648 }
649 }
650
651 CondCode
652 Converter::getCondCode(nir_op op)
653 {
654 switch (op) {
655 case nir_op_feq32:
656 case nir_op_ieq32:
657 return CC_EQ;
658 case nir_op_fge32:
659 case nir_op_ige32:
660 case nir_op_uge32:
661 return CC_GE;
662 case nir_op_flt32:
663 case nir_op_ilt32:
664 case nir_op_ult32:
665 return CC_LT;
666 case nir_op_fne32:
667 return CC_NEU;
668 case nir_op_ine32:
669 return CC_NE;
670 default:
671 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
672 assert(false);
673 return CC_FL;
674 }
675 }
676
677 Converter::LValues&
678 Converter::convert(nir_alu_dest *dest)
679 {
680 return convert(&dest->dest);
681 }
682
683 Converter::LValues&
684 Converter::convert(nir_dest *dest)
685 {
686 if (dest->is_ssa)
687 return convert(&dest->ssa);
688 if (dest->reg.indirect) {
689 ERROR("no support for indirects.");
690 assert(false);
691 }
692 return convert(dest->reg.reg);
693 }
694
695 Converter::LValues&
696 Converter::convert(nir_register *reg)
697 {
698 NirDefMap::iterator it = regDefs.find(reg->index);
699 if (it != regDefs.end())
700 return it->second;
701
702 LValues newDef(reg->num_components);
703 for (uint8_t i = 0; i < reg->num_components; i++)
704 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
705 return regDefs[reg->index] = newDef;
706 }
707
708 Converter::LValues&
709 Converter::convert(nir_ssa_def *def)
710 {
711 NirDefMap::iterator it = ssaDefs.find(def->index);
712 if (it != ssaDefs.end())
713 return it->second;
714
715 LValues newDef(def->num_components);
716 for (uint8_t i = 0; i < def->num_components; i++)
717 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
718 return ssaDefs[def->index] = newDef;
719 }
720
721 Value*
722 Converter::getSrc(nir_alu_src *src, uint8_t component)
723 {
724 if (src->abs || src->negate) {
725 ERROR("modifiers currently not supported on nir_alu_src\n");
726 assert(false);
727 }
728 return getSrc(&src->src, src->swizzle[component]);
729 }
730
731 Value*
732 Converter::getSrc(nir_register *reg, uint8_t idx)
733 {
734 NirDefMap::iterator it = regDefs.find(reg->index);
735 if (it == regDefs.end())
736 return convert(reg)[idx];
737 return it->second[idx];
738 }
739
740 Value*
741 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
742 {
743 if (src->is_ssa)
744 return getSrc(src->ssa, idx);
745
746 if (src->reg.indirect) {
747 if (indirect)
748 return getSrc(src->reg.indirect, idx);
749 ERROR("no support for indirects.");
750 assert(false);
751 return NULL;
752 }
753
754 return getSrc(src->reg.reg, idx);
755 }
756
757 Value*
758 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
759 {
760 ImmediateMap::iterator iit = immediates.find(src->index);
761 if (iit != immediates.end())
762 return convert((*iit).second, idx);
763
764 NirDefMap::iterator it = ssaDefs.find(src->index);
765 if (it == ssaDefs.end()) {
766 ERROR("SSA value %u not found\n", src->index);
767 assert(false);
768 return NULL;
769 }
770 return it->second[idx];
771 }
772
773 uint32_t
774 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
775 {
776 nir_const_value *offset = nir_src_as_const_value(*src);
777
778 if (offset) {
779 indirect = NULL;
780 return offset[0].u32;
781 }
782
783 indirect = getSrc(src, idx, true);
784 return 0;
785 }
786
787 uint32_t
788 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
789 {
790 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
791 if (indirect)
792 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
793 return idx;
794 }
795
796 static void
797 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
798 {
799 assert(name && index);
800
801 if (slot >= VERT_ATTRIB_MAX) {
802 ERROR("invalid varying slot %u\n", slot);
803 assert(false);
804 return;
805 }
806
807 if (slot >= VERT_ATTRIB_GENERIC0 &&
808 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
809 *name = TGSI_SEMANTIC_GENERIC;
810 *index = slot - VERT_ATTRIB_GENERIC0;
811 return;
812 }
813
814 if (slot >= VERT_ATTRIB_TEX0 &&
815 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
816 *name = TGSI_SEMANTIC_TEXCOORD;
817 *index = slot - VERT_ATTRIB_TEX0;
818 return;
819 }
820
821 switch (slot) {
822 case VERT_ATTRIB_COLOR0:
823 *name = TGSI_SEMANTIC_COLOR;
824 *index = 0;
825 break;
826 case VERT_ATTRIB_COLOR1:
827 *name = TGSI_SEMANTIC_COLOR;
828 *index = 1;
829 break;
830 case VERT_ATTRIB_EDGEFLAG:
831 *name = TGSI_SEMANTIC_EDGEFLAG;
832 *index = 0;
833 break;
834 case VERT_ATTRIB_FOG:
835 *name = TGSI_SEMANTIC_FOG;
836 *index = 0;
837 break;
838 case VERT_ATTRIB_NORMAL:
839 *name = TGSI_SEMANTIC_NORMAL;
840 *index = 0;
841 break;
842 case VERT_ATTRIB_POS:
843 *name = TGSI_SEMANTIC_POSITION;
844 *index = 0;
845 break;
846 case VERT_ATTRIB_POINT_SIZE:
847 *name = TGSI_SEMANTIC_PSIZE;
848 *index = 0;
849 break;
850 default:
851 ERROR("unknown vert attrib slot %u\n", slot);
852 assert(false);
853 break;
854 }
855 }
856
857 static void
858 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
859 {
860 assert(name && index);
861
862 if (slot >= VARYING_SLOT_TESS_MAX) {
863 ERROR("invalid varying slot %u\n", slot);
864 assert(false);
865 return;
866 }
867
868 if (slot >= VARYING_SLOT_PATCH0) {
869 *name = TGSI_SEMANTIC_PATCH;
870 *index = slot - VARYING_SLOT_PATCH0;
871 return;
872 }
873
874 if (slot >= VARYING_SLOT_VAR0) {
875 *name = TGSI_SEMANTIC_GENERIC;
876 *index = slot - VARYING_SLOT_VAR0;
877 return;
878 }
879
880 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
881 *name = TGSI_SEMANTIC_TEXCOORD;
882 *index = slot - VARYING_SLOT_TEX0;
883 return;
884 }
885
886 switch (slot) {
887 case VARYING_SLOT_BFC0:
888 *name = TGSI_SEMANTIC_BCOLOR;
889 *index = 0;
890 break;
891 case VARYING_SLOT_BFC1:
892 *name = TGSI_SEMANTIC_BCOLOR;
893 *index = 1;
894 break;
895 case VARYING_SLOT_CLIP_DIST0:
896 *name = TGSI_SEMANTIC_CLIPDIST;
897 *index = 0;
898 break;
899 case VARYING_SLOT_CLIP_DIST1:
900 *name = TGSI_SEMANTIC_CLIPDIST;
901 *index = 1;
902 break;
903 case VARYING_SLOT_CLIP_VERTEX:
904 *name = TGSI_SEMANTIC_CLIPVERTEX;
905 *index = 0;
906 break;
907 case VARYING_SLOT_COL0:
908 *name = TGSI_SEMANTIC_COLOR;
909 *index = 0;
910 break;
911 case VARYING_SLOT_COL1:
912 *name = TGSI_SEMANTIC_COLOR;
913 *index = 1;
914 break;
915 case VARYING_SLOT_EDGE:
916 *name = TGSI_SEMANTIC_EDGEFLAG;
917 *index = 0;
918 break;
919 case VARYING_SLOT_FACE:
920 *name = TGSI_SEMANTIC_FACE;
921 *index = 0;
922 break;
923 case VARYING_SLOT_FOGC:
924 *name = TGSI_SEMANTIC_FOG;
925 *index = 0;
926 break;
927 case VARYING_SLOT_LAYER:
928 *name = TGSI_SEMANTIC_LAYER;
929 *index = 0;
930 break;
931 case VARYING_SLOT_PNTC:
932 *name = TGSI_SEMANTIC_PCOORD;
933 *index = 0;
934 break;
935 case VARYING_SLOT_POS:
936 *name = TGSI_SEMANTIC_POSITION;
937 *index = 0;
938 break;
939 case VARYING_SLOT_PRIMITIVE_ID:
940 *name = TGSI_SEMANTIC_PRIMID;
941 *index = 0;
942 break;
943 case VARYING_SLOT_PSIZ:
944 *name = TGSI_SEMANTIC_PSIZE;
945 *index = 0;
946 break;
947 case VARYING_SLOT_TESS_LEVEL_INNER:
948 *name = TGSI_SEMANTIC_TESSINNER;
949 *index = 0;
950 break;
951 case VARYING_SLOT_TESS_LEVEL_OUTER:
952 *name = TGSI_SEMANTIC_TESSOUTER;
953 *index = 0;
954 break;
955 case VARYING_SLOT_VIEWPORT:
956 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
957 *index = 0;
958 break;
959 default:
960 ERROR("unknown varying slot %u\n", slot);
961 assert(false);
962 break;
963 }
964 }
965
966 static void
967 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
968 {
969 if (slot >= FRAG_RESULT_DATA0) {
970 *name = TGSI_SEMANTIC_COLOR;
971 *index = slot - FRAG_RESULT_COLOR - 2; // intentional
972 return;
973 }
974
975 switch (slot) {
976 case FRAG_RESULT_COLOR:
977 *name = TGSI_SEMANTIC_COLOR;
978 *index = 0;
979 break;
980 case FRAG_RESULT_DEPTH:
981 *name = TGSI_SEMANTIC_POSITION;
982 *index = 0;
983 break;
984 case FRAG_RESULT_SAMPLE_MASK:
985 *name = TGSI_SEMANTIC_SAMPLEMASK;
986 *index = 0;
987 break;
988 default:
989 ERROR("unknown frag result slot %u\n", slot);
990 assert(false);
991 break;
992 }
993 }
994
995 // copy of _mesa_sysval_to_semantic
996 static void
997 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
998 {
999 *index = 0;
1000 switch (val) {
1001 // Vertex shader
1002 case SYSTEM_VALUE_VERTEX_ID:
1003 *name = TGSI_SEMANTIC_VERTEXID;
1004 break;
1005 case SYSTEM_VALUE_INSTANCE_ID:
1006 *name = TGSI_SEMANTIC_INSTANCEID;
1007 break;
1008 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
1009 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
1010 break;
1011 case SYSTEM_VALUE_BASE_VERTEX:
1012 *name = TGSI_SEMANTIC_BASEVERTEX;
1013 break;
1014 case SYSTEM_VALUE_BASE_INSTANCE:
1015 *name = TGSI_SEMANTIC_BASEINSTANCE;
1016 break;
1017 case SYSTEM_VALUE_DRAW_ID:
1018 *name = TGSI_SEMANTIC_DRAWID;
1019 break;
1020
1021 // Geometry shader
1022 case SYSTEM_VALUE_INVOCATION_ID:
1023 *name = TGSI_SEMANTIC_INVOCATIONID;
1024 break;
1025
1026 // Fragment shader
1027 case SYSTEM_VALUE_FRAG_COORD:
1028 *name = TGSI_SEMANTIC_POSITION;
1029 break;
1030 case SYSTEM_VALUE_FRONT_FACE:
1031 *name = TGSI_SEMANTIC_FACE;
1032 break;
1033 case SYSTEM_VALUE_SAMPLE_ID:
1034 *name = TGSI_SEMANTIC_SAMPLEID;
1035 break;
1036 case SYSTEM_VALUE_SAMPLE_POS:
1037 *name = TGSI_SEMANTIC_SAMPLEPOS;
1038 break;
1039 case SYSTEM_VALUE_SAMPLE_MASK_IN:
1040 *name = TGSI_SEMANTIC_SAMPLEMASK;
1041 break;
1042 case SYSTEM_VALUE_HELPER_INVOCATION:
1043 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
1044 break;
1045
1046 // Tessellation shader
1047 case SYSTEM_VALUE_TESS_COORD:
1048 *name = TGSI_SEMANTIC_TESSCOORD;
1049 break;
1050 case SYSTEM_VALUE_VERTICES_IN:
1051 *name = TGSI_SEMANTIC_VERTICESIN;
1052 break;
1053 case SYSTEM_VALUE_PRIMITIVE_ID:
1054 *name = TGSI_SEMANTIC_PRIMID;
1055 break;
1056 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1057 *name = TGSI_SEMANTIC_TESSOUTER;
1058 break;
1059 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1060 *name = TGSI_SEMANTIC_TESSINNER;
1061 break;
1062
1063 // Compute shader
1064 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1065 *name = TGSI_SEMANTIC_THREAD_ID;
1066 break;
1067 case SYSTEM_VALUE_WORK_GROUP_ID:
1068 *name = TGSI_SEMANTIC_BLOCK_ID;
1069 break;
1070 case SYSTEM_VALUE_NUM_WORK_GROUPS:
1071 *name = TGSI_SEMANTIC_GRID_SIZE;
1072 break;
1073 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1074 *name = TGSI_SEMANTIC_BLOCK_SIZE;
1075 break;
1076
1077 // ARB_shader_ballot
1078 case SYSTEM_VALUE_SUBGROUP_SIZE:
1079 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
1080 break;
1081 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1082 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
1083 break;
1084 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1085 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
1086 break;
1087 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1088 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
1089 break;
1090 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1091 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
1092 break;
1093 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1094 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
1095 break;
1096 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1097 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
1098 break;
1099
1100 default:
1101 ERROR("unknown system value %u\n", val);
1102 assert(false);
1103 break;
1104 }
1105 }
1106
1107 void
1108 Converter::setInterpolate(nv50_ir_varying *var,
1109 uint8_t mode,
1110 bool centroid,
1111 unsigned semantic)
1112 {
1113 switch (mode) {
1114 case INTERP_MODE_FLAT:
1115 var->flat = 1;
1116 break;
1117 case INTERP_MODE_NONE:
1118 if (semantic == TGSI_SEMANTIC_COLOR)
1119 var->sc = 1;
1120 else if (semantic == TGSI_SEMANTIC_POSITION)
1121 var->linear = 1;
1122 break;
1123 case INTERP_MODE_NOPERSPECTIVE:
1124 var->linear = 1;
1125 break;
1126 case INTERP_MODE_SMOOTH:
1127 break;
1128 }
1129 var->centroid = centroid;
1130 }
1131
1132 static uint16_t
1133 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
1134 bool input, const nir_variable *var)
1135 {
1136 if (!type->is_array())
1137 return type->count_attribute_slots(false);
1138
1139 uint16_t slots;
1140 switch (stage) {
1141 case Program::TYPE_GEOMETRY:
1142 slots = type->uniform_locations();
1143 if (input)
1144 slots /= info.gs.vertices_in;
1145 break;
1146 case Program::TYPE_TESSELLATION_CONTROL:
1147 case Program::TYPE_TESSELLATION_EVAL:
1148 // remove first dimension
1149 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1150 slots = type->uniform_locations();
1151 else
1152 slots = type->fields.array->uniform_locations();
1153 break;
1154 default:
1155 slots = type->count_attribute_slots(false);
1156 break;
1157 }
1158
1159 return slots;
1160 }
1161
// Assign hardware I/O slots: collect the system values the shader reads,
// then walk the NIR input/output variables and fill info->in/info->out
// (semantic name/index, component mask, patch flag, interpolation).
// Compute shaders have no varyings, so they stop after the sysval scan.
// Finishes by delegating the actual slot numbering to info->assignSlots.
bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;
   info->numOutputs = 0;

   // we have to fixup the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   // record every system value the shader reads and remember where the
   // instance/vertex id landed so later code can find them by index
   info->numSysVals = 0;
   for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
      if (!(nir->info.system_values_read & 1ull << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   // compute shaders have no varying inputs/outputs; nothing left to do
   if (prog->getType() == Program::TYPE_COMPUTE)
      return true;

   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      // 64-bit types with more than two components spill into a second slot
      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      // fill one varying record per slot; for 64-bit types each logical
      // component occupies two 32-bit mask bits, split across slot pairs
      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      // 64-bit types with more than two components spill into a second slot
      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            // position doubles as the clip-vertex source unless an explicit
            // clip-vertex output was already seen
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ull << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   // synthesize clip-distance outputs when lowering user clip planes
   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}
1382
// Compute the byte address of (slot index idx, component slot) for an I/O
// intrinsic, using the slot mapping filled in by assignSlots. For 64-bit
// types each logical component takes two 32-bit component slots, so the
// component index is doubled and may carry over into the next varying.
uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   // for stores the data type comes from the value source, not the dest
   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
      ERROR("unknown intrinsic in getSlotAddress %s",
            nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

   // 64-bit components occupy two 32-bit slots; overflow past slot 3
   // moves to the next varying index
   if (typeSizeof(ty) == 8) {
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
   assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);

   const nv50_ir_varying *vary = input ? info->in : info->out;
   return vary[idx].slot[slot] * 4;  // slot table holds 32-bit word indices
}
1433
// Emit a load of one component from the given file into def. 64-bit
// values from constant/buffer memory (or with indirect addressing) are
// loaded as two 32-bit halves and merged, since those paths cannot load
// 64 bits at once. Returns the final instruction (the merge or the load).
Instruction *
Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
                    uint32_t base, uint8_t c, Value *indirect0,
                    Value *indirect1, bool patch)
{
   unsigned int tySize = typeSizeof(ty);

   if (tySize == 8 &&
       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
      Value *lo = getSSA();
      Value *hi = getSSA();

      // low 32 bits
      Instruction *loi =
         mkLoad(TYPE_U32, lo,
                mkSymbol(file, i, TYPE_U32, base + c * tySize),
                indirect0);
      loi->setIndirect(0, 1, indirect1);
      loi->perPatch = patch;

      // high 32 bits, 4 bytes further
      Instruction *hii =
         mkLoad(TYPE_U32, hi,
                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
                indirect0);
      hii->setIndirect(0, 1, indirect1);
      hii->perPatch = patch;

      return mkOp2(OP_MERGE, ty, def, lo, hi);
   } else {
      Instruction *ld =
         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
      ld->setIndirect(0, 1, indirect1);
      ld->perPatch = patch;
      return ld;
   }
}
1469
// Store one component of src to the slot addressed by (idx, c) of the
// given file. 64-bit values with indirect addressing are split into two
// 32-bit stores. OP_EXPORT sources are copied into fresh SSA values
// first, because exports need their sources in plain registers.
void
Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
                   DataType ty, Value *src, uint8_t idx, uint8_t c,
                   Value *indirect0, Value *indirect1)
{
   uint8_t size = typeSizeof(ty);
   uint32_t address = getSlotAddress(insn, idx, c);

   if (size == 8 && indirect0) {
      Value *split[2];
      mkSplit(split, 4, src);

      if (op == OP_EXPORT) {
         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
      }

      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
              split[0])->perPatch = info->out[idx].patch;
      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
              split[1])->perPatch = info->out[idx].patch;
   } else {
      if (op == OP_EXPORT)
         src = mkMov(getSSA(size), src, ty)->getDef(0);
      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
              src)->perPatch = info->out[idx].patch;
   }
}
1498
// Copy per-stage shader_info properties from the NIR shader into the
// nv50_ir_prog_info structure the backend consumes.
bool
Converter::parseNIR()
{
   info->bin.tlsSpace = 0;
   info->io.clipDistances = nir->info.clip_distance_array_size;
   info->io.cullDistances = nir->info.cull_distance_array_size;

   switch(prog->getType()) {
   case Program::TYPE_COMPUTE:
      info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
      info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
      info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
      info->bin.smemSize = nir->info.cs.shared_size;
      break;
   case Program::TYPE_FRAGMENT:
      info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
      // reading sample id/pos forces per-sample shader invocation
      info->prop.fp.persampleInvocation =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
      info->prop.fp.readsSampleLocations =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
      info->prop.fp.usesSampleMaskIn =
         !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
      break;
   case Program::TYPE_GEOMETRY:
      info->prop.gp.inputPrim = nir->info.gs.input_primitive;
      info->prop.gp.instanceCount = nir->info.gs.invocations;
      info->prop.gp.maxVertices = nir->info.gs.vertices_out;
      info->prop.gp.outputPrim = nir->info.gs.output_primitive;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // isolines are reported as a line domain to the hardware
      if (nir->info.tess.primitive_mode == GL_ISOLINES)
         info->prop.tp.domain = GL_LINES;
      else
         info->prop.tp.domain = nir->info.tess.primitive_mode;
      info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
      info->prop.tp.outputPrim =
         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
      info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
      info->prop.tp.winding = !nir->info.tess.ccw;
      break;
   case Program::TYPE_VERTEX:
      info->prop.vp.usesDrawParameters =
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
      break;
   default:
      break;
   }

   return true;
}
1555
// Convert the shader's main function: set up entry/exit blocks, emit the
// per-stage prologue, reserve local memory for register arrays, walk the
// control-flow tree, and close the program with an exit/epilogue.
bool
Converter::visit(nir_function *function)
{
   // we only support emitting the main function for now
   assert(!strcmp(function->name, "main"));
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   // scratch registers to collect the clip-vertex value for lowered
   // user clip planes (consumed in handleUserClipPlanes below)
   if (info->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      // base lane of this patch = laneid - invocation id
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      // 1/position.w, needed for perspective interpolation (OP_PINTERP)
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   // NIR register arrays are lowered to local memory
   nir_foreach_register(reg, &function->impl->registers) {
      if (reg->num_array_elems) {
         // TODO: packed variables would be nice, but MemoryOpt fails
         // replace 4 with reg->num_components
         uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
         regToLmemOffset[reg->index] = info->bin.tlsSpace;
         info->bin.tlsSpace += size;
      }
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if ((prog->getType() == Program::TYPE_VERTEX ||
        prog->getType() == Program::TYPE_TESSELLATION_EVAL)
       && info->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non main function this needs to be a OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}
1622
1623 bool
1624 Converter::visit(nir_cf_node *node)
1625 {
1626 switch (node->type) {
1627 case nir_cf_node_block:
1628 return visit(nir_cf_node_as_block(node));
1629 case nir_cf_node_if:
1630 return visit(nir_cf_node_as_if(node));
1631 case nir_cf_node_loop:
1632 return visit(nir_cf_node_as_loop(node));
1633 default:
1634 ERROR("unknown nir_cf_node type %u\n", node->type);
1635 return false;
1636 }
1637 }
1638
1639 bool
1640 Converter::visit(nir_block *block)
1641 {
1642 if (!block->predecessors->entries && block->instr_list.is_empty())
1643 return true;
1644
1645 BasicBlock *bb = convert(block);
1646
1647 setPosition(bb, true);
1648 nir_foreach_instr(insn, block) {
1649 if (!visit(insn))
1650 return false;
1651 }
1652 return true;
1653 }
1654
// Emit a NIR if: branch over the then-block when the condition is false,
// fall through otherwise, and insert JOINAT/JOIN flow when both arms
// reconverge so the hardware can rejoin divergent threads.
bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert joinats, if both nodes end up at the end of the if again.
   // the reason for this to not happens are breaks/continues/ret/... which
   // have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   // condition == 0 takes the else arm
   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastThen), true);
   // if the then-arm doesn't already end in explicit flow (break/cont/...),
   // branch over the else-arm to the tail block
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastElse), true);
   // same fall-through handling for the else-arm
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   // reconvergence point matching the JOINAT above
   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}
1715
1716 bool
1717 Converter::visit(nir_loop *loop)
1718 {
1719 curLoopDepth += 1;
1720 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1721
1722 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1723 BasicBlock *tailBB =
1724 convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1725 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1726
1727 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1728 setPosition(loopBB, false);
1729 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1730
1731 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1732 if (!visit(node))
1733 return false;
1734 }
1735 Instruction *insn = bb->getExit();
1736 if (bb->cfg.incidentCount() != 0) {
1737 if (!insn || !insn->asFlow()) {
1738 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1739 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1740 } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1741 tailBB->cfg.incidentCount() == 0) {
1742 // RA doesn't like having blocks around with no incident edge,
1743 // so we create a fake one to make it happy
1744 bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1745 }
1746 }
1747
1748 curLoopDepth -= 1;
1749
1750 return true;
1751 }
1752
// Dispatch a NIR instruction to the matching visit() overload.
bool
Converter::visit(nir_instr *insn)
{
   // we need an insertion point for on the fly generated immediate loads
   immInsertPos = bb->getExit();
   switch (insn->type) {
   case nir_instr_type_alu:
      return visit(nir_instr_as_alu(insn));
   case nir_instr_type_deref:
      return visit(nir_instr_as_deref(insn));
   case nir_instr_type_intrinsic:
      return visit(nir_instr_as_intrinsic(insn));
   case nir_instr_type_jump:
      return visit(nir_instr_as_jump(insn));
   case nir_instr_type_load_const:
      return visit(nir_instr_as_load_const(insn));
   case nir_instr_type_ssa_undef:
      return visit(nir_instr_as_ssa_undef(insn));
   case nir_instr_type_tex:
      return visit(nir_instr_as_tex(insn));
   default:
      ERROR("unknown nir_instr type %u\n", insn->type);
      return false;
   }
   // unreachable: every switch case above returns
   return true;
}
1779
1780 SVSemantic
1781 Converter::convert(nir_intrinsic_op intr)
1782 {
1783 switch (intr) {
1784 case nir_intrinsic_load_base_vertex:
1785 return SV_BASEVERTEX;
1786 case nir_intrinsic_load_base_instance:
1787 return SV_BASEINSTANCE;
1788 case nir_intrinsic_load_draw_id:
1789 return SV_DRAWID;
1790 case nir_intrinsic_load_front_face:
1791 return SV_FACE;
1792 case nir_intrinsic_load_helper_invocation:
1793 return SV_THREAD_KILL;
1794 case nir_intrinsic_load_instance_id:
1795 return SV_INSTANCE_ID;
1796 case nir_intrinsic_load_invocation_id:
1797 return SV_INVOCATION_ID;
1798 case nir_intrinsic_load_local_group_size:
1799 return SV_NTID;
1800 case nir_intrinsic_load_local_invocation_id:
1801 return SV_TID;
1802 case nir_intrinsic_load_num_work_groups:
1803 return SV_NCTAID;
1804 case nir_intrinsic_load_patch_vertices_in:
1805 return SV_VERTEX_COUNT;
1806 case nir_intrinsic_load_primitive_id:
1807 return SV_PRIMITIVE_ID;
1808 case nir_intrinsic_load_sample_id:
1809 return SV_SAMPLE_INDEX;
1810 case nir_intrinsic_load_sample_mask_in:
1811 return SV_SAMPLE_MASK;
1812 case nir_intrinsic_load_sample_pos:
1813 return SV_SAMPLE_POS;
1814 case nir_intrinsic_load_subgroup_eq_mask:
1815 return SV_LANEMASK_EQ;
1816 case nir_intrinsic_load_subgroup_ge_mask:
1817 return SV_LANEMASK_GE;
1818 case nir_intrinsic_load_subgroup_gt_mask:
1819 return SV_LANEMASK_GT;
1820 case nir_intrinsic_load_subgroup_le_mask:
1821 return SV_LANEMASK_LE;
1822 case nir_intrinsic_load_subgroup_lt_mask:
1823 return SV_LANEMASK_LT;
1824 case nir_intrinsic_load_subgroup_invocation:
1825 return SV_LANEID;
1826 case nir_intrinsic_load_tess_coord:
1827 return SV_TESS_COORD;
1828 case nir_intrinsic_load_tess_level_inner:
1829 return SV_TESS_INNER;
1830 case nir_intrinsic_load_tess_level_outer:
1831 return SV_TESS_OUTER;
1832 case nir_intrinsic_load_vertex_id:
1833 return SV_VERTEX_ID;
1834 case nir_intrinsic_load_work_group_id:
1835 return SV_CTAID;
1836 default:
1837 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1838 nir_intrinsic_infos[intr].name);
1839 assert(false);
1840 return SV_LAST;
1841 }
1842 }
1843
// Translate a GL image-format enum into the backend's ImgFormat.
// Unknown formats return FMT_NONE (and assert in debug builds).
ImgFormat
Converter::convertGLImgFormat(GLuint format)
{
// shorthand: GL_<a> maps to nv50_ir::FMT_<b>
#define FMT_CASE(a, b) \
  case GL_ ## a: return nv50_ir::FMT_ ## b

   switch (format) {
   FMT_CASE(NONE, NONE);

   // float formats
   FMT_CASE(RGBA32F, RGBA32F);
   FMT_CASE(RGBA16F, RGBA16F);
   FMT_CASE(RG32F, RG32F);
   FMT_CASE(RG16F, RG16F);
   FMT_CASE(R11F_G11F_B10F, R11G11B10F);
   FMT_CASE(R32F, R32F);
   FMT_CASE(R16F, R16F);

   // unsigned integer formats
   FMT_CASE(RGBA32UI, RGBA32UI);
   FMT_CASE(RGBA16UI, RGBA16UI);
   FMT_CASE(RGB10_A2UI, RGB10A2UI);
   FMT_CASE(RGBA8UI, RGBA8UI);
   FMT_CASE(RG32UI, RG32UI);
   FMT_CASE(RG16UI, RG16UI);
   FMT_CASE(RG8UI, RG8UI);
   FMT_CASE(R32UI, R32UI);
   FMT_CASE(R16UI, R16UI);
   FMT_CASE(R8UI, R8UI);

   // signed integer formats
   FMT_CASE(RGBA32I, RGBA32I);
   FMT_CASE(RGBA16I, RGBA16I);
   FMT_CASE(RGBA8I, RGBA8I);
   FMT_CASE(RG32I, RG32I);
   FMT_CASE(RG16I, RG16I);
   FMT_CASE(RG8I, RG8I);
   FMT_CASE(R32I, R32I);
   FMT_CASE(R16I, R16I);
   FMT_CASE(R8I, R8I);

   // unsigned normalized formats
   FMT_CASE(RGBA16, RGBA16);
   FMT_CASE(RGB10_A2, RGB10A2);
   FMT_CASE(RGBA8, RGBA8);
   FMT_CASE(RG16, RG16);
   FMT_CASE(RG8, RG8);
   FMT_CASE(R16, R16);
   FMT_CASE(R8, R8);

   // signed normalized formats
   FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
   FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
   FMT_CASE(RG16_SNORM, RG16_SNORM);
   FMT_CASE(RG8_SNORM, RG8_SNORM);
   FMT_CASE(R16_SNORM, R16_SNORM);
   FMT_CASE(R8_SNORM, R8_SNORM);

   FMT_CASE(BGRA_INTEGER, BGRA8);
   default:
      ERROR("unknown format %x\n", format);
      assert(false);
      return nv50_ir::FMT_NONE;
   }
#undef FMT_CASE
}
1905
1906 bool
1907 Converter::visit(nir_intrinsic_instr *insn)
1908 {
1909 nir_intrinsic_op op = insn->intrinsic;
1910 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1911
1912 switch (op) {
1913 case nir_intrinsic_load_uniform: {
1914 LValues &newDefs = convert(&insn->dest);
1915 const DataType dType = getDType(insn);
1916 Value *indirect;
1917 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1918 for (uint8_t i = 0; i < insn->num_components; ++i) {
1919 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1920 }
1921 break;
1922 }
1923 case nir_intrinsic_store_output:
1924 case nir_intrinsic_store_per_vertex_output: {
1925 Value *indirect;
1926 DataType dType = getSType(insn->src[0], false, false);
1927 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1928
1929 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1930 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1931 continue;
1932
1933 uint8_t offset = 0;
1934 Value *src = getSrc(&insn->src[0], i);
1935 switch (prog->getType()) {
1936 case Program::TYPE_FRAGMENT: {
1937 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1938 // TGSI uses a different interface than NIR, TGSI stores that
1939 // value in the z component, NIR in X
1940 offset += 2;
1941 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1942 }
1943 break;
1944 }
1945 case Program::TYPE_GEOMETRY:
1946 case Program::TYPE_VERTEX: {
1947 if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1948 mkMov(clipVtx[i], src);
1949 src = clipVtx[i];
1950 }
1951 break;
1952 }
1953 default:
1954 break;
1955 }
1956
1957 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1958 }
1959 break;
1960 }
1961 case nir_intrinsic_load_input:
1962 case nir_intrinsic_load_interpolated_input:
1963 case nir_intrinsic_load_output: {
1964 LValues &newDefs = convert(&insn->dest);
1965
1966 // FBFetch
1967 if (prog->getType() == Program::TYPE_FRAGMENT &&
1968 op == nir_intrinsic_load_output) {
1969 std::vector<Value*> defs, srcs;
1970 uint8_t mask = 0;
1971
1972 srcs.push_back(getSSA());
1973 srcs.push_back(getSSA());
1974 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1975 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1976 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1977 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1978
1979 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1980 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1981
1982 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1983 defs.push_back(newDefs[i]);
1984 mask |= 1 << i;
1985 }
1986
1987 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1988 texi->tex.levelZero = 1;
1989 texi->tex.mask = mask;
1990 texi->tex.useOffsets = 0;
1991 texi->tex.r = 0xffff;
1992 texi->tex.s = 0xffff;
1993
1994 info->prop.fp.readsFramebuffer = true;
1995 break;
1996 }
1997
1998 const DataType dType = getDType(insn);
1999 Value *indirect;
2000 bool input = op != nir_intrinsic_load_output;
2001 operation nvirOp;
2002 uint32_t mode = 0;
2003
2004 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
2005 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
2006
2007 // see load_barycentric_* handling
2008 if (prog->getType() == Program::TYPE_FRAGMENT) {
2009 mode = translateInterpMode(&vary, nvirOp);
2010 if (op == nir_intrinsic_load_interpolated_input) {
2011 ImmediateValue immMode;
2012 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
2013 mode |= immMode.reg.data.u32;
2014 }
2015 }
2016
2017 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2018 uint32_t address = getSlotAddress(insn, idx, i);
2019 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
2020 if (prog->getType() == Program::TYPE_FRAGMENT) {
2021 int s = 1;
2022 if (typeSizeof(dType) == 8) {
2023 Value *lo = getSSA();
2024 Value *hi = getSSA();
2025 Instruction *interp;
2026
2027 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
2028 if (nvirOp == OP_PINTERP)
2029 interp->setSrc(s++, fp.position);
2030 if (mode & NV50_IR_INTERP_OFFSET)
2031 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2032 interp->setInterpolate(mode);
2033 interp->setIndirect(0, 0, indirect);
2034
2035 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
2036 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
2037 if (nvirOp == OP_PINTERP)
2038 interp->setSrc(s++, fp.position);
2039 if (mode & NV50_IR_INTERP_OFFSET)
2040 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2041 interp->setInterpolate(mode);
2042 interp->setIndirect(0, 0, indirect);
2043
2044 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2045 } else {
2046 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2047 if (nvirOp == OP_PINTERP)
2048 interp->setSrc(s++, fp.position);
2049 if (mode & NV50_IR_INTERP_OFFSET)
2050 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2051 interp->setInterpolate(mode);
2052 interp->setIndirect(0, 0, indirect);
2053 }
2054 } else {
2055 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2056 }
2057 }
2058 break;
2059 }
2060 case nir_intrinsic_load_barycentric_at_offset:
2061 case nir_intrinsic_load_barycentric_at_sample:
2062 case nir_intrinsic_load_barycentric_centroid:
2063 case nir_intrinsic_load_barycentric_pixel:
2064 case nir_intrinsic_load_barycentric_sample: {
2065 LValues &newDefs = convert(&insn->dest);
2066 uint32_t mode;
2067
2068 if (op == nir_intrinsic_load_barycentric_centroid ||
2069 op == nir_intrinsic_load_barycentric_sample) {
2070 mode = NV50_IR_INTERP_CENTROID;
2071 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
2072 Value *offs[2];
2073 for (uint8_t c = 0; c < 2; c++) {
2074 offs[c] = getScratch();
2075 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2076 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2077 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2078 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2079 }
2080 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2081
2082 mode = NV50_IR_INTERP_OFFSET;
2083 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2084 mode = NV50_IR_INTERP_DEFAULT;
2085 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2086 info->prop.fp.readsSampleLocations = true;
2087 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2088 mode = NV50_IR_INTERP_OFFSET;
2089 } else {
2090 unreachable("all intrinsics already handled above");
2091 }
2092
2093 loadImm(newDefs[1], mode);
2094 break;
2095 }
2096 case nir_intrinsic_discard:
2097 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2098 break;
2099 case nir_intrinsic_discard_if: {
2100 Value *pred = getSSA(1, FILE_PREDICATE);
2101 if (insn->num_components > 1) {
2102 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2103 assert(false);
2104 return false;
2105 }
2106 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2107 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2108 break;
2109 }
2110 case nir_intrinsic_load_base_vertex:
2111 case nir_intrinsic_load_base_instance:
2112 case nir_intrinsic_load_draw_id:
2113 case nir_intrinsic_load_front_face:
2114 case nir_intrinsic_load_helper_invocation:
2115 case nir_intrinsic_load_instance_id:
2116 case nir_intrinsic_load_invocation_id:
2117 case nir_intrinsic_load_local_group_size:
2118 case nir_intrinsic_load_local_invocation_id:
2119 case nir_intrinsic_load_num_work_groups:
2120 case nir_intrinsic_load_patch_vertices_in:
2121 case nir_intrinsic_load_primitive_id:
2122 case nir_intrinsic_load_sample_id:
2123 case nir_intrinsic_load_sample_mask_in:
2124 case nir_intrinsic_load_sample_pos:
2125 case nir_intrinsic_load_subgroup_eq_mask:
2126 case nir_intrinsic_load_subgroup_ge_mask:
2127 case nir_intrinsic_load_subgroup_gt_mask:
2128 case nir_intrinsic_load_subgroup_le_mask:
2129 case nir_intrinsic_load_subgroup_lt_mask:
2130 case nir_intrinsic_load_subgroup_invocation:
2131 case nir_intrinsic_load_tess_coord:
2132 case nir_intrinsic_load_tess_level_inner:
2133 case nir_intrinsic_load_tess_level_outer:
2134 case nir_intrinsic_load_vertex_id:
2135 case nir_intrinsic_load_work_group_id: {
2136 const DataType dType = getDType(insn);
2137 SVSemantic sv = convert(op);
2138 LValues &newDefs = convert(&insn->dest);
2139
2140 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2141 Value *def;
2142 if (typeSizeof(dType) == 8)
2143 def = getSSA();
2144 else
2145 def = newDefs[i];
2146
2147 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2148 loadImm(def, 0u);
2149 } else {
2150 Symbol *sym = mkSysVal(sv, i);
2151 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2152 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2153 rdsv->perPatch = 1;
2154 }
2155
2156 if (typeSizeof(dType) == 8)
2157 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2158 }
2159 break;
2160 }
2161 // constants
2162 case nir_intrinsic_load_subgroup_size: {
2163 LValues &newDefs = convert(&insn->dest);
2164 loadImm(newDefs[0], 32u);
2165 break;
2166 }
2167 case nir_intrinsic_vote_all:
2168 case nir_intrinsic_vote_any:
2169 case nir_intrinsic_vote_ieq: {
2170 LValues &newDefs = convert(&insn->dest);
2171 Value *pred = getScratch(1, FILE_PREDICATE);
2172 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2173 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2174 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2175 break;
2176 }
2177 case nir_intrinsic_ballot: {
2178 LValues &newDefs = convert(&insn->dest);
2179 Value *pred = getSSA(1, FILE_PREDICATE);
2180 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2181 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2182 break;
2183 }
2184 case nir_intrinsic_read_first_invocation:
2185 case nir_intrinsic_read_invocation: {
2186 LValues &newDefs = convert(&insn->dest);
2187 const DataType dType = getDType(insn);
2188 Value *tmp = getScratch();
2189
2190 if (op == nir_intrinsic_read_first_invocation) {
2191 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2192 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2193 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2194 } else
2195 tmp = getSrc(&insn->src[1], 0);
2196
2197 for (uint8_t i = 0; i < insn->num_components; ++i) {
2198 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2199 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2200 }
2201 break;
2202 }
2203 case nir_intrinsic_load_per_vertex_input: {
2204 const DataType dType = getDType(insn);
2205 LValues &newDefs = convert(&insn->dest);
2206 Value *indirectVertex;
2207 Value *indirectOffset;
2208 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2209 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2210
2211 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2212 mkImm(baseVertex), indirectVertex);
2213 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2214 uint32_t address = getSlotAddress(insn, idx, i);
2215 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2216 indirectOffset, vtxBase, info->in[idx].patch);
2217 }
2218 break;
2219 }
2220 case nir_intrinsic_load_per_vertex_output: {
2221 const DataType dType = getDType(insn);
2222 LValues &newDefs = convert(&insn->dest);
2223 Value *indirectVertex;
2224 Value *indirectOffset;
2225 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2226 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2227 Value *vtxBase = NULL;
2228
2229 if (indirectVertex)
2230 vtxBase = indirectVertex;
2231 else
2232 vtxBase = loadImm(NULL, baseVertex);
2233
2234 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2235
2236 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2237 uint32_t address = getSlotAddress(insn, idx, i);
2238 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2239 indirectOffset, vtxBase, info->in[idx].patch);
2240 }
2241 break;
2242 }
2243 case nir_intrinsic_emit_vertex:
2244 if (info->io.genUserClip > 0)
2245 handleUserClipPlanes();
2246 // fallthrough
2247 case nir_intrinsic_end_primitive: {
2248 uint32_t idx = nir_intrinsic_stream_id(insn);
2249 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2250 break;
2251 }
2252 case nir_intrinsic_load_ubo: {
2253 const DataType dType = getDType(insn);
2254 LValues &newDefs = convert(&insn->dest);
2255 Value *indirectIndex;
2256 Value *indirectOffset;
2257 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2258 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2259
2260 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2261 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2262 indirectOffset, indirectIndex);
2263 }
2264 break;
2265 }
2266 case nir_intrinsic_get_buffer_size: {
2267 LValues &newDefs = convert(&insn->dest);
2268 const DataType dType = getDType(insn);
2269 Value *indirectBuffer;
2270 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2271
2272 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2273 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2274 break;
2275 }
2276 case nir_intrinsic_store_ssbo: {
2277 DataType sType = getSType(insn->src[0], false, false);
2278 Value *indirectBuffer;
2279 Value *indirectOffset;
2280 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2281 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2282
2283 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2284 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2285 continue;
2286 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2287 offset + i * typeSizeof(sType));
2288 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2289 ->setIndirect(0, 1, indirectBuffer);
2290 }
2291 info->io.globalAccess |= 0x2;
2292 break;
2293 }
2294 case nir_intrinsic_load_ssbo: {
2295 const DataType dType = getDType(insn);
2296 LValues &newDefs = convert(&insn->dest);
2297 Value *indirectBuffer;
2298 Value *indirectOffset;
2299 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2300 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2301
2302 for (uint8_t i = 0u; i < insn->num_components; ++i)
2303 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2304 indirectOffset, indirectBuffer);
2305
2306 info->io.globalAccess |= 0x1;
2307 break;
2308 }
2309 case nir_intrinsic_shared_atomic_add:
2310 case nir_intrinsic_shared_atomic_and:
2311 case nir_intrinsic_shared_atomic_comp_swap:
2312 case nir_intrinsic_shared_atomic_exchange:
2313 case nir_intrinsic_shared_atomic_or:
2314 case nir_intrinsic_shared_atomic_imax:
2315 case nir_intrinsic_shared_atomic_imin:
2316 case nir_intrinsic_shared_atomic_umax:
2317 case nir_intrinsic_shared_atomic_umin:
2318 case nir_intrinsic_shared_atomic_xor: {
2319 const DataType dType = getDType(insn);
2320 LValues &newDefs = convert(&insn->dest);
2321 Value *indirectOffset;
2322 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2323 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2324 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2325 if (op == nir_intrinsic_shared_atomic_comp_swap)
2326 atom->setSrc(2, getSrc(&insn->src[2], 0));
2327 atom->setIndirect(0, 0, indirectOffset);
2328 atom->subOp = getSubOp(op);
2329 break;
2330 }
2331 case nir_intrinsic_ssbo_atomic_add:
2332 case nir_intrinsic_ssbo_atomic_and:
2333 case nir_intrinsic_ssbo_atomic_comp_swap:
2334 case nir_intrinsic_ssbo_atomic_exchange:
2335 case nir_intrinsic_ssbo_atomic_or:
2336 case nir_intrinsic_ssbo_atomic_imax:
2337 case nir_intrinsic_ssbo_atomic_imin:
2338 case nir_intrinsic_ssbo_atomic_umax:
2339 case nir_intrinsic_ssbo_atomic_umin:
2340 case nir_intrinsic_ssbo_atomic_xor: {
2341 const DataType dType = getDType(insn);
2342 LValues &newDefs = convert(&insn->dest);
2343 Value *indirectBuffer;
2344 Value *indirectOffset;
2345 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2346 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2347
2348 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2349 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2350 getSrc(&insn->src[2], 0));
2351 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2352 atom->setSrc(2, getSrc(&insn->src[3], 0));
2353 atom->setIndirect(0, 0, indirectOffset);
2354 atom->setIndirect(0, 1, indirectBuffer);
2355 atom->subOp = getSubOp(op);
2356
2357 info->io.globalAccess |= 0x2;
2358 break;
2359 }
2360 case nir_intrinsic_bindless_image_atomic_add:
2361 case nir_intrinsic_bindless_image_atomic_and:
2362 case nir_intrinsic_bindless_image_atomic_comp_swap:
2363 case nir_intrinsic_bindless_image_atomic_exchange:
2364 case nir_intrinsic_bindless_image_atomic_max:
2365 case nir_intrinsic_bindless_image_atomic_min:
2366 case nir_intrinsic_bindless_image_atomic_or:
2367 case nir_intrinsic_bindless_image_atomic_xor:
2368 case nir_intrinsic_bindless_image_load:
2369 case nir_intrinsic_bindless_image_samples:
2370 case nir_intrinsic_bindless_image_size:
2371 case nir_intrinsic_bindless_image_store: {
2372 std::vector<Value*> srcs, defs;
2373 Value *indirect = getSrc(&insn->src[0], 0);
2374 DataType ty;
2375
2376 uint32_t mask = 0;
2377 TexInstruction::Target target =
2378 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2379 unsigned int argCount = getNIRArgCount(target);
2380 uint16_t location = 0;
2381
2382 if (opInfo.has_dest) {
2383 LValues &newDefs = convert(&insn->dest);
2384 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2385 defs.push_back(newDefs[i]);
2386 mask |= 1 << i;
2387 }
2388 }
2389
2390 switch (op) {
2391 case nir_intrinsic_bindless_image_atomic_add:
2392 case nir_intrinsic_bindless_image_atomic_and:
2393 case nir_intrinsic_bindless_image_atomic_comp_swap:
2394 case nir_intrinsic_bindless_image_atomic_exchange:
2395 case nir_intrinsic_bindless_image_atomic_max:
2396 case nir_intrinsic_bindless_image_atomic_min:
2397 case nir_intrinsic_bindless_image_atomic_or:
2398 case nir_intrinsic_bindless_image_atomic_xor:
2399 ty = getDType(insn);
2400 mask = 0x1;
2401 info->io.globalAccess |= 0x2;
2402 break;
2403 case nir_intrinsic_bindless_image_load:
2404 ty = TYPE_U32;
2405 info->io.globalAccess |= 0x1;
2406 break;
2407 case nir_intrinsic_bindless_image_store:
2408 ty = TYPE_U32;
2409 mask = 0xf;
2410 info->io.globalAccess |= 0x2;
2411 break;
2412 case nir_intrinsic_bindless_image_samples:
2413 mask = 0x8;
2414 ty = TYPE_U32;
2415 break;
2416 case nir_intrinsic_bindless_image_size:
2417 ty = TYPE_U32;
2418 break;
2419 default:
2420 unreachable("unhandled image opcode");
2421 break;
2422 }
2423
2424 // coords
2425 if (opInfo.num_srcs >= 2)
2426 for (unsigned int i = 0u; i < argCount; ++i)
2427 srcs.push_back(getSrc(&insn->src[1], i));
2428
2429 // the sampler is just another src added after coords
2430 if (opInfo.num_srcs >= 3 && target.isMS())
2431 srcs.push_back(getSrc(&insn->src[2], 0));
2432
2433 if (opInfo.num_srcs >= 4) {
2434 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2435 for (uint8_t i = 0u; i < components; ++i)
2436 srcs.push_back(getSrc(&insn->src[3], i));
2437 }
2438
2439 if (opInfo.num_srcs >= 5)
2440 // 1 for aotmic swap
2441 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2442 srcs.push_back(getSrc(&insn->src[4], i));
2443
2444 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2445 texi->tex.bindless = false;
2446 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(nir_intrinsic_format(insn))];
2447 texi->tex.mask = mask;
2448 texi->tex.bindless = true;
2449 texi->cache = convert(nir_intrinsic_access(insn));
2450 texi->setType(ty);
2451 texi->subOp = getSubOp(op);
2452
2453 if (indirect)
2454 texi->setIndirectR(indirect);
2455
2456 break;
2457 }
2458 case nir_intrinsic_image_deref_atomic_add:
2459 case nir_intrinsic_image_deref_atomic_and:
2460 case nir_intrinsic_image_deref_atomic_comp_swap:
2461 case nir_intrinsic_image_deref_atomic_exchange:
2462 case nir_intrinsic_image_deref_atomic_max:
2463 case nir_intrinsic_image_deref_atomic_min:
2464 case nir_intrinsic_image_deref_atomic_or:
2465 case nir_intrinsic_image_deref_atomic_xor:
2466 case nir_intrinsic_image_deref_load:
2467 case nir_intrinsic_image_deref_samples:
2468 case nir_intrinsic_image_deref_size:
2469 case nir_intrinsic_image_deref_store: {
2470 const nir_variable *tex;
2471 std::vector<Value*> srcs, defs;
2472 Value *indirect;
2473 DataType ty;
2474
2475 uint32_t mask = 0;
2476 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2477 const glsl_type *type = deref->type;
2478 TexInstruction::Target target =
2479 convert((glsl_sampler_dim)type->sampler_dimensionality,
2480 type->sampler_array, type->sampler_shadow);
2481 unsigned int argCount = getNIRArgCount(target);
2482 uint16_t location = handleDeref(deref, indirect, tex);
2483
2484 if (opInfo.has_dest) {
2485 LValues &newDefs = convert(&insn->dest);
2486 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2487 defs.push_back(newDefs[i]);
2488 mask |= 1 << i;
2489 }
2490 }
2491
2492 switch (op) {
2493 case nir_intrinsic_image_deref_atomic_add:
2494 case nir_intrinsic_image_deref_atomic_and:
2495 case nir_intrinsic_image_deref_atomic_comp_swap:
2496 case nir_intrinsic_image_deref_atomic_exchange:
2497 case nir_intrinsic_image_deref_atomic_max:
2498 case nir_intrinsic_image_deref_atomic_min:
2499 case nir_intrinsic_image_deref_atomic_or:
2500 case nir_intrinsic_image_deref_atomic_xor:
2501 ty = getDType(insn);
2502 mask = 0x1;
2503 info->io.globalAccess |= 0x2;
2504 break;
2505 case nir_intrinsic_image_deref_load:
2506 ty = TYPE_U32;
2507 info->io.globalAccess |= 0x1;
2508 break;
2509 case nir_intrinsic_image_deref_store:
2510 ty = TYPE_U32;
2511 mask = 0xf;
2512 info->io.globalAccess |= 0x2;
2513 break;
2514 case nir_intrinsic_image_deref_samples:
2515 mask = 0x8;
2516 ty = TYPE_U32;
2517 break;
2518 case nir_intrinsic_image_deref_size:
2519 ty = TYPE_U32;
2520 break;
2521 default:
2522 unreachable("unhandled image opcode");
2523 break;
2524 }
2525
2526 // coords
2527 if (opInfo.num_srcs >= 2)
2528 for (unsigned int i = 0u; i < argCount; ++i)
2529 srcs.push_back(getSrc(&insn->src[1], i));
2530
2531 // the sampler is just another src added after coords
2532 if (opInfo.num_srcs >= 3 && target.isMS())
2533 srcs.push_back(getSrc(&insn->src[2], 0));
2534
2535 if (opInfo.num_srcs >= 4) {
2536 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2537 for (uint8_t i = 0u; i < components; ++i)
2538 srcs.push_back(getSrc(&insn->src[3], i));
2539 }
2540
2541 if (opInfo.num_srcs >= 5)
2542 // 1 for aotmic swap
2543 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2544 srcs.push_back(getSrc(&insn->src[4], i));
2545
2546 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2547 texi->tex.bindless = false;
2548 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2549 texi->tex.mask = mask;
2550 texi->cache = getCacheModeFromVar(tex);
2551 texi->setType(ty);
2552 texi->subOp = getSubOp(op);
2553
2554 if (indirect)
2555 texi->setIndirectR(indirect);
2556
2557 break;
2558 }
2559 case nir_intrinsic_store_shared: {
2560 DataType sType = getSType(insn->src[0], false, false);
2561 Value *indirectOffset;
2562 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2563
2564 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2565 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2566 continue;
2567 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2568 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2569 }
2570 break;
2571 }
2572 case nir_intrinsic_load_shared: {
2573 const DataType dType = getDType(insn);
2574 LValues &newDefs = convert(&insn->dest);
2575 Value *indirectOffset;
2576 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2577
2578 for (uint8_t i = 0u; i < insn->num_components; ++i)
2579 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2580
2581 break;
2582 }
2583 case nir_intrinsic_barrier: {
2584 // TODO: add flag to shader_info
2585 info->numBarriers = 1;
2586 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2587 bar->fixed = 1;
2588 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2589 break;
2590 }
2591 case nir_intrinsic_group_memory_barrier:
2592 case nir_intrinsic_memory_barrier:
2593 case nir_intrinsic_memory_barrier_atomic_counter:
2594 case nir_intrinsic_memory_barrier_buffer:
2595 case nir_intrinsic_memory_barrier_image:
2596 case nir_intrinsic_memory_barrier_shared: {
2597 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2598 bar->fixed = 1;
2599 bar->subOp = getSubOp(op);
2600 break;
2601 }
2602 case nir_intrinsic_shader_clock: {
2603 const DataType dType = getDType(insn);
2604 LValues &newDefs = convert(&insn->dest);
2605
2606 loadImm(newDefs[0], 0u);
2607 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2608 break;
2609 }
2610 default:
2611 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2612 return false;
2613 }
2614
2615 return true;
2616 }
2617
2618 bool
2619 Converter::visit(nir_jump_instr *insn)
2620 {
2621 switch (insn->type) {
2622 case nir_jump_return:
2623 // TODO: this only works in the main function
2624 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2625 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2626 break;
2627 case nir_jump_break:
2628 case nir_jump_continue: {
2629 bool isBreak = insn->type == nir_jump_break;
2630 nir_block *block = insn->instr.block;
2631 assert(!block->successors[1]);
2632 BasicBlock *target = convert(block->successors[0]);
2633 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2634 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2635 break;
2636 }
2637 default:
2638 ERROR("unknown nir_jump_type %u\n", insn->type);
2639 return false;
2640 }
2641
2642 return true;
2643 }
2644
2645 Value*
2646 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2647 {
2648 Value *val;
2649
2650 if (immInsertPos)
2651 setPosition(immInsertPos, true);
2652 else
2653 setPosition(bb, false);
2654
2655 switch (insn->def.bit_size) {
2656 case 64:
2657 val = loadImm(getSSA(8), insn->value[idx].u64);
2658 break;
2659 case 32:
2660 val = loadImm(getSSA(4), insn->value[idx].u32);
2661 break;
2662 case 16:
2663 val = loadImm(getSSA(2), insn->value[idx].u16);
2664 break;
2665 case 8:
2666 val = loadImm(getSSA(1), insn->value[idx].u8);
2667 break;
2668 default:
2669 unreachable("unhandled bit size!\n");
2670 }
2671 setPosition(bb, true);
2672 return val;
2673 }
2674
// Constants are not emitted eagerly: record the instruction keyed by its
// SSA index so convert(nir_load_const_instr *, uint8_t) can materialize
// the immediate lazily on first use.
bool
Converter::visit(nir_load_const_instr *insn)
{
   assert(insn->def.bit_size <= 64);
   immediates[insn->def.index] = insn;
   return true;
}
2682
2683 #define DEFAULT_CHECKS \
2684 if (insn->dest.dest.ssa.num_components > 1) { \
2685 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2686 return false; \
2687 } \
2688 if (insn->dest.write_mask != 1) { \
2689 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2690 return false; \
2691 }
2692 bool
2693 Converter::visit(nir_alu_instr *insn)
2694 {
2695 const nir_op op = insn->op;
2696 const nir_op_info &info = nir_op_infos[op];
2697 DataType dType = getDType(insn);
2698 const std::vector<DataType> sTypes = getSTypes(insn);
2699
2700 Instruction *oldPos = this->bb->getExit();
2701
2702 switch (op) {
2703 case nir_op_fabs:
2704 case nir_op_iabs:
2705 case nir_op_fadd:
2706 case nir_op_iadd:
2707 case nir_op_iand:
2708 case nir_op_fceil:
2709 case nir_op_fcos:
2710 case nir_op_fddx:
2711 case nir_op_fddx_coarse:
2712 case nir_op_fddx_fine:
2713 case nir_op_fddy:
2714 case nir_op_fddy_coarse:
2715 case nir_op_fddy_fine:
2716 case nir_op_fdiv:
2717 case nir_op_idiv:
2718 case nir_op_udiv:
2719 case nir_op_fexp2:
2720 case nir_op_ffloor:
2721 case nir_op_ffma:
2722 case nir_op_flog2:
2723 case nir_op_fmax:
2724 case nir_op_imax:
2725 case nir_op_umax:
2726 case nir_op_fmin:
2727 case nir_op_imin:
2728 case nir_op_umin:
2729 case nir_op_fmod:
2730 case nir_op_imod:
2731 case nir_op_umod:
2732 case nir_op_fmul:
2733 case nir_op_imul:
2734 case nir_op_imul_high:
2735 case nir_op_umul_high:
2736 case nir_op_fneg:
2737 case nir_op_ineg:
2738 case nir_op_inot:
2739 case nir_op_ior:
2740 case nir_op_pack_64_2x32_split:
2741 case nir_op_fpow:
2742 case nir_op_frcp:
2743 case nir_op_frem:
2744 case nir_op_irem:
2745 case nir_op_frsq:
2746 case nir_op_fsat:
2747 case nir_op_ishr:
2748 case nir_op_ushr:
2749 case nir_op_fsin:
2750 case nir_op_fsqrt:
2751 case nir_op_fsub:
2752 case nir_op_isub:
2753 case nir_op_ftrunc:
2754 case nir_op_ishl:
2755 case nir_op_ixor: {
2756 DEFAULT_CHECKS;
2757 LValues &newDefs = convert(&insn->dest);
2758 operation preOp = preOperationNeeded(op);
2759 if (preOp != OP_NOP) {
2760 assert(info.num_inputs < 2);
2761 Value *tmp = getSSA(typeSizeof(dType));
2762 Instruction *i0 = mkOp(preOp, dType, tmp);
2763 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2764 if (info.num_inputs) {
2765 i0->setSrc(0, getSrc(&insn->src[0]));
2766 i1->setSrc(0, tmp);
2767 }
2768 i1->subOp = getSubOp(op);
2769 } else {
2770 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2771 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2772 i->setSrc(s, getSrc(&insn->src[s]));
2773 }
2774 i->subOp = getSubOp(op);
2775 }
2776 break;
2777 }
2778 case nir_op_ifind_msb:
2779 case nir_op_ufind_msb: {
2780 DEFAULT_CHECKS;
2781 LValues &newDefs = convert(&insn->dest);
2782 dType = sTypes[0];
2783 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2784 break;
2785 }
2786 case nir_op_fround_even: {
2787 DEFAULT_CHECKS;
2788 LValues &newDefs = convert(&insn->dest);
2789 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2790 break;
2791 }
2792 // convert instructions
2793 case nir_op_f2f32:
2794 case nir_op_f2i32:
2795 case nir_op_f2u32:
2796 case nir_op_i2f32:
2797 case nir_op_i2i32:
2798 case nir_op_u2f32:
2799 case nir_op_u2u32:
2800 case nir_op_f2f64:
2801 case nir_op_f2i64:
2802 case nir_op_f2u64:
2803 case nir_op_i2f64:
2804 case nir_op_i2i64:
2805 case nir_op_u2f64:
2806 case nir_op_u2u64: {
2807 DEFAULT_CHECKS;
2808 LValues &newDefs = convert(&insn->dest);
2809 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2810 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2811 i->rnd = ROUND_Z;
2812 i->sType = sTypes[0];
2813 break;
2814 }
2815 // compare instructions
2816 case nir_op_feq32:
2817 case nir_op_ieq32:
2818 case nir_op_fge32:
2819 case nir_op_ige32:
2820 case nir_op_uge32:
2821 case nir_op_flt32:
2822 case nir_op_ilt32:
2823 case nir_op_ult32:
2824 case nir_op_fne32:
2825 case nir_op_ine32: {
2826 DEFAULT_CHECKS;
2827 LValues &newDefs = convert(&insn->dest);
2828 Instruction *i = mkCmp(getOperation(op),
2829 getCondCode(op),
2830 dType,
2831 newDefs[0],
2832 dType,
2833 getSrc(&insn->src[0]),
2834 getSrc(&insn->src[1]));
2835 if (info.num_inputs == 3)
2836 i->setSrc(2, getSrc(&insn->src[2]));
2837 i->sType = sTypes[0];
2838 break;
2839 }
2840 // those are weird ALU ops and need special handling, because
2841 // 1. they are always componend based
2842 // 2. they basically just merge multiple values into one data type
2843 case nir_op_mov:
2844 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2845 nir_reg_dest& reg = insn->dest.dest.reg;
2846 uint32_t goffset = regToLmemOffset[reg.reg->index];
2847 uint8_t comps = reg.reg->num_components;
2848 uint8_t size = reg.reg->bit_size / 8;
2849 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2850 uint32_t aoffset = csize * reg.base_offset;
2851 Value *indirect = NULL;
2852
2853 if (reg.indirect)
2854 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2855 getSrc(reg.indirect, 0), mkImm(csize));
2856
2857 for (uint8_t i = 0u; i < comps; ++i) {
2858 if (!((1u << i) & insn->dest.write_mask))
2859 continue;
2860
2861 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2862 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2863 }
2864 break;
2865 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2866 LValues &newDefs = convert(&insn->dest);
2867 nir_reg_src& reg = insn->src[0].src.reg;
2868 uint32_t goffset = regToLmemOffset[reg.reg->index];
2869 // uint8_t comps = reg.reg->num_components;
2870 uint8_t size = reg.reg->bit_size / 8;
2871 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2872 uint32_t aoffset = csize * reg.base_offset;
2873 Value *indirect = NULL;
2874
2875 if (reg.indirect)
2876 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2877
2878 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2879 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2880
2881 break;
2882 } else {
2883 LValues &newDefs = convert(&insn->dest);
2884 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2885 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2886 }
2887 }
2888 break;
2889 case nir_op_vec2:
2890 case nir_op_vec3:
2891 case nir_op_vec4: {
2892 LValues &newDefs = convert(&insn->dest);
2893 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2894 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2895 }
2896 break;
2897 }
2898 // (un)pack
2899 case nir_op_pack_64_2x32: {
2900 LValues &newDefs = convert(&insn->dest);
2901 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2902 merge->setSrc(0, getSrc(&insn->src[0], 0));
2903 merge->setSrc(1, getSrc(&insn->src[0], 1));
2904 break;
2905 }
2906 case nir_op_pack_half_2x16_split: {
2907 LValues &newDefs = convert(&insn->dest);
2908 Value *tmpH = getSSA();
2909 Value *tmpL = getSSA();
2910
2911 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2912 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2913 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2914 break;
2915 }
2916 case nir_op_unpack_half_2x16_split_x:
2917 case nir_op_unpack_half_2x16_split_y: {
2918 LValues &newDefs = convert(&insn->dest);
2919 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2920 if (op == nir_op_unpack_half_2x16_split_y)
2921 cvt->subOp = 1;
2922 break;
2923 }
2924 case nir_op_unpack_64_2x32: {
2925 LValues &newDefs = convert(&insn->dest);
2926 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2927 break;
2928 }
2929 case nir_op_unpack_64_2x32_split_x: {
2930 LValues &newDefs = convert(&insn->dest);
2931 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2932 break;
2933 }
2934 case nir_op_unpack_64_2x32_split_y: {
2935 LValues &newDefs = convert(&insn->dest);
2936 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2937 break;
2938 }
2939 // special instructions
2940 case nir_op_fsign:
2941 case nir_op_isign: {
2942 DEFAULT_CHECKS;
2943 DataType iType;
2944 if (::isFloatType(dType))
2945 iType = TYPE_F32;
2946 else
2947 iType = TYPE_S32;
2948
2949 LValues &newDefs = convert(&insn->dest);
2950 LValue *val0 = getScratch();
2951 LValue *val1 = getScratch();
2952 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2953 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2954
2955 if (dType == TYPE_F64) {
2956 mkOp2(OP_SUB, iType, val0, val0, val1);
2957 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2958 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2959 mkOp2(OP_SUB, iType, val0, val1, val0);
2960 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2961 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2962 } else if (::isFloatType(dType))
2963 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2964 else
2965 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2966 break;
2967 }
2968 case nir_op_fcsel:
2969 case nir_op_b32csel: {
2970 DEFAULT_CHECKS;
2971 LValues &newDefs = convert(&insn->dest);
2972 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2973 break;
2974 }
2975 case nir_op_ibitfield_extract:
2976 case nir_op_ubitfield_extract: {
2977 DEFAULT_CHECKS;
2978 Value *tmp = getSSA();
2979 LValues &newDefs = convert(&insn->dest);
2980 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2981 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2982 break;
2983 }
2984 case nir_op_bfm: {
2985 DEFAULT_CHECKS;
2986 LValues &newDefs = convert(&insn->dest);
2987 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2988 break;
2989 }
2990 case nir_op_bitfield_insert: {
2991 DEFAULT_CHECKS;
2992 LValues &newDefs = convert(&insn->dest);
2993 LValue *temp = getSSA();
2994 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2995 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2996 break;
2997 }
2998 case nir_op_bit_count: {
2999 DEFAULT_CHECKS;
3000 LValues &newDefs = convert(&insn->dest);
3001 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
3002 break;
3003 }
3004 case nir_op_bitfield_reverse: {
3005 DEFAULT_CHECKS;
3006 LValues &newDefs = convert(&insn->dest);
3007 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3008 break;
3009 }
3010 case nir_op_find_lsb: {
3011 DEFAULT_CHECKS;
3012 LValues &newDefs = convert(&insn->dest);
3013 Value *tmp = getSSA();
3014 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3015 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3016 break;
3017 }
3018 // boolean conversions
3019 case nir_op_b2f32: {
3020 DEFAULT_CHECKS;
3021 LValues &newDefs = convert(&insn->dest);
3022 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
3023 break;
3024 }
3025 case nir_op_b2f64: {
3026 DEFAULT_CHECKS;
3027 LValues &newDefs = convert(&insn->dest);
3028 Value *tmp = getSSA(4);
3029 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
3030 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
3031 break;
3032 }
3033 case nir_op_f2b32:
3034 case nir_op_i2b32: {
3035 DEFAULT_CHECKS;
3036 LValues &newDefs = convert(&insn->dest);
3037 Value *src1;
3038 if (typeSizeof(sTypes[0]) == 8) {
3039 src1 = loadImm(getSSA(8), 0.0);
3040 } else {
3041 src1 = zero;
3042 }
3043 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
3044 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
3045 break;
3046 }
3047 case nir_op_b2i32: {
3048 DEFAULT_CHECKS;
3049 LValues &newDefs = convert(&insn->dest);
3050 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
3051 break;
3052 }
3053 case nir_op_b2i64: {
3054 DEFAULT_CHECKS;
3055 LValues &newDefs = convert(&insn->dest);
3056 LValue *def = getScratch();
3057 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
3058 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
3059 break;
3060 }
3061 default:
3062 ERROR("unknown nir_op %s\n", info.name);
3063 return false;
3064 }
3065
3066 if (!oldPos) {
3067 oldPos = this->bb->getEntry();
3068 oldPos->precise = insn->exact;
3069 }
3070
3071 if (unlikely(!oldPos))
3072 return true;
3073
3074 while (oldPos->next) {
3075 oldPos = oldPos->next;
3076 oldPos->precise = insn->exact;
3077 }
3078 oldPos->saturate = insn->dest.saturate;
3079
3080 return true;
3081 }
3082 #undef DEFAULT_CHECKS
3083
3084 bool
3085 Converter::visit(nir_ssa_undef_instr *insn)
3086 {
3087 LValues &newDefs = convert(&insn->def);
3088 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3089 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3090 }
3091 return true;
3092 }
3093
3094 #define CASE_SAMPLER(ty) \
3095 case GLSL_SAMPLER_DIM_ ## ty : \
3096 if (isArray && !isShadow) \
3097 return TEX_TARGET_ ## ty ## _ARRAY; \
3098 else if (!isArray && isShadow) \
3099 return TEX_TARGET_## ty ## _SHADOW; \
3100 else if (isArray && isShadow) \
3101 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
3102 else \
3103 return TEX_TARGET_ ## ty
3104
3105 TexTarget
3106 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3107 {
3108 switch (dim) {
3109 CASE_SAMPLER(1D);
3110 CASE_SAMPLER(2D);
3111 CASE_SAMPLER(CUBE);
3112 case GLSL_SAMPLER_DIM_3D:
3113 return TEX_TARGET_3D;
3114 case GLSL_SAMPLER_DIM_MS:
3115 if (isArray)
3116 return TEX_TARGET_2D_MS_ARRAY;
3117 return TEX_TARGET_2D_MS;
3118 case GLSL_SAMPLER_DIM_RECT:
3119 if (isShadow)
3120 return TEX_TARGET_RECT_SHADOW;
3121 return TEX_TARGET_RECT;
3122 case GLSL_SAMPLER_DIM_BUF:
3123 return TEX_TARGET_BUFFER;
3124 case GLSL_SAMPLER_DIM_EXTERNAL:
3125 return TEX_TARGET_2D;
3126 default:
3127 ERROR("unknown glsl_sampler_dim %u\n", dim);
3128 assert(false);
3129 return TEX_TARGET_COUNT;
3130 }
3131 }
3132 #undef CASE_SAMPLER
3133
3134 Value*
3135 Converter::applyProjection(Value *src, Value *proj)
3136 {
3137 if (!proj)
3138 return src;
3139 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3140 }
3141
3142 unsigned int
3143 Converter::getNIRArgCount(TexInstruction::Target& target)
3144 {
3145 unsigned int result = target.getArgCount();
3146 if (target.isCube() && target.isArray())
3147 result--;
3148 if (target.isMS())
3149 result--;
3150 return result;
3151 }
3152
// Walk a deref chain back to its base variable. The constant part of the
// offset (including the variable's driver_location) is returned; any dynamic
// part is accumulated into `indirect` (NULL if the offset is fully constant).
// `tex` is set to the base nir_variable of the chain.
uint16_t
Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
{
   typedef std::pair<uint32_t,Value*> DerefPair;
   std::list<DerefPair> derefs;

   uint16_t result = 0;
   while (deref->deref_type != nir_deref_type_var) {
      switch (deref->deref_type) {
      case nir_deref_type_array: {
         // deliberately shadows the out-parameter: this holds only the
         // dynamic index (if any) of this particular array level
         Value *indirect;
         uint8_t size = type_size(deref->type, true);
         // constant portion of the index scaled by the element size
         result += size * getIndirect(&deref->arr.index, 0, indirect);

         if (indirect) {
            // remember (element size, dynamic index) to combine below
            derefs.push_front(std::make_pair(size, indirect));
         }

         break;
      }
      case nir_deref_type_struct: {
         // struct members contribute a fixed offset within the parent type
         result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
         break;
      }
      case nir_deref_type_var:
      default:
         unreachable("nir_deref_type_var reached in handleDeref!");
         break;
      }
      deref = nir_deref_instr_parent(deref);
   }

   // combine all dynamic per-level indices into a single indirect value:
   // sum of (size * index) over every array level that had one
   indirect = NULL;
   for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
      Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
      if (indirect)
         indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
      else
         indirect = offset;
   }

   tex = nir_deref_instr_get_variable(deref);
   assert(tex);

   return result + tex->data.driver_location;
}
3199
3200 CacheMode
3201 Converter::convert(enum gl_access_qualifier access)
3202 {
3203 switch (access) {
3204 case ACCESS_VOLATILE:
3205 return CACHE_CV;
3206 case ACCESS_COHERENT:
3207 return CACHE_CG;
3208 default:
3209 return CACHE_CA;
3210 }
3211 }
3212
// Derive the cache mode for an image variable from its access qualifiers.
CacheMode
Converter::getCacheModeFromVar(const nir_variable *var)
{
   return convert(var->data.image.access);
}
3218
// Translate a NIR texture instruction into a codegen TexInstruction.
// Gathers all NIR sources into the argument order codegen expects, sets up
// the destination mask, offsets, derivatives and query flags, and returns
// false for texture ops this converter does not handle.
bool
Converter::visit(nir_tex_instr *insn)
{
   switch (insn->op) {
   case nir_texop_lod:
   case nir_texop_query_levels:
   case nir_texop_tex:
   case nir_texop_texture_samples:
   case nir_texop_tg4:
   case nir_texop_txb:
   case nir_texop_txd:
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txl:
   case nir_texop_txs: {
      LValues &newDefs = convert(&insn->dest);
      std::vector<Value*> srcs;
      std::vector<Value*> defs;
      std::vector<nir_src*> offsets;
      uint8_t mask = 0;
      bool lz = false;         // force level-zero sampling (no implicit LOD)
      Value *proj = NULL;      // reciprocal of the projector, if any
      TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
      operation op = getOperation(insn->op);

      // locate each optional NIR source by kind (-1 if absent)
      int r, s;
      int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
      int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
      int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
      int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
      int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
      int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
      int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
      int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
      int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
      int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
      int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
      int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
      int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);

      // bindless textures provide handles instead of fixed slot indices;
      // both handles are expected to be present together
      bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
      assert((sampHandleIdx != -1) == (texHandleIdx != -1));

      if (projIdx != -1)
         proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));

      srcs.resize(insn->coord_components);
      for (uint8_t i = 0u; i < insn->coord_components; ++i)
         srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);

      // sometimes we get less args than target.getArgCount, but codegen expects the latter
      if (insn->coord_components) {
         uint32_t argCount = target.getArgCount();

         if (target.isMS())
            argCount -= 1;

         for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
            srcs.push_back(getSSA());
      }

      // queries without real coordinates still need one dummy source
      if (insn->op == nir_texop_texture_samples)
         srcs.push_back(zero);
      else if (!insn->num_srcs)
         srcs.push_back(loadImm(NULL, 0));
      if (biasIdx != -1)
         srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
      if (lodIdx != -1)
         srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
      else if (op == OP_TXF)
         lz = true;   // txf without explicit LOD fetches from level zero
      if (msIdx != -1)
         srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
      if (offsetIdx != -1)
         offsets.push_back(&insn->src[offsetIdx].src);
      if (compIdx != -1)
         srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
      // indirect texture/sampler indices become trailing sources; remember
      // their positions so we can point rIndirectSrc/sIndirectSrc at them
      if (texOffIdx != -1) {
         srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
         texOffIdx = srcs.size() - 1;
      }
      if (sampOffIdx != -1) {
         srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
         sampOffIdx = srcs.size() - 1;
      }
      if (bindless) {
         // currently we use the lower bits
         Value *split[2];
         Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);

         mkSplit(split, 4, handle);

         srcs.push_back(split[0]);
         texOffIdx = srcs.size() - 1;
      }

      r = bindless ? 0xff : insn->texture_index;
      s = bindless ? 0x1f : insn->sampler_index;

      // one destination per NIR component; mask records which are written
      defs.resize(newDefs.size());
      for (uint8_t d = 0u; d < newDefs.size(); ++d) {
         defs[d] = newDefs[d];
         mask |= 1 << d;
      }
      // implicit-LOD sampling is only valid in fragment shaders
      if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
         lz = true;

      TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
      texi->tex.levelZero = lz;
      texi->tex.mask = mask;
      texi->tex.bindless = bindless;

      if (texOffIdx != -1)
         texi->tex.rIndirectSrc = texOffIdx;
      if (sampOffIdx != -1)
         texi->tex.sIndirectSrc = sampOffIdx;

      // op-specific fixups: gather component and query kinds/masks
      switch (insn->op) {
      case nir_texop_tg4:
         if (!target.isShadow())
            texi->tex.gatherComp = insn->component;
         break;
      case nir_texop_txs:
         texi->tex.query = TXQ_DIMS;
         break;
      case nir_texop_texture_samples:
         texi->tex.mask = 0x4;
         texi->tex.query = TXQ_TYPE;
         break;
      case nir_texop_query_levels:
         texi->tex.mask = 0x8;
         texi->tex.query = TXQ_DIMS;
         break;
      default:
         break;
      }

      // attach constant offsets; missing components reuse the last coord axis
      texi->tex.useOffsets = offsets.size();
      if (texi->tex.useOffsets) {
         for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
            for (uint32_t c = 0u; c < 3; ++c) {
               uint8_t s2 = std::min(c, target.getDim() - 1);
               texi->offset[s][c].set(getSrc(offsets[s], s2));
               texi->offset[s][c].setInsn(texi);
            }
         }
      }

      // tg4 with per-texel offsets baked into the instruction itself
      if (op == OP_TXG && offsetIdx == -1) {
         if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
            texi->tex.useOffsets = 4;
            setPosition(texi, false);
            for (uint8_t i = 0; i < 4; ++i) {
               for (uint8_t j = 0; j < 2; ++j) {
                  texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
                  texi->offset[i][j].setInsn(texi);
               }
            }
            setPosition(texi, true);
         }
      }

      // explicit derivatives (txd)
      if (ddxIdx != -1 && ddyIdx != -1) {
         for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
            texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
            texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
         }
      }

      break;
   }
   default:
      ERROR("unknown nir_texop %u\n", insn->op);
      return false;
   }
   return true;
}
3396
3397 bool
3398 Converter::visit(nir_deref_instr *deref)
3399 {
3400 // we just ignore those, because images intrinsics are the only place where
3401 // we should end up with deref sources and those have to backtrack anyway
3402 // to get the nir_variable. This code just exists to handle some special
3403 // cases.
3404 switch (deref->deref_type) {
3405 case nir_deref_type_array:
3406 case nir_deref_type_struct:
3407 case nir_deref_type_var:
3408 break;
3409 default:
3410 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3411 return false;
3412 }
3413 return true;
3414 }
3415
3416 bool
3417 Converter::run()
3418 {
3419 bool progress;
3420
3421 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3422 nir_print_shader(nir, stderr);
3423
3424 struct nir_lower_subgroups_options subgroup_options = {
3425 .subgroup_size = 32,
3426 .ballot_bit_size = 32,
3427 };
3428
3429 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3430 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3431 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3432 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3433 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3434 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
3435 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3436
3437 do {
3438 progress = false;
3439 NIR_PASS(progress, nir, nir_copy_prop);
3440 NIR_PASS(progress, nir, nir_opt_remove_phis);
3441 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3442 NIR_PASS(progress, nir, nir_opt_cse);
3443 NIR_PASS(progress, nir, nir_opt_algebraic);
3444 NIR_PASS(progress, nir, nir_opt_constant_folding);
3445 NIR_PASS(progress, nir, nir_copy_prop);
3446 NIR_PASS(progress, nir, nir_opt_dce);
3447 NIR_PASS(progress, nir, nir_opt_dead_cf);
3448 } while (progress);
3449
3450 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3451 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3452 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3453 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3454
3455 // Garbage collect dead instructions
3456 nir_sweep(nir);
3457
3458 if (!parseNIR()) {
3459 ERROR("Couldn't prase NIR!\n");
3460 return false;
3461 }
3462
3463 if (!assignSlots()) {
3464 ERROR("Couldn't assign slots!\n");
3465 return false;
3466 }
3467
3468 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3469 nir_print_shader(nir, stderr);
3470
3471 nir_foreach_function(function, nir) {
3472 if (!visit(function))
3473 return false;
3474 }
3475
3476 return true;
3477 }
3478
3479 } // unnamed namespace
3480
3481 namespace nv50_ir {
3482
3483 bool
3484 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3485 {
3486 nir_shader *nir = (nir_shader*)info->bin.source;
3487 Converter converter(this, nir, info);
3488 bool result = converter.run();
3489 if (!result)
3490 return result;
3491 LoweringHelper lowering;
3492 lowering.run(this);
3493 tlsSize = info->bin.tlsSpace;
3494 return result;
3495 }
3496
3497 } // namespace nv50_ir