nir: remove fnot/fxor/fand/for opcodes
src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
/*
 * Copyright 2017 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Karol Herbst <kherbst@redhat.com>
 */

#include "compiler/nir/nir.h"

#include "util/u_debug.h"

#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_from_common.h"
#include "codegen/nv50_ir_lowering_helper.h"
#include "codegen/nv50_ir_util.h"

#if __cplusplus >= 201103L
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include <cstring>
#include <list>
#include <vector>

namespace {

#if __cplusplus >= 201103L
using std::hash;
using std::unordered_map;
#else
using std::tr1::hash;
using std::tr1::unordered_map;
#endif

using namespace nv50_ir;

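// slot-granular type size helper; presumably handed to NIR's IO lowering
// passes as the type_size callback (the bindless flag is intentionally
// ignored here)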
int
type_size(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   typedef std::vector<LValue*> LValues;
   typedef unordered_map<unsigned, LValues> NirDefMap;
   typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
   typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   CacheMode convert(enum gl_access_qualifier);
   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   Value* convert(nir_load_const_instr*, uint8_t);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   ImgFormat convertGLImgFormat(GLuint);

   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // The returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value doesn't have a constant part, the Value is returned
   // through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_intrinsic_instr *, bool isSigned);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   operation getOperation(nir_intrinsic_op);
   operation getOperation(nir_op);
   operation getOperation(nir_texop);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_intrinsic_op);
   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_deref_instr *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);
   bool visit(nir_ssa_undef_instr *);
   bool visit(nir_tex_instr *);

   // tex stuff
   Value* applyProjection(Value *src, Value *proj);
   unsigned int getNIRArgCount(TexInstruction::Target&);

   // image stuff
   uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
   CacheMode getCacheModeFromVar(const nir_variable *);

   nir_shader *nir;

   NirDefMap ssaDefs;
   NirDefMap regDefs;
   ImmediateMap immediates;
   NirArrayLMemOffsets regToLmemOffset;
   NirBlockMap blocks;
   unsigned int curLoopDepth;

   BasicBlock *exit;
   Value *zero;
   Instruction *immInsertPos;

   int clipVertexOutput;

   union {
      struct {
         Value *position;
      } fp;
   };
};

Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
   : ConverterCommon(prog, info),
     nir(nir),
     curLoopDepth(0),
     clipVertexOutput(-1)
{
   zero = mkImm((uint32_t)0);
}

BasicBlock *
Converter::convert(nir_block *block)
{
   NirBlockMap::iterator it = blocks.find(block->index);
   if (it != blocks.end())
      return it->second;

   BasicBlock *bb = new BasicBlock(func);
   blocks[block->index] = bb;
   return bb;
}

bool
Converter::isFloatType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_float;
}

bool
Converter::isSignedType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_int;
}

bool
Converter::isResultFloat(nir_op op)
{
   const nir_op_info &info = nir_op_infos[op];
   if (info.output_type != nir_type_invalid)
      return isFloatType(info.output_type);

   ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
   assert(false);
   return true;
}

bool
Converter::isResultSigned(nir_op op)
{
   switch (op) {
   // there is no umul and we get wrong results if we treat all muls as signed
   case nir_op_imul:
   case nir_op_inot:
      return false;
   default:
      const nir_op_info &info = nir_op_infos[op];
      if (info.output_type != nir_type_invalid)
         return isSignedType(info.output_type);
      ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
      assert(false);
      return true;
   }
}

DataType
Converter::getDType(nir_alu_instr *insn)
{
   if (insn->dest.dest.is_ssa)
      return getDType(insn->op, insn->dest.dest.ssa.bit_size);
   else
      return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
}

DataType
Converter::getDType(nir_intrinsic_instr *insn)
{
   bool isSigned;
   switch (insn->intrinsic) {
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imin:
      isSigned = true;
      break;
   default:
      isSigned = false;
      break;
   }

   return getDType(insn, isSigned);
}

DataType
Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
{
   if (insn->dest.is_ssa)
      return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
   else
      return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
}

DataType
Converter::getDType(nir_op op, uint8_t bitSize)
{
   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
   if (ty == TYPE_NONE) {
      ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
      assert(false);
   }
   return ty;
}

std::vector<DataType>
Converter::getSTypes(nir_alu_instr *insn)
{
   const nir_op_info &info = nir_op_infos[insn->op];
   std::vector<DataType> res(info.num_inputs);

   for (uint8_t i = 0; i < info.num_inputs; ++i) {
      if (info.input_types[i] != nir_type_invalid) {
         res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
      } else {
         ERROR("getSType not implemented for %s idx %u\n", info.name, i);
         assert(false);
         res[i] = TYPE_NONE;
         break;
      }
   }

   return res;
}

DataType
Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
{
   uint8_t bitSize;
   if (src.is_ssa)
      bitSize = src.ssa->bit_size;
   else
      bitSize = src.reg.reg->bit_size;

   DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
   if (ty == TYPE_NONE) {
      const char *str;
      if (isFloat)
         str = "float";
      else if (isSigned)
         str = "int";
      else
         str = "uint";
      ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
      assert(false);
   }
   return ty;
}

operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_inot:
      return OP_NOT;
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_fsub:
   case nir_op_isub:
      return OP_SUB;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::getOperation(nir_texop op)
{
   switch (op) {
   case nir_texop_tex:
      return OP_TEX;
   case nir_texop_lod:
      return OP_TXLQ;
   case nir_texop_txb:
      return OP_TXB;
   case nir_texop_txd:
      return OP_TXD;
   case nir_texop_txf:
   case nir_texop_txf_ms:
      return OP_TXF;
   case nir_texop_tg4:
      return OP_TXG;
   case nir_texop_txl:
      return OP_TXL;
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_txs:
      return OP_TXQ;
   default:
      ERROR("couldn't get operation for nir_texop %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::getOperation(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_emit_vertex:
      return OP_EMIT;
   case nir_intrinsic_end_primitive:
      return OP_RESTART;
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_max:
   case nir_intrinsic_image_atomic_max:
   case nir_intrinsic_image_deref_atomic_max:
   case nir_intrinsic_bindless_image_atomic_min:
   case nir_intrinsic_image_atomic_min:
   case nir_intrinsic_image_deref_atomic_min:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_xor:
      return OP_SUREDP;
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
      return OP_SULDP;
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
      return OP_SUQ;
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
      return OP_SUSTP;
   default:
      ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::preOperationNeeded(nir_op op)
{
   switch (op) {
   case nir_op_fcos:
   case nir_op_fsin:
      return OP_PRESIN;
   default:
      return OP_NOP;
   }
}

int
Converter::getSubOp(nir_op op)
{
   switch (op) {
   case nir_op_imul_high:
   case nir_op_umul_high:
      return NV50_IR_SUBOP_MUL_HIGH;
   default:
      return 0;
   }
}

int
Converter::getSubOp(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_ssbo_atomic_add:
      return NV50_IR_SUBOP_ATOM_ADD;
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_ssbo_atomic_and:
      return NV50_IR_SUBOP_ATOM_AND;
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return NV50_IR_SUBOP_ATOM_CAS;
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange:
      return NV50_IR_SUBOP_ATOM_EXCH;
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_ssbo_atomic_or:
      return NV50_IR_SUBOP_ATOM_OR;
   case nir_intrinsic_bindless_image_atomic_max:
   case nir_intrinsic_image_atomic_max:
   case nir_intrinsic_image_deref_atomic_max:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
      return NV50_IR_SUBOP_ATOM_MAX;
   case nir_intrinsic_bindless_image_atomic_min:
   case nir_intrinsic_image_atomic_min:
   case nir_intrinsic_image_deref_atomic_min:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
      return NV50_IR_SUBOP_ATOM_MIN;
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor:
      return NV50_IR_SUBOP_ATOM_XOR;

   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_atomic_counter:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
      return NV50_IR_SUBOP_MEMBAR(M, GL);
   case nir_intrinsic_memory_barrier_shared:
      return NV50_IR_SUBOP_MEMBAR(M, CTA);

   case nir_intrinsic_vote_all:
      return NV50_IR_SUBOP_VOTE_ALL;
   case nir_intrinsic_vote_any:
      return NV50_IR_SUBOP_VOTE_ANY;
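   // vote_ieq on a boolean asks whether the value is uniform across the
   // subgroup, which maps to the hardware's "uniform" vote subop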
   case nir_intrinsic_vote_ieq:
      return NV50_IR_SUBOP_VOTE_UNI;
   default:
      return 0;
   }
}

CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}

Converter::LValues&
Converter::convert(nir_alu_dest *dest)
{
   return convert(&dest->dest);
}

Converter::LValues&
Converter::convert(nir_dest *dest)
{
   if (dest->is_ssa)
      return convert(&dest->ssa);
   if (dest->reg.indirect) {
      ERROR("no support for indirects.");
      assert(false);
   }
   return convert(dest->reg.reg);
}

Converter::LValues&
Converter::convert(nir_register *reg)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it != regDefs.end())
      return it->second;

   LValues newDef(reg->num_components);
   for (uint8_t i = 0; i < reg->num_components; i++)
      newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
   return regDefs[reg->index] = newDef;
}

Converter::LValues&
Converter::convert(nir_ssa_def *def)
{
   NirDefMap::iterator it = ssaDefs.find(def->index);
   if (it != ssaDefs.end())
      return it->second;

   LValues newDef(def->num_components);
   for (uint8_t i = 0; i < def->num_components; i++)
      newDef[i] = getSSA(std::max(4, def->bit_size / 8));
   return ssaDefs[def->index] = newDef;
}

Value*
Converter::getSrc(nir_alu_src *src, uint8_t component)
{
   if (src->abs || src->negate) {
      ERROR("modifiers currently not supported on nir_alu_src\n");
      assert(false);
   }
   return getSrc(&src->src, src->swizzle[component]);
}

Value*
Converter::getSrc(nir_register *reg, uint8_t idx)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it == regDefs.end())
      return convert(reg)[idx];
   return it->second[idx];
}

Value*
Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
{
   if (src->is_ssa)
      return getSrc(src->ssa, idx);

   if (src->reg.indirect) {
      if (indirect)
         return getSrc(src->reg.indirect, idx);
      ERROR("no support for indirects.");
      assert(false);
      return NULL;
   }

   return getSrc(src->reg.reg, idx);
}

Value*
Converter::getSrc(nir_ssa_def *src, uint8_t idx)
{
   ImmediateMap::iterator iit = immediates.find(src->index);
   if (iit != immediates.end())
      return convert((*iit).second, idx);

   NirDefMap::iterator it = ssaDefs.find(src->index);
   if (it == ssaDefs.end()) {
      ERROR("SSA value %u not found\n", src->index);
      assert(false);
      return NULL;
   }
   return it->second[idx];
}

uint32_t
Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
{
   nir_const_value *offset = nir_src_as_const_value(*src);

   if (offset) {
      indirect = NULL;
      return offset[0].u32;
   }

   indirect = getSrc(src, idx, true);
   return 0;
}

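// this variant adds the intrinsic's constant base to the offset; a
// non-constant index is shifted left by 4 (i.e. scaled by 16, the byte size
// of one vec4 slot) so it can be used directly as a byte-addressed indirect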
uint32_t
Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
{
   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
   if (indirect)
      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
   return idx;
}

static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}

static void
varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VARYING_SLOT_TESS_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VARYING_SLOT_PATCH0) {
      *name = TGSI_SEMANTIC_PATCH;
      *index = slot - VARYING_SLOT_PATCH0;
      return;
   }

   if (slot >= VARYING_SLOT_VAR0) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VARYING_SLOT_VAR0;
      return;
   }

   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VARYING_SLOT_TEX0;
      return;
   }

   switch (slot) {
   case VARYING_SLOT_BFC0:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 0;
      break;
   case VARYING_SLOT_BFC1:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_DIST0:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 0;
      break;
   case VARYING_SLOT_CLIP_DIST1:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_VERTEX:
      *name = TGSI_SEMANTIC_CLIPVERTEX;
      *index = 0;
      break;
   case VARYING_SLOT_COL0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VARYING_SLOT_COL1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VARYING_SLOT_EDGE:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VARYING_SLOT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      *index = 0;
      break;
   case VARYING_SLOT_FOGC:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VARYING_SLOT_LAYER:
      *name = TGSI_SEMANTIC_LAYER;
      *index = 0;
      break;
   case VARYING_SLOT_PNTC:
      *name = TGSI_SEMANTIC_PCOORD;
      *index = 0;
      break;
   case VARYING_SLOT_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VARYING_SLOT_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      *index = 0;
      break;
   case VARYING_SLOT_PSIZ:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      *index = 0;
      break;
   case VARYING_SLOT_VIEWPORT:
      *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
      *index = 0;
      break;
   default:
      ERROR("unknown varying slot %u\n", slot);
      assert(false);
      break;
   }
}

static void
frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
{
   if (slot >= FRAG_RESULT_DATA0) {
      *name = TGSI_SEMANTIC_COLOR;
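      // equivalent to (slot - FRAG_RESULT_DATA0), assuming FRAG_RESULT_DATA0
      // sits two entries after FRAG_RESULT_COLOR in the gl_frag_result enum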
      *index = slot - FRAG_RESULT_COLOR - 2; // intentional
      return;
   }

   switch (slot) {
   case FRAG_RESULT_COLOR:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case FRAG_RESULT_DEPTH:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      *index = 0;
      break;
   default:
      ERROR("unknown frag result slot %u\n", slot);
      assert(false);
      break;
   }
}

// copy of _mesa_sysval_to_semantic
static void
system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
{
   *index = 0;
   switch (val) {
   // Vertex shader
   case SYSTEM_VALUE_VERTEX_ID:
      *name = TGSI_SEMANTIC_VERTEXID;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
      *name = TGSI_SEMANTIC_INSTANCEID;
      break;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
      break;
   case SYSTEM_VALUE_BASE_VERTEX:
      *name = TGSI_SEMANTIC_BASEVERTEX;
      break;
   case SYSTEM_VALUE_BASE_INSTANCE:
      *name = TGSI_SEMANTIC_BASEINSTANCE;
      break;
   case SYSTEM_VALUE_DRAW_ID:
      *name = TGSI_SEMANTIC_DRAWID;
      break;

   // Geometry shader
   case SYSTEM_VALUE_INVOCATION_ID:
      *name = TGSI_SEMANTIC_INVOCATIONID;
      break;

   // Fragment shader
   case SYSTEM_VALUE_FRAG_COORD:
      *name = TGSI_SEMANTIC_POSITION;
      break;
   case SYSTEM_VALUE_FRONT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      break;
   case SYSTEM_VALUE_SAMPLE_ID:
      *name = TGSI_SEMANTIC_SAMPLEID;
      break;
   case SYSTEM_VALUE_SAMPLE_POS:
      *name = TGSI_SEMANTIC_SAMPLEPOS;
      break;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      break;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      *name = TGSI_SEMANTIC_HELPER_INVOCATION;
      break;

   // Tessellation shader
   case SYSTEM_VALUE_TESS_COORD:
      *name = TGSI_SEMANTIC_TESSCOORD;
      break;
   case SYSTEM_VALUE_VERTICES_IN:
      *name = TGSI_SEMANTIC_VERTICESIN;
      break;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      break;

   // Compute shader
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      *name = TGSI_SEMANTIC_THREAD_ID;
      break;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      *name = TGSI_SEMANTIC_BLOCK_ID;
      break;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      *name = TGSI_SEMANTIC_GRID_SIZE;
      break;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      *name = TGSI_SEMANTIC_BLOCK_SIZE;
      break;

   // ARB_shader_ballot
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
      break;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
      break;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
      break;

   default:
      ERROR("unknown system value %u\n", val);
      assert(false);
      break;
   }
}

void
Converter::setInterpolate(nv50_ir_varying *var,
                          uint8_t mode,
                          bool centroid,
                          unsigned semantic)
{
   switch (mode) {
   case INTERP_MODE_FLAT:
      var->flat = 1;
      break;
   case INTERP_MODE_NONE:
      if (semantic == TGSI_SEMANTIC_COLOR)
         var->sc = 1;
      else if (semantic == TGSI_SEMANTIC_POSITION)
         var->linear = 1;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      var->linear = 1;
      break;
   case INTERP_MODE_SMOOTH:
      break;
   }
   var->centroid = centroid;
}

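// returns the number of I/O slots a variable occupies; for arrayed
// per-vertex I/O (GS inputs, tessellation) the outermost array dimension is
// stripped, since it indexes vertices rather than locations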
static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}

bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;

   // we have to fix up the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
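         // 64-bit types span two 32-bit slots, so the mask below uses two
         // bits per component; an odd slot takes the upper half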
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   info->numOutputs = 0;
   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ull << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   info->numSysVals = 0;
   for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
      if (!(nir->info.system_values_read & 1ull << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}

uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
      ERROR("unknown intrinsic in getSlotAddress %s",
            nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

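   // for 64-bit types each component occupies two 32-bit slots, so the
   // component index has to be rescaled and may carry over into the next
   // location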
   if (typeSizeof(ty) == 8) {
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
   assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);

   const nv50_ir_varying *vary = input ? info->in : info->out;
   return vary[idx].slot[slot] * 4;
}

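// emits a load from the given file; 64-bit loads from constant/buffer
// memory or through an indirect are split into two 32-bit loads and merged
// afterwards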
Instruction *
Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
                    uint32_t base, uint8_t c, Value *indirect0,
                    Value *indirect1, bool patch)
{
   unsigned int tySize = typeSizeof(ty);

   if (tySize == 8 &&
       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
      Value *lo = getSSA();
      Value *hi = getSSA();

      Instruction *loi =
         mkLoad(TYPE_U32, lo,
                mkSymbol(file, i, TYPE_U32, base + c * tySize),
                indirect0);
      loi->setIndirect(0, 1, indirect1);
      loi->perPatch = patch;

      Instruction *hii =
         mkLoad(TYPE_U32, hi,
                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
                indirect0);
      hii->setIndirect(0, 1, indirect1);
      hii->perPatch = patch;

      return mkOp2(OP_MERGE, ty, def, lo, hi);
   } else {
      Instruction *ld =
         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
      ld->setIndirect(0, 1, indirect1);
      ld->perPatch = patch;
      return ld;
   }
}

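// counterpart to loadFrom for shader outputs: 64-bit sources with an
// indirect are split into two 32-bit stores, and exports get their sources
// copied into fresh values first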
void
Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
                   DataType ty, Value *src, uint8_t idx, uint8_t c,
                   Value *indirect0, Value *indirect1)
{
   uint8_t size = typeSizeof(ty);
   uint32_t address = getSlotAddress(insn, idx, c);

   if (size == 8 && indirect0) {
      Value *split[2];
      mkSplit(split, 4, src);

      if (op == OP_EXPORT) {
         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
      }

      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
              split[0])->perPatch = info->out[idx].patch;
      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
              split[1])->perPatch = info->out[idx].patch;
   } else {
      if (op == OP_EXPORT)
         src = mkMov(getSSA(size), src, ty)->getDef(0);
      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
              src)->perPatch = info->out[idx].patch;
   }
}

bool
Converter::parseNIR()
{
   info->bin.tlsSpace = 0;
   info->io.clipDistances = nir->info.clip_distance_array_size;
   info->io.cullDistances = nir->info.cull_distance_array_size;

   switch(prog->getType()) {
   case Program::TYPE_COMPUTE:
      info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
      info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
      info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
      info->bin.smemSize = nir->info.cs.shared_size;
      break;
   case Program::TYPE_FRAGMENT:
      info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
      info->prop.fp.persampleInvocation =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
      info->prop.fp.readsSampleLocations =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
      info->prop.fp.usesSampleMaskIn =
         !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
      break;
   case Program::TYPE_GEOMETRY:
      info->prop.gp.inputPrim = nir->info.gs.input_primitive;
      info->prop.gp.instanceCount = nir->info.gs.invocations;
      info->prop.gp.maxVertices = nir->info.gs.vertices_out;
      info->prop.gp.outputPrim = nir->info.gs.output_primitive;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      if (nir->info.tess.primitive_mode == GL_ISOLINES)
         info->prop.tp.domain = GL_LINES;
      else
         info->prop.tp.domain = nir->info.tess.primitive_mode;
      info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
      info->prop.tp.outputPrim =
         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
      info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
      info->prop.tp.winding = !nir->info.tess.ccw;
      break;
   case Program::TYPE_VERTEX:
      info->prop.vp.usesDrawParameters =
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
      break;
   default:
      break;
   }

   return true;
}

bool
Converter::visit(nir_function *function)
{
   // we only support emitting the main function for now
   assert(!strcmp(function->name, "main"));
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   if (info->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
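   // TCS invocations of a patch presumably occupy consecutive lanes of a
   // warp, so (laneid - invocation_id) yields the lane of invocation 0,
   // which serves as the base for output addressing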
   case Program::TYPE_TESSELLATION_CONTROL:
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   nir_foreach_register(reg, &function->impl->registers) {
      if (reg->num_array_elems) {
         // TODO: packed variables would be nice, but MemoryOpt fails
         // replace 4 with reg->num_components
         uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
         regToLmemOffset[reg->index] = info->bin.tlsSpace;
         info->bin.tlsSpace += size;
      }
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if ((prog->getType() == Program::TYPE_VERTEX ||
        prog->getType() == Program::TYPE_TESSELLATION_EVAL)
       && info->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non-main functions this needs to be an OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}

bool
Converter::visit(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_block:
      return visit(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return visit(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return visit(nir_cf_node_as_loop(node));
   default:
      ERROR("unknown nir_cf_node type %u\n", node->type);
      return false;
   }
}

bool
Converter::visit(nir_block *block)
{
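   // unreachable, empty blocks contribute nothing, so skip them without
   // creating a BasicBlock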
   if (!block->predecessors->entries && block->instr_list.is_empty())
      return true;

   BasicBlock *bb = convert(block);

   setPosition(bb, true);
   nir_foreach_instr(insn, block) {
      if (!visit(insn))
         return false;
   }
   return true;
}

bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert joinats if both branches end up at the end of the if
   // again. the reasons for this not to happen are breaks/continues/returns/
   // ..., which have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

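   // branch to the else block if the condition evaluates to false (zero)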
   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastThen), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastElse), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}

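// loops are built from the hardware flow ops: OP_PREBREAK marks the block
// after the loop as the break target, OP_PRECONT marks the header as the
// continue target, and a trailing OP_CONT closes the back edge when the
// body didn't end in explicit control flow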
bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      if (!insn || !insn->asFlow()) {
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}

bool
Converter::visit(nir_instr *insn)
{
   // we need an insertion point for immediate loads generated on the fly
   immInsertPos = bb->getExit();
   switch (insn->type) {
   case nir_instr_type_alu:
      return visit(nir_instr_as_alu(insn));
   case nir_instr_type_deref:
      return visit(nir_instr_as_deref(insn));
   case nir_instr_type_intrinsic:
      return visit(nir_instr_as_intrinsic(insn));
   case nir_instr_type_jump:
      return visit(nir_instr_as_jump(insn));
   case nir_instr_type_load_const:
      return visit(nir_instr_as_load_const(insn));
   case nir_instr_type_ssa_undef:
      return visit(nir_instr_as_ssa_undef(insn));
   case nir_instr_type_tex:
      return visit(nir_instr_as_tex(insn));
   default:
      ERROR("unknown nir_instr type %u\n", insn->type);
      return false;
   }
   return true;
}

SVSemantic
Converter::convert(nir_intrinsic_op intr)
{
   switch (intr) {
   case nir_intrinsic_load_base_vertex:
      return SV_BASEVERTEX;
   case nir_intrinsic_load_base_instance:
      return SV_BASEINSTANCE;
   case nir_intrinsic_load_draw_id:
      return SV_DRAWID;
   case nir_intrinsic_load_front_face:
      return SV_FACE;
   case nir_intrinsic_load_helper_invocation:
      return SV_THREAD_KILL;
   case nir_intrinsic_load_instance_id:
      return SV_INSTANCE_ID;
   case nir_intrinsic_load_invocation_id:
      return SV_INVOCATION_ID;
   case nir_intrinsic_load_local_group_size:
      return SV_NTID;
   case nir_intrinsic_load_local_invocation_id:
      return SV_TID;
   case nir_intrinsic_load_num_work_groups:
      return SV_NCTAID;
   case nir_intrinsic_load_patch_vertices_in:
      return SV_VERTEX_COUNT;
   case nir_intrinsic_load_primitive_id:
      return SV_PRIMITIVE_ID;
   case nir_intrinsic_load_sample_id:
      return SV_SAMPLE_INDEX;
   case nir_intrinsic_load_sample_mask_in:
      return SV_SAMPLE_MASK;
   case nir_intrinsic_load_sample_pos:
      return SV_SAMPLE_POS;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SV_LANEMASK_EQ;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SV_LANEMASK_GE;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SV_LANEMASK_GT;
   case nir_intrinsic_load_subgroup_le_mask:
      return SV_LANEMASK_LE;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SV_LANEMASK_LT;
   case nir_intrinsic_load_subgroup_invocation:
      return SV_LANEID;
   case nir_intrinsic_load_tess_coord:
      return SV_TESS_COORD;
   case nir_intrinsic_load_tess_level_inner:
      return SV_TESS_INNER;
   case nir_intrinsic_load_tess_level_outer:
      return SV_TESS_OUTER;
   case nir_intrinsic_load_vertex_id:
      return SV_VERTEX_ID;
   case nir_intrinsic_load_work_group_id:
      return SV_CTAID;
   default:
      ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
            nir_intrinsic_infos[intr].name);
      assert(false);
      return SV_LAST;
   }
}

ImgFormat
Converter::convertGLImgFormat(GLuint format)
{
#define FMT_CASE(a, b) \
  case GL_ ## a: return nv50_ir::FMT_ ## b

   switch (format) {
   FMT_CASE(NONE, NONE);

   FMT_CASE(RGBA32F, RGBA32F);
   FMT_CASE(RGBA16F, RGBA16F);
   FMT_CASE(RG32F, RG32F);
   FMT_CASE(RG16F, RG16F);
   FMT_CASE(R11F_G11F_B10F, R11G11B10F);
   FMT_CASE(R32F, R32F);
   FMT_CASE(R16F, R16F);

   FMT_CASE(RGBA32UI, RGBA32UI);
   FMT_CASE(RGBA16UI, RGBA16UI);
   FMT_CASE(RGB10_A2UI, RGB10A2UI);
   FMT_CASE(RGBA8UI, RGBA8UI);
   FMT_CASE(RG32UI, RG32UI);
   FMT_CASE(RG16UI, RG16UI);
   FMT_CASE(RG8UI, RG8UI);
   FMT_CASE(R32UI, R32UI);
   FMT_CASE(R16UI, R16UI);
   FMT_CASE(R8UI, R8UI);

   FMT_CASE(RGBA32I, RGBA32I);
   FMT_CASE(RGBA16I, RGBA16I);
   FMT_CASE(RGBA8I, RGBA8I);
   FMT_CASE(RG32I, RG32I);
   FMT_CASE(RG16I, RG16I);
   FMT_CASE(RG8I, RG8I);
   FMT_CASE(R32I, R32I);
   FMT_CASE(R16I, R16I);
   FMT_CASE(R8I, R8I);

   FMT_CASE(RGBA16, RGBA16);
   FMT_CASE(RGB10_A2, RGB10A2);
   FMT_CASE(RGBA8, RGBA8);
   FMT_CASE(RG16, RG16);
   FMT_CASE(RG8, RG8);
   FMT_CASE(R16, R16);
   FMT_CASE(R8, R8);

   FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
   FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
   FMT_CASE(RG16_SNORM, RG16_SNORM);
   FMT_CASE(RG8_SNORM, RG8_SNORM);
   FMT_CASE(R16_SNORM, R16_SNORM);
   FMT_CASE(R8_SNORM, R8_SNORM);

   FMT_CASE(BGRA_INTEGER, BGRA8);
   default:
      ERROR("unknown format %x\n", format);
      assert(false);
      return nv50_ir::FMT_NONE;
   }
#undef FMT_CASE
}

bool
Converter::visit(nir_intrinsic_instr *insn)
{
   nir_intrinsic_op op = insn->intrinsic;
   const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];

   switch (op) {
   case nir_intrinsic_load_uniform: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *indirect;
      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
      for (uint8_t i = 0; i < insn->num_components; ++i) {
         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      Value *indirect;
      DataType dType = getSType(insn->src[0], false, false);
      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
            continue;

         uint8_t offset = 0;
         Value *src = getSrc(&insn->src[0], i);
         switch (prog->getType()) {
         case Program::TYPE_FRAGMENT: {
            if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
               // TGSI uses a different interface than NIR: TGSI stores the
               // value in the z component, NIR in x
               offset += 2;
               src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
            }
            break;
         }
         case Program::TYPE_GEOMETRY:
         case Program::TYPE_VERTEX: {
            if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
               mkMov(clipVtx[i], src);
               src = clipVtx[i];
            }
            break;
         }
         default:
            break;
         }

         storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
      }
      break;
   }
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_output: {
      LValues &newDefs = convert(&insn->dest);

      // FBFetch
1964 if (prog->getType() == Program::TYPE_FRAGMENT &&
1965 op == nir_intrinsic_load_output) {
1966 std::vector<Value*> defs, srcs;
1967 uint8_t mask = 0;
1968
1969 srcs.push_back(getSSA());
1970 srcs.push_back(getSSA());
1971 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1972 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1973 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1974 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1975
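              // a 2D MS array TXF also takes the layer and the sample index as coordinates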
1976 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1977 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1978
1979 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1980 defs.push_back(newDefs[i]);
1981 mask |= 1 << i;
1982 }
1983
1984 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1985 texi->tex.levelZero = 1;
1986 texi->tex.mask = mask;
1987 texi->tex.useOffsets = 0;
1988 texi->tex.r = 0xffff;
1989 texi->tex.s = 0xffff;
1990
1991 info->prop.fp.readsFramebuffer = true;
1992 break;
1993 }
1994
1995 const DataType dType = getDType(insn);
1996 Value *indirect;
1997 bool input = op != nir_intrinsic_load_output;
1998 operation nvirOp;
1999 uint32_t mode = 0;
2000
2001 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
2002 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
2003
2004 // see load_barycentric_* handling
2005 if (prog->getType() == Program::TYPE_FRAGMENT) {
2006 mode = translateInterpMode(&vary, nvirOp);
2007 if (op == nir_intrinsic_load_interpolated_input) {
2008 ImmediateValue immMode;
2009 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
2010 mode |= immMode.reg.data.u32;
2011 }
2012 }
2013
2014 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2015 uint32_t address = getSlotAddress(insn, idx, i);
2016 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
2017 if (prog->getType() == Program::TYPE_FRAGMENT) {
2018 int s = 1;
2019 if (typeSizeof(dType) == 8) {
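                    // 64 bit inputs are interpolated as two 32 bit halves and merged below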
2020 Value *lo = getSSA();
2021 Value *hi = getSSA();
2022 Instruction *interp;
2023
2024 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
2025 if (nvirOp == OP_PINTERP)
2026 interp->setSrc(s++, fp.position);
2027 if (mode & NV50_IR_INTERP_OFFSET)
2028 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2029 interp->setInterpolate(mode);
2030 interp->setIndirect(0, 0, indirect);
2031
2032 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
2033 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
2034 if (nvirOp == OP_PINTERP)
2035 interp->setSrc(s++, fp.position);
2036 if (mode & NV50_IR_INTERP_OFFSET)
2037 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2038 interp->setInterpolate(mode);
2039 interp->setIndirect(0, 0, indirect);
2040
2041 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2042 } else {
2043 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2044 if (nvirOp == OP_PINTERP)
2045 interp->setSrc(s++, fp.position);
2046 if (mode & NV50_IR_INTERP_OFFSET)
2047 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2048 interp->setInterpolate(mode);
2049 interp->setIndirect(0, 0, indirect);
2050 }
2051 } else {
2052 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2053 }
2054 }
2055 break;
2056 }
2057 case nir_intrinsic_load_barycentric_at_offset:
2058 case nir_intrinsic_load_barycentric_at_sample:
2059 case nir_intrinsic_load_barycentric_centroid:
2060 case nir_intrinsic_load_barycentric_pixel:
2061 case nir_intrinsic_load_barycentric_sample: {
2062 LValues &newDefs = convert(&insn->dest);
2063 uint32_t mode;
2064
2065 if (op == nir_intrinsic_load_barycentric_centroid ||
2066 op == nir_intrinsic_load_barycentric_sample) {
2067 mode = NV50_IR_INTERP_CENTROID;
2068 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
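              // clamp the offset to the supported range and convert it to a packed
              // signed fixed point pair as the interpolation instruction expects it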
2069 Value *offs[2];
2070 for (uint8_t c = 0; c < 2; c++) {
2071 offs[c] = getScratch();
2072 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2073 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2074 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2075 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2076 }
2077 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2078
2079 mode = NV50_IR_INTERP_OFFSET;
2080 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2081 mode = NV50_IR_INTERP_DEFAULT;
2082 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2083 info->prop.fp.readsSampleLocations = true;
2084 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2085 mode = NV50_IR_INTERP_OFFSET;
2086 } else {
2087 unreachable("all intrinsics already handled above");
2088 }
2089
2090 loadImm(newDefs[1], mode);
2091 break;
2092 }
2093 case nir_intrinsic_discard:
2094 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2095 break;
2096 case nir_intrinsic_discard_if: {
2097 Value *pred = getSSA(1, FILE_PREDICATE);
2098 if (insn->num_components > 1) {
2099          ERROR("nir_intrinsic_discard_if is only supported with 1 component!\n");
2100 assert(false);
2101 return false;
2102 }
2103 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2104 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2105 break;
2106 }
2107 case nir_intrinsic_load_base_vertex:
2108 case nir_intrinsic_load_base_instance:
2109 case nir_intrinsic_load_draw_id:
2110 case nir_intrinsic_load_front_face:
2111 case nir_intrinsic_load_helper_invocation:
2112 case nir_intrinsic_load_instance_id:
2113 case nir_intrinsic_load_invocation_id:
2114 case nir_intrinsic_load_local_group_size:
2115 case nir_intrinsic_load_local_invocation_id:
2116 case nir_intrinsic_load_num_work_groups:
2117 case nir_intrinsic_load_patch_vertices_in:
2118 case nir_intrinsic_load_primitive_id:
2119 case nir_intrinsic_load_sample_id:
2120 case nir_intrinsic_load_sample_mask_in:
2121 case nir_intrinsic_load_sample_pos:
2122 case nir_intrinsic_load_subgroup_eq_mask:
2123 case nir_intrinsic_load_subgroup_ge_mask:
2124 case nir_intrinsic_load_subgroup_gt_mask:
2125 case nir_intrinsic_load_subgroup_le_mask:
2126 case nir_intrinsic_load_subgroup_lt_mask:
2127 case nir_intrinsic_load_subgroup_invocation:
2128 case nir_intrinsic_load_tess_coord:
2129 case nir_intrinsic_load_tess_level_inner:
2130 case nir_intrinsic_load_tess_level_outer:
2131 case nir_intrinsic_load_vertex_id:
2132 case nir_intrinsic_load_work_group_id: {
2133 const DataType dType = getDType(insn);
2134 SVSemantic sv = convert(op);
2135 LValues &newDefs = convert(&insn->dest);
2136
2137 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2138 Value *def;
2139 if (typeSizeof(dType) == 8)
2140 def = getSSA();
2141 else
2142 def = newDefs[i];
2143
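              // a block that is only 1 thread wide in this dimension always has id 0 there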
2144 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2145 loadImm(def, 0u);
2146 } else {
2147 Symbol *sym = mkSysVal(sv, i);
2148 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2149 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2150 rdsv->perPatch = 1;
2151 }
2152
2153 if (typeSizeof(dType) == 8)
2154 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2155 }
2156 break;
2157 }
2158 // constants
2159 case nir_intrinsic_load_subgroup_size: {
2160 LValues &newDefs = convert(&insn->dest);
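           // warps are 32 threads wide on all supported GPUs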
2161 loadImm(newDefs[0], 32u);
2162 break;
2163 }
2164 case nir_intrinsic_vote_all:
2165 case nir_intrinsic_vote_any:
2166 case nir_intrinsic_vote_ieq: {
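           // turn the 32 bit boolean into a predicate, vote across the warp and
           // convert the result back into a 32 bit boolean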
2167 LValues &newDefs = convert(&insn->dest);
2168 Value *pred = getScratch(1, FILE_PREDICATE);
2169 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2170 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2171 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2172 break;
2173 }
2174 case nir_intrinsic_ballot: {
2175 LValues &newDefs = convert(&insn->dest);
2176 Value *pred = getSSA(1, FILE_PREDICATE);
2177 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2178 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2179 break;
2180 }
2181 case nir_intrinsic_read_first_invocation:
2182 case nir_intrinsic_read_invocation: {
2183 LValues &newDefs = convert(&insn->dest);
2184 const DataType dType = getDType(insn);
2185 Value *tmp = getScratch();
2186
2187 if (op == nir_intrinsic_read_first_invocation) {
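              // find the lowest active lane: ballot the active lanes, bit-reverse the
              // mask and let BFIND return the index of the first active lane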
2188 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2189 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2190 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2191 } else
2192 tmp = getSrc(&insn->src[1], 0);
2193
2194 for (uint8_t i = 0; i < insn->num_components; ++i) {
2195 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2196 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2197 }
2198 break;
2199 }
2200 case nir_intrinsic_load_per_vertex_input: {
2201 const DataType dType = getDType(insn);
2202 LValues &newDefs = convert(&insn->dest);
2203 Value *indirectVertex;
2204 Value *indirectOffset;
2205 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2206 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2207
2208 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2209 mkImm(baseVertex), indirectVertex);
2210 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2211 uint32_t address = getSlotAddress(insn, idx, i);
2212 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2213 indirectOffset, vtxBase, info->in[idx].patch);
2214 }
2215 break;
2216 }
2217 case nir_intrinsic_load_per_vertex_output: {
2218 const DataType dType = getDType(insn);
2219 LValues &newDefs = convert(&insn->dest);
2220 Value *indirectVertex;
2221 Value *indirectOffset;
2222 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2223 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2224 Value *vtxBase = NULL;
2225
2226 if (indirectVertex)
2227 vtxBase = indirectVertex;
2228 else
2229 vtxBase = loadImm(NULL, baseVertex);
2230
2231 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2232
2233 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2234 uint32_t address = getSlotAddress(insn, idx, i);
2235 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2236                   indirectOffset, vtxBase, info->out[idx].patch);
2237 }
2238 break;
2239 }
2240 case nir_intrinsic_emit_vertex:
2241 if (info->io.genUserClip > 0)
2242 handleUserClipPlanes();
2243 // fallthrough
2244 case nir_intrinsic_end_primitive: {
2245 uint32_t idx = nir_intrinsic_stream_id(insn);
2246 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2247 break;
2248 }
2249 case nir_intrinsic_load_ubo: {
2250 const DataType dType = getDType(insn);
2251 LValues &newDefs = convert(&insn->dest);
2252 Value *indirectIndex;
2253 Value *indirectOffset;
2254 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
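           // constant buffer 0 holds the regular uniforms (see load_uniform above),
           // so UBO bindings are shifted up by one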
2255 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2256
2257 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2258 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2259 indirectOffset, indirectIndex);
2260 }
2261 break;
2262 }
2263 case nir_intrinsic_get_buffer_size: {
2264 LValues &newDefs = convert(&insn->dest);
2265 const DataType dType = getDType(insn);
2266 Value *indirectBuffer;
2267 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2268
2269 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2270 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2271 break;
2272 }
2273 case nir_intrinsic_store_ssbo: {
2274 DataType sType = getSType(insn->src[0], false, false);
2275 Value *indirectBuffer;
2276 Value *indirectOffset;
2277 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2278 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2279
2280 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2281 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2282 continue;
2283 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2284 offset + i * typeSizeof(sType));
2285 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2286 ->setIndirect(0, 1, indirectBuffer);
2287 }
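           // bit 0 of globalAccess marks reads of global memory, bit 1 marks writes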
2288 info->io.globalAccess |= 0x2;
2289 break;
2290 }
2291 case nir_intrinsic_load_ssbo: {
2292 const DataType dType = getDType(insn);
2293 LValues &newDefs = convert(&insn->dest);
2294 Value *indirectBuffer;
2295 Value *indirectOffset;
2296 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2297 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2298
2299 for (uint8_t i = 0u; i < insn->num_components; ++i)
2300 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2301 indirectOffset, indirectBuffer);
2302
2303 info->io.globalAccess |= 0x1;
2304 break;
2305 }
2306 case nir_intrinsic_shared_atomic_add:
2307 case nir_intrinsic_shared_atomic_and:
2308 case nir_intrinsic_shared_atomic_comp_swap:
2309 case nir_intrinsic_shared_atomic_exchange:
2310 case nir_intrinsic_shared_atomic_or:
2311 case nir_intrinsic_shared_atomic_imax:
2312 case nir_intrinsic_shared_atomic_imin:
2313 case nir_intrinsic_shared_atomic_umax:
2314 case nir_intrinsic_shared_atomic_umin:
2315 case nir_intrinsic_shared_atomic_xor: {
2316 const DataType dType = getDType(insn);
2317 LValues &newDefs = convert(&insn->dest);
2318 Value *indirectOffset;
2319 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2320 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2321 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2322 if (op == nir_intrinsic_shared_atomic_comp_swap)
2323 atom->setSrc(2, getSrc(&insn->src[2], 0));
2324 atom->setIndirect(0, 0, indirectOffset);
2325 atom->subOp = getSubOp(op);
2326 break;
2327 }
2328 case nir_intrinsic_ssbo_atomic_add:
2329 case nir_intrinsic_ssbo_atomic_and:
2330 case nir_intrinsic_ssbo_atomic_comp_swap:
2331 case nir_intrinsic_ssbo_atomic_exchange:
2332 case nir_intrinsic_ssbo_atomic_or:
2333 case nir_intrinsic_ssbo_atomic_imax:
2334 case nir_intrinsic_ssbo_atomic_imin:
2335 case nir_intrinsic_ssbo_atomic_umax:
2336 case nir_intrinsic_ssbo_atomic_umin:
2337 case nir_intrinsic_ssbo_atomic_xor: {
2338 const DataType dType = getDType(insn);
2339 LValues &newDefs = convert(&insn->dest);
2340 Value *indirectBuffer;
2341 Value *indirectOffset;
2342 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2343 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2344
2345 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2346 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2347 getSrc(&insn->src[2], 0));
2348 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2349 atom->setSrc(2, getSrc(&insn->src[3], 0));
2350 atom->setIndirect(0, 0, indirectOffset);
2351 atom->setIndirect(0, 1, indirectBuffer);
2352 atom->subOp = getSubOp(op);
2353
2354 info->io.globalAccess |= 0x2;
2355 break;
2356 }
2357 case nir_intrinsic_bindless_image_atomic_add:
2358 case nir_intrinsic_bindless_image_atomic_and:
2359 case nir_intrinsic_bindless_image_atomic_comp_swap:
2360 case nir_intrinsic_bindless_image_atomic_exchange:
2361 case nir_intrinsic_bindless_image_atomic_max:
2362 case nir_intrinsic_bindless_image_atomic_min:
2363 case nir_intrinsic_bindless_image_atomic_or:
2364 case nir_intrinsic_bindless_image_atomic_xor:
2365 case nir_intrinsic_bindless_image_load:
2366 case nir_intrinsic_bindless_image_samples:
2367 case nir_intrinsic_bindless_image_size:
2368 case nir_intrinsic_bindless_image_store: {
2369 std::vector<Value*> srcs, defs;
2370 Value *indirect = getSrc(&insn->src[0], 0);
2371 DataType ty;
2372
2373 uint32_t mask = 0;
2374 TexInstruction::Target target =
2375 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2376 unsigned int argCount = getNIRArgCount(target);
2377 uint16_t location = 0;
2378
2379 if (opInfo.has_dest) {
2380 LValues &newDefs = convert(&insn->dest);
2381 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2382 defs.push_back(newDefs[i]);
2383 mask |= 1 << i;
2384 }
2385 }
2386
2387 switch (op) {
2388 case nir_intrinsic_bindless_image_atomic_add:
2389 case nir_intrinsic_bindless_image_atomic_and:
2390 case nir_intrinsic_bindless_image_atomic_comp_swap:
2391 case nir_intrinsic_bindless_image_atomic_exchange:
2392 case nir_intrinsic_bindless_image_atomic_max:
2393 case nir_intrinsic_bindless_image_atomic_min:
2394 case nir_intrinsic_bindless_image_atomic_or:
2395 case nir_intrinsic_bindless_image_atomic_xor:
2396 ty = getDType(insn);
2397 mask = 0x1;
2398 info->io.globalAccess |= 0x2;
2399 break;
2400 case nir_intrinsic_bindless_image_load:
2401 ty = TYPE_U32;
2402 info->io.globalAccess |= 0x1;
2403 break;
2404 case nir_intrinsic_bindless_image_store:
2405 ty = TYPE_U32;
2406 mask = 0xf;
2407 info->io.globalAccess |= 0x2;
2408 break;
2409 case nir_intrinsic_bindless_image_samples:
2410 mask = 0x8;
2411 ty = TYPE_U32;
2412 break;
2413 case nir_intrinsic_bindless_image_size:
2414 ty = TYPE_U32;
2415 break;
2416 default:
2417 unreachable("unhandled image opcode");
2418 break;
2419 }
2420
2421 // coords
2422 if (opInfo.num_srcs >= 2)
2423 for (unsigned int i = 0u; i < argCount; ++i)
2424 srcs.push_back(getSrc(&insn->src[1], i));
2425
2426       // the sample index is just another src added after the coords
2427 if (opInfo.num_srcs >= 3 && target.isMS())
2428 srcs.push_back(getSrc(&insn->src[2], 0));
2429
2430 if (opInfo.num_srcs >= 4) {
2431 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2432 for (uint8_t i = 0u; i < components; ++i)
2433 srcs.push_back(getSrc(&insn->src[3], i));
2434 }
2435
2436 if (opInfo.num_srcs >= 5)
2437          // 1 for atomic swap
2438 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2439 srcs.push_back(getSrc(&insn->src[4], i));
2440
2441 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2443       texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(nir_intrinsic_format(insn))];
2444       texi->tex.mask = mask;
2445       texi->tex.bindless = true;
2446 texi->cache = convert(nir_intrinsic_access(insn));
2447 texi->setType(ty);
2448 texi->subOp = getSubOp(op);
2449
2450 if (indirect)
2451 texi->setIndirectR(indirect);
2452
2453 break;
2454 }
2455 case nir_intrinsic_image_deref_atomic_add:
2456 case nir_intrinsic_image_deref_atomic_and:
2457 case nir_intrinsic_image_deref_atomic_comp_swap:
2458 case nir_intrinsic_image_deref_atomic_exchange:
2459 case nir_intrinsic_image_deref_atomic_max:
2460 case nir_intrinsic_image_deref_atomic_min:
2461 case nir_intrinsic_image_deref_atomic_or:
2462 case nir_intrinsic_image_deref_atomic_xor:
2463 case nir_intrinsic_image_deref_load:
2464 case nir_intrinsic_image_deref_samples:
2465 case nir_intrinsic_image_deref_size:
2466 case nir_intrinsic_image_deref_store: {
2467 const nir_variable *tex;
2468 std::vector<Value*> srcs, defs;
2469 Value *indirect;
2470 DataType ty;
2471
2472 uint32_t mask = 0;
2473 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2474 const glsl_type *type = deref->type;
2475 TexInstruction::Target target =
2476 convert((glsl_sampler_dim)type->sampler_dimensionality,
2477 type->sampler_array, type->sampler_shadow);
2478 unsigned int argCount = getNIRArgCount(target);
2479 uint16_t location = handleDeref(deref, indirect, tex);
2480
2481 if (opInfo.has_dest) {
2482 LValues &newDefs = convert(&insn->dest);
2483 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2484 defs.push_back(newDefs[i]);
2485 mask |= 1 << i;
2486 }
2487 }
2488
2489 switch (op) {
2490 case nir_intrinsic_image_deref_atomic_add:
2491 case nir_intrinsic_image_deref_atomic_and:
2492 case nir_intrinsic_image_deref_atomic_comp_swap:
2493 case nir_intrinsic_image_deref_atomic_exchange:
2494 case nir_intrinsic_image_deref_atomic_max:
2495 case nir_intrinsic_image_deref_atomic_min:
2496 case nir_intrinsic_image_deref_atomic_or:
2497 case nir_intrinsic_image_deref_atomic_xor:
2498 ty = getDType(insn);
2499 mask = 0x1;
2500 info->io.globalAccess |= 0x2;
2501 break;
2502 case nir_intrinsic_image_deref_load:
2503 ty = TYPE_U32;
2504 info->io.globalAccess |= 0x1;
2505 break;
2506 case nir_intrinsic_image_deref_store:
2507 ty = TYPE_U32;
2508 mask = 0xf;
2509 info->io.globalAccess |= 0x2;
2510 break;
2511 case nir_intrinsic_image_deref_samples:
2512 mask = 0x8;
2513 ty = TYPE_U32;
2514 break;
2515 case nir_intrinsic_image_deref_size:
2516 ty = TYPE_U32;
2517 break;
2518 default:
2519 unreachable("unhandled image opcode");
2520 break;
2521 }
2522
2523 // coords
2524 if (opInfo.num_srcs >= 2)
2525 for (unsigned int i = 0u; i < argCount; ++i)
2526 srcs.push_back(getSrc(&insn->src[1], i));
2527
2528       // the sample index is just another src added after the coords
2529 if (opInfo.num_srcs >= 3 && target.isMS())
2530 srcs.push_back(getSrc(&insn->src[2], 0));
2531
2532 if (opInfo.num_srcs >= 4) {
2533 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2534 for (uint8_t i = 0u; i < components; ++i)
2535 srcs.push_back(getSrc(&insn->src[3], i));
2536 }
2537
2538 if (opInfo.num_srcs >= 5)
2539          // 1 for atomic swap
2540 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2541 srcs.push_back(getSrc(&insn->src[4], i));
2542
2543 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2544 texi->tex.bindless = false;
2545 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2546 texi->tex.mask = mask;
2547 texi->cache = getCacheModeFromVar(tex);
2548 texi->setType(ty);
2549 texi->subOp = getSubOp(op);
2550
2551 if (indirect)
2552 texi->setIndirectR(indirect);
2553
2554 break;
2555 }
2556 case nir_intrinsic_store_shared: {
2557 DataType sType = getSType(insn->src[0], false, false);
2558 Value *indirectOffset;
2559 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2560
2561 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2562 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2563 continue;
2564 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2565 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2566 }
2567 break;
2568 }
2569 case nir_intrinsic_load_shared: {
2570 const DataType dType = getDType(insn);
2571 LValues &newDefs = convert(&insn->dest);
2572 Value *indirectOffset;
2573 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2574
2575 for (uint8_t i = 0u; i < insn->num_components; ++i)
2576 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2577
2578 break;
2579 }
2580 case nir_intrinsic_barrier: {
2581 // TODO: add flag to shader_info
2582 info->numBarriers = 1;
2583 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2584 bar->fixed = 1;
2585 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2586 break;
2587 }
2588 case nir_intrinsic_group_memory_barrier:
2589 case nir_intrinsic_memory_barrier:
2590 case nir_intrinsic_memory_barrier_atomic_counter:
2591 case nir_intrinsic_memory_barrier_buffer:
2592 case nir_intrinsic_memory_barrier_image:
2593 case nir_intrinsic_memory_barrier_shared: {
2594 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2595 bar->fixed = 1;
2596 bar->subOp = getSubOp(op);
2597 break;
2598 }
2599 case nir_intrinsic_shader_clock: {
2600 const DataType dType = getDType(insn);
2601 LValues &newDefs = convert(&insn->dest);
2602
2603 loadImm(newDefs[0], 0u);
2604 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2605 break;
2606 }
2607 default:
2608 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2609 return false;
2610 }
2611
2612 return true;
2613 }
2614
2615 bool
2616 Converter::visit(nir_jump_instr *insn)
2617 {
2618 switch (insn->type) {
2619 case nir_jump_return:
2620 // TODO: this only works in the main function
2621 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2622 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2623 break;
2624 case nir_jump_break:
2625 case nir_jump_continue: {
2626 bool isBreak = insn->type == nir_jump_break;
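           // a break leaves the loop, so it forms a cross edge; a continue jumps
           // back towards the loop header and forms a back edge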
2627 nir_block *block = insn->instr.block;
2628 assert(!block->successors[1]);
2629 BasicBlock *target = convert(block->successors[0]);
2630 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2631 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2632 break;
2633 }
2634 default:
2635 ERROR("unknown nir_jump_type %u\n", insn->type);
2636 return false;
2637 }
2638
2639 return true;
2640 }
2641
2642 Value*
2643 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2644 {
2645 Value *val;
2646
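        // insert immediates at a fixed position so the loads end up dominating
        // all of their uses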
2647 if (immInsertPos)
2648 setPosition(immInsertPos, true);
2649 else
2650 setPosition(bb, false);
2651
2652 switch (insn->def.bit_size) {
2653 case 64:
2654 val = loadImm(getSSA(8), insn->value[idx].u64);
2655 break;
2656 case 32:
2657 val = loadImm(getSSA(4), insn->value[idx].u32);
2658 break;
2659 case 16:
2660 val = loadImm(getSSA(2), insn->value[idx].u16);
2661 break;
2662 case 8:
2663 val = loadImm(getSSA(1), insn->value[idx].u8);
2664 break;
2665 default:
2666 unreachable("unhandled bit size!\n");
2667 }
2668 setPosition(bb, true);
2669 return val;
2670 }
2671
2672 bool
2673 Converter::visit(nir_load_const_instr *insn)
2674 {
2675 assert(insn->def.bit_size <= 64);
2676 immediates[insn->def.index] = insn;
2677 return true;
2678 }
2679
2680 #define DEFAULT_CHECKS \
2681 if (insn->dest.dest.ssa.num_components > 1) { \
2682 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2683 return false; \
2684 } \
2685 if (insn->dest.write_mask != 1) { \
2686       ERROR("nir_alu_instr is only supported with a write_mask of 1!\n"); \
2687 return false; \
2688 }
2689 bool
2690 Converter::visit(nir_alu_instr *insn)
2691 {
2692 const nir_op op = insn->op;
2693 const nir_op_info &info = nir_op_infos[op];
2694 DataType dType = getDType(insn);
2695 const std::vector<DataType> sTypes = getSTypes(insn);
2696
2697 Instruction *oldPos = this->bb->getExit();
2698
2699 switch (op) {
2700 case nir_op_fabs:
2701 case nir_op_iabs:
2702 case nir_op_fadd:
2703 case nir_op_iadd:
2704 case nir_op_iand:
2705 case nir_op_fceil:
2706 case nir_op_fcos:
2707 case nir_op_fddx:
2708 case nir_op_fddx_coarse:
2709 case nir_op_fddx_fine:
2710 case nir_op_fddy:
2711 case nir_op_fddy_coarse:
2712 case nir_op_fddy_fine:
2713 case nir_op_fdiv:
2714 case nir_op_idiv:
2715 case nir_op_udiv:
2716 case nir_op_fexp2:
2717 case nir_op_ffloor:
2718 case nir_op_ffma:
2719 case nir_op_flog2:
2720 case nir_op_fmax:
2721 case nir_op_imax:
2722 case nir_op_umax:
2723 case nir_op_fmin:
2724 case nir_op_imin:
2725 case nir_op_umin:
2726 case nir_op_fmod:
2727 case nir_op_imod:
2728 case nir_op_umod:
2729 case nir_op_fmul:
2730 case nir_op_imul:
2731 case nir_op_imul_high:
2732 case nir_op_umul_high:
2733 case nir_op_fneg:
2734 case nir_op_ineg:
2735 case nir_op_inot:
2736 case nir_op_ior:
2737 case nir_op_pack_64_2x32_split:
2738 case nir_op_fpow:
2739 case nir_op_frcp:
2740 case nir_op_frem:
2741 case nir_op_irem:
2742 case nir_op_frsq:
2743 case nir_op_fsat:
2744 case nir_op_ishr:
2745 case nir_op_ushr:
2746 case nir_op_fsin:
2747 case nir_op_fsqrt:
2748 case nir_op_fsub:
2749 case nir_op_isub:
2750 case nir_op_ftrunc:
2751 case nir_op_ishl:
2752 case nir_op_ixor: {
2753 DEFAULT_CHECKS;
2754 LValues &newDefs = convert(&insn->dest);
2755 operation preOp = preOperationNeeded(op);
2756 if (preOp != OP_NOP) {
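              // some ops need a helper op emitted first, e.g. fsin/fcos are
              // preceded by OP_PRESIN for argument range reduction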
2757 assert(info.num_inputs < 2);
2758 Value *tmp = getSSA(typeSizeof(dType));
2759 Instruction *i0 = mkOp(preOp, dType, tmp);
2760 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2761 if (info.num_inputs) {
2762 i0->setSrc(0, getSrc(&insn->src[0]));
2763 i1->setSrc(0, tmp);
2764 }
2765 i1->subOp = getSubOp(op);
2766 } else {
2767 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2768 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2769 i->setSrc(s, getSrc(&insn->src[s]));
2770 }
2771 i->subOp = getSubOp(op);
2772 }
2773 break;
2774 }
2775 case nir_op_ifind_msb:
2776 case nir_op_ufind_msb: {
2777 DEFAULT_CHECKS;
2778 LValues &newDefs = convert(&insn->dest);
2779 dType = sTypes[0];
2780 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2781 break;
2782 }
2783 case nir_op_fround_even: {
2784 DEFAULT_CHECKS;
2785 LValues &newDefs = convert(&insn->dest);
2786 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2787 break;
2788 }
2789 // convert instructions
2790 case nir_op_f2f32:
2791 case nir_op_f2i32:
2792 case nir_op_f2u32:
2793 case nir_op_i2f32:
2794 case nir_op_i2i32:
2795 case nir_op_u2f32:
2796 case nir_op_u2u32:
2797 case nir_op_f2f64:
2798 case nir_op_f2i64:
2799 case nir_op_f2u64:
2800 case nir_op_i2f64:
2801 case nir_op_i2i64:
2802 case nir_op_u2f64:
2803 case nir_op_u2u64: {
2804 DEFAULT_CHECKS;
2805 LValues &newDefs = convert(&insn->dest);
2806 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2807 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2808 i->rnd = ROUND_Z;
2809 i->sType = sTypes[0];
2810 break;
2811 }
2812 // compare instructions
2813 case nir_op_feq32:
2814 case nir_op_ieq32:
2815 case nir_op_fge32:
2816 case nir_op_ige32:
2817 case nir_op_uge32:
2818 case nir_op_flt32:
2819 case nir_op_ilt32:
2820 case nir_op_ult32:
2821 case nir_op_fne32:
2822 case nir_op_ine32: {
2823 DEFAULT_CHECKS;
2824 LValues &newDefs = convert(&insn->dest);
2825 Instruction *i = mkCmp(getOperation(op),
2826 getCondCode(op),
2827 dType,
2828 newDefs[0],
2829 dType,
2830 getSrc(&insn->src[0]),
2831 getSrc(&insn->src[1]));
2832 if (info.num_inputs == 3)
2833 i->setSrc(2, getSrc(&insn->src[2]));
2834 i->sType = sTypes[0];
2835 break;
2836 }
2837 // those are weird ALU ops and need special handling, because
2838    // 1. they are always component based
2839 // 2. they basically just merge multiple values into one data type
2840 case nir_op_mov:
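           // movs to or from registers with array elements are turned into local
           // memory accesses (see regToLmemOffset)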
2841 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2842 nir_reg_dest& reg = insn->dest.dest.reg;
2843 uint32_t goffset = regToLmemOffset[reg.reg->index];
2844 uint8_t comps = reg.reg->num_components;
2845 uint8_t size = reg.reg->bit_size / 8;
2846 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2847 uint32_t aoffset = csize * reg.base_offset;
2848 Value *indirect = NULL;
2849
2850 if (reg.indirect)
2851 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2852 getSrc(reg.indirect, 0), mkImm(csize));
2853
2854 for (uint8_t i = 0u; i < comps; ++i) {
2855 if (!((1u << i) & insn->dest.write_mask))
2856 continue;
2857
2858 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2859 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2860 }
2861 break;
2862 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2863 LValues &newDefs = convert(&insn->dest);
2864 nir_reg_src& reg = insn->src[0].src.reg;
2865 uint32_t goffset = regToLmemOffset[reg.reg->index];
2866 // uint8_t comps = reg.reg->num_components;
2867 uint8_t size = reg.reg->bit_size / 8;
2868 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2869 uint32_t aoffset = csize * reg.base_offset;
2870 Value *indirect = NULL;
2871
2872 if (reg.indirect)
2873 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2874
2875 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2876 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2877
2878 break;
2879 } else {
2880 LValues &newDefs = convert(&insn->dest);
2881 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2882 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2883 }
2884 }
2885 break;
2886 case nir_op_vec2:
2887 case nir_op_vec3:
2888 case nir_op_vec4: {
2889 LValues &newDefs = convert(&insn->dest);
2890 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2891 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2892 }
2893 break;
2894 }
2895 // (un)pack
2896 case nir_op_pack_64_2x32: {
2897 LValues &newDefs = convert(&insn->dest);
2898 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2899 merge->setSrc(0, getSrc(&insn->src[0], 0));
2900 merge->setSrc(1, getSrc(&insn->src[0], 1));
2901 break;
2902 }
2903 case nir_op_pack_half_2x16_split: {
2904 LValues &newDefs = convert(&insn->dest);
2905 Value *tmpH = getSSA();
2906 Value *tmpL = getSSA();
2907
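           // convert both sources to f16 and pack them into one 32 bit value;
           // INSBF with 0x1010 inserts the 16 bits of tmpH at bit 16 of tmpL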
2908 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2909 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2910 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2911 break;
2912 }
2913 case nir_op_unpack_half_2x16_split_x:
2914 case nir_op_unpack_half_2x16_split_y: {
2915 LValues &newDefs = convert(&insn->dest);
2916 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2917 if (op == nir_op_unpack_half_2x16_split_y)
2918 cvt->subOp = 1;
2919 break;
2920 }
2921 case nir_op_unpack_64_2x32: {
2922 LValues &newDefs = convert(&insn->dest);
2923 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2924 break;
2925 }
2926 case nir_op_unpack_64_2x32_split_x: {
2927 LValues &newDefs = convert(&insn->dest);
2928 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2929 break;
2930 }
2931 case nir_op_unpack_64_2x32_split_y: {
2932 LValues &newDefs = convert(&insn->dest);
2933 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2934 break;
2935 }
2936 // special instructions
2937 case nir_op_fsign:
2938 case nir_op_isign: {
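           // sign(x) is built from the (x > 0) and (x < 0) comparison results,
           // with the 64 bit variants assembled from the 32 bit result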
2939 DEFAULT_CHECKS;
2940 DataType iType;
2941 if (::isFloatType(dType))
2942 iType = TYPE_F32;
2943 else
2944 iType = TYPE_S32;
2945
2946 LValues &newDefs = convert(&insn->dest);
2947 LValue *val0 = getScratch();
2948 LValue *val1 = getScratch();
2949 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2950 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2951
2952 if (dType == TYPE_F64) {
2953 mkOp2(OP_SUB, iType, val0, val0, val1);
2954 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2955 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2956 mkOp2(OP_SUB, iType, val0, val1, val0);
2957 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2958 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2959 } else if (::isFloatType(dType))
2960 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2961 else
2962 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2963 break;
2964 }
2965 case nir_op_fcsel:
2966 case nir_op_b32csel: {
2967 DEFAULT_CHECKS;
2968 LValues &newDefs = convert(&insn->dest);
2969 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2970 break;
2971 }
2972 case nir_op_ibitfield_extract:
2973 case nir_op_ubitfield_extract: {
2974 DEFAULT_CHECKS;
2975 Value *tmp = getSSA();
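           // INSBF packs the bit count (8 bits at offset 8) on top of the bit
           // offset, forming the position/size operand EXTBF expects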
2976 LValues &newDefs = convert(&insn->dest);
2977 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2978 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2979 break;
2980 }
2981 case nir_op_bfm: {
2982 DEFAULT_CHECKS;
2983 LValues &newDefs = convert(&insn->dest);
2984 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2985 break;
2986 }
2987 case nir_op_bitfield_insert: {
2988 DEFAULT_CHECKS;
2989 LValues &newDefs = convert(&insn->dest);
2990 LValue *temp = getSSA();
2991 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2992 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2993 break;
2994 }
2995 case nir_op_bit_count: {
2996 DEFAULT_CHECKS;
2997 LValues &newDefs = convert(&insn->dest);
2998 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2999 break;
3000 }
3001 case nir_op_bitfield_reverse: {
3002 DEFAULT_CHECKS;
3003 LValues &newDefs = convert(&insn->dest);
3004 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3005 break;
3006 }
3007 case nir_op_find_lsb: {
3008 DEFAULT_CHECKS;
3009 LValues &newDefs = convert(&insn->dest);
3010 Value *tmp = getSSA();
3011 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3012 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3013 break;
3014 }
3015 // boolean conversions
3016 case nir_op_b2f32: {
3017 DEFAULT_CHECKS;
3018 LValues &newDefs = convert(&insn->dest);
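           // NIR booleans are 0 or ~0, so ANDing with the bit pattern of 1.0f
           // yields exactly 0.0f or 1.0f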
3019 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
3020 break;
3021 }
3022 case nir_op_b2f64: {
3023 DEFAULT_CHECKS;
3024 LValues &newDefs = convert(&insn->dest);
3025 Value *tmp = getSSA(4);
3026 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
3027 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
3028 break;
3029 }
3030 case nir_op_f2b32:
3031 case nir_op_i2b32: {
3032 DEFAULT_CHECKS;
3033 LValues &newDefs = convert(&insn->dest);
3034 Value *src1;
3035 if (typeSizeof(sTypes[0]) == 8) {
3036 src1 = loadImm(getSSA(8), 0.0);
3037 } else {
3038 src1 = zero;
3039 }
3040 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
3041 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
3042 break;
3043 }
3044 case nir_op_b2i32: {
3045 DEFAULT_CHECKS;
3046 LValues &newDefs = convert(&insn->dest);
3047 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
3048 break;
3049 }
3050 case nir_op_b2i64: {
3051 DEFAULT_CHECKS;
3052 LValues &newDefs = convert(&insn->dest);
3053 LValue *def = getScratch();
3054 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
3055 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
3056 break;
3057 }
3058 default:
3059 ERROR("unknown nir_op %s\n", info.name);
3060 return false;
3061 }
3062
3063    if (!oldPos) {
3064       oldPos = this->bb->getEntry();
3065       // bail out before dereferencing in case the block is completely empty
3066       if (unlikely(!oldPos))
3067          return true;
3068       oldPos->precise = insn->exact;
3069    }
3070
3071 while (oldPos->next) {
3072 oldPos = oldPos->next;
3073 oldPos->precise = insn->exact;
3074 }
3075 oldPos->saturate = insn->dest.saturate;
3076
3077 return true;
3078 }
3079 #undef DEFAULT_CHECKS
3080
3081 bool
3082 Converter::visit(nir_ssa_undef_instr *insn)
3083 {
3084 LValues &newDefs = convert(&insn->def);
3085 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3086 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3087 }
3088 return true;
3089 }
3090
3091 #define CASE_SAMPLER(ty) \
3092 case GLSL_SAMPLER_DIM_ ## ty : \
3093 if (isArray && !isShadow) \
3094 return TEX_TARGET_ ## ty ## _ARRAY; \
3095 else if (!isArray && isShadow) \
3096 return TEX_TARGET_## ty ## _SHADOW; \
3097 else if (isArray && isShadow) \
3098 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
3099 else \
3100 return TEX_TARGET_ ## ty
3101
3102 TexTarget
3103 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3104 {
3105 switch (dim) {
3106 CASE_SAMPLER(1D);
3107 CASE_SAMPLER(2D);
3108 CASE_SAMPLER(CUBE);
3109 case GLSL_SAMPLER_DIM_3D:
3110 return TEX_TARGET_3D;
3111 case GLSL_SAMPLER_DIM_MS:
3112 if (isArray)
3113 return TEX_TARGET_2D_MS_ARRAY;
3114 return TEX_TARGET_2D_MS;
3115 case GLSL_SAMPLER_DIM_RECT:
3116 if (isShadow)
3117 return TEX_TARGET_RECT_SHADOW;
3118 return TEX_TARGET_RECT;
3119 case GLSL_SAMPLER_DIM_BUF:
3120 return TEX_TARGET_BUFFER;
3121 case GLSL_SAMPLER_DIM_EXTERNAL:
3122 return TEX_TARGET_2D;
3123 default:
3124 ERROR("unknown glsl_sampler_dim %u\n", dim);
3125 assert(false);
3126 return TEX_TARGET_COUNT;
3127 }
3128 }
3129 #undef CASE_SAMPLER
3130
3131 Value*
3132 Converter::applyProjection(Value *src, Value *proj)
3133 {
3134 if (!proj)
3135 return src;
3136 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3137 }
3138
3139 unsigned int
3140 Converter::getNIRArgCount(TexInstruction::Target& target)
3141 {
3142 unsigned int result = target.getArgCount();
3143 if (target.isCube() && target.isArray())
3144 result--;
3145 if (target.isMS())
3146 result--;
3147 return result;
3148 }
3149
3150 uint16_t
3151 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3152 {
3153 typedef std::pair<uint32_t,Value*> DerefPair;
3154 std::list<DerefPair> derefs;
3155
3156 uint16_t result = 0;
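        // walk up the deref chain to the variable, accumulating the constant part
        // of the offset and collecting all indirect index terms on the way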
3157 while (deref->deref_type != nir_deref_type_var) {
3158 switch (deref->deref_type) {
3159 case nir_deref_type_array: {
3160 Value *indirect;
3161 uint8_t size = type_size(deref->type, true);
3162 result += size * getIndirect(&deref->arr.index, 0, indirect);
3163
3164 if (indirect) {
3165 derefs.push_front(std::make_pair(size, indirect));
3166 }
3167
3168 break;
3169 }
3170 case nir_deref_type_struct: {
3171 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3172 break;
3173 }
3174 case nir_deref_type_var:
3175 default:
3176 unreachable("nir_deref_type_var reached in handleDeref!");
3177 break;
3178 }
3179 deref = nir_deref_instr_parent(deref);
3180 }
3181
3182 indirect = NULL;
3183 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3184 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3185 if (indirect)
3186 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3187 else
3188 indirect = offset;
3189 }
3190
3191 tex = nir_deref_instr_get_variable(deref);
3192 assert(tex);
3193
3194 return result + tex->data.driver_location;
3195 }
3196
3197 CacheMode
3198 Converter::convert(enum gl_access_qualifier access)
3199 {
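        // volatile accesses bypass the cache (CV), coherent ones are cached at the
        // global level (CG), everything else uses the full cache hierarchy (CA)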
3200 switch (access) {
3201 case ACCESS_VOLATILE:
3202 return CACHE_CV;
3203 case ACCESS_COHERENT:
3204 return CACHE_CG;
3205 default:
3206 return CACHE_CA;
3207 }
3208 }
3209
3210 CacheMode
3211 Converter::getCacheModeFromVar(const nir_variable *var)
3212 {
3213 return convert(var->data.image.access);
3214 }
3215
3216 bool
3217 Converter::visit(nir_tex_instr *insn)
3218 {
3219 switch (insn->op) {
3220 case nir_texop_lod:
3221 case nir_texop_query_levels:
3222 case nir_texop_tex:
3223 case nir_texop_texture_samples:
3224 case nir_texop_tg4:
3225 case nir_texop_txb:
3226 case nir_texop_txd:
3227 case nir_texop_txf:
3228 case nir_texop_txf_ms:
3229 case nir_texop_txl:
3230 case nir_texop_txs: {
3231 LValues &newDefs = convert(&insn->dest);
3232 std::vector<Value*> srcs;
3233 std::vector<Value*> defs;
3234 std::vector<nir_src*> offsets;
3235 uint8_t mask = 0;
3236 bool lz = false;
3237 Value *proj = NULL;
3238 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3239 operation op = getOperation(insn->op);
3240
3241 int r, s;
3242 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3243 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3244 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3245 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3246 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3247 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3248 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3249 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3250 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3251 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3252 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3253 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3254 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3255
3256 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3257 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3258
3259 if (projIdx != -1)
3260 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3261
3262 srcs.resize(insn->coord_components);
3263 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3264 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3265
3266       // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3267 if (insn->coord_components) {
3268 uint32_t argCount = target.getArgCount();
3269
3270 if (target.isMS())
3271 argCount -= 1;
3272
3273 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3274 srcs.push_back(getSSA());
3275 }
3276
3277 if (insn->op == nir_texop_texture_samples)
3278 srcs.push_back(zero);
3279 else if (!insn->num_srcs)
3280 srcs.push_back(loadImm(NULL, 0));
3281 if (biasIdx != -1)
3282 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3283 if (lodIdx != -1)
3284 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3285 else if (op == OP_TXF)
3286 lz = true;
3287 if (msIdx != -1)
3288 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3289 if (offsetIdx != -1)
3290 offsets.push_back(&insn->src[offsetIdx].src);
3291 if (compIdx != -1)
3292 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3293 if (texOffIdx != -1) {
3294 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3295 texOffIdx = srcs.size() - 1;
3296 }
3297 if (sampOffIdx != -1) {
3298 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3299 sampOffIdx = srcs.size() - 1;
3300 }
3301 if (bindless) {
3302          // currently we only use the lower 32 bits of the bindless handle
3303 Value *split[2];
3304 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3305
3306 mkSplit(split, 4, handle);
3307
3308 srcs.push_back(split[0]);
3309 texOffIdx = srcs.size() - 1;
3310 }
3311
3312 r = bindless ? 0xff : insn->texture_index;
3313 s = bindless ? 0x1f : insn->sampler_index;
3314
3315 defs.resize(newDefs.size());
3316 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3317 defs[d] = newDefs[d];
3318 mask |= 1 << d;
3319 }
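           // there are no implicit derivatives outside fragment shaders and no LOD
           // for MS targets, so sample level zero explicitly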
3320 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3321 lz = true;
3322
3323 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3324 texi->tex.levelZero = lz;
3325 texi->tex.mask = mask;
3326 texi->tex.bindless = bindless;
3327
3328 if (texOffIdx != -1)
3329 texi->tex.rIndirectSrc = texOffIdx;
3330 if (sampOffIdx != -1)
3331 texi->tex.sIndirectSrc = sampOffIdx;
3332
3333 switch (insn->op) {
3334 case nir_texop_tg4:
3335 if (!target.isShadow())
3336 texi->tex.gatherComp = insn->component;
3337 break;
3338 case nir_texop_txs:
3339 texi->tex.query = TXQ_DIMS;
3340 break;
3341 case nir_texop_texture_samples:
3342 texi->tex.mask = 0x4;
3343 texi->tex.query = TXQ_TYPE;
3344 break;
3345 case nir_texop_query_levels:
3346 texi->tex.mask = 0x8;
3347 texi->tex.query = TXQ_DIMS;
3348 break;
3349 default:
3350 break;
3351 }
3352
3353 texi->tex.useOffsets = offsets.size();
3354 if (texi->tex.useOffsets) {
3355 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3356 for (uint32_t c = 0u; c < 3; ++c) {
3357 uint8_t s2 = std::min(c, target.getDim() - 1);
3358 texi->offset[s][c].set(getSrc(offsets[s], s2));
3359 texi->offset[s][c].setInsn(texi);
3360 }
3361 }
3362 }
3363
3364 if (op == OP_TXG && offsetIdx == -1) {
3365 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3366 texi->tex.useOffsets = 4;
3367 setPosition(texi, false);
3368 for (uint8_t i = 0; i < 4; ++i) {
3369 for (uint8_t j = 0; j < 2; ++j) {
3370 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3371 texi->offset[i][j].setInsn(texi);
3372 }
3373 }
3374 setPosition(texi, true);
3375 }
3376 }
3377
3378 if (ddxIdx != -1 && ddyIdx != -1) {
3379 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3380 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3381 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3382 }
3383 }
3384
3385 break;
3386 }
3387 default:
3388 ERROR("unknown nir_texop %u\n", insn->op);
3389 return false;
3390 }
3391 return true;
3392 }
3393
3394 bool
3395 Converter::visit(nir_deref_instr *deref)
3396 {
3397    // we just ignore those, because image intrinsics are the only place where
3398    // we should end up with deref sources and those have to backtrack anyway
3399    // to get the nir_variable. This code only exists to reject unexpected
3400    // deref types.
3401 switch (deref->deref_type) {
3402 case nir_deref_type_array:
3403 case nir_deref_type_struct:
3404 case nir_deref_type_var:
3405 break;
3406 default:
3407 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3408 return false;
3409 }
3410 return true;
3411 }
3412
3413 bool
3414 Converter::run()
3415 {
3416 bool progress;
3417
3418 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3419 nir_print_shader(nir, stderr);
3420
3421 struct nir_lower_subgroups_options subgroup_options = {
3422 .subgroup_size = 32,
3423 .ballot_bit_size = 32,
3424 };
3425
3426 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3427 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3428 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3429 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3430 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3431 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
3432 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3433
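        // run the usual NIR optimization loop until it stops making progress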
3434 do {
3435 progress = false;
3436 NIR_PASS(progress, nir, nir_copy_prop);
3437 NIR_PASS(progress, nir, nir_opt_remove_phis);
3438 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3439 NIR_PASS(progress, nir, nir_opt_cse);
3440 NIR_PASS(progress, nir, nir_opt_algebraic);
3441 NIR_PASS(progress, nir, nir_opt_constant_folding);
3442 NIR_PASS(progress, nir, nir_copy_prop);
3443 NIR_PASS(progress, nir, nir_opt_dce);
3444 NIR_PASS(progress, nir, nir_opt_dead_cf);
3445 } while (progress);
3446
3447 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3448 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3449 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3450 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3451
3452 // Garbage collect dead instructions
3453 nir_sweep(nir);
3454
3455 if (!parseNIR()) {
3456       ERROR("Couldn't parse NIR!\n");
3457 return false;
3458 }
3459
3460 if (!assignSlots()) {
3461 ERROR("Couldn't assign slots!\n");
3462 return false;
3463 }
3464
3465 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3466 nir_print_shader(nir, stderr);
3467
3468 nir_foreach_function(function, nir) {
3469 if (!visit(function))
3470 return false;
3471 }
3472
3473 return true;
3474 }
3475
3476 } // unnamed namespace
3477
3478 namespace nv50_ir {
3479
3480 bool
3481 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3482 {
3483 nir_shader *nir = (nir_shader*)info->bin.source;
3484 Converter converter(this, nir, info);
3485 bool result = converter.run();
3486 if (!result)
3487 return result;
3488 LoweringHelper lowering;
3489 lowering.run(this);
3490 tlsSize = info->bin.tlsSpace;
3491 return result;
3492 }
3493
3494 } // namespace nv50_ir