nir: Add a new memory_barrier_tcs_patch intrinsic
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <cstring>
40 #include <list>
41 #include <vector>
42
43 namespace {
44
45 #if __cplusplus >= 201103L
46 using std::hash;
47 using std::unordered_map;
48 #else
49 using std::tr1::hash;
50 using std::tr1::unordered_map;
51 #endif
52
53 using namespace nv50_ir;
54
55 int
56 type_size(const struct glsl_type *type, bool bindless)
57 {
58 return glsl_count_attribute_slots(type, false);
59 }
60
61 class Converter : public ConverterCommon
62 {
63 public:
64 Converter(Program *, nir_shader *, nv50_ir_prog_info *);
65
66 bool run();
67 private:
68 typedef std::vector<LValue*> LValues;
69 typedef unordered_map<unsigned, LValues> NirDefMap;
70 typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
71 typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
72 typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
73
74 CacheMode convert(enum gl_access_qualifier);
75 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
76 LValues& convert(nir_alu_dest *);
77 BasicBlock* convert(nir_block *);
78 LValues& convert(nir_dest *);
79 SVSemantic convert(nir_intrinsic_op);
80 Value* convert(nir_load_const_instr*, uint8_t);
81 LValues& convert(nir_register *);
82 LValues& convert(nir_ssa_def *);
83
84 ImgFormat convertGLImgFormat(GLuint);
85
86 Value* getSrc(nir_alu_src *, uint8_t component = 0);
87 Value* getSrc(nir_register *, uint8_t);
88 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
89 Value* getSrc(nir_ssa_def *, uint8_t);
90
91 // returned value is the constant part of the given source (either the
92 // nir_src or the selected source component of an intrinsic). Even though
93 // this is mostly an optimization to be able to skip indirects in a few
94 // cases, sometimes we require immediate values or set some fileds on
95 // instructions (e.g. tex) in order for codegen to consume those.
96 // If the found value has not a constant part, the Value gets returned
97 // through the Value parameter.
98 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
99 // isScalar indicates that the addressing is scalar, vec4 addressing is
100 // assumed otherwise
101 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
102 bool isScalar = false);
103
104 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
105
106 void setInterpolate(nv50_ir_varying *,
107 uint8_t,
108 bool centroid,
109 unsigned semantics);
110
111 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
112 uint8_t c, Value *indirect0 = NULL,
113 Value *indirect1 = NULL, bool patch = false);
114 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
115 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
116 Value *indirect1 = NULL);
117
118 bool isFloatType(nir_alu_type);
119 bool isSignedType(nir_alu_type);
120 bool isResultFloat(nir_op);
121 bool isResultSigned(nir_op);
122
123 DataType getDType(nir_alu_instr *);
124 DataType getDType(nir_intrinsic_instr *);
125 DataType getDType(nir_intrinsic_instr *, bool isSigned);
126 DataType getDType(nir_op, uint8_t);
127
128 std::vector<DataType> getSTypes(nir_alu_instr *);
129 DataType getSType(nir_src &, bool isFloat, bool isSigned);
130
131 operation getOperation(nir_intrinsic_op);
132 operation getOperation(nir_op);
133 operation getOperation(nir_texop);
134 operation preOperationNeeded(nir_op);
135
136 int getSubOp(nir_intrinsic_op);
137 int getSubOp(nir_op);
138
139 CondCode getCondCode(nir_op);
140
141 bool assignSlots();
142 bool parseNIR();
143
144 bool visit(nir_alu_instr *);
145 bool visit(nir_block *);
146 bool visit(nir_cf_node *);
147 bool visit(nir_deref_instr *);
148 bool visit(nir_function *);
149 bool visit(nir_if *);
150 bool visit(nir_instr *);
151 bool visit(nir_intrinsic_instr *);
152 bool visit(nir_jump_instr *);
153 bool visit(nir_load_const_instr*);
154 bool visit(nir_loop *);
155 bool visit(nir_ssa_undef_instr *);
156 bool visit(nir_tex_instr *);
157
158 // tex stuff
159 Value* applyProjection(Value *src, Value *proj);
160 unsigned int getNIRArgCount(TexInstruction::Target&);
161
162 // image stuff
163 uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
164 CacheMode getCacheModeFromVar(const nir_variable *);
165
166 nir_shader *nir;
167
168 NirDefMap ssaDefs;
169 NirDefMap regDefs;
170 ImmediateMap immediates;
171 NirArrayLMemOffsets regToLmemOffset;
172 NirBlockMap blocks;
173 unsigned int curLoopDepth;
174
175 BasicBlock *exit;
176 Value *zero;
177 Instruction *immInsertPos;
178
179 int clipVertexOutput;
180
181 union {
182 struct {
183 Value *position;
184 } fp;
185 };
186 };
187
188 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
189 : ConverterCommon(prog, info),
190 nir(nir),
191 curLoopDepth(0),
192 clipVertexOutput(-1)
193 {
194 zero = mkImm((uint32_t)0);
195 }
196
197 BasicBlock *
198 Converter::convert(nir_block *block)
199 {
200 NirBlockMap::iterator it = blocks.find(block->index);
201 if (it != blocks.end())
202 return it->second;
203
204 BasicBlock *bb = new BasicBlock(func);
205 blocks[block->index] = bb;
206 return bb;
207 }
208
209 bool
210 Converter::isFloatType(nir_alu_type type)
211 {
212 return nir_alu_type_get_base_type(type) == nir_type_float;
213 }
214
215 bool
216 Converter::isSignedType(nir_alu_type type)
217 {
218 return nir_alu_type_get_base_type(type) == nir_type_int;
219 }
220
221 bool
222 Converter::isResultFloat(nir_op op)
223 {
224 const nir_op_info &info = nir_op_infos[op];
225 if (info.output_type != nir_type_invalid)
226 return isFloatType(info.output_type);
227
228 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
229 assert(false);
230 return true;
231 }
232
233 bool
234 Converter::isResultSigned(nir_op op)
235 {
236 switch (op) {
237 // there is no umul and we get wrong results if we treat all muls as signed
238 case nir_op_imul:
239 case nir_op_inot:
240 return false;
241 default:
242 const nir_op_info &info = nir_op_infos[op];
243 if (info.output_type != nir_type_invalid)
244 return isSignedType(info.output_type);
245 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
246 assert(false);
247 return true;
248 }
249 }
250
251 DataType
252 Converter::getDType(nir_alu_instr *insn)
253 {
254 if (insn->dest.dest.is_ssa)
255 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
256 else
257 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
258 }
259
260 DataType
261 Converter::getDType(nir_intrinsic_instr *insn)
262 {
263 bool isSigned;
264 switch (insn->intrinsic) {
265 case nir_intrinsic_shared_atomic_imax:
266 case nir_intrinsic_shared_atomic_imin:
267 case nir_intrinsic_ssbo_atomic_imax:
268 case nir_intrinsic_ssbo_atomic_imin:
269 isSigned = true;
270 break;
271 default:
272 isSigned = false;
273 break;
274 }
275
276 return getDType(insn, isSigned);
277 }
278
279 DataType
280 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
281 {
282 if (insn->dest.is_ssa)
283 return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
284 else
285 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
286 }
287
288 DataType
289 Converter::getDType(nir_op op, uint8_t bitSize)
290 {
291 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
292 if (ty == TYPE_NONE) {
293 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
294 assert(false);
295 }
296 return ty;
297 }
298
299 std::vector<DataType>
300 Converter::getSTypes(nir_alu_instr *insn)
301 {
302 const nir_op_info &info = nir_op_infos[insn->op];
303 std::vector<DataType> res(info.num_inputs);
304
305 for (uint8_t i = 0; i < info.num_inputs; ++i) {
306 if (info.input_types[i] != nir_type_invalid) {
307 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
308 } else {
309 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
310 assert(false);
311 res[i] = TYPE_NONE;
312 break;
313 }
314 }
315
316 return res;
317 }
318
319 DataType
320 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
321 {
322 uint8_t bitSize;
323 if (src.is_ssa)
324 bitSize = src.ssa->bit_size;
325 else
326 bitSize = src.reg.reg->bit_size;
327
328 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
329 if (ty == TYPE_NONE) {
330 const char *str;
331 if (isFloat)
332 str = "float";
333 else if (isSigned)
334 str = "int";
335 else
336 str = "uint";
337 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
338 assert(false);
339 }
340 return ty;
341 }
342
343 operation
344 Converter::getOperation(nir_op op)
345 {
346 switch (op) {
347 // basic ops with float and int variants
348 case nir_op_fabs:
349 case nir_op_iabs:
350 return OP_ABS;
351 case nir_op_fadd:
352 case nir_op_iadd:
353 return OP_ADD;
354 case nir_op_iand:
355 return OP_AND;
356 case nir_op_ifind_msb:
357 case nir_op_ufind_msb:
358 return OP_BFIND;
359 case nir_op_fceil:
360 return OP_CEIL;
361 case nir_op_fcos:
362 return OP_COS;
363 case nir_op_f2f32:
364 case nir_op_f2f64:
365 case nir_op_f2i32:
366 case nir_op_f2i64:
367 case nir_op_f2u32:
368 case nir_op_f2u64:
369 case nir_op_i2f32:
370 case nir_op_i2f64:
371 case nir_op_i2i32:
372 case nir_op_i2i64:
373 case nir_op_u2f32:
374 case nir_op_u2f64:
375 case nir_op_u2u32:
376 case nir_op_u2u64:
377 return OP_CVT;
378 case nir_op_fddx:
379 case nir_op_fddx_coarse:
380 case nir_op_fddx_fine:
381 return OP_DFDX;
382 case nir_op_fddy:
383 case nir_op_fddy_coarse:
384 case nir_op_fddy_fine:
385 return OP_DFDY;
386 case nir_op_fdiv:
387 case nir_op_idiv:
388 case nir_op_udiv:
389 return OP_DIV;
390 case nir_op_fexp2:
391 return OP_EX2;
392 case nir_op_ffloor:
393 return OP_FLOOR;
394 case nir_op_ffma:
395 return OP_FMA;
396 case nir_op_flog2:
397 return OP_LG2;
398 case nir_op_fmax:
399 case nir_op_imax:
400 case nir_op_umax:
401 return OP_MAX;
402 case nir_op_pack_64_2x32_split:
403 return OP_MERGE;
404 case nir_op_fmin:
405 case nir_op_imin:
406 case nir_op_umin:
407 return OP_MIN;
408 case nir_op_fmod:
409 case nir_op_imod:
410 case nir_op_umod:
411 case nir_op_frem:
412 case nir_op_irem:
413 return OP_MOD;
414 case nir_op_fmul:
415 case nir_op_imul:
416 case nir_op_imul_high:
417 case nir_op_umul_high:
418 return OP_MUL;
419 case nir_op_fneg:
420 case nir_op_ineg:
421 return OP_NEG;
422 case nir_op_inot:
423 return OP_NOT;
424 case nir_op_ior:
425 return OP_OR;
426 case nir_op_fpow:
427 return OP_POW;
428 case nir_op_frcp:
429 return OP_RCP;
430 case nir_op_frsq:
431 return OP_RSQ;
432 case nir_op_fsat:
433 return OP_SAT;
434 case nir_op_feq32:
435 case nir_op_ieq32:
436 case nir_op_fge32:
437 case nir_op_ige32:
438 case nir_op_uge32:
439 case nir_op_flt32:
440 case nir_op_ilt32:
441 case nir_op_ult32:
442 case nir_op_fne32:
443 case nir_op_ine32:
444 return OP_SET;
445 case nir_op_ishl:
446 return OP_SHL;
447 case nir_op_ishr:
448 case nir_op_ushr:
449 return OP_SHR;
450 case nir_op_fsin:
451 return OP_SIN;
452 case nir_op_fsqrt:
453 return OP_SQRT;
454 case nir_op_ftrunc:
455 return OP_TRUNC;
456 case nir_op_ixor:
457 return OP_XOR;
458 default:
459 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
460 assert(false);
461 return OP_NOP;
462 }
463 }
464
465 operation
466 Converter::getOperation(nir_texop op)
467 {
468 switch (op) {
469 case nir_texop_tex:
470 return OP_TEX;
471 case nir_texop_lod:
472 return OP_TXLQ;
473 case nir_texop_txb:
474 return OP_TXB;
475 case nir_texop_txd:
476 return OP_TXD;
477 case nir_texop_txf:
478 case nir_texop_txf_ms:
479 return OP_TXF;
480 case nir_texop_tg4:
481 return OP_TXG;
482 case nir_texop_txl:
483 return OP_TXL;
484 case nir_texop_query_levels:
485 case nir_texop_texture_samples:
486 case nir_texop_txs:
487 return OP_TXQ;
488 default:
489 ERROR("couldn't get operation for nir_texop %u\n", op);
490 assert(false);
491 return OP_NOP;
492 }
493 }
494
495 operation
496 Converter::getOperation(nir_intrinsic_op op)
497 {
498 switch (op) {
499 case nir_intrinsic_emit_vertex:
500 return OP_EMIT;
501 case nir_intrinsic_end_primitive:
502 return OP_RESTART;
503 case nir_intrinsic_bindless_image_atomic_add:
504 case nir_intrinsic_image_atomic_add:
505 case nir_intrinsic_image_deref_atomic_add:
506 case nir_intrinsic_bindless_image_atomic_and:
507 case nir_intrinsic_image_atomic_and:
508 case nir_intrinsic_image_deref_atomic_and:
509 case nir_intrinsic_bindless_image_atomic_comp_swap:
510 case nir_intrinsic_image_atomic_comp_swap:
511 case nir_intrinsic_image_deref_atomic_comp_swap:
512 case nir_intrinsic_bindless_image_atomic_exchange:
513 case nir_intrinsic_image_atomic_exchange:
514 case nir_intrinsic_image_deref_atomic_exchange:
515 case nir_intrinsic_bindless_image_atomic_imax:
516 case nir_intrinsic_image_atomic_imax:
517 case nir_intrinsic_image_deref_atomic_imax:
518 case nir_intrinsic_bindless_image_atomic_umax:
519 case nir_intrinsic_image_atomic_umax:
520 case nir_intrinsic_image_deref_atomic_umax:
521 case nir_intrinsic_bindless_image_atomic_imin:
522 case nir_intrinsic_image_atomic_imin:
523 case nir_intrinsic_image_deref_atomic_imin:
524 case nir_intrinsic_bindless_image_atomic_umin:
525 case nir_intrinsic_image_atomic_umin:
526 case nir_intrinsic_image_deref_atomic_umin:
527 case nir_intrinsic_bindless_image_atomic_or:
528 case nir_intrinsic_image_atomic_or:
529 case nir_intrinsic_image_deref_atomic_or:
530 case nir_intrinsic_bindless_image_atomic_xor:
531 case nir_intrinsic_image_atomic_xor:
532 case nir_intrinsic_image_deref_atomic_xor:
533 return OP_SUREDP;
534 case nir_intrinsic_bindless_image_load:
535 case nir_intrinsic_image_load:
536 case nir_intrinsic_image_deref_load:
537 return OP_SULDP;
538 case nir_intrinsic_bindless_image_samples:
539 case nir_intrinsic_image_samples:
540 case nir_intrinsic_image_deref_samples:
541 case nir_intrinsic_bindless_image_size:
542 case nir_intrinsic_image_size:
543 case nir_intrinsic_image_deref_size:
544 return OP_SUQ;
545 case nir_intrinsic_bindless_image_store:
546 case nir_intrinsic_image_store:
547 case nir_intrinsic_image_deref_store:
548 return OP_SUSTP;
549 default:
550 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
551 assert(false);
552 return OP_NOP;
553 }
554 }
555
556 operation
557 Converter::preOperationNeeded(nir_op op)
558 {
559 switch (op) {
560 case nir_op_fcos:
561 case nir_op_fsin:
562 return OP_PRESIN;
563 default:
564 return OP_NOP;
565 }
566 }
567
568 int
569 Converter::getSubOp(nir_op op)
570 {
571 switch (op) {
572 case nir_op_imul_high:
573 case nir_op_umul_high:
574 return NV50_IR_SUBOP_MUL_HIGH;
575 default:
576 return 0;
577 }
578 }
579
580 int
581 Converter::getSubOp(nir_intrinsic_op op)
582 {
583 switch (op) {
584 case nir_intrinsic_bindless_image_atomic_add:
585 case nir_intrinsic_global_atomic_add:
586 case nir_intrinsic_image_atomic_add:
587 case nir_intrinsic_image_deref_atomic_add:
588 case nir_intrinsic_shared_atomic_add:
589 case nir_intrinsic_ssbo_atomic_add:
590 return NV50_IR_SUBOP_ATOM_ADD;
591 case nir_intrinsic_bindless_image_atomic_and:
592 case nir_intrinsic_global_atomic_and:
593 case nir_intrinsic_image_atomic_and:
594 case nir_intrinsic_image_deref_atomic_and:
595 case nir_intrinsic_shared_atomic_and:
596 case nir_intrinsic_ssbo_atomic_and:
597 return NV50_IR_SUBOP_ATOM_AND;
598 case nir_intrinsic_bindless_image_atomic_comp_swap:
599 case nir_intrinsic_global_atomic_comp_swap:
600 case nir_intrinsic_image_atomic_comp_swap:
601 case nir_intrinsic_image_deref_atomic_comp_swap:
602 case nir_intrinsic_shared_atomic_comp_swap:
603 case nir_intrinsic_ssbo_atomic_comp_swap:
604 return NV50_IR_SUBOP_ATOM_CAS;
605 case nir_intrinsic_bindless_image_atomic_exchange:
606 case nir_intrinsic_global_atomic_exchange:
607 case nir_intrinsic_image_atomic_exchange:
608 case nir_intrinsic_image_deref_atomic_exchange:
609 case nir_intrinsic_shared_atomic_exchange:
610 case nir_intrinsic_ssbo_atomic_exchange:
611 return NV50_IR_SUBOP_ATOM_EXCH;
612 case nir_intrinsic_bindless_image_atomic_or:
613 case nir_intrinsic_global_atomic_or:
614 case nir_intrinsic_image_atomic_or:
615 case nir_intrinsic_image_deref_atomic_or:
616 case nir_intrinsic_shared_atomic_or:
617 case nir_intrinsic_ssbo_atomic_or:
618 return NV50_IR_SUBOP_ATOM_OR;
619 case nir_intrinsic_bindless_image_atomic_imax:
620 case nir_intrinsic_bindless_image_atomic_umax:
621 case nir_intrinsic_global_atomic_imax:
622 case nir_intrinsic_global_atomic_umax:
623 case nir_intrinsic_image_atomic_imax:
624 case nir_intrinsic_image_atomic_umax:
625 case nir_intrinsic_image_deref_atomic_imax:
626 case nir_intrinsic_image_deref_atomic_umax:
627 case nir_intrinsic_shared_atomic_imax:
628 case nir_intrinsic_shared_atomic_umax:
629 case nir_intrinsic_ssbo_atomic_imax:
630 case nir_intrinsic_ssbo_atomic_umax:
631 return NV50_IR_SUBOP_ATOM_MAX;
632 case nir_intrinsic_bindless_image_atomic_imin:
633 case nir_intrinsic_bindless_image_atomic_umin:
634 case nir_intrinsic_global_atomic_imin:
635 case nir_intrinsic_global_atomic_umin:
636 case nir_intrinsic_image_atomic_imin:
637 case nir_intrinsic_image_atomic_umin:
638 case nir_intrinsic_image_deref_atomic_imin:
639 case nir_intrinsic_image_deref_atomic_umin:
640 case nir_intrinsic_shared_atomic_imin:
641 case nir_intrinsic_shared_atomic_umin:
642 case nir_intrinsic_ssbo_atomic_imin:
643 case nir_intrinsic_ssbo_atomic_umin:
644 return NV50_IR_SUBOP_ATOM_MIN;
645 case nir_intrinsic_bindless_image_atomic_xor:
646 case nir_intrinsic_global_atomic_xor:
647 case nir_intrinsic_image_atomic_xor:
648 case nir_intrinsic_image_deref_atomic_xor:
649 case nir_intrinsic_shared_atomic_xor:
650 case nir_intrinsic_ssbo_atomic_xor:
651 return NV50_IR_SUBOP_ATOM_XOR;
652
653 case nir_intrinsic_group_memory_barrier:
654 case nir_intrinsic_memory_barrier:
655 case nir_intrinsic_memory_barrier_atomic_counter:
656 case nir_intrinsic_memory_barrier_buffer:
657 case nir_intrinsic_memory_barrier_image:
658 return NV50_IR_SUBOP_MEMBAR(M, GL);
659 case nir_intrinsic_memory_barrier_shared:
660 return NV50_IR_SUBOP_MEMBAR(M, CTA);
661
662 case nir_intrinsic_vote_all:
663 return NV50_IR_SUBOP_VOTE_ALL;
664 case nir_intrinsic_vote_any:
665 return NV50_IR_SUBOP_VOTE_ANY;
666 case nir_intrinsic_vote_ieq:
667 return NV50_IR_SUBOP_VOTE_UNI;
668 default:
669 return 0;
670 }
671 }
672
673 CondCode
674 Converter::getCondCode(nir_op op)
675 {
676 switch (op) {
677 case nir_op_feq32:
678 case nir_op_ieq32:
679 return CC_EQ;
680 case nir_op_fge32:
681 case nir_op_ige32:
682 case nir_op_uge32:
683 return CC_GE;
684 case nir_op_flt32:
685 case nir_op_ilt32:
686 case nir_op_ult32:
687 return CC_LT;
688 case nir_op_fne32:
689 return CC_NEU;
690 case nir_op_ine32:
691 return CC_NE;
692 default:
693 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
694 assert(false);
695 return CC_FL;
696 }
697 }
698
699 Converter::LValues&
700 Converter::convert(nir_alu_dest *dest)
701 {
702 return convert(&dest->dest);
703 }
704
705 Converter::LValues&
706 Converter::convert(nir_dest *dest)
707 {
708 if (dest->is_ssa)
709 return convert(&dest->ssa);
710 if (dest->reg.indirect) {
711 ERROR("no support for indirects.");
712 assert(false);
713 }
714 return convert(dest->reg.reg);
715 }
716
717 Converter::LValues&
718 Converter::convert(nir_register *reg)
719 {
720 NirDefMap::iterator it = regDefs.find(reg->index);
721 if (it != regDefs.end())
722 return it->second;
723
724 LValues newDef(reg->num_components);
725 for (uint8_t i = 0; i < reg->num_components; i++)
726 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
727 return regDefs[reg->index] = newDef;
728 }
729
730 Converter::LValues&
731 Converter::convert(nir_ssa_def *def)
732 {
733 NirDefMap::iterator it = ssaDefs.find(def->index);
734 if (it != ssaDefs.end())
735 return it->second;
736
737 LValues newDef(def->num_components);
738 for (uint8_t i = 0; i < def->num_components; i++)
739 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
740 return ssaDefs[def->index] = newDef;
741 }
742
743 Value*
744 Converter::getSrc(nir_alu_src *src, uint8_t component)
745 {
746 if (src->abs || src->negate) {
747 ERROR("modifiers currently not supported on nir_alu_src\n");
748 assert(false);
749 }
750 return getSrc(&src->src, src->swizzle[component]);
751 }
752
753 Value*
754 Converter::getSrc(nir_register *reg, uint8_t idx)
755 {
756 NirDefMap::iterator it = regDefs.find(reg->index);
757 if (it == regDefs.end())
758 return convert(reg)[idx];
759 return it->second[idx];
760 }
761
762 Value*
763 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
764 {
765 if (src->is_ssa)
766 return getSrc(src->ssa, idx);
767
768 if (src->reg.indirect) {
769 if (indirect)
770 return getSrc(src->reg.indirect, idx);
771 ERROR("no support for indirects.");
772 assert(false);
773 return NULL;
774 }
775
776 return getSrc(src->reg.reg, idx);
777 }
778
779 Value*
780 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
781 {
782 ImmediateMap::iterator iit = immediates.find(src->index);
783 if (iit != immediates.end())
784 return convert((*iit).second, idx);
785
786 NirDefMap::iterator it = ssaDefs.find(src->index);
787 if (it == ssaDefs.end()) {
788 ERROR("SSA value %u not found\n", src->index);
789 assert(false);
790 return NULL;
791 }
792 return it->second[idx];
793 }
794
795 uint32_t
796 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
797 {
798 nir_const_value *offset = nir_src_as_const_value(*src);
799
800 if (offset) {
801 indirect = NULL;
802 return offset[0].u32;
803 }
804
805 indirect = getSrc(src, idx, true);
806 return 0;
807 }
808
809 uint32_t
810 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
811 {
812 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
813 if (indirect && !isScalar)
814 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
815 return idx;
816 }
817
818 static void
819 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
820 {
821 assert(name && index);
822
823 if (slot >= VERT_ATTRIB_MAX) {
824 ERROR("invalid varying slot %u\n", slot);
825 assert(false);
826 return;
827 }
828
829 if (slot >= VERT_ATTRIB_GENERIC0 &&
830 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
831 *name = TGSI_SEMANTIC_GENERIC;
832 *index = slot - VERT_ATTRIB_GENERIC0;
833 return;
834 }
835
836 if (slot >= VERT_ATTRIB_TEX0 &&
837 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
838 *name = TGSI_SEMANTIC_TEXCOORD;
839 *index = slot - VERT_ATTRIB_TEX0;
840 return;
841 }
842
843 switch (slot) {
844 case VERT_ATTRIB_COLOR0:
845 *name = TGSI_SEMANTIC_COLOR;
846 *index = 0;
847 break;
848 case VERT_ATTRIB_COLOR1:
849 *name = TGSI_SEMANTIC_COLOR;
850 *index = 1;
851 break;
852 case VERT_ATTRIB_EDGEFLAG:
853 *name = TGSI_SEMANTIC_EDGEFLAG;
854 *index = 0;
855 break;
856 case VERT_ATTRIB_FOG:
857 *name = TGSI_SEMANTIC_FOG;
858 *index = 0;
859 break;
860 case VERT_ATTRIB_NORMAL:
861 *name = TGSI_SEMANTIC_NORMAL;
862 *index = 0;
863 break;
864 case VERT_ATTRIB_POS:
865 *name = TGSI_SEMANTIC_POSITION;
866 *index = 0;
867 break;
868 case VERT_ATTRIB_POINT_SIZE:
869 *name = TGSI_SEMANTIC_PSIZE;
870 *index = 0;
871 break;
872 default:
873 ERROR("unknown vert attrib slot %u\n", slot);
874 assert(false);
875 break;
876 }
877 }
878
879 static void
880 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
881 {
882 assert(name && index);
883
884 if (slot >= VARYING_SLOT_TESS_MAX) {
885 ERROR("invalid varying slot %u\n", slot);
886 assert(false);
887 return;
888 }
889
890 if (slot >= VARYING_SLOT_PATCH0) {
891 *name = TGSI_SEMANTIC_PATCH;
892 *index = slot - VARYING_SLOT_PATCH0;
893 return;
894 }
895
896 if (slot >= VARYING_SLOT_VAR0) {
897 *name = TGSI_SEMANTIC_GENERIC;
898 *index = slot - VARYING_SLOT_VAR0;
899 return;
900 }
901
902 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
903 *name = TGSI_SEMANTIC_TEXCOORD;
904 *index = slot - VARYING_SLOT_TEX0;
905 return;
906 }
907
908 switch (slot) {
909 case VARYING_SLOT_BFC0:
910 *name = TGSI_SEMANTIC_BCOLOR;
911 *index = 0;
912 break;
913 case VARYING_SLOT_BFC1:
914 *name = TGSI_SEMANTIC_BCOLOR;
915 *index = 1;
916 break;
917 case VARYING_SLOT_CLIP_DIST0:
918 *name = TGSI_SEMANTIC_CLIPDIST;
919 *index = 0;
920 break;
921 case VARYING_SLOT_CLIP_DIST1:
922 *name = TGSI_SEMANTIC_CLIPDIST;
923 *index = 1;
924 break;
925 case VARYING_SLOT_CLIP_VERTEX:
926 *name = TGSI_SEMANTIC_CLIPVERTEX;
927 *index = 0;
928 break;
929 case VARYING_SLOT_COL0:
930 *name = TGSI_SEMANTIC_COLOR;
931 *index = 0;
932 break;
933 case VARYING_SLOT_COL1:
934 *name = TGSI_SEMANTIC_COLOR;
935 *index = 1;
936 break;
937 case VARYING_SLOT_EDGE:
938 *name = TGSI_SEMANTIC_EDGEFLAG;
939 *index = 0;
940 break;
941 case VARYING_SLOT_FACE:
942 *name = TGSI_SEMANTIC_FACE;
943 *index = 0;
944 break;
945 case VARYING_SLOT_FOGC:
946 *name = TGSI_SEMANTIC_FOG;
947 *index = 0;
948 break;
949 case VARYING_SLOT_LAYER:
950 *name = TGSI_SEMANTIC_LAYER;
951 *index = 0;
952 break;
953 case VARYING_SLOT_PNTC:
954 *name = TGSI_SEMANTIC_PCOORD;
955 *index = 0;
956 break;
957 case VARYING_SLOT_POS:
958 *name = TGSI_SEMANTIC_POSITION;
959 *index = 0;
960 break;
961 case VARYING_SLOT_PRIMITIVE_ID:
962 *name = TGSI_SEMANTIC_PRIMID;
963 *index = 0;
964 break;
965 case VARYING_SLOT_PSIZ:
966 *name = TGSI_SEMANTIC_PSIZE;
967 *index = 0;
968 break;
969 case VARYING_SLOT_TESS_LEVEL_INNER:
970 *name = TGSI_SEMANTIC_TESSINNER;
971 *index = 0;
972 break;
973 case VARYING_SLOT_TESS_LEVEL_OUTER:
974 *name = TGSI_SEMANTIC_TESSOUTER;
975 *index = 0;
976 break;
977 case VARYING_SLOT_VIEWPORT:
978 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
979 *index = 0;
980 break;
981 default:
982 ERROR("unknown varying slot %u\n", slot);
983 assert(false);
984 break;
985 }
986 }
987
988 static void
989 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
990 {
991 if (slot >= FRAG_RESULT_DATA0) {
992 *name = TGSI_SEMANTIC_COLOR;
993 *index = slot - FRAG_RESULT_COLOR - 2; // intentional
994 return;
995 }
996
997 switch (slot) {
998 case FRAG_RESULT_COLOR:
999 *name = TGSI_SEMANTIC_COLOR;
1000 *index = 0;
1001 break;
1002 case FRAG_RESULT_DEPTH:
1003 *name = TGSI_SEMANTIC_POSITION;
1004 *index = 0;
1005 break;
1006 case FRAG_RESULT_SAMPLE_MASK:
1007 *name = TGSI_SEMANTIC_SAMPLEMASK;
1008 *index = 0;
1009 break;
1010 default:
1011 ERROR("unknown frag result slot %u\n", slot);
1012 assert(false);
1013 break;
1014 }
1015 }
1016
1017 // copy of _mesa_sysval_to_semantic
1018 static void
1019 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
1020 {
1021 *index = 0;
1022 switch (val) {
1023 // Vertex shader
1024 case SYSTEM_VALUE_VERTEX_ID:
1025 *name = TGSI_SEMANTIC_VERTEXID;
1026 break;
1027 case SYSTEM_VALUE_INSTANCE_ID:
1028 *name = TGSI_SEMANTIC_INSTANCEID;
1029 break;
1030 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
1031 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
1032 break;
1033 case SYSTEM_VALUE_BASE_VERTEX:
1034 *name = TGSI_SEMANTIC_BASEVERTEX;
1035 break;
1036 case SYSTEM_VALUE_BASE_INSTANCE:
1037 *name = TGSI_SEMANTIC_BASEINSTANCE;
1038 break;
1039 case SYSTEM_VALUE_DRAW_ID:
1040 *name = TGSI_SEMANTIC_DRAWID;
1041 break;
1042
1043 // Geometry shader
1044 case SYSTEM_VALUE_INVOCATION_ID:
1045 *name = TGSI_SEMANTIC_INVOCATIONID;
1046 break;
1047
1048 // Fragment shader
1049 case SYSTEM_VALUE_FRAG_COORD:
1050 *name = TGSI_SEMANTIC_POSITION;
1051 break;
1052 case SYSTEM_VALUE_FRONT_FACE:
1053 *name = TGSI_SEMANTIC_FACE;
1054 break;
1055 case SYSTEM_VALUE_SAMPLE_ID:
1056 *name = TGSI_SEMANTIC_SAMPLEID;
1057 break;
1058 case SYSTEM_VALUE_SAMPLE_POS:
1059 *name = TGSI_SEMANTIC_SAMPLEPOS;
1060 break;
1061 case SYSTEM_VALUE_SAMPLE_MASK_IN:
1062 *name = TGSI_SEMANTIC_SAMPLEMASK;
1063 break;
1064 case SYSTEM_VALUE_HELPER_INVOCATION:
1065 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
1066 break;
1067
1068 // Tessellation shader
1069 case SYSTEM_VALUE_TESS_COORD:
1070 *name = TGSI_SEMANTIC_TESSCOORD;
1071 break;
1072 case SYSTEM_VALUE_VERTICES_IN:
1073 *name = TGSI_SEMANTIC_VERTICESIN;
1074 break;
1075 case SYSTEM_VALUE_PRIMITIVE_ID:
1076 *name = TGSI_SEMANTIC_PRIMID;
1077 break;
1078 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1079 *name = TGSI_SEMANTIC_TESSOUTER;
1080 break;
1081 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1082 *name = TGSI_SEMANTIC_TESSINNER;
1083 break;
1084
1085 // Compute shader
1086 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1087 *name = TGSI_SEMANTIC_THREAD_ID;
1088 break;
1089 case SYSTEM_VALUE_WORK_GROUP_ID:
1090 *name = TGSI_SEMANTIC_BLOCK_ID;
1091 break;
1092 case SYSTEM_VALUE_NUM_WORK_GROUPS:
1093 *name = TGSI_SEMANTIC_GRID_SIZE;
1094 break;
1095 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1096 *name = TGSI_SEMANTIC_BLOCK_SIZE;
1097 break;
1098
1099 // ARB_shader_ballot
1100 case SYSTEM_VALUE_SUBGROUP_SIZE:
1101 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
1102 break;
1103 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1104 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
1105 break;
1106 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1107 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
1108 break;
1109 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1110 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
1111 break;
1112 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1113 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
1114 break;
1115 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1116 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
1117 break;
1118 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1119 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
1120 break;
1121
1122 default:
1123 ERROR("unknown system value %u\n", val);
1124 assert(false);
1125 break;
1126 }
1127 }
1128
1129 void
1130 Converter::setInterpolate(nv50_ir_varying *var,
1131 uint8_t mode,
1132 bool centroid,
1133 unsigned semantic)
1134 {
1135 switch (mode) {
1136 case INTERP_MODE_FLAT:
1137 var->flat = 1;
1138 break;
1139 case INTERP_MODE_NONE:
1140 if (semantic == TGSI_SEMANTIC_COLOR)
1141 var->sc = 1;
1142 else if (semantic == TGSI_SEMANTIC_POSITION)
1143 var->linear = 1;
1144 break;
1145 case INTERP_MODE_NOPERSPECTIVE:
1146 var->linear = 1;
1147 break;
1148 case INTERP_MODE_SMOOTH:
1149 break;
1150 }
1151 var->centroid = centroid;
1152 }
1153
1154 static uint16_t
1155 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
1156 bool input, const nir_variable *var)
1157 {
1158 if (!type->is_array())
1159 return type->count_attribute_slots(false);
1160
1161 uint16_t slots;
1162 switch (stage) {
1163 case Program::TYPE_GEOMETRY:
1164 slots = type->uniform_locations();
1165 if (input)
1166 slots /= info.gs.vertices_in;
1167 break;
1168 case Program::TYPE_TESSELLATION_CONTROL:
1169 case Program::TYPE_TESSELLATION_EVAL:
1170 // remove first dimension
1171 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1172 slots = type->uniform_locations();
1173 else
1174 slots = type->fields.array->uniform_locations();
1175 break;
1176 default:
1177 slots = type->count_attribute_slots(false);
1178 break;
1179 }
1180
1181 return slots;
1182 }
1183
1184 bool Converter::assignSlots() {
1185 unsigned name;
1186 unsigned index;
1187
1188 info->io.viewportId = -1;
1189 info->numInputs = 0;
1190 info->numOutputs = 0;
1191
1192 // we have to fixup the uniform locations for arrays
1193 unsigned numImages = 0;
1194 nir_foreach_variable(var, &nir->uniforms) {
1195 const glsl_type *type = var->type;
1196 if (!type->without_array()->is_image())
1197 continue;
1198 var->data.driver_location = numImages;
1199 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1200 }
1201
1202 info->numSysVals = 0;
1203 for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
1204 if (!(nir->info.system_values_read & 1ull << i))
1205 continue;
1206
1207 system_val_to_tgsi_semantic(i, &name, &index);
1208 info->sv[info->numSysVals].sn = name;
1209 info->sv[info->numSysVals].si = index;
1210 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1211
1212 switch (i) {
1213 case SYSTEM_VALUE_INSTANCE_ID:
1214 info->io.instanceId = info->numSysVals;
1215 break;
1216 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1217 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1218 info->sv[info->numSysVals].patch = 1;
1219 break;
1220 case SYSTEM_VALUE_VERTEX_ID:
1221 info->io.vertexId = info->numSysVals;
1222 break;
1223 default:
1224 break;
1225 }
1226
1227 info->numSysVals += 1;
1228 }
1229
1230 if (prog->getType() == Program::TYPE_COMPUTE)
1231 return true;
1232
1233 nir_foreach_variable(var, &nir->inputs) {
1234 const glsl_type *type = var->type;
1235 int slot = var->data.location;
1236 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1237 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1238 : type->component_slots();
1239 uint32_t frac = var->data.location_frac;
1240 uint32_t vary = var->data.driver_location;
1241
1242 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1243 if (comp > 2)
1244 slots *= 2;
1245 }
1246
1247 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1248
1249 switch(prog->getType()) {
1250 case Program::TYPE_FRAGMENT:
1251 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1252 for (uint16_t i = 0; i < slots; ++i) {
1253 setInterpolate(&info->in[vary + i], var->data.interpolation,
1254 var->data.centroid | var->data.sample, name);
1255 }
1256 break;
1257 case Program::TYPE_GEOMETRY:
1258 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1259 break;
1260 case Program::TYPE_TESSELLATION_CONTROL:
1261 case Program::TYPE_TESSELLATION_EVAL:
1262 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1263 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1264 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1265 break;
1266 case Program::TYPE_VERTEX:
1267 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1268 switch (name) {
1269 case TGSI_SEMANTIC_EDGEFLAG:
1270 info->io.edgeFlagIn = vary;
1271 break;
1272 default:
1273 break;
1274 }
1275 break;
1276 default:
1277 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1278 return false;
1279 }
1280
1281 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1282 info->in[vary].id = vary;
1283 info->in[vary].patch = var->data.patch;
1284 info->in[vary].sn = name;
1285 info->in[vary].si = index + i;
1286 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1287 if (i & 0x1)
1288 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1289 else
1290 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1291 else
1292 info->in[vary].mask |= ((1 << comp) - 1) << frac;
1293 }
1294 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1295 }
1296
1297 nir_foreach_variable(var, &nir->outputs) {
1298 const glsl_type *type = var->type;
1299 int slot = var->data.location;
1300 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1301 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1302 : type->component_slots();
1303 uint32_t frac = var->data.location_frac;
1304 uint32_t vary = var->data.driver_location;
1305
1306 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1307 if (comp > 2)
1308 slots *= 2;
1309 }
1310
1311 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1312
1313 switch(prog->getType()) {
1314 case Program::TYPE_FRAGMENT:
1315 frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1316 switch (name) {
1317 case TGSI_SEMANTIC_COLOR:
1318 if (!var->data.fb_fetch_output)
1319 info->prop.fp.numColourResults++;
1320 info->prop.fp.separateFragData = true;
1321 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1322 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1323 index = index == 0 ? var->data.index : index;
1324 break;
1325 case TGSI_SEMANTIC_POSITION:
1326 info->io.fragDepth = vary;
1327 info->prop.fp.writesDepth = true;
1328 break;
1329 case TGSI_SEMANTIC_SAMPLEMASK:
1330 info->io.sampleMask = vary;
1331 break;
1332 default:
1333 break;
1334 }
1335 break;
1336 case Program::TYPE_GEOMETRY:
1337 case Program::TYPE_TESSELLATION_CONTROL:
1338 case Program::TYPE_TESSELLATION_EVAL:
1339 case Program::TYPE_VERTEX:
1340 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1341
1342 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1343 name != TGSI_SEMANTIC_TESSOUTER)
1344 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1345
1346 switch (name) {
1347 case TGSI_SEMANTIC_CLIPDIST:
1348 info->io.genUserClip = -1;
1349 break;
1350 case TGSI_SEMANTIC_CLIPVERTEX:
1351 clipVertexOutput = vary;
1352 break;
1353 case TGSI_SEMANTIC_EDGEFLAG:
1354 info->io.edgeFlagOut = vary;
1355 break;
1356 case TGSI_SEMANTIC_POSITION:
1357 if (clipVertexOutput < 0)
1358 clipVertexOutput = vary;
1359 break;
1360 default:
1361 break;
1362 }
1363 break;
1364 default:
1365 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1366 return false;
1367 }
1368
1369 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1370 info->out[vary].id = vary;
1371 info->out[vary].patch = var->data.patch;
1372 info->out[vary].sn = name;
1373 info->out[vary].si = index + i;
1374 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1375 if (i & 0x1)
1376 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1377 else
1378 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1379 else
1380 info->out[vary].mask |= ((1 << comp) - 1) << frac;
1381
1382 if (nir->info.outputs_read & 1ull << slot)
1383 info->out[vary].oread = 1;
1384 }
1385 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1386 }
1387
1388 if (info->io.genUserClip > 0) {
1389 info->io.clipDistances = info->io.genUserClip;
1390
1391 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1392
1393 for (unsigned int n = 0; n < nOut; ++n) {
1394 unsigned int i = info->numOutputs++;
1395 info->out[i].id = i;
1396 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1397 info->out[i].si = n;
1398 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1399 }
1400 }
1401
1402 return info->assignSlots(info) == 0;
1403 }
1404
1405 uint32_t
1406 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1407 {
1408 DataType ty;
1409 int offset = nir_intrinsic_component(insn);
1410 bool input;
1411
1412 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1413 ty = getDType(insn);
1414 else
1415 ty = getSType(insn->src[0], false, false);
1416
1417 switch (insn->intrinsic) {
1418 case nir_intrinsic_load_input:
1419 case nir_intrinsic_load_interpolated_input:
1420 case nir_intrinsic_load_per_vertex_input:
1421 input = true;
1422 break;
1423 case nir_intrinsic_load_output:
1424 case nir_intrinsic_load_per_vertex_output:
1425 case nir_intrinsic_store_output:
1426 case nir_intrinsic_store_per_vertex_output:
1427 input = false;
1428 break;
1429 default:
1430 ERROR("unknown intrinsic in getSlotAddress %s",
1431 nir_intrinsic_infos[insn->intrinsic].name);
1432 input = false;
1433 assert(false);
1434 break;
1435 }
1436
1437 if (typeSizeof(ty) == 8) {
1438 slot *= 2;
1439 slot += offset;
1440 if (slot >= 4) {
1441 idx += 1;
1442 slot -= 4;
1443 }
1444 } else {
1445 slot += offset;
1446 }
1447
1448 assert(slot < 4);
1449 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1450 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1451
1452 const nv50_ir_varying *vary = input ? info->in : info->out;
1453 return vary[idx].slot[slot] * 4;
1454 }
1455
1456 Instruction *
1457 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1458 uint32_t base, uint8_t c, Value *indirect0,
1459 Value *indirect1, bool patch)
1460 {
1461 unsigned int tySize = typeSizeof(ty);
1462
1463 if (tySize == 8 &&
1464 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1465 Value *lo = getSSA();
1466 Value *hi = getSSA();
1467
1468 Instruction *loi =
1469 mkLoad(TYPE_U32, lo,
1470 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1471 indirect0);
1472 loi->setIndirect(0, 1, indirect1);
1473 loi->perPatch = patch;
1474
1475 Instruction *hii =
1476 mkLoad(TYPE_U32, hi,
1477 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1478 indirect0);
1479 hii->setIndirect(0, 1, indirect1);
1480 hii->perPatch = patch;
1481
1482 return mkOp2(OP_MERGE, ty, def, lo, hi);
1483 } else {
1484 Instruction *ld =
1485 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1486 ld->setIndirect(0, 1, indirect1);
1487 ld->perPatch = patch;
1488 return ld;
1489 }
1490 }
1491
1492 void
1493 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1494 DataType ty, Value *src, uint8_t idx, uint8_t c,
1495 Value *indirect0, Value *indirect1)
1496 {
1497 uint8_t size = typeSizeof(ty);
1498 uint32_t address = getSlotAddress(insn, idx, c);
1499
1500 if (size == 8 && indirect0) {
1501 Value *split[2];
1502 mkSplit(split, 4, src);
1503
1504 if (op == OP_EXPORT) {
1505 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1506 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1507 }
1508
1509 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1510 split[0])->perPatch = info->out[idx].patch;
1511 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1512 split[1])->perPatch = info->out[idx].patch;
1513 } else {
1514 if (op == OP_EXPORT)
1515 src = mkMov(getSSA(size), src, ty)->getDef(0);
1516 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1517 src)->perPatch = info->out[idx].patch;
1518 }
1519 }
1520
1521 bool
1522 Converter::parseNIR()
1523 {
1524 info->bin.tlsSpace = 0;
1525 info->io.clipDistances = nir->info.clip_distance_array_size;
1526 info->io.cullDistances = nir->info.cull_distance_array_size;
1527
1528 switch(prog->getType()) {
1529 case Program::TYPE_COMPUTE:
1530 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1531 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1532 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1533 info->bin.smemSize = nir->info.cs.shared_size;
1534 break;
1535 case Program::TYPE_FRAGMENT:
1536 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1537 info->prop.fp.persampleInvocation =
1538 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1539 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1540 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1541 info->prop.fp.readsSampleLocations =
1542 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1543 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1544 info->prop.fp.usesSampleMaskIn =
1545 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1546 break;
1547 case Program::TYPE_GEOMETRY:
1548 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1549 info->prop.gp.instanceCount = nir->info.gs.invocations;
1550 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1551 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1552 break;
1553 case Program::TYPE_TESSELLATION_CONTROL:
1554 case Program::TYPE_TESSELLATION_EVAL:
1555 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1556 info->prop.tp.domain = GL_LINES;
1557 else
1558 info->prop.tp.domain = nir->info.tess.primitive_mode;
1559 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1560 info->prop.tp.outputPrim =
1561 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1562 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1563 info->prop.tp.winding = !nir->info.tess.ccw;
1564 break;
1565 case Program::TYPE_VERTEX:
1566 info->prop.vp.usesDrawParameters =
1567 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1568 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1569 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1570 break;
1571 default:
1572 break;
1573 }
1574
1575 return true;
1576 }
1577
1578 bool
1579 Converter::visit(nir_function *function)
1580 {
1581 assert(function->impl);
1582
1583 // usually the blocks will set everything up, but main is special
1584 BasicBlock *entry = new BasicBlock(prog->main);
1585 exit = new BasicBlock(prog->main);
1586 blocks[nir_start_block(function->impl)->index] = entry;
1587 prog->main->setEntry(entry);
1588 prog->main->setExit(exit);
1589
1590 setPosition(entry, true);
1591
1592 if (info->io.genUserClip > 0) {
1593 for (int c = 0; c < 4; ++c)
1594 clipVtx[c] = getScratch();
1595 }
1596
1597 switch (prog->getType()) {
1598 case Program::TYPE_TESSELLATION_CONTROL:
1599 outBase = mkOp2v(
1600 OP_SUB, TYPE_U32, getSSA(),
1601 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1602 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1603 break;
1604 case Program::TYPE_FRAGMENT: {
1605 Symbol *sv = mkSysVal(SV_POSITION, 3);
1606 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1607 fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1608 break;
1609 }
1610 default:
1611 break;
1612 }
1613
1614 nir_foreach_register(reg, &function->impl->registers) {
1615 if (reg->num_array_elems) {
1616 // TODO: packed variables would be nice, but MemoryOpt fails
1617 // replace 4 with reg->num_components
1618 uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1619 regToLmemOffset[reg->index] = info->bin.tlsSpace;
1620 info->bin.tlsSpace += size;
1621 }
1622 }
1623
1624 nir_index_ssa_defs(function->impl);
1625 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1626 if (!visit(node))
1627 return false;
1628 }
1629
1630 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1631 setPosition(exit, true);
1632
1633 if ((prog->getType() == Program::TYPE_VERTEX ||
1634 prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1635 && info->io.genUserClip > 0)
1636 handleUserClipPlanes();
1637
1638 // TODO: for non main function this needs to be a OP_RETURN
1639 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1640 return true;
1641 }
1642
1643 bool
1644 Converter::visit(nir_cf_node *node)
1645 {
1646 switch (node->type) {
1647 case nir_cf_node_block:
1648 return visit(nir_cf_node_as_block(node));
1649 case nir_cf_node_if:
1650 return visit(nir_cf_node_as_if(node));
1651 case nir_cf_node_loop:
1652 return visit(nir_cf_node_as_loop(node));
1653 default:
1654 ERROR("unknown nir_cf_node type %u\n", node->type);
1655 return false;
1656 }
1657 }
1658
1659 bool
1660 Converter::visit(nir_block *block)
1661 {
1662 if (!block->predecessors->entries && block->instr_list.is_empty())
1663 return true;
1664
1665 BasicBlock *bb = convert(block);
1666
1667 setPosition(bb, true);
1668 nir_foreach_instr(insn, block) {
1669 if (!visit(insn))
1670 return false;
1671 }
1672 return true;
1673 }
1674
1675 bool
1676 Converter::visit(nir_if *nif)
1677 {
1678 DataType sType = getSType(nif->condition, false, false);
1679 Value *src = getSrc(&nif->condition, 0);
1680
1681 nir_block *lastThen = nir_if_last_then_block(nif);
1682 nir_block *lastElse = nir_if_last_else_block(nif);
1683
1684 assert(!lastThen->successors[1]);
1685 assert(!lastElse->successors[1]);
1686
1687 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1688 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1689
1690 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1691 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1692
1693 // we only insert joinats, if both nodes end up at the end of the if again.
1694 // the reason for this to not happens are breaks/continues/ret/... which
1695 // have their own handling
1696 if (lastThen->successors[0] == lastElse->successors[0])
1697 bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1698 CC_ALWAYS, NULL);
1699
1700 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1701
1702 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1703 if (!visit(node))
1704 return false;
1705 }
1706 setPosition(convert(lastThen), true);
1707 if (!bb->getExit() ||
1708 !bb->getExit()->asFlow() ||
1709 bb->getExit()->asFlow()->op == OP_JOIN) {
1710 BasicBlock *tailBB = convert(lastThen->successors[0]);
1711 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1712 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1713 }
1714
1715 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1716 if (!visit(node))
1717 return false;
1718 }
1719 setPosition(convert(lastElse), true);
1720 if (!bb->getExit() ||
1721 !bb->getExit()->asFlow() ||
1722 bb->getExit()->asFlow()->op == OP_JOIN) {
1723 BasicBlock *tailBB = convert(lastElse->successors[0]);
1724 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1725 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1726 }
1727
1728 if (lastThen->successors[0] == lastElse->successors[0]) {
1729 setPosition(convert(lastThen->successors[0]), true);
1730 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1731 }
1732
1733 return true;
1734 }
1735
1736 bool
1737 Converter::visit(nir_loop *loop)
1738 {
1739 curLoopDepth += 1;
1740 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1741
1742 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1743 BasicBlock *tailBB =
1744 convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1745 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1746
1747 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1748 setPosition(loopBB, false);
1749 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1750
1751 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1752 if (!visit(node))
1753 return false;
1754 }
1755 Instruction *insn = bb->getExit();
1756 if (bb->cfg.incidentCount() != 0) {
1757 if (!insn || !insn->asFlow()) {
1758 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1759 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1760 } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1761 tailBB->cfg.incidentCount() == 0) {
1762 // RA doesn't like having blocks around with no incident edge,
1763 // so we create a fake one to make it happy
1764 bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1765 }
1766 }
1767
1768 curLoopDepth -= 1;
1769
1770 return true;
1771 }
1772
1773 bool
1774 Converter::visit(nir_instr *insn)
1775 {
1776 // we need an insertion point for on the fly generated immediate loads
1777 immInsertPos = bb->getExit();
1778 switch (insn->type) {
1779 case nir_instr_type_alu:
1780 return visit(nir_instr_as_alu(insn));
1781 case nir_instr_type_deref:
1782 return visit(nir_instr_as_deref(insn));
1783 case nir_instr_type_intrinsic:
1784 return visit(nir_instr_as_intrinsic(insn));
1785 case nir_instr_type_jump:
1786 return visit(nir_instr_as_jump(insn));
1787 case nir_instr_type_load_const:
1788 return visit(nir_instr_as_load_const(insn));
1789 case nir_instr_type_ssa_undef:
1790 return visit(nir_instr_as_ssa_undef(insn));
1791 case nir_instr_type_tex:
1792 return visit(nir_instr_as_tex(insn));
1793 default:
1794 ERROR("unknown nir_instr type %u\n", insn->type);
1795 return false;
1796 }
1797 return true;
1798 }
1799
1800 SVSemantic
1801 Converter::convert(nir_intrinsic_op intr)
1802 {
1803 switch (intr) {
1804 case nir_intrinsic_load_base_vertex:
1805 return SV_BASEVERTEX;
1806 case nir_intrinsic_load_base_instance:
1807 return SV_BASEINSTANCE;
1808 case nir_intrinsic_load_draw_id:
1809 return SV_DRAWID;
1810 case nir_intrinsic_load_front_face:
1811 return SV_FACE;
1812 case nir_intrinsic_load_helper_invocation:
1813 return SV_THREAD_KILL;
1814 case nir_intrinsic_load_instance_id:
1815 return SV_INSTANCE_ID;
1816 case nir_intrinsic_load_invocation_id:
1817 return SV_INVOCATION_ID;
1818 case nir_intrinsic_load_local_group_size:
1819 return SV_NTID;
1820 case nir_intrinsic_load_local_invocation_id:
1821 return SV_TID;
1822 case nir_intrinsic_load_num_work_groups:
1823 return SV_NCTAID;
1824 case nir_intrinsic_load_patch_vertices_in:
1825 return SV_VERTEX_COUNT;
1826 case nir_intrinsic_load_primitive_id:
1827 return SV_PRIMITIVE_ID;
1828 case nir_intrinsic_load_sample_id:
1829 return SV_SAMPLE_INDEX;
1830 case nir_intrinsic_load_sample_mask_in:
1831 return SV_SAMPLE_MASK;
1832 case nir_intrinsic_load_sample_pos:
1833 return SV_SAMPLE_POS;
1834 case nir_intrinsic_load_subgroup_eq_mask:
1835 return SV_LANEMASK_EQ;
1836 case nir_intrinsic_load_subgroup_ge_mask:
1837 return SV_LANEMASK_GE;
1838 case nir_intrinsic_load_subgroup_gt_mask:
1839 return SV_LANEMASK_GT;
1840 case nir_intrinsic_load_subgroup_le_mask:
1841 return SV_LANEMASK_LE;
1842 case nir_intrinsic_load_subgroup_lt_mask:
1843 return SV_LANEMASK_LT;
1844 case nir_intrinsic_load_subgroup_invocation:
1845 return SV_LANEID;
1846 case nir_intrinsic_load_tess_coord:
1847 return SV_TESS_COORD;
1848 case nir_intrinsic_load_tess_level_inner:
1849 return SV_TESS_INNER;
1850 case nir_intrinsic_load_tess_level_outer:
1851 return SV_TESS_OUTER;
1852 case nir_intrinsic_load_vertex_id:
1853 return SV_VERTEX_ID;
1854 case nir_intrinsic_load_work_group_id:
1855 return SV_CTAID;
1856 default:
1857 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1858 nir_intrinsic_infos[intr].name);
1859 assert(false);
1860 return SV_LAST;
1861 }
1862 }
1863
1864 ImgFormat
1865 Converter::convertGLImgFormat(GLuint format)
1866 {
1867 #define FMT_CASE(a, b) \
1868 case GL_ ## a: return nv50_ir::FMT_ ## b
1869
1870 switch (format) {
1871 FMT_CASE(NONE, NONE);
1872
1873 FMT_CASE(RGBA32F, RGBA32F);
1874 FMT_CASE(RGBA16F, RGBA16F);
1875 FMT_CASE(RG32F, RG32F);
1876 FMT_CASE(RG16F, RG16F);
1877 FMT_CASE(R11F_G11F_B10F, R11G11B10F);
1878 FMT_CASE(R32F, R32F);
1879 FMT_CASE(R16F, R16F);
1880
1881 FMT_CASE(RGBA32UI, RGBA32UI);
1882 FMT_CASE(RGBA16UI, RGBA16UI);
1883 FMT_CASE(RGB10_A2UI, RGB10A2UI);
1884 FMT_CASE(RGBA8UI, RGBA8UI);
1885 FMT_CASE(RG32UI, RG32UI);
1886 FMT_CASE(RG16UI, RG16UI);
1887 FMT_CASE(RG8UI, RG8UI);
1888 FMT_CASE(R32UI, R32UI);
1889 FMT_CASE(R16UI, R16UI);
1890 FMT_CASE(R8UI, R8UI);
1891
1892 FMT_CASE(RGBA32I, RGBA32I);
1893 FMT_CASE(RGBA16I, RGBA16I);
1894 FMT_CASE(RGBA8I, RGBA8I);
1895 FMT_CASE(RG32I, RG32I);
1896 FMT_CASE(RG16I, RG16I);
1897 FMT_CASE(RG8I, RG8I);
1898 FMT_CASE(R32I, R32I);
1899 FMT_CASE(R16I, R16I);
1900 FMT_CASE(R8I, R8I);
1901
1902 FMT_CASE(RGBA16, RGBA16);
1903 FMT_CASE(RGB10_A2, RGB10A2);
1904 FMT_CASE(RGBA8, RGBA8);
1905 FMT_CASE(RG16, RG16);
1906 FMT_CASE(RG8, RG8);
1907 FMT_CASE(R16, R16);
1908 FMT_CASE(R8, R8);
1909
1910 FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
1911 FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
1912 FMT_CASE(RG16_SNORM, RG16_SNORM);
1913 FMT_CASE(RG8_SNORM, RG8_SNORM);
1914 FMT_CASE(R16_SNORM, R16_SNORM);
1915 FMT_CASE(R8_SNORM, R8_SNORM);
1916
1917 FMT_CASE(BGRA_INTEGER, BGRA8);
1918 default:
1919 ERROR("unknown format %x\n", format);
1920 assert(false);
1921 return nv50_ir::FMT_NONE;
1922 }
1923 #undef FMT_CASE
1924 }
1925
1926 bool
1927 Converter::visit(nir_intrinsic_instr *insn)
1928 {
1929 nir_intrinsic_op op = insn->intrinsic;
1930 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1931
1932 switch (op) {
1933 case nir_intrinsic_load_uniform: {
1934 LValues &newDefs = convert(&insn->dest);
1935 const DataType dType = getDType(insn);
1936 Value *indirect;
1937 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1938 for (uint8_t i = 0; i < insn->num_components; ++i) {
1939 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1940 }
1941 break;
1942 }
1943 case nir_intrinsic_store_output:
1944 case nir_intrinsic_store_per_vertex_output: {
1945 Value *indirect;
1946 DataType dType = getSType(insn->src[0], false, false);
1947 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1948
1949 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1950 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1951 continue;
1952
1953 uint8_t offset = 0;
1954 Value *src = getSrc(&insn->src[0], i);
1955 switch (prog->getType()) {
1956 case Program::TYPE_FRAGMENT: {
1957 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1958 // TGSI uses a different interface than NIR, TGSI stores that
1959 // value in the z component, NIR in X
1960 offset += 2;
1961 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1962 }
1963 break;
1964 }
1965 case Program::TYPE_GEOMETRY:
1966 case Program::TYPE_VERTEX: {
1967 if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
1968 mkMov(clipVtx[i], src);
1969 src = clipVtx[i];
1970 }
1971 break;
1972 }
1973 default:
1974 break;
1975 }
1976
1977 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1978 }
1979 break;
1980 }
1981 case nir_intrinsic_load_input:
1982 case nir_intrinsic_load_interpolated_input:
1983 case nir_intrinsic_load_output: {
1984 LValues &newDefs = convert(&insn->dest);
1985
1986 // FBFetch
1987 if (prog->getType() == Program::TYPE_FRAGMENT &&
1988 op == nir_intrinsic_load_output) {
1989 std::vector<Value*> defs, srcs;
1990 uint8_t mask = 0;
1991
1992 srcs.push_back(getSSA());
1993 srcs.push_back(getSSA());
1994 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1995 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1996 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1997 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1998
1999 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
2000 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
2001
2002 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2003 defs.push_back(newDefs[i]);
2004 mask |= 1 << i;
2005 }
2006
2007 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
2008 texi->tex.levelZero = 1;
2009 texi->tex.mask = mask;
2010 texi->tex.useOffsets = 0;
2011 texi->tex.r = 0xffff;
2012 texi->tex.s = 0xffff;
2013
2014 info->prop.fp.readsFramebuffer = true;
2015 break;
2016 }
2017
2018 const DataType dType = getDType(insn);
2019 Value *indirect;
2020 bool input = op != nir_intrinsic_load_output;
2021 operation nvirOp;
2022 uint32_t mode = 0;
2023
2024 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
2025 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
2026
2027 // see load_barycentric_* handling
2028 if (prog->getType() == Program::TYPE_FRAGMENT) {
2029 mode = translateInterpMode(&vary, nvirOp);
2030 if (op == nir_intrinsic_load_interpolated_input) {
2031 ImmediateValue immMode;
2032 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
2033 mode |= immMode.reg.data.u32;
2034 }
2035 }
2036
2037 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2038 uint32_t address = getSlotAddress(insn, idx, i);
2039 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
2040 if (prog->getType() == Program::TYPE_FRAGMENT) {
2041 int s = 1;
2042 if (typeSizeof(dType) == 8) {
2043 Value *lo = getSSA();
2044 Value *hi = getSSA();
2045 Instruction *interp;
2046
2047 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
2048 if (nvirOp == OP_PINTERP)
2049 interp->setSrc(s++, fp.position);
2050 if (mode & NV50_IR_INTERP_OFFSET)
2051 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2052 interp->setInterpolate(mode);
2053 interp->setIndirect(0, 0, indirect);
2054
2055 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
2056 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
2057 if (nvirOp == OP_PINTERP)
2058 interp->setSrc(s++, fp.position);
2059 if (mode & NV50_IR_INTERP_OFFSET)
2060 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2061 interp->setInterpolate(mode);
2062 interp->setIndirect(0, 0, indirect);
2063
2064 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2065 } else {
2066 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2067 if (nvirOp == OP_PINTERP)
2068 interp->setSrc(s++, fp.position);
2069 if (mode & NV50_IR_INTERP_OFFSET)
2070 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2071 interp->setInterpolate(mode);
2072 interp->setIndirect(0, 0, indirect);
2073 }
2074 } else {
2075 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2076 }
2077 }
2078 break;
2079 }
2080 case nir_intrinsic_load_kernel_input: {
2081 assert(prog->getType() == Program::TYPE_COMPUTE);
2082 assert(insn->num_components == 1);
2083
2084 LValues &newDefs = convert(&insn->dest);
2085 const DataType dType = getDType(insn);
2086 Value *indirect;
2087 uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
2088
2089 mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
2090 break;
2091 }
2092 case nir_intrinsic_load_barycentric_at_offset:
2093 case nir_intrinsic_load_barycentric_at_sample:
2094 case nir_intrinsic_load_barycentric_centroid:
2095 case nir_intrinsic_load_barycentric_pixel:
2096 case nir_intrinsic_load_barycentric_sample: {
2097 LValues &newDefs = convert(&insn->dest);
2098 uint32_t mode;
2099
2100 if (op == nir_intrinsic_load_barycentric_centroid ||
2101 op == nir_intrinsic_load_barycentric_sample) {
2102 mode = NV50_IR_INTERP_CENTROID;
2103 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
2104 Value *offs[2];
2105 for (uint8_t c = 0; c < 2; c++) {
2106 offs[c] = getScratch();
2107 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2108 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2109 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2110 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2111 }
2112 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2113
2114 mode = NV50_IR_INTERP_OFFSET;
2115 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2116 mode = NV50_IR_INTERP_DEFAULT;
2117 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2118 info->prop.fp.readsSampleLocations = true;
2119 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2120 mode = NV50_IR_INTERP_OFFSET;
2121 } else {
2122 unreachable("all intrinsics already handled above");
2123 }
2124
2125 loadImm(newDefs[1], mode);
2126 break;
2127 }
2128 case nir_intrinsic_discard:
2129 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2130 break;
2131 case nir_intrinsic_discard_if: {
2132 Value *pred = getSSA(1, FILE_PREDICATE);
2133 if (insn->num_components > 1) {
2134 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2135 assert(false);
2136 return false;
2137 }
2138 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2139 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2140 break;
2141 }
2142 case nir_intrinsic_load_base_vertex:
2143 case nir_intrinsic_load_base_instance:
2144 case nir_intrinsic_load_draw_id:
2145 case nir_intrinsic_load_front_face:
2146 case nir_intrinsic_load_helper_invocation:
2147 case nir_intrinsic_load_instance_id:
2148 case nir_intrinsic_load_invocation_id:
2149 case nir_intrinsic_load_local_group_size:
2150 case nir_intrinsic_load_local_invocation_id:
2151 case nir_intrinsic_load_num_work_groups:
2152 case nir_intrinsic_load_patch_vertices_in:
2153 case nir_intrinsic_load_primitive_id:
2154 case nir_intrinsic_load_sample_id:
2155 case nir_intrinsic_load_sample_mask_in:
2156 case nir_intrinsic_load_sample_pos:
2157 case nir_intrinsic_load_subgroup_eq_mask:
2158 case nir_intrinsic_load_subgroup_ge_mask:
2159 case nir_intrinsic_load_subgroup_gt_mask:
2160 case nir_intrinsic_load_subgroup_le_mask:
2161 case nir_intrinsic_load_subgroup_lt_mask:
2162 case nir_intrinsic_load_subgroup_invocation:
2163 case nir_intrinsic_load_tess_coord:
2164 case nir_intrinsic_load_tess_level_inner:
2165 case nir_intrinsic_load_tess_level_outer:
2166 case nir_intrinsic_load_vertex_id:
2167 case nir_intrinsic_load_work_group_id: {
2168 const DataType dType = getDType(insn);
2169 SVSemantic sv = convert(op);
2170 LValues &newDefs = convert(&insn->dest);
2171
2172 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2173 Value *def;
2174 if (typeSizeof(dType) == 8)
2175 def = getSSA();
2176 else
2177 def = newDefs[i];
2178
2179 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2180 loadImm(def, 0u);
2181 } else {
2182 Symbol *sym = mkSysVal(sv, i);
2183 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2184 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2185 rdsv->perPatch = 1;
2186 }
2187
2188 if (typeSizeof(dType) == 8)
2189 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2190 }
2191 break;
2192 }
2193 // constants
2194 case nir_intrinsic_load_subgroup_size: {
2195 LValues &newDefs = convert(&insn->dest);
2196 loadImm(newDefs[0], 32u);
2197 break;
2198 }
2199 case nir_intrinsic_vote_all:
2200 case nir_intrinsic_vote_any:
2201 case nir_intrinsic_vote_ieq: {
2202 LValues &newDefs = convert(&insn->dest);
2203 Value *pred = getScratch(1, FILE_PREDICATE);
2204 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2205 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2206 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2207 break;
2208 }
2209 case nir_intrinsic_ballot: {
2210 LValues &newDefs = convert(&insn->dest);
2211 Value *pred = getSSA(1, FILE_PREDICATE);
2212 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2213 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2214 break;
2215 }
2216 case nir_intrinsic_read_first_invocation:
2217 case nir_intrinsic_read_invocation: {
2218 LValues &newDefs = convert(&insn->dest);
2219 const DataType dType = getDType(insn);
2220 Value *tmp = getScratch();
2221
2222 if (op == nir_intrinsic_read_first_invocation) {
2223 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2224 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2225 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2226 } else
2227 tmp = getSrc(&insn->src[1], 0);
2228
2229 for (uint8_t i = 0; i < insn->num_components; ++i) {
2230 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2231 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2232 }
2233 break;
2234 }
2235 case nir_intrinsic_load_per_vertex_input: {
2236 const DataType dType = getDType(insn);
2237 LValues &newDefs = convert(&insn->dest);
2238 Value *indirectVertex;
2239 Value *indirectOffset;
2240 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2241 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2242
2243 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2244 mkImm(baseVertex), indirectVertex);
2245 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2246 uint32_t address = getSlotAddress(insn, idx, i);
2247 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2248 indirectOffset, vtxBase, info->in[idx].patch);
2249 }
2250 break;
2251 }
2252 case nir_intrinsic_load_per_vertex_output: {
2253 const DataType dType = getDType(insn);
2254 LValues &newDefs = convert(&insn->dest);
2255 Value *indirectVertex;
2256 Value *indirectOffset;
2257 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2258 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2259 Value *vtxBase = NULL;
2260
2261 if (indirectVertex)
2262 vtxBase = indirectVertex;
2263 else
2264 vtxBase = loadImm(NULL, baseVertex);
2265
2266 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2267
2268 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2269 uint32_t address = getSlotAddress(insn, idx, i);
2270 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2271 indirectOffset, vtxBase, info->in[idx].patch);
2272 }
2273 break;
2274 }
2275 case nir_intrinsic_emit_vertex:
2276 if (info->io.genUserClip > 0)
2277 handleUserClipPlanes();
2278 // fallthrough
2279 case nir_intrinsic_end_primitive: {
2280 uint32_t idx = nir_intrinsic_stream_id(insn);
2281 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2282 break;
2283 }
2284 case nir_intrinsic_load_ubo: {
2285 const DataType dType = getDType(insn);
2286 LValues &newDefs = convert(&insn->dest);
2287 Value *indirectIndex;
2288 Value *indirectOffset;
2289 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2290 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2291
2292 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2293 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2294 indirectOffset, indirectIndex);
2295 }
2296 break;
2297 }
2298 case nir_intrinsic_get_buffer_size: {
2299 LValues &newDefs = convert(&insn->dest);
2300 const DataType dType = getDType(insn);
2301 Value *indirectBuffer;
2302 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2303
2304 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2305 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2306 break;
2307 }
2308 case nir_intrinsic_store_ssbo: {
2309 DataType sType = getSType(insn->src[0], false, false);
2310 Value *indirectBuffer;
2311 Value *indirectOffset;
2312 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2313 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2314
2315 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2316 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2317 continue;
2318 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2319 offset + i * typeSizeof(sType));
2320 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2321 ->setIndirect(0, 1, indirectBuffer);
2322 }
2323 info->io.globalAccess |= 0x2;
2324 break;
2325 }
2326 case nir_intrinsic_load_ssbo: {
2327 const DataType dType = getDType(insn);
2328 LValues &newDefs = convert(&insn->dest);
2329 Value *indirectBuffer;
2330 Value *indirectOffset;
2331 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2332 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2333
2334 for (uint8_t i = 0u; i < insn->num_components; ++i)
2335 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2336 indirectOffset, indirectBuffer);
2337
2338 info->io.globalAccess |= 0x1;
2339 break;
2340 }
2341 case nir_intrinsic_shared_atomic_add:
2342 case nir_intrinsic_shared_atomic_and:
2343 case nir_intrinsic_shared_atomic_comp_swap:
2344 case nir_intrinsic_shared_atomic_exchange:
2345 case nir_intrinsic_shared_atomic_or:
2346 case nir_intrinsic_shared_atomic_imax:
2347 case nir_intrinsic_shared_atomic_imin:
2348 case nir_intrinsic_shared_atomic_umax:
2349 case nir_intrinsic_shared_atomic_umin:
2350 case nir_intrinsic_shared_atomic_xor: {
2351 const DataType dType = getDType(insn);
2352 LValues &newDefs = convert(&insn->dest);
2353 Value *indirectOffset;
2354 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2355 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2356 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2357 if (op == nir_intrinsic_shared_atomic_comp_swap)
2358 atom->setSrc(2, getSrc(&insn->src[2], 0));
2359 atom->setIndirect(0, 0, indirectOffset);
2360 atom->subOp = getSubOp(op);
2361 break;
2362 }
2363 case nir_intrinsic_ssbo_atomic_add:
2364 case nir_intrinsic_ssbo_atomic_and:
2365 case nir_intrinsic_ssbo_atomic_comp_swap:
2366 case nir_intrinsic_ssbo_atomic_exchange:
2367 case nir_intrinsic_ssbo_atomic_or:
2368 case nir_intrinsic_ssbo_atomic_imax:
2369 case nir_intrinsic_ssbo_atomic_imin:
2370 case nir_intrinsic_ssbo_atomic_umax:
2371 case nir_intrinsic_ssbo_atomic_umin:
2372 case nir_intrinsic_ssbo_atomic_xor: {
2373 const DataType dType = getDType(insn);
2374 LValues &newDefs = convert(&insn->dest);
2375 Value *indirectBuffer;
2376 Value *indirectOffset;
2377 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2378 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2379
2380 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2381 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2382 getSrc(&insn->src[2], 0));
2383 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2384 atom->setSrc(2, getSrc(&insn->src[3], 0));
2385 atom->setIndirect(0, 0, indirectOffset);
2386 atom->setIndirect(0, 1, indirectBuffer);
2387 atom->subOp = getSubOp(op);
2388
2389 info->io.globalAccess |= 0x2;
2390 break;
2391 }
2392 case nir_intrinsic_global_atomic_add:
2393 case nir_intrinsic_global_atomic_and:
2394 case nir_intrinsic_global_atomic_comp_swap:
2395 case nir_intrinsic_global_atomic_exchange:
2396 case nir_intrinsic_global_atomic_or:
2397 case nir_intrinsic_global_atomic_imax:
2398 case nir_intrinsic_global_atomic_imin:
2399 case nir_intrinsic_global_atomic_umax:
2400 case nir_intrinsic_global_atomic_umin:
2401 case nir_intrinsic_global_atomic_xor: {
2402 const DataType dType = getDType(insn);
2403 LValues &newDefs = convert(&insn->dest);
2404 Value *address;
2405 uint32_t offset = getIndirect(&insn->src[0], 0, address);
2406
2407 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2408 Instruction *atom =
2409 mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2410 atom->setIndirect(0, 0, address);
2411 atom->subOp = getSubOp(op);
2412
2413 info->io.globalAccess |= 0x2;
2414 break;
2415 }
2416 case nir_intrinsic_bindless_image_atomic_add:
2417 case nir_intrinsic_bindless_image_atomic_and:
2418 case nir_intrinsic_bindless_image_atomic_comp_swap:
2419 case nir_intrinsic_bindless_image_atomic_exchange:
2420 case nir_intrinsic_bindless_image_atomic_imax:
2421 case nir_intrinsic_bindless_image_atomic_umax:
2422 case nir_intrinsic_bindless_image_atomic_imin:
2423 case nir_intrinsic_bindless_image_atomic_umin:
2424 case nir_intrinsic_bindless_image_atomic_or:
2425 case nir_intrinsic_bindless_image_atomic_xor:
2426 case nir_intrinsic_bindless_image_load:
2427 case nir_intrinsic_bindless_image_samples:
2428 case nir_intrinsic_bindless_image_size:
2429 case nir_intrinsic_bindless_image_store: {
2430 std::vector<Value*> srcs, defs;
2431 Value *indirect = getSrc(&insn->src[0], 0);
2432 DataType ty;
2433
2434 uint32_t mask = 0;
2435 TexInstruction::Target target =
2436 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2437 unsigned int argCount = getNIRArgCount(target);
2438 uint16_t location = 0;
2439
2440 if (opInfo.has_dest) {
2441 LValues &newDefs = convert(&insn->dest);
2442 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2443 defs.push_back(newDefs[i]);
2444 mask |= 1 << i;
2445 }
2446 }
2447
2448 switch (op) {
2449 case nir_intrinsic_bindless_image_atomic_add:
2450 case nir_intrinsic_bindless_image_atomic_and:
2451 case nir_intrinsic_bindless_image_atomic_comp_swap:
2452 case nir_intrinsic_bindless_image_atomic_exchange:
2453 case nir_intrinsic_bindless_image_atomic_imax:
2454 case nir_intrinsic_bindless_image_atomic_umax:
2455 case nir_intrinsic_bindless_image_atomic_imin:
2456 case nir_intrinsic_bindless_image_atomic_umin:
2457 case nir_intrinsic_bindless_image_atomic_or:
2458 case nir_intrinsic_bindless_image_atomic_xor:
2459 ty = getDType(insn);
2460 mask = 0x1;
2461 info->io.globalAccess |= 0x2;
2462 break;
2463 case nir_intrinsic_bindless_image_load:
2464 ty = TYPE_U32;
2465 info->io.globalAccess |= 0x1;
2466 break;
2467 case nir_intrinsic_bindless_image_store:
2468 ty = TYPE_U32;
2469 mask = 0xf;
2470 info->io.globalAccess |= 0x2;
2471 break;
2472 case nir_intrinsic_bindless_image_samples:
2473 mask = 0x8;
2474 ty = TYPE_U32;
2475 break;
2476 case nir_intrinsic_bindless_image_size:
2477 ty = TYPE_U32;
2478 break;
2479 default:
2480 unreachable("unhandled image opcode");
2481 break;
2482 }
2483
2484 // coords
2485 if (opInfo.num_srcs >= 2)
2486 for (unsigned int i = 0u; i < argCount; ++i)
2487 srcs.push_back(getSrc(&insn->src[1], i));
2488
2489 // the sampler is just another src added after coords
2490 if (opInfo.num_srcs >= 3 && target.isMS())
2491 srcs.push_back(getSrc(&insn->src[2], 0));
2492
2493 if (opInfo.num_srcs >= 4) {
2494 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2495 for (uint8_t i = 0u; i < components; ++i)
2496 srcs.push_back(getSrc(&insn->src[3], i));
2497 }
2498
2499 if (opInfo.num_srcs >= 5)
2500 // 1 for atomic swap
2501 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2502 srcs.push_back(getSrc(&insn->src[4], i));
2503
2504 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2505 texi->tex.bindless = false;
2506 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(nir_intrinsic_format(insn))];
2507 texi->tex.mask = mask;
2508 texi->tex.bindless = true;
2509 texi->cache = convert(nir_intrinsic_access(insn));
2510 texi->setType(ty);
2511 texi->subOp = getSubOp(op);
2512
2513 if (indirect)
2514 texi->setIndirectR(indirect);
2515
2516 break;
2517 }
2518 case nir_intrinsic_image_deref_atomic_add:
2519 case nir_intrinsic_image_deref_atomic_and:
2520 case nir_intrinsic_image_deref_atomic_comp_swap:
2521 case nir_intrinsic_image_deref_atomic_exchange:
2522 case nir_intrinsic_image_deref_atomic_imax:
2523 case nir_intrinsic_image_deref_atomic_umax:
2524 case nir_intrinsic_image_deref_atomic_imin:
2525 case nir_intrinsic_image_deref_atomic_umin:
2526 case nir_intrinsic_image_deref_atomic_or:
2527 case nir_intrinsic_image_deref_atomic_xor:
2528 case nir_intrinsic_image_deref_load:
2529 case nir_intrinsic_image_deref_samples:
2530 case nir_intrinsic_image_deref_size:
2531 case nir_intrinsic_image_deref_store: {
2532 const nir_variable *tex;
2533 std::vector<Value*> srcs, defs;
2534 Value *indirect;
2535 DataType ty;
2536
2537 uint32_t mask = 0;
2538 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2539 const glsl_type *type = deref->type;
2540 TexInstruction::Target target =
2541 convert((glsl_sampler_dim)type->sampler_dimensionality,
2542 type->sampler_array, type->sampler_shadow);
2543 unsigned int argCount = getNIRArgCount(target);
2544 uint16_t location = handleDeref(deref, indirect, tex);
2545
2546 if (opInfo.has_dest) {
2547 LValues &newDefs = convert(&insn->dest);
2548 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2549 defs.push_back(newDefs[i]);
2550 mask |= 1 << i;
2551 }
2552 }
2553
2554 switch (op) {
2555 case nir_intrinsic_image_deref_atomic_add:
2556 case nir_intrinsic_image_deref_atomic_and:
2557 case nir_intrinsic_image_deref_atomic_comp_swap:
2558 case nir_intrinsic_image_deref_atomic_exchange:
2559 case nir_intrinsic_image_deref_atomic_imax:
2560 case nir_intrinsic_image_deref_atomic_umax:
2561 case nir_intrinsic_image_deref_atomic_imin:
2562 case nir_intrinsic_image_deref_atomic_umin:
2563 case nir_intrinsic_image_deref_atomic_or:
2564 case nir_intrinsic_image_deref_atomic_xor:
2565 ty = getDType(insn);
2566 mask = 0x1;
2567 info->io.globalAccess |= 0x2;
2568 break;
2569 case nir_intrinsic_image_deref_load:
2570 ty = TYPE_U32;
2571 info->io.globalAccess |= 0x1;
2572 break;
2573 case nir_intrinsic_image_deref_store:
2574 ty = TYPE_U32;
2575 mask = 0xf;
2576 info->io.globalAccess |= 0x2;
2577 break;
2578 case nir_intrinsic_image_deref_samples:
2579 mask = 0x8;
2580 ty = TYPE_U32;
2581 break;
2582 case nir_intrinsic_image_deref_size:
2583 ty = TYPE_U32;
2584 break;
2585 default:
2586 unreachable("unhandled image opcode");
2587 break;
2588 }
2589
2590 // coords
2591 if (opInfo.num_srcs >= 2)
2592 for (unsigned int i = 0u; i < argCount; ++i)
2593 srcs.push_back(getSrc(&insn->src[1], i));
2594
2595 // the sampler is just another src added after coords
2596 if (opInfo.num_srcs >= 3 && target.isMS())
2597 srcs.push_back(getSrc(&insn->src[2], 0));
2598
2599 if (opInfo.num_srcs >= 4) {
2600 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2601 for (uint8_t i = 0u; i < components; ++i)
2602 srcs.push_back(getSrc(&insn->src[3], i));
2603 }
2604
2605 if (opInfo.num_srcs >= 5)
2606 // 1 for atomic swap
2607 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2608 srcs.push_back(getSrc(&insn->src[4], i));
2609
2610 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2611 texi->tex.bindless = false;
2612 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2613 texi->tex.mask = mask;
2614 texi->cache = getCacheModeFromVar(tex);
2615 texi->setType(ty);
2616 texi->subOp = getSubOp(op);
2617
2618 if (indirect)
2619 texi->setIndirectR(indirect);
2620
2621 break;
2622 }
2623 case nir_intrinsic_store_shared: {
2624 DataType sType = getSType(insn->src[0], false, false);
2625 Value *indirectOffset;
2626 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2627
2628 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2629 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2630 continue;
2631 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2632 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2633 }
2634 break;
2635 }
2636 case nir_intrinsic_load_shared: {
2637 const DataType dType = getDType(insn);
2638 LValues &newDefs = convert(&insn->dest);
2639 Value *indirectOffset;
2640 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2641
2642 for (uint8_t i = 0u; i < insn->num_components; ++i)
2643 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2644
2645 break;
2646 }
2647 case nir_intrinsic_barrier: {
2648 // TODO: add flag to shader_info
2649 info->numBarriers = 1;
2650 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2651 bar->fixed = 1;
2652 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2653 break;
2654 }
2655 case nir_intrinsic_group_memory_barrier:
2656 case nir_intrinsic_memory_barrier:
2657 case nir_intrinsic_memory_barrier_atomic_counter:
2658 case nir_intrinsic_memory_barrier_buffer:
2659 case nir_intrinsic_memory_barrier_image:
2660 case nir_intrinsic_memory_barrier_shared: {
2661 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2662 bar->fixed = 1;
2663 bar->subOp = getSubOp(op);
2664 break;
2665 }
2666 case nir_intrinsic_memory_barrier_tcs_patch:
2667 break;
2668 case nir_intrinsic_shader_clock: {
2669 const DataType dType = getDType(insn);
2670 LValues &newDefs = convert(&insn->dest);
2671
2672 loadImm(newDefs[0], 0u);
2673 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2674 break;
2675 }
2676 case nir_intrinsic_load_global: {
2677 const DataType dType = getDType(insn);
2678 LValues &newDefs = convert(&insn->dest);
2679 Value *indirectOffset;
2680 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2681
2682 for (auto i = 0u; i < insn->num_components; ++i)
2683 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2684
2685 info->io.globalAccess |= 0x1;
2686 break;
2687 }
2688 case nir_intrinsic_store_global: {
2689 DataType sType = getSType(insn->src[0], false, false);
2690
2691 for (auto i = 0u; i < insn->num_components; ++i) {
2692 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2693 continue;
2694 if (typeSizeof(sType) == 8) {
2695 Value *split[2];
2696 mkSplit(split, 4, getSrc(&insn->src[0], i));
2697
2698 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2699 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2700
2701 sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2702 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2703 } else {
2704 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2705 mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2706 }
2707 }
2708
2709 info->io.globalAccess |= 0x2;
2710 break;
2711 }
2712 default:
2713 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2714 return false;
2715 }
2716
2717 return true;
2718 }
2719
2720 bool
2721 Converter::visit(nir_jump_instr *insn)
2722 {
2723 switch (insn->type) {
2724 case nir_jump_return:
2725 // TODO: this only works in the main function
2726 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2727 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2728 break;
2729 case nir_jump_break:
2730 case nir_jump_continue: {
2731 bool isBreak = insn->type == nir_jump_break;
2732 nir_block *block = insn->instr.block;
2733 assert(!block->successors[1]);
2734 BasicBlock *target = convert(block->successors[0]);
2735 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2736 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2737 break;
2738 }
2739 default:
2740 ERROR("unknown nir_jump_type %u\n", insn->type);
2741 return false;
2742 }
2743
2744 return true;
2745 }
2746
2747 Value*
2748 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2749 {
2750 Value *val;
2751
2752 if (immInsertPos)
2753 setPosition(immInsertPos, true);
2754 else
2755 setPosition(bb, false);
2756
2757 switch (insn->def.bit_size) {
2758 case 64:
2759 val = loadImm(getSSA(8), insn->value[idx].u64);
2760 break;
2761 case 32:
2762 val = loadImm(getSSA(4), insn->value[idx].u32);
2763 break;
2764 case 16:
2765 val = loadImm(getSSA(2), insn->value[idx].u16);
2766 break;
2767 case 8:
2768 val = loadImm(getSSA(1), insn->value[idx].u8);
2769 break;
2770 default:
2771 unreachable("unhandled bit size!\n");
2772 }
2773 setPosition(bb, true);
2774 return val;
2775 }
2776
2777 bool
2778 Converter::visit(nir_load_const_instr *insn)
2779 {
2780 assert(insn->def.bit_size <= 64);
2781 immediates[insn->def.index] = insn;
2782 return true;
2783 }
2784
// Guard for code handling a nir_alu_instr named `insn`: logs an error and
// returns false from the enclosing function when the SSA destination has more
// than one component or when the destination write mask is anything but 1.
#define DEFAULT_CHECKS \
   if (insn->dest.dest.ssa.num_components > 1) { \
      ERROR("nir_alu_instr only supported with 1 component!\n"); \
      return false; \
   } else if (insn->dest.write_mask != 1) { \
      ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
      return false; \
   }
2794 bool
2795 Converter::visit(nir_alu_instr *insn)
2796 {
2797 const nir_op op = insn->op;
2798 const nir_op_info &info = nir_op_infos[op];
2799 DataType dType = getDType(insn);
2800 const std::vector<DataType> sTypes = getSTypes(insn);
2801
2802 Instruction *oldPos = this->bb->getExit();
2803
2804 switch (op) {
2805 case nir_op_fabs:
2806 case nir_op_iabs:
2807 case nir_op_fadd:
2808 case nir_op_iadd:
2809 case nir_op_iand:
2810 case nir_op_fceil:
2811 case nir_op_fcos:
2812 case nir_op_fddx:
2813 case nir_op_fddx_coarse:
2814 case nir_op_fddx_fine:
2815 case nir_op_fddy:
2816 case nir_op_fddy_coarse:
2817 case nir_op_fddy_fine:
2818 case nir_op_fdiv:
2819 case nir_op_idiv:
2820 case nir_op_udiv:
2821 case nir_op_fexp2:
2822 case nir_op_ffloor:
2823 case nir_op_ffma:
2824 case nir_op_flog2:
2825 case nir_op_fmax:
2826 case nir_op_imax:
2827 case nir_op_umax:
2828 case nir_op_fmin:
2829 case nir_op_imin:
2830 case nir_op_umin:
2831 case nir_op_fmod:
2832 case nir_op_imod: