nv50ir/nir: move immediates before use
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <list>
40 #include <vector>
41
42 namespace {
43
44 #if __cplusplus >= 201103L
45 using std::hash;
46 using std::unordered_map;
47 #else
48 using std::tr1::hash;
49 using std::tr1::unordered_map;
50 #endif
51
52 using namespace nv50_ir;
53
54 int
55 type_size(const struct glsl_type *type)
56 {
57 return glsl_count_attribute_slots(type, false);
58 }
59
60 class Converter : public ConverterCommon
61 {
62 public:
63 Converter(Program *, nir_shader *, nv50_ir_prog_info *);
64
65 bool run();
66 private:
67 typedef std::vector<LValue*> LValues;
68 typedef unordered_map<unsigned, LValues> NirDefMap;
69 typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
70 typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
71 typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
72
73 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
74 LValues& convert(nir_alu_dest *);
75 BasicBlock* convert(nir_block *);
76 LValues& convert(nir_dest *);
77 SVSemantic convert(nir_intrinsic_op);
78 Value* convert(nir_load_const_instr*, uint8_t);
79 LValues& convert(nir_register *);
80 LValues& convert(nir_ssa_def *);
81
82 ImgFormat convertGLImgFormat(GLuint);
83
84 Value* getSrc(nir_alu_src *, uint8_t component = 0);
85 Value* getSrc(nir_register *, uint8_t);
86 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
87 Value* getSrc(nir_ssa_def *, uint8_t);
88
89 // The returned value is the constant part of the given source (either the
90 // nir_src or the selected source component of an intrinsic). Even though
91 // this is mostly an optimization to be able to skip indirects in a few
92 // cases, sometimes we require immediate values or have to set some fields
93 // on instructions (e.g. tex) in order for codegen to consume those.
94 // If the found value has no constant part, the value is returned
95 // through the Value parameter instead.
96 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
97 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
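// Illustrative usage sketch for the nir_src overload (not part of the
// original file):
//
//   Value *indirect;
//   uint32_t base = getIndirect(&insn->src[0], 0, indirect);
//   if (indirect)
//      ; // dynamic index: the returned constant part is 0
//   else
//      ; // fully constant index: indirect is NULL, base holds the value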
98
99 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
100
101 void setInterpolate(nv50_ir_varying *,
102 uint8_t,
103 bool centroid,
104 unsigned semantics);
105
106 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
107 uint8_t c, Value *indirect0 = NULL,
108 Value *indirect1 = NULL, bool patch = false);
109 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
110 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
111 Value *indirect1 = NULL);
112
113 bool isFloatType(nir_alu_type);
114 bool isSignedType(nir_alu_type);
115 bool isResultFloat(nir_op);
116 bool isResultSigned(nir_op);
117
118 DataType getDType(nir_alu_instr *);
119 DataType getDType(nir_intrinsic_instr *);
120 DataType getDType(nir_intrinsic_instr *, bool isSigned);
121 DataType getDType(nir_op, uint8_t);
122
123 std::vector<DataType> getSTypes(nir_alu_instr *);
124 DataType getSType(nir_src &, bool isFloat, bool isSigned);
125
126 operation getOperation(nir_intrinsic_op);
127 operation getOperation(nir_op);
128 operation getOperation(nir_texop);
129 operation preOperationNeeded(nir_op);
130
131 int getSubOp(nir_intrinsic_op);
132 int getSubOp(nir_op);
133
134 CondCode getCondCode(nir_op);
135
136 bool assignSlots();
137 bool parseNIR();
138
139 bool visit(nir_alu_instr *);
140 bool visit(nir_block *);
141 bool visit(nir_cf_node *);
142 bool visit(nir_deref_instr *);
143 bool visit(nir_function *);
144 bool visit(nir_if *);
145 bool visit(nir_instr *);
146 bool visit(nir_intrinsic_instr *);
147 bool visit(nir_jump_instr *);
148 bool visit(nir_load_const_instr*);
149 bool visit(nir_loop *);
150 bool visit(nir_ssa_undef_instr *);
151 bool visit(nir_tex_instr *);
152
153 // tex stuff
154 Value* applyProjection(Value *src, Value *proj);
155 unsigned int getNIRArgCount(TexInstruction::Target&);
156
157 // image stuff
158 uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
159 CacheMode getCacheModeFromVar(const nir_variable *);
160
161 nir_shader *nir;
162
163 NirDefMap ssaDefs;
164 NirDefMap regDefs;
165 ImmediateMap immediates;
166 NirArrayLMemOffsets regToLmemOffset;
167 NirBlockMap blocks;
168 unsigned int curLoopDepth;
169
170 BasicBlock *exit;
171 Value *zero;
172 Instruction *immInsertPos;
173
174 int clipVertexOutput;
175
176 union {
177 struct {
178 Value *position;
179 } fp;
180 };
181 };
182
183 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
184 : ConverterCommon(prog, info),
185 nir(nir),
186 curLoopDepth(0),
187 clipVertexOutput(-1)
188 {
189 zero = mkImm((uint32_t)0);
190 }
191
192 BasicBlock *
193 Converter::convert(nir_block *block)
194 {
195 NirBlockMap::iterator it = blocks.find(block->index);
196 if (it != blocks.end())
197 return it->second;
198
199 BasicBlock *bb = new BasicBlock(func);
200 blocks[block->index] = bb;
201 return bb;
202 }
203
204 bool
205 Converter::isFloatType(nir_alu_type type)
206 {
207 return nir_alu_type_get_base_type(type) == nir_type_float;
208 }
209
210 bool
211 Converter::isSignedType(nir_alu_type type)
212 {
213 return nir_alu_type_get_base_type(type) == nir_type_int;
214 }
215
216 bool
217 Converter::isResultFloat(nir_op op)
218 {
219 const nir_op_info &info = nir_op_infos[op];
220 if (info.output_type != nir_type_invalid)
221 return isFloatType(info.output_type);
222
223 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
224 assert(false);
225 return true;
226 }
227
228 bool
229 Converter::isResultSigned(nir_op op)
230 {
231 switch (op) {
232 // there is no umul and we get wrong results if we treat all muls as signed
233 case nir_op_imul:
234 case nir_op_inot:
235 return false;
236 default:
237 const nir_op_info &info = nir_op_infos[op];
238 if (info.output_type != nir_type_invalid)
239 return isSignedType(info.output_type);
240 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
241 assert(false);
242 return true;
243 }
244 }
245
246 DataType
247 Converter::getDType(nir_alu_instr *insn)
248 {
249 if (insn->dest.dest.is_ssa)
250 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
251 else
252 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
253 }
254
255 DataType
256 Converter::getDType(nir_intrinsic_instr *insn)
257 {
258 bool isSigned;
259 switch (insn->intrinsic) {
260 case nir_intrinsic_shared_atomic_imax:
261 case nir_intrinsic_shared_atomic_imin:
262 case nir_intrinsic_ssbo_atomic_imax:
263 case nir_intrinsic_ssbo_atomic_imin:
264 isSigned = true;
265 break;
266 default:
267 isSigned = false;
268 break;
269 }
270
271 return getDType(insn, isSigned);
272 }
273
274 DataType
275 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
276 {
277 if (insn->dest.is_ssa)
278 return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
279 else
280 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
281 }
282
283 DataType
284 Converter::getDType(nir_op op, uint8_t bitSize)
285 {
286 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
287 if (ty == TYPE_NONE) {
288 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
289 assert(false);
290 }
291 return ty;
292 }
293
294 std::vector<DataType>
295 Converter::getSTypes(nir_alu_instr *insn)
296 {
297 const nir_op_info &info = nir_op_infos[insn->op];
298 std::vector<DataType> res(info.num_inputs);
299
300 for (uint8_t i = 0; i < info.num_inputs; ++i) {
301 if (info.input_types[i] != nir_type_invalid) {
302 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
303 } else {
304 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
305 assert(false);
306 res[i] = TYPE_NONE;
307 break;
308 }
309 }
310
311 return res;
312 }
313
314 DataType
315 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
316 {
317 uint8_t bitSize;
318 if (src.is_ssa)
319 bitSize = src.ssa->bit_size;
320 else
321 bitSize = src.reg.reg->bit_size;
322
323 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
324 if (ty == TYPE_NONE) {
325 const char *str;
326 if (isFloat)
327 str = "float";
328 else if (isSigned)
329 str = "int";
330 else
331 str = "uint";
332 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
333 assert(false);
334 }
335 return ty;
336 }
337
338 operation
339 Converter::getOperation(nir_op op)
340 {
341 switch (op) {
342 // basic ops with float and int variants
343 case nir_op_fabs:
344 case nir_op_iabs:
345 return OP_ABS;
346 case nir_op_fadd:
347 case nir_op_iadd:
348 return OP_ADD;
349 case nir_op_fand:
350 case nir_op_iand:
351 return OP_AND;
352 case nir_op_ifind_msb:
353 case nir_op_ufind_msb:
354 return OP_BFIND;
355 case nir_op_fceil:
356 return OP_CEIL;
357 case nir_op_fcos:
358 return OP_COS;
359 case nir_op_f2f32:
360 case nir_op_f2f64:
361 case nir_op_f2i32:
362 case nir_op_f2i64:
363 case nir_op_f2u32:
364 case nir_op_f2u64:
365 case nir_op_i2f32:
366 case nir_op_i2f64:
367 case nir_op_i2i32:
368 case nir_op_i2i64:
369 case nir_op_u2f32:
370 case nir_op_u2f64:
371 case nir_op_u2u32:
372 case nir_op_u2u64:
373 return OP_CVT;
374 case nir_op_fddx:
375 case nir_op_fddx_coarse:
376 case nir_op_fddx_fine:
377 return OP_DFDX;
378 case nir_op_fddy:
379 case nir_op_fddy_coarse:
380 case nir_op_fddy_fine:
381 return OP_DFDY;
382 case nir_op_fdiv:
383 case nir_op_idiv:
384 case nir_op_udiv:
385 return OP_DIV;
386 case nir_op_fexp2:
387 return OP_EX2;
388 case nir_op_ffloor:
389 return OP_FLOOR;
390 case nir_op_ffma:
391 return OP_FMA;
392 case nir_op_flog2:
393 return OP_LG2;
394 case nir_op_fmax:
395 case nir_op_imax:
396 case nir_op_umax:
397 return OP_MAX;
398 case nir_op_pack_64_2x32_split:
399 return OP_MERGE;
400 case nir_op_fmin:
401 case nir_op_imin:
402 case nir_op_umin:
403 return OP_MIN;
404 case nir_op_fmod:
405 case nir_op_imod:
406 case nir_op_umod:
407 case nir_op_frem:
408 case nir_op_irem:
409 return OP_MOD;
410 case nir_op_fmul:
411 case nir_op_imul:
412 case nir_op_imul_high:
413 case nir_op_umul_high:
414 return OP_MUL;
415 case nir_op_fneg:
416 case nir_op_ineg:
417 return OP_NEG;
418 case nir_op_fnot:
419 case nir_op_inot:
420 return OP_NOT;
421 case nir_op_for:
422 case nir_op_ior:
423 return OP_OR;
424 case nir_op_fpow:
425 return OP_POW;
426 case nir_op_frcp:
427 return OP_RCP;
428 case nir_op_frsq:
429 return OP_RSQ;
430 case nir_op_fsat:
431 return OP_SAT;
432 case nir_op_feq32:
433 case nir_op_ieq32:
434 case nir_op_fge32:
435 case nir_op_ige32:
436 case nir_op_uge32:
437 case nir_op_flt32:
438 case nir_op_ilt32:
439 case nir_op_ult32:
440 case nir_op_fne32:
441 case nir_op_ine32:
442 return OP_SET;
443 case nir_op_ishl:
444 return OP_SHL;
445 case nir_op_ishr:
446 case nir_op_ushr:
447 return OP_SHR;
448 case nir_op_fsin:
449 return OP_SIN;
450 case nir_op_fsqrt:
451 return OP_SQRT;
452 case nir_op_fsub:
453 case nir_op_isub:
454 return OP_SUB;
455 case nir_op_ftrunc:
456 return OP_TRUNC;
457 case nir_op_fxor:
458 case nir_op_ixor:
459 return OP_XOR;
460 default:
461 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
462 assert(false);
463 return OP_NOP;
464 }
465 }
466
467 operation
468 Converter::getOperation(nir_texop op)
469 {
470 switch (op) {
471 case nir_texop_tex:
472 return OP_TEX;
473 case nir_texop_lod:
474 return OP_TXLQ;
475 case nir_texop_txb:
476 return OP_TXB;
477 case nir_texop_txd:
478 return OP_TXD;
479 case nir_texop_txf:
480 case nir_texop_txf_ms:
481 return OP_TXF;
482 case nir_texop_tg4:
483 return OP_TXG;
484 case nir_texop_txl:
485 return OP_TXL;
486 case nir_texop_query_levels:
487 case nir_texop_texture_samples:
488 case nir_texop_txs:
489 return OP_TXQ;
490 default:
491 ERROR("couldn't get operation for nir_texop %u\n", op);
492 assert(false);
493 return OP_NOP;
494 }
495 }
496
497 operation
498 Converter::getOperation(nir_intrinsic_op op)
499 {
500 switch (op) {
501 case nir_intrinsic_emit_vertex:
502 return OP_EMIT;
503 case nir_intrinsic_end_primitive:
504 return OP_RESTART;
505 case nir_intrinsic_image_deref_atomic_add:
506 case nir_intrinsic_image_deref_atomic_and:
507 case nir_intrinsic_image_deref_atomic_comp_swap:
508 case nir_intrinsic_image_deref_atomic_exchange:
509 case nir_intrinsic_image_deref_atomic_max:
510 case nir_intrinsic_image_deref_atomic_min:
511 case nir_intrinsic_image_deref_atomic_or:
512 case nir_intrinsic_image_deref_atomic_xor:
513 return OP_SUREDP;
514 case nir_intrinsic_image_deref_load:
515 return OP_SULDP;
516 case nir_intrinsic_image_deref_samples:
517 case nir_intrinsic_image_deref_size:
518 return OP_SUQ;
519 case nir_intrinsic_image_deref_store:
520 return OP_SUSTP;
521 default:
522 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
523 assert(false);
524 return OP_NOP;
525 }
526 }
527
528 operation
529 Converter::preOperationNeeded(nir_op op)
530 {
531 switch (op) {
532 case nir_op_fcos:
533 case nir_op_fsin:
534 return OP_PRESIN;
535 default:
536 return OP_NOP;
537 }
538 }
539
540 int
541 Converter::getSubOp(nir_op op)
542 {
543 switch (op) {
544 case nir_op_imul_high:
545 case nir_op_umul_high:
546 return NV50_IR_SUBOP_MUL_HIGH;
547 default:
548 return 0;
549 }
550 }
551
552 int
553 Converter::getSubOp(nir_intrinsic_op op)
554 {
555 switch (op) {
556 case nir_intrinsic_image_deref_atomic_add:
557 case nir_intrinsic_shared_atomic_add:
558 case nir_intrinsic_ssbo_atomic_add:
559 return NV50_IR_SUBOP_ATOM_ADD;
560 case nir_intrinsic_image_deref_atomic_and:
561 case nir_intrinsic_shared_atomic_and:
562 case nir_intrinsic_ssbo_atomic_and:
563 return NV50_IR_SUBOP_ATOM_AND;
564 case nir_intrinsic_image_deref_atomic_comp_swap:
565 case nir_intrinsic_shared_atomic_comp_swap:
566 case nir_intrinsic_ssbo_atomic_comp_swap:
567 return NV50_IR_SUBOP_ATOM_CAS;
568 case nir_intrinsic_image_deref_atomic_exchange:
569 case nir_intrinsic_shared_atomic_exchange:
570 case nir_intrinsic_ssbo_atomic_exchange:
571 return NV50_IR_SUBOP_ATOM_EXCH;
572 case nir_intrinsic_image_deref_atomic_or:
573 case nir_intrinsic_shared_atomic_or:
574 case nir_intrinsic_ssbo_atomic_or:
575 return NV50_IR_SUBOP_ATOM_OR;
576 case nir_intrinsic_image_deref_atomic_max:
577 case nir_intrinsic_shared_atomic_imax:
578 case nir_intrinsic_shared_atomic_umax:
579 case nir_intrinsic_ssbo_atomic_imax:
580 case nir_intrinsic_ssbo_atomic_umax:
581 return NV50_IR_SUBOP_ATOM_MAX;
582 case nir_intrinsic_image_deref_atomic_min:
583 case nir_intrinsic_shared_atomic_imin:
584 case nir_intrinsic_shared_atomic_umin:
585 case nir_intrinsic_ssbo_atomic_imin:
586 case nir_intrinsic_ssbo_atomic_umin:
587 return NV50_IR_SUBOP_ATOM_MIN;
588 case nir_intrinsic_image_deref_atomic_xor:
589 case nir_intrinsic_shared_atomic_xor:
590 case nir_intrinsic_ssbo_atomic_xor:
591 return NV50_IR_SUBOP_ATOM_XOR;
592
593 case nir_intrinsic_group_memory_barrier:
594 case nir_intrinsic_memory_barrier:
595 case nir_intrinsic_memory_barrier_atomic_counter:
596 case nir_intrinsic_memory_barrier_buffer:
597 case nir_intrinsic_memory_barrier_image:
598 return NV50_IR_SUBOP_MEMBAR(M, GL);
599 case nir_intrinsic_memory_barrier_shared:
600 return NV50_IR_SUBOP_MEMBAR(M, CTA);
601
602 case nir_intrinsic_vote_all:
603 return NV50_IR_SUBOP_VOTE_ALL;
604 case nir_intrinsic_vote_any:
605 return NV50_IR_SUBOP_VOTE_ANY;
606 case nir_intrinsic_vote_ieq:
607 return NV50_IR_SUBOP_VOTE_UNI;
608 default:
609 return 0;
610 }
611 }
612
613 CondCode
614 Converter::getCondCode(nir_op op)
615 {
616 switch (op) {
617 case nir_op_feq32:
618 case nir_op_ieq32:
619 return CC_EQ;
620 case nir_op_fge32:
621 case nir_op_ige32:
622 case nir_op_uge32:
623 return CC_GE;
624 case nir_op_flt32:
625 case nir_op_ilt32:
626 case nir_op_ult32:
627 return CC_LT;
628 case nir_op_fne32:
629 return CC_NEU;
630 case nir_op_ine32:
631 return CC_NE;
632 default:
633 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
634 assert(false);
635 return CC_FL;
636 }
637 }
638
639 Converter::LValues&
640 Converter::convert(nir_alu_dest *dest)
641 {
642 return convert(&dest->dest);
643 }
644
645 Converter::LValues&
646 Converter::convert(nir_dest *dest)
647 {
648 if (dest->is_ssa)
649 return convert(&dest->ssa);
650 if (dest->reg.indirect) {
651 ERROR("no support for indirects.");
652 assert(false);
653 }
654 return convert(dest->reg.reg);
655 }
656
657 Converter::LValues&
658 Converter::convert(nir_register *reg)
659 {
660 NirDefMap::iterator it = regDefs.find(reg->index);
661 if (it != regDefs.end())
662 return it->second;
663
664 LValues newDef(reg->num_components);
665 for (uint8_t i = 0; i < reg->num_components; i++)
666 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
667 return regDefs[reg->index] = newDef;
668 }
669
670 Converter::LValues&
671 Converter::convert(nir_ssa_def *def)
672 {
673 NirDefMap::iterator it = ssaDefs.find(def->index);
674 if (it != ssaDefs.end())
675 return it->second;
676
677 LValues newDef(def->num_components);
678 for (uint8_t i = 0; i < def->num_components; i++)
679 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
680 return ssaDefs[def->index] = newDef;
681 }
682
683 Value*
684 Converter::getSrc(nir_alu_src *src, uint8_t component)
685 {
686 if (src->abs || src->negate) {
687 ERROR("modifiers currently not supported on nir_alu_src\n");
688 assert(false);
689 }
690 return getSrc(&src->src, src->swizzle[component]);
691 }
692
693 Value*
694 Converter::getSrc(nir_register *reg, uint8_t idx)
695 {
696 NirDefMap::iterator it = regDefs.find(reg->index);
697 if (it == regDefs.end())
698 return convert(reg)[idx];
699 return it->second[idx];
700 }
701
702 Value*
703 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
704 {
705 if (src->is_ssa)
706 return getSrc(src->ssa, idx);
707
708 if (src->reg.indirect) {
709 if (indirect)
710 return getSrc(src->reg.indirect, idx);
711 ERROR("no support for indirects.");
712 assert(false);
713 return NULL;
714 }
715
716 return getSrc(src->reg.reg, idx);
717 }
718
719 Value*
720 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
721 {
722 ImmediateMap::iterator iit = immediates.find(src->index);
723 if (iit != immediates.end())
724 return convert((*iit).second, idx);
725
726 NirDefMap::iterator it = ssaDefs.find(src->index);
727 if (it == ssaDefs.end()) {
728 ERROR("SSA value %u not found\n", src->index);
729 assert(false);
730 return NULL;
731 }
732 return it->second[idx];
733 }
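// Note on the immediates lookup above: load_const instructions are not
// materialized when first visited; they are recorded in the "immediates"
// map and only converted into real Values here, at their point of use
// (the convert(nir_load_const_instr*, uint8_t) overload, defined outside
// this excerpt, presumably emits the load at immInsertPos).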
734
735 uint32_t
736 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
737 {
738 nir_const_value *offset = nir_src_as_const_value(*src);
739
740 if (offset) {
741 indirect = NULL;
742 return offset->u32[0];
743 }
744
745 indirect = getSrc(src, idx, true);
746 return 0;
747 }
748
749 uint32_t
750 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
751 {
752 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
753 if (indirect)
754 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
755 return idx;
756 }
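// The SHL by 4 above scales a dynamic index to a byte offset: one vec4 I/O
// slot is 16 bytes, which also matches the constant "16 * coffset" scaling
// used by the load_uniform handling further down.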
757
758 static void
759 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
760 {
761 assert(name && index);
762
763 if (slot >= VERT_ATTRIB_MAX) {
764 ERROR("invalid varying slot %u\n", slot);
765 assert(false);
766 return;
767 }
768
769 if (slot >= VERT_ATTRIB_GENERIC0 &&
770 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
771 *name = TGSI_SEMANTIC_GENERIC;
772 *index = slot - VERT_ATTRIB_GENERIC0;
773 return;
774 }
775
776 if (slot >= VERT_ATTRIB_TEX0 &&
777 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
778 *name = TGSI_SEMANTIC_TEXCOORD;
779 *index = slot - VERT_ATTRIB_TEX0;
780 return;
781 }
782
783 switch (slot) {
784 case VERT_ATTRIB_COLOR0:
785 *name = TGSI_SEMANTIC_COLOR;
786 *index = 0;
787 break;
788 case VERT_ATTRIB_COLOR1:
789 *name = TGSI_SEMANTIC_COLOR;
790 *index = 1;
791 break;
792 case VERT_ATTRIB_EDGEFLAG:
793 *name = TGSI_SEMANTIC_EDGEFLAG;
794 *index = 0;
795 break;
796 case VERT_ATTRIB_FOG:
797 *name = TGSI_SEMANTIC_FOG;
798 *index = 0;
799 break;
800 case VERT_ATTRIB_NORMAL:
801 *name = TGSI_SEMANTIC_NORMAL;
802 *index = 0;
803 break;
804 case VERT_ATTRIB_POS:
805 *name = TGSI_SEMANTIC_POSITION;
806 *index = 0;
807 break;
808 case VERT_ATTRIB_POINT_SIZE:
809 *name = TGSI_SEMANTIC_PSIZE;
810 *index = 0;
811 break;
812 default:
813 ERROR("unknown vert attrib slot %u\n", slot);
814 assert(false);
815 break;
816 }
817 }
818
819 static void
820 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
821 {
822 assert(name && index);
823
824 if (slot >= VARYING_SLOT_TESS_MAX) {
825 ERROR("invalid varying slot %u\n", slot);
826 assert(false);
827 return;
828 }
829
830 if (slot >= VARYING_SLOT_PATCH0) {
831 *name = TGSI_SEMANTIC_PATCH;
832 *index = slot - VARYING_SLOT_PATCH0;
833 return;
834 }
835
836 if (slot >= VARYING_SLOT_VAR0) {
837 *name = TGSI_SEMANTIC_GENERIC;
838 *index = slot - VARYING_SLOT_VAR0;
839 return;
840 }
841
842 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
843 *name = TGSI_SEMANTIC_TEXCOORD;
844 *index = slot - VARYING_SLOT_TEX0;
845 return;
846 }
847
848 switch (slot) {
849 case VARYING_SLOT_BFC0:
850 *name = TGSI_SEMANTIC_BCOLOR;
851 *index = 0;
852 break;
853 case VARYING_SLOT_BFC1:
854 *name = TGSI_SEMANTIC_BCOLOR;
855 *index = 1;
856 break;
857 case VARYING_SLOT_CLIP_DIST0:
858 *name = TGSI_SEMANTIC_CLIPDIST;
859 *index = 0;
860 break;
861 case VARYING_SLOT_CLIP_DIST1:
862 *name = TGSI_SEMANTIC_CLIPDIST;
863 *index = 1;
864 break;
865 case VARYING_SLOT_CLIP_VERTEX:
866 *name = TGSI_SEMANTIC_CLIPVERTEX;
867 *index = 0;
868 break;
869 case VARYING_SLOT_COL0:
870 *name = TGSI_SEMANTIC_COLOR;
871 *index = 0;
872 break;
873 case VARYING_SLOT_COL1:
874 *name = TGSI_SEMANTIC_COLOR;
875 *index = 1;
876 break;
877 case VARYING_SLOT_EDGE:
878 *name = TGSI_SEMANTIC_EDGEFLAG;
879 *index = 0;
880 break;
881 case VARYING_SLOT_FACE:
882 *name = TGSI_SEMANTIC_FACE;
883 *index = 0;
884 break;
885 case VARYING_SLOT_FOGC:
886 *name = TGSI_SEMANTIC_FOG;
887 *index = 0;
888 break;
889 case VARYING_SLOT_LAYER:
890 *name = TGSI_SEMANTIC_LAYER;
891 *index = 0;
892 break;
893 case VARYING_SLOT_PNTC:
894 *name = TGSI_SEMANTIC_PCOORD;
895 *index = 0;
896 break;
897 case VARYING_SLOT_POS:
898 *name = TGSI_SEMANTIC_POSITION;
899 *index = 0;
900 break;
901 case VARYING_SLOT_PRIMITIVE_ID:
902 *name = TGSI_SEMANTIC_PRIMID;
903 *index = 0;
904 break;
905 case VARYING_SLOT_PSIZ:
906 *name = TGSI_SEMANTIC_PSIZE;
907 *index = 0;
908 break;
909 case VARYING_SLOT_TESS_LEVEL_INNER:
910 *name = TGSI_SEMANTIC_TESSINNER;
911 *index = 0;
912 break;
913 case VARYING_SLOT_TESS_LEVEL_OUTER:
914 *name = TGSI_SEMANTIC_TESSOUTER;
915 *index = 0;
916 break;
917 case VARYING_SLOT_VIEWPORT:
918 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
919 *index = 0;
920 break;
921 default:
922 ERROR("unknown varying slot %u\n", slot);
923 assert(false);
924 break;
925 }
926 }
927
928 static void
929 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
930 {
931 if (slot >= FRAG_RESULT_DATA0) {
932 *name = TGSI_SEMANTIC_COLOR;
933 *index = slot - FRAG_RESULT_COLOR - 2; // intentional: equals slot - FRAG_RESULT_DATA0
934 return;
935 }
936
937 switch (slot) {
938 case FRAG_RESULT_COLOR:
939 *name = TGSI_SEMANTIC_COLOR;
940 *index = 0;
941 break;
942 case FRAG_RESULT_DEPTH:
943 *name = TGSI_SEMANTIC_POSITION;
944 *index = 0;
945 break;
946 case FRAG_RESULT_SAMPLE_MASK:
947 *name = TGSI_SEMANTIC_SAMPLEMASK;
948 *index = 0;
949 break;
950 default:
951 ERROR("unknown frag result slot %u\n", slot);
952 assert(false);
953 break;
954 }
955 }
956
957 // copy of _mesa_sysval_to_semantic
958 static void
959 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
960 {
961 *index = 0;
962 switch (val) {
963 // Vertex shader
964 case SYSTEM_VALUE_VERTEX_ID:
965 *name = TGSI_SEMANTIC_VERTEXID;
966 break;
967 case SYSTEM_VALUE_INSTANCE_ID:
968 *name = TGSI_SEMANTIC_INSTANCEID;
969 break;
970 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
971 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
972 break;
973 case SYSTEM_VALUE_BASE_VERTEX:
974 *name = TGSI_SEMANTIC_BASEVERTEX;
975 break;
976 case SYSTEM_VALUE_BASE_INSTANCE:
977 *name = TGSI_SEMANTIC_BASEINSTANCE;
978 break;
979 case SYSTEM_VALUE_DRAW_ID:
980 *name = TGSI_SEMANTIC_DRAWID;
981 break;
982
983 // Geometry shader
984 case SYSTEM_VALUE_INVOCATION_ID:
985 *name = TGSI_SEMANTIC_INVOCATIONID;
986 break;
987
988 // Fragment shader
989 case SYSTEM_VALUE_FRAG_COORD:
990 *name = TGSI_SEMANTIC_POSITION;
991 break;
992 case SYSTEM_VALUE_FRONT_FACE:
993 *name = TGSI_SEMANTIC_FACE;
994 break;
995 case SYSTEM_VALUE_SAMPLE_ID:
996 *name = TGSI_SEMANTIC_SAMPLEID;
997 break;
998 case SYSTEM_VALUE_SAMPLE_POS:
999 *name = TGSI_SEMANTIC_SAMPLEPOS;
1000 break;
1001 case SYSTEM_VALUE_SAMPLE_MASK_IN:
1002 *name = TGSI_SEMANTIC_SAMPLEMASK;
1003 break;
1004 case SYSTEM_VALUE_HELPER_INVOCATION:
1005 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
1006 break;
1007
1008 // Tessellation shader
1009 case SYSTEM_VALUE_TESS_COORD:
1010 *name = TGSI_SEMANTIC_TESSCOORD;
1011 break;
1012 case SYSTEM_VALUE_VERTICES_IN:
1013 *name = TGSI_SEMANTIC_VERTICESIN;
1014 break;
1015 case SYSTEM_VALUE_PRIMITIVE_ID:
1016 *name = TGSI_SEMANTIC_PRIMID;
1017 break;
1018 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1019 *name = TGSI_SEMANTIC_TESSOUTER;
1020 break;
1021 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1022 *name = TGSI_SEMANTIC_TESSINNER;
1023 break;
1024
1025 // Compute shader
1026 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1027 *name = TGSI_SEMANTIC_THREAD_ID;
1028 break;
1029 case SYSTEM_VALUE_WORK_GROUP_ID:
1030 *name = TGSI_SEMANTIC_BLOCK_ID;
1031 break;
1032 case SYSTEM_VALUE_NUM_WORK_GROUPS:
1033 *name = TGSI_SEMANTIC_GRID_SIZE;
1034 break;
1035 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1036 *name = TGSI_SEMANTIC_BLOCK_SIZE;
1037 break;
1038
1039 // ARB_shader_ballot
1040 case SYSTEM_VALUE_SUBGROUP_SIZE:
1041 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
1042 break;
1043 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1044 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
1045 break;
1046 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1047 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
1048 break;
1049 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1050 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
1051 break;
1052 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1053 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
1054 break;
1055 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1056 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
1057 break;
1058 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1059 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
1060 break;
1061
1062 default:
1063 ERROR("unknown system value %u\n", val);
1064 assert(false);
1065 break;
1066 }
1067 }
1068
1069 void
1070 Converter::setInterpolate(nv50_ir_varying *var,
1071 uint8_t mode,
1072 bool centroid,
1073 unsigned semantic)
1074 {
1075 switch (mode) {
1076 case INTERP_MODE_FLAT:
1077 var->flat = 1;
1078 break;
1079 case INTERP_MODE_NONE:
1080 if (semantic == TGSI_SEMANTIC_COLOR)
1081 var->sc = 1;
1082 else if (semantic == TGSI_SEMANTIC_POSITION)
1083 var->linear = 1;
1084 break;
1085 case INTERP_MODE_NOPERSPECTIVE:
1086 var->linear = 1;
1087 break;
1088 case INTERP_MODE_SMOOTH:
1089 break;
1090 }
1091 var->centroid = centroid;
1092 }
1093
1094 static uint16_t
1095 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
1096 bool input, const nir_variable *var)
1097 {
1098 if (!type->is_array())
1099 return type->count_attribute_slots(false);
1100
1101 uint16_t slots;
1102 switch (stage) {
1103 case Program::TYPE_GEOMETRY:
1104 slots = type->uniform_locations();
1105 if (input)
1106 slots /= info.gs.vertices_in;
1107 break;
1108 case Program::TYPE_TESSELLATION_CONTROL:
1109 case Program::TYPE_TESSELLATION_EVAL:
1110 // remove first dimension
1111 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1112 slots = type->uniform_locations();
1113 else
1114 slots = type->fields.array->uniform_locations();
1115 break;
1116 default:
1117 slots = type->count_attribute_slots(false);
1118 break;
1119 }
1120
1121 return slots;
1122 }
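// Example (an assumption for illustration, not from the original file): for
// a geometry shader input declared as "in vec4 data[3][2]" with triangle
// input (3 vertices), uniform_locations() counts all 6 vec4 locations, so
// dividing by info.gs.vertices_in == 3 yields the 2 slots owned per vertex.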
1123
1124 bool Converter::assignSlots() {
1125 unsigned name;
1126 unsigned index;
1127
1128 info->io.viewportId = -1;
1129 info->numInputs = 0;
1130
1131 // we have to fix up the uniform locations for arrays
1132 unsigned numImages = 0;
1133 nir_foreach_variable(var, &nir->uniforms) {
1134 const glsl_type *type = var->type;
1135 if (!type->without_array()->is_image())
1136 continue;
1137 var->data.driver_location = numImages;
1138 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1139 }
1140
1141 nir_foreach_variable(var, &nir->inputs) {
1142 const glsl_type *type = var->type;
1143 int slot = var->data.location;
1144 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1145 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1146 : type->component_slots();
1147 uint32_t frac = var->data.location_frac;
1148 uint32_t vary = var->data.driver_location;
1149
1150 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1151 if (comp > 2)
1152 slots *= 2;
1153 }
1154
1155 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1156
1157 switch(prog->getType()) {
1158 case Program::TYPE_FRAGMENT:
1159 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1160 for (uint16_t i = 0; i < slots; ++i) {
1161 setInterpolate(&info->in[vary + i], var->data.interpolation,
1162 var->data.centroid | var->data.sample, name);
1163 }
1164 break;
1165 case Program::TYPE_GEOMETRY:
1166 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1167 break;
1168 case Program::TYPE_TESSELLATION_CONTROL:
1169 case Program::TYPE_TESSELLATION_EVAL:
1170 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1171 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1172 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1173 break;
1174 case Program::TYPE_VERTEX:
1175 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1176 switch (name) {
1177 case TGSI_SEMANTIC_EDGEFLAG:
1178 info->io.edgeFlagIn = vary;
1179 break;
1180 default:
1181 break;
1182 }
1183 break;
1184 default:
1185 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1186 return false;
1187 }
1188
1189 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1190 info->in[vary].id = vary;
1191 info->in[vary].patch = var->data.patch;
1192 info->in[vary].sn = name;
1193 info->in[vary].si = index + i;
1194 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1195 if (i & 0x1)
1196 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1197 else
1198 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1199 else
1200 info->in[vary].mask |= ((1 << comp) - 1) << frac;
1201 }
1202 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1203 }
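// Worked example for the 64-bit input mask above (illustrative): a dvec3
// (comp == 3, frac == 0) has the full mask (1 << 6) - 1 == 0x3f; the even
// slot keeps the low nibble (0x3f & 0xf == 0xf) and the odd slot keeps the
// remaining high bits shifted down (0x3f >> 4 == 0x3).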
1204
1205 info->numOutputs = 0;
1206 nir_foreach_variable(var, &nir->outputs) {
1207 const glsl_type *type = var->type;
1208 int slot = var->data.location;
1209 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1210 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1211 : type->component_slots();
1212 uint32_t frac = var->data.location_frac;
1213 uint32_t vary = var->data.driver_location;
1214
1215 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1216 if (comp > 2)
1217 slots *= 2;
1218 }
1219
1220 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1221
1222 switch(prog->getType()) {
1223 case Program::TYPE_FRAGMENT:
1224 frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1225 switch (name) {
1226 case TGSI_SEMANTIC_COLOR:
1227 if (!var->data.fb_fetch_output)
1228 info->prop.fp.numColourResults++;
1229 info->prop.fp.separateFragData = true;
1230 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1231 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1232 index = index == 0 ? var->data.index : index;
1233 break;
1234 case TGSI_SEMANTIC_POSITION:
1235 info->io.fragDepth = vary;
1236 info->prop.fp.writesDepth = true;
1237 break;
1238 case TGSI_SEMANTIC_SAMPLEMASK:
1239 info->io.sampleMask = vary;
1240 break;
1241 default:
1242 break;
1243 }
1244 break;
1245 case Program::TYPE_GEOMETRY:
1246 case Program::TYPE_TESSELLATION_CONTROL:
1247 case Program::TYPE_TESSELLATION_EVAL:
1248 case Program::TYPE_VERTEX:
1249 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1250
1251 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1252 name != TGSI_SEMANTIC_TESSOUTER)
1253 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1254
1255 switch (name) {
1256 case TGSI_SEMANTIC_CLIPDIST:
1257 info->io.genUserClip = -1;
1258 break;
1259 case TGSI_SEMANTIC_CLIPVERTEX:
1260 clipVertexOutput = vary;
1261 break;
1262 case TGSI_SEMANTIC_EDGEFLAG:
1263 info->io.edgeFlagOut = vary;
1264 break;
1265 case TGSI_SEMANTIC_POSITION:
1266 if (clipVertexOutput < 0)
1267 clipVertexOutput = vary;
1268 break;
1269 default:
1270 break;
1271 }
1272 break;
1273 default:
1274 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1275 return false;
1276 }
1277
1278 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1279 info->out[vary].id = vary;
1280 info->out[vary].patch = var->data.patch;
1281 info->out[vary].sn = name;
1282 info->out[vary].si = index + i;
1283 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1284 if (i & 0x1)
1285 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1286 else
1287 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1288 else
1289 info->out[vary].mask |= ((1 << comp) - 1) << frac;
1290
1291 if (nir->info.outputs_read & 1ll << slot)
1292 info->out[vary].oread = 1;
1293 }
1294 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1295 }
1296
1297 info->numSysVals = 0;
1298 for (uint8_t i = 0; i < 64; ++i) {
1299 if (!(nir->info.system_values_read & 1ll << i))
1300 continue;
1301
1302 system_val_to_tgsi_semantic(i, &name, &index);
1303 info->sv[info->numSysVals].sn = name;
1304 info->sv[info->numSysVals].si = index;
1305 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1306
1307 switch (i) {
1308 case SYSTEM_VALUE_INSTANCE_ID:
1309 info->io.instanceId = info->numSysVals;
1310 break;
1311 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1312 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1313 info->sv[info->numSysVals].patch = 1;
1314 break;
1315 case SYSTEM_VALUE_VERTEX_ID:
1316 info->io.vertexId = info->numSysVals;
1317 break;
1318 default:
1319 break;
1320 }
1321
1322 info->numSysVals += 1;
1323 }
1324
1325 if (info->io.genUserClip > 0) {
1326 info->io.clipDistances = info->io.genUserClip;
1327
1328 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1329
1330 for (unsigned int n = 0; n < nOut; ++n) {
1331 unsigned int i = info->numOutputs++;
1332 info->out[i].id = i;
1333 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1334 info->out[i].si = n;
1335 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1336 }
1337 }
1338
1339 return info->assignSlots(info) == 0;
1340 }
1341
1342 uint32_t
1343 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1344 {
1345 DataType ty;
1346 int offset = nir_intrinsic_component(insn);
1347 bool input;
1348
1349 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1350 ty = getDType(insn);
1351 else
1352 ty = getSType(insn->src[0], false, false);
1353
1354 switch (insn->intrinsic) {
1355 case nir_intrinsic_load_input:
1356 case nir_intrinsic_load_interpolated_input:
1357 case nir_intrinsic_load_per_vertex_input:
1358 input = true;
1359 break;
1360 case nir_intrinsic_load_output:
1361 case nir_intrinsic_load_per_vertex_output:
1362 case nir_intrinsic_store_output:
1363 case nir_intrinsic_store_per_vertex_output:
1364 input = false;
1365 break;
1366 default:
1367 ERROR("unknown intrinsic in getSlotAddress %s",
1368 nir_intrinsic_infos[insn->intrinsic].name);
1369 input = false;
1370 assert(false);
1371 break;
1372 }
1373
1374 if (typeSizeof(ty) == 8) {
1375 slot *= 2;
1376 slot += offset;
1377 if (slot >= 4) {
1378 idx += 1;
1379 slot -= 4;
1380 }
1381 } else {
1382 slot += offset;
1383 }
1384
1385 assert(slot < 4);
1386 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1387 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1388
1389 const nv50_ir_varying *vary = input ? info->in : info->out;
1390 return vary[idx].slot[slot] * 4;
1391 }
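// Illustrative example of the 64-bit adjustment above: for an 8-byte type,
// component slot 3 maps to slot 3 * 2 == 6, which overflows the 4
// components of one location, so idx is bumped by one and the slot wraps
// to 2 (i.e. the value lives in the .zw half of the next location).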
1392
1393 Instruction *
1394 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1395 uint32_t base, uint8_t c, Value *indirect0,
1396 Value *indirect1, bool patch)
1397 {
1398 unsigned int tySize = typeSizeof(ty);
1399
1400 if (tySize == 8 &&
1401 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1402 Value *lo = getSSA();
1403 Value *hi = getSSA();
1404
1405 Instruction *loi =
1406 mkLoad(TYPE_U32, lo,
1407 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1408 indirect0);
1409 loi->setIndirect(0, 1, indirect1);
1410 loi->perPatch = patch;
1411
1412 Instruction *hii =
1413 mkLoad(TYPE_U32, hi,
1414 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1415 indirect0);
1416 hii->setIndirect(0, 1, indirect1);
1417 hii->perPatch = patch;
1418
1419 return mkOp2(OP_MERGE, ty, def, lo, hi);
1420 } else {
1421 Instruction *ld =
1422 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1423 ld->setIndirect(0, 1, indirect1);
1424 ld->perPatch = patch;
1425 return ld;
1426 }
1427 }
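// In other words, a 64-bit load from constant/buffer memory (or any
// indirect one) is emitted roughly as (pseudo-IR for illustration; the
// exact printed form differs):
//
//   ld u32 $lo c0[base + c * 8]
//   ld u32 $hi c0[base + c * 8 + 4]
//   merge u64 $def $lo $hi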
1428
1429 void
1430 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1431 DataType ty, Value *src, uint8_t idx, uint8_t c,
1432 Value *indirect0, Value *indirect1)
1433 {
1434 uint8_t size = typeSizeof(ty);
1435 uint32_t address = getSlotAddress(insn, idx, c);
1436
1437 if (size == 8 && indirect0) {
1438 Value *split[2];
1439 mkSplit(split, 4, src);
1440
1441 if (op == OP_EXPORT) {
1442 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1443 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1444 }
1445
1446 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1447 split[0])->perPatch = info->out[idx].patch;
1448 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1449 split[1])->perPatch = info->out[idx].patch;
1450 } else {
1451 if (op == OP_EXPORT)
1452 src = mkMov(getSSA(size), src, ty)->getDef(0);
1453 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1454 src)->perPatch = info->out[idx].patch;
1455 }
1456 }
1457
1458 bool
1459 Converter::parseNIR()
1460 {
1461 info->bin.tlsSpace = 0;
1462 info->io.clipDistances = nir->info.clip_distance_array_size;
1463 info->io.cullDistances = nir->info.cull_distance_array_size;
1464
1465 switch(prog->getType()) {
1466 case Program::TYPE_COMPUTE:
1467 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1468 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1469 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1470 info->bin.smemSize = nir->info.cs.shared_size;
1471 break;
1472 case Program::TYPE_FRAGMENT:
1473 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1474 info->prop.fp.persampleInvocation =
1475 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1476 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1477 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1478 info->prop.fp.readsSampleLocations =
1479 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1480 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1481 info->prop.fp.usesSampleMaskIn =
1482 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1483 break;
1484 case Program::TYPE_GEOMETRY:
1485 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1486 info->prop.gp.instanceCount = nir->info.gs.invocations;
1487 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1488 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1489 break;
1490 case Program::TYPE_TESSELLATION_CONTROL:
1491 case Program::TYPE_TESSELLATION_EVAL:
1492 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1493 info->prop.tp.domain = GL_LINES;
1494 else
1495 info->prop.tp.domain = nir->info.tess.primitive_mode;
1496 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1497 info->prop.tp.outputPrim =
1498 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1499 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1500 info->prop.tp.winding = !nir->info.tess.ccw;
1501 break;
1502 case Program::TYPE_VERTEX:
1503 info->prop.vp.usesDrawParameters =
1504 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1505 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1506 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1507 break;
1508 default:
1509 break;
1510 }
1511
1512 return true;
1513 }
1514
1515 bool
1516 Converter::visit(nir_function *function)
1517 {
1518 // we only support emitting the main function for now
1519 assert(!strcmp(function->name, "main"));
1520 assert(function->impl);
1521
1522 // usually the blocks will set everything up, but main is special
1523 BasicBlock *entry = new BasicBlock(prog->main);
1524 exit = new BasicBlock(prog->main);
1525 blocks[nir_start_block(function->impl)->index] = entry;
1526 prog->main->setEntry(entry);
1527 prog->main->setExit(exit);
1528
1529 setPosition(entry, true);
1530
1531 if (info->io.genUserClip > 0) {
1532 for (int c = 0; c < 4; ++c)
1533 clipVtx[c] = getScratch();
1534 }
1535
1536 switch (prog->getType()) {
1537 case Program::TYPE_TESSELLATION_CONTROL:
1538 outBase = mkOp2v(
1539 OP_SUB, TYPE_U32, getSSA(),
1540 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1541 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1542 break;
1543 case Program::TYPE_FRAGMENT: {
1544 Symbol *sv = mkSysVal(SV_POSITION, 3);
1545 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1546 fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1547 break;
1548 }
1549 default:
1550 break;
1551 }
1552
1553 nir_foreach_register(reg, &function->impl->registers) {
1554 if (reg->num_array_elems) {
1555 // TODO: packed variables would be nice, but MemoryOpt fails;
1556 // once that is fixed, replace 4 with reg->num_components
1557 uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1558 regToLmemOffset[reg->index] = info->bin.tlsSpace;
1559 info->bin.tlsSpace += size;
1560 }
1561 }
1562
1563 nir_index_ssa_defs(function->impl);
1564 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1565 if (!visit(node))
1566 return false;
1567 }
1568
1569 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1570 setPosition(exit, true);
1571
1572 if ((prog->getType() == Program::TYPE_VERTEX ||
1573 prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1574 && info->io.genUserClip > 0)
1575 handleUserClipPlanes();
1576
1577 // TODO: for functions other than main this needs to be an OP_RETURN
1578 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1579 return true;
1580 }
1581
1582 bool
1583 Converter::visit(nir_cf_node *node)
1584 {
1585 switch (node->type) {
1586 case nir_cf_node_block:
1587 return visit(nir_cf_node_as_block(node));
1588 case nir_cf_node_if:
1589 return visit(nir_cf_node_as_if(node));
1590 case nir_cf_node_loop:
1591 return visit(nir_cf_node_as_loop(node));
1592 default:
1593 ERROR("unknown nir_cf_node type %u\n", node->type);
1594 return false;
1595 }
1596 }
1597
1598 bool
1599 Converter::visit(nir_block *block)
1600 {
1601 if (!block->predecessors->entries && block->instr_list.is_empty())
1602 return true;
1603
1604 BasicBlock *bb = convert(block);
1605
1606 setPosition(bb, true);
1607 nir_foreach_instr(insn, block) {
1608 if (!visit(insn))
1609 return false;
1610 }
1611 return true;
1612 }
1613
1614 bool
1615 Converter::visit(nir_if *nif)
1616 {
1617 DataType sType = getSType(nif->condition, false, false);
1618 Value *src = getSrc(&nif->condition, 0);
1619
1620 nir_block *lastThen = nir_if_last_then_block(nif);
1621 nir_block *lastElse = nir_if_last_else_block(nif);
1622
1623 assert(!lastThen->successors[1]);
1624 assert(!lastElse->successors[1]);
1625
1626 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1627 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1628
1629 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1630 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1631
1632 // we only insert joinats if both branches end up at the end of the if
1633 // again. the reasons for this not to happen are breaks/continues/ret/...,
1634 // which have their own handling (see the flow sketch after this function)
1635 if (lastThen->successors[0] == lastElse->successors[0])
1636 bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1637 CC_ALWAYS, NULL);
1638
1639 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1640
1641 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1642 if (!visit(node))
1643 return false;
1644 }
1645 setPosition(convert(lastThen), true);
1646 if (!bb->getExit() ||
1647 !bb->getExit()->asFlow() ||
1648 bb->getExit()->asFlow()->op == OP_JOIN) {
1649 BasicBlock *tailBB = convert(lastThen->successors[0]);
1650 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1651 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1652 }
1653
1654 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1655 if (!visit(node))
1656 return false;
1657 }
1658 setPosition(convert(lastElse), true);
1659 if (!bb->getExit() ||
1660 !bb->getExit()->asFlow() ||
1661 bb->getExit()->asFlow()->op == OP_JOIN) {
1662 BasicBlock *tailBB = convert(lastElse->successors[0]);
1663 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1664 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1665 }
1666
1667 if (lastThen->successors[0] == lastElse->successors[0]) {
1668 setPosition(convert(lastThen->successors[0]), true);
1669 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1670 }
1671
1672 return true;
1673 }
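// Flow sketch for "if (c) { T } else { E } tail" as emitted above
// (illustrative pseudo-IR, assuming both branches fall through to tail):
//
//   bb:     joinat tail; bra (c == 0) elseBB
//   thenBB: T ...; bra tail
//   elseBB: E ...; bra tail
//   tail:   join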
1674
1675 bool
1676 Converter::visit(nir_loop *loop)
1677 {
1678 curLoopDepth += 1;
1679 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1680
1681 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1682 BasicBlock *tailBB =
1683 convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1684 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1685
1686 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1687 setPosition(loopBB, false);
1688 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1689
1690 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1691 if (!visit(node))
1692 return false;
1693 }
1694 Instruction *insn = bb->getExit();
1695 if (bb->cfg.incidentCount() != 0) {
1696 if (!insn || !insn->asFlow()) {
1697 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1698 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1699 } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1700 tailBB->cfg.incidentCount() == 0) {
1701 // RA doesn't like having blocks around with no incident edge,
1702 // so we create a fake one to make it happy
1703 bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1704 }
1705 }
1706
1707 curLoopDepth -= 1;
1708
1709 return true;
1710 }
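// Flow sketch of the loop structure built above (illustrative):
//
//   bb:     prebreak tail
//   loopBB: precont loopBB
//           <body>
//           cont loopBB   // added only if the body does not end in a flow op
//   tail:   ...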
1711
1712 bool
1713 Converter::visit(nir_instr *insn)
1714 {
1715 // we need an insertion point for immediate loads generated on the fly
1716 immInsertPos = bb->getExit();
1717 switch (insn->type) {
1718 case nir_instr_type_alu:
1719 return visit(nir_instr_as_alu(insn));
1720 case nir_instr_type_deref:
1721 return visit(nir_instr_as_deref(insn));
1722 case nir_instr_type_intrinsic:
1723 return visit(nir_instr_as_intrinsic(insn));
1724 case nir_instr_type_jump:
1725 return visit(nir_instr_as_jump(insn));
1726 case nir_instr_type_load_const:
1727 return visit(nir_instr_as_load_const(insn));
1728 case nir_instr_type_ssa_undef:
1729 return visit(nir_instr_as_ssa_undef(insn));
1730 case nir_instr_type_tex:
1731 return visit(nir_instr_as_tex(insn));
1732 default:
1733 ERROR("unknown nir_instr type %u\n", insn->type);
1734 return false;
1735 }
1736 return true;
1737 }
1738
1739 SVSemantic
1740 Converter::convert(nir_intrinsic_op intr)
1741 {
1742 switch (intr) {
1743 case nir_intrinsic_load_base_vertex:
1744 return SV_BASEVERTEX;
1745 case nir_intrinsic_load_base_instance:
1746 return SV_BASEINSTANCE;
1747 case nir_intrinsic_load_draw_id:
1748 return SV_DRAWID;
1749 case nir_intrinsic_load_front_face:
1750 return SV_FACE;
1751 case nir_intrinsic_load_helper_invocation:
1752 return SV_THREAD_KILL;
1753 case nir_intrinsic_load_instance_id:
1754 return SV_INSTANCE_ID;
1755 case nir_intrinsic_load_invocation_id:
1756 return SV_INVOCATION_ID;
1757 case nir_intrinsic_load_local_group_size:
1758 return SV_NTID;
1759 case nir_intrinsic_load_local_invocation_id:
1760 return SV_TID;
1761 case nir_intrinsic_load_num_work_groups:
1762 return SV_NCTAID;
1763 case nir_intrinsic_load_patch_vertices_in:
1764 return SV_VERTEX_COUNT;
1765 case nir_intrinsic_load_primitive_id:
1766 return SV_PRIMITIVE_ID;
1767 case nir_intrinsic_load_sample_id:
1768 return SV_SAMPLE_INDEX;
1769 case nir_intrinsic_load_sample_mask_in:
1770 return SV_SAMPLE_MASK;
1771 case nir_intrinsic_load_sample_pos:
1772 return SV_SAMPLE_POS;
1773 case nir_intrinsic_load_subgroup_eq_mask:
1774 return SV_LANEMASK_EQ;
1775 case nir_intrinsic_load_subgroup_ge_mask:
1776 return SV_LANEMASK_GE;
1777 case nir_intrinsic_load_subgroup_gt_mask:
1778 return SV_LANEMASK_GT;
1779 case nir_intrinsic_load_subgroup_le_mask:
1780 return SV_LANEMASK_LE;
1781 case nir_intrinsic_load_subgroup_lt_mask:
1782 return SV_LANEMASK_LT;
1783 case nir_intrinsic_load_subgroup_invocation:
1784 return SV_LANEID;
1785 case nir_intrinsic_load_tess_coord:
1786 return SV_TESS_COORD;
1787 case nir_intrinsic_load_tess_level_inner:
1788 return SV_TESS_INNER;
1789 case nir_intrinsic_load_tess_level_outer:
1790 return SV_TESS_OUTER;
1791 case nir_intrinsic_load_vertex_id:
1792 return SV_VERTEX_ID;
1793 case nir_intrinsic_load_work_group_id:
1794 return SV_CTAID;
1795 default:
1796 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1797 nir_intrinsic_infos[intr].name);
1798 assert(false);
1799 return SV_LAST;
1800 }
1801 }
1802
1803 ImgFormat
1804 Converter::convertGLImgFormat(GLuint format)
1805 {
1806 #define FMT_CASE(a, b) \
1807 case GL_ ## a: return nv50_ir::FMT_ ## b
1808
1809 switch (format) {
1810 FMT_CASE(NONE, NONE);
1811
1812 FMT_CASE(RGBA32F, RGBA32F);
1813 FMT_CASE(RGBA16F, RGBA16F);
1814 FMT_CASE(RG32F, RG32F);
1815 FMT_CASE(RG16F, RG16F);
1816 FMT_CASE(R11F_G11F_B10F, R11G11B10F);
1817 FMT_CASE(R32F, R32F);
1818 FMT_CASE(R16F, R16F);
1819
1820 FMT_CASE(RGBA32UI, RGBA32UI);
1821 FMT_CASE(RGBA16UI, RGBA16UI);
1822 FMT_CASE(RGB10_A2UI, RGB10A2UI);
1823 FMT_CASE(RGBA8UI, RGBA8UI);
1824 FMT_CASE(RG32UI, RG32UI);
1825 FMT_CASE(RG16UI, RG16UI);
1826 FMT_CASE(RG8UI, RG8UI);
1827 FMT_CASE(R32UI, R32UI);
1828 FMT_CASE(R16UI, R16UI);
1829 FMT_CASE(R8UI, R8UI);
1830
1831 FMT_CASE(RGBA32I, RGBA32I);
1832 FMT_CASE(RGBA16I, RGBA16I);
1833 FMT_CASE(RGBA8I, RGBA8I);
1834 FMT_CASE(RG32I, RG32I);
1835 FMT_CASE(RG16I, RG16I);
1836 FMT_CASE(RG8I, RG8I);
1837 FMT_CASE(R32I, R32I);
1838 FMT_CASE(R16I, R16I);
1839 FMT_CASE(R8I, R8I);
1840
1841 FMT_CASE(RGBA16, RGBA16);
1842 FMT_CASE(RGB10_A2, RGB10A2);
1843 FMT_CASE(RGBA8, RGBA8);
1844 FMT_CASE(RG16, RG16);
1845 FMT_CASE(RG8, RG8);
1846 FMT_CASE(R16, R16);
1847 FMT_CASE(R8, R8);
1848
1849 FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
1850 FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
1851 FMT_CASE(RG16_SNORM, RG16_SNORM);
1852 FMT_CASE(RG8_SNORM, RG8_SNORM);
1853 FMT_CASE(R16_SNORM, R16_SNORM);
1854 FMT_CASE(R8_SNORM, R8_SNORM);
1855
1856 FMT_CASE(BGRA_INTEGER, BGRA8);
1857 default:
1858 ERROR("unknown format %x\n", format);
1859 assert(false);
1860 return nv50_ir::FMT_NONE;
1861 }
1862 #undef FMT_CASE
1863 }
1864
1865 bool
1866 Converter::visit(nir_intrinsic_instr *insn)
1867 {
1868 nir_intrinsic_op op = insn->intrinsic;
1869 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1870
1871 switch (op) {
1872 case nir_intrinsic_load_uniform: {
1873 LValues &newDefs = convert(&insn->dest);
1874 const DataType dType = getDType(insn);
1875 Value *indirect;
1876 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1877 for (uint8_t i = 0; i < insn->num_components; ++i) {
1878 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1879 }
1880 break;
1881 }
1882 case nir_intrinsic_store_output:
1883 case nir_intrinsic_store_per_vertex_output: {
1884 Value *indirect;
1885 DataType dType = getSType(insn->src[0], false, false);
1886 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1887
1888 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1889 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1890 continue;
1891
1892 uint8_t offset = 0;
1893 Value *src = getSrc(&insn->src[0], i);
1894 switch (prog->getType()) {
1895 case Program::TYPE_FRAGMENT: {
1896 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1897 // TGSI uses a different interface than NIR: TGSI stores the
1898 // value in the z component, NIR in x
1899 offset += 2;
1900 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1901 }
1902 break;
1903 }
1904 case Program::TYPE_GEOMETRY:
1905 case Program::TYPE_VERTEX: {
1906 if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1907 mkMov(clipVtx[i], src);
1908 src = clipVtx[i];
1909 }
1910 break;
1911 }
1912 default:
1913 break;
1914 }
1915
1916 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1917 }
1918 break;
1919 }
1920 case nir_intrinsic_load_input:
1921 case nir_intrinsic_load_interpolated_input:
1922 case nir_intrinsic_load_output: {
1923 LValues &newDefs = convert(&insn->dest);
1924
1925 // FBFetch
1926 if (prog->getType() == Program::TYPE_FRAGMENT &&
1927 op == nir_intrinsic_load_output) {
1928 std::vector<Value*> defs, srcs;
1929 uint8_t mask = 0;
1930
1931 srcs.push_back(getSSA());
1932 srcs.push_back(getSSA());
1933 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1934 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1935 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1936 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1937
1938 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1939 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1940
1941 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1942 defs.push_back(newDefs[i]);
1943 mask |= 1 << i;
1944 }
1945
1946 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1947 texi->tex.levelZero = 1;
1948 texi->tex.mask = mask;
1949 texi->tex.useOffsets = 0;
1950 texi->tex.r = 0xffff;
1951 texi->tex.s = 0xffff;
1952
1953 info->prop.fp.readsFramebuffer = true;
1954 break;
1955 }
1956
1957 const DataType dType = getDType(insn);
1958 Value *indirect;
1959 bool input = op != nir_intrinsic_load_output;
1960 operation nvirOp;
1961 uint32_t mode = 0;
1962
1963 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1964 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1965
1966 // see load_barycentric_* handling
1967 if (prog->getType() == Program::TYPE_FRAGMENT) {
1968 mode = translateInterpMode(&vary, nvirOp);
1969 if (op == nir_intrinsic_load_interpolated_input) {
1970 ImmediateValue immMode;
1971 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1972 mode |= immMode.reg.data.u32;
1973 }
1974 }
1975
1976 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1977 uint32_t address = getSlotAddress(insn, idx, i);
1978 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1979 if (prog->getType() == Program::TYPE_FRAGMENT) {
1980 int s = 1;
1981 if (typeSizeof(dType) == 8) {
1982 Value *lo = getSSA();
1983 Value *hi = getSSA();
1984 Instruction *interp;
1985
1986 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1987 if (nvirOp == OP_PINTERP)
1988 interp->setSrc(s++, fp.position);
1989 if (mode & NV50_IR_INTERP_OFFSET)
1990 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1991 interp->setInterpolate(mode);
1992 interp->setIndirect(0, 0, indirect);
1993
1994 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1995 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1996 if (nvirOp == OP_PINTERP)
1997 interp->setSrc(s++, fp.position);
1998 if (mode & NV50_IR_INTERP_OFFSET)
1999 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2000 interp->setInterpolate(mode);
2001 interp->setIndirect(0, 0, indirect);
2002
2003 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2004 } else {
2005 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2006 if (nvirOp == OP_PINTERP)
2007 interp->setSrc(s++, fp.position);
2008 if (mode & NV50_IR_INTERP_OFFSET)
2009 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2010 interp->setInterpolate(mode);
2011 interp->setIndirect(0, 0, indirect);
2012 }
2013 } else {
2014 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2015 }
2016 }
2017 break;
2018 }
2019 case nir_intrinsic_load_barycentric_at_offset:
2020 case nir_intrinsic_load_barycentric_at_sample:
2021 case nir_intrinsic_load_barycentric_centroid:
2022 case nir_intrinsic_load_barycentric_pixel:
2023 case nir_intrinsic_load_barycentric_sample: {
2024 LValues &newDefs = convert(&insn->dest);
2025 uint32_t mode;
2026
2027 if (op == nir_intrinsic_load_barycentric_centroid ||
2028 op == nir_intrinsic_load_barycentric_sample) {
2029 mode = NV50_IR_INTERP_CENTROID;
2030 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
2031 Value *offs[2];
2032 for (uint8_t c = 0; c < 2; c++) {
2033 offs[c] = getScratch();
2034 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2035 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2036 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2037 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2038 }
2039 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
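// a worked example of the packing above (values picked for illustration):
// for an offset of (0.25, -0.5),
//   x: clamp to [-0.5, 0.4375] ->  0.25, * 4096 ->  1024 = 0x0400
//   y: clamp to [-0.5, 0.4375] -> -0.5,  * 4096 -> -2048 = 0xf800 (low 16 bits)
// the INSBF with 0x1010 inserts 16 bits of offs[1] at bit 16, i.e.
//   newDefs[0] = (offs[1] << 16) | (offs[0] & 0xffff) = 0xf8000400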
2040
2041 mode = NV50_IR_INTERP_OFFSET;
2042 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2043 mode = NV50_IR_INTERP_DEFAULT;
2044 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2045 info->prop.fp.readsSampleLocations = true;
2046 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2047 mode = NV50_IR_INTERP_OFFSET;
2048 } else {
2049 unreachable("all intrinsics already handled above");
2050 }
2051
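// every load_barycentric_* variant yields two components: component 0
// carries the (possibly packed) offset, component 1 the interpolation
// mode, which the load_interpolated_input handling above reads back as an
// immediate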
2052 loadImm(newDefs[1], mode);
2053 break;
2054 }
2055 case nir_intrinsic_discard:
2056 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2057 break;
2058 case nir_intrinsic_discard_if: {
2059 Value *pred = getSSA(1, FILE_PREDICATE);
2060 if (insn->num_components > 1) {
2061 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2062 assert(false);
2063 return false;
2064 }
2065 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2066 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2067 break;
2068 }
2069 case nir_intrinsic_load_base_vertex:
2070 case nir_intrinsic_load_base_instance:
2071 case nir_intrinsic_load_draw_id:
2072 case nir_intrinsic_load_front_face:
2073 case nir_intrinsic_load_helper_invocation:
2074 case nir_intrinsic_load_instance_id:
2075 case nir_intrinsic_load_invocation_id:
2076 case nir_intrinsic_load_local_group_size:
2077 case nir_intrinsic_load_local_invocation_id:
2078 case nir_intrinsic_load_num_work_groups:
2079 case nir_intrinsic_load_patch_vertices_in:
2080 case nir_intrinsic_load_primitive_id:
2081 case nir_intrinsic_load_sample_id:
2082 case nir_intrinsic_load_sample_mask_in:
2083 case nir_intrinsic_load_sample_pos:
2084 case nir_intrinsic_load_subgroup_eq_mask:
2085 case nir_intrinsic_load_subgroup_ge_mask:
2086 case nir_intrinsic_load_subgroup_gt_mask:
2087 case nir_intrinsic_load_subgroup_le_mask:
2088 case nir_intrinsic_load_subgroup_lt_mask:
2089 case nir_intrinsic_load_subgroup_invocation:
2090 case nir_intrinsic_load_tess_coord:
2091 case nir_intrinsic_load_tess_level_inner:
2092 case nir_intrinsic_load_tess_level_outer:
2093 case nir_intrinsic_load_vertex_id:
2094 case nir_intrinsic_load_work_group_id: {
2095 const DataType dType = getDType(insn);
2096 SVSemantic sv = convert(op);
2097 LValues &newDefs = convert(&insn->dest);
2098
2099 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2100 Value *def;
2101 if (typeSizeof(dType) == 8)
2102 def = getSSA();
2103 else
2104 def = newDefs[i];
2105
2106 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2107 loadImm(def, 0u);
2108 } else {
2109 Symbol *sym = mkSysVal(sv, i);
2110 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2111 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2112 rdsv->perPatch = 1;
2113 }
2114
2115 if (typeSizeof(dType) == 8)
2116 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
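// system values are read as 32 bit, so 64 bit destinations are
// zero-extended by merging in a zero high word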
2117 }
2118 break;
2119 }
2120 // constants
2121 case nir_intrinsic_load_subgroup_size: {
2122 LValues &newDefs = convert(&insn->dest);
2123 loadImm(newDefs[0], 32u);
2124 break;
2125 }
2126 case nir_intrinsic_vote_all:
2127 case nir_intrinsic_vote_any:
2128 case nir_intrinsic_vote_ieq: {
2129 LValues &newDefs = convert(&insn->dest);
2130 Value *pred = getScratch(1, FILE_PREDICATE);
2131 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2132 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2133 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2134 break;
2135 }
2136 case nir_intrinsic_ballot: {
2137 LValues &newDefs = convert(&insn->dest);
2138 Value *pred = getSSA(1, FILE_PREDICATE);
2139 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2140 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2141 break;
2142 }
2143 case nir_intrinsic_read_first_invocation:
2144 case nir_intrinsic_read_invocation: {
2145 LValues &newDefs = convert(&insn->dest);
2146 const DataType dType = getDType(insn);
2147 Value *tmp = getScratch();
2148
2149 if (op == nir_intrinsic_read_first_invocation) {
2150 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2151 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2152 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
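// how the lowest active lane is found here: OP_VOTE ANY with an immediate
// 1 yields the mask of active lanes, EXTBF_REV mirrors the 32 bits, and
// BFIND in shift-amount mode returns 31 - msb; e.g. an active mask of
// 0x000000f0 reverses to 0x0f000000, msb 27, so lane 31 - 27 = 4 is picked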
2153 } else
2154 tmp = getSrc(&insn->src[1], 0);
2155
2156 for (uint8_t i = 0; i < insn->num_components; ++i) {
2157 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2158 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2159 }
2160 break;
2161 }
2162 case nir_intrinsic_load_per_vertex_input: {
2163 const DataType dType = getDType(insn);
2164 LValues &newDefs = convert(&insn->dest);
2165 Value *indirectVertex;
2166 Value *indirectOffset;
2167 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2168 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2169
2170 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2171 mkImm(baseVertex), indirectVertex);
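// OP_PFETCH resolves the (base + indirect) vertex index against the
// current input primitive, yielding the attribute base the per-component
// loads below are relative to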
2172 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2173 uint32_t address = getSlotAddress(insn, idx, i);
2174 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2175 indirectOffset, vtxBase, info->in[idx].patch);
2176 }
2177 break;
2178 }
2179 case nir_intrinsic_load_per_vertex_output: {
2180 const DataType dType = getDType(insn);
2181 LValues &newDefs = convert(&insn->dest);
2182 Value *indirectVertex;
2183 Value *indirectOffset;
2184 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2185 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2186 Value *vtxBase = NULL;
2187
2188 if (indirectVertex)
2189 vtxBase = indirectVertex;
2190 else
2191 vtxBase = loadImm(NULL, baseVertex);
2192
2193 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2194
2195 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2196 uint32_t address = getSlotAddress(insn, idx, i);
2197 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2198 indirectOffset, vtxBase, info->out[idx].patch);
2199 }
2200 break;
2201 }
2202 case nir_intrinsic_emit_vertex:
2203 if (info->io.genUserClip > 0)
2204 handleUserClipPlanes();
2205 // fallthrough
2206 case nir_intrinsic_end_primitive: {
2207 uint32_t idx = nir_intrinsic_stream_id(insn);
2208 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2209 break;
2210 }
2211 case nir_intrinsic_load_ubo: {
2212 const DataType dType = getDType(insn);
2213 LValues &newDefs = convert(&insn->dest);
2214 Value *indirectIndex;
2215 Value *indirectOffset;
2216 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
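// the + 1 skips c0[], which is reserved for the default uniform buffer,
// so UBO bindings start at c1[]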
2217 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2218
2219 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2220 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2221 indirectOffset, indirectIndex);
2222 }
2223 break;
2224 }
2225 case nir_intrinsic_get_buffer_size: {
2226 LValues &newDefs = convert(&insn->dest);
2227 const DataType dType = getDType(insn);
2228 Value *indirectBuffer;
2229 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2230
2231 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2232 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2233 break;
2234 }
2235 case nir_intrinsic_store_ssbo: {
2236 DataType sType = getSType(insn->src[0], false, false);
2237 Value *indirectBuffer;
2238 Value *indirectOffset;
2239 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2240 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2241
2242 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2243 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2244 continue;
2245 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2246 offset + i * typeSizeof(sType));
2247 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2248 ->setIndirect(0, 1, indirectBuffer);
2249 }
2250 info->io.globalAccess |= 0x2;
2251 break;
2252 }
2253 case nir_intrinsic_load_ssbo: {
2254 const DataType dType = getDType(insn);
2255 LValues &newDefs = convert(&insn->dest);
2256 Value *indirectBuffer;
2257 Value *indirectOffset;
2258 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2259 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2260
2261 for (uint8_t i = 0u; i < insn->num_components; ++i)
2262 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2263 indirectOffset, indirectBuffer);
2264
2265 info->io.globalAccess |= 0x1;
2266 break;
2267 }
2268 case nir_intrinsic_shared_atomic_add:
2269 case nir_intrinsic_shared_atomic_and:
2270 case nir_intrinsic_shared_atomic_comp_swap:
2271 case nir_intrinsic_shared_atomic_exchange:
2272 case nir_intrinsic_shared_atomic_or:
2273 case nir_intrinsic_shared_atomic_imax:
2274 case nir_intrinsic_shared_atomic_imin:
2275 case nir_intrinsic_shared_atomic_umax:
2276 case nir_intrinsic_shared_atomic_umin:
2277 case nir_intrinsic_shared_atomic_xor: {
2278 const DataType dType = getDType(insn);
2279 LValues &newDefs = convert(&insn->dest);
2280 Value *indirectOffset;
2281 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2282 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2283 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2284 if (op == nir_intrinsic_shared_atomic_comp_swap)
2285 atom->setSrc(2, getSrc(&insn->src[2], 0));
2286 atom->setIndirect(0, 0, indirectOffset);
2287 atom->subOp = getSubOp(op);
2288 break;
2289 }
2290 case nir_intrinsic_ssbo_atomic_add:
2291 case nir_intrinsic_ssbo_atomic_and:
2292 case nir_intrinsic_ssbo_atomic_comp_swap:
2293 case nir_intrinsic_ssbo_atomic_exchange:
2294 case nir_intrinsic_ssbo_atomic_or:
2295 case nir_intrinsic_ssbo_atomic_imax:
2296 case nir_intrinsic_ssbo_atomic_imin:
2297 case nir_intrinsic_ssbo_atomic_umax:
2298 case nir_intrinsic_ssbo_atomic_umin:
2299 case nir_intrinsic_ssbo_atomic_xor: {
2300 const DataType dType = getDType(insn);
2301 LValues &newDefs = convert(&insn->dest);
2302 Value *indirectBuffer;
2303 Value *indirectOffset;
2304 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2305 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2306
2307 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2308 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2309 getSrc(&insn->src[2], 0));
2310 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2311 atom->setSrc(2, getSrc(&insn->src[3], 0));
2312 atom->setIndirect(0, 0, indirectOffset);
2313 atom->setIndirect(0, 1, indirectBuffer);
2314 atom->subOp = getSubOp(op);
2315
2316 info->io.globalAccess |= 0x2;
2317 break;
2318 }
2319 case nir_intrinsic_image_deref_atomic_add:
2320 case nir_intrinsic_image_deref_atomic_and:
2321 case nir_intrinsic_image_deref_atomic_comp_swap:
2322 case nir_intrinsic_image_deref_atomic_exchange:
2323 case nir_intrinsic_image_deref_atomic_max:
2324 case nir_intrinsic_image_deref_atomic_min:
2325 case nir_intrinsic_image_deref_atomic_or:
2326 case nir_intrinsic_image_deref_atomic_xor:
2327 case nir_intrinsic_image_deref_load:
2328 case nir_intrinsic_image_deref_samples:
2329 case nir_intrinsic_image_deref_size:
2330 case nir_intrinsic_image_deref_store: {
2331 const nir_variable *tex;
2332 std::vector<Value*> srcs, defs;
2333 Value *indirect;
2334 DataType ty;
2335
2336 uint32_t mask = 0;
2337 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2338 const glsl_type *type = deref->type;
2339 TexInstruction::Target target =
2340 convert((glsl_sampler_dim)type->sampler_dimensionality,
2341 type->sampler_array, type->sampler_shadow);
2342 unsigned int argCount = getNIRArgCount(target);
2343 uint16_t location = handleDeref(deref, indirect, tex);
2344
2345 if (opInfo.has_dest) {
2346 LValues &newDefs = convert(&insn->dest);
2347 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2348 defs.push_back(newDefs[i]);
2349 mask |= 1 << i;
2350 }
2351 }
2352
2353 switch (op) {
2354 case nir_intrinsic_image_deref_atomic_add:
2355 case nir_intrinsic_image_deref_atomic_and:
2356 case nir_intrinsic_image_deref_atomic_comp_swap:
2357 case nir_intrinsic_image_deref_atomic_exchange:
2358 case nir_intrinsic_image_deref_atomic_max:
2359 case nir_intrinsic_image_deref_atomic_min:
2360 case nir_intrinsic_image_deref_atomic_or:
2361 case nir_intrinsic_image_deref_atomic_xor:
2362 ty = getDType(insn);
2363 mask = 0x1;
2364 info->io.globalAccess |= 0x2;
2365 break;
2366 case nir_intrinsic_image_deref_load:
2367 ty = TYPE_U32;
2368 info->io.globalAccess |= 0x1;
2369 break;
2370 case nir_intrinsic_image_deref_store:
2371 ty = TYPE_U32;
2372 mask = 0xf;
2373 info->io.globalAccess |= 0x2;
2374 break;
2375 case nir_intrinsic_image_deref_samples:
2376 mask = 0x8;
2377 ty = TYPE_U32;
2378 break;
2379 case nir_intrinsic_image_deref_size:
2380 ty = TYPE_U32;
2381 break;
2382 default:
2383 unreachable("unhandled image opcode");
2384 break;
2385 }
2386
2387 // coords
2388 if (opInfo.num_srcs >= 2)
2389 for (unsigned int i = 0u; i < argCount; ++i)
2390 srcs.push_back(getSrc(&insn->src[1], i));
2391
2392 // the sample index is just another src added after the coords
2393 if (opInfo.num_srcs >= 3 && target.isMS())
2394 srcs.push_back(getSrc(&insn->src[2], 0));
2395
2396 if (opInfo.num_srcs >= 4) {
2397 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2398 for (uint8_t i = 0u; i < components; ++i)
2399 srcs.push_back(getSrc(&insn->src[3], i));
2400 }
2401
2402 if (opInfo.num_srcs >= 5)
2403 // 1 extra source for the atomic compare-and-swap value
2404 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2405 srcs.push_back(getSrc(&insn->src[4], i));
2406
2407 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2408 texi->tex.bindless = false;
2409 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2410 texi->tex.mask = mask;
2411 texi->cache = getCacheModeFromVar(tex);
2412 texi->setType(ty);
2413 texi->subOp = getSubOp(op);
2414
2415 if (indirect)
2416 texi->setIndirectR(indirect);
2417
2418 break;
2419 }
2420 case nir_intrinsic_store_shared: {
2421 DataType sType = getSType(insn->src[0], false, false);
2422 Value *indirectOffset;
2423 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2424
2425 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2426 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2427 continue;
2428 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2429 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2430 }
2431 break;
2432 }
2433 case nir_intrinsic_load_shared: {
2434 const DataType dType = getDType(insn);
2435 LValues &newDefs = convert(&insn->dest);
2436 Value *indirectOffset;
2437 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2438
2439 for (uint8_t i = 0u; i < insn->num_components; ++i)
2440 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2441
2442 break;
2443 }
2444 case nir_intrinsic_barrier: {
2445 // TODO: add flag to shader_info
2446 info->numBarriers = 1;
2447 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2448 bar->fixed = 1;
2449 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2450 break;
2451 }
2452 case nir_intrinsic_group_memory_barrier:
2453 case nir_intrinsic_memory_barrier:
2454 case nir_intrinsic_memory_barrier_atomic_counter:
2455 case nir_intrinsic_memory_barrier_buffer:
2456 case nir_intrinsic_memory_barrier_image:
2457 case nir_intrinsic_memory_barrier_shared: {
2458 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2459 bar->fixed = 1;
2460 bar->subOp = getSubOp(op);
2461 break;
2462 }
2463 case nir_intrinsic_shader_clock: {
2464 const DataType dType = getDType(insn);
2465 LValues &newDefs = convert(&insn->dest);
2466
2467 loadImm(newDefs[0], 0u);
2468 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2469 break;
2470 }
2471 default:
2472 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2473 return false;
2474 }
2475
2476 return true;
2477 }
2478
2479 bool
2480 Converter::visit(nir_jump_instr *insn)
2481 {
2482 switch (insn->type) {
2483 case nir_jump_return:
2484 // TODO: this only works in the main function
2485 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2486 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2487 break;
2488 case nir_jump_break:
2489 case nir_jump_continue: {
2490 bool isBreak = insn->type == nir_jump_break;
2491 nir_block *block = insn->instr.block;
2492 assert(!block->successors[1]);
2493 BasicBlock *target = convert(block->successors[0]);
2494 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2495 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2496 break;
2497 }
2498 default:
2499 ERROR("unknown nir_jump_type %u\n", insn->type);
2500 return false;
2501 }
2502
2503 return true;
2504 }
2505
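// immediates are not emitted where their nir_load_const_instr appears,
// but at their first use (see visit(nir_load_const_instr*) below): the
// builder is temporarily moved to immInsertPos, or to the top of the
// current block, so the materialized value dominates all of its uses,
// and is then restored to the end of the block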
2506 Value*
2507 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2508 {
2509 Value *val;
2510
2511 if (immInsertPos)
2512 setPosition(immInsertPos, true);
2513 else
2514 setPosition(bb, false);
2515
2516 switch (insn->def.bit_size) {
2517 case 64:
2518 val = loadImm(getSSA(8), insn->value.u64[idx]);
2519 break;
2520 case 32:
2521 val = loadImm(getSSA(4), insn->value.u32[idx]);
2522 break;
2523 case 16:
2524 val = loadImm(getSSA(2), insn->value.u16[idx]);
2525 break;
2526 case 8:
2527 val = loadImm(getSSA(1), insn->value.u8[idx]);
2528 break;
2529 default:
2530 unreachable("unhandled bit size!\n");
2531 }
2532 setPosition(bb, true);
2533 return val;
2534 }
2535
2536 bool
2537 Converter::visit(nir_load_const_instr *insn)
2538 {
2539 assert(insn->def.bit_size <= 64);
2540 immediates[insn->def.index] = insn;
2541 return true;
2542 }
2543
2544 #define DEFAULT_CHECKS \
2545 if (insn->dest.dest.ssa.num_components > 1) { \
2546 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2547 return false; \
2548 } \
2549 if (insn->dest.write_mask != 1) { \
2550 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2551 return false; \
2552 }
2553 bool
2554 Converter::visit(nir_alu_instr *insn)
2555 {
2556 const nir_op op = insn->op;
2557 const nir_op_info &info = nir_op_infos[op];
2558 DataType dType = getDType(insn);
2559 const std::vector<DataType> sTypes = getSTypes(insn);
2560
2561 Instruction *oldPos = this->bb->getExit();
2562
2563 switch (op) {
2564 case nir_op_fabs:
2565 case nir_op_iabs:
2566 case nir_op_fadd:
2567 case nir_op_iadd:
2568 case nir_op_fand:
2569 case nir_op_iand:
2570 case nir_op_fceil:
2571 case nir_op_fcos:
2572 case nir_op_fddx:
2573 case nir_op_fddx_coarse:
2574 case nir_op_fddx_fine:
2575 case nir_op_fddy:
2576 case nir_op_fddy_coarse:
2577 case nir_op_fddy_fine:
2578 case nir_op_fdiv:
2579 case nir_op_idiv:
2580 case nir_op_udiv:
2581 case nir_op_fexp2:
2582 case nir_op_ffloor:
2583 case nir_op_ffma:
2584 case nir_op_flog2:
2585 case nir_op_fmax:
2586 case nir_op_imax:
2587 case nir_op_umax:
2588 case nir_op_fmin:
2589 case nir_op_imin:
2590 case nir_op_umin:
2591 case nir_op_fmod:
2592 case nir_op_imod:
2593 case nir_op_umod:
2594 case nir_op_fmul:
2595 case nir_op_imul:
2596 case nir_op_imul_high:
2597 case nir_op_umul_high:
2598 case nir_op_fneg:
2599 case nir_op_ineg:
2600 case nir_op_fnot:
2601 case nir_op_inot:
2602 case nir_op_for:
2603 case nir_op_ior:
2604 case nir_op_pack_64_2x32_split:
2605 case nir_op_fpow:
2606 case nir_op_frcp:
2607 case nir_op_frem:
2608 case nir_op_irem:
2609 case nir_op_frsq:
2610 case nir_op_fsat:
2611 case nir_op_ishr:
2612 case nir_op_ushr:
2613 case nir_op_fsin:
2614 case nir_op_fsqrt:
2615 case nir_op_fsub:
2616 case nir_op_isub:
2617 case nir_op_ftrunc:
2618 case nir_op_ishl:
2619 case nir_op_fxor:
2620 case nir_op_ixor: {
2621 DEFAULT_CHECKS;
2622 LValues &newDefs = convert(&insn->dest);
2623 operation preOp = preOperationNeeded(op);
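// some source operands need a fixup instruction first; preOperationNeeded
// reports e.g. OP_PRESIN for the trigonometric ops, which is emitted into
// a temporary feeding the real operation below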
2624 if (preOp != OP_NOP) {
2625 assert(info.num_inputs < 2);
2626 Value *tmp = getSSA(typeSizeof(dType));
2627 Instruction *i0 = mkOp(preOp, dType, tmp);
2628 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2629 if (info.num_inputs) {
2630 i0->setSrc(0, getSrc(&insn->src[0]));
2631 i1->setSrc(0, tmp);
2632 }
2633 i1->subOp = getSubOp(op);
2634 } else {
2635 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2636 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2637 i->setSrc(s, getSrc(&insn->src[s]));
2638 }
2639 i->subOp = getSubOp(op);
2640 }
2641 break;
2642 }
2643 case nir_op_ifind_msb:
2644 case nir_op_ufind_msb: {
2645 DEFAULT_CHECKS;
2646 LValues &newDefs = convert(&insn->dest);
2647 dType = sTypes[0];
2648 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2649 break;
2650 }
2651 case nir_op_fround_even: {
2652 DEFAULT_CHECKS;
2653 LValues &newDefs = convert(&insn->dest);
2654 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2655 break;
2656 }
2657 // convert instructions
2658 case nir_op_f2f32:
2659 case nir_op_f2i32:
2660 case nir_op_f2u32:
2661 case nir_op_i2f32:
2662 case nir_op_i2i32:
2663 case nir_op_u2f32:
2664 case nir_op_u2u32:
2665 case nir_op_f2f64:
2666 case nir_op_f2i64:
2667 case nir_op_f2u64:
2668 case nir_op_i2f64:
2669 case nir_op_i2i64:
2670 case nir_op_u2f64:
2671 case nir_op_u2u64: {
2672 DEFAULT_CHECKS;
2673 LValues &newDefs = convert(&insn->dest);
2674 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2675 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2676 i->rnd = ROUND_Z;
2677 i->sType = sTypes[0];
2678 break;
2679 }
2680 // compare instructions
2681 case nir_op_feq32:
2682 case nir_op_ieq32:
2683 case nir_op_fge32:
2684 case nir_op_ige32:
2685 case nir_op_uge32:
2686 case nir_op_flt32:
2687 case nir_op_ilt32:
2688 case nir_op_ult32:
2689 case nir_op_fne32:
2690 case nir_op_ine32: {
2691 DEFAULT_CHECKS;
2692 LValues &newDefs = convert(&insn->dest);
2693 Instruction *i = mkCmp(getOperation(op),
2694 getCondCode(op),
2695 dType,
2696 newDefs[0],
2697 dType,
2698 getSrc(&insn->src[0]),
2699 getSrc(&insn->src[1]));
2700 if (info.num_inputs == 3)
2701 i->setSrc(2, getSrc(&insn->src[2]));
2702 i->sType = sTypes[0];
2703 break;
2704 }
2705 // these are weird ALU ops and need special handling, because
2706 // 1. they are always component based
2707 // 2. they basically just merge multiple values into one data type
2708 case nir_op_imov:
2709 case nir_op_fmov:
2710 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2711 nir_reg_dest& reg = insn->dest.dest.reg;
2712 uint32_t goffset = regToLmemOffset[reg.reg->index];
2713 uint8_t comps = reg.reg->num_components;
2714 uint8_t size = reg.reg->bit_size / 8;
2715 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2716 uint32_t aoffset = csize * reg.base_offset;
2717 Value *indirect = NULL;
2718
2719 if (reg.indirect)
2720 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2721 getSrc(reg.indirect, 0), mkImm(csize));
2722
2723 for (uint8_t i = 0u; i < comps; ++i) {
2724 if (!((1u << i) & insn->dest.write_mask))
2725 continue;
2726
2727 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2728 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2729 }
2730 break;
2731 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2732 LValues &newDefs = convert(&insn->dest);
2733 nir_reg_src& reg = insn->src[0].src.reg;
2734 uint32_t goffset = regToLmemOffset[reg.reg->index];
2735 // uint8_t comps = reg.reg->num_components;
2736 uint8_t size = reg.reg->bit_size / 8;
2737 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2738 uint32_t aoffset = csize * reg.base_offset;
2739 Value *indirect = NULL;
2740
2741 if (reg.indirect)
2742 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2743
2744 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2745 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2746
2747 break;
2748 } else {
2749 LValues &newDefs = convert(&insn->dest);
2750 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2751 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2752 }
2753 }
2754 break;
2755 case nir_op_vec2:
2756 case nir_op_vec3:
2757 case nir_op_vec4: {
2758 LValues &newDefs = convert(&insn->dest);
2759 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2760 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2761 }
2762 break;
2763 }
2764 // (un)pack
2765 case nir_op_pack_64_2x32: {
2766 LValues &newDefs = convert(&insn->dest);
2767 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2768 merge->setSrc(0, getSrc(&insn->src[0], 0));
2769 merge->setSrc(1, getSrc(&insn->src[0], 1));
2770 break;
2771 }
2772 case nir_op_pack_half_2x16_split: {
2773 LValues &newDefs = convert(&insn->dest);
2774 Value *tmpH = getSSA();
2775 Value *tmpL = getSSA();
2776
2777 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2778 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2779 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2780 break;
2781 }
2782 case nir_op_unpack_half_2x16_split_x:
2783 case nir_op_unpack_half_2x16_split_y: {
2784 LValues &newDefs = convert(&insn->dest);
2785 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2786 if (op == nir_op_unpack_half_2x16_split_y)
2787 cvt->subOp = 1;
2788 break;
2789 }
2790 case nir_op_unpack_64_2x32: {
2791 LValues &newDefs = convert(&insn->dest);
2792 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2793 break;
2794 }
2795 case nir_op_unpack_64_2x32_split_x: {
2796 LValues &newDefs = convert(&insn->dest);
2797 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2798 break;
2799 }
2800 case nir_op_unpack_64_2x32_split_y: {
2801 LValues &newDefs = convert(&insn->dest);
2802 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2803 break;
2804 }
2805 // special instructions
2806 case nir_op_fsign:
2807 case nir_op_isign: {
2808 DEFAULT_CHECKS;
2809 DataType iType;
2810 if (::isFloatType(dType))
2811 iType = TYPE_F32;
2812 else
2813 iType = TYPE_S32;
2814
2815 LValues &newDefs = convert(&insn->dest);
2816 LValue *val0 = getScratch();
2817 LValue *val1 = getScratch();
2818 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2819 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2820
2821 if (dType == TYPE_F64) {
2822 mkOp2(OP_SUB, iType, val0, val0, val1);
2823 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2824 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2825 mkOp2(OP_SUB, iType, val0, val1, val0);
2826 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2827 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2828 } else if (::isFloatType(dType))
2829 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2830 else
2831 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2832 break;
2833 }
2834 case nir_op_fcsel:
2835 case nir_op_b32csel: {
2836 DEFAULT_CHECKS;
2837 LValues &newDefs = convert(&insn->dest);
2838 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2839 break;
2840 }
2841 case nir_op_ibitfield_extract:
2842 case nir_op_ubitfield_extract: {
2843 DEFAULT_CHECKS;
2844 Value *tmp = getSSA();
2845 LValues &newDefs = convert(&insn->dest);
2846 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2847 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
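// OP_EXTBF takes a packed field descriptor: offset in bits 0-7, field
// size in bits 8-15; the INSBF with 0x808 above assembles exactly that,
//   tmp = (bits << 8) | (offset & 0xff)
// from the two separate NIR sources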
2848 break;
2849 }
2850 case nir_op_bfm: {
2851 DEFAULT_CHECKS;
2852 LValues &newDefs = convert(&insn->dest);
2853 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2854 break;
2855 }
2856 case nir_op_bitfield_insert: {
2857 DEFAULT_CHECKS;
2858 LValues &newDefs = convert(&insn->dest);
2859 LValue *temp = getSSA();
2860 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2861 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2862 break;
2863 }
2864 case nir_op_bit_count: {
2865 DEFAULT_CHECKS;
2866 LValues &newDefs = convert(&insn->dest);
2867 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2868 break;
2869 }
2870 case nir_op_bitfield_reverse: {
2871 DEFAULT_CHECKS;
2872 LValues &newDefs = convert(&insn->dest);
2873 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2874 break;
2875 }
2876 case nir_op_find_lsb: {
2877 DEFAULT_CHECKS;
2878 LValues &newDefs = convert(&insn->dest);
2879 Value *tmp = getSSA();
2880 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2881 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2882 break;
2883 }
2884 // boolean conversions
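// NIR represents a true 32 bit boolean as ~0, so converting to a numeric
// type only needs an AND with the bit pattern of "one", e.g. for b2f32:
//   0xffffffff & 0x3f800000 (1.0f) -> 1.0f
//   0x00000000 & 0x3f800000        -> 0.0f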
2885 case nir_op_b2f32: {
2886 DEFAULT_CHECKS;
2887 LValues &newDefs = convert(&insn->dest);
2888 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2889 break;
2890 }
2891 case nir_op_b2f64: {
2892 DEFAULT_CHECKS;
2893 LValues &newDefs = convert(&insn->dest);
2894 Value *tmp = getSSA(4);
2895 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2896 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2897 break;
2898 }
2899 case nir_op_f2b32:
2900 case nir_op_i2b32: {
2901 DEFAULT_CHECKS;
2902 LValues &newDefs = convert(&insn->dest);
2903 Value *src1;
2904 if (typeSizeof(sTypes[0]) == 8) {
2905 src1 = loadImm(getSSA(8), 0.0);
2906 } else {
2907 src1 = zero;
2908 }
2909 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2910 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2911 break;
2912 }
2913 case nir_op_b2i32: {
2914 DEFAULT_CHECKS;
2915 LValues &newDefs = convert(&insn->dest);
2916 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2917 break;
2918 }
2919 case nir_op_b2i64: {
2920 DEFAULT_CHECKS;
2921 LValues &newDefs = convert(&insn->dest);
2922 LValue *def = getScratch();
2923 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2924 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2925 break;
2926 }
2927 default:
2928 ERROR("unknown nir_op %s\n", info.name);
2929 return false;
2930 }
2931
2932 if (!oldPos) {
2933 oldPos = this->bb->getEntry();
2934 // guard against an empty block before touching the entry
2935 if (unlikely(!oldPos))
2936 return true;
2937 oldPos->precise = insn->exact;
2938 }
2939
2940 while (oldPos->next) {
2941 oldPos = oldPos->next;
2942 oldPos->precise = insn->exact;
2943 }
2944 oldPos->saturate = insn->dest.saturate;
2945
2946 return true;
2947 }
2948 #undef DEFAULT_CHECKS
2949
2950 bool
2951 Converter::visit(nir_ssa_undef_instr *insn)
2952 {
2953 LValues &newDefs = convert(&insn->def);
2954 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2955 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2956 }
2957 return true;
2958 }
2959
2960 #define CASE_SAMPLER(ty) \
2961 case GLSL_SAMPLER_DIM_ ## ty : \
2962 if (isArray && !isShadow) \
2963 return TEX_TARGET_ ## ty ## _ARRAY; \
2964 else if (!isArray && isShadow) \
2965 return TEX_TARGET_## ty ## _SHADOW; \
2966 else if (isArray && isShadow) \
2967 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2968 else \
2969 return TEX_TARGET_ ## ty
2970
2971 TexTarget
2972 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2973 {
2974 switch (dim) {
2975 CASE_SAMPLER(1D);
2976 CASE_SAMPLER(2D);
2977 CASE_SAMPLER(CUBE);
2978 case GLSL_SAMPLER_DIM_3D:
2979 return TEX_TARGET_3D;
2980 case GLSL_SAMPLER_DIM_MS:
2981 if (isArray)
2982 return TEX_TARGET_2D_MS_ARRAY;
2983 return TEX_TARGET_2D_MS;
2984 case GLSL_SAMPLER_DIM_RECT:
2985 if (isShadow)
2986 return TEX_TARGET_RECT_SHADOW;
2987 return TEX_TARGET_RECT;
2988 case GLSL_SAMPLER_DIM_BUF:
2989 return TEX_TARGET_BUFFER;
2990 case GLSL_SAMPLER_DIM_EXTERNAL:
2991 return TEX_TARGET_2D;
2992 default:
2993 ERROR("unknown glsl_sampler_dim %u\n", dim);
2994 assert(false);
2995 return TEX_TARGET_COUNT;
2996 }
2997 }
2998 #undef CASE_SAMPLER
2999
3000 Value*
3001 Converter::applyProjection(Value *src, Value *proj)
3002 {
3003 if (!proj)
3004 return src;
3005 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3006 }
3007
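// NIR supplies fewer coordinate components than codegen's getArgCount()
// reports for some targets (the MS sample index, for one, comes in as a
// separate intrinsic source), so correct for that here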
3008 unsigned int
3009 Converter::getNIRArgCount(TexInstruction::Target& target)
3010 {
3011 unsigned int result = target.getArgCount();
3012 if (target.isCube() && target.isArray())
3013 result--;
3014 if (target.isMS())
3015 result--;
3016 return result;
3017 }
3018
3019 uint16_t
3020 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3021 {
3022 typedef std::pair<uint32_t,Value*> DerefPair;
3023 std::list<DerefPair> derefs;
3024
3025 uint16_t result = 0;
3026 while (deref->deref_type != nir_deref_type_var) {
3027 switch (deref->deref_type) {
3028 case nir_deref_type_array: {
3029 Value *indirect;
3030 uint8_t size = type_size(deref->type);
3031 result += size * getIndirect(&deref->arr.index, 0, indirect);
3032
3033 if (indirect) {
3034 derefs.push_front(std::make_pair(size, indirect));
3035 }
3036
3037 break;
3038 }
3039 case nir_deref_type_struct: {
3040 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3041 break;
3042 }
3043 case nir_deref_type_var:
3044 default:
3045 unreachable("nir_deref_type_var reached in handleDeref!");
3046 break;
3047 }
3048 deref = nir_deref_instr_parent(deref);
3049 }
3050
3051 indirect = NULL;
3052 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3053 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3054 if (indirect)
3055 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3056 else
3057 indirect = offset;
3058 }
3059
3060 tex = nir_deref_instr_get_variable(deref);
3061 assert(tex);
3062
3063 return result + tex->data.driver_location;
3064 }
3065
3066 CacheMode
3067 Converter::getCacheModeFromVar(const nir_variable *var)
3068 {
3069 if (var->data.image.access == ACCESS_VOLATILE)
3070 return CACHE_CV;
3071 if (var->data.image.access == ACCESS_COHERENT)
3072 return CACHE_CG;
3073 return CACHE_CA;
3074 }
3075
3076 bool
3077 Converter::visit(nir_tex_instr *insn)
3078 {
3079 switch (insn->op) {
3080 case nir_texop_lod:
3081 case nir_texop_query_levels:
3082 case nir_texop_tex:
3083 case nir_texop_texture_samples:
3084 case nir_texop_tg4:
3085 case nir_texop_txb:
3086 case nir_texop_txd:
3087 case nir_texop_txf:
3088 case nir_texop_txf_ms:
3089 case nir_texop_txl:
3090 case nir_texop_txs: {
3091 LValues &newDefs = convert(&insn->dest);
3092 std::vector<Value*> srcs;
3093 std::vector<Value*> defs;
3094 std::vector<nir_src*> offsets;
3095 uint8_t mask = 0;
3096 bool lz = false;
3097 Value *proj = NULL;
3098 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3099 operation op = getOperation(insn->op);
3100
3101 int r, s;
3102 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3103 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3104 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3105 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3106 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3107 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3108 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3109 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3110 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3111 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3112 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3113
3114 if (projIdx != -1)
3115 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3116
3117 srcs.resize(insn->coord_components);
3118 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3119 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3120
3121 // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3122 if (insn->coord_components) {
3123 uint32_t argCount = target.getArgCount();
3124
3125 if (target.isMS())
3126 argCount -= 1;
3127
3128 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3129 srcs.push_back(getSSA());
3130 }
3131
3132 if (insn->op == nir_texop_texture_samples)
3133 srcs.push_back(zero);
3134 else if (!insn->num_srcs)
3135 srcs.push_back(loadImm(NULL, 0));
3136 if (biasIdx != -1)
3137 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3138 if (lodIdx != -1)
3139 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3140 else if (op == OP_TXF)
3141 lz = true;
3142 if (msIdx != -1)
3143 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3144 if (offsetIdx != -1)
3145 offsets.push_back(&insn->src[offsetIdx].src);
3146 if (compIdx != -1)
3147 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3148 if (texOffIdx != -1) {
3149 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3150 texOffIdx = srcs.size() - 1;
3151 }
3152 if (sampOffIdx != -1) {
3153 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3154 sampOffIdx = srcs.size() - 1;
3155 }
3156
3157 r = insn->texture_index;
3158 s = insn->sampler_index;
3159
3160 defs.resize(newDefs.size());
3161 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3162 defs[d] = newDefs[d];
3163 mask |= 1 << d;
3164 }
3165 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3166 lz = true;
3167
3168 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3169 texi->tex.levelZero = lz;
3170 texi->tex.mask = mask;
3171
3172 if (texOffIdx != -1)
3173 texi->tex.rIndirectSrc = texOffIdx;
3174 if (sampOffIdx != -1)
3175 texi->tex.sIndirectSrc = sampOffIdx;
3176
3177 switch (insn->op) {
3178 case nir_texop_tg4:
3179 if (!target.isShadow())
3180 texi->tex.gatherComp = insn->component;
3181 break;
3182 case nir_texop_txs:
3183 texi->tex.query = TXQ_DIMS;
3184 break;
3185 case nir_texop_texture_samples:
3186 texi->tex.mask = 0x4;
3187 texi->tex.query = TXQ_TYPE;
3188 break;
3189 case nir_texop_query_levels:
3190 texi->tex.mask = 0x8;
3191 texi->tex.query = TXQ_DIMS;
3192 break;
3193 default:
3194 break;
3195 }
3196
3197 texi->tex.useOffsets = offsets.size();
3198 if (texi->tex.useOffsets) {
3199 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3200 for (uint32_t c = 0u; c < 3; ++c) {
3201 uint8_t s2 = std::min(c, target.getDim() - 1);
3202 texi->offset[s][c].set(getSrc(offsets[s], s2));
3203 texi->offset[s][c].setInsn(texi);
3204 }
3205 }
3206 }
3207
3208 if (ddxIdx != -1 && ddyIdx != -1) {
3209 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3210 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3211 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3212 }
3213 }
3214
3215 break;
3216 }
3217 default:
3218 ERROR("unknown nir_texop %u\n", insn->op);
3219 return false;
3220 }
3221 return true;
3222 }
3223
3224 bool
3225 Converter::visit(nir_deref_instr *deref)
3226 {
3227 // we just ignore these, because image intrinsics are the only place where
3228 // we should end up with deref sources, and those backtrack to the
3229 // nir_variable themselves. This code only exists to reject deref types
3230 // we cannot handle.
3231 switch (deref->deref_type) {
3232 case nir_deref_type_array:
3233 case nir_deref_type_struct:
3234 case nir_deref_type_var:
3235 break;
3236 default:
3237 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3238 return false;
3239 }
3240 return true;
3241 }
3242
3243 bool
3244 Converter::run()
3245 {
3246 bool progress;
3247
3248 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3249 nir_print_shader(nir, stderr);
3250
3251 struct nir_lower_subgroups_options subgroup_options = {
3252 .subgroup_size = 32,
3253 .ballot_bit_size = 32,
3254 };
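// warps on this hardware are 32 threads wide, so subgroup operations and
// ballots are lowered to plain 32 bit masks (cf. load_subgroup_size above
// returning 32)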
3255
3256 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3257 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3258 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3259 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3260 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3261 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
3262 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3263
3264 do {
3265 progress = false;
3266 NIR_PASS(progress, nir, nir_copy_prop);
3267 NIR_PASS(progress, nir, nir_opt_remove_phis);
3268 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3269 NIR_PASS(progress, nir, nir_opt_cse);
3270 NIR_PASS(progress, nir, nir_opt_algebraic);
3271 NIR_PASS(progress, nir, nir_opt_constant_folding);
3272 NIR_PASS(progress, nir, nir_copy_prop);
3273 NIR_PASS(progress, nir, nir_opt_dce);
3274 NIR_PASS(progress, nir, nir_opt_dead_cf);
3275 } while (progress);
3276
3277 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3278 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3279 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3280 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3281
3282 // Garbage collect dead instructions
3283 nir_sweep(nir);
3284
3285 if (!parseNIR()) {
3286 ERROR("Couldn't prase NIR!\n");
3287 return false;
3288 }
3289
3290 if (!assignSlots()) {
3291 ERROR("Couldn't assign slots!\n");
3292 return false;
3293 }
3294
3295 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3296 nir_print_shader(nir, stderr);
3297
3298 nir_foreach_function(function, nir) {
3299 if (!visit(function))
3300 return false;
3301 }
3302
3303 return true;
3304 }
3305
3306 } // unnamed namespace
3307
3308 namespace nv50_ir {
3309
3310 bool
3311 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3312 {
3313 nir_shader *nir = (nir_shader*)info->bin.source;
3314 Converter converter(this, nir, info);
3315 bool result = converter.run();
3316 if (!result)
3317 return result;
3318 LoweringHelper lowering;
3319 lowering.run(this);
3320 tlsSize = info->bin.tlsSpace;
3321 return result;
3322 }
3323
3324 } // namespace nv50_ir