mesa.git: src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @ bd78b76f38499b76d3283bbf458e0b1c9a167fad
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33 #include "tgsi/tgsi_from_mesa.h"
34
35 #if __cplusplus >= 201103L
36 #include <unordered_map>
37 #else
38 #include <tr1/unordered_map>
39 #endif
40 #include <cstring>
41 #include <list>
42 #include <vector>
43
44 namespace {
45
46 #if __cplusplus >= 201103L
47 using std::hash;
48 using std::unordered_map;
49 #else
50 using std::tr1::hash;
51 using std::tr1::unordered_map;
52 #endif
53
54 using namespace nv50_ir;
55
56 int
57 type_size(const struct glsl_type *type, bool bindless)
58 {
59 return glsl_count_attribute_slots(type, false);
60 }
61
62 class Converter : public ConverterCommon
63 {
64 public:
65 Converter(Program *, nir_shader *, nv50_ir_prog_info *);
66
67 bool run();
68 private:
69 typedef std::vector<LValue*> LValues;
70 typedef unordered_map<unsigned, LValues> NirDefMap;
71 typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
72 typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
73 typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
74
75 CacheMode convert(enum gl_access_qualifier);
76 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
77 LValues& convert(nir_alu_dest *);
78 BasicBlock* convert(nir_block *);
79 LValues& convert(nir_dest *);
80 SVSemantic convert(nir_intrinsic_op);
81 Value* convert(nir_load_const_instr*, uint8_t);
82 LValues& convert(nir_register *);
83 LValues& convert(nir_ssa_def *);
84
85 Value* getSrc(nir_alu_src *, uint8_t component = 0);
86 Value* getSrc(nir_register *, uint8_t);
87 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
88 Value* getSrc(nir_ssa_def *, uint8_t);
89
90 // returned value is the constant part of the given source (either the
91 // nir_src or the selected source component of an intrinsic). Even though
92 // this is mostly an optimization to be able to skip indirects in a few
93 // cases, sometimes we require immediate values or have to set some fields
94 // on instructions (e.g. tex) in order for codegen to consume those.
95 // If the found value has no constant part, it is returned to the caller
96 // through the Value reference parameter instead.
97 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
98 // isScalar indicates that the addressing is scalar; vec4 addressing is
99 // assumed otherwise
100 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
101 bool isScalar = false);
102
103 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
104
105 void setInterpolate(nv50_ir_varying *,
106 uint8_t,
107 bool centroid,
108 unsigned semantics);
109
110 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
111 uint8_t c, Value *indirect0 = NULL,
112 Value *indirect1 = NULL, bool patch = false);
113 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
114 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
115 Value *indirect1 = NULL);
116
117 bool isFloatType(nir_alu_type);
118 bool isSignedType(nir_alu_type);
119 bool isResultFloat(nir_op);
120 bool isResultSigned(nir_op);
121
122 DataType getDType(nir_alu_instr *);
123 DataType getDType(nir_intrinsic_instr *);
124 DataType getDType(nir_intrinsic_instr *, bool isSigned);
125 DataType getDType(nir_op, uint8_t);
126
127 std::vector<DataType> getSTypes(nir_alu_instr *);
128 DataType getSType(nir_src &, bool isFloat, bool isSigned);
129
130 operation getOperation(nir_intrinsic_op);
131 operation getOperation(nir_op);
132 operation getOperation(nir_texop);
133 operation preOperationNeeded(nir_op);
134
135 int getSubOp(nir_intrinsic_op);
136 int getSubOp(nir_op);
137
138 CondCode getCondCode(nir_op);
139
140 bool assignSlots();
141 bool parseNIR();
142
143 bool visit(nir_alu_instr *);
144 bool visit(nir_block *);
145 bool visit(nir_cf_node *);
146 bool visit(nir_deref_instr *);
147 bool visit(nir_function *);
148 bool visit(nir_if *);
149 bool visit(nir_instr *);
150 bool visit(nir_intrinsic_instr *);
151 bool visit(nir_jump_instr *);
152 bool visit(nir_load_const_instr*);
153 bool visit(nir_loop *);
154 bool visit(nir_ssa_undef_instr *);
155 bool visit(nir_tex_instr *);
156
157 // tex stuff
158 Value* applyProjection(Value *src, Value *proj);
159 unsigned int getNIRArgCount(TexInstruction::Target&);
160
161 // image stuff
162 uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
163 CacheMode getCacheModeFromVar(const nir_variable *);
164
165 nir_shader *nir;
166
167 NirDefMap ssaDefs;
168 NirDefMap regDefs;
169 ImmediateMap immediates;
170 NirArrayLMemOffsets regToLmemOffset;
171 NirBlockMap blocks;
172 unsigned int curLoopDepth;
173
174 BasicBlock *exit;
175 Value *zero;
176 Instruction *immInsertPos;
177
178 int clipVertexOutput;
179
180 union {
181 struct {
182 Value *position;
183 } fp;
184 };
185 };
186
187 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
188 : ConverterCommon(prog, info),
189 nir(nir),
190 curLoopDepth(0),
191 clipVertexOutput(-1)
192 {
193 zero = mkImm((uint32_t)0);
194 }
195
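// returns the BasicBlock for a nir_block, creating and caching it on first use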
196 BasicBlock *
197 Converter::convert(nir_block *block)
198 {
199 NirBlockMap::iterator it = blocks.find(block->index);
200 if (it != blocks.end())
201 return it->second;
202
203 BasicBlock *bb = new BasicBlock(func);
204 blocks[block->index] = bb;
205 return bb;
206 }
207
208 bool
209 Converter::isFloatType(nir_alu_type type)
210 {
211 return nir_alu_type_get_base_type(type) == nir_type_float;
212 }
213
214 bool
215 Converter::isSignedType(nir_alu_type type)
216 {
217 return nir_alu_type_get_base_type(type) == nir_type_int;
218 }
219
220 bool
221 Converter::isResultFloat(nir_op op)
222 {
223 const nir_op_info &info = nir_op_infos[op];
224 if (info.output_type != nir_type_invalid)
225 return isFloatType(info.output_type);
226
227 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
228 assert(false);
229 return true;
230 }
231
232 bool
233 Converter::isResultSigned(nir_op op)
234 {
235 switch (op) {
236 // there is no umul and we get wrong results if we treat all muls as signed
237 case nir_op_imul:
238 case nir_op_inot:
239 return false;
240 default:
241 const nir_op_info &info = nir_op_infos[op];
242 if (info.output_type != nir_type_invalid)
243 return isSignedType(info.output_type);
244 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
245 assert(false);
246 return true;
247 }
248 }
249
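// result type of an ALU instruction, derived from the op's declared output
// type and the destination's bit size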
250 DataType
251 Converter::getDType(nir_alu_instr *insn)
252 {
253 if (insn->dest.dest.is_ssa)
254 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
255 else
256 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
257 }
258
259 DataType
260 Converter::getDType(nir_intrinsic_instr *insn)
261 {
262 bool isSigned;
263 switch (insn->intrinsic) {
264 case nir_intrinsic_shared_atomic_imax:
265 case nir_intrinsic_shared_atomic_imin:
266 case nir_intrinsic_ssbo_atomic_imax:
267 case nir_intrinsic_ssbo_atomic_imin:
268 isSigned = true;
269 break;
270 default:
271 isSigned = false;
272 break;
273 }
274
275 return getDType(insn, isSigned);
276 }
277
278 DataType
279 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
280 {
281 if (insn->dest.is_ssa)
282 return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
283 else
284 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
285 }
286
287 DataType
288 Converter::getDType(nir_op op, uint8_t bitSize)
289 {
290 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
291 if (ty == TYPE_NONE) {
292 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
293 assert(false);
294 }
295 return ty;
296 }
297
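// per-source DataTypes of an ALU instruction, as declared by nir_op_infos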
298 std::vector<DataType>
299 Converter::getSTypes(nir_alu_instr *insn)
300 {
301 const nir_op_info &info = nir_op_infos[insn->op];
302 std::vector<DataType> res(info.num_inputs);
303
304 for (uint8_t i = 0; i < info.num_inputs; ++i) {
305 if (info.input_types[i] != nir_type_invalid) {
306 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
307 } else {
308 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
309 assert(false);
310 res[i] = TYPE_NONE;
311 break;
312 }
313 }
314
315 return res;
316 }
317
318 DataType
319 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
320 {
321 uint8_t bitSize;
322 if (src.is_ssa)
323 bitSize = src.ssa->bit_size;
324 else
325 bitSize = src.reg.reg->bit_size;
326
327 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
328 if (ty == TYPE_NONE) {
329 const char *str;
330 if (isFloat)
331 str = "float";
332 else if (isSigned)
333 str = "int";
334 else
335 str = "uint";
336 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
337 assert(false);
338 }
339 return ty;
340 }
341
342 operation
343 Converter::getOperation(nir_op op)
344 {
345 switch (op) {
346 // basic ops with float and int variants
347 case nir_op_fabs:
348 case nir_op_iabs:
349 return OP_ABS;
350 case nir_op_fadd:
351 case nir_op_iadd:
352 return OP_ADD;
353 case nir_op_iand:
354 return OP_AND;
355 case nir_op_ifind_msb:
356 case nir_op_ufind_msb:
357 return OP_BFIND;
358 case nir_op_fceil:
359 return OP_CEIL;
360 case nir_op_fcos:
361 return OP_COS;
362 case nir_op_f2f32:
363 case nir_op_f2f64:
364 case nir_op_f2i32:
365 case nir_op_f2i64:
366 case nir_op_f2u32:
367 case nir_op_f2u64:
368 case nir_op_i2f32:
369 case nir_op_i2f64:
370 case nir_op_i2i32:
371 case nir_op_i2i64:
372 case nir_op_u2f32:
373 case nir_op_u2f64:
374 case nir_op_u2u32:
375 case nir_op_u2u64:
376 return OP_CVT;
377 case nir_op_fddx:
378 case nir_op_fddx_coarse:
379 case nir_op_fddx_fine:
380 return OP_DFDX;
381 case nir_op_fddy:
382 case nir_op_fddy_coarse:
383 case nir_op_fddy_fine:
384 return OP_DFDY;
385 case nir_op_fdiv:
386 case nir_op_idiv:
387 case nir_op_udiv:
388 return OP_DIV;
389 case nir_op_fexp2:
390 return OP_EX2;
391 case nir_op_ffloor:
392 return OP_FLOOR;
393 case nir_op_ffma:
394 return OP_FMA;
395 case nir_op_flog2:
396 return OP_LG2;
397 case nir_op_fmax:
398 case nir_op_imax:
399 case nir_op_umax:
400 return OP_MAX;
401 case nir_op_pack_64_2x32_split:
402 return OP_MERGE;
403 case nir_op_fmin:
404 case nir_op_imin:
405 case nir_op_umin:
406 return OP_MIN;
407 case nir_op_fmod:
408 case nir_op_imod:
409 case nir_op_umod:
410 case nir_op_frem:
411 case nir_op_irem:
412 return OP_MOD;
413 case nir_op_fmul:
414 case nir_op_imul:
415 case nir_op_imul_high:
416 case nir_op_umul_high:
417 return OP_MUL;
418 case nir_op_fneg:
419 case nir_op_ineg:
420 return OP_NEG;
421 case nir_op_inot:
422 return OP_NOT;
423 case nir_op_ior:
424 return OP_OR;
425 case nir_op_fpow:
426 return OP_POW;
427 case nir_op_frcp:
428 return OP_RCP;
429 case nir_op_frsq:
430 return OP_RSQ;
431 case nir_op_fsat:
432 return OP_SAT;
433 case nir_op_feq32:
434 case nir_op_ieq32:
435 case nir_op_fge32:
436 case nir_op_ige32:
437 case nir_op_uge32:
438 case nir_op_flt32:
439 case nir_op_ilt32:
440 case nir_op_ult32:
441 case nir_op_fne32:
442 case nir_op_ine32:
443 return OP_SET;
444 case nir_op_ishl:
445 return OP_SHL;
446 case nir_op_ishr:
447 case nir_op_ushr:
448 return OP_SHR;
449 case nir_op_fsin:
450 return OP_SIN;
451 case nir_op_fsqrt:
452 return OP_SQRT;
453 case nir_op_ftrunc:
454 return OP_TRUNC;
455 case nir_op_ixor:
456 return OP_XOR;
457 default:
458 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
459 assert(false);
460 return OP_NOP;
461 }
462 }
463
464 operation
465 Converter::getOperation(nir_texop op)
466 {
467 switch (op) {
468 case nir_texop_tex:
469 return OP_TEX;
470 case nir_texop_lod:
471 return OP_TXLQ;
472 case nir_texop_txb:
473 return OP_TXB;
474 case nir_texop_txd:
475 return OP_TXD;
476 case nir_texop_txf:
477 case nir_texop_txf_ms:
478 return OP_TXF;
479 case nir_texop_tg4:
480 return OP_TXG;
481 case nir_texop_txl:
482 return OP_TXL;
483 case nir_texop_query_levels:
484 case nir_texop_texture_samples:
485 case nir_texop_txs:
486 return OP_TXQ;
487 default:
488 ERROR("couldn't get operation for nir_texop %u\n", op);
489 assert(false);
490 return OP_NOP;
491 }
492 }
493
494 operation
495 Converter::getOperation(nir_intrinsic_op op)
496 {
497 switch (op) {
498 case nir_intrinsic_emit_vertex:
499 return OP_EMIT;
500 case nir_intrinsic_end_primitive:
501 return OP_RESTART;
502 case nir_intrinsic_bindless_image_atomic_add:
503 case nir_intrinsic_image_atomic_add:
504 case nir_intrinsic_image_deref_atomic_add:
505 case nir_intrinsic_bindless_image_atomic_and:
506 case nir_intrinsic_image_atomic_and:
507 case nir_intrinsic_image_deref_atomic_and:
508 case nir_intrinsic_bindless_image_atomic_comp_swap:
509 case nir_intrinsic_image_atomic_comp_swap:
510 case nir_intrinsic_image_deref_atomic_comp_swap:
511 case nir_intrinsic_bindless_image_atomic_exchange:
512 case nir_intrinsic_image_atomic_exchange:
513 case nir_intrinsic_image_deref_atomic_exchange:
514 case nir_intrinsic_bindless_image_atomic_imax:
515 case nir_intrinsic_image_atomic_imax:
516 case nir_intrinsic_image_deref_atomic_imax:
517 case nir_intrinsic_bindless_image_atomic_umax:
518 case nir_intrinsic_image_atomic_umax:
519 case nir_intrinsic_image_deref_atomic_umax:
520 case nir_intrinsic_bindless_image_atomic_imin:
521 case nir_intrinsic_image_atomic_imin:
522 case nir_intrinsic_image_deref_atomic_imin:
523 case nir_intrinsic_bindless_image_atomic_umin:
524 case nir_intrinsic_image_atomic_umin:
525 case nir_intrinsic_image_deref_atomic_umin:
526 case nir_intrinsic_bindless_image_atomic_or:
527 case nir_intrinsic_image_atomic_or:
528 case nir_intrinsic_image_deref_atomic_or:
529 case nir_intrinsic_bindless_image_atomic_xor:
530 case nir_intrinsic_image_atomic_xor:
531 case nir_intrinsic_image_deref_atomic_xor:
532 return OP_SUREDP;
533 case nir_intrinsic_bindless_image_load:
534 case nir_intrinsic_image_load:
535 case nir_intrinsic_image_deref_load:
536 return OP_SULDP;
537 case nir_intrinsic_bindless_image_samples:
538 case nir_intrinsic_image_samples:
539 case nir_intrinsic_image_deref_samples:
540 case nir_intrinsic_bindless_image_size:
541 case nir_intrinsic_image_size:
542 case nir_intrinsic_image_deref_size:
543 return OP_SUQ;
544 case nir_intrinsic_bindless_image_store:
545 case nir_intrinsic_image_store:
546 case nir_intrinsic_image_deref_store:
547 return OP_SUSTP;
548 default:
549 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
550 assert(false);
551 return OP_NOP;
552 }
553 }
554
555 operation
556 Converter::preOperationNeeded(nir_op op)
557 {
558 switch (op) {
559 case nir_op_fcos:
560 case nir_op_fsin:
561 return OP_PRESIN;
562 default:
563 return OP_NOP;
564 }
565 }
566
567 int
568 Converter::getSubOp(nir_op op)
569 {
570 switch (op) {
571 case nir_op_imul_high:
572 case nir_op_umul_high:
573 return NV50_IR_SUBOP_MUL_HIGH;
574 default:
575 return 0;
576 }
577 }
578
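// maps atomic, memory barrier and vote intrinsics onto the matching
// nv50 ir subops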
579 int
580 Converter::getSubOp(nir_intrinsic_op op)
581 {
582 switch (op) {
583 case nir_intrinsic_bindless_image_atomic_add:
584 case nir_intrinsic_global_atomic_add:
585 case nir_intrinsic_image_atomic_add:
586 case nir_intrinsic_image_deref_atomic_add:
587 case nir_intrinsic_shared_atomic_add:
588 case nir_intrinsic_ssbo_atomic_add:
589 return NV50_IR_SUBOP_ATOM_ADD;
590 case nir_intrinsic_bindless_image_atomic_and:
591 case nir_intrinsic_global_atomic_and:
592 case nir_intrinsic_image_atomic_and:
593 case nir_intrinsic_image_deref_atomic_and:
594 case nir_intrinsic_shared_atomic_and:
595 case nir_intrinsic_ssbo_atomic_and:
596 return NV50_IR_SUBOP_ATOM_AND;
597 case nir_intrinsic_bindless_image_atomic_comp_swap:
598 case nir_intrinsic_global_atomic_comp_swap:
599 case nir_intrinsic_image_atomic_comp_swap:
600 case nir_intrinsic_image_deref_atomic_comp_swap:
601 case nir_intrinsic_shared_atomic_comp_swap:
602 case nir_intrinsic_ssbo_atomic_comp_swap:
603 return NV50_IR_SUBOP_ATOM_CAS;
604 case nir_intrinsic_bindless_image_atomic_exchange:
605 case nir_intrinsic_global_atomic_exchange:
606 case nir_intrinsic_image_atomic_exchange:
607 case nir_intrinsic_image_deref_atomic_exchange:
608 case nir_intrinsic_shared_atomic_exchange:
609 case nir_intrinsic_ssbo_atomic_exchange:
610 return NV50_IR_SUBOP_ATOM_EXCH;
611 case nir_intrinsic_bindless_image_atomic_or:
612 case nir_intrinsic_global_atomic_or:
613 case nir_intrinsic_image_atomic_or:
614 case nir_intrinsic_image_deref_atomic_or:
615 case nir_intrinsic_shared_atomic_or:
616 case nir_intrinsic_ssbo_atomic_or:
617 return NV50_IR_SUBOP_ATOM_OR;
618 case nir_intrinsic_bindless_image_atomic_imax:
619 case nir_intrinsic_bindless_image_atomic_umax:
620 case nir_intrinsic_global_atomic_imax:
621 case nir_intrinsic_global_atomic_umax:
622 case nir_intrinsic_image_atomic_imax:
623 case nir_intrinsic_image_atomic_umax:
624 case nir_intrinsic_image_deref_atomic_imax:
625 case nir_intrinsic_image_deref_atomic_umax:
626 case nir_intrinsic_shared_atomic_imax:
627 case nir_intrinsic_shared_atomic_umax:
628 case nir_intrinsic_ssbo_atomic_imax:
629 case nir_intrinsic_ssbo_atomic_umax:
630 return NV50_IR_SUBOP_ATOM_MAX;
631 case nir_intrinsic_bindless_image_atomic_imin:
632 case nir_intrinsic_bindless_image_atomic_umin:
633 case nir_intrinsic_global_atomic_imin:
634 case nir_intrinsic_global_atomic_umin:
635 case nir_intrinsic_image_atomic_imin:
636 case nir_intrinsic_image_atomic_umin:
637 case nir_intrinsic_image_deref_atomic_imin:
638 case nir_intrinsic_image_deref_atomic_umin:
639 case nir_intrinsic_shared_atomic_imin:
640 case nir_intrinsic_shared_atomic_umin:
641 case nir_intrinsic_ssbo_atomic_imin:
642 case nir_intrinsic_ssbo_atomic_umin:
643 return NV50_IR_SUBOP_ATOM_MIN;
644 case nir_intrinsic_bindless_image_atomic_xor:
645 case nir_intrinsic_global_atomic_xor:
646 case nir_intrinsic_image_atomic_xor:
647 case nir_intrinsic_image_deref_atomic_xor:
648 case nir_intrinsic_shared_atomic_xor:
649 case nir_intrinsic_ssbo_atomic_xor:
650 return NV50_IR_SUBOP_ATOM_XOR;
651
652 case nir_intrinsic_group_memory_barrier:
653 case nir_intrinsic_memory_barrier:
654 case nir_intrinsic_memory_barrier_buffer:
655 case nir_intrinsic_memory_barrier_image:
656 return NV50_IR_SUBOP_MEMBAR(M, GL);
657 case nir_intrinsic_memory_barrier_shared:
658 return NV50_IR_SUBOP_MEMBAR(M, CTA);
659
660 case nir_intrinsic_vote_all:
661 return NV50_IR_SUBOP_VOTE_ALL;
662 case nir_intrinsic_vote_any:
663 return NV50_IR_SUBOP_VOTE_ANY;
664 case nir_intrinsic_vote_ieq:
665 return NV50_IR_SUBOP_VOTE_UNI;
666 default:
667 return 0;
668 }
669 }
670
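// condition code used by OP_SET for the 32-bit comparison ops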
671 CondCode
672 Converter::getCondCode(nir_op op)
673 {
674 switch (op) {
675 case nir_op_feq32:
676 case nir_op_ieq32:
677 return CC_EQ;
678 case nir_op_fge32:
679 case nir_op_ige32:
680 case nir_op_uge32:
681 return CC_GE;
682 case nir_op_flt32:
683 case nir_op_ilt32:
684 case nir_op_ult32:
685 return CC_LT;
686 case nir_op_fne32:
687 return CC_NEU;
688 case nir_op_ine32:
689 return CC_NE;
690 default:
691 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
692 assert(false);
693 return CC_FL;
694 }
695 }
696
697 Converter::LValues&
698 Converter::convert(nir_alu_dest *dest)
699 {
700 return convert(&dest->dest);
701 }
702
703 Converter::LValues&
704 Converter::convert(nir_dest *dest)
705 {
706 if (dest->is_ssa)
707 return convert(&dest->ssa);
708 if (dest->reg.indirect) {
709 ERROR("no support for indirects.\n");
710 assert(false);
711 }
712 return convert(dest->reg.reg);
713 }
714
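// nir_registers are backed by scratch values allocated lazily, at least
// 32 bits wide per component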
715 Converter::LValues&
716 Converter::convert(nir_register *reg)
717 {
718 NirDefMap::iterator it = regDefs.find(reg->index);
719 if (it != regDefs.end())
720 return it->second;
721
722 LValues newDef(reg->num_components);
723 for (uint8_t i = 0; i < reg->num_components; i++)
724 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
725 return regDefs[reg->index] = newDef;
726 }
727
728 Converter::LValues&
729 Converter::convert(nir_ssa_def *def)
730 {
731 NirDefMap::iterator it = ssaDefs.find(def->index);
732 if (it != ssaDefs.end())
733 return it->second;
734
735 LValues newDef(def->num_components);
736 for (uint8_t i = 0; i < def->num_components; i++)
737 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
738 return ssaDefs[def->index] = newDef;
739 }
740
741 Value*
742 Converter::getSrc(nir_alu_src *src, uint8_t component)
743 {
744 if (src->abs || src->negate) {
745 ERROR("modifiers currently not supported on nir_alu_src\n");
746 assert(false);
747 }
748 return getSrc(&src->src, src->swizzle[component]);
749 }
750
751 Value*
752 Converter::getSrc(nir_register *reg, uint8_t idx)
753 {
754 NirDefMap::iterator it = regDefs.find(reg->index);
755 if (it == regDefs.end())
756 return convert(reg)[idx];
757 return it->second[idx];
758 }
759
760 Value*
761 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
762 {
763 if (src->is_ssa)
764 return getSrc(src->ssa, idx);
765
766 if (src->reg.indirect) {
767 if (indirect)
768 return getSrc(src->reg.indirect, idx);
769 ERROR("no support for indirects.\n");
770 assert(false);
771 return NULL;
772 }
773
774 return getSrc(src->reg.reg, idx);
775 }
776
777 Value*
778 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
779 {
780 ImmediateMap::iterator iit = immediates.find(src->index);
781 if (iit != immediates.end())
782 return convert((*iit).second, idx);
783
784 NirDefMap::iterator it = ssaDefs.find(src->index);
785 if (it == ssaDefs.end()) {
786 ERROR("SSA value %u not found\n", src->index);
787 assert(false);
788 return NULL;
789 }
790 return it->second[idx];
791 }
792
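// if the source folds to a constant, return it and set indirect to NULL;
// otherwise return 0 and hand the indirect value back through the reference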
793 uint32_t
794 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
795 {
796 nir_const_value *offset = nir_src_as_const_value(*src);
797
798 if (offset) {
799 indirect = NULL;
800 return offset[0].u32;
801 }
802
803 indirect = getSrc(src, idx, true);
804 return 0;
805 }
806
807 uint32_t
808 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
809 {
810 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
811 if (indirect && !isScalar)
812 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
813 return idx;
814 }
815
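// translates a gl_vert_attrib slot into a TGSI semantic name/index pair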
816 static void
817 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
818 {
819 assert(name && index);
820
821 if (slot >= VERT_ATTRIB_MAX) {
822 ERROR("invalid vert attrib slot %u\n", slot);
823 assert(false);
824 return;
825 }
826
827 if (slot >= VERT_ATTRIB_GENERIC0 &&
828 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
829 *name = TGSI_SEMANTIC_GENERIC;
830 *index = slot - VERT_ATTRIB_GENERIC0;
831 return;
832 }
833
834 if (slot >= VERT_ATTRIB_TEX0 &&
835 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
836 *name = TGSI_SEMANTIC_TEXCOORD;
837 *index = slot - VERT_ATTRIB_TEX0;
838 return;
839 }
840
841 switch (slot) {
842 case VERT_ATTRIB_COLOR0:
843 *name = TGSI_SEMANTIC_COLOR;
844 *index = 0;
845 break;
846 case VERT_ATTRIB_COLOR1:
847 *name = TGSI_SEMANTIC_COLOR;
848 *index = 1;
849 break;
850 case VERT_ATTRIB_EDGEFLAG:
851 *name = TGSI_SEMANTIC_EDGEFLAG;
852 *index = 0;
853 break;
854 case VERT_ATTRIB_FOG:
855 *name = TGSI_SEMANTIC_FOG;
856 *index = 0;
857 break;
858 case VERT_ATTRIB_NORMAL:
859 *name = TGSI_SEMANTIC_NORMAL;
860 *index = 0;
861 break;
862 case VERT_ATTRIB_POS:
863 *name = TGSI_SEMANTIC_POSITION;
864 *index = 0;
865 break;
866 case VERT_ATTRIB_POINT_SIZE:
867 *name = TGSI_SEMANTIC_PSIZE;
868 *index = 0;
869 break;
870 default:
871 ERROR("unknown vert attrib slot %u\n", slot);
872 assert(false);
873 break;
874 }
875 }
876
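// translates NIR interpolation qualifiers into nv50_ir_varying flags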
877 void
878 Converter::setInterpolate(nv50_ir_varying *var,
879 uint8_t mode,
880 bool centroid,
881 unsigned semantic)
882 {
883 switch (mode) {
884 case INTERP_MODE_FLAT:
885 var->flat = 1;
886 break;
887 case INTERP_MODE_NONE:
888 if (semantic == TGSI_SEMANTIC_COLOR)
889 var->sc = 1;
890 else if (semantic == TGSI_SEMANTIC_POSITION)
891 var->linear = 1;
892 break;
893 case INTERP_MODE_NOPERSPECTIVE:
894 var->linear = 1;
895 break;
896 case INTERP_MODE_SMOOTH:
897 break;
898 }
899 var->centroid = centroid;
900 }
901
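// number of i/o slots a variable occupies; for arrayed per-vertex i/o the
// outermost array dimension is dropped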
902 static uint16_t
903 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
904 bool input, const nir_variable *var)
905 {
906 if (!type->is_array())
907 return type->count_attribute_slots(false);
908
909 uint16_t slots;
910 switch (stage) {
911 case Program::TYPE_GEOMETRY:
912 slots = type->uniform_locations();
913 if (input)
914 slots /= info.gs.vertices_in;
915 break;
916 case Program::TYPE_TESSELLATION_CONTROL:
917 case Program::TYPE_TESSELLATION_EVAL:
918 // remove first dimension
919 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
920 slots = type->uniform_locations();
921 else
922 slots = type->fields.array->uniform_locations();
923 break;
924 default:
925 slots = type->count_attribute_slots(false);
926 break;
927 }
928
929 return slots;
930 }
931
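// fills the nv50_ir_prog_info tables (sysvals, inputs, outputs) consumed by
// codegen and the driver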
932 bool Converter::assignSlots() {
933 unsigned name;
934 unsigned index;
935
936 info->io.viewportId = -1;
937 info->numInputs = 0;
938 info->numOutputs = 0;
939
940 // we have to fix up the uniform locations for arrays
941 unsigned numImages = 0;
942 nir_foreach_variable(var, &nir->uniforms) {
943 const glsl_type *type = var->type;
944 if (!type->without_array()->is_image())
945 continue;
946 var->data.driver_location = numImages;
947 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
948 }
949
950 info->numSysVals = 0;
951 for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
952 if (!(nir->info.system_values_read & 1ull << i))
953 continue;
954
955 info->sv[info->numSysVals].sn = tgsi_get_sysval_semantic(i);
956 info->sv[info->numSysVals].si = 0;
957 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
958
959 switch (i) {
960 case SYSTEM_VALUE_INSTANCE_ID:
961 info->io.instanceId = info->numSysVals;
962 break;
963 case SYSTEM_VALUE_TESS_LEVEL_INNER:
964 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
965 info->sv[info->numSysVals].patch = 1;
966 break;
967 case SYSTEM_VALUE_VERTEX_ID:
968 info->io.vertexId = info->numSysVals;
969 break;
970 default:
971 break;
972 }
973
974 info->numSysVals += 1;
975 }
976
977 if (prog->getType() == Program::TYPE_COMPUTE)
978 return true;
979
980 nir_foreach_variable(var, &nir->inputs) {
981 const glsl_type *type = var->type;
982 int slot = var->data.location;
983 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
984 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
985 : type->component_slots();
986 uint32_t frac = var->data.location_frac;
987 uint32_t vary = var->data.driver_location;
988
989 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
990 if (comp > 2)
991 slots *= 2;
992 }
993
994 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
995
996 switch(prog->getType()) {
997 case Program::TYPE_FRAGMENT:
998 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
999 &name, &index);
1000 for (uint16_t i = 0; i < slots; ++i) {
1001 setInterpolate(&info->in[vary + i], var->data.interpolation,
1002 var->data.centroid | var->data.sample, name);
1003 }
1004 break;
1005 case Program::TYPE_GEOMETRY:
1006 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1007 &name, &index);
1008 break;
1009 case Program::TYPE_TESSELLATION_CONTROL:
1010 case Program::TYPE_TESSELLATION_EVAL:
1011 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1012 &name, &index);
1013 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1014 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1015 break;
1016 case Program::TYPE_VERTEX:
1017 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1018 switch (name) {
1019 case TGSI_SEMANTIC_EDGEFLAG:
1020 info->io.edgeFlagIn = vary;
1021 break;
1022 default:
1023 break;
1024 }
1025 break;
1026 default:
1027 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1028 return false;
1029 }
1030
1031 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1032 info->in[vary].id = vary;
1033 info->in[vary].patch = var->data.patch;
1034 info->in[vary].sn = name;
1035 info->in[vary].si = index + i;
1036 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1037 if (i & 0x1)
1038 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1039 else
1040 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1041 else
1042 info->in[vary].mask |= ((1 << comp) - 1) << frac;
1043 }
1044 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1045 }
1046
1047 nir_foreach_variable(var, &nir->outputs) {
1048 const glsl_type *type = var->type;
1049 int slot = var->data.location;
1050 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1051 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1052 : type->component_slots();
1053 uint32_t frac = var->data.location_frac;
1054 uint32_t vary = var->data.driver_location;
1055
1056 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1057 if (comp > 2)
1058 slots *= 2;
1059 }
1060
1061 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1062
1063 switch(prog->getType()) {
1064 case Program::TYPE_FRAGMENT:
1065 tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index);
1066 switch (name) {
1067 case TGSI_SEMANTIC_COLOR:
1068 if (!var->data.fb_fetch_output)
1069 info->prop.fp.numColourResults++;
1070 info->prop.fp.separateFragData = true;
1071 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1072 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1073 index = index == 0 ? var->data.index : index;
1074 break;
1075 case TGSI_SEMANTIC_POSITION:
1076 info->io.fragDepth = vary;
1077 info->prop.fp.writesDepth = true;
1078 break;
1079 case TGSI_SEMANTIC_SAMPLEMASK:
1080 info->io.sampleMask = vary;
1081 break;
1082 default:
1083 break;
1084 }
1085 break;
1086 case Program::TYPE_GEOMETRY:
1087 case Program::TYPE_TESSELLATION_CONTROL:
1088 case Program::TYPE_TESSELLATION_EVAL:
1089 case Program::TYPE_VERTEX:
1090 tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1091 &name, &index);
1092
1093 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1094 name != TGSI_SEMANTIC_TESSOUTER)
1095 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1096
1097 switch (name) {
1098 case TGSI_SEMANTIC_CLIPDIST:
1099 info->io.genUserClip = -1;
1100 break;
1101 case TGSI_SEMANTIC_CLIPVERTEX:
1102 clipVertexOutput = vary;
1103 break;
1104 case TGSI_SEMANTIC_EDGEFLAG:
1105 info->io.edgeFlagOut = vary;
1106 break;
1107 case TGSI_SEMANTIC_POSITION:
1108 if (clipVertexOutput < 0)
1109 clipVertexOutput = vary;
1110 break;
1111 default:
1112 break;
1113 }
1114 break;
1115 default:
1116 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1117 return false;
1118 }
1119
1120 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1121 info->out[vary].id = vary;
1122 info->out[vary].patch = var->data.patch;
1123 info->out[vary].sn = name;
1124 info->out[vary].si = index + i;
1125 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1126 if (i & 0x1)
1127 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1128 else
1129 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1130 else
1131 info->out[vary].mask |= ((1 << comp) - 1) << frac;
1132
1133 if (nir->info.outputs_read & 1ull << slot)
1134 info->out[vary].oread = 1;
1135 }
1136 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1137 }
1138
1139 if (info->io.genUserClip > 0) {
1140 info->io.clipDistances = info->io.genUserClip;
1141
1142 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1143
1144 for (unsigned int n = 0; n < nOut; ++n) {
1145 unsigned int i = info->numOutputs++;
1146 info->out[i].id = i;
1147 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1148 info->out[i].si = n;
1149 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1150 }
1151 }
1152
1153 return info->assignSlots(info) == 0;
1154 }
1155
1156 uint32_t
1157 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1158 {
1159 DataType ty;
1160 int offset = nir_intrinsic_component(insn);
1161 bool input;
1162
1163 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1164 ty = getDType(insn);
1165 else
1166 ty = getSType(insn->src[0], false, false);
1167
1168 switch (insn->intrinsic) {
1169 case nir_intrinsic_load_input:
1170 case nir_intrinsic_load_interpolated_input:
1171 case nir_intrinsic_load_per_vertex_input:
1172 input = true;
1173 break;
1174 case nir_intrinsic_load_output:
1175 case nir_intrinsic_load_per_vertex_output:
1176 case nir_intrinsic_store_output:
1177 case nir_intrinsic_store_per_vertex_output:
1178 input = false;
1179 break;
1180 default:
1181 ERROR("unknown intrinsic in getSlotAddress %s\n",
1182 nir_intrinsic_infos[insn->intrinsic].name);
1183 input = false;
1184 assert(false);
1185 break;
1186 }
1187
1188 if (typeSizeof(ty) == 8) {
1189 slot *= 2;
1190 slot += offset;
1191 if (slot >= 4) {
1192 idx += 1;
1193 slot -= 4;
1194 }
1195 } else {
1196 slot += offset;
1197 }
1198
1199 assert(slot < 4);
1200 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1201 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1202
1203 const nv50_ir_varying *vary = input ? info->in : info->out;
1204 return vary[idx].slot[slot] * 4;
1205 }
1206
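// emits a load from the given file; 64-bit loads from const/buffer memory or
// through an indirect are split into two 32-bit loads merged into the def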
1207 Instruction *
1208 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1209 uint32_t base, uint8_t c, Value *indirect0,
1210 Value *indirect1, bool patch)
1211 {
1212 unsigned int tySize = typeSizeof(ty);
1213
1214 if (tySize == 8 &&
1215 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1216 Value *lo = getSSA();
1217 Value *hi = getSSA();
1218
1219 Instruction *loi =
1220 mkLoad(TYPE_U32, lo,
1221 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1222 indirect0);
1223 loi->setIndirect(0, 1, indirect1);
1224 loi->perPatch = patch;
1225
1226 Instruction *hii =
1227 mkLoad(TYPE_U32, hi,
1228 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1229 indirect0);
1230 hii->setIndirect(0, 1, indirect1);
1231 hii->perPatch = patch;
1232
1233 return mkOp2(OP_MERGE, ty, def, lo, hi);
1234 } else {
1235 Instruction *ld =
1236 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1237 ld->setIndirect(0, 1, indirect1);
1238 ld->perPatch = patch;
1239 return ld;
1240 }
1241 }
1242
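// store counterpart of loadFrom: 64-bit stores through an indirect are split
// into two 32-bit stores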
1243 void
1244 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1245 DataType ty, Value *src, uint8_t idx, uint8_t c,
1246 Value *indirect0, Value *indirect1)
1247 {
1248 uint8_t size = typeSizeof(ty);
1249 uint32_t address = getSlotAddress(insn, idx, c);
1250
1251 if (size == 8 && indirect0) {
1252 Value *split[2];
1253 mkSplit(split, 4, src);
1254
1255 if (op == OP_EXPORT) {
1256 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1257 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1258 }
1259
1260 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1261 split[0])->perPatch = info->out[idx].patch;
1262 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1263 split[1])->perPatch = info->out[idx].patch;
1264 } else {
1265 if (op == OP_EXPORT)
1266 src = mkMov(getSSA(size), src, ty)->getDef(0);
1267 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1268 src)->perPatch = info->out[idx].patch;
1269 }
1270 }
1271
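// copies per-stage shader_info bits into the nv50_ir_prog_info properties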
1272 bool
1273 Converter::parseNIR()
1274 {
1275 info->bin.tlsSpace = 0;
1276 info->io.clipDistances = nir->info.clip_distance_array_size;
1277 info->io.cullDistances = nir->info.cull_distance_array_size;
1278
1279 switch(prog->getType()) {
1280 case Program::TYPE_COMPUTE:
1281 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1282 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1283 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1284 info->bin.smemSize = nir->info.cs.shared_size;
1285 break;
1286 case Program::TYPE_FRAGMENT:
1287 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1288 info->prop.fp.persampleInvocation =
1289 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1290 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1291 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1292 info->prop.fp.readsSampleLocations =
1293 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1294 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1295 info->prop.fp.usesSampleMaskIn =
1296 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1297 break;
1298 case Program::TYPE_GEOMETRY:
1299 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1300 info->prop.gp.instanceCount = nir->info.gs.invocations;
1301 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1302 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1303 break;
1304 case Program::TYPE_TESSELLATION_CONTROL:
1305 case Program::TYPE_TESSELLATION_EVAL:
1306 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1307 info->prop.tp.domain = GL_LINES;
1308 else
1309 info->prop.tp.domain = nir->info.tess.primitive_mode;
1310 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1311 info->prop.tp.outputPrim =
1312 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1313 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1314 info->prop.tp.winding = !nir->info.tess.ccw;
1315 break;
1316 case Program::TYPE_VERTEX:
1317 info->prop.vp.usesDrawParameters =
1318 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1319 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1320 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1321 break;
1322 default:
1323 break;
1324 }
1325
1326 return true;
1327 }
1328
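// currently only main is handled here: set up entry/exit blocks, the
// stage-specific prolog values and local memory space for register arrays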
1329 bool
1330 Converter::visit(nir_function *function)
1331 {
1332 assert(function->impl);
1333
1334 // usually the blocks will set everything up, but main is special
1335 BasicBlock *entry = new BasicBlock(prog->main);
1336 exit = new BasicBlock(prog->main);
1337 blocks[nir_start_block(function->impl)->index] = entry;
1338 prog->main->setEntry(entry);
1339 prog->main->setExit(exit);
1340
1341 setPosition(entry, true);
1342
1343 if (info->io.genUserClip > 0) {
1344 for (int c = 0; c < 4; ++c)
1345 clipVtx[c] = getScratch();
1346 }
1347
1348 switch (prog->getType()) {
1349 case Program::TYPE_TESSELLATION_CONTROL:
1350 outBase = mkOp2v(
1351 OP_SUB, TYPE_U32, getSSA(),
1352 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1353 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1354 break;
1355 case Program::TYPE_FRAGMENT: {
1356 Symbol *sv = mkSysVal(SV_POSITION, 3);
1357 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1358 fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1359 break;
1360 }
1361 default:
1362 break;
1363 }
1364
1365 nir_foreach_register(reg, &function->impl->registers) {
1366 if (reg->num_array_elems) {
1367 // TODO: packed variables would be nice, but MemoryOpt fails
1368 // replace 4 with reg->num_components
1369 uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1370 regToLmemOffset[reg->index] = info->bin.tlsSpace;
1371 info->bin.tlsSpace += size;
1372 }
1373 }
1374
1375 nir_index_ssa_defs(function->impl);
1376 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1377 if (!visit(node))
1378 return false;
1379 }
1380
1381 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1382 setPosition(exit, true);
1383
1384 if ((prog->getType() == Program::TYPE_VERTEX ||
1385 prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1386 && info->io.genUserClip > 0)
1387 handleUserClipPlanes();
1388
1389 // TODO: for non-main functions this needs to be an OP_RETURN
1390 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1391 return true;
1392 }
1393
1394 bool
1395 Converter::visit(nir_cf_node *node)
1396 {
1397 switch (node->type) {
1398 case nir_cf_node_block:
1399 return visit(nir_cf_node_as_block(node));
1400 case nir_cf_node_if:
1401 return visit(nir_cf_node_as_if(node));
1402 case nir_cf_node_loop:
1403 return visit(nir_cf_node_as_loop(node));
1404 default:
1405 ERROR("unknown nir_cf_node type %u\n", node->type);
1406 return false;
1407 }
1408 }
1409
1410 bool
1411 Converter::visit(nir_block *block)
1412 {
1413 if (!block->predecessors->entries && block->instr_list.is_empty())
1414 return true;
1415
1416 BasicBlock *bb = convert(block);
1417
1418 setPosition(bb, true);
1419 nir_foreach_instr(insn, block) {
1420 if (!visit(insn))
1421 return false;
1422 }
1423 return true;
1424 }
1425
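// lowers a nir_if to a conditional BRA over the then block, with JOINAT/JOIN
// emitted when both branches reconverge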
1426 bool
1427 Converter::visit(nir_if *nif)
1428 {
1429 DataType sType = getSType(nif->condition, false, false);
1430 Value *src = getSrc(&nif->condition, 0);
1431
1432 nir_block *lastThen = nir_if_last_then_block(nif);
1433 nir_block *lastElse = nir_if_last_else_block(nif);
1434
1435 assert(!lastThen->successors[1]);
1436 assert(!lastElse->successors[1]);
1437
1438 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1439 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1440
1441 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1442 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1443
1444 // we only insert joinats if both branches end up at the end of the if again.
1445 // the reasons for this not to happen are breaks/continues/ret/..., which
1446 // have their own handling
1447 if (lastThen->successors[0] == lastElse->successors[0])
1448 bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1449 CC_ALWAYS, NULL);
1450
1451 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1452
1453 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1454 if (!visit(node))
1455 return false;
1456 }
1457 setPosition(convert(lastThen), true);
1458 if (!bb->getExit() ||
1459 !bb->getExit()->asFlow() ||
1460 bb->getExit()->asFlow()->op == OP_JOIN) {
1461 BasicBlock *tailBB = convert(lastThen->successors[0]);
1462 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1463 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1464 }
1465
1466 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1467 if (!visit(node))
1468 return false;
1469 }
1470 setPosition(convert(lastElse), true);
1471 if (!bb->getExit() ||
1472 !bb->getExit()->asFlow() ||
1473 bb->getExit()->asFlow()->op == OP_JOIN) {
1474 BasicBlock *tailBB = convert(lastElse->successors[0]);
1475 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1476 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1477 }
1478
1479 if (lastThen->successors[0] == lastElse->successors[0]) {
1480 setPosition(convert(lastThen->successors[0]), true);
1481 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1482 }
1483
1484 return true;
1485 }
1486
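// loops are bracketed by PREBREAK/PRECONT; a CONT back edge is added unless
// the body already ends in explicit flow control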
1487 bool
1488 Converter::visit(nir_loop *loop)
1489 {
1490 curLoopDepth += 1;
1491 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1492
1493 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1494 BasicBlock *tailBB =
1495 convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1496 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1497
1498 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1499 setPosition(loopBB, false);
1500 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1501
1502 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1503 if (!visit(node))
1504 return false;
1505 }
1506 Instruction *insn = bb->getExit();
1507 if (bb->cfg.incidentCount() != 0) {
1508 if (!insn || !insn->asFlow()) {
1509 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1510 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1511 } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1512 tailBB->cfg.incidentCount() == 0) {
1513 // RA doesn't like having blocks around with no incident edge,
1514 // so we create a fake one to make it happy
1515 bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1516 }
1517 }
1518
1519 curLoopDepth -= 1;
1520
1521 return true;
1522 }
1523
1524 bool
1525 Converter::visit(nir_instr *insn)
1526 {
1527 // we need an insertion point for immediate loads generated on the fly
1528 immInsertPos = bb->getExit();
1529 switch (insn->type) {
1530 case nir_instr_type_alu:
1531 return visit(nir_instr_as_alu(insn));
1532 case nir_instr_type_deref:
1533 return visit(nir_instr_as_deref(insn));
1534 case nir_instr_type_intrinsic:
1535 return visit(nir_instr_as_intrinsic(insn));
1536 case nir_instr_type_jump:
1537 return visit(nir_instr_as_jump(insn));
1538 case nir_instr_type_load_const:
1539 return visit(nir_instr_as_load_const(insn));
1540 case nir_instr_type_ssa_undef:
1541 return visit(nir_instr_as_ssa_undef(insn));
1542 case nir_instr_type_tex:
1543 return visit(nir_instr_as_tex(insn));
1544 default:
1545 ERROR("unknown nir_instr type %u\n", insn->type);
1546 return false;
1547 }
1548 return true;
1549 }
1550
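// maps system-value load intrinsics onto nv50 ir system value semantics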
1551 SVSemantic
1552 Converter::convert(nir_intrinsic_op intr)
1553 {
1554 switch (intr) {
1555 case nir_intrinsic_load_base_vertex:
1556 return SV_BASEVERTEX;
1557 case nir_intrinsic_load_base_instance:
1558 return SV_BASEINSTANCE;
1559 case nir_intrinsic_load_draw_id:
1560 return SV_DRAWID;
1561 case nir_intrinsic_load_front_face:
1562 return SV_FACE;
1563 case nir_intrinsic_load_helper_invocation:
1564 return SV_THREAD_KILL;
1565 case nir_intrinsic_load_instance_id:
1566 return SV_INSTANCE_ID;
1567 case nir_intrinsic_load_invocation_id:
1568 return SV_INVOCATION_ID;
1569 case nir_intrinsic_load_local_group_size:
1570 return SV_NTID;
1571 case nir_intrinsic_load_local_invocation_id:
1572 return SV_TID;
1573 case nir_intrinsic_load_num_work_groups:
1574 return SV_NCTAID;
1575 case nir_intrinsic_load_patch_vertices_in:
1576 return SV_VERTEX_COUNT;
1577 case nir_intrinsic_load_primitive_id:
1578 return SV_PRIMITIVE_ID;
1579 case nir_intrinsic_load_sample_id:
1580 return SV_SAMPLE_INDEX;
1581 case nir_intrinsic_load_sample_mask_in:
1582 return SV_SAMPLE_MASK;
1583 case nir_intrinsic_load_sample_pos:
1584 return SV_SAMPLE_POS;
1585 case nir_intrinsic_load_subgroup_eq_mask:
1586 return SV_LANEMASK_EQ;
1587 case nir_intrinsic_load_subgroup_ge_mask:
1588 return SV_LANEMASK_GE;
1589 case nir_intrinsic_load_subgroup_gt_mask:
1590 return SV_LANEMASK_GT;
1591 case nir_intrinsic_load_subgroup_le_mask:
1592 return SV_LANEMASK_LE;
1593 case nir_intrinsic_load_subgroup_lt_mask:
1594 return SV_LANEMASK_LT;
1595 case nir_intrinsic_load_subgroup_invocation:
1596 return SV_LANEID;
1597 case nir_intrinsic_load_tess_coord:
1598 return SV_TESS_COORD;
1599 case nir_intrinsic_load_tess_level_inner:
1600 return SV_TESS_INNER;
1601 case nir_intrinsic_load_tess_level_outer:
1602 return SV_TESS_OUTER;
1603 case nir_intrinsic_load_vertex_id:
1604 return SV_VERTEX_ID;
1605 case nir_intrinsic_load_work_group_id:
1606 return SV_CTAID;
1607 default:
1608 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1609 nir_intrinsic_infos[intr].name);
1610 assert(false);
1611 return SV_LAST;
1612 }
1613 }
1614
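// the main intrinsic dispatcher; i/o intrinsics go through loadFrom/storeTo,
// most others map onto single nv50 ir ops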
1615 bool
1616 Converter::visit(nir_intrinsic_instr *insn)
1617 {
1618 nir_intrinsic_op op = insn->intrinsic;
1619 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1620
1621 switch (op) {
1622 case nir_intrinsic_load_uniform: {
1623 LValues &newDefs = convert(&insn->dest);
1624 const DataType dType = getDType(insn);
1625 Value *indirect;
1626 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1627 for (uint8_t i = 0; i < insn->num_components; ++i) {
1628 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1629 }
1630 break;
1631 }
1632 case nir_intrinsic_store_output:
1633 case nir_intrinsic_store_per_vertex_output: {
1634 Value *indirect;
1635 DataType dType = getSType(insn->src[0], false, false);
1636 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1637
1638 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1639 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1640 continue;
1641
1642 uint8_t offset = 0;
1643 Value *src = getSrc(&insn->src[0], i);
1644 switch (prog->getType()) {
1645 case Program::TYPE_FRAGMENT: {
1646 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1647 // TGSI uses a different interface than NIR: TGSI stores the
1648 // value in the z component, NIR in x
1649 offset += 2;
1650 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1651 }
1652 break;
1653 }
1654 case Program::TYPE_GEOMETRY:
1655 case Program::TYPE_VERTEX: {
1656 if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
1657 mkMov(clipVtx[i], src);
1658 src = clipVtx[i];
1659 }
1660 break;
1661 }
1662 default:
1663 break;
1664 }
1665
1666 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1667 }
1668 break;
1669 }
1670 case nir_intrinsic_load_input:
1671 case nir_intrinsic_load_interpolated_input:
1672 case nir_intrinsic_load_output: {
1673 LValues &newDefs = convert(&insn->dest);
1674
1675 // FBFetch
1676 if (prog->getType() == Program::TYPE_FRAGMENT &&
1677 op == nir_intrinsic_load_output) {
1678 std::vector<Value*> defs, srcs;
1679 uint8_t mask = 0;
1680
1681 srcs.push_back(getSSA());
1682 srcs.push_back(getSSA());
1683 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1684 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1685 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1686 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1687
1688 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1689 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1690
1691 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1692 defs.push_back(newDefs[i]);
1693 mask |= 1 << i;
1694 }
1695
1696 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1697 texi->tex.levelZero = 1;
1698 texi->tex.mask = mask;
1699 texi->tex.useOffsets = 0;
1700 texi->tex.r = 0xffff;
1701 texi->tex.s = 0xffff;
1702
1703 info->prop.fp.readsFramebuffer = true;
1704 break;
1705 }
1706
1707 const DataType dType = getDType(insn);
1708 Value *indirect;
1709 bool input = op != nir_intrinsic_load_output;
1710 operation nvirOp;
1711 uint32_t mode = 0;
1712
1713 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1714 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1715
1716 // see load_barycentric_* handling
1717 if (prog->getType() == Program::TYPE_FRAGMENT) {
1718 mode = translateInterpMode(&vary, nvirOp);
1719 if (op == nir_intrinsic_load_interpolated_input) {
1720 ImmediateValue immMode;
1721 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1722 mode |= immMode.reg.data.u32;
1723 }
1724 }
1725
1726 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1727 uint32_t address = getSlotAddress(insn, idx, i);
1728 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1729 if (prog->getType() == Program::TYPE_FRAGMENT) {
1730 int s = 1;
1731 if (typeSizeof(dType) == 8) {
1732 Value *lo = getSSA();
1733 Value *hi = getSSA();
1734 Instruction *interp;
1735
1736 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1737 if (nvirOp == OP_PINTERP)
1738 interp->setSrc(s++, fp.position);
1739 if (mode & NV50_IR_INTERP_OFFSET)
1740 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1741 interp->setInterpolate(mode);
1742 interp->setIndirect(0, 0, indirect);
1743
1744 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1745 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1746 if (nvirOp == OP_PINTERP)
1747 interp->setSrc(s++, fp.position);
1748 if (mode & NV50_IR_INTERP_OFFSET)
1749 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1750 interp->setInterpolate(mode);
1751 interp->setIndirect(0, 0, indirect);
1752
1753 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1754 } else {
1755 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1756 if (nvirOp == OP_PINTERP)
1757 interp->setSrc(s++, fp.position);
1758 if (mode & NV50_IR_INTERP_OFFSET)
1759 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1760 interp->setInterpolate(mode);
1761 interp->setIndirect(0, 0, indirect);
1762 }
1763 } else {
1764 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1765 }
1766 }
1767 break;
1768 }
1769 case nir_intrinsic_load_kernel_input: {
1770 assert(prog->getType() == Program::TYPE_COMPUTE);
1771 assert(insn->num_components == 1);
1772
1773 LValues &newDefs = convert(&insn->dest);
1774 const DataType dType = getDType(insn);
1775 Value *indirect;
1776 uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
1777
1778 mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
1779 break;
1780 }
1781 case nir_intrinsic_load_barycentric_at_offset:
1782 case nir_intrinsic_load_barycentric_at_sample:
1783 case nir_intrinsic_load_barycentric_centroid:
1784 case nir_intrinsic_load_barycentric_pixel:
1785 case nir_intrinsic_load_barycentric_sample: {
1786 LValues &newDefs = convert(&insn->dest);
1787 uint32_t mode;
1788
1789 if (op == nir_intrinsic_load_barycentric_centroid ||
1790 op == nir_intrinsic_load_barycentric_sample) {
1791 mode = NV50_IR_INTERP_CENTROID;
1792 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
1793 Value *offs[2];
1794 for (uint8_t c = 0; c < 2; c++) {
1795 offs[c] = getScratch();
1796 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1797 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1798 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1799 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1800 }
1801 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1802
1803 mode = NV50_IR_INTERP_OFFSET;
1804 } else if (op == nir_intrinsic_load_barycentric_pixel) {
1805 mode = NV50_IR_INTERP_DEFAULT;
1806 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1807 info->prop.fp.readsSampleLocations = true;
1808 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1809 mode = NV50_IR_INTERP_OFFSET;
1810 } else {
1811 unreachable("all intrinsics already handled above");
1812 }
1813
1814 loadImm(newDefs[1], mode);
1815 break;
1816 }
1817 case nir_intrinsic_discard:
1818 mkOp(OP_DISCARD, TYPE_NONE, NULL);
1819 break;
1820 case nir_intrinsic_discard_if: {
1821 Value *pred = getSSA(1, FILE_PREDICATE);
1822 if (insn->num_components > 1) {
1823 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1824 assert(false);
1825 return false;
1826 }
1827 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1828 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1829 break;
1830 }
1831 case nir_intrinsic_load_base_vertex:
1832 case nir_intrinsic_load_base_instance:
1833 case nir_intrinsic_load_draw_id:
1834 case nir_intrinsic_load_front_face:
1835 case nir_intrinsic_load_helper_invocation:
1836 case nir_intrinsic_load_instance_id:
1837 case nir_intrinsic_load_invocation_id:
1838 case nir_intrinsic_load_local_group_size:
1839 case nir_intrinsic_load_local_invocation_id:
1840 case nir_intrinsic_load_num_work_groups:
1841 case nir_intrinsic_load_patch_vertices_in:
1842 case nir_intrinsic_load_primitive_id:
1843 case nir_intrinsic_load_sample_id:
1844 case nir_intrinsic_load_sample_mask_in:
1845 case nir_intrinsic_load_sample_pos:
1846 case nir_intrinsic_load_subgroup_eq_mask:
1847 case nir_intrinsic_load_subgroup_ge_mask:
1848 case nir_intrinsic_load_subgroup_gt_mask:
1849 case nir_intrinsic_load_subgroup_le_mask:
1850 case nir_intrinsic_load_subgroup_lt_mask:
1851 case nir_intrinsic_load_subgroup_invocation:
1852 case nir_intrinsic_load_tess_coord:
1853 case nir_intrinsic_load_tess_level_inner:
1854 case nir_intrinsic_load_tess_level_outer:
1855 case nir_intrinsic_load_vertex_id:
1856 case nir_intrinsic_load_work_group_id: {
1857 const DataType dType = getDType(insn);
1858 SVSemantic sv = convert(op);
1859 LValues &newDefs = convert(&insn->dest);
1860
1861 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1862 Value *def;
1863 if (typeSizeof(dType) == 8)
1864 def = getSSA();
1865 else
1866 def = newDefs[i];
1867
1868 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1869 loadImm(def, 0u);
1870 } else {
1871 Symbol *sym = mkSysVal(sv, i);
1872 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1873 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1874 rdsv->perPatch = 1;
1875 }
1876
1877 if (typeSizeof(dType) == 8)
1878 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1879 }
1880 break;
1881 }
1882 // constants
1883 case nir_intrinsic_load_subgroup_size: {
1884 LValues &newDefs = convert(&insn->dest);
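           // warps, and therefore subgroups, are always 32 invocations wide
           // on this hardware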
1885 loadImm(newDefs[0], 32u);
1886 break;
1887 }
1888 case nir_intrinsic_vote_all:
1889 case nir_intrinsic_vote_any:
1890 case nir_intrinsic_vote_ieq: {
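           // set a predicate from the source value, vote across the subgroup
           // with the subop matching the intrinsic, then convert the
           // resulting predicate back into an integer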
1891 LValues &newDefs = convert(&insn->dest);
1892 Value *pred = getScratch(1, FILE_PREDICATE);
1893 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1894 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1895 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1896 break;
1897 }
1898 case nir_intrinsic_ballot: {
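           // a ballot is a VOTE ANY where every lane supplies its own
           // predicate; with a GPR destination, VOTE yields the mask of
           // lanes for which the source was non-zero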
1899 LValues &newDefs = convert(&insn->dest);
1900 Value *pred = getSSA(1, FILE_PREDICATE);
1901 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1902 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1903 break;
1904 }
1905 case nir_intrinsic_read_first_invocation:
1906 case nir_intrinsic_read_invocation: {
1907 LValues &newDefs = convert(&insn->dest);
1908 const DataType dType = getDType(insn);
1909 Value *tmp = getScratch();
1910
1911 if (op == nir_intrinsic_read_first_invocation) {
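              // compute the id of the lowest active lane: VOTE ANY on a true
              // predicate yields the active-lane mask; bit-reversing it and
              // running BFIND in shift-amount mode recovers the index of the
              // lowest set bit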
1912 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1913 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
1914 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1915 } else
1916 tmp = getSrc(&insn->src[1], 0);
1917
1918 for (uint8_t i = 0; i < insn->num_components; ++i) {
1919 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1920 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1921 }
1922 break;
1923 }
1924 case nir_intrinsic_load_per_vertex_input: {
1925 const DataType dType = getDType(insn);
1926 LValues &newDefs = convert(&insn->dest);
1927 Value *indirectVertex;
1928 Value *indirectOffset;
1929 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1930 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1931
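           // PFETCH resolves the (base + indirect) vertex index into the
           // address of that input vertex, which the loads below use as base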
1932 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1933 mkImm(baseVertex), indirectVertex);
1934 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1935 uint32_t address = getSlotAddress(insn, idx, i);
1936 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
1937 indirectOffset, vtxBase, info->in[idx].patch);
1938 }
1939 break;
1940 }
1941 case nir_intrinsic_load_per_vertex_output: {
1942 const DataType dType = getDType(insn);
1943 LValues &newDefs = convert(&insn->dest);
1944 Value *indirectVertex;
1945 Value *indirectOffset;
1946 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1947 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1948 Value *vtxBase = NULL;
1949
1950 if (indirectVertex)
1951 vtxBase = indirectVertex;
1952 else
1953 vtxBase = loadImm(NULL, baseVertex);
1954
1955 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
1956
1957 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1958 uint32_t address = getSlotAddress(insn, idx, i);
1959 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
1960 indirectOffset, vtxBase, info->in[idx].patch);
1961 }
1962 break;
1963 }
1964 case nir_intrinsic_emit_vertex:
1965 if (info->io.genUserClip > 0)
1966 handleUserClipPlanes();
1967 // fallthrough
1968 case nir_intrinsic_end_primitive: {
1969 uint32_t idx = nir_intrinsic_stream_id(insn);
1970 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
1971 break;
1972 }
1973 case nir_intrinsic_load_ubo: {
1974 const DataType dType = getDType(insn);
1975 LValues &newDefs = convert(&insn->dest);
1976 Value *indirectIndex;
1977 Value *indirectOffset;
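           // user UBO indices are shifted up by one since constbuf slot 0 is
           // reserved for the driver's own data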
1978 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
1979 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
1980
1981 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1982 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
1983 indirectOffset, indirectIndex);
1984 }
1985 break;
1986 }
1987 case nir_intrinsic_get_buffer_size: {
1988 LValues &newDefs = convert(&insn->dest);
1989 const DataType dType = getDType(insn);
1990 Value *indirectBuffer;
1991 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
1992
1993 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
1994 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
1995 break;
1996 }
1997 case nir_intrinsic_store_ssbo: {
1998 DataType sType = getSType(insn->src[0], false, false);
1999 Value *indirectBuffer;
2000 Value *indirectOffset;
2001 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2002 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2003
2004 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2005 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2006 continue;
2007 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2008 offset + i * typeSizeof(sType));
2009 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2010 ->setIndirect(0, 1, indirectBuffer);
2011 }
2012 info->io.globalAccess |= 0x2;
2013 break;
2014 }
2015 case nir_intrinsic_load_ssbo: {
2016 const DataType dType = getDType(insn);
2017 LValues &newDefs = convert(&insn->dest);
2018 Value *indirectBuffer;
2019 Value *indirectOffset;
2020 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2021 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2022
2023 for (uint8_t i = 0u; i < insn->num_components; ++i)
2024 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2025 indirectOffset, indirectBuffer);
2026
2027 info->io.globalAccess |= 0x1;
2028 break;
2029 }
2030 case nir_intrinsic_shared_atomic_add:
2031 case nir_intrinsic_shared_atomic_and:
2032 case nir_intrinsic_shared_atomic_comp_swap:
2033 case nir_intrinsic_shared_atomic_exchange:
2034 case nir_intrinsic_shared_atomic_or:
2035 case nir_intrinsic_shared_atomic_imax:
2036 case nir_intrinsic_shared_atomic_imin:
2037 case nir_intrinsic_shared_atomic_umax:
2038 case nir_intrinsic_shared_atomic_umin:
2039 case nir_intrinsic_shared_atomic_xor: {
2040 const DataType dType = getDType(insn);
2041 LValues &newDefs = convert(&insn->dest);
2042 Value *indirectOffset;
2043 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2044 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2045 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2046 if (op == nir_intrinsic_shared_atomic_comp_swap)
2047 atom->setSrc(2, getSrc(&insn->src[2], 0));
2048 atom->setIndirect(0, 0, indirectOffset);
2049 atom->subOp = getSubOp(op);
2050 break;
2051 }
2052 case nir_intrinsic_ssbo_atomic_add:
2053 case nir_intrinsic_ssbo_atomic_and:
2054 case nir_intrinsic_ssbo_atomic_comp_swap:
2055 case nir_intrinsic_ssbo_atomic_exchange:
2056 case nir_intrinsic_ssbo_atomic_or:
2057 case nir_intrinsic_ssbo_atomic_imax:
2058 case nir_intrinsic_ssbo_atomic_imin:
2059 case nir_intrinsic_ssbo_atomic_umax:
2060 case nir_intrinsic_ssbo_atomic_umin:
2061 case nir_intrinsic_ssbo_atomic_xor: {
2062 const DataType dType = getDType(insn);
2063 LValues &newDefs = convert(&insn->dest);
2064 Value *indirectBuffer;
2065 Value *indirectOffset;
2066 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2067 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2068
2069 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2070 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2071 getSrc(&insn->src[2], 0));
2072 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2073 atom->setSrc(2, getSrc(&insn->src[3], 0));
2074 atom->setIndirect(0, 0, indirectOffset);
2075 atom->setIndirect(0, 1, indirectBuffer);
2076 atom->subOp = getSubOp(op);
2077
2078 info->io.globalAccess |= 0x2;
2079 break;
2080 }
2081 case nir_intrinsic_global_atomic_add:
2082 case nir_intrinsic_global_atomic_and:
2083 case nir_intrinsic_global_atomic_comp_swap:
2084 case nir_intrinsic_global_atomic_exchange:
2085 case nir_intrinsic_global_atomic_or:
2086 case nir_intrinsic_global_atomic_imax:
2087 case nir_intrinsic_global_atomic_imin:
2088 case nir_intrinsic_global_atomic_umax:
2089 case nir_intrinsic_global_atomic_umin:
2090 case nir_intrinsic_global_atomic_xor: {
2091 const DataType dType = getDType(insn);
2092 LValues &newDefs = convert(&insn->dest);
2093 Value *address;
2094 uint32_t offset = getIndirect(&insn->src[0], 0, address);
2095
2096 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2097 Instruction *atom =
2098 mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2099 atom->setIndirect(0, 0, address);
2100 atom->subOp = getSubOp(op);
2101
2102 info->io.globalAccess |= 0x2;
2103 break;
2104 }
2105 case nir_intrinsic_bindless_image_atomic_add:
2106 case nir_intrinsic_bindless_image_atomic_and:
2107 case nir_intrinsic_bindless_image_atomic_comp_swap:
2108 case nir_intrinsic_bindless_image_atomic_exchange:
2109 case nir_intrinsic_bindless_image_atomic_imax:
2110 case nir_intrinsic_bindless_image_atomic_umax:
2111 case nir_intrinsic_bindless_image_atomic_imin:
2112 case nir_intrinsic_bindless_image_atomic_umin:
2113 case nir_intrinsic_bindless_image_atomic_or:
2114 case nir_intrinsic_bindless_image_atomic_xor:
2115 case nir_intrinsic_bindless_image_load:
2116 case nir_intrinsic_bindless_image_samples:
2117 case nir_intrinsic_bindless_image_size:
2118 case nir_intrinsic_bindless_image_store: {
2119 std::vector<Value*> srcs, defs;
2120 Value *indirect = getSrc(&insn->src[0], 0);
2121 DataType ty;
2122
2123 uint32_t mask = 0;
2124 TexInstruction::Target target =
2125 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2126 unsigned int argCount = getNIRArgCount(target);
2127 uint16_t location = 0;
2128
2129 if (opInfo.has_dest) {
2130 LValues &newDefs = convert(&insn->dest);
2131 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2132 defs.push_back(newDefs[i]);
2133 mask |= 1 << i;
2134 }
2135 }
2136
2137 switch (op) {
2138 case nir_intrinsic_bindless_image_atomic_add:
2139 case nir_intrinsic_bindless_image_atomic_and:
2140 case nir_intrinsic_bindless_image_atomic_comp_swap:
2141 case nir_intrinsic_bindless_image_atomic_exchange:
2142 case nir_intrinsic_bindless_image_atomic_imax:
2143 case nir_intrinsic_bindless_image_atomic_umax:
2144 case nir_intrinsic_bindless_image_atomic_imin:
2145 case nir_intrinsic_bindless_image_atomic_umin:
2146 case nir_intrinsic_bindless_image_atomic_or:
2147 case nir_intrinsic_bindless_image_atomic_xor:
2148 ty = getDType(insn);
2149 mask = 0x1;
2150 info->io.globalAccess |= 0x2;
2151 break;
2152 case nir_intrinsic_bindless_image_load:
2153 ty = TYPE_U32;
2154 info->io.globalAccess |= 0x1;
2155 break;
2156 case nir_intrinsic_bindless_image_store:
2157 ty = TYPE_U32;
2158 mask = 0xf;
2159 info->io.globalAccess |= 0x2;
2160 break;
2161 case nir_intrinsic_bindless_image_samples:
2162 mask = 0x8;
2163 ty = TYPE_U32;
2164 break;
2165 case nir_intrinsic_bindless_image_size:
2166 ty = TYPE_U32;
2167 break;
2168 default:
2169 unreachable("unhandled image opcode");
2170 break;
2171 }
2172
2173 // coords
2174 if (opInfo.num_srcs >= 2)
2175 for (unsigned int i = 0u; i < argCount; ++i)
2176 srcs.push_back(getSrc(&insn->src[1], i));
2177
2178       // for MS targets, the sample index is just another src added after the coords
2179 if (opInfo.num_srcs >= 3 && target.isMS())
2180 srcs.push_back(getSrc(&insn->src[2], 0));
2181
2182 if (opInfo.num_srcs >= 4) {
2183 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2184 for (uint8_t i = 0u; i < components; ++i)
2185 srcs.push_back(getSrc(&insn->src[3], i));
2186 }
2187
2188 if (opInfo.num_srcs >= 5)
2189          // 1 extra source for atomic compare-and-swap
2190 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2191 srcs.push_back(getSrc(&insn->src[4], i));
2192
2193 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2195 texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2196 texi->tex.mask = mask;
2197 texi->tex.bindless = true;
2198 texi->cache = convert(nir_intrinsic_access(insn));
2199 texi->setType(ty);
2200 texi->subOp = getSubOp(op);
2201
2202 if (indirect)
2203 texi->setIndirectR(indirect);
2204
2205 break;
2206 }
2207 case nir_intrinsic_image_deref_atomic_add:
2208 case nir_intrinsic_image_deref_atomic_and:
2209 case nir_intrinsic_image_deref_atomic_comp_swap:
2210 case nir_intrinsic_image_deref_atomic_exchange:
2211 case nir_intrinsic_image_deref_atomic_imax:
2212 case nir_intrinsic_image_deref_atomic_umax:
2213 case nir_intrinsic_image_deref_atomic_imin:
2214 case nir_intrinsic_image_deref_atomic_umin:
2215 case nir_intrinsic_image_deref_atomic_or:
2216 case nir_intrinsic_image_deref_atomic_xor:
2217 case nir_intrinsic_image_deref_load:
2218 case nir_intrinsic_image_deref_samples:
2219 case nir_intrinsic_image_deref_size:
2220 case nir_intrinsic_image_deref_store: {
2221 const nir_variable *tex;
2222 std::vector<Value*> srcs, defs;
2223 Value *indirect;
2224 DataType ty;
2225
2226 uint32_t mask = 0;
2227 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2228 const glsl_type *type = deref->type;
2229 TexInstruction::Target target =
2230 convert((glsl_sampler_dim)type->sampler_dimensionality,
2231 type->sampler_array, type->sampler_shadow);
2232 unsigned int argCount = getNIRArgCount(target);
2233 uint16_t location = handleDeref(deref, indirect, tex);
2234
2235 if (opInfo.has_dest) {
2236 LValues &newDefs = convert(&insn->dest);
2237 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2238 defs.push_back(newDefs[i]);
2239 mask |= 1 << i;
2240 }
2241 }
2242
2243 switch (op) {
2244 case nir_intrinsic_image_deref_atomic_add:
2245 case nir_intrinsic_image_deref_atomic_and:
2246 case nir_intrinsic_image_deref_atomic_comp_swap:
2247 case nir_intrinsic_image_deref_atomic_exchange:
2248 case nir_intrinsic_image_deref_atomic_imax:
2249 case nir_intrinsic_image_deref_atomic_umax:
2250 case nir_intrinsic_image_deref_atomic_imin:
2251 case nir_intrinsic_image_deref_atomic_umin:
2252 case nir_intrinsic_image_deref_atomic_or:
2253 case nir_intrinsic_image_deref_atomic_xor:
2254 ty = getDType(insn);
2255 mask = 0x1;
2256 info->io.globalAccess |= 0x2;
2257 break;
2258 case nir_intrinsic_image_deref_load:
2259 ty = TYPE_U32;
2260 info->io.globalAccess |= 0x1;
2261 break;
2262 case nir_intrinsic_image_deref_store:
2263 ty = TYPE_U32;
2264 mask = 0xf;
2265 info->io.globalAccess |= 0x2;
2266 break;
2267 case nir_intrinsic_image_deref_samples:
2268 mask = 0x8;
2269 ty = TYPE_U32;
2270 break;
2271 case nir_intrinsic_image_deref_size:
2272 ty = TYPE_U32;
2273 break;
2274 default:
2275 unreachable("unhandled image opcode");
2276 break;
2277 }
2278
2279 // coords
2280 if (opInfo.num_srcs >= 2)
2281 for (unsigned int i = 0u; i < argCount; ++i)
2282 srcs.push_back(getSrc(&insn->src[1], i));
2283
2284       // for MS targets, the sample index is just another src added after the coords
2285 if (opInfo.num_srcs >= 3 && target.isMS())
2286 srcs.push_back(getSrc(&insn->src[2], 0));
2287
2288 if (opInfo.num_srcs >= 4) {
2289 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2290 for (uint8_t i = 0u; i < components; ++i)
2291 srcs.push_back(getSrc(&insn->src[3], i));
2292 }
2293
2294 if (opInfo.num_srcs >= 5)
2295          // 1 extra source for atomic compare-and-swap
2296 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2297 srcs.push_back(getSrc(&insn->src[4], i));
2298
2299 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2300 texi->tex.bindless = false;
2301 texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(tex->data.image.format);
2302 texi->tex.mask = mask;
2303 texi->cache = getCacheModeFromVar(tex);
2304 texi->setType(ty);
2305 texi->subOp = getSubOp(op);
2306
2307 if (indirect)
2308 texi->setIndirectR(indirect);
2309
2310 break;
2311 }
2312 case nir_intrinsic_store_shared: {
2313 DataType sType = getSType(insn->src[0], false, false);
2314 Value *indirectOffset;
2315 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2316
2317 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2318 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2319 continue;
2320 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2321 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2322 }
2323 break;
2324 }
2325 case nir_intrinsic_load_shared: {
2326 const DataType dType = getDType(insn);
2327 LValues &newDefs = convert(&insn->dest);
2328 Value *indirectOffset;
2329 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2330
2331 for (uint8_t i = 0u; i < insn->num_components; ++i)
2332 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2333
2334 break;
2335 }
2336 case nir_intrinsic_control_barrier: {
2337 // TODO: add flag to shader_info
2338 info->numBarriers = 1;
2339 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2340 bar->fixed = 1;
2341 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2342 break;
2343 }
2344 case nir_intrinsic_group_memory_barrier:
2345 case nir_intrinsic_memory_barrier:
2346 case nir_intrinsic_memory_barrier_buffer:
2347 case nir_intrinsic_memory_barrier_image:
2348 case nir_intrinsic_memory_barrier_shared: {
2349 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2350 bar->fixed = 1;
2351 bar->subOp = getSubOp(op);
2352 break;
2353 }
2354 case nir_intrinsic_memory_barrier_tcs_patch:
2355 break;
2356 case nir_intrinsic_shader_clock: {
2357 const DataType dType = getDType(insn);
2358 LValues &newDefs = convert(&insn->dest);
2359
2360 loadImm(newDefs[0], 0u);
2361 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2362 break;
2363 }
2364 case nir_intrinsic_load_global: {
2365 const DataType dType = getDType(insn);
2366 LValues &newDefs = convert(&insn->dest);
2367 Value *indirectOffset;
2368 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2369
2370 for (auto i = 0u; i < insn->num_components; ++i)
2371 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2372
2373 info->io.globalAccess |= 0x1;
2374 break;
2375 }
2376 case nir_intrinsic_store_global: {
2377 DataType sType = getSType(insn->src[0], false, false);
2378
2379 for (auto i = 0u; i < insn->num_components; ++i) {
2380 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2381 continue;
2382 if (typeSizeof(sType) == 8) {
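                 // 64-bit stores are emitted as two 32-bit stores of the
                 // split halves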
2383 Value *split[2];
2384 mkSplit(split, 4, getSrc(&insn->src[0], i));
2385
2386 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2387 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2388
2389 sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2390 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2391 } else {
2392 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2393 mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2394 }
2395 }
2396
2397 info->io.globalAccess |= 0x2;
2398 break;
2399 }
2400 default:
2401 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2402 return false;
2403 }
2404
2405 return true;
2406 }
2407
2408 bool
2409 Converter::visit(nir_jump_instr *insn)
2410 {
2411 switch (insn->type) {
2412 case nir_jump_return:
2413 // TODO: this only works in the main function
2414 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2415 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2416 break;
2417 case nir_jump_break:
2418 case nir_jump_continue: {
2419 bool isBreak = insn->type == nir_jump_break;
2420 nir_block *block = insn->instr.block;
2421 assert(!block->successors[1]);
2422 BasicBlock *target = convert(block->successors[0]);
2423 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2424 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2425 break;
2426 }
2427 default:
2428 ERROR("unknown nir_jump_type %u\n", insn->type);
2429 return false;
2430 }
2431
2432 return true;
2433 }
2434
2435 Value*
2436 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2437 {
2438 Value *val;
2439
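        // materialize the immediate at the recorded insertion point (or at
        // the top of the block) and return to appending at the block end
        // afterwards, so constant loads stay grouped together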
2440 if (immInsertPos)
2441 setPosition(immInsertPos, true);
2442 else
2443 setPosition(bb, false);
2444
2445 switch (insn->def.bit_size) {
2446 case 64:
2447 val = loadImm(getSSA(8), insn->value[idx].u64);
2448 break;
2449 case 32:
2450 val = loadImm(getSSA(4), insn->value[idx].u32);
2451 break;
2452 case 16:
2453 val = loadImm(getSSA(2), insn->value[idx].u16);
2454 break;
2455 case 8:
2456 val = loadImm(getSSA(1), insn->value[idx].u8);
2457 break;
2458 default:
2459 unreachable("unhandled bit size!\n");
2460 }
2461 setPosition(bb, true);
2462 return val;
2463 }
2464
2465 bool
2466 Converter::visit(nir_load_const_instr *insn)
2467 {
2468 assert(insn->def.bit_size <= 64);
2469 immediates[insn->def.index] = insn;
2470 return true;
2471 }
2472
2473 #define DEFAULT_CHECKS \
2474 if (insn->dest.dest.ssa.num_components > 1) { \
2475 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2476 return false; \
2477 } \
2478 if (insn->dest.write_mask != 1) { \
2479       ERROR("nir_alu_instr only supported with a write_mask of 1!\n"); \
2480 return false; \
2481 }
2482 bool
2483 Converter::visit(nir_alu_instr *insn)
2484 {
2485 const nir_op op = insn->op;
2486 const nir_op_info &info = nir_op_infos[op];
2487 DataType dType = getDType(insn);
2488 const std::vector<DataType> sTypes = getSTypes(insn);
2489
2490 Instruction *oldPos = this->bb->getExit();
2491
2492 switch (op) {
2493 case nir_op_fabs:
2494 case nir_op_iabs:
2495 case nir_op_fadd:
2496 case nir_op_iadd:
2497 case nir_op_iand:
2498 case nir_op_fceil:
2499 case nir_op_fcos:
2500 case nir_op_fddx:
2501 case nir_op_fddx_coarse:
2502 case nir_op_fddx_fine:
2503 case nir_op_fddy:
2504 case nir_op_fddy_coarse:
2505 case nir_op_fddy_fine:
2506 case nir_op_fdiv:
2507 case nir_op_idiv:
2508 case nir_op_udiv:
2509 case nir_op_fexp2:
2510 case nir_op_ffloor:
2511 case nir_op_ffma:
2512 case nir_op_flog2:
2513 case nir_op_fmax:
2514 case nir_op_imax:
2515 case nir_op_umax:
2516 case nir_op_fmin:
2517 case nir_op_imin:
2518 case nir_op_umin:
2519 case nir_op_fmod:
2520 case nir_op_imod:
2521 case nir_op_umod:
2522 case nir_op_fmul:
2523 case nir_op_imul:
2524 case nir_op_imul_high:
2525 case nir_op_umul_high:
2526 case nir_op_fneg:
2527 case nir_op_ineg:
2528 case nir_op_inot:
2529 case nir_op_ior:
2530 case nir_op_pack_64_2x32_split:
2531 case nir_op_fpow:
2532 case nir_op_frcp:
2533 case nir_op_frem:
2534 case nir_op_irem:
2535 case nir_op_frsq:
2536 case nir_op_fsat:
2537 case nir_op_ishr:
2538 case nir_op_ushr:
2539 case nir_op_fsin:
2540 case nir_op_fsqrt:
2541 case nir_op_ftrunc:
2542 case nir_op_ishl:
2543 case nir_op_ixor: {
2544 DEFAULT_CHECKS;
2545 LValues &newDefs = convert(&insn->dest);
2546 operation preOp = preOperationNeeded(op);
2547 if (preOp != OP_NOP) {
2548 assert(info.num_inputs < 2);
2549 Value *tmp = getSSA(typeSizeof(dType));
2550 Instruction *i0 = mkOp(preOp, dType, tmp);
2551 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2552 if (info.num_inputs) {
2553 i0->setSrc(0, getSrc(&insn->src[0]));
2554 i1->setSrc(0, tmp);
2555 }
2556 i1->subOp = getSubOp(op);
2557 } else {
2558 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2559 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2560 i->setSrc(s, getSrc(&insn->src[s]));
2561 }
2562 i->subOp = getSubOp(op);
2563 }
2564 break;
2565 }
2566 case nir_op_ifind_msb:
2567 case nir_op_ufind_msb: {
2568 DEFAULT_CHECKS;
2569 LValues &newDefs = convert(&insn->dest);
2570 dType = sTypes[0];
2571 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2572 break;
2573 }
2574 case nir_op_fround_even: {
2575 DEFAULT_CHECKS;
2576 LValues &newDefs = convert(&insn->dest);
2577 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2578 break;
2579 }
2580 // convert instructions
2581 case nir_op_f2f32:
2582 case nir_op_f2i32:
2583 case nir_op_f2u32:
2584 case nir_op_i2f32:
2585 case nir_op_i2i32:
2586 case nir_op_u2f32:
2587 case nir_op_u2u32:
2588 case nir_op_f2f64:
2589 case nir_op_f2i64:
2590 case nir_op_f2u64:
2591 case nir_op_i2f64:
2592 case nir_op_i2i64:
2593 case nir_op_u2f64:
2594 case nir_op_u2u64: {
2595 DEFAULT_CHECKS;
2596 LValues &newDefs = convert(&insn->dest);
2597 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2598 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2599 i->rnd = ROUND_Z;
2600 i->sType = sTypes[0];
2601 break;
2602 }
2603 // compare instructions
2604 case nir_op_feq32:
2605 case nir_op_ieq32:
2606 case nir_op_fge32:
2607 case nir_op_ige32:
2608 case nir_op_uge32:
2609 case nir_op_flt32:
2610 case nir_op_ilt32:
2611 case nir_op_ult32:
2612 case nir_op_fne32:
2613 case nir_op_ine32: {
2614 DEFAULT_CHECKS;
2615 LValues &newDefs = convert(&insn->dest);
2616 Instruction *i = mkCmp(getOperation(op),
2617 getCondCode(op),
2618 dType,
2619 newDefs[0],
2620 dType,
2621 getSrc(&insn->src[0]),
2622 getSrc(&insn->src[1]));
2623 if (info.num_inputs == 3)
2624 i->setSrc(2, getSrc(&insn->src[2]));
2625 i->sType = sTypes[0];
2626 break;
2627 }
2628    // these ALU ops are special and need dedicated handling, because
2629    // 1. they are always component-based
2630    // 2. they basically just merge multiple values into one data type
2631 case nir_op_mov:
2632 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2633 nir_reg_dest& reg = insn->dest.dest.reg;
2634 uint32_t goffset = regToLmemOffset[reg.reg->index];
2635 uint8_t comps = reg.reg->num_components;
2636 uint8_t size = reg.reg->bit_size / 8;
2637 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2638 uint32_t aoffset = csize * reg.base_offset;
2639 Value *indirect = NULL;
2640
2641 if (reg.indirect)
2642 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2643 getSrc(reg.indirect, 0), mkImm(csize));
2644
2645 for (uint8_t i = 0u; i < comps; ++i) {
2646 if (!((1u << i) & insn->dest.write_mask))
2647 continue;
2648
2649 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2650 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2651 }
2652 break;
2653 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2654 LValues &newDefs = convert(&insn->dest);
2655 nir_reg_src& reg = insn->src[0].src.reg;
2656 uint32_t goffset = regToLmemOffset[reg.reg->index];
2657 // uint8_t comps = reg.reg->num_components;
2658 uint8_t size = reg.reg->bit_size / 8;
2659 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2660 uint32_t aoffset = csize * reg.base_offset;
2661 Value *indirect = NULL;
2662
2663 if (reg.indirect)
2664 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2665
2666 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2667 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2668
2669 break;
2670 } else {
2671 LValues &newDefs = convert(&insn->dest);
2672 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2673 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2674 }
2675 }
2676 break;
2677 case nir_op_vec2:
2678 case nir_op_vec3:
2679 case nir_op_vec4:
2680 case nir_op_vec8:
2681 case nir_op_vec16: {
2682 LValues &newDefs = convert(&insn->dest);
2683 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2684 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2685 }
2686 break;
2687 }
2688 // (un)pack
2689 case nir_op_pack_64_2x32: {
2690 LValues &newDefs = convert(&insn->dest);
2691 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2692 merge->setSrc(0, getSrc(&insn->src[0], 0));
2693 merge->setSrc(1, getSrc(&insn->src[0], 1));
2694 break;
2695 }
2696 case nir_op_pack_half_2x16_split: {
2697 LValues &newDefs = convert(&insn->dest);
2698 Value *tmpH = getSSA();
2699 Value *tmpL = getSSA();
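           // convert both sources to f16 and insert the second one into the
           // upper 16 bits of the result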
2700
2701 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2702 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2703 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2704 break;
2705 }
2706 case nir_op_unpack_half_2x16_split_x:
2707 case nir_op_unpack_half_2x16_split_y: {
2708 LValues &newDefs = convert(&insn->dest);
2709 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2710 if (op == nir_op_unpack_half_2x16_split_y)
2711 cvt->subOp = 1;
2712 break;
2713 }
2714 case nir_op_unpack_64_2x32: {
2715 LValues &newDefs = convert(&insn->dest);
2716 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2717 break;
2718 }
2719 case nir_op_unpack_64_2x32_split_x: {
2720 LValues &newDefs = convert(&insn->dest);
2721 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2722 break;
2723 }
2724 case nir_op_unpack_64_2x32_split_y: {
2725 LValues &newDefs = convert(&insn->dest);
2726 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2727 break;
2728 }
2729 // special instructions
2730 case nir_op_fsign:
2731 case nir_op_isign: {
2732 DEFAULT_CHECKS;
2733 DataType iType;
2734 if (::isFloatType(dType))
2735 iType = TYPE_F32;
2736 else
2737 iType = TYPE_S32;
2738
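           // sign(x) == (x > 0) - (x < 0), built from the two compares
           // below; integer SETs produce 0/-1 rather than 0/1, which is why
           // the subtraction order is swapped on the integer paths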
2739 LValues &newDefs = convert(&insn->dest);
2740 LValue *val0 = getScratch();
2741 LValue *val1 = getScratch();
2742 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2743 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2744
2745 if (dType == TYPE_F64) {
2746 mkOp2(OP_SUB, iType, val0, val0, val1);
2747 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2748 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2749 mkOp2(OP_SUB, iType, val0, val1, val0);
2750 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2751 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2752 } else if (::isFloatType(dType))
2753 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2754 else
2755 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2756 break;
2757 }
2758 case nir_op_fcsel:
2759 case nir_op_b32csel: {
2760 DEFAULT_CHECKS;
2761 LValues &newDefs = convert(&insn->dest);
2762 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2763 break;
2764 }
2765 case nir_op_ibitfield_extract:
2766 case nir_op_ubitfield_extract: {
2767 DEFAULT_CHECKS;
2768 Value *tmp = getSSA();
2769 LValues &newDefs = convert(&insn->dest);
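           // EXTBF expects (width << 8) | offset as its second source; the
           // INSBF with 0x808 builds exactly that from the bits and offset
           // operands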
2770 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2771 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2772 break;
2773 }
2774 case nir_op_bfm: {
2775 DEFAULT_CHECKS;
2776 LValues &newDefs = convert(&insn->dest);
2777 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2778 break;
2779 }
2780 case nir_op_bitfield_insert: {
2781 DEFAULT_CHECKS;
2782 LValues &newDefs = convert(&insn->dest);
2783 LValue *temp = getSSA();
2784 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2785 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2786 break;
2787 }
2788 case nir_op_bit_count: {
2789 DEFAULT_CHECKS;
2790 LValues &newDefs = convert(&insn->dest);
2791 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2792 break;
2793 }
2794 case nir_op_bitfield_reverse: {
2795 DEFAULT_CHECKS;
2796 LValues &newDefs = convert(&insn->dest);
2797 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2798 break;
2799 }
2800 case nir_op_find_lsb: {
2801 DEFAULT_CHECKS;
2802 LValues &newDefs = convert(&insn->dest);
2803 Value *tmp = getSSA();
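           // find the lowest set bit by bit-reversing the value and locating
           // the highest set bit; BFIND's shift-amount mode maps the result
           // back to the original bit index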
2804 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2805 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2806 break;
2807 }
2808 // boolean conversions
2809 case nir_op_b2f32: {
2810 DEFAULT_CHECKS;
2811 LValues &newDefs = convert(&insn->dest);
2812 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2813 break;
2814 }
2815 case nir_op_b2f64: {
2816 DEFAULT_CHECKS;
2817 LValues &newDefs = convert(&insn->dest);
2818 Value *tmp = getSSA(4);
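           // 0x3ff00000 is the high word of the double 1.0, so ANDing it
           // with the 0/~0 boolean and merging with a zero low word yields
           // 0.0 or 1.0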
2819 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2820 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2821 break;
2822 }
2823 case nir_op_f2b32:
2824 case nir_op_i2b32: {
2825 DEFAULT_CHECKS;
2826 LValues &newDefs = convert(&insn->dest);
2827 Value *src1;
2828 if (typeSizeof(sTypes[0]) == 8) {
2829 src1 = loadImm(getSSA(8), 0.0);
2830 } else {
2831 src1 = zero;
2832 }
2833 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2834 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2835 break;
2836 }
2837 case nir_op_b2i32: {
2838 DEFAULT_CHECKS;
2839 LValues &newDefs = convert(&insn->dest);
2840 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2841 break;
2842 }
2843 case nir_op_b2i64: {
2844 DEFAULT_CHECKS;
2845 LValues &newDefs = convert(&insn->dest);
2846 LValue *def = getScratch();
2847 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2848 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2849 break;
2850 }
2851 default:
2852 ERROR("unknown nir_op %s\n", info.name);
2853 return false;
2854 }
2855
2856    if (!oldPos) {
2857       oldPos = this->bb->getEntry();
2858       if (oldPos) oldPos->precise = insn->exact;
2859    }
2860
2861 if (unlikely(!oldPos))
2862 return true;
2863
2864 while (oldPos->next) {
2865 oldPos = oldPos->next;
2866 oldPos->precise = insn->exact;
2867 }
2868 oldPos->saturate = insn->dest.saturate;
2869
2870 return true;
2871 }
2872 #undef DEFAULT_CHECKS
2873
2874 bool
2875 Converter::visit(nir_ssa_undef_instr *insn)
2876 {
2877 LValues &newDefs = convert(&insn->def);
2878 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2879 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2880 }
2881 return true;
2882 }
2883
2884 #define CASE_SAMPLER(ty) \
2885 case GLSL_SAMPLER_DIM_ ## ty : \
2886 if (isArray && !isShadow) \
2887 return TEX_TARGET_ ## ty ## _ARRAY; \
2888 else if (!isArray && isShadow) \
2889 return TEX_TARGET_## ty ## _SHADOW; \
2890 else if (isArray && isShadow) \
2891 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2892 else \
2893 return TEX_TARGET_ ## ty
2894
2895 TexTarget
2896 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2897 {
2898 switch (dim) {
2899 CASE_SAMPLER(1D);
2900 CASE_SAMPLER(2D);
2901 CASE_SAMPLER(CUBE);
2902 case GLSL_SAMPLER_DIM_3D:
2903 return TEX_TARGET_3D;
2904 case GLSL_SAMPLER_DIM_MS:
2905 if (isArray)
2906 return TEX_TARGET_2D_MS_ARRAY;
2907 return TEX_TARGET_2D_MS;
2908 case GLSL_SAMPLER_DIM_RECT:
2909 if (isShadow)
2910 return TEX_TARGET_RECT_SHADOW;
2911 return TEX_TARGET_RECT;
2912 case GLSL_SAMPLER_DIM_BUF:
2913 return TEX_TARGET_BUFFER;
2914 case GLSL_SAMPLER_DIM_EXTERNAL:
2915 return TEX_TARGET_2D;
2916 default:
2917 ERROR("unknown glsl_sampler_dim %u\n", dim);
2918 assert(false);
2919 return TEX_TARGET_COUNT;
2920 }
2921 }
2922 #undef CASE_SAMPLER
2923
2924 Value*
2925 Converter::applyProjection(Value *src, Value *proj)
2926 {
2927 if (!proj)
2928 return src;
2929 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
2930 }
2931
2932 unsigned int
2933 Converter::getNIRArgCount(TexInstruction::Target& target)
2934 {
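        // the NIR sources don't line up with codegen's getArgCount(): the
        // sample index of MS targets arrives as a separate source, and cube
        // arrays likewise take one coordinate source less here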
2935 unsigned int result = target.getArgCount();
2936 if (target.isCube() && target.isArray())
2937 result--;
2938 if (target.isMS())
2939 result--;
2940 return result;
2941 }
2942
2943 uint16_t
2944 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
2945 {
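        // walk the deref chain up to the variable, adding the constant parts
        // of the offset up front and collecting (stride, index) pairs for
        // the indirect parts, which get multiplied and summed below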
2946 typedef std::pair<uint32_t,Value*> DerefPair;
2947 std::list<DerefPair> derefs;
2948
2949 uint16_t result = 0;
2950 while (deref->deref_type != nir_deref_type_var) {
2951 switch (deref->deref_type) {
2952 case nir_deref_type_array: {
2953 Value *indirect;
2954 uint8_t size = type_size(deref->type, true);
2955 result += size * getIndirect(&deref->arr.index, 0, indirect);
2956
2957 if (indirect) {
2958 derefs.push_front(std::make_pair(size, indirect));
2959 }
2960
2961 break;
2962 }
2963 case nir_deref_type_struct: {
2964 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
2965 break;
2966 }
2967 case nir_deref_type_var:
2968 default:
2969 unreachable("nir_deref_type_var reached in handleDeref!");
2970 break;
2971 }
2972 deref = nir_deref_instr_parent(deref);
2973 }
2974
2975 indirect = NULL;
2976 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
2977 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
2978 if (indirect)
2979 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
2980 else
2981 indirect = offset;
2982 }
2983
2984 tex = nir_deref_instr_get_variable(deref);
2985 assert(tex);
2986
2987 return result + tex->data.driver_location;
2988 }
2989
2990 CacheMode
2991 Converter::convert(enum gl_access_qualifier access)
2992 {
2993 switch (access) {
2994 case ACCESS_VOLATILE:
2995 return CACHE_CV;
2996 case ACCESS_COHERENT:
2997 return CACHE_CG;
2998 default:
2999 return CACHE_CA;
3000 }
3001 }
3002
3003 CacheMode
3004 Converter::getCacheModeFromVar(const nir_variable *var)
3005 {
3006 return convert(var->data.access);
3007 }
3008
3009 bool
3010 Converter::visit(nir_tex_instr *insn)
3011 {
3012 switch (insn->op) {
3013 case nir_texop_lod:
3014 case nir_texop_query_levels:
3015 case nir_texop_tex:
3016 case nir_texop_texture_samples:
3017 case nir_texop_tg4:
3018 case nir_texop_txb:
3019 case nir_texop_txd:
3020 case nir_texop_txf:
3021 case nir_texop_txf_ms:
3022 case nir_texop_txl:
3023 case nir_texop_txs: {
3024 LValues &newDefs = convert(&insn->dest);
3025 std::vector<Value*> srcs;
3026 std::vector<Value*> defs;
3027 std::vector<nir_src*> offsets;
3028 uint8_t mask = 0;
3029 bool lz = false;
3030 Value *proj = NULL;
3031 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3032 operation op = getOperation(insn->op);
3033
3034 int r, s;
3035 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3036 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3037 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3038 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3039 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3040 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3041 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3042 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3043 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3044 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3045 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3046 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3047 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3048
3049 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3050 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3051
3052 if (projIdx != -1)
3053 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3054
3055 srcs.resize(insn->coord_components);
3056 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3057 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3058
3059       // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3060 if (insn->coord_components) {
3061 uint32_t argCount = target.getArgCount();
3062
3063 if (target.isMS())
3064 argCount -= 1;
3065
3066 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3067 srcs.push_back(getSSA());
3068 }
3069
3070 if (insn->op == nir_texop_texture_samples)
3071 srcs.push_back(zero);
3072 else if (!insn->num_srcs)
3073 srcs.push_back(loadImm(NULL, 0));
3074 if (biasIdx != -1)
3075 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3076 if (lodIdx != -1)
3077 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3078 else if (op == OP_TXF)
3079 lz = true;
3080 if (msIdx != -1)
3081 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3082 if (offsetIdx != -1)
3083 offsets.push_back(&insn->src[offsetIdx].src);
3084 if (compIdx != -1)
3085 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3086 if (texOffIdx != -1) {
3087 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3088 texOffIdx = srcs.size() - 1;
3089 }
3090 if (sampOffIdx != -1) {
3091 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3092 sampOffIdx = srcs.size() - 1;
3093 }
3094 if (bindless) {
3095          // currently we only use the lower 32 bits of the 64-bit handle
3096 Value *split[2];
3097 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3098
3099 mkSplit(split, 4, handle);
3100
3101 srcs.push_back(split[0]);
3102 texOffIdx = srcs.size() - 1;
3103 }
3104
3105 r = bindless ? 0xff : insn->texture_index;
3106 s = bindless ? 0x1f : insn->sampler_index;
3107
3108 defs.resize(newDefs.size());
3109 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3110 defs[d] = newDefs[d];
3111 mask |= 1 << d;
3112 }
3113 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3114 lz = true;
3115
3116 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3117 texi->tex.levelZero = lz;
3118 texi->tex.mask = mask;
3119 texi->tex.bindless = bindless;
3120
3121 if (texOffIdx != -1)
3122 texi->tex.rIndirectSrc = texOffIdx;
3123 if (sampOffIdx != -1)
3124 texi->tex.sIndirectSrc = sampOffIdx;
3125
3126 switch (insn->op) {
3127 case nir_texop_tg4:
3128 if (!target.isShadow())
3129 texi->tex.gatherComp = insn->component;
3130 break;
3131 case nir_texop_txs:
3132 texi->tex.query = TXQ_DIMS;
3133 break;
3134 case nir_texop_texture_samples:
3135 texi->tex.mask = 0x4;
3136 texi->tex.query = TXQ_TYPE;
3137 break;
3138 case nir_texop_query_levels:
3139 texi->tex.mask = 0x8;
3140 texi->tex.query = TXQ_DIMS;
3141 break;
3142 default:
3143 break;
3144 }
3145
3146 texi->tex.useOffsets = offsets.size();
3147 if (texi->tex.useOffsets) {
3148 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3149 for (uint32_t c = 0u; c < 3; ++c) {
3150 uint8_t s2 = std::min(c, target.getDim() - 1);
3151 texi->offset[s][c].set(getSrc(offsets[s], s2));
3152 texi->offset[s][c].setInsn(texi);
3153 }
3154 }
3155 }
3156
3157 if (op == OP_TXG && offsetIdx == -1) {
3158 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3159 texi->tex.useOffsets = 4;
3160 setPosition(texi, false);
3161 for (uint8_t i = 0; i < 4; ++i) {
3162 for (uint8_t j = 0; j < 2; ++j) {
3163 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3164 texi->offset[i][j].setInsn(texi);
3165 }
3166 }
3167 setPosition(texi, true);
3168 }
3169 }
3170
3171 if (ddxIdx != -1 && ddyIdx != -1) {
3172 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3173 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3174 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3175 }
3176 }
3177
3178 break;
3179 }
3180 default:
3181 ERROR("unknown nir_texop %u\n", insn->op);
3182 return false;
3183 }
3184 return true;
3185 }
3186
3187 bool
3188 Converter::visit(nir_deref_instr *deref)
3189 {
3190    // we just ignore these, because image intrinsics are the only place where
3191    // we should end up with deref sources, and those have to backtrack anyway
3192    // to get the nir_variable. This code just exists to handle some special
3193    // cases.
3194 switch (deref->deref_type) {
3195 case nir_deref_type_array:
3196 case nir_deref_type_struct:
3197 case nir_deref_type_var:
3198 break;
3199 default:
3200 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3201 return false;
3202 }
3203 return true;
3204 }
3205
3206 bool
3207 Converter::run()
3208 {
3209 bool progress;
3210
3211 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3212 nir_print_shader(nir, stderr);
3213
3214 struct nir_lower_subgroups_options subgroup_options = {
3215 .subgroup_size = 32,
3216 .ballot_bit_size = 32,
3217 };
3218
3219 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3220 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3221 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3222 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3223 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3224 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3225 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3226
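        // the usual NIR optimization loop: repeat until no pass makes progress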
3227 do {
3228 progress = false;
3229 NIR_PASS(progress, nir, nir_copy_prop);
3230 NIR_PASS(progress, nir, nir_opt_remove_phis);
3231 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3232 NIR_PASS(progress, nir, nir_opt_cse);
3233 NIR_PASS(progress, nir, nir_opt_algebraic);
3234 NIR_PASS(progress, nir, nir_opt_constant_folding);
3235 NIR_PASS(progress, nir, nir_copy_prop);
3236 NIR_PASS(progress, nir, nir_opt_dce);
3237 NIR_PASS(progress, nir, nir_opt_dead_cf);
3238 } while (progress);
3239
3240 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3241 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3242 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3243 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3244
3245 // Garbage collect dead instructions
3246 nir_sweep(nir);
3247
3248 if (!parseNIR()) {
3249       ERROR("Couldn't parse NIR!\n");
3250 return false;
3251 }
3252
3253 if (!assignSlots()) {
3254 ERROR("Couldn't assign slots!\n");
3255 return false;
3256 }
3257
3258 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3259 nir_print_shader(nir, stderr);
3260
3261 nir_foreach_function(function, nir) {
3262 if (!visit(function))
3263 return false;
3264 }
3265
3266 return true;
3267 }
3268
3269 } // unnamed namespace
3270
3271 namespace nv50_ir {
3272
3273 bool
3274 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3275 {
3276 nir_shader *nir = (nir_shader*)info->bin.source;
3277 Converter converter(this, nir, info);
3278 bool result = converter.run();
3279 if (!result)
3280 return result;
3281 LoweringHelper lowering;
3282 lowering.run(this);
3283 tlsSize = info->bin.tlsSpace;
3284 return result;
3285 }
3286
3287 } // namespace nv50_ir