nvir/nir: use component helpers instead of insn->num_components
src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp (mesa.git)
/*
 * Copyright 2017 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Karol Herbst <kherbst@redhat.com>
 */

#include "compiler/nir/nir.h"

#include "util/u_debug.h"

#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_from_common.h"
#include "codegen/nv50_ir_lowering_helper.h"
#include "codegen/nv50_ir_util.h"
#include "tgsi/tgsi_from_mesa.h"

#if __cplusplus >= 201103L
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include <cstring>
#include <list>
#include <vector>

namespace {

#if __cplusplus >= 201103L
using std::hash;
using std::unordered_map;
#else
using std::tr1::hash;
using std::tr1::unordered_map;
#endif

using namespace nv50_ir;

int
type_size(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   typedef std::vector<LValue*> LValues;
   typedef unordered_map<unsigned, LValues> NirDefMap;
   typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
   typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   CacheMode convert(enum gl_access_qualifier);
   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   Value* convert(nir_load_const_instr*, uint8_t);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // The returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value does not have a constant part, it is instead
   // returned through the Value output parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   // isScalar indicates that the addressing is scalar; vec4 addressing is
   // assumed otherwise
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
                        bool isScalar = false);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_intrinsic_instr *, bool isSigned);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   operation getOperation(nir_intrinsic_op);
   operation getOperation(nir_op);
   operation getOperation(nir_texop);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_intrinsic_op);
   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_deref_instr *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);
   bool visit(nir_ssa_undef_instr *);
   bool visit(nir_tex_instr *);

   // tex stuff
   Value* applyProjection(Value *src, Value *proj);
   unsigned int getNIRArgCount(TexInstruction::Target&);

   // image stuff
   uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
   CacheMode getCacheModeFromVar(const nir_variable *);

   nir_shader *nir;

   NirDefMap ssaDefs;
   NirDefMap regDefs;
   ImmediateMap immediates;
   NirArrayLMemOffsets regToLmemOffset;
   NirBlockMap blocks;
   unsigned int curLoopDepth;

   BasicBlock *exit;
   Value *zero;
   Instruction *immInsertPos;

   int clipVertexOutput;

   union {
      struct {
         Value *position;
      } fp;
   };
};

Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
   : ConverterCommon(prog, info),
     nir(nir),
     curLoopDepth(0),
     clipVertexOutput(-1)
{
   zero = mkImm((uint32_t)0);
}

BasicBlock *
Converter::convert(nir_block *block)
{
   NirBlockMap::iterator it = blocks.find(block->index);
   if (it != blocks.end())
      return it->second;

   BasicBlock *bb = new BasicBlock(func);
   blocks[block->index] = bb;
   return bb;
}

bool
Converter::isFloatType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_float;
}

bool
Converter::isSignedType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_int;
}

bool
Converter::isResultFloat(nir_op op)
{
   const nir_op_info &info = nir_op_infos[op];
   if (info.output_type != nir_type_invalid)
      return isFloatType(info.output_type);

   ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
   assert(false);
   return true;
}

bool
Converter::isResultSigned(nir_op op)
{
   switch (op) {
   // there is no umul and we get wrong results if we treat all muls as signed
   case nir_op_imul:
   case nir_op_inot:
      return false;
   default:
      const nir_op_info &info = nir_op_infos[op];
      if (info.output_type != nir_type_invalid)
         return isSignedType(info.output_type);
      ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
      assert(false);
      return true;
   }
}

DataType
Converter::getDType(nir_alu_instr *insn)
{
   if (insn->dest.dest.is_ssa)
      return getDType(insn->op, insn->dest.dest.ssa.bit_size);
   else
      return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
}

DataType
Converter::getDType(nir_intrinsic_instr *insn)
{
   bool isSigned;
   switch (insn->intrinsic) {
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imin:
      isSigned = true;
      break;
   default:
      isSigned = false;
      break;
   }

   return getDType(insn, isSigned);
}

DataType
Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
{
   if (insn->dest.is_ssa)
      return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
   else
      return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
}

DataType
Converter::getDType(nir_op op, uint8_t bitSize)
{
   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
   if (ty == TYPE_NONE) {
      ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
      assert(false);
   }
   return ty;
}

std::vector<DataType>
Converter::getSTypes(nir_alu_instr *insn)
{
   const nir_op_info &info = nir_op_infos[insn->op];
   std::vector<DataType> res(info.num_inputs);

   for (uint8_t i = 0; i < info.num_inputs; ++i) {
      if (info.input_types[i] != nir_type_invalid) {
         res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
      } else {
         ERROR("getSType not implemented for %s idx %u\n", info.name, i);
         assert(false);
         res[i] = TYPE_NONE;
         break;
      }
   }

   return res;
}

DataType
Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
{
   uint8_t bitSize;
   if (src.is_ssa)
      bitSize = src.ssa->bit_size;
   else
      bitSize = src.reg.reg->bit_size;

   DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
   if (ty == TYPE_NONE) {
      const char *str;
      if (isFloat)
         str = "float";
      else if (isSigned)
         str = "int";
      else
         str = "uint";
      ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
      assert(false);
   }
   return ty;
}

operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_inot:
      return OP_NOT;
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::getOperation(nir_texop op)
{
   switch (op) {
   case nir_texop_tex:
      return OP_TEX;
   case nir_texop_lod:
      return OP_TXLQ;
   case nir_texop_txb:
      return OP_TXB;
   case nir_texop_txd:
      return OP_TXD;
   case nir_texop_txf:
   case nir_texop_txf_ms:
      return OP_TXF;
   case nir_texop_tg4:
      return OP_TXG;
   case nir_texop_txl:
      return OP_TXL;
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_txs:
      return OP_TXQ;
   default:
      ERROR("couldn't get operation for nir_texop %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::getOperation(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_emit_vertex:
      return OP_EMIT;
   case nir_intrinsic_end_primitive:
      return OP_RESTART;
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_xor:
      return OP_SUREDP;
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
      return OP_SULDP;
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
      return OP_SUQ;
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
      return OP_SUSTP;
   default:
      ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::preOperationNeeded(nir_op op)
{
   switch (op) {
   case nir_op_fcos:
   case nir_op_fsin:
      return OP_PRESIN;
   default:
      return OP_NOP;
   }
}

int
Converter::getSubOp(nir_op op)
{
   switch (op) {
   case nir_op_imul_high:
   case nir_op_umul_high:
      return NV50_IR_SUBOP_MUL_HIGH;
   default:
      return 0;
   }
}

int
Converter::getSubOp(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_ssbo_atomic_add:
      return NV50_IR_SUBOP_ATOM_ADD;
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_ssbo_atomic_and:
      return NV50_IR_SUBOP_ATOM_AND;
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return NV50_IR_SUBOP_ATOM_CAS;
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange:
      return NV50_IR_SUBOP_ATOM_EXCH;
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_ssbo_atomic_or:
      return NV50_IR_SUBOP_ATOM_OR;
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
      return NV50_IR_SUBOP_ATOM_MAX;
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
      return NV50_IR_SUBOP_ATOM_MIN;
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor:
      return NV50_IR_SUBOP_ATOM_XOR;

   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
      return NV50_IR_SUBOP_MEMBAR(M, GL);
   case nir_intrinsic_memory_barrier_shared:
      return NV50_IR_SUBOP_MEMBAR(M, CTA);

   case nir_intrinsic_vote_all:
      return NV50_IR_SUBOP_VOTE_ALL;
   case nir_intrinsic_vote_any:
      return NV50_IR_SUBOP_VOTE_ANY;
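   // NIR's vote_ieq on a boolean source reduces to checking whether the
   // predicate is uniform across the warp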
   case nir_intrinsic_vote_ieq:
      return NV50_IR_SUBOP_VOTE_UNI;
   default:
      return 0;
   }
}

CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}

Converter::LValues&
Converter::convert(nir_alu_dest *dest)
{
   return convert(&dest->dest);
}

Converter::LValues&
Converter::convert(nir_dest *dest)
{
   if (dest->is_ssa)
      return convert(&dest->ssa);
   if (dest->reg.indirect) {
      ERROR("no support for indirects.");
      assert(false);
   }
   return convert(dest->reg.reg);
}

Converter::LValues&
Converter::convert(nir_register *reg)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it != regDefs.end())
      return it->second;

   LValues newDef(reg->num_components);
   for (uint8_t i = 0; i < reg->num_components; i++)
      newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
   return regDefs[reg->index] = newDef;
}

Converter::LValues&
Converter::convert(nir_ssa_def *def)
{
   NirDefMap::iterator it = ssaDefs.find(def->index);
   if (it != ssaDefs.end())
      return it->second;

   LValues newDef(def->num_components);
   for (uint8_t i = 0; i < def->num_components; i++)
      newDef[i] = getSSA(std::max(4, def->bit_size / 8));
   return ssaDefs[def->index] = newDef;
}

Value*
Converter::getSrc(nir_alu_src *src, uint8_t component)
{
   if (src->abs || src->negate) {
      ERROR("modifiers currently not supported on nir_alu_src\n");
      assert(false);
   }
   return getSrc(&src->src, src->swizzle[component]);
}

Value*
Converter::getSrc(nir_register *reg, uint8_t idx)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it == regDefs.end())
      return convert(reg)[idx];
   return it->second[idx];
}

Value*
Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
{
   if (src->is_ssa)
      return getSrc(src->ssa, idx);

   if (src->reg.indirect) {
      if (indirect)
         return getSrc(src->reg.indirect, idx);
      ERROR("no support for indirects.");
      assert(false);
      return NULL;
   }

   return getSrc(src->reg.reg, idx);
}

Value*
Converter::getSrc(nir_ssa_def *src, uint8_t idx)
{
   ImmediateMap::iterator iit = immediates.find(src->index);
   if (iit != immediates.end())
      return convert((*iit).second, idx);

   NirDefMap::iterator it = ssaDefs.find(src->index);
   if (it == ssaDefs.end()) {
      ERROR("SSA value %u not found\n", src->index);
      assert(false);
      return NULL;
   }
   return it->second[idx];
}

uint32_t
Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
{
   nir_const_value *offset = nir_src_as_const_value(*src);

   if (offset) {
      indirect = NULL;
      return offset[0].u32;
   }

   indirect = getSrc(src, idx, true);
   return 0;
}

uint32_t
Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
{
   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
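   // vec4-addressed files use 16 byte slots, so scale the indirect index
   // accordingly (a left shift by 4 multiplies by 16)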
   if (indirect && !isScalar)
      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
   return idx;
}

static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}

void
Converter::setInterpolate(nv50_ir_varying *var,
                          uint8_t mode,
                          bool centroid,
                          unsigned semantic)
{
   switch (mode) {
   case INTERP_MODE_FLAT:
      var->flat = 1;
      break;
   case INTERP_MODE_NONE:
      if (semantic == TGSI_SEMANTIC_COLOR)
         var->sc = 1;
      else if (semantic == TGSI_SEMANTIC_POSITION)
         var->linear = 1;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      var->linear = 1;
      break;
   case INTERP_MODE_SMOOTH:
      break;
   }
   var->centroid = centroid;
}

static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
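      // GS inputs are per-vertex arrays; one vertex worth of slots is the
      // total divided by the number of input vertices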
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}

bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;
   info->numOutputs = 0;

   // we have to fix up the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   info->numSysVals = 0;
   for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
      if (!(nir->info.system_values_read & 1ull << i))
         continue;

      info->sv[info->numSysVals].sn = tgsi_get_sysval_semantic(i);
      info->sv[info->numSysVals].si = 0;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   if (prog->getType() == Program::TYPE_COMPUTE)
      return true;

   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
                                      &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
                                      &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
                                      &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
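         // a 64 bit component takes up two 32 bit slot components; dvec3/4
         // spill over into a second slot, whose mask bits get shifted down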
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
                                      &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
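         // same 64 bit mask layout as for the inputs above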
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ull << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

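      // each CLIPDIST output slot holds four clip distances, so add one
      // vec4 output per group of four user clip planes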
      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}

uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
      ERROR("unknown intrinsic in getSlotAddress %s",
            nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

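   // 64 bit types take up two 32 bit slot components, so the component
   // index doubles and may carry over into the next vec4 slot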
   if (typeSizeof(ty) == 8) {
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
   assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);

   const nv50_ir_varying *vary = input ? info->in : info->out;
   return vary[idx].slot[slot] * 4;
}

Instruction *
Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
                    uint32_t base, uint8_t c, Value *indirect0,
                    Value *indirect1, bool patch)
{
   unsigned int tySize = typeSizeof(ty);

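   // 64 bit loads from const/buffer memory or with indirect addressing are
   // split into two 32 bit loads merged back into one 64 bit value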
   if (tySize == 8 &&
       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
      Value *lo = getSSA();
      Value *hi = getSSA();

      Instruction *loi =
         mkLoad(TYPE_U32, lo,
                mkSymbol(file, i, TYPE_U32, base + c * tySize),
                indirect0);
      loi->setIndirect(0, 1, indirect1);
      loi->perPatch = patch;

      Instruction *hii =
         mkLoad(TYPE_U32, hi,
                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
                indirect0);
      hii->setIndirect(0, 1, indirect1);
      hii->perPatch = patch;

      return mkOp2(OP_MERGE, ty, def, lo, hi);
   } else {
      Instruction *ld =
         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
      ld->setIndirect(0, 1, indirect1);
      ld->perPatch = patch;
      return ld;
   }
}

void
Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
                   DataType ty, Value *src, uint8_t idx, uint8_t c,
                   Value *indirect0, Value *indirect1)
{
   uint8_t size = typeSizeof(ty);
   uint32_t address = getSlotAddress(insn, idx, c);

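   // as in loadFrom(), 64 bit stores with indirect addressing are split
   // into two 32 bit stores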
   if (size == 8 && indirect0) {
      Value *split[2];
      mkSplit(split, 4, src);

      if (op == OP_EXPORT) {
         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
      }

      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
              split[0])->perPatch = info->out[idx].patch;
      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
              split[1])->perPatch = info->out[idx].patch;
   } else {
      if (op == OP_EXPORT)
         src = mkMov(getSSA(size), src, ty)->getDef(0);
      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
              src)->perPatch = info->out[idx].patch;
   }
}

bool
Converter::parseNIR()
{
   info->bin.tlsSpace = 0;
   info->io.clipDistances = nir->info.clip_distance_array_size;
   info->io.cullDistances = nir->info.cull_distance_array_size;

   switch(prog->getType()) {
   case Program::TYPE_COMPUTE:
      info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
      info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
      info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
      info->bin.smemSize = nir->info.cs.shared_size;
      break;
   case Program::TYPE_FRAGMENT:
      info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
      info->prop.fp.persampleInvocation =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
      info->prop.fp.readsSampleLocations =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
      info->prop.fp.usesSampleMaskIn =
         !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
      break;
   case Program::TYPE_GEOMETRY:
      info->prop.gp.inputPrim = nir->info.gs.input_primitive;
      info->prop.gp.instanceCount = nir->info.gs.invocations;
      info->prop.gp.maxVertices = nir->info.gs.vertices_out;
      info->prop.gp.outputPrim = nir->info.gs.output_primitive;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      if (nir->info.tess.primitive_mode == GL_ISOLINES)
         info->prop.tp.domain = GL_LINES;
      else
         info->prop.tp.domain = nir->info.tess.primitive_mode;
      info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
      info->prop.tp.outputPrim =
         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
      info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
      info->prop.tp.winding = !nir->info.tess.ccw;
      break;
   case Program::TYPE_VERTEX:
      info->prop.vp.usesDrawParameters =
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
      break;
   default:
      break;
   }

   return true;
}

bool
Converter::visit(nir_function *function)
{
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   if (info->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   nir_foreach_register(reg, &function->impl->registers) {
      if (reg->num_array_elems) {
         // TODO: packed variables would be nice, but MemoryOpt fails
         // replace 4 with reg->num_components
         uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
         regToLmemOffset[reg->index] = info->bin.tlsSpace;
         info->bin.tlsSpace += size;
      }
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if ((prog->getType() == Program::TYPE_VERTEX ||
        prog->getType() == Program::TYPE_TESSELLATION_EVAL)
       && info->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non-main functions this needs to be an OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}

bool
Converter::visit(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_block:
      return visit(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return visit(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return visit(nir_cf_node_as_loop(node));
   default:
      ERROR("unknown nir_cf_node type %u\n", node->type);
      return false;
   }
}

bool
Converter::visit(nir_block *block)
{
   if (!block->predecessors->entries && block->instr_list.is_empty())
      return true;

   BasicBlock *bb = convert(block);

   setPosition(bb, true);
   nir_foreach_instr(insn, block) {
      if (!visit(insn))
         return false;
   }
   return true;
}

bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert joinats if both branches end up at the end of the if
   // again. The reasons for this not to happen are breaks/continues/returns,
   // which have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastThen), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastElse), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}

bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

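   // PREBREAK/PRECONT set up the blocks that BREAK and CONT inside the loop
   // body will transfer control to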
   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      if (!insn || !insn->asFlow()) {
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}

bool
Converter::visit(nir_instr *insn)
{
   // we need an insertion point for on-the-fly generated immediate loads
   immInsertPos = bb->getExit();
   switch (insn->type) {
   case nir_instr_type_alu:
      return visit(nir_instr_as_alu(insn));
   case nir_instr_type_deref:
      return visit(nir_instr_as_deref(insn));
   case nir_instr_type_intrinsic:
      return visit(nir_instr_as_intrinsic(insn));
   case nir_instr_type_jump:
      return visit(nir_instr_as_jump(insn));
   case nir_instr_type_load_const:
      return visit(nir_instr_as_load_const(insn));
   case nir_instr_type_ssa_undef:
      return visit(nir_instr_as_ssa_undef(insn));
   case nir_instr_type_tex:
      return visit(nir_instr_as_tex(insn));
   default:
      ERROR("unknown nir_instr type %u\n", insn->type);
      return false;
   }
   return true;
}

SVSemantic
Converter::convert(nir_intrinsic_op intr)
{
   switch (intr) {
   case nir_intrinsic_load_base_vertex:
      return SV_BASEVERTEX;
   case nir_intrinsic_load_base_instance:
      return SV_BASEINSTANCE;
   case nir_intrinsic_load_draw_id:
      return SV_DRAWID;
   case nir_intrinsic_load_front_face:
      return SV_FACE;
   case nir_intrinsic_load_helper_invocation:
      return SV_THREAD_KILL;
   case nir_intrinsic_load_instance_id:
      return SV_INSTANCE_ID;
   case nir_intrinsic_load_invocation_id:
      return SV_INVOCATION_ID;
   case nir_intrinsic_load_local_group_size:
      return SV_NTID;
   case nir_intrinsic_load_local_invocation_id:
      return SV_TID;
   case nir_intrinsic_load_num_work_groups:
      return SV_NCTAID;
   case nir_intrinsic_load_patch_vertices_in:
      return SV_VERTEX_COUNT;
   case nir_intrinsic_load_primitive_id:
      return SV_PRIMITIVE_ID;
   case nir_intrinsic_load_sample_id:
      return SV_SAMPLE_INDEX;
   case nir_intrinsic_load_sample_mask_in:
      return SV_SAMPLE_MASK;
   case nir_intrinsic_load_sample_pos:
      return SV_SAMPLE_POS;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SV_LANEMASK_EQ;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SV_LANEMASK_GE;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SV_LANEMASK_GT;
   case nir_intrinsic_load_subgroup_le_mask:
      return SV_LANEMASK_LE;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SV_LANEMASK_LT;
   case nir_intrinsic_load_subgroup_invocation:
      return SV_LANEID;
   case nir_intrinsic_load_tess_coord:
      return SV_TESS_COORD;
   case nir_intrinsic_load_tess_level_inner:
      return SV_TESS_INNER;
   case nir_intrinsic_load_tess_level_outer:
      return SV_TESS_OUTER;
   case nir_intrinsic_load_vertex_id:
      return SV_VERTEX_ID;
   case nir_intrinsic_load_work_group_id:
      return SV_CTAID;
   default:
      ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
            nir_intrinsic_infos[intr].name);
      assert(false);
      return SV_LAST;
   }
}

bool
Converter::visit(nir_intrinsic_instr *insn)
{
   nir_intrinsic_op op = insn->intrinsic;
   const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
   unsigned dest_components = nir_intrinsic_dest_components(insn);

   switch (op) {
   case nir_intrinsic_load_uniform: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *indirect;
      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
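      // uniform offsets are counted in vec4 (16 byte) slots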
      for (uint8_t i = 0; i < dest_components; ++i) {
         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      Value *indirect;
      DataType dType = getSType(insn->src[0], false, false);
      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);

      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
            continue;

         uint8_t offset = 0;
         Value *src = getSrc(&insn->src[0], i);
         switch (prog->getType()) {
         case Program::TYPE_FRAGMENT: {
            if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
               // TGSI uses a different interface than NIR: TGSI stores the
               // depth value in the z component, NIR in x
               offset += 2;
               src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
            }
            break;
         }
         case Program::TYPE_GEOMETRY:
         case Program::TYPE_VERTEX: {
            if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
               mkMov(clipVtx[i], src);
               src = clipVtx[i];
            }
            break;
         }
         default:
            break;
         }

         storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
      }
      break;
   }
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_output: {
      LValues &newDefs = convert(&insn->dest);

      // FBFetch
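      // implemented as a TXF from the framebuffer treated as a 2D MS array
      // texture, addressed by pixel position, layer and sample index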
      if (prog->getType() == Program::TYPE_FRAGMENT &&
          op == nir_intrinsic_load_output) {
         std::vector<Value*> defs, srcs;
         uint8_t mask = 0;

         srcs.push_back(getSSA());
         srcs.push_back(getSSA());
         Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
         Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
         mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
         mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;

         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));

         for (uint8_t i = 0u; i < dest_components; ++i) {
            defs.push_back(newDefs[i]);
            mask |= 1 << i;
         }

         TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
         texi->tex.levelZero = 1;
         texi->tex.mask = mask;
         texi->tex.useOffsets = 0;
         texi->tex.r = 0xffff;
         texi->tex.s = 0xffff;

         info->prop.fp.readsFramebuffer = true;
         break;
      }

      const DataType dType = getDType(insn);
      Value *indirect;
      bool input = op != nir_intrinsic_load_output;
      operation nvirOp;
      uint32_t mode = 0;

      uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
      nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];

      // see load_barycentric_* handling
      if (prog->getType() == Program::TYPE_FRAGMENT) {
         mode = translateInterpMode(&vary, nvirOp);
         if (op == nir_intrinsic_load_interpolated_input) {
            ImmediateValue immMode;
            if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
               mode |= immMode.reg.data.u32;
         }
      }

      for (uint8_t i = 0u; i < dest_components; ++i) {
         uint32_t address = getSlotAddress(insn, idx, i);
         Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
         if (prog->getType() == Program::TYPE_FRAGMENT) {
            int s = 1;
            if (typeSizeof(dType) == 8) {
               Value *lo = getSSA();
               Value *hi = getSSA();
               Instruction *interp;

               interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
               interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
            } else {
               Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);
            }
         } else {
            mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
         }
      }
      break;
   }
   case nir_intrinsic_load_kernel_input: {
      assert(prog->getType() == Program::TYPE_COMPUTE);
      assert(insn->num_components == 1);

      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *indirect;
      uint32_t idx = getIndirect(insn, 0, 0, indirect, true);

      mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
      break;
   }
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_sample: {
      LValues &newDefs = convert(&insn->dest);
      uint32_t mode;

      if (op == nir_intrinsic_load_barycentric_centroid ||
          op == nir_intrinsic_load_barycentric_sample) {
         mode = NV50_IR_INTERP_CENTROID;
      } else if (op == nir_intrinsic_load_barycentric_at_offset) {
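         // clamp the offset to the supported range of [-0.5, 0.4375],
         // convert it to fixed point and pack the y offset into the high
         // and the x offset into the low 16 bits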
1794 Value *offs[2];
1795 for (uint8_t c = 0; c < 2; c++) {
1796 offs[c] = getScratch();
1797 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1798 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1799 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1800 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1801 }
1802 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1803
1804 mode = NV50_IR_INTERP_OFFSET;
1805 } else if (op == nir_intrinsic_load_barycentric_pixel) {
1806 mode = NV50_IR_INTERP_DEFAULT;
1807 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1808 info->prop.fp.readsSampleLocations = true;
1809 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1810 mode = NV50_IR_INTERP_OFFSET;
1811 } else {
1812 unreachable("all intrinsics already handled above");
1813 }
1814
1815 loadImm(newDefs[1], mode);
1816 break;
1817 }
1818 case nir_intrinsic_discard:
1819 mkOp(OP_DISCARD, TYPE_NONE, NULL);
1820 break;
1821 case nir_intrinsic_discard_if: {
1822 Value *pred = getSSA(1, FILE_PREDICATE);
1823 if (insn->num_components > 1) {
1824 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1825 assert(false);
1826 return false;
1827 }
1828 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1829 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1830 break;
1831 }
1832 case nir_intrinsic_load_base_vertex:
1833 case nir_intrinsic_load_base_instance:
1834 case nir_intrinsic_load_draw_id:
1835 case nir_intrinsic_load_front_face:
1836 case nir_intrinsic_load_helper_invocation:
1837 case nir_intrinsic_load_instance_id:
1838 case nir_intrinsic_load_invocation_id:
1839 case nir_intrinsic_load_local_group_size:
1840 case nir_intrinsic_load_local_invocation_id:
1841 case nir_intrinsic_load_num_work_groups:
1842 case nir_intrinsic_load_patch_vertices_in:
1843 case nir_intrinsic_load_primitive_id:
1844 case nir_intrinsic_load_sample_id:
1845 case nir_intrinsic_load_sample_mask_in:
1846 case nir_intrinsic_load_sample_pos:
1847 case nir_intrinsic_load_subgroup_eq_mask:
1848 case nir_intrinsic_load_subgroup_ge_mask:
1849 case nir_intrinsic_load_subgroup_gt_mask:
1850 case nir_intrinsic_load_subgroup_le_mask:
1851 case nir_intrinsic_load_subgroup_lt_mask:
1852 case nir_intrinsic_load_subgroup_invocation:
1853 case nir_intrinsic_load_tess_coord:
1854 case nir_intrinsic_load_tess_level_inner:
1855 case nir_intrinsic_load_tess_level_outer:
1856 case nir_intrinsic_load_vertex_id:
1857 case nir_intrinsic_load_work_group_id: {
1858 const DataType dType = getDType(insn);
1859 SVSemantic sv = convert(op);
1860 LValues &newDefs = convert(&insn->dest);
1861
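// sysvals are read as 32 bit values; 64 bit destinations are handled
// below by merging the value with a zero high word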
1862 for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
1863 Value *def;
1864 if (typeSizeof(dType) == 8)
1865 def = getSSA();
1866 else
1867 def = newDefs[i];
1868
1869 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1870 loadImm(def, 0u);
1871 } else {
1872 Symbol *sym = mkSysVal(sv, i);
1873 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1874 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1875 rdsv->perPatch = 1;
1876 }
1877
1878 if (typeSizeof(dType) == 8)
1879 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1880 }
1881 break;
1882 }
1883 // constants
1884 case nir_intrinsic_load_subgroup_size: {
1885 LValues &newDefs = convert(&insn->dest);
1886 loadImm(newDefs[0], 32u);
1887 break;
1888 }
1889 case nir_intrinsic_vote_all:
1890 case nir_intrinsic_vote_any:
1891 case nir_intrinsic_vote_ieq: {
1892 LValues &newDefs = convert(&insn->dest);
1893 Value *pred = getScratch(1, FILE_PREDICATE);
1894 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1895 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1896 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1897 break;
1898 }
1899 case nir_intrinsic_ballot: {
1900 LValues &newDefs = convert(&insn->dest);
1901 Value *pred = getSSA(1, FILE_PREDICATE);
1902 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1903 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1904 break;
1905 }
1906 case nir_intrinsic_read_first_invocation:
1907 case nir_intrinsic_read_invocation: {
1908 LValues &newDefs = convert(&insn->dest);
1909 const DataType dType = getDType(insn);
1910 Value *tmp = getScratch();
1911
1912 if (op == nir_intrinsic_read_first_invocation) {
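// VOTE ANY on an immediate 1 yields the mask of active lanes; BREV +
// BFIND in shift-amount mode then gives the lowest active lane id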
1913 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1914 mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
1915 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1916 } else
1917 tmp = getSrc(&insn->src[1], 0);
1918
1919 for (uint8_t i = 0; i < dest_components; ++i) {
1920 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1921 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1922 }
1923 break;
1924 }
1925 case nir_intrinsic_load_per_vertex_input: {
1926 const DataType dType = getDType(insn);
1927 LValues &newDefs = convert(&insn->dest);
1928 Value *indirectVertex;
1929 Value *indirectOffset;
1930 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1931 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1932
1933 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1934 mkImm(baseVertex), indirectVertex);
1935 for (uint8_t i = 0u; i < dest_components; ++i) {
1936 uint32_t address = getSlotAddress(insn, idx, i);
1937 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
1938 indirectOffset, vtxBase, info->in[idx].patch);
1939 }
1940 break;
1941 }
1942 case nir_intrinsic_load_per_vertex_output: {
1943 const DataType dType = getDType(insn);
1944 LValues &newDefs = convert(&insn->dest);
1945 Value *indirectVertex;
1946 Value *indirectOffset;
1947 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1948 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1949 Value *vtxBase = NULL;
1950
1951 if (indirectVertex)
1952 vtxBase = indirectVertex;
1953 else
1954 vtxBase = loadImm(NULL, baseVertex);
1955
1956 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
1957
1958 for (uint8_t i = 0u; i < dest_components; ++i) {
1959 uint32_t address = getSlotAddress(insn, idx, i);
1960 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
1961 indirectOffset, vtxBase, info->in[idx].patch);
1962 }
1963 break;
1964 }
1965 case nir_intrinsic_emit_vertex:
1966 if (info->io.genUserClip > 0)
1967 handleUserClipPlanes();
1968 // fallthrough
1969 case nir_intrinsic_end_primitive: {
1970 uint32_t idx = nir_intrinsic_stream_id(insn);
1971 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
1972 break;
1973 }
1974 case nir_intrinsic_load_ubo: {
1975 const DataType dType = getDType(insn);
1976 LValues &newDefs = convert(&insn->dest);
1977 Value *indirectIndex;
1978 Value *indirectOffset;
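// note: the NIR buffer index gets shifted up by one, presumably because
// constbuf slot 0 is reserved for driver internal data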
1979 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
1980 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
1981
1982 for (uint8_t i = 0u; i < dest_components; ++i) {
1983 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
1984 indirectOffset, indirectIndex);
1985 }
1986 break;
1987 }
1988 case nir_intrinsic_get_buffer_size: {
1989 LValues &newDefs = convert(&insn->dest);
1990 const DataType dType = getDType(insn);
1991 Value *indirectBuffer;
1992 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
1993
1994 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
1995 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
1996 break;
1997 }
1998 case nir_intrinsic_store_ssbo: {
1999 DataType sType = getSType(insn->src[0], false, false);
2000 Value *indirectBuffer;
2001 Value *indirectOffset;
2002 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2003 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2004
2005 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2006 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2007 continue;
2008 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2009 offset + i * typeSizeof(sType));
2010 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2011 ->setIndirect(0, 1, indirectBuffer);
2012 }
2013 info->io.globalAccess |= 0x2;
2014 break;
2015 }
2016 case nir_intrinsic_load_ssbo: {
2017 const DataType dType = getDType(insn);
2018 LValues &newDefs = convert(&insn->dest);
2019 Value *indirectBuffer;
2020 Value *indirectOffset;
2021 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2022 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2023
2024 for (uint8_t i = 0u; i < dest_components; ++i)
2025 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2026 indirectOffset, indirectBuffer);
2027
2028 info->io.globalAccess |= 0x1;
2029 break;
2030 }
2031 case nir_intrinsic_shared_atomic_add:
2032 case nir_intrinsic_shared_atomic_and:
2033 case nir_intrinsic_shared_atomic_comp_swap:
2034 case nir_intrinsic_shared_atomic_exchange:
2035 case nir_intrinsic_shared_atomic_or:
2036 case nir_intrinsic_shared_atomic_imax:
2037 case nir_intrinsic_shared_atomic_imin:
2038 case nir_intrinsic_shared_atomic_umax:
2039 case nir_intrinsic_shared_atomic_umin:
2040 case nir_intrinsic_shared_atomic_xor: {
2041 const DataType dType = getDType(insn);
2042 LValues &newDefs = convert(&insn->dest);
2043 Value *indirectOffset;
2044 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2045 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2046 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2047 if (op == nir_intrinsic_shared_atomic_comp_swap)
2048 atom->setSrc(2, getSrc(&insn->src[2], 0));
2049 atom->setIndirect(0, 0, indirectOffset);
2050 atom->subOp = getSubOp(op);
2051 break;
2052 }
2053 case nir_intrinsic_ssbo_atomic_add:
2054 case nir_intrinsic_ssbo_atomic_and:
2055 case nir_intrinsic_ssbo_atomic_comp_swap:
2056 case nir_intrinsic_ssbo_atomic_exchange:
2057 case nir_intrinsic_ssbo_atomic_or:
2058 case nir_intrinsic_ssbo_atomic_imax:
2059 case nir_intrinsic_ssbo_atomic_imin:
2060 case nir_intrinsic_ssbo_atomic_umax:
2061 case nir_intrinsic_ssbo_atomic_umin:
2062 case nir_intrinsic_ssbo_atomic_xor: {
2063 const DataType dType = getDType(insn);
2064 LValues &newDefs = convert(&insn->dest);
2065 Value *indirectBuffer;
2066 Value *indirectOffset;
2067 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2068 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2069
2070 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2071 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2072 getSrc(&insn->src[2], 0));
2073 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2074 atom->setSrc(2, getSrc(&insn->src[3], 0));
2075 atom->setIndirect(0, 0, indirectOffset);
2076 atom->setIndirect(0, 1, indirectBuffer);
2077 atom->subOp = getSubOp(op);
2078
2079 info->io.globalAccess |= 0x2;
2080 break;
2081 }
2082 case nir_intrinsic_global_atomic_add:
2083 case nir_intrinsic_global_atomic_and:
2084 case nir_intrinsic_global_atomic_comp_swap:
2085 case nir_intrinsic_global_atomic_exchange:
2086 case nir_intrinsic_global_atomic_or:
2087 case nir_intrinsic_global_atomic_imax:
2088 case nir_intrinsic_global_atomic_imin:
2089 case nir_intrinsic_global_atomic_umax:
2090 case nir_intrinsic_global_atomic_umin:
2091 case nir_intrinsic_global_atomic_xor: {
2092 const DataType dType = getDType(insn);
2093 LValues &newDefs = convert(&insn->dest);
2094 Value *address;
2095 uint32_t offset = getIndirect(&insn->src[0], 0, address);
2096
2097 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2098 Instruction *atom =
2099 mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2100 atom->setIndirect(0, 0, address);
2101 atom->subOp = getSubOp(op);
2102
2103 info->io.globalAccess |= 0x2;
2104 break;
2105 }
2106 case nir_intrinsic_bindless_image_atomic_add:
2107 case nir_intrinsic_bindless_image_atomic_and:
2108 case nir_intrinsic_bindless_image_atomic_comp_swap:
2109 case nir_intrinsic_bindless_image_atomic_exchange:
2110 case nir_intrinsic_bindless_image_atomic_imax:
2111 case nir_intrinsic_bindless_image_atomic_umax:
2112 case nir_intrinsic_bindless_image_atomic_imin:
2113 case nir_intrinsic_bindless_image_atomic_umin:
2114 case nir_intrinsic_bindless_image_atomic_or:
2115 case nir_intrinsic_bindless_image_atomic_xor:
2116 case nir_intrinsic_bindless_image_load:
2117 case nir_intrinsic_bindless_image_samples:
2118 case nir_intrinsic_bindless_image_size:
2119 case nir_intrinsic_bindless_image_store: {
2120 std::vector<Value*> srcs, defs;
2121 Value *indirect = getSrc(&insn->src[0], 0);
2122 DataType ty;
2123
2124 uint32_t mask = 0;
2125 TexInstruction::Target target =
2126 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2127 unsigned int argCount = getNIRArgCount(target);
2128 uint16_t location = 0;
2129
2130 if (opInfo.has_dest) {
2131 LValues &newDefs = convert(&insn->dest);
2132 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2133 defs.push_back(newDefs[i]);
2134 mask |= 1 << i;
2135 }
2136 }
2137
2138 switch (op) {
2139 case nir_intrinsic_bindless_image_atomic_add:
2140 case nir_intrinsic_bindless_image_atomic_and:
2141 case nir_intrinsic_bindless_image_atomic_comp_swap:
2142 case nir_intrinsic_bindless_image_atomic_exchange:
2143 case nir_intrinsic_bindless_image_atomic_imax:
2144 case nir_intrinsic_bindless_image_atomic_umax:
2145 case nir_intrinsic_bindless_image_atomic_imin:
2146 case nir_intrinsic_bindless_image_atomic_umin:
2147 case nir_intrinsic_bindless_image_atomic_or:
2148 case nir_intrinsic_bindless_image_atomic_xor:
2149 ty = getDType(insn);
2150 mask = 0x1;
2151 info->io.globalAccess |= 0x2;
2152 break;
2153 case nir_intrinsic_bindless_image_load:
2154 ty = TYPE_U32;
2155 info->io.globalAccess |= 0x1;
2156 break;
2157 case nir_intrinsic_bindless_image_store:
2158 ty = TYPE_U32;
2159 mask = 0xf;
2160 info->io.globalAccess |= 0x2;
2161 break;
2162 case nir_intrinsic_bindless_image_samples:
2163 mask = 0x8;
2164 ty = TYPE_U32;
2165 break;
2166 case nir_intrinsic_bindless_image_size:
2167 ty = TYPE_U32;
2168 break;
2169 default:
2170 unreachable("unhandled image opcode");
2171 break;
2172 }
2173
2174 // coords
2175 if (opInfo.num_srcs >= 2)
2176 for (unsigned int i = 0u; i < argCount; ++i)
2177 srcs.push_back(getSrc(&insn->src[1], i));
2178
2179 // the sample index is just another src added after the coords
2180 if (opInfo.num_srcs >= 3 && target.isMS())
2181 srcs.push_back(getSrc(&insn->src[2], 0));
2182
2183 if (opInfo.num_srcs >= 4) {
2184 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2185 for (uint8_t i = 0u; i < components; ++i)
2186 srcs.push_back(getSrc(&insn->src[3], i));
2187 }
2188
2189 if (opInfo.num_srcs >= 5)
2190 // 1 extra source for the atomic swap value
2191 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2192 srcs.push_back(getSrc(&insn->src[4], i));
2193
2194 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2196 texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2197 texi->tex.mask = mask;
2198 texi->tex.bindless = true;
2199 texi->cache = convert(nir_intrinsic_access(insn));
2200 texi->setType(ty);
2201 texi->subOp = getSubOp(op);
2202
2203 if (indirect)
2204 texi->setIndirectR(indirect);
2205
2206 break;
2207 }
2208 case nir_intrinsic_image_deref_atomic_add:
2209 case nir_intrinsic_image_deref_atomic_and:
2210 case nir_intrinsic_image_deref_atomic_comp_swap:
2211 case nir_intrinsic_image_deref_atomic_exchange:
2212 case nir_intrinsic_image_deref_atomic_imax:
2213 case nir_intrinsic_image_deref_atomic_umax:
2214 case nir_intrinsic_image_deref_atomic_imin:
2215 case nir_intrinsic_image_deref_atomic_umin:
2216 case nir_intrinsic_image_deref_atomic_or:
2217 case nir_intrinsic_image_deref_atomic_xor:
2218 case nir_intrinsic_image_deref_load:
2219 case nir_intrinsic_image_deref_samples:
2220 case nir_intrinsic_image_deref_size:
2221 case nir_intrinsic_image_deref_store: {
2222 const nir_variable *tex;
2223 std::vector<Value*> srcs, defs;
2224 Value *indirect;
2225 DataType ty;
2226
2227 uint32_t mask = 0;
2228 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2229 const glsl_type *type = deref->type;
2230 TexInstruction::Target target =
2231 convert((glsl_sampler_dim)type->sampler_dimensionality,
2232 type->sampler_array, type->sampler_shadow);
2233 unsigned int argCount = getNIRArgCount(target);
2234 uint16_t location = handleDeref(deref, indirect, tex);
2235
2236 if (opInfo.has_dest) {
2237 LValues &newDefs = convert(&insn->dest);
2238 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2239 defs.push_back(newDefs[i]);
2240 mask |= 1 << i;
2241 }
2242 }
2243
2244 switch (op) {
2245 case nir_intrinsic_image_deref_atomic_add:
2246 case nir_intrinsic_image_deref_atomic_and:
2247 case nir_intrinsic_image_deref_atomic_comp_swap:
2248 case nir_intrinsic_image_deref_atomic_exchange:
2249 case nir_intrinsic_image_deref_atomic_imax:
2250 case nir_intrinsic_image_deref_atomic_umax:
2251 case nir_intrinsic_image_deref_atomic_imin:
2252 case nir_intrinsic_image_deref_atomic_umin:
2253 case nir_intrinsic_image_deref_atomic_or:
2254 case nir_intrinsic_image_deref_atomic_xor:
2255 ty = getDType(insn);
2256 mask = 0x1;
2257 info->io.globalAccess |= 0x2;
2258 break;
2259 case nir_intrinsic_image_deref_load:
2260 ty = TYPE_U32;
2261 info->io.globalAccess |= 0x1;
2262 break;
2263 case nir_intrinsic_image_deref_store:
2264 ty = TYPE_U32;
2265 mask = 0xf;
2266 info->io.globalAccess |= 0x2;
2267 break;
2268 case nir_intrinsic_image_deref_samples:
2269 mask = 0x8;
2270 ty = TYPE_U32;
2271 break;
2272 case nir_intrinsic_image_deref_size:
2273 ty = TYPE_U32;
2274 break;
2275 default:
2276 unreachable("unhandled image opcode");
2277 break;
2278 }
2279
2280 // coords
2281 if (opInfo.num_srcs >= 2)
2282 for (unsigned int i = 0u; i < argCount; ++i)
2283 srcs.push_back(getSrc(&insn->src[1], i));
2284
2285 // the sample index is just another src added after the coords
2286 if (opInfo.num_srcs >= 3 && target.isMS())
2287 srcs.push_back(getSrc(&insn->src[2], 0));
2288
2289 if (opInfo.num_srcs >= 4) {
2290 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2291 for (uint8_t i = 0u; i < components; ++i)
2292 srcs.push_back(getSrc(&insn->src[3], i));
2293 }
2294
2295 if (opInfo.num_srcs >= 5)
2296 // 1 extra source for the atomic swap value
2297 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2298 srcs.push_back(getSrc(&insn->src[4], i));
2299
2300 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2301 texi->tex.bindless = false;
2302 texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(tex->data.image.format);
2303 texi->tex.mask = mask;
2304 texi->cache = getCacheModeFromVar(tex);
2305 texi->setType(ty);
2306 texi->subOp = getSubOp(op);
2307
2308 if (indirect)
2309 texi->setIndirectR(indirect);
2310
2311 break;
2312 }
2313 case nir_intrinsic_store_shared: {
2314 DataType sType = getSType(insn->src[0], false, false);
2315 Value *indirectOffset;
2316 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2317
2318 for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2319 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2320 continue;
2321 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2322 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2323 }
2324 break;
2325 }
2326 case nir_intrinsic_load_shared: {
2327 const DataType dType = getDType(insn);
2328 LValues &newDefs = convert(&insn->dest);
2329 Value *indirectOffset;
2330 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2331
2332 for (uint8_t i = 0u; i < dest_components; ++i)
2333 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2334
2335 break;
2336 }
2337 case nir_intrinsic_control_barrier: {
2338 // TODO: add flag to shader_info
2339 info->numBarriers = 1;
2340 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2341 bar->fixed = 1;
2342 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2343 break;
2344 }
2345 case nir_intrinsic_group_memory_barrier:
2346 case nir_intrinsic_memory_barrier:
2347 case nir_intrinsic_memory_barrier_buffer:
2348 case nir_intrinsic_memory_barrier_image:
2349 case nir_intrinsic_memory_barrier_shared: {
2350 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2351 bar->fixed = 1;
2352 bar->subOp = getSubOp(op);
2353 break;
2354 }
2355 case nir_intrinsic_memory_barrier_tcs_patch:
2356 break;
2357 case nir_intrinsic_shader_clock: {
2358 const DataType dType = getDType(insn);
2359 LValues &newDefs = convert(&insn->dest);
2360
2361 loadImm(newDefs[0], 0u);
2362 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2363 break;
2364 }
2365 case nir_intrinsic_load_global: {
2366 const DataType dType = getDType(insn);
2367 LValues &newDefs = convert(&insn->dest);
2368 Value *indirectOffset;
2369 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2370
2371 for (auto i = 0u; i < dest_components; ++i)
2372 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2373
2374 info->io.globalAccess |= 0x1;
2375 break;
2376 }
2377 case nir_intrinsic_store_global: {
2378 DataType sType = getSType(insn->src[0], false, false);
2379
2380 for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2381 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2382 continue;
2383 if (typeSizeof(sType) == 8) {
2384 Value *split[2];
2385 mkSplit(split, 4, getSrc(&insn->src[0], i));
2386
2387 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2388 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2389
2390 sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2391 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2392 } else {
2393 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2394 mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2395 }
2396 }
2397
2398 info->io.globalAccess |= 0x2;
2399 break;
2400 }
2401 default:
2402 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2403 return false;
2404 }
2405
2406 return true;
2407 }
2408
2409 bool
2410 Converter::visit(nir_jump_instr *insn)
2411 {
2412 switch (insn->type) {
2413 case nir_jump_return:
2414 // TODO: this only works in the main function
2415 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2416 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2417 break;
2418 case nir_jump_break:
2419 case nir_jump_continue: {
2420 bool isBreak = insn->type == nir_jump_break;
2421 nir_block *block = insn->instr.block;
2422 assert(!block->successors[1]);
2423 BasicBlock *target = convert(block->successors[0]);
2424 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2425 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2426 break;
2427 }
2428 default:
2429 ERROR("unknown nir_jump_type %u\n", insn->type);
2430 return false;
2431 }
2432
2433 return true;
2434 }
2435
2436 Value*
2437 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2438 {
2439 Value *val;
2440
2441 if (immInsertPos)
2442 setPosition(immInsertPos, true);
2443 else
2444 setPosition(bb, false);
2445
2446 switch (insn->def.bit_size) {
2447 case 64:
2448 val = loadImm(getSSA(8), insn->value[idx].u64);
2449 break;
2450 case 32:
2451 val = loadImm(getSSA(4), insn->value[idx].u32);
2452 break;
2453 case 16:
2454 val = loadImm(getSSA(2), insn->value[idx].u16);
2455 break;
2456 case 8:
2457 val = loadImm(getSSA(1), insn->value[idx].u8);
2458 break;
2459 default:
2460 unreachable("unhandled bit size!\n");
2461 }
2462 setPosition(bb, true);
2463 return val;
2464 }
2465
2466 bool
2467 Converter::visit(nir_load_const_instr *insn)
2468 {
2469 assert(insn->def.bit_size <= 64);
2470 immediates[insn->def.index] = insn;
2471 return true;
2472 }
2473
2474 #define DEFAULT_CHECKS \
2475 if (insn->dest.dest.ssa.num_components > 1) { \
2476 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2477 return false; \
2478 } \
2479 if (insn->dest.write_mask != 1) { \
2480 ERROR("nir_alu_instr only supported with a write_mask of 1!\n"); \
2481 return false; \
2482 }
2483 bool
2484 Converter::visit(nir_alu_instr *insn)
2485 {
2486 const nir_op op = insn->op;
2487 const nir_op_info &info = nir_op_infos[op];
2488 DataType dType = getDType(insn);
2489 const std::vector<DataType> sTypes = getSTypes(insn);
2490
2491 Instruction *oldPos = this->bb->getExit();
2492
2493 switch (op) {
2494 case nir_op_fabs:
2495 case nir_op_iabs:
2496 case nir_op_fadd:
2497 case nir_op_iadd:
2498 case nir_op_iand:
2499 case nir_op_fceil:
2500 case nir_op_fcos:
2501 case nir_op_fddx:
2502 case nir_op_fddx_coarse:
2503 case nir_op_fddx_fine:
2504 case nir_op_fddy:
2505 case nir_op_fddy_coarse:
2506 case nir_op_fddy_fine:
2507 case nir_op_fdiv:
2508 case nir_op_idiv:
2509 case nir_op_udiv:
2510 case nir_op_fexp2:
2511 case nir_op_ffloor:
2512 case nir_op_ffma:
2513 case nir_op_flog2:
2514 case nir_op_fmax:
2515 case nir_op_imax:
2516 case nir_op_umax:
2517 case nir_op_fmin:
2518 case nir_op_imin:
2519 case nir_op_umin:
2520 case nir_op_fmod:
2521 case nir_op_imod:
2522 case nir_op_umod:
2523 case nir_op_fmul:
2524 case nir_op_imul:
2525 case nir_op_imul_high:
2526 case nir_op_umul_high:
2527 case nir_op_fneg:
2528 case nir_op_ineg:
2529 case nir_op_inot:
2530 case nir_op_ior:
2531 case nir_op_pack_64_2x32_split:
2532 case nir_op_fpow:
2533 case nir_op_frcp:
2534 case nir_op_frem:
2535 case nir_op_irem:
2536 case nir_op_frsq:
2537 case nir_op_fsat:
2538 case nir_op_ishr:
2539 case nir_op_ushr:
2540 case nir_op_fsin:
2541 case nir_op_fsqrt:
2542 case nir_op_ftrunc:
2543 case nir_op_ishl:
2544 case nir_op_ixor: {
2545 DEFAULT_CHECKS;
2546 LValues &newDefs = convert(&insn->dest);
2547 operation preOp = preOperationNeeded(op);
2548 if (preOp != OP_NOP) {
2549 assert(info.num_inputs < 2);
2550 Value *tmp = getSSA(typeSizeof(dType));
2551 Instruction *i0 = mkOp(preOp, dType, tmp);
2552 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2553 if (info.num_inputs) {
2554 i0->setSrc(0, getSrc(&insn->src[0]));
2555 i1->setSrc(0, tmp);
2556 }
2557 i1->subOp = getSubOp(op);
2558 } else {
2559 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2560 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2561 i->setSrc(s, getSrc(&insn->src[s]));
2562 }
2563 i->subOp = getSubOp(op);
2564 }
2565 break;
2566 }
2567 case nir_op_ifind_msb:
2568 case nir_op_ufind_msb: {
2569 DEFAULT_CHECKS;
2570 LValues &newDefs = convert(&insn->dest);
2571 dType = sTypes[0];
2572 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2573 break;
2574 }
2575 case nir_op_fround_even: {
2576 DEFAULT_CHECKS;
2577 LValues &newDefs = convert(&insn->dest);
2578 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2579 break;
2580 }
2581 // convert instructions
2582 case nir_op_f2f32:
2583 case nir_op_f2i32:
2584 case nir_op_f2u32:
2585 case nir_op_i2f32:
2586 case nir_op_i2i32:
2587 case nir_op_u2f32:
2588 case nir_op_u2u32:
2589 case nir_op_f2f64:
2590 case nir_op_f2i64:
2591 case nir_op_f2u64:
2592 case nir_op_i2f64:
2593 case nir_op_i2i64:
2594 case nir_op_u2f64:
2595 case nir_op_u2u64: {
2596 DEFAULT_CHECKS;
2597 LValues &newDefs = convert(&insn->dest);
2598 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2599 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2600 i->rnd = ROUND_Z;
2601 i->sType = sTypes[0];
2602 break;
2603 }
2604 // compare instructions
2605 case nir_op_feq32:
2606 case nir_op_ieq32:
2607 case nir_op_fge32:
2608 case nir_op_ige32:
2609 case nir_op_uge32:
2610 case nir_op_flt32:
2611 case nir_op_ilt32:
2612 case nir_op_ult32:
2613 case nir_op_fne32:
2614 case nir_op_ine32: {
2615 DEFAULT_CHECKS;
2616 LValues &newDefs = convert(&insn->dest);
2617 Instruction *i = mkCmp(getOperation(op),
2618 getCondCode(op),
2619 dType,
2620 newDefs[0],
2621 dType,
2622 getSrc(&insn->src[0]),
2623 getSrc(&insn->src[1]));
2624 if (info.num_inputs == 3)
2625 i->setSrc(2, getSrc(&insn->src[2]));
2626 i->sType = sTypes[0];
2627 break;
2628 }
2629 // these are weird ALU ops and need special handling, because
2630 // 1. they are always component based
2631 // 2. they basically just merge multiple values into one data type
2632 case nir_op_mov:
2633 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2634 nir_reg_dest& reg = insn->dest.dest.reg;
2635 uint32_t goffset = regToLmemOffset[reg.reg->index];
2636 uint8_t comps = reg.reg->num_components;
2637 uint8_t size = reg.reg->bit_size / 8;
2638 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2639 uint32_t aoffset = csize * reg.base_offset;
2640 Value *indirect = NULL;
2641
2642 if (reg.indirect)
2643 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2644 getSrc(reg.indirect, 0), mkImm(csize));
2645
2646 for (uint8_t i = 0u; i < comps; ++i) {
2647 if (!((1u << i) & insn->dest.write_mask))
2648 continue;
2649
2650 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2651 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2652 }
2653 break;
2654 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2655 LValues &newDefs = convert(&insn->dest);
2656 nir_reg_src& reg = insn->src[0].src.reg;
2657 uint32_t goffset = regToLmemOffset[reg.reg->index];
2658 // uint8_t comps = reg.reg->num_components;
2659 uint8_t size = reg.reg->bit_size / 8;
2660 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2661 uint32_t aoffset = csize * reg.base_offset;
2662 Value *indirect = NULL;
2663
2664 if (reg.indirect)
2665 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2666
2667 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2668 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2669
2670 break;
2671 } else {
2672 LValues &newDefs = convert(&insn->dest);
2673 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2674 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2675 }
2676 }
2677 break;
2678 case nir_op_vec2:
2679 case nir_op_vec3:
2680 case nir_op_vec4:
2681 case nir_op_vec8:
2682 case nir_op_vec16: {
2683 LValues &newDefs = convert(&insn->dest);
2684 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2685 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2686 }
2687 break;
2688 }
2689 // (un)pack
2690 case nir_op_pack_64_2x32: {
2691 LValues &newDefs = convert(&insn->dest);
2692 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2693 merge->setSrc(0, getSrc(&insn->src[0], 0));
2694 merge->setSrc(1, getSrc(&insn->src[0], 1));
2695 break;
2696 }
2697 case nir_op_pack_half_2x16_split: {
2698 LValues &newDefs = convert(&insn->dest);
2699 Value *tmpH = getSSA();
2700 Value *tmpL = getSSA();
2701
2702 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2703 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2704 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2705 break;
2706 }
2707 case nir_op_unpack_half_2x16_split_x:
2708 case nir_op_unpack_half_2x16_split_y: {
2709 LValues &newDefs = convert(&insn->dest);
2710 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2711 if (op == nir_op_unpack_half_2x16_split_y)
2712 cvt->subOp = 1;
2713 break;
2714 }
2715 case nir_op_unpack_64_2x32: {
2716 LValues &newDefs = convert(&insn->dest);
2717 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2718 break;
2719 }
2720 case nir_op_unpack_64_2x32_split_x: {
2721 LValues &newDefs = convert(&insn->dest);
2722 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2723 break;
2724 }
2725 case nir_op_unpack_64_2x32_split_y: {
2726 LValues &newDefs = convert(&insn->dest);
2727 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2728 break;
2729 }
2730 // special instructions
2731 case nir_op_fsign:
2732 case nir_op_isign: {
2733 DEFAULT_CHECKS;
2734 DataType iType;
2735 if (::isFloatType(dType))
2736 iType = TYPE_F32;
2737 else
2738 iType = TYPE_S32;
2739
2740 LValues &newDefs = convert(&insn->dest);
2741 LValue *val0 = getScratch();
2742 LValue *val1 = getScratch();
2743 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2744 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2745
2746 if (dType == TYPE_F64) {
2747 mkOp2(OP_SUB, iType, val0, val0, val1);
2748 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2749 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
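// compute the 32 bit sign value, then sign-extend it into the high
// word with an arithmetic shift by 31 before merging to 64 bit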
2750 mkOp2(OP_SUB, iType, val0, val1, val0);
2751 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2752 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2753 } else if (::isFloatType(dType))
2754 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2755 else
2756 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2757 break;
2758 }
2759 case nir_op_fcsel:
2760 case nir_op_b32csel: {
2761 DEFAULT_CHECKS;
2762 LValues &newDefs = convert(&insn->dest);
2763 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2764 break;
2765 }
2766 case nir_op_ibitfield_extract:
2767 case nir_op_ubitfield_extract: {
2768 DEFAULT_CHECKS;
2769 Value *tmp = getSSA();
2770 LValues &newDefs = convert(&insn->dest);
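// EXTBF takes the bitfield position in bits 0..7 and its size in bits
// 8..15 of its second source, so pack offset (src1) and bits (src2)
// accordingly with INSBF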
2771 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2772 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2773 break;
2774 }
2775 case nir_op_bfm: {
2776 DEFAULT_CHECKS;
2777 LValues &newDefs = convert(&insn->dest);
2778 mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
2779 break;
2780 }
2781 case nir_op_bitfield_insert: {
2782 DEFAULT_CHECKS;
2783 LValues &newDefs = convert(&insn->dest);
2784 LValue *temp = getSSA();
2785 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2786 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2787 break;
2788 }
2789 case nir_op_bit_count: {
2790 DEFAULT_CHECKS;
2791 LValues &newDefs = convert(&insn->dest);
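// OP_POPCNT counts the set bits of src0 & src1, so just pass the same
// value twice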
2792 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2793 break;
2794 }
2795 case nir_op_bitfield_reverse: {
2796 DEFAULT_CHECKS;
2797 LValues &newDefs = convert(&insn->dest);
2798 mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
2799 break;
2800 }
2801 case nir_op_find_lsb: {
2802 DEFAULT_CHECKS;
2803 LValues &newDefs = convert(&insn->dest);
2804 Value *tmp = getSSA();
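// there is no find-LSB, so bit-reverse the value and find the MSB of
// the result instead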
2805 mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
2806 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2807 break;
2808 }
2809 // boolean conversions
2810 case nir_op_b2f32: {
2811 DEFAULT_CHECKS;
2812 LValues &newDefs = convert(&insn->dest);
2813 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2814 break;
2815 }
2816 case nir_op_b2f64: {
2817 DEFAULT_CHECKS;
2818 LValues &newDefs = convert(&insn->dest);
2819 Value *tmp = getSSA(4);
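// 0x3ff00000 is the high word of the IEEE-754 double 1.0; AND it with
// the all-ones boolean and merge with a zero low word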
2820 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2821 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2822 break;
2823 }
2824 case nir_op_f2b32:
2825 case nir_op_i2b32: {
2826 DEFAULT_CHECKS;
2827 LValues &newDefs = convert(&insn->dest);
2828 Value *src1;
2829 if (typeSizeof(sTypes[0]) == 8) {
2830 src1 = loadImm(getSSA(8), 0.0);
2831 } else {
2832 src1 = zero;
2833 }
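// floats use the unordered compare so that NaN != 0.0 yields true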
2834 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2835 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2836 break;
2837 }
2838 case nir_op_b2i32: {
2839 DEFAULT_CHECKS;
2840 LValues &newDefs = convert(&insn->dest);
2841 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2842 break;
2843 }
2844 case nir_op_b2i64: {
2845 DEFAULT_CHECKS;
2846 LValues &newDefs = convert(&insn->dest);
2847 LValue *def = getScratch();
2848 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2849 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2850 break;
2851 }
2852 default:
2853 ERROR("unknown nir_op %s\n", info.name);
2854 return false;
2855 }
2856
2857 if (!oldPos) {
2858 oldPos = this->bb->getEntry();
2859 if (unlikely(!oldPos))
2860 return true;
2861 oldPos->precise = insn->exact;
2862 }
2863
2864
2865 while (oldPos->next) {
2866 oldPos = oldPos->next;
2867 oldPos->precise = insn->exact;
2868 }
2869 oldPos->saturate = insn->dest.saturate;
2870
2871 return true;
2872 }
2873 #undef DEFAULT_CHECKS
2874
2875 bool
2876 Converter::visit(nir_ssa_undef_instr *insn)
2877 {
2878 LValues &newDefs = convert(&insn->def);
2879 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2880 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2881 }
2882 return true;
2883 }
2884
2885 #define CASE_SAMPLER(ty) \
2886 case GLSL_SAMPLER_DIM_ ## ty : \
2887 if (isArray && !isShadow) \
2888 return TEX_TARGET_ ## ty ## _ARRAY; \
2889 else if (!isArray && isShadow) \
2890 return TEX_TARGET_## ty ## _SHADOW; \
2891 else if (isArray && isShadow) \
2892 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2893 else \
2894 return TEX_TARGET_ ## ty
2895
2896 TexTarget
2897 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2898 {
2899 switch (dim) {
2900 CASE_SAMPLER(1D);
2901 CASE_SAMPLER(2D);
2902 CASE_SAMPLER(CUBE);
2903 case GLSL_SAMPLER_DIM_3D:
2904 return TEX_TARGET_3D;
2905 case GLSL_SAMPLER_DIM_MS:
2906 if (isArray)
2907 return TEX_TARGET_2D_MS_ARRAY;
2908 return TEX_TARGET_2D_MS;
2909 case GLSL_SAMPLER_DIM_RECT:
2910 if (isShadow)
2911 return TEX_TARGET_RECT_SHADOW;
2912 return TEX_TARGET_RECT;
2913 case GLSL_SAMPLER_DIM_BUF:
2914 return TEX_TARGET_BUFFER;
2915 case GLSL_SAMPLER_DIM_EXTERNAL:
2916 return TEX_TARGET_2D;
2917 default:
2918 ERROR("unknown glsl_sampler_dim %u\n", dim);
2919 assert(false);
2920 return TEX_TARGET_COUNT;
2921 }
2922 }
2923 #undef CASE_SAMPLER
2924
2925 Value*
2926 Converter::applyProjection(Value *src, Value *proj)
2927 {
2928 if (!proj)
2929 return src;
2930 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
2931 }
2932
2933 unsigned int
2934 Converter::getNIRArgCount(TexInstruction::Target& target)
2935 {
2936 unsigned int result = target.getArgCount();
2937 if (target.isCube() && target.isArray())
2938 result--;
2939 if (target.isMS())
2940 result--;
2941 return result;
2942 }
2943
2944 uint16_t
2945 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
2946 {
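// walk the deref chain up to the variable, summing up the constant
// offsets directly and collecting (stride, index) pairs for the
// indirect parts, which get folded into a single indirect below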
2947 typedef std::pair<uint32_t,Value*> DerefPair;
2948 std::list<DerefPair> derefs;
2949
2950 uint16_t result = 0;
2951 while (deref->deref_type != nir_deref_type_var) {
2952 switch (deref->deref_type) {
2953 case nir_deref_type_array: {
2954 Value *indirect;
2955 uint8_t size = type_size(deref->type, true);
2956 result += size * getIndirect(&deref->arr.index, 0, indirect);
2957
2958 if (indirect) {
2959 derefs.push_front(std::make_pair(size, indirect));
2960 }
2961
2962 break;
2963 }
2964 case nir_deref_type_struct: {
2965 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
2966 break;
2967 }
2968 case nir_deref_type_var:
2969 default:
2970 unreachable("nir_deref_type_var reached in handleDeref!");
2971 break;
2972 }
2973 deref = nir_deref_instr_parent(deref);
2974 }
2975
2976 indirect = NULL;
2977 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
2978 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
2979 if (indirect)
2980 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
2981 else
2982 indirect = offset;
2983 }
2984
2985 tex = nir_deref_instr_get_variable(deref);
2986 assert(tex);
2987
2988 return result + tex->data.driver_location;
2989 }
2990
2991 CacheMode
2992 Converter::convert(enum gl_access_qualifier access)
2993 {
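// map the GL access qualifiers to cache modes: volatile bypasses the
// caches (CV), coherent presumably only caches at the global level
// (CG), everything else can use the default cache-all mode (CA)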
2994 switch (access) {
2995 case ACCESS_VOLATILE:
2996 return CACHE_CV;
2997 case ACCESS_COHERENT:
2998 return CACHE_CG;
2999 default:
3000 return CACHE_CA;
3001 }
3002 }
3003
3004 CacheMode
3005 Converter::getCacheModeFromVar(const nir_variable *var)
3006 {
3007 return convert(var->data.access);
3008 }
3009
3010 bool
3011 Converter::visit(nir_tex_instr *insn)
3012 {
3013 switch (insn->op) {
3014 case nir_texop_lod:
3015 case nir_texop_query_levels:
3016 case nir_texop_tex:
3017 case nir_texop_texture_samples:
3018 case nir_texop_tg4:
3019 case nir_texop_txb:
3020 case nir_texop_txd:
3021 case nir_texop_txf:
3022 case nir_texop_txf_ms:
3023 case nir_texop_txl:
3024 case nir_texop_txs: {
3025 LValues &newDefs = convert(&insn->dest);
3026 std::vector<Value*> srcs;
3027 std::vector<Value*> defs;
3028 std::vector<nir_src*> offsets;
3029 uint8_t mask = 0;
3030 bool lz = false;
3031 Value *proj = NULL;
3032 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3033 operation op = getOperation(insn->op);
3034
3035 int r, s;
3036 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3037 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3038 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3039 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3040 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3041 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3042 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3043 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3044 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3045 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3046 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3047 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3048 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3049
3050 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3051 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3052
3053 if (projIdx != -1)
3054 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3055
3056 srcs.resize(insn->coord_components);
3057 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3058 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3059
3060 // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3061 if (insn->coord_components) {
3062 uint32_t argCount = target.getArgCount();
3063
3064 if (target.isMS())
3065 argCount -= 1;
3066
3067 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3068 srcs.push_back(getSSA());
3069 }
3070
3071 if (insn->op == nir_texop_texture_samples)
3072 srcs.push_back(zero);
3073 else if (!insn->num_srcs)
3074 srcs.push_back(loadImm(NULL, 0));
3075 if (biasIdx != -1)
3076 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3077 if (lodIdx != -1)
3078 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3079 else if (op == OP_TXF)
3080 lz = true;
3081 if (msIdx != -1)
3082 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3083 if (offsetIdx != -1)
3084 offsets.push_back(&insn->src[offsetIdx].src);
3085 if (compIdx != -1)
3086 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3087 if (texOffIdx != -1) {
3088 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3089 texOffIdx = srcs.size() - 1;
3090 }
3091 if (sampOffIdx != -1) {
3092 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3093 sampOffIdx = srcs.size() - 1;
3094 }
3095 if (bindless) {
3096 // currently we only use the lower 32 bits of the 64 bit handle
3097 Value *split[2];
3098 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3099
3100 mkSplit(split, 4, handle);
3101
3102 srcs.push_back(split[0]);
3103 texOffIdx = srcs.size() - 1;
3104 }
3105
3106 r = bindless ? 0xff : insn->texture_index;
3107 s = bindless ? 0x1f : insn->sampler_index;
3108
3109 defs.resize(newDefs.size());
3110 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3111 defs[d] = newDefs[d];
3112 mask |= 1 << d;
3113 }
3114 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3115 lz = true;
3116
3117 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3118 texi->tex.levelZero = lz;
3119 texi->tex.mask = mask;
3120 texi->tex.bindless = bindless;
3121
3122 if (texOffIdx != -1)
3123 texi->tex.rIndirectSrc = texOffIdx;
3124 if (sampOffIdx != -1)
3125 texi->tex.sIndirectSrc = sampOffIdx;
3126
3127 switch (insn->op) {
3128 case nir_texop_tg4:
3129 if (!target.isShadow())
3130 texi->tex.gatherComp = insn->component;
3131 break;
3132 case nir_texop_txs:
3133 texi->tex.query = TXQ_DIMS;
3134 break;
3135 case nir_texop_texture_samples:
3136 texi->tex.mask = 0x4;
3137 texi->tex.query = TXQ_TYPE;
3138 break;
3139 case nir_texop_query_levels:
3140 texi->tex.mask = 0x8;
3141 texi->tex.query = TXQ_DIMS;
3142 break;
3143 default:
3144 break;
3145 }
3146
3147 texi->tex.useOffsets = offsets.size();
3148 if (texi->tex.useOffsets) {
3149 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3150 for (uint32_t c = 0u; c < 3; ++c) {
3151 uint8_t s2 = std::min(c, target.getDim() - 1);
3152 texi->offset[s][c].set(getSrc(offsets[s], s2));
3153 texi->offset[s][c].setInsn(texi);
3154 }
3155 }
3156 }
3157
3158 if (op == OP_TXG && offsetIdx == -1) {
3159 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3160 texi->tex.useOffsets = 4;
3161 setPosition(texi, false);
3162 for (uint8_t i = 0; i < 4; ++i) {
3163 for (uint8_t j = 0; j < 2; ++j) {
3164 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3165 texi->offset[i][j].setInsn(texi);
3166 }
3167 }
3168 setPosition(texi, true);
3169 }
3170 }
3171
3172 if (ddxIdx != -1 && ddyIdx != -1) {
3173 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3174 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3175 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3176 }
3177 }
3178
3179 break;
3180 }
3181 default:
3182 ERROR("unknown nir_texop %u\n", insn->op);
3183 return false;
3184 }
3185 return true;
3186 }
3187
3188 bool
3189 Converter::visit(nir_deref_instr *deref)
3190 {
3191 // we just ignore these, because image intrinsics are the only place where
3192 // we should end up with deref sources, and those have to backtrack anyway
3193 // to get at the nir_variable. This code only exists to handle some special
3194 // cases.
3195 switch (deref->deref_type) {
3196 case nir_deref_type_array:
3197 case nir_deref_type_struct:
3198 case nir_deref_type_var:
3199 break;
3200 default:
3201 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3202 return false;
3203 }
3204 return true;
3205 }
3206
3207 bool
3208 Converter::run()
3209 {
3210 bool progress;
3211
3212 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3213 nir_print_shader(nir, stderr);
3214
3215 struct nir_lower_subgroups_options subgroup_options = {
3216 .subgroup_size = 32,
3217 .ballot_bit_size = 32,
3218 };
3219
3220 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3221 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3222 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3223 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3224 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3225 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3226 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3227
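// run the cleanup and optimization passes until none of them makes
// further progress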
3228 do {
3229 progress = false;
3230 NIR_PASS(progress, nir, nir_copy_prop);
3231 NIR_PASS(progress, nir, nir_opt_remove_phis);
3232 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3233 NIR_PASS(progress, nir, nir_opt_cse);
3234 NIR_PASS(progress, nir, nir_opt_algebraic);
3235 NIR_PASS(progress, nir, nir_opt_constant_folding);
3236 NIR_PASS(progress, nir, nir_copy_prop);
3237 NIR_PASS(progress, nir, nir_opt_dce);
3238 NIR_PASS(progress, nir, nir_opt_dead_cf);
3239 } while (progress);
3240
3241 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3242 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3243 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3244 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3245
3246 // Garbage collect dead instructions
3247 nir_sweep(nir);
3248
3249 if (!parseNIR()) {
3250 ERROR("Couldn't parse NIR!\n");
3251 return false;
3252 }
3253
3254 if (!assignSlots()) {
3255 ERROR("Couldn't assign slots!\n");
3256 return false;
3257 }
3258
3259 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3260 nir_print_shader(nir, stderr);
3261
3262 nir_foreach_function(function, nir) {
3263 if (!visit(function))
3264 return false;
3265 }
3266
3267 return true;
3268 }
3269
3270 } // unnamed namespace
3271
3272 namespace nv50_ir {
3273
3274 bool
3275 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3276 {
3277 nir_shader *nir = (nir_shader*)info->bin.source;
3278 Converter converter(this, nir, info);
3279 bool result = converter.run();
3280 if (!result)
3281 return result;
3282 LoweringHelper lowering;
3283 lowering.run(this);
3284 tlsSize = info->bin.tlsSpace;
3285 return result;
3286 }
3287
3288 } // namespace nv50_ir