nv50/ir/nir: implement load/store_global
src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <cstring>
40 #include <list>
41 #include <vector>
42
43 namespace {
44
45 #if __cplusplus >= 201103L
46 using std::hash;
47 using std::unordered_map;
48 #else
49 using std::tr1::hash;
50 using std::tr1::unordered_map;
51 #endif
52
53 using namespace nv50_ir;
54
55 int
56 type_size(const struct glsl_type *type, bool bindless)
57 {
58 return glsl_count_attribute_slots(type, false);
59 }
60
61 class Converter : public ConverterCommon
62 {
63 public:
64 Converter(Program *, nir_shader *, nv50_ir_prog_info *);
65
66 bool run();
67 private:
68 typedef std::vector<LValue*> LValues;
69 typedef unordered_map<unsigned, LValues> NirDefMap;
70 typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
71 typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
72 typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
73
74 CacheMode convert(enum gl_access_qualifier);
75 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
76 LValues& convert(nir_alu_dest *);
77 BasicBlock* convert(nir_block *);
78 LValues& convert(nir_dest *);
79 SVSemantic convert(nir_intrinsic_op);
80 Value* convert(nir_load_const_instr*, uint8_t);
81 LValues& convert(nir_register *);
82 LValues& convert(nir_ssa_def *);
83
84 ImgFormat convertGLImgFormat(GLuint);
85
86 Value* getSrc(nir_alu_src *, uint8_t component = 0);
87 Value* getSrc(nir_register *, uint8_t);
88 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
89 Value* getSrc(nir_ssa_def *, uint8_t);
90
91 // returned value is the constant part of the given source (either the
92 // nir_src or the selected source component of an intrinsic). Even though
93 // this is mostly an optimization to be able to skip indirects in a few
94 // cases, sometimes we require immediate values or set some fields on
95 // instructions (e.g. tex) in order for codegen to consume those.
96 // If the found value has no constant part, the value is instead returned
97 // through the Value output parameter.
98 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
99 // isScalar indicates that the addressing is scalar; vec4 addressing is
100 // assumed otherwise
101 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
102 bool isScalar = false);
103
104 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
105
106 void setInterpolate(nv50_ir_varying *,
107 uint8_t,
108 bool centroid,
109 unsigned semantics);
110
111 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
112 uint8_t c, Value *indirect0 = NULL,
113 Value *indirect1 = NULL, bool patch = false);
114 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
115 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
116 Value *indirect1 = NULL);
117
118 bool isFloatType(nir_alu_type);
119 bool isSignedType(nir_alu_type);
120 bool isResultFloat(nir_op);
121 bool isResultSigned(nir_op);
122
123 DataType getDType(nir_alu_instr *);
124 DataType getDType(nir_intrinsic_instr *);
125 DataType getDType(nir_intrinsic_instr *, bool isSigned);
126 DataType getDType(nir_op, uint8_t);
127
128 std::vector<DataType> getSTypes(nir_alu_instr *);
129 DataType getSType(nir_src &, bool isFloat, bool isSigned);
130
131 operation getOperation(nir_intrinsic_op);
132 operation getOperation(nir_op);
133 operation getOperation(nir_texop);
134 operation preOperationNeeded(nir_op);
135
136 int getSubOp(nir_intrinsic_op);
137 int getSubOp(nir_op);
138
139 CondCode getCondCode(nir_op);
140
141 bool assignSlots();
142 bool parseNIR();
143
144 bool visit(nir_alu_instr *);
145 bool visit(nir_block *);
146 bool visit(nir_cf_node *);
147 bool visit(nir_deref_instr *);
148 bool visit(nir_function *);
149 bool visit(nir_if *);
150 bool visit(nir_instr *);
151 bool visit(nir_intrinsic_instr *);
152 bool visit(nir_jump_instr *);
153 bool visit(nir_load_const_instr*);
154 bool visit(nir_loop *);
155 bool visit(nir_ssa_undef_instr *);
156 bool visit(nir_tex_instr *);
157
158 // tex stuff
159 Value* applyProjection(Value *src, Value *proj);
160 unsigned int getNIRArgCount(TexInstruction::Target&);
161
162 // image stuff
163 uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
164 CacheMode getCacheModeFromVar(const nir_variable *);
165
166 nir_shader *nir;
167
168 NirDefMap ssaDefs;
169 NirDefMap regDefs;
170 ImmediateMap immediates;
171 NirArrayLMemOffsets regToLmemOffset;
172 NirBlockMap blocks;
173 unsigned int curLoopDepth;
174
175 BasicBlock *exit;
176 Value *zero;
177 Instruction *immInsertPos;
178
179 int clipVertexOutput;
180
181 union {
182 struct {
183 Value *position;
184 } fp;
185 };
186 };
187
188 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
189 : ConverterCommon(prog, info),
190 nir(nir),
191 curLoopDepth(0),
192 clipVertexOutput(-1)
193 {
194 zero = mkImm((uint32_t)0);
195 }
196
197 BasicBlock *
198 Converter::convert(nir_block *block)
199 {
200 NirBlockMap::iterator it = blocks.find(block->index);
201 if (it != blocks.end())
202 return it->second;
203
204 BasicBlock *bb = new BasicBlock(func);
205 blocks[block->index] = bb;
206 return bb;
207 }
208
209 bool
210 Converter::isFloatType(nir_alu_type type)
211 {
212 return nir_alu_type_get_base_type(type) == nir_type_float;
213 }
214
215 bool
216 Converter::isSignedType(nir_alu_type type)
217 {
218 return nir_alu_type_get_base_type(type) == nir_type_int;
219 }
220
221 bool
222 Converter::isResultFloat(nir_op op)
223 {
224 const nir_op_info &info = nir_op_infos[op];
225 if (info.output_type != nir_type_invalid)
226 return isFloatType(info.output_type);
227
228 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
229 assert(false);
230 return true;
231 }
232
233 bool
234 Converter::isResultSigned(nir_op op)
235 {
236 switch (op) {
237 // there is no umul and we get wrong results if we treat all muls as signed
238 case nir_op_imul:
239 case nir_op_inot:
240 return false;
241 default:
242 const nir_op_info &info = nir_op_infos[op];
243 if (info.output_type != nir_type_invalid)
244 return isSignedType(info.output_type);
245 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
246 assert(false);
247 return true;
248 }
249 }
250
251 DataType
252 Converter::getDType(nir_alu_instr *insn)
253 {
254 if (insn->dest.dest.is_ssa)
255 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
256 else
257 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
258 }
259
260 DataType
261 Converter::getDType(nir_intrinsic_instr *insn)
262 {
263 bool isSigned;
264 switch (insn->intrinsic) {
265 case nir_intrinsic_shared_atomic_imax:
266 case nir_intrinsic_shared_atomic_imin:
267 case nir_intrinsic_ssbo_atomic_imax:
268 case nir_intrinsic_ssbo_atomic_imin:
269 isSigned = true;
270 break;
271 default:
272 isSigned = false;
273 break;
274 }
275
276 return getDType(insn, isSigned);
277 }
278
279 DataType
280 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
281 {
282 if (insn->dest.is_ssa)
283 return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
284 else
285 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
286 }
287
288 DataType
289 Converter::getDType(nir_op op, uint8_t bitSize)
290 {
291 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
292 if (ty == TYPE_NONE) {
293 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
294 assert(false);
295 }
296 return ty;
297 }
298
299 std::vector<DataType>
300 Converter::getSTypes(nir_alu_instr *insn)
301 {
302 const nir_op_info &info = nir_op_infos[insn->op];
303 std::vector<DataType> res(info.num_inputs);
304
305 for (uint8_t i = 0; i < info.num_inputs; ++i) {
306 if (info.input_types[i] != nir_type_invalid) {
307 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
308 } else {
309 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
310 assert(false);
311 res[i] = TYPE_NONE;
312 break;
313 }
314 }
315
316 return res;
317 }
318
319 DataType
320 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
321 {
322 uint8_t bitSize;
323 if (src.is_ssa)
324 bitSize = src.ssa->bit_size;
325 else
326 bitSize = src.reg.reg->bit_size;
327
328 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
329 if (ty == TYPE_NONE) {
330 const char *str;
331 if (isFloat)
332 str = "float";
333 else if (isSigned)
334 str = "int";
335 else
336 str = "uint";
337 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
338 assert(false);
339 }
340 return ty;
341 }
342
343 operation
344 Converter::getOperation(nir_op op)
345 {
346 switch (op) {
347 // basic ops with float and int variants
348 case nir_op_fabs:
349 case nir_op_iabs:
350 return OP_ABS;
351 case nir_op_fadd:
352 case nir_op_iadd:
353 return OP_ADD;
354 case nir_op_iand:
355 return OP_AND;
356 case nir_op_ifind_msb:
357 case nir_op_ufind_msb:
358 return OP_BFIND;
359 case nir_op_fceil:
360 return OP_CEIL;
361 case nir_op_fcos:
362 return OP_COS;
363 case nir_op_f2f32:
364 case nir_op_f2f64:
365 case nir_op_f2i32:
366 case nir_op_f2i64:
367 case nir_op_f2u32:
368 case nir_op_f2u64:
369 case nir_op_i2f32:
370 case nir_op_i2f64:
371 case nir_op_i2i32:
372 case nir_op_i2i64:
373 case nir_op_u2f32:
374 case nir_op_u2f64:
375 case nir_op_u2u32:
376 case nir_op_u2u64:
377 return OP_CVT;
378 case nir_op_fddx:
379 case nir_op_fddx_coarse:
380 case nir_op_fddx_fine:
381 return OP_DFDX;
382 case nir_op_fddy:
383 case nir_op_fddy_coarse:
384 case nir_op_fddy_fine:
385 return OP_DFDY;
386 case nir_op_fdiv:
387 case nir_op_idiv:
388 case nir_op_udiv:
389 return OP_DIV;
390 case nir_op_fexp2:
391 return OP_EX2;
392 case nir_op_ffloor:
393 return OP_FLOOR;
394 case nir_op_ffma:
395 return OP_FMA;
396 case nir_op_flog2:
397 return OP_LG2;
398 case nir_op_fmax:
399 case nir_op_imax:
400 case nir_op_umax:
401 return OP_MAX;
402 case nir_op_pack_64_2x32_split:
403 return OP_MERGE;
404 case nir_op_fmin:
405 case nir_op_imin:
406 case nir_op_umin:
407 return OP_MIN;
408 case nir_op_fmod:
409 case nir_op_imod:
410 case nir_op_umod:
411 case nir_op_frem:
412 case nir_op_irem:
413 return OP_MOD;
414 case nir_op_fmul:
415 case nir_op_imul:
416 case nir_op_imul_high:
417 case nir_op_umul_high:
418 return OP_MUL;
419 case nir_op_fneg:
420 case nir_op_ineg:
421 return OP_NEG;
422 case nir_op_inot:
423 return OP_NOT;
424 case nir_op_ior:
425 return OP_OR;
426 case nir_op_fpow:
427 return OP_POW;
428 case nir_op_frcp:
429 return OP_RCP;
430 case nir_op_frsq:
431 return OP_RSQ;
432 case nir_op_fsat:
433 return OP_SAT;
434 case nir_op_feq32:
435 case nir_op_ieq32:
436 case nir_op_fge32:
437 case nir_op_ige32:
438 case nir_op_uge32:
439 case nir_op_flt32:
440 case nir_op_ilt32:
441 case nir_op_ult32:
442 case nir_op_fne32:
443 case nir_op_ine32:
444 return OP_SET;
445 case nir_op_ishl:
446 return OP_SHL;
447 case nir_op_ishr:
448 case nir_op_ushr:
449 return OP_SHR;
450 case nir_op_fsin:
451 return OP_SIN;
452 case nir_op_fsqrt:
453 return OP_SQRT;
454 case nir_op_fsub:
455 case nir_op_isub:
456 return OP_SUB;
457 case nir_op_ftrunc:
458 return OP_TRUNC;
459 case nir_op_ixor:
460 return OP_XOR;
461 default:
462 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
463 assert(false);
464 return OP_NOP;
465 }
466 }
467
468 operation
469 Converter::getOperation(nir_texop op)
470 {
471 switch (op) {
472 case nir_texop_tex:
473 return OP_TEX;
474 case nir_texop_lod:
475 return OP_TXLQ;
476 case nir_texop_txb:
477 return OP_TXB;
478 case nir_texop_txd:
479 return OP_TXD;
480 case nir_texop_txf:
481 case nir_texop_txf_ms:
482 return OP_TXF;
483 case nir_texop_tg4:
484 return OP_TXG;
485 case nir_texop_txl:
486 return OP_TXL;
487 case nir_texop_query_levels:
488 case nir_texop_texture_samples:
489 case nir_texop_txs:
490 return OP_TXQ;
491 default:
492 ERROR("couldn't get operation for nir_texop %u\n", op);
493 assert(false);
494 return OP_NOP;
495 }
496 }
497
498 operation
499 Converter::getOperation(nir_intrinsic_op op)
500 {
501 switch (op) {
502 case nir_intrinsic_emit_vertex:
503 return OP_EMIT;
504 case nir_intrinsic_end_primitive:
505 return OP_RESTART;
506 case nir_intrinsic_bindless_image_atomic_add:
507 case nir_intrinsic_image_atomic_add:
508 case nir_intrinsic_image_deref_atomic_add:
509 case nir_intrinsic_bindless_image_atomic_and:
510 case nir_intrinsic_image_atomic_and:
511 case nir_intrinsic_image_deref_atomic_and:
512 case nir_intrinsic_bindless_image_atomic_comp_swap:
513 case nir_intrinsic_image_atomic_comp_swap:
514 case nir_intrinsic_image_deref_atomic_comp_swap:
515 case nir_intrinsic_bindless_image_atomic_exchange:
516 case nir_intrinsic_image_atomic_exchange:
517 case nir_intrinsic_image_deref_atomic_exchange:
518 case nir_intrinsic_bindless_image_atomic_max:
519 case nir_intrinsic_image_atomic_max:
520 case nir_intrinsic_image_deref_atomic_max:
521 case nir_intrinsic_bindless_image_atomic_min:
522 case nir_intrinsic_image_atomic_min:
523 case nir_intrinsic_image_deref_atomic_min:
524 case nir_intrinsic_bindless_image_atomic_or:
525 case nir_intrinsic_image_atomic_or:
526 case nir_intrinsic_image_deref_atomic_or:
527 case nir_intrinsic_bindless_image_atomic_xor:
528 case nir_intrinsic_image_atomic_xor:
529 case nir_intrinsic_image_deref_atomic_xor:
530 return OP_SUREDP;
531 case nir_intrinsic_bindless_image_load:
532 case nir_intrinsic_image_load:
533 case nir_intrinsic_image_deref_load:
534 return OP_SULDP;
535 case nir_intrinsic_bindless_image_samples:
536 case nir_intrinsic_image_samples:
537 case nir_intrinsic_image_deref_samples:
538 case nir_intrinsic_bindless_image_size:
539 case nir_intrinsic_image_size:
540 case nir_intrinsic_image_deref_size:
541 return OP_SUQ;
542 case nir_intrinsic_bindless_image_store:
543 case nir_intrinsic_image_store:
544 case nir_intrinsic_image_deref_store:
545 return OP_SUSTP;
546 default:
547 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
548 assert(false);
549 return OP_NOP;
550 }
551 }
552
553 operation
554 Converter::preOperationNeeded(nir_op op)
555 {
556 switch (op) {
557 case nir_op_fcos:
558 case nir_op_fsin:
559 return OP_PRESIN;
560 default:
561 return OP_NOP;
562 }
563 }
564
565 int
566 Converter::getSubOp(nir_op op)
567 {
568 switch (op) {
569 case nir_op_imul_high:
570 case nir_op_umul_high:
571 return NV50_IR_SUBOP_MUL_HIGH;
572 default:
573 return 0;
574 }
575 }
576
577 int
578 Converter::getSubOp(nir_intrinsic_op op)
579 {
580 switch (op) {
581 case nir_intrinsic_bindless_image_atomic_add:
582 case nir_intrinsic_image_atomic_add:
583 case nir_intrinsic_image_deref_atomic_add:
584 case nir_intrinsic_shared_atomic_add:
585 case nir_intrinsic_ssbo_atomic_add:
586 return NV50_IR_SUBOP_ATOM_ADD;
587 case nir_intrinsic_bindless_image_atomic_and:
588 case nir_intrinsic_image_atomic_and:
589 case nir_intrinsic_image_deref_atomic_and:
590 case nir_intrinsic_shared_atomic_and:
591 case nir_intrinsic_ssbo_atomic_and:
592 return NV50_IR_SUBOP_ATOM_AND;
593 case nir_intrinsic_bindless_image_atomic_comp_swap:
594 case nir_intrinsic_image_atomic_comp_swap:
595 case nir_intrinsic_image_deref_atomic_comp_swap:
596 case nir_intrinsic_shared_atomic_comp_swap:
597 case nir_intrinsic_ssbo_atomic_comp_swap:
598 return NV50_IR_SUBOP_ATOM_CAS;
599 case nir_intrinsic_bindless_image_atomic_exchange:
600 case nir_intrinsic_image_atomic_exchange:
601 case nir_intrinsic_image_deref_atomic_exchange:
602 case nir_intrinsic_shared_atomic_exchange:
603 case nir_intrinsic_ssbo_atomic_exchange:
604 return NV50_IR_SUBOP_ATOM_EXCH;
605 case nir_intrinsic_bindless_image_atomic_or:
606 case nir_intrinsic_image_atomic_or:
607 case nir_intrinsic_image_deref_atomic_or:
608 case nir_intrinsic_shared_atomic_or:
609 case nir_intrinsic_ssbo_atomic_or:
610 return NV50_IR_SUBOP_ATOM_OR;
611 case nir_intrinsic_bindless_image_atomic_max:
612 case nir_intrinsic_image_atomic_max:
613 case nir_intrinsic_image_deref_atomic_max:
614 case nir_intrinsic_shared_atomic_imax:
615 case nir_intrinsic_shared_atomic_umax:
616 case nir_intrinsic_ssbo_atomic_imax:
617 case nir_intrinsic_ssbo_atomic_umax:
618 return NV50_IR_SUBOP_ATOM_MAX;
619 case nir_intrinsic_bindless_image_atomic_min:
620 case nir_intrinsic_image_atomic_min:
621 case nir_intrinsic_image_deref_atomic_min:
622 case nir_intrinsic_shared_atomic_imin:
623 case nir_intrinsic_shared_atomic_umin:
624 case nir_intrinsic_ssbo_atomic_imin:
625 case nir_intrinsic_ssbo_atomic_umin:
626 return NV50_IR_SUBOP_ATOM_MIN;
627 case nir_intrinsic_bindless_image_atomic_xor:
628 case nir_intrinsic_image_atomic_xor:
629 case nir_intrinsic_image_deref_atomic_xor:
630 case nir_intrinsic_shared_atomic_xor:
631 case nir_intrinsic_ssbo_atomic_xor:
632 return NV50_IR_SUBOP_ATOM_XOR;
633
634 case nir_intrinsic_group_memory_barrier:
635 case nir_intrinsic_memory_barrier:
636 case nir_intrinsic_memory_barrier_atomic_counter:
637 case nir_intrinsic_memory_barrier_buffer:
638 case nir_intrinsic_memory_barrier_image:
639 return NV50_IR_SUBOP_MEMBAR(M, GL);
640 case nir_intrinsic_memory_barrier_shared:
641 return NV50_IR_SUBOP_MEMBAR(M, CTA);
642
643 case nir_intrinsic_vote_all:
644 return NV50_IR_SUBOP_VOTE_ALL;
645 case nir_intrinsic_vote_any:
646 return NV50_IR_SUBOP_VOTE_ANY;
647 case nir_intrinsic_vote_ieq:
648 return NV50_IR_SUBOP_VOTE_UNI;
649 default:
650 return 0;
651 }
652 }
653
654 CondCode
655 Converter::getCondCode(nir_op op)
656 {
657 switch (op) {
658 case nir_op_feq32:
659 case nir_op_ieq32:
660 return CC_EQ;
661 case nir_op_fge32:
662 case nir_op_ige32:
663 case nir_op_uge32:
664 return CC_GE;
665 case nir_op_flt32:
666 case nir_op_ilt32:
667 case nir_op_ult32:
668 return CC_LT;
669 case nir_op_fne32:
670 return CC_NEU;
671 case nir_op_ine32:
672 return CC_NE;
673 default:
674 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
675 assert(false);
676 return CC_FL;
677 }
678 }
679
680 Converter::LValues&
681 Converter::convert(nir_alu_dest *dest)
682 {
683 return convert(&dest->dest);
684 }
685
686 Converter::LValues&
687 Converter::convert(nir_dest *dest)
688 {
689 if (dest->is_ssa)
690 return convert(&dest->ssa);
691 if (dest->reg.indirect) {
692 ERROR("no support for indirects\n");
693 assert(false);
694 }
695 return convert(dest->reg.reg);
696 }
697
698 Converter::LValues&
699 Converter::convert(nir_register *reg)
700 {
701 NirDefMap::iterator it = regDefs.find(reg->index);
702 if (it != regDefs.end())
703 return it->second;
704
705 LValues newDef(reg->num_components);
706 for (uint8_t i = 0; i < reg->num_components; i++)
707 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
708 return regDefs[reg->index] = newDef;
709 }
710
711 Converter::LValues&
712 Converter::convert(nir_ssa_def *def)
713 {
714 NirDefMap::iterator it = ssaDefs.find(def->index);
715 if (it != ssaDefs.end())
716 return it->second;
717
718 LValues newDef(def->num_components);
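// pad allocations to at least 32 bit: nv50 IR registers are 32 bit wide, so
// sub-32 bit NIR values still occupy a full register (an assumption about
// the target, same as the register path above)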
719 for (uint8_t i = 0; i < def->num_components; i++)
720 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
721 return ssaDefs[def->index] = newDef;
722 }
723
724 Value*
725 Converter::getSrc(nir_alu_src *src, uint8_t component)
726 {
727 if (src->abs || src->negate) {
728 ERROR("modifiers currently not supported on nir_alu_src\n");
729 assert(false);
730 }
731 return getSrc(&src->src, src->swizzle[component]);
732 }
733
734 Value*
735 Converter::getSrc(nir_register *reg, uint8_t idx)
736 {
737 NirDefMap::iterator it = regDefs.find(reg->index);
738 if (it == regDefs.end())
739 return convert(reg)[idx];
740 return it->second[idx];
741 }
742
743 Value*
744 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
745 {
746 if (src->is_ssa)
747 return getSrc(src->ssa, idx);
748
749 if (src->reg.indirect) {
750 if (indirect)
751 return getSrc(src->reg.indirect, idx);
752 ERROR("no support for indirects\n");
753 assert(false);
754 return NULL;
755 }
756
757 return getSrc(src->reg.reg, idx);
758 }
759
760 Value*
761 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
762 {
763 ImmediateMap::iterator iit = immediates.find(src->index);
764 if (iit != immediates.end())
765 return convert((*iit).second, idx);
766
767 NirDefMap::iterator it = ssaDefs.find(src->index);
768 if (it == ssaDefs.end()) {
769 ERROR("SSA value %u not found\n", src->index);
770 assert(false);
771 return NULL;
772 }
773 return it->second[idx];
774 }
775
776 uint32_t
777 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
778 {
779 nir_const_value *offset = nir_src_as_const_value(*src);
780
781 if (offset) {
782 indirect = NULL;
783 return offset[0].u32;
784 }
785
786 indirect = getSrc(src, idx, true);
787 return 0;
788 }
789
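// a worked reading of the scaling below: i/o is addressed in 16 byte vec4
// slots, so a non-constant index is shifted left by 4 (i.e. multiplied by
// 16) unless the caller requested scalar addressing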
790 uint32_t
791 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
792 {
793 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
794 if (indirect && !isScalar)
795 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
796 return idx;
797 }
798
799 static void
800 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
801 {
802 assert(name && index);
803
804 if (slot >= VERT_ATTRIB_MAX) {
805 ERROR("invalid varying slot %u\n", slot);
806 assert(false);
807 return;
808 }
809
810 if (slot >= VERT_ATTRIB_GENERIC0 &&
811 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
812 *name = TGSI_SEMANTIC_GENERIC;
813 *index = slot - VERT_ATTRIB_GENERIC0;
814 return;
815 }
816
817 if (slot >= VERT_ATTRIB_TEX0 &&
818 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
819 *name = TGSI_SEMANTIC_TEXCOORD;
820 *index = slot - VERT_ATTRIB_TEX0;
821 return;
822 }
823
824 switch (slot) {
825 case VERT_ATTRIB_COLOR0:
826 *name = TGSI_SEMANTIC_COLOR;
827 *index = 0;
828 break;
829 case VERT_ATTRIB_COLOR1:
830 *name = TGSI_SEMANTIC_COLOR;
831 *index = 1;
832 break;
833 case VERT_ATTRIB_EDGEFLAG:
834 *name = TGSI_SEMANTIC_EDGEFLAG;
835 *index = 0;
836 break;
837 case VERT_ATTRIB_FOG:
838 *name = TGSI_SEMANTIC_FOG;
839 *index = 0;
840 break;
841 case VERT_ATTRIB_NORMAL:
842 *name = TGSI_SEMANTIC_NORMAL;
843 *index = 0;
844 break;
845 case VERT_ATTRIB_POS:
846 *name = TGSI_SEMANTIC_POSITION;
847 *index = 0;
848 break;
849 case VERT_ATTRIB_POINT_SIZE:
850 *name = TGSI_SEMANTIC_PSIZE;
851 *index = 0;
852 break;
853 default:
854 ERROR("unknown vert attrib slot %u\n", slot);
855 assert(false);
856 break;
857 }
858 }
859
860 static void
861 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
862 {
863 assert(name && index);
864
865 if (slot >= VARYING_SLOT_TESS_MAX) {
866 ERROR("invalid varying slot %u\n", slot);
867 assert(false);
868 return;
869 }
870
871 if (slot >= VARYING_SLOT_PATCH0) {
872 *name = TGSI_SEMANTIC_PATCH;
873 *index = slot - VARYING_SLOT_PATCH0;
874 return;
875 }
876
877 if (slot >= VARYING_SLOT_VAR0) {
878 *name = TGSI_SEMANTIC_GENERIC;
879 *index = slot - VARYING_SLOT_VAR0;
880 return;
881 }
882
883 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
884 *name = TGSI_SEMANTIC_TEXCOORD;
885 *index = slot - VARYING_SLOT_TEX0;
886 return;
887 }
888
889 switch (slot) {
890 case VARYING_SLOT_BFC0:
891 *name = TGSI_SEMANTIC_BCOLOR;
892 *index = 0;
893 break;
894 case VARYING_SLOT_BFC1:
895 *name = TGSI_SEMANTIC_BCOLOR;
896 *index = 1;
897 break;
898 case VARYING_SLOT_CLIP_DIST0:
899 *name = TGSI_SEMANTIC_CLIPDIST;
900 *index = 0;
901 break;
902 case VARYING_SLOT_CLIP_DIST1:
903 *name = TGSI_SEMANTIC_CLIPDIST;
904 *index = 1;
905 break;
906 case VARYING_SLOT_CLIP_VERTEX:
907 *name = TGSI_SEMANTIC_CLIPVERTEX;
908 *index = 0;
909 break;
910 case VARYING_SLOT_COL0:
911 *name = TGSI_SEMANTIC_COLOR;
912 *index = 0;
913 break;
914 case VARYING_SLOT_COL1:
915 *name = TGSI_SEMANTIC_COLOR;
916 *index = 1;
917 break;
918 case VARYING_SLOT_EDGE:
919 *name = TGSI_SEMANTIC_EDGEFLAG;
920 *index = 0;
921 break;
922 case VARYING_SLOT_FACE:
923 *name = TGSI_SEMANTIC_FACE;
924 *index = 0;
925 break;
926 case VARYING_SLOT_FOGC:
927 *name = TGSI_SEMANTIC_FOG;
928 *index = 0;
929 break;
930 case VARYING_SLOT_LAYER:
931 *name = TGSI_SEMANTIC_LAYER;
932 *index = 0;
933 break;
934 case VARYING_SLOT_PNTC:
935 *name = TGSI_SEMANTIC_PCOORD;
936 *index = 0;
937 break;
938 case VARYING_SLOT_POS:
939 *name = TGSI_SEMANTIC_POSITION;
940 *index = 0;
941 break;
942 case VARYING_SLOT_PRIMITIVE_ID:
943 *name = TGSI_SEMANTIC_PRIMID;
944 *index = 0;
945 break;
946 case VARYING_SLOT_PSIZ:
947 *name = TGSI_SEMANTIC_PSIZE;
948 *index = 0;
949 break;
950 case VARYING_SLOT_TESS_LEVEL_INNER:
951 *name = TGSI_SEMANTIC_TESSINNER;
952 *index = 0;
953 break;
954 case VARYING_SLOT_TESS_LEVEL_OUTER:
955 *name = TGSI_SEMANTIC_TESSOUTER;
956 *index = 0;
957 break;
958 case VARYING_SLOT_VIEWPORT:
959 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
960 *index = 0;
961 break;
962 default:
963 ERROR("unknown varying slot %u\n", slot);
964 assert(false);
965 break;
966 }
967 }
968
969 static void
970 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
971 {
972 if (slot >= FRAG_RESULT_DATA0) {
973 *name = TGSI_SEMANTIC_COLOR;
974 *index = slot - FRAG_RESULT_COLOR - 2; // intentional: == slot - FRAG_RESULT_DATA0
975 return;
976 }
977
978 switch (slot) {
979 case FRAG_RESULT_COLOR:
980 *name = TGSI_SEMANTIC_COLOR;
981 *index = 0;
982 break;
983 case FRAG_RESULT_DEPTH:
984 *name = TGSI_SEMANTIC_POSITION;
985 *index = 0;
986 break;
987 case FRAG_RESULT_SAMPLE_MASK:
988 *name = TGSI_SEMANTIC_SAMPLEMASK;
989 *index = 0;
990 break;
991 default:
992 ERROR("unknown frag result slot %u\n", slot);
993 assert(false);
994 break;
995 }
996 }
997
998 // copy of _mesa_sysval_to_semantic
999 static void
1000 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
1001 {
1002 *index = 0;
1003 switch (val) {
1004 // Vertex shader
1005 case SYSTEM_VALUE_VERTEX_ID:
1006 *name = TGSI_SEMANTIC_VERTEXID;
1007 break;
1008 case SYSTEM_VALUE_INSTANCE_ID:
1009 *name = TGSI_SEMANTIC_INSTANCEID;
1010 break;
1011 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
1012 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
1013 break;
1014 case SYSTEM_VALUE_BASE_VERTEX:
1015 *name = TGSI_SEMANTIC_BASEVERTEX;
1016 break;
1017 case SYSTEM_VALUE_BASE_INSTANCE:
1018 *name = TGSI_SEMANTIC_BASEINSTANCE;
1019 break;
1020 case SYSTEM_VALUE_DRAW_ID:
1021 *name = TGSI_SEMANTIC_DRAWID;
1022 break;
1023
1024 // Geometry shader
1025 case SYSTEM_VALUE_INVOCATION_ID:
1026 *name = TGSI_SEMANTIC_INVOCATIONID;
1027 break;
1028
1029 // Fragment shader
1030 case SYSTEM_VALUE_FRAG_COORD:
1031 *name = TGSI_SEMANTIC_POSITION;
1032 break;
1033 case SYSTEM_VALUE_FRONT_FACE:
1034 *name = TGSI_SEMANTIC_FACE;
1035 break;
1036 case SYSTEM_VALUE_SAMPLE_ID:
1037 *name = TGSI_SEMANTIC_SAMPLEID;
1038 break;
1039 case SYSTEM_VALUE_SAMPLE_POS:
1040 *name = TGSI_SEMANTIC_SAMPLEPOS;
1041 break;
1042 case SYSTEM_VALUE_SAMPLE_MASK_IN:
1043 *name = TGSI_SEMANTIC_SAMPLEMASK;
1044 break;
1045 case SYSTEM_VALUE_HELPER_INVOCATION:
1046 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
1047 break;
1048
1049 // Tessellation shader
1050 case SYSTEM_VALUE_TESS_COORD:
1051 *name = TGSI_SEMANTIC_TESSCOORD;
1052 break;
1053 case SYSTEM_VALUE_VERTICES_IN:
1054 *name = TGSI_SEMANTIC_VERTICESIN;
1055 break;
1056 case SYSTEM_VALUE_PRIMITIVE_ID:
1057 *name = TGSI_SEMANTIC_PRIMID;
1058 break;
1059 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1060 *name = TGSI_SEMANTIC_TESSOUTER;
1061 break;
1062 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1063 *name = TGSI_SEMANTIC_TESSINNER;
1064 break;
1065
1066 // Compute shader
1067 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1068 *name = TGSI_SEMANTIC_THREAD_ID;
1069 break;
1070 case SYSTEM_VALUE_WORK_GROUP_ID:
1071 *name = TGSI_SEMANTIC_BLOCK_ID;
1072 break;
1073 case SYSTEM_VALUE_NUM_WORK_GROUPS:
1074 *name = TGSI_SEMANTIC_GRID_SIZE;
1075 break;
1076 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1077 *name = TGSI_SEMANTIC_BLOCK_SIZE;
1078 break;
1079
1080 // ARB_shader_ballot
1081 case SYSTEM_VALUE_SUBGROUP_SIZE:
1082 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
1083 break;
1084 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1085 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
1086 break;
1087 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1088 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
1089 break;
1090 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1091 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
1092 break;
1093 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1094 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
1095 break;
1096 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1097 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
1098 break;
1099 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1100 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
1101 break;
1102
1103 default:
1104 ERROR("unknown system value %u\n", val);
1105 assert(false);
1106 break;
1107 }
1108 }
1109
1110 void
1111 Converter::setInterpolate(nv50_ir_varying *var,
1112 uint8_t mode,
1113 bool centroid,
1114 unsigned semantic)
1115 {
1116 switch (mode) {
1117 case INTERP_MODE_FLAT:
1118 var->flat = 1;
1119 break;
1120 case INTERP_MODE_NONE:
1121 if (semantic == TGSI_SEMANTIC_COLOR)
1122 var->sc = 1;
1123 else if (semantic == TGSI_SEMANTIC_POSITION)
1124 var->linear = 1;
1125 break;
1126 case INTERP_MODE_NOPERSPECTIVE:
1127 var->linear = 1;
1128 break;
1129 case INTERP_MODE_SMOOTH:
1130 break;
1131 }
1132 var->centroid = centroid;
1133 }
1134
1135 static uint16_t
1136 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
1137 bool input, const nir_variable *var)
1138 {
1139 if (!type->is_array())
1140 return type->count_attribute_slots(false);
1141
1142 uint16_t slots;
1143 switch (stage) {
1144 case Program::TYPE_GEOMETRY:
1145 slots = type->uniform_locations();
1146 if (input)
1147 slots /= info.gs.vertices_in;
1148 break;
1149 case Program::TYPE_TESSELLATION_CONTROL:
1150 case Program::TYPE_TESSELLATION_EVAL:
1151 // remove first dimension
1152 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1153 slots = type->uniform_locations();
1154 else
1155 slots = type->fields.array->uniform_locations();
1156 break;
1157 default:
1158 slots = type->count_attribute_slots(false);
1159 break;
1160 }
1161
1162 return slots;
1163 }
1164
1165 bool Converter::assignSlots() {
1166 unsigned name;
1167 unsigned index;
1168
1169 info->io.viewportId = -1;
1170 info->numInputs = 0;
1171 info->numOutputs = 0;
1172
1173 // we have to fix up the uniform locations for arrays
1174 unsigned numImages = 0;
1175 nir_foreach_variable(var, &nir->uniforms) {
1176 const glsl_type *type = var->type;
1177 if (!type->without_array()->is_image())
1178 continue;
1179 var->data.driver_location = numImages;
1180 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1181 }
1182
1183 info->numSysVals = 0;
1184 for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
1185 if (!(nir->info.system_values_read & 1ull << i))
1186 continue;
1187
1188 system_val_to_tgsi_semantic(i, &name, &index);
1189 info->sv[info->numSysVals].sn = name;
1190 info->sv[info->numSysVals].si = index;
1191 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1192
1193 switch (i) {
1194 case SYSTEM_VALUE_INSTANCE_ID:
1195 info->io.instanceId = info->numSysVals;
1196 break;
1197 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1198 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1199 info->sv[info->numSysVals].patch = 1;
1200 break;
1201 case SYSTEM_VALUE_VERTEX_ID:
1202 info->io.vertexId = info->numSysVals;
1203 break;
1204 default:
1205 break;
1206 }
1207
1208 info->numSysVals += 1;
1209 }
1210
1211 if (prog->getType() == Program::TYPE_COMPUTE)
1212 return true;
1213
1214 nir_foreach_variable(var, &nir->inputs) {
1215 const glsl_type *type = var->type;
1216 int slot = var->data.location;
1217 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1218 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1219 : type->component_slots();
1220 uint32_t frac = var->data.location_frac;
1221 uint32_t vary = var->data.driver_location;
1222
1223 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1224 if (comp > 2)
1225 slots *= 2;
1226 }
1227
1228 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1229
1230 switch(prog->getType()) {
1231 case Program::TYPE_FRAGMENT:
1232 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1233 for (uint16_t i = 0; i < slots; ++i) {
1234 setInterpolate(&info->in[vary + i], var->data.interpolation,
1235 var->data.centroid | var->data.sample, name);
1236 }
1237 break;
1238 case Program::TYPE_GEOMETRY:
1239 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1240 break;
1241 case Program::TYPE_TESSELLATION_CONTROL:
1242 case Program::TYPE_TESSELLATION_EVAL:
1243 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1244 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1245 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1246 break;
1247 case Program::TYPE_VERTEX:
1248 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1249 switch (name) {
1250 case TGSI_SEMANTIC_EDGEFLAG:
1251 info->io.edgeFlagIn = vary;
1252 break;
1253 default:
1254 break;
1255 }
1256 break;
1257 default:
1258 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1259 return false;
1260 }
1261
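// a short worked reading of the mask math below: for 32 bit types the mask
// is ((1 << comp) - 1) << frac, e.g. 0x7 for a vec3 at frac 0 and 0x4 for a
// float at frac 2; 64 bit types use two mask bits per component, with the
// upper half shifted down into the following vec4 slot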
1262 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1263 info->in[vary].id = vary;
1264 info->in[vary].patch = var->data.patch;
1265 info->in[vary].sn = name;
1266 info->in[vary].si = index + i;
1267 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1268 if (i & 0x1)
1269 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1270 else
1271 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1272 else
1273 info->in[vary].mask |= ((1 << comp) - 1) << frac;
1274 }
1275 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1276 }
1277
1278 nir_foreach_variable(var, &nir->outputs) {
1279 const glsl_type *type = var->type;
1280 int slot = var->data.location;
1281 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1282 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1283 : type->component_slots();
1284 uint32_t frac = var->data.location_frac;
1285 uint32_t vary = var->data.driver_location;
1286
1287 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1288 if (comp > 2)
1289 slots *= 2;
1290 }
1291
1292 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1293
1294 switch(prog->getType()) {
1295 case Program::TYPE_FRAGMENT:
1296 frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1297 switch (name) {
1298 case TGSI_SEMANTIC_COLOR:
1299 if (!var->data.fb_fetch_output)
1300 info->prop.fp.numColourResults++;
1301 info->prop.fp.separateFragData = true;
1302 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1303 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1304 index = index == 0 ? var->data.index : index;
1305 break;
1306 case TGSI_SEMANTIC_POSITION:
1307 info->io.fragDepth = vary;
1308 info->prop.fp.writesDepth = true;
1309 break;
1310 case TGSI_SEMANTIC_SAMPLEMASK:
1311 info->io.sampleMask = vary;
1312 break;
1313 default:
1314 break;
1315 }
1316 break;
1317 case Program::TYPE_GEOMETRY:
1318 case Program::TYPE_TESSELLATION_CONTROL:
1319 case Program::TYPE_TESSELLATION_EVAL:
1320 case Program::TYPE_VERTEX:
1321 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1322
1323 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1324 name != TGSI_SEMANTIC_TESSOUTER)
1325 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1326
1327 switch (name) {
1328 case TGSI_SEMANTIC_CLIPDIST:
1329 info->io.genUserClip = -1;
1330 break;
1331 case TGSI_SEMANTIC_CLIPVERTEX:
1332 clipVertexOutput = vary;
1333 break;
1334 case TGSI_SEMANTIC_EDGEFLAG:
1335 info->io.edgeFlagOut = vary;
1336 break;
1337 case TGSI_SEMANTIC_POSITION:
1338 if (clipVertexOutput < 0)
1339 clipVertexOutput = vary;
1340 break;
1341 default:
1342 break;
1343 }
1344 break;
1345 default:
1346 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1347 return false;
1348 }
1349
1350 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1351 info->out[vary].id = vary;
1352 info->out[vary].patch = var->data.patch;
1353 info->out[vary].sn = name;
1354 info->out[vary].si = index + i;
1355 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1356 if (i & 0x1)
1357 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1358 else
1359 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1360 else
1361 info->out[vary].mask |= ((1 << comp) - 1) << frac;
1362
1363 if (nir->info.outputs_read & 1ull << slot)
1364 info->out[vary].oread = 1;
1365 }
1366 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1367 }
1368
1369 if (info->io.genUserClip > 0) {
1370 info->io.clipDistances = info->io.genUserClip;
1371
1372 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1373
1374 for (unsigned int n = 0; n < nOut; ++n) {
1375 unsigned int i = info->numOutputs++;
1376 info->out[i].id = i;
1377 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1378 info->out[i].si = n;
1379 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1380 }
1381 }
1382
1383 return info->assignSlots(info) == 0;
1384 }
1385
1386 uint32_t
1387 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1388 {
1389 DataType ty;
1390 int offset = nir_intrinsic_component(insn);
1391 bool input;
1392
1393 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1394 ty = getDType(insn);
1395 else
1396 ty = getSType(insn->src[0], false, false);
1397
1398 switch (insn->intrinsic) {
1399 case nir_intrinsic_load_input:
1400 case nir_intrinsic_load_interpolated_input:
1401 case nir_intrinsic_load_per_vertex_input:
1402 input = true;
1403 break;
1404 case nir_intrinsic_load_output:
1405 case nir_intrinsic_load_per_vertex_output:
1406 case nir_intrinsic_store_output:
1407 case nir_intrinsic_store_per_vertex_output:
1408 input = false;
1409 break;
1410 default:
1411 ERROR("unknown intrinsic in getSlotAddress %s",
1412 nir_intrinsic_infos[insn->intrinsic].name);
1413 input = false;
1414 assert(false);
1415 break;
1416 }
1417
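// example of the 64 bit adjustment below: a double at component slot 2 of a
// vec4 row doubles to slot 4 and wraps into the next row (idx + 1, slot 0)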
1418 if (typeSizeof(ty) == 8) {
1419 slot *= 2;
1420 slot += offset;
1421 if (slot >= 4) {
1422 idx += 1;
1423 slot -= 4;
1424 }
1425 } else {
1426 slot += offset;
1427 }
1428
1429 assert(slot < 4);
1430 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1431 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1432
1433 const nv50_ir_varying *vary = input ? info->in : info->out;
1434 return vary[idx].slot[slot] * 4;
1435 }
1436
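// loadFrom splits 64 bit loads from const/buffer memory (or any indirectly
// addressed file) into two 32 bit loads plus a merge; a rough sketch of the
// resulting IR, with made-up register names:
//   ld u32 %lo c0[base + 0](%ind)
//   ld u32 %hi c0[base + 4](%ind)
//   merge u64 %def %lo %hi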
1437 Instruction *
1438 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1439 uint32_t base, uint8_t c, Value *indirect0,
1440 Value *indirect1, bool patch)
1441 {
1442 unsigned int tySize = typeSizeof(ty);
1443
1444 if (tySize == 8 &&
1445 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1446 Value *lo = getSSA();
1447 Value *hi = getSSA();
1448
1449 Instruction *loi =
1450 mkLoad(TYPE_U32, lo,
1451 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1452 indirect0);
1453 loi->setIndirect(0, 1, indirect1);
1454 loi->perPatch = patch;
1455
1456 Instruction *hii =
1457 mkLoad(TYPE_U32, hi,
1458 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1459 indirect0);
1460 hii->setIndirect(0, 1, indirect1);
1461 hii->perPatch = patch;
1462
1463 return mkOp2(OP_MERGE, ty, def, lo, hi);
1464 } else {
1465 Instruction *ld =
1466 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1467 ld->setIndirect(0, 1, indirect1);
1468 ld->perPatch = patch;
1469 return ld;
1470 }
1471 }
1472
1473 void
1474 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1475 DataType ty, Value *src, uint8_t idx, uint8_t c,
1476 Value *indirect0, Value *indirect1)
1477 {
1478 uint8_t size = typeSizeof(ty);
1479 uint32_t address = getSlotAddress(insn, idx, c);
1480
1481 if (size == 8 && indirect0) {
1482 Value *split[2];
1483 mkSplit(split, 4, src);
1484
1485 if (op == OP_EXPORT) {
1486 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1487 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1488 }
1489
1490 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1491 split[0])->perPatch = info->out[idx].patch;
1492 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1493 split[1])->perPatch = info->out[idx].patch;
1494 } else {
1495 if (op == OP_EXPORT)
1496 src = mkMov(getSSA(size), src, ty)->getDef(0);
1497 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1498 src)->perPatch = info->out[idx].patch;
1499 }
1500 }
1501
1502 bool
1503 Converter::parseNIR()
1504 {
1505 info->bin.tlsSpace = 0;
1506 info->io.clipDistances = nir->info.clip_distance_array_size;
1507 info->io.cullDistances = nir->info.cull_distance_array_size;
1508
1509 switch(prog->getType()) {
1510 case Program::TYPE_COMPUTE:
1511 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1512 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1513 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1514 info->bin.smemSize = nir->info.cs.shared_size;
1515 break;
1516 case Program::TYPE_FRAGMENT:
1517 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1518 info->prop.fp.persampleInvocation =
1519 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1520 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1521 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1522 info->prop.fp.readsSampleLocations =
1523 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1524 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1525 info->prop.fp.usesSampleMaskIn =
1526 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1527 break;
1528 case Program::TYPE_GEOMETRY:
1529 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1530 info->prop.gp.instanceCount = nir->info.gs.invocations;
1531 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1532 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1533 break;
1534 case Program::TYPE_TESSELLATION_CONTROL:
1535 case Program::TYPE_TESSELLATION_EVAL:
1536 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1537 info->prop.tp.domain = GL_LINES;
1538 else
1539 info->prop.tp.domain = nir->info.tess.primitive_mode;
1540 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1541 info->prop.tp.outputPrim =
1542 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
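// the (spacing + 1) % 3 trick below relies on the enum layouts:
// TESS_SPACING_EQUAL (1) -> PIPE_TESS_SPACING_EQUAL (2),
// _FRACTIONAL_ODD (2) -> 0, _FRACTIONAL_EVEN (3) -> 1
// (values as found in the gl/pipe headers at the time of writing)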
1543 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1544 info->prop.tp.winding = !nir->info.tess.ccw;
1545 break;
1546 case Program::TYPE_VERTEX:
1547 info->prop.vp.usesDrawParameters =
1548 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1549 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1550 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1551 break;
1552 default:
1553 break;
1554 }
1555
1556 return true;
1557 }
1558
1559 bool
1560 Converter::visit(nir_function *function)
1561 {
1562 assert(function->impl);
1563
1564 // usually the blocks will set everything up, but main is special
1565 BasicBlock *entry = new BasicBlock(prog->main);
1566 exit = new BasicBlock(prog->main);
1567 blocks[nir_start_block(function->impl)->index] = entry;
1568 prog->main->setEntry(entry);
1569 prog->main->setExit(exit);
1570
1571 setPosition(entry, true);
1572
1573 if (info->io.genUserClip > 0) {
1574 for (int c = 0; c < 4; ++c)
1575 clipVtx[c] = getScratch();
1576 }
1577
1578 switch (prog->getType()) {
1579 case Program::TYPE_TESSELLATION_CONTROL:
1580 outBase = mkOp2v(
1581 OP_SUB, TYPE_U32, getSSA(),
1582 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1583 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1584 break;
1585 case Program::TYPE_FRAGMENT: {
1586 Symbol *sv = mkSysVal(SV_POSITION, 3);
1587 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1588 fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1589 break;
1590 }
1591 default:
1592 break;
1593 }
1594
1595 nir_foreach_register(reg, &function->impl->registers) {
1596 if (reg->num_array_elems) {
1597 // TODO: packed variables would be nice, but MemoryOpt fails
1598 // replace 4 with reg->num_components
1599 uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1600 regToLmemOffset[reg->index] = info->bin.tlsSpace;
1601 info->bin.tlsSpace += size;
1602 }
1603 }
1604
1605 nir_index_ssa_defs(function->impl);
1606 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1607 if (!visit(node))
1608 return false;
1609 }
1610
1611 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1612 setPosition(exit, true);
1613
1614 if ((prog->getType() == Program::TYPE_VERTEX ||
1615 prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1616 && info->io.genUserClip > 0)
1617 handleUserClipPlanes();
1618
1619 // TODO: for non-main functions this needs to be an OP_RETURN
1620 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1621 return true;
1622 }
1623
1624 bool
1625 Converter::visit(nir_cf_node *node)
1626 {
1627 switch (node->type) {
1628 case nir_cf_node_block:
1629 return visit(nir_cf_node_as_block(node));
1630 case nir_cf_node_if:
1631 return visit(nir_cf_node_as_if(node));
1632 case nir_cf_node_loop:
1633 return visit(nir_cf_node_as_loop(node));
1634 default:
1635 ERROR("unknown nir_cf_node type %u\n", node->type);
1636 return false;
1637 }
1638 }
1639
1640 bool
1641 Converter::visit(nir_block *block)
1642 {
1643 if (!block->predecessors->entries && block->instr_list.is_empty())
1644 return true;
1645
1646 BasicBlock *bb = convert(block);
1647
1648 setPosition(bb, true);
1649 nir_foreach_instr(insn, block) {
1650 if (!visit(insn))
1651 return false;
1652 }
1653 return true;
1654 }
1655
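// a sketch of the flow emitted for an if below (simplified; the joinat is
// only inserted when both branches rejoin at the same block):
//   joinat <tail>
//   bra <else> if cond == 0
//   <then> ... bra <tail>
//   <else> ... bra <tail>
//   <tail> join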
1656 bool
1657 Converter::visit(nir_if *nif)
1658 {
1659 DataType sType = getSType(nif->condition, false, false);
1660 Value *src = getSrc(&nif->condition, 0);
1661
1662 nir_block *lastThen = nir_if_last_then_block(nif);
1663 nir_block *lastElse = nir_if_last_else_block(nif);
1664
1665 assert(!lastThen->successors[1]);
1666 assert(!lastElse->successors[1]);
1667
1668 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1669 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1670
1671 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1672 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1673
1674 // we only insert joinats if both branches end up at the end of the if
1675 // again. the reasons for this not to happen are breaks/continues/ret/...,
1676 // which have their own handling
1677 if (lastThen->successors[0] == lastElse->successors[0])
1678 bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1679 CC_ALWAYS, NULL);
1680
1681 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1682
1683 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1684 if (!visit(node))
1685 return false;
1686 }
1687 setPosition(convert(lastThen), true);
1688 if (!bb->getExit() ||
1689 !bb->getExit()->asFlow() ||
1690 bb->getExit()->asFlow()->op == OP_JOIN) {
1691 BasicBlock *tailBB = convert(lastThen->successors[0]);
1692 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1693 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1694 }
1695
1696 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1697 if (!visit(node))
1698 return false;
1699 }
1700 setPosition(convert(lastElse), true);
1701 if (!bb->getExit() ||
1702 !bb->getExit()->asFlow() ||
1703 bb->getExit()->asFlow()->op == OP_JOIN) {
1704 BasicBlock *tailBB = convert(lastElse->successors[0]);
1705 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1706 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1707 }
1708
1709 if (lastThen->successors[0] == lastElse->successors[0]) {
1710 setPosition(convert(lastThen->successors[0]), true);
1711 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1712 }
1713
1714 return true;
1715 }
1716
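// a sketch of the flow emitted for a loop below (simplified):
//   prebreak <tail>          // set the break target
//   <loop> precont <loop>    // set the continue target
//          ... body ...
//          cont <loop>       // back edge, unless the body ended in flow
//   <tail>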
1717 bool
1718 Converter::visit(nir_loop *loop)
1719 {
1720 curLoopDepth += 1;
1721 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1722
1723 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1724 BasicBlock *tailBB =
1725 convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1726 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1727
1728 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1729 setPosition(loopBB, false);
1730 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1731
1732 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1733 if (!visit(node))
1734 return false;
1735 }
1736 Instruction *insn = bb->getExit();
1737 if (bb->cfg.incidentCount() != 0) {
1738 if (!insn || !insn->asFlow()) {
1739 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1740 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1741 } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1742 tailBB->cfg.incidentCount() == 0) {
1743 // RA doesn't like having blocks around with no incident edge,
1744 // so we create a fake one to make it happy
1745 bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1746 }
1747 }
1748
1749 curLoopDepth -= 1;
1750
1751 return true;
1752 }
1753
1754 bool
1755 Converter::visit(nir_instr *insn)
1756 {
1757 // we need an insertion point for immediate loads generated on the fly
1758 immInsertPos = bb->getExit();
1759 switch (insn->type) {
1760 case nir_instr_type_alu:
1761 return visit(nir_instr_as_alu(insn));
1762 case nir_instr_type_deref:
1763 return visit(nir_instr_as_deref(insn));
1764 case nir_instr_type_intrinsic:
1765 return visit(nir_instr_as_intrinsic(insn));
1766 case nir_instr_type_jump:
1767 return visit(nir_instr_as_jump(insn));
1768 case nir_instr_type_load_const:
1769 return visit(nir_instr_as_load_const(insn));
1770 case nir_instr_type_ssa_undef:
1771 return visit(nir_instr_as_ssa_undef(insn));
1772 case nir_instr_type_tex:
1773 return visit(nir_instr_as_tex(insn));
1774 default:
1775 ERROR("unknown nir_instr type %u\n", insn->type);
1776 return false;
1777 }
1778 return true;
1779 }
1780
1781 SVSemantic
1782 Converter::convert(nir_intrinsic_op intr)
1783 {
1784 switch (intr) {
1785 case nir_intrinsic_load_base_vertex:
1786 return SV_BASEVERTEX;
1787 case nir_intrinsic_load_base_instance:
1788 return SV_BASEINSTANCE;
1789 case nir_intrinsic_load_draw_id:
1790 return SV_DRAWID;
1791 case nir_intrinsic_load_front_face:
1792 return SV_FACE;
1793 case nir_intrinsic_load_helper_invocation:
1794 return SV_THREAD_KILL;
1795 case nir_intrinsic_load_instance_id:
1796 return SV_INSTANCE_ID;
1797 case nir_intrinsic_load_invocation_id:
1798 return SV_INVOCATION_ID;
1799 case nir_intrinsic_load_local_group_size:
1800 return SV_NTID;
1801 case nir_intrinsic_load_local_invocation_id:
1802 return SV_TID;
1803 case nir_intrinsic_load_num_work_groups:
1804 return SV_NCTAID;
1805 case nir_intrinsic_load_patch_vertices_in:
1806 return SV_VERTEX_COUNT;
1807 case nir_intrinsic_load_primitive_id:
1808 return SV_PRIMITIVE_ID;
1809 case nir_intrinsic_load_sample_id:
1810 return SV_SAMPLE_INDEX;
1811 case nir_intrinsic_load_sample_mask_in:
1812 return SV_SAMPLE_MASK;
1813 case nir_intrinsic_load_sample_pos:
1814 return SV_SAMPLE_POS;
1815 case nir_intrinsic_load_subgroup_eq_mask:
1816 return SV_LANEMASK_EQ;
1817 case nir_intrinsic_load_subgroup_ge_mask:
1818 return SV_LANEMASK_GE;
1819 case nir_intrinsic_load_subgroup_gt_mask:
1820 return SV_LANEMASK_GT;
1821 case nir_intrinsic_load_subgroup_le_mask:
1822 return SV_LANEMASK_LE;
1823 case nir_intrinsic_load_subgroup_lt_mask:
1824 return SV_LANEMASK_LT;
1825 case nir_intrinsic_load_subgroup_invocation:
1826 return SV_LANEID;
1827 case nir_intrinsic_load_tess_coord:
1828 return SV_TESS_COORD;
1829 case nir_intrinsic_load_tess_level_inner:
1830 return SV_TESS_INNER;
1831 case nir_intrinsic_load_tess_level_outer:
1832 return SV_TESS_OUTER;
1833 case nir_intrinsic_load_vertex_id:
1834 return SV_VERTEX_ID;
1835 case nir_intrinsic_load_work_group_id:
1836 return SV_CTAID;
1837 default:
1838 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1839 nir_intrinsic_infos[intr].name);
1840 assert(false);
1841 return SV_LAST;
1842 }
1843 }
1844
1845 ImgFormat
1846 Converter::convertGLImgFormat(GLuint format)
1847 {
1848 #define FMT_CASE(a, b) \
1849 case GL_ ## a: return nv50_ir::FMT_ ## b
1850
1851 switch (format) {
1852 FMT_CASE(NONE, NONE);
1853
1854 FMT_CASE(RGBA32F, RGBA32F);
1855 FMT_CASE(RGBA16F, RGBA16F);
1856 FMT_CASE(RG32F, RG32F);
1857 FMT_CASE(RG16F, RG16F);
1858 FMT_CASE(R11F_G11F_B10F, R11G11B10F);
1859 FMT_CASE(R32F, R32F);
1860 FMT_CASE(R16F, R16F);
1861
1862 FMT_CASE(RGBA32UI, RGBA32UI);
1863 FMT_CASE(RGBA16UI, RGBA16UI);
1864 FMT_CASE(RGB10_A2UI, RGB10A2UI);
1865 FMT_CASE(RGBA8UI, RGBA8UI);
1866 FMT_CASE(RG32UI, RG32UI);
1867 FMT_CASE(RG16UI, RG16UI);
1868 FMT_CASE(RG8UI, RG8UI);
1869 FMT_CASE(R32UI, R32UI);
1870 FMT_CASE(R16UI, R16UI);
1871 FMT_CASE(R8UI, R8UI);
1872
1873 FMT_CASE(RGBA32I, RGBA32I);
1874 FMT_CASE(RGBA16I, RGBA16I);
1875 FMT_CASE(RGBA8I, RGBA8I);
1876 FMT_CASE(RG32I, RG32I);
1877 FMT_CASE(RG16I, RG16I);
1878 FMT_CASE(RG8I, RG8I);
1879 FMT_CASE(R32I, R32I);
1880 FMT_CASE(R16I, R16I);
1881 FMT_CASE(R8I, R8I);
1882
1883 FMT_CASE(RGBA16, RGBA16);
1884 FMT_CASE(RGB10_A2, RGB10A2);
1885 FMT_CASE(RGBA8, RGBA8);
1886 FMT_CASE(RG16, RG16);
1887 FMT_CASE(RG8, RG8);
1888 FMT_CASE(R16, R16);
1889 FMT_CASE(R8, R8);
1890
1891 FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
1892 FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
1893 FMT_CASE(RG16_SNORM, RG16_SNORM);
1894 FMT_CASE(RG8_SNORM, RG8_SNORM);
1895 FMT_CASE(R16_SNORM, R16_SNORM);
1896 FMT_CASE(R8_SNORM, R8_SNORM);
1897
1898 FMT_CASE(BGRA_INTEGER, BGRA8);
1899 default:
1900 ERROR("unknown format %x\n", format);
1901 assert(false);
1902 return nv50_ir::FMT_NONE;
1903 }
1904 #undef FMT_CASE
1905 }
1906
1907 bool
1908 Converter::visit(nir_intrinsic_instr *insn)
1909 {
1910 nir_intrinsic_op op = insn->intrinsic;
1911 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1912
1913 switch (op) {
1914 case nir_intrinsic_load_uniform: {
1915 LValues &newDefs = convert(&insn->dest);
1916 const DataType dType = getDType(insn);
1917 Value *indirect;
1918 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
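// uniform slots are vec4 sized, hence the 16 byte scaling of the constant
// offset below; the indirect part was already scaled up in getIndirect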
1919 for (uint8_t i = 0; i < insn->num_components; ++i) {
1920 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1921 }
1922 break;
1923 }
1924 case nir_intrinsic_store_output:
1925 case nir_intrinsic_store_per_vertex_output: {
1926 Value *indirect;
1927 DataType dType = getSType(insn->src[0], false, false);
1928 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1929
1930 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1931 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1932 continue;
1933
1934 uint8_t offset = 0;
1935 Value *src = getSrc(&insn->src[0], i);
1936 switch (prog->getType()) {
1937 case Program::TYPE_FRAGMENT: {
1938 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1939 // TGSI uses a different interface than NIR: TGSI stores that
1940 // value in the z component, NIR in x
1941 offset += 2;
1942 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1943 }
1944 break;
1945 }
1946 case Program::TYPE_GEOMETRY:
1947 case Program::TYPE_VERTEX: {
1948 if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1949 mkMov(clipVtx[i], src);
1950 src = clipVtx[i];
1951 }
1952 break;
1953 }
1954 default:
1955 break;
1956 }
1957
1958 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1959 }
1960 break;
1961 }
1962 case nir_intrinsic_load_input:
1963 case nir_intrinsic_load_interpolated_input:
1964 case nir_intrinsic_load_output: {
1965 LValues &newDefs = convert(&insn->dest);
1966
1967 // FBFetch
1968 if (prog->getType() == Program::TYPE_FRAGMENT &&
1969 op == nir_intrinsic_load_output) {
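// lower the framebuffer read to a texel fetch: truncate the pixel's
// window position to integers and combine it with the current layer
// and sample index to source the framebuffer as a 2D MS array texture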
1970 std::vector<Value*> defs, srcs;
1971 uint8_t mask = 0;
1972
1973 srcs.push_back(getSSA());
1974 srcs.push_back(getSSA());
1975 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1976 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1977 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1978 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1979
1980 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1981 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1982
1983 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1984 defs.push_back(newDefs[i]);
1985 mask |= 1 << i;
1986 }
1987
1988 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1989 texi->tex.levelZero = 1;
1990 texi->tex.mask = mask;
1991 texi->tex.useOffsets = 0;
1992 texi->tex.r = 0xffff;
1993 texi->tex.s = 0xffff;
1994
1995 info->prop.fp.readsFramebuffer = true;
1996 break;
1997 }
1998
1999 const DataType dType = getDType(insn);
2000 Value *indirect;
2001 bool input = op != nir_intrinsic_load_output;
2002 operation nvirOp;
2003 uint32_t mode = 0;
2004
2005 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
2006 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
2007
2008 // see load_barycentric_* handling
2009 if (prog->getType() == Program::TYPE_FRAGMENT) {
2010 mode = translateInterpMode(&vary, nvirOp);
2011 if (op == nir_intrinsic_load_interpolated_input) {
2012 ImmediateValue immMode;
2013 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
2014 mode |= immMode.reg.data.u32;
2015 }
2016 }
2017
2018 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2019 uint32_t address = getSlotAddress(insn, idx, i);
2020 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
2021 if (prog->getType() == Program::TYPE_FRAGMENT) {
2022 int s = 1;
2023 if (typeSizeof(dType) == 8) {
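// 64-bit values can't be interpolated in one go: interpolate the low
// and high 32-bit halves separately and merge the results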
2024 Value *lo = getSSA();
2025 Value *hi = getSSA();
2026 Instruction *interp;
2027
2028 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
2029 if (nvirOp == OP_PINTERP)
2030 interp->setSrc(s++, fp.position);
2031 if (mode & NV50_IR_INTERP_OFFSET)
2032 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2033 interp->setInterpolate(mode);
2034 interp->setIndirect(0, 0, indirect);
2035
2036 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
2037 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
2038 if (nvirOp == OP_PINTERP)
2039 interp->setSrc(s++, fp.position);
2040 if (mode & NV50_IR_INTERP_OFFSET)
2041 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2042 interp->setInterpolate(mode);
2043 interp->setIndirect(0, 0, indirect);
2044
2045 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2046 } else {
2047 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2048 if (nvirOp == OP_PINTERP)
2049 interp->setSrc(s++, fp.position);
2050 if (mode & NV50_IR_INTERP_OFFSET)
2051 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2052 interp->setInterpolate(mode);
2053 interp->setIndirect(0, 0, indirect);
2054 }
2055 } else {
2056 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2057 }
2058 }
2059 break;
2060 }
2061 case nir_intrinsic_load_kernel_input: {
2062 assert(prog->getType() == Program::TYPE_COMPUTE);
2063 assert(insn->num_components == 1);
2064
2065 LValues &newDefs = convert(&insn->dest);
2066 const DataType dType = getDType(insn);
2067 Value *indirect;
2068 uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
2069
2070 mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
2071 break;
2072 }
2073 case nir_intrinsic_load_barycentric_at_offset:
2074 case nir_intrinsic_load_barycentric_at_sample:
2075 case nir_intrinsic_load_barycentric_centroid:
2076 case nir_intrinsic_load_barycentric_pixel:
2077 case nir_intrinsic_load_barycentric_sample: {
2078 LValues &newDefs = convert(&insn->dest);
2079 uint32_t mode;
2080
2081 if (op == nir_intrinsic_load_barycentric_centroid ||
2082 op == nir_intrinsic_load_barycentric_sample) {
2083 mode = NV50_IR_INTERP_CENTROID;
2084 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
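// clamp the offset to the supported range [-0.5, 0.4375], convert it
// to signed fixed point (scaled by 4096 = 2^12), and pack y into the
// upper 16 bits above x; this presumably matches the layout the
// interpolation hardware expects for per-pixel offsets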
2085 Value *offs[2];
2086 for (uint8_t c = 0; c < 2; c++) {
2087 offs[c] = getScratch();
2088 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2089 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2090 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2091 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2092 }
2093 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2094
2095 mode = NV50_IR_INTERP_OFFSET;
2096 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2097 mode = NV50_IR_INTERP_DEFAULT;
2098 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2099 info->prop.fp.readsSampleLocations = true;
2100 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2101 mode = NV50_IR_INTERP_OFFSET;
2102 } else {
2103 unreachable("all intrinsics already handled above");
2104 }
2105
2106 loadImm(newDefs[1], mode);
2107 break;
2108 }
2109 case nir_intrinsic_discard:
2110 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2111 break;
2112 case nir_intrinsic_discard_if: {
2113 Value *pred = getSSA(1, FILE_PREDICATE);
2114 if (insn->num_components > 1) {
2115 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2116 assert(false);
2117 return false;
2118 }
2119 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2120 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2121 break;
2122 }
2123 case nir_intrinsic_load_base_vertex:
2124 case nir_intrinsic_load_base_instance:
2125 case nir_intrinsic_load_draw_id:
2126 case nir_intrinsic_load_front_face:
2127 case nir_intrinsic_load_helper_invocation:
2128 case nir_intrinsic_load_instance_id:
2129 case nir_intrinsic_load_invocation_id:
2130 case nir_intrinsic_load_local_group_size:
2131 case nir_intrinsic_load_local_invocation_id:
2132 case nir_intrinsic_load_num_work_groups:
2133 case nir_intrinsic_load_patch_vertices_in:
2134 case nir_intrinsic_load_primitive_id:
2135 case nir_intrinsic_load_sample_id:
2136 case nir_intrinsic_load_sample_mask_in:
2137 case nir_intrinsic_load_sample_pos:
2138 case nir_intrinsic_load_subgroup_eq_mask:
2139 case nir_intrinsic_load_subgroup_ge_mask:
2140 case nir_intrinsic_load_subgroup_gt_mask:
2141 case nir_intrinsic_load_subgroup_le_mask:
2142 case nir_intrinsic_load_subgroup_lt_mask:
2143 case nir_intrinsic_load_subgroup_invocation:
2144 case nir_intrinsic_load_tess_coord:
2145 case nir_intrinsic_load_tess_level_inner:
2146 case nir_intrinsic_load_tess_level_outer:
2147 case nir_intrinsic_load_vertex_id:
2148 case nir_intrinsic_load_work_group_id: {
2149 const DataType dType = getDType(insn);
2150 SVSemantic sv = convert(op);
2151 LValues &newDefs = convert(&insn->dest);
2152
2153 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2154 Value *def;
2155 if (typeSizeof(dType) == 8)
2156 def = getSSA();
2157 else
2158 def = newDefs[i];
2159
2160 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2161 loadImm(def, 0u);
2162 } else {
2163 Symbol *sym = mkSysVal(sv, i);
2164 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2165 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2166 rdsv->perPatch = 1;
2167 }
2168
2169 if (typeSizeof(dType) == 8)
2170 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2171 }
2172 break;
2173 }
2174 // constants
2175 case nir_intrinsic_load_subgroup_size: {
2176 LValues &newDefs = convert(&insn->dest);
2177 loadImm(newDefs[0], 32u);
2178 break;
2179 }
2180 case nir_intrinsic_vote_all:
2181 case nir_intrinsic_vote_any:
2182 case nir_intrinsic_vote_ieq: {
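// compare the source against zero to get a predicate, reduce it across
// the warp with OP_VOTE (getSubOp picks the vote mode), then widen the
// resulting predicate back to a 32-bit boolean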
2183 LValues &newDefs = convert(&insn->dest);
2184 Value *pred = getScratch(1, FILE_PREDICATE);
2185 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2186 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2187 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2188 break;
2189 }
2190 case nir_intrinsic_ballot: {
2191 LValues &newDefs = convert(&insn->dest);
2192 Value *pred = getSSA(1, FILE_PREDICATE);
2193 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2194 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2195 break;
2196 }
2197 case nir_intrinsic_read_first_invocation:
2198 case nir_intrinsic_read_invocation: {
2199 LValues &newDefs = convert(&insn->dest);
2200 const DataType dType = getDType(insn);
2201 Value *tmp = getScratch();
2202
2203 if (op == nir_intrinsic_read_first_invocation) {
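// compute the id of the first active lane: OP_VOTE ANY yields the mask
// of active threads, which gets bit-reversed so that OP_BFIND (which
// finds the highest set bit) returns the lowest active lane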
2204 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2205 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2206 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2207 } else
2208 tmp = getSrc(&insn->src[1], 0);
2209
2210 for (uint8_t i = 0; i < insn->num_components; ++i) {
2211 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2212 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2213 }
2214 break;
2215 }
2216 case nir_intrinsic_load_per_vertex_input: {
2217 const DataType dType = getDType(insn);
2218 LValues &newDefs = convert(&insn->dest);
2219 Value *indirectVertex;
2220 Value *indirectOffset;
2221 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2222 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2223
2224 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2225 mkImm(baseVertex), indirectVertex);
2226 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2227 uint32_t address = getSlotAddress(insn, idx, i);
2228 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2229 indirectOffset, vtxBase, info->in[idx].patch);
2230 }
2231 break;
2232 }
2233 case nir_intrinsic_load_per_vertex_output: {
2234 const DataType dType = getDType(insn);
2235 LValues &newDefs = convert(&insn->dest);
2236 Value *indirectVertex;
2237 Value *indirectOffset;
2238 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2239 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2240 Value *vtxBase = NULL;
2241
2242 if (indirectVertex)
2243 vtxBase = indirectVertex;
2244 else
2245 vtxBase = loadImm(NULL, baseVertex);
2246
2247 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2248
2249 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2250 uint32_t address = getSlotAddress(insn, idx, i);
2251 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2252 indirectOffset, vtxBase, info->in[idx].patch);
2253 }
2254 break;
2255 }
2256 case nir_intrinsic_emit_vertex:
2257 if (info->io.genUserClip > 0)
2258 handleUserClipPlanes();
2259 // fallthrough
2260 case nir_intrinsic_end_primitive: {
2261 uint32_t idx = nir_intrinsic_stream_id(insn);
2262 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2263 break;
2264 }
2265 case nir_intrinsic_load_ubo: {
2266 const DataType dType = getDType(insn);
2267 LValues &newDefs = convert(&insn->dest);
2268 Value *indirectIndex;
2269 Value *indirectOffset;
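// constbuf index 0 holds the user uniforms (see load_uniform above),
// so user UBO bindings are shifted up by one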
2270 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2271 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2272
2273 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2274 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2275 indirectOffset, indirectIndex);
2276 }
2277 break;
2278 }
2279 case nir_intrinsic_get_buffer_size: {
2280 LValues &newDefs = convert(&insn->dest);
2281 const DataType dType = getDType(insn);
2282 Value *indirectBuffer;
2283 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2284
2285 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2286 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2287 break;
2288 }
2289 case nir_intrinsic_store_ssbo: {
2290 DataType sType = getSType(insn->src[0], false, false);
2291 Value *indirectBuffer;
2292 Value *indirectOffset;
2293 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2294 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2295
2296 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2297 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2298 continue;
2299 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2300 offset + i * typeSizeof(sType));
2301 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2302 ->setIndirect(0, 1, indirectBuffer);
2303 }
2304 info->io.globalAccess |= 0x2;
2305 break;
2306 }
2307 case nir_intrinsic_load_ssbo: {
2308 const DataType dType = getDType(insn);
2309 LValues &newDefs = convert(&insn->dest);
2310 Value *indirectBuffer;
2311 Value *indirectOffset;
2312 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2313 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2314
2315 for (uint8_t i = 0u; i < insn->num_components; ++i)
2316 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2317 indirectOffset, indirectBuffer);
2318
2319 info->io.globalAccess |= 0x1;
2320 break;
2321 }
2322 case nir_intrinsic_shared_atomic_add:
2323 case nir_intrinsic_shared_atomic_and:
2324 case nir_intrinsic_shared_atomic_comp_swap:
2325 case nir_intrinsic_shared_atomic_exchange:
2326 case nir_intrinsic_shared_atomic_or:
2327 case nir_intrinsic_shared_atomic_imax:
2328 case nir_intrinsic_shared_atomic_imin:
2329 case nir_intrinsic_shared_atomic_umax:
2330 case nir_intrinsic_shared_atomic_umin:
2331 case nir_intrinsic_shared_atomic_xor: {
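// shared-memory atomics map to OP_ATOM on a FILE_MEMORY_SHARED symbol;
// getSubOp selects the actual operation, and comp_swap passes its
// comparison value as an extra source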
2332 const DataType dType = getDType(insn);
2333 LValues &newDefs = convert(&insn->dest);
2334 Value *indirectOffset;
2335 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2336 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2337 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2338 if (op == nir_intrinsic_shared_atomic_comp_swap)
2339 atom->setSrc(2, getSrc(&insn->src[2], 0));
2340 atom->setIndirect(0, 0, indirectOffset);
2341 atom->subOp = getSubOp(op);
2342 break;
2343 }
2344 case nir_intrinsic_ssbo_atomic_add:
2345 case nir_intrinsic_ssbo_atomic_and:
2346 case nir_intrinsic_ssbo_atomic_comp_swap:
2347 case nir_intrinsic_ssbo_atomic_exchange:
2348 case nir_intrinsic_ssbo_atomic_or:
2349 case nir_intrinsic_ssbo_atomic_imax:
2350 case nir_intrinsic_ssbo_atomic_imin:
2351 case nir_intrinsic_ssbo_atomic_umax:
2352 case nir_intrinsic_ssbo_atomic_umin:
2353 case nir_intrinsic_ssbo_atomic_xor: {
2354 const DataType dType = getDType(insn);
2355 LValues &newDefs = convert(&insn->dest);
2356 Value *indirectBuffer;
2357 Value *indirectOffset;
2358 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2359 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2360
2361 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2362 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2363 getSrc(&insn->src[2], 0));
2364 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2365 atom->setSrc(2, getSrc(&insn->src[3], 0));
2366 atom->setIndirect(0, 0, indirectOffset);
2367 atom->setIndirect(0, 1, indirectBuffer);
2368 atom->subOp = getSubOp(op);
2369
2370 info->io.globalAccess |= 0x2;
2371 break;
2372 }
2373 case nir_intrinsic_bindless_image_atomic_add:
2374 case nir_intrinsic_bindless_image_atomic_and:
2375 case nir_intrinsic_bindless_image_atomic_comp_swap:
2376 case nir_intrinsic_bindless_image_atomic_exchange:
2377 case nir_intrinsic_bindless_image_atomic_max:
2378 case nir_intrinsic_bindless_image_atomic_min:
2379 case nir_intrinsic_bindless_image_atomic_or:
2380 case nir_intrinsic_bindless_image_atomic_xor:
2381 case nir_intrinsic_bindless_image_load:
2382 case nir_intrinsic_bindless_image_samples:
2383 case nir_intrinsic_bindless_image_size:
2384 case nir_intrinsic_bindless_image_store: {
2385 std::vector<Value*> srcs, defs;
2386 Value *indirect = getSrc(&insn->src[0], 0);
2387 DataType ty;
2388
2389 uint32_t mask = 0;
2390 TexInstruction::Target target =
2391 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2392 unsigned int argCount = getNIRArgCount(target);
2393 uint16_t location = 0;
2394
2395 if (opInfo.has_dest) {
2396 LValues &newDefs = convert(&insn->dest);
2397 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2398 defs.push_back(newDefs[i]);
2399 mask |= 1 << i;
2400 }
2401 }
2402
2403 switch (op) {
2404 case nir_intrinsic_bindless_image_atomic_add:
2405 case nir_intrinsic_bindless_image_atomic_and:
2406 case nir_intrinsic_bindless_image_atomic_comp_swap:
2407 case nir_intrinsic_bindless_image_atomic_exchange:
2408 case nir_intrinsic_bindless_image_atomic_max:
2409 case nir_intrinsic_bindless_image_atomic_min:
2410 case nir_intrinsic_bindless_image_atomic_or:
2411 case nir_intrinsic_bindless_image_atomic_xor:
2412 ty = getDType(insn);
2413 mask = 0x1;
2414 info->io.globalAccess |= 0x2;
2415 break;
2416 case nir_intrinsic_bindless_image_load:
2417 ty = TYPE_U32;
2418 info->io.globalAccess |= 0x1;
2419 break;
2420 case nir_intrinsic_bindless_image_store:
2421 ty = TYPE_U32;
2422 mask = 0xf;
2423 info->io.globalAccess |= 0x2;
2424 break;
2425 case nir_intrinsic_bindless_image_samples:
2426 mask = 0x8;
2427 ty = TYPE_U32;
2428 break;
2429 case nir_intrinsic_bindless_image_size:
2430 ty = TYPE_U32;
2431 break;
2432 default:
2433 unreachable("unhandled image opcode");
2434 break;
2435 }
2436
2437 // coords
2438 if (opInfo.num_srcs >= 2)
2439 for (unsigned int i = 0u; i < argCount; ++i)
2440 srcs.push_back(getSrc(&insn->src[1], i));
2441
2442       // for MS images the sample index is just another src added after coords
2443 if (opInfo.num_srcs >= 3 && target.isMS())
2444 srcs.push_back(getSrc(&insn->src[2], 0));
2445
2446 if (opInfo.num_srcs >= 4) {
2447 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2448 for (uint8_t i = 0u; i < components; ++i)
2449 srcs.push_back(getSrc(&insn->src[3], i));
2450 }
2451
2452 if (opInfo.num_srcs >= 5)
2453          // 1 for atomic swap
2454 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2455 srcs.push_back(getSrc(&insn->src[4], i));
2456
2457 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2459 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(nir_intrinsic_format(insn))];
2460 texi->tex.mask = mask;
2461 texi->tex.bindless = true;
2462 texi->cache = convert(nir_intrinsic_access(insn));
2463 texi->setType(ty);
2464 texi->subOp = getSubOp(op);
2465
2466 if (indirect)
2467 texi->setIndirectR(indirect);
2468
2469 break;
2470 }
2471 case nir_intrinsic_image_deref_atomic_add:
2472 case nir_intrinsic_image_deref_atomic_and:
2473 case nir_intrinsic_image_deref_atomic_comp_swap:
2474 case nir_intrinsic_image_deref_atomic_exchange:
2475 case nir_intrinsic_image_deref_atomic_max:
2476 case nir_intrinsic_image_deref_atomic_min:
2477 case nir_intrinsic_image_deref_atomic_or:
2478 case nir_intrinsic_image_deref_atomic_xor:
2479 case nir_intrinsic_image_deref_load:
2480 case nir_intrinsic_image_deref_samples:
2481 case nir_intrinsic_image_deref_size:
2482 case nir_intrinsic_image_deref_store: {
2483 const nir_variable *tex;
2484 std::vector<Value*> srcs, defs;
2485 Value *indirect;
2486 DataType ty;
2487
2488 uint32_t mask = 0;
2489 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2490 const glsl_type *type = deref->type;
2491 TexInstruction::Target target =
2492 convert((glsl_sampler_dim)type->sampler_dimensionality,
2493 type->sampler_array, type->sampler_shadow);
2494 unsigned int argCount = getNIRArgCount(target);
2495 uint16_t location = handleDeref(deref, indirect, tex);
2496
2497 if (opInfo.has_dest) {
2498 LValues &newDefs = convert(&insn->dest);
2499 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2500 defs.push_back(newDefs[i]);
2501 mask |= 1 << i;
2502 }
2503 }
2504
2505 switch (op) {
2506 case nir_intrinsic_image_deref_atomic_add:
2507 case nir_intrinsic_image_deref_atomic_and:
2508 case nir_intrinsic_image_deref_atomic_comp_swap:
2509 case nir_intrinsic_image_deref_atomic_exchange:
2510 case nir_intrinsic_image_deref_atomic_max:
2511 case nir_intrinsic_image_deref_atomic_min:
2512 case nir_intrinsic_image_deref_atomic_or:
2513 case nir_intrinsic_image_deref_atomic_xor:
2514 ty = getDType(insn);
2515 mask = 0x1;
2516 info->io.globalAccess |= 0x2;
2517 break;
2518 case nir_intrinsic_image_deref_load:
2519 ty = TYPE_U32;
2520 info->io.globalAccess |= 0x1;
2521 break;
2522 case nir_intrinsic_image_deref_store:
2523 ty = TYPE_U32;
2524 mask = 0xf;
2525 info->io.globalAccess |= 0x2;
2526 break;
2527 case nir_intrinsic_image_deref_samples:
2528 mask = 0x8;
2529 ty = TYPE_U32;
2530 break;
2531 case nir_intrinsic_image_deref_size:
2532 ty = TYPE_U32;
2533 break;
2534 default:
2535 unreachable("unhandled image opcode");
2536 break;
2537 }
2538
2539 // coords
2540 if (opInfo.num_srcs >= 2)
2541 for (unsigned int i = 0u; i < argCount; ++i)
2542 srcs.push_back(getSrc(&insn->src[1], i));
2543
2544       // for MS images the sample index is just another src added after coords
2545 if (opInfo.num_srcs >= 3 && target.isMS())
2546 srcs.push_back(getSrc(&insn->src[2], 0));
2547
2548 if (opInfo.num_srcs >= 4) {
2549 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2550 for (uint8_t i = 0u; i < components; ++i)
2551 srcs.push_back(getSrc(&insn->src[3], i));
2552 }
2553
2554 if (opInfo.num_srcs >= 5)
2555          // 1 for atomic swap
2556 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2557 srcs.push_back(getSrc(&insn->src[4], i));
2558
2559 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2560 texi->tex.bindless = false;
2561 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2562 texi->tex.mask = mask;
2563 texi->cache = getCacheModeFromVar(tex);
2564 texi->setType(ty);
2565 texi->subOp = getSubOp(op);
2566
2567 if (indirect)
2568 texi->setIndirectR(indirect);
2569
2570 break;
2571 }
2572 case nir_intrinsic_store_shared: {
2573 DataType sType = getSType(insn->src[0], false, false);
2574 Value *indirectOffset;
2575 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2576
2577 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2578 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2579 continue;
2580 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2581 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2582 }
2583 break;
2584 }
2585 case nir_intrinsic_load_shared: {
2586 const DataType dType = getDType(insn);
2587 LValues &newDefs = convert(&insn->dest);
2588 Value *indirectOffset;
2589 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2590
2591 for (uint8_t i = 0u; i < insn->num_components; ++i)
2592 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2593
2594 break;
2595 }
2596 case nir_intrinsic_barrier: {
2597 // TODO: add flag to shader_info
2598 info->numBarriers = 1;
2599 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2600 bar->fixed = 1;
2601 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2602 break;
2603 }
2604 case nir_intrinsic_group_memory_barrier:
2605 case nir_intrinsic_memory_barrier:
2606 case nir_intrinsic_memory_barrier_atomic_counter:
2607 case nir_intrinsic_memory_barrier_buffer:
2608 case nir_intrinsic_memory_barrier_image:
2609 case nir_intrinsic_memory_barrier_shared: {
2610 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2611 bar->fixed = 1;
2612 bar->subOp = getSubOp(op);
2613 break;
2614 }
2615 case nir_intrinsic_shader_clock: {
2616 const DataType dType = getDType(insn);
2617 LValues &newDefs = convert(&insn->dest);
2618
2619 loadImm(newDefs[0], 0u);
2620 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2621 break;
2622 }
2623 case nir_intrinsic_load_global: {
2624 const DataType dType = getDType(insn);
2625 LValues &newDefs = convert(&insn->dest);
2626 Value *indirectOffset;
2627 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2628
2629 for (auto i = 0u; i < insn->num_components; ++i)
2630 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2631
2632 info->io.globalAccess |= 0x1;
2633 break;
2634 }
2635 case nir_intrinsic_store_global: {
2636 DataType sType = getSType(insn->src[0], false, false);
2637
2638 for (auto i = 0u; i < insn->num_components; ++i) {
2639 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2640 continue;
2641 if (typeSizeof(sType) == 8) {
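// split 64-bit stores into two 32-bit stores of the low and high words
// at consecutive addresses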
2642 Value *split[2];
2643 mkSplit(split, 4, getSrc(&insn->src[0], i));
2644
2645 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2646 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2647
2648 sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2649 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2650 } else {
2651 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2652 mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2653 }
2654 }
2655
2656 info->io.globalAccess |= 0x2;
2657 break;
2658 }
2659 default:
2660 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2661 return false;
2662 }
2663
2664 return true;
2665 }
2666
2667 bool
2668 Converter::visit(nir_jump_instr *insn)
2669 {
2670 switch (insn->type) {
2671 case nir_jump_return:
2672 // TODO: this only works in the main function
2673 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2674 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2675 break;
2676 case nir_jump_break:
2677 case nir_jump_continue: {
2678 bool isBreak = insn->type == nir_jump_break;
2679 nir_block *block = insn->instr.block;
2680 assert(!block->successors[1]);
2681 BasicBlock *target = convert(block->successors[0]);
2682 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2683 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2684 break;
2685 }
2686 default:
2687 ERROR("unknown nir_jump_type %u\n", insn->type);
2688 return false;
2689 }
2690
2691 return true;
2692 }
2693
2694 Value*
2695 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2696 {
2697 Value *val;
2698
2699 if (immInsertPos)
2700 setPosition(immInsertPos, true);
2701 else
2702 setPosition(bb, false);
2703
2704 switch (insn->def.bit_size) {
2705 case 64:
2706 val = loadImm(getSSA(8), insn->value[idx].u64);
2707 break;
2708 case 32:
2709 val = loadImm(getSSA(4), insn->value[idx].u32);
2710 break;
2711 case 16:
2712 val = loadImm(getSSA(2), insn->value[idx].u16);
2713 break;
2714 case 8:
2715 val = loadImm(getSSA(1), insn->value[idx].u8);
2716 break;
2717 default:
2718 unreachable("unhandled bit size!\n");
2719 }
2720 setPosition(bb, true);
2721 return val;
2722 }
2723
2724 bool
2725 Converter::visit(nir_load_const_instr *insn)
2726 {
2727 assert(insn->def.bit_size <= 64);
2728 immediates[insn->def.index] = insn;
2729 return true;
2730 }
2731
2732 #define DEFAULT_CHECKS \
2733 if (insn->dest.dest.ssa.num_components > 1) { \
2734 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2735 return false; \
2736 } \
2737 if (insn->dest.write_mask != 1) { \
2738 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2739 return false; \
2740 }
2741 bool
2742 Converter::visit(nir_alu_instr *insn)
2743 {
2744 const nir_op op = insn->op;
2745 const nir_op_info &info = nir_op_infos[op];
2746 DataType dType = getDType(insn);
2747 const std::vector<DataType> sTypes = getSTypes(insn);
2748
2749 Instruction *oldPos = this->bb->getExit();
2750
2751 switch (op) {
2752 case nir_op_fabs:
2753 case nir_op_iabs:
2754 case nir_op_fadd:
2755 case nir_op_iadd:
2756 case nir_op_iand:
2757 case nir_op_fceil:
2758 case nir_op_fcos:
2759 case nir_op_fddx:
2760 case nir_op_fddx_coarse:
2761 case nir_op_fddx_fine:
2762 case nir_op_fddy:
2763 case nir_op_fddy_coarse:
2764 case nir_op_fddy_fine:
2765 case nir_op_fdiv:
2766 case nir_op_idiv:
2767 case nir_op_udiv:
2768 case nir_op_fexp2:
2769 case nir_op_ffloor:
2770 case nir_op_ffma:
2771 case nir_op_flog2:
2772 case nir_op_fmax:
2773 case nir_op_imax:
2774 case nir_op_umax:
2775 case nir_op_fmin:
2776 case nir_op_imin:
2777 case nir_op_umin:
2778 case nir_op_fmod:
2779 case nir_op_imod:
2780 case nir_op_umod:
2781 case nir_op_fmul:
2782 case nir_op_imul:
2783 case nir_op_imul_high:
2784 case nir_op_umul_high:
2785 case nir_op_fneg:
2786 case nir_op_ineg:
2787 case nir_op_inot:
2788 case nir_op_ior:
2789 case nir_op_pack_64_2x32_split:
2790 case nir_op_fpow:
2791 case nir_op_frcp:
2792 case nir_op_frem:
2793 case nir_op_irem:
2794 case nir_op_frsq:
2795 case nir_op_fsat:
2796 case nir_op_ishr:
2797 case nir_op_ushr:
2798 case nir_op_fsin:
2799 case nir_op_fsqrt:
2800 case nir_op_fsub:
2801 case nir_op_isub:
2802 case nir_op_ftrunc:
2803 case nir_op_ishl:
2804 case nir_op_ixor: {
2805 DEFAULT_CHECKS;
2806 LValues &newDefs = convert(&insn->dest);
2807 operation preOp = preOperationNeeded(op);
2808 if (preOp != OP_NOP) {
2809 assert(info.num_inputs < 2);
2810 Value *tmp = getSSA(typeSizeof(dType));
2811 Instruction *i0 = mkOp(preOp, dType, tmp);
2812 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2813 if (info.num_inputs) {
2814 i0->setSrc(0, getSrc(&insn->src[0]));
2815 i1->setSrc(0, tmp);
2816 }
2817 i1->subOp = getSubOp(op);
2818 } else {
2819 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2820 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2821 i->setSrc(s, getSrc(&insn->src[s]));
2822 }
2823 i->subOp = getSubOp(op);
2824 }
2825 break;
2826 }
2827 case nir_op_ifind_msb:
2828 case nir_op_ufind_msb: {
2829 DEFAULT_CHECKS;
2830 LValues &newDefs = convert(&insn->dest);
2831 dType = sTypes[0];
2832 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2833 break;
2834 }
2835 case nir_op_fround_even: {
2836 DEFAULT_CHECKS;
2837 LValues &newDefs = convert(&insn->dest);
2838 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2839 break;
2840 }
2841 // convert instructions
2842 case nir_op_f2f32:
2843 case nir_op_f2i32:
2844 case nir_op_f2u32:
2845 case nir_op_i2f32:
2846 case nir_op_i2i32:
2847 case nir_op_u2f32:
2848 case nir_op_u2u32:
2849 case nir_op_f2f64:
2850 case nir_op_f2i64:
2851 case nir_op_f2u64:
2852 case nir_op_i2f64:
2853 case nir_op_i2i64:
2854 case nir_op_u2f64:
2855 case nir_op_u2u64: {
2856 DEFAULT_CHECKS;
2857 LValues &newDefs = convert(&insn->dest);
2858 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2859 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2860 i->rnd = ROUND_Z;
2861 i->sType = sTypes[0];
2862 break;
2863 }
2864 // compare instructions
2865 case nir_op_feq32:
2866 case nir_op_ieq32:
2867 case nir_op_fge32:
2868 case nir_op_ige32:
2869 case nir_op_uge32:
2870 case nir_op_flt32:
2871 case nir_op_ilt32:
2872 case nir_op_ult32:
2873 case nir_op_fne32:
2874 case nir_op_ine32: {
2875 DEFAULT_CHECKS;
2876 LValues &newDefs = convert(&insn->dest);
2877 Instruction *i = mkCmp(getOperation(op),
2878 getCondCode(op),
2879 dType,
2880 newDefs[0],
2881 dType,
2882 getSrc(&insn->src[0]),
2883 getSrc(&insn->src[1]));
2884 if (info.num_inputs == 3)
2885 i->setSrc(2, getSrc(&insn->src[2]));
2886 i->sType = sTypes[0];
2887 break;
2888 }
2889 // those are weird ALU ops and need special handling, because
2890    // 1. they are always component based
2891 // 2. they basically just merge multiple values into one data type
2892 case nir_op_mov:
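// movs touching a register array are lowered to local-memory accesses
// at the offset recorded in regToLmemOffset; plain movs are handled
// per component in the else branch below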
2893 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2894 nir_reg_dest& reg = insn->dest.dest.reg;
2895 uint32_t goffset = regToLmemOffset[reg.reg->index];
2896 uint8_t comps = reg.reg->num_components;
2897 uint8_t size = reg.reg->bit_size / 8;
2898 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2899 uint32_t aoffset = csize * reg.base_offset;
2900 Value *indirect = NULL;
2901
2902 if (reg.indirect)
2903 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2904 getSrc(reg.indirect, 0), mkImm(csize));
2905
2906 for (uint8_t i = 0u; i < comps; ++i) {
2907 if (!((1u << i) & insn->dest.write_mask))
2908 continue;
2909
2910 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2911 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2912 }
2913 break;
2914 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2915 LValues &newDefs = convert(&insn->dest);
2916 nir_reg_src& reg = insn->src[0].src.reg;
2917 uint32_t goffset = regToLmemOffset[reg.reg->index];
2918 // uint8_t comps = reg.reg->num_components;
2919 uint8_t size = reg.reg->bit_size / 8;
2920 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2921 uint32_t aoffset = csize * reg.base_offset;
2922 Value *indirect = NULL;
2923
2924 if (reg.indirect)
2925 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2926
2927 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2928 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2929
2930 break;
2931 } else {
2932 LValues &newDefs = convert(&insn->dest);
2933 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2934 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2935 }
2936 }
2937 break;
2938 case nir_op_vec2:
2939 case nir_op_vec3:
2940 case nir_op_vec4: {
2941 LValues &newDefs = convert(&insn->dest);
2942 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2943 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2944 }
2945 break;
2946 }
2947 // (un)pack
2948 case nir_op_pack_64_2x32: {
2949 LValues &newDefs = convert(&insn->dest);
2950 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2951 merge->setSrc(0, getSrc(&insn->src[0], 0));
2952 merge->setSrc(1, getSrc(&insn->src[0], 1));
2953 break;
2954 }
2955 case nir_op_pack_half_2x16_split: {
2956 LValues &newDefs = convert(&insn->dest);
2957 Value *tmpH = getSSA();
2958 Value *tmpL = getSSA();
2959
2960 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2961 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2962 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2963 break;
2964 }
2965 case nir_op_unpack_half_2x16_split_x:
2966 case nir_op_unpack_half_2x16_split_y: {
2967 LValues &newDefs = convert(&insn->dest);
2968 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2969 if (op == nir_op_unpack_half_2x16_split_y)
2970 cvt->subOp = 1;
2971 break;
2972 }
2973 case nir_op_unpack_64_2x32: {
2974 LValues &newDefs = convert(&insn->dest);
2975 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2976 break;
2977 }
2978 case nir_op_unpack_64_2x32_split_x: {
2979 LValues &newDefs = convert(&insn->dest);
2980 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2981 break;
2982 }
2983 case nir_op_unpack_64_2x32_split_y: {
2984 LValues &newDefs = convert(&insn->dest);
2985 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2986 break;
2987 }
2988 // special instructions
2989 case nir_op_fsign:
2990 case nir_op_isign: {
2991 DEFAULT_CHECKS;
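// sign(x) = (x > 0) - (x < 0); the operand order of the final SUB is
// swapped for integers because integer SETs produce 0/-1 while float
// SETs produce 0.0/1.0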
2992 DataType iType;
2993 if (::isFloatType(dType))
2994 iType = TYPE_F32;
2995 else
2996 iType = TYPE_S32;
2997
2998 LValues &newDefs = convert(&insn->dest);
2999 LValue *val0 = getScratch();
3000 LValue *val1 = getScratch();
3001 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
3002 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
3003
3004 if (dType == TYPE_F64) {
3005 mkOp2(OP_SUB, iType, val0, val0, val1);
3006 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
3007 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
3008 mkOp2(OP_SUB, iType, val0, val1, val0);
3009 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
3010 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
3011 } else if (::isFloatType(dType))
3012 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
3013 else
3014 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
3015 break;
3016 }
3017 case nir_op_fcsel:
3018 case nir_op_b32csel: {
3019 DEFAULT_CHECKS;
3020 LValues &newDefs = convert(&insn->dest);
3021 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
3022 break;
3023 }
3024 case nir_op_ibitfield_extract:
3025 case nir_op_ubitfield_extract: {
3026 DEFAULT_CHECKS;
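// EXTBF expects the field position packed as (width << 8) | offset in
// its second source, so assemble that word with an INSBF first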
3027 Value *tmp = getSSA();
3028 LValues &newDefs = convert(&insn->dest);
3029 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3030 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
3031 break;
3032 }
3033 case nir_op_bfm: {
3034 DEFAULT_CHECKS;
3035 LValues &newDefs = convert(&insn->dest);
3036 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3037 break;
3038 }
3039 case nir_op_bitfield_insert: {
3040 DEFAULT_CHECKS;
3041 LValues &newDefs = convert(&insn->dest);
3042 LValue *temp = getSSA();
3043 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
3044 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
3045 break;
3046 }
3047 case nir_op_bit_count: {
3048 DEFAULT_CHECKS;
3049 LValues &newDefs = convert(&insn->dest);
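// OP_POPCNT counts the set bits of (src0 & src1), hence the source is
// passed twice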
3050 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
3051 break;
3052 }
3053 case nir_op_bitfield_reverse: {
3054 DEFAULT_CHECKS;
3055 LValues &newDefs = convert(&insn->dest);
3056 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3057 break;
3058 }
3059 case nir_op_find_lsb: {
3060 DEFAULT_CHECKS;
3061 LValues &newDefs = convert(&insn->dest);
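// find_lsb is implemented by bit-reversing the source and locating the
// most significant set bit of the result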
3062 Value *tmp = getSSA();
3063 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3064 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3065 break;
3066 }
3067 // boolean conversions
3068 case nir_op_b2f32: {
3069 DEFAULT_CHECKS;
3070 LValues &newDefs = convert(&insn->dest);
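// after nir_lower_bool_to_int32, booleans are 0 or ~0; ANDing with the
// bit pattern of 1.0f thus yields 0.0f or 1.0f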
3071 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
3072 break;
3073 }
3074 case nir_op_b2f64: {
3075 DEFAULT_CHECKS;
3076 LValues &newDefs = convert(&insn->dest);
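// 0x3ff00000 is the upper word of the IEEE-754 double 1.0; mask it
// with the boolean and merge with a zero low word to get 0.0 or 1.0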
3077 Value *tmp = getSSA(4);
3078 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
3079 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
3080 break;
3081 }
3082 case nir_op_f2b32:
3083 case nir_op_i2b32: {
3084 DEFAULT_CHECKS;
3085 LValues &newDefs = convert(&insn->dest);
3086 Value *src1;
3087 if (typeSizeof(sTypes[0]) == 8) {
3088 src1 = loadImm(getSSA(8), 0.0);
3089 } else {
3090 src1 = zero;
3091 }
3092 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
3093 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
3094 break;
3095 }
3096 case nir_op_b2i32: {
3097 DEFAULT_CHECKS;
3098 LValues &newDefs = convert(&insn->dest);
3099 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
3100 break;
3101 }
3102 case nir_op_b2i64: {
3103 DEFAULT_CHECKS;
3104 LValues &newDefs = convert(&insn->dest);
3105 LValue *def = getScratch();
3106 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
3107 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
3108 break;
3109 }
3110 default:
3111 ERROR("unknown nir_op %s\n", info.name);
3112 return false;
3113 }
3114
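// propagate NIR's exact and saturate flags onto every nv50 instruction
// emitted for this ALU op (oldPos pointed at the BB's tail before we
// started emitting)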
3115 if (!oldPos) {
3116 oldPos = this->bb->getEntry();
3117 oldPos->precise = insn->exact;
3118 }
3119
3120 if (unlikely(!oldPos))
3121 return true;
3122
3123 while (oldPos->next) {
3124 oldPos = oldPos->next;
3125 oldPos->precise = insn->exact;
3126 }
3127 oldPos->saturate = insn->dest.saturate;
3128
3129 return true;
3130 }
3131 #undef DEFAULT_CHECKS
3132
3133 bool
3134 Converter::visit(nir_ssa_undef_instr *insn)
3135 {
3136 LValues &newDefs = convert(&insn->def);
3137 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3138 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3139 }
3140 return true;
3141 }
3142
3143 #define CASE_SAMPLER(ty) \
3144 case GLSL_SAMPLER_DIM_ ## ty : \
3145 if (isArray && !isShadow) \
3146 return TEX_TARGET_ ## ty ## _ARRAY; \
3147 else if (!isArray && isShadow) \
3148 return TEX_TARGET_## ty ## _SHADOW; \
3149 else if (isArray && isShadow) \
3150 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
3151 else \
3152 return TEX_TARGET_ ## ty
3153
3154 TexTarget
3155 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3156 {
3157 switch (dim) {
3158 CASE_SAMPLER(1D);
3159 CASE_SAMPLER(2D);
3160 CASE_SAMPLER(CUBE);
3161 case GLSL_SAMPLER_DIM_3D:
3162 return TEX_TARGET_3D;
3163 case GLSL_SAMPLER_DIM_MS:
3164 if (isArray)
3165 return TEX_TARGET_2D_MS_ARRAY;
3166 return TEX_TARGET_2D_MS;
3167 case GLSL_SAMPLER_DIM_RECT:
3168 if (isShadow)
3169 return TEX_TARGET_RECT_SHADOW;
3170 return TEX_TARGET_RECT;
3171 case GLSL_SAMPLER_DIM_BUF:
3172 return TEX_TARGET_BUFFER;
3173 case GLSL_SAMPLER_DIM_EXTERNAL:
3174 return TEX_TARGET_2D;
3175 default:
3176 ERROR("unknown glsl_sampler_dim %u\n", dim);
3177 assert(false);
3178 return TEX_TARGET_COUNT;
3179 }
3180 }
3181 #undef CASE_SAMPLER
3182
3183 Value*
3184 Converter::applyProjection(Value *src, Value *proj)
3185 {
3186 if (!proj)
3187 return src;
3188 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3189 }
3190
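// codegen's getArgCount() includes arguments that the NIR intrinsics
// supply separately (the MS sample index) or pack differently (the
// cube array layer), so trim the count accordingly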
3191 unsigned int
3192 Converter::getNIRArgCount(TexInstruction::Target& target)
3193 {
3194 unsigned int result = target.getArgCount();
3195 if (target.isCube() && target.isArray())
3196 result--;
3197 if (target.isMS())
3198 result--;
3199 return result;
3200 }
3201
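// walk the deref chain up to the nir_variable, accumulating constant
// offsets on the way and folding any indirect array indices (scaled by
// their element sizes) into a single indirect Value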
3202 uint16_t
3203 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3204 {
3205 typedef std::pair<uint32_t,Value*> DerefPair;
3206 std::list<DerefPair> derefs;
3207
3208 uint16_t result = 0;
3209 while (deref->deref_type != nir_deref_type_var) {
3210 switch (deref->deref_type) {
3211 case nir_deref_type_array: {
3212 Value *indirect;
3213 uint8_t size = type_size(deref->type, true);
3214 result += size * getIndirect(&deref->arr.index, 0, indirect);
3215
3216 if (indirect) {
3217 derefs.push_front(std::make_pair(size, indirect));
3218 }
3219
3220 break;
3221 }
3222 case nir_deref_type_struct: {
3223 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3224 break;
3225 }
3226 case nir_deref_type_var:
3227 default:
3228 unreachable("nir_deref_type_var reached in handleDeref!");
3229 break;
3230 }
3231 deref = nir_deref_instr_parent(deref);
3232 }
3233
3234 indirect = NULL;
3235 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3236 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3237 if (indirect)
3238 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3239 else
3240 indirect = offset;
3241 }
3242
3243 tex = nir_deref_instr_get_variable(deref);
3244 assert(tex);
3245
3246 return result + tex->data.driver_location;
3247 }
3248
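// map GL access qualifiers to nv50 cache modes: volatile accesses
// bypass the caches (CV), coherent ones are cached globally only (CG),
// everything else uses the default cache-all mode (CA)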
3249 CacheMode
3250 Converter::convert(enum gl_access_qualifier access)
3251 {
3252 switch (access) {
3253 case ACCESS_VOLATILE:
3254 return CACHE_CV;
3255 case ACCESS_COHERENT:
3256 return CACHE_CG;
3257 default:
3258 return CACHE_CA;
3259 }
3260 }
3261
3262 CacheMode
3263 Converter::getCacheModeFromVar(const nir_variable *var)
3264 {
3265 return convert(var->data.image.access);
3266 }
3267
3268 bool
3269 Converter::visit(nir_tex_instr *insn)
3270 {
3271 switch (insn->op) {
3272 case nir_texop_lod:
3273 case nir_texop_query_levels:
3274 case nir_texop_tex:
3275 case nir_texop_texture_samples:
3276 case nir_texop_tg4:
3277 case nir_texop_txb:
3278 case nir_texop_txd:
3279 case nir_texop_txf:
3280 case nir_texop_txf_ms:
3281 case nir_texop_txl:
3282 case nir_texop_txs: {
3283 LValues &newDefs = convert(&insn->dest);
3284 std::vector<Value*> srcs;
3285 std::vector<Value*> defs;
3286 std::vector<nir_src*> offsets;
3287 uint8_t mask = 0;
3288 bool lz = false;
3289 Value *proj = NULL;
3290 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3291 operation op = getOperation(insn->op);
3292
3293 int r, s;
3294 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3295 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3296 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3297 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3298 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3299 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3300 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3301 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3302 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3303 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3304 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3305 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3306 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3307
3308 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3309 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3310
3311 if (projIdx != -1)
3312 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3313
3314 srcs.resize(insn->coord_components);
3315 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3316 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3317
3318       // sometimes we get fewer args than target.getArgCount(), but codegen expects the latter
3319 if (insn->coord_components) {
3320 uint32_t argCount = target.getArgCount();
3321
3322 if (target.isMS())
3323 argCount -= 1;
3324
3325 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3326 srcs.push_back(getSSA());
3327 }
3328
3329 if (insn->op == nir_texop_texture_samples)
3330 srcs.push_back(zero);
3331 else if (!insn->num_srcs)
3332 srcs.push_back(loadImm(NULL, 0));
3333 if (biasIdx != -1)
3334 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3335 if (lodIdx != -1)
3336 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3337 else if (op == OP_TXF)
3338 lz = true;
3339 if (msIdx != -1)
3340 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3341 if (offsetIdx != -1)
3342 offsets.push_back(&insn->src[offsetIdx].src);
3343 if (compIdx != -1)
3344 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3345 if (texOffIdx != -1) {
3346 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3347 texOffIdx = srcs.size() - 1;
3348 }
3349 if (sampOffIdx != -1) {
3350 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3351 sampOffIdx = srcs.size() - 1;
3352 }
3353 if (bindless) {
3354          // currently we only use the lower 32 bits of the 64-bit handle
3355 Value *split[2];
3356 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3357
3358 mkSplit(split, 4, handle);
3359
3360 srcs.push_back(split[0]);
3361 texOffIdx = srcs.size() - 1;
3362 }
3363
3364 r = bindless ? 0xff : insn->texture_index;
3365 s = bindless ? 0x1f : insn->sampler_index;
3366
3367 defs.resize(newDefs.size());
3368 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3369 defs[d] = newDefs[d];
3370 mask |= 1 << d;
3371 }
3372 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3373 lz = true;
3374
3375 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3376 texi->tex.levelZero = lz;
3377 texi->tex.mask = mask;
3378 texi->tex.bindless = bindless;
3379
3380 if (texOffIdx != -1)
3381 texi->tex.rIndirectSrc = texOffIdx;
3382 if (sampOffIdx != -1)
3383 texi->tex.sIndirectSrc = sampOffIdx;
3384
3385 switch (insn->op) {
3386 case nir_texop_tg4:
3387 if (!target.isShadow())
3388 texi->tex.gatherComp = insn->component;
3389 break;
3390 case nir_texop_txs:
3391 texi->tex.query = TXQ_DIMS;
3392 break;
3393 case nir_texop_texture_samples:
3394 texi->tex.mask = 0x4;
3395 texi->tex.query = TXQ_TYPE;
3396 break;
3397 case nir_texop_query_levels:
3398 texi->tex.mask = 0x8;
3399 texi->tex.query = TXQ_DIMS;
3400 break;
3401 default:
3402 break;
3403 }
3404
3405 texi->tex.useOffsets = offsets.size();
3406 if (texi->tex.useOffsets) {
3407 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3408 for (uint32_t c = 0u; c < 3; ++c) {
3409 uint8_t s2 = std::min(c, target.getDim() - 1);
3410 texi->offset[s][c].set(getSrc(offsets[s], s2));
3411 texi->offset[s][c].setInsn(texi);
3412 }
3413 }
3414 }
3415
3416 if (op == OP_TXG && offsetIdx == -1) {
3417 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3418 texi->tex.useOffsets = 4;
3419 setPosition(texi, false);
3420 for (uint8_t i = 0; i < 4; ++i) {
3421 for (uint8_t j = 0; j < 2; ++j) {
3422 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3423 texi->offset[i][j].setInsn(texi);
3424 }
3425 }
3426 setPosition(texi, true);
3427 }
3428 }
3429
3430 if (ddxIdx != -1 && ddyIdx != -1) {
3431 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3432 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3433 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3434 }
3435 }
3436
3437 break;
3438 }
3439 default:
3440 ERROR("unknown nir_texop %u\n", insn->op);
3441 return false;
3442 }
3443 return true;
3444 }
3445
3446 bool
3447 Converter::visit(nir_deref_instr *deref)
3448 {
3449    // we just ignore these, because image intrinsics are the only place where
3450    // we should end up with deref sources, and those have to backtrack anyway
3451    // to get at the nir_variable. This code just exists to handle some special
3452    // cases.
3453 switch (deref->deref_type) {
3454 case nir_deref_type_array:
3455 case nir_deref_type_struct:
3456 case nir_deref_type_var:
3457 break;
3458 default:
3459 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3460 return false;
3461 }
3462 return true;
3463 }
3464
3465 bool
3466 Converter::run()
3467 {
3468 bool progress;
3469
3470 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3471 nir_print_shader(nir, stderr);
3472
3473 struct nir_lower_subgroups_options subgroup_options = {
3474 .subgroup_size = 32,
3475 .ballot_bit_size = 32,
3476 };
3477
3478 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3479 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3480 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3481 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3482 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3483 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
3484 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3485
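// the usual NIR optimization loop, repeated until a fixed point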
3486 do {
3487 progress = false;
3488 NIR_PASS(progress, nir, nir_copy_prop);
3489 NIR_PASS(progress, nir, nir_opt_remove_phis);
3490 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3491 NIR_PASS(progress, nir, nir_opt_cse);
3492 NIR_PASS(progress, nir, nir_opt_algebraic);
3493 NIR_PASS(progress, nir, nir_opt_constant_folding);
3494 NIR_PASS(progress, nir, nir_copy_prop);
3495 NIR_PASS(progress, nir, nir_opt_dce);
3496 NIR_PASS(progress, nir, nir_opt_dead_cf);
3497 } while (progress);
3498
3499 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3500 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3501 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3502 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3503
3504 // Garbage collect dead instructions
3505 nir_sweep(nir);
3506
3507 if (!parseNIR()) {
3508 ERROR("Couldn't prase NIR!\n");
3509 return false;
3510 }
3511
3512 if (!assignSlots()) {
3513 ERROR("Couldn't assign slots!\n");
3514 return false;
3515 }
3516
3517 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3518 nir_print_shader(nir, stderr);
3519
3520 nir_foreach_function(function, nir) {
3521 if (!visit(function))
3522 return false;
3523 }
3524
3525 return true;
3526 }
3527
3528 } // unnamed namespace
3529
3530 namespace nv50_ir {
3531
3532 bool
3533 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3534 {
3535 nir_shader *nir = (nir_shader*)info->bin.source;
3536 Converter converter(this, nir, info);
3537 bool result = converter.run();
3538 if (!result)
3539 return result;
3540 LoweringHelper lowering;
3541 lowering.run(this);
3542 tlsSize = info->bin.tlsSpace;
3543 return result;
3544 }
3545
3546 } // namespace nv50_ir