glsl,nir: Switch the enum representing shader image formats to PIPE_FORMAT.
[mesa.git] src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <cstring>
40 #include <list>
41 #include <vector>
42
43 namespace {
44
45 #if __cplusplus >= 201103L
46 using std::hash;
47 using std::unordered_map;
48 #else
49 using std::tr1::hash;
50 using std::tr1::unordered_map;
51 #endif
52
53 using namespace nv50_ir;
54
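// Type-size callback handed to NIR's I/O lowering: every GLSL type is
// counted in vec4 attribute slots; the bindless parameter is unused here.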
55 int
56 type_size(const struct glsl_type *type, bool bindless)
57 {
58 return glsl_count_attribute_slots(type, false);
59 }
60
61 class Converter : public ConverterCommon
62 {
63 public:
64 Converter(Program *, nir_shader *, nv50_ir_prog_info *);
65
66 bool run();
67 private:
68 typedef std::vector<LValue*> LValues;
69 typedef unordered_map<unsigned, LValues> NirDefMap;
70 typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
71 typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
72 typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
73
74 CacheMode convert(enum gl_access_qualifier);
75 TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
76 LValues& convert(nir_alu_dest *);
77 BasicBlock* convert(nir_block *);
78 LValues& convert(nir_dest *);
79 SVSemantic convert(nir_intrinsic_op);
80 Value* convert(nir_load_const_instr*, uint8_t);
81 LValues& convert(nir_register *);
82 LValues& convert(nir_ssa_def *);
83
84 Value* getSrc(nir_alu_src *, uint8_t component = 0);
85 Value* getSrc(nir_register *, uint8_t);
86 Value* getSrc(nir_src *, uint8_t, bool indirect = false);
87 Value* getSrc(nir_ssa_def *, uint8_t);
88
89 // The returned value is the constant part of the given source (either the
90 // nir_src or the selected source component of an intrinsic). Even though
91 // this is mostly an optimization that lets us skip indirects in a few
92 // cases, sometimes we require immediate values or have to set some fields
93 // on instructions (e.g. tex) in order for codegen to consume those.
94 // If the found value does not have a constant part, it is returned through
95 // the Value output parameter instead.
96 uint32_t getIndirect(nir_src *, uint8_t, Value *&);
97 // isScalar indicates that the addressing is scalar; otherwise vec4
98 // addressing is assumed
99 uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
100 bool isScalar = false);
101
102 uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
103
104 void setInterpolate(nv50_ir_varying *,
105 uint8_t,
106 bool centroid,
107 unsigned semantics);
108
109 Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
110 uint8_t c, Value *indirect0 = NULL,
111 Value *indirect1 = NULL, bool patch = false);
112 void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
113 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
114 Value *indirect1 = NULL);
115
116 bool isFloatType(nir_alu_type);
117 bool isSignedType(nir_alu_type);
118 bool isResultFloat(nir_op);
119 bool isResultSigned(nir_op);
120
121 DataType getDType(nir_alu_instr *);
122 DataType getDType(nir_intrinsic_instr *);
123 DataType getDType(nir_intrinsic_instr *, bool isSigned);
124 DataType getDType(nir_op, uint8_t);
125
126 std::vector<DataType> getSTypes(nir_alu_instr *);
127 DataType getSType(nir_src &, bool isFloat, bool isSigned);
128
129 operation getOperation(nir_intrinsic_op);
130 operation getOperation(nir_op);
131 operation getOperation(nir_texop);
132 operation preOperationNeeded(nir_op);
133
134 int getSubOp(nir_intrinsic_op);
135 int getSubOp(nir_op);
136
137 CondCode getCondCode(nir_op);
138
139 bool assignSlots();
140 bool parseNIR();
141
142 bool visit(nir_alu_instr *);
143 bool visit(nir_block *);
144 bool visit(nir_cf_node *);
145 bool visit(nir_deref_instr *);
146 bool visit(nir_function *);
147 bool visit(nir_if *);
148 bool visit(nir_instr *);
149 bool visit(nir_intrinsic_instr *);
150 bool visit(nir_jump_instr *);
151 bool visit(nir_load_const_instr*);
152 bool visit(nir_loop *);
153 bool visit(nir_ssa_undef_instr *);
154 bool visit(nir_tex_instr *);
155
156 // tex stuff
157 Value* applyProjection(Value *src, Value *proj);
158 unsigned int getNIRArgCount(TexInstruction::Target&);
159
160 // image stuff
161 uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
162 CacheMode getCacheModeFromVar(const nir_variable *);
163
164 nir_shader *nir;
165
166 NirDefMap ssaDefs;
167 NirDefMap regDefs;
168 ImmediateMap immediates;
169 NirArrayLMemOffsets regToLmemOffset;
170 NirBlockMap blocks;
171 unsigned int curLoopDepth;
172
173 BasicBlock *exit;
174 Value *zero;
175 Instruction *immInsertPos;
176
177 int clipVertexOutput;
178
179 union {
180 struct {
181 Value *position;
182 } fp;
183 };
184 };
185
186 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
187 : ConverterCommon(prog, info),
188 nir(nir),
189 curLoopDepth(0),
190 clipVertexOutput(-1)
191 {
192 zero = mkImm((uint32_t)0);
193 }
194
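// Returns the BasicBlock for a NIR block, creating and memoizing it on
// first use so forward branches can refer to blocks not yet visited.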
195 BasicBlock *
196 Converter::convert(nir_block *block)
197 {
198 NirBlockMap::iterator it = blocks.find(block->index);
199 if (it != blocks.end())
200 return it->second;
201
202 BasicBlock *bb = new BasicBlock(func);
203 blocks[block->index] = bb;
204 return bb;
205 }
206
207 bool
208 Converter::isFloatType(nir_alu_type type)
209 {
210 return nir_alu_type_get_base_type(type) == nir_type_float;
211 }
212
213 bool
214 Converter::isSignedType(nir_alu_type type)
215 {
216 return nir_alu_type_get_base_type(type) == nir_type_int;
217 }
218
219 bool
220 Converter::isResultFloat(nir_op op)
221 {
222 const nir_op_info &info = nir_op_infos[op];
223 if (info.output_type != nir_type_invalid)
224 return isFloatType(info.output_type);
225
226 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
227 assert(false);
228 return true;
229 }
230
231 bool
232 Converter::isResultSigned(nir_op op)
233 {
234 switch (op) {
235 // there is no umul and we get wrong results if we treat all muls as signed
236 case nir_op_imul:
237 case nir_op_inot:
238 return false;
239 default:
240 const nir_op_info &info = nir_op_infos[op];
241 if (info.output_type != nir_type_invalid)
242 return isSignedType(info.output_type);
243 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
244 assert(false);
245 return true;
246 }
247 }
248
249 DataType
250 Converter::getDType(nir_alu_instr *insn)
251 {
252 if (insn->dest.dest.is_ssa)
253 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
254 else
255 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
256 }
257
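// Only the signed atomic min/max intrinsics (shared/SSBO) need a signed
// destination type; every other intrinsic destination is treated as
// unsigned.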
258 DataType
259 Converter::getDType(nir_intrinsic_instr *insn)
260 {
261 bool isSigned;
262 switch (insn->intrinsic) {
263 case nir_intrinsic_shared_atomic_imax:
264 case nir_intrinsic_shared_atomic_imin:
265 case nir_intrinsic_ssbo_atomic_imax:
266 case nir_intrinsic_ssbo_atomic_imin:
267 isSigned = true;
268 break;
269 default:
270 isSigned = false;
271 break;
272 }
273
274 return getDType(insn, isSigned);
275 }
276
277 DataType
278 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
279 {
280 if (insn->dest.is_ssa)
281 return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
282 else
283 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
284 }
285
286 DataType
287 Converter::getDType(nir_op op, uint8_t bitSize)
288 {
289 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
290 if (ty == TYPE_NONE) {
291 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
292 assert(false);
293 }
294 return ty;
295 }
296
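// Builds one DataType per ALU source from nir_op_infos; sources typed
// nir_type_invalid are not expected here and trigger an assert.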
297 std::vector<DataType>
298 Converter::getSTypes(nir_alu_instr *insn)
299 {
300 const nir_op_info &info = nir_op_infos[insn->op];
301 std::vector<DataType> res(info.num_inputs);
302
303 for (uint8_t i = 0; i < info.num_inputs; ++i) {
304 if (info.input_types[i] != nir_type_invalid) {
305 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
306 } else {
307 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
308 assert(false);
309 res[i] = TYPE_NONE;
310 break;
311 }
312 }
313
314 return res;
315 }
316
317 DataType
318 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
319 {
320 uint8_t bitSize;
321 if (src.is_ssa)
322 bitSize = src.ssa->bit_size;
323 else
324 bitSize = src.reg.reg->bit_size;
325
326 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
327 if (ty == TYPE_NONE) {
328 const char *str;
329 if (isFloat)
330 str = "float";
331 else if (isSigned)
332 str = "int";
333 else
334 str = "uint";
335 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
336 assert(false);
337 }
338 return ty;
339 }
340
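// Maps NIR ALU opcodes onto nv50 IR operations. Float/int distinctions
// are carried by the operands' DataType, so e.g. fadd and iadd both
// become OP_ADD.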
341 operation
342 Converter::getOperation(nir_op op)
343 {
344 switch (op) {
345 // basic ops with float and int variants
346 case nir_op_fabs:
347 case nir_op_iabs:
348 return OP_ABS;
349 case nir_op_fadd:
350 case nir_op_iadd:
351 return OP_ADD;
352 case nir_op_iand:
353 return OP_AND;
354 case nir_op_ifind_msb:
355 case nir_op_ufind_msb:
356 return OP_BFIND;
357 case nir_op_fceil:
358 return OP_CEIL;
359 case nir_op_fcos:
360 return OP_COS;
361 case nir_op_f2f32:
362 case nir_op_f2f64:
363 case nir_op_f2i32:
364 case nir_op_f2i64:
365 case nir_op_f2u32:
366 case nir_op_f2u64:
367 case nir_op_i2f32:
368 case nir_op_i2f64:
369 case nir_op_i2i32:
370 case nir_op_i2i64:
371 case nir_op_u2f32:
372 case nir_op_u2f64:
373 case nir_op_u2u32:
374 case nir_op_u2u64:
375 return OP_CVT;
376 case nir_op_fddx:
377 case nir_op_fddx_coarse:
378 case nir_op_fddx_fine:
379 return OP_DFDX;
380 case nir_op_fddy:
381 case nir_op_fddy_coarse:
382 case nir_op_fddy_fine:
383 return OP_DFDY;
384 case nir_op_fdiv:
385 case nir_op_idiv:
386 case nir_op_udiv:
387 return OP_DIV;
388 case nir_op_fexp2:
389 return OP_EX2;
390 case nir_op_ffloor:
391 return OP_FLOOR;
392 case nir_op_ffma:
393 return OP_FMA;
394 case nir_op_flog2:
395 return OP_LG2;
396 case nir_op_fmax:
397 case nir_op_imax:
398 case nir_op_umax:
399 return OP_MAX;
400 case nir_op_pack_64_2x32_split:
401 return OP_MERGE;
402 case nir_op_fmin:
403 case nir_op_imin:
404 case nir_op_umin:
405 return OP_MIN;
406 case nir_op_fmod:
407 case nir_op_imod:
408 case nir_op_umod:
409 case nir_op_frem:
410 case nir_op_irem:
411 return OP_MOD;
412 case nir_op_fmul:
413 case nir_op_imul:
414 case nir_op_imul_high:
415 case nir_op_umul_high:
416 return OP_MUL;
417 case nir_op_fneg:
418 case nir_op_ineg:
419 return OP_NEG;
420 case nir_op_inot:
421 return OP_NOT;
422 case nir_op_ior:
423 return OP_OR;
424 case nir_op_fpow:
425 return OP_POW;
426 case nir_op_frcp:
427 return OP_RCP;
428 case nir_op_frsq:
429 return OP_RSQ;
430 case nir_op_fsat:
431 return OP_SAT;
432 case nir_op_feq32:
433 case nir_op_ieq32:
434 case nir_op_fge32:
435 case nir_op_ige32:
436 case nir_op_uge32:
437 case nir_op_flt32:
438 case nir_op_ilt32:
439 case nir_op_ult32:
440 case nir_op_fne32:
441 case nir_op_ine32:
442 return OP_SET;
443 case nir_op_ishl:
444 return OP_SHL;
445 case nir_op_ishr:
446 case nir_op_ushr:
447 return OP_SHR;
448 case nir_op_fsin:
449 return OP_SIN;
450 case nir_op_fsqrt:
451 return OP_SQRT;
452 case nir_op_ftrunc:
453 return OP_TRUNC;
454 case nir_op_ixor:
455 return OP_XOR;
456 default:
457 ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
458 assert(false);
459 return OP_NOP;
460 }
461 }
462
463 operation
464 Converter::getOperation(nir_texop op)
465 {
466 switch (op) {
467 case nir_texop_tex:
468 return OP_TEX;
469 case nir_texop_lod:
470 return OP_TXLQ;
471 case nir_texop_txb:
472 return OP_TXB;
473 case nir_texop_txd:
474 return OP_TXD;
475 case nir_texop_txf:
476 case nir_texop_txf_ms:
477 return OP_TXF;
478 case nir_texop_tg4:
479 return OP_TXG;
480 case nir_texop_txl:
481 return OP_TXL;
482 case nir_texop_query_levels:
483 case nir_texop_texture_samples:
484 case nir_texop_txs:
485 return OP_TXQ;
486 default:
487 ERROR("couldn't get operation for nir_texop %u\n", op);
488 assert(false);
489 return OP_NOP;
490 }
491 }
492
493 operation
494 Converter::getOperation(nir_intrinsic_op op)
495 {
496 switch (op) {
497 case nir_intrinsic_emit_vertex:
498 return OP_EMIT;
499 case nir_intrinsic_end_primitive:
500 return OP_RESTART;
501 case nir_intrinsic_bindless_image_atomic_add:
502 case nir_intrinsic_image_atomic_add:
503 case nir_intrinsic_image_deref_atomic_add:
504 case nir_intrinsic_bindless_image_atomic_and:
505 case nir_intrinsic_image_atomic_and:
506 case nir_intrinsic_image_deref_atomic_and:
507 case nir_intrinsic_bindless_image_atomic_comp_swap:
508 case nir_intrinsic_image_atomic_comp_swap:
509 case nir_intrinsic_image_deref_atomic_comp_swap:
510 case nir_intrinsic_bindless_image_atomic_exchange:
511 case nir_intrinsic_image_atomic_exchange:
512 case nir_intrinsic_image_deref_atomic_exchange:
513 case nir_intrinsic_bindless_image_atomic_imax:
514 case nir_intrinsic_image_atomic_imax:
515 case nir_intrinsic_image_deref_atomic_imax:
516 case nir_intrinsic_bindless_image_atomic_umax:
517 case nir_intrinsic_image_atomic_umax:
518 case nir_intrinsic_image_deref_atomic_umax:
519 case nir_intrinsic_bindless_image_atomic_imin:
520 case nir_intrinsic_image_atomic_imin:
521 case nir_intrinsic_image_deref_atomic_imin:
522 case nir_intrinsic_bindless_image_atomic_umin:
523 case nir_intrinsic_image_atomic_umin:
524 case nir_intrinsic_image_deref_atomic_umin:
525 case nir_intrinsic_bindless_image_atomic_or:
526 case nir_intrinsic_image_atomic_or:
527 case nir_intrinsic_image_deref_atomic_or:
528 case nir_intrinsic_bindless_image_atomic_xor:
529 case nir_intrinsic_image_atomic_xor:
530 case nir_intrinsic_image_deref_atomic_xor:
531 return OP_SUREDP;
532 case nir_intrinsic_bindless_image_load:
533 case nir_intrinsic_image_load:
534 case nir_intrinsic_image_deref_load:
535 return OP_SULDP;
536 case nir_intrinsic_bindless_image_samples:
537 case nir_intrinsic_image_samples:
538 case nir_intrinsic_image_deref_samples:
539 case nir_intrinsic_bindless_image_size:
540 case nir_intrinsic_image_size:
541 case nir_intrinsic_image_deref_size:
542 return OP_SUQ;
543 case nir_intrinsic_bindless_image_store:
544 case nir_intrinsic_image_store:
545 case nir_intrinsic_image_deref_store:
546 return OP_SUSTP;
547 default:
548 ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
549 assert(false);
550 return OP_NOP;
551 }
552 }
553
554 operation
555 Converter::preOperationNeeded(nir_op op)
556 {
557 switch (op) {
558 case nir_op_fcos:
559 case nir_op_fsin:
560 return OP_PRESIN;
561 default:
562 return OP_NOP;
563 }
564 }
565
566 int
567 Converter::getSubOp(nir_op op)
568 {
569 switch (op) {
570 case nir_op_imul_high:
571 case nir_op_umul_high:
572 return NV50_IR_SUBOP_MUL_HIGH;
573 default:
574 return 0;
575 }
576 }
577
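// All flavours of an atomic (bindless/global/image/shared/ssbo) share a
// single NV50_IR_SUBOP_ATOM_* subop; memory barriers and votes are
// mapped here as well.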
578 int
579 Converter::getSubOp(nir_intrinsic_op op)
580 {
581 switch (op) {
582 case nir_intrinsic_bindless_image_atomic_add:
583 case nir_intrinsic_global_atomic_add:
584 case nir_intrinsic_image_atomic_add:
585 case nir_intrinsic_image_deref_atomic_add:
586 case nir_intrinsic_shared_atomic_add:
587 case nir_intrinsic_ssbo_atomic_add:
588 return NV50_IR_SUBOP_ATOM_ADD;
589 case nir_intrinsic_bindless_image_atomic_and:
590 case nir_intrinsic_global_atomic_and:
591 case nir_intrinsic_image_atomic_and:
592 case nir_intrinsic_image_deref_atomic_and:
593 case nir_intrinsic_shared_atomic_and:
594 case nir_intrinsic_ssbo_atomic_and:
595 return NV50_IR_SUBOP_ATOM_AND;
596 case nir_intrinsic_bindless_image_atomic_comp_swap:
597 case nir_intrinsic_global_atomic_comp_swap:
598 case nir_intrinsic_image_atomic_comp_swap:
599 case nir_intrinsic_image_deref_atomic_comp_swap:
600 case nir_intrinsic_shared_atomic_comp_swap:
601 case nir_intrinsic_ssbo_atomic_comp_swap:
602 return NV50_IR_SUBOP_ATOM_CAS;
603 case nir_intrinsic_bindless_image_atomic_exchange:
604 case nir_intrinsic_global_atomic_exchange:
605 case nir_intrinsic_image_atomic_exchange:
606 case nir_intrinsic_image_deref_atomic_exchange:
607 case nir_intrinsic_shared_atomic_exchange:
608 case nir_intrinsic_ssbo_atomic_exchange:
609 return NV50_IR_SUBOP_ATOM_EXCH;
610 case nir_intrinsic_bindless_image_atomic_or:
611 case nir_intrinsic_global_atomic_or:
612 case nir_intrinsic_image_atomic_or:
613 case nir_intrinsic_image_deref_atomic_or:
614 case nir_intrinsic_shared_atomic_or:
615 case nir_intrinsic_ssbo_atomic_or:
616 return NV50_IR_SUBOP_ATOM_OR;
617 case nir_intrinsic_bindless_image_atomic_imax:
618 case nir_intrinsic_bindless_image_atomic_umax:
619 case nir_intrinsic_global_atomic_imax:
620 case nir_intrinsic_global_atomic_umax:
621 case nir_intrinsic_image_atomic_imax:
622 case nir_intrinsic_image_atomic_umax:
623 case nir_intrinsic_image_deref_atomic_imax:
624 case nir_intrinsic_image_deref_atomic_umax:
625 case nir_intrinsic_shared_atomic_imax:
626 case nir_intrinsic_shared_atomic_umax:
627 case nir_intrinsic_ssbo_atomic_imax:
628 case nir_intrinsic_ssbo_atomic_umax:
629 return NV50_IR_SUBOP_ATOM_MAX;
630 case nir_intrinsic_bindless_image_atomic_imin:
631 case nir_intrinsic_bindless_image_atomic_umin:
632 case nir_intrinsic_global_atomic_imin:
633 case nir_intrinsic_global_atomic_umin:
634 case nir_intrinsic_image_atomic_imin:
635 case nir_intrinsic_image_atomic_umin:
636 case nir_intrinsic_image_deref_atomic_imin:
637 case nir_intrinsic_image_deref_atomic_umin:
638 case nir_intrinsic_shared_atomic_imin:
639 case nir_intrinsic_shared_atomic_umin:
640 case nir_intrinsic_ssbo_atomic_imin:
641 case nir_intrinsic_ssbo_atomic_umin:
642 return NV50_IR_SUBOP_ATOM_MIN;
643 case nir_intrinsic_bindless_image_atomic_xor:
644 case nir_intrinsic_global_atomic_xor:
645 case nir_intrinsic_image_atomic_xor:
646 case nir_intrinsic_image_deref_atomic_xor:
647 case nir_intrinsic_shared_atomic_xor:
648 case nir_intrinsic_ssbo_atomic_xor:
649 return NV50_IR_SUBOP_ATOM_XOR;
650
651 case nir_intrinsic_group_memory_barrier:
652 case nir_intrinsic_memory_barrier:
653 case nir_intrinsic_memory_barrier_buffer:
654 case nir_intrinsic_memory_barrier_image:
655 return NV50_IR_SUBOP_MEMBAR(M, GL);
656 case nir_intrinsic_memory_barrier_shared:
657 return NV50_IR_SUBOP_MEMBAR(M, CTA);
658
659 case nir_intrinsic_vote_all:
660 return NV50_IR_SUBOP_VOTE_ALL;
661 case nir_intrinsic_vote_any:
662 return NV50_IR_SUBOP_VOTE_ANY;
663 case nir_intrinsic_vote_ieq:
664 return NV50_IR_SUBOP_VOTE_UNI;
665 default:
666 return 0;
667 }
668 }
669
670 CondCode
671 Converter::getCondCode(nir_op op)
672 {
673 switch (op) {
674 case nir_op_feq32:
675 case nir_op_ieq32:
676 return CC_EQ;
677 case nir_op_fge32:
678 case nir_op_ige32:
679 case nir_op_uge32:
680 return CC_GE;
681 case nir_op_flt32:
682 case nir_op_ilt32:
683 case nir_op_ult32:
684 return CC_LT;
685 case nir_op_fne32:
686 return CC_NEU;
687 case nir_op_ine32:
688 return CC_NE;
689 default:
690 ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
691 assert(false);
692 return CC_FL;
693 }
694 }
695
696 Converter::LValues&
697 Converter::convert(nir_alu_dest *dest)
698 {
699 return convert(&dest->dest);
700 }
701
702 Converter::LValues&
703 Converter::convert(nir_dest *dest)
704 {
705 if (dest->is_ssa)
706 return convert(&dest->ssa);
707 if (dest->reg.indirect) {
708 ERROR("no support for indirects.\n");
709 assert(false);
710 }
711 return convert(dest->reg.reg);
712 }
713
714 Converter::LValues&
715 Converter::convert(nir_register *reg)
716 {
717 NirDefMap::iterator it = regDefs.find(reg->index);
718 if (it != regDefs.end())
719 return it->second;
720
721 LValues newDef(reg->num_components);
722 for (uint8_t i = 0; i < reg->num_components; i++)
723 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
724 return regDefs[reg->index] = newDef;
725 }
726
727 Converter::LValues&
728 Converter::convert(nir_ssa_def *def)
729 {
730 NirDefMap::iterator it = ssaDefs.find(def->index);
731 if (it != ssaDefs.end())
732 return it->second;
733
734 LValues newDef(def->num_components);
735 for (uint8_t i = 0; i < def->num_components; i++)
736 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
737 return ssaDefs[def->index] = newDef;
738 }
739
740 Value*
741 Converter::getSrc(nir_alu_src *src, uint8_t component)
742 {
743 if (src->abs || src->negate) {
744 ERROR("modifiers currently not supported on nir_alu_src\n");
745 assert(false);
746 }
747 return getSrc(&src->src, src->swizzle[component]);
748 }
749
750 Value*
751 Converter::getSrc(nir_register *reg, uint8_t idx)
752 {
753 NirDefMap::iterator it = regDefs.find(reg->index);
754 if (it == regDefs.end())
755 return convert(reg)[idx];
756 return it->second[idx];
757 }
758
759 Value*
760 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
761 {
762 if (src->is_ssa)
763 return getSrc(src->ssa, idx);
764
765 if (src->reg.indirect) {
766 if (indirect)
767 return getSrc(src->reg.indirect, idx);
768 ERROR("no support for indirects.\n");
769 assert(false);
770 return NULL;
771 }
772
773 return getSrc(src->reg.reg, idx);
774 }
775
776 Value*
777 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
778 {
779 ImmediateMap::iterator iit = immediates.find(src->index);
780 if (iit != immediates.end())
781 return convert((*iit).second, idx);
782
783 NirDefMap::iterator it = ssaDefs.find(src->index);
784 if (it == ssaDefs.end()) {
785 ERROR("SSA value %u not found\n", src->index);
786 assert(false);
787 return NULL;
788 }
789 return it->second[idx];
790 }
791
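// A rough usage sketch (hypothetical caller):
//   Value *indirect;
//   uint32_t base = getIndirect(&insn->src[0], 0, indirect);
//   // indirect == NULL means base is the exact constant offset;
//   // otherwise base is 0 and indirect holds the dynamic part.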
792 uint32_t
793 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
794 {
795 nir_const_value *offset = nir_src_as_const_value(*src);
796
797 if (offset) {
798 indirect = NULL;
799 return offset[0].u32;
800 }
801
802 indirect = getSrc(src, idx, true);
803 return 0;
804 }
805
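// The intrinsic variant adds nir_intrinsic_base() to the constant part
// and, unless isScalar, shifts a dynamic index left by 4 (x16 bytes,
// i.e. one vec4 slot) to turn it into a byte offset.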
806 uint32_t
807 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
808 {
809 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
810 if (indirect && !isScalar)
811 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
812 return idx;
813 }
814
815 static void
816 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
817 {
818 assert(name && index);
819
820 if (slot >= VERT_ATTRIB_MAX) {
821 ERROR("invalid varying slot %u\n", slot);
822 assert(false);
823 return;
824 }
825
826 if (slot >= VERT_ATTRIB_GENERIC0 &&
827 slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
828 *name = TGSI_SEMANTIC_GENERIC;
829 *index = slot - VERT_ATTRIB_GENERIC0;
830 return;
831 }
832
833 if (slot >= VERT_ATTRIB_TEX0 &&
834 slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
835 *name = TGSI_SEMANTIC_TEXCOORD;
836 *index = slot - VERT_ATTRIB_TEX0;
837 return;
838 }
839
840 switch (slot) {
841 case VERT_ATTRIB_COLOR0:
842 *name = TGSI_SEMANTIC_COLOR;
843 *index = 0;
844 break;
845 case VERT_ATTRIB_COLOR1:
846 *name = TGSI_SEMANTIC_COLOR;
847 *index = 1;
848 break;
849 case VERT_ATTRIB_EDGEFLAG:
850 *name = TGSI_SEMANTIC_EDGEFLAG;
851 *index = 0;
852 break;
853 case VERT_ATTRIB_FOG:
854 *name = TGSI_SEMANTIC_FOG;
855 *index = 0;
856 break;
857 case VERT_ATTRIB_NORMAL:
858 *name = TGSI_SEMANTIC_NORMAL;
859 *index = 0;
860 break;
861 case VERT_ATTRIB_POS:
862 *name = TGSI_SEMANTIC_POSITION;
863 *index = 0;
864 break;
865 case VERT_ATTRIB_POINT_SIZE:
866 *name = TGSI_SEMANTIC_PSIZE;
867 *index = 0;
868 break;
869 default:
870 ERROR("unknown vert attrib slot %u\n", slot);
871 assert(false);
872 break;
873 }
874 }
875
876 static void
877 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
878 {
879 assert(name && index);
880
881 if (slot >= VARYING_SLOT_TESS_MAX) {
882 ERROR("invalid varying slot %u\n", slot);
883 assert(false);
884 return;
885 }
886
887 if (slot >= VARYING_SLOT_PATCH0) {
888 *name = TGSI_SEMANTIC_PATCH;
889 *index = slot - VARYING_SLOT_PATCH0;
890 return;
891 }
892
893 if (slot >= VARYING_SLOT_VAR0) {
894 *name = TGSI_SEMANTIC_GENERIC;
895 *index = slot - VARYING_SLOT_VAR0;
896 return;
897 }
898
899 if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
900 *name = TGSI_SEMANTIC_TEXCOORD;
901 *index = slot - VARYING_SLOT_TEX0;
902 return;
903 }
904
905 switch (slot) {
906 case VARYING_SLOT_BFC0:
907 *name = TGSI_SEMANTIC_BCOLOR;
908 *index = 0;
909 break;
910 case VARYING_SLOT_BFC1:
911 *name = TGSI_SEMANTIC_BCOLOR;
912 *index = 1;
913 break;
914 case VARYING_SLOT_CLIP_DIST0:
915 *name = TGSI_SEMANTIC_CLIPDIST;
916 *index = 0;
917 break;
918 case VARYING_SLOT_CLIP_DIST1:
919 *name = TGSI_SEMANTIC_CLIPDIST;
920 *index = 1;
921 break;
922 case VARYING_SLOT_CLIP_VERTEX:
923 *name = TGSI_SEMANTIC_CLIPVERTEX;
924 *index = 0;
925 break;
926 case VARYING_SLOT_COL0:
927 *name = TGSI_SEMANTIC_COLOR;
928 *index = 0;
929 break;
930 case VARYING_SLOT_COL1:
931 *name = TGSI_SEMANTIC_COLOR;
932 *index = 1;
933 break;
934 case VARYING_SLOT_EDGE:
935 *name = TGSI_SEMANTIC_EDGEFLAG;
936 *index = 0;
937 break;
938 case VARYING_SLOT_FACE:
939 *name = TGSI_SEMANTIC_FACE;
940 *index = 0;
941 break;
942 case VARYING_SLOT_FOGC:
943 *name = TGSI_SEMANTIC_FOG;
944 *index = 0;
945 break;
946 case VARYING_SLOT_LAYER:
947 *name = TGSI_SEMANTIC_LAYER;
948 *index = 0;
949 break;
950 case VARYING_SLOT_PNTC:
951 *name = TGSI_SEMANTIC_PCOORD;
952 *index = 0;
953 break;
954 case VARYING_SLOT_POS:
955 *name = TGSI_SEMANTIC_POSITION;
956 *index = 0;
957 break;
958 case VARYING_SLOT_PRIMITIVE_ID:
959 *name = TGSI_SEMANTIC_PRIMID;
960 *index = 0;
961 break;
962 case VARYING_SLOT_PSIZ:
963 *name = TGSI_SEMANTIC_PSIZE;
964 *index = 0;
965 break;
966 case VARYING_SLOT_TESS_LEVEL_INNER:
967 *name = TGSI_SEMANTIC_TESSINNER;
968 *index = 0;
969 break;
970 case VARYING_SLOT_TESS_LEVEL_OUTER:
971 *name = TGSI_SEMANTIC_TESSOUTER;
972 *index = 0;
973 break;
974 case VARYING_SLOT_VIEWPORT:
975 *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
976 *index = 0;
977 break;
978 default:
979 ERROR("unknown varying slot %u\n", slot);
980 assert(false);
981 break;
982 }
983 }
984
985 static void
986 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
987 {
988 if (slot >= FRAG_RESULT_DATA0) {
989 *name = TGSI_SEMANTIC_COLOR;
990 *index = slot - FRAG_RESULT_COLOR - 2; // intentional: equals slot - FRAG_RESULT_DATA0
991 return;
992 }
993
994 switch (slot) {
995 case FRAG_RESULT_COLOR:
996 *name = TGSI_SEMANTIC_COLOR;
997 *index = 0;
998 break;
999 case FRAG_RESULT_DEPTH:
1000 *name = TGSI_SEMANTIC_POSITION;
1001 *index = 0;
1002 break;
1003 case FRAG_RESULT_SAMPLE_MASK:
1004 *name = TGSI_SEMANTIC_SAMPLEMASK;
1005 *index = 0;
1006 break;
1007 default:
1008 ERROR("unknown frag result slot %u\n", slot);
1009 assert(false);
1010 break;
1011 }
1012 }
1013
1014 // copy of _mesa_sysval_to_semantic
1015 static void
1016 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
1017 {
1018 *index = 0;
1019 switch (val) {
1020 // Vertex shader
1021 case SYSTEM_VALUE_VERTEX_ID:
1022 *name = TGSI_SEMANTIC_VERTEXID;
1023 break;
1024 case SYSTEM_VALUE_INSTANCE_ID:
1025 *name = TGSI_SEMANTIC_INSTANCEID;
1026 break;
1027 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
1028 *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
1029 break;
1030 case SYSTEM_VALUE_BASE_VERTEX:
1031 *name = TGSI_SEMANTIC_BASEVERTEX;
1032 break;
1033 case SYSTEM_VALUE_BASE_INSTANCE:
1034 *name = TGSI_SEMANTIC_BASEINSTANCE;
1035 break;
1036 case SYSTEM_VALUE_DRAW_ID:
1037 *name = TGSI_SEMANTIC_DRAWID;
1038 break;
1039
1040 // Geometry shader
1041 case SYSTEM_VALUE_INVOCATION_ID:
1042 *name = TGSI_SEMANTIC_INVOCATIONID;
1043 break;
1044
1045 // Fragment shader
1046 case SYSTEM_VALUE_FRAG_COORD:
1047 *name = TGSI_SEMANTIC_POSITION;
1048 break;
1049 case SYSTEM_VALUE_FRONT_FACE:
1050 *name = TGSI_SEMANTIC_FACE;
1051 break;
1052 case SYSTEM_VALUE_SAMPLE_ID:
1053 *name = TGSI_SEMANTIC_SAMPLEID;
1054 break;
1055 case SYSTEM_VALUE_SAMPLE_POS:
1056 *name = TGSI_SEMANTIC_SAMPLEPOS;
1057 break;
1058 case SYSTEM_VALUE_SAMPLE_MASK_IN:
1059 *name = TGSI_SEMANTIC_SAMPLEMASK;
1060 break;
1061 case SYSTEM_VALUE_HELPER_INVOCATION:
1062 *name = TGSI_SEMANTIC_HELPER_INVOCATION;
1063 break;
1064
1065 // Tessellation shader
1066 case SYSTEM_VALUE_TESS_COORD:
1067 *name = TGSI_SEMANTIC_TESSCOORD;
1068 break;
1069 case SYSTEM_VALUE_VERTICES_IN:
1070 *name = TGSI_SEMANTIC_VERTICESIN;
1071 break;
1072 case SYSTEM_VALUE_PRIMITIVE_ID:
1073 *name = TGSI_SEMANTIC_PRIMID;
1074 break;
1075 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1076 *name = TGSI_SEMANTIC_TESSOUTER;
1077 break;
1078 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1079 *name = TGSI_SEMANTIC_TESSINNER;
1080 break;
1081
1082 // Compute shader
1083 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1084 *name = TGSI_SEMANTIC_THREAD_ID;
1085 break;
1086 case SYSTEM_VALUE_WORK_GROUP_ID:
1087 *name = TGSI_SEMANTIC_BLOCK_ID;
1088 break;
1089 case SYSTEM_VALUE_NUM_WORK_GROUPS:
1090 *name = TGSI_SEMANTIC_GRID_SIZE;
1091 break;
1092 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1093 *name = TGSI_SEMANTIC_BLOCK_SIZE;
1094 break;
1095
1096 // ARB_shader_ballot
1097 case SYSTEM_VALUE_SUBGROUP_SIZE:
1098 *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
1099 break;
1100 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1101 *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
1102 break;
1103 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1104 *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
1105 break;
1106 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1107 *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
1108 break;
1109 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1110 *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
1111 break;
1112 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1113 *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
1114 break;
1115 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1116 *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
1117 break;
1118
1119 default:
1120 ERROR("unknown system value %u\n", val);
1121 assert(false);
1122 break;
1123 }
1124 }
1125
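// Translates NIR interpolation qualifiers into nv50_ir_varying flags;
// INTERP_MODE_NONE picks per-semantic defaults (sc for colors, linear
// for position).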
1126 void
1127 Converter::setInterpolate(nv50_ir_varying *var,
1128 uint8_t mode,
1129 bool centroid,
1130 unsigned semantic)
1131 {
1132 switch (mode) {
1133 case INTERP_MODE_FLAT:
1134 var->flat = 1;
1135 break;
1136 case INTERP_MODE_NONE:
1137 if (semantic == TGSI_SEMANTIC_COLOR)
1138 var->sc = 1;
1139 else if (semantic == TGSI_SEMANTIC_POSITION)
1140 var->linear = 1;
1141 break;
1142 case INTERP_MODE_NOPERSPECTIVE:
1143 var->linear = 1;
1144 break;
1145 case INTERP_MODE_SMOOTH:
1146 break;
1147 }
1148 var->centroid = centroid;
1149 }
1150
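// Arrays need stage-specific slot counts: GS inputs divide out the
// per-vertex dimension via vertices_in, and tessellation I/O strips the
// outer (per-vertex) array dimension unless the variable is per-patch
// (or a TES output).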
1151 static uint16_t
1152 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
1153 bool input, const nir_variable *var)
1154 {
1155 if (!type->is_array())
1156 return type->count_attribute_slots(false);
1157
1158 uint16_t slots;
1159 switch (stage) {
1160 case Program::TYPE_GEOMETRY:
1161 slots = type->uniform_locations();
1162 if (input)
1163 slots /= info.gs.vertices_in;
1164 break;
1165 case Program::TYPE_TESSELLATION_CONTROL:
1166 case Program::TYPE_TESSELLATION_EVAL:
1167 // remove first dimension
1168 if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1169 slots = type->uniform_locations();
1170 else
1171 slots = type->fields.array->uniform_locations();
1172 break;
1173 default:
1174 slots = type->count_attribute_slots(false);
1175 break;
1176 }
1177
1178 return slots;
1179 }
1180
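// Walks the shader's uniform/input/output variables and fills info->sv,
// info->in and info->out with TGSI-style semantics, component masks and
// patch flags, then lets the target assign the final hardware slots.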
1181 bool Converter::assignSlots() {
1182 unsigned name;
1183 unsigned index;
1184
1185 info->io.viewportId = -1;
1186 info->numInputs = 0;
1187 info->numOutputs = 0;
1188
1189 // we have to fix up the uniform locations for arrays
1190 unsigned numImages = 0;
1191 nir_foreach_variable(var, &nir->uniforms) {
1192 const glsl_type *type = var->type;
1193 if (!type->without_array()->is_image())
1194 continue;
1195 var->data.driver_location = numImages;
1196 numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1197 }
1198
1199 info->numSysVals = 0;
1200 for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
1201 if (!(nir->info.system_values_read & 1ull << i))
1202 continue;
1203
1204 system_val_to_tgsi_semantic(i, &name, &index);
1205 info->sv[info->numSysVals].sn = name;
1206 info->sv[info->numSysVals].si = index;
1207 info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1208
1209 switch (i) {
1210 case SYSTEM_VALUE_INSTANCE_ID:
1211 info->io.instanceId = info->numSysVals;
1212 break;
1213 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1214 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1215 info->sv[info->numSysVals].patch = 1;
1216 break;
1217 case SYSTEM_VALUE_VERTEX_ID:
1218 info->io.vertexId = info->numSysVals;
1219 break;
1220 default:
1221 break;
1222 }
1223
1224 info->numSysVals += 1;
1225 }
1226
1227 if (prog->getType() == Program::TYPE_COMPUTE)
1228 return true;
1229
1230 nir_foreach_variable(var, &nir->inputs) {
1231 const glsl_type *type = var->type;
1232 int slot = var->data.location;
1233 uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1234 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1235 : type->component_slots();
1236 uint32_t frac = var->data.location_frac;
1237 uint32_t vary = var->data.driver_location;
1238
1239 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1240 if (comp > 2)
1241 slots *= 2;
1242 }
1243
1244 assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1245
1246 switch(prog->getType()) {
1247 case Program::TYPE_FRAGMENT:
1248 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1249 for (uint16_t i = 0; i < slots; ++i) {
1250 setInterpolate(&info->in[vary + i], var->data.interpolation,
1251 var->data.centroid | var->data.sample, name);
1252 }
1253 break;
1254 case Program::TYPE_GEOMETRY:
1255 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1256 break;
1257 case Program::TYPE_TESSELLATION_CONTROL:
1258 case Program::TYPE_TESSELLATION_EVAL:
1259 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1260 if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1261 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1262 break;
1263 case Program::TYPE_VERTEX:
1264 vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1265 switch (name) {
1266 case TGSI_SEMANTIC_EDGEFLAG:
1267 info->io.edgeFlagIn = vary;
1268 break;
1269 default:
1270 break;
1271 }
1272 break;
1273 default:
1274 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1275 return false;
1276 }
1277
1278 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1279 info->in[vary].id = vary;
1280 info->in[vary].patch = var->data.patch;
1281 info->in[vary].sn = name;
1282 info->in[vary].si = index + i;
1283 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1284 if (i & 0x1)
1285 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1286 else
1287 info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1288 else
1289 info->in[vary].mask |= ((1 << comp) - 1) << frac;
1290 }
1291 info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1292 }
1293
1294 nir_foreach_variable(var, &nir->outputs) {
1295 const glsl_type *type = var->type;
1296 int slot = var->data.location;
1297 uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1298 uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1299 : type->component_slots();
1300 uint32_t frac = var->data.location_frac;
1301 uint32_t vary = var->data.driver_location;
1302
1303 if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1304 if (comp > 2)
1305 slots *= 2;
1306 }
1307
1308 assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1309
1310 switch(prog->getType()) {
1311 case Program::TYPE_FRAGMENT:
1312 frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1313 switch (name) {
1314 case TGSI_SEMANTIC_COLOR:
1315 if (!var->data.fb_fetch_output)
1316 info->prop.fp.numColourResults++;
1317 info->prop.fp.separateFragData = true;
1318 // sometimes we get FRAG_RESULT_DATAX with data.index 0
1319 // sometimes we get FRAG_RESULT_DATA0 with data.index X
1320 index = index == 0 ? var->data.index : index;
1321 break;
1322 case TGSI_SEMANTIC_POSITION:
1323 info->io.fragDepth = vary;
1324 info->prop.fp.writesDepth = true;
1325 break;
1326 case TGSI_SEMANTIC_SAMPLEMASK:
1327 info->io.sampleMask = vary;
1328 break;
1329 default:
1330 break;
1331 }
1332 break;
1333 case Program::TYPE_GEOMETRY:
1334 case Program::TYPE_TESSELLATION_CONTROL:
1335 case Program::TYPE_TESSELLATION_EVAL:
1336 case Program::TYPE_VERTEX:
1337 varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1338
1339 if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1340 name != TGSI_SEMANTIC_TESSOUTER)
1341 info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1342
1343 switch (name) {
1344 case TGSI_SEMANTIC_CLIPDIST:
1345 info->io.genUserClip = -1;
1346 break;
1347 case TGSI_SEMANTIC_CLIPVERTEX:
1348 clipVertexOutput = vary;
1349 break;
1350 case TGSI_SEMANTIC_EDGEFLAG:
1351 info->io.edgeFlagOut = vary;
1352 break;
1353 case TGSI_SEMANTIC_POSITION:
1354 if (clipVertexOutput < 0)
1355 clipVertexOutput = vary;
1356 break;
1357 default:
1358 break;
1359 }
1360 break;
1361 default:
1362 ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1363 return false;
1364 }
1365
1366 for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1367 info->out[vary].id = vary;
1368 info->out[vary].patch = var->data.patch;
1369 info->out[vary].sn = name;
1370 info->out[vary].si = index + i;
1371 if (glsl_base_type_is_64bit(type->without_array()->base_type))
1372 if (i & 0x1)
1373 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1374 else
1375 info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1376 else
1377 info->out[vary].mask |= ((1 << comp) - 1) << frac;
1378
1379 if (nir->info.outputs_read & 1ull << slot)
1380 info->out[vary].oread = 1;
1381 }
1382 info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1383 }
1384
1385 if (info->io.genUserClip > 0) {
1386 info->io.clipDistances = info->io.genUserClip;
1387
1388 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1389
1390 for (unsigned int n = 0; n < nOut; ++n) {
1391 unsigned int i = info->numOutputs++;
1392 info->out[i].id = i;
1393 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1394 info->out[i].si = n;
1395 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1396 }
1397 }
1398
1399 return info->assignSlots(info) == 0;
1400 }
1401
1402 uint32_t
1403 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1404 {
1405 DataType ty;
1406 int offset = nir_intrinsic_component(insn);
1407 bool input;
1408
1409 if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1410 ty = getDType(insn);
1411 else
1412 ty = getSType(insn->src[0], false, false);
1413
1414 switch (insn->intrinsic) {
1415 case nir_intrinsic_load_input:
1416 case nir_intrinsic_load_interpolated_input:
1417 case nir_intrinsic_load_per_vertex_input:
1418 input = true;
1419 break;
1420 case nir_intrinsic_load_output:
1421 case nir_intrinsic_load_per_vertex_output:
1422 case nir_intrinsic_store_output:
1423 case nir_intrinsic_store_per_vertex_output:
1424 input = false;
1425 break;
1426 default:
1427 ERROR("unknown intrinsic in getSlotAddress %s",
1428 nir_intrinsic_infos[insn->intrinsic].name);
1429 input = false;
1430 assert(false);
1431 break;
1432 }
1433
1434 if (typeSizeof(ty) == 8) {
1435 slot *= 2;
1436 slot += offset;
1437 if (slot >= 4) {
1438 idx += 1;
1439 slot -= 4;
1440 }
1441 } else {
1442 slot += offset;
1443 }
1444
1445 assert(slot < 4);
1446 assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1447 assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1448
1449 const nv50_ir_varying *vary = input ? info->in : info->out;
1450 return vary[idx].slot[slot] * 4;
1451 }
1452
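// Loads of 64-bit values from const/buffer space, or through an
// indirect address, are done as two 32-bit loads whose halves are
// merged afterwards.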
1453 Instruction *
1454 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1455 uint32_t base, uint8_t c, Value *indirect0,
1456 Value *indirect1, bool patch)
1457 {
1458 unsigned int tySize = typeSizeof(ty);
1459
1460 if (tySize == 8 &&
1461 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1462 Value *lo = getSSA();
1463 Value *hi = getSSA();
1464
1465 Instruction *loi =
1466 mkLoad(TYPE_U32, lo,
1467 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1468 indirect0);
1469 loi->setIndirect(0, 1, indirect1);
1470 loi->perPatch = patch;
1471
1472 Instruction *hii =
1473 mkLoad(TYPE_U32, hi,
1474 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1475 indirect0);
1476 hii->setIndirect(0, 1, indirect1);
1477 hii->perPatch = patch;
1478
1479 return mkOp2(OP_MERGE, ty, def, lo, hi);
1480 } else {
1481 Instruction *ld =
1482 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1483 ld->setIndirect(0, 1, indirect1);
1484 ld->perPatch = patch;
1485 return ld;
1486 }
1487 }
1488
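// Output-store counterpart of loadFrom: 64-bit indirect stores are
// written as two 32-bit halves, and OP_EXPORT sources get copied into
// fresh values first, apparently so exports read from a plain register.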
1489 void
1490 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1491 DataType ty, Value *src, uint8_t idx, uint8_t c,
1492 Value *indirect0, Value *indirect1)
1493 {
1494 uint8_t size = typeSizeof(ty);
1495 uint32_t address = getSlotAddress(insn, idx, c);
1496
1497 if (size == 8 && indirect0) {
1498 Value *split[2];
1499 mkSplit(split, 4, src);
1500
1501 if (op == OP_EXPORT) {
1502 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1503 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1504 }
1505
1506 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1507 split[0])->perPatch = info->out[idx].patch;
1508 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1509 split[1])->perPatch = info->out[idx].patch;
1510 } else {
1511 if (op == OP_EXPORT)
1512 src = mkMov(getSSA(size), src, ty)->getDef(0);
1513 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1514 src)->perPatch = info->out[idx].patch;
1515 }
1516 }
1517
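// Copies per-stage shader_info fields into nv50_ir_prog_info up front,
// before any instructions are emitted.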
1518 bool
1519 Converter::parseNIR()
1520 {
1521 info->bin.tlsSpace = 0;
1522 info->io.clipDistances = nir->info.clip_distance_array_size;
1523 info->io.cullDistances = nir->info.cull_distance_array_size;
1524
1525 switch(prog->getType()) {
1526 case Program::TYPE_COMPUTE:
1527 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1528 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1529 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1530 info->bin.smemSize = nir->info.cs.shared_size;
1531 break;
1532 case Program::TYPE_FRAGMENT:
1533 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1534 info->prop.fp.persampleInvocation =
1535 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1536 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1537 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1538 info->prop.fp.readsSampleLocations =
1539 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1540 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1541 info->prop.fp.usesSampleMaskIn =
1542 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1543 break;
1544 case Program::TYPE_GEOMETRY:
1545 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1546 info->prop.gp.instanceCount = nir->info.gs.invocations;
1547 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1548 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1549 break;
1550 case Program::TYPE_TESSELLATION_CONTROL:
1551 case Program::TYPE_TESSELLATION_EVAL:
1552 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1553 info->prop.tp.domain = GL_LINES;
1554 else
1555 info->prop.tp.domain = nir->info.tess.primitive_mode;
1556 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1557 info->prop.tp.outputPrim =
1558 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1559 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1560 info->prop.tp.winding = !nir->info.tess.ccw;
1561 break;
1562 case Program::TYPE_VERTEX:
1563 info->prop.vp.usesDrawParameters =
1564 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1565 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1566 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1567 break;
1568 default:
1569 break;
1570 }
1571
1572 return true;
1573 }
1574
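// Emits one function (only main is expected here): sets up entry/exit
// blocks, reserves l[] space for NIR register arrays, walks the CF node
// list, then finishes with user-clip handling and an OP_EXIT terminator.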
1575 bool
1576 Converter::visit(nir_function *function)
1577 {
1578 assert(function->impl);
1579
1580 // usually the blocks will set everything up, but main is special
1581 BasicBlock *entry = new BasicBlock(prog->main);
1582 exit = new BasicBlock(prog->main);
1583 blocks[nir_start_block(function->impl)->index] = entry;
1584 prog->main->setEntry(entry);
1585 prog->main->setExit(exit);
1586
1587 setPosition(entry, true);
1588
1589 if (info->io.genUserClip > 0) {
1590 for (int c = 0; c < 4; ++c)
1591 clipVtx[c] = getScratch();
1592 }
1593
1594 switch (prog->getType()) {
1595 case Program::TYPE_TESSELLATION_CONTROL:
1596 outBase = mkOp2v(
1597 OP_SUB, TYPE_U32, getSSA(),
1598 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1599 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1600 break;
1601 case Program::TYPE_FRAGMENT: {
1602 Symbol *sv = mkSysVal(SV_POSITION, 3);
1603 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1604 fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1605 break;
1606 }
1607 default:
1608 break;
1609 }
1610
1611 nir_foreach_register(reg, &function->impl->registers) {
1612 if (reg->num_array_elems) {
1613 // TODO: packed variables would be nice, but MemoryOpt fails
1614 // replace 4 with reg->num_components
1615 uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1616 regToLmemOffset[reg->index] = info->bin.tlsSpace;
1617 info->bin.tlsSpace += size;
1618 }
1619 }
1620
1621 nir_index_ssa_defs(function->impl);
1622 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1623 if (!visit(node))
1624 return false;
1625 }
1626
1627 bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1628 setPosition(exit, true);
1629
1630 if ((prog->getType() == Program::TYPE_VERTEX ||
1631 prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1632 && info->io.genUserClip > 0)
1633 handleUserClipPlanes();
1634
1635 // TODO: for non-main functions this needs to be an OP_RETURN
1636 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1637 return true;
1638 }
1639
1640 bool
1641 Converter::visit(nir_cf_node *node)
1642 {
1643 switch (node->type) {
1644 case nir_cf_node_block:
1645 return visit(nir_cf_node_as_block(node));
1646 case nir_cf_node_if:
1647 return visit(nir_cf_node_as_if(node));
1648 case nir_cf_node_loop:
1649 return visit(nir_cf_node_as_loop(node));
1650 default:
1651 ERROR("unknown nir_cf_node type %u\n", node->type);
1652 return false;
1653 }
1654 }
1655
1656 bool
1657 Converter::visit(nir_block *block)
1658 {
1659 if (!block->predecessors->entries && block->instr_list.is_empty())
1660 return true;
1661
1662 BasicBlock *bb = convert(block);
1663
1664 setPosition(bb, true);
1665 nir_foreach_instr(insn, block) {
1666 if (!visit(insn))
1667 return false;
1668 }
1669 return true;
1670 }
1671
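// Lowers a NIR if: a conditional branch (taken when the condition is 0)
// targets the else block, both legs fall through to the common
// successor, and a JOINAT/JOIN pair is emitted when the paths rejoin.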
1672 bool
1673 Converter::visit(nir_if *nif)
1674 {
1675 DataType sType = getSType(nif->condition, false, false);
1676 Value *src = getSrc(&nif->condition, 0);
1677
1678 nir_block *lastThen = nir_if_last_then_block(nif);
1679 nir_block *lastElse = nir_if_last_else_block(nif);
1680
1681 assert(!lastThen->successors[1]);
1682 assert(!lastElse->successors[1]);
1683
1684 BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1685 BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1686
1687 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1688 bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1689
1690 // we only insert joinats if both paths end up at the end of the if again.
1691 // the reasons for this not to happen are breaks/continues/returns/...,
1692 // which have their own handling
1693 if (lastThen->successors[0] == lastElse->successors[0])
1694 bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1695 CC_ALWAYS, NULL);
1696
1697 mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1698
1699 foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1700 if (!visit(node))
1701 return false;
1702 }
1703 setPosition(convert(lastThen), true);
1704 if (!bb->getExit() ||
1705 !bb->getExit()->asFlow() ||
1706 bb->getExit()->asFlow()->op == OP_JOIN) {
1707 BasicBlock *tailBB = convert(lastThen->successors[0]);
1708 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1709 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1710 }
1711
1712 foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1713 if (!visit(node))
1714 return false;
1715 }
1716 setPosition(convert(lastElse), true);
1717 if (!bb->getExit() ||
1718 !bb->getExit()->asFlow() ||
1719 bb->getExit()->asFlow()->op == OP_JOIN) {
1720 BasicBlock *tailBB = convert(lastElse->successors[0]);
1721 mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1722 bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1723 }
1724
1725 if (lastThen->successors[0] == lastElse->successors[0]) {
1726 setPosition(convert(lastThen->successors[0]), true);
1727 mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1728 }
1729
1730 return true;
1731 }
1732
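// Loops are bracketed by PREBREAK/PRECONT so breaks and continues know
// their targets; the back edge is only added when the body's last block
// doesn't already end in explicit control flow.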
1733 bool
1734 Converter::visit(nir_loop *loop)
1735 {
1736 curLoopDepth += 1;
1737 func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1738
1739 BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1740 BasicBlock *tailBB =
1741 convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1742 bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1743
1744 mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1745 setPosition(loopBB, false);
1746 mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1747
1748 foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1749 if (!visit(node))
1750 return false;
1751 }
1752 Instruction *insn = bb->getExit();
1753 if (bb->cfg.incidentCount() != 0) {
1754 if (!insn || !insn->asFlow()) {
1755 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1756 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1757 } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1758 tailBB->cfg.incidentCount() == 0) {
1759 // RA doesn't like having blocks around with no incident edge,
1760 // so we create a fake one to make it happy
1761 bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1762 }
1763 }
1764
1765 curLoopDepth -= 1;
1766
1767 return true;
1768 }
1769
1770 bool
1771 Converter::visit(nir_instr *insn)
1772 {
1773 // we need an insertion point for immediate loads generated on the fly
1774 immInsertPos = bb->getExit();
1775 switch (insn->type) {
1776 case nir_instr_type_alu:
1777 return visit(nir_instr_as_alu(insn));
1778 case nir_instr_type_deref:
1779 return visit(nir_instr_as_deref(insn));
1780 case nir_instr_type_intrinsic:
1781 return visit(nir_instr_as_intrinsic(insn));
1782 case nir_instr_type_jump:
1783 return visit(nir_instr_as_jump(insn));
1784 case nir_instr_type_load_const:
1785 return visit(nir_instr_as_load_const(insn));
1786 case nir_instr_type_ssa_undef:
1787 return visit(nir_instr_as_ssa_undef(insn));
1788 case nir_instr_type_tex:
1789 return visit(nir_instr_as_tex(insn));
1790 default:
1791 ERROR("unknown nir_instr type %u\n", insn->type);
1792 return false;
1793 }
1794 return true;
1795 }
1796
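// Maps system-value load intrinsics onto nv50 IR SV semantics, used
// when lowering the corresponding load_* intrinsics.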
1797 SVSemantic
1798 Converter::convert(nir_intrinsic_op intr)
1799 {
1800 switch (intr) {
1801 case nir_intrinsic_load_base_vertex:
1802 return SV_BASEVERTEX;
1803 case nir_intrinsic_load_base_instance:
1804 return SV_BASEINSTANCE;
1805 case nir_intrinsic_load_draw_id:
1806 return SV_DRAWID;
1807 case nir_intrinsic_load_front_face:
1808 return SV_FACE;
1809 case nir_intrinsic_load_helper_invocation:
1810 return SV_THREAD_KILL;
1811 case nir_intrinsic_load_instance_id:
1812 return SV_INSTANCE_ID;
1813 case nir_intrinsic_load_invocation_id:
1814 return SV_INVOCATION_ID;
1815 case nir_intrinsic_load_local_group_size:
1816 return SV_NTID;
1817 case nir_intrinsic_load_local_invocation_id:
1818 return SV_TID;
1819 case nir_intrinsic_load_num_work_groups:
1820 return SV_NCTAID;
1821 case nir_intrinsic_load_patch_vertices_in:
1822 return SV_VERTEX_COUNT;
1823 case nir_intrinsic_load_primitive_id:
1824 return SV_PRIMITIVE_ID;
1825 case nir_intrinsic_load_sample_id:
1826 return SV_SAMPLE_INDEX;
1827 case nir_intrinsic_load_sample_mask_in:
1828 return SV_SAMPLE_MASK;
1829 case nir_intrinsic_load_sample_pos:
1830 return SV_SAMPLE_POS;
1831 case nir_intrinsic_load_subgroup_eq_mask:
1832 return SV_LANEMASK_EQ;
1833 case nir_intrinsic_load_subgroup_ge_mask:
1834 return SV_LANEMASK_GE;
1835 case nir_intrinsic_load_subgroup_gt_mask:
1836 return SV_LANEMASK_GT;
1837 case nir_intrinsic_load_subgroup_le_mask:
1838 return SV_LANEMASK_LE;
1839 case nir_intrinsic_load_subgroup_lt_mask:
1840 return SV_LANEMASK_LT;
1841 case nir_intrinsic_load_subgroup_invocation:
1842 return SV_LANEID;
1843 case nir_intrinsic_load_tess_coord:
1844 return SV_TESS_COORD;
1845 case nir_intrinsic_load_tess_level_inner:
1846 return SV_TESS_INNER;
1847 case nir_intrinsic_load_tess_level_outer:
1848 return SV_TESS_OUTER;
1849 case nir_intrinsic_load_vertex_id:
1850 return SV_VERTEX_ID;
1851 case nir_intrinsic_load_work_group_id:
1852 return SV_CTAID;
1853 default:
1854 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1855 nir_intrinsic_infos[intr].name);
1856 assert(false);
1857 return SV_LAST;
1858 }
1859 }
1860
1861 bool
1862 Converter::visit(nir_intrinsic_instr *insn)
1863 {
1864 nir_intrinsic_op op = insn->intrinsic;
1865 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1866
1867 switch (op) {
1868 case nir_intrinsic_load_uniform: {
1869 LValues &newDefs = convert(&insn->dest);
1870 const DataType dType = getDType(insn);
1871 Value *indirect;
1872 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1873 for (uint8_t i = 0; i < insn->num_components; ++i) {
1874 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1875 }
1876 break;
1877 }
1878 case nir_intrinsic_store_output:
1879 case nir_intrinsic_store_per_vertex_output: {
1880 Value *indirect;
1881 DataType dType = getSType(insn->src[0], false, false);
1882 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1883
1884 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1885 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1886 continue;
1887
1888 uint8_t offset = 0;
1889 Value *src = getSrc(&insn->src[0], i);
1890 switch (prog->getType()) {
1891 case Program::TYPE_FRAGMENT: {
1892 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1893 // TGSI uses a different interface than NIR: TGSI stores the
1894 // value in the z component, NIR in x
1895 offset += 2;
1896 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1897 }
1898 break;
1899 }
1900 case Program::TYPE_GEOMETRY:
1901 case Program::TYPE_VERTEX: {
1902 if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
1903 mkMov(clipVtx[i], src);
1904 src = clipVtx[i];
1905 }
1906 break;
1907 }
1908 default:
1909 break;
1910 }
1911
1912 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1913 }
1914 break;
1915 }
1916 case nir_intrinsic_load_input:
1917 case nir_intrinsic_load_interpolated_input:
1918 case nir_intrinsic_load_output: {
1919 LValues &newDefs = convert(&insn->dest);
1920
1921 // FBFetch
1922 if (prog->getType() == Program::TYPE_FRAGMENT &&
1923 op == nir_intrinsic_load_output) {
1924 std::vector<Value*> defs, srcs;
1925 uint8_t mask = 0;
1926
1927 srcs.push_back(getSSA());
1928 srcs.push_back(getSSA());
1929 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1930 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1931 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1932 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1933
1934 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1935 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1936
1937 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1938 defs.push_back(newDefs[i]);
1939 mask |= 1 << i;
1940 }
1941
1942 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1943 texi->tex.levelZero = 1;
1944 texi->tex.mask = mask;
1945 texi->tex.useOffsets = 0;
1946 texi->tex.r = 0xffff;
1947 texi->tex.s = 0xffff;
1948
1949 info->prop.fp.readsFramebuffer = true;
1950 break;
1951 }
1952
1953 const DataType dType = getDType(insn);
1954 Value *indirect;
1955 bool input = op != nir_intrinsic_load_output;
1956 operation nvirOp;
1957 uint32_t mode = 0;
1958
1959 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1960 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1961
1962 // see load_barycentric_* handling
1963 if (prog->getType() == Program::TYPE_FRAGMENT) {
1964 mode = translateInterpMode(&vary, nvirOp);
1965 if (op == nir_intrinsic_load_interpolated_input) {
1966 ImmediateValue immMode;
1967 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1968 mode |= immMode.reg.data.u32;
1969 }
1970 }
1971
1972 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1973 uint32_t address = getSlotAddress(insn, idx, i);
1974 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1975 if (prog->getType() == Program::TYPE_FRAGMENT) {
1976 int s = 1;
1977 if (typeSizeof(dType) == 8) {
1978 Value *lo = getSSA();
1979 Value *hi = getSSA();
1980 Instruction *interp;
1981
1982 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1983 if (nvirOp == OP_PINTERP)
1984 interp->setSrc(s++, fp.position);
1985 if (mode & NV50_IR_INTERP_OFFSET)
1986 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1987 interp->setInterpolate(mode);
1988 interp->setIndirect(0, 0, indirect);
1989
1990 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1991 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1992 if (nvirOp == OP_PINTERP)
1993 interp->setSrc(s++, fp.position);
1994 if (mode & NV50_IR_INTERP_OFFSET)
1995 interp->setSrc(s++, getSrc(&insn->src[0], 0));
1996 interp->setInterpolate(mode);
1997 interp->setIndirect(0, 0, indirect);
1998
1999 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2000 } else {
2001 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2002 if (nvirOp == OP_PINTERP)
2003 interp->setSrc(s++, fp.position);
2004 if (mode & NV50_IR_INTERP_OFFSET)
2005 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2006 interp->setInterpolate(mode);
2007 interp->setIndirect(0, 0, indirect);
2008 }
2009 } else {
2010 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2011 }
2012 }
2013 break;
2014 }
2015 case nir_intrinsic_load_kernel_input: {
2016 assert(prog->getType() == Program::TYPE_COMPUTE);
2017 assert(insn->num_components == 1);
2018
2019 LValues &newDefs = convert(&insn->dest);
2020 const DataType dType = getDType(insn);
2021 Value *indirect;
2022 uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
2023
2024 mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
2025 break;
2026 }
2027 case nir_intrinsic_load_barycentric_at_offset:
2028 case nir_intrinsic_load_barycentric_at_sample:
2029 case nir_intrinsic_load_barycentric_centroid:
2030 case nir_intrinsic_load_barycentric_pixel:
2031 case nir_intrinsic_load_barycentric_sample: {
2032 LValues &newDefs = convert(&insn->dest);
2033 uint32_t mode;
2034
2035 if (op == nir_intrinsic_load_barycentric_centroid ||
2036 op == nir_intrinsic_load_barycentric_sample) {
2037 mode = NV50_IR_INTERP_CENTROID;
2038 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
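         // The hardware takes the interpolation offset as two signed
         // fixed-point values packed into one register: each component is
         // clamped to the representable range [-0.5, 0.4375], scaled by 2^12
         // and inserted into the low/high 16 bits via INSBF (0x1010 = insert
         // 16 bits at bit 16). A worked example (illustrative only):
         //    offset = (0.25, -0.25) -> (0x0400, 0xfc00) -> 0xfc000400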
2039 Value *offs[2];
2040 for (uint8_t c = 0; c < 2; c++) {
2041 offs[c] = getScratch();
2042 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2043 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2044 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2045 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2046 }
2047 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2048
2049 mode = NV50_IR_INTERP_OFFSET;
2050 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2051 mode = NV50_IR_INTERP_DEFAULT;
2052 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2053 info->prop.fp.readsSampleLocations = true;
2054 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2055 mode = NV50_IR_INTERP_OFFSET;
2056 } else {
2057 unreachable("all intrinsics already handled above");
2058 }
2059
2060 loadImm(newDefs[1], mode);
2061 break;
2062 }
2063 case nir_intrinsic_discard:
2064 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2065 break;
2066 case nir_intrinsic_discard_if: {
2067 Value *pred = getSSA(1, FILE_PREDICATE);
2068 if (insn->num_components > 1) {
2069 ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
2070 assert(false);
2071 return false;
2072 }
2073 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2074 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2075 break;
2076 }
2077 case nir_intrinsic_load_base_vertex:
2078 case nir_intrinsic_load_base_instance:
2079 case nir_intrinsic_load_draw_id:
2080 case nir_intrinsic_load_front_face:
2081 case nir_intrinsic_load_helper_invocation:
2082 case nir_intrinsic_load_instance_id:
2083 case nir_intrinsic_load_invocation_id:
2084 case nir_intrinsic_load_local_group_size:
2085 case nir_intrinsic_load_local_invocation_id:
2086 case nir_intrinsic_load_num_work_groups:
2087 case nir_intrinsic_load_patch_vertices_in:
2088 case nir_intrinsic_load_primitive_id:
2089 case nir_intrinsic_load_sample_id:
2090 case nir_intrinsic_load_sample_mask_in:
2091 case nir_intrinsic_load_sample_pos:
2092 case nir_intrinsic_load_subgroup_eq_mask:
2093 case nir_intrinsic_load_subgroup_ge_mask:
2094 case nir_intrinsic_load_subgroup_gt_mask:
2095 case nir_intrinsic_load_subgroup_le_mask:
2096 case nir_intrinsic_load_subgroup_lt_mask:
2097 case nir_intrinsic_load_subgroup_invocation:
2098 case nir_intrinsic_load_tess_coord:
2099 case nir_intrinsic_load_tess_level_inner:
2100 case nir_intrinsic_load_tess_level_outer:
2101 case nir_intrinsic_load_vertex_id:
2102 case nir_intrinsic_load_work_group_id: {
2103 const DataType dType = getDType(insn);
2104 SVSemantic sv = convert(op);
2105 LValues &newDefs = convert(&insn->dest);
2106
2107 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2108 Value *def;
2109 if (typeSizeof(dType) == 8)
2110 def = getSSA();
2111 else
2112 def = newDefs[i];
2113
2114 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2115 loadImm(def, 0u);
2116 } else {
2117 Symbol *sym = mkSysVal(sv, i);
2118 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2119 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2120 rdsv->perPatch = 1;
2121 }
2122
2123 if (typeSizeof(dType) == 8)
2124 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2125 }
2126 break;
2127 }
2128 // constants
2129 case nir_intrinsic_load_subgroup_size: {
2130 LValues &newDefs = convert(&insn->dest);
2131 loadImm(newDefs[0], 32u);
2132 break;
2133 }
2134 case nir_intrinsic_vote_all:
2135 case nir_intrinsic_vote_any:
2136 case nir_intrinsic_vote_ieq: {
2137 LValues &newDefs = convert(&insn->dest);
2138 Value *pred = getScratch(1, FILE_PREDICATE);
2139 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2140 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2141 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2142 break;
2143 }
2144 case nir_intrinsic_ballot: {
2145 LValues &newDefs = convert(&insn->dest);
2146 Value *pred = getSSA(1, FILE_PREDICATE);
2147 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2148 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2149 break;
2150 }
2151 case nir_intrinsic_read_first_invocation:
2152 case nir_intrinsic_read_invocation: {
2153 LValues &newDefs = convert(&insn->dest);
2154 const DataType dType = getDType(insn);
2155 Value *tmp = getScratch();
2156
2157 if (op == nir_intrinsic_read_first_invocation) {
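         // compute the id of the first active lane: VOTE.ANY(1) yields the
         // active-lane mask, and bit-reversing it (EXTBF.REV) followed by
         // BFIND.SAMT recovers the index of its lowest set bit, which the
         // SHFL below then uses as the source lane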
2158 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2159 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2160 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2161 } else
2162 tmp = getSrc(&insn->src[1], 0);
2163
2164 for (uint8_t i = 0; i < insn->num_components; ++i) {
2165 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2166 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2167 }
2168 break;
2169 }
2170 case nir_intrinsic_load_per_vertex_input: {
2171 const DataType dType = getDType(insn);
2172 LValues &newDefs = convert(&insn->dest);
2173 Value *indirectVertex;
2174 Value *indirectOffset;
2175 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2176 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2177
2178 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2179 mkImm(baseVertex), indirectVertex);
2180 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2181 uint32_t address = getSlotAddress(insn, idx, i);
2182 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2183 indirectOffset, vtxBase, info->in[idx].patch);
2184 }
2185 break;
2186 }
2187 case nir_intrinsic_load_per_vertex_output: {
2188 const DataType dType = getDType(insn);
2189 LValues &newDefs = convert(&insn->dest);
2190 Value *indirectVertex;
2191 Value *indirectOffset;
2192 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2193 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2194 Value *vtxBase = NULL;
2195
2196 if (indirectVertex)
2197 vtxBase = indirectVertex;
2198 else
2199 vtxBase = loadImm(NULL, baseVertex);
2200
2201 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2202
2203 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2204 uint32_t address = getSlotAddress(insn, idx, i);
2205 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2206 indirectOffset, vtxBase, info->in[idx].patch);
2207 }
2208 break;
2209 }
2210 case nir_intrinsic_emit_vertex:
2211 if (info->io.genUserClip > 0)
2212 handleUserClipPlanes();
2213 // fallthrough
2214 case nir_intrinsic_end_primitive: {
2215 uint32_t idx = nir_intrinsic_stream_id(insn);
2216 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2217 break;
2218 }
2219 case nir_intrinsic_load_ubo: {
2220 const DataType dType = getDType(insn);
2221 LValues &newDefs = convert(&insn->dest);
2222 Value *indirectIndex;
2223 Value *indirectOffset;
2224 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2225 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2226
2227 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2228 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2229 indirectOffset, indirectIndex);
2230 }
2231 break;
2232 }
2233 case nir_intrinsic_get_buffer_size: {
2234 LValues &newDefs = convert(&insn->dest);
2235 const DataType dType = getDType(insn);
2236 Value *indirectBuffer;
2237 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2238
2239 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2240 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2241 break;
2242 }
2243 case nir_intrinsic_store_ssbo: {
2244 DataType sType = getSType(insn->src[0], false, false);
2245 Value *indirectBuffer;
2246 Value *indirectOffset;
2247 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2248 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2249
2250 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2251 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2252 continue;
2253 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2254 offset + i * typeSizeof(sType));
2255 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2256 ->setIndirect(0, 1, indirectBuffer);
2257 }
2258 info->io.globalAccess |= 0x2;
2259 break;
2260 }
2261 case nir_intrinsic_load_ssbo: {
2262 const DataType dType = getDType(insn);
2263 LValues &newDefs = convert(&insn->dest);
2264 Value *indirectBuffer;
2265 Value *indirectOffset;
2266 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2267 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2268
2269 for (uint8_t i = 0u; i < insn->num_components; ++i)
2270 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2271 indirectOffset, indirectBuffer);
2272
2273 info->io.globalAccess |= 0x1;
2274 break;
2275 }
2276 case nir_intrinsic_shared_atomic_add:
2277 case nir_intrinsic_shared_atomic_and:
2278 case nir_intrinsic_shared_atomic_comp_swap:
2279 case nir_intrinsic_shared_atomic_exchange:
2280 case nir_intrinsic_shared_atomic_or:
2281 case nir_intrinsic_shared_atomic_imax:
2282 case nir_intrinsic_shared_atomic_imin:
2283 case nir_intrinsic_shared_atomic_umax:
2284 case nir_intrinsic_shared_atomic_umin:
2285 case nir_intrinsic_shared_atomic_xor: {
2286 const DataType dType = getDType(insn);
2287 LValues &newDefs = convert(&insn->dest);
2288 Value *indirectOffset;
2289 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2290 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2291 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2292 if (op == nir_intrinsic_shared_atomic_comp_swap)
2293 atom->setSrc(2, getSrc(&insn->src[2], 0));
2294 atom->setIndirect(0, 0, indirectOffset);
2295 atom->subOp = getSubOp(op);
2296 break;
2297 }
2298 case nir_intrinsic_ssbo_atomic_add:
2299 case nir_intrinsic_ssbo_atomic_and:
2300 case nir_intrinsic_ssbo_atomic_comp_swap:
2301 case nir_intrinsic_ssbo_atomic_exchange:
2302 case nir_intrinsic_ssbo_atomic_or:
2303 case nir_intrinsic_ssbo_atomic_imax:
2304 case nir_intrinsic_ssbo_atomic_imin:
2305 case nir_intrinsic_ssbo_atomic_umax:
2306 case nir_intrinsic_ssbo_atomic_umin:
2307 case nir_intrinsic_ssbo_atomic_xor: {
2308 const DataType dType = getDType(insn);
2309 LValues &newDefs = convert(&insn->dest);
2310 Value *indirectBuffer;
2311 Value *indirectOffset;
2312 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2313 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2314
2315 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2316 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2317 getSrc(&insn->src[2], 0));
2318 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2319 atom->setSrc(2, getSrc(&insn->src[3], 0));
2320 atom->setIndirect(0, 0, indirectOffset);
2321 atom->setIndirect(0, 1, indirectBuffer);
2322 atom->subOp = getSubOp(op);
2323
2324 info->io.globalAccess |= 0x2;
2325 break;
2326 }
2327 case nir_intrinsic_global_atomic_add:
2328 case nir_intrinsic_global_atomic_and:
2329 case nir_intrinsic_global_atomic_comp_swap:
2330 case nir_intrinsic_global_atomic_exchange:
2331 case nir_intrinsic_global_atomic_or:
2332 case nir_intrinsic_global_atomic_imax:
2333 case nir_intrinsic_global_atomic_imin:
2334 case nir_intrinsic_global_atomic_umax:
2335 case nir_intrinsic_global_atomic_umin:
2336 case nir_intrinsic_global_atomic_xor: {
2337 const DataType dType = getDType(insn);
2338 LValues &newDefs = convert(&insn->dest);
2339 Value *address;
2340 uint32_t offset = getIndirect(&insn->src[0], 0, address);
2341
2342 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2343 Instruction *atom =
2344 mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2345 atom->setIndirect(0, 0, address);
2346 atom->subOp = getSubOp(op);
2347
2348 info->io.globalAccess |= 0x2;
2349 break;
2350 }
2351 case nir_intrinsic_bindless_image_atomic_add:
2352 case nir_intrinsic_bindless_image_atomic_and:
2353 case nir_intrinsic_bindless_image_atomic_comp_swap:
2354 case nir_intrinsic_bindless_image_atomic_exchange:
2355 case nir_intrinsic_bindless_image_atomic_imax:
2356 case nir_intrinsic_bindless_image_atomic_umax:
2357 case nir_intrinsic_bindless_image_atomic_imin:
2358 case nir_intrinsic_bindless_image_atomic_umin:
2359 case nir_intrinsic_bindless_image_atomic_or:
2360 case nir_intrinsic_bindless_image_atomic_xor:
2361 case nir_intrinsic_bindless_image_load:
2362 case nir_intrinsic_bindless_image_samples:
2363 case nir_intrinsic_bindless_image_size:
2364 case nir_intrinsic_bindless_image_store: {
2365 std::vector<Value*> srcs, defs;
2366 Value *indirect = getSrc(&insn->src[0], 0);
2367 DataType ty;
2368
2369 uint32_t mask = 0;
2370 TexInstruction::Target target =
2371 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2372 unsigned int argCount = getNIRArgCount(target);
2373 uint16_t location = 0;
2374
2375 if (opInfo.has_dest) {
2376 LValues &newDefs = convert(&insn->dest);
2377 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2378 defs.push_back(newDefs[i]);
2379 mask |= 1 << i;
2380 }
2381 }
2382
2383 switch (op) {
2384 case nir_intrinsic_bindless_image_atomic_add:
2385 case nir_intrinsic_bindless_image_atomic_and:
2386 case nir_intrinsic_bindless_image_atomic_comp_swap:
2387 case nir_intrinsic_bindless_image_atomic_exchange:
2388 case nir_intrinsic_bindless_image_atomic_imax:
2389 case nir_intrinsic_bindless_image_atomic_umax:
2390 case nir_intrinsic_bindless_image_atomic_imin:
2391 case nir_intrinsic_bindless_image_atomic_umin:
2392 case nir_intrinsic_bindless_image_atomic_or:
2393 case nir_intrinsic_bindless_image_atomic_xor:
2394 ty = getDType(insn);
2395 mask = 0x1;
2396 info->io.globalAccess |= 0x2;
2397 break;
2398 case nir_intrinsic_bindless_image_load:
2399 ty = TYPE_U32;
2400 info->io.globalAccess |= 0x1;
2401 break;
2402 case nir_intrinsic_bindless_image_store:
2403 ty = TYPE_U32;
2404 mask = 0xf;
2405 info->io.globalAccess |= 0x2;
2406 break;
2407 case nir_intrinsic_bindless_image_samples:
2408 mask = 0x8;
2409 ty = TYPE_U32;
2410 break;
2411 case nir_intrinsic_bindless_image_size:
2412 ty = TYPE_U32;
2413 break;
2414 default:
2415 unreachable("unhandled image opcode");
2416 break;
2417 }
2418
2419 // coords
2420 if (opInfo.num_srcs >= 2)
2421 for (unsigned int i = 0u; i < argCount; ++i)
2422 srcs.push_back(getSrc(&insn->src[1], i));
2423
2424       // the sample index is just another src added after coords
2425 if (opInfo.num_srcs >= 3 && target.isMS())
2426 srcs.push_back(getSrc(&insn->src[2], 0));
2427
2428 if (opInfo.num_srcs >= 4) {
2429 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2430 for (uint8_t i = 0u; i < components; ++i)
2431 srcs.push_back(getSrc(&insn->src[3], i));
2432 }
2433
2434 if (opInfo.num_srcs >= 5)
2435          // 1 extra source for the atomic compare-and-swap value
2436 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2437 srcs.push_back(getSrc(&insn->src[4], i));
2438
2439 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2441       texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2442       texi->tex.mask = mask;
2443       texi->tex.bindless = true;
2444 texi->cache = convert(nir_intrinsic_access(insn));
2445 texi->setType(ty);
2446 texi->subOp = getSubOp(op);
2447
2448 if (indirect)
2449 texi->setIndirectR(indirect);
2450
2451 break;
2452 }
2453 case nir_intrinsic_image_deref_atomic_add:
2454 case nir_intrinsic_image_deref_atomic_and:
2455 case nir_intrinsic_image_deref_atomic_comp_swap:
2456 case nir_intrinsic_image_deref_atomic_exchange:
2457 case nir_intrinsic_image_deref_atomic_imax:
2458 case nir_intrinsic_image_deref_atomic_umax:
2459 case nir_intrinsic_image_deref_atomic_imin:
2460 case nir_intrinsic_image_deref_atomic_umin:
2461 case nir_intrinsic_image_deref_atomic_or:
2462 case nir_intrinsic_image_deref_atomic_xor:
2463 case nir_intrinsic_image_deref_load:
2464 case nir_intrinsic_image_deref_samples:
2465 case nir_intrinsic_image_deref_size:
2466 case nir_intrinsic_image_deref_store: {
2467 const nir_variable *tex;
2468 std::vector<Value*> srcs, defs;
2469 Value *indirect;
2470 DataType ty;
2471
2472 uint32_t mask = 0;
2473 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2474 const glsl_type *type = deref->type;
2475 TexInstruction::Target target =
2476 convert((glsl_sampler_dim)type->sampler_dimensionality,
2477 type->sampler_array, type->sampler_shadow);
2478 unsigned int argCount = getNIRArgCount(target);
2479 uint16_t location = handleDeref(deref, indirect, tex);
2480
2481 if (opInfo.has_dest) {
2482 LValues &newDefs = convert(&insn->dest);
2483 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2484 defs.push_back(newDefs[i]);
2485 mask |= 1 << i;
2486 }
2487 }
2488
2489 switch (op) {
2490 case nir_intrinsic_image_deref_atomic_add:
2491 case nir_intrinsic_image_deref_atomic_and:
2492 case nir_intrinsic_image_deref_atomic_comp_swap:
2493 case nir_intrinsic_image_deref_atomic_exchange:
2494 case nir_intrinsic_image_deref_atomic_imax:
2495 case nir_intrinsic_image_deref_atomic_umax:
2496 case nir_intrinsic_image_deref_atomic_imin:
2497 case nir_intrinsic_image_deref_atomic_umin:
2498 case nir_intrinsic_image_deref_atomic_or:
2499 case nir_intrinsic_image_deref_atomic_xor:
2500 ty = getDType(insn);
2501 mask = 0x1;
2502 info->io.globalAccess |= 0x2;
2503 break;
2504 case nir_intrinsic_image_deref_load:
2505 ty = TYPE_U32;
2506 info->io.globalAccess |= 0x1;
2507 break;
2508 case nir_intrinsic_image_deref_store:
2509 ty = TYPE_U32;
2510 mask = 0xf;
2511 info->io.globalAccess |= 0x2;
2512 break;
2513 case nir_intrinsic_image_deref_samples:
2514 mask = 0x8;
2515 ty = TYPE_U32;
2516 break;
2517 case nir_intrinsic_image_deref_size:
2518 ty = TYPE_U32;
2519 break;
2520 default:
2521 unreachable("unhandled image opcode");
2522 break;
2523 }
2524
2525 // coords
2526 if (opInfo.num_srcs >= 2)
2527 for (unsigned int i = 0u; i < argCount; ++i)
2528 srcs.push_back(getSrc(&insn->src[1], i));
2529
2530       // the sample index is just another src added after coords
2531 if (opInfo.num_srcs >= 3 && target.isMS())
2532 srcs.push_back(getSrc(&insn->src[2], 0));
2533
2534 if (opInfo.num_srcs >= 4) {
2535 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2536 for (uint8_t i = 0u; i < components; ++i)
2537 srcs.push_back(getSrc(&insn->src[3], i));
2538 }
2539
2540 if (opInfo.num_srcs >= 5)
2541          // 1 extra source for the atomic compare-and-swap value
2542 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2543 srcs.push_back(getSrc(&insn->src[4], i));
2544
2545 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2546 texi->tex.bindless = false;
2547 texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(tex->data.image.format);
2548 texi->tex.mask = mask;
2549 texi->cache = getCacheModeFromVar(tex);
2550 texi->setType(ty);
2551 texi->subOp = getSubOp(op);
2552
2553 if (indirect)
2554 texi->setIndirectR(indirect);
2555
2556 break;
2557 }
2558 case nir_intrinsic_store_shared: {
2559 DataType sType = getSType(insn->src[0], false, false);
2560 Value *indirectOffset;
2561 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2562
2563 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2564 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2565 continue;
2566 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2567 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2568 }
2569 break;
2570 }
2571 case nir_intrinsic_load_shared: {
2572 const DataType dType = getDType(insn);
2573 LValues &newDefs = convert(&insn->dest);
2574 Value *indirectOffset;
2575 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2576
2577 for (uint8_t i = 0u; i < insn->num_components; ++i)
2578 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2579
2580 break;
2581 }
2582 case nir_intrinsic_control_barrier: {
2583 // TODO: add flag to shader_info
2584 info->numBarriers = 1;
2585 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2586 bar->fixed = 1;
2587 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2588 break;
2589 }
2590 case nir_intrinsic_group_memory_barrier:
2591 case nir_intrinsic_memory_barrier:
2592 case nir_intrinsic_memory_barrier_buffer:
2593 case nir_intrinsic_memory_barrier_image:
2594 case nir_intrinsic_memory_barrier_shared: {
2595 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2596 bar->fixed = 1;
2597 bar->subOp = getSubOp(op);
2598 break;
2599 }
2600 case nir_intrinsic_memory_barrier_tcs_patch:
2601 break;
2602 case nir_intrinsic_shader_clock: {
2603 const DataType dType = getDType(insn);
2604 LValues &newDefs = convert(&insn->dest);
2605
2606 loadImm(newDefs[0], 0u);
2607 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2608 break;
2609 }
2610 case nir_intrinsic_load_global: {
2611 const DataType dType = getDType(insn);
2612 LValues &newDefs = convert(&insn->dest);
2613 Value *indirectOffset;
2614 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2615
2616 for (auto i = 0u; i < insn->num_components; ++i)
2617 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2618
2619 info->io.globalAccess |= 0x1;
2620 break;
2621 }
2622 case nir_intrinsic_store_global: {
2623 DataType sType = getSType(insn->src[0], false, false);
2624
2625 for (auto i = 0u; i < insn->num_components; ++i) {
2626 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2627 continue;
2628 if (typeSizeof(sType) == 8) {
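            // global memory is written in 32-bit units here, so 64-bit values
            // are split and stored as two halves at offset and offset + 4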
2629 Value *split[2];
2630 mkSplit(split, 4, getSrc(&insn->src[0], i));
2631
2632 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2633 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2634
2635 sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2636 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2637 } else {
2638 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2639 mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2640 }
2641 }
2642
2643 info->io.globalAccess |= 0x2;
2644 break;
2645 }
2646 default:
2647 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2648 return false;
2649 }
2650
2651 return true;
2652 }
2653
2654 bool
2655 Converter::visit(nir_jump_instr *insn)
2656 {
2657 switch (insn->type) {
2658 case nir_jump_return:
2659 // TODO: this only works in the main function
2660 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2661 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2662 break;
2663 case nir_jump_break:
2664 case nir_jump_continue: {
2665 bool isBreak = insn->type == nir_jump_break;
2666 nir_block *block = insn->instr.block;
2667 assert(!block->successors[1]);
2668 BasicBlock *target = convert(block->successors[0]);
2669 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2670 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2671 break;
2672 }
2673 default:
2674 ERROR("unknown nir_jump_type %u\n", insn->type);
2675 return false;
2676 }
2677
2678 return true;
2679 }
2680
2681 Value*
2682 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2683 {
2684 Value *val;
2685
2686 if (immInsertPos)
2687 setPosition(immInsertPos, true);
2688 else
2689 setPosition(bb, false);
2690
2691 switch (insn->def.bit_size) {
2692 case 64:
2693 val = loadImm(getSSA(8), insn->value[idx].u64);
2694 break;
2695 case 32:
2696 val = loadImm(getSSA(4), insn->value[idx].u32);
2697 break;
2698 case 16:
2699 val = loadImm(getSSA(2), insn->value[idx].u16);
2700 break;
2701 case 8:
2702 val = loadImm(getSSA(1), insn->value[idx].u8);
2703 break;
2704 default:
2705 unreachable("unhandled bit size!\n");
2706 }
2707 setPosition(bb, true);
2708 return val;
2709 }
2710
2711 bool
2712 Converter::visit(nir_load_const_instr *insn)
2713 {
2714 assert(insn->def.bit_size <= 64);
2715 immediates[insn->def.index] = insn;
2716 return true;
2717 }
2718
2719 #define DEFAULT_CHECKS \
2720 if (insn->dest.dest.ssa.num_components > 1) { \
2721 ERROR("nir_alu_instr only supported with 1 component!\n"); \
2722 return false; \
2723 } \
2724 if (insn->dest.write_mask != 1) { \
2725 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2726 return false; \
2727 }
2728 bool
2729 Converter::visit(nir_alu_instr *insn)
2730 {
2731 const nir_op op = insn->op;
2732 const nir_op_info &info = nir_op_infos[op];
2733 DataType dType = getDType(insn);
2734 const std::vector<DataType> sTypes = getSTypes(insn);
2735
2736 Instruction *oldPos = this->bb->getExit();
2737
2738 switch (op) {
2739 case nir_op_fabs:
2740 case nir_op_iabs:
2741 case nir_op_fadd:
2742 case nir_op_iadd:
2743 case nir_op_iand:
2744 case nir_op_fceil:
2745 case nir_op_fcos:
2746 case nir_op_fddx:
2747 case nir_op_fddx_coarse:
2748 case nir_op_fddx_fine:
2749 case nir_op_fddy:
2750 case nir_op_fddy_coarse:
2751 case nir_op_fddy_fine:
2752 case nir_op_fdiv:
2753 case nir_op_idiv:
2754 case nir_op_udiv:
2755 case nir_op_fexp2:
2756 case nir_op_ffloor:
2757 case nir_op_ffma:
2758 case nir_op_flog2:
2759 case nir_op_fmax:
2760 case nir_op_imax:
2761 case nir_op_umax:
2762 case nir_op_fmin:
2763 case nir_op_imin:
2764 case nir_op_umin:
2765 case nir_op_fmod:
2766 case nir_op_imod:
2767 case nir_op_umod:
2768 case nir_op_fmul:
2769 case nir_op_imul:
2770 case nir_op_imul_high:
2771 case nir_op_umul_high:
2772 case nir_op_fneg:
2773 case nir_op_ineg:
2774 case nir_op_inot:
2775 case nir_op_ior:
2776 case nir_op_pack_64_2x32_split:
2777 case nir_op_fpow:
2778 case nir_op_frcp:
2779 case nir_op_frem:
2780 case nir_op_irem:
2781 case nir_op_frsq:
2782 case nir_op_fsat:
2783 case nir_op_ishr:
2784 case nir_op_ushr:
2785 case nir_op_fsin:
2786 case nir_op_fsqrt:
2787 case nir_op_ftrunc:
2788 case nir_op_ishl:
2789 case nir_op_ixor: {
2790 DEFAULT_CHECKS;
2791 LValues &newDefs = convert(&insn->dest);
2792 operation preOp = preOperationNeeded(op);
2793 if (preOp != OP_NOP) {
2794 assert(info.num_inputs < 2);
2795 Value *tmp = getSSA(typeSizeof(dType));
2796 Instruction *i0 = mkOp(preOp, dType, tmp);
2797 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2798 if (info.num_inputs) {
2799 i0->setSrc(0, getSrc(&insn->src[0]));
2800 i1->setSrc(0, tmp);
2801 }
2802 i1->subOp = getSubOp(op);
2803 } else {
2804 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2805 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2806 i->setSrc(s, getSrc(&insn->src[s]));
2807 }
2808 i->subOp = getSubOp(op);
2809 }
2810 break;
2811 }
2812 case nir_op_ifind_msb:
2813 case nir_op_ufind_msb: {
2814 DEFAULT_CHECKS;
2815 LValues &newDefs = convert(&insn->dest);
2816 dType = sTypes[0];
2817 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2818 break;
2819 }
2820 case nir_op_fround_even: {
2821 DEFAULT_CHECKS;
2822 LValues &newDefs = convert(&insn->dest);
2823 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2824 break;
2825 }
2826 // convert instructions
2827 case nir_op_f2f32:
2828 case nir_op_f2i32:
2829 case nir_op_f2u32:
2830 case nir_op_i2f32:
2831 case nir_op_i2i32:
2832 case nir_op_u2f32:
2833 case nir_op_u2u32:
2834 case nir_op_f2f64:
2835 case nir_op_f2i64:
2836 case nir_op_f2u64:
2837 case nir_op_i2f64:
2838 case nir_op_i2i64:
2839 case nir_op_u2f64:
2840 case nir_op_u2u64: {
2841 DEFAULT_CHECKS;
2842 LValues &newDefs = convert(&insn->dest);
2843 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2844 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2845 i->rnd = ROUND_Z;
2846 i->sType = sTypes[0];
2847 break;
2848 }
2849 // compare instructions
2850 case nir_op_feq32:
2851 case nir_op_ieq32:
2852 case nir_op_fge32:
2853 case nir_op_ige32:
2854 case nir_op_uge32:
2855 case nir_op_flt32:
2856 case nir_op_ilt32:
2857 case nir_op_ult32:
2858 case nir_op_fne32:
2859 case nir_op_ine32: {
2860 DEFAULT_CHECKS;
2861 LValues &newDefs = convert(&insn->dest);
2862 Instruction *i = mkCmp(getOperation(op),
2863 getCondCode(op),
2864 dType,
2865 newDefs[0],
2866 dType,
2867 getSrc(&insn->src[0]),
2868 getSrc(&insn->src[1]));
2869 if (info.num_inputs == 3)
2870 i->setSrc(2, getSrc(&insn->src[2]));
2871 i->sType = sTypes[0];
2872 break;
2873 }
2874    // these are special ALU ops and need dedicated handling, because
2875    // 1. they are always component-based
2876    // 2. they basically just merge multiple values into one data type
2877 case nir_op_mov:
2878 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2879 nir_reg_dest& reg = insn->dest.dest.reg;
2880 uint32_t goffset = regToLmemOffset[reg.reg->index];
2881 uint8_t comps = reg.reg->num_components;
2882 uint8_t size = reg.reg->bit_size / 8;
2883 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2884 uint32_t aoffset = csize * reg.base_offset;
2885 Value *indirect = NULL;
2886
2887 if (reg.indirect)
2888 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2889 getSrc(reg.indirect, 0), mkImm(csize));
2890
2891 for (uint8_t i = 0u; i < comps; ++i) {
2892 if (!((1u << i) & insn->dest.write_mask))
2893 continue;
2894
2895 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2896 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2897 }
2898 break;
2899 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2900 LValues &newDefs = convert(&insn->dest);
2901 nir_reg_src& reg = insn->src[0].src.reg;
2902 uint32_t goffset = regToLmemOffset[reg.reg->index];
2903 // uint8_t comps = reg.reg->num_components;
2904 uint8_t size = reg.reg->bit_size / 8;
2905 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2906 uint32_t aoffset = csize * reg.base_offset;
2907 Value *indirect = NULL;
2908
2909 if (reg.indirect)
2910 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2911
2912 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2913 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2914
2915 break;
2916 } else {
2917 LValues &newDefs = convert(&insn->dest);
2918 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2919 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2920 }
2921 }
2922 break;
2923 case nir_op_vec2:
2924 case nir_op_vec3:
2925 case nir_op_vec4:
2926 case nir_op_vec8:
2927 case nir_op_vec16: {
2928 LValues &newDefs = convert(&insn->dest);
2929 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2930 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2931 }
2932 break;
2933 }
2934 // (un)pack
2935 case nir_op_pack_64_2x32: {
2936 LValues &newDefs = convert(&insn->dest);
2937 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2938 merge->setSrc(0, getSrc(&insn->src[0], 0));
2939 merge->setSrc(1, getSrc(&insn->src[0], 1));
2940 break;
2941 }
2942 case nir_op_pack_half_2x16_split: {
2943 LValues &newDefs = convert(&insn->dest);
2944 Value *tmpH = getSSA();
2945 Value *tmpL = getSSA();
2946
2947 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2948 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2949 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2950 break;
2951 }
2952 case nir_op_unpack_half_2x16_split_x:
2953 case nir_op_unpack_half_2x16_split_y: {
2954 LValues &newDefs = convert(&insn->dest);
2955 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2956 if (op == nir_op_unpack_half_2x16_split_y)
2957 cvt->subOp = 1;
2958 break;
2959 }
2960 case nir_op_unpack_64_2x32: {
2961 LValues &newDefs = convert(&insn->dest);
2962 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2963 break;
2964 }
2965 case nir_op_unpack_64_2x32_split_x: {
2966 LValues &newDefs = convert(&insn->dest);
2967 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2968 break;
2969 }
2970 case nir_op_unpack_64_2x32_split_y: {
2971 LValues &newDefs = convert(&insn->dest);
2972 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2973 break;
2974 }
2975 // special instructions
2976 case nir_op_fsign:
2977 case nir_op_isign: {
2978 DEFAULT_CHECKS;
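      // computes sign(x) as (x > 0) - (x < 0); note that SET produces
      // 1.0f/0.0f for float destinations but -1/0 for integer ones, which is
      // why the integer paths below subtract in the opposite order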
2979 DataType iType;
2980 if (::isFloatType(dType))
2981 iType = TYPE_F32;
2982 else
2983 iType = TYPE_S32;
2984
2985 LValues &newDefs = convert(&insn->dest);
2986 LValue *val0 = getScratch();
2987 LValue *val1 = getScratch();
2988 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2989 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2990
2991 if (dType == TYPE_F64) {
2992 mkOp2(OP_SUB, iType, val0, val0, val1);
2993 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2994 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2995 mkOp2(OP_SUB, iType, val0, val1, val0);
2996 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2997 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2998 } else if (::isFloatType(dType))
2999 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
3000 else
3001 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
3002 break;
3003 }
3004 case nir_op_fcsel:
3005 case nir_op_b32csel: {
3006 DEFAULT_CHECKS;
3007 LValues &newDefs = convert(&insn->dest);
3008 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
3009 break;
3010 }
3011 case nir_op_ibitfield_extract:
3012 case nir_op_ubitfield_extract: {
3013 DEFAULT_CHECKS;
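      // EXTBF takes a packed control word with the offset in bits 7:0 and the
      // width in bits 15:8; the INSBF with 0x808 (insert 8 bits at bit 8)
      // builds that word from src2 (bits) and src1 (offset), e.g. offset 4
      // and width 8 pack to 0x0804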
3014 Value *tmp = getSSA();
3015 LValues &newDefs = convert(&insn->dest);
3016 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3017 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
3018 break;
3019 }
3020 case nir_op_bfm: {
3021 DEFAULT_CHECKS;
3022 LValues &newDefs = convert(&insn->dest);
3023 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3024 break;
3025 }
3026 case nir_op_bitfield_insert: {
3027 DEFAULT_CHECKS;
3028 LValues &newDefs = convert(&insn->dest);
3029 LValue *temp = getSSA();
3030 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
3031 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
3032 break;
3033 }
3034 case nir_op_bit_count: {
3035 DEFAULT_CHECKS;
3036 LValues &newDefs = convert(&insn->dest);
3037 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
3038 break;
3039 }
3040 case nir_op_bitfield_reverse: {
3041 DEFAULT_CHECKS;
3042 LValues &newDefs = convert(&insn->dest);
3043 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3044 break;
3045 }
3046 case nir_op_find_lsb: {
3047 DEFAULT_CHECKS;
3048 LValues &newDefs = convert(&insn->dest);
3049 Value *tmp = getSSA();
3050 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3051 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3052 break;
3053 }
3054 // boolean conversions
3055 case nir_op_b2f32: {
3056 DEFAULT_CHECKS;
3057 LValues &newDefs = convert(&insn->dest);
3058 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
3059 break;
3060 }
3061 case nir_op_b2f64: {
3062 DEFAULT_CHECKS;
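      // 0x3ff00000 is the high word of the IEEE-754 double 1.0
      // (0x3ff0000000000000), so masking the bool with it and merging in a
      // zero low word yields exactly 0.0 or 1.0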
3063 LValues &newDefs = convert(&insn->dest);
3064 Value *tmp = getSSA(4);
3065 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
3066 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
3067 break;
3068 }
3069 case nir_op_f2b32:
3070 case nir_op_i2b32: {
3071 DEFAULT_CHECKS;
3072 LValues &newDefs = convert(&insn->dest);
3073 Value *src1;
3074 if (typeSizeof(sTypes[0]) == 8) {
3075 src1 = loadImm(getSSA(8), 0.0);
3076 } else {
3077 src1 = zero;
3078 }
3079 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
3080 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
3081 break;
3082 }
3083 case nir_op_b2i32: {
3084 DEFAULT_CHECKS;
3085 LValues &newDefs = convert(&insn->dest);
3086 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
3087 break;
3088 }
3089 case nir_op_b2i64: {
3090 DEFAULT_CHECKS;
3091 LValues &newDefs = convert(&insn->dest);
3092 LValue *def = getScratch();
3093 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
3094 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
3095 break;
3096 }
3097 default:
3098 ERROR("unknown nir_op %s\n", info.name);
3099 return false;
3100 }
3101
3102 if (!oldPos) {
3103 oldPos = this->bb->getEntry();
3104 oldPos->precise = insn->exact;
3105 }
3106
3107 if (unlikely(!oldPos))
3108 return true;
3109
3110 while (oldPos->next) {
3111 oldPos = oldPos->next;
3112 oldPos->precise = insn->exact;
3113 }
3114 oldPos->saturate = insn->dest.saturate;
3115
3116 return true;
3117 }
3118 #undef DEFAULT_CHECKS
3119
3120 bool
3121 Converter::visit(nir_ssa_undef_instr *insn)
3122 {
3123 LValues &newDefs = convert(&insn->def);
3124 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3125 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3126 }
3127 return true;
3128 }
3129
3130 #define CASE_SAMPLER(ty) \
3131 case GLSL_SAMPLER_DIM_ ## ty : \
3132 if (isArray && !isShadow) \
3133 return TEX_TARGET_ ## ty ## _ARRAY; \
3134 else if (!isArray && isShadow) \
3135 return TEX_TARGET_## ty ## _SHADOW; \
3136 else if (isArray && isShadow) \
3137 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
3138 else \
3139 return TEX_TARGET_ ## ty
3140
3141 TexTarget
3142 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3143 {
3144 switch (dim) {
3145 CASE_SAMPLER(1D);
3146 CASE_SAMPLER(2D);
3147 CASE_SAMPLER(CUBE);
3148 case GLSL_SAMPLER_DIM_3D:
3149 return TEX_TARGET_3D;
3150 case GLSL_SAMPLER_DIM_MS:
3151 if (isArray)
3152 return TEX_TARGET_2D_MS_ARRAY;
3153 return TEX_TARGET_2D_MS;
3154 case GLSL_SAMPLER_DIM_RECT:
3155 if (isShadow)
3156 return TEX_TARGET_RECT_SHADOW;
3157 return TEX_TARGET_RECT;
3158 case GLSL_SAMPLER_DIM_BUF:
3159 return TEX_TARGET_BUFFER;
3160 case GLSL_SAMPLER_DIM_EXTERNAL:
3161 return TEX_TARGET_2D;
3162 default:
3163 ERROR("unknown glsl_sampler_dim %u\n", dim);
3164 assert(false);
3165 return TEX_TARGET_COUNT;
3166 }
3167 }
3168 #undef CASE_SAMPLER
3169
3170 Value*
3171 Converter::applyProjection(Value *src, Value *proj)
3172 {
3173 if (!proj)
3174 return src;
3175 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3176 }
3177
3178 unsigned int
3179 Converter::getNIRArgCount(TexInstruction::Target& target)
3180 {
3181 unsigned int result = target.getArgCount();
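   // codegen's getArgCount() counts the cube array layer and the MS sample
   // index as coordinate arguments; NIR (presumably) folds the face/layer of
   // cube arrays into one component and passes the sample index as a separate
   // source, hence the corrections below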
3182 if (target.isCube() && target.isArray())
3183 result--;
3184 if (target.isMS())
3185 result--;
3186 return result;
3187 }
3188
3189 uint16_t
3190 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3191 {
3192 typedef std::pair<uint32_t,Value*> DerefPair;
3193 std::list<DerefPair> derefs;
3194
3195 uint16_t result = 0;
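   // Walk the chain from the innermost deref up to the variable: constant
   // array indices and struct offsets accumulate into 'result', while each
   // indirect array index is recorded as a (stride, value) pair and folded
   // below into a single indirect value, conceptually (a sketch for a
   // hypothetical tex[a][b]):
   //    indirect = a * outerStride + b * innerStride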
3196 while (deref->deref_type != nir_deref_type_var) {
3197 switch (deref->deref_type) {
3198 case nir_deref_type_array: {
3199 Value *indirect;
3200 uint8_t size = type_size(deref->type, true);
3201 result += size * getIndirect(&deref->arr.index, 0, indirect);
3202
3203 if (indirect) {
3204 derefs.push_front(std::make_pair(size, indirect));
3205 }
3206
3207 break;
3208 }
3209 case nir_deref_type_struct: {
3210 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3211 break;
3212 }
3213 case nir_deref_type_var:
3214 default:
3215 unreachable("nir_deref_type_var reached in handleDeref!");
3216 break;
3217 }
3218 deref = nir_deref_instr_parent(deref);
3219 }
3220
3221 indirect = NULL;
3222 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3223 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3224 if (indirect)
3225 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3226 else
3227 indirect = offset;
3228 }
3229
3230 tex = nir_deref_instr_get_variable(deref);
3231 assert(tex);
3232
3233 return result + tex->data.driver_location;
3234 }
3235
3236 CacheMode
3237 Converter::convert(enum gl_access_qualifier access)
3238 {
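   // map GL access qualifiers to nv50 cache modes: volatile accesses bypass
   // the caches (CV), coherent ones are cached at the globally visible level
   // only (CG), everything else uses the default cache-all mode (CA)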
3239 switch (access) {
3240 case ACCESS_VOLATILE:
3241 return CACHE_CV;
3242 case ACCESS_COHERENT:
3243 return CACHE_CG;
3244 default:
3245 return CACHE_CA;
3246 }
3247 }
3248
3249 CacheMode
3250 Converter::getCacheModeFromVar(const nir_variable *var)
3251 {
3252 return convert(var->data.access);
3253 }
3254
3255 bool
3256 Converter::visit(nir_tex_instr *insn)
3257 {
3258 switch (insn->op) {
3259 case nir_texop_lod:
3260 case nir_texop_query_levels:
3261 case nir_texop_tex:
3262 case nir_texop_texture_samples:
3263 case nir_texop_tg4:
3264 case nir_texop_txb:
3265 case nir_texop_txd:
3266 case nir_texop_txf:
3267 case nir_texop_txf_ms:
3268 case nir_texop_txl:
3269 case nir_texop_txs: {
3270 LValues &newDefs = convert(&insn->dest);
3271 std::vector<Value*> srcs;
3272 std::vector<Value*> defs;
3273 std::vector<nir_src*> offsets;
3274 uint8_t mask = 0;
3275 bool lz = false;
3276 Value *proj = NULL;
3277 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3278 operation op = getOperation(insn->op);
3279
3280 int r, s;
3281 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3282 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3283 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3284 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3285 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3286 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3287 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3288 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3289 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3290 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3291 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3292 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3293 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3294
3295 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3296 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3297
3298 if (projIdx != -1)
3299 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3300
3301 srcs.resize(insn->coord_components);
3302 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3303 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3304
3305       // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3306 if (insn->coord_components) {
3307 uint32_t argCount = target.getArgCount();
3308
3309 if (target.isMS())
3310 argCount -= 1;
3311
3312 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3313 srcs.push_back(getSSA());
3314 }
3315
3316 if (insn->op == nir_texop_texture_samples)
3317 srcs.push_back(zero);
3318 else if (!insn->num_srcs)
3319 srcs.push_back(loadImm(NULL, 0));
3320 if (biasIdx != -1)
3321 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3322 if (lodIdx != -1)
3323 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3324 else if (op == OP_TXF)
3325 lz = true;
3326 if (msIdx != -1)
3327 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3328 if (offsetIdx != -1)
3329 offsets.push_back(&insn->src[offsetIdx].src);
3330 if (compIdx != -1)
3331 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3332 if (texOffIdx != -1) {
3333 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3334 texOffIdx = srcs.size() - 1;
3335 }
3336 if (sampOffIdx != -1) {
3337 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3338 sampOffIdx = srcs.size() - 1;
3339 }
3340 if (bindless) {
3341          // currently we only use the lower 32 bits of the 64-bit handle
3342 Value *split[2];
3343 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3344
3345 mkSplit(split, 4, handle);
3346
3347 srcs.push_back(split[0]);
3348 texOffIdx = srcs.size() - 1;
3349 }
3350
3351 r = bindless ? 0xff : insn->texture_index;
3352 s = bindless ? 0x1f : insn->sampler_index;
3353
3354 defs.resize(newDefs.size());
3355 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3356 defs[d] = newDefs[d];
3357 mask |= 1 << d;
3358 }
3359 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3360 lz = true;
3361
3362 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3363 texi->tex.levelZero = lz;
3364 texi->tex.mask = mask;
3365 texi->tex.bindless = bindless;
3366
3367 if (texOffIdx != -1)
3368 texi->tex.rIndirectSrc = texOffIdx;
3369 if (sampOffIdx != -1)
3370 texi->tex.sIndirectSrc = sampOffIdx;
3371
3372 switch (insn->op) {
3373 case nir_texop_tg4:
3374 if (!target.isShadow())
3375 texi->tex.gatherComp = insn->component;
3376 break;
3377 case nir_texop_txs:
3378 texi->tex.query = TXQ_DIMS;
3379 break;
3380 case nir_texop_texture_samples:
3381 texi->tex.mask = 0x4;
3382 texi->tex.query = TXQ_TYPE;
3383 break;
3384 case nir_texop_query_levels:
3385 texi->tex.mask = 0x8;
3386 texi->tex.query = TXQ_DIMS;
3387 break;
3388 default:
3389 break;
3390 }
3391
3392 texi->tex.useOffsets = offsets.size();
3393 if (texi->tex.useOffsets) {
3394 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3395 for (uint32_t c = 0u; c < 3; ++c) {
3396 uint8_t s2 = std::min(c, target.getDim() - 1);
3397 texi->offset[s][c].set(getSrc(offsets[s], s2));
3398 texi->offset[s][c].setInsn(texi);
3399 }
3400 }
3401 }
3402
3403 if (op == OP_TXG && offsetIdx == -1) {
3404 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3405 texi->tex.useOffsets = 4;
3406 setPosition(texi, false);
3407 for (uint8_t i = 0; i < 4; ++i) {
3408 for (uint8_t j = 0; j < 2; ++j) {
3409 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3410 texi->offset[i][j].setInsn(texi);
3411 }
3412 }
3413 setPosition(texi, true);
3414 }
3415 }
3416
3417 if (ddxIdx != -1 && ddyIdx != -1) {
3418 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3419 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3420 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3421 }
3422 }
3423
3424 break;
3425 }
3426 default:
3427 ERROR("unknown nir_texop %u\n", insn->op);
3428 return false;
3429 }
3430 return true;
3431 }
3432
3433 bool
3434 Converter::visit(nir_deref_instr *deref)
3435 {
3436    // we just ignore these, because image intrinsics are the only place where
3437    // we should end up with deref sources, and those have to backtrack anyway
3438    // to get the nir_variable. This code just exists to handle some special
3439    // cases.
3440 switch (deref->deref_type) {
3441 case nir_deref_type_array:
3442 case nir_deref_type_struct:
3443 case nir_deref_type_var:
3444 break;
3445 default:
3446 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3447 return false;
3448 }
3449 return true;
3450 }
3451
3452 bool
3453 Converter::run()
3454 {
3455 bool progress;
3456
3457 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3458 nir_print_shader(nir, stderr);
3459
3460 struct nir_lower_subgroups_options subgroup_options = {
3461 .subgroup_size = 32,
3462 .ballot_bit_size = 32,
3463 };
3464
3465 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3466 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3467 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3468 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3469 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3470 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3471 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3472
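   // run the cheap cleanup passes to a fixed point: each of them can expose
   // new opportunities for the others (copy propagation feeding CSE feeding
   // DCE and so on)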
3473 do {
3474 progress = false;
3475 NIR_PASS(progress, nir, nir_copy_prop);
3476 NIR_PASS(progress, nir, nir_opt_remove_phis);
3477 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3478 NIR_PASS(progress, nir, nir_opt_cse);
3479 NIR_PASS(progress, nir, nir_opt_algebraic);
3480 NIR_PASS(progress, nir, nir_opt_constant_folding);
3481 NIR_PASS(progress, nir, nir_copy_prop);
3482 NIR_PASS(progress, nir, nir_opt_dce);
3483 NIR_PASS(progress, nir, nir_opt_dead_cf);
3484 } while (progress);
3485
3486 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3487 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3488 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3489 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3490
3491 // Garbage collect dead instructions
3492 nir_sweep(nir);
3493
3494 if (!parseNIR()) {
3495       ERROR("Couldn't parse NIR!\n");
3496 return false;
3497 }
3498
3499 if (!assignSlots()) {
3500 ERROR("Couldn't assign slots!\n");
3501 return false;
3502 }
3503
3504 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3505 nir_print_shader(nir, stderr);
3506
3507 nir_foreach_function(function, nir) {
3508 if (!visit(function))
3509 return false;
3510 }
3511
3512 return true;
3513 }
3514
3515 } // unnamed namespace
3516
3517 namespace nv50_ir {
3518
3519 bool
3520 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3521 {
3522 nir_shader *nir = (nir_shader*)info->bin.source;
3523 Converter converter(this, nir, info);
3524 bool result = converter.run();
3525 if (!result)
3526 return result;
3527 LoweringHelper lowering;
3528 lowering.run(this);
3529 tlsSize = info->bin.tlsSpace;
3530 return result;
3531 }
3532
3533 } // namespace nv50_ir