2dcadeed44d52f075538bab3c7063bec4cc0509e
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_tgsi.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "tgsi/tgsi_dump.h"
24 #include "tgsi/tgsi_scan.h"
25 #include "tgsi/tgsi_util.h"
26
27 #include <set>
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_util.h"
31 #include "codegen/nv50_ir_build_util.h"
32
33 namespace tgsi {
34
35 class Source;
36
37 static nv50_ir::operation translateOpcode(uint opcode);
38 static nv50_ir::DataFile translateFile(uint file);
39 static nv50_ir::TexTarget translateTexture(uint texTarg);
40 static nv50_ir::SVSemantic translateSysVal(uint sysval);
41
42 class Instruction
43 {
44 public:
45 Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
46
47 class SrcRegister
48 {
49 public:
50 SrcRegister(const struct tgsi_full_src_register *src)
51 : reg(src->Register),
52 fsr(src)
53 { }
54
55 SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
56
57 SrcRegister(const struct tgsi_ind_register& ind)
58 : reg(tgsi_util_get_src_from_ind(&ind)),
59 fsr(NULL)
60 { }
61
62 struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
63 {
64 struct tgsi_src_register reg;
65 memset(&reg, 0, sizeof(reg));
66 reg.Index = off.Index;
67 reg.File = off.File;
68 reg.SwizzleX = off.SwizzleX;
69 reg.SwizzleY = off.SwizzleY;
70 reg.SwizzleZ = off.SwizzleZ;
71 return reg;
72 }
73
74 SrcRegister(const struct tgsi_texture_offset& off) :
75 reg(offsetToSrc(off)),
76 fsr(NULL)
77 { }
78
79 uint getFile() const { return reg.File; }
80
81 bool is2D() const { return reg.Dimension; }
82
83 bool isIndirect(int dim) const
84 {
85 return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
86 }
87
88 int getIndex(int dim) const
89 {
90 return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
91 }
92
93 int getSwizzle(int chan) const
94 {
95 return tgsi_util_get_src_register_swizzle(&reg, chan);
96 }
97
98 nv50_ir::Modifier getMod(int chan) const;
99
100 SrcRegister getIndirect(int dim) const
101 {
102 assert(fsr && isIndirect(dim));
103 if (dim)
104 return SrcRegister(fsr->DimIndirect);
105 return SrcRegister(fsr->Indirect);
106 }
107
108 uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
109 {
110 assert(reg.File == TGSI_FILE_IMMEDIATE);
111 assert(!reg.Absolute);
112 assert(!reg.Negate);
113 return info->immd.data[reg.Index * 4 + getSwizzle(c)];
114 }
115
116 private:
117 const struct tgsi_src_register reg;
118 const struct tgsi_full_src_register *fsr;
119 };
120
121 class DstRegister
122 {
123 public:
124 DstRegister(const struct tgsi_full_dst_register *dst)
125 : reg(dst->Register),
126 fdr(dst)
127 { }
128
129 DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
130
131 uint getFile() const { return reg.File; }
132
133 bool is2D() const { return reg.Dimension; }
134
135 bool isIndirect(int dim) const
136 {
137 return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
138 }
139
140 int getIndex(int dim) const
141 {
142 return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
143 }
144
145 unsigned int getMask() const { return reg.WriteMask; }
146
147 bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
148
149 SrcRegister getIndirect(int dim) const
150 {
151 assert(fdr && isIndirect(dim));
152 if (dim)
153 return SrcRegister(fdr->DimIndirect);
154 return SrcRegister(fdr->Indirect);
155 }
156
157 private:
158 const struct tgsi_dst_register reg;
159 const struct tgsi_full_dst_register *fdr;
160 };
161
162 inline uint getOpcode() const { return insn->Instruction.Opcode; }
163
164 unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
165 unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
166
167 // mask of used components of source s
168 unsigned int srcMask(unsigned int s) const;
169
170 SrcRegister getSrc(unsigned int s) const
171 {
172 assert(s < srcCount());
173 return SrcRegister(&insn->Src[s]);
174 }
175
176 DstRegister getDst(unsigned int d) const
177 {
178 assert(d < dstCount());
179 return DstRegister(&insn->Dst[d]);
180 }
181
182 SrcRegister getTexOffset(unsigned int i) const
183 {
184 assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
185 return SrcRegister(insn->TexOffsets[i]);
186 }
187
188 unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
189
190 bool checkDstSrcAliasing() const;
191
192 inline nv50_ir::operation getOP() const {
193 return translateOpcode(getOpcode()); }
194
195 nv50_ir::DataType inferSrcType() const;
196 nv50_ir::DataType inferDstType() const;
197
198 nv50_ir::CondCode getSetCond() const;
199
200 nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
201
202 inline uint getLabel() { return insn->Label.Label; }
203
204 unsigned getSaturate() const { return insn->Instruction.Saturate; }
205
206 void print() const
207 {
208 tgsi_dump_instruction(insn, 1);
209 }
210
211 private:
212 const struct tgsi_full_instruction *insn;
213 };
214
215 unsigned int Instruction::srcMask(unsigned int s) const
216 {
217 unsigned int mask = insn->Dst[0].Register.WriteMask;
218
219 switch (insn->Instruction.Opcode) {
220 case TGSI_OPCODE_COS:
221 case TGSI_OPCODE_SIN:
222 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
223 case TGSI_OPCODE_DP2:
224 return 0x3;
225 case TGSI_OPCODE_DP3:
226 return 0x7;
227 case TGSI_OPCODE_DP4:
228 case TGSI_OPCODE_DPH:
229 case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
230 return 0xf;
231 case TGSI_OPCODE_DST:
232 return mask & (s ? 0xa : 0x6);
233 case TGSI_OPCODE_EX2:
234 case TGSI_OPCODE_EXP:
235 case TGSI_OPCODE_LG2:
236 case TGSI_OPCODE_LOG:
237 case TGSI_OPCODE_POW:
238 case TGSI_OPCODE_RCP:
239 case TGSI_OPCODE_RSQ:
240 case TGSI_OPCODE_SCS:
241 return 0x1;
242 case TGSI_OPCODE_IF:
243 case TGSI_OPCODE_UIF:
244 return 0x1;
245 case TGSI_OPCODE_LIT:
246 return 0xb;
247 case TGSI_OPCODE_TEX2:
248 case TGSI_OPCODE_TXB2:
249 case TGSI_OPCODE_TXL2:
250 return (s == 0) ? 0xf : 0x3;
251 case TGSI_OPCODE_TEX:
252 case TGSI_OPCODE_TXB:
253 case TGSI_OPCODE_TXD:
254 case TGSI_OPCODE_TXL:
255 case TGSI_OPCODE_TXP:
256 case TGSI_OPCODE_LODQ:
257 {
258 const struct tgsi_instruction_texture *tex = &insn->Texture;
259
260 assert(insn->Instruction.Texture);
261
262 mask = 0x7;
263 if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
264 insn->Instruction.Opcode != TGSI_OPCODE_TXD)
265 mask |= 0x8; /* bias, lod or proj */
266
267 switch (tex->Texture) {
268 case TGSI_TEXTURE_1D:
269 mask &= 0x9;
270 break;
271 case TGSI_TEXTURE_SHADOW1D:
272 mask &= 0xd;
273 break;
274 case TGSI_TEXTURE_1D_ARRAY:
275 case TGSI_TEXTURE_2D:
276 case TGSI_TEXTURE_RECT:
277 mask &= 0xb;
278 break;
279 case TGSI_TEXTURE_CUBE_ARRAY:
280 case TGSI_TEXTURE_SHADOW2D_ARRAY:
281 case TGSI_TEXTURE_SHADOWCUBE:
282 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
283 mask |= 0x8;
284 break;
285 default:
286 break;
287 }
288 }
289 return mask;
290 case TGSI_OPCODE_XPD:
291 {
292 unsigned int x = 0;
293 if (mask & 1) x |= 0x6;
294 if (mask & 2) x |= 0x5;
295 if (mask & 4) x |= 0x3;
296 return x;
297 }
298 case TGSI_OPCODE_D2I:
299 case TGSI_OPCODE_D2U:
300 case TGSI_OPCODE_D2F:
301 case TGSI_OPCODE_DSLT:
302 case TGSI_OPCODE_DSGE:
303 case TGSI_OPCODE_DSEQ:
304 case TGSI_OPCODE_DSNE:
305 switch (util_bitcount(mask)) {
306 case 1: return 0x3;
307 case 2: return 0xf;
308 default:
309 assert(!"unexpected mask");
310 return 0xf;
311 }
312 case TGSI_OPCODE_I2D:
313 case TGSI_OPCODE_U2D:
314 case TGSI_OPCODE_F2D: {
315 unsigned int x = 0;
316 if ((mask & 0x3) == 0x3)
317 x |= 1;
318 if ((mask & 0xc) == 0xc)
319 x |= 2;
320 return x;
321 }
322 default:
323 break;
324 }
325
326 return mask;
327 }
328
329 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
330 {
331 nv50_ir::Modifier m(0);
332
333 if (reg.Absolute)
334 m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
335 if (reg.Negate)
336 m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
337 return m;
338 }
339
340 static nv50_ir::DataFile translateFile(uint file)
341 {
342 switch (file) {
343 case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST;
344 case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT;
345 case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT;
346 case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR;
347 case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS;
348 case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
349 case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
350 case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
351 case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
352 case TGSI_FILE_SAMPLER:
353 case TGSI_FILE_NULL:
354 default:
355 return nv50_ir::FILE_NULL;
356 }
357 }
358
359 static nv50_ir::SVSemantic translateSysVal(uint sysval)
360 {
361 switch (sysval) {
362 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;
363 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;
364 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;
365 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
366 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID;
367 case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID;
368 case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID;
369 case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;
370 case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID;
371 case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX;
372 case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;
373 case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
374 case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
375 default:
376 assert(0);
377 return nv50_ir::SV_CLOCK;
378 }
379 }
380
381 #define NV50_IR_TEX_TARG_CASE(a, b) \
382 case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
383
384 static nv50_ir::TexTarget translateTexture(uint tex)
385 {
386 switch (tex) {
387 NV50_IR_TEX_TARG_CASE(1D, 1D);
388 NV50_IR_TEX_TARG_CASE(2D, 2D);
389 NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS);
390 NV50_IR_TEX_TARG_CASE(3D, 3D);
391 NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
392 NV50_IR_TEX_TARG_CASE(RECT, RECT);
393 NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
394 NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
395 NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY);
396 NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);
397 NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
398 NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
399 NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
400 NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
401 NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
402 NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
403 NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);
404 NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
405
406 case TGSI_TEXTURE_UNKNOWN:
407 default:
408 assert(!"invalid texture target");
409 return nv50_ir::TEX_TARGET_2D;
410 }
411 }
412
413 nv50_ir::DataType Instruction::inferSrcType() const
414 {
415 switch (getOpcode()) {
416 case TGSI_OPCODE_UIF:
417 case TGSI_OPCODE_AND:
418 case TGSI_OPCODE_OR:
419 case TGSI_OPCODE_XOR:
420 case TGSI_OPCODE_NOT:
421 case TGSI_OPCODE_SHL:
422 case TGSI_OPCODE_U2F:
423 case TGSI_OPCODE_U2D:
424 case TGSI_OPCODE_UADD:
425 case TGSI_OPCODE_UDIV:
426 case TGSI_OPCODE_UMOD:
427 case TGSI_OPCODE_UMAD:
428 case TGSI_OPCODE_UMUL:
429 case TGSI_OPCODE_UMUL_HI:
430 case TGSI_OPCODE_UMAX:
431 case TGSI_OPCODE_UMIN:
432 case TGSI_OPCODE_USEQ:
433 case TGSI_OPCODE_USGE:
434 case TGSI_OPCODE_USLT:
435 case TGSI_OPCODE_USNE:
436 case TGSI_OPCODE_USHR:
437 case TGSI_OPCODE_UCMP:
438 case TGSI_OPCODE_ATOMUADD:
439 case TGSI_OPCODE_ATOMXCHG:
440 case TGSI_OPCODE_ATOMCAS:
441 case TGSI_OPCODE_ATOMAND:
442 case TGSI_OPCODE_ATOMOR:
443 case TGSI_OPCODE_ATOMXOR:
444 case TGSI_OPCODE_ATOMUMIN:
445 case TGSI_OPCODE_ATOMUMAX:
446 case TGSI_OPCODE_UBFE:
447 case TGSI_OPCODE_UMSB:
448 return nv50_ir::TYPE_U32;
449 case TGSI_OPCODE_I2F:
450 case TGSI_OPCODE_I2D:
451 case TGSI_OPCODE_IDIV:
452 case TGSI_OPCODE_IMUL_HI:
453 case TGSI_OPCODE_IMAX:
454 case TGSI_OPCODE_IMIN:
455 case TGSI_OPCODE_IABS:
456 case TGSI_OPCODE_INEG:
457 case TGSI_OPCODE_ISGE:
458 case TGSI_OPCODE_ISHR:
459 case TGSI_OPCODE_ISLT:
460 case TGSI_OPCODE_ISSG:
461 case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
462 case TGSI_OPCODE_MOD:
463 case TGSI_OPCODE_UARL:
464 case TGSI_OPCODE_ATOMIMIN:
465 case TGSI_OPCODE_ATOMIMAX:
466 case TGSI_OPCODE_IBFE:
467 case TGSI_OPCODE_IMSB:
468 return nv50_ir::TYPE_S32;
469 case TGSI_OPCODE_D2F:
470 case TGSI_OPCODE_D2I:
471 case TGSI_OPCODE_D2U:
472 case TGSI_OPCODE_DABS:
473 case TGSI_OPCODE_DNEG:
474 case TGSI_OPCODE_DADD:
475 case TGSI_OPCODE_DMUL:
476 case TGSI_OPCODE_DMAX:
477 case TGSI_OPCODE_DMIN:
478 case TGSI_OPCODE_DSLT:
479 case TGSI_OPCODE_DSGE:
480 case TGSI_OPCODE_DSEQ:
481 case TGSI_OPCODE_DSNE:
482 case TGSI_OPCODE_DRCP:
483 case TGSI_OPCODE_DSQRT:
484 case TGSI_OPCODE_DMAD:
485 case TGSI_OPCODE_DFRAC:
486 case TGSI_OPCODE_DRSQ:
487 case TGSI_OPCODE_DTRUNC:
488 case TGSI_OPCODE_DCEIL:
489 case TGSI_OPCODE_DFLR:
490 case TGSI_OPCODE_DROUND:
491 return nv50_ir::TYPE_F64;
492 default:
493 return nv50_ir::TYPE_F32;
494 }
495 }
496
497 nv50_ir::DataType Instruction::inferDstType() const
498 {
499 switch (getOpcode()) {
500 case TGSI_OPCODE_D2U:
501 case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
502 case TGSI_OPCODE_D2I:
503 case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
504 case TGSI_OPCODE_FSEQ:
505 case TGSI_OPCODE_FSGE:
506 case TGSI_OPCODE_FSLT:
507 case TGSI_OPCODE_FSNE:
508 case TGSI_OPCODE_DSEQ:
509 case TGSI_OPCODE_DSGE:
510 case TGSI_OPCODE_DSLT:
511 case TGSI_OPCODE_DSNE:
512 return nv50_ir::TYPE_U32;
513 case TGSI_OPCODE_I2F:
514 case TGSI_OPCODE_U2F:
515 case TGSI_OPCODE_D2F:
516 return nv50_ir::TYPE_F32;
517 case TGSI_OPCODE_I2D:
518 case TGSI_OPCODE_U2D:
519 case TGSI_OPCODE_F2D:
520 return nv50_ir::TYPE_F64;
521 default:
522 return inferSrcType();
523 }
524 }
525
526 nv50_ir::CondCode Instruction::getSetCond() const
527 {
528 using namespace nv50_ir;
529
530 switch (getOpcode()) {
531 case TGSI_OPCODE_SLT:
532 case TGSI_OPCODE_ISLT:
533 case TGSI_OPCODE_USLT:
534 case TGSI_OPCODE_FSLT:
535 case TGSI_OPCODE_DSLT:
536 return CC_LT;
537 case TGSI_OPCODE_SLE:
538 return CC_LE;
539 case TGSI_OPCODE_SGE:
540 case TGSI_OPCODE_ISGE:
541 case TGSI_OPCODE_USGE:
542 case TGSI_OPCODE_FSGE:
543 case TGSI_OPCODE_DSGE:
544 return CC_GE;
545 case TGSI_OPCODE_SGT:
546 return CC_GT;
547 case TGSI_OPCODE_SEQ:
548 case TGSI_OPCODE_USEQ:
549 case TGSI_OPCODE_FSEQ:
550 case TGSI_OPCODE_DSEQ:
551 return CC_EQ;
552 case TGSI_OPCODE_SNE:
553 case TGSI_OPCODE_FSNE:
554 case TGSI_OPCODE_DSNE:
555 return CC_NEU;
556 case TGSI_OPCODE_USNE:
557 return CC_NE;
558 default:
559 return CC_ALWAYS;
560 }
561 }
562
563 #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
564
565 static nv50_ir::operation translateOpcode(uint opcode)
566 {
567 switch (opcode) {
568 NV50_IR_OPCODE_CASE(ARL, SHL);
569 NV50_IR_OPCODE_CASE(MOV, MOV);
570
571 NV50_IR_OPCODE_CASE(RCP, RCP);
572 NV50_IR_OPCODE_CASE(RSQ, RSQ);
573
574 NV50_IR_OPCODE_CASE(MUL, MUL);
575 NV50_IR_OPCODE_CASE(ADD, ADD);
576
577 NV50_IR_OPCODE_CASE(MIN, MIN);
578 NV50_IR_OPCODE_CASE(MAX, MAX);
579 NV50_IR_OPCODE_CASE(SLT, SET);
580 NV50_IR_OPCODE_CASE(SGE, SET);
581 NV50_IR_OPCODE_CASE(MAD, MAD);
582 NV50_IR_OPCODE_CASE(SUB, SUB);
583
584 NV50_IR_OPCODE_CASE(FLR, FLOOR);
585 NV50_IR_OPCODE_CASE(ROUND, CVT);
586 NV50_IR_OPCODE_CASE(EX2, EX2);
587 NV50_IR_OPCODE_CASE(LG2, LG2);
588 NV50_IR_OPCODE_CASE(POW, POW);
589
590 NV50_IR_OPCODE_CASE(ABS, ABS);
591
592 NV50_IR_OPCODE_CASE(COS, COS);
593 NV50_IR_OPCODE_CASE(DDX, DFDX);
594 NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
595 NV50_IR_OPCODE_CASE(DDY, DFDY);
596 NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);
597 NV50_IR_OPCODE_CASE(KILL, DISCARD);
598
599 NV50_IR_OPCODE_CASE(SEQ, SET);
600 NV50_IR_OPCODE_CASE(SGT, SET);
601 NV50_IR_OPCODE_CASE(SIN, SIN);
602 NV50_IR_OPCODE_CASE(SLE, SET);
603 NV50_IR_OPCODE_CASE(SNE, SET);
604 NV50_IR_OPCODE_CASE(TEX, TEX);
605 NV50_IR_OPCODE_CASE(TXD, TXD);
606 NV50_IR_OPCODE_CASE(TXP, TEX);
607
608 NV50_IR_OPCODE_CASE(CAL, CALL);
609 NV50_IR_OPCODE_CASE(RET, RET);
610 NV50_IR_OPCODE_CASE(CMP, SLCT);
611
612 NV50_IR_OPCODE_CASE(TXB, TXB);
613
614 NV50_IR_OPCODE_CASE(DIV, DIV);
615
616 NV50_IR_OPCODE_CASE(TXL, TXL);
617
618 NV50_IR_OPCODE_CASE(CEIL, CEIL);
619 NV50_IR_OPCODE_CASE(I2F, CVT);
620 NV50_IR_OPCODE_CASE(NOT, NOT);
621 NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
622 NV50_IR_OPCODE_CASE(SHL, SHL);
623
624 NV50_IR_OPCODE_CASE(AND, AND);
625 NV50_IR_OPCODE_CASE(OR, OR);
626 NV50_IR_OPCODE_CASE(MOD, MOD);
627 NV50_IR_OPCODE_CASE(XOR, XOR);
628 NV50_IR_OPCODE_CASE(SAD, SAD);
629 NV50_IR_OPCODE_CASE(TXF, TXF);
630 NV50_IR_OPCODE_CASE(TXQ, TXQ);
631 NV50_IR_OPCODE_CASE(TG4, TXG);
632 NV50_IR_OPCODE_CASE(LODQ, TXLQ);
633
634 NV50_IR_OPCODE_CASE(EMIT, EMIT);
635 NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
636
637 NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);
638
639 NV50_IR_OPCODE_CASE(F2I, CVT);
640 NV50_IR_OPCODE_CASE(FSEQ, SET);
641 NV50_IR_OPCODE_CASE(FSGE, SET);
642 NV50_IR_OPCODE_CASE(FSLT, SET);
643 NV50_IR_OPCODE_CASE(FSNE, SET);
644 NV50_IR_OPCODE_CASE(IDIV, DIV);
645 NV50_IR_OPCODE_CASE(IMAX, MAX);
646 NV50_IR_OPCODE_CASE(IMIN, MIN);
647 NV50_IR_OPCODE_CASE(IABS, ABS);
648 NV50_IR_OPCODE_CASE(INEG, NEG);
649 NV50_IR_OPCODE_CASE(ISGE, SET);
650 NV50_IR_OPCODE_CASE(ISHR, SHR);
651 NV50_IR_OPCODE_CASE(ISLT, SET);
652 NV50_IR_OPCODE_CASE(F2U, CVT);
653 NV50_IR_OPCODE_CASE(U2F, CVT);
654 NV50_IR_OPCODE_CASE(UADD, ADD);
655 NV50_IR_OPCODE_CASE(UDIV, DIV);
656 NV50_IR_OPCODE_CASE(UMAD, MAD);
657 NV50_IR_OPCODE_CASE(UMAX, MAX);
658 NV50_IR_OPCODE_CASE(UMIN, MIN);
659 NV50_IR_OPCODE_CASE(UMOD, MOD);
660 NV50_IR_OPCODE_CASE(UMUL, MUL);
661 NV50_IR_OPCODE_CASE(USEQ, SET);
662 NV50_IR_OPCODE_CASE(USGE, SET);
663 NV50_IR_OPCODE_CASE(USHR, SHR);
664 NV50_IR_OPCODE_CASE(USLT, SET);
665 NV50_IR_OPCODE_CASE(USNE, SET);
666
667 NV50_IR_OPCODE_CASE(DABS, ABS);
668 NV50_IR_OPCODE_CASE(DNEG, NEG);
669 NV50_IR_OPCODE_CASE(DADD, ADD);
670 NV50_IR_OPCODE_CASE(DMUL, MUL);
671 NV50_IR_OPCODE_CASE(DMAX, MAX);
672 NV50_IR_OPCODE_CASE(DMIN, MIN);
673 NV50_IR_OPCODE_CASE(DSLT, SET);
674 NV50_IR_OPCODE_CASE(DSGE, SET);
675 NV50_IR_OPCODE_CASE(DSEQ, SET);
676 NV50_IR_OPCODE_CASE(DSNE, SET);
677 NV50_IR_OPCODE_CASE(DRCP, RCP);
678 NV50_IR_OPCODE_CASE(DSQRT, SQRT);
679 NV50_IR_OPCODE_CASE(DMAD, MAD);
680 NV50_IR_OPCODE_CASE(D2I, CVT);
681 NV50_IR_OPCODE_CASE(D2U, CVT);
682 NV50_IR_OPCODE_CASE(I2D, CVT);
683 NV50_IR_OPCODE_CASE(U2D, CVT);
684 NV50_IR_OPCODE_CASE(DRSQ, RSQ);
685 NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
686 NV50_IR_OPCODE_CASE(DCEIL, CEIL);
687 NV50_IR_OPCODE_CASE(DFLR, FLOOR);
688 NV50_IR_OPCODE_CASE(DROUND, CVT);
689
690 NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
691 NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
692
693 NV50_IR_OPCODE_CASE(SAMPLE, TEX);
694 NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
695 NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
696 NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
697 NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
698 NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
699 NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);
700 NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);
701 NV50_IR_OPCODE_CASE(GATHER4, TXG);
702 NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
703
704 NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);
705 NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);
706 NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);
707 NV50_IR_OPCODE_CASE(ATOMAND, ATOM);
708 NV50_IR_OPCODE_CASE(ATOMOR, ATOM);
709 NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);
710 NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);
711 NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);
712 NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);
713 NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);
714
715 NV50_IR_OPCODE_CASE(TEX2, TEX);
716 NV50_IR_OPCODE_CASE(TXB2, TXB);
717 NV50_IR_OPCODE_CASE(TXL2, TXL);
718
719 NV50_IR_OPCODE_CASE(IBFE, EXTBF);
720 NV50_IR_OPCODE_CASE(UBFE, EXTBF);
721 NV50_IR_OPCODE_CASE(BFI, INSBF);
722 NV50_IR_OPCODE_CASE(BREV, EXTBF);
723 NV50_IR_OPCODE_CASE(POPC, POPCNT);
724 NV50_IR_OPCODE_CASE(LSB, BFIND);
725 NV50_IR_OPCODE_CASE(IMSB, BFIND);
726 NV50_IR_OPCODE_CASE(UMSB, BFIND);
727
728 NV50_IR_OPCODE_CASE(END, EXIT);
729
730 default:
731 return nv50_ir::OP_NOP;
732 }
733 }
734
735 static uint16_t opcodeToSubOp(uint opcode)
736 {
737 switch (opcode) {
738 case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL);
739 case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL);
740 case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL);
741 case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD;
742 case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH;
743 case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS;
744 case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND;
745 case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR;
746 case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR;
747 case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN;
748 case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN;
749 case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX;
750 case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX;
751 case TGSI_OPCODE_IMUL_HI:
752 case TGSI_OPCODE_UMUL_HI:
753 return NV50_IR_SUBOP_MUL_HIGH;
754 default:
755 return 0;
756 }
757 }
758
759 bool Instruction::checkDstSrcAliasing() const
760 {
761 if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
762 return false;
763
764 for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
765 if (insn->Src[s].Register.File == TGSI_FILE_NULL)
766 break;
767 if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
768 insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
769 return true;
770 }
771 return false;
772 }
773
774 class Source
775 {
776 public:
777 Source(struct nv50_ir_prog_info *);
778 ~Source();
779
780 public:
781 bool scanSource();
782 unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }
783
784 public:
785 struct tgsi_shader_info scan;
786 struct tgsi_full_instruction *insns;
787 const struct tgsi_token *tokens;
788 struct nv50_ir_prog_info *info;
789
790 nv50_ir::DynArray tempArrays;
791 nv50_ir::DynArray immdArrays;
792
793 typedef nv50_ir::BuildUtil::Location Location;
794 // these registers are per-subroutine, cannot be used for parameter passing
795 std::set<Location> locals;
796
797 bool mainTempsInLMem;
798
799 int clipVertexOutput;
800
801 struct TextureView {
802 uint8_t target; // TGSI_TEXTURE_*
803 };
804 std::vector<TextureView> textureViews;
805
806 struct Resource {
807 uint8_t target; // TGSI_TEXTURE_*
808 bool raw;
809 uint8_t slot; // $surface index
810 };
811 std::vector<Resource> resources;
812
813 private:
814 int inferSysValDirection(unsigned sn) const;
815 bool scanDeclaration(const struct tgsi_full_declaration *);
816 bool scanInstruction(const struct tgsi_full_instruction *);
817 void scanProperty(const struct tgsi_full_property *);
818 void scanImmediate(const struct tgsi_full_immediate *);
819
820 inline bool isEdgeFlagPassthrough(const Instruction&) const;
821 };
822
823 Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
824 {
825 tokens = (const struct tgsi_token *)info->bin.source;
826
827 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
828 tgsi_dump(tokens, 0);
829
830 mainTempsInLMem = FALSE;
831 }
832
833 Source::~Source()
834 {
835 if (insns)
836 FREE(insns);
837
838 if (info->immd.data)
839 FREE(info->immd.data);
840 if (info->immd.type)
841 FREE(info->immd.type);
842 }
843
844 bool Source::scanSource()
845 {
846 unsigned insnCount = 0;
847 struct tgsi_parse_context parse;
848
849 tgsi_scan_shader(tokens, &scan);
850
851 insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
852 sizeof(insns[0]));
853 if (!insns)
854 return false;
855
856 clipVertexOutput = -1;
857
858 textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
859 resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
860
861 info->immd.bufSize = 0;
862
863 info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
864 info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
865 info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
866
867 if (info->type == PIPE_SHADER_FRAGMENT) {
868 info->prop.fp.writesDepth = scan.writes_z;
869 info->prop.fp.usesDiscard = scan.uses_kill;
870 } else
871 if (info->type == PIPE_SHADER_GEOMETRY) {
872 info->prop.gp.instanceCount = 1; // default value
873 }
874
875 info->io.viewportId = -1;
876
877 info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
878 info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
879
880 tgsi_parse_init(&parse, tokens);
881 while (!tgsi_parse_end_of_tokens(&parse)) {
882 tgsi_parse_token(&parse);
883
884 switch (parse.FullToken.Token.Type) {
885 case TGSI_TOKEN_TYPE_IMMEDIATE:
886 scanImmediate(&parse.FullToken.FullImmediate);
887 break;
888 case TGSI_TOKEN_TYPE_DECLARATION:
889 scanDeclaration(&parse.FullToken.FullDeclaration);
890 break;
891 case TGSI_TOKEN_TYPE_INSTRUCTION:
892 insns[insnCount++] = parse.FullToken.FullInstruction;
893 scanInstruction(&parse.FullToken.FullInstruction);
894 break;
895 case TGSI_TOKEN_TYPE_PROPERTY:
896 scanProperty(&parse.FullToken.FullProperty);
897 break;
898 default:
899 INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
900 break;
901 }
902 }
903 tgsi_parse_free(&parse);
904
905 if (mainTempsInLMem)
906 info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
907
908 if (info->io.genUserClip > 0) {
909 info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
910
911 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
912
913 for (unsigned int n = 0; n < nOut; ++n) {
914 unsigned int i = info->numOutputs++;
915 info->out[i].id = i;
916 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
917 info->out[i].si = n;
918 info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
919 }
920 }
921
922 return info->assignSlots(info) == 0;
923 }
924
925 void Source::scanProperty(const struct tgsi_full_property *prop)
926 {
927 switch (prop->Property.PropertyName) {
928 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
929 info->prop.gp.outputPrim = prop->u[0].Data;
930 break;
931 case TGSI_PROPERTY_GS_INPUT_PRIM:
932 info->prop.gp.inputPrim = prop->u[0].Data;
933 break;
934 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
935 info->prop.gp.maxVertices = prop->u[0].Data;
936 break;
937 case TGSI_PROPERTY_GS_INVOCATIONS:
938 info->prop.gp.instanceCount = prop->u[0].Data;
939 break;
940 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
941 info->prop.fp.separateFragData = TRUE;
942 break;
943 case TGSI_PROPERTY_FS_COORD_ORIGIN:
944 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
945 // we don't care
946 break;
947 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
948 info->io.genUserClip = -1;
949 break;
950 default:
951 INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
952 break;
953 }
954 }
955
956 void Source::scanImmediate(const struct tgsi_full_immediate *imm)
957 {
958 const unsigned n = info->immd.count++;
959
960 assert(n < scan.immediate_count);
961
962 for (int c = 0; c < 4; ++c)
963 info->immd.data[n * 4 + c] = imm->u[c].Uint;
964
965 info->immd.type[n] = imm->Immediate.DataType;
966 }
967
968 int Source::inferSysValDirection(unsigned sn) const
969 {
970 switch (sn) {
971 case TGSI_SEMANTIC_INSTANCEID:
972 case TGSI_SEMANTIC_VERTEXID:
973 return 1;
974 case TGSI_SEMANTIC_LAYER:
975 #if 0
976 case TGSI_SEMANTIC_VIEWPORTINDEX:
977 return 0;
978 #endif
979 case TGSI_SEMANTIC_PRIMID:
980 return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
981 default:
982 return 0;
983 }
984 }
985
986 bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
987 {
988 unsigned i, c;
989 unsigned sn = TGSI_SEMANTIC_GENERIC;
990 unsigned si = 0;
991 const unsigned first = decl->Range.First, last = decl->Range.Last;
992
993 if (decl->Declaration.Semantic) {
994 sn = decl->Semantic.Name;
995 si = decl->Semantic.Index;
996 }
997
998 if (decl->Declaration.Local) {
999 for (i = first; i <= last; ++i) {
1000 for (c = 0; c < 4; ++c) {
1001 locals.insert(
1002 Location(decl->Declaration.File, decl->Dim.Index2D, i, c));
1003 }
1004 }
1005 }
1006
1007 switch (decl->Declaration.File) {
1008 case TGSI_FILE_INPUT:
1009 if (info->type == PIPE_SHADER_VERTEX) {
1010 // all vertex attributes are equal
1011 for (i = first; i <= last; ++i) {
1012 info->in[i].sn = TGSI_SEMANTIC_GENERIC;
1013 info->in[i].si = i;
1014 }
1015 } else {
1016 for (i = first; i <= last; ++i, ++si) {
1017 info->in[i].id = i;
1018 info->in[i].sn = sn;
1019 info->in[i].si = si;
1020 if (info->type == PIPE_SHADER_FRAGMENT) {
1021 // translate interpolation mode
1022 switch (decl->Interp.Interpolate) {
1023 case TGSI_INTERPOLATE_CONSTANT:
1024 info->in[i].flat = 1;
1025 break;
1026 case TGSI_INTERPOLATE_COLOR:
1027 info->in[i].sc = 1;
1028 break;
1029 case TGSI_INTERPOLATE_LINEAR:
1030 info->in[i].linear = 1;
1031 break;
1032 default:
1033 break;
1034 }
1035 if (decl->Interp.Location || info->io.sampleInterp)
1036 info->in[i].centroid = 1;
1037 }
1038 }
1039 }
1040 break;
1041 case TGSI_FILE_OUTPUT:
1042 for (i = first; i <= last; ++i, ++si) {
1043 switch (sn) {
1044 case TGSI_SEMANTIC_POSITION:
1045 if (info->type == PIPE_SHADER_FRAGMENT)
1046 info->io.fragDepth = i;
1047 else
1048 if (clipVertexOutput < 0)
1049 clipVertexOutput = i;
1050 break;
1051 case TGSI_SEMANTIC_COLOR:
1052 if (info->type == PIPE_SHADER_FRAGMENT)
1053 info->prop.fp.numColourResults++;
1054 break;
1055 case TGSI_SEMANTIC_EDGEFLAG:
1056 info->io.edgeFlagOut = i;
1057 break;
1058 case TGSI_SEMANTIC_CLIPVERTEX:
1059 clipVertexOutput = i;
1060 break;
1061 case TGSI_SEMANTIC_CLIPDIST:
1062 info->io.clipDistanceMask |=
1063 decl->Declaration.UsageMask << (si * 4);
1064 info->io.genUserClip = -1;
1065 break;
1066 case TGSI_SEMANTIC_SAMPLEMASK:
1067 info->io.sampleMask = i;
1068 break;
1069 case TGSI_SEMANTIC_VIEWPORT_INDEX:
1070 info->io.viewportId = i;
1071 break;
1072 default:
1073 break;
1074 }
1075 info->out[i].id = i;
1076 info->out[i].sn = sn;
1077 info->out[i].si = si;
1078 }
1079 break;
1080 case TGSI_FILE_SYSTEM_VALUE:
1081 switch (sn) {
1082 case TGSI_SEMANTIC_INSTANCEID:
1083 info->io.instanceId = first;
1084 break;
1085 case TGSI_SEMANTIC_VERTEXID:
1086 info->io.vertexId = first;
1087 break;
1088 default:
1089 break;
1090 }
1091 for (i = first; i <= last; ++i, ++si) {
1092 info->sv[i].sn = sn;
1093 info->sv[i].si = si;
1094 info->sv[i].input = inferSysValDirection(sn);
1095 }
1096 break;
1097 case TGSI_FILE_RESOURCE:
1098 for (i = first; i <= last; ++i) {
1099 resources[i].target = decl->Resource.Resource;
1100 resources[i].raw = decl->Resource.Raw;
1101 resources[i].slot = i;
1102 }
1103 break;
1104 case TGSI_FILE_SAMPLER_VIEW:
1105 for (i = first; i <= last; ++i)
1106 textureViews[i].target = decl->SamplerView.Resource;
1107 break;
1108 case TGSI_FILE_NULL:
1109 case TGSI_FILE_TEMPORARY:
1110 case TGSI_FILE_ADDRESS:
1111 case TGSI_FILE_CONSTANT:
1112 case TGSI_FILE_IMMEDIATE:
1113 case TGSI_FILE_PREDICATE:
1114 case TGSI_FILE_SAMPLER:
1115 break;
1116 default:
1117 ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
1118 return false;
1119 }
1120 return true;
1121 }
1122
1123 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
1124 {
1125 return insn.getOpcode() == TGSI_OPCODE_MOV &&
1126 insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
1127 insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
1128 }
1129
1130 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
1131 {
1132 Instruction insn(inst);
1133
1134 if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
1135 info->numBarriers = 1;
1136
1137 if (insn.dstCount()) {
1138 if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
1139 Instruction::DstRegister dst = insn.getDst(0);
1140
1141 if (dst.isIndirect(0))
1142 for (unsigned i = 0; i < info->numOutputs; ++i)
1143 info->out[i].mask = 0xf;
1144 else
1145 info->out[dst.getIndex(0)].mask |= dst.getMask();
1146
1147 if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
1148 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
1149 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||
1150 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||
1151 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
1152 info->out[dst.getIndex(0)].mask &= 1;
1153
1154 if (isEdgeFlagPassthrough(insn))
1155 info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
1156 } else
1157 if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
1158 if (insn.getDst(0).isIndirect(0))
1159 mainTempsInLMem = TRUE;
1160 }
1161 }
1162
1163 for (unsigned s = 0; s < insn.srcCount(); ++s) {
1164 Instruction::SrcRegister src = insn.getSrc(s);
1165 if (src.getFile() == TGSI_FILE_TEMPORARY) {
1166 if (src.isIndirect(0))
1167 mainTempsInLMem = TRUE;
1168 } else
1169 if (src.getFile() == TGSI_FILE_RESOURCE) {
1170 if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
1171 info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
1172 0x1 : 0x2;
1173 }
1174 if (src.getFile() != TGSI_FILE_INPUT)
1175 continue;
1176 unsigned mask = insn.srcMask(s);
1177
1178 if (src.isIndirect(0)) {
1179 for (unsigned i = 0; i < info->numInputs; ++i)
1180 info->in[i].mask = 0xf;
1181 } else {
1182 const int i = src.getIndex(0);
1183 for (unsigned c = 0; c < 4; ++c) {
1184 if (!(mask & (1 << c)))
1185 continue;
1186 int k = src.getSwizzle(c);
1187 if (k <= TGSI_SWIZZLE_W)
1188 info->in[i].mask |= 1 << k;
1189 }
1190 switch (info->in[i].sn) {
1191 case TGSI_SEMANTIC_PSIZE:
1192 case TGSI_SEMANTIC_PRIMID:
1193 case TGSI_SEMANTIC_FOG:
1194 info->in[i].mask &= 0x1;
1195 break;
1196 case TGSI_SEMANTIC_PCOORD:
1197 info->in[i].mask &= 0x3;
1198 break;
1199 default:
1200 break;
1201 }
1202 }
1203 }
1204 return true;
1205 }
1206
1207 nv50_ir::TexInstruction::Target
1208 Instruction::getTexture(const tgsi::Source *code, int s) const
1209 {
1210 // XXX: indirect access
1211 unsigned int r;
1212
1213 switch (getSrc(s).getFile()) {
1214 case TGSI_FILE_RESOURCE:
1215 r = getSrc(s).getIndex(0);
1216 return translateTexture(code->resources.at(r).target);
1217 case TGSI_FILE_SAMPLER_VIEW:
1218 r = getSrc(s).getIndex(0);
1219 return translateTexture(code->textureViews.at(r).target);
1220 default:
1221 return translateTexture(insn->Texture.Texture);
1222 }
1223 }
1224
1225 } // namespace tgsi
1226
1227 namespace {
1228
1229 using namespace nv50_ir;
1230
1231 class Converter : public BuildUtil
1232 {
1233 public:
1234 Converter(Program *, const tgsi::Source *);
1235 ~Converter();
1236
1237 bool run();
1238
1239 private:
1240 struct Subroutine
1241 {
1242 Subroutine(Function *f) : f(f) { }
1243 Function *f;
1244 ValueMap values;
1245 };
1246
1247 Value *shiftAddress(Value *);
1248 Value *getVertexBase(int s);
1249 DataArray *getArrayForFile(unsigned file, int idx);
1250 Value *fetchSrc(int s, int c);
1251 Value *acquireDst(int d, int c);
1252 void storeDst(int d, int c, Value *);
1253
1254 Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
1255 void storeDst(const tgsi::Instruction::DstRegister dst, int c,
1256 Value *val, Value *ptr);
1257
1258 Value *applySrcMod(Value *, int s, int c);
1259
1260 Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
1261 Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
1262 Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);
1263
1264 bool handleInstruction(const struct tgsi_full_instruction *);
1265 void exportOutputs();
1266 inline Subroutine *getSubroutine(unsigned ip);
1267 inline Subroutine *getSubroutine(Function *);
1268 inline bool isEndOfSubroutine(uint ip);
1269
1270 void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);
1271
1272 // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
1273 void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
1274 void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
1275 void handleTXF(Value *dst0[4], int R, int L_M);
1276 void handleTXQ(Value *dst0[4], enum TexQuery);
1277 void handleLIT(Value *dst0[4]);
1278 void handleUserClipPlanes();
1279
1280 Symbol *getResourceBase(int r);
1281 void getResourceCoords(std::vector<Value *>&, int r, int s);
1282
1283 void handleLOAD(Value *dst0[4]);
1284 void handleSTORE();
1285 void handleATOM(Value *dst0[4], DataType, uint16_t subOp);
1286
1287 void handleINTERP(Value *dst0[4]);
1288
1289 Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
1290
1291 void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
1292
1293 Value *buildDot(int dim);
1294
1295 class BindArgumentsPass : public Pass {
1296 public:
1297 BindArgumentsPass(Converter &conv) : conv(conv) { }
1298
1299 private:
1300 Converter &conv;
1301 Subroutine *sub;
1302
1303 inline const Location *getValueLocation(Subroutine *, Value *);
1304
1305 template<typename T> inline void
1306 updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
1307 T (Function::*proto));
1308
1309 template<typename T> inline void
1310 updatePrototype(BitSet *set, void (Function::*updateSet)(),
1311 T (Function::*proto));
1312
1313 protected:
1314 bool visit(Function *);
1315 bool visit(BasicBlock *bb) { return false; }
1316 };
1317
1318 private:
1319 const tgsi::Source *code;
1320 const struct nv50_ir_prog_info *info;
1321
1322 struct {
1323 std::map<unsigned, Subroutine> map;
1324 Subroutine *cur;
1325 } sub;
1326
1327 uint ip; // instruction pointer
1328
1329 tgsi::Instruction tgsi;
1330
1331 DataType dstTy;
1332 DataType srcTy;
1333
1334 DataArray tData; // TGSI_FILE_TEMPORARY
1335 DataArray aData; // TGSI_FILE_ADDRESS
1336 DataArray pData; // TGSI_FILE_PREDICATE
1337 DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
1338
1339 Value *zero;
1340 Value *fragCoord[4];
1341 Value *clipVtx[4];
1342
1343 Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
1344 uint8_t vtxBaseValid;
1345
1346 Stack condBBs; // fork BB, then else clause BB
1347 Stack joinBBs; // fork BB, for inserting join ops on ENDIF
1348 Stack loopBBs; // loop headers
1349 Stack breakBBs; // end of / after loop
1350
1351 Value *viewport;
1352 };
1353
1354 Symbol *
1355 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
1356 {
1357 const int swz = src.getSwizzle(c);
1358
1359 return makeSym(src.getFile(),
1360 src.is2D() ? src.getIndex(1) : 0,
1361 src.isIndirect(0) ? -1 : src.getIndex(0), swz,
1362 src.getIndex(0) * 16 + swz * 4);
1363 }
1364
1365 Symbol *
1366 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
1367 {
1368 return makeSym(dst.getFile(),
1369 dst.is2D() ? dst.getIndex(1) : 0,
1370 dst.isIndirect(0) ? -1 : dst.getIndex(0), c,
1371 dst.getIndex(0) * 16 + c * 4);
1372 }
1373
1374 Symbol *
1375 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
1376 {
1377 Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
1378
1379 sym->reg.fileIndex = fileIdx;
1380
1381 if (idx >= 0) {
1382 if (sym->reg.file == FILE_SHADER_INPUT)
1383 sym->setOffset(info->in[idx].slot[c] * 4);
1384 else
1385 if (sym->reg.file == FILE_SHADER_OUTPUT)
1386 sym->setOffset(info->out[idx].slot[c] * 4);
1387 else
1388 if (sym->reg.file == FILE_SYSTEM_VALUE)
1389 sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
1390 else
1391 sym->setOffset(address);
1392 } else {
1393 sym->setOffset(address);
1394 }
1395 return sym;
1396 }
1397
1398 static inline uint8_t
1399 translateInterpMode(const struct nv50_ir_varying *var, operation& op)
1400 {
1401 uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
1402
1403 if (var->flat)
1404 mode = NV50_IR_INTERP_FLAT;
1405 else
1406 if (var->linear)
1407 mode = NV50_IR_INTERP_LINEAR;
1408 else
1409 if (var->sc)
1410 mode = NV50_IR_INTERP_SC;
1411
1412 op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
1413 ? OP_PINTERP : OP_LINTERP;
1414
1415 if (var->centroid)
1416 mode |= NV50_IR_INTERP_CENTROID;
1417
1418 return mode;
1419 }
1420
1421 Value *
1422 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1423 {
1424 operation op;
1425
1426 // XXX: no way to know interpolation mode if we don't know what's accessed
1427 const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
1428 src.getIndex(0)], op);
1429
1430 Instruction *insn = new_Instruction(func, op, TYPE_F32);
1431
1432 insn->setDef(0, getScratch());
1433 insn->setSrc(0, srcToSym(src, c));
1434 if (op == OP_PINTERP)
1435 insn->setSrc(1, fragCoord[3]);
1436 if (ptr)
1437 insn->setIndirect(0, 0, ptr);
1438
1439 insn->setInterpolate(mode);
1440
1441 bb->insertTail(insn);
1442 return insn->getDef(0);
1443 }
1444
1445 Value *
1446 Converter::applySrcMod(Value *val, int s, int c)
1447 {
1448 Modifier m = tgsi.getSrc(s).getMod(c);
1449 DataType ty = tgsi.inferSrcType();
1450
1451 if (m & Modifier(NV50_IR_MOD_ABS))
1452 val = mkOp1v(OP_ABS, ty, getScratch(), val);
1453
1454 if (m & Modifier(NV50_IR_MOD_NEG))
1455 val = mkOp1v(OP_NEG, ty, getScratch(), val);
1456
1457 return val;
1458 }
1459
1460 Value *
1461 Converter::getVertexBase(int s)
1462 {
1463 assert(s < 5);
1464 if (!(vtxBaseValid & (1 << s))) {
1465 const int index = tgsi.getSrc(s).getIndex(1);
1466 Value *rel = NULL;
1467 if (tgsi.getSrc(s).isIndirect(1))
1468 rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
1469 vtxBaseValid |= 1 << s;
1470 vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1471 mkImm(index), rel);
1472 }
1473 return vtxBase[s];
1474 }
1475
1476 Value *
1477 Converter::fetchSrc(int s, int c)
1478 {
1479 Value *res;
1480 Value *ptr = NULL, *dimRel = NULL;
1481
1482 tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
1483
1484 if (src.isIndirect(0))
1485 ptr = fetchSrc(src.getIndirect(0), 0, NULL);
1486
1487 if (src.is2D()) {
1488 switch (src.getFile()) {
1489 case TGSI_FILE_INPUT:
1490 dimRel = getVertexBase(s);
1491 break;
1492 case TGSI_FILE_CONSTANT:
1493 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
1494 if (src.isIndirect(1))
1495 dimRel = fetchSrc(src.getIndirect(1), 0, 0);
1496 break;
1497 default:
1498 break;
1499 }
1500 }
1501
1502 res = fetchSrc(src, c, ptr);
1503
1504 if (dimRel)
1505 res->getInsn()->setIndirect(0, 1, dimRel);
1506
1507 return applySrcMod(res, s, c);
1508 }
1509
1510 Converter::DataArray *
1511 Converter::getArrayForFile(unsigned file, int idx)
1512 {
1513 switch (file) {
1514 case TGSI_FILE_TEMPORARY:
1515 return &tData;
1516 case TGSI_FILE_PREDICATE:
1517 return &pData;
1518 case TGSI_FILE_ADDRESS:
1519 return &aData;
1520 case TGSI_FILE_OUTPUT:
1521 assert(prog->getType() == Program::TYPE_FRAGMENT);
1522 return &oData;
1523 default:
1524 assert(!"invalid/unhandled TGSI source file");
1525 return NULL;
1526 }
1527 }
1528
1529 Value *
1530 Converter::shiftAddress(Value *index)
1531 {
1532 if (!index)
1533 return NULL;
1534 return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
1535 }
1536
1537 Value *
1538 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1539 {
1540 const int idx2d = src.is2D() ? src.getIndex(1) : 0;
1541 const int idx = src.getIndex(0);
1542 const int swz = src.getSwizzle(c);
1543
1544 switch (src.getFile()) {
1545 case TGSI_FILE_IMMEDIATE:
1546 assert(!ptr);
1547 return loadImm(NULL, info->immd.data[idx * 4 + swz]);
1548 case TGSI_FILE_CONSTANT:
1549 return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
1550 case TGSI_FILE_INPUT:
1551 if (prog->getType() == Program::TYPE_FRAGMENT) {
1552 // don't load masked inputs, won't be assigned a slot
1553 if (!ptr && !(info->in[idx].mask & (1 << swz)))
1554 return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
1555 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
1556 return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
1557 return interpolate(src, c, shiftAddress(ptr));
1558 } else
1559 if (prog->getType() == Program::TYPE_GEOMETRY) {
1560 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
1561 return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
1562 // XXX: This is going to be a problem with scalar arrays, i.e. when
1563 // we cannot assume that the address is given in units of vec4.
1564 //
1565 // nv50 and nvc0 need different things here, so let the lowering
1566 // passes decide what to do with the address
1567 if (ptr)
1568 return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
1569 }
1570 return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
1571 case TGSI_FILE_OUTPUT:
1572 assert(!"load from output file");
1573 return NULL;
1574 case TGSI_FILE_SYSTEM_VALUE:
1575 assert(!ptr);
1576 return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
1577 default:
1578 return getArrayForFile(src.getFile(), idx2d)->load(
1579 sub.cur->values, idx, swz, shiftAddress(ptr));
1580 }
1581 }
1582
1583 Value *
1584 Converter::acquireDst(int d, int c)
1585 {
1586 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
1587 const unsigned f = dst.getFile();
1588 const int idx = dst.getIndex(0);
1589 const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
1590
1591 if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE)
1592 return NULL;
1593
1594 if (dst.isIndirect(0) ||
1595 f == TGSI_FILE_SYSTEM_VALUE ||
1596 (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
1597 return getScratch();
1598
1599 return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
1600 }
1601
1602 void
1603 Converter::storeDst(int d, int c, Value *val)
1604 {
1605 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
1606
1607 if (tgsi.getSaturate()) {
1608 mkOp1(OP_SAT, dstTy, val, val);
1609 }
1610
1611 Value *ptr = NULL;
1612 if (dst.isIndirect(0))
1613 ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));
1614
1615 if (info->io.genUserClip > 0 &&
1616 dst.getFile() == TGSI_FILE_OUTPUT &&
1617 !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
1618 mkMov(clipVtx[c], val);
1619 val = clipVtx[c];
1620 }
1621
1622 storeDst(dst, c, val, ptr);
1623 }
1624
1625 void
1626 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
1627 Value *val, Value *ptr)
1628 {
1629 const unsigned f = dst.getFile();
1630 const int idx = dst.getIndex(0);
1631 const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
1632
1633 if (f == TGSI_FILE_SYSTEM_VALUE) {
1634 assert(!ptr);
1635 mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
1636 } else
1637 if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
1638
1639 if (ptr || (info->out[idx].mask & (1 << c))) {
1640 /* Save the viewport index into a scratch register so that it can be
1641 exported at EMIT time */
1642 if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1643 viewport != NULL)
1644 mkOp1(OP_MOV, TYPE_U32, viewport, val);
1645 else
1646 mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
1647 }
1648 } else
1649 if (f == TGSI_FILE_TEMPORARY ||
1650 f == TGSI_FILE_PREDICATE ||
1651 f == TGSI_FILE_ADDRESS ||
1652 f == TGSI_FILE_OUTPUT) {
1653 getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
1654 } else {
1655 assert(!"invalid dst file");
1656 }
1657 }
1658
// Loop header: steps @chan over components 0..3 and executes the statement
// that follows only for those components which destination @d of @inst does
// not report as masked.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst)  \
   for ((chan) = 0; (chan) < 4; ++(chan))            \
      if (!(inst).getDst(d).isMasked(chan))
1662
1663 Value *
1664 Converter::buildDot(int dim)
1665 {
1666 assert(dim > 0);
1667
1668 Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
1669 Value *dotp = getScratch();
1670
1671 mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
1672
1673 for (int c = 1; c < dim; ++c) {
1674 src0 = fetchSrc(0, c);
1675 src1 = fetchSrc(1, c);
1676 mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp);
1677 }
1678 return dotp;
1679 }
1680
1681 void
1682 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
1683 {
1684 FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
1685 join->fixed = 1;
1686 conv->insertHead(join);
1687
1688 fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
1689 fork->insertBefore(fork->getExit(), fork->joinAt);
1690 }
1691
1692 void
1693 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
1694 {
1695 unsigned rIdx = 0, sIdx = 0;
1696
1697 if (R >= 0)
1698 rIdx = tgsi.getSrc(R).getIndex(0);
1699 if (S >= 0)
1700 sIdx = tgsi.getSrc(S).getIndex(0);
1701
1702 tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
1703
1704 if (tgsi.getSrc(R).isIndirect(0)) {
1705 tex->tex.rIndirectSrc = s;
1706 tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
1707 }
1708 if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
1709 tex->tex.sIndirectSrc = s;
1710 tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
1711 }
1712 }
1713
1714 void
1715 Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
1716 {
1717 TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
1718 tex->tex.query = query;
1719 unsigned int c, d;
1720
1721 for (d = 0, c = 0; c < 4; ++c) {
1722 if (!dst0[c])
1723 continue;
1724 tex->tex.mask |= 1 << c;
1725 tex->setDef(d++, dst0[c]);
1726 }
1727 tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
1728
1729 setTexRS(tex, c, 1, -1);
1730
1731 bb->insertTail(tex);
1732 }
1733
1734 void
1735 Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
1736 {
1737 Value *proj = fetchSrc(0, 3);
1738 Instruction *insn = proj->getUniqueInsn();
1739 int c;
1740
1741 if (insn->op == OP_PINTERP) {
1742 bb->insertTail(insn = cloneForward(func, insn));
1743 insn->op = OP_LINTERP;
1744 insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
1745 insn->setSrc(1, NULL);
1746 proj = insn->getDef(0);
1747 }
1748 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
1749
1750 for (c = 0; c < 4; ++c) {
1751 if (!(mask & (1 << c)))
1752 continue;
1753 if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
1754 continue;
1755 mask &= ~(1 << c);
1756
1757 bb->insertTail(insn = cloneForward(func, insn));
1758 insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
1759 insn->setSrc(1, proj);
1760 dst[c] = insn->getDef(0);
1761 }
1762 if (!mask)
1763 return;
1764
1765 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
1766
1767 for (c = 0; c < 4; ++c)
1768 if (mask & (1 << c))
1769 dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
1770 }
1771
1772 // order of nv50 ir sources: x y z layer lod/bias shadow
1773 // order of TGSI TEX sources: x y z layer shadow lod/bias
1774 // lowering will finally set the hw specific order (like array first on nvc0)
1775 void
1776 Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
1777 {
1778 Value *val;
1779 Value *arg[4], *src[8];
1780 Value *lod = NULL, *shd = NULL;
1781 unsigned int s, c, d;
1782 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
1783
1784 TexInstruction::Target tgt = tgsi.getTexture(code, R);
1785
1786 for (s = 0; s < tgt.getArgCount(); ++s)
1787 arg[s] = src[s] = fetchSrc(0, s);
1788
1789 if (texi->op == OP_TXL || texi->op == OP_TXB)
1790 lod = fetchSrc(L >> 4, L & 3);
1791
1792 if (C == 0x0f)
1793 C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
1794
1795 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 &&
1796 tgt == TEX_TARGET_CUBE_ARRAY_SHADOW)
1797 shd = fetchSrc(1, 0);
1798 else if (tgt.isShadow())
1799 shd = fetchSrc(C >> 4, C & 3);
1800
1801 if (texi->op == OP_TXD) {
1802 for (c = 0; c < tgt.getDim(); ++c) {
1803 texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
1804 texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
1805 }
1806 }
1807
1808 // cube textures don't care about projection value, it's divided out
1809 if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
1810 unsigned int n = tgt.getDim();
1811 if (shd) {
1812 arg[n] = shd;
1813 ++n;
1814 assert(tgt.getDim() == tgt.getArgCount());
1815 }
1816 loadProjTexCoords(src, arg, (1 << n) - 1);
1817 if (shd)
1818 shd = src[n - 1];
1819 }
1820
1821 if (tgt.isCube()) {
1822 for (c = 0; c < 3; ++c)
1823 src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
1824 val = getScratch();
1825 mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
1826 mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
1827 mkOp1(OP_RCP, TYPE_F32, val, val);
1828 for (c = 0; c < 3; ++c)
1829 src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
1830 }
1831
1832 for (c = 0, d = 0; c < 4; ++c) {
1833 if (dst[c]) {
1834 texi->setDef(d++, dst[c]);
1835 texi->tex.mask |= 1 << c;
1836 } else {
1837 // NOTE: maybe hook up def too, for CSE
1838 }
1839 }
1840 for (s = 0; s < tgt.getArgCount(); ++s)
1841 texi->setSrc(s, src[s]);
1842 if (lod)
1843 texi->setSrc(s++, lod);
1844 if (shd)
1845 texi->setSrc(s++, shd);
1846
1847 setTexRS(texi, s, R, S);
1848
1849 if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
1850 texi->tex.levelZero = true;
1851 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
1852 texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);
1853
1854 texi->tex.useOffsets = tgsi.getNumTexOffsets();
1855 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
1856 for (c = 0; c < 3; ++c) {
1857 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
1858 texi->offset[s][c].setInsn(texi);
1859 }
1860 }
1861
1862 bb->insertTail(texi);
1863 }
1864
1865 // 1st source: xyz = coordinates, w = lod/sample
1866 // 2nd source: offset
1867 void
1868 Converter::handleTXF(Value *dst[4], int R, int L_M)
1869 {
1870 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
1871 int ms;
1872 unsigned int c, d, s;
1873
1874 texi->tex.target = tgsi.getTexture(code, R);
1875
1876 ms = texi->tex.target.isMS() ? 1 : 0;
1877 texi->tex.levelZero = ms; /* MS textures don't have mip-maps */
1878
1879 for (c = 0, d = 0; c < 4; ++c) {
1880 if (dst[c]) {
1881 texi->setDef(d++, dst[c]);
1882 texi->tex.mask |= 1 << c;
1883 }
1884 }
1885 for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c)
1886 texi->setSrc(c, fetchSrc(0, c));
1887 texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms
1888
1889 setTexRS(texi, c, R, -1);
1890
1891 texi->tex.useOffsets = tgsi.getNumTexOffsets();
1892 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
1893 for (c = 0; c < 3; ++c) {
1894 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
1895 texi->offset[s][c].setInsn(texi);
1896 }
1897 }
1898
1899 bb->insertTail(texi);
1900 }
1901
1902 void
1903 Converter::handleLIT(Value *dst0[4])
1904 {
1905 Value *val0 = NULL;
1906 unsigned int mask = tgsi.getDst(0).getMask();
1907
1908 if (mask & (1 << 0))
1909 loadImm(dst0[0], 1.0f);
1910
1911 if (mask & (1 << 3))
1912 loadImm(dst0[3], 1.0f);
1913
1914 if (mask & (3 << 1)) {
1915 val0 = getScratch();
1916 mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
1917 if (mask & (1 << 1))
1918 mkMov(dst0[1], val0);
1919 }
1920
1921 if (mask & (1 << 2)) {
1922 Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
1923 Value *val1 = getScratch(), *val3 = getScratch();
1924
1925 Value *pos128 = loadImm(NULL, +127.999999f);
1926 Value *neg128 = loadImm(NULL, -127.999999f);
1927
1928 mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
1929 mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
1930 mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
1931 mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
1932
1933 mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0);
1934 }
1935 }
1936
1937 static inline bool
1938 isResourceSpecial(const int r)
1939 {
1940 return (r == TGSI_RESOURCE_GLOBAL ||
1941 r == TGSI_RESOURCE_LOCAL ||
1942 r == TGSI_RESOURCE_PRIVATE ||
1943 r == TGSI_RESOURCE_INPUT);
1944 }
1945
1946 static inline bool
1947 isResourceRaw(const tgsi::Source *code, const int r)
1948 {
1949 return isResourceSpecial(r) || code->resources[r].raw;
1950 }
1951
1952 static inline nv50_ir::TexTarget
1953 getResourceTarget(const tgsi::Source *code, int r)
1954 {
1955 if (isResourceSpecial(r))
1956 return nv50_ir::TEX_TARGET_BUFFER;
1957 return tgsi::translateTexture(code->resources.at(r).target);
1958 }
1959
1960 Symbol *
1961 Converter::getResourceBase(const int r)
1962 {
1963 Symbol *sym = NULL;
1964
1965 switch (r) {
1966 case TGSI_RESOURCE_GLOBAL:
1967 sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);
1968 break;
1969 case TGSI_RESOURCE_LOCAL:
1970 assert(prog->getType() == Program::TYPE_COMPUTE);
1971 sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
1972 info->prop.cp.sharedOffset);
1973 break;
1974 case TGSI_RESOURCE_PRIVATE:
1975 sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
1976 info->bin.tlsSpace);
1977 break;
1978 case TGSI_RESOURCE_INPUT:
1979 assert(prog->getType() == Program::TYPE_COMPUTE);
1980 sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
1981 info->prop.cp.inputOffset);
1982 break;
1983 default:
1984 sym = new_Symbol(prog,
1985 nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
1986 break;
1987 }
1988 return sym;
1989 }
1990
1991 void
1992 Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
1993 {
1994 const int arg =
1995 TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
1996
1997 for (int c = 0; c < arg; ++c)
1998 coords.push_back(fetchSrc(s, c));
1999
2000 // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
2001 if (r == TGSI_RESOURCE_LOCAL ||
2002 r == TGSI_RESOURCE_PRIVATE ||
2003 r == TGSI_RESOURCE_INPUT)
2004 coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
2005 coords[0]);
2006 }
2007
// Split the components selected by the 4-bit @mask into at most two runs of
// consecutive components.
//
// On entry @comp and @size must be zeroed.  On return comp[i] is the first
// component of run i and size[i] its length in components.  A single run of
// three components is split into two, of sizes 2+1 or, when it starts at
// component 1, 1+2.  Returns the number of runs described (1 or 2).
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else {
         if (size[n]) {
            // A gap after a run: the next run starts behind the gap.  The
            // start of the first run has to be taken into account, otherwise
            // a mask like 0xa (.yw) yields component 2 instead of 3.
            comp[n = 1] = comp[0] + size[0] + 1;
         } else {
            // still skipping leading unused components of the current run
            comp[n]++;
         }
      }
      mask >>= 1;
   }
   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
2032
2033 // For raw loads, granularity is 4 byte.
2034 // Usage of the texture read mask on OP_SULDP is not allowed.
2035 void
2036 Converter::handleLOAD(Value *dst0[4])
2037 {
2038 const int r = tgsi.getSrc(0).getIndex(0);
2039 int c;
2040 std::vector<Value *> off, src, ldv, def;
2041
2042 getResourceCoords(off, r, 1);
2043
2044 if (isResourceRaw(code, r)) {
2045 uint8_t mask = 0;
2046 uint8_t comp[2] = { 0, 0 };
2047 uint8_t size[2] = { 0, 0 };
2048
2049 Symbol *base = getResourceBase(r);
2050
2051 // determine the base and size of the at most 2 load ops
2052 for (c = 0; c < 4; ++c)
2053 if (!tgsi.getDst(0).isMasked(c))
2054 mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
2055
2056 int n = partitionLoadStore(comp, size, mask);
2057
2058 src = off;
2059
2060 def.resize(4); // index by component, the ones we need will be non-NULL
2061 for (c = 0; c < 4; ++c) {
2062 if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
2063 def[c] = dst0[c];
2064 else
2065 if (mask & (1 << c))
2066 def[c] = getScratch();
2067 }
2068
2069 const bool useLd = isResourceSpecial(r) ||
2070 (info->io.nv50styleSurfaces &&
2071 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2072
2073 for (int i = 0; i < n; ++i) {
2074 ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
2075
2076 if (comp[i]) // adjust x component of source address if necessary
2077 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2078 off[0], mkImm(comp[i] * 4));
2079 else
2080 src[0] = off[0];
2081
2082 if (useLd) {
2083 Instruction *ld =
2084 mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
2085 for (size_t c = 1; c < ldv.size(); ++c)
2086 ld->setDef(c, ldv[c]);
2087 } else {
2088 mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
2089 0, ldv, src)->dType = typeOfSize(size[i] * 4);
2090 }
2091 }
2092 } else {
2093 def.resize(4);
2094 for (c = 0; c < 4; ++c) {
2095 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2096 def[c] = getScratch();
2097 else
2098 def[c] = dst0[c];
2099 }
2100
2101 mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
2102 def, off);
2103 }
2104 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2105 if (dst0[c] != def[c])
2106 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2107 }
2108
2109 // For formatted stores, the write mask on OP_SUSTP can be used.
2110 // Raw stores have to be split.
2111 void
2112 Converter::handleSTORE()
2113 {
2114 const int r = tgsi.getDst(0).getIndex(0);
2115 int c;
2116 std::vector<Value *> off, src, dummy;
2117
2118 getResourceCoords(off, r, 0);
2119 src = off;
2120 const int s = src.size();
2121
2122 if (isResourceRaw(code, r)) {
2123 uint8_t comp[2] = { 0, 0 };
2124 uint8_t size[2] = { 0, 0 };
2125
2126 int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
2127
2128 Symbol *base = getResourceBase(r);
2129
2130 const bool useSt = isResourceSpecial(r) ||
2131 (info->io.nv50styleSurfaces &&
2132 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2133
2134 for (int i = 0; i < n; ++i) {
2135 if (comp[i]) // adjust x component of source address if necessary
2136 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2137 off[0], mkImm(comp[i] * 4));
2138 else
2139 src[0] = off[0];
2140
2141 const DataType stTy = typeOfSize(size[i] * 4);
2142
2143 if (useSt) {
2144 Instruction *st =
2145 mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
2146 for (c = 1; c < size[i]; ++c)
2147 st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
2148 st->setIndirect(0, 0, src[0]);
2149 } else {
2150 // attach values to be stored
2151 src.resize(s + size[i]);
2152 for (c = 0; c < size[i]; ++c)
2153 src[s + c] = fetchSrc(1, comp[i] + c);
2154 mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
2155 0, dummy, src)->setType(stTy);
2156 }
2157 }
2158 } else {
2159 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2160 src.push_back(fetchSrc(1, c));
2161
2162 mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
2163 dummy, src)->tex.mask = tgsi.getDst(0).getMask();
2164 }
2165 }
2166
2167 // XXX: These only work on resources with the single-component u32/s32 formats.
2168 // Therefore the result is replicated. This might not be intended by TGSI, but
2169 // operating on more than 1 component would produce undefined results because
2170 // they do not exist.
2171 void
2172 Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
2173 {
2174 const int r = tgsi.getSrc(0).getIndex(0);
2175 std::vector<Value *> srcv;
2176 std::vector<Value *> defv;
2177 LValue *dst = getScratch();
2178
2179 getResourceCoords(srcv, r, 1);
2180
2181 if (isResourceSpecial(r)) {
2182 assert(r != TGSI_RESOURCE_INPUT);
2183 Instruction *insn;
2184 insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0));
2185 insn->subOp = subOp;
2186 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2187 insn->setSrc(2, fetchSrc(3, 0));
2188 insn->setIndirect(0, 0, srcv.at(0));
2189 } else {
2190 operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP;
2191 TexTarget targ = getResourceTarget(code, r);
2192 int idx = code->resources[r].slot;
2193 defv.push_back(dst);
2194 srcv.push_back(fetchSrc(2, 0));
2195 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2196 srcv.push_back(fetchSrc(3, 0));
2197 TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv);
2198 tex->subOp = subOp;
2199 tex->tex.mask = 1;
2200 tex->setType(ty);
2201 }
2202
2203 for (int c = 0; c < 4; ++c)
2204 if (dst0[c])
2205 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
2206 }
2207
2208 void
2209 Converter::handleINTERP(Value *dst[4])
2210 {
2211 // Check whether the input is linear. All other attributes ignored.
2212 Instruction *insn;
2213 Value *offset = NULL, *ptr = NULL, *w = NULL;
2214 bool linear;
2215 operation op;
2216 int c, mode;
2217
2218 tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
2219 assert(src.getFile() == TGSI_FILE_INPUT);
2220
2221 if (src.isIndirect(0))
2222 ptr = fetchSrc(src.getIndirect(0), 0, NULL);
2223
2224 // XXX: no way to know interp mode if we don't know the index
2225 linear = info->in[ptr ? 0 : src.getIndex(0)].linear;
2226 if (linear) {
2227 op = OP_LINTERP;
2228 mode = NV50_IR_INTERP_LINEAR;
2229 } else {
2230 op = OP_PINTERP;
2231 mode = NV50_IR_INTERP_PERSPECTIVE;
2232 }
2233
2234 switch (tgsi.getOpcode()) {
2235 case TGSI_OPCODE_INTERP_CENTROID:
2236 mode |= NV50_IR_INTERP_CENTROID;
2237 break;
2238 case TGSI_OPCODE_INTERP_SAMPLE:
2239 insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0));
2240 insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2241 mode |= NV50_IR_INTERP_OFFSET;
2242 break;
2243 case TGSI_OPCODE_INTERP_OFFSET: {
2244 // The input in src1.xy is float, but we need a single 32-bit value
2245 // where the upper and lower 16 bits are encoded in S0.12 format. We need
2246 // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096,
2247 // and then convert to s32.
2248 Value *offs[2];
2249 for (c = 0; c < 2; c++) {
2250 offs[c] = fetchSrc(1, c);
2251 mkOp2(OP_MIN, TYPE_F32, offs[c], offs[c], loadImm(NULL, 0.4375f));
2252 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2253 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2254 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2255 }
2256 offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(),
2257 offs[1], mkImm(0x1010), offs[0]);
2258 mode |= NV50_IR_INTERP_OFFSET;
2259 break;
2260 }
2261 }
2262
2263 if (op == OP_PINTERP) {
2264 if (offset) {
2265 w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset);
2266 mkOp1(OP_RCP, TYPE_F32, w, w);
2267 } else {
2268 w = fragCoord[3];
2269 }
2270 }
2271
2272
2273 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2274 insn = mkOp1(op, TYPE_F32, dst[c], srcToSym(src, c));
2275 if (op == OP_PINTERP)
2276 insn->setSrc(1, w);
2277 if (ptr)
2278 insn->setIndirect(0, 0, ptr);
2279 if (offset)
2280 insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);
2281
2282 insn->setInterpolate(mode);
2283 }
2284 }
2285
2286 Converter::Subroutine *
2287 Converter::getSubroutine(unsigned ip)
2288 {
2289 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
2290
2291 if (it == sub.map.end())
2292 it = sub.map.insert(std::make_pair(
2293 ip, Subroutine(new Function(prog, "SUB", ip)))).first;
2294
2295 return &it->second;
2296 }
2297
2298 Converter::Subroutine *
2299 Converter::getSubroutine(Function *f)
2300 {
2301 unsigned ip = f->getLabel();
2302 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
2303
2304 if (it == sub.map.end())
2305 it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
2306
2307 return &it->second;
2308 }
2309
2310 bool
2311 Converter::isEndOfSubroutine(uint ip)
2312 {
2313 assert(ip < code->scan.num_instructions);
2314 tgsi::Instruction insn(&code->insns[ip]);
2315 return (insn.getOpcode() == TGSI_OPCODE_END ||
2316 insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
2317 // does END occur at end of main or the very end ?
2318 insn.getOpcode() == TGSI_OPCODE_BGNSUB);
2319 }
2320
2321 bool
2322 Converter::handleInstruction(const struct tgsi_full_instruction *insn)
2323 {
2324 Instruction *geni;
2325
2326 Value *dst0[4], *rDst0[4];
2327 Value *src0, *src1, *src2, *src3;
2328 Value *val0, *val1;
2329 int c;
2330
2331 tgsi = tgsi::Instruction(insn);
2332
2333 bool useScratchDst = tgsi.checkDstSrcAliasing();
2334
2335 operation op = tgsi.getOP();
2336 dstTy = tgsi.inferDstType();
2337 srcTy = tgsi.inferSrcType();
2338
2339 unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
2340
2341 if (tgsi.dstCount()) {
2342 for (c = 0; c < 4; ++c) {
2343 rDst0[c] = acquireDst(0, c);
2344 dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
2345 }
2346 }
2347
2348 switch (tgsi.getOpcode()) {
2349 case TGSI_OPCODE_ADD:
2350 case TGSI_OPCODE_UADD:
2351 case TGSI_OPCODE_AND:
2352 case TGSI_OPCODE_DIV:
2353 case TGSI_OPCODE_IDIV:
2354 case TGSI_OPCODE_UDIV:
2355 case TGSI_OPCODE_MAX:
2356 case TGSI_OPCODE_MIN:
2357 case TGSI_OPCODE_IMAX:
2358 case TGSI_OPCODE_IMIN:
2359 case TGSI_OPCODE_UMAX:
2360 case TGSI_OPCODE_UMIN:
2361 case TGSI_OPCODE_MOD:
2362 case TGSI_OPCODE_UMOD:
2363 case TGSI_OPCODE_MUL:
2364 case TGSI_OPCODE_UMUL:
2365 case TGSI_OPCODE_IMUL_HI:
2366 case TGSI_OPCODE_UMUL_HI:
2367 case TGSI_OPCODE_OR:
2368 case TGSI_OPCODE_SHL:
2369 case TGSI_OPCODE_ISHR:
2370 case TGSI_OPCODE_USHR:
2371 case TGSI_OPCODE_SUB:
2372 case TGSI_OPCODE_XOR:
2373 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2374 src0 = fetchSrc(0, c);
2375 src1 = fetchSrc(1, c);
2376 geni = mkOp2(op, dstTy, dst0[c], src0, src1);
2377 geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
2378 }
2379 break;
2380 case TGSI_OPCODE_MAD:
2381 case TGSI_OPCODE_UMAD:
2382 case TGSI_OPCODE_SAD:
2383 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2384 src0 = fetchSrc(0, c);
2385 src1 = fetchSrc(1, c);
2386 src2 = fetchSrc(2, c);
2387 mkOp3(op, dstTy, dst0[c], src0, src1, src2);
2388 }
2389 break;
2390 case TGSI_OPCODE_MOV:
2391 case TGSI_OPCODE_ABS:
2392 case TGSI_OPCODE_CEIL:
2393 case TGSI_OPCODE_FLR:
2394 case TGSI_OPCODE_TRUNC:
2395 case TGSI_OPCODE_RCP:
2396 case TGSI_OPCODE_IABS:
2397 case TGSI_OPCODE_INEG:
2398 case TGSI_OPCODE_NOT:
2399 case TGSI_OPCODE_DDX:
2400 case TGSI_OPCODE_DDY:
2401 case TGSI_OPCODE_DDX_FINE:
2402 case TGSI_OPCODE_DDY_FINE:
2403 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2404 mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
2405 break;
2406 case TGSI_OPCODE_RSQ:
2407 src0 = fetchSrc(0, 0);
2408 val0 = getScratch();
2409 mkOp1(OP_ABS, TYPE_F32, val0, src0);
2410 mkOp1(OP_RSQ, TYPE_F32, val0, val0);
2411 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2412 mkMov(dst0[c], val0);
2413 break;
2414 case TGSI_OPCODE_ARL:
2415 case TGSI_OPCODE_ARR:
2416 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2417 const RoundMode rnd =
2418 tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M;
2419 src0 = fetchSrc(0, c);
2420 mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd;
2421 }
2422 break;
2423 case TGSI_OPCODE_UARL:
2424 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2425 mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));
2426 break;
2427 case TGSI_OPCODE_POW:
2428 val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0));
2429 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2430 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
2431 break;
2432 case TGSI_OPCODE_EX2:
2433 case TGSI_OPCODE_LG2:
2434 val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
2435 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2436 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
2437 break;
2438 case TGSI_OPCODE_COS:
2439 case TGSI_OPCODE_SIN:
2440 val0 = getScratch();
2441 if (mask & 7) {
2442 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
2443 mkOp1(op, TYPE_F32, val0, val0);
2444 for (c = 0; c < 3; ++c)
2445 if (dst0[c])
2446 mkMov(dst0[c], val0);
2447 }
2448 if (dst0[3]) {
2449 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
2450 mkOp1(op, TYPE_F32, dst0[3], val0);
2451 }
2452 break;
2453 case TGSI_OPCODE_SCS:
2454 if (mask & 3) {
2455 val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
2456 if (dst0[0])
2457 mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
2458 if (dst0[1])
2459 mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
2460 }
2461 if (dst0[2])
2462 loadImm(dst0[2], 0.0f);
2463 if (dst0[3])
2464 loadImm(dst0[3], 1.0f);
2465 break;
2466 case TGSI_OPCODE_EXP:
2467 src0 = fetchSrc(0, 0);
2468 val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
2469 if (dst0[1])
2470 mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
2471 if (dst0[0])
2472 mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
2473 if (dst0[2])
2474 mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
2475 if (dst0[3])
2476 loadImm(dst0[3], 1.0f);
2477 break;
2478 case TGSI_OPCODE_LOG:
2479 src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
2480 val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
2481 if (dst0[0] || dst0[1])
2482 val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
2483 if (dst0[1]) {
2484 mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
2485 mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
2486 mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0);
2487 }
2488 if (dst0[3])
2489 loadImm(dst0[3], 1.0f);
2490 break;
2491 case TGSI_OPCODE_DP2:
2492 val0 = buildDot(2);
2493 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2494 mkMov(dst0[c], val0);
2495 break;
2496 case TGSI_OPCODE_DP3:
2497 val0 = buildDot(3);
2498 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2499 mkMov(dst0[c], val0);
2500 break;
2501 case TGSI_OPCODE_DP4:
2502 val0 = buildDot(4);
2503 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2504 mkMov(dst0[c], val0);
2505 break;
2506 case TGSI_OPCODE_DPH:
2507 val0 = buildDot(3);
2508 src1 = fetchSrc(1, 3);
2509 mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
2510 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2511 mkMov(dst0[c], val0);
2512 break;
2513 case TGSI_OPCODE_DST:
2514 if (dst0[0])
2515 loadImm(dst0[0], 1.0f);
2516 if (dst0[1]) {
2517 src0 = fetchSrc(0, 1);
2518 src1 = fetchSrc(1, 1);
2519 mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1);
2520 }
2521 if (dst0[2])
2522 mkMov(dst0[2], fetchSrc(0, 2));
2523 if (dst0[3])
2524 mkMov(dst0[3], fetchSrc(1, 3));
2525 break;
2526 case TGSI_OPCODE_LRP:
2527 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2528 src0 = fetchSrc(0, c);
2529 src1 = fetchSrc(1, c);
2530 src2 = fetchSrc(2, c);
2531 mkOp3(OP_MAD, TYPE_F32, dst0[c],
2532 mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2);
2533 }
2534 break;
2535 case TGSI_OPCODE_LIT:
2536 handleLIT(dst0);
2537 break;
2538 case TGSI_OPCODE_XPD:
2539 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2540 if (c < 3) {
2541 val0 = getSSA();
2542 src0 = fetchSrc(1, (c + 1) % 3);
2543 src1 = fetchSrc(0, (c + 2) % 3);
2544 mkOp2(OP_MUL, TYPE_F32, val0, src0, src1);
2545 mkOp1(OP_NEG, TYPE_F32, val0, val0);
2546
2547 src0 = fetchSrc(0, (c + 1) % 3);
2548 src1 = fetchSrc(1, (c + 2) % 3);
2549 mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0);
2550 } else {
2551 loadImm(dst0[c], 1.0f);
2552 }
2553 }
2554 break;
2555 case TGSI_OPCODE_ISSG:
2556 case TGSI_OPCODE_SSG:
2557 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2558 src0 = fetchSrc(0, c);
2559 val0 = getScratch();
2560 val1 = getScratch();
2561 mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
2562 mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
2563 if (srcTy == TYPE_F32)
2564 mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
2565 else
2566 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
2567 }
2568 break;
2569 case TGSI_OPCODE_UCMP:
2570 case TGSI_OPCODE_CMP:
2571 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2572 src0 = fetchSrc(0, c);
2573 src1 = fetchSrc(1, c);
2574 src2 = fetchSrc(2, c);
2575 if (src1 == src2)
2576 mkMov(dst0[c], src1);
2577 else
2578 mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
2579 srcTy, dst0[c], srcTy, src1, src2, src0);
2580 }
2581 break;
2582 case TGSI_OPCODE_FRC:
2583 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2584 src0 = fetchSrc(0, c);
2585 val0 = getScratch();
2586 mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
2587 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
2588 }
2589 break;
2590 case TGSI_OPCODE_ROUND:
2591 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2592 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
2593 ->rnd = ROUND_NI;
2594 break;
2595 case TGSI_OPCODE_CLAMP:
2596 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2597 src0 = fetchSrc(0, c);
2598 src1 = fetchSrc(1, c);
2599 src2 = fetchSrc(2, c);
2600 val0 = getScratch();
2601 mkOp2(OP_MIN, TYPE_F32, val0, src0, src1);
2602 mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2);
2603 }
2604 break;
2605 case TGSI_OPCODE_SLT:
2606 case TGSI_OPCODE_SGE:
2607 case TGSI_OPCODE_SEQ:
2608 case TGSI_OPCODE_SGT:
2609 case TGSI_OPCODE_SLE:
2610 case TGSI_OPCODE_SNE:
2611 case TGSI_OPCODE_FSEQ:
2612 case TGSI_OPCODE_FSGE:
2613 case TGSI_OPCODE_FSLT:
2614 case TGSI_OPCODE_FSNE:
2615 case TGSI_OPCODE_ISGE:
2616 case TGSI_OPCODE_ISLT:
2617 case TGSI_OPCODE_USEQ:
2618 case TGSI_OPCODE_USGE:
2619 case TGSI_OPCODE_USLT:
2620 case TGSI_OPCODE_USNE:
2621 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2622 src0 = fetchSrc(0, c);
2623 src1 = fetchSrc(1, c);
2624 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
2625 }
2626 break;
2627 case TGSI_OPCODE_KILL_IF:
2628 val0 = new_LValue(func, FILE_PREDICATE);
2629 mask = 0;
2630 for (c = 0; c < 4; ++c) {
2631 const int s = tgsi.getSrc(0).getSwizzle(c);
2632 if (mask & (1 << s))
2633 continue;
2634 mask |= 1 << s;
2635 mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);
2636 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
2637 }
2638 break;
2639 case TGSI_OPCODE_KILL:
2640 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2641 break;
2642 case TGSI_OPCODE_TEX:
2643 case TGSI_OPCODE_TXB:
2644 case TGSI_OPCODE_TXL:
2645 case TGSI_OPCODE_TXP:
2646 case TGSI_OPCODE_LODQ:
2647 // R S L C Dx Dy
2648 handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
2649 break;
2650 case TGSI_OPCODE_TXD:
2651 handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
2652 break;
2653 case TGSI_OPCODE_TG4:
2654 handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00);
2655 break;
2656 case TGSI_OPCODE_TEX2:
2657 handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
2658 break;
2659 case TGSI_OPCODE_TXB2:
2660 case TGSI_OPCODE_TXL2:
2661 handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00);
2662 break;
2663 case TGSI_OPCODE_SAMPLE:
2664 case TGSI_OPCODE_SAMPLE_B:
2665 case TGSI_OPCODE_SAMPLE_D:
2666 case TGSI_OPCODE_SAMPLE_L:
2667 case TGSI_OPCODE_SAMPLE_C:
2668 case TGSI_OPCODE_SAMPLE_C_LZ:
2669 handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
2670 break;
2671 case TGSI_OPCODE_TXF:
2672 handleTXF(dst0, 1, 0x03);
2673 break;
2674 case TGSI_OPCODE_SAMPLE_I:
2675 handleTXF(dst0, 1, 0x03);
2676 break;
2677 case TGSI_OPCODE_SAMPLE_I_MS:
2678 handleTXF(dst0, 1, 0x20);
2679 break;
2680 case TGSI_OPCODE_TXQ:
2681 case TGSI_OPCODE_SVIEWINFO:
2682 handleTXQ(dst0, TXQ_DIMS);
2683 break;
2684 case TGSI_OPCODE_F2I:
2685 case TGSI_OPCODE_F2U:
2686 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2687 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
2688 break;
2689 case TGSI_OPCODE_I2F:
2690 case TGSI_OPCODE_U2F:
2691 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2692 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
2693 break;
2694 case TGSI_OPCODE_EMIT:
2695 /* export the saved viewport index */
2696 if (viewport != NULL) {
2697 Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,
2698 info->out[info->io.viewportId].slot[0] * 4);
2699 mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport);
2700 }
2701 /* fallthrough */
2702 case TGSI_OPCODE_ENDPRIM:
2703 {
2704 // get vertex stream (must be immediate)
2705 unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
2706 if (stream && op == OP_RESTART)
2707 break;
2708 src0 = mkImm(stream);
2709 mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
2710 break;
2711 }
2712 case TGSI_OPCODE_IF:
2713 case TGSI_OPCODE_UIF:
2714 {
2715 BasicBlock *ifBB = new BasicBlock(func);
2716
2717 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
2718 condBBs.push(bb);
2719 joinBBs.push(bb);
2720
2721 mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
2722
2723 setPosition(ifBB, true);
2724 }
2725 break;
2726 case TGSI_OPCODE_ELSE:
2727 {
2728 BasicBlock *elseBB = new BasicBlock(func);
2729 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
2730
2731 forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
2732 condBBs.push(bb);
2733
2734 forkBB->getExit()->asFlow()->target.bb = elseBB;
2735 if (!bb->isTerminated())
2736 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
2737
2738 setPosition(elseBB, true);
2739 }
2740 break;
2741 case TGSI_OPCODE_ENDIF:
2742 {
2743 BasicBlock *convBB = new BasicBlock(func);
2744 BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
2745 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
2746
2747 if (!bb->isTerminated()) {
2748 // we only want join if none of the clauses ended with CONT/BREAK/RET
2749 if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
2750 insertConvergenceOps(convBB, forkBB);
2751 mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
2752 bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
2753 }
2754
2755 if (prevBB->getExit()->op == OP_BRA) {
2756 prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
2757 prevBB->getExit()->asFlow()->target.bb = convBB;
2758 }
2759 setPosition(convBB, true);
2760 }
2761 break;
2762 case TGSI_OPCODE_BGNLOOP:
2763 {
2764 BasicBlock *lbgnBB = new BasicBlock(func);
2765 BasicBlock *lbrkBB = new BasicBlock(func);
2766
2767 loopBBs.push(lbgnBB);
2768 breakBBs.push(lbrkBB);
2769 if (loopBBs.getSize() > func->loopNestingBound)
2770 func->loopNestingBound++;
2771
2772 mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
2773
2774 bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
2775 setPosition(lbgnBB, true);
2776 mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
2777 }
2778 break;
2779 case TGSI_OPCODE_ENDLOOP:
2780 {
2781 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
2782
2783 if (!bb->isTerminated()) {
2784 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
2785 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
2786 }
2787 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
2788 }
2789 break;
2790 case TGSI_OPCODE_BRK:
2791 {
2792 if (bb->isTerminated())
2793 break;
2794 BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
2795 mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
2796 bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
2797 }
2798 break;
2799 case TGSI_OPCODE_CONT:
2800 {
2801 if (bb->isTerminated())
2802 break;
2803 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
2804 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
2805 contBB->explicitCont = true;
2806 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2807 }
2808 break;
2809 case TGSI_OPCODE_BGNSUB:
2810 {
2811 Subroutine *s = getSubroutine(ip);
2812 BasicBlock *entry = new BasicBlock(s->f);
2813 BasicBlock *leave = new BasicBlock(s->f);
2814
2815 // multiple entrypoints possible, keep the graph connected
2816 if (prog->getType() == Program::TYPE_COMPUTE)
2817 prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
2818
2819 sub.cur = s;
2820 s->f->setEntry(entry);
2821 s->f->setExit(leave);
2822 setPosition(entry, true);
2823 return true;
2824 }
2825 case TGSI_OPCODE_ENDSUB:
2826 {
2827 sub.cur = getSubroutine(prog->main);
2828 setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
2829 return true;
2830 }
2831 case TGSI_OPCODE_CAL:
2832 {
2833 Subroutine *s = getSubroutine(tgsi.getLabel());
2834 mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
2835 func->call.attach(&s->f->call, Graph::Edge::TREE);
2836 return true;
2837 }
2838 case TGSI_OPCODE_RET:
2839 {
2840 if (bb->isTerminated())
2841 return true;
2842 BasicBlock *leave = BasicBlock::get(func->cfgExit);
2843
2844 if (!isEndOfSubroutine(ip + 1)) {
2845 // insert a PRERET at the entry if this is an early return
2846 // (only needed for sharing code in the epilogue)
2847 BasicBlock *pos = getBB();
2848 setPosition(BasicBlock::get(func->cfg.getRoot()), false);
2849 mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
2850 setPosition(pos, true);
2851 }
2852 mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
2853 bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
2854 }
2855 break;
2856 case TGSI_OPCODE_END:
2857 {
2858 // attach and generate epilogue code
2859 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2860 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
2861 setPosition(epilogue, true);
2862 if (prog->getType() == Program::TYPE_FRAGMENT)
2863 exportOutputs();
2864 if (info->io.genUserClip > 0)
2865 handleUserClipPlanes();
2866 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
2867 }
2868 break;
2869 case TGSI_OPCODE_SWITCH:
2870 case TGSI_OPCODE_CASE:
2871 ERROR("switch/case opcode encountered, should have been lowered\n");
2872 abort();
2873 break;
2874 case TGSI_OPCODE_LOAD:
2875 handleLOAD(dst0);
2876 break;
2877 case TGSI_OPCODE_STORE:
2878 handleSTORE();
2879 break;
2880 case TGSI_OPCODE_BARRIER:
2881 geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2882 geni->fixed = 1;
2883 geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
2884 break;
2885 case TGSI_OPCODE_MFENCE:
2886 case TGSI_OPCODE_LFENCE:
2887 case TGSI_OPCODE_SFENCE:
2888 geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2889 geni->fixed = 1;
2890 geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
2891 break;
2892 case TGSI_OPCODE_ATOMUADD:
2893 case TGSI_OPCODE_ATOMXCHG:
2894 case TGSI_OPCODE_ATOMCAS:
2895 case TGSI_OPCODE_ATOMAND:
2896 case TGSI_OPCODE_ATOMOR:
2897 case TGSI_OPCODE_ATOMXOR:
2898 case TGSI_OPCODE_ATOMUMIN:
2899 case TGSI_OPCODE_ATOMIMIN:
2900 case TGSI_OPCODE_ATOMUMAX:
2901 case TGSI_OPCODE_ATOMIMAX:
2902 handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
2903 break;
2904 case TGSI_OPCODE_IBFE:
2905 case TGSI_OPCODE_UBFE:
2906 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2907 src0 = fetchSrc(0, c);
2908 src1 = fetchSrc(1, c);
2909 src2 = fetchSrc(2, c);
2910 mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
2911 mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1);
2912 }
2913 break;
2914 case TGSI_OPCODE_BFI:
2915 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2916 src0 = fetchSrc(0, c);
2917 src1 = fetchSrc(1, c);
2918 src2 = fetchSrc(2, c);
2919 src3 = fetchSrc(3, c);
2920 mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2);
2921 mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0);
2922 }
2923 break;
2924 case TGSI_OPCODE_LSB:
2925 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2926 src0 = fetchSrc(0, c);
2927 geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000));
2928 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
2929 geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0);
2930 geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2931 }
2932 break;
2933 case TGSI_OPCODE_IMSB:
2934 case TGSI_OPCODE_UMSB:
2935 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2936 src0 = fetchSrc(0, c);
2937 mkOp1(OP_BFIND, srcTy, dst0[c], src0);
2938 }
2939 break;
2940 case TGSI_OPCODE_BREV:
2941 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2942 src0 = fetchSrc(0, c);
2943 geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
2944 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
2945 }
2946 break;
2947 case TGSI_OPCODE_POPC:
2948 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2949 src0 = fetchSrc(0, c);
2950 mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
2951 }
2952 break;
2953 case TGSI_OPCODE_INTERP_CENTROID:
2954 case TGSI_OPCODE_INTERP_SAMPLE:
2955 case TGSI_OPCODE_INTERP_OFFSET:
2956 handleINTERP(dst0);
2957 break;
2958 case TGSI_OPCODE_D2I:
2959 case TGSI_OPCODE_D2U:
2960 case TGSI_OPCODE_D2F: {
2961 int pos = 0;
2962 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2963 Value *dreg = getSSA(8);
2964 src0 = fetchSrc(0, pos);
2965 src1 = fetchSrc(0, pos + 1);
2966 mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
2967 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
2968 pos += 2;
2969 }
2970 break;
2971 }
2972 case TGSI_OPCODE_I2D:
2973 case TGSI_OPCODE_U2D:
2974 case TGSI_OPCODE_F2D:
2975 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2976 Value *dreg = getSSA(8);
2977 mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
2978 mkSplit(&dst0[c], 4, dreg);
2979 c++;
2980 }
2981 break;
2982 case TGSI_OPCODE_DABS:
2983 case TGSI_OPCODE_DNEG:
2984 case TGSI_OPCODE_DRCP:
2985 case TGSI_OPCODE_DSQRT:
2986 case TGSI_OPCODE_DRSQ:
2987 case TGSI_OPCODE_DTRUNC:
2988 case TGSI_OPCODE_DCEIL:
2989 case TGSI_OPCODE_DFLR:
2990 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2991 src0 = getSSA(8);
2992 Value *dst = getSSA(8), *tmp[2];
2993 tmp[0] = fetchSrc(0, c);
2994 tmp[1] = fetchSrc(0, c + 1);
2995 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
2996 mkOp1(op, dstTy, dst, src0);
2997 mkSplit(&dst0[c], 4, dst);
2998 c++;
2999 }
3000 break;
3001 case TGSI_OPCODE_DFRAC:
3002 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3003 src0 = getSSA(8);
3004 Value *dst = getSSA(8), *tmp[2];
3005 tmp[0] = fetchSrc(0, c);
3006 tmp[1] = fetchSrc(0, c + 1);
3007 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3008 mkOp1(OP_FLOOR, TYPE_F64, dst, src0);
3009 mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);
3010 mkSplit(&dst0[c], 4, dst);
3011 c++;
3012 }
3013 break;
3014 case TGSI_OPCODE_DSLT:
3015 case TGSI_OPCODE_DSGE:
3016 case TGSI_OPCODE_DSEQ:
3017 case TGSI_OPCODE_DSNE: {
3018 int pos = 0;
3019 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3020 Value *tmp[2];
3021
3022 src0 = getSSA(8);
3023 src1 = getSSA(8);
3024 tmp[0] = fetchSrc(0, pos);
3025 tmp[1] = fetchSrc(0, pos + 1);
3026 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3027 tmp[0] = fetchSrc(1, pos);
3028 tmp[1] = fetchSrc(1, pos + 1);
3029 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
3030 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
3031 pos += 2;
3032 }
3033 break;
3034 }
3035 case TGSI_OPCODE_DADD:
3036 case TGSI_OPCODE_DMUL:
3037 case TGSI_OPCODE_DMAX:
3038 case TGSI_OPCODE_DMIN:
3039 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3040 src0 = getSSA(8);
3041 src1 = getSSA(8);
3042 Value *dst = getSSA(8), *tmp[2];
3043 tmp[0] = fetchSrc(0, c);
3044 tmp[1] = fetchSrc(0, c + 1);
3045 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3046 tmp[0] = fetchSrc(1, c);
3047 tmp[1] = fetchSrc(1, c + 1);
3048 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
3049 mkOp2(op, dstTy, dst, src0, src1);
3050 mkSplit(&dst0[c], 4, dst);
3051 c++;
3052 }
3053 break;
3054 case TGSI_OPCODE_DMAD:
3055 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3056 src0 = getSSA(8);
3057 src1 = getSSA(8);
3058 src2 = getSSA(8);
3059 Value *dst = getSSA(8), *tmp[2];
3060 tmp[0] = fetchSrc(0, c);
3061 tmp[1] = fetchSrc(0, c + 1);
3062 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3063 tmp[0] = fetchSrc(1, c);
3064 tmp[1] = fetchSrc(1, c + 1);
3065 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
3066 tmp[0] = fetchSrc(2, c);
3067 tmp[1] = fetchSrc(2, c + 1);
3068 mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);
3069 mkOp3(op, dstTy, dst, src0, src1, src2);
3070 mkSplit(&dst0[c], 4, dst);
3071 c++;
3072 }
3073 break;
3074 case TGSI_OPCODE_DROUND:
3075 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3076 src0 = getSSA(8);
3077 Value *dst = getSSA(8), *tmp[2];
3078 tmp[0] = fetchSrc(0, c);
3079 tmp[1] = fetchSrc(0, c + 1);
3080 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3081 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)
3082 ->rnd = ROUND_NI;
3083 mkSplit(&dst0[c], 4, dst);
3084 c++;
3085 }
3086 break;
3087 case TGSI_OPCODE_DSSG:
3088 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3089 src0 = getSSA(8);
3090 Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];
3091 tmp[0] = fetchSrc(0, c);
3092 tmp[1] = fetchSrc(0, c + 1);
3093 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3094
3095 val0 = getScratch();
3096 val1 = getScratch();
3097 // The zero is wrong here since it's only 32-bit, but it works out in
3098 // the end since it gets replaced with $r63.
3099 mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);
3100 mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);
3101 mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);
3102 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);
3103 mkSplit(&dst0[c], 4, dst);
3104 c++;
3105 }
3106 break;
3107 default:
3108 ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
3109 assert(0);
3110 break;
3111 }
3112
3113 if (tgsi.dstCount()) {
3114 for (c = 0; c < 4; ++c) {
3115 if (!dst0[c])
3116 continue;
3117 if (dst0[c] != rDst0[c])
3118 mkMov(rDst0[c], dst0[c]);
3119 storeDst(0, c, rDst0[c]);
3120 }
3121 }
3122 vtxBaseValid = 0;
3123
3124 return true;
3125 }
3126
3127 void
3128 Converter::handleUserClipPlanes()
3129 {
3130 Value *res[8];
3131 int n, i, c;
3132
3133 for (c = 0; c < 4; ++c) {
3134 for (i = 0; i < info->io.genUserClip; ++i) {
3135 Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
3136 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
3137 Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
3138 if (c == 0)
3139 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
3140 else
3141 mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
3142 }
3143 }
3144
3145 const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
3146
3147 for (i = 0; i < info->io.genUserClip; ++i) {
3148 n = i / 4 + first;
3149 c = i % 4;
3150 Symbol *sym =
3151 mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
3152 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
3153 }
3154 }
3155
3156 void
3157 Converter::exportOutputs()
3158 {
3159 for (unsigned int i = 0; i < info->numOutputs; ++i) {
3160 for (unsigned int c = 0; c < 4; ++c) {
3161 if (!oData.exists(sub.cur->values, i, c))
3162 continue;
3163 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
3164 info->out[i].slot[c] * 4);
3165 Value *val = oData.load(sub.cur->values, i, c, NULL);
3166 if (val)
3167 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
3168 }
3169 }
3170 }
3171
3172 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
3173 code(code),
3174 tgsi(NULL),
3175 tData(this), aData(this), pData(this), oData(this)
3176 {
3177 info = code->info;
3178
3179 const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
3180
3181 const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
3182 const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
3183 const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
3184 const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
3185
3186 tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
3187 pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
3188 aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
3189 oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
3190
3191 zero = mkImm((uint32_t)0);
3192
3193 vtxBaseValid = 0;
3194 }
3195
3196 Converter::~Converter()
3197 {
3198 }
3199
3200 inline const Converter::Location *
3201 Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v)
3202 {
3203 ValueMap::l_iterator it = s->values.l.find(v);
3204 return it == s->values.l.end() ? NULL : &it->second;
3205 }
3206
3207 template<typename T> inline void
3208 Converter::BindArgumentsPass::updateCallArgs(
3209 Instruction *i, void (Instruction::*setArg)(int, Value *),
3210 T (Function::*proto))
3211 {
3212 Function *g = i->asFlow()->target.fn;
3213 Subroutine *subg = conv.getSubroutine(g);
3214
3215 for (unsigned a = 0; a < (g->*proto).size(); ++a) {
3216 Value *v = (g->*proto)[a].get();
3217 const Converter::Location &l = *getValueLocation(subg, v);
3218 Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
3219
3220 (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
3221 }
3222 }
3223
3224 template<typename T> inline void
3225 Converter::BindArgumentsPass::updatePrototype(
3226 BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
3227 {
3228 (func->*updateSet)();
3229
3230 for (unsigned i = 0; i < set->getSize(); ++i) {
3231 Value *v = func->getLValue(i);
3232 const Converter::Location *l = getValueLocation(sub, v);
3233
3234 // only include values with a matching TGSI register
3235 if (set->test(i) && l && !conv.code->locals.count(*l))
3236 (func->*proto).push_back(v);
3237 }
3238 }
3239
3240 bool
3241 Converter::BindArgumentsPass::visit(Function *f)
3242 {
3243 sub = conv.getSubroutine(f);
3244
3245 for (ArrayList::Iterator bi = f->allBBlocks.iterator();
3246 !bi.end(); bi.next()) {
3247 for (Instruction *i = BasicBlock::get(bi)->getFirst();
3248 i; i = i->next) {
3249 if (i->op == OP_CALL && !i->asFlow()->builtin) {
3250 updateCallArgs(i, &Instruction::setSrc, &Function::ins);
3251 updateCallArgs(i, &Instruction::setDef, &Function::outs);
3252 }
3253 }
3254 }
3255
3256 if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
3257 return true;
3258 updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
3259 &Function::buildLiveSets, &Function::ins);
3260 updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
3261 &Function::buildDefSets, &Function::outs);
3262
3263 return true;
3264 }
3265
3266 bool
3267 Converter::run()
3268 {
3269 BasicBlock *entry = new BasicBlock(prog->main);
3270 BasicBlock *leave = new BasicBlock(prog->main);
3271
3272 prog->main->setEntry(entry);
3273 prog->main->setExit(leave);
3274
3275 setPosition(entry, true);
3276 sub.cur = getSubroutine(prog->main);
3277
3278 if (info->io.genUserClip > 0) {
3279 for (int c = 0; c < 4; ++c)
3280 clipVtx[c] = getScratch();
3281 }
3282
3283 if (prog->getType() == Program::TYPE_FRAGMENT) {
3284 Symbol *sv = mkSysVal(SV_POSITION, 3);
3285 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
3286 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
3287 }
3288
3289 if (info->io.viewportId >= 0)
3290 viewport = getScratch();
3291 else
3292 viewport = NULL;
3293
3294 for (ip = 0; ip < code->scan.num_instructions; ++ip) {
3295 if (!handleInstruction(&code->insns[ip]))
3296 return false;
3297 }
3298
3299 if (!BindArgumentsPass(*this).run(prog))
3300 return false;
3301
3302 return true;
3303 }
3304
3305 } // unnamed namespace
3306
3307 namespace nv50_ir {
3308
3309 bool
3310 Program::makeFromTGSI(struct nv50_ir_prog_info *info)
3311 {
3312 tgsi::Source src(info);
3313 if (!src.scanSource())
3314 return false;
3315 tlsSize = info->bin.tlsSpace;
3316
3317 Converter builder(this, &src);
3318 return builder.run();
3319 }
3320
3321 } // namespace nv50_ir