nv50/ir/tgsi: handle TGSI_OPCODE_LOAD,STORE
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir_from_tgsi.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 extern "C" {
24 #include "tgsi/tgsi_dump.h"
25 #include "tgsi/tgsi_scan.h"
26 }
27
28 #include "nv50_ir.h"
29 #include "nv50_ir_util.h"
30 #include "nv50_ir_build_util.h"
31
32 namespace tgsi {
33
// Forward declaration: Instruction::getTexture() takes a const Source *.
class Source;

// File-local helpers that map TGSI enumerants (opcode, register file,
// texture target, system-value semantic) to their nv50_ir counterparts.
// Each one is defined further down in this file.
static nv50_ir::operation translateOpcode(uint opcode);
static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
40
41 class Instruction
42 {
43 public:
44 Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
45
46 class SrcRegister
47 {
48 public:
49 SrcRegister(const struct tgsi_full_src_register *src)
50 : reg(src->Register),
51 fsr(src)
52 { }
53
54 SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
55
56 struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
57 {
58 struct tgsi_src_register reg;
59 memset(&reg, 0, sizeof(reg));
60 reg.Index = off.Index;
61 reg.File = off.File;
62 reg.SwizzleX = off.SwizzleX;
63 reg.SwizzleY = off.SwizzleY;
64 reg.SwizzleZ = off.SwizzleZ;
65 return reg;
66 }
67
68 SrcRegister(const struct tgsi_texture_offset& off) :
69 reg(offsetToSrc(off)),
70 fsr(NULL)
71 { }
72
73 uint getFile() const { return reg.File; }
74
75 bool is2D() const { return reg.Dimension; }
76
77 bool isIndirect(int dim) const
78 {
79 return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
80 }
81
82 int getIndex(int dim) const
83 {
84 return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
85 }
86
87 int getSwizzle(int chan) const
88 {
89 return tgsi_util_get_src_register_swizzle(&reg, chan);
90 }
91
92 nv50_ir::Modifier getMod(int chan) const;
93
94 SrcRegister getIndirect(int dim) const
95 {
96 assert(fsr && isIndirect(dim));
97 if (dim)
98 return SrcRegister(fsr->DimIndirect);
99 return SrcRegister(fsr->Indirect);
100 }
101
102 uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
103 {
104 assert(reg.File == TGSI_FILE_IMMEDIATE);
105 assert(!reg.Absolute);
106 assert(!reg.Negate);
107 return info->immd.data[reg.Index * 4 + getSwizzle(c)];
108 }
109
110 private:
111 const struct tgsi_src_register reg;
112 const struct tgsi_full_src_register *fsr;
113 };
114
115 class DstRegister
116 {
117 public:
118 DstRegister(const struct tgsi_full_dst_register *dst)
119 : reg(dst->Register),
120 fdr(dst)
121 { }
122
123 DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
124
125 uint getFile() const { return reg.File; }
126
127 bool is2D() const { return reg.Dimension; }
128
129 bool isIndirect(int dim) const
130 {
131 return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
132 }
133
134 int getIndex(int dim) const
135 {
136 return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
137 }
138
139 unsigned int getMask() const { return reg.WriteMask; }
140
141 bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
142
143 SrcRegister getIndirect(int dim) const
144 {
145 assert(fdr && isIndirect(dim));
146 if (dim)
147 return SrcRegister(fdr->DimIndirect);
148 return SrcRegister(fdr->Indirect);
149 }
150
151 private:
152 const struct tgsi_dst_register reg;
153 const struct tgsi_full_dst_register *fdr;
154 };
155
156 inline uint getOpcode() const { return insn->Instruction.Opcode; }
157
158 unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
159 unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
160
161 // mask of used components of source s
162 unsigned int srcMask(unsigned int s) const;
163
164 SrcRegister getSrc(unsigned int s) const
165 {
166 assert(s < srcCount());
167 return SrcRegister(&insn->Src[s]);
168 }
169
170 DstRegister getDst(unsigned int d) const
171 {
172 assert(d < dstCount());
173 return DstRegister(&insn->Dst[d]);
174 }
175
176 SrcRegister getTexOffset(unsigned int i) const
177 {
178 assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
179 return SrcRegister(insn->TexOffsets[i]);
180 }
181
182 unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
183
184 bool checkDstSrcAliasing() const;
185
186 inline nv50_ir::operation getOP() const {
187 return translateOpcode(getOpcode()); }
188
189 nv50_ir::DataType inferSrcType() const;
190 nv50_ir::DataType inferDstType() const;
191
192 nv50_ir::CondCode getSetCond() const;
193
194 nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
195
196 inline uint getLabel() { return insn->Label.Label; }
197
198 unsigned getSaturate() const { return insn->Instruction.Saturate; }
199
200 void print() const
201 {
202 tgsi_dump_instruction(insn, 1);
203 }
204
205 private:
206 const struct tgsi_full_instruction *insn;
207 };
208
209 unsigned int Instruction::srcMask(unsigned int s) const
210 {
211 unsigned int mask = insn->Dst[0].Register.WriteMask;
212
213 switch (insn->Instruction.Opcode) {
214 case TGSI_OPCODE_COS:
215 case TGSI_OPCODE_SIN:
216 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
217 case TGSI_OPCODE_DP2:
218 return 0x3;
219 case TGSI_OPCODE_DP3:
220 return 0x7;
221 case TGSI_OPCODE_DP4:
222 case TGSI_OPCODE_DPH:
223 case TGSI_OPCODE_KIL: /* WriteMask ignored */
224 return 0xf;
225 case TGSI_OPCODE_DST:
226 return mask & (s ? 0xa : 0x6);
227 case TGSI_OPCODE_EX2:
228 case TGSI_OPCODE_EXP:
229 case TGSI_OPCODE_LG2:
230 case TGSI_OPCODE_LOG:
231 case TGSI_OPCODE_POW:
232 case TGSI_OPCODE_RCP:
233 case TGSI_OPCODE_RSQ:
234 case TGSI_OPCODE_SCS:
235 return 0x1;
236 case TGSI_OPCODE_IF:
237 return 0x1;
238 case TGSI_OPCODE_LIT:
239 return 0xb;
240 case TGSI_OPCODE_TEX2:
241 case TGSI_OPCODE_TXB2:
242 case TGSI_OPCODE_TXL2:
243 return (s == 0) ? 0xf : 0x3;
244 case TGSI_OPCODE_TEX:
245 case TGSI_OPCODE_TXB:
246 case TGSI_OPCODE_TXD:
247 case TGSI_OPCODE_TXL:
248 case TGSI_OPCODE_TXP:
249 {
250 const struct tgsi_instruction_texture *tex = &insn->Texture;
251
252 assert(insn->Instruction.Texture);
253
254 mask = 0x7;
255 if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
256 insn->Instruction.Opcode != TGSI_OPCODE_TXD)
257 mask |= 0x8; /* bias, lod or proj */
258
259 switch (tex->Texture) {
260 case TGSI_TEXTURE_1D:
261 mask &= 0x9;
262 break;
263 case TGSI_TEXTURE_SHADOW1D:
264 mask &= 0xd;
265 break;
266 case TGSI_TEXTURE_1D_ARRAY:
267 case TGSI_TEXTURE_2D:
268 case TGSI_TEXTURE_RECT:
269 mask &= 0xb;
270 break;
271 case TGSI_TEXTURE_CUBE_ARRAY:
272 case TGSI_TEXTURE_SHADOW2D_ARRAY:
273 case TGSI_TEXTURE_SHADOWCUBE:
274 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
275 mask |= 0x8;
276 break;
277 default:
278 break;
279 }
280 }
281 return mask;
282 case TGSI_OPCODE_XPD:
283 {
284 unsigned int x = 0;
285 if (mask & 1) x |= 0x6;
286 if (mask & 2) x |= 0x5;
287 if (mask & 4) x |= 0x3;
288 return x;
289 }
290 default:
291 break;
292 }
293
294 return mask;
295 }
296
297 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
298 {
299 nv50_ir::Modifier m(0);
300
301 if (reg.Absolute)
302 m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
303 if (reg.Negate)
304 m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
305 return m;
306 }
307
308 static nv50_ir::DataFile translateFile(uint file)
309 {
310 switch (file) {
311 case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST;
312 case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT;
313 case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT;
314 case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR;
315 case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS;
316 case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
317 case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
318 case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
319 case TGSI_FILE_IMMEDIATE_ARRAY: return nv50_ir::FILE_IMMEDIATE;
320 case TGSI_FILE_TEMPORARY_ARRAY: return nv50_ir::FILE_MEMORY_LOCAL;
321 case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
322 case TGSI_FILE_SAMPLER:
323 case TGSI_FILE_NULL:
324 default:
325 return nv50_ir::FILE_NULL;
326 }
327 }
328
329 static nv50_ir::SVSemantic translateSysVal(uint sysval)
330 {
331 switch (sysval) {
332 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;
333 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;
334 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;
335 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
336 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID;
337 case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID;
338 case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID;
339 case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;
340 case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID;
341 default:
342 assert(0);
343 return nv50_ir::SV_CLOCK;
344 }
345 }
346
347 #define NV50_IR_TEX_TARG_CASE(a, b) \
348 case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
349
350 static nv50_ir::TexTarget translateTexture(uint tex)
351 {
352 switch (tex) {
353 NV50_IR_TEX_TARG_CASE(1D, 1D);
354 NV50_IR_TEX_TARG_CASE(2D, 2D);
355 NV50_IR_TEX_TARG_CASE(3D, 3D);
356 NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
357 NV50_IR_TEX_TARG_CASE(RECT, RECT);
358 NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
359 NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
360 NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);
361 NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
362 NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
363 NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
364 NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
365 NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
366 NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
367 NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);
368 NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
369
370 case TGSI_TEXTURE_UNKNOWN:
371 default:
372 assert(!"invalid texture target");
373 return nv50_ir::TEX_TARGET_2D;
374 }
375 }
376
377 nv50_ir::DataType Instruction::inferSrcType() const
378 {
379 switch (getOpcode()) {
380 case TGSI_OPCODE_AND:
381 case TGSI_OPCODE_OR:
382 case TGSI_OPCODE_XOR:
383 case TGSI_OPCODE_NOT:
384 case TGSI_OPCODE_U2F:
385 case TGSI_OPCODE_UADD:
386 case TGSI_OPCODE_UDIV:
387 case TGSI_OPCODE_UMOD:
388 case TGSI_OPCODE_UMAD:
389 case TGSI_OPCODE_UMUL:
390 case TGSI_OPCODE_UMAX:
391 case TGSI_OPCODE_UMIN:
392 case TGSI_OPCODE_USEQ:
393 case TGSI_OPCODE_USGE:
394 case TGSI_OPCODE_USLT:
395 case TGSI_OPCODE_USNE:
396 case TGSI_OPCODE_USHR:
397 case TGSI_OPCODE_UCMP:
398 return nv50_ir::TYPE_U32;
399 case TGSI_OPCODE_I2F:
400 case TGSI_OPCODE_IDIV:
401 case TGSI_OPCODE_IMAX:
402 case TGSI_OPCODE_IMIN:
403 case TGSI_OPCODE_IABS:
404 case TGSI_OPCODE_INEG:
405 case TGSI_OPCODE_ISGE:
406 case TGSI_OPCODE_ISHR:
407 case TGSI_OPCODE_ISLT:
408 case TGSI_OPCODE_ISSG:
409 case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
410 case TGSI_OPCODE_MOD:
411 case TGSI_OPCODE_UARL:
412 return nv50_ir::TYPE_S32;
413 default:
414 return nv50_ir::TYPE_F32;
415 }
416 }
417
418 nv50_ir::DataType Instruction::inferDstType() const
419 {
420 switch (getOpcode()) {
421 case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
422 case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
423 case TGSI_OPCODE_I2F:
424 case TGSI_OPCODE_U2F:
425 return nv50_ir::TYPE_F32;
426 default:
427 return inferSrcType();
428 }
429 }
430
431 nv50_ir::CondCode Instruction::getSetCond() const
432 {
433 using namespace nv50_ir;
434
435 switch (getOpcode()) {
436 case TGSI_OPCODE_SLT:
437 case TGSI_OPCODE_ISLT:
438 case TGSI_OPCODE_USLT:
439 return CC_LT;
440 case TGSI_OPCODE_SLE:
441 return CC_LE;
442 case TGSI_OPCODE_SGE:
443 case TGSI_OPCODE_ISGE:
444 case TGSI_OPCODE_USGE:
445 return CC_GE;
446 case TGSI_OPCODE_SGT:
447 return CC_GT;
448 case TGSI_OPCODE_SEQ:
449 case TGSI_OPCODE_USEQ:
450 return CC_EQ;
451 case TGSI_OPCODE_SNE:
452 return CC_NEU;
453 case TGSI_OPCODE_USNE:
454 return CC_NE;
455 case TGSI_OPCODE_SFL:
456 return CC_NEVER;
457 case TGSI_OPCODE_STR:
458 default:
459 return CC_ALWAYS;
460 }
461 }
462
// Expands to "case TGSI_OPCODE_<a>: return nv50_ir::OP_<b>"; the trailing
// semicolon is supplied at the point of use.
#define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b

// Maps a TGSI opcode to the nv50_ir operation it translates to.
// Several TGSI opcodes share one operation (e.g. all S*/IS*/US* compares map
// to OP_SET, all conversions to OP_CVT). Opcodes without an entry return
// OP_NOP.
static nv50_ir::operation translateOpcode(uint opcode)
{
   switch (opcode) {
   NV50_IR_OPCODE_CASE(ARL, SHL);
   NV50_IR_OPCODE_CASE(MOV, MOV);

   NV50_IR_OPCODE_CASE(RCP, RCP);
   NV50_IR_OPCODE_CASE(RSQ, RSQ);

   NV50_IR_OPCODE_CASE(MUL, MUL);
   NV50_IR_OPCODE_CASE(ADD, ADD);

   NV50_IR_OPCODE_CASE(MIN, MIN);
   NV50_IR_OPCODE_CASE(MAX, MAX);
   NV50_IR_OPCODE_CASE(SLT, SET);
   NV50_IR_OPCODE_CASE(SGE, SET);
   NV50_IR_OPCODE_CASE(MAD, MAD);
   NV50_IR_OPCODE_CASE(SUB, SUB);

   NV50_IR_OPCODE_CASE(FLR, FLOOR);
   NV50_IR_OPCODE_CASE(ROUND, CVT);
   NV50_IR_OPCODE_CASE(EX2, EX2);
   NV50_IR_OPCODE_CASE(LG2, LG2);
   NV50_IR_OPCODE_CASE(POW, POW);

   NV50_IR_OPCODE_CASE(ABS, ABS);

   NV50_IR_OPCODE_CASE(COS, COS);
   NV50_IR_OPCODE_CASE(DDX, DFDX);
   NV50_IR_OPCODE_CASE(DDY, DFDY);
   NV50_IR_OPCODE_CASE(KILP, DISCARD);

   NV50_IR_OPCODE_CASE(SEQ, SET);
   NV50_IR_OPCODE_CASE(SFL, SET);
   NV50_IR_OPCODE_CASE(SGT, SET);
   NV50_IR_OPCODE_CASE(SIN, SIN);
   NV50_IR_OPCODE_CASE(SLE, SET);
   NV50_IR_OPCODE_CASE(SNE, SET);
   NV50_IR_OPCODE_CASE(STR, SET);
   NV50_IR_OPCODE_CASE(TEX, TEX);
   NV50_IR_OPCODE_CASE(TXD, TXD);
   NV50_IR_OPCODE_CASE(TXP, TEX);

   NV50_IR_OPCODE_CASE(BRA, BRA);
   NV50_IR_OPCODE_CASE(CAL, CALL);
   NV50_IR_OPCODE_CASE(RET, RET);
   NV50_IR_OPCODE_CASE(CMP, SLCT);

   NV50_IR_OPCODE_CASE(TXB, TXB);

   NV50_IR_OPCODE_CASE(DIV, DIV);

   NV50_IR_OPCODE_CASE(TXL, TXL);

   NV50_IR_OPCODE_CASE(CEIL, CEIL);
   NV50_IR_OPCODE_CASE(I2F, CVT);
   NV50_IR_OPCODE_CASE(NOT, NOT);
   NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
   NV50_IR_OPCODE_CASE(SHL, SHL);

   NV50_IR_OPCODE_CASE(AND, AND);
   NV50_IR_OPCODE_CASE(OR, OR);
   NV50_IR_OPCODE_CASE(MOD, MOD);
   NV50_IR_OPCODE_CASE(XOR, XOR);
   NV50_IR_OPCODE_CASE(SAD, SAD);
   NV50_IR_OPCODE_CASE(TXF, TXF);
   NV50_IR_OPCODE_CASE(TXQ, TXQ);

   NV50_IR_OPCODE_CASE(EMIT, EMIT);
   NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);

   NV50_IR_OPCODE_CASE(KIL, DISCARD);

   // integer opcodes reuse the generic operations; the operand type is
   // supplied separately by inferSrcType() / inferDstType()
   NV50_IR_OPCODE_CASE(F2I, CVT);
   NV50_IR_OPCODE_CASE(IDIV, DIV);
   NV50_IR_OPCODE_CASE(IMAX, MAX);
   NV50_IR_OPCODE_CASE(IMIN, MIN);
   NV50_IR_OPCODE_CASE(IABS, ABS);
   NV50_IR_OPCODE_CASE(INEG, NEG);
   NV50_IR_OPCODE_CASE(ISGE, SET);
   NV50_IR_OPCODE_CASE(ISHR, SHR);
   NV50_IR_OPCODE_CASE(ISLT, SET);
   NV50_IR_OPCODE_CASE(F2U, CVT);
   NV50_IR_OPCODE_CASE(U2F, CVT);
   NV50_IR_OPCODE_CASE(UADD, ADD);
   NV50_IR_OPCODE_CASE(UDIV, DIV);
   NV50_IR_OPCODE_CASE(UMAD, MAD);
   NV50_IR_OPCODE_CASE(UMAX, MAX);
   NV50_IR_OPCODE_CASE(UMIN, MIN);
   NV50_IR_OPCODE_CASE(UMOD, MOD);
   NV50_IR_OPCODE_CASE(UMUL, MUL);
   NV50_IR_OPCODE_CASE(USEQ, SET);
   NV50_IR_OPCODE_CASE(USGE, SET);
   NV50_IR_OPCODE_CASE(USHR, SHR);
   NV50_IR_OPCODE_CASE(USLT, SET);
   NV50_IR_OPCODE_CASE(USNE, SET);

   NV50_IR_OPCODE_CASE(SAMPLE, TEX);
   NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
   NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
   NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
   NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
   NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
   NV50_IR_OPCODE_CASE(GATHER4, TXG);
   NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);

   NV50_IR_OPCODE_CASE(TEX2, TEX);
   NV50_IR_OPCODE_CASE(TXB2, TXB);
   NV50_IR_OPCODE_CASE(TXL2, TXL);

   NV50_IR_OPCODE_CASE(END, EXIT);

   default:
      return nv50_ir::OP_NOP;
   }
}
581
582 bool Instruction::checkDstSrcAliasing() const
583 {
584 if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
585 return false;
586
587 for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
588 if (insn->Src[s].Register.File == TGSI_FILE_NULL)
589 break;
590 if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
591 insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
592 return true;
593 }
594 return false;
595 }
596
/**
 * A TGSI token stream plus everything learned about it in a scanning pass.
 *
 * scanSource() walks the tokens once, copies all instructions into insns and
 * fills the nv50_ir_prog_info passed to the constructor as well as the
 * members below.
 */
class Source
{
public:
   Source(struct nv50_ir_prog_info *);
   ~Source();

public:
   // Runs the scan; returns false on allocation failure or if slot
   // assignment (info->assignSlots) reports an error.
   bool scanSource();
   // Number of registers in a TGSI file: highest declared index + 1.
   unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }

public:
   struct tgsi_shader_info scan;          // filled by tgsi_scan_shader()
   struct tgsi_full_instruction *insns;   // copy of all instructions, owned
   const struct tgsi_token *tokens;       // taken from info->bin.source
   struct nv50_ir_prog_info *info;

   // One entry per array, indexed by Dim.Index2D; .u32 is written as
   // ((last + 1) << 2) | components by scanDeclaration().
   nv50_ir::DynArray tempArrays;
   nv50_ir::DynArray immdArrays;
   int tempArrayCount;
   int immdArrayCount;

   // Set when a TGSI_FILE_TEMPORARY register is accessed indirectly.
   bool mainTempsInLMem;

   // Output index recorded for CLIPVERTEX (or the first POSITION output of a
   // non-fragment shader); -1 if none was declared.
   int clipVertexOutput;

   struct TextureView {
      uint8_t target; // TGSI_TEXTURE_*
   };
   std::vector<TextureView> textureViews;

   struct Resource {
      uint8_t target; // TGSI_TEXTURE_*
      bool raw;
      uint8_t slot; // $surface index
   };
   std::vector<Resource> resources;

private:
   int inferSysValDirection(unsigned sn) const;
   bool scanDeclaration(const struct tgsi_full_declaration *);
   bool scanInstruction(const struct tgsi_full_instruction *);
   void scanProperty(const struct tgsi_full_property *);
   void scanImmediate(const struct tgsi_full_immediate *);

   inline bool isEdgeFlagPassthrough(const Instruction&) const;
};
643
644 Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
645 {
646 tokens = (const struct tgsi_token *)info->bin.source;
647
648 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
649 tgsi_dump(tokens, 0);
650
651 mainTempsInLMem = FALSE;
652 }
653
654 Source::~Source()
655 {
656 if (insns)
657 FREE(insns);
658
659 if (info->immd.data)
660 FREE(info->immd.data);
661 if (info->immd.type)
662 FREE(info->immd.type);
663 }
664
665 bool Source::scanSource()
666 {
667 unsigned insnCount = 0;
668 struct tgsi_parse_context parse;
669
670 tgsi_scan_shader(tokens, &scan);
671
672 insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
673 sizeof(insns[0]));
674 if (!insns)
675 return false;
676
677 clipVertexOutput = -1;
678
679 textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
680 resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
681
682 info->immd.bufSize = 0;
683 tempArrayCount = 0;
684 immdArrayCount = 0;
685
686 info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
687 info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
688 info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
689
690 if (info->type == PIPE_SHADER_FRAGMENT) {
691 info->prop.fp.writesDepth = scan.writes_z;
692 info->prop.fp.usesDiscard = scan.uses_kill;
693 } else
694 if (info->type == PIPE_SHADER_GEOMETRY) {
695 info->prop.gp.instanceCount = 1; // default value
696 }
697
698 info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
699 info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
700
701 tgsi_parse_init(&parse, tokens);
702 while (!tgsi_parse_end_of_tokens(&parse)) {
703 tgsi_parse_token(&parse);
704
705 switch (parse.FullToken.Token.Type) {
706 case TGSI_TOKEN_TYPE_IMMEDIATE:
707 scanImmediate(&parse.FullToken.FullImmediate);
708 break;
709 case TGSI_TOKEN_TYPE_DECLARATION:
710 scanDeclaration(&parse.FullToken.FullDeclaration);
711 break;
712 case TGSI_TOKEN_TYPE_INSTRUCTION:
713 insns[insnCount++] = parse.FullToken.FullInstruction;
714 scanInstruction(&parse.FullToken.FullInstruction);
715 break;
716 case TGSI_TOKEN_TYPE_PROPERTY:
717 scanProperty(&parse.FullToken.FullProperty);
718 break;
719 default:
720 INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
721 break;
722 }
723 }
724 tgsi_parse_free(&parse);
725
726 if (mainTempsInLMem)
727 info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
728
729 if (info->io.genUserClip > 0) {
730 info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
731
732 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
733
734 for (unsigned int n = 0; n < nOut; ++n) {
735 unsigned int i = info->numOutputs++;
736 info->out[i].id = i;
737 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
738 info->out[i].si = n;
739 info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
740 }
741 }
742
743 return info->assignSlots(info) == 0;
744 }
745
746 void Source::scanProperty(const struct tgsi_full_property *prop)
747 {
748 switch (prop->Property.PropertyName) {
749 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
750 info->prop.gp.outputPrim = prop->u[0].Data;
751 break;
752 case TGSI_PROPERTY_GS_INPUT_PRIM:
753 info->prop.gp.inputPrim = prop->u[0].Data;
754 break;
755 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
756 info->prop.gp.maxVertices = prop->u[0].Data;
757 break;
758 #if 0
759 case TGSI_PROPERTY_GS_INSTANCE_COUNT:
760 info->prop.gp.instanceCount = prop->u[0].Data;
761 break;
762 #endif
763 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
764 info->prop.fp.separateFragData = TRUE;
765 break;
766 case TGSI_PROPERTY_FS_COORD_ORIGIN:
767 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
768 // we don't care
769 break;
770 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
771 info->io.genUserClip = -1;
772 break;
773 default:
774 INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
775 break;
776 }
777 }
778
779 void Source::scanImmediate(const struct tgsi_full_immediate *imm)
780 {
781 const unsigned n = info->immd.count++;
782
783 assert(n < scan.immediate_count);
784
785 for (int c = 0; c < 4; ++c)
786 info->immd.data[n * 4 + c] = imm->u[c].Uint;
787
788 info->immd.type[n] = imm->Immediate.DataType;
789 }
790
791 int Source::inferSysValDirection(unsigned sn) const
792 {
793 switch (sn) {
794 case TGSI_SEMANTIC_INSTANCEID:
795 case TGSI_SEMANTIC_VERTEXID:
796 return 1;
797 #if 0
798 case TGSI_SEMANTIC_LAYER:
799 case TGSI_SEMANTIC_VIEWPORTINDEX:
800 return 0;
801 #endif
802 case TGSI_SEMANTIC_PRIMID:
803 return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
804 default:
805 return 0;
806 }
807 }
808
809 bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
810 {
811 unsigned i;
812 unsigned sn = TGSI_SEMANTIC_GENERIC;
813 unsigned si = 0;
814 const unsigned first = decl->Range.First, last = decl->Range.Last;
815
816 if (decl->Declaration.Semantic) {
817 sn = decl->Semantic.Name;
818 si = decl->Semantic.Index;
819 }
820
821 switch (decl->Declaration.File) {
822 case TGSI_FILE_INPUT:
823 if (info->type == PIPE_SHADER_VERTEX) {
824 // all vertex attributes are equal
825 for (i = first; i <= last; ++i) {
826 info->in[i].sn = TGSI_SEMANTIC_GENERIC;
827 info->in[i].si = i;
828 }
829 } else {
830 for (i = first; i <= last; ++i, ++si) {
831 info->in[i].id = i;
832 info->in[i].sn = sn;
833 info->in[i].si = si;
834 if (info->type == PIPE_SHADER_FRAGMENT) {
835 // translate interpolation mode
836 switch (decl->Interp.Interpolate) {
837 case TGSI_INTERPOLATE_CONSTANT:
838 info->in[i].flat = 1;
839 break;
840 case TGSI_INTERPOLATE_COLOR:
841 info->in[i].sc = 1;
842 break;
843 case TGSI_INTERPOLATE_LINEAR:
844 info->in[i].linear = 1;
845 break;
846 default:
847 break;
848 }
849 if (decl->Interp.Centroid)
850 info->in[i].centroid = 1;
851 }
852 }
853 }
854 break;
855 case TGSI_FILE_OUTPUT:
856 for (i = first; i <= last; ++i, ++si) {
857 switch (sn) {
858 case TGSI_SEMANTIC_POSITION:
859 if (info->type == PIPE_SHADER_FRAGMENT)
860 info->io.fragDepth = i;
861 else
862 if (clipVertexOutput < 0)
863 clipVertexOutput = i;
864 break;
865 case TGSI_SEMANTIC_COLOR:
866 if (info->type == PIPE_SHADER_FRAGMENT)
867 info->prop.fp.numColourResults++;
868 break;
869 case TGSI_SEMANTIC_EDGEFLAG:
870 info->io.edgeFlagOut = i;
871 break;
872 case TGSI_SEMANTIC_CLIPVERTEX:
873 clipVertexOutput = i;
874 break;
875 case TGSI_SEMANTIC_CLIPDIST:
876 info->io.clipDistanceMask |=
877 decl->Declaration.UsageMask << (si * 4);
878 info->io.genUserClip = -1;
879 break;
880 default:
881 break;
882 }
883 info->out[i].id = i;
884 info->out[i].sn = sn;
885 info->out[i].si = si;
886 }
887 break;
888 case TGSI_FILE_SYSTEM_VALUE:
889 switch (sn) {
890 case TGSI_SEMANTIC_INSTANCEID:
891 info->io.instanceId = first;
892 break;
893 case TGSI_SEMANTIC_VERTEXID:
894 info->io.vertexId = first;
895 break;
896 default:
897 break;
898 }
899 for (i = first; i <= last; ++i, ++si) {
900 info->sv[i].sn = sn;
901 info->sv[i].si = si;
902 info->sv[i].input = inferSysValDirection(sn);
903 }
904 break;
905 case TGSI_FILE_RESOURCE:
906 for (i = first; i <= last; ++i) {
907 resources[i].target = decl->Resource.Resource;
908 resources[i].raw = decl->Resource.Raw;
909 resources[i].slot = i;
910 }
911 break;
912 case TGSI_FILE_SAMPLER_VIEW:
913 for (i = first; i <= last; ++i)
914 textureViews[i].target = decl->SamplerView.Resource;
915 break;
916 case TGSI_FILE_IMMEDIATE_ARRAY:
917 {
918 if (decl->Dim.Index2D >= immdArrayCount)
919 immdArrayCount = decl->Dim.Index2D + 1;
920 immdArrays[decl->Dim.Index2D].u32 = (last + 1) << 2;
921 int c;
922 uint32_t base, count;
923 switch (decl->Declaration.UsageMask) {
924 case 0x1: c = 1; break;
925 case 0x3: c = 2; break;
926 default:
927 c = 4;
928 break;
929 }
930 immdArrays[decl->Dim.Index2D].u32 |= c;
931 count = (last + 1) * c;
932 base = info->immd.bufSize / 4;
933 info->immd.bufSize = (info->immd.bufSize + count * 4 + 0xf) & ~0xf;
934 info->immd.buf = (uint32_t *)REALLOC(info->immd.buf, base * 4,
935 info->immd.bufSize);
936 // NOTE: this assumes array declarations are ordered by Dim.Index2D
937 for (i = 0; i < count; ++i)
938 info->immd.buf[base + i] = decl->ImmediateData.u[i].Uint;
939 }
940 break;
941 case TGSI_FILE_TEMPORARY_ARRAY:
942 {
943 if (decl->Dim.Index2D >= tempArrayCount)
944 tempArrayCount = decl->Dim.Index2D + 1;
945 tempArrays[decl->Dim.Index2D].u32 = (last + 1) << 2;
946 int c;
947 uint32_t count;
948 switch (decl->Declaration.UsageMask) {
949 case 0x1: c = 1; break;
950 case 0x3: c = 2; break;
951 default:
952 c = 4;
953 break;
954 }
955 tempArrays[decl->Dim.Index2D].u32 |= c;
956 count = (last + 1) * c;
957 info->bin.tlsSpace += (info->bin.tlsSpace + count * 4 + 0xf) & ~0xf;
958 }
959 break;
960 case TGSI_FILE_NULL:
961 case TGSI_FILE_TEMPORARY:
962 case TGSI_FILE_ADDRESS:
963 case TGSI_FILE_CONSTANT:
964 case TGSI_FILE_IMMEDIATE:
965 case TGSI_FILE_PREDICATE:
966 case TGSI_FILE_SAMPLER:
967 break;
968 default:
969 ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
970 return false;
971 }
972 return true;
973 }
974
975 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
976 {
977 return insn.getOpcode() == TGSI_OPCODE_MOV &&
978 insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
979 insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
980 }
981
982 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
983 {
984 Instruction insn(inst);
985
986 if (insn.dstCount()) {
987 if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
988 Instruction::DstRegister dst = insn.getDst(0);
989
990 if (dst.isIndirect(0))
991 for (unsigned i = 0; i < info->numOutputs; ++i)
992 info->out[i].mask = 0xf;
993 else
994 info->out[dst.getIndex(0)].mask |= dst.getMask();
995
996 if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE)
997 info->out[dst.getIndex(0)].mask &= 1;
998
999 if (isEdgeFlagPassthrough(insn))
1000 info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
1001 } else
1002 if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
1003 if (insn.getDst(0).isIndirect(0))
1004 mainTempsInLMem = TRUE;
1005 }
1006 }
1007
1008 for (unsigned s = 0; s < insn.srcCount(); ++s) {
1009 Instruction::SrcRegister src = insn.getSrc(s);
1010 if (src.getFile() == TGSI_FILE_TEMPORARY) {
1011 if (src.isIndirect(0))
1012 mainTempsInLMem = TRUE;
1013 } else
1014 if (src.getFile() == TGSI_FILE_RESOURCE) {
1015 if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
1016 info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
1017 0x1 : 0x2;
1018 }
1019 if (src.getFile() != TGSI_FILE_INPUT)
1020 continue;
1021 unsigned mask = insn.srcMask(s);
1022
1023 if (src.isIndirect(0)) {
1024 for (unsigned i = 0; i < info->numInputs; ++i)
1025 info->in[i].mask = 0xf;
1026 } else {
1027 for (unsigned c = 0; c < 4; ++c) {
1028 if (!(mask & (1 << c)))
1029 continue;
1030 int k = src.getSwizzle(c);
1031 int i = src.getIndex(0);
1032 if (info->in[i].sn != TGSI_SEMANTIC_FOG || k == TGSI_SWIZZLE_X)
1033 if (k <= TGSI_SWIZZLE_W)
1034 info->in[i].mask |= 1 << k;
1035 }
1036 }
1037 }
1038 return true;
1039 }
1040
1041 nv50_ir::TexInstruction::Target
1042 Instruction::getTexture(const tgsi::Source *code, int s) const
1043 {
1044 // XXX: indirect access
1045 unsigned int r;
1046
1047 switch (getSrc(s).getFile()) {
1048 case TGSI_FILE_RESOURCE:
1049 r = getSrc(s).getIndex(0);
1050 return translateTexture(code->resources.at(r).target);
1051 case TGSI_FILE_SAMPLER_VIEW:
1052 r = getSrc(s).getIndex(0);
1053 return translateTexture(code->textureViews.at(r).target);
1054 default:
1055 return translateTexture(insn->Texture.Texture);
1056 }
1057 }
1058
1059 } // namespace tgsi
1060
1061 namespace {
1062
1063 using namespace nv50_ir;
1064
/**
 * Translates a scanned TGSI shader (tgsi::Source) into an nv50_ir Program.
 *
 * The constructor receives the target Program and the Source; run() drives
 * the translation. The class derives from BuildUtil, which is declared
 * elsewhere.
 */
class Converter : public BuildUtil
{
public:
   Converter(Program *, const tgsi::Source *);
   ~Converter();

   bool run();

private:
   // A TGSI subroutine: the Function it maps to and its own ValueMap.
   struct Subroutine
   {
      Subroutine(Function *f) : f(f) { }
      Function *f;
      ValueMap values;
   };

   Value *getVertexBase(int s);
   DataArray *getArrayForFile(unsigned file, int idx);
   // Operand access for the current instruction: s/d select the source or
   // destination operand, c the component.
   Value *fetchSrc(int s, int c);
   Value *acquireDst(int d, int c);
   void storeDst(int d, int c, Value *);

   // Variants taking an explicit register and an address value (ptr).
   Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
   void storeDst(const tgsi::Instruction::DstRegister dst, int c,
                 Value *val, Value *ptr);

   Value *applySrcMod(Value *, int s, int c);

   Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
   Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
   Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);

   bool handleInstruction(const struct tgsi_full_instruction *);
   void exportOutputs();
   inline Subroutine *getSubroutine(unsigned ip);
   inline Subroutine *getSubroutine(Function *);
   inline bool isEndOfSubroutine(uint ip);

   void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);

   // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
   void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
   void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
   void handleTXF(Value *dst0[4], int R);
   void handleTXQ(Value *dst0[4], enum TexQuery);
   void handleLIT(Value *dst0[4]);
   void handleUserClipPlanes();

   Symbol *getResourceBase(int r);
   void getResourceCoords(std::vector<Value *>&, int r, int s);

   void handleLOAD(Value *dst0[4]);
   void handleSTORE();

   Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);

   void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);

   Value *buildDot(int dim);

   // Pass over Functions only; basic blocks are not visited (visit(BasicBlock)
   // returns false). Holds a reference back to the Converter.
   class BindArgumentsPass : public Pass {
   public:
      BindArgumentsPass(Converter &conv) : conv(conv) { }

   private:
      Converter &conv;
      Subroutine *sub;

      template<typename T> inline void
      updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
                     T (Function::*proto));

      template<typename T> inline void
      updatePrototype(BitSet *set, void (Function::*updateSet)(),
                      T (Function::*proto));

   protected:
      bool visit(Function *);
      bool visit(BasicBlock *bb) { return false; }
   };

private:
   const struct tgsi::Source *code;
   const struct nv50_ir_prog_info *info;

   // Subroutines keyed by an unsigned value (presumably the instruction
   // pointer of the subroutine - see getSubroutine(unsigned ip); confirm),
   // plus the one currently being processed.
   struct {
      std::map<unsigned, Subroutine> map;
      Subroutine *cur;
   } sub;

   uint ip; // instruction pointer

   tgsi::Instruction tgsi;

   DataType dstTy;
   DataType srcTy;

   DataArray tData; // TGSI_FILE_TEMPORARY
   DataArray aData; // TGSI_FILE_ADDRESS
   DataArray pData; // TGSI_FILE_PREDICATE
   DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
   std::vector<DataArray> lData; // TGSI_FILE_TEMPORARY_ARRAY
   std::vector<DataArray> iData; // TGSI_FILE_IMMEDIATE_ARRAY

   Value *zero;
   Value *fragCoord[4];
   Value *clipVtx[4];

   Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
   uint8_t vtxBaseValid;

   Stack condBBs;  // fork BB, then else clause BB
   Stack joinBBs;  // fork BB, for inserting join ops on ENDIF
   Stack loopBBs;  // loop headers
   Stack breakBBs; // end of / after loop
};
1181
1182 Symbol *
1183 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
1184 {
1185 const int swz = src.getSwizzle(c);
1186
1187 return makeSym(src.getFile(),
1188 src.is2D() ? src.getIndex(1) : 0,
1189 src.isIndirect(0) ? -1 : src.getIndex(0), swz,
1190 src.getIndex(0) * 16 + swz * 4);
1191 }
1192
1193 Symbol *
1194 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
1195 {
1196 return makeSym(dst.getFile(),
1197 dst.is2D() ? dst.getIndex(1) : 0,
1198 dst.isIndirect(0) ? -1 : dst.getIndex(0), c,
1199 dst.getIndex(0) * 16 + c * 4);
1200 }
1201
1202 Symbol *
1203 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
1204 {
1205 Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
1206
1207 sym->reg.fileIndex = fileIdx;
1208
1209 if (idx >= 0) {
1210 if (sym->reg.file == FILE_SHADER_INPUT)
1211 sym->setOffset(info->in[idx].slot[c] * 4);
1212 else
1213 if (sym->reg.file == FILE_SHADER_OUTPUT)
1214 sym->setOffset(info->out[idx].slot[c] * 4);
1215 else
1216 if (sym->reg.file == FILE_SYSTEM_VALUE)
1217 sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
1218 else
1219 sym->setOffset(address);
1220 } else {
1221 sym->setOffset(address);
1222 }
1223 return sym;
1224 }
1225
1226 static inline uint8_t
1227 translateInterpMode(const struct nv50_ir_varying *var, operation& op)
1228 {
1229 uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
1230
1231 if (var->flat)
1232 mode = NV50_IR_INTERP_FLAT;
1233 else
1234 if (var->linear)
1235 mode = NV50_IR_INTERP_LINEAR;
1236 else
1237 if (var->sc)
1238 mode = NV50_IR_INTERP_SC;
1239
1240 op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
1241 ? OP_PINTERP : OP_LINTERP;
1242
1243 if (var->centroid)
1244 mode |= NV50_IR_INTERP_CENTROID;
1245
1246 return mode;
1247 }
1248
1249 Value *
1250 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1251 {
1252 operation op;
1253
1254 // XXX: no way to know interpolation mode if we don't know what's accessed
1255 const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
1256 src.getIndex(0)], op);
1257
1258 Instruction *insn = new_Instruction(func, op, TYPE_F32);
1259
1260 insn->setDef(0, getScratch());
1261 insn->setSrc(0, srcToSym(src, c));
1262 if (op == OP_PINTERP)
1263 insn->setSrc(1, fragCoord[3]);
1264 if (ptr)
1265 insn->setIndirect(0, 0, ptr);
1266
1267 insn->setInterpolate(mode);
1268
1269 bb->insertTail(insn);
1270 return insn->getDef(0);
1271 }
1272
1273 Value *
1274 Converter::applySrcMod(Value *val, int s, int c)
1275 {
1276 Modifier m = tgsi.getSrc(s).getMod(c);
1277 DataType ty = tgsi.inferSrcType();
1278
1279 if (m & Modifier(NV50_IR_MOD_ABS))
1280 val = mkOp1v(OP_ABS, ty, getScratch(), val);
1281
1282 if (m & Modifier(NV50_IR_MOD_NEG))
1283 val = mkOp1v(OP_NEG, ty, getScratch(), val);
1284
1285 return val;
1286 }
1287
1288 Value *
1289 Converter::getVertexBase(int s)
1290 {
1291 assert(s < 5);
1292 if (!(vtxBaseValid & (1 << s))) {
1293 const int index = tgsi.getSrc(s).getIndex(1);
1294 Value *rel = NULL;
1295 if (tgsi.getSrc(s).isIndirect(1))
1296 rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
1297 vtxBaseValid |= 1 << s;
1298 vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel);
1299 }
1300 return vtxBase[s];
1301 }
1302
1303 Value *
1304 Converter::fetchSrc(int s, int c)
1305 {
1306 Value *res;
1307 Value *ptr = NULL, *dimRel = NULL;
1308
1309 tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
1310
1311 if (src.isIndirect(0))
1312 ptr = fetchSrc(src.getIndirect(0), 0, NULL);
1313
1314 if (src.is2D()) {
1315 switch (src.getFile()) {
1316 case TGSI_FILE_INPUT:
1317 dimRel = getVertexBase(s);
1318 break;
1319 case TGSI_FILE_CONSTANT:
1320 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
1321 if (src.isIndirect(1))
1322 dimRel = fetchSrc(src.getIndirect(1), 0, 0);
1323 break;
1324 default:
1325 break;
1326 }
1327 }
1328
1329 res = fetchSrc(src, c, ptr);
1330
1331 if (dimRel)
1332 res->getInsn()->setIndirect(0, 1, dimRel);
1333
1334 return applySrcMod(res, s, c);
1335 }
1336
1337 Converter::DataArray *
1338 Converter::getArrayForFile(unsigned file, int idx)
1339 {
1340 switch (file) {
1341 case TGSI_FILE_TEMPORARY:
1342 return &tData;
1343 case TGSI_FILE_PREDICATE:
1344 return &pData;
1345 case TGSI_FILE_ADDRESS:
1346 return &aData;
1347 case TGSI_FILE_TEMPORARY_ARRAY:
1348 assert(idx < code->tempArrayCount);
1349 return &lData[idx];
1350 case TGSI_FILE_IMMEDIATE_ARRAY:
1351 assert(idx < code->immdArrayCount);
1352 return &iData[idx];
1353 case TGSI_FILE_OUTPUT:
1354 assert(prog->getType() == Program::TYPE_FRAGMENT);
1355 return &oData;
1356 default:
1357 assert(!"invalid/unhandled TGSI source file");
1358 return NULL;
1359 }
1360 }
1361
1362 Value *
1363 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1364 {
1365 const int idx2d = src.is2D() ? src.getIndex(1) : 0;
1366 const int idx = src.getIndex(0);
1367 const int swz = src.getSwizzle(c);
1368
1369 switch (src.getFile()) {
1370 case TGSI_FILE_IMMEDIATE:
1371 assert(!ptr);
1372 return loadImm(NULL, info->immd.data[idx * 4 + swz]);
1373 case TGSI_FILE_CONSTANT:
1374 return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
1375 case TGSI_FILE_INPUT:
1376 if (prog->getType() == Program::TYPE_FRAGMENT) {
1377 // don't load masked inputs, won't be assigned a slot
1378 if (!ptr && !(info->in[idx].mask & (1 << swz)))
1379 return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
1380 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
1381 return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
1382 return interpolate(src, c, ptr);
1383 }
1384 return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
1385 case TGSI_FILE_OUTPUT:
1386 assert(!"load from output file");
1387 return NULL;
1388 case TGSI_FILE_SYSTEM_VALUE:
1389 assert(!ptr);
1390 return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
1391 default:
1392 return getArrayForFile(src.getFile(), idx2d)->load(
1393 sub.cur->values, idx, swz, ptr);
1394 }
1395 }
1396
1397 Value *
1398 Converter::acquireDst(int d, int c)
1399 {
1400 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
1401 const unsigned f = dst.getFile();
1402 const int idx = dst.getIndex(0);
1403 const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
1404
1405 if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE)
1406 return NULL;
1407
1408 if (dst.isIndirect(0) ||
1409 f == TGSI_FILE_TEMPORARY_ARRAY ||
1410 f == TGSI_FILE_SYSTEM_VALUE ||
1411 (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
1412 return getScratch();
1413
1414 return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
1415 }
1416
1417 void
1418 Converter::storeDst(int d, int c, Value *val)
1419 {
1420 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
1421
1422 switch (tgsi.getSaturate()) {
1423 case TGSI_SAT_NONE:
1424 break;
1425 case TGSI_SAT_ZERO_ONE:
1426 mkOp1(OP_SAT, dstTy, val, val);
1427 break;
1428 case TGSI_SAT_MINUS_PLUS_ONE:
1429 mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f));
1430 mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f));
1431 break;
1432 default:
1433 assert(!"invalid saturation mode");
1434 break;
1435 }
1436
1437 Value *ptr = dst.isIndirect(0) ?
1438 fetchSrc(dst.getIndirect(0), 0, NULL) : NULL;
1439
1440 if (info->io.genUserClip > 0 &&
1441 dst.getFile() == TGSI_FILE_OUTPUT &&
1442 !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
1443 mkMov(clipVtx[c], val);
1444 val = clipVtx[c];
1445 }
1446
1447 storeDst(dst, c, val, ptr);
1448 }
1449
1450 void
1451 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
1452 Value *val, Value *ptr)
1453 {
1454 const unsigned f = dst.getFile();
1455 const int idx = dst.getIndex(0);
1456 const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
1457
1458 if (f == TGSI_FILE_SYSTEM_VALUE) {
1459 assert(!ptr);
1460 mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
1461 } else
1462 if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
1463 if (ptr || (info->out[idx].mask & (1 << c)))
1464 mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
1465 } else
1466 if (f == TGSI_FILE_TEMPORARY ||
1467 f == TGSI_FILE_TEMPORARY_ARRAY ||
1468 f == TGSI_FILE_PREDICATE ||
1469 f == TGSI_FILE_ADDRESS ||
1470 f == TGSI_FILE_OUTPUT) {
1471 getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
1472 } else {
1473 assert(!"invalid dst file");
1474 }
1475 }
1476
// Loop header: runs 'chan' over 0..3 and executes the statement that
// follows only for channels that destination 'd' of 'inst' does not mask.
// 'chan' must be an assignable integer variable; it is 4 after the loop.
// The expansion ends in an unbraced 'if', so the controlled statement must
// not be followed by an 'else'.
// The 'chan' and 'inst' parameters are parenthesized so that arbitrary
// expressions (e.g. *ptr for 'inst') expand correctly.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst)                     \
   for ((chan) = 0; (chan) < 4; ++(chan))                               \
      if (!(inst).getDst(d).isMasked(chan))
1480
1481 Value *
1482 Converter::buildDot(int dim)
1483 {
1484 assert(dim > 0);
1485
1486 Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
1487 Value *dotp = getScratch();
1488
1489 mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
1490
1491 for (int c = 1; c < dim; ++c) {
1492 src0 = fetchSrc(0, c);
1493 src1 = fetchSrc(1, c);
1494 mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp);
1495 }
1496 return dotp;
1497 }
1498
1499 void
1500 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
1501 {
1502 FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
1503 join->fixed = 1;
1504 conv->insertHead(join);
1505
1506 fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
1507 fork->insertBefore(fork->getExit(), fork->joinAt);
1508 }
1509
1510 void
1511 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
1512 {
1513 unsigned rIdx = 0, sIdx = 0;
1514
1515 if (R >= 0)
1516 rIdx = tgsi.getSrc(R).getIndex(0);
1517 if (S >= 0)
1518 sIdx = tgsi.getSrc(S).getIndex(0);
1519
1520 tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
1521
1522 if (tgsi.getSrc(R).isIndirect(0)) {
1523 tex->tex.rIndirectSrc = s;
1524 tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
1525 }
1526 if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
1527 tex->tex.sIndirectSrc = s;
1528 tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
1529 }
1530 }
1531
1532 void
1533 Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
1534 {
1535 TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
1536 tex->tex.query = query;
1537 unsigned int c, d;
1538
1539 for (d = 0, c = 0; c < 4; ++c) {
1540 if (!dst0[c])
1541 continue;
1542 tex->tex.mask |= 1 << c;
1543 tex->setDef(d++, dst0[c]);
1544 }
1545 tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
1546
1547 setTexRS(tex, c, 1, -1);
1548
1549 bb->insertTail(tex);
1550 }
1551
1552 void
1553 Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
1554 {
1555 Value *proj = fetchSrc(0, 3);
1556 Instruction *insn = proj->getUniqueInsn();
1557 int c;
1558
1559 if (insn->op == OP_PINTERP) {
1560 bb->insertTail(insn = cloneForward(func, insn));
1561 insn->op = OP_LINTERP;
1562 insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
1563 insn->setSrc(1, NULL);
1564 proj = insn->getDef(0);
1565 }
1566 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
1567
1568 for (c = 0; c < 4; ++c) {
1569 if (!(mask & (1 << c)))
1570 continue;
1571 if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
1572 continue;
1573 mask &= ~(1 << c);
1574
1575 bb->insertTail(insn = cloneForward(func, insn));
1576 insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
1577 insn->setSrc(1, proj);
1578 dst[c] = insn->getDef(0);
1579 }
1580 if (!mask)
1581 return;
1582
1583 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
1584
1585 for (c = 0; c < 4; ++c)
1586 if (mask & (1 << c))
1587 dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
1588 }
1589
1590 // order of nv50 ir sources: x y z layer lod/bias shadow
1591 // order of TGSI TEX sources: x y z layer shadow lod/bias
1592 // lowering will finally set the hw specific order (like array first on nvc0)
1593 void
1594 Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
1595 {
1596 Value *val;
1597 Value *arg[4], *src[8];
1598 Value *lod = NULL, *shd = NULL;
1599 unsigned int s, c, d;
1600 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
1601
1602 TexInstruction::Target tgt = tgsi.getTexture(code, R);
1603
1604 for (s = 0; s < tgt.getArgCount(); ++s)
1605 arg[s] = src[s] = fetchSrc(0, s);
1606
1607 if (texi->op == OP_TXL || texi->op == OP_TXB)
1608 lod = fetchSrc(L >> 4, L & 3);
1609
1610 if (C == 0x0f)
1611 C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
1612
1613 if (tgt.isShadow())
1614 shd = fetchSrc(C >> 4, C & 3);
1615
1616 if (texi->op == OP_TXD) {
1617 for (c = 0; c < tgt.getDim(); ++c) {
1618 texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
1619 texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
1620 }
1621 }
1622
1623 // cube textures don't care about projection value, it's divided out
1624 if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
1625 unsigned int n = tgt.getDim();
1626 if (shd) {
1627 arg[n] = shd;
1628 ++n;
1629 assert(tgt.getDim() == tgt.getArgCount());
1630 }
1631 loadProjTexCoords(src, arg, (1 << n) - 1);
1632 if (shd)
1633 shd = src[n - 1];
1634 }
1635
1636 if (tgt.isCube()) {
1637 for (c = 0; c < 3; ++c)
1638 src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
1639 val = getScratch();
1640 mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
1641 mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
1642 mkOp1(OP_RCP, TYPE_F32, val, val);
1643 for (c = 0; c < 3; ++c)
1644 src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
1645 }
1646
1647 for (c = 0, d = 0; c < 4; ++c) {
1648 if (dst[c]) {
1649 texi->setDef(d++, dst[c]);
1650 texi->tex.mask |= 1 << c;
1651 } else {
1652 // NOTE: maybe hook up def too, for CSE
1653 }
1654 }
1655 for (s = 0; s < tgt.getArgCount(); ++s)
1656 texi->setSrc(s, src[s]);
1657 if (lod)
1658 texi->setSrc(s++, lod);
1659 if (shd)
1660 texi->setSrc(s++, shd);
1661
1662 setTexRS(texi, s, R, S);
1663
1664 if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
1665 texi->tex.levelZero = true;
1666
1667 bb->insertTail(texi);
1668 }
1669
1670 // 1st source: xyz = coordinates, w = lod
1671 // 2nd source: offset
1672 void
1673 Converter::handleTXF(Value *dst[4], int R)
1674 {
1675 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
1676 unsigned int c, d, s;
1677
1678 texi->tex.target = tgsi.getTexture(code, R);
1679
1680 for (c = 0, d = 0; c < 4; ++c) {
1681 if (dst[c]) {
1682 texi->setDef(d++, dst[c]);
1683 texi->tex.mask |= 1 << c;
1684 }
1685 }
1686 for (c = 0; c < texi->tex.target.getArgCount(); ++c)
1687 texi->setSrc(c, fetchSrc(0, c));
1688 texi->setSrc(c++, fetchSrc(0, 3)); // lod
1689
1690 setTexRS(texi, c, R, -1);
1691
1692 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
1693 for (c = 0; c < 3; ++c) {
1694 texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info);
1695 if (texi->tex.offset[s][c])
1696 texi->tex.useOffsets = s + 1;
1697 }
1698 }
1699
1700 bb->insertTail(texi);
1701 }
1702
1703 void
1704 Converter::handleLIT(Value *dst0[4])
1705 {
1706 Value *val0 = NULL;
1707 unsigned int mask = tgsi.getDst(0).getMask();
1708
1709 if (mask & (1 << 0))
1710 loadImm(dst0[0], 1.0f);
1711
1712 if (mask & (1 << 3))
1713 loadImm(dst0[3], 1.0f);
1714
1715 if (mask & (3 << 1)) {
1716 val0 = getScratch();
1717 mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
1718 if (mask & (1 << 1))
1719 mkMov(dst0[1], val0);
1720 }
1721
1722 if (mask & (1 << 2)) {
1723 Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
1724 Value *val1 = getScratch(), *val3 = getScratch();
1725
1726 Value *pos128 = loadImm(NULL, +127.999999f);
1727 Value *neg128 = loadImm(NULL, -127.999999f);
1728
1729 mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
1730 mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
1731 mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
1732 mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
1733
1734 mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0);
1735 }
1736 }
1737
1738 static inline bool
1739 isResourceSpecial(const int r)
1740 {
1741 return (r == TGSI_RESOURCE_GLOBAL ||
1742 r == TGSI_RESOURCE_LOCAL ||
1743 r == TGSI_RESOURCE_PRIVATE ||
1744 r == TGSI_RESOURCE_INPUT);
1745 }
1746
1747 static inline bool
1748 isResourceRaw(const struct tgsi::Source *code, const int r)
1749 {
1750 return isResourceSpecial(r) || code->resources[r].raw;
1751 }
1752
1753 static inline nv50_ir::TexTarget
1754 getResourceTarget(const struct tgsi::Source *code, int r)
1755 {
1756 if (isResourceSpecial(r))
1757 return nv50_ir::TEX_TARGET_BUFFER;
1758 return tgsi::translateTexture(code->resources.at(r).target);
1759 }
1760
1761 Symbol *
1762 Converter::getResourceBase(const int r)
1763 {
1764 Symbol *sym = NULL;
1765
1766 switch (r) {
1767 case TGSI_RESOURCE_GLOBAL:
1768 sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);
1769 break;
1770 case TGSI_RESOURCE_LOCAL:
1771 assert(prog->getType() == Program::TYPE_COMPUTE);
1772 sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
1773 info->prop.cp.sharedOffset);
1774 break;
1775 case TGSI_RESOURCE_PRIVATE:
1776 sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
1777 info->bin.tlsSpace);
1778 break;
1779 case TGSI_RESOURCE_INPUT:
1780 assert(prog->getType() == Program::TYPE_COMPUTE);
1781 sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
1782 info->prop.cp.inputOffset);
1783 break;
1784 default:
1785 sym = new_Symbol(prog,
1786 nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
1787 break;
1788 }
1789 return sym;
1790 }
1791
1792 void
1793 Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
1794 {
1795 const int arg =
1796 TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
1797
1798 for (int c = 0; c < arg; ++c)
1799 coords.push_back(fetchSrc(s, c));
1800
1801 // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
1802 if (r == TGSI_RESOURCE_LOCAL ||
1803 r == TGSI_RESOURCE_PRIVATE ||
1804 r == TGSI_RESOURCE_INPUT)
1805 coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
1806 coords[0]);
1807 }
1808
// Splits the components selected by a 4-bit component mask into at most two
// runs of consecutive components, one memory operation each.
//  comp[i]: first component of run i
//  size[i]: number of components in run i
// comp and size must be zero-initialized by the caller.
// A run of 3 components is split into two operations (2+1, or 1+2 if it
// starts at component 1) since there is no 3-component access size.
// Returns the number of runs.  Note that an empty mask still returns 1,
// with size[0] == 0.
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   for (; mask; mask >>= 1) {
      if (mask & 1) {
         size[n]++;
      } else if (size[n]) {
         // gap after the first run: the second run starts behind the
         // first run and the gap, which includes any components that
         // were skipped in front of the first run (comp[0])
         n = 1;
         comp[1] = comp[0] + size[0] + 1;
      } else {
         // component skipped in front of the current run
         comp[n]++;
      }
   }

   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
1833
1834 // For raw loads, granularity is 4 byte.
1835 // Usage of the texture read mask on OP_SULDP is not allowed.
1836 void
1837 Converter::handleLOAD(Value *dst0[4])
1838 {
1839 const int r = tgsi.getSrc(0).getIndex(0);
1840 int c;
1841 std::vector<Value *> off, src, ldv, def;
1842
1843 getResourceCoords(off, r, 1);
1844
1845 if (isResourceRaw(code, r)) {
1846 uint8_t mask = 0;
1847 uint8_t comp[2] = { 0, 0 };
1848 uint8_t size[2] = { 0, 0 };
1849
1850 Symbol *base = getResourceBase(r);
1851
1852 // determine the base and size of the at most 2 load ops
1853 for (c = 0; c < 4; ++c)
1854 if (!tgsi.getDst(0).isMasked(c))
1855 mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
1856
1857 int n = partitionLoadStore(comp, size, mask);
1858
1859 src = off;
1860
1861 def.resize(4); // index by component, the ones we need will be non-NULL
1862 for (c = 0; c < 4; ++c) {
1863 if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
1864 def[c] = dst0[c];
1865 else
1866 if (mask & (1 << c))
1867 def[c] = getScratch();
1868 }
1869
1870 const bool useLd = isResourceSpecial(r) ||
1871 (info->io.nv50styleSurfaces &&
1872 code->resources[r].target == TGSI_TEXTURE_BUFFER);
1873
1874 for (int i = 0; i < n; ++i) {
1875 ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
1876
1877 if (comp[i]) // adjust x component of source address if necessary
1878 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
1879 off[0], mkImm(comp[i] * 4));
1880 else
1881 src[0] = off[0];
1882
1883 if (useLd) {
1884 Instruction *ld =
1885 mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
1886 for (size_t c = 1; c < ldv.size(); ++c)
1887 ld->setDef(c, ldv[c]);
1888 } else {
1889 mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
1890 0, ldv, src)->dType = typeOfSize(size[i] * 4);
1891 }
1892 }
1893 } else {
1894 def.resize(4);
1895 for (c = 0; c < 4; ++c) {
1896 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
1897 def[c] = getScratch();
1898 else
1899 def[c] = dst0[c];
1900 }
1901
1902 mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
1903 def, off);
1904 }
1905 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1906 if (dst0[c] != def[c])
1907 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
1908 }
1909
1910 // For formatted stores, the write mask on OP_SUSTP can be used.
1911 // Raw stores have to be split.
1912 void
1913 Converter::handleSTORE()
1914 {
1915 const int r = tgsi.getDst(0).getIndex(0);
1916 int c;
1917 std::vector<Value *> off, src, dummy;
1918
1919 getResourceCoords(off, r, 0);
1920 src = off;
1921 const int s = src.size();
1922
1923 if (isResourceRaw(code, r)) {
1924 uint8_t comp[2] = { 0, 0 };
1925 uint8_t size[2] = { 0, 0 };
1926
1927 int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
1928
1929 Symbol *base = getResourceBase(r);
1930
1931 const bool useSt = isResourceSpecial(r) ||
1932 (info->io.nv50styleSurfaces &&
1933 code->resources[r].target == TGSI_TEXTURE_BUFFER);
1934
1935 for (int i = 0; i < n; ++i) {
1936 if (comp[i]) // adjust x component of source address if necessary
1937 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
1938 off[0], mkImm(comp[i] * 4));
1939 else
1940 src[0] = off[0];
1941
1942 const DataType stTy = typeOfSize(size[i] * 4);
1943
1944 if (useSt) {
1945 Instruction *st =
1946 mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
1947 for (c = 1; c < size[i]; ++c)
1948 st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
1949 st->setIndirect(0, 0, src[0]);
1950 } else {
1951 // attach values to be stored
1952 src.resize(s + size[i]);
1953 for (c = 0; c < size[i]; ++c)
1954 src[s + c] = fetchSrc(1, comp[i] + c);
1955 mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
1956 0, dummy, src)->setType(stTy);
1957 }
1958 }
1959 } else {
1960 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1961 src.push_back(fetchSrc(1, c));
1962
1963 mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
1964 dummy, src)->tex.mask = tgsi.getDst(0).getMask();
1965 }
1966 }
1967
1968 Converter::Subroutine *
1969 Converter::getSubroutine(unsigned ip)
1970 {
1971 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
1972
1973 if (it == sub.map.end())
1974 it = sub.map.insert(std::make_pair(
1975 ip, Subroutine(new Function(prog, "SUB", ip)))).first;
1976
1977 return &it->second;
1978 }
1979
1980 Converter::Subroutine *
1981 Converter::getSubroutine(Function *f)
1982 {
1983 unsigned ip = f->getLabel();
1984 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
1985
1986 if (it == sub.map.end())
1987 it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
1988
1989 return &it->second;
1990 }
1991
1992 bool
1993 Converter::isEndOfSubroutine(uint ip)
1994 {
1995 assert(ip < code->scan.num_instructions);
1996 tgsi::Instruction insn(&code->insns[ip]);
1997 return (insn.getOpcode() == TGSI_OPCODE_END ||
1998 insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
1999 // does END occur at end of main or the very end ?
2000 insn.getOpcode() == TGSI_OPCODE_BGNSUB);
2001 }
2002
2003 bool
2004 Converter::handleInstruction(const struct tgsi_full_instruction *insn)
2005 {
2006 Value *dst0[4], *rDst0[4];
2007 Value *src0, *src1, *src2;
2008 Value *val0, *val1;
2009 int c;
2010
2011 tgsi = tgsi::Instruction(insn);
2012
2013 bool useScratchDst = tgsi.checkDstSrcAliasing();
2014
2015 operation op = tgsi.getOP();
2016 dstTy = tgsi.inferDstType();
2017 srcTy = tgsi.inferSrcType();
2018
2019 unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
2020
2021 if (tgsi.dstCount()) {
2022 for (c = 0; c < 4; ++c) {
2023 rDst0[c] = acquireDst(0, c);
2024 dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
2025 }
2026 }
2027
2028 switch (tgsi.getOpcode()) {
2029 case TGSI_OPCODE_ADD:
2030 case TGSI_OPCODE_UADD:
2031 case TGSI_OPCODE_AND:
2032 case TGSI_OPCODE_DIV:
2033 case TGSI_OPCODE_IDIV:
2034 case TGSI_OPCODE_UDIV:
2035 case TGSI_OPCODE_MAX:
2036 case TGSI_OPCODE_MIN:
2037 case TGSI_OPCODE_IMAX:
2038 case TGSI_OPCODE_IMIN:
2039 case TGSI_OPCODE_UMAX:
2040 case TGSI_OPCODE_UMIN:
2041 case TGSI_OPCODE_MOD:
2042 case TGSI_OPCODE_UMOD:
2043 case TGSI_OPCODE_MUL:
2044 case TGSI_OPCODE_UMUL:
2045 case TGSI_OPCODE_OR:
2046 case TGSI_OPCODE_POW:
2047 case TGSI_OPCODE_SHL:
2048 case TGSI_OPCODE_ISHR:
2049 case TGSI_OPCODE_USHR:
2050 case TGSI_OPCODE_SUB:
2051 case TGSI_OPCODE_XOR:
2052 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2053 src0 = fetchSrc(0, c);
2054 src1 = fetchSrc(1, c);
2055 mkOp2(op, dstTy, dst0[c], src0, src1);
2056 }
2057 break;
2058 case TGSI_OPCODE_MAD:
2059 case TGSI_OPCODE_UMAD:
2060 case TGSI_OPCODE_SAD:
2061 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2062 src0 = fetchSrc(0, c);
2063 src1 = fetchSrc(1, c);
2064 src2 = fetchSrc(2, c);
2065 mkOp3(op, dstTy, dst0[c], src0, src1, src2);
2066 }
2067 break;
2068 case TGSI_OPCODE_MOV:
2069 case TGSI_OPCODE_ABS:
2070 case TGSI_OPCODE_CEIL:
2071 case TGSI_OPCODE_FLR:
2072 case TGSI_OPCODE_TRUNC:
2073 case TGSI_OPCODE_RCP:
2074 case TGSI_OPCODE_IABS:
2075 case TGSI_OPCODE_INEG:
2076 case TGSI_OPCODE_NOT:
2077 case TGSI_OPCODE_DDX:
2078 case TGSI_OPCODE_DDY:
2079 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2080 mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
2081 break;
2082 case TGSI_OPCODE_RSQ:
2083 src0 = fetchSrc(0, 0);
2084 val0 = getScratch();
2085 mkOp1(OP_ABS, TYPE_F32, val0, src0);
2086 mkOp1(OP_RSQ, TYPE_F32, val0, val0);
2087 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2088 mkMov(dst0[c], val0);
2089 break;
2090 case TGSI_OPCODE_ARL:
2091 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2092 src0 = fetchSrc(0, c);
2093 mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M;
2094 mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4));
2095 }
2096 break;
2097 case TGSI_OPCODE_UARL:
2098 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2099 mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4));
2100 break;
2101 case TGSI_OPCODE_EX2:
2102 case TGSI_OPCODE_LG2:
2103 val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
2104 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2105 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
2106 break;
2107 case TGSI_OPCODE_COS:
2108 case TGSI_OPCODE_SIN:
2109 val0 = getScratch();
2110 if (mask & 7) {
2111 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
2112 mkOp1(op, TYPE_F32, val0, val0);
2113 for (c = 0; c < 3; ++c)
2114 if (dst0[c])
2115 mkMov(dst0[c], val0);
2116 }
2117 if (dst0[3]) {
2118 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
2119 mkOp1(op, TYPE_F32, dst0[3], val0);
2120 }
2121 break;
2122 case TGSI_OPCODE_SCS:
2123 if (mask & 3) {
2124 val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
2125 if (dst0[0])
2126 mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
2127 if (dst0[1])
2128 mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
2129 }
2130 if (dst0[2])
2131 loadImm(dst0[2], 0.0f);
2132 if (dst0[3])
2133 loadImm(dst0[3], 1.0f);
2134 break;
2135 case TGSI_OPCODE_EXP:
2136 src0 = fetchSrc(0, 0);
2137 val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
2138 if (dst0[1])
2139 mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
2140 if (dst0[0])
2141 mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
2142 if (dst0[2])
2143 mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
2144 if (dst0[3])
2145 loadImm(dst0[3], 1.0f);
2146 break;
2147 case TGSI_OPCODE_LOG:
2148 src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
2149 val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
2150 if (dst0[0] || dst0[1])
2151 val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
2152 if (dst0[1]) {
2153 mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
2154 mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
2155 mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0);
2156 }
2157 if (dst0[3])
2158 loadImm(dst0[3], 1.0f);
2159 break;
2160 case TGSI_OPCODE_DP2:
2161 val0 = buildDot(2);
2162 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2163 mkMov(dst0[c], val0);
2164 break;
2165 case TGSI_OPCODE_DP3:
2166 val0 = buildDot(3);
2167 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2168 mkMov(dst0[c], val0);
2169 break;
2170 case TGSI_OPCODE_DP4:
2171 val0 = buildDot(4);
2172 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2173 mkMov(dst0[c], val0);
2174 break;
2175 case TGSI_OPCODE_DPH:
2176 val0 = buildDot(3);
2177 src1 = fetchSrc(1, 3);
2178 mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
2179 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2180 mkMov(dst0[c], val0);
2181 break;
2182 case TGSI_OPCODE_DST:
2183 if (dst0[0])
2184 loadImm(dst0[0], 1.0f);
2185 if (dst0[1]) {
2186 src0 = fetchSrc(0, 1);
2187 src1 = fetchSrc(1, 1);
2188 mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1);
2189 }
2190 if (dst0[2])
2191 mkMov(dst0[2], fetchSrc(0, 2));
2192 if (dst0[3])
2193 mkMov(dst0[3], fetchSrc(1, 3));
2194 break;
2195 case TGSI_OPCODE_LRP:
2196 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2197 src0 = fetchSrc(0, c);
2198 src1 = fetchSrc(1, c);
2199 src2 = fetchSrc(2, c);
2200 mkOp3(OP_MAD, TYPE_F32, dst0[c],
2201 mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2);
2202 }
2203 break;
2204 case TGSI_OPCODE_LIT:
2205 handleLIT(dst0);
2206 break;
2207 case TGSI_OPCODE_XPD:
2208 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2209 if (c < 3) {
2210 val0 = getSSA();
2211 src0 = fetchSrc(1, (c + 1) % 3);
2212 src1 = fetchSrc(0, (c + 2) % 3);
2213 mkOp2(OP_MUL, TYPE_F32, val0, src0, src1);
2214 mkOp1(OP_NEG, TYPE_F32, val0, val0);
2215
2216 src0 = fetchSrc(0, (c + 1) % 3);
2217 src1 = fetchSrc(1, (c + 2) % 3);
2218 mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0);
2219 } else {
2220 loadImm(dst0[c], 1.0f);
2221 }
2222 }
2223 break;
2224 case TGSI_OPCODE_ISSG:
2225 case TGSI_OPCODE_SSG:
2226 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2227 src0 = fetchSrc(0, c);
2228 val0 = getScratch();
2229 val1 = getScratch();
2230 mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero);
2231 mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero);
2232 if (srcTy == TYPE_F32)
2233 mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
2234 else
2235 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
2236 }
2237 break;
2238 case TGSI_OPCODE_UCMP:
2239 case TGSI_OPCODE_CMP:
2240 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2241 src0 = fetchSrc(0, c);
2242 src1 = fetchSrc(1, c);
2243 src2 = fetchSrc(2, c);
2244 if (src1 == src2)
2245 mkMov(dst0[c], src1);
2246 else
2247 mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
2248 srcTy, dst0[c], src1, src2, src0);
2249 }
2250 break;
2251 case TGSI_OPCODE_FRC:
2252 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2253 src0 = fetchSrc(0, c);
2254 val0 = getScratch();
2255 mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
2256 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
2257 }
2258 break;
2259 case TGSI_OPCODE_ROUND:
2260 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2261 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
2262 ->rnd = ROUND_NI;
2263 break;
2264 case TGSI_OPCODE_CLAMP:
2265 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2266 src0 = fetchSrc(0, c);
2267 src1 = fetchSrc(1, c);
2268 src2 = fetchSrc(2, c);
2269 val0 = getScratch();
2270 mkOp2(OP_MIN, TYPE_F32, val0, src0, src1);
2271 mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2);
2272 }
2273 break;
2274 case TGSI_OPCODE_SLT:
2275 case TGSI_OPCODE_SGE:
2276 case TGSI_OPCODE_SEQ:
2277 case TGSI_OPCODE_SFL:
2278 case TGSI_OPCODE_SGT:
2279 case TGSI_OPCODE_SLE:
2280 case TGSI_OPCODE_SNE:
2281 case TGSI_OPCODE_STR:
2282 case TGSI_OPCODE_ISGE:
2283 case TGSI_OPCODE_ISLT:
2284 case TGSI_OPCODE_USEQ:
2285 case TGSI_OPCODE_USGE:
2286 case TGSI_OPCODE_USLT:
2287 case TGSI_OPCODE_USNE:
2288 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2289 src0 = fetchSrc(0, c);
2290 src1 = fetchSrc(1, c);
2291 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1);
2292 }
2293 break;
2294 case TGSI_OPCODE_KIL:
2295 val0 = new_LValue(func, FILE_PREDICATE);
2296 for (c = 0; c < 4; ++c) {
2297 mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero);
2298 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
2299 }
2300 break;
2301 case TGSI_OPCODE_KILP:
2302 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2303 break;
2304 case TGSI_OPCODE_TEX:
2305 case TGSI_OPCODE_TXB:
2306 case TGSI_OPCODE_TXL:
2307 case TGSI_OPCODE_TXP:
2308 // R S L C Dx Dy
2309 handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
2310 break;
2311 case TGSI_OPCODE_TXD:
2312 handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
2313 break;
2314 case TGSI_OPCODE_TEX2:
2315 handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
2316 break;
2317 case TGSI_OPCODE_TXB2:
2318 case TGSI_OPCODE_TXL2:
2319 handleTEX(dst0, 2, 2, 0x10, 0x11, 0x00, 0x00);
2320 break;
2321 case TGSI_OPCODE_SAMPLE:
2322 case TGSI_OPCODE_SAMPLE_B:
2323 case TGSI_OPCODE_SAMPLE_D:
2324 case TGSI_OPCODE_SAMPLE_L:
2325 case TGSI_OPCODE_SAMPLE_C:
2326 case TGSI_OPCODE_SAMPLE_C_LZ:
2327 handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
2328 break;
2329 case TGSI_OPCODE_TXF:
2330 handleTXF(dst0, 1);
2331 break;
2332 case TGSI_OPCODE_TXQ:
2333 case TGSI_OPCODE_SVIEWINFO:
2334 handleTXQ(dst0, TXQ_DIMS);
2335 break;
2336 case TGSI_OPCODE_F2I:
2337 case TGSI_OPCODE_F2U:
2338 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2339 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
2340 break;
2341 case TGSI_OPCODE_I2F:
2342 case TGSI_OPCODE_U2F:
2343 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2344 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
2345 break;
2346 case TGSI_OPCODE_EMIT:
2347 case TGSI_OPCODE_ENDPRIM:
2348 // get vertex stream if specified (must be immediate)
2349 src0 = tgsi.srcCount() ?
2350 mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero;
2351 mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
2352 break;
2353 case TGSI_OPCODE_IF:
2354 {
2355 BasicBlock *ifBB = new BasicBlock(func);
2356
2357 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
2358 condBBs.push(bb);
2359 joinBBs.push(bb);
2360
2361 mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0));
2362
2363 setPosition(ifBB, true);
2364 }
2365 break;
2366 case TGSI_OPCODE_ELSE:
2367 {
2368 BasicBlock *elseBB = new BasicBlock(func);
2369 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
2370
2371 forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
2372 condBBs.push(bb);
2373
2374 forkBB->getExit()->asFlow()->target.bb = elseBB;
2375 if (!bb->isTerminated())
2376 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
2377
2378 setPosition(elseBB, true);
2379 }
2380 break;
2381 case TGSI_OPCODE_ENDIF:
2382 {
2383 BasicBlock *convBB = new BasicBlock(func);
2384 BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
2385 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
2386
2387 if (!bb->isTerminated()) {
2388 // we only want join if none of the clauses ended with CONT/BREAK/RET
2389 if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
2390 insertConvergenceOps(convBB, forkBB);
2391 mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
2392 bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
2393 }
2394
2395 if (prevBB->getExit()->op == OP_BRA) {
2396 prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
2397 prevBB->getExit()->asFlow()->target.bb = convBB;
2398 }
2399 setPosition(convBB, true);
2400 }
2401 break;
2402 case TGSI_OPCODE_BGNLOOP:
2403 {
2404 BasicBlock *lbgnBB = new BasicBlock(func);
2405 BasicBlock *lbrkBB = new BasicBlock(func);
2406
2407 loopBBs.push(lbgnBB);
2408 breakBBs.push(lbrkBB);
2409 if (loopBBs.getSize() > func->loopNestingBound)
2410 func->loopNestingBound++;
2411
2412 mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
2413
2414 bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
2415 setPosition(lbgnBB, true);
2416 mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
2417 }
2418 break;
2419 case TGSI_OPCODE_ENDLOOP:
2420 {
2421 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
2422
2423 if (!bb->isTerminated()) {
2424 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
2425 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
2426 }
2427 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
2428 }
2429 break;
2430 case TGSI_OPCODE_BRK:
2431 {
2432 if (bb->isTerminated())
2433 break;
2434 BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
2435 mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
2436 bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
2437 }
2438 break;
2439 case TGSI_OPCODE_CONT:
2440 {
2441 if (bb->isTerminated())
2442 break;
2443 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
2444 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
2445 contBB->explicitCont = true;
2446 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2447 }
2448 break;
2449 case TGSI_OPCODE_BGNSUB:
2450 {
2451 Subroutine *s = getSubroutine(ip);
2452 BasicBlock *entry = new BasicBlock(s->f);
2453 BasicBlock *leave = new BasicBlock(s->f);
2454
2455 // multiple entrypoints possible, keep the graph connected
2456 if (prog->getType() == Program::TYPE_COMPUTE)
2457 prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
2458
2459 sub.cur = s;
2460 s->f->setEntry(entry);
2461 s->f->setExit(leave);
2462 setPosition(entry, true);
2463 return true;
2464 }
2465 case TGSI_OPCODE_ENDSUB:
2466 {
2467 sub.cur = getSubroutine(prog->main);
2468 setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
2469 return true;
2470 }
2471 case TGSI_OPCODE_CAL:
2472 {
2473 Subroutine *s = getSubroutine(tgsi.getLabel());
2474 mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
2475 func->call.attach(&s->f->call, Graph::Edge::TREE);
2476 return true;
2477 }
2478 case TGSI_OPCODE_RET:
2479 {
2480 if (bb->isTerminated())
2481 return true;
2482 BasicBlock *leave = BasicBlock::get(func->cfgExit);
2483
2484 if (!isEndOfSubroutine(ip + 1)) {
2485 // insert a PRERET at the entry if this is an early return
2486 // (only needed for sharing code in the epilogue)
2487 BasicBlock *pos = getBB();
2488 setPosition(BasicBlock::get(func->cfg.getRoot()), false);
2489 mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
2490 setPosition(pos, true);
2491 }
2492 mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
2493 bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
2494 }
2495 break;
2496 case TGSI_OPCODE_END:
2497 {
2498 // attach and generate epilogue code
2499 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2500 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
2501 setPosition(epilogue, true);
2502 if (prog->getType() == Program::TYPE_FRAGMENT)
2503 exportOutputs();
2504 if (info->io.genUserClip > 0)
2505 handleUserClipPlanes();
2506 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
2507 }
2508 break;
2509 case TGSI_OPCODE_SWITCH:
2510 case TGSI_OPCODE_CASE:
2511 ERROR("switch/case opcode encountered, should have been lowered\n");
2512 abort();
2513 break;
2514 case TGSI_OPCODE_LOAD:
2515 handleLOAD(dst0);
2516 break;
2517 case TGSI_OPCODE_STORE:
2518 handleSTORE();
2519 break;
2520 default:
2521 ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
2522 assert(0);
2523 break;
2524 }
2525
2526 if (tgsi.dstCount()) {
2527 for (c = 0; c < 4; ++c) {
2528 if (!dst0[c])
2529 continue;
2530 if (dst0[c] != rDst0[c])
2531 mkMov(rDst0[c], dst0[c]);
2532 storeDst(0, c, rDst0[c]);
2533 }
2534 }
2535 vtxBaseValid = 0;
2536
2537 return true;
2538 }
2539
2540 void
2541 Converter::handleUserClipPlanes()
2542 {
2543 Value *res[8];
2544 int n, i, c;
2545
2546 for (c = 0; c < 4; ++c) {
2547 for (i = 0; i < info->io.genUserClip; ++i) {
2548 Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
2549 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
2550 Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
2551 if (c == 0)
2552 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
2553 else
2554 mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
2555 }
2556 }
2557
2558 const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
2559
2560 for (i = 0; i < info->io.genUserClip; ++i) {
2561 n = i / 4 + first;
2562 c = i % 4;
2563 Symbol *sym =
2564 mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
2565 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
2566 }
2567 }
2568
2569 void
2570 Converter::exportOutputs()
2571 {
2572 for (unsigned int i = 0; i < info->numOutputs; ++i) {
2573 for (unsigned int c = 0; c < 4; ++c) {
2574 if (!oData.exists(sub.cur->values, i, c))
2575 continue;
2576 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
2577 info->out[i].slot[c] * 4);
2578 Value *val = oData.load(sub.cur->values, i, c, NULL);
2579 if (val)
2580 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
2581 }
2582 }
2583 }
2584
2585 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
2586 code(code),
2587 tgsi(NULL),
2588 tData(this), aData(this), pData(this), oData(this)
2589 {
2590 info = code->info;
2591
2592 const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
2593
2594 const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
2595 const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
2596 const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
2597 const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
2598
2599 tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
2600 pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
2601 aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0);
2602 oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
2603
2604 for (int vol = 0, i = 0; i < code->tempArrayCount; ++i) {
2605 int len = code->tempArrays[i].u32 >> 2;
2606 int dim = code->tempArrays[i].u32 & 3;
2607
2608 lData.push_back(DataArray(this));
2609 lData.back().setup(TGSI_FILE_TEMPORARY_ARRAY, i, vol, len, dim, 4,
2610 FILE_MEMORY_LOCAL, 0);
2611
2612 vol += (len * dim * 4 + 0xf) & ~0xf;
2613 }
2614
2615 for (int vol = 0, i = 0; i < code->immdArrayCount; ++i) {
2616 int len = code->immdArrays[i].u32 >> 2;
2617 int dim = code->immdArrays[i].u32 & 3;
2618
2619 lData.push_back(DataArray(this));
2620 lData.back().setup(TGSI_FILE_IMMEDIATE_ARRAY, i, vol, len, dim, 4,
2621 FILE_MEMORY_CONST, 14);
2622
2623 vol += (len * dim * 4 + 0xf) & ~0xf;
2624 }
2625
2626 zero = mkImm((uint32_t)0);
2627
2628 vtxBaseValid = 0;
2629 }
2630
2631 Converter::~Converter()
2632 {
2633 }
2634
2635 template<typename T> inline void
2636 Converter::BindArgumentsPass::updateCallArgs(
2637 Instruction *i, void (Instruction::*setArg)(int, Value *),
2638 T (Function::*proto))
2639 {
2640 Function *g = i->asFlow()->target.fn;
2641 Subroutine *subg = conv.getSubroutine(g);
2642
2643 for (unsigned a = 0; a < (g->*proto).size(); ++a) {
2644 Value *v = (g->*proto)[a].get();
2645 const Converter::Location &l = subg->values.l.find(v)->second;
2646 Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
2647
2648 (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
2649 }
2650 }
2651
2652 template<typename T> inline void
2653 Converter::BindArgumentsPass::updatePrototype(
2654 BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
2655 {
2656 (func->*updateSet)();
2657
2658 for (unsigned i = 0; i < set->getSize(); ++i) {
2659 Value *v = func->getLValue(i);
2660
2661 // only include values with a matching TGSI register
2662 if (set->test(i) && sub->values.l.find(v) != sub->values.l.end())
2663 (func->*proto).push_back(v);
2664 }
2665 }
2666
2667 bool
2668 Converter::BindArgumentsPass::visit(Function *f)
2669 {
2670 sub = conv.getSubroutine(f);
2671
2672 for (ArrayList::Iterator bi = f->allBBlocks.iterator();
2673 !bi.end(); bi.next()) {
2674 for (Instruction *i = BasicBlock::get(bi)->getFirst();
2675 i; i = i->next) {
2676 if (i->op == OP_CALL && !i->asFlow()->builtin) {
2677 updateCallArgs(i, &Instruction::setSrc, &Function::ins);
2678 updateCallArgs(i, &Instruction::setDef, &Function::outs);
2679 }
2680 }
2681 }
2682
2683 if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
2684 return true;
2685 updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
2686 &Function::buildLiveSets, &Function::ins);
2687 updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
2688 &Function::buildDefSets, &Function::outs);
2689
2690 return true;
2691 }
2692
2693 bool
2694 Converter::run()
2695 {
2696 BasicBlock *entry = new BasicBlock(prog->main);
2697 BasicBlock *leave = new BasicBlock(prog->main);
2698
2699 prog->main->setEntry(entry);
2700 prog->main->setExit(leave);
2701
2702 setPosition(entry, true);
2703 sub.cur = getSubroutine(prog->main);
2704
2705 if (info->io.genUserClip > 0) {
2706 for (int c = 0; c < 4; ++c)
2707 clipVtx[c] = getScratch();
2708 }
2709
2710 if (prog->getType() == Program::TYPE_FRAGMENT) {
2711 Symbol *sv = mkSysVal(SV_POSITION, 3);
2712 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
2713 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
2714 }
2715
2716 for (ip = 0; ip < code->scan.num_instructions; ++ip) {
2717 if (!handleInstruction(&code->insns[ip]))
2718 return false;
2719 }
2720
2721 if (!BindArgumentsPass(*this).run(prog))
2722 return false;
2723
2724 return true;
2725 }
2726
2727 } // unnamed namespace
2728
2729 namespace nv50_ir {
2730
2731 bool
2732 Program::makeFromTGSI(struct nv50_ir_prog_info *info)
2733 {
2734 tgsi::Source src(info);
2735 if (!src.scanSource())
2736 return false;
2737 tlsSize = info->bin.tlsSpace;
2738
2739 Converter builder(this, &src);
2740 return builder.run();
2741 }
2742
2743 } // namespace nv50_ir