c6e69ff1230443a1bc85e159c882221035bc98cf
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_tgsi.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "tgsi/tgsi_build.h"
24 #include "tgsi/tgsi_dump.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_util.h"
27
28 #include <set>
29
30 #include "codegen/nv50_ir.h"
31 #include "codegen/nv50_ir_util.h"
32 #include "codegen/nv50_ir_build_util.h"
33
34 namespace tgsi {
35
36 class Source;
37
38 static nv50_ir::operation translateOpcode(uint opcode);
39 static nv50_ir::DataFile translateFile(uint file);
40 static nv50_ir::TexTarget translateTexture(uint texTarg);
41 static nv50_ir::SVSemantic translateSysVal(uint sysval);
42 static nv50_ir::CacheMode translateCacheMode(uint qualifier);
43 static nv50_ir::ImgFormat translateImgFormat(uint format);
44
45 class Instruction
46 {
47 public:
48 Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
49
50 class SrcRegister
51 {
52 public:
53 SrcRegister(const struct tgsi_full_src_register *src)
54 : reg(src->Register),
55 fsr(src)
56 { }
57
58 SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
59
60 SrcRegister(const struct tgsi_ind_register& ind)
61 : reg(tgsi_util_get_src_from_ind(&ind)),
62 fsr(NULL)
63 { }
64
65 struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
66 {
67 struct tgsi_src_register reg;
68 memset(&reg, 0, sizeof(reg));
69 reg.Index = off.Index;
70 reg.File = off.File;
71 reg.SwizzleX = off.SwizzleX;
72 reg.SwizzleY = off.SwizzleY;
73 reg.SwizzleZ = off.SwizzleZ;
74 return reg;
75 }
76
77 SrcRegister(const struct tgsi_texture_offset& off) :
78 reg(offsetToSrc(off)),
79 fsr(NULL)
80 { }
81
82 uint getFile() const { return reg.File; }
83
84 bool is2D() const { return reg.Dimension; }
85
86 bool isIndirect(int dim) const
87 {
88 return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
89 }
90
91 int getIndex(int dim) const
92 {
93 return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
94 }
95
96 int getSwizzle(int chan) const
97 {
98 return tgsi_util_get_src_register_swizzle(&reg, chan);
99 }
100
101 int getArrayId() const
102 {
103 if (isIndirect(0))
104 return fsr->Indirect.ArrayID;
105 return 0;
106 }
107
108 nv50_ir::Modifier getMod(int chan) const;
109
110 SrcRegister getIndirect(int dim) const
111 {
112 assert(fsr && isIndirect(dim));
113 if (dim)
114 return SrcRegister(fsr->DimIndirect);
115 return SrcRegister(fsr->Indirect);
116 }
117
118 uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
119 {
120 assert(reg.File == TGSI_FILE_IMMEDIATE);
121 assert(!reg.Absolute);
122 assert(!reg.Negate);
123 return info->immd.data[reg.Index * 4 + getSwizzle(c)];
124 }
125
126 private:
127 const struct tgsi_src_register reg;
128 const struct tgsi_full_src_register *fsr;
129 };
130
131 class DstRegister
132 {
133 public:
134 DstRegister(const struct tgsi_full_dst_register *dst)
135 : reg(dst->Register),
136 fdr(dst)
137 { }
138
139 DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
140
141 uint getFile() const { return reg.File; }
142
143 bool is2D() const { return reg.Dimension; }
144
145 bool isIndirect(int dim) const
146 {
147 return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
148 }
149
150 int getIndex(int dim) const
151 {
152 return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
153 }
154
155 unsigned int getMask() const { return reg.WriteMask; }
156
157 bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
158
159 SrcRegister getIndirect(int dim) const
160 {
161 assert(fdr && isIndirect(dim));
162 if (dim)
163 return SrcRegister(fdr->DimIndirect);
164 return SrcRegister(fdr->Indirect);
165 }
166
167 struct tgsi_full_src_register asSrc()
168 {
169 assert(fdr);
170 return tgsi_full_src_register_from_dst(fdr);
171 }
172
173 int getArrayId() const
174 {
175 if (isIndirect(0))
176 return fdr->Indirect.ArrayID;
177 return 0;
178 }
179
180 private:
181 const struct tgsi_dst_register reg;
182 const struct tgsi_full_dst_register *fdr;
183 };
184
185 inline uint getOpcode() const { return insn->Instruction.Opcode; }
186
187 unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
188 unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
189
190 // mask of used components of source s
191 unsigned int srcMask(unsigned int s) const;
192 unsigned int texOffsetMask() const;
193
194 SrcRegister getSrc(unsigned int s) const
195 {
196 assert(s < srcCount());
197 return SrcRegister(&insn->Src[s]);
198 }
199
200 DstRegister getDst(unsigned int d) const
201 {
202 assert(d < dstCount());
203 return DstRegister(&insn->Dst[d]);
204 }
205
206 SrcRegister getTexOffset(unsigned int i) const
207 {
208 assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
209 return SrcRegister(insn->TexOffsets[i]);
210 }
211
212 unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
213
214 bool checkDstSrcAliasing() const;
215
216 inline nv50_ir::operation getOP() const {
217 return translateOpcode(getOpcode()); }
218
219 nv50_ir::DataType inferSrcType() const;
220 nv50_ir::DataType inferDstType() const;
221
222 nv50_ir::CondCode getSetCond() const;
223
224 nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
225
226 const nv50_ir::TexInstruction::ImgFormatDesc *getImageFormat() const {
227 return &nv50_ir::TexInstruction::formatTable[
228 translateImgFormat(insn->Memory.Format)];
229 }
230
231 nv50_ir::TexTarget getImageTarget() const {
232 return translateTexture(insn->Memory.Texture);
233 }
234
235 nv50_ir::CacheMode getCacheMode() const {
236 if (!insn->Instruction.Memory)
237 return nv50_ir::CACHE_CA;
238 return translateCacheMode(insn->Memory.Qualifier);
239 }
240
241 inline uint getLabel() { return insn->Label.Label; }
242
243 unsigned getSaturate() const { return insn->Instruction.Saturate; }
244
245 void print() const
246 {
247 tgsi_dump_instruction(insn, 1);
248 }
249
250 private:
251 const struct tgsi_full_instruction *insn;
252 };
253
254 unsigned int Instruction::texOffsetMask() const
255 {
256 const struct tgsi_instruction_texture *tex = &insn->Texture;
257 assert(insn->Instruction.Texture);
258
259 switch (tex->Texture) {
260 case TGSI_TEXTURE_BUFFER:
261 case TGSI_TEXTURE_1D:
262 case TGSI_TEXTURE_SHADOW1D:
263 case TGSI_TEXTURE_1D_ARRAY:
264 case TGSI_TEXTURE_SHADOW1D_ARRAY:
265 return 0x1;
266 case TGSI_TEXTURE_2D:
267 case TGSI_TEXTURE_SHADOW2D:
268 case TGSI_TEXTURE_2D_ARRAY:
269 case TGSI_TEXTURE_SHADOW2D_ARRAY:
270 case TGSI_TEXTURE_RECT:
271 case TGSI_TEXTURE_SHADOWRECT:
272 case TGSI_TEXTURE_2D_MSAA:
273 case TGSI_TEXTURE_2D_ARRAY_MSAA:
274 return 0x3;
275 case TGSI_TEXTURE_3D:
276 return 0x7;
277 default:
278 assert(!"Unexpected texture target");
279 return 0xf;
280 }
281 }
282
283 unsigned int Instruction::srcMask(unsigned int s) const
284 {
285 unsigned int mask = insn->Dst[0].Register.WriteMask;
286
287 switch (insn->Instruction.Opcode) {
288 case TGSI_OPCODE_COS:
289 case TGSI_OPCODE_SIN:
290 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
291 case TGSI_OPCODE_DP2:
292 return 0x3;
293 case TGSI_OPCODE_DP3:
294 return 0x7;
295 case TGSI_OPCODE_DP4:
296 case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
297 return 0xf;
298 case TGSI_OPCODE_DST:
299 return mask & (s ? 0xa : 0x6);
300 case TGSI_OPCODE_EX2:
301 case TGSI_OPCODE_EXP:
302 case TGSI_OPCODE_LG2:
303 case TGSI_OPCODE_LOG:
304 case TGSI_OPCODE_POW:
305 case TGSI_OPCODE_RCP:
306 case TGSI_OPCODE_RSQ:
307 return 0x1;
308 case TGSI_OPCODE_IF:
309 case TGSI_OPCODE_UIF:
310 return 0x1;
311 case TGSI_OPCODE_LIT:
312 return 0xb;
313 case TGSI_OPCODE_TEX2:
314 case TGSI_OPCODE_TXB2:
315 case TGSI_OPCODE_TXL2:
316 return (s == 0) ? 0xf : 0x3;
317 case TGSI_OPCODE_TEX:
318 case TGSI_OPCODE_TXB:
319 case TGSI_OPCODE_TXD:
320 case TGSI_OPCODE_TXL:
321 case TGSI_OPCODE_TXP:
322 case TGSI_OPCODE_TXF:
323 case TGSI_OPCODE_TG4:
324 case TGSI_OPCODE_TEX_LZ:
325 case TGSI_OPCODE_TXF_LZ:
326 case TGSI_OPCODE_LODQ:
327 {
328 const struct tgsi_instruction_texture *tex = &insn->Texture;
329
330 assert(insn->Instruction.Texture);
331
332 mask = 0x7;
333 if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
334 insn->Instruction.Opcode != TGSI_OPCODE_TEX_LZ &&
335 insn->Instruction.Opcode != TGSI_OPCODE_TXF_LZ &&
336 insn->Instruction.Opcode != TGSI_OPCODE_TXD)
337 mask |= 0x8; /* bias, lod or proj */
338
339 switch (tex->Texture) {
340 case TGSI_TEXTURE_1D:
341 mask &= 0x9;
342 break;
343 case TGSI_TEXTURE_SHADOW1D:
344 mask &= 0xd;
345 break;
346 case TGSI_TEXTURE_1D_ARRAY:
347 case TGSI_TEXTURE_2D:
348 case TGSI_TEXTURE_RECT:
349 mask &= 0xb;
350 break;
351 case TGSI_TEXTURE_CUBE_ARRAY:
352 case TGSI_TEXTURE_SHADOW2D_ARRAY:
353 case TGSI_TEXTURE_SHADOWCUBE:
354 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
355 mask |= 0x8;
356 break;
357 default:
358 break;
359 }
360 }
361 return mask;
362 case TGSI_OPCODE_TXQ:
363 return 1;
364 case TGSI_OPCODE_D2I:
365 case TGSI_OPCODE_D2U:
366 case TGSI_OPCODE_D2F:
367 case TGSI_OPCODE_DSLT:
368 case TGSI_OPCODE_DSGE:
369 case TGSI_OPCODE_DSEQ:
370 case TGSI_OPCODE_DSNE:
371 case TGSI_OPCODE_U64SEQ:
372 case TGSI_OPCODE_U64SNE:
373 case TGSI_OPCODE_I64SLT:
374 case TGSI_OPCODE_U64SLT:
375 case TGSI_OPCODE_I64SGE:
376 case TGSI_OPCODE_U64SGE:
377 case TGSI_OPCODE_I642F:
378 case TGSI_OPCODE_U642F:
379 switch (util_bitcount(mask)) {
380 case 1: return 0x3;
381 case 2: return 0xf;
382 default:
383 assert(!"unexpected mask");
384 return 0xf;
385 }
386 case TGSI_OPCODE_I2D:
387 case TGSI_OPCODE_U2D:
388 case TGSI_OPCODE_F2D: {
389 unsigned int x = 0;
390 if ((mask & 0x3) == 0x3)
391 x |= 1;
392 if ((mask & 0xc) == 0xc)
393 x |= 2;
394 return x;
395 }
396 case TGSI_OPCODE_PK2H:
397 return 0x3;
398 case TGSI_OPCODE_UP2H:
399 return 0x1;
400 default:
401 break;
402 }
403
404 return mask;
405 }
406
407 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
408 {
409 nv50_ir::Modifier m(0);
410
411 if (reg.Absolute)
412 m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
413 if (reg.Negate)
414 m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
415 return m;
416 }
417
418 static nv50_ir::DataFile translateFile(uint file)
419 {
420 switch (file) {
421 case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST;
422 case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT;
423 case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT;
424 case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR;
425 case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS;
426 case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
427 case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
428 case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER;
429 case TGSI_FILE_IMAGE: return nv50_ir::FILE_MEMORY_GLOBAL;
430 case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
431 case TGSI_FILE_SAMPLER:
432 case TGSI_FILE_NULL:
433 default:
434 return nv50_ir::FILE_NULL;
435 }
436 }
437
438 static nv50_ir::SVSemantic translateSysVal(uint sysval)
439 {
440 switch (sysval) {
441 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;
442 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;
443 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;
444 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
445 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID;
446 case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID;
447 case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID;
448 case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;
449 case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID;
450 case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX;
451 case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;
452 case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
453 case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
454 case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;
455 case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
456 case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
457 case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
458 case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
459 case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
460 case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
461 case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
462 case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM;
463 case TGSI_SEMANTIC_SUBGROUP_INVOCATION: return nv50_ir::SV_LANEID;
464 case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: return nv50_ir::SV_LANEMASK_EQ;
465 case TGSI_SEMANTIC_SUBGROUP_LT_MASK: return nv50_ir::SV_LANEMASK_LT;
466 case TGSI_SEMANTIC_SUBGROUP_LE_MASK: return nv50_ir::SV_LANEMASK_LE;
467 case TGSI_SEMANTIC_SUBGROUP_GT_MASK: return nv50_ir::SV_LANEMASK_GT;
468 case TGSI_SEMANTIC_SUBGROUP_GE_MASK: return nv50_ir::SV_LANEMASK_GE;
469 default:
470 assert(0);
471 return nv50_ir::SV_CLOCK;
472 }
473 }
474
475 #define NV50_IR_TEX_TARG_CASE(a, b) \
476 case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
477
478 static nv50_ir::TexTarget translateTexture(uint tex)
479 {
480 switch (tex) {
481 NV50_IR_TEX_TARG_CASE(1D, 1D);
482 NV50_IR_TEX_TARG_CASE(2D, 2D);
483 NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS);
484 NV50_IR_TEX_TARG_CASE(3D, 3D);
485 NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
486 NV50_IR_TEX_TARG_CASE(RECT, RECT);
487 NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
488 NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
489 NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY);
490 NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);
491 NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
492 NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
493 NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
494 NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
495 NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
496 NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
497 NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);
498 NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
499
500 case TGSI_TEXTURE_UNKNOWN:
501 default:
502 assert(!"invalid texture target");
503 return nv50_ir::TEX_TARGET_2D;
504 }
505 }
506
507 static nv50_ir::CacheMode translateCacheMode(uint qualifier)
508 {
509 if (qualifier & TGSI_MEMORY_VOLATILE)
510 return nv50_ir::CACHE_CV;
511 if (qualifier & TGSI_MEMORY_COHERENT)
512 return nv50_ir::CACHE_CG;
513 return nv50_ir::CACHE_CA;
514 }
515
516 static nv50_ir::ImgFormat translateImgFormat(uint format)
517 {
518
519 #define FMT_CASE(a, b) \
520 case PIPE_FORMAT_ ## a: return nv50_ir::FMT_ ## b
521
522 switch (format) {
523 FMT_CASE(NONE, NONE);
524
525 FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
526 FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
527 FMT_CASE(R32G32_FLOAT, RG32F);
528 FMT_CASE(R16G16_FLOAT, RG16F);
529 FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
530 FMT_CASE(R32_FLOAT, R32F);
531 FMT_CASE(R16_FLOAT, R16F);
532
533 FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
534 FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
535 FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
536 FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
537 FMT_CASE(R32G32_UINT, RG32UI);
538 FMT_CASE(R16G16_UINT, RG16UI);
539 FMT_CASE(R8G8_UINT, RG8UI);
540 FMT_CASE(R32_UINT, R32UI);
541 FMT_CASE(R16_UINT, R16UI);
542 FMT_CASE(R8_UINT, R8UI);
543
544 FMT_CASE(R32G32B32A32_SINT, RGBA32I);
545 FMT_CASE(R16G16B16A16_SINT, RGBA16I);
546 FMT_CASE(R8G8B8A8_SINT, RGBA8I);
547 FMT_CASE(R32G32_SINT, RG32I);
548 FMT_CASE(R16G16_SINT, RG16I);
549 FMT_CASE(R8G8_SINT, RG8I);
550 FMT_CASE(R32_SINT, R32I);
551 FMT_CASE(R16_SINT, R16I);
552 FMT_CASE(R8_SINT, R8I);
553
554 FMT_CASE(R16G16B16A16_UNORM, RGBA16);
555 FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
556 FMT_CASE(R8G8B8A8_UNORM, RGBA8);
557 FMT_CASE(R16G16_UNORM, RG16);
558 FMT_CASE(R8G8_UNORM, RG8);
559 FMT_CASE(R16_UNORM, R16);
560 FMT_CASE(R8_UNORM, R8);
561
562 FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
563 FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
564 FMT_CASE(R16G16_SNORM, RG16_SNORM);
565 FMT_CASE(R8G8_SNORM, RG8_SNORM);
566 FMT_CASE(R16_SNORM, R16_SNORM);
567 FMT_CASE(R8_SNORM, R8_SNORM);
568
569 FMT_CASE(B8G8R8A8_UNORM, BGRA8);
570 }
571
572 assert(!"Unexpected format");
573 return nv50_ir::FMT_NONE;
574 }
575
576 nv50_ir::DataType Instruction::inferSrcType() const
577 {
578 switch (getOpcode()) {
579 case TGSI_OPCODE_UIF:
580 case TGSI_OPCODE_AND:
581 case TGSI_OPCODE_OR:
582 case TGSI_OPCODE_XOR:
583 case TGSI_OPCODE_NOT:
584 case TGSI_OPCODE_SHL:
585 case TGSI_OPCODE_U2F:
586 case TGSI_OPCODE_U2D:
587 case TGSI_OPCODE_U2I64:
588 case TGSI_OPCODE_UADD:
589 case TGSI_OPCODE_UDIV:
590 case TGSI_OPCODE_UMOD:
591 case TGSI_OPCODE_UMAD:
592 case TGSI_OPCODE_UMUL:
593 case TGSI_OPCODE_UMUL_HI:
594 case TGSI_OPCODE_UMAX:
595 case TGSI_OPCODE_UMIN:
596 case TGSI_OPCODE_USEQ:
597 case TGSI_OPCODE_USGE:
598 case TGSI_OPCODE_USLT:
599 case TGSI_OPCODE_USNE:
600 case TGSI_OPCODE_USHR:
601 case TGSI_OPCODE_ATOMUADD:
602 case TGSI_OPCODE_ATOMXCHG:
603 case TGSI_OPCODE_ATOMCAS:
604 case TGSI_OPCODE_ATOMAND:
605 case TGSI_OPCODE_ATOMOR:
606 case TGSI_OPCODE_ATOMXOR:
607 case TGSI_OPCODE_ATOMUMIN:
608 case TGSI_OPCODE_ATOMUMAX:
609 case TGSI_OPCODE_UBFE:
610 case TGSI_OPCODE_UMSB:
611 case TGSI_OPCODE_UP2H:
612 case TGSI_OPCODE_VOTE_ALL:
613 case TGSI_OPCODE_VOTE_ANY:
614 case TGSI_OPCODE_VOTE_EQ:
615 return nv50_ir::TYPE_U32;
616 case TGSI_OPCODE_I2F:
617 case TGSI_OPCODE_I2D:
618 case TGSI_OPCODE_I2I64:
619 case TGSI_OPCODE_IDIV:
620 case TGSI_OPCODE_IMUL_HI:
621 case TGSI_OPCODE_IMAX:
622 case TGSI_OPCODE_IMIN:
623 case TGSI_OPCODE_IABS:
624 case TGSI_OPCODE_INEG:
625 case TGSI_OPCODE_ISGE:
626 case TGSI_OPCODE_ISHR:
627 case TGSI_OPCODE_ISLT:
628 case TGSI_OPCODE_ISSG:
629 case TGSI_OPCODE_MOD:
630 case TGSI_OPCODE_UARL:
631 case TGSI_OPCODE_ATOMIMIN:
632 case TGSI_OPCODE_ATOMIMAX:
633 case TGSI_OPCODE_IBFE:
634 case TGSI_OPCODE_IMSB:
635 return nv50_ir::TYPE_S32;
636 case TGSI_OPCODE_D2F:
637 case TGSI_OPCODE_D2I:
638 case TGSI_OPCODE_D2U:
639 case TGSI_OPCODE_D2I64:
640 case TGSI_OPCODE_D2U64:
641 case TGSI_OPCODE_DABS:
642 case TGSI_OPCODE_DNEG:
643 case TGSI_OPCODE_DADD:
644 case TGSI_OPCODE_DMUL:
645 case TGSI_OPCODE_DDIV:
646 case TGSI_OPCODE_DMAX:
647 case TGSI_OPCODE_DMIN:
648 case TGSI_OPCODE_DSLT:
649 case TGSI_OPCODE_DSGE:
650 case TGSI_OPCODE_DSEQ:
651 case TGSI_OPCODE_DSNE:
652 case TGSI_OPCODE_DRCP:
653 case TGSI_OPCODE_DSQRT:
654 case TGSI_OPCODE_DMAD:
655 case TGSI_OPCODE_DFMA:
656 case TGSI_OPCODE_DFRAC:
657 case TGSI_OPCODE_DRSQ:
658 case TGSI_OPCODE_DTRUNC:
659 case TGSI_OPCODE_DCEIL:
660 case TGSI_OPCODE_DFLR:
661 case TGSI_OPCODE_DROUND:
662 return nv50_ir::TYPE_F64;
663 case TGSI_OPCODE_U64SEQ:
664 case TGSI_OPCODE_U64SNE:
665 case TGSI_OPCODE_U64SLT:
666 case TGSI_OPCODE_U64SGE:
667 case TGSI_OPCODE_U64MIN:
668 case TGSI_OPCODE_U64MAX:
669 case TGSI_OPCODE_U64ADD:
670 case TGSI_OPCODE_U64MUL:
671 case TGSI_OPCODE_U64SHL:
672 case TGSI_OPCODE_U64SHR:
673 case TGSI_OPCODE_U64DIV:
674 case TGSI_OPCODE_U64MOD:
675 case TGSI_OPCODE_U642F:
676 case TGSI_OPCODE_U642D:
677 return nv50_ir::TYPE_U64;
678 case TGSI_OPCODE_I64ABS:
679 case TGSI_OPCODE_I64SSG:
680 case TGSI_OPCODE_I64NEG:
681 case TGSI_OPCODE_I64SLT:
682 case TGSI_OPCODE_I64SGE:
683 case TGSI_OPCODE_I64MIN:
684 case TGSI_OPCODE_I64MAX:
685 case TGSI_OPCODE_I64SHR:
686 case TGSI_OPCODE_I64DIV:
687 case TGSI_OPCODE_I64MOD:
688 case TGSI_OPCODE_I642F:
689 case TGSI_OPCODE_I642D:
690 return nv50_ir::TYPE_S64;
691 default:
692 return nv50_ir::TYPE_F32;
693 }
694 }
695
696 nv50_ir::DataType Instruction::inferDstType() const
697 {
698 switch (getOpcode()) {
699 case TGSI_OPCODE_D2U:
700 case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
701 case TGSI_OPCODE_D2I:
702 case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
703 case TGSI_OPCODE_FSEQ:
704 case TGSI_OPCODE_FSGE:
705 case TGSI_OPCODE_FSLT:
706 case TGSI_OPCODE_FSNE:
707 case TGSI_OPCODE_DSEQ:
708 case TGSI_OPCODE_DSGE:
709 case TGSI_OPCODE_DSLT:
710 case TGSI_OPCODE_DSNE:
711 case TGSI_OPCODE_I64SLT:
712 case TGSI_OPCODE_I64SGE:
713 case TGSI_OPCODE_U64SEQ:
714 case TGSI_OPCODE_U64SNE:
715 case TGSI_OPCODE_U64SLT:
716 case TGSI_OPCODE_U64SGE:
717 case TGSI_OPCODE_PK2H:
718 return nv50_ir::TYPE_U32;
719 case TGSI_OPCODE_I2F:
720 case TGSI_OPCODE_U2F:
721 case TGSI_OPCODE_D2F:
722 case TGSI_OPCODE_I642F:
723 case TGSI_OPCODE_U642F:
724 case TGSI_OPCODE_UP2H:
725 return nv50_ir::TYPE_F32;
726 case TGSI_OPCODE_I2D:
727 case TGSI_OPCODE_U2D:
728 case TGSI_OPCODE_F2D:
729 case TGSI_OPCODE_I642D:
730 case TGSI_OPCODE_U642D:
731 return nv50_ir::TYPE_F64;
732 case TGSI_OPCODE_I2I64:
733 case TGSI_OPCODE_U2I64:
734 case TGSI_OPCODE_F2I64:
735 case TGSI_OPCODE_D2I64:
736 return nv50_ir::TYPE_S64;
737 case TGSI_OPCODE_F2U64:
738 case TGSI_OPCODE_D2U64:
739 return nv50_ir::TYPE_U64;
740 default:
741 return inferSrcType();
742 }
743 }
744
745 nv50_ir::CondCode Instruction::getSetCond() const
746 {
747 using namespace nv50_ir;
748
749 switch (getOpcode()) {
750 case TGSI_OPCODE_SLT:
751 case TGSI_OPCODE_ISLT:
752 case TGSI_OPCODE_USLT:
753 case TGSI_OPCODE_FSLT:
754 case TGSI_OPCODE_DSLT:
755 case TGSI_OPCODE_I64SLT:
756 case TGSI_OPCODE_U64SLT:
757 return CC_LT;
758 case TGSI_OPCODE_SLE:
759 return CC_LE;
760 case TGSI_OPCODE_SGE:
761 case TGSI_OPCODE_ISGE:
762 case TGSI_OPCODE_USGE:
763 case TGSI_OPCODE_FSGE:
764 case TGSI_OPCODE_DSGE:
765 case TGSI_OPCODE_I64SGE:
766 case TGSI_OPCODE_U64SGE:
767 return CC_GE;
768 case TGSI_OPCODE_SGT:
769 return CC_GT;
770 case TGSI_OPCODE_SEQ:
771 case TGSI_OPCODE_USEQ:
772 case TGSI_OPCODE_FSEQ:
773 case TGSI_OPCODE_DSEQ:
774 case TGSI_OPCODE_U64SEQ:
775 return CC_EQ;
776 case TGSI_OPCODE_SNE:
777 case TGSI_OPCODE_FSNE:
778 case TGSI_OPCODE_DSNE:
779 case TGSI_OPCODE_U64SNE:
780 return CC_NEU;
781 case TGSI_OPCODE_USNE:
782 return CC_NE;
783 default:
784 return CC_ALWAYS;
785 }
786 }
787
788 #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
789
790 static nv50_ir::operation translateOpcode(uint opcode)
791 {
792 switch (opcode) {
793 NV50_IR_OPCODE_CASE(ARL, SHL);
794 NV50_IR_OPCODE_CASE(MOV, MOV);
795
796 NV50_IR_OPCODE_CASE(RCP, RCP);
797 NV50_IR_OPCODE_CASE(RSQ, RSQ);
798 NV50_IR_OPCODE_CASE(SQRT, SQRT);
799
800 NV50_IR_OPCODE_CASE(MUL, MUL);
801 NV50_IR_OPCODE_CASE(ADD, ADD);
802
803 NV50_IR_OPCODE_CASE(MIN, MIN);
804 NV50_IR_OPCODE_CASE(MAX, MAX);
805 NV50_IR_OPCODE_CASE(SLT, SET);
806 NV50_IR_OPCODE_CASE(SGE, SET);
807 NV50_IR_OPCODE_CASE(MAD, MAD);
808 NV50_IR_OPCODE_CASE(FMA, FMA);
809
810 NV50_IR_OPCODE_CASE(FLR, FLOOR);
811 NV50_IR_OPCODE_CASE(ROUND, CVT);
812 NV50_IR_OPCODE_CASE(EX2, EX2);
813 NV50_IR_OPCODE_CASE(LG2, LG2);
814 NV50_IR_OPCODE_CASE(POW, POW);
815
816 NV50_IR_OPCODE_CASE(COS, COS);
817 NV50_IR_OPCODE_CASE(DDX, DFDX);
818 NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
819 NV50_IR_OPCODE_CASE(DDY, DFDY);
820 NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);
821 NV50_IR_OPCODE_CASE(KILL, DISCARD);
822
823 NV50_IR_OPCODE_CASE(SEQ, SET);
824 NV50_IR_OPCODE_CASE(SGT, SET);
825 NV50_IR_OPCODE_CASE(SIN, SIN);
826 NV50_IR_OPCODE_CASE(SLE, SET);
827 NV50_IR_OPCODE_CASE(SNE, SET);
828 NV50_IR_OPCODE_CASE(TEX, TEX);
829 NV50_IR_OPCODE_CASE(TXD, TXD);
830 NV50_IR_OPCODE_CASE(TXP, TEX);
831
832 NV50_IR_OPCODE_CASE(CAL, CALL);
833 NV50_IR_OPCODE_CASE(RET, RET);
834 NV50_IR_OPCODE_CASE(CMP, SLCT);
835
836 NV50_IR_OPCODE_CASE(TXB, TXB);
837
838 NV50_IR_OPCODE_CASE(DIV, DIV);
839
840 NV50_IR_OPCODE_CASE(TXL, TXL);
841 NV50_IR_OPCODE_CASE(TEX_LZ, TXL);
842
843 NV50_IR_OPCODE_CASE(CEIL, CEIL);
844 NV50_IR_OPCODE_CASE(I2F, CVT);
845 NV50_IR_OPCODE_CASE(NOT, NOT);
846 NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
847 NV50_IR_OPCODE_CASE(SHL, SHL);
848
849 NV50_IR_OPCODE_CASE(AND, AND);
850 NV50_IR_OPCODE_CASE(OR, OR);
851 NV50_IR_OPCODE_CASE(MOD, MOD);
852 NV50_IR_OPCODE_CASE(XOR, XOR);
853 NV50_IR_OPCODE_CASE(TXF, TXF);
854 NV50_IR_OPCODE_CASE(TXF_LZ, TXF);
855 NV50_IR_OPCODE_CASE(TXQ, TXQ);
856 NV50_IR_OPCODE_CASE(TXQS, TXQ);
857 NV50_IR_OPCODE_CASE(TG4, TXG);
858 NV50_IR_OPCODE_CASE(LODQ, TXLQ);
859
860 NV50_IR_OPCODE_CASE(EMIT, EMIT);
861 NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
862
863 NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);
864
865 NV50_IR_OPCODE_CASE(F2I, CVT);
866 NV50_IR_OPCODE_CASE(FSEQ, SET);
867 NV50_IR_OPCODE_CASE(FSGE, SET);
868 NV50_IR_OPCODE_CASE(FSLT, SET);
869 NV50_IR_OPCODE_CASE(FSNE, SET);
870 NV50_IR_OPCODE_CASE(IDIV, DIV);
871 NV50_IR_OPCODE_CASE(IMAX, MAX);
872 NV50_IR_OPCODE_CASE(IMIN, MIN);
873 NV50_IR_OPCODE_CASE(IABS, ABS);
874 NV50_IR_OPCODE_CASE(INEG, NEG);
875 NV50_IR_OPCODE_CASE(ISGE, SET);
876 NV50_IR_OPCODE_CASE(ISHR, SHR);
877 NV50_IR_OPCODE_CASE(ISLT, SET);
878 NV50_IR_OPCODE_CASE(F2U, CVT);
879 NV50_IR_OPCODE_CASE(U2F, CVT);
880 NV50_IR_OPCODE_CASE(UADD, ADD);
881 NV50_IR_OPCODE_CASE(UDIV, DIV);
882 NV50_IR_OPCODE_CASE(UMAD, MAD);
883 NV50_IR_OPCODE_CASE(UMAX, MAX);
884 NV50_IR_OPCODE_CASE(UMIN, MIN);
885 NV50_IR_OPCODE_CASE(UMOD, MOD);
886 NV50_IR_OPCODE_CASE(UMUL, MUL);
887 NV50_IR_OPCODE_CASE(USEQ, SET);
888 NV50_IR_OPCODE_CASE(USGE, SET);
889 NV50_IR_OPCODE_CASE(USHR, SHR);
890 NV50_IR_OPCODE_CASE(USLT, SET);
891 NV50_IR_OPCODE_CASE(USNE, SET);
892
893 NV50_IR_OPCODE_CASE(DABS, ABS);
894 NV50_IR_OPCODE_CASE(DNEG, NEG);
895 NV50_IR_OPCODE_CASE(DADD, ADD);
896 NV50_IR_OPCODE_CASE(DMUL, MUL);
897 NV50_IR_OPCODE_CASE(DDIV, DIV);
898 NV50_IR_OPCODE_CASE(DMAX, MAX);
899 NV50_IR_OPCODE_CASE(DMIN, MIN);
900 NV50_IR_OPCODE_CASE(DSLT, SET);
901 NV50_IR_OPCODE_CASE(DSGE, SET);
902 NV50_IR_OPCODE_CASE(DSEQ, SET);
903 NV50_IR_OPCODE_CASE(DSNE, SET);
904 NV50_IR_OPCODE_CASE(DRCP, RCP);
905 NV50_IR_OPCODE_CASE(DSQRT, SQRT);
906 NV50_IR_OPCODE_CASE(DMAD, MAD);
907 NV50_IR_OPCODE_CASE(DFMA, FMA);
908 NV50_IR_OPCODE_CASE(D2I, CVT);
909 NV50_IR_OPCODE_CASE(D2U, CVT);
910 NV50_IR_OPCODE_CASE(I2D, CVT);
911 NV50_IR_OPCODE_CASE(U2D, CVT);
912 NV50_IR_OPCODE_CASE(DRSQ, RSQ);
913 NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
914 NV50_IR_OPCODE_CASE(DCEIL, CEIL);
915 NV50_IR_OPCODE_CASE(DFLR, FLOOR);
916 NV50_IR_OPCODE_CASE(DROUND, CVT);
917
918 NV50_IR_OPCODE_CASE(U64SEQ, SET);
919 NV50_IR_OPCODE_CASE(U64SNE, SET);
920 NV50_IR_OPCODE_CASE(U64SLT, SET);
921 NV50_IR_OPCODE_CASE(U64SGE, SET);
922 NV50_IR_OPCODE_CASE(I64SLT, SET);
923 NV50_IR_OPCODE_CASE(I64SGE, SET);
924 NV50_IR_OPCODE_CASE(I2I64, CVT);
925 NV50_IR_OPCODE_CASE(U2I64, CVT);
926 NV50_IR_OPCODE_CASE(F2I64, CVT);
927 NV50_IR_OPCODE_CASE(F2U64, CVT);
928 NV50_IR_OPCODE_CASE(D2I64, CVT);
929 NV50_IR_OPCODE_CASE(D2U64, CVT);
930 NV50_IR_OPCODE_CASE(I642F, CVT);
931 NV50_IR_OPCODE_CASE(U642F, CVT);
932 NV50_IR_OPCODE_CASE(I642D, CVT);
933 NV50_IR_OPCODE_CASE(U642D, CVT);
934
935 NV50_IR_OPCODE_CASE(I64MIN, MIN);
936 NV50_IR_OPCODE_CASE(U64MIN, MIN);
937 NV50_IR_OPCODE_CASE(I64MAX, MAX);
938 NV50_IR_OPCODE_CASE(U64MAX, MAX);
939 NV50_IR_OPCODE_CASE(I64ABS, ABS);
940 NV50_IR_OPCODE_CASE(I64NEG, NEG);
941 NV50_IR_OPCODE_CASE(U64ADD, ADD);
942 NV50_IR_OPCODE_CASE(U64MUL, MUL);
943 NV50_IR_OPCODE_CASE(U64SHL, SHL);
944 NV50_IR_OPCODE_CASE(I64SHR, SHR);
945 NV50_IR_OPCODE_CASE(U64SHR, SHR);
946
947 NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
948 NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
949
950 NV50_IR_OPCODE_CASE(SAMPLE, TEX);
951 NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
952 NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
953 NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
954 NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
955 NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
956 NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);
957 NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);
958 NV50_IR_OPCODE_CASE(GATHER4, TXG);
959 NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
960
961 NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);
962 NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);
963 NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);
964 NV50_IR_OPCODE_CASE(ATOMAND, ATOM);
965 NV50_IR_OPCODE_CASE(ATOMOR, ATOM);
966 NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);
967 NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);
968 NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);
969 NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);
970 NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);
971
972 NV50_IR_OPCODE_CASE(TEX2, TEX);
973 NV50_IR_OPCODE_CASE(TXB2, TXB);
974 NV50_IR_OPCODE_CASE(TXL2, TXL);
975
976 NV50_IR_OPCODE_CASE(IBFE, EXTBF);
977 NV50_IR_OPCODE_CASE(UBFE, EXTBF);
978 NV50_IR_OPCODE_CASE(BFI, INSBF);
979 NV50_IR_OPCODE_CASE(BREV, EXTBF);
980 NV50_IR_OPCODE_CASE(POPC, POPCNT);
981 NV50_IR_OPCODE_CASE(LSB, BFIND);
982 NV50_IR_OPCODE_CASE(IMSB, BFIND);
983 NV50_IR_OPCODE_CASE(UMSB, BFIND);
984
985 NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE);
986 NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE);
987 NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE);
988
989 NV50_IR_OPCODE_CASE(BALLOT, VOTE);
990 NV50_IR_OPCODE_CASE(READ_INVOC, SHFL);
991 NV50_IR_OPCODE_CASE(READ_FIRST, SHFL);
992
993 NV50_IR_OPCODE_CASE(END, EXIT);
994
995 default:
996 return nv50_ir::OP_NOP;
997 }
998 }
999
1000 static uint16_t opcodeToSubOp(uint opcode)
1001 {
1002 switch (opcode) {
1003 case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD;
1004 case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH;
1005 case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS;
1006 case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND;
1007 case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR;
1008 case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR;
1009 case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN;
1010 case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN;
1011 case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX;
1012 case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX;
1013 case TGSI_OPCODE_IMUL_HI:
1014 case TGSI_OPCODE_UMUL_HI:
1015 return NV50_IR_SUBOP_MUL_HIGH;
1016 case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL;
1017 case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY;
1018 case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI;
1019 default:
1020 return 0;
1021 }
1022 }
1023
1024 bool Instruction::checkDstSrcAliasing() const
1025 {
1026 if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
1027 return false;
1028
1029 for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
1030 if (insn->Src[s].Register.File == TGSI_FILE_NULL)
1031 break;
1032 if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
1033 insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
1034 return true;
1035 }
1036 return false;
1037 }
1038
1039 class Source
1040 {
1041 public:
1042 Source(struct nv50_ir_prog_info *);
1043 ~Source();
1044
1045 public:
1046 bool scanSource();
1047 unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }
1048
1049 public:
1050 struct tgsi_shader_info scan;
1051 struct tgsi_full_instruction *insns;
1052 const struct tgsi_token *tokens;
1053 struct nv50_ir_prog_info *info;
1054
1055 nv50_ir::DynArray tempArrays;
1056 nv50_ir::DynArray immdArrays;
1057
1058 typedef nv50_ir::BuildUtil::Location Location;
1059 // these registers are per-subroutine, cannot be used for parameter passing
1060 std::set<Location> locals;
1061
1062 std::set<int> indirectTempArrays;
1063 std::map<int, int> indirectTempOffsets;
1064 std::map<int, std::pair<int, int> > tempArrayInfo;
1065 std::vector<int> tempArrayId;
1066
1067 int clipVertexOutput;
1068
1069 struct TextureView {
1070 uint8_t target; // TGSI_TEXTURE_*
1071 };
1072 std::vector<TextureView> textureViews;
1073
1074 /*
1075 struct Resource {
1076 uint8_t target; // TGSI_TEXTURE_*
1077 bool raw;
1078 uint8_t slot; // $surface index
1079 };
1080 std::vector<Resource> resources;
1081 */
1082
1083 struct MemoryFile {
1084 uint8_t mem_type; // TGSI_MEMORY_TYPE_*
1085 };
1086 std::vector<MemoryFile> memoryFiles;
1087
1088 private:
1089 int inferSysValDirection(unsigned sn) const;
1090 bool scanDeclaration(const struct tgsi_full_declaration *);
1091 bool scanInstruction(const struct tgsi_full_instruction *);
1092 void scanInstructionSrc(const Instruction& insn,
1093 const Instruction::SrcRegister& src,
1094 unsigned mask);
1095 void scanProperty(const struct tgsi_full_property *);
1096 void scanImmediate(const struct tgsi_full_immediate *);
1097
1098 inline bool isEdgeFlagPassthrough(const Instruction&) const;
1099 };
1100
1101 Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
1102 {
1103 tokens = (const struct tgsi_token *)info->bin.source;
1104
1105 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1106 tgsi_dump(tokens, 0);
1107 }
1108
1109 Source::~Source()
1110 {
1111 if (insns)
1112 FREE(insns);
1113
1114 if (info->immd.data)
1115 FREE(info->immd.data);
1116 if (info->immd.type)
1117 FREE(info->immd.type);
1118 }
1119
1120 bool Source::scanSource()
1121 {
1122 unsigned insnCount = 0;
1123 struct tgsi_parse_context parse;
1124
1125 tgsi_scan_shader(tokens, &scan);
1126
1127 insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
1128 sizeof(insns[0]));
1129 if (!insns)
1130 return false;
1131
1132 clipVertexOutput = -1;
1133
1134 textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
1135 //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
1136 tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
1137 memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
1138
1139 info->immd.bufSize = 0;
1140
1141 info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
1142 info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
1143 info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
1144
1145 if (info->type == PIPE_SHADER_FRAGMENT) {
1146 info->prop.fp.writesDepth = scan.writes_z;
1147 info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase;
1148 } else
1149 if (info->type == PIPE_SHADER_GEOMETRY) {
1150 info->prop.gp.instanceCount = 1; // default value
1151 }
1152
1153 info->io.viewportId = -1;
1154
1155 info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
1156 info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
1157
1158 tgsi_parse_init(&parse, tokens);
1159 while (!tgsi_parse_end_of_tokens(&parse)) {
1160 tgsi_parse_token(&parse);
1161
1162 switch (parse.FullToken.Token.Type) {
1163 case TGSI_TOKEN_TYPE_IMMEDIATE:
1164 scanImmediate(&parse.FullToken.FullImmediate);
1165 break;
1166 case TGSI_TOKEN_TYPE_DECLARATION:
1167 scanDeclaration(&parse.FullToken.FullDeclaration);
1168 break;
1169 case TGSI_TOKEN_TYPE_INSTRUCTION:
1170 insns[insnCount++] = parse.FullToken.FullInstruction;
1171 scanInstruction(&parse.FullToken.FullInstruction);
1172 break;
1173 case TGSI_TOKEN_TYPE_PROPERTY:
1174 scanProperty(&parse.FullToken.FullProperty);
1175 break;
1176 default:
1177 INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
1178 break;
1179 }
1180 }
1181 tgsi_parse_free(&parse);
1182
1183 if (indirectTempArrays.size()) {
1184 int tempBase = 0;
1185 for (std::set<int>::const_iterator it = indirectTempArrays.begin();
1186 it != indirectTempArrays.end(); ++it) {
1187 std::pair<int, int>& info = tempArrayInfo[*it];
1188 indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first));
1189 tempBase += info.second;
1190 }
1191 info->bin.tlsSpace += tempBase * 16;
1192 }
1193
1194 if (info->io.genUserClip > 0) {
1195 info->io.clipDistances = info->io.genUserClip;
1196
1197 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1198
1199 for (unsigned int n = 0; n < nOut; ++n) {
1200 unsigned int i = info->numOutputs++;
1201 info->out[i].id = i;
1202 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1203 info->out[i].si = n;
1204 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1205 }
1206 }
1207
1208 return info->assignSlots(info) == 0;
1209 }
1210
1211 void Source::scanProperty(const struct tgsi_full_property *prop)
1212 {
1213 switch (prop->Property.PropertyName) {
1214 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
1215 info->prop.gp.outputPrim = prop->u[0].Data;
1216 break;
1217 case TGSI_PROPERTY_GS_INPUT_PRIM:
1218 info->prop.gp.inputPrim = prop->u[0].Data;
1219 break;
1220 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
1221 info->prop.gp.maxVertices = prop->u[0].Data;
1222 break;
1223 case TGSI_PROPERTY_GS_INVOCATIONS:
1224 info->prop.gp.instanceCount = prop->u[0].Data;
1225 break;
1226 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1227 info->prop.fp.separateFragData = true;
1228 break;
1229 case TGSI_PROPERTY_FS_COORD_ORIGIN:
1230 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
1231 case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
1232 // we don't care
1233 break;
1234 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1235 info->io.genUserClip = -1;
1236 break;
1237 case TGSI_PROPERTY_TCS_VERTICES_OUT:
1238 info->prop.tp.outputPatchSize = prop->u[0].Data;
1239 break;
1240 case TGSI_PROPERTY_TES_PRIM_MODE:
1241 info->prop.tp.domain = prop->u[0].Data;
1242 break;
1243 case TGSI_PROPERTY_TES_SPACING:
1244 info->prop.tp.partitioning = prop->u[0].Data;
1245 break;
1246 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
1247 info->prop.tp.winding = prop->u[0].Data;
1248 break;
1249 case TGSI_PROPERTY_TES_POINT_MODE:
1250 if (prop->u[0].Data)
1251 info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
1252 else
1253 info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
1254 break;
1255 case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
1256 info->prop.cp.numThreads[0] = prop->u[0].Data;
1257 break;
1258 case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
1259 info->prop.cp.numThreads[1] = prop->u[0].Data;
1260 break;
1261 case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
1262 info->prop.cp.numThreads[2] = prop->u[0].Data;
1263 break;
1264 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
1265 info->io.clipDistances = prop->u[0].Data;
1266 break;
1267 case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
1268 info->io.cullDistances = prop->u[0].Data;
1269 break;
1270 case TGSI_PROPERTY_NEXT_SHADER:
1271 /* Do not need to know the next shader stage. */
1272 break;
1273 case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
1274 info->prop.fp.earlyFragTests = prop->u[0].Data;
1275 break;
1276 case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE:
1277 info->prop.fp.postDepthCoverage = prop->u[0].Data;
1278 break;
1279 case TGSI_PROPERTY_MUL_ZERO_WINS:
1280 info->io.mul_zero_wins = prop->u[0].Data;
1281 break;
1282 default:
1283 INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
1284 break;
1285 }
1286 }
1287
1288 void Source::scanImmediate(const struct tgsi_full_immediate *imm)
1289 {
1290 const unsigned n = info->immd.count++;
1291
1292 assert(n < scan.immediate_count);
1293
1294 for (int c = 0; c < 4; ++c)
1295 info->immd.data[n * 4 + c] = imm->u[c].Uint;
1296
1297 info->immd.type[n] = imm->Immediate.DataType;
1298 }
1299
1300 int Source::inferSysValDirection(unsigned sn) const
1301 {
1302 switch (sn) {
1303 case TGSI_SEMANTIC_INSTANCEID:
1304 case TGSI_SEMANTIC_VERTEXID:
1305 return 1;
1306 case TGSI_SEMANTIC_LAYER:
1307 #if 0
1308 case TGSI_SEMANTIC_VIEWPORTINDEX:
1309 return 0;
1310 #endif
1311 case TGSI_SEMANTIC_PRIMID:
1312 return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
1313 default:
1314 return 0;
1315 }
1316 }
1317
1318 bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
1319 {
1320 unsigned i, c;
1321 unsigned sn = TGSI_SEMANTIC_GENERIC;
1322 unsigned si = 0;
1323 const unsigned first = decl->Range.First, last = decl->Range.Last;
1324 const int arrayId = decl->Array.ArrayID;
1325
1326 if (decl->Declaration.Semantic) {
1327 sn = decl->Semantic.Name;
1328 si = decl->Semantic.Index;
1329 }
1330
1331 if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) {
1332 for (i = first; i <= last; ++i) {
1333 for (c = 0; c < 4; ++c) {
1334 locals.insert(
1335 Location(decl->Declaration.File, decl->Dim.Index2D, i, c));
1336 }
1337 }
1338 }
1339
1340 switch (decl->Declaration.File) {
1341 case TGSI_FILE_INPUT:
1342 if (info->type == PIPE_SHADER_VERTEX) {
1343 // all vertex attributes are equal
1344 for (i = first; i <= last; ++i) {
1345 info->in[i].sn = TGSI_SEMANTIC_GENERIC;
1346 info->in[i].si = i;
1347 }
1348 } else {
1349 for (i = first; i <= last; ++i, ++si) {
1350 info->in[i].id = i;
1351 info->in[i].sn = sn;
1352 info->in[i].si = si;
1353 if (info->type == PIPE_SHADER_FRAGMENT) {
1354 // translate interpolation mode
1355 switch (decl->Interp.Interpolate) {
1356 case TGSI_INTERPOLATE_CONSTANT:
1357 info->in[i].flat = 1;
1358 break;
1359 case TGSI_INTERPOLATE_COLOR:
1360 info->in[i].sc = 1;
1361 break;
1362 case TGSI_INTERPOLATE_LINEAR:
1363 info->in[i].linear = 1;
1364 break;
1365 default:
1366 break;
1367 }
1368 if (decl->Interp.Location)
1369 info->in[i].centroid = 1;
1370 }
1371
1372 if (sn == TGSI_SEMANTIC_PATCH)
1373 info->in[i].patch = 1;
1374 if (sn == TGSI_SEMANTIC_PATCH)
1375 info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
1376 }
1377 }
1378 break;
1379 case TGSI_FILE_OUTPUT:
1380 for (i = first; i <= last; ++i, ++si) {
1381 switch (sn) {
1382 case TGSI_SEMANTIC_POSITION:
1383 if (info->type == PIPE_SHADER_FRAGMENT)
1384 info->io.fragDepth = i;
1385 else
1386 if (clipVertexOutput < 0)
1387 clipVertexOutput = i;
1388 break;
1389 case TGSI_SEMANTIC_COLOR:
1390 if (info->type == PIPE_SHADER_FRAGMENT)
1391 info->prop.fp.numColourResults++;
1392 break;
1393 case TGSI_SEMANTIC_EDGEFLAG:
1394 info->io.edgeFlagOut = i;
1395 break;
1396 case TGSI_SEMANTIC_CLIPVERTEX:
1397 clipVertexOutput = i;
1398 break;
1399 case TGSI_SEMANTIC_CLIPDIST:
1400 info->io.genUserClip = -1;
1401 break;
1402 case TGSI_SEMANTIC_SAMPLEMASK:
1403 info->io.sampleMask = i;
1404 break;
1405 case TGSI_SEMANTIC_VIEWPORT_INDEX:
1406 info->io.viewportId = i;
1407 break;
1408 case TGSI_SEMANTIC_PATCH:
1409 info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
1410 /* fallthrough */
1411 case TGSI_SEMANTIC_TESSOUTER:
1412 case TGSI_SEMANTIC_TESSINNER:
1413 info->out[i].patch = 1;
1414 break;
1415 default:
1416 break;
1417 }
1418 info->out[i].id = i;
1419 info->out[i].sn = sn;
1420 info->out[i].si = si;
1421 }
1422 break;
1423 case TGSI_FILE_SYSTEM_VALUE:
1424 switch (sn) {
1425 case TGSI_SEMANTIC_INSTANCEID:
1426 info->io.instanceId = first;
1427 break;
1428 case TGSI_SEMANTIC_VERTEXID:
1429 info->io.vertexId = first;
1430 break;
1431 case TGSI_SEMANTIC_BASEVERTEX:
1432 case TGSI_SEMANTIC_BASEINSTANCE:
1433 case TGSI_SEMANTIC_DRAWID:
1434 info->prop.vp.usesDrawParameters = true;
1435 break;
1436 case TGSI_SEMANTIC_SAMPLEID:
1437 case TGSI_SEMANTIC_SAMPLEPOS:
1438 info->prop.fp.persampleInvocation = true;
1439 break;
1440 case TGSI_SEMANTIC_SAMPLEMASK:
1441 info->prop.fp.usesSampleMaskIn = true;
1442 break;
1443 default:
1444 break;
1445 }
1446 for (i = first; i <= last; ++i, ++si) {
1447 info->sv[i].sn = sn;
1448 info->sv[i].si = si;
1449 info->sv[i].input = inferSysValDirection(sn);
1450
1451 switch (sn) {
1452 case TGSI_SEMANTIC_TESSOUTER:
1453 case TGSI_SEMANTIC_TESSINNER:
1454 info->sv[i].patch = 1;
1455 break;
1456 }
1457 }
1458 break;
1459 /*
1460 case TGSI_FILE_RESOURCE:
1461 for (i = first; i <= last; ++i) {
1462 resources[i].target = decl->Resource.Resource;
1463 resources[i].raw = decl->Resource.Raw;
1464 resources[i].slot = i;
1465 }
1466 break;
1467 */
1468 case TGSI_FILE_SAMPLER_VIEW:
1469 for (i = first; i <= last; ++i)
1470 textureViews[i].target = decl->SamplerView.Resource;
1471 break;
1472 case TGSI_FILE_MEMORY:
1473 for (i = first; i <= last; ++i)
1474 memoryFiles[i].mem_type = decl->Declaration.MemType;
1475 break;
1476 case TGSI_FILE_NULL:
1477 case TGSI_FILE_TEMPORARY:
1478 for (i = first; i <= last; ++i)
1479 tempArrayId[i] = arrayId;
1480 if (arrayId)
1481 tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
1482 first, last - first + 1)));
1483 break;
1484 case TGSI_FILE_ADDRESS:
1485 case TGSI_FILE_CONSTANT:
1486 case TGSI_FILE_IMMEDIATE:
1487 case TGSI_FILE_SAMPLER:
1488 case TGSI_FILE_BUFFER:
1489 case TGSI_FILE_IMAGE:
1490 break;
1491 default:
1492 ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
1493 return false;
1494 }
1495 return true;
1496 }
1497
1498 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
1499 {
1500 return insn.getOpcode() == TGSI_OPCODE_MOV &&
1501 insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
1502 insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
1503 }
1504
1505 void Source::scanInstructionSrc(const Instruction& insn,
1506 const Instruction::SrcRegister& src,
1507 unsigned mask)
1508 {
1509 if (src.getFile() == TGSI_FILE_TEMPORARY) {
1510 if (src.isIndirect(0))
1511 indirectTempArrays.insert(src.getArrayId());
1512 } else
1513 if (src.getFile() == TGSI_FILE_OUTPUT) {
1514 if (src.isIndirect(0)) {
1515 // We don't know which one is accessed, just mark everything for
1516 // reading. This is an extremely unlikely occurrence.
1517 for (unsigned i = 0; i < info->numOutputs; ++i)
1518 info->out[i].oread = 1;
1519 } else {
1520 info->out[src.getIndex(0)].oread = 1;
1521 }
1522 }
1523 if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) {
1524 if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS)
1525 info->prop.fp.readsSampleLocations = true;
1526 }
1527 if (src.getFile() != TGSI_FILE_INPUT)
1528 return;
1529
1530 if (src.isIndirect(0)) {
1531 for (unsigned i = 0; i < info->numInputs; ++i)
1532 info->in[i].mask = 0xf;
1533 } else {
1534 const int i = src.getIndex(0);
1535 for (unsigned c = 0; c < 4; ++c) {
1536 if (!(mask & (1 << c)))
1537 continue;
1538 int k = src.getSwizzle(c);
1539 if (k <= TGSI_SWIZZLE_W)
1540 info->in[i].mask |= 1 << k;
1541 }
1542 switch (info->in[i].sn) {
1543 case TGSI_SEMANTIC_PSIZE:
1544 case TGSI_SEMANTIC_PRIMID:
1545 case TGSI_SEMANTIC_FOG:
1546 info->in[i].mask &= 0x1;
1547 break;
1548 case TGSI_SEMANTIC_PCOORD:
1549 info->in[i].mask &= 0x3;
1550 break;
1551 default:
1552 break;
1553 }
1554 }
1555 }
1556
1557 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
1558 {
1559 Instruction insn(inst);
1560
1561 if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
1562 info->numBarriers = 1;
1563
1564 if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
1565 info->prop.fp.readsFramebuffer = true;
1566
1567 if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE)
1568 info->prop.fp.readsSampleLocations = true;
1569
1570 if (insn.dstCount()) {
1571 Instruction::DstRegister dst = insn.getDst(0);
1572
1573 if (insn.getOpcode() == TGSI_OPCODE_STORE &&
1574 dst.getFile() != TGSI_FILE_MEMORY) {
1575 info->io.globalAccess |= 0x2;
1576 }
1577
1578 if (dst.getFile() == TGSI_FILE_OUTPUT) {
1579 if (dst.isIndirect(0))
1580 for (unsigned i = 0; i < info->numOutputs; ++i)
1581 info->out[i].mask = 0xf;
1582 else
1583 info->out[dst.getIndex(0)].mask |= dst.getMask();
1584
1585 if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
1586 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
1587 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||
1588 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||
1589 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
1590 info->out[dst.getIndex(0)].mask &= 1;
1591
1592 if (isEdgeFlagPassthrough(insn))
1593 info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
1594 } else
1595 if (dst.getFile() == TGSI_FILE_TEMPORARY) {
1596 if (dst.isIndirect(0))
1597 indirectTempArrays.insert(dst.getArrayId());
1598 } else
1599 if (dst.getFile() == TGSI_FILE_BUFFER ||
1600 dst.getFile() == TGSI_FILE_IMAGE ||
1601 (dst.getFile() == TGSI_FILE_MEMORY &&
1602 memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
1603 info->io.globalAccess |= 0x2;
1604 }
1605 }
1606
1607 if (insn.srcCount() && (
1608 insn.getSrc(0).getFile() != TGSI_FILE_MEMORY ||
1609 memoryFiles[insn.getSrc(0).getIndex(0)].mem_type ==
1610 TGSI_MEMORY_TYPE_GLOBAL)) {
1611 switch (insn.getOpcode()) {
1612 case TGSI_OPCODE_ATOMUADD:
1613 case TGSI_OPCODE_ATOMXCHG:
1614 case TGSI_OPCODE_ATOMCAS:
1615 case TGSI_OPCODE_ATOMAND:
1616 case TGSI_OPCODE_ATOMOR:
1617 case TGSI_OPCODE_ATOMXOR:
1618 case TGSI_OPCODE_ATOMUMIN:
1619 case TGSI_OPCODE_ATOMIMIN:
1620 case TGSI_OPCODE_ATOMUMAX:
1621 case TGSI_OPCODE_ATOMIMAX:
1622 case TGSI_OPCODE_LOAD:
1623 info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
1624 0x1 : 0x2;
1625 break;
1626 }
1627 }
1628
1629
1630 for (unsigned s = 0; s < insn.srcCount(); ++s)
1631 scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
1632
1633 for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
1634 scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());
1635
1636 return true;
1637 }
1638
1639 nv50_ir::TexInstruction::Target
1640 Instruction::getTexture(const tgsi::Source *code, int s) const
1641 {
1642 // XXX: indirect access
1643 unsigned int r;
1644
1645 switch (getSrc(s).getFile()) {
1646 /*
1647 case TGSI_FILE_RESOURCE:
1648 r = getSrc(s).getIndex(0);
1649 return translateTexture(code->resources.at(r).target);
1650 */
1651 case TGSI_FILE_SAMPLER_VIEW:
1652 r = getSrc(s).getIndex(0);
1653 return translateTexture(code->textureViews.at(r).target);
1654 default:
1655 return translateTexture(insn->Texture.Texture);
1656 }
1657 }
1658
1659 } // namespace tgsi
1660
1661 namespace {
1662
1663 using namespace nv50_ir;
1664
1665 class Converter : public BuildUtil
1666 {
1667 public:
1668 Converter(Program *, const tgsi::Source *);
1669 ~Converter();
1670
1671 bool run();
1672
1673 private:
1674 struct Subroutine
1675 {
1676 Subroutine(Function *f) : f(f) { }
1677 Function *f;
1678 ValueMap values;
1679 };
1680
1681 Value *shiftAddress(Value *);
1682 Value *getVertexBase(int s);
1683 Value *getOutputBase(int s);
1684 DataArray *getArrayForFile(unsigned file, int idx);
1685 Value *fetchSrc(int s, int c);
1686 Value *fetchDst(int d, int c);
1687 Value *acquireDst(int d, int c);
1688 void storeDst(int d, int c, Value *);
1689
1690 Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
1691 void storeDst(const tgsi::Instruction::DstRegister dst, int c,
1692 Value *val, Value *ptr);
1693
1694 void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
1695 Value *applySrcMod(Value *, int s, int c);
1696
1697 Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
1698 Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
1699 Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);
1700
1701 bool isSubGroupMask(uint8_t semantic);
1702
1703 bool handleInstruction(const struct tgsi_full_instruction *);
1704 void exportOutputs();
1705 inline Subroutine *getSubroutine(unsigned ip);
1706 inline Subroutine *getSubroutine(Function *);
1707 inline bool isEndOfSubroutine(uint ip);
1708
1709 void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);
1710
1711 // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
1712 void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
1713 void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
1714 void handleTXF(Value *dst0[4], int R, int L_M);
1715 void handleTXQ(Value *dst0[4], enum TexQuery, int R);
1716 void handleFBFETCH(Value *dst0[4]);
1717 void handleLIT(Value *dst0[4]);
1718 void handleUserClipPlanes();
1719
1720 // Symbol *getResourceBase(int r);
1721 void getImageCoords(std::vector<Value *>&, int s);
1722
1723 void handleLOAD(Value *dst0[4]);
1724 void handleSTORE();
1725 void handleATOM(Value *dst0[4], DataType, uint16_t subOp);
1726
1727 void handleINTERP(Value *dst0[4]);
1728
1729 uint8_t translateInterpMode(const struct nv50_ir_varying *var,
1730 operation& op);
1731 Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
1732
1733 void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
1734
1735 Value *buildDot(int dim);
1736
1737 class BindArgumentsPass : public Pass {
1738 public:
1739 BindArgumentsPass(Converter &conv) : conv(conv) { }
1740
1741 private:
1742 Converter &conv;
1743 Subroutine *sub;
1744
1745 inline const Location *getValueLocation(Subroutine *, Value *);
1746
1747 template<typename T> inline void
1748 updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
1749 T (Function::*proto));
1750
1751 template<typename T> inline void
1752 updatePrototype(BitSet *set, void (Function::*updateSet)(),
1753 T (Function::*proto));
1754
1755 protected:
1756 bool visit(Function *);
1757 bool visit(BasicBlock *bb) { return false; }
1758 };
1759
1760 private:
1761 const tgsi::Source *code;
1762 const struct nv50_ir_prog_info *info;
1763
1764 struct {
1765 std::map<unsigned, Subroutine> map;
1766 Subroutine *cur;
1767 } sub;
1768
1769 uint ip; // instruction pointer
1770
1771 tgsi::Instruction tgsi;
1772
1773 DataType dstTy;
1774 DataType srcTy;
1775
1776 DataArray tData; // TGSI_FILE_TEMPORARY
1777 DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
1778 DataArray aData; // TGSI_FILE_ADDRESS
1779 DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
1780
1781 Value *zero;
1782 Value *fragCoord[4];
1783 Value *clipVtx[4];
1784
1785 Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
1786 uint8_t vtxBaseValid;
1787
1788 Value *outBase; // base address of vertex out patch (for TCP)
1789
1790 Stack condBBs; // fork BB, then else clause BB
1791 Stack joinBBs; // fork BB, for inserting join ops on ENDIF
1792 Stack loopBBs; // loop headers
1793 Stack breakBBs; // end of / after loop
1794
1795 Value *viewport;
1796 };
1797
1798 Symbol *
1799 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
1800 {
1801 const int swz = src.getSwizzle(c);
1802
1803 /* TODO: Use Array ID when it's available for the index */
1804 return makeSym(src.getFile(),
1805 src.is2D() ? src.getIndex(1) : 0,
1806 src.getIndex(0), swz,
1807 src.getIndex(0) * 16 + swz * 4);
1808 }
1809
1810 Symbol *
1811 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
1812 {
1813 /* TODO: Use Array ID when it's available for the index */
1814 return makeSym(dst.getFile(),
1815 dst.is2D() ? dst.getIndex(1) : 0,
1816 dst.getIndex(0), c,
1817 dst.getIndex(0) * 16 + c * 4);
1818 }
1819
1820 Symbol *
1821 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
1822 {
1823 Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
1824
1825 sym->reg.fileIndex = fileIdx;
1826
1827 if (tgsiFile == TGSI_FILE_MEMORY) {
1828 switch (code->memoryFiles[fileIdx].mem_type) {
1829 case TGSI_MEMORY_TYPE_GLOBAL:
1830 /* No-op this is the default for TGSI_FILE_MEMORY */
1831 sym->setFile(FILE_MEMORY_GLOBAL);
1832 break;
1833 case TGSI_MEMORY_TYPE_SHARED:
1834 sym->setFile(FILE_MEMORY_SHARED);
1835 break;
1836 case TGSI_MEMORY_TYPE_INPUT:
1837 assert(prog->getType() == Program::TYPE_COMPUTE);
1838 assert(idx == -1);
1839 sym->setFile(FILE_SHADER_INPUT);
1840 address += info->prop.cp.inputOffset;
1841 break;
1842 default:
1843 assert(0); /* TODO: Add support for global and private memory */
1844 }
1845 }
1846
1847 if (idx >= 0) {
1848 if (sym->reg.file == FILE_SHADER_INPUT)
1849 sym->setOffset(info->in[idx].slot[c] * 4);
1850 else
1851 if (sym->reg.file == FILE_SHADER_OUTPUT)
1852 sym->setOffset(info->out[idx].slot[c] * 4);
1853 else
1854 if (sym->reg.file == FILE_SYSTEM_VALUE)
1855 sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
1856 else
1857 sym->setOffset(address);
1858 } else {
1859 sym->setOffset(address);
1860 }
1861 return sym;
1862 }
1863
1864 uint8_t
1865 Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
1866 {
1867 uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
1868
1869 if (var->flat)
1870 mode = NV50_IR_INTERP_FLAT;
1871 else
1872 if (var->linear)
1873 mode = NV50_IR_INTERP_LINEAR;
1874 else
1875 if (var->sc)
1876 mode = NV50_IR_INTERP_SC;
1877
1878 op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
1879 ? OP_PINTERP : OP_LINTERP;
1880
1881 if (var->centroid)
1882 mode |= NV50_IR_INTERP_CENTROID;
1883
1884 return mode;
1885 }
1886
1887 Value *
1888 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1889 {
1890 operation op;
1891
1892 // XXX: no way to know interpolation mode if we don't know what's accessed
1893 const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
1894 src.getIndex(0)], op);
1895
1896 Instruction *insn = new_Instruction(func, op, TYPE_F32);
1897
1898 insn->setDef(0, getScratch());
1899 insn->setSrc(0, srcToSym(src, c));
1900 if (op == OP_PINTERP)
1901 insn->setSrc(1, fragCoord[3]);
1902 if (ptr)
1903 insn->setIndirect(0, 0, ptr);
1904
1905 insn->setInterpolate(mode);
1906
1907 bb->insertTail(insn);
1908 return insn->getDef(0);
1909 }
1910
1911 Value *
1912 Converter::applySrcMod(Value *val, int s, int c)
1913 {
1914 Modifier m = tgsi.getSrc(s).getMod(c);
1915 DataType ty = tgsi.inferSrcType();
1916
1917 if (m & Modifier(NV50_IR_MOD_ABS))
1918 val = mkOp1v(OP_ABS, ty, getScratch(), val);
1919
1920 if (m & Modifier(NV50_IR_MOD_NEG))
1921 val = mkOp1v(OP_NEG, ty, getScratch(), val);
1922
1923 return val;
1924 }
1925
1926 Value *
1927 Converter::getVertexBase(int s)
1928 {
1929 assert(s < 5);
1930 if (!(vtxBaseValid & (1 << s))) {
1931 const int index = tgsi.getSrc(s).getIndex(1);
1932 Value *rel = NULL;
1933 if (tgsi.getSrc(s).isIndirect(1))
1934 rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
1935 vtxBaseValid |= 1 << s;
1936 vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1937 mkImm(index), rel);
1938 }
1939 return vtxBase[s];
1940 }
1941
1942 Value *
1943 Converter::getOutputBase(int s)
1944 {
1945 assert(s < 5);
1946 if (!(vtxBaseValid & (1 << s))) {
1947 Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
1948 if (tgsi.getSrc(s).isIndirect(1))
1949 offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
1950 fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
1951 offset);
1952 vtxBaseValid |= 1 << s;
1953 vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);
1954 }
1955 return vtxBase[s];
1956 }
1957
1958 Value *
1959 Converter::fetchSrc(int s, int c)
1960 {
1961 Value *res;
1962 Value *ptr = NULL, *dimRel = NULL;
1963
1964 tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
1965
1966 if (src.isIndirect(0))
1967 ptr = fetchSrc(src.getIndirect(0), 0, NULL);
1968
1969 if (src.is2D()) {
1970 switch (src.getFile()) {
1971 case TGSI_FILE_OUTPUT:
1972 dimRel = getOutputBase(s);
1973 break;
1974 case TGSI_FILE_INPUT:
1975 dimRel = getVertexBase(s);
1976 break;
1977 case TGSI_FILE_CONSTANT:
1978 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
1979 if (src.isIndirect(1))
1980 dimRel = fetchSrc(src.getIndirect(1), 0, 0);
1981 break;
1982 default:
1983 break;
1984 }
1985 }
1986
1987 res = fetchSrc(src, c, ptr);
1988
1989 if (dimRel)
1990 res->getInsn()->setIndirect(0, 1, dimRel);
1991
1992 return applySrcMod(res, s, c);
1993 }
1994
1995 Value *
1996 Converter::fetchDst(int d, int c)
1997 {
1998 Value *res;
1999 Value *ptr = NULL, *dimRel = NULL;
2000
2001 tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2002
2003 if (dst.isIndirect(0))
2004 ptr = fetchSrc(dst.getIndirect(0), 0, NULL);
2005
2006 if (dst.is2D()) {
2007 switch (dst.getFile()) {
2008 case TGSI_FILE_OUTPUT:
2009 assert(0); // TODO
2010 dimRel = NULL;
2011 break;
2012 case TGSI_FILE_INPUT:
2013 assert(0); // TODO
2014 dimRel = NULL;
2015 break;
2016 case TGSI_FILE_CONSTANT:
2017 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
2018 if (dst.isIndirect(1))
2019 dimRel = fetchSrc(dst.getIndirect(1), 0, 0);
2020 break;
2021 default:
2022 break;
2023 }
2024 }
2025
2026 struct tgsi_full_src_register fsr = dst.asSrc();
2027 tgsi::Instruction::SrcRegister src(&fsr);
2028 res = fetchSrc(src, c, ptr);
2029
2030 if (dimRel)
2031 res->getInsn()->setIndirect(0, 1, dimRel);
2032
2033 return res;
2034 }
2035
2036 Converter::DataArray *
2037 Converter::getArrayForFile(unsigned file, int idx)
2038 {
2039 switch (file) {
2040 case TGSI_FILE_TEMPORARY:
2041 return idx == 0 ? &tData : &lData;
2042 case TGSI_FILE_ADDRESS:
2043 return &aData;
2044 case TGSI_FILE_OUTPUT:
2045 assert(prog->getType() == Program::TYPE_FRAGMENT);
2046 return &oData;
2047 default:
2048 assert(!"invalid/unhandled TGSI source file");
2049 return NULL;
2050 }
2051 }
2052
2053 Value *
2054 Converter::shiftAddress(Value *index)
2055 {
2056 if (!index)
2057 return NULL;
2058 return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
2059 }
2060
2061 void
2062 Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
2063 {
2064 std::map<int, int>::const_iterator it =
2065 code->indirectTempOffsets.find(arrayId);
2066 if (it == code->indirectTempOffsets.end())
2067 return;
2068
2069 idx2d = 1;
2070 idx += it->second;
2071 }
2072
2073 bool
2074 Converter::isSubGroupMask(uint8_t semantic)
2075 {
2076 switch (semantic) {
2077 case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
2078 case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
2079 case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
2080 case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
2081 case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
2082 return true;
2083 default:
2084 return false;
2085 }
2086 }
2087
2088 Value *
2089 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
2090 {
2091 int idx2d = src.is2D() ? src.getIndex(1) : 0;
2092 int idx = src.getIndex(0);
2093 const int swz = src.getSwizzle(c);
2094 Instruction *ld;
2095
2096 switch (src.getFile()) {
2097 case TGSI_FILE_IMMEDIATE:
2098 assert(!ptr);
2099 return loadImm(NULL, info->immd.data[idx * 4 + swz]);
2100 case TGSI_FILE_CONSTANT:
2101 return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
2102 case TGSI_FILE_INPUT:
2103 if (prog->getType() == Program::TYPE_FRAGMENT) {
2104 // don't load masked inputs, won't be assigned a slot
2105 if (!ptr && !(info->in[idx].mask & (1 << swz)))
2106 return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
2107 return interpolate(src, c, shiftAddress(ptr));
2108 } else
2109 if (prog->getType() == Program::TYPE_GEOMETRY) {
2110 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
2111 return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
2112 // XXX: This is going to be a problem with scalar arrays, i.e. when
2113 // we cannot assume that the address is given in units of vec4.
2114 //
2115 // nv50 and nvc0 need different things here, so let the lowering
2116 // passes decide what to do with the address
2117 if (ptr)
2118 return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
2119 }
2120 ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2121 ld->perPatch = info->in[idx].patch;
2122 return ld->getDef(0);
2123 case TGSI_FILE_OUTPUT:
2124 assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
2125 ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2126 ld->perPatch = info->out[idx].patch;
2127 return ld->getDef(0);
2128 case TGSI_FILE_SYSTEM_VALUE:
2129 assert(!ptr);
2130 if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
2131 info->prop.cp.numThreads[swz] == 1)
2132 return loadImm(NULL, 0u);
2133 if (isSubGroupMask(info->sv[idx].sn) && swz > 0)
2134 return loadImm(NULL, 0u);
2135 if (info->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE)
2136 return loadImm(NULL, 32u);
2137 ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
2138 ld->perPatch = info->sv[idx].patch;
2139 return ld->getDef(0);
2140 case TGSI_FILE_TEMPORARY: {
2141 int arrayid = src.getArrayId();
2142 if (!arrayid)
2143 arrayid = code->tempArrayId[idx];
2144 adjustTempIndex(arrayid, idx, idx2d);
2145 }
2146 /* fallthrough */
2147 default:
2148 return getArrayForFile(src.getFile(), idx2d)->load(
2149 sub.cur->values, idx, swz, shiftAddress(ptr));
2150 }
2151 }
2152
2153 Value *
2154 Converter::acquireDst(int d, int c)
2155 {
2156 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2157 const unsigned f = dst.getFile();
2158 int idx = dst.getIndex(0);
2159 int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2160
2161 if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY ||
2162 f == TGSI_FILE_IMAGE)
2163 return NULL;
2164
2165 if (dst.isIndirect(0) ||
2166 f == TGSI_FILE_SYSTEM_VALUE ||
2167 (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
2168 return getScratch();
2169
2170 if (f == TGSI_FILE_TEMPORARY) {
2171 int arrayid = dst.getArrayId();
2172 if (!arrayid)
2173 arrayid = code->tempArrayId[idx];
2174 adjustTempIndex(arrayid, idx, idx2d);
2175 }
2176
2177 return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
2178 }
2179
2180 void
2181 Converter::storeDst(int d, int c, Value *val)
2182 {
2183 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2184
2185 if (tgsi.getSaturate()) {
2186 mkOp1(OP_SAT, dstTy, val, val);
2187 }
2188
2189 Value *ptr = NULL;
2190 if (dst.isIndirect(0))
2191 ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));
2192
2193 if (info->io.genUserClip > 0 &&
2194 dst.getFile() == TGSI_FILE_OUTPUT &&
2195 !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
2196 mkMov(clipVtx[c], val);
2197 val = clipVtx[c];
2198 }
2199
2200 storeDst(dst, c, val, ptr);
2201 }
2202
2203 void
2204 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
2205 Value *val, Value *ptr)
2206 {
2207 const unsigned f = dst.getFile();
2208 int idx = dst.getIndex(0);
2209 int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2210
2211 if (f == TGSI_FILE_SYSTEM_VALUE) {
2212 assert(!ptr);
2213 mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
2214 } else
2215 if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
2216
2217 if (ptr || (info->out[idx].mask & (1 << c))) {
2218 /* Save the viewport index into a scratch register so that it can be
2219 exported at EMIT time */
2220 if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
2221 prog->getType() == Program::TYPE_GEOMETRY &&
2222 viewport != NULL)
2223 mkOp1(OP_MOV, TYPE_U32, viewport, val);
2224 else
2225 mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
2226 info->out[idx].patch;
2227 }
2228 } else
2229 if (f == TGSI_FILE_TEMPORARY ||
2230 f == TGSI_FILE_ADDRESS ||
2231 f == TGSI_FILE_OUTPUT) {
2232 if (f == TGSI_FILE_TEMPORARY) {
2233 int arrayid = dst.getArrayId();
2234 if (!arrayid)
2235 arrayid = code->tempArrayId[idx];
2236 adjustTempIndex(arrayid, idx, idx2d);
2237 }
2238
2239 getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
2240 } else {
2241 assert(!"invalid dst file");
2242 }
2243 }
2244
// Iterate chan over 0..3 and run the following statement only for channels
// that destination d of inst does not mask out.
//
// Arguments are parenthesized so arbitrary expressions can be passed, and the
// test is written as "if (masked) {} else" so that an `else` following the
// controlled statement cannot bind to the macro's internal `if`.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst)     \
   for ((chan) = 0; (chan) < 4; ++(chan))               \
      if ((inst).getDst(d).isMasked(chan)) {} else
2248
2249 Value *
2250 Converter::buildDot(int dim)
2251 {
2252 assert(dim > 0);
2253
2254 Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
2255 Value *dotp = getScratch();
2256
2257 mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1)
2258 ->dnz = info->io.mul_zero_wins;
2259
2260 for (int c = 1; c < dim; ++c) {
2261 src0 = fetchSrc(0, c);
2262 src1 = fetchSrc(1, c);
2263 mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp)
2264 ->dnz = info->io.mul_zero_wins;
2265 }
2266 return dotp;
2267 }
2268
2269 void
2270 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
2271 {
2272 FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
2273 join->fixed = 1;
2274 conv->insertHead(join);
2275
2276 assert(!fork->joinAt);
2277 fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
2278 fork->insertBefore(fork->getExit(), fork->joinAt);
2279 }
2280
2281 void
2282 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
2283 {
2284 unsigned rIdx = 0, sIdx = 0;
2285
2286 if (R >= 0 && tgsi.getSrc(R).getFile() != TGSI_FILE_SAMPLER) {
2287 // This is the bindless case. We have to get the actual value and pass
2288 // it in. This will be the complete handle.
2289 tex->tex.rIndirectSrc = s;
2290 tex->setSrc(s++, fetchSrc(R, 0));
2291 tex->setTexture(tgsi.getTexture(code, R), 0xff, 0x1f);
2292 tex->tex.bindless = true;
2293 return;
2294 }
2295
2296 if (R >= 0)
2297 rIdx = tgsi.getSrc(R).getIndex(0);
2298 if (S >= 0)
2299 sIdx = tgsi.getSrc(S).getIndex(0);
2300
2301 tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
2302
2303 if (tgsi.getSrc(R).isIndirect(0)) {
2304 tex->tex.rIndirectSrc = s;
2305 tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
2306 }
2307 if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
2308 tex->tex.sIndirectSrc = s;
2309 tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
2310 }
2311 }
2312
2313 void
2314 Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
2315 {
2316 TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
2317 tex->tex.query = query;
2318 unsigned int c, d;
2319
2320 for (d = 0, c = 0; c < 4; ++c) {
2321 if (!dst0[c])
2322 continue;
2323 tex->tex.mask |= 1 << c;
2324 tex->setDef(d++, dst0[c]);
2325 }
2326 if (query == TXQ_DIMS)
2327 tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
2328 else
2329 tex->setSrc((c = 0), zero);
2330
2331 setTexRS(tex, ++c, R, -1);
2332
2333 bb->insertTail(tex);
2334 }
2335
2336 void
2337 Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
2338 {
2339 Value *proj = fetchSrc(0, 3);
2340 Instruction *insn = proj->getUniqueInsn();
2341 int c;
2342
2343 if (insn->op == OP_PINTERP) {
2344 bb->insertTail(insn = cloneForward(func, insn));
2345 insn->op = OP_LINTERP;
2346 insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
2347 insn->setSrc(1, NULL);
2348 proj = insn->getDef(0);
2349 }
2350 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
2351
2352 for (c = 0; c < 4; ++c) {
2353 if (!(mask & (1 << c)))
2354 continue;
2355 if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
2356 continue;
2357 mask &= ~(1 << c);
2358
2359 bb->insertTail(insn = cloneForward(func, insn));
2360 insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
2361 insn->setSrc(1, proj);
2362 dst[c] = insn->getDef(0);
2363 }
2364 if (!mask)
2365 return;
2366
2367 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
2368
2369 for (c = 0; c < 4; ++c)
2370 if (mask & (1 << c))
2371 dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
2372 }
2373
2374 // order of nv50 ir sources: x y z layer lod/bias shadow
2375 // order of TGSI TEX sources: x y z layer shadow lod/bias
2376 // lowering will finally set the hw specific order (like array first on nvc0)
2377 void
2378 Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
2379 {
2380 Value *arg[4], *src[8];
2381 Value *lod = NULL, *shd = NULL;
2382 unsigned int s, c, d;
2383 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2384
2385 TexInstruction::Target tgt = tgsi.getTexture(code, R);
2386
2387 for (s = 0; s < tgt.getArgCount(); ++s)
2388 arg[s] = src[s] = fetchSrc(0, s);
2389
2390 if (tgsi.getOpcode() == TGSI_OPCODE_TEX_LZ)
2391 lod = loadImm(NULL, 0);
2392 else if (texi->op == OP_TXL || texi->op == OP_TXB)
2393 lod = fetchSrc(L >> 4, L & 3);
2394
2395 if (C == 0x0f)
2396 C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
2397
2398 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 &&
2399 tgt == TEX_TARGET_CUBE_ARRAY_SHADOW)
2400 shd = fetchSrc(1, 0);
2401 else if (tgt.isShadow())
2402 shd = fetchSrc(C >> 4, C & 3);
2403
2404 if (texi->op == OP_TXD) {
2405 for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
2406 texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
2407 texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
2408 }
2409 }
2410
2411 // cube textures don't care about projection value, it's divided out
2412 if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
2413 unsigned int n = tgt.getDim();
2414 if (shd) {
2415 arg[n] = shd;
2416 ++n;
2417 assert(tgt.getDim() == tgt.getArgCount());
2418 }
2419 loadProjTexCoords(src, arg, (1 << n) - 1);
2420 if (shd)
2421 shd = src[n - 1];
2422 }
2423
2424 for (c = 0, d = 0; c < 4; ++c) {
2425 if (dst[c]) {
2426 texi->setDef(d++, dst[c]);
2427 texi->tex.mask |= 1 << c;
2428 } else {
2429 // NOTE: maybe hook up def too, for CSE
2430 }
2431 }
2432 for (s = 0; s < tgt.getArgCount(); ++s)
2433 texi->setSrc(s, src[s]);
2434 if (lod)
2435 texi->setSrc(s++, lod);
2436 if (shd)
2437 texi->setSrc(s++, shd);
2438
2439 setTexRS(texi, s, R, S);
2440
2441 if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
2442 texi->tex.levelZero = true;
2443 if (prog->getType() != Program::TYPE_FRAGMENT &&
2444 (tgsi.getOpcode() == TGSI_OPCODE_TEX ||
2445 tgsi.getOpcode() == TGSI_OPCODE_TEX2 ||
2446 tgsi.getOpcode() == TGSI_OPCODE_TXP))
2447 texi->tex.levelZero = true;
2448 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
2449 texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);
2450
2451 texi->tex.useOffsets = tgsi.getNumTexOffsets();
2452 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2453 for (c = 0; c < 3; ++c) {
2454 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2455 texi->offset[s][c].setInsn(texi);
2456 }
2457 }
2458
2459 bb->insertTail(texi);
2460 }
2461
2462 // 1st source: xyz = coordinates, w = lod/sample
2463 // 2nd source: offset
2464 void
2465 Converter::handleTXF(Value *dst[4], int R, int L_M)
2466 {
2467 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2468 int ms;
2469 unsigned int c, d, s;
2470
2471 texi->tex.target = tgsi.getTexture(code, R);
2472
2473 ms = texi->tex.target.isMS() ? 1 : 0;
2474 texi->tex.levelZero = ms; /* MS textures don't have mip-maps */
2475
2476 for (c = 0, d = 0; c < 4; ++c) {
2477 if (dst[c]) {
2478 texi->setDef(d++, dst[c]);
2479 texi->tex.mask |= 1 << c;
2480 }
2481 }
2482 for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c)
2483 texi->setSrc(c, fetchSrc(0, c));
2484 if (!ms && tgsi.getOpcode() == TGSI_OPCODE_TXF_LZ)
2485 texi->setSrc(c++, loadImm(NULL, 0));
2486 else
2487 texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms
2488
2489 setTexRS(texi, c, R, -1);
2490
2491 texi->tex.useOffsets = tgsi.getNumTexOffsets();
2492 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2493 for (c = 0; c < 3; ++c) {
2494 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2495 texi->offset[s][c].setInsn(texi);
2496 }
2497 }
2498
2499 bb->insertTail(texi);
2500 }
2501
2502 void
2503 Converter::handleFBFETCH(Value *dst[4])
2504 {
2505 TexInstruction *texi = new_TexInstruction(func, OP_TXF);
2506 unsigned int c, d;
2507
2508 texi->tex.target = TEX_TARGET_2D_MS_ARRAY;
2509 texi->tex.levelZero = 1;
2510 texi->tex.useOffsets = 0;
2511
2512 for (c = 0, d = 0; c < 4; ++c) {
2513 if (dst[c]) {
2514 texi->setDef(d++, dst[c]);
2515 texi->tex.mask |= 1 << c;
2516 }
2517 }
2518
2519 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
2520 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
2521 Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
2522 Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));
2523
2524 mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z;
2525 mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;
2526 texi->setSrc(0, x);
2527 texi->setSrc(1, y);
2528 texi->setSrc(2, z);
2529 texi->setSrc(3, ms);
2530
2531 texi->tex.r = texi->tex.s = -1;
2532
2533 bb->insertTail(texi);
2534 }
2535
2536 void
2537 Converter::handleLIT(Value *dst0[4])
2538 {
2539 Value *val0 = NULL;
2540 unsigned int mask = tgsi.getDst(0).getMask();
2541
2542 if (mask & (1 << 0))
2543 loadImm(dst0[0], 1.0f);
2544
2545 if (mask & (1 << 3))
2546 loadImm(dst0[3], 1.0f);
2547
2548 if (mask & (3 << 1)) {
2549 val0 = getScratch();
2550 mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
2551 if (mask & (1 << 1))
2552 mkMov(dst0[1], val0);
2553 }
2554
2555 if (mask & (1 << 2)) {
2556 Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
2557 Value *val1 = getScratch(), *val3 = getScratch();
2558
2559 Value *pos128 = loadImm(NULL, +127.999999f);
2560 Value *neg128 = loadImm(NULL, -127.999999f);
2561
2562 mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
2563 mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
2564 mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
2565 mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
2566
2567 mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0);
2568 }
2569 }
2570
2571 /* Keep this around for now as reference when adding img support
2572 static inline bool
2573 isResourceSpecial(const int r)
2574 {
2575 return (r == TGSI_RESOURCE_GLOBAL ||
2576 r == TGSI_RESOURCE_LOCAL ||
2577 r == TGSI_RESOURCE_PRIVATE ||
2578 r == TGSI_RESOURCE_INPUT);
2579 }
2580
2581 static inline bool
2582 isResourceRaw(const tgsi::Source *code, const int r)
2583 {
2584 return isResourceSpecial(r) || code->resources[r].raw;
2585 }
2586
2587 static inline nv50_ir::TexTarget
2588 getResourceTarget(const tgsi::Source *code, int r)
2589 {
2590 if (isResourceSpecial(r))
2591 return nv50_ir::TEX_TARGET_BUFFER;
2592 return tgsi::translateTexture(code->resources.at(r).target);
2593 }
2594
2595 Symbol *
2596 Converter::getResourceBase(const int r)
2597 {
2598 Symbol *sym = NULL;
2599
2600 switch (r) {
2601 case TGSI_RESOURCE_GLOBAL:
2602 sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
2603 info->io.auxCBSlot);
2604 break;
2605 case TGSI_RESOURCE_LOCAL:
2606 assert(prog->getType() == Program::TYPE_COMPUTE);
2607 sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
2608 info->prop.cp.sharedOffset);
2609 break;
2610 case TGSI_RESOURCE_PRIVATE:
2611 sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
2612 info->bin.tlsSpace);
2613 break;
2614 case TGSI_RESOURCE_INPUT:
2615 assert(prog->getType() == Program::TYPE_COMPUTE);
2616 sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
2617 info->prop.cp.inputOffset);
2618 break;
2619 default:
2620 sym = new_Symbol(prog,
2621 nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
2622 break;
2623 }
2624 return sym;
2625 }
2626
2627 void
2628 Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
2629 {
2630 const int arg =
2631 TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
2632
2633 for (int c = 0; c < arg; ++c)
2634 coords.push_back(fetchSrc(s, c));
2635
2636 // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
2637 if (r == TGSI_RESOURCE_LOCAL ||
2638 r == TGSI_RESOURCE_PRIVATE ||
2639 r == TGSI_RESOURCE_INPUT)
2640 coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
2641 coords[0]);
2642 }
2643
2644 static inline int
2645 partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
2646 {
2647 int n = 0;
2648
2649 while (mask) {
2650 if (mask & 1) {
2651 size[n]++;
2652 } else {
2653 if (size[n])
2654 comp[n = 1] = size[0] + 1;
2655 else
2656 comp[n]++;
2657 }
2658 mask >>= 1;
2659 }
2660 if (size[0] == 3) {
2661 n = 1;
2662 size[0] = (comp[0] == 1) ? 1 : 2;
2663 size[1] = 3 - size[0];
2664 comp[1] = comp[0] + size[0];
2665 }
2666 return n + 1;
2667 }
2668 */
2669 void
2670 Converter::getImageCoords(std::vector<Value *> &coords, int s)
2671 {
2672 TexInstruction::Target t =
2673 TexInstruction::Target(tgsi.getImageTarget());
2674 const int arg = t.getDim() + (t.isArray() || t.isCube());
2675
2676 for (int c = 0; c < arg; ++c)
2677 coords.push_back(fetchSrc(s, c));
2678
2679 if (t.isMS())
2680 coords.push_back(fetchSrc(s, 3));
2681 }
2682
2683 // For raw loads, granularity is 4 byte.
2684 // Usage of the texture read mask on OP_SULDP is not allowed.
2685 void
2686 Converter::handleLOAD(Value *dst0[4])
2687 {
2688 const int r = tgsi.getSrc(0).getIndex(0);
2689 int c;
2690 std::vector<Value *> off, src, ldv, def;
2691 Value *ind = NULL;
2692
2693 if (tgsi.getSrc(0).isIndirect(0))
2694 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2695
2696 switch (tgsi.getSrc(0).getFile()) {
2697 case TGSI_FILE_BUFFER:
2698 case TGSI_FILE_MEMORY:
2699 for (c = 0; c < 4; ++c) {
2700 if (!dst0[c])
2701 continue;
2702
2703 Value *off;
2704 Symbol *sym;
2705 uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4;
2706
2707 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
2708 off = NULL;
2709 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2710 tgsi.getSrc(1).getValueU32(0, info) +
2711 src0_component_offset);
2712 } else {
2713 // yzw are ignored for buffers
2714 off = fetchSrc(1, 0);
2715 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2716 src0_component_offset);
2717 }
2718
2719 Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
2720 ld->cache = tgsi.getCacheMode();
2721 if (ind)
2722 ld->setIndirect(0, 1, ind);
2723 }
2724 break;
2725 default: {
2726 getImageCoords(off, 1);
2727 def.resize(4);
2728
2729 for (c = 0; c < 4; ++c) {
2730 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2731 def[c] = getScratch();
2732 else
2733 def[c] = dst0[c];
2734 }
2735
2736 bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
2737 if (bindless)
2738 ind = fetchSrc(0, 0);
2739
2740 TexInstruction *ld =
2741 mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off);
2742 ld->tex.mask = tgsi.getDst(0).getMask();
2743 ld->tex.format = tgsi.getImageFormat();
2744 ld->cache = tgsi.getCacheMode();
2745 ld->tex.bindless = bindless;
2746 if (!bindless)
2747 ld->tex.r = r;
2748 if (ind)
2749 ld->setIndirectR(ind);
2750
2751 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2752 if (dst0[c] != def[c])
2753 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2754 break;
2755 }
2756 }
2757
2758
2759 /* Keep this around for now as reference when adding img support
2760 getResourceCoords(off, r, 1);
2761
2762 if (isResourceRaw(code, r)) {
2763 uint8_t mask = 0;
2764 uint8_t comp[2] = { 0, 0 };
2765 uint8_t size[2] = { 0, 0 };
2766
2767 Symbol *base = getResourceBase(r);
2768
2769 // determine the base and size of the at most 2 load ops
2770 for (c = 0; c < 4; ++c)
2771 if (!tgsi.getDst(0).isMasked(c))
2772 mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
2773
2774 int n = partitionLoadStore(comp, size, mask);
2775
2776 src = off;
2777
2778 def.resize(4); // index by component, the ones we need will be non-NULL
2779 for (c = 0; c < 4; ++c) {
2780 if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
2781 def[c] = dst0[c];
2782 else
2783 if (mask & (1 << c))
2784 def[c] = getScratch();
2785 }
2786
2787 const bool useLd = isResourceSpecial(r) ||
2788 (info->io.nv50styleSurfaces &&
2789 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2790
2791 for (int i = 0; i < n; ++i) {
2792 ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
2793
2794 if (comp[i]) // adjust x component of source address if necessary
2795 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2796 off[0], mkImm(comp[i] * 4));
2797 else
2798 src[0] = off[0];
2799
2800 if (useLd) {
2801 Instruction *ld =
2802 mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
2803 for (size_t c = 1; c < ldv.size(); ++c)
2804 ld->setDef(c, ldv[c]);
2805 } else {
2806 mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
2807 0, ldv, src)->dType = typeOfSize(size[i] * 4);
2808 }
2809 }
2810 } else {
2811 def.resize(4);
2812 for (c = 0; c < 4; ++c) {
2813 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2814 def[c] = getScratch();
2815 else
2816 def[c] = dst0[c];
2817 }
2818
2819 mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
2820 def, off);
2821 }
2822 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2823 if (dst0[c] != def[c])
2824 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2825 */
2826 }
2827
2828 // For formatted stores, the write mask on OP_SUSTP can be used.
2829 // Raw stores have to be split.
2830 void
2831 Converter::handleSTORE()
2832 {
2833 const int r = tgsi.getDst(0).getIndex(0);
2834 int c;
2835 std::vector<Value *> off, src, dummy;
2836 Value *ind = NULL;
2837
2838 if (tgsi.getDst(0).isIndirect(0))
2839 ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0);
2840
2841 switch (tgsi.getDst(0).getFile()) {
2842 case TGSI_FILE_BUFFER:
2843 case TGSI_FILE_MEMORY:
2844 for (c = 0; c < 4; ++c) {
2845 if (!(tgsi.getDst(0).getMask() & (1 << c)))
2846 continue;
2847
2848 Symbol *sym;
2849 Value *off;
2850 if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
2851 off = NULL;
2852 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
2853 tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
2854 } else {
2855 // yzw are ignored for buffers
2856 off = fetchSrc(0, 0);
2857 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
2858 }
2859
2860 Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
2861 st->cache = tgsi.getCacheMode();
2862 if (ind)
2863 st->setIndirect(0, 1, ind);
2864 }
2865 break;
2866 default: {
2867 getImageCoords(off, 0);
2868 src = off;
2869
2870 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2871 src.push_back(fetchSrc(1, c));
2872
2873 bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE;
2874 if (bindless)
2875 ind = fetchDst(0, 0);
2876
2877 TexInstruction *st =
2878 mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src);
2879 st->tex.mask = tgsi.getDst(0).getMask();
2880 st->tex.format = tgsi.getImageFormat();
2881 st->cache = tgsi.getCacheMode();
2882 st->tex.bindless = bindless;
2883 if (!bindless)
2884 st->tex.r = r;
2885 if (ind)
2886 st->setIndirectR(ind);
2887
2888 break;
2889 }
2890 }
2891
2892 /* Keep this around for now as reference when adding img support
2893 getResourceCoords(off, r, 0);
2894 src = off;
2895 const int s = src.size();
2896
2897 if (isResourceRaw(code, r)) {
2898 uint8_t comp[2] = { 0, 0 };
2899 uint8_t size[2] = { 0, 0 };
2900
2901 int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
2902
2903 Symbol *base = getResourceBase(r);
2904
2905 const bool useSt = isResourceSpecial(r) ||
2906 (info->io.nv50styleSurfaces &&
2907 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2908
2909 for (int i = 0; i < n; ++i) {
2910 if (comp[i]) // adjust x component of source address if necessary
2911 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2912 off[0], mkImm(comp[i] * 4));
2913 else
2914 src[0] = off[0];
2915
2916 const DataType stTy = typeOfSize(size[i] * 4);
2917
2918 if (useSt) {
2919 Instruction *st =
2920 mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
2921 for (c = 1; c < size[i]; ++c)
2922 st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
2923 st->setIndirect(0, 0, src[0]);
2924 } else {
2925 // attach values to be stored
2926 src.resize(s + size[i]);
2927 for (c = 0; c < size[i]; ++c)
2928 src[s + c] = fetchSrc(1, comp[i] + c);
2929 mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
2930 0, dummy, src)->setType(stTy);
2931 }
2932 }
2933 } else {
2934 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2935 src.push_back(fetchSrc(1, c));
2936
2937 mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
2938 dummy, src)->tex.mask = tgsi.getDst(0).getMask();
2939 }
2940 */
2941 }
2942
2943 // XXX: These only work on resources with the single-component u32/s32 formats.
2944 // Therefore the result is replicated. This might not be intended by TGSI, but
2945 // operating on more than 1 component would produce undefined results because
2946 // they do not exist.
2947 void
2948 Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
2949 {
2950 const int r = tgsi.getSrc(0).getIndex(0);
2951 std::vector<Value *> srcv;
2952 std::vector<Value *> defv;
2953 LValue *dst = getScratch();
2954 Value *ind = NULL;
2955
2956 if (tgsi.getSrc(0).isIndirect(0))
2957 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2958
2959 switch (tgsi.getSrc(0).getFile()) {
2960 case TGSI_FILE_BUFFER:
2961 case TGSI_FILE_MEMORY:
2962 for (int c = 0; c < 4; ++c) {
2963 if (!dst0[c])
2964 continue;
2965
2966 Instruction *insn;
2967 Value *off = fetchSrc(1, c);
2968 Value *sym;
2969 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
2970 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2971 tgsi.getSrc(1).getValueU32(c, info));
2972 else
2973 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
2974 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2975 insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
2976 else
2977 insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
2978 if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
2979 insn->setIndirect(0, 0, off);
2980 if (ind)
2981 insn->setIndirect(0, 1, ind);
2982 insn->subOp = subOp;
2983 }
2984 for (int c = 0; c < 4; ++c)
2985 if (dst0[c])
2986 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
2987 break;
2988 default: {
2989 getImageCoords(srcv, 1);
2990 defv.push_back(dst);
2991 srcv.push_back(fetchSrc(2, 0));
2992
2993 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2994 srcv.push_back(fetchSrc(3, 0));
2995
2996 bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
2997 if (bindless)
2998 ind = fetchSrc(0, 0);
2999
3000 TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(),
3001 0, 0, defv, srcv);
3002 tex->subOp = subOp;
3003 tex->tex.mask = 1;
3004 tex->tex.format = tgsi.getImageFormat();
3005 tex->setType(ty);
3006 tex->tex.bindless = bindless;
3007 if (!bindless)
3008 tex->tex.r = r;
3009 if (ind)
3010 tex->setIndirectR(ind);
3011
3012 for (int c = 0; c < 4; ++c)
3013 if (dst0[c])
3014 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
3015 break;
3016 }
3017 }
3018
3019 /* Keep this around for now as reference when adding img support
3020 getResourceCoords(srcv, r, 1);
3021
3022 if (isResourceSpecial(r)) {
3023 assert(r != TGSI_RESOURCE_INPUT);
3024 Instruction *insn;
3025 insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0));
3026 insn->subOp = subOp;
3027 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
3028 insn->setSrc(2, fetchSrc(3, 0));
3029 insn->setIndirect(0, 0, srcv.at(0));
3030 } else {
3031 operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP;
3032 TexTarget targ = getResourceTarget(code, r);
3033 int idx = code->resources[r].slot;
3034 defv.push_back(dst);
3035 srcv.push_back(fetchSrc(2, 0));
3036 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
3037 srcv.push_back(fetchSrc(3, 0));
3038 TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv);
3039 tex->subOp = subOp;
3040 tex->tex.mask = 1;
3041 tex->setType(ty);
3042 }
3043
3044 for (int c = 0; c < 4; ++c)
3045 if (dst0[c])
3046 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
3047 */
3048 }
3049
3050 void
3051 Converter::handleINTERP(Value *dst[4])
3052 {
3053 // Check whether the input is linear. All other attributes ignored.
3054 Instruction *insn;
3055 Value *offset = NULL, *ptr = NULL, *w = NULL;
3056 Symbol *sym[4] = { NULL };
3057 bool linear;
3058 operation op = OP_NOP;
3059 int c, mode = 0;
3060
3061 tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
3062
3063 // In some odd cases, in large part due to varying packing, the source
3064 // might not actually be an input. This is illegal TGSI, but it's easier to
3065 // account for it here than it is to fix it where the TGSI is being
3066 // generated. In that case, it's going to be a straight up mov (or sequence
3067 // of mov's) from the input in question. We follow the mov chain to see
3068 // which input we need to use.
3069 if (src.getFile() != TGSI_FILE_INPUT) {
3070 if (src.isIndirect(0)) {
3071 ERROR("Ignoring indirect input interpolation\n");
3072 return;
3073 }
3074 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3075 Value *val = fetchSrc(0, c);
3076 assert(val->defs.size() == 1);
3077 insn = val->getInsn();
3078 while (insn->op == OP_MOV) {
3079 assert(insn->getSrc(0)->defs.size() == 1);
3080 insn = insn->getSrc(0)->getInsn();
3081 if (!insn) {
3082 ERROR("Miscompiling shader due to unhandled INTERP\n");
3083 return;
3084 }
3085 }
3086 if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) {
3087 ERROR("Trying to interpolate non-input, this is not allowed.\n");
3088 return;
3089 }
3090 sym[c] = insn->getSrc(0)->asSym();
3091 assert(sym[c]);
3092 op = insn->op;
3093 mode = insn->ipa;
3094 ptr = insn->getIndirect(0, 0);
3095 }
3096 } else {
3097 if (src.isIndirect(0))
3098 ptr = shiftAddress(fetchSrc(src.getIndirect(0), 0, NULL));
3099
3100 // We can assume that the fixed index will point to an input of the same
3101 // interpolation type in case of an indirect.
3102 // TODO: Make use of ArrayID.
3103 linear = info->in[src.getIndex(0)].linear;
3104 if (linear) {
3105 op = OP_LINTERP;
3106 mode = NV50_IR_INTERP_LINEAR;
3107 } else {
3108 op = OP_PINTERP;
3109 mode = NV50_IR_INTERP_PERSPECTIVE;
3110 }
3111 }
3112
3113 switch (tgsi.getOpcode()) {
3114 case TGSI_OPCODE_INTERP_CENTROID:
3115 mode |= NV50_IR_INTERP_CENTROID;
3116 break;
3117 case TGSI_OPCODE_INTERP_SAMPLE:
3118 insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0));
3119 insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
3120 mode |= NV50_IR_INTERP_OFFSET;
3121 break;
3122 case TGSI_OPCODE_INTERP_OFFSET: {
3123 // The input in src1.xy is float, but we need a single 32-bit value
3124 // where the upper and lower 16 bits are encoded in S0.12 format. We need
3125 // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096,
3126 // and then convert to s32.
3127 Value *offs[2];
3128 for (c = 0; c < 2; c++) {
3129 offs[c] = getScratch();
3130 mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f));
3131 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
3132 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
3133 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
3134 }
3135 offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(),
3136 offs[1], mkImm(0x1010), offs[0]);
3137 mode |= NV50_IR_INTERP_OFFSET;
3138 break;
3139 }
3140 }
3141
3142 if (op == OP_PINTERP) {
3143 if (offset) {
3144 w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset);
3145 mkOp1(OP_RCP, TYPE_F32, w, w);
3146 } else {
3147 w = fragCoord[3];
3148 }
3149 }
3150
3151
3152 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3153 insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c));
3154 if (op == OP_PINTERP)
3155 insn->setSrc(1, w);
3156 if (offset)
3157 insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);
3158 if (ptr)
3159 insn->setIndirect(0, 0, ptr);
3160
3161 insn->setInterpolate(mode);
3162 }
3163 }
3164
3165 Converter::Subroutine *
3166 Converter::getSubroutine(unsigned ip)
3167 {
3168 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3169
3170 if (it == sub.map.end())
3171 it = sub.map.insert(std::make_pair(
3172 ip, Subroutine(new Function(prog, "SUB", ip)))).first;
3173
3174 return &it->second;
3175 }
3176
3177 Converter::Subroutine *
3178 Converter::getSubroutine(Function *f)
3179 {
3180 unsigned ip = f->getLabel();
3181 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3182
3183 if (it == sub.map.end())
3184 it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
3185
3186 return &it->second;
3187 }
3188
3189 bool
3190 Converter::isEndOfSubroutine(uint ip)
3191 {
3192 assert(ip < code->scan.num_instructions);
3193 tgsi::Instruction insn(&code->insns[ip]);
3194 return (insn.getOpcode() == TGSI_OPCODE_END ||
3195 insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
3196 // does END occur at end of main or the very end ?
3197 insn.getOpcode() == TGSI_OPCODE_BGNSUB);
3198 }
3199
/**
 * Translate a single TGSI instruction into nv50 IR.
 *
 * The instruction is wrapped in a tgsi::Instruction, from which the IR
 * operation and the destination/source types are derived; the function then
 * dispatches on the TGSI opcode.  For instructions that have a destination
 * (except STORE) the four destination channel values are acquired before the
 * switch and written back with storeDst() after it.
 *
 * BGNSUB, ENDSUB, CAL and a RET inside an already terminated block return
 * early, which skips that write-back and the vtxBaseValid reset at the end.
 *
 * @param insn  the TGSI instruction to translate
 * @return true on every returning path (SWITCH/CASE call abort())
 */
3200 bool
3201 Converter::handleInstruction(const struct tgsi_full_instruction *insn)
3202 {
3203 Instruction *geni;
3204
3205 Value *dst0[4], *rDst0[4];
3206 Value *src0, *src1, *src2, *src3;
3207 Value *val0, *val1;
3208 int c;
3209
3210 tgsi = tgsi::Instruction(insn);
3211
// If set, results are built in scratch values and copied to the real
// destinations after the switch.
3212 bool useScratchDst = tgsi.checkDstSrcAliasing();
3213
3214 operation op = tgsi.getOP();
3215 dstTy = tgsi.inferDstType();
3216 srcTy = tgsi.inferSrcType();
3217
// Destination write mask; 0 when the instruction has no destination.
3218 unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
3219
// Acquire the per-channel destinations; skipped for STORE and for
// instructions without a destination.
3220 if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
3221 for (c = 0; c < 4; ++c) {
3222 rDst0[c] = acquireDst(0, c);
3223 dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
3224 }
3225 }
3226
3227 switch (tgsi.getOpcode()) {
// Two-source ops: one IR instruction per enabled channel.
3228 case TGSI_OPCODE_ADD:
3229 case TGSI_OPCODE_UADD:
3230 case TGSI_OPCODE_AND:
3231 case TGSI_OPCODE_DIV:
3232 case TGSI_OPCODE_IDIV:
3233 case TGSI_OPCODE_UDIV:
3234 case TGSI_OPCODE_MAX:
3235 case TGSI_OPCODE_MIN:
3236 case TGSI_OPCODE_IMAX:
3237 case TGSI_OPCODE_IMIN:
3238 case TGSI_OPCODE_UMAX:
3239 case TGSI_OPCODE_UMIN:
3240 case TGSI_OPCODE_MOD:
3241 case TGSI_OPCODE_UMOD:
3242 case TGSI_OPCODE_MUL:
3243 case TGSI_OPCODE_UMUL:
3244 case TGSI_OPCODE_IMUL_HI:
3245 case TGSI_OPCODE_UMUL_HI:
3246 case TGSI_OPCODE_OR:
3247 case TGSI_OPCODE_SHL:
3248 case TGSI_OPCODE_ISHR:
3249 case TGSI_OPCODE_USHR:
3250 case TGSI_OPCODE_XOR:
3251 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3252 src0 = fetchSrc(0, c);
3253 src1 = fetchSrc(1, c);
3254 geni = mkOp2(op, dstTy, dst0[c], src0, src1);
3255 geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3256 if (op == OP_MUL && dstTy == TYPE_F32)
3257 geni->dnz = info->io.mul_zero_wins;
3258 geni->precise = insn->Instruction.Precise;
3259 }
3260 break;
// Three-source ops: one IR instruction per enabled channel.
3261 case TGSI_OPCODE_MAD:
3262 case TGSI_OPCODE_UMAD:
3263 case TGSI_OPCODE_FMA:
3264 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3265 src0 = fetchSrc(0, c);
3266 src1 = fetchSrc(1, c);
3267 src2 = fetchSrc(2, c);
3268 geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2);
3269 if (dstTy == TYPE_F32)
3270 geni->dnz = info->io.mul_zero_wins;
3271 geni->precise = insn->Instruction.Precise;
3272 }
3273 break;
// Single-source ops: one IR instruction per enabled channel.
3274 case TGSI_OPCODE_MOV:
3275 case TGSI_OPCODE_CEIL:
3276 case TGSI_OPCODE_FLR:
3277 case TGSI_OPCODE_TRUNC:
3278 case TGSI_OPCODE_RCP:
3279 case TGSI_OPCODE_SQRT:
3280 case TGSI_OPCODE_IABS:
3281 case TGSI_OPCODE_INEG:
3282 case TGSI_OPCODE_NOT:
3283 case TGSI_OPCODE_DDX:
3284 case TGSI_OPCODE_DDY:
3285 case TGSI_OPCODE_DDX_FINE:
3286 case TGSI_OPCODE_DDY_FINE:
3287 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3288 mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
3289 break;
// RSQ(|src.x|), replicated to every enabled channel.
3290 case TGSI_OPCODE_RSQ:
3291 src0 = fetchSrc(0, 0);
3292 val0 = getScratch();
3293 mkOp1(OP_ABS, TYPE_F32, val0, src0);
3294 mkOp1(OP_RSQ, TYPE_F32, val0, val0);
3295 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3296 mkMov(dst0[c], val0);
3297 break;
// F32 to S32 conversion: ARR uses ROUND_N, ARL uses ROUND_M.
3298 case TGSI_OPCODE_ARL:
3299 case TGSI_OPCODE_ARR:
3300 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3301 const RoundMode rnd =
3302 tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M;
3303 src0 = fetchSrc(0, c);
3304 mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd;
3305 }
3306 break;
3307 case TGSI_OPCODE_UARL:
3308 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3309 mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));
3310 break;
// Scalar ops on the .x components; the result is replicated to every
// enabled channel.
3311 case TGSI_OPCODE_POW:
3312 val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0));
3313 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3314 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3315 break;
3316 case TGSI_OPCODE_EX2:
3317 case TGSI_OPCODE_LG2:
3318 val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
3319 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3320 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3321 break;
// .xyz all receive op(src.x); .w is computed separately from src.w.
3322 case TGSI_OPCODE_COS:
3323 case TGSI_OPCODE_SIN:
3324 val0 = getScratch();
3325 if (mask & 7) {
3326 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
3327 mkOp1(op, TYPE_F32, val0, val0);
3328 for (c = 0; c < 3; ++c)
3329 if (dst0[c])
3330 mkMov(dst0[c], val0);
3331 }
3332 if (dst0[3]) {
3333 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
3334 mkOp1(op, TYPE_F32, dst0[3], val0);
3335 }
3336 break;
// With s = src.x: x = EX2(floor(s)), y = s - floor(s), z = EX2(s), w = 1.0.
3337 case TGSI_OPCODE_EXP:
3338 src0 = fetchSrc(0, 0);
3339 val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
3340 if (dst0[1])
3341 mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
3342 if (dst0[0])
3343 mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
3344 if (dst0[2])
3345 mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
3346 if (dst0[3])
3347 loadImm(dst0[3], 1.0f);
3348 break;
// With s = |src.x|: z = LG2(s), x = floor(z), y = s * RCP(EX2(floor(z))),
// w = 1.0.  val1 is only assigned (and only read) when x or y is written.
3349 case TGSI_OPCODE_LOG:
3350 src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
3351 val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
3352 if (dst0[0] || dst0[1])
3353 val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
3354 if (dst0[1]) {
3355 mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
3356 mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
3357 mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0)
3358 ->dnz = info->io.mul_zero_wins;
3359 }
3360 if (dst0[3])
3361 loadImm(dst0[3], 1.0f);
3362 break;
// Dot products: buildDot(n) yields one value that is replicated to every
// enabled channel.
3363 case TGSI_OPCODE_DP2:
3364 val0 = buildDot(2);
3365 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3366 mkMov(dst0[c], val0);
3367 break;
3368 case TGSI_OPCODE_DP3:
3369 val0 = buildDot(3);
3370 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3371 mkMov(dst0[c], val0);
3372 break;
3373 case TGSI_OPCODE_DP4:
3374 val0 = buildDot(4);
3375 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3376 mkMov(dst0[c], val0);
3377 break;
// x = 1.0, y = src0.y * src1.y, z = src0.z, w = src1.w.
3378 case TGSI_OPCODE_DST:
3379 if (dst0[0])
3380 loadImm(dst0[0], 1.0f);
3381 if (dst0[1]) {
3382 src0 = fetchSrc(0, 1);
3383 src1 = fetchSrc(1, 1);
3384 mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1)
3385 ->dnz = info->io.mul_zero_wins;
3386 }
3387 if (dst0[2])
3388 mkMov(dst0[2], fetchSrc(0, 2));
3389 if (dst0[3])
3390 mkMov(dst0[3], fetchSrc(1, 3));
3391 break;
// Emitted as MAD(src1 - src2, src0, src2).
3392 case TGSI_OPCODE_LRP:
3393 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3394 src0 = fetchSrc(0, c);
3395 src1 = fetchSrc(1, c);
3396 src2 = fetchSrc(2, c);
3397 mkOp3(OP_MAD, TYPE_F32, dst0[c],
3398 mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2)
3399 ->dnz = info->io.mul_zero_wins;
3400 }
3401 break;
3402 case TGSI_OPCODE_LIT:
3403 handleLIT(dst0);
3404 break;
// Sign: combines the (src > 0) and (src < 0) comparison results; the SUB
// operand order differs between the float and the integer case.
3405 case TGSI_OPCODE_ISSG:
3406 case TGSI_OPCODE_SSG:
3407 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3408 src0 = fetchSrc(0, c);
3409 val0 = getScratch();
3410 val1 = getScratch();
3411 mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
3412 mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
3413 if (srcTy == TYPE_F32)
3414 mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
3415 else
3416 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
3417 }
3418 break;
3419 case TGSI_OPCODE_UCMP:
3420 srcTy = TYPE_U32;
3421 /* fallthrough */
3422 case TGSI_OPCODE_CMP:
3423 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3424 src0 = fetchSrc(0, c);
3425 src1 = fetchSrc(1, c);
3426 src2 = fetchSrc(2, c);
// Both choices are the same value: a plain move suffices.
3427 if (src1 == src2)
3428 mkMov(dst0[c], src1);
3429 else
3430 mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
3431 srcTy, dst0[c], srcTy, src1, src2, src0);
3432 }
3433 break;
// Fractional part: src - floor(src).
3434 case TGSI_OPCODE_FRC:
3435 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3436 src0 = fetchSrc(0, c);
3437 val0 = getScratch();
3438 mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
3439 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
3440 }
3441 break;
3442 case TGSI_OPCODE_ROUND:
3443 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3444 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
3445 ->rnd = ROUND_NI;
3446 break;
// Comparisons: the condition code comes from tgsi.getSetCond().
3447 case TGSI_OPCODE_SLT:
3448 case TGSI_OPCODE_SGE:
3449 case TGSI_OPCODE_SEQ:
3450 case TGSI_OPCODE_SGT:
3451 case TGSI_OPCODE_SLE:
3452 case TGSI_OPCODE_SNE:
3453 case TGSI_OPCODE_FSEQ:
3454 case TGSI_OPCODE_FSGE:
3455 case TGSI_OPCODE_FSLT:
3456 case TGSI_OPCODE_FSNE:
3457 case TGSI_OPCODE_ISGE:
3458 case TGSI_OPCODE_ISLT:
3459 case TGSI_OPCODE_USEQ:
3460 case TGSI_OPCODE_USGE:
3461 case TGSI_OPCODE_USLT:
3462 case TGSI_OPCODE_USNE:
3463 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3464 src0 = fetchSrc(0, c);
3465 src1 = fetchSrc(1, c);
3466 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
3467 }
3468 break;
// The source is reduced to a predicate (src != 0), voted on, and the
// resulting predicate converted to U32.
3469 case TGSI_OPCODE_VOTE_ALL:
3470 case TGSI_OPCODE_VOTE_ANY:
3471 case TGSI_OPCODE_VOTE_EQ:
3472 val0 = new_LValue(func, FILE_PREDICATE);
3473 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3474 mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero);
3475 mkOp1(op, dstTy, val0, val0)
3476 ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3477 mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0);
3478 }
3479 break;
// Only .x carries the vote result; .y is written as zero.
3480 case TGSI_OPCODE_BALLOT:
3481 if (!tgsi.getDst(0).isMasked(0)) {
3482 val0 = new_LValue(func, FILE_PREDICATE);
3483 mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, 0), zero);
3484 mkOp1(op, TYPE_U32, dst0[0], val0)->subOp = NV50_IR_SUBOP_VOTE_ANY;
3485 }
3486 if (!tgsi.getDst(0).isMasked(1))
3487 mkMov(dst0[1], zero, TYPE_U32);
3488 break;
3489 case TGSI_OPCODE_READ_FIRST:
3490 // ReadFirstInvocationARB(src) is implemented as
3491 // ReadInvocationARB(src, findLSB(ballot(true)))
3492 val0 = getScratch();
3493 mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
3494 mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
3495 ->subOp = NV50_IR_SUBOP_EXTBF_REV;
3496 mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3497 src1 = val0;
3498 /* fallthrough */
3499 case TGSI_OPCODE_READ_INVOC:
// READ_FIRST arrives here with src1 already set to the computed index.
3500 if (tgsi.getOpcode() == TGSI_OPCODE_READ_INVOC)
3501 src1 = fetchSrc(1, 0);
3502 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3503 geni = mkOp3(op, dstTy, dst0[c], fetchSrc(0, c), src1, mkImm(0x1f));
3504 geni->subOp = NV50_IR_SUBOP_SHFL_IDX;
3505 }
3506 break;
3507 case TGSI_OPCODE_CLOCK:
3508 // Stick the 32-bit clock into the high dword of the logical result.
3509 if (!tgsi.getDst(0).isMasked(0))
3510 mkOp1(OP_MOV, TYPE_U32, dst0[0], zero);
3511 if (!tgsi.getDst(0).isMasked(1))
3512 mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
3513 break;
// 'mask' is reused here to record which source components were already
// tested, so each swizzled component gets one SET + predicated DISCARD.
3514 case TGSI_OPCODE_KILL_IF:
3515 val0 = new_LValue(func, FILE_PREDICATE);
3516 mask = 0;
3517 for (c = 0; c < 4; ++c) {
3518 const int s = tgsi.getSrc(0).getSwizzle(c);
3519 if (mask & (1 << s))
3520 continue;
3521 mask |= 1 << s;
3522 mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);
3523 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
3524 }
3525 break;
3526 case TGSI_OPCODE_KILL:
3527 mkOp(OP_DISCARD, TYPE_NONE, NULL);
3528 break;
// Texture opcodes: the numeric arguments to handleTEX() follow the column
// legend in the comment below.
3529 case TGSI_OPCODE_TEX:
3530 case TGSI_OPCODE_TEX_LZ:
3531 case TGSI_OPCODE_TXB:
3532 case TGSI_OPCODE_TXL:
3533 case TGSI_OPCODE_TXP:
3534 case TGSI_OPCODE_LODQ:
3535 // R S L C Dx Dy
3536 handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
3537 break;
3538 case TGSI_OPCODE_TXD:
3539 handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
3540 break;
3541 case TGSI_OPCODE_TG4:
3542 handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00);
3543 break;
3544 case TGSI_OPCODE_TEX2:
3545 handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
3546 break;
3547 case TGSI_OPCODE_TXB2:
3548 case TGSI_OPCODE_TXL2:
3549 handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00);
3550 break;
3551 case TGSI_OPCODE_SAMPLE:
3552 case TGSI_OPCODE_SAMPLE_B:
3553 case TGSI_OPCODE_SAMPLE_D:
3554 case TGSI_OPCODE_SAMPLE_L:
3555 case TGSI_OPCODE_SAMPLE_C:
3556 case TGSI_OPCODE_SAMPLE_C_LZ:
3557 handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
3558 break;
3559 case TGSI_OPCODE_TXF_LZ:
3560 case TGSI_OPCODE_TXF:
3561 handleTXF(dst0, 1, 0x03);
3562 break;
3563 case TGSI_OPCODE_SAMPLE_I:
3564 handleTXF(dst0, 1, 0x03);
3565 break;
3566 case TGSI_OPCODE_SAMPLE_I_MS:
3567 handleTXF(dst0, 1, 0x20);
3568 break;
3569 case TGSI_OPCODE_TXQ:
3570 case TGSI_OPCODE_SVIEWINFO:
3571 handleTXQ(dst0, TXQ_DIMS, 1);
3572 break;
3573 case TGSI_OPCODE_TXQS:
3574 // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
3575 // be in .x
3576 dst0[1] = dst0[2] = dst0[3] = NULL;
3577 std::swap(dst0[0], dst0[2]);
3578 handleTXQ(dst0, TXQ_TYPE, 0);
3579 std::swap(dst0[0], dst0[2]);
3580 break;
3581 case TGSI_OPCODE_FBFETCH:
3582 handleFBFETCH(dst0);
3583 break;
// Float to integer conversions use ROUND_Z.
3584 case TGSI_OPCODE_F2I:
3585 case TGSI_OPCODE_F2U:
3586 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3587 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
3588 break;
3589 case TGSI_OPCODE_I2F:
3590 case TGSI_OPCODE_U2F:
3591 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3592 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
3593 break;
// src.x and src.y are converted to F16 and combined with INSBF; the same
// packed value is written to every enabled channel.
3594 case TGSI_OPCODE_PK2H:
3595 val0 = getScratch();
3596 val1 = getScratch();
3597 mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));
3598 mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));
3599 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3600 mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);
3601 break;
// All channels convert from src.x; subOp alternates 0/1 with the channel
// index (presumably selecting the low/high half - TODO confirm).
3602 case TGSI_OPCODE_UP2H:
3603 src0 = fetchSrc(0, 0);
3604 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3605 geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);
3606 geni->subOp = c & 1;
3607 }
3608 break;
3609 case TGSI_OPCODE_EMIT:
3610 /* export the saved viewport index */
3611 if (viewport != NULL) {
3612 Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,
3613 info->out[info->io.viewportId].slot[0] * 4);
3614 mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport);
3615 }
3616 /* handle user clip planes for each emitted vertex */
3617 if (info->io.genUserClip > 0)
3618 handleUserClipPlanes();
3619 /* fallthrough */
3620 case TGSI_OPCODE_ENDPRIM:
3621 {
3622 // get vertex stream (must be immediate)
3623 unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
// Nothing is emitted for OP_RESTART on a non-zero stream, nor when
// maxVertices is 0.
3624 if (stream && op == OP_RESTART)
3625 break;
3626 if (info->prop.gp.maxVertices == 0)
3627 break;
3628 src0 = mkImm(stream);
3629 mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
3630 break;
3631 }
3632 case TGSI_OPCODE_IF:
3633 case TGSI_OPCODE_UIF:
3634 {
3635 BasicBlock *ifBB = new BasicBlock(func);
3636
3637 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
// Both stacks remember the block holding the conditional branch; ELSE and
// ENDIF pop them again.
3638 condBBs.push(bb);
3639 joinBBs.push(bb);
3640
3641 mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
3642
3643 setPosition(ifBB, true);
3644 }
3645 break;
3646 case TGSI_OPCODE_ELSE:
3647 {
3648 BasicBlock *elseBB = new BasicBlock(func);
3649 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3650
3651 forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
3652 condBBs.push(bb);
3653
// Retarget the IF's branch at the else block; the branch ending the 'then'
// clause gets its target assigned in ENDIF.
3654 forkBB->getExit()->asFlow()->target.bb = elseBB;
3655 if (!bb->isTerminated())
3656 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
3657
3658 setPosition(elseBB, true);
3659 }
3660 break;
3661 case TGSI_OPCODE_ENDIF:
3662 {
3663 BasicBlock *convBB = new BasicBlock(func);
3664 BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3665 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
3666
3667 if (!bb->isTerminated()) {
3668 // we only want join if none of the clauses ended with CONT/BREAK/RET
3669 if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
3670 insertConvergenceOps(convBB, forkBB);
3671 mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
3672 bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3673 }
3674
// The pending branch of the previous clause now gets its target.
3675 if (prevBB->getExit()->op == OP_BRA) {
3676 prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3677 prevBB->getExit()->asFlow()->target.bb = convBB;
3678 }
3679 setPosition(convBB, true);
3680 }
3681 break;
3682 case TGSI_OPCODE_BGNLOOP:
3683 {
3684 BasicBlock *lbgnBB = new BasicBlock(func);
3685 BasicBlock *lbrkBB = new BasicBlock(func);
3686
3687 loopBBs.push(lbgnBB);
3688 breakBBs.push(lbrkBB);
// Track the deepest loop nesting seen in this function.
3689 if (loopBBs.getSize() > func->loopNestingBound)
3690 func->loopNestingBound++;
3691
3692 mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
3693
3694 bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
3695 setPosition(lbgnBB, true);
3696 mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
3697 }
3698 break;
3699 case TGSI_OPCODE_ENDLOOP:
3700 {
3701 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
3702
3703 if (!bb->isTerminated()) {
3704 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
3705 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
3706 }
3707 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
3708
3709 // If the loop never breaks (e.g. only has RET's inside), then there
3710 // will be no way to get to the break bb. However BGNLOOP will have
3711 // already made a PREBREAK to it, so it must be in the CFG.
3712 if (getBB()->cfg.incidentCount() == 0)
3713 loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
3714 }
3715 break;
3716 case TGSI_OPCODE_BRK:
3717 {
3718 if (bb->isTerminated())
3719 break;
3720 BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
3721 mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
3722 bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
3723 }
3724 break;
3725 case TGSI_OPCODE_CONT:
3726 {
3727 if (bb->isTerminated())
3728 break;
3729 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
3730 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
3731 contBB->explicitCont = true;
3732 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
3733 }
3734 break;
3735 case TGSI_OPCODE_BGNSUB:
3736 {
3737 Subroutine *s = getSubroutine(ip);
3738 BasicBlock *entry = new BasicBlock(s->f);
3739 BasicBlock *leave = new BasicBlock(s->f);
3740
3741 // multiple entrypoints possible, keep the graph connected
3742 if (prog->getType() == Program::TYPE_COMPUTE)
3743 prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
3744
3745 sub.cur = s;
3746 s->f->setEntry(entry);
3747 s->f->setExit(leave);
3748 setPosition(entry, true);
3749 return true;
3750 }
3751 case TGSI_OPCODE_ENDSUB:
3752 {
// Switch back to the main function and continue at its CFG root block.
3753 sub.cur = getSubroutine(prog->main);
3754 setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
3755 return true;
3756 }
3757 case TGSI_OPCODE_CAL:
3758 {
3759 Subroutine *s = getSubroutine(tgsi.getLabel());
3760 mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
3761 func->call.attach(&s->f->call, Graph::Edge::TREE);
3762 return true;
3763 }
3764 case TGSI_OPCODE_RET:
3765 {
3766 if (bb->isTerminated())
3767 return true;
3768 BasicBlock *leave = BasicBlock::get(func->cfgExit);
3769
3770 if (!isEndOfSubroutine(ip + 1)) {
3771 // insert a PRERET at the entry if this is an early return
3772 // (only needed for sharing code in the epilogue)
3773 BasicBlock *root = BasicBlock::get(func->cfg.getRoot());
3774 if (root->getEntry() == NULL || root->getEntry()->op != OP_PRERET) {
3775 BasicBlock *pos = getBB();
3776 setPosition(root, false);
3777 mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
3778 setPosition(pos, true);
3779 }
3780 }
3781 mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
3782 bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
3783 }
3784 break;
3785 case TGSI_OPCODE_END:
3786 {
3787 // attach and generate epilogue code
3788 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
3789 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
3790 setPosition(epilogue, true);
3791 if (prog->getType() == Program::TYPE_FRAGMENT)
3792 exportOutputs();
3793 if ((prog->getType() == Program::TYPE_VERTEX ||
3794 prog->getType() == Program::TYPE_TESSELLATION_EVAL
3795 ) && info->io.genUserClip > 0)
3796 handleUserClipPlanes();
3797 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
3798 }
3799 break;
3800 case TGSI_OPCODE_SWITCH:
3801 case TGSI_OPCODE_CASE:
3802 ERROR("switch/case opcode encountered, should have been lowered\n");
3803 abort();
3804 break;
3805 case TGSI_OPCODE_LOAD:
3806 handleLOAD(dst0);
3807 break;
3808 case TGSI_OPCODE_STORE:
3809 handleSTORE();
3810 break;
3811 case TGSI_OPCODE_BARRIER:
3812 geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
3813 geni->fixed = 1;
3814 geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
3815 break;
3816 case TGSI_OPCODE_MEMBAR:
3817 {
3818 uint32_t level = tgsi.getSrc(0).getValueU32(0, info);
3819 geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
3820 geni->fixed = 1;
// Only THREAD_GROUP/SHARED bits requested: use the CTA variant, otherwise
// the GL variant.
3821 if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED)))
3822 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
3823 else
3824 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
3825 }
3826 break;
3827 case TGSI_OPCODE_ATOMUADD:
3828 case TGSI_OPCODE_ATOMXCHG:
3829 case TGSI_OPCODE_ATOMCAS:
3830 case TGSI_OPCODE_ATOMAND:
3831 case TGSI_OPCODE_ATOMOR:
3832 case TGSI_OPCODE_ATOMXOR:
3833 case TGSI_OPCODE_ATOMUMIN:
3834 case TGSI_OPCODE_ATOMIMIN:
3835 case TGSI_OPCODE_ATOMUMAX:
3836 case TGSI_OPCODE_ATOMIMAX:
3837 handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
3838 break;
// Resource query: buffers use OP_BUFQ, everything else an OP_SUQ texture
// instruction (bindless when the source file is not TGSI_FILE_IMAGE).
3839 case TGSI_OPCODE_RESQ:
3840 if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
3841 Value *ind = NULL;
3842 if (tgsi.getSrc(0).isIndirect(0))
3843 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
3844 geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0],
3845 makeSym(tgsi.getSrc(0).getFile(),
3846 tgsi.getSrc(0).getIndex(0), -1, 0, 0));
3847 if (ind)
3848 geni->setIndirect(0, 1, ind);
3849 } else {
3850 TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
// Defs are packed densely (index d) while tex.mask records the original
// channel positions.
3851 for (int c = 0, d = 0; c < 4; ++c) {
3852 if (dst0[c]) {
3853 texi->setDef(d++, dst0[c]);
3854 texi->tex.mask |= 1 << c;
3855 }
3856 }
3857 if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) {
3858 texi->tex.r = tgsi.getSrc(0).getIndex(0);
3859 if (tgsi.getSrc(0).isIndirect(0))
3860 texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
3861 } else {
3862 texi->tex.bindless = true;
3863 texi->setIndirectR(fetchSrc(0, 0));
3864 }
3865 texi->tex.target = tgsi.getImageTarget();
3866
3867 bb->insertTail(texi);
3868 }
3869 break;
// Bitfield extract: the second EXTBF operand combines src1 and src2, either
// as an immediate (src2 << 8) | src1 or via INSBF at run time.
3870 case TGSI_OPCODE_IBFE:
3871 case TGSI_OPCODE_UBFE:
3872 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3873 src0 = fetchSrc(0, c);
3874 val0 = getScratch();
3875 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
3876 tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
3877 loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) |
3878 tgsi.getSrc(1).getValueU32(c, info));
3879 } else {
3880 src1 = fetchSrc(1, c);
3881 src2 = fetchSrc(2, c);
3882 mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1);
3883 }
3884 mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0);
3885 }
3886 break;
3887 case TGSI_OPCODE_BFI:
3888 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3889 src0 = fetchSrc(0, c);
3890 src1 = fetchSrc(1, c);
3891 src2 = fetchSrc(2, c);
3892 src3 = fetchSrc(3, c);
3893 val0 = getScratch();
3894 mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2);
3895 mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0);
3896 }
3897 break;
// EXTBF with the REV subop followed by BFIND with the SAMT subop.
3898 case TGSI_OPCODE_LSB:
3899 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3900 src0 = fetchSrc(0, c);
3901 val0 = getScratch();
3902 geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
3903 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3904 geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
3905 geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3906 }
3907 break;
3908 case TGSI_OPCODE_IMSB:
3909 case TGSI_OPCODE_UMSB:
3910 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3911 src0 = fetchSrc(0, c);
3912 mkOp1(OP_BFIND, srcTy, dst0[c], src0);
3913 }
3914 break;
3915 case TGSI_OPCODE_BREV:
3916 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3917 src0 = fetchSrc(0, c);
3918 geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
3919 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3920 }
3921 break;
3922 case TGSI_OPCODE_POPC:
3923 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3924 src0 = fetchSrc(0, c);
3925 mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
3926 }
3927 break;
3928 case TGSI_OPCODE_INTERP_CENTROID:
3929 case TGSI_OPCODE_INTERP_SAMPLE:
3930 case TGSI_OPCODE_INTERP_OFFSET:
3931 handleINTERP(dst0);
3932 break;
// 64-bit source, 32-bit destination: 'pos' walks the source in channel
// pairs, each destination channel consuming one merged 64-bit value.
3933 case TGSI_OPCODE_I642F:
3934 case TGSI_OPCODE_U642F:
3935 case TGSI_OPCODE_D2I:
3936 case TGSI_OPCODE_D2U:
3937 case TGSI_OPCODE_D2F: {
3938 int pos = 0;
3939 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3940 Value *dreg = getSSA(8);
3941 src0 = fetchSrc(0, pos);
3942 src1 = fetchSrc(0, pos + 1);
3943 mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
3944 Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
3945 if (!isFloatType(dstTy))
3946 cvt->rnd = ROUND_Z;
3947 pos += 2;
3948 }
3949 break;
3950 }
// 32-bit source, 64-bit destination.  In the cases below a 64-bit result
// occupies dst0[c] and dst0[c + 1]; the extra c++ inside each loop skips the
// second channel of the pair.
// I2I64: low dword is the source itself, high dword is the low dword shifted
// right by 31 as S32.
3951 case TGSI_OPCODE_I2I64:
3952 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3953 dst0[c] = fetchSrc(0, c / 2);
3954 mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));
3955 c++;
3956 }
3957 break;
// U2I64: low dword is the source, high dword is zero.
3958 case TGSI_OPCODE_U2I64:
3959 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3960 dst0[c] = fetchSrc(0, c / 2);
3961 dst0[c + 1] = zero;
3962 c++;
3963 }
3964 break;
// Convert into a 64-bit SSA value, then split it across the channel pair.
3965 case TGSI_OPCODE_F2I64:
3966 case TGSI_OPCODE_F2U64:
3967 case TGSI_OPCODE_I2D:
3968 case TGSI_OPCODE_U2D:
3969 case TGSI_OPCODE_F2D:
3970 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3971 Value *dreg = getSSA(8);
3972 Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
3973 if (!isFloatType(dstTy))
3974 cvt->rnd = ROUND_Z;
3975 mkSplit(&dst0[c], 4, dreg);
3976 c++;
3977 }
3978 break;
// 64-bit to 64-bit conversions: merge the source pair, convert, split.
3979 case TGSI_OPCODE_D2I64:
3980 case TGSI_OPCODE_D2U64:
3981 case TGSI_OPCODE_I642D:
3982 case TGSI_OPCODE_U642D:
3983 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3984 src0 = getSSA(8);
3985 Value *dst = getSSA(8), *tmp[2];
3986 tmp[0] = fetchSrc(0, c);
3987 tmp[1] = fetchSrc(0, c + 1);
3988 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3989 Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);
3990 if (!isFloatType(dstTy))
3991 cvt->rnd = ROUND_Z;
3992 mkSplit(&dst0[c], 4, dst);
3993 c++;
3994 }
3995 break;
// Negation emitted as (zero - src).
3996 case TGSI_OPCODE_I64NEG:
3997 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3998 src0 = getSSA(8);
3999 Value *dst = getSSA(8), *tmp[2];
4000 tmp[0] = fetchSrc(0, c);
4001 tmp[1] = fetchSrc(0, c + 1);
4002 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4003 mkOp2(OP_SUB, dstTy, dst, zero, src0);
4004 mkSplit(&dst0[c], 4, dst);
4005 c++;
4006 }
4007 break;
// Both result dwords are selected between the negated and the original
// value, using the source's second dword (srcComp[1]) as the condition
// operand.
4008 case TGSI_OPCODE_I64ABS:
4009 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4010 src0 = getSSA(8);
4011 Value *neg = getSSA(8), *srcComp[2], *negComp[2];
4012 srcComp[0] = fetchSrc(0, c);
4013 srcComp[1] = fetchSrc(0, c + 1);
4014 mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]);
4015 mkOp2(OP_SUB, dstTy, neg, zero, src0);
4016 mkSplit(negComp, 4, neg);
4017 mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32,
4018 negComp[0], srcComp[0], srcComp[1]);
4019 mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32,
4020 negComp[1], srcComp[1], srcComp[1]);
4021 c++;
4022 }
4023 break;
// Unary double ops: merge the source pair, apply op, split the result.
4024 case TGSI_OPCODE_DABS:
4025 case TGSI_OPCODE_DNEG:
4026 case TGSI_OPCODE_DRCP:
4027 case TGSI_OPCODE_DSQRT:
4028 case TGSI_OPCODE_DRSQ:
4029 case TGSI_OPCODE_DTRUNC:
4030 case TGSI_OPCODE_DCEIL:
4031 case TGSI_OPCODE_DFLR:
4032 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4033 src0 = getSSA(8);
4034 Value *dst = getSSA(8), *tmp[2];
4035 tmp[0] = fetchSrc(0, c);
4036 tmp[1] = fetchSrc(0, c + 1);
4037 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4038 mkOp1(op, dstTy, dst, src0);
4039 mkSplit(&dst0[c], 4, dst);
4040 c++;
4041 }
4042 break;
// Fractional part of a double: src - floor(src).
4043 case TGSI_OPCODE_DFRAC:
4044 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4045 src0 = getSSA(8);
4046 Value *dst = getSSA(8), *tmp[2];
4047 tmp[0] = fetchSrc(0, c);
4048 tmp[1] = fetchSrc(0, c + 1);
4049 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4050 mkOp1(OP_FLOOR, TYPE_F64, dst, src0);
4051 mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);
4052 mkSplit(&dst0[c], 4, dst);
4053 c++;
4054 }
4055 break;
// 64-bit comparisons with a 32-bit result: 'pos' walks both sources in
// channel pairs while c walks the destination channels.
4056 case TGSI_OPCODE_U64SEQ:
4057 case TGSI_OPCODE_U64SNE:
4058 case TGSI_OPCODE_U64SLT:
4059 case TGSI_OPCODE_U64SGE:
4060 case TGSI_OPCODE_I64SLT:
4061 case TGSI_OPCODE_I64SGE:
4062 case TGSI_OPCODE_DSLT:
4063 case TGSI_OPCODE_DSGE:
4064 case TGSI_OPCODE_DSEQ:
4065 case TGSI_OPCODE_DSNE: {
4066 int pos = 0;
4067 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4068 Value *tmp[2];
4069
4070 src0 = getSSA(8);
4071 src1 = getSSA(8);
4072 tmp[0] = fetchSrc(0, pos);
4073 tmp[1] = fetchSrc(0, pos + 1);
4074 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4075 tmp[0] = fetchSrc(1, pos);
4076 tmp[1] = fetchSrc(1, pos + 1);
4077 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4078 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
4079 pos += 2;
4080 }
4081 break;
4082 }
// 64-bit min/max built from two 32-bit ops: the high-dword op (dstTy, subop
// MINMAX_HIGH) defines a flags value that the low-dword op (TYPE_U32, subop
// MINMAX_LOW) consumes.
4083 case TGSI_OPCODE_U64MIN:
4084 case TGSI_OPCODE_U64MAX:
4085 case TGSI_OPCODE_I64MIN:
4086 case TGSI_OPCODE_I64MAX: {
4087 dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32;
4088 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4089 Value *flag = getSSA(1, FILE_FLAGS);
4090 src0 = fetchSrc(0, c + 1);
4091 src1 = fetchSrc(1, c + 1);
4092 geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);
4093 geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;
4094 geni->setFlagsDef(1, flag);
4095
4096 src0 = fetchSrc(0, c);
4097 src1 = fetchSrc(1, c);
4098 geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1);
4099 geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;
4100 geni->setFlagsSrc(2, flag);
4101
4102 c++;
4103 }
4104 break;
4105 }
4106 case TGSI_OPCODE_U64SHL:
4107 case TGSI_OPCODE_I64SHR:
4108 case TGSI_OPCODE_U64SHR:
4109 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4110 src0 = getSSA(8);
4111 Value *dst = getSSA(8), *tmp[2];
4112 tmp[0] = fetchSrc(0, c);
4113 tmp[1] = fetchSrc(0, c + 1);
4114 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4115 // Theoretically src1 is a 64-bit value but in practice only the low
4116 // bits matter. The IR expects this to be a 32-bit value.
4117 src1 = fetchSrc(1, c);
4118 mkOp2(op, dstTy, dst, src0, src1);
4119 mkSplit(&dst0[c], 4, dst);
4120 c++;
4121 }
4122 break;
// Binary 64-bit ops: merge both source pairs, apply op, split the result.
4123 case TGSI_OPCODE_U64ADD:
4124 case TGSI_OPCODE_U64MUL:
4125 case TGSI_OPCODE_DADD:
4126 case TGSI_OPCODE_DMUL:
4127 case TGSI_OPCODE_DDIV:
4128 case TGSI_OPCODE_DMAX:
4129 case TGSI_OPCODE_DMIN:
4130 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4131 src0 = getSSA(8);
4132 src1 = getSSA(8);
4133 Value *dst = getSSA(8), *tmp[2];
4134 tmp[0] = fetchSrc(0, c);
4135 tmp[1] = fetchSrc(0, c + 1);
4136 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4137 tmp[0] = fetchSrc(1, c);
4138 tmp[1] = fetchSrc(1, c + 1);
4139 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4140 mkOp2(op, dstTy, dst, src0, src1);
4141 mkSplit(&dst0[c], 4, dst);
4142 c++;
4143 }
4144 break;
// Ternary double ops: merge all three source pairs, apply op, split.
4145 case TGSI_OPCODE_DMAD:
4146 case TGSI_OPCODE_DFMA:
4147 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4148 src0 = getSSA(8);
4149 src1 = getSSA(8);
4150 src2 = getSSA(8);
4151 Value *dst = getSSA(8), *tmp[2];
4152 tmp[0] = fetchSrc(0, c);
4153 tmp[1] = fetchSrc(0, c + 1);
4154 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4155 tmp[0] = fetchSrc(1, c);
4156 tmp[1] = fetchSrc(1, c + 1);
4157 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4158 tmp[0] = fetchSrc(2, c);
4159 tmp[1] = fetchSrc(2, c + 1);
4160 mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);
4161 mkOp3(op, dstTy, dst, src0, src1, src2);
4162 mkSplit(&dst0[c], 4, dst);
4163 c++;
4164 }
4165 break;
4166 case TGSI_OPCODE_DROUND:
4167 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4168 src0 = getSSA(8);
4169 Value *dst = getSSA(8), *tmp[2];
4170 tmp[0] = fetchSrc(0, c);
4171 tmp[1] = fetchSrc(0, c + 1);
4172 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4173 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)
4174 ->rnd = ROUND_NI;
4175 mkSplit(&dst0[c], 4, dst);
4176 c++;
4177 }
4178 break;
// Sign of a double: computed in F32 from two F64 comparisons, then
// converted back to F64 and split.
4179 case TGSI_OPCODE_DSSG:
4180 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4181 src0 = getSSA(8);
4182 Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];
4183 tmp[0] = fetchSrc(0, c);
4184 tmp[1] = fetchSrc(0, c + 1);
4185 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4186
4187 val0 = getScratch();
4188 val1 = getScratch();
4189 // The zero is wrong here since it's only 32-bit, but it works out in
4190 // the end since it gets replaced with $r63.
4191 mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);
4192 mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);
4193 mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);
4194 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);
4195 mkSplit(&dst0[c], 4, dst);
4196 c++;
4197 }
4198 break;
// Sign of a 64-bit integer: the low dword is built from two S64
// comparisons, the high dword is the low dword shifted right by 31 as S32.
4199 case TGSI_OPCODE_I64SSG:
4200 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4201 src0 = getSSA(8);
4202 Value *tmp[2];
4203 tmp[0] = fetchSrc(0, c);
4204 tmp[1] = fetchSrc(0, c + 1);
4205 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4206
4207 val0 = getScratch();
4208 val1 = getScratch();
4209 mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero);
4210 mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero);
4211 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
4212 mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31));
4213 c++;
4214 }
4215 break;
// NOTE(review): when assert() is compiled out, an unhandled opcode only logs
// an error; execution continues to the write-back below and the function
// still returns true.
4216 default:
4217 ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
4218 assert(0);
4219 break;
4220 }
4221
// Write-back: copy scratch results to the real destinations where they
// differ, then commit each written channel with storeDst().
4222 if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
4223 for (c = 0; c < 4; ++c) {
4224 if (!dst0[c])
4225 continue;
4226 if (dst0[c] != rDst0[c])
4227 mkMov(rDst0[c], dst0[c]);
4228 storeDst(0, c, rDst0[c]);
4229 }
4230 }
4231 vtxBaseValid = 0;
4232
4233 return true;
4234 }
4235
4236 void
4237 Converter::handleUserClipPlanes()
4238 {
4239 Value *res[8];
4240 int n, i, c;
4241
4242 for (c = 0; c < 4; ++c) {
4243 for (i = 0; i < info->io.genUserClip; ++i) {
4244 Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4245 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
4246 Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
4247 if (c == 0)
4248 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
4249 else
4250 mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
4251 }
4252 }
4253
4254 const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
4255
4256 for (i = 0; i < info->io.genUserClip; ++i) {
4257 n = i / 4 + first;
4258 c = i % 4;
4259 Symbol *sym =
4260 mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
4261 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
4262 }
4263 }
4264
4265 void
4266 Converter::exportOutputs()
4267 {
4268 if (info->io.alphaRefBase) {
4269 for (unsigned int i = 0; i < info->numOutputs; ++i) {
4270 if (info->out[i].sn != TGSI_SEMANTIC_COLOR ||
4271 info->out[i].si != 0)
4272 continue;
4273 const unsigned int c = 3;
4274 if (!oData.exists(sub.cur->values, i, c))
4275 continue;
4276 Value *val = oData.load(sub.cur->values, i, c, NULL);
4277 if (!val)
4278 continue;
4279
4280 Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4281 TYPE_U32, info->io.alphaRefBase);
4282 Value *pred = new_LValue(func, FILE_PREDICATE);
4283 mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val,
4284 mkLoadv(TYPE_U32, ref, NULL))
4285 ->subOp = 1;
4286 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred);
4287 }
4288 }
4289
4290 for (unsigned int i = 0; i < info->numOutputs; ++i) {
4291 for (unsigned int c = 0; c < 4; ++c) {
4292 if (!oData.exists(sub.cur->values, i, c))
4293 continue;
4294 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
4295 info->out[i].slot[c] * 4);
4296 Value *val = oData.load(sub.cur->values, i, c, NULL);
4297 if (val) {
4298 if (info->out[i].sn == TGSI_SEMANTIC_POSITION)
4299 mkOp1(OP_SAT, TYPE_F32, val, val);
4300 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
4301 }
4302 }
4303 }
4304 }
4305
4306 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
4307 code(code),
4308 tgsi(NULL),
4309 tData(this), lData(this), aData(this), oData(this)
4310 {
4311 info = code->info;
4312
4313 const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
4314 const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
4315 const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
4316
4317 tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
4318 lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
4319 aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
4320 oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
4321
4322 zero = mkImm((uint32_t)0);
4323
4324 vtxBaseValid = 0;
4325 }
4326
4327 Converter::~Converter()
4328 {
4329 }
4330
4331 inline const Converter::Location *
4332 Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v)
4333 {
4334 ValueMap::l_iterator it = s->values.l.find(v);
4335 return it == s->values.l.end() ? NULL : &it->second;
4336 }
4337
4338 template<typename T> inline void
4339 Converter::BindArgumentsPass::updateCallArgs(
4340 Instruction *i, void (Instruction::*setArg)(int, Value *),
4341 T (Function::*proto))
4342 {
4343 Function *g = i->asFlow()->target.fn;
4344 Subroutine *subg = conv.getSubroutine(g);
4345
4346 for (unsigned a = 0; a < (g->*proto).size(); ++a) {
4347 Value *v = (g->*proto)[a].get();
4348 const Converter::Location &l = *getValueLocation(subg, v);
4349 Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
4350
4351 (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
4352 }
4353 }
4354
4355 template<typename T> inline void
4356 Converter::BindArgumentsPass::updatePrototype(
4357 BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
4358 {
4359 (func->*updateSet)();
4360
4361 for (unsigned i = 0; i < set->getSize(); ++i) {
4362 Value *v = func->getLValue(i);
4363 const Converter::Location *l = getValueLocation(sub, v);
4364
4365 // only include values with a matching TGSI register
4366 if (set->test(i) && l && !conv.code->locals.count(*l))
4367 (func->*proto).push_back(v);
4368 }
4369 }
4370
4371 bool
4372 Converter::BindArgumentsPass::visit(Function *f)
4373 {
4374 sub = conv.getSubroutine(f);
4375
4376 for (ArrayList::Iterator bi = f->allBBlocks.iterator();
4377 !bi.end(); bi.next()) {
4378 for (Instruction *i = BasicBlock::get(bi)->getFirst();
4379 i; i = i->next) {
4380 if (i->op == OP_CALL && !i->asFlow()->builtin) {
4381 updateCallArgs(i, &Instruction::setSrc, &Function::ins);
4382 updateCallArgs(i, &Instruction::setDef, &Function::outs);
4383 }
4384 }
4385 }
4386
4387 if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
4388 return true;
4389 updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
4390 &Function::buildLiveSets, &Function::ins);
4391 updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
4392 &Function::buildDefSets, &Function::outs);
4393
4394 return true;
4395 }
4396
4397 bool
4398 Converter::run()
4399 {
4400 BasicBlock *entry = new BasicBlock(prog->main);
4401 BasicBlock *leave = new BasicBlock(prog->main);
4402
4403 prog->main->setEntry(entry);
4404 prog->main->setExit(leave);
4405
4406 setPosition(entry, true);
4407 sub.cur = getSubroutine(prog->main);
4408
4409 if (info->io.genUserClip > 0) {
4410 for (int c = 0; c < 4; ++c)
4411 clipVtx[c] = getScratch();
4412 }
4413
4414 switch (prog->getType()) {
4415 case Program::TYPE_TESSELLATION_CONTROL:
4416 outBase = mkOp2v(
4417 OP_SUB, TYPE_U32, getSSA(),
4418 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
4419 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
4420 break;
4421 case Program::TYPE_FRAGMENT: {
4422 Symbol *sv = mkSysVal(SV_POSITION, 3);
4423 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
4424 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
4425 break;
4426 }
4427 default:
4428 break;
4429 }
4430
4431 if (info->io.viewportId >= 0)
4432 viewport = getScratch();
4433 else
4434 viewport = NULL;
4435
4436 for (ip = 0; ip < code->scan.num_instructions; ++ip) {
4437 if (!handleInstruction(&code->insns[ip]))
4438 return false;
4439 }
4440
4441 if (!BindArgumentsPass(*this).run(prog))
4442 return false;
4443
4444 return true;
4445 }
4446
4447 } // unnamed namespace
4448
4449 namespace nv50_ir {
4450
4451 bool
4452 Program::makeFromTGSI(struct nv50_ir_prog_info *info)
4453 {
4454 tgsi::Source src(info);
4455 if (!src.scanSource())
4456 return false;
4457 tlsSize = info->bin.tlsSpace;
4458
4459 Converter builder(this, &src);
4460 return builder.run();
4461 }
4462
4463 } // namespace nv50_ir