nv50/ir: add support for converting ATOMFADD to proper ir
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_tgsi.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "tgsi/tgsi_build.h"
24 #include "tgsi/tgsi_dump.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_util.h"
27
28 #include <set>
29
30 #include "codegen/nv50_ir.h"
31 #include "codegen/nv50_ir_util.h"
32 #include "codegen/nv50_ir_build_util.h"
33
34 namespace tgsi {
35
36 class Source;
37
38 static nv50_ir::operation translateOpcode(uint opcode);
39 static nv50_ir::DataFile translateFile(uint file);
40 static nv50_ir::TexTarget translateTexture(uint texTarg);
41 static nv50_ir::SVSemantic translateSysVal(uint sysval);
42 static nv50_ir::CacheMode translateCacheMode(uint qualifier);
43 static nv50_ir::ImgFormat translateImgFormat(uint format);
44
45 class Instruction
46 {
47 public:
48 Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
49
50 class SrcRegister
51 {
52 public:
53 SrcRegister(const struct tgsi_full_src_register *src)
54 : reg(src->Register),
55 fsr(src)
56 { }
57
58 SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
59
60 SrcRegister(const struct tgsi_ind_register& ind)
61 : reg(tgsi_util_get_src_from_ind(&ind)),
62 fsr(NULL)
63 { }
64
65 struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
66 {
67 struct tgsi_src_register reg;
68 memset(&reg, 0, sizeof(reg));
69 reg.Index = off.Index;
70 reg.File = off.File;
71 reg.SwizzleX = off.SwizzleX;
72 reg.SwizzleY = off.SwizzleY;
73 reg.SwizzleZ = off.SwizzleZ;
74 return reg;
75 }
76
77 SrcRegister(const struct tgsi_texture_offset& off) :
78 reg(offsetToSrc(off)),
79 fsr(NULL)
80 { }
81
82 uint getFile() const { return reg.File; }
83
84 bool is2D() const { return reg.Dimension; }
85
86 bool isIndirect(int dim) const
87 {
88 return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
89 }
90
91 int getIndex(int dim) const
92 {
93 return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
94 }
95
96 int getSwizzle(int chan) const
97 {
98 return tgsi_util_get_src_register_swizzle(&reg, chan);
99 }
100
101 int getArrayId() const
102 {
103 if (isIndirect(0))
104 return fsr->Indirect.ArrayID;
105 return 0;
106 }
107
108 nv50_ir::Modifier getMod(int chan) const;
109
110 SrcRegister getIndirect(int dim) const
111 {
112 assert(fsr && isIndirect(dim));
113 if (dim)
114 return SrcRegister(fsr->DimIndirect);
115 return SrcRegister(fsr->Indirect);
116 }
117
118 uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
119 {
120 assert(reg.File == TGSI_FILE_IMMEDIATE);
121 assert(!reg.Absolute);
122 assert(!reg.Negate);
123 return info->immd.data[reg.Index * 4 + getSwizzle(c)];
124 }
125
126 private:
127 const struct tgsi_src_register reg;
128 const struct tgsi_full_src_register *fsr;
129 };
130
131 class DstRegister
132 {
133 public:
134 DstRegister(const struct tgsi_full_dst_register *dst)
135 : reg(dst->Register),
136 fdr(dst)
137 { }
138
139 DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
140
141 uint getFile() const { return reg.File; }
142
143 bool is2D() const { return reg.Dimension; }
144
145 bool isIndirect(int dim) const
146 {
147 return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
148 }
149
150 int getIndex(int dim) const
151 {
152 return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
153 }
154
155 unsigned int getMask() const { return reg.WriteMask; }
156
157 bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
158
159 SrcRegister getIndirect(int dim) const
160 {
161 assert(fdr && isIndirect(dim));
162 if (dim)
163 return SrcRegister(fdr->DimIndirect);
164 return SrcRegister(fdr->Indirect);
165 }
166
167 struct tgsi_full_src_register asSrc()
168 {
169 assert(fdr);
170 return tgsi_full_src_register_from_dst(fdr);
171 }
172
173 int getArrayId() const
174 {
175 if (isIndirect(0))
176 return fdr->Indirect.ArrayID;
177 return 0;
178 }
179
180 private:
181 const struct tgsi_dst_register reg;
182 const struct tgsi_full_dst_register *fdr;
183 };
184
185 inline uint getOpcode() const { return insn->Instruction.Opcode; }
186
187 unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
188 unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
189
190 // mask of used components of source s
191 unsigned int srcMask(unsigned int s) const;
192 unsigned int texOffsetMask() const;
193
194 SrcRegister getSrc(unsigned int s) const
195 {
196 assert(s < srcCount());
197 return SrcRegister(&insn->Src[s]);
198 }
199
200 DstRegister getDst(unsigned int d) const
201 {
202 assert(d < dstCount());
203 return DstRegister(&insn->Dst[d]);
204 }
205
206 SrcRegister getTexOffset(unsigned int i) const
207 {
208 assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
209 return SrcRegister(insn->TexOffsets[i]);
210 }
211
212 unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
213
214 bool checkDstSrcAliasing() const;
215
216 inline nv50_ir::operation getOP() const {
217 return translateOpcode(getOpcode()); }
218
219 nv50_ir::DataType inferSrcType() const;
220 nv50_ir::DataType inferDstType() const;
221
222 nv50_ir::CondCode getSetCond() const;
223
224 nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
225
226 const nv50_ir::TexInstruction::ImgFormatDesc *getImageFormat() const {
227 return &nv50_ir::TexInstruction::formatTable[
228 translateImgFormat(insn->Memory.Format)];
229 }
230
231 nv50_ir::TexTarget getImageTarget() const {
232 return translateTexture(insn->Memory.Texture);
233 }
234
235 nv50_ir::CacheMode getCacheMode() const {
236 if (!insn->Instruction.Memory)
237 return nv50_ir::CACHE_CA;
238 return translateCacheMode(insn->Memory.Qualifier);
239 }
240
241 inline uint getLabel() { return insn->Label.Label; }
242
243 unsigned getSaturate() const { return insn->Instruction.Saturate; }
244
245 void print() const
246 {
247 tgsi_dump_instruction(insn, 1);
248 }
249
250 private:
251 const struct tgsi_full_instruction *insn;
252 };
253
254 unsigned int Instruction::texOffsetMask() const
255 {
256 const struct tgsi_instruction_texture *tex = &insn->Texture;
257 assert(insn->Instruction.Texture);
258
259 switch (tex->Texture) {
260 case TGSI_TEXTURE_BUFFER:
261 case TGSI_TEXTURE_1D:
262 case TGSI_TEXTURE_SHADOW1D:
263 case TGSI_TEXTURE_1D_ARRAY:
264 case TGSI_TEXTURE_SHADOW1D_ARRAY:
265 return 0x1;
266 case TGSI_TEXTURE_2D:
267 case TGSI_TEXTURE_SHADOW2D:
268 case TGSI_TEXTURE_2D_ARRAY:
269 case TGSI_TEXTURE_SHADOW2D_ARRAY:
270 case TGSI_TEXTURE_RECT:
271 case TGSI_TEXTURE_SHADOWRECT:
272 case TGSI_TEXTURE_2D_MSAA:
273 case TGSI_TEXTURE_2D_ARRAY_MSAA:
274 return 0x3;
275 case TGSI_TEXTURE_3D:
276 return 0x7;
277 default:
278 assert(!"Unexpected texture target");
279 return 0xf;
280 }
281 }
282
283 unsigned int Instruction::srcMask(unsigned int s) const
284 {
285 unsigned int mask = insn->Dst[0].Register.WriteMask;
286
287 switch (insn->Instruction.Opcode) {
288 case TGSI_OPCODE_COS:
289 case TGSI_OPCODE_SIN:
290 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
291 case TGSI_OPCODE_DP2:
292 return 0x3;
293 case TGSI_OPCODE_DP3:
294 return 0x7;
295 case TGSI_OPCODE_DP4:
296 case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
297 return 0xf;
298 case TGSI_OPCODE_DST:
299 return mask & (s ? 0xa : 0x6);
300 case TGSI_OPCODE_EX2:
301 case TGSI_OPCODE_EXP:
302 case TGSI_OPCODE_LG2:
303 case TGSI_OPCODE_LOG:
304 case TGSI_OPCODE_POW:
305 case TGSI_OPCODE_RCP:
306 case TGSI_OPCODE_RSQ:
307 return 0x1;
308 case TGSI_OPCODE_IF:
309 case TGSI_OPCODE_UIF:
310 return 0x1;
311 case TGSI_OPCODE_LIT:
312 return 0xb;
313 case TGSI_OPCODE_TEX2:
314 case TGSI_OPCODE_TXB2:
315 case TGSI_OPCODE_TXL2:
316 return (s == 0) ? 0xf : 0x3;
317 case TGSI_OPCODE_TEX:
318 case TGSI_OPCODE_TXB:
319 case TGSI_OPCODE_TXD:
320 case TGSI_OPCODE_TXL:
321 case TGSI_OPCODE_TXP:
322 case TGSI_OPCODE_TXF:
323 case TGSI_OPCODE_TG4:
324 case TGSI_OPCODE_TEX_LZ:
325 case TGSI_OPCODE_TXF_LZ:
326 case TGSI_OPCODE_LODQ:
327 {
328 const struct tgsi_instruction_texture *tex = &insn->Texture;
329
330 assert(insn->Instruction.Texture);
331
332 mask = 0x7;
333 if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
334 insn->Instruction.Opcode != TGSI_OPCODE_TEX_LZ &&
335 insn->Instruction.Opcode != TGSI_OPCODE_TXF_LZ &&
336 insn->Instruction.Opcode != TGSI_OPCODE_TXD)
337 mask |= 0x8; /* bias, lod or proj */
338
339 switch (tex->Texture) {
340 case TGSI_TEXTURE_1D:
341 mask &= 0x9;
342 break;
343 case TGSI_TEXTURE_SHADOW1D:
344 mask &= 0xd;
345 break;
346 case TGSI_TEXTURE_1D_ARRAY:
347 case TGSI_TEXTURE_2D:
348 case TGSI_TEXTURE_RECT:
349 mask &= 0xb;
350 break;
351 case TGSI_TEXTURE_CUBE_ARRAY:
352 case TGSI_TEXTURE_SHADOW2D_ARRAY:
353 case TGSI_TEXTURE_SHADOWCUBE:
354 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
355 mask |= 0x8;
356 break;
357 default:
358 break;
359 }
360 }
361 return mask;
362 case TGSI_OPCODE_TXQ:
363 return 1;
364 case TGSI_OPCODE_D2I:
365 case TGSI_OPCODE_D2U:
366 case TGSI_OPCODE_D2F:
367 case TGSI_OPCODE_DSLT:
368 case TGSI_OPCODE_DSGE:
369 case TGSI_OPCODE_DSEQ:
370 case TGSI_OPCODE_DSNE:
371 case TGSI_OPCODE_U64SEQ:
372 case TGSI_OPCODE_U64SNE:
373 case TGSI_OPCODE_I64SLT:
374 case TGSI_OPCODE_U64SLT:
375 case TGSI_OPCODE_I64SGE:
376 case TGSI_OPCODE_U64SGE:
377 case TGSI_OPCODE_I642F:
378 case TGSI_OPCODE_U642F:
379 switch (util_bitcount(mask)) {
380 case 1: return 0x3;
381 case 2: return 0xf;
382 default:
383 assert(!"unexpected mask");
384 return 0xf;
385 }
386 case TGSI_OPCODE_I2D:
387 case TGSI_OPCODE_U2D:
388 case TGSI_OPCODE_F2D: {
389 unsigned int x = 0;
390 if ((mask & 0x3) == 0x3)
391 x |= 1;
392 if ((mask & 0xc) == 0xc)
393 x |= 2;
394 return x;
395 }
396 case TGSI_OPCODE_PK2H:
397 return 0x3;
398 case TGSI_OPCODE_UP2H:
399 return 0x1;
400 default:
401 break;
402 }
403
404 return mask;
405 }
406
407 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
408 {
409 nv50_ir::Modifier m(0);
410
411 if (reg.Absolute)
412 m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
413 if (reg.Negate)
414 m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
415 return m;
416 }
417
418 static nv50_ir::DataFile translateFile(uint file)
419 {
420 switch (file) {
421 case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST;
422 case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT;
423 case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT;
424 case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR;
425 case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS;
426 case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
427 case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
428 case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER;
429 case TGSI_FILE_IMAGE: return nv50_ir::FILE_MEMORY_GLOBAL;
430 case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
431 case TGSI_FILE_SAMPLER:
432 case TGSI_FILE_NULL:
433 default:
434 return nv50_ir::FILE_NULL;
435 }
436 }
437
438 static nv50_ir::SVSemantic translateSysVal(uint sysval)
439 {
440 switch (sysval) {
441 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;
442 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;
443 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;
444 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
445 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID;
446 case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID;
447 case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID;
448 case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;
449 case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID;
450 case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX;
451 case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;
452 case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
453 case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
454 case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;
455 case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
456 case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
457 case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
458 case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
459 case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
460 case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
461 case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
462 case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM;
463 case TGSI_SEMANTIC_SUBGROUP_INVOCATION: return nv50_ir::SV_LANEID;
464 case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: return nv50_ir::SV_LANEMASK_EQ;
465 case TGSI_SEMANTIC_SUBGROUP_LT_MASK: return nv50_ir::SV_LANEMASK_LT;
466 case TGSI_SEMANTIC_SUBGROUP_LE_MASK: return nv50_ir::SV_LANEMASK_LE;
467 case TGSI_SEMANTIC_SUBGROUP_GT_MASK: return nv50_ir::SV_LANEMASK_GT;
468 case TGSI_SEMANTIC_SUBGROUP_GE_MASK: return nv50_ir::SV_LANEMASK_GE;
469 default:
470 assert(0);
471 return nv50_ir::SV_CLOCK;
472 }
473 }
474
475 #define NV50_IR_TEX_TARG_CASE(a, b) \
476 case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
477
478 static nv50_ir::TexTarget translateTexture(uint tex)
479 {
480 switch (tex) {
481 NV50_IR_TEX_TARG_CASE(1D, 1D);
482 NV50_IR_TEX_TARG_CASE(2D, 2D);
483 NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS);
484 NV50_IR_TEX_TARG_CASE(3D, 3D);
485 NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
486 NV50_IR_TEX_TARG_CASE(RECT, RECT);
487 NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
488 NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
489 NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY);
490 NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);
491 NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
492 NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
493 NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
494 NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
495 NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
496 NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
497 NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);
498 NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
499
500 case TGSI_TEXTURE_UNKNOWN:
501 default:
502 assert(!"invalid texture target");
503 return nv50_ir::TEX_TARGET_2D;
504 }
505 }
506
507 static nv50_ir::CacheMode translateCacheMode(uint qualifier)
508 {
509 if (qualifier & TGSI_MEMORY_VOLATILE)
510 return nv50_ir::CACHE_CV;
511 if (qualifier & TGSI_MEMORY_COHERENT)
512 return nv50_ir::CACHE_CG;
513 return nv50_ir::CACHE_CA;
514 }
515
516 static nv50_ir::ImgFormat translateImgFormat(uint format)
517 {
518
519 #define FMT_CASE(a, b) \
520 case PIPE_FORMAT_ ## a: return nv50_ir::FMT_ ## b
521
522 switch (format) {
523 FMT_CASE(NONE, NONE);
524
525 FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
526 FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
527 FMT_CASE(R32G32_FLOAT, RG32F);
528 FMT_CASE(R16G16_FLOAT, RG16F);
529 FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
530 FMT_CASE(R32_FLOAT, R32F);
531 FMT_CASE(R16_FLOAT, R16F);
532
533 FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
534 FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
535 FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
536 FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
537 FMT_CASE(R32G32_UINT, RG32UI);
538 FMT_CASE(R16G16_UINT, RG16UI);
539 FMT_CASE(R8G8_UINT, RG8UI);
540 FMT_CASE(R32_UINT, R32UI);
541 FMT_CASE(R16_UINT, R16UI);
542 FMT_CASE(R8_UINT, R8UI);
543
544 FMT_CASE(R32G32B32A32_SINT, RGBA32I);
545 FMT_CASE(R16G16B16A16_SINT, RGBA16I);
546 FMT_CASE(R8G8B8A8_SINT, RGBA8I);
547 FMT_CASE(R32G32_SINT, RG32I);
548 FMT_CASE(R16G16_SINT, RG16I);
549 FMT_CASE(R8G8_SINT, RG8I);
550 FMT_CASE(R32_SINT, R32I);
551 FMT_CASE(R16_SINT, R16I);
552 FMT_CASE(R8_SINT, R8I);
553
554 FMT_CASE(R16G16B16A16_UNORM, RGBA16);
555 FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
556 FMT_CASE(R8G8B8A8_UNORM, RGBA8);
557 FMT_CASE(R16G16_UNORM, RG16);
558 FMT_CASE(R8G8_UNORM, RG8);
559 FMT_CASE(R16_UNORM, R16);
560 FMT_CASE(R8_UNORM, R8);
561
562 FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
563 FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
564 FMT_CASE(R16G16_SNORM, RG16_SNORM);
565 FMT_CASE(R8G8_SNORM, RG8_SNORM);
566 FMT_CASE(R16_SNORM, R16_SNORM);
567 FMT_CASE(R8_SNORM, R8_SNORM);
568
569 FMT_CASE(B8G8R8A8_UNORM, BGRA8);
570 }
571
572 assert(!"Unexpected format");
573 return nv50_ir::FMT_NONE;
574 }
575
576 nv50_ir::DataType Instruction::inferSrcType() const
577 {
578 switch (getOpcode()) {
579 case TGSI_OPCODE_UIF:
580 case TGSI_OPCODE_AND:
581 case TGSI_OPCODE_OR:
582 case TGSI_OPCODE_XOR:
583 case TGSI_OPCODE_NOT:
584 case TGSI_OPCODE_SHL:
585 case TGSI_OPCODE_U2F:
586 case TGSI_OPCODE_U2D:
587 case TGSI_OPCODE_U2I64:
588 case TGSI_OPCODE_UADD:
589 case TGSI_OPCODE_UDIV:
590 case TGSI_OPCODE_UMOD:
591 case TGSI_OPCODE_UMAD:
592 case TGSI_OPCODE_UMUL:
593 case TGSI_OPCODE_UMUL_HI:
594 case TGSI_OPCODE_UMAX:
595 case TGSI_OPCODE_UMIN:
596 case TGSI_OPCODE_USEQ:
597 case TGSI_OPCODE_USGE:
598 case TGSI_OPCODE_USLT:
599 case TGSI_OPCODE_USNE:
600 case TGSI_OPCODE_USHR:
601 case TGSI_OPCODE_ATOMUADD:
602 case TGSI_OPCODE_ATOMXCHG:
603 case TGSI_OPCODE_ATOMCAS:
604 case TGSI_OPCODE_ATOMAND:
605 case TGSI_OPCODE_ATOMOR:
606 case TGSI_OPCODE_ATOMXOR:
607 case TGSI_OPCODE_ATOMUMIN:
608 case TGSI_OPCODE_ATOMUMAX:
609 case TGSI_OPCODE_UBFE:
610 case TGSI_OPCODE_UMSB:
611 case TGSI_OPCODE_UP2H:
612 case TGSI_OPCODE_VOTE_ALL:
613 case TGSI_OPCODE_VOTE_ANY:
614 case TGSI_OPCODE_VOTE_EQ:
615 return nv50_ir::TYPE_U32;
616 case TGSI_OPCODE_I2F:
617 case TGSI_OPCODE_I2D:
618 case TGSI_OPCODE_I2I64:
619 case TGSI_OPCODE_IDIV:
620 case TGSI_OPCODE_IMUL_HI:
621 case TGSI_OPCODE_IMAX:
622 case TGSI_OPCODE_IMIN:
623 case TGSI_OPCODE_IABS:
624 case TGSI_OPCODE_INEG:
625 case TGSI_OPCODE_ISGE:
626 case TGSI_OPCODE_ISHR:
627 case TGSI_OPCODE_ISLT:
628 case TGSI_OPCODE_ISSG:
629 case TGSI_OPCODE_MOD:
630 case TGSI_OPCODE_UARL:
631 case TGSI_OPCODE_ATOMIMIN:
632 case TGSI_OPCODE_ATOMIMAX:
633 case TGSI_OPCODE_IBFE:
634 case TGSI_OPCODE_IMSB:
635 return nv50_ir::TYPE_S32;
636 case TGSI_OPCODE_D2F:
637 case TGSI_OPCODE_D2I:
638 case TGSI_OPCODE_D2U:
639 case TGSI_OPCODE_D2I64:
640 case TGSI_OPCODE_D2U64:
641 case TGSI_OPCODE_DABS:
642 case TGSI_OPCODE_DNEG:
643 case TGSI_OPCODE_DADD:
644 case TGSI_OPCODE_DMUL:
645 case TGSI_OPCODE_DDIV:
646 case TGSI_OPCODE_DMAX:
647 case TGSI_OPCODE_DMIN:
648 case TGSI_OPCODE_DSLT:
649 case TGSI_OPCODE_DSGE:
650 case TGSI_OPCODE_DSEQ:
651 case TGSI_OPCODE_DSNE:
652 case TGSI_OPCODE_DRCP:
653 case TGSI_OPCODE_DSQRT:
654 case TGSI_OPCODE_DMAD:
655 case TGSI_OPCODE_DFMA:
656 case TGSI_OPCODE_DFRAC:
657 case TGSI_OPCODE_DRSQ:
658 case TGSI_OPCODE_DTRUNC:
659 case TGSI_OPCODE_DCEIL:
660 case TGSI_OPCODE_DFLR:
661 case TGSI_OPCODE_DROUND:
662 return nv50_ir::TYPE_F64;
663 case TGSI_OPCODE_U64SEQ:
664 case TGSI_OPCODE_U64SNE:
665 case TGSI_OPCODE_U64SLT:
666 case TGSI_OPCODE_U64SGE:
667 case TGSI_OPCODE_U64MIN:
668 case TGSI_OPCODE_U64MAX:
669 case TGSI_OPCODE_U64ADD:
670 case TGSI_OPCODE_U64MUL:
671 case TGSI_OPCODE_U64SHL:
672 case TGSI_OPCODE_U64SHR:
673 case TGSI_OPCODE_U64DIV:
674 case TGSI_OPCODE_U64MOD:
675 case TGSI_OPCODE_U642F:
676 case TGSI_OPCODE_U642D:
677 return nv50_ir::TYPE_U64;
678 case TGSI_OPCODE_I64ABS:
679 case TGSI_OPCODE_I64SSG:
680 case TGSI_OPCODE_I64NEG:
681 case TGSI_OPCODE_I64SLT:
682 case TGSI_OPCODE_I64SGE:
683 case TGSI_OPCODE_I64MIN:
684 case TGSI_OPCODE_I64MAX:
685 case TGSI_OPCODE_I64SHR:
686 case TGSI_OPCODE_I64DIV:
687 case TGSI_OPCODE_I64MOD:
688 case TGSI_OPCODE_I642F:
689 case TGSI_OPCODE_I642D:
690 return nv50_ir::TYPE_S64;
691 default:
692 return nv50_ir::TYPE_F32;
693 }
694 }
695
696 nv50_ir::DataType Instruction::inferDstType() const
697 {
698 switch (getOpcode()) {
699 case TGSI_OPCODE_D2U:
700 case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
701 case TGSI_OPCODE_D2I:
702 case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
703 case TGSI_OPCODE_FSEQ:
704 case TGSI_OPCODE_FSGE:
705 case TGSI_OPCODE_FSLT:
706 case TGSI_OPCODE_FSNE:
707 case TGSI_OPCODE_DSEQ:
708 case TGSI_OPCODE_DSGE:
709 case TGSI_OPCODE_DSLT:
710 case TGSI_OPCODE_DSNE:
711 case TGSI_OPCODE_I64SLT:
712 case TGSI_OPCODE_I64SGE:
713 case TGSI_OPCODE_U64SEQ:
714 case TGSI_OPCODE_U64SNE:
715 case TGSI_OPCODE_U64SLT:
716 case TGSI_OPCODE_U64SGE:
717 case TGSI_OPCODE_PK2H:
718 return nv50_ir::TYPE_U32;
719 case TGSI_OPCODE_I2F:
720 case TGSI_OPCODE_U2F:
721 case TGSI_OPCODE_D2F:
722 case TGSI_OPCODE_I642F:
723 case TGSI_OPCODE_U642F:
724 case TGSI_OPCODE_UP2H:
725 return nv50_ir::TYPE_F32;
726 case TGSI_OPCODE_I2D:
727 case TGSI_OPCODE_U2D:
728 case TGSI_OPCODE_F2D:
729 case TGSI_OPCODE_I642D:
730 case TGSI_OPCODE_U642D:
731 return nv50_ir::TYPE_F64;
732 case TGSI_OPCODE_I2I64:
733 case TGSI_OPCODE_U2I64:
734 case TGSI_OPCODE_F2I64:
735 case TGSI_OPCODE_D2I64:
736 return nv50_ir::TYPE_S64;
737 case TGSI_OPCODE_F2U64:
738 case TGSI_OPCODE_D2U64:
739 return nv50_ir::TYPE_U64;
740 default:
741 return inferSrcType();
742 }
743 }
744
745 nv50_ir::CondCode Instruction::getSetCond() const
746 {
747 using namespace nv50_ir;
748
749 switch (getOpcode()) {
750 case TGSI_OPCODE_SLT:
751 case TGSI_OPCODE_ISLT:
752 case TGSI_OPCODE_USLT:
753 case TGSI_OPCODE_FSLT:
754 case TGSI_OPCODE_DSLT:
755 case TGSI_OPCODE_I64SLT:
756 case TGSI_OPCODE_U64SLT:
757 return CC_LT;
758 case TGSI_OPCODE_SLE:
759 return CC_LE;
760 case TGSI_OPCODE_SGE:
761 case TGSI_OPCODE_ISGE:
762 case TGSI_OPCODE_USGE:
763 case TGSI_OPCODE_FSGE:
764 case TGSI_OPCODE_DSGE:
765 case TGSI_OPCODE_I64SGE:
766 case TGSI_OPCODE_U64SGE:
767 return CC_GE;
768 case TGSI_OPCODE_SGT:
769 return CC_GT;
770 case TGSI_OPCODE_SEQ:
771 case TGSI_OPCODE_USEQ:
772 case TGSI_OPCODE_FSEQ:
773 case TGSI_OPCODE_DSEQ:
774 case TGSI_OPCODE_U64SEQ:
775 return CC_EQ;
776 case TGSI_OPCODE_SNE:
777 case TGSI_OPCODE_FSNE:
778 case TGSI_OPCODE_DSNE:
779 case TGSI_OPCODE_U64SNE:
780 return CC_NEU;
781 case TGSI_OPCODE_USNE:
782 return CC_NE;
783 default:
784 return CC_ALWAYS;
785 }
786 }
787
788 #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
789
790 static nv50_ir::operation translateOpcode(uint opcode)
791 {
792 switch (opcode) {
793 NV50_IR_OPCODE_CASE(ARL, SHL);
794 NV50_IR_OPCODE_CASE(MOV, MOV);
795
796 NV50_IR_OPCODE_CASE(RCP, RCP);
797 NV50_IR_OPCODE_CASE(RSQ, RSQ);
798 NV50_IR_OPCODE_CASE(SQRT, SQRT);
799
800 NV50_IR_OPCODE_CASE(MUL, MUL);
801 NV50_IR_OPCODE_CASE(ADD, ADD);
802
803 NV50_IR_OPCODE_CASE(MIN, MIN);
804 NV50_IR_OPCODE_CASE(MAX, MAX);
805 NV50_IR_OPCODE_CASE(SLT, SET);
806 NV50_IR_OPCODE_CASE(SGE, SET);
807 NV50_IR_OPCODE_CASE(MAD, MAD);
808 NV50_IR_OPCODE_CASE(FMA, FMA);
809
810 NV50_IR_OPCODE_CASE(FLR, FLOOR);
811 NV50_IR_OPCODE_CASE(ROUND, CVT);
812 NV50_IR_OPCODE_CASE(EX2, EX2);
813 NV50_IR_OPCODE_CASE(LG2, LG2);
814 NV50_IR_OPCODE_CASE(POW, POW);
815
816 NV50_IR_OPCODE_CASE(COS, COS);
817 NV50_IR_OPCODE_CASE(DDX, DFDX);
818 NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
819 NV50_IR_OPCODE_CASE(DDY, DFDY);
820 NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);
821 NV50_IR_OPCODE_CASE(KILL, DISCARD);
822
823 NV50_IR_OPCODE_CASE(SEQ, SET);
824 NV50_IR_OPCODE_CASE(SGT, SET);
825 NV50_IR_OPCODE_CASE(SIN, SIN);
826 NV50_IR_OPCODE_CASE(SLE, SET);
827 NV50_IR_OPCODE_CASE(SNE, SET);
828 NV50_IR_OPCODE_CASE(TEX, TEX);
829 NV50_IR_OPCODE_CASE(TXD, TXD);
830 NV50_IR_OPCODE_CASE(TXP, TEX);
831
832 NV50_IR_OPCODE_CASE(CAL, CALL);
833 NV50_IR_OPCODE_CASE(RET, RET);
834 NV50_IR_OPCODE_CASE(CMP, SLCT);
835
836 NV50_IR_OPCODE_CASE(TXB, TXB);
837
838 NV50_IR_OPCODE_CASE(DIV, DIV);
839
840 NV50_IR_OPCODE_CASE(TXL, TXL);
841 NV50_IR_OPCODE_CASE(TEX_LZ, TXL);
842
843 NV50_IR_OPCODE_CASE(CEIL, CEIL);
844 NV50_IR_OPCODE_CASE(I2F, CVT);
845 NV50_IR_OPCODE_CASE(NOT, NOT);
846 NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
847 NV50_IR_OPCODE_CASE(SHL, SHL);
848
849 NV50_IR_OPCODE_CASE(AND, AND);
850 NV50_IR_OPCODE_CASE(OR, OR);
851 NV50_IR_OPCODE_CASE(MOD, MOD);
852 NV50_IR_OPCODE_CASE(XOR, XOR);
853 NV50_IR_OPCODE_CASE(TXF, TXF);
854 NV50_IR_OPCODE_CASE(TXF_LZ, TXF);
855 NV50_IR_OPCODE_CASE(TXQ, TXQ);
856 NV50_IR_OPCODE_CASE(TXQS, TXQ);
857 NV50_IR_OPCODE_CASE(TG4, TXG);
858 NV50_IR_OPCODE_CASE(LODQ, TXLQ);
859
860 NV50_IR_OPCODE_CASE(EMIT, EMIT);
861 NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
862
863 NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);
864
865 NV50_IR_OPCODE_CASE(F2I, CVT);
866 NV50_IR_OPCODE_CASE(FSEQ, SET);
867 NV50_IR_OPCODE_CASE(FSGE, SET);
868 NV50_IR_OPCODE_CASE(FSLT, SET);
869 NV50_IR_OPCODE_CASE(FSNE, SET);
870 NV50_IR_OPCODE_CASE(IDIV, DIV);
871 NV50_IR_OPCODE_CASE(IMAX, MAX);
872 NV50_IR_OPCODE_CASE(IMIN, MIN);
873 NV50_IR_OPCODE_CASE(IABS, ABS);
874 NV50_IR_OPCODE_CASE(INEG, NEG);
875 NV50_IR_OPCODE_CASE(ISGE, SET);
876 NV50_IR_OPCODE_CASE(ISHR, SHR);
877 NV50_IR_OPCODE_CASE(ISLT, SET);
878 NV50_IR_OPCODE_CASE(F2U, CVT);
879 NV50_IR_OPCODE_CASE(U2F, CVT);
880 NV50_IR_OPCODE_CASE(UADD, ADD);
881 NV50_IR_OPCODE_CASE(UDIV, DIV);
882 NV50_IR_OPCODE_CASE(UMAD, MAD);
883 NV50_IR_OPCODE_CASE(UMAX, MAX);
884 NV50_IR_OPCODE_CASE(UMIN, MIN);
885 NV50_IR_OPCODE_CASE(UMOD, MOD);
886 NV50_IR_OPCODE_CASE(UMUL, MUL);
887 NV50_IR_OPCODE_CASE(USEQ, SET);
888 NV50_IR_OPCODE_CASE(USGE, SET);
889 NV50_IR_OPCODE_CASE(USHR, SHR);
890 NV50_IR_OPCODE_CASE(USLT, SET);
891 NV50_IR_OPCODE_CASE(USNE, SET);
892
893 NV50_IR_OPCODE_CASE(DABS, ABS);
894 NV50_IR_OPCODE_CASE(DNEG, NEG);
895 NV50_IR_OPCODE_CASE(DADD, ADD);
896 NV50_IR_OPCODE_CASE(DMUL, MUL);
897 NV50_IR_OPCODE_CASE(DDIV, DIV);
898 NV50_IR_OPCODE_CASE(DMAX, MAX);
899 NV50_IR_OPCODE_CASE(DMIN, MIN);
900 NV50_IR_OPCODE_CASE(DSLT, SET);
901 NV50_IR_OPCODE_CASE(DSGE, SET);
902 NV50_IR_OPCODE_CASE(DSEQ, SET);
903 NV50_IR_OPCODE_CASE(DSNE, SET);
904 NV50_IR_OPCODE_CASE(DRCP, RCP);
905 NV50_IR_OPCODE_CASE(DSQRT, SQRT);
906 NV50_IR_OPCODE_CASE(DMAD, MAD);
907 NV50_IR_OPCODE_CASE(DFMA, FMA);
908 NV50_IR_OPCODE_CASE(D2I, CVT);
909 NV50_IR_OPCODE_CASE(D2U, CVT);
910 NV50_IR_OPCODE_CASE(I2D, CVT);
911 NV50_IR_OPCODE_CASE(U2D, CVT);
912 NV50_IR_OPCODE_CASE(DRSQ, RSQ);
913 NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
914 NV50_IR_OPCODE_CASE(DCEIL, CEIL);
915 NV50_IR_OPCODE_CASE(DFLR, FLOOR);
916 NV50_IR_OPCODE_CASE(DROUND, CVT);
917
918 NV50_IR_OPCODE_CASE(U64SEQ, SET);
919 NV50_IR_OPCODE_CASE(U64SNE, SET);
920 NV50_IR_OPCODE_CASE(U64SLT, SET);
921 NV50_IR_OPCODE_CASE(U64SGE, SET);
922 NV50_IR_OPCODE_CASE(I64SLT, SET);
923 NV50_IR_OPCODE_CASE(I64SGE, SET);
924 NV50_IR_OPCODE_CASE(I2I64, CVT);
925 NV50_IR_OPCODE_CASE(U2I64, CVT);
926 NV50_IR_OPCODE_CASE(F2I64, CVT);
927 NV50_IR_OPCODE_CASE(F2U64, CVT);
928 NV50_IR_OPCODE_CASE(D2I64, CVT);
929 NV50_IR_OPCODE_CASE(D2U64, CVT);
930 NV50_IR_OPCODE_CASE(I642F, CVT);
931 NV50_IR_OPCODE_CASE(U642F, CVT);
932 NV50_IR_OPCODE_CASE(I642D, CVT);
933 NV50_IR_OPCODE_CASE(U642D, CVT);
934
935 NV50_IR_OPCODE_CASE(I64MIN, MIN);
936 NV50_IR_OPCODE_CASE(U64MIN, MIN);
937 NV50_IR_OPCODE_CASE(I64MAX, MAX);
938 NV50_IR_OPCODE_CASE(U64MAX, MAX);
939 NV50_IR_OPCODE_CASE(I64ABS, ABS);
940 NV50_IR_OPCODE_CASE(I64NEG, NEG);
941 NV50_IR_OPCODE_CASE(U64ADD, ADD);
942 NV50_IR_OPCODE_CASE(U64MUL, MUL);
943 NV50_IR_OPCODE_CASE(U64SHL, SHL);
944 NV50_IR_OPCODE_CASE(I64SHR, SHR);
945 NV50_IR_OPCODE_CASE(U64SHR, SHR);
946
947 NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
948 NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
949
950 NV50_IR_OPCODE_CASE(SAMPLE, TEX);
951 NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
952 NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
953 NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
954 NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
955 NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
956 NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);
957 NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);
958 NV50_IR_OPCODE_CASE(GATHER4, TXG);
959 NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
960
961 NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);
962 NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);
963 NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);
964 NV50_IR_OPCODE_CASE(ATOMAND, ATOM);
965 NV50_IR_OPCODE_CASE(ATOMOR, ATOM);
966 NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);
967 NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);
968 NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);
969 NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);
970 NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);
971 NV50_IR_OPCODE_CASE(ATOMFADD, ATOM);
972
973 NV50_IR_OPCODE_CASE(TEX2, TEX);
974 NV50_IR_OPCODE_CASE(TXB2, TXB);
975 NV50_IR_OPCODE_CASE(TXL2, TXL);
976
977 NV50_IR_OPCODE_CASE(IBFE, EXTBF);
978 NV50_IR_OPCODE_CASE(UBFE, EXTBF);
979 NV50_IR_OPCODE_CASE(BFI, INSBF);
980 NV50_IR_OPCODE_CASE(BREV, EXTBF);
981 NV50_IR_OPCODE_CASE(POPC, POPCNT);
982 NV50_IR_OPCODE_CASE(LSB, BFIND);
983 NV50_IR_OPCODE_CASE(IMSB, BFIND);
984 NV50_IR_OPCODE_CASE(UMSB, BFIND);
985
986 NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE);
987 NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE);
988 NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE);
989
990 NV50_IR_OPCODE_CASE(BALLOT, VOTE);
991 NV50_IR_OPCODE_CASE(READ_INVOC, SHFL);
992 NV50_IR_OPCODE_CASE(READ_FIRST, SHFL);
993
994 NV50_IR_OPCODE_CASE(END, EXIT);
995
996 default:
997 return nv50_ir::OP_NOP;
998 }
999 }
1000
1001 static uint16_t opcodeToSubOp(uint opcode)
1002 {
1003 switch (opcode) {
1004 case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD;
1005 case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH;
1006 case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS;
1007 case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND;
1008 case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR;
1009 case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR;
1010 case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN;
1011 case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN;
1012 case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX;
1013 case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX;
1014 case TGSI_OPCODE_ATOMFADD: return NV50_IR_SUBOP_ATOM_ADD;
1015 case TGSI_OPCODE_IMUL_HI:
1016 case TGSI_OPCODE_UMUL_HI:
1017 return NV50_IR_SUBOP_MUL_HIGH;
1018 case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL;
1019 case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY;
1020 case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI;
1021 default:
1022 return 0;
1023 }
1024 }
1025
1026 bool Instruction::checkDstSrcAliasing() const
1027 {
1028 if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
1029 return false;
1030
1031 for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
1032 if (insn->Src[s].Register.File == TGSI_FILE_NULL)
1033 break;
1034 if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
1035 insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
1036 return true;
1037 }
1038 return false;
1039 }
1040
1041 class Source
1042 {
1043 public:
1044 Source(struct nv50_ir_prog_info *);
1045 ~Source();
1046
1047 public:
1048 bool scanSource();
1049 unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }
1050
1051 public:
1052 struct tgsi_shader_info scan;
1053 struct tgsi_full_instruction *insns;
1054 const struct tgsi_token *tokens;
1055 struct nv50_ir_prog_info *info;
1056
1057 nv50_ir::DynArray tempArrays;
1058 nv50_ir::DynArray immdArrays;
1059
1060 typedef nv50_ir::BuildUtil::Location Location;
1061 // these registers are per-subroutine, cannot be used for parameter passing
1062 std::set<Location> locals;
1063
1064 std::set<int> indirectTempArrays;
1065 std::map<int, int> indirectTempOffsets;
1066 std::map<int, std::pair<int, int> > tempArrayInfo;
1067 std::vector<int> tempArrayId;
1068
1069 int clipVertexOutput;
1070
1071 struct TextureView {
1072 uint8_t target; // TGSI_TEXTURE_*
1073 };
1074 std::vector<TextureView> textureViews;
1075
1076 /*
1077 struct Resource {
1078 uint8_t target; // TGSI_TEXTURE_*
1079 bool raw;
1080 uint8_t slot; // $surface index
1081 };
1082 std::vector<Resource> resources;
1083 */
1084
1085 struct MemoryFile {
1086 uint8_t mem_type; // TGSI_MEMORY_TYPE_*
1087 };
1088 std::vector<MemoryFile> memoryFiles;
1089
1090 private:
1091 int inferSysValDirection(unsigned sn) const;
1092 bool scanDeclaration(const struct tgsi_full_declaration *);
1093 bool scanInstruction(const struct tgsi_full_instruction *);
1094 void scanInstructionSrc(const Instruction& insn,
1095 const Instruction::SrcRegister& src,
1096 unsigned mask);
1097 void scanProperty(const struct tgsi_full_property *);
1098 void scanImmediate(const struct tgsi_full_immediate *);
1099
1100 inline bool isEdgeFlagPassthrough(const Instruction&) const;
1101 };
1102
1103 Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
1104 {
1105 tokens = (const struct tgsi_token *)info->bin.source;
1106
1107 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1108 tgsi_dump(tokens, 0);
1109 }
1110
1111 Source::~Source()
1112 {
1113 if (insns)
1114 FREE(insns);
1115
1116 if (info->immd.data)
1117 FREE(info->immd.data);
1118 if (info->immd.type)
1119 FREE(info->immd.type);
1120 }
1121
1122 bool Source::scanSource()
1123 {
1124 unsigned insnCount = 0;
1125 struct tgsi_parse_context parse;
1126
1127 tgsi_scan_shader(tokens, &scan);
1128
1129 insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
1130 sizeof(insns[0]));
1131 if (!insns)
1132 return false;
1133
1134 clipVertexOutput = -1;
1135
1136 textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
1137 //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
1138 tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
1139 memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
1140
1141 info->immd.bufSize = 0;
1142
1143 info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
1144 info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
1145 info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
1146
1147 if (info->type == PIPE_SHADER_FRAGMENT) {
1148 info->prop.fp.writesDepth = scan.writes_z;
1149 info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase;
1150 } else
1151 if (info->type == PIPE_SHADER_GEOMETRY) {
1152 info->prop.gp.instanceCount = 1; // default value
1153 }
1154
1155 info->io.viewportId = -1;
1156
1157 info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
1158 info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
1159
1160 tgsi_parse_init(&parse, tokens);
1161 while (!tgsi_parse_end_of_tokens(&parse)) {
1162 tgsi_parse_token(&parse);
1163
1164 switch (parse.FullToken.Token.Type) {
1165 case TGSI_TOKEN_TYPE_IMMEDIATE:
1166 scanImmediate(&parse.FullToken.FullImmediate);
1167 break;
1168 case TGSI_TOKEN_TYPE_DECLARATION:
1169 scanDeclaration(&parse.FullToken.FullDeclaration);
1170 break;
1171 case TGSI_TOKEN_TYPE_INSTRUCTION:
1172 insns[insnCount++] = parse.FullToken.FullInstruction;
1173 scanInstruction(&parse.FullToken.FullInstruction);
1174 break;
1175 case TGSI_TOKEN_TYPE_PROPERTY:
1176 scanProperty(&parse.FullToken.FullProperty);
1177 break;
1178 default:
1179 INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
1180 break;
1181 }
1182 }
1183 tgsi_parse_free(&parse);
1184
1185 if (indirectTempArrays.size()) {
1186 int tempBase = 0;
1187 for (std::set<int>::const_iterator it = indirectTempArrays.begin();
1188 it != indirectTempArrays.end(); ++it) {
1189 std::pair<int, int>& info = tempArrayInfo[*it];
1190 indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first));
1191 tempBase += info.second;
1192 }
1193 info->bin.tlsSpace += tempBase * 16;
1194 }
1195
1196 if (info->io.genUserClip > 0) {
1197 info->io.clipDistances = info->io.genUserClip;
1198
1199 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1200
1201 for (unsigned int n = 0; n < nOut; ++n) {
1202 unsigned int i = info->numOutputs++;
1203 info->out[i].id = i;
1204 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1205 info->out[i].si = n;
1206 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1207 }
1208 }
1209
1210 return info->assignSlots(info) == 0;
1211 }
1212
1213 void Source::scanProperty(const struct tgsi_full_property *prop)
1214 {
1215 switch (prop->Property.PropertyName) {
1216 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
1217 info->prop.gp.outputPrim = prop->u[0].Data;
1218 break;
1219 case TGSI_PROPERTY_GS_INPUT_PRIM:
1220 info->prop.gp.inputPrim = prop->u[0].Data;
1221 break;
1222 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
1223 info->prop.gp.maxVertices = prop->u[0].Data;
1224 break;
1225 case TGSI_PROPERTY_GS_INVOCATIONS:
1226 info->prop.gp.instanceCount = prop->u[0].Data;
1227 break;
1228 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1229 info->prop.fp.separateFragData = true;
1230 break;
1231 case TGSI_PROPERTY_FS_COORD_ORIGIN:
1232 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
1233 case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
1234 // we don't care
1235 break;
1236 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1237 info->io.genUserClip = -1;
1238 break;
1239 case TGSI_PROPERTY_TCS_VERTICES_OUT:
1240 info->prop.tp.outputPatchSize = prop->u[0].Data;
1241 break;
1242 case TGSI_PROPERTY_TES_PRIM_MODE:
1243 info->prop.tp.domain = prop->u[0].Data;
1244 break;
1245 case TGSI_PROPERTY_TES_SPACING:
1246 info->prop.tp.partitioning = prop->u[0].Data;
1247 break;
1248 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
1249 info->prop.tp.winding = prop->u[0].Data;
1250 break;
1251 case TGSI_PROPERTY_TES_POINT_MODE:
1252 if (prop->u[0].Data)
1253 info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
1254 else
1255 info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
1256 break;
1257 case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
1258 info->prop.cp.numThreads[0] = prop->u[0].Data;
1259 break;
1260 case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
1261 info->prop.cp.numThreads[1] = prop->u[0].Data;
1262 break;
1263 case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
1264 info->prop.cp.numThreads[2] = prop->u[0].Data;
1265 break;
1266 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
1267 info->io.clipDistances = prop->u[0].Data;
1268 break;
1269 case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
1270 info->io.cullDistances = prop->u[0].Data;
1271 break;
1272 case TGSI_PROPERTY_NEXT_SHADER:
1273 /* Do not need to know the next shader stage. */
1274 break;
1275 case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
1276 info->prop.fp.earlyFragTests = prop->u[0].Data;
1277 break;
1278 case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE:
1279 info->prop.fp.postDepthCoverage = prop->u[0].Data;
1280 break;
1281 case TGSI_PROPERTY_MUL_ZERO_WINS:
1282 info->io.mul_zero_wins = prop->u[0].Data;
1283 break;
1284 default:
1285 INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
1286 break;
1287 }
1288 }
1289
1290 void Source::scanImmediate(const struct tgsi_full_immediate *imm)
1291 {
1292 const unsigned n = info->immd.count++;
1293
1294 assert(n < scan.immediate_count);
1295
1296 for (int c = 0; c < 4; ++c)
1297 info->immd.data[n * 4 + c] = imm->u[c].Uint;
1298
1299 info->immd.type[n] = imm->Immediate.DataType;
1300 }
1301
1302 int Source::inferSysValDirection(unsigned sn) const
1303 {
1304 switch (sn) {
1305 case TGSI_SEMANTIC_INSTANCEID:
1306 case TGSI_SEMANTIC_VERTEXID:
1307 return 1;
1308 case TGSI_SEMANTIC_LAYER:
1309 #if 0
1310 case TGSI_SEMANTIC_VIEWPORTINDEX:
1311 return 0;
1312 #endif
1313 case TGSI_SEMANTIC_PRIMID:
1314 return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
1315 default:
1316 return 0;
1317 }
1318 }
1319
1320 bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
1321 {
1322 unsigned i, c;
1323 unsigned sn = TGSI_SEMANTIC_GENERIC;
1324 unsigned si = 0;
1325 const unsigned first = decl->Range.First, last = decl->Range.Last;
1326 const int arrayId = decl->Array.ArrayID;
1327
1328 if (decl->Declaration.Semantic) {
1329 sn = decl->Semantic.Name;
1330 si = decl->Semantic.Index;
1331 }
1332
1333 if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) {
1334 for (i = first; i <= last; ++i) {
1335 for (c = 0; c < 4; ++c) {
1336 locals.insert(
1337 Location(decl->Declaration.File, decl->Dim.Index2D, i, c));
1338 }
1339 }
1340 }
1341
1342 switch (decl->Declaration.File) {
1343 case TGSI_FILE_INPUT:
1344 if (info->type == PIPE_SHADER_VERTEX) {
1345 // all vertex attributes are equal
1346 for (i = first; i <= last; ++i) {
1347 info->in[i].sn = TGSI_SEMANTIC_GENERIC;
1348 info->in[i].si = i;
1349 }
1350 } else {
1351 for (i = first; i <= last; ++i, ++si) {
1352 info->in[i].id = i;
1353 info->in[i].sn = sn;
1354 info->in[i].si = si;
1355 if (info->type == PIPE_SHADER_FRAGMENT) {
1356 // translate interpolation mode
1357 switch (decl->Interp.Interpolate) {
1358 case TGSI_INTERPOLATE_CONSTANT:
1359 info->in[i].flat = 1;
1360 break;
1361 case TGSI_INTERPOLATE_COLOR:
1362 info->in[i].sc = 1;
1363 break;
1364 case TGSI_INTERPOLATE_LINEAR:
1365 info->in[i].linear = 1;
1366 break;
1367 default:
1368 break;
1369 }
1370 if (decl->Interp.Location)
1371 info->in[i].centroid = 1;
1372 }
1373
1374 if (sn == TGSI_SEMANTIC_PATCH)
1375 info->in[i].patch = 1;
1376 if (sn == TGSI_SEMANTIC_PATCH)
1377 info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
1378 }
1379 }
1380 break;
1381 case TGSI_FILE_OUTPUT:
1382 for (i = first; i <= last; ++i, ++si) {
1383 switch (sn) {
1384 case TGSI_SEMANTIC_POSITION:
1385 if (info->type == PIPE_SHADER_FRAGMENT)
1386 info->io.fragDepth = i;
1387 else
1388 if (clipVertexOutput < 0)
1389 clipVertexOutput = i;
1390 break;
1391 case TGSI_SEMANTIC_COLOR:
1392 if (info->type == PIPE_SHADER_FRAGMENT)
1393 info->prop.fp.numColourResults++;
1394 break;
1395 case TGSI_SEMANTIC_EDGEFLAG:
1396 info->io.edgeFlagOut = i;
1397 break;
1398 case TGSI_SEMANTIC_CLIPVERTEX:
1399 clipVertexOutput = i;
1400 break;
1401 case TGSI_SEMANTIC_CLIPDIST:
1402 info->io.genUserClip = -1;
1403 break;
1404 case TGSI_SEMANTIC_SAMPLEMASK:
1405 info->io.sampleMask = i;
1406 break;
1407 case TGSI_SEMANTIC_VIEWPORT_INDEX:
1408 info->io.viewportId = i;
1409 break;
1410 case TGSI_SEMANTIC_PATCH:
1411 info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
1412 /* fallthrough */
1413 case TGSI_SEMANTIC_TESSOUTER:
1414 case TGSI_SEMANTIC_TESSINNER:
1415 info->out[i].patch = 1;
1416 break;
1417 default:
1418 break;
1419 }
1420 info->out[i].id = i;
1421 info->out[i].sn = sn;
1422 info->out[i].si = si;
1423 }
1424 break;
1425 case TGSI_FILE_SYSTEM_VALUE:
1426 switch (sn) {
1427 case TGSI_SEMANTIC_INSTANCEID:
1428 info->io.instanceId = first;
1429 break;
1430 case TGSI_SEMANTIC_VERTEXID:
1431 info->io.vertexId = first;
1432 break;
1433 case TGSI_SEMANTIC_BASEVERTEX:
1434 case TGSI_SEMANTIC_BASEINSTANCE:
1435 case TGSI_SEMANTIC_DRAWID:
1436 info->prop.vp.usesDrawParameters = true;
1437 break;
1438 case TGSI_SEMANTIC_SAMPLEID:
1439 case TGSI_SEMANTIC_SAMPLEPOS:
1440 info->prop.fp.persampleInvocation = true;
1441 break;
1442 case TGSI_SEMANTIC_SAMPLEMASK:
1443 info->prop.fp.usesSampleMaskIn = true;
1444 break;
1445 default:
1446 break;
1447 }
1448 for (i = first; i <= last; ++i, ++si) {
1449 info->sv[i].sn = sn;
1450 info->sv[i].si = si;
1451 info->sv[i].input = inferSysValDirection(sn);
1452
1453 switch (sn) {
1454 case TGSI_SEMANTIC_TESSOUTER:
1455 case TGSI_SEMANTIC_TESSINNER:
1456 info->sv[i].patch = 1;
1457 break;
1458 }
1459 }
1460 break;
1461 /*
1462 case TGSI_FILE_RESOURCE:
1463 for (i = first; i <= last; ++i) {
1464 resources[i].target = decl->Resource.Resource;
1465 resources[i].raw = decl->Resource.Raw;
1466 resources[i].slot = i;
1467 }
1468 break;
1469 */
1470 case TGSI_FILE_SAMPLER_VIEW:
1471 for (i = first; i <= last; ++i)
1472 textureViews[i].target = decl->SamplerView.Resource;
1473 break;
1474 case TGSI_FILE_MEMORY:
1475 for (i = first; i <= last; ++i)
1476 memoryFiles[i].mem_type = decl->Declaration.MemType;
1477 break;
1478 case TGSI_FILE_NULL:
1479 case TGSI_FILE_TEMPORARY:
1480 for (i = first; i <= last; ++i)
1481 tempArrayId[i] = arrayId;
1482 if (arrayId)
1483 tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
1484 first, last - first + 1)));
1485 break;
1486 case TGSI_FILE_ADDRESS:
1487 case TGSI_FILE_CONSTANT:
1488 case TGSI_FILE_IMMEDIATE:
1489 case TGSI_FILE_SAMPLER:
1490 case TGSI_FILE_BUFFER:
1491 case TGSI_FILE_IMAGE:
1492 break;
1493 default:
1494 ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
1495 return false;
1496 }
1497 return true;
1498 }
1499
1500 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
1501 {
1502 return insn.getOpcode() == TGSI_OPCODE_MOV &&
1503 insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
1504 insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
1505 }
1506
1507 void Source::scanInstructionSrc(const Instruction& insn,
1508 const Instruction::SrcRegister& src,
1509 unsigned mask)
1510 {
1511 if (src.getFile() == TGSI_FILE_TEMPORARY) {
1512 if (src.isIndirect(0))
1513 indirectTempArrays.insert(src.getArrayId());
1514 } else
1515 if (src.getFile() == TGSI_FILE_OUTPUT) {
1516 if (src.isIndirect(0)) {
1517 // We don't know which one is accessed, just mark everything for
1518 // reading. This is an extremely unlikely occurrence.
1519 for (unsigned i = 0; i < info->numOutputs; ++i)
1520 info->out[i].oread = 1;
1521 } else {
1522 info->out[src.getIndex(0)].oread = 1;
1523 }
1524 }
1525 if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) {
1526 if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS)
1527 info->prop.fp.readsSampleLocations = true;
1528 }
1529 if (src.getFile() != TGSI_FILE_INPUT)
1530 return;
1531
1532 if (src.isIndirect(0)) {
1533 for (unsigned i = 0; i < info->numInputs; ++i)
1534 info->in[i].mask = 0xf;
1535 } else {
1536 const int i = src.getIndex(0);
1537 for (unsigned c = 0; c < 4; ++c) {
1538 if (!(mask & (1 << c)))
1539 continue;
1540 int k = src.getSwizzle(c);
1541 if (k <= TGSI_SWIZZLE_W)
1542 info->in[i].mask |= 1 << k;
1543 }
1544 switch (info->in[i].sn) {
1545 case TGSI_SEMANTIC_PSIZE:
1546 case TGSI_SEMANTIC_PRIMID:
1547 case TGSI_SEMANTIC_FOG:
1548 info->in[i].mask &= 0x1;
1549 break;
1550 case TGSI_SEMANTIC_PCOORD:
1551 info->in[i].mask &= 0x3;
1552 break;
1553 default:
1554 break;
1555 }
1556 }
1557 }
1558
1559 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
1560 {
1561 Instruction insn(inst);
1562
1563 if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
1564 info->numBarriers = 1;
1565
1566 if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
1567 info->prop.fp.readsFramebuffer = true;
1568
1569 if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE)
1570 info->prop.fp.readsSampleLocations = true;
1571
1572 if (insn.dstCount()) {
1573 Instruction::DstRegister dst = insn.getDst(0);
1574
1575 if (insn.getOpcode() == TGSI_OPCODE_STORE &&
1576 dst.getFile() != TGSI_FILE_MEMORY) {
1577 info->io.globalAccess |= 0x2;
1578 }
1579
1580 if (dst.getFile() == TGSI_FILE_OUTPUT) {
1581 if (dst.isIndirect(0))
1582 for (unsigned i = 0; i < info->numOutputs; ++i)
1583 info->out[i].mask = 0xf;
1584 else
1585 info->out[dst.getIndex(0)].mask |= dst.getMask();
1586
1587 if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
1588 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
1589 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||
1590 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||
1591 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
1592 info->out[dst.getIndex(0)].mask &= 1;
1593
1594 if (isEdgeFlagPassthrough(insn))
1595 info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
1596 } else
1597 if (dst.getFile() == TGSI_FILE_TEMPORARY) {
1598 if (dst.isIndirect(0))
1599 indirectTempArrays.insert(dst.getArrayId());
1600 } else
1601 if (dst.getFile() == TGSI_FILE_BUFFER ||
1602 dst.getFile() == TGSI_FILE_IMAGE ||
1603 (dst.getFile() == TGSI_FILE_MEMORY &&
1604 memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
1605 info->io.globalAccess |= 0x2;
1606 }
1607 }
1608
1609 if (insn.srcCount() && (
1610 insn.getSrc(0).getFile() != TGSI_FILE_MEMORY ||
1611 memoryFiles[insn.getSrc(0).getIndex(0)].mem_type ==
1612 TGSI_MEMORY_TYPE_GLOBAL)) {
1613 switch (insn.getOpcode()) {
1614 case TGSI_OPCODE_ATOMUADD:
1615 case TGSI_OPCODE_ATOMXCHG:
1616 case TGSI_OPCODE_ATOMCAS:
1617 case TGSI_OPCODE_ATOMAND:
1618 case TGSI_OPCODE_ATOMOR:
1619 case TGSI_OPCODE_ATOMXOR:
1620 case TGSI_OPCODE_ATOMUMIN:
1621 case TGSI_OPCODE_ATOMIMIN:
1622 case TGSI_OPCODE_ATOMUMAX:
1623 case TGSI_OPCODE_ATOMIMAX:
1624 case TGSI_OPCODE_ATOMFADD:
1625 case TGSI_OPCODE_LOAD:
1626 info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
1627 0x1 : 0x2;
1628 break;
1629 }
1630 }
1631
1632
1633 for (unsigned s = 0; s < insn.srcCount(); ++s)
1634 scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
1635
1636 for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
1637 scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());
1638
1639 return true;
1640 }
1641
1642 nv50_ir::TexInstruction::Target
1643 Instruction::getTexture(const tgsi::Source *code, int s) const
1644 {
1645 // XXX: indirect access
1646 unsigned int r;
1647
1648 switch (getSrc(s).getFile()) {
1649 /*
1650 case TGSI_FILE_RESOURCE:
1651 r = getSrc(s).getIndex(0);
1652 return translateTexture(code->resources.at(r).target);
1653 */
1654 case TGSI_FILE_SAMPLER_VIEW:
1655 r = getSrc(s).getIndex(0);
1656 return translateTexture(code->textureViews.at(r).target);
1657 default:
1658 return translateTexture(insn->Texture.Texture);
1659 }
1660 }
1661
1662 } // namespace tgsi
1663
1664 namespace {
1665
1666 using namespace nv50_ir;
1667
1668 class Converter : public BuildUtil
1669 {
1670 public:
1671 Converter(Program *, const tgsi::Source *);
1672 ~Converter();
1673
1674 bool run();
1675
1676 private:
1677 struct Subroutine
1678 {
1679 Subroutine(Function *f) : f(f) { }
1680 Function *f;
1681 ValueMap values;
1682 };
1683
1684 Value *shiftAddress(Value *);
1685 Value *getVertexBase(int s);
1686 Value *getOutputBase(int s);
1687 DataArray *getArrayForFile(unsigned file, int idx);
1688 Value *fetchSrc(int s, int c);
1689 Value *fetchDst(int d, int c);
1690 Value *acquireDst(int d, int c);
1691 void storeDst(int d, int c, Value *);
1692
1693 Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
1694 void storeDst(const tgsi::Instruction::DstRegister dst, int c,
1695 Value *val, Value *ptr);
1696
1697 void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
1698 Value *applySrcMod(Value *, int s, int c);
1699
1700 Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
1701 Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
1702 Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);
1703
1704 bool isSubGroupMask(uint8_t semantic);
1705
1706 bool handleInstruction(const struct tgsi_full_instruction *);
1707 void exportOutputs();
1708 inline Subroutine *getSubroutine(unsigned ip);
1709 inline Subroutine *getSubroutine(Function *);
1710 inline bool isEndOfSubroutine(uint ip);
1711
1712 void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);
1713
1714 // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
1715 void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
1716 void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
1717 void handleTXF(Value *dst0[4], int R, int L_M);
1718 void handleTXQ(Value *dst0[4], enum TexQuery, int R);
1719 void handleFBFETCH(Value *dst0[4]);
1720 void handleLIT(Value *dst0[4]);
1721 void handleUserClipPlanes();
1722
1723 // Symbol *getResourceBase(int r);
1724 void getImageCoords(std::vector<Value *>&, int s);
1725
1726 void handleLOAD(Value *dst0[4]);
1727 void handleSTORE();
1728 void handleATOM(Value *dst0[4], DataType, uint16_t subOp);
1729
1730 void handleINTERP(Value *dst0[4]);
1731
1732 uint8_t translateInterpMode(const struct nv50_ir_varying *var,
1733 operation& op);
1734 Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
1735
1736 void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
1737
1738 Value *buildDot(int dim);
1739
1740 class BindArgumentsPass : public Pass {
1741 public:
1742 BindArgumentsPass(Converter &conv) : conv(conv) { }
1743
1744 private:
1745 Converter &conv;
1746 Subroutine *sub;
1747
1748 inline const Location *getValueLocation(Subroutine *, Value *);
1749
1750 template<typename T> inline void
1751 updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
1752 T (Function::*proto));
1753
1754 template<typename T> inline void
1755 updatePrototype(BitSet *set, void (Function::*updateSet)(),
1756 T (Function::*proto));
1757
1758 protected:
1759 bool visit(Function *);
1760 bool visit(BasicBlock *bb) { return false; }
1761 };
1762
1763 private:
1764 const tgsi::Source *code;
1765 const struct nv50_ir_prog_info *info;
1766
1767 struct {
1768 std::map<unsigned, Subroutine> map;
1769 Subroutine *cur;
1770 } sub;
1771
1772 uint ip; // instruction pointer
1773
1774 tgsi::Instruction tgsi;
1775
1776 DataType dstTy;
1777 DataType srcTy;
1778
1779 DataArray tData; // TGSI_FILE_TEMPORARY
1780 DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
1781 DataArray aData; // TGSI_FILE_ADDRESS
1782 DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
1783
1784 Value *zero;
1785 Value *fragCoord[4];
1786 Value *clipVtx[4];
1787
1788 Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
1789 uint8_t vtxBaseValid;
1790
1791 Value *outBase; // base address of vertex out patch (for TCP)
1792
1793 Stack condBBs; // fork BB, then else clause BB
1794 Stack joinBBs; // fork BB, for inserting join ops on ENDIF
1795 Stack loopBBs; // loop headers
1796 Stack breakBBs; // end of / after loop
1797
1798 Value *viewport;
1799 };
1800
1801 Symbol *
1802 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
1803 {
1804 const int swz = src.getSwizzle(c);
1805
1806 /* TODO: Use Array ID when it's available for the index */
1807 return makeSym(src.getFile(),
1808 src.is2D() ? src.getIndex(1) : 0,
1809 src.getIndex(0), swz,
1810 src.getIndex(0) * 16 + swz * 4);
1811 }
1812
1813 Symbol *
1814 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
1815 {
1816 /* TODO: Use Array ID when it's available for the index */
1817 return makeSym(dst.getFile(),
1818 dst.is2D() ? dst.getIndex(1) : 0,
1819 dst.getIndex(0), c,
1820 dst.getIndex(0) * 16 + c * 4);
1821 }
1822
1823 Symbol *
1824 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
1825 {
1826 Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
1827
1828 sym->reg.fileIndex = fileIdx;
1829
1830 if (tgsiFile == TGSI_FILE_MEMORY) {
1831 switch (code->memoryFiles[fileIdx].mem_type) {
1832 case TGSI_MEMORY_TYPE_GLOBAL:
1833 /* No-op this is the default for TGSI_FILE_MEMORY */
1834 sym->setFile(FILE_MEMORY_GLOBAL);
1835 break;
1836 case TGSI_MEMORY_TYPE_SHARED:
1837 sym->setFile(FILE_MEMORY_SHARED);
1838 break;
1839 case TGSI_MEMORY_TYPE_INPUT:
1840 assert(prog->getType() == Program::TYPE_COMPUTE);
1841 assert(idx == -1);
1842 sym->setFile(FILE_SHADER_INPUT);
1843 address += info->prop.cp.inputOffset;
1844 break;
1845 default:
1846 assert(0); /* TODO: Add support for global and private memory */
1847 }
1848 }
1849
1850 if (idx >= 0) {
1851 if (sym->reg.file == FILE_SHADER_INPUT)
1852 sym->setOffset(info->in[idx].slot[c] * 4);
1853 else
1854 if (sym->reg.file == FILE_SHADER_OUTPUT)
1855 sym->setOffset(info->out[idx].slot[c] * 4);
1856 else
1857 if (sym->reg.file == FILE_SYSTEM_VALUE)
1858 sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
1859 else
1860 sym->setOffset(address);
1861 } else {
1862 sym->setOffset(address);
1863 }
1864 return sym;
1865 }
1866
1867 uint8_t
1868 Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
1869 {
1870 uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
1871
1872 if (var->flat)
1873 mode = NV50_IR_INTERP_FLAT;
1874 else
1875 if (var->linear)
1876 mode = NV50_IR_INTERP_LINEAR;
1877 else
1878 if (var->sc)
1879 mode = NV50_IR_INTERP_SC;
1880
1881 op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
1882 ? OP_PINTERP : OP_LINTERP;
1883
1884 if (var->centroid)
1885 mode |= NV50_IR_INTERP_CENTROID;
1886
1887 return mode;
1888 }
1889
1890 Value *
1891 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1892 {
1893 operation op;
1894
1895 // XXX: no way to know interpolation mode if we don't know what's accessed
1896 const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
1897 src.getIndex(0)], op);
1898
1899 Instruction *insn = new_Instruction(func, op, TYPE_F32);
1900
1901 insn->setDef(0, getScratch());
1902 insn->setSrc(0, srcToSym(src, c));
1903 if (op == OP_PINTERP)
1904 insn->setSrc(1, fragCoord[3]);
1905 if (ptr)
1906 insn->setIndirect(0, 0, ptr);
1907
1908 insn->setInterpolate(mode);
1909
1910 bb->insertTail(insn);
1911 return insn->getDef(0);
1912 }
1913
1914 Value *
1915 Converter::applySrcMod(Value *val, int s, int c)
1916 {
1917 Modifier m = tgsi.getSrc(s).getMod(c);
1918 DataType ty = tgsi.inferSrcType();
1919
1920 if (m & Modifier(NV50_IR_MOD_ABS))
1921 val = mkOp1v(OP_ABS, ty, getScratch(), val);
1922
1923 if (m & Modifier(NV50_IR_MOD_NEG))
1924 val = mkOp1v(OP_NEG, ty, getScratch(), val);
1925
1926 return val;
1927 }
1928
1929 Value *
1930 Converter::getVertexBase(int s)
1931 {
1932 assert(s < 5);
1933 if (!(vtxBaseValid & (1 << s))) {
1934 const int index = tgsi.getSrc(s).getIndex(1);
1935 Value *rel = NULL;
1936 if (tgsi.getSrc(s).isIndirect(1))
1937 rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
1938 vtxBaseValid |= 1 << s;
1939 vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1940 mkImm(index), rel);
1941 }
1942 return vtxBase[s];
1943 }
1944
1945 Value *
1946 Converter::getOutputBase(int s)
1947 {
1948 assert(s < 5);
1949 if (!(vtxBaseValid & (1 << s))) {
1950 Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
1951 if (tgsi.getSrc(s).isIndirect(1))
1952 offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
1953 fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
1954 offset);
1955 vtxBaseValid |= 1 << s;
1956 vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);
1957 }
1958 return vtxBase[s];
1959 }
1960
1961 Value *
1962 Converter::fetchSrc(int s, int c)
1963 {
1964 Value *res;
1965 Value *ptr = NULL, *dimRel = NULL;
1966
1967 tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
1968
1969 if (src.isIndirect(0))
1970 ptr = fetchSrc(src.getIndirect(0), 0, NULL);
1971
1972 if (src.is2D()) {
1973 switch (src.getFile()) {
1974 case TGSI_FILE_OUTPUT:
1975 dimRel = getOutputBase(s);
1976 break;
1977 case TGSI_FILE_INPUT:
1978 dimRel = getVertexBase(s);
1979 break;
1980 case TGSI_FILE_CONSTANT:
1981 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
1982 if (src.isIndirect(1))
1983 dimRel = fetchSrc(src.getIndirect(1), 0, 0);
1984 break;
1985 default:
1986 break;
1987 }
1988 }
1989
1990 res = fetchSrc(src, c, ptr);
1991
1992 if (dimRel)
1993 res->getInsn()->setIndirect(0, 1, dimRel);
1994
1995 return applySrcMod(res, s, c);
1996 }
1997
1998 Value *
1999 Converter::fetchDst(int d, int c)
2000 {
2001 Value *res;
2002 Value *ptr = NULL, *dimRel = NULL;
2003
2004 tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2005
2006 if (dst.isIndirect(0))
2007 ptr = fetchSrc(dst.getIndirect(0), 0, NULL);
2008
2009 if (dst.is2D()) {
2010 switch (dst.getFile()) {
2011 case TGSI_FILE_OUTPUT:
2012 assert(0); // TODO
2013 dimRel = NULL;
2014 break;
2015 case TGSI_FILE_INPUT:
2016 assert(0); // TODO
2017 dimRel = NULL;
2018 break;
2019 case TGSI_FILE_CONSTANT:
2020 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
2021 if (dst.isIndirect(1))
2022 dimRel = fetchSrc(dst.getIndirect(1), 0, 0);
2023 break;
2024 default:
2025 break;
2026 }
2027 }
2028
2029 struct tgsi_full_src_register fsr = dst.asSrc();
2030 tgsi::Instruction::SrcRegister src(&fsr);
2031 res = fetchSrc(src, c, ptr);
2032
2033 if (dimRel)
2034 res->getInsn()->setIndirect(0, 1, dimRel);
2035
2036 return res;
2037 }
2038
2039 Converter::DataArray *
2040 Converter::getArrayForFile(unsigned file, int idx)
2041 {
2042 switch (file) {
2043 case TGSI_FILE_TEMPORARY:
2044 return idx == 0 ? &tData : &lData;
2045 case TGSI_FILE_ADDRESS:
2046 return &aData;
2047 case TGSI_FILE_OUTPUT:
2048 assert(prog->getType() == Program::TYPE_FRAGMENT);
2049 return &oData;
2050 default:
2051 assert(!"invalid/unhandled TGSI source file");
2052 return NULL;
2053 }
2054 }
2055
2056 Value *
2057 Converter::shiftAddress(Value *index)
2058 {
2059 if (!index)
2060 return NULL;
2061 return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
2062 }
2063
2064 void
2065 Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
2066 {
2067 std::map<int, int>::const_iterator it =
2068 code->indirectTempOffsets.find(arrayId);
2069 if (it == code->indirectTempOffsets.end())
2070 return;
2071
2072 idx2d = 1;
2073 idx += it->second;
2074 }
2075
2076 bool
2077 Converter::isSubGroupMask(uint8_t semantic)
2078 {
2079 switch (semantic) {
2080 case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
2081 case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
2082 case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
2083 case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
2084 case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
2085 return true;
2086 default:
2087 return false;
2088 }
2089 }
2090
2091 Value *
2092 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
2093 {
2094 int idx2d = src.is2D() ? src.getIndex(1) : 0;
2095 int idx = src.getIndex(0);
2096 const int swz = src.getSwizzle(c);
2097 Instruction *ld;
2098
2099 switch (src.getFile()) {
2100 case TGSI_FILE_IMMEDIATE:
2101 assert(!ptr);
2102 return loadImm(NULL, info->immd.data[idx * 4 + swz]);
2103 case TGSI_FILE_CONSTANT:
2104 return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
2105 case TGSI_FILE_INPUT:
2106 if (prog->getType() == Program::TYPE_FRAGMENT) {
2107 // don't load masked inputs, won't be assigned a slot
2108 if (!ptr && !(info->in[idx].mask & (1 << swz)))
2109 return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
2110 return interpolate(src, c, shiftAddress(ptr));
2111 } else
2112 if (prog->getType() == Program::TYPE_GEOMETRY) {
2113 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
2114 return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
2115 // XXX: This is going to be a problem with scalar arrays, i.e. when
2116 // we cannot assume that the address is given in units of vec4.
2117 //
2118 // nv50 and nvc0 need different things here, so let the lowering
2119 // passes decide what to do with the address
2120 if (ptr)
2121 return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
2122 }
2123 ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2124 ld->perPatch = info->in[idx].patch;
2125 return ld->getDef(0);
2126 case TGSI_FILE_OUTPUT:
2127 assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
2128 ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2129 ld->perPatch = info->out[idx].patch;
2130 return ld->getDef(0);
2131 case TGSI_FILE_SYSTEM_VALUE:
2132 assert(!ptr);
2133 if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
2134 info->prop.cp.numThreads[swz] == 1)
2135 return loadImm(NULL, 0u);
2136 if (isSubGroupMask(info->sv[idx].sn) && swz > 0)
2137 return loadImm(NULL, 0u);
2138 if (info->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE)
2139 return loadImm(NULL, 32u);
2140 ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
2141 ld->perPatch = info->sv[idx].patch;
2142 return ld->getDef(0);
2143 case TGSI_FILE_TEMPORARY: {
2144 int arrayid = src.getArrayId();
2145 if (!arrayid)
2146 arrayid = code->tempArrayId[idx];
2147 adjustTempIndex(arrayid, idx, idx2d);
2148 }
2149 /* fallthrough */
2150 default:
2151 return getArrayForFile(src.getFile(), idx2d)->load(
2152 sub.cur->values, idx, swz, shiftAddress(ptr));
2153 }
2154 }
2155
2156 Value *
2157 Converter::acquireDst(int d, int c)
2158 {
2159 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2160 const unsigned f = dst.getFile();
2161 int idx = dst.getIndex(0);
2162 int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2163
2164 if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY ||
2165 f == TGSI_FILE_IMAGE)
2166 return NULL;
2167
2168 if (dst.isIndirect(0) ||
2169 f == TGSI_FILE_SYSTEM_VALUE ||
2170 (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
2171 return getScratch();
2172
2173 if (f == TGSI_FILE_TEMPORARY) {
2174 int arrayid = dst.getArrayId();
2175 if (!arrayid)
2176 arrayid = code->tempArrayId[idx];
2177 adjustTempIndex(arrayid, idx, idx2d);
2178 }
2179
2180 return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
2181 }
2182
2183 void
2184 Converter::storeDst(int d, int c, Value *val)
2185 {
2186 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2187
2188 if (tgsi.getSaturate()) {
2189 mkOp1(OP_SAT, dstTy, val, val);
2190 }
2191
2192 Value *ptr = NULL;
2193 if (dst.isIndirect(0))
2194 ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));
2195
2196 if (info->io.genUserClip > 0 &&
2197 dst.getFile() == TGSI_FILE_OUTPUT &&
2198 !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
2199 mkMov(clipVtx[c], val);
2200 val = clipVtx[c];
2201 }
2202
2203 storeDst(dst, c, val, ptr);
2204 }
2205
2206 void
2207 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
2208 Value *val, Value *ptr)
2209 {
2210 const unsigned f = dst.getFile();
2211 int idx = dst.getIndex(0);
2212 int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2213
2214 if (f == TGSI_FILE_SYSTEM_VALUE) {
2215 assert(!ptr);
2216 mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
2217 } else
2218 if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
2219
2220 if (ptr || (info->out[idx].mask & (1 << c))) {
2221 /* Save the viewport index into a scratch register so that it can be
2222 exported at EMIT time */
2223 if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
2224 prog->getType() == Program::TYPE_GEOMETRY &&
2225 viewport != NULL)
2226 mkOp1(OP_MOV, TYPE_U32, viewport, val);
2227 else
2228 mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
2229 info->out[idx].patch;
2230 }
2231 } else
2232 if (f == TGSI_FILE_TEMPORARY ||
2233 f == TGSI_FILE_ADDRESS ||
2234 f == TGSI_FILE_OUTPUT) {
2235 if (f == TGSI_FILE_TEMPORARY) {
2236 int arrayid = dst.getArrayId();
2237 if (!arrayid)
2238 arrayid = code->tempArrayId[idx];
2239 adjustTempIndex(arrayid, idx, idx2d);
2240 }
2241
2242 getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
2243 } else {
2244 assert(!"invalid dst file");
2245 }
2246 }
2247
// Run the statement that follows once for every channel (0..3) that is not
// masked out in destination operand d of inst; chan must be an assignable
// integer variable declared by the caller.
//
// The expansion ends in "else" rather than in a bare "if" so that an "else"
// written by the caller after the loop body cannot bind to the macro's
// internal condition (dangling else), and inst is parenthesized so that any
// expression may be passed.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst)   \
   for (chan = 0; chan < 4; ++chan)                   \
      if ((inst).getDst(d).isMasked(chan)) {} else
2251
2252 Value *
2253 Converter::buildDot(int dim)
2254 {
2255 assert(dim > 0);
2256
2257 Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
2258 Value *dotp = getScratch();
2259
2260 mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1)
2261 ->dnz = info->io.mul_zero_wins;
2262
2263 for (int c = 1; c < dim; ++c) {
2264 src0 = fetchSrc(0, c);
2265 src1 = fetchSrc(1, c);
2266 mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp)
2267 ->dnz = info->io.mul_zero_wins;
2268 }
2269 return dotp;
2270 }
2271
2272 void
2273 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
2274 {
2275 FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
2276 join->fixed = 1;
2277 conv->insertHead(join);
2278
2279 assert(!fork->joinAt);
2280 fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
2281 fork->insertBefore(fork->getExit(), fork->joinAt);
2282 }
2283
2284 void
2285 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
2286 {
2287 unsigned rIdx = 0, sIdx = 0;
2288
2289 if (R >= 0 && tgsi.getSrc(R).getFile() != TGSI_FILE_SAMPLER) {
2290 // This is the bindless case. We have to get the actual value and pass
2291 // it in. This will be the complete handle.
2292 tex->tex.rIndirectSrc = s;
2293 tex->setSrc(s++, fetchSrc(R, 0));
2294 tex->setTexture(tgsi.getTexture(code, R), 0xff, 0x1f);
2295 tex->tex.bindless = true;
2296 return;
2297 }
2298
2299 if (R >= 0)
2300 rIdx = tgsi.getSrc(R).getIndex(0);
2301 if (S >= 0)
2302 sIdx = tgsi.getSrc(S).getIndex(0);
2303
2304 tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
2305
2306 if (tgsi.getSrc(R).isIndirect(0)) {
2307 tex->tex.rIndirectSrc = s;
2308 tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
2309 }
2310 if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
2311 tex->tex.sIndirectSrc = s;
2312 tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
2313 }
2314 }
2315
2316 void
2317 Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
2318 {
2319 TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
2320 tex->tex.query = query;
2321 unsigned int c, d;
2322
2323 for (d = 0, c = 0; c < 4; ++c) {
2324 if (!dst0[c])
2325 continue;
2326 tex->tex.mask |= 1 << c;
2327 tex->setDef(d++, dst0[c]);
2328 }
2329 if (query == TXQ_DIMS)
2330 tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
2331 else
2332 tex->setSrc((c = 0), zero);
2333
2334 setTexRS(tex, ++c, R, -1);
2335
2336 bb->insertTail(tex);
2337 }
2338
2339 void
2340 Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
2341 {
2342 Value *proj = fetchSrc(0, 3);
2343 Instruction *insn = proj->getUniqueInsn();
2344 int c;
2345
2346 if (insn->op == OP_PINTERP) {
2347 bb->insertTail(insn = cloneForward(func, insn));
2348 insn->op = OP_LINTERP;
2349 insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
2350 insn->setSrc(1, NULL);
2351 proj = insn->getDef(0);
2352 }
2353 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
2354
2355 for (c = 0; c < 4; ++c) {
2356 if (!(mask & (1 << c)))
2357 continue;
2358 if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
2359 continue;
2360 mask &= ~(1 << c);
2361
2362 bb->insertTail(insn = cloneForward(func, insn));
2363 insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
2364 insn->setSrc(1, proj);
2365 dst[c] = insn->getDef(0);
2366 }
2367 if (!mask)
2368 return;
2369
2370 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
2371
2372 for (c = 0; c < 4; ++c)
2373 if (mask & (1 << c))
2374 dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
2375 }
2376
2377 // order of nv50 ir sources: x y z layer lod/bias shadow
2378 // order of TGSI TEX sources: x y z layer shadow lod/bias
2379 // lowering will finally set the hw specific order (like array first on nvc0)
2380 void
2381 Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
2382 {
2383 Value *arg[4], *src[8];
2384 Value *lod = NULL, *shd = NULL;
2385 unsigned int s, c, d;
2386 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2387
2388 TexInstruction::Target tgt = tgsi.getTexture(code, R);
2389
2390 for (s = 0; s < tgt.getArgCount(); ++s)
2391 arg[s] = src[s] = fetchSrc(0, s);
2392
2393 if (tgsi.getOpcode() == TGSI_OPCODE_TEX_LZ)
2394 lod = loadImm(NULL, 0);
2395 else if (texi->op == OP_TXL || texi->op == OP_TXB)
2396 lod = fetchSrc(L >> 4, L & 3);
2397
2398 if (C == 0x0f)
2399 C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
2400
2401 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 &&
2402 tgt == TEX_TARGET_CUBE_ARRAY_SHADOW)
2403 shd = fetchSrc(1, 0);
2404 else if (tgt.isShadow())
2405 shd = fetchSrc(C >> 4, C & 3);
2406
2407 if (texi->op == OP_TXD) {
2408 for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
2409 texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
2410 texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
2411 }
2412 }
2413
2414 // cube textures don't care about projection value, it's divided out
2415 if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
2416 unsigned int n = tgt.getDim();
2417 if (shd) {
2418 arg[n] = shd;
2419 ++n;
2420 assert(tgt.getDim() == tgt.getArgCount());
2421 }
2422 loadProjTexCoords(src, arg, (1 << n) - 1);
2423 if (shd)
2424 shd = src[n - 1];
2425 }
2426
2427 for (c = 0, d = 0; c < 4; ++c) {
2428 if (dst[c]) {
2429 texi->setDef(d++, dst[c]);
2430 texi->tex.mask |= 1 << c;
2431 } else {
2432 // NOTE: maybe hook up def too, for CSE
2433 }
2434 }
2435 for (s = 0; s < tgt.getArgCount(); ++s)
2436 texi->setSrc(s, src[s]);
2437 if (lod)
2438 texi->setSrc(s++, lod);
2439 if (shd)
2440 texi->setSrc(s++, shd);
2441
2442 setTexRS(texi, s, R, S);
2443
2444 if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
2445 texi->tex.levelZero = true;
2446 if (prog->getType() != Program::TYPE_FRAGMENT &&
2447 (tgsi.getOpcode() == TGSI_OPCODE_TEX ||
2448 tgsi.getOpcode() == TGSI_OPCODE_TEX2 ||
2449 tgsi.getOpcode() == TGSI_OPCODE_TXP))
2450 texi->tex.levelZero = true;
2451 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
2452 texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);
2453
2454 texi->tex.useOffsets = tgsi.getNumTexOffsets();
2455 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2456 for (c = 0; c < 3; ++c) {
2457 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2458 texi->offset[s][c].setInsn(texi);
2459 }
2460 }
2461
2462 bb->insertTail(texi);
2463 }
2464
2465 // 1st source: xyz = coordinates, w = lod/sample
2466 // 2nd source: offset
2467 void
2468 Converter::handleTXF(Value *dst[4], int R, int L_M)
2469 {
2470 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2471 int ms;
2472 unsigned int c, d, s;
2473
2474 texi->tex.target = tgsi.getTexture(code, R);
2475
2476 ms = texi->tex.target.isMS() ? 1 : 0;
2477 texi->tex.levelZero = ms; /* MS textures don't have mip-maps */
2478
2479 for (c = 0, d = 0; c < 4; ++c) {
2480 if (dst[c]) {
2481 texi->setDef(d++, dst[c]);
2482 texi->tex.mask |= 1 << c;
2483 }
2484 }
2485 for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c)
2486 texi->setSrc(c, fetchSrc(0, c));
2487 if (!ms && tgsi.getOpcode() == TGSI_OPCODE_TXF_LZ)
2488 texi->setSrc(c++, loadImm(NULL, 0));
2489 else
2490 texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms
2491
2492 setTexRS(texi, c, R, -1);
2493
2494 texi->tex.useOffsets = tgsi.getNumTexOffsets();
2495 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2496 for (c = 0; c < 3; ++c) {
2497 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2498 texi->offset[s][c].setInsn(texi);
2499 }
2500 }
2501
2502 bb->insertTail(texi);
2503 }
2504
2505 void
2506 Converter::handleFBFETCH(Value *dst[4])
2507 {
2508 TexInstruction *texi = new_TexInstruction(func, OP_TXF);
2509 unsigned int c, d;
2510
2511 texi->tex.target = TEX_TARGET_2D_MS_ARRAY;
2512 texi->tex.levelZero = 1;
2513 texi->tex.useOffsets = 0;
2514
2515 for (c = 0, d = 0; c < 4; ++c) {
2516 if (dst[c]) {
2517 texi->setDef(d++, dst[c]);
2518 texi->tex.mask |= 1 << c;
2519 }
2520 }
2521
2522 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
2523 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
2524 Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
2525 Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));
2526
2527 mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z;
2528 mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;
2529 texi->setSrc(0, x);
2530 texi->setSrc(1, y);
2531 texi->setSrc(2, z);
2532 texi->setSrc(3, ms);
2533
2534 texi->tex.r = texi->tex.s = -1;
2535
2536 bb->insertTail(texi);
2537 }
2538
2539 void
2540 Converter::handleLIT(Value *dst0[4])
2541 {
2542 Value *val0 = NULL;
2543 unsigned int mask = tgsi.getDst(0).getMask();
2544
2545 if (mask & (1 << 0))
2546 loadImm(dst0[0], 1.0f);
2547
2548 if (mask & (1 << 3))
2549 loadImm(dst0[3], 1.0f);
2550
2551 if (mask & (3 << 1)) {
2552 val0 = getScratch();
2553 mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
2554 if (mask & (1 << 1))
2555 mkMov(dst0[1], val0);
2556 }
2557
2558 if (mask & (1 << 2)) {
2559 Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
2560 Value *val1 = getScratch(), *val3 = getScratch();
2561
2562 Value *pos128 = loadImm(NULL, +127.999999f);
2563 Value *neg128 = loadImm(NULL, -127.999999f);
2564
2565 mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
2566 mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
2567 mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
2568 mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
2569
2570 mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0);
2571 }
2572 }
2573
2574 /* Keep this around for now as reference when adding img support
2575 static inline bool
2576 isResourceSpecial(const int r)
2577 {
2578 return (r == TGSI_RESOURCE_GLOBAL ||
2579 r == TGSI_RESOURCE_LOCAL ||
2580 r == TGSI_RESOURCE_PRIVATE ||
2581 r == TGSI_RESOURCE_INPUT);
2582 }
2583
2584 static inline bool
2585 isResourceRaw(const tgsi::Source *code, const int r)
2586 {
2587 return isResourceSpecial(r) || code->resources[r].raw;
2588 }
2589
2590 static inline nv50_ir::TexTarget
2591 getResourceTarget(const tgsi::Source *code, int r)
2592 {
2593 if (isResourceSpecial(r))
2594 return nv50_ir::TEX_TARGET_BUFFER;
2595 return tgsi::translateTexture(code->resources.at(r).target);
2596 }
2597
2598 Symbol *
2599 Converter::getResourceBase(const int r)
2600 {
2601 Symbol *sym = NULL;
2602
2603 switch (r) {
2604 case TGSI_RESOURCE_GLOBAL:
2605 sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
2606 info->io.auxCBSlot);
2607 break;
2608 case TGSI_RESOURCE_LOCAL:
2609 assert(prog->getType() == Program::TYPE_COMPUTE);
2610 sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
2611 info->prop.cp.sharedOffset);
2612 break;
2613 case TGSI_RESOURCE_PRIVATE:
2614 sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
2615 info->bin.tlsSpace);
2616 break;
2617 case TGSI_RESOURCE_INPUT:
2618 assert(prog->getType() == Program::TYPE_COMPUTE);
2619 sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
2620 info->prop.cp.inputOffset);
2621 break;
2622 default:
2623 sym = new_Symbol(prog,
2624 nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
2625 break;
2626 }
2627 return sym;
2628 }
2629
2630 void
2631 Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
2632 {
2633 const int arg =
2634 TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
2635
2636 for (int c = 0; c < arg; ++c)
2637 coords.push_back(fetchSrc(s, c));
2638
2639 // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
2640 if (r == TGSI_RESOURCE_LOCAL ||
2641 r == TGSI_RESOURCE_PRIVATE ||
2642 r == TGSI_RESOURCE_INPUT)
2643 coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
2644 coords[0]);
2645 }
2646
2647 static inline int
2648 partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
2649 {
2650 int n = 0;
2651
2652 while (mask) {
2653 if (mask & 1) {
2654 size[n]++;
2655 } else {
2656 if (size[n])
2657 comp[n = 1] = size[0] + 1;
2658 else
2659 comp[n]++;
2660 }
2661 mask >>= 1;
2662 }
2663 if (size[0] == 3) {
2664 n = 1;
2665 size[0] = (comp[0] == 1) ? 1 : 2;
2666 size[1] = 3 - size[0];
2667 comp[1] = comp[0] + size[0];
2668 }
2669 return n + 1;
2670 }
2671 */
2672 void
2673 Converter::getImageCoords(std::vector<Value *> &coords, int s)
2674 {
2675 TexInstruction::Target t =
2676 TexInstruction::Target(tgsi.getImageTarget());
2677 const int arg = t.getDim() + (t.isArray() || t.isCube());
2678
2679 for (int c = 0; c < arg; ++c)
2680 coords.push_back(fetchSrc(s, c));
2681
2682 if (t.isMS())
2683 coords.push_back(fetchSrc(s, 3));
2684 }
2685
2686 // For raw loads, granularity is 4 byte.
2687 // Usage of the texture read mask on OP_SULDP is not allowed.
2688 void
2689 Converter::handleLOAD(Value *dst0[4])
2690 {
2691 const int r = tgsi.getSrc(0).getIndex(0);
2692 int c;
2693 std::vector<Value *> off, src, ldv, def;
2694 Value *ind = NULL;
2695
2696 if (tgsi.getSrc(0).isIndirect(0))
2697 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2698
2699 switch (tgsi.getSrc(0).getFile()) {
2700 case TGSI_FILE_BUFFER:
2701 case TGSI_FILE_MEMORY:
2702 for (c = 0; c < 4; ++c) {
2703 if (!dst0[c])
2704 continue;
2705
2706 Value *off;
2707 Symbol *sym;
2708 uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4;
2709
2710 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
2711 off = NULL;
2712 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2713 tgsi.getSrc(1).getValueU32(0, info) +
2714 src0_component_offset);
2715 } else {
2716 // yzw are ignored for buffers
2717 off = fetchSrc(1, 0);
2718 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2719 src0_component_offset);
2720 }
2721
2722 Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
2723 ld->cache = tgsi.getCacheMode();
2724 if (ind)
2725 ld->setIndirect(0, 1, ind);
2726 }
2727 break;
2728 default: {
2729 getImageCoords(off, 1);
2730 def.resize(4);
2731
2732 for (c = 0; c < 4; ++c) {
2733 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2734 def[c] = getScratch();
2735 else
2736 def[c] = dst0[c];
2737 }
2738
2739 bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
2740 if (bindless)
2741 ind = fetchSrc(0, 0);
2742
2743 TexInstruction *ld =
2744 mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off);
2745 ld->tex.mask = tgsi.getDst(0).getMask();
2746 ld->tex.format = tgsi.getImageFormat();
2747 ld->cache = tgsi.getCacheMode();
2748 ld->tex.bindless = bindless;
2749 if (!bindless)
2750 ld->tex.r = r;
2751 if (ind)
2752 ld->setIndirectR(ind);
2753
2754 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2755 if (dst0[c] != def[c])
2756 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2757 break;
2758 }
2759 }
2760
2761
2762 /* Keep this around for now as reference when adding img support
2763 getResourceCoords(off, r, 1);
2764
2765 if (isResourceRaw(code, r)) {
2766 uint8_t mask = 0;
2767 uint8_t comp[2] = { 0, 0 };
2768 uint8_t size[2] = { 0, 0 };
2769
2770 Symbol *base = getResourceBase(r);
2771
2772 // determine the base and size of the at most 2 load ops
2773 for (c = 0; c < 4; ++c)
2774 if (!tgsi.getDst(0).isMasked(c))
2775 mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
2776
2777 int n = partitionLoadStore(comp, size, mask);
2778
2779 src = off;
2780
2781 def.resize(4); // index by component, the ones we need will be non-NULL
2782 for (c = 0; c < 4; ++c) {
2783 if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
2784 def[c] = dst0[c];
2785 else
2786 if (mask & (1 << c))
2787 def[c] = getScratch();
2788 }
2789
2790 const bool useLd = isResourceSpecial(r) ||
2791 (info->io.nv50styleSurfaces &&
2792 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2793
2794 for (int i = 0; i < n; ++i) {
2795 ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
2796
2797 if (comp[i]) // adjust x component of source address if necessary
2798 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2799 off[0], mkImm(comp[i] * 4));
2800 else
2801 src[0] = off[0];
2802
2803 if (useLd) {
2804 Instruction *ld =
2805 mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
2806 for (size_t c = 1; c < ldv.size(); ++c)
2807 ld->setDef(c, ldv[c]);
2808 } else {
2809 mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
2810 0, ldv, src)->dType = typeOfSize(size[i] * 4);
2811 }
2812 }
2813 } else {
2814 def.resize(4);
2815 for (c = 0; c < 4; ++c) {
2816 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2817 def[c] = getScratch();
2818 else
2819 def[c] = dst0[c];
2820 }
2821
2822 mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
2823 def, off);
2824 }
2825 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2826 if (dst0[c] != def[c])
2827 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2828 */
2829 }
2830
2831 // For formatted stores, the write mask on OP_SUSTP can be used.
2832 // Raw stores have to be split.
2833 void
2834 Converter::handleSTORE()
2835 {
2836 const int r = tgsi.getDst(0).getIndex(0);
2837 int c;
2838 std::vector<Value *> off, src, dummy;
2839 Value *ind = NULL;
2840
2841 if (tgsi.getDst(0).isIndirect(0))
2842 ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0);
2843
2844 switch (tgsi.getDst(0).getFile()) {
2845 case TGSI_FILE_BUFFER:
2846 case TGSI_FILE_MEMORY:
2847 for (c = 0; c < 4; ++c) {
2848 if (!(tgsi.getDst(0).getMask() & (1 << c)))
2849 continue;
2850
2851 Symbol *sym;
2852 Value *off;
2853 if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
2854 off = NULL;
2855 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
2856 tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
2857 } else {
2858 // yzw are ignored for buffers
2859 off = fetchSrc(0, 0);
2860 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
2861 }
2862
2863 Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
2864 st->cache = tgsi.getCacheMode();
2865 if (ind)
2866 st->setIndirect(0, 1, ind);
2867 }
2868 break;
2869 default: {
2870 getImageCoords(off, 0);
2871 src = off;
2872
2873 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2874 src.push_back(fetchSrc(1, c));
2875
2876 bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE;
2877 if (bindless)
2878 ind = fetchDst(0, 0);
2879
2880 TexInstruction *st =
2881 mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src);
2882 st->tex.mask = tgsi.getDst(0).getMask();
2883 st->tex.format = tgsi.getImageFormat();
2884 st->cache = tgsi.getCacheMode();
2885 st->tex.bindless = bindless;
2886 if (!bindless)
2887 st->tex.r = r;
2888 if (ind)
2889 st->setIndirectR(ind);
2890
2891 break;
2892 }
2893 }
2894
2895 /* Keep this around for now as reference when adding img support
2896 getResourceCoords(off, r, 0);
2897 src = off;
2898 const int s = src.size();
2899
2900 if (isResourceRaw(code, r)) {
2901 uint8_t comp[2] = { 0, 0 };
2902 uint8_t size[2] = { 0, 0 };
2903
2904 int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
2905
2906 Symbol *base = getResourceBase(r);
2907
2908 const bool useSt = isResourceSpecial(r) ||
2909 (info->io.nv50styleSurfaces &&
2910 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2911
2912 for (int i = 0; i < n; ++i) {
2913 if (comp[i]) // adjust x component of source address if necessary
2914 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2915 off[0], mkImm(comp[i] * 4));
2916 else
2917 src[0] = off[0];
2918
2919 const DataType stTy = typeOfSize(size[i] * 4);
2920
2921 if (useSt) {
2922 Instruction *st =
2923 mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
2924 for (c = 1; c < size[i]; ++c)
2925 st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
2926 st->setIndirect(0, 0, src[0]);
2927 } else {
2928 // attach values to be stored
2929 src.resize(s + size[i]);
2930 for (c = 0; c < size[i]; ++c)
2931 src[s + c] = fetchSrc(1, comp[i] + c);
2932 mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
2933 0, dummy, src)->setType(stTy);
2934 }
2935 }
2936 } else {
2937 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2938 src.push_back(fetchSrc(1, c));
2939
2940 mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
2941 dummy, src)->tex.mask = tgsi.getDst(0).getMask();
2942 }
2943 */
2944 }
2945
2946 // XXX: These only work on resources with the single-component u32/s32 formats.
2947 // Therefore the result is replicated. This might not be intended by TGSI, but
2948 // operating on more than 1 component would produce undefined results because
2949 // they do not exist.
2950 void
2951 Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
2952 {
2953 const int r = tgsi.getSrc(0).getIndex(0);
2954 std::vector<Value *> srcv;
2955 std::vector<Value *> defv;
2956 LValue *dst = getScratch();
2957 Value *ind = NULL;
2958
2959 if (tgsi.getSrc(0).isIndirect(0))
2960 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2961
2962 switch (tgsi.getSrc(0).getFile()) {
2963 case TGSI_FILE_BUFFER:
2964 case TGSI_FILE_MEMORY:
2965 for (int c = 0; c < 4; ++c) {
2966 if (!dst0[c])
2967 continue;
2968
2969 Instruction *insn;
2970 Value *off = fetchSrc(1, c);
2971 Value *sym;
2972 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
2973 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2974 tgsi.getSrc(1).getValueU32(c, info));
2975 else
2976 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
2977 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2978 insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
2979 else
2980 insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
2981 if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
2982 insn->setIndirect(0, 0, off);
2983 if (ind)
2984 insn->setIndirect(0, 1, ind);
2985 insn->subOp = subOp;
2986 }
2987 for (int c = 0; c < 4; ++c)
2988 if (dst0[c])
2989 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
2990 break;
2991 default: {
2992 getImageCoords(srcv, 1);
2993 defv.push_back(dst);
2994 srcv.push_back(fetchSrc(2, 0));
2995
2996 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2997 srcv.push_back(fetchSrc(3, 0));
2998
2999 bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
3000 if (bindless)
3001 ind = fetchSrc(0, 0);
3002
3003 TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(),
3004 0, 0, defv, srcv);
3005 tex->subOp = subOp;
3006 tex->tex.mask = 1;
3007 tex->tex.format = tgsi.getImageFormat();
3008 tex->setType(ty);
3009 tex->tex.bindless = bindless;
3010 if (!bindless)
3011 tex->tex.r = r;
3012 if (ind)
3013 tex->setIndirectR(ind);
3014
3015 for (int c = 0; c < 4; ++c)
3016 if (dst0[c])
3017 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
3018 break;
3019 }
3020 }
3021
3022 /* Keep this around for now as reference when adding img support
3023 getResourceCoords(srcv, r, 1);
3024
3025 if (isResourceSpecial(r)) {
3026 assert(r != TGSI_RESOURCE_INPUT);
3027 Instruction *insn;
3028 insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0));
3029 insn->subOp = subOp;
3030 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
3031 insn->setSrc(2, fetchSrc(3, 0));
3032 insn->setIndirect(0, 0, srcv.at(0));
3033 } else {
3034 operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP;
3035 TexTarget targ = getResourceTarget(code, r);
3036 int idx = code->resources[r].slot;
3037 defv.push_back(dst);
3038 srcv.push_back(fetchSrc(2, 0));
3039 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
3040 srcv.push_back(fetchSrc(3, 0));
3041 TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv);
3042 tex->subOp = subOp;
3043 tex->tex.mask = 1;
3044 tex->setType(ty);
3045 }
3046
3047 for (int c = 0; c < 4; ++c)
3048 if (dst0[c])
3049 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
3050 */
3051 }
3052
3053 void
3054 Converter::handleINTERP(Value *dst[4])
3055 {
3056 // Check whether the input is linear. All other attributes ignored.
3057 Instruction *insn;
3058 Value *offset = NULL, *ptr = NULL, *w = NULL;
3059 Symbol *sym[4] = { NULL };
3060 bool linear;
3061 operation op = OP_NOP;
3062 int c, mode = 0;
3063
3064 tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
3065
3066 // In some odd cases, in large part due to varying packing, the source
3067 // might not actually be an input. This is illegal TGSI, but it's easier to
3068 // account for it here than it is to fix it where the TGSI is being
3069 // generated. In that case, it's going to be a straight up mov (or sequence
3070 // of mov's) from the input in question. We follow the mov chain to see
3071 // which input we need to use.
3072 if (src.getFile() != TGSI_FILE_INPUT) {
3073 if (src.isIndirect(0)) {
3074 ERROR("Ignoring indirect input interpolation\n");
3075 return;
3076 }
3077 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3078 Value *val = fetchSrc(0, c);
3079 assert(val->defs.size() == 1);
3080 insn = val->getInsn();
3081 while (insn->op == OP_MOV) {
3082 assert(insn->getSrc(0)->defs.size() == 1);
3083 insn = insn->getSrc(0)->getInsn();
3084 if (!insn) {
3085 ERROR("Miscompiling shader due to unhandled INTERP\n");
3086 return;
3087 }
3088 }
3089 if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) {
3090 ERROR("Trying to interpolate non-input, this is not allowed.\n");
3091 return;
3092 }
3093 sym[c] = insn->getSrc(0)->asSym();
3094 assert(sym[c]);
3095 op = insn->op;
3096 mode = insn->ipa;
3097 ptr = insn->getIndirect(0, 0);
3098 }
3099 } else {
3100 if (src.isIndirect(0))
3101 ptr = shiftAddress(fetchSrc(src.getIndirect(0), 0, NULL));
3102
3103 // We can assume that the fixed index will point to an input of the same
3104 // interpolation type in case of an indirect.
3105 // TODO: Make use of ArrayID.
3106 linear = info->in[src.getIndex(0)].linear;
3107 if (linear) {
3108 op = OP_LINTERP;
3109 mode = NV50_IR_INTERP_LINEAR;
3110 } else {
3111 op = OP_PINTERP;
3112 mode = NV50_IR_INTERP_PERSPECTIVE;
3113 }
3114 }
3115
3116 switch (tgsi.getOpcode()) {
3117 case TGSI_OPCODE_INTERP_CENTROID:
3118 mode |= NV50_IR_INTERP_CENTROID;
3119 break;
3120 case TGSI_OPCODE_INTERP_SAMPLE:
3121 insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0));
3122 insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
3123 mode |= NV50_IR_INTERP_OFFSET;
3124 break;
3125 case TGSI_OPCODE_INTERP_OFFSET: {
3126 // The input in src1.xy is float, but we need a single 32-bit value
3127 // where the upper and lower 16 bits are encoded in S0.12 format. We need
3128 // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096,
3129 // and then convert to s32.
3130 Value *offs[2];
3131 for (c = 0; c < 2; c++) {
3132 offs[c] = getScratch();
3133 mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f));
3134 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
3135 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
3136 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
3137 }
3138 offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(),
3139 offs[1], mkImm(0x1010), offs[0]);
3140 mode |= NV50_IR_INTERP_OFFSET;
3141 break;
3142 }
3143 }
3144
3145 if (op == OP_PINTERP) {
3146 if (offset) {
3147 w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset);
3148 mkOp1(OP_RCP, TYPE_F32, w, w);
3149 } else {
3150 w = fragCoord[3];
3151 }
3152 }
3153
3154
3155 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3156 insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c));
3157 if (op == OP_PINTERP)
3158 insn->setSrc(1, w);
3159 if (offset)
3160 insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);
3161 if (ptr)
3162 insn->setIndirect(0, 0, ptr);
3163
3164 insn->setInterpolate(mode);
3165 }
3166 }
3167
3168 Converter::Subroutine *
3169 Converter::getSubroutine(unsigned ip)
3170 {
3171 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3172
3173 if (it == sub.map.end())
3174 it = sub.map.insert(std::make_pair(
3175 ip, Subroutine(new Function(prog, "SUB", ip)))).first;
3176
3177 return &it->second;
3178 }
3179
3180 Converter::Subroutine *
3181 Converter::getSubroutine(Function *f)
3182 {
3183 unsigned ip = f->getLabel();
3184 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3185
3186 if (it == sub.map.end())
3187 it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
3188
3189 return &it->second;
3190 }
3191
3192 bool
3193 Converter::isEndOfSubroutine(uint ip)
3194 {
3195 assert(ip < code->scan.num_instructions);
3196 tgsi::Instruction insn(&code->insns[ip]);
3197 return (insn.getOpcode() == TGSI_OPCODE_END ||
3198 insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
3199 // does END occur at end of main or the very end ?
3200 insn.getOpcode() == TGSI_OPCODE_BGNSUB);
3201 }
3202
3203 bool
3204 Converter::handleInstruction(const struct tgsi_full_instruction *insn)
3205 {
3206 Instruction *geni;
3207
3208 Value *dst0[4], *rDst0[4];
3209 Value *src0, *src1, *src2, *src3;
3210 Value *val0, *val1;
3211 int c;
3212
3213 tgsi = tgsi::Instruction(insn);
3214
3215 bool useScratchDst = tgsi.checkDstSrcAliasing();
3216
3217 operation op = tgsi.getOP();
3218 dstTy = tgsi.inferDstType();
3219 srcTy = tgsi.inferSrcType();
3220
3221 unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
3222
3223 if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
3224 for (c = 0; c < 4; ++c) {
3225 rDst0[c] = acquireDst(0, c);
3226 dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
3227 }
3228 }
3229
3230 switch (tgsi.getOpcode()) {
3231 case TGSI_OPCODE_ADD:
3232 case TGSI_OPCODE_UADD:
3233 case TGSI_OPCODE_AND:
3234 case TGSI_OPCODE_DIV:
3235 case TGSI_OPCODE_IDIV:
3236 case TGSI_OPCODE_UDIV:
3237 case TGSI_OPCODE_MAX:
3238 case TGSI_OPCODE_MIN:
3239 case TGSI_OPCODE_IMAX:
3240 case TGSI_OPCODE_IMIN:
3241 case TGSI_OPCODE_UMAX:
3242 case TGSI_OPCODE_UMIN:
3243 case TGSI_OPCODE_MOD:
3244 case TGSI_OPCODE_UMOD:
3245 case TGSI_OPCODE_MUL:
3246 case TGSI_OPCODE_UMUL:
3247 case TGSI_OPCODE_IMUL_HI:
3248 case TGSI_OPCODE_UMUL_HI:
3249 case TGSI_OPCODE_OR:
3250 case TGSI_OPCODE_SHL:
3251 case TGSI_OPCODE_ISHR:
3252 case TGSI_OPCODE_USHR:
3253 case TGSI_OPCODE_XOR:
3254 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3255 src0 = fetchSrc(0, c);
3256 src1 = fetchSrc(1, c);
3257 geni = mkOp2(op, dstTy, dst0[c], src0, src1);
3258 geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3259 if (op == OP_MUL && dstTy == TYPE_F32)
3260 geni->dnz = info->io.mul_zero_wins;
3261 geni->precise = insn->Instruction.Precise;
3262 }
3263 break;
3264 case TGSI_OPCODE_MAD:
3265 case TGSI_OPCODE_UMAD:
3266 case TGSI_OPCODE_FMA:
3267 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3268 src0 = fetchSrc(0, c);
3269 src1 = fetchSrc(1, c);
3270 src2 = fetchSrc(2, c);
3271 geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2);
3272 if (dstTy == TYPE_F32)
3273 geni->dnz = info->io.mul_zero_wins;
3274 geni->precise = insn->Instruction.Precise;
3275 }
3276 break;
3277 case TGSI_OPCODE_MOV:
3278 case TGSI_OPCODE_CEIL:
3279 case TGSI_OPCODE_FLR:
3280 case TGSI_OPCODE_TRUNC:
3281 case TGSI_OPCODE_RCP:
3282 case TGSI_OPCODE_SQRT:
3283 case TGSI_OPCODE_IABS:
3284 case TGSI_OPCODE_INEG:
3285 case TGSI_OPCODE_NOT:
3286 case TGSI_OPCODE_DDX:
3287 case TGSI_OPCODE_DDY:
3288 case TGSI_OPCODE_DDX_FINE:
3289 case TGSI_OPCODE_DDY_FINE:
3290 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3291 mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
3292 break;
3293 case TGSI_OPCODE_RSQ:
3294 src0 = fetchSrc(0, 0);
3295 val0 = getScratch();
3296 mkOp1(OP_ABS, TYPE_F32, val0, src0);
3297 mkOp1(OP_RSQ, TYPE_F32, val0, val0);
3298 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3299 mkMov(dst0[c], val0);
3300 break;
3301 case TGSI_OPCODE_ARL:
3302 case TGSI_OPCODE_ARR:
3303 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3304 const RoundMode rnd =
3305 tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M;
3306 src0 = fetchSrc(0, c);
3307 mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd;
3308 }
3309 break;
3310 case TGSI_OPCODE_UARL:
3311 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3312 mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));
3313 break;
3314 case TGSI_OPCODE_POW:
3315 val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0));
3316 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3317 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3318 break;
3319 case TGSI_OPCODE_EX2:
3320 case TGSI_OPCODE_LG2:
3321 val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
3322 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3323 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3324 break;
3325 case TGSI_OPCODE_COS:
3326 case TGSI_OPCODE_SIN:
3327 val0 = getScratch();
3328 if (mask & 7) {
3329 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
3330 mkOp1(op, TYPE_F32, val0, val0);
3331 for (c = 0; c < 3; ++c)
3332 if (dst0[c])
3333 mkMov(dst0[c], val0);
3334 }
3335 if (dst0[3]) {
3336 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
3337 mkOp1(op, TYPE_F32, dst0[3], val0);
3338 }
3339 break;
3340 case TGSI_OPCODE_EXP:
3341 src0 = fetchSrc(0, 0);
3342 val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
3343 if (dst0[1])
3344 mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
3345 if (dst0[0])
3346 mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
3347 if (dst0[2])
3348 mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
3349 if (dst0[3])
3350 loadImm(dst0[3], 1.0f);
3351 break;
3352 case TGSI_OPCODE_LOG:
3353 src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
3354 val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
3355 if (dst0[0] || dst0[1])
3356 val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
3357 if (dst0[1]) {
3358 mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
3359 mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
3360 mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0)
3361 ->dnz = info->io.mul_zero_wins;
3362 }
3363 if (dst0[3])
3364 loadImm(dst0[3], 1.0f);
3365 break;
3366 case TGSI_OPCODE_DP2:
3367 val0 = buildDot(2);
3368 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3369 mkMov(dst0[c], val0);
3370 break;
3371 case TGSI_OPCODE_DP3:
3372 val0 = buildDot(3);
3373 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3374 mkMov(dst0[c], val0);
3375 break;
3376 case TGSI_OPCODE_DP4:
3377 val0 = buildDot(4);
3378 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3379 mkMov(dst0[c], val0);
3380 break;
3381 case TGSI_OPCODE_DST:
3382 if (dst0[0])
3383 loadImm(dst0[0], 1.0f);
3384 if (dst0[1]) {
3385 src0 = fetchSrc(0, 1);
3386 src1 = fetchSrc(1, 1);
3387 mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1)
3388 ->dnz = info->io.mul_zero_wins;
3389 }
3390 if (dst0[2])
3391 mkMov(dst0[2], fetchSrc(0, 2));
3392 if (dst0[3])
3393 mkMov(dst0[3], fetchSrc(1, 3));
3394 break;
3395 case TGSI_OPCODE_LRP:
3396 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3397 src0 = fetchSrc(0, c);
3398 src1 = fetchSrc(1, c);
3399 src2 = fetchSrc(2, c);
3400 mkOp3(OP_MAD, TYPE_F32, dst0[c],
3401 mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2)
3402 ->dnz = info->io.mul_zero_wins;
3403 }
3404 break;
3405 case TGSI_OPCODE_LIT:
3406 handleLIT(dst0);
3407 break;
3408 case TGSI_OPCODE_ISSG:
3409 case TGSI_OPCODE_SSG:
3410 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3411 src0 = fetchSrc(0, c);
3412 val0 = getScratch();
3413 val1 = getScratch();
3414 mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
3415 mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
3416 if (srcTy == TYPE_F32)
3417 mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
3418 else
3419 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
3420 }
3421 break;
3422 case TGSI_OPCODE_UCMP:
3423 srcTy = TYPE_U32;
3424 /* fallthrough */
3425 case TGSI_OPCODE_CMP:
3426 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3427 src0 = fetchSrc(0, c);
3428 src1 = fetchSrc(1, c);
3429 src2 = fetchSrc(2, c);
3430 if (src1 == src2)
3431 mkMov(dst0[c], src1);
3432 else
3433 mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
3434 srcTy, dst0[c], srcTy, src1, src2, src0);
3435 }
3436 break;
3437 case TGSI_OPCODE_FRC:
3438 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3439 src0 = fetchSrc(0, c);
3440 val0 = getScratch();
3441 mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
3442 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
3443 }
3444 break;
3445 case TGSI_OPCODE_ROUND:
3446 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3447 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
3448 ->rnd = ROUND_NI;
3449 break;
3450 case TGSI_OPCODE_SLT:
3451 case TGSI_OPCODE_SGE:
3452 case TGSI_OPCODE_SEQ:
3453 case TGSI_OPCODE_SGT:
3454 case TGSI_OPCODE_SLE:
3455 case TGSI_OPCODE_SNE:
3456 case TGSI_OPCODE_FSEQ:
3457 case TGSI_OPCODE_FSGE:
3458 case TGSI_OPCODE_FSLT:
3459 case TGSI_OPCODE_FSNE:
3460 case TGSI_OPCODE_ISGE:
3461 case TGSI_OPCODE_ISLT:
3462 case TGSI_OPCODE_USEQ:
3463 case TGSI_OPCODE_USGE:
3464 case TGSI_OPCODE_USLT:
3465 case TGSI_OPCODE_USNE:
3466 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3467 src0 = fetchSrc(0, c);
3468 src1 = fetchSrc(1, c);
3469 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
3470 }
3471 break;
3472 case TGSI_OPCODE_VOTE_ALL:
3473 case TGSI_OPCODE_VOTE_ANY:
3474 case TGSI_OPCODE_VOTE_EQ:
3475 val0 = new_LValue(func, FILE_PREDICATE);
3476 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3477 mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero);
3478 mkOp1(op, dstTy, val0, val0)
3479 ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3480 mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0);
3481 }
3482 break;
3483 case TGSI_OPCODE_BALLOT:
3484 if (!tgsi.getDst(0).isMasked(0)) {
3485 val0 = new_LValue(func, FILE_PREDICATE);
3486 mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, 0), zero);
3487 mkOp1(op, TYPE_U32, dst0[0], val0)->subOp = NV50_IR_SUBOP_VOTE_ANY;
3488 }
3489 if (!tgsi.getDst(0).isMasked(1))
3490 mkMov(dst0[1], zero, TYPE_U32);
3491 break;
3492 case TGSI_OPCODE_READ_FIRST:
3493 // ReadFirstInvocationARB(src) is implemented as
3494 // ReadInvocationARB(src, findLSB(ballot(true)))
3495 val0 = getScratch();
3496 mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
3497 mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
3498 ->subOp = NV50_IR_SUBOP_EXTBF_REV;
3499 mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3500 src1 = val0;
3501 /* fallthrough */
3502 case TGSI_OPCODE_READ_INVOC:
3503 if (tgsi.getOpcode() == TGSI_OPCODE_READ_INVOC)
3504 src1 = fetchSrc(1, 0);
3505 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3506 geni = mkOp3(op, dstTy, dst0[c], fetchSrc(0, c), src1, mkImm(0x1f));
3507 geni->subOp = NV50_IR_SUBOP_SHFL_IDX;
3508 }
3509 break;
3510 case TGSI_OPCODE_CLOCK:
3511 // Stick the 32-bit clock into the high dword of the logical result.
3512 if (!tgsi.getDst(0).isMasked(0))
3513 mkOp1(OP_MOV, TYPE_U32, dst0[0], zero);
3514 if (!tgsi.getDst(0).isMasked(1))
3515 mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
3516 break;
3517 case TGSI_OPCODE_KILL_IF:
3518 val0 = new_LValue(func, FILE_PREDICATE);
3519 mask = 0;
3520 for (c = 0; c < 4; ++c) {
3521 const int s = tgsi.getSrc(0).getSwizzle(c);
3522 if (mask & (1 << s))
3523 continue;
3524 mask |= 1 << s;
3525 mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);
3526 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
3527 }
3528 break;
3529 case TGSI_OPCODE_KILL:
3530 mkOp(OP_DISCARD, TYPE_NONE, NULL);
3531 break;
3532 case TGSI_OPCODE_TEX:
3533 case TGSI_OPCODE_TEX_LZ:
3534 case TGSI_OPCODE_TXB:
3535 case TGSI_OPCODE_TXL:
3536 case TGSI_OPCODE_TXP:
3537 case TGSI_OPCODE_LODQ:
3538 // R S L C Dx Dy
3539 handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
3540 break;
3541 case TGSI_OPCODE_TXD:
3542 handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
3543 break;
3544 case TGSI_OPCODE_TG4:
3545 handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00);
3546 break;
3547 case TGSI_OPCODE_TEX2:
3548 handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
3549 break;
3550 case TGSI_OPCODE_TXB2:
3551 case TGSI_OPCODE_TXL2:
3552 handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00);
3553 break;
3554 case TGSI_OPCODE_SAMPLE:
3555 case TGSI_OPCODE_SAMPLE_B:
3556 case TGSI_OPCODE_SAMPLE_D:
3557 case TGSI_OPCODE_SAMPLE_L:
3558 case TGSI_OPCODE_SAMPLE_C:
3559 case TGSI_OPCODE_SAMPLE_C_LZ:
3560 handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
3561 break;
3562 case TGSI_OPCODE_TXF_LZ:
3563 case TGSI_OPCODE_TXF:
3564 handleTXF(dst0, 1, 0x03);
3565 break;
3566 case TGSI_OPCODE_SAMPLE_I:
3567 handleTXF(dst0, 1, 0x03);
3568 break;
3569 case TGSI_OPCODE_SAMPLE_I_MS:
3570 handleTXF(dst0, 1, 0x20);
3571 break;
3572 case TGSI_OPCODE_TXQ:
3573 case TGSI_OPCODE_SVIEWINFO:
3574 handleTXQ(dst0, TXQ_DIMS, 1);
3575 break;
3576 case TGSI_OPCODE_TXQS:
3577 // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
3578 // be in .x
3579 dst0[1] = dst0[2] = dst0[3] = NULL;
3580 std::swap(dst0[0], dst0[2]);
3581 handleTXQ(dst0, TXQ_TYPE, 0);
3582 std::swap(dst0[0], dst0[2]);
3583 break;
3584 case TGSI_OPCODE_FBFETCH:
3585 handleFBFETCH(dst0);
3586 break;
3587 case TGSI_OPCODE_F2I:
3588 case TGSI_OPCODE_F2U:
3589 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3590 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
3591 break;
3592 case TGSI_OPCODE_I2F:
3593 case TGSI_OPCODE_U2F:
3594 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3595 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
3596 break;
3597 case TGSI_OPCODE_PK2H:
3598 val0 = getScratch();
3599 val1 = getScratch();
3600 mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));
3601 mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));
3602 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3603 mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);
3604 break;
3605 case TGSI_OPCODE_UP2H:
3606 src0 = fetchSrc(0, 0);
3607 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3608 geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);
3609 geni->subOp = c & 1;
3610 }
3611 break;
3612 case TGSI_OPCODE_EMIT:
3613 /* export the saved viewport index */
3614 if (viewport != NULL) {
3615 Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,
3616 info->out[info->io.viewportId].slot[0] * 4);
3617 mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport);
3618 }
3619 /* handle user clip planes for each emitted vertex */
3620 if (info->io.genUserClip > 0)
3621 handleUserClipPlanes();
3622 /* fallthrough */
3623 case TGSI_OPCODE_ENDPRIM:
3624 {
3625 // get vertex stream (must be immediate)
3626 unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
3627 if (stream && op == OP_RESTART)
3628 break;
3629 if (info->prop.gp.maxVertices == 0)
3630 break;
3631 src0 = mkImm(stream);
3632 mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
3633 break;
3634 }
3635 case TGSI_OPCODE_IF:
3636 case TGSI_OPCODE_UIF:
3637 {
3638 BasicBlock *ifBB = new BasicBlock(func);
3639
3640 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
3641 condBBs.push(bb);
3642 joinBBs.push(bb);
3643
3644 mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
3645
3646 setPosition(ifBB, true);
3647 }
3648 break;
3649 case TGSI_OPCODE_ELSE:
3650 {
3651 BasicBlock *elseBB = new BasicBlock(func);
3652 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3653
3654 forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
3655 condBBs.push(bb);
3656
3657 forkBB->getExit()->asFlow()->target.bb = elseBB;
3658 if (!bb->isTerminated())
3659 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
3660
3661 setPosition(elseBB, true);
3662 }
3663 break;
3664 case TGSI_OPCODE_ENDIF:
3665 {
3666 BasicBlock *convBB = new BasicBlock(func);
3667 BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3668 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
3669
3670 if (!bb->isTerminated()) {
3671 // we only want join if none of the clauses ended with CONT/BREAK/RET
3672 if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
3673 insertConvergenceOps(convBB, forkBB);
3674 mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
3675 bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3676 }
3677
3678 if (prevBB->getExit()->op == OP_BRA) {
3679 prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3680 prevBB->getExit()->asFlow()->target.bb = convBB;
3681 }
3682 setPosition(convBB, true);
3683 }
3684 break;
3685 case TGSI_OPCODE_BGNLOOP:
3686 {
3687 BasicBlock *lbgnBB = new BasicBlock(func);
3688 BasicBlock *lbrkBB = new BasicBlock(func);
3689
3690 loopBBs.push(lbgnBB);
3691 breakBBs.push(lbrkBB);
3692 if (loopBBs.getSize() > func->loopNestingBound)
3693 func->loopNestingBound++;
3694
3695 mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
3696
3697 bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
3698 setPosition(lbgnBB, true);
3699 mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
3700 }
3701 break;
3702 case TGSI_OPCODE_ENDLOOP:
3703 {
3704 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
3705
3706 if (!bb->isTerminated()) {
3707 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
3708 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
3709 }
3710 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
3711
3712 // If the loop never breaks (e.g. only has RET's inside), then there
3713 // will be no way to get to the break bb. However BGNLOOP will have
3714 // already made a PREBREAK to it, so it must be in the CFG.
3715 if (getBB()->cfg.incidentCount() == 0)
3716 loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
3717 }
3718 break;
3719 case TGSI_OPCODE_BRK:
3720 {
3721 if (bb->isTerminated())
3722 break;
3723 BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
3724 mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
3725 bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
3726 }
3727 break;
3728 case TGSI_OPCODE_CONT:
3729 {
3730 if (bb->isTerminated())
3731 break;
3732 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
3733 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
3734 contBB->explicitCont = true;
3735 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
3736 }
3737 break;
3738 case TGSI_OPCODE_BGNSUB:
3739 {
3740 Subroutine *s = getSubroutine(ip);
3741 BasicBlock *entry = new BasicBlock(s->f);
3742 BasicBlock *leave = new BasicBlock(s->f);
3743
3744 // multiple entrypoints possible, keep the graph connected
3745 if (prog->getType() == Program::TYPE_COMPUTE)
3746 prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
3747
3748 sub.cur = s;
3749 s->f->setEntry(entry);
3750 s->f->setExit(leave);
3751 setPosition(entry, true);
3752 return true;
3753 }
3754 case TGSI_OPCODE_ENDSUB:
3755 {
3756 sub.cur = getSubroutine(prog->main);
3757 setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
3758 return true;
3759 }
3760 case TGSI_OPCODE_CAL:
3761 {
3762 Subroutine *s = getSubroutine(tgsi.getLabel());
3763 mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
3764 func->call.attach(&s->f->call, Graph::Edge::TREE);
3765 return true;
3766 }
3767 case TGSI_OPCODE_RET:
3768 {
3769 if (bb->isTerminated())
3770 return true;
3771 BasicBlock *leave = BasicBlock::get(func->cfgExit);
3772
3773 if (!isEndOfSubroutine(ip + 1)) {
3774 // insert a PRERET at the entry if this is an early return
3775 // (only needed for sharing code in the epilogue)
3776 BasicBlock *root = BasicBlock::get(func->cfg.getRoot());
3777 if (root->getEntry() == NULL || root->getEntry()->op != OP_PRERET) {
3778 BasicBlock *pos = getBB();
3779 setPosition(root, false);
3780 mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
3781 setPosition(pos, true);
3782 }
3783 }
3784 mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
3785 bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
3786 }
3787 break;
3788 case TGSI_OPCODE_END:
3789 {
3790 // attach and generate epilogue code
3791 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
3792 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
3793 setPosition(epilogue, true);
3794 if (prog->getType() == Program::TYPE_FRAGMENT)
3795 exportOutputs();
3796 if ((prog->getType() == Program::TYPE_VERTEX ||
3797 prog->getType() == Program::TYPE_TESSELLATION_EVAL
3798 ) && info->io.genUserClip > 0)
3799 handleUserClipPlanes();
3800 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
3801 }
3802 break;
3803 case TGSI_OPCODE_SWITCH:
3804 case TGSI_OPCODE_CASE:
3805 ERROR("switch/case opcode encountered, should have been lowered\n");
3806 abort();
3807 break;
3808 case TGSI_OPCODE_LOAD:
3809 handleLOAD(dst0);
3810 break;
3811 case TGSI_OPCODE_STORE:
3812 handleSTORE();
3813 break;
3814 case TGSI_OPCODE_BARRIER:
3815 geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
3816 geni->fixed = 1;
3817 geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
3818 break;
3819 case TGSI_OPCODE_MEMBAR:
3820 {
3821 uint32_t level = tgsi.getSrc(0).getValueU32(0, info);
3822 geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
3823 geni->fixed = 1;
3824 if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED)))
3825 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
3826 else
3827 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
3828 }
3829 break;
3830 case TGSI_OPCODE_ATOMUADD:
3831 case TGSI_OPCODE_ATOMXCHG:
3832 case TGSI_OPCODE_ATOMCAS:
3833 case TGSI_OPCODE_ATOMAND:
3834 case TGSI_OPCODE_ATOMOR:
3835 case TGSI_OPCODE_ATOMXOR:
3836 case TGSI_OPCODE_ATOMUMIN:
3837 case TGSI_OPCODE_ATOMIMIN:
3838 case TGSI_OPCODE_ATOMUMAX:
3839 case TGSI_OPCODE_ATOMIMAX:
3840 case TGSI_OPCODE_ATOMFADD:
3841 handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
3842 break;
3843 case TGSI_OPCODE_RESQ:
3844 if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
3845 Value *ind = NULL;
3846 if (tgsi.getSrc(0).isIndirect(0))
3847 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
3848 geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0],
3849 makeSym(tgsi.getSrc(0).getFile(),
3850 tgsi.getSrc(0).getIndex(0), -1, 0, 0));
3851 if (ind)
3852 geni->setIndirect(0, 1, ind);
3853 } else {
3854 TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
3855 for (int c = 0, d = 0; c < 4; ++c) {
3856 if (dst0[c]) {
3857 texi->setDef(d++, dst0[c]);
3858 texi->tex.mask |= 1 << c;
3859 }
3860 }
3861 if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) {
3862 texi->tex.r = tgsi.getSrc(0).getIndex(0);
3863 if (tgsi.getSrc(0).isIndirect(0))
3864 texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
3865 } else {
3866 texi->tex.bindless = true;
3867 texi->setIndirectR(fetchSrc(0, 0));
3868 }
3869 texi->tex.target = tgsi.getImageTarget();
3870
3871 bb->insertTail(texi);
3872 }
3873 break;
3874 case TGSI_OPCODE_IBFE:
3875 case TGSI_OPCODE_UBFE:
3876 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3877 src0 = fetchSrc(0, c);
3878 val0 = getScratch();
3879 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
3880 tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
3881 loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) |
3882 tgsi.getSrc(1).getValueU32(c, info));
3883 } else {
3884 src1 = fetchSrc(1, c);
3885 src2 = fetchSrc(2, c);
3886 mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1);
3887 }
3888 mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0);
3889 }
3890 break;
3891 case TGSI_OPCODE_BFI:
3892 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3893 src0 = fetchSrc(0, c);
3894 src1 = fetchSrc(1, c);
3895 src2 = fetchSrc(2, c);
3896 src3 = fetchSrc(3, c);
3897 val0 = getScratch();
3898 mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2);
3899 mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0);
3900 }
3901 break;
3902 case TGSI_OPCODE_LSB:
3903 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3904 src0 = fetchSrc(0, c);
3905 val0 = getScratch();
3906 geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
3907 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3908 geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
3909 geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3910 }
3911 break;
3912 case TGSI_OPCODE_IMSB:
3913 case TGSI_OPCODE_UMSB:
3914 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3915 src0 = fetchSrc(0, c);
3916 mkOp1(OP_BFIND, srcTy, dst0[c], src0);
3917 }
3918 break;
3919 case TGSI_OPCODE_BREV:
3920 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3921 src0 = fetchSrc(0, c);
3922 geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
3923 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3924 }
3925 break;
3926 case TGSI_OPCODE_POPC:
3927 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3928 src0 = fetchSrc(0, c);
3929 mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
3930 }
3931 break;
3932 case TGSI_OPCODE_INTERP_CENTROID:
3933 case TGSI_OPCODE_INTERP_SAMPLE:
3934 case TGSI_OPCODE_INTERP_OFFSET:
3935 handleINTERP(dst0);
3936 break;
3937 case TGSI_OPCODE_I642F:
3938 case TGSI_OPCODE_U642F:
3939 case TGSI_OPCODE_D2I:
3940 case TGSI_OPCODE_D2U:
3941 case TGSI_OPCODE_D2F: {
3942 int pos = 0;
3943 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3944 Value *dreg = getSSA(8);
3945 src0 = fetchSrc(0, pos);
3946 src1 = fetchSrc(0, pos + 1);
3947 mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
3948 Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
3949 if (!isFloatType(dstTy))
3950 cvt->rnd = ROUND_Z;
3951 pos += 2;
3952 }
3953 break;
3954 }
3955 case TGSI_OPCODE_I2I64:
3956 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3957 dst0[c] = fetchSrc(0, c / 2);
3958 mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));
3959 c++;
3960 }
3961 break;
3962 case TGSI_OPCODE_U2I64:
3963 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3964 dst0[c] = fetchSrc(0, c / 2);
3965 dst0[c + 1] = zero;
3966 c++;
3967 }
3968 break;
3969 case TGSI_OPCODE_F2I64:
3970 case TGSI_OPCODE_F2U64:
3971 case TGSI_OPCODE_I2D:
3972 case TGSI_OPCODE_U2D:
3973 case TGSI_OPCODE_F2D:
3974 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3975 Value *dreg = getSSA(8);
3976 Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
3977 if (!isFloatType(dstTy))
3978 cvt->rnd = ROUND_Z;
3979 mkSplit(&dst0[c], 4, dreg);
3980 c++;
3981 }
3982 break;
3983 case TGSI_OPCODE_D2I64:
3984 case TGSI_OPCODE_D2U64:
3985 case TGSI_OPCODE_I642D:
3986 case TGSI_OPCODE_U642D:
3987 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3988 src0 = getSSA(8);
3989 Value *dst = getSSA(8), *tmp[2];
3990 tmp[0] = fetchSrc(0, c);
3991 tmp[1] = fetchSrc(0, c + 1);
3992 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3993 Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);
3994 if (!isFloatType(dstTy))
3995 cvt->rnd = ROUND_Z;
3996 mkSplit(&dst0[c], 4, dst);
3997 c++;
3998 }
3999 break;
4000 case TGSI_OPCODE_I64NEG:
4001 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4002 src0 = getSSA(8);
4003 Value *dst = getSSA(8), *tmp[2];
4004 tmp[0] = fetchSrc(0, c);
4005 tmp[1] = fetchSrc(0, c + 1);
4006 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4007 mkOp2(OP_SUB, dstTy, dst, zero, src0);
4008 mkSplit(&dst0[c], 4, dst);
4009 c++;
4010 }
4011 break;
4012 case TGSI_OPCODE_I64ABS:
4013 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4014 src0 = getSSA(8);
4015 Value *neg = getSSA(8), *srcComp[2], *negComp[2];
4016 srcComp[0] = fetchSrc(0, c);
4017 srcComp[1] = fetchSrc(0, c + 1);
4018 mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]);
4019 mkOp2(OP_SUB, dstTy, neg, zero, src0);
4020 mkSplit(negComp, 4, neg);
4021 mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32,
4022 negComp[0], srcComp[0], srcComp[1]);
4023 mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32,
4024 negComp[1], srcComp[1], srcComp[1]);
4025 c++;
4026 }
4027 break;
4028 case TGSI_OPCODE_DABS:
4029 case TGSI_OPCODE_DNEG:
4030 case TGSI_OPCODE_DRCP:
4031 case TGSI_OPCODE_DSQRT:
4032 case TGSI_OPCODE_DRSQ:
4033 case TGSI_OPCODE_DTRUNC:
4034 case TGSI_OPCODE_DCEIL:
4035 case TGSI_OPCODE_DFLR:
4036 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4037 src0 = getSSA(8);
4038 Value *dst = getSSA(8), *tmp[2];
4039 tmp[0] = fetchSrc(0, c);
4040 tmp[1] = fetchSrc(0, c + 1);
4041 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4042 mkOp1(op, dstTy, dst, src0);
4043 mkSplit(&dst0[c], 4, dst);
4044 c++;
4045 }
4046 break;
4047 case TGSI_OPCODE_DFRAC:
4048 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4049 src0 = getSSA(8);
4050 Value *dst = getSSA(8), *tmp[2];
4051 tmp[0] = fetchSrc(0, c);
4052 tmp[1] = fetchSrc(0, c + 1);
4053 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4054 mkOp1(OP_FLOOR, TYPE_F64, dst, src0);
4055 mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);
4056 mkSplit(&dst0[c], 4, dst);
4057 c++;
4058 }
4059 break;
4060 case TGSI_OPCODE_U64SEQ:
4061 case TGSI_OPCODE_U64SNE:
4062 case TGSI_OPCODE_U64SLT:
4063 case TGSI_OPCODE_U64SGE:
4064 case TGSI_OPCODE_I64SLT:
4065 case TGSI_OPCODE_I64SGE:
4066 case TGSI_OPCODE_DSLT:
4067 case TGSI_OPCODE_DSGE:
4068 case TGSI_OPCODE_DSEQ:
4069 case TGSI_OPCODE_DSNE: {
4070 int pos = 0;
4071 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4072 Value *tmp[2];
4073
4074 src0 = getSSA(8);
4075 src1 = getSSA(8);
4076 tmp[0] = fetchSrc(0, pos);
4077 tmp[1] = fetchSrc(0, pos + 1);
4078 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4079 tmp[0] = fetchSrc(1, pos);
4080 tmp[1] = fetchSrc(1, pos + 1);
4081 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4082 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
4083 pos += 2;
4084 }
4085 break;
4086 }
4087 case TGSI_OPCODE_U64MIN:
4088 case TGSI_OPCODE_U64MAX:
4089 case TGSI_OPCODE_I64MIN:
4090 case TGSI_OPCODE_I64MAX: {
4091 dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32;
4092 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4093 Value *flag = getSSA(1, FILE_FLAGS);
4094 src0 = fetchSrc(0, c + 1);
4095 src1 = fetchSrc(1, c + 1);
4096 geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);
4097 geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;
4098 geni->setFlagsDef(1, flag);
4099
4100 src0 = fetchSrc(0, c);
4101 src1 = fetchSrc(1, c);
4102 geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1);
4103 geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;
4104 geni->setFlagsSrc(2, flag);
4105
4106 c++;
4107 }
4108 break;
4109 }
4110 case TGSI_OPCODE_U64SHL:
4111 case TGSI_OPCODE_I64SHR:
4112 case TGSI_OPCODE_U64SHR:
4113 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4114 src0 = getSSA(8);
4115 Value *dst = getSSA(8), *tmp[2];
4116 tmp[0] = fetchSrc(0, c);
4117 tmp[1] = fetchSrc(0, c + 1);
4118 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4119 // Theoretically src1 is a 64-bit value but in practice only the low
4120 // bits matter. The IR expects this to be a 32-bit value.
4121 src1 = fetchSrc(1, c);
4122 mkOp2(op, dstTy, dst, src0, src1);
4123 mkSplit(&dst0[c], 4, dst);
4124 c++;
4125 }
4126 break;
4127 case TGSI_OPCODE_U64ADD:
4128 case TGSI_OPCODE_U64MUL:
4129 case TGSI_OPCODE_DADD:
4130 case TGSI_OPCODE_DMUL:
4131 case TGSI_OPCODE_DDIV:
4132 case TGSI_OPCODE_DMAX:
4133 case TGSI_OPCODE_DMIN:
4134 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4135 src0 = getSSA(8);
4136 src1 = getSSA(8);
4137 Value *dst = getSSA(8), *tmp[2];
4138 tmp[0] = fetchSrc(0, c);
4139 tmp[1] = fetchSrc(0, c + 1);
4140 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4141 tmp[0] = fetchSrc(1, c);
4142 tmp[1] = fetchSrc(1, c + 1);
4143 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4144 mkOp2(op, dstTy, dst, src0, src1);
4145 mkSplit(&dst0[c], 4, dst);
4146 c++;
4147 }
4148 break;
4149 case TGSI_OPCODE_DMAD:
4150 case TGSI_OPCODE_DFMA:
4151 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4152 src0 = getSSA(8);
4153 src1 = getSSA(8);
4154 src2 = getSSA(8);
4155 Value *dst = getSSA(8), *tmp[2];
4156 tmp[0] = fetchSrc(0, c);
4157 tmp[1] = fetchSrc(0, c + 1);
4158 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4159 tmp[0] = fetchSrc(1, c);
4160 tmp[1] = fetchSrc(1, c + 1);
4161 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4162 tmp[0] = fetchSrc(2, c);
4163 tmp[1] = fetchSrc(2, c + 1);
4164 mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);
4165 mkOp3(op, dstTy, dst, src0, src1, src2);
4166 mkSplit(&dst0[c], 4, dst);
4167 c++;
4168 }
4169 break;
4170 case TGSI_OPCODE_DROUND:
4171 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4172 src0 = getSSA(8);
4173 Value *dst = getSSA(8), *tmp[2];
4174 tmp[0] = fetchSrc(0, c);
4175 tmp[1] = fetchSrc(0, c + 1);
4176 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4177 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)
4178 ->rnd = ROUND_NI;
4179 mkSplit(&dst0[c], 4, dst);
4180 c++;
4181 }
4182 break;
4183 case TGSI_OPCODE_DSSG:
4184 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4185 src0 = getSSA(8);
4186 Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];
4187 tmp[0] = fetchSrc(0, c);
4188 tmp[1] = fetchSrc(0, c + 1);
4189 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4190
4191 val0 = getScratch();
4192 val1 = getScratch();
4193 // The zero is wrong here since it's only 32-bit, but it works out in
4194 // the end since it gets replaced with $r63.
4195 mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);
4196 mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);
4197 mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);
4198 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);
4199 mkSplit(&dst0[c], 4, dst);
4200 c++;
4201 }
4202 break;
4203 case TGSI_OPCODE_I64SSG:
4204 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4205 src0 = getSSA(8);
4206 Value *tmp[2];
4207 tmp[0] = fetchSrc(0, c);
4208 tmp[1] = fetchSrc(0, c + 1);
4209 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4210
4211 val0 = getScratch();
4212 val1 = getScratch();
4213 mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero);
4214 mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero);
4215 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
4216 mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31));
4217 c++;
4218 }
4219 break;
4220 default:
4221 ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
4222 assert(0);
4223 break;
4224 }
4225
4226 if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
4227 for (c = 0; c < 4; ++c) {
4228 if (!dst0[c])
4229 continue;
4230 if (dst0[c] != rDst0[c])
4231 mkMov(rDst0[c], dst0[c]);
4232 storeDst(0, c, rDst0[c]);
4233 }
4234 }
4235 vtxBaseValid = 0;
4236
4237 return true;
4238 }
4239
4240 void
4241 Converter::handleUserClipPlanes()
4242 {
4243 Value *res[8];
4244 int n, i, c;
4245
4246 for (c = 0; c < 4; ++c) {
4247 for (i = 0; i < info->io.genUserClip; ++i) {
4248 Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4249 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
4250 Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
4251 if (c == 0)
4252 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
4253 else
4254 mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
4255 }
4256 }
4257
4258 const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
4259
4260 for (i = 0; i < info->io.genUserClip; ++i) {
4261 n = i / 4 + first;
4262 c = i % 4;
4263 Symbol *sym =
4264 mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
4265 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
4266 }
4267 }
4268
4269 void
4270 Converter::exportOutputs()
4271 {
4272 if (info->io.alphaRefBase) {
4273 for (unsigned int i = 0; i < info->numOutputs; ++i) {
4274 if (info->out[i].sn != TGSI_SEMANTIC_COLOR ||
4275 info->out[i].si != 0)
4276 continue;
4277 const unsigned int c = 3;
4278 if (!oData.exists(sub.cur->values, i, c))
4279 continue;
4280 Value *val = oData.load(sub.cur->values, i, c, NULL);
4281 if (!val)
4282 continue;
4283
4284 Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4285 TYPE_U32, info->io.alphaRefBase);
4286 Value *pred = new_LValue(func, FILE_PREDICATE);
4287 mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val,
4288 mkLoadv(TYPE_U32, ref, NULL))
4289 ->subOp = 1;
4290 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred);
4291 }
4292 }
4293
4294 for (unsigned int i = 0; i < info->numOutputs; ++i) {
4295 for (unsigned int c = 0; c < 4; ++c) {
4296 if (!oData.exists(sub.cur->values, i, c))
4297 continue;
4298 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
4299 info->out[i].slot[c] * 4);
4300 Value *val = oData.load(sub.cur->values, i, c, NULL);
4301 if (val) {
4302 if (info->out[i].sn == TGSI_SEMANTIC_POSITION)
4303 mkOp1(OP_SAT, TYPE_F32, val, val);
4304 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
4305 }
4306 }
4307 }
4308 }
4309
4310 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
4311 code(code),
4312 tgsi(NULL),
4313 tData(this), lData(this), aData(this), oData(this)
4314 {
4315 info = code->info;
4316
4317 const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
4318 const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
4319 const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
4320
4321 tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
4322 lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
4323 aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
4324 oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
4325
4326 zero = mkImm((uint32_t)0);
4327
4328 vtxBaseValid = 0;
4329 }
4330
4331 Converter::~Converter()
4332 {
4333 }
4334
4335 inline const Converter::Location *
4336 Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v)
4337 {
4338 ValueMap::l_iterator it = s->values.l.find(v);
4339 return it == s->values.l.end() ? NULL : &it->second;
4340 }
4341
4342 template<typename T> inline void
4343 Converter::BindArgumentsPass::updateCallArgs(
4344 Instruction *i, void (Instruction::*setArg)(int, Value *),
4345 T (Function::*proto))
4346 {
4347 Function *g = i->asFlow()->target.fn;
4348 Subroutine *subg = conv.getSubroutine(g);
4349
4350 for (unsigned a = 0; a < (g->*proto).size(); ++a) {
4351 Value *v = (g->*proto)[a].get();
4352 const Converter::Location &l = *getValueLocation(subg, v);
4353 Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
4354
4355 (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
4356 }
4357 }
4358
4359 template<typename T> inline void
4360 Converter::BindArgumentsPass::updatePrototype(
4361 BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
4362 {
4363 (func->*updateSet)();
4364
4365 for (unsigned i = 0; i < set->getSize(); ++i) {
4366 Value *v = func->getLValue(i);
4367 const Converter::Location *l = getValueLocation(sub, v);
4368
4369 // only include values with a matching TGSI register
4370 if (set->test(i) && l && !conv.code->locals.count(*l))
4371 (func->*proto).push_back(v);
4372 }
4373 }
4374
4375 bool
4376 Converter::BindArgumentsPass::visit(Function *f)
4377 {
4378 sub = conv.getSubroutine(f);
4379
4380 for (ArrayList::Iterator bi = f->allBBlocks.iterator();
4381 !bi.end(); bi.next()) {
4382 for (Instruction *i = BasicBlock::get(bi)->getFirst();
4383 i; i = i->next) {
4384 if (i->op == OP_CALL && !i->asFlow()->builtin) {
4385 updateCallArgs(i, &Instruction::setSrc, &Function::ins);
4386 updateCallArgs(i, &Instruction::setDef, &Function::outs);
4387 }
4388 }
4389 }
4390
4391 if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
4392 return true;
4393 updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
4394 &Function::buildLiveSets, &Function::ins);
4395 updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
4396 &Function::buildDefSets, &Function::outs);
4397
4398 return true;
4399 }
4400
4401 bool
4402 Converter::run()
4403 {
4404 BasicBlock *entry = new BasicBlock(prog->main);
4405 BasicBlock *leave = new BasicBlock(prog->main);
4406
4407 prog->main->setEntry(entry);
4408 prog->main->setExit(leave);
4409
4410 setPosition(entry, true);
4411 sub.cur = getSubroutine(prog->main);
4412
4413 if (info->io.genUserClip > 0) {
4414 for (int c = 0; c < 4; ++c)
4415 clipVtx[c] = getScratch();
4416 }
4417
4418 switch (prog->getType()) {
4419 case Program::TYPE_TESSELLATION_CONTROL:
4420 outBase = mkOp2v(
4421 OP_SUB, TYPE_U32, getSSA(),
4422 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
4423 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
4424 break;
4425 case Program::TYPE_FRAGMENT: {
4426 Symbol *sv = mkSysVal(SV_POSITION, 3);
4427 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
4428 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
4429 break;
4430 }
4431 default:
4432 break;
4433 }
4434
4435 if (info->io.viewportId >= 0)
4436 viewport = getScratch();
4437 else
4438 viewport = NULL;
4439
4440 for (ip = 0; ip < code->scan.num_instructions; ++ip) {
4441 if (!handleInstruction(&code->insns[ip]))
4442 return false;
4443 }
4444
4445 if (!BindArgumentsPass(*this).run(prog))
4446 return false;
4447
4448 return true;
4449 }
4450
4451 } // unnamed namespace
4452
4453 namespace nv50_ir {
4454
4455 bool
4456 Program::makeFromTGSI(struct nv50_ir_prog_info *info)
4457 {
4458 tgsi::Source src(info);
4459 if (!src.scanSource())
4460 return false;
4461 tlsSize = info->bin.tlsSpace;
4462
4463 Converter builder(this, &src);
4464 return builder.run();
4465 }
4466
4467 } // namespace nv50_ir