8d9c0774cfbaae0aae7fee2c08bcf73dcba7adf9
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_tgsi.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "tgsi/tgsi_dump.h"
24 #include "tgsi/tgsi_scan.h"
25 #include "tgsi/tgsi_util.h"
26
27 #include <set>
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_util.h"
31 #include "codegen/nv50_ir_build_util.h"
32
33 namespace tgsi {
34
35 class Source;
36
37 static nv50_ir::operation translateOpcode(uint opcode);
38 static nv50_ir::DataFile translateFile(uint file);
39 static nv50_ir::TexTarget translateTexture(uint texTarg);
40 static nv50_ir::SVSemantic translateSysVal(uint sysval);
41 static nv50_ir::CacheMode translateCacheMode(uint qualifier);
42 static nv50_ir::ImgFormat translateImgFormat(uint format);
43
44 class Instruction
45 {
46 public:
47 Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
48
49 class SrcRegister
50 {
51 public:
52 SrcRegister(const struct tgsi_full_src_register *src)
53 : reg(src->Register),
54 fsr(src)
55 { }
56
57 SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
58
59 SrcRegister(const struct tgsi_ind_register& ind)
60 : reg(tgsi_util_get_src_from_ind(&ind)),
61 fsr(NULL)
62 { }
63
64 struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
65 {
66 struct tgsi_src_register reg;
67 memset(&reg, 0, sizeof(reg));
68 reg.Index = off.Index;
69 reg.File = off.File;
70 reg.SwizzleX = off.SwizzleX;
71 reg.SwizzleY = off.SwizzleY;
72 reg.SwizzleZ = off.SwizzleZ;
73 return reg;
74 }
75
76 SrcRegister(const struct tgsi_texture_offset& off) :
77 reg(offsetToSrc(off)),
78 fsr(NULL)
79 { }
80
81 uint getFile() const { return reg.File; }
82
83 bool is2D() const { return reg.Dimension; }
84
85 bool isIndirect(int dim) const
86 {
87 return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
88 }
89
90 int getIndex(int dim) const
91 {
92 return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
93 }
94
95 int getSwizzle(int chan) const
96 {
97 return tgsi_util_get_src_register_swizzle(&reg, chan);
98 }
99
100 int getArrayId() const
101 {
102 if (isIndirect(0))
103 return fsr->Indirect.ArrayID;
104 return 0;
105 }
106
107 nv50_ir::Modifier getMod(int chan) const;
108
109 SrcRegister getIndirect(int dim) const
110 {
111 assert(fsr && isIndirect(dim));
112 if (dim)
113 return SrcRegister(fsr->DimIndirect);
114 return SrcRegister(fsr->Indirect);
115 }
116
117 uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
118 {
119 assert(reg.File == TGSI_FILE_IMMEDIATE);
120 assert(!reg.Absolute);
121 assert(!reg.Negate);
122 return info->immd.data[reg.Index * 4 + getSwizzle(c)];
123 }
124
125 private:
126 const struct tgsi_src_register reg;
127 const struct tgsi_full_src_register *fsr;
128 };
129
130 class DstRegister
131 {
132 public:
133 DstRegister(const struct tgsi_full_dst_register *dst)
134 : reg(dst->Register),
135 fdr(dst)
136 { }
137
138 DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
139
140 uint getFile() const { return reg.File; }
141
142 bool is2D() const { return reg.Dimension; }
143
144 bool isIndirect(int dim) const
145 {
146 return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
147 }
148
149 int getIndex(int dim) const
150 {
151 return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
152 }
153
154 unsigned int getMask() const { return reg.WriteMask; }
155
156 bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
157
158 SrcRegister getIndirect(int dim) const
159 {
160 assert(fdr && isIndirect(dim));
161 if (dim)
162 return SrcRegister(fdr->DimIndirect);
163 return SrcRegister(fdr->Indirect);
164 }
165
166 int getArrayId() const
167 {
168 if (isIndirect(0))
169 return fdr->Indirect.ArrayID;
170 return 0;
171 }
172
173 private:
174 const struct tgsi_dst_register reg;
175 const struct tgsi_full_dst_register *fdr;
176 };
177
178 inline uint getOpcode() const { return insn->Instruction.Opcode; }
179
180 unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
181 unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
182
183 // mask of used components of source s
184 unsigned int srcMask(unsigned int s) const;
185 unsigned int texOffsetMask() const;
186
187 SrcRegister getSrc(unsigned int s) const
188 {
189 assert(s < srcCount());
190 return SrcRegister(&insn->Src[s]);
191 }
192
193 DstRegister getDst(unsigned int d) const
194 {
195 assert(d < dstCount());
196 return DstRegister(&insn->Dst[d]);
197 }
198
199 SrcRegister getTexOffset(unsigned int i) const
200 {
201 assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
202 return SrcRegister(insn->TexOffsets[i]);
203 }
204
205 unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
206
207 bool checkDstSrcAliasing() const;
208
209 inline nv50_ir::operation getOP() const {
210 return translateOpcode(getOpcode()); }
211
212 nv50_ir::DataType inferSrcType() const;
213 nv50_ir::DataType inferDstType() const;
214
215 nv50_ir::CondCode getSetCond() const;
216
217 nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
218
219 nv50_ir::CacheMode getCacheMode() const {
220 if (!insn->Instruction.Memory)
221 return nv50_ir::CACHE_CA;
222 return translateCacheMode(insn->Memory.Qualifier);
223 }
224
225 inline uint getLabel() { return insn->Label.Label; }
226
227 unsigned getSaturate() const { return insn->Instruction.Saturate; }
228
229 void print() const
230 {
231 tgsi_dump_instruction(insn, 1);
232 }
233
234 private:
235 const struct tgsi_full_instruction *insn;
236 };
237
238 unsigned int Instruction::texOffsetMask() const
239 {
240 const struct tgsi_instruction_texture *tex = &insn->Texture;
241 assert(insn->Instruction.Texture);
242
243 switch (tex->Texture) {
244 case TGSI_TEXTURE_BUFFER:
245 case TGSI_TEXTURE_1D:
246 case TGSI_TEXTURE_SHADOW1D:
247 case TGSI_TEXTURE_1D_ARRAY:
248 case TGSI_TEXTURE_SHADOW1D_ARRAY:
249 return 0x1;
250 case TGSI_TEXTURE_2D:
251 case TGSI_TEXTURE_SHADOW2D:
252 case TGSI_TEXTURE_2D_ARRAY:
253 case TGSI_TEXTURE_SHADOW2D_ARRAY:
254 case TGSI_TEXTURE_RECT:
255 case TGSI_TEXTURE_SHADOWRECT:
256 case TGSI_TEXTURE_2D_MSAA:
257 case TGSI_TEXTURE_2D_ARRAY_MSAA:
258 return 0x3;
259 case TGSI_TEXTURE_3D:
260 return 0x7;
261 default:
262 assert(!"Unexpected texture target");
263 return 0xf;
264 }
265 }
266
267 unsigned int Instruction::srcMask(unsigned int s) const
268 {
269 unsigned int mask = insn->Dst[0].Register.WriteMask;
270
271 switch (insn->Instruction.Opcode) {
272 case TGSI_OPCODE_COS:
273 case TGSI_OPCODE_SIN:
274 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
275 case TGSI_OPCODE_DP2:
276 return 0x3;
277 case TGSI_OPCODE_DP3:
278 return 0x7;
279 case TGSI_OPCODE_DP4:
280 case TGSI_OPCODE_DPH:
281 case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
282 return 0xf;
283 case TGSI_OPCODE_DST:
284 return mask & (s ? 0xa : 0x6);
285 case TGSI_OPCODE_EX2:
286 case TGSI_OPCODE_EXP:
287 case TGSI_OPCODE_LG2:
288 case TGSI_OPCODE_LOG:
289 case TGSI_OPCODE_POW:
290 case TGSI_OPCODE_RCP:
291 case TGSI_OPCODE_RSQ:
292 case TGSI_OPCODE_SCS:
293 return 0x1;
294 case TGSI_OPCODE_IF:
295 case TGSI_OPCODE_UIF:
296 return 0x1;
297 case TGSI_OPCODE_LIT:
298 return 0xb;
299 case TGSI_OPCODE_TEX2:
300 case TGSI_OPCODE_TXB2:
301 case TGSI_OPCODE_TXL2:
302 return (s == 0) ? 0xf : 0x3;
303 case TGSI_OPCODE_TEX:
304 case TGSI_OPCODE_TXB:
305 case TGSI_OPCODE_TXD:
306 case TGSI_OPCODE_TXL:
307 case TGSI_OPCODE_TXP:
308 case TGSI_OPCODE_TXF:
309 case TGSI_OPCODE_TG4:
310 case TGSI_OPCODE_TEX_LZ:
311 case TGSI_OPCODE_TXF_LZ:
312 case TGSI_OPCODE_LODQ:
313 {
314 const struct tgsi_instruction_texture *tex = &insn->Texture;
315
316 assert(insn->Instruction.Texture);
317
318 mask = 0x7;
319 if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
320 insn->Instruction.Opcode != TGSI_OPCODE_TEX_LZ &&
321 insn->Instruction.Opcode != TGSI_OPCODE_TXF_LZ &&
322 insn->Instruction.Opcode != TGSI_OPCODE_TXD)
323 mask |= 0x8; /* bias, lod or proj */
324
325 switch (tex->Texture) {
326 case TGSI_TEXTURE_1D:
327 mask &= 0x9;
328 break;
329 case TGSI_TEXTURE_SHADOW1D:
330 mask &= 0xd;
331 break;
332 case TGSI_TEXTURE_1D_ARRAY:
333 case TGSI_TEXTURE_2D:
334 case TGSI_TEXTURE_RECT:
335 mask &= 0xb;
336 break;
337 case TGSI_TEXTURE_CUBE_ARRAY:
338 case TGSI_TEXTURE_SHADOW2D_ARRAY:
339 case TGSI_TEXTURE_SHADOWCUBE:
340 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
341 mask |= 0x8;
342 break;
343 default:
344 break;
345 }
346 }
347 return mask;
348 case TGSI_OPCODE_XPD:
349 {
350 unsigned int x = 0;
351 if (mask & 1) x |= 0x6;
352 if (mask & 2) x |= 0x5;
353 if (mask & 4) x |= 0x3;
354 return x;
355 }
356 case TGSI_OPCODE_D2I:
357 case TGSI_OPCODE_D2U:
358 case TGSI_OPCODE_D2F:
359 case TGSI_OPCODE_DSLT:
360 case TGSI_OPCODE_DSGE:
361 case TGSI_OPCODE_DSEQ:
362 case TGSI_OPCODE_DSNE:
363 case TGSI_OPCODE_U64SEQ:
364 case TGSI_OPCODE_U64SNE:
365 case TGSI_OPCODE_I64SLT:
366 case TGSI_OPCODE_U64SLT:
367 case TGSI_OPCODE_I64SGE:
368 case TGSI_OPCODE_U64SGE:
369 case TGSI_OPCODE_I642F:
370 case TGSI_OPCODE_U642F:
371 switch (util_bitcount(mask)) {
372 case 1: return 0x3;
373 case 2: return 0xf;
374 default:
375 assert(!"unexpected mask");
376 return 0xf;
377 }
378 case TGSI_OPCODE_I2D:
379 case TGSI_OPCODE_U2D:
380 case TGSI_OPCODE_F2D: {
381 unsigned int x = 0;
382 if ((mask & 0x3) == 0x3)
383 x |= 1;
384 if ((mask & 0xc) == 0xc)
385 x |= 2;
386 return x;
387 }
388 case TGSI_OPCODE_PK2H:
389 return 0x3;
390 case TGSI_OPCODE_UP2H:
391 return 0x1;
392 default:
393 break;
394 }
395
396 return mask;
397 }
398
399 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
400 {
401 nv50_ir::Modifier m(0);
402
403 if (reg.Absolute)
404 m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
405 if (reg.Negate)
406 m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
407 return m;
408 }
409
410 static nv50_ir::DataFile translateFile(uint file)
411 {
412 switch (file) {
413 case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST;
414 case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT;
415 case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT;
416 case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR;
417 case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS;
418 case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
419 case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
420 case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER;
421 case TGSI_FILE_IMAGE: return nv50_ir::FILE_MEMORY_GLOBAL;
422 case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
423 case TGSI_FILE_SAMPLER:
424 case TGSI_FILE_NULL:
425 default:
426 return nv50_ir::FILE_NULL;
427 }
428 }
429
430 static nv50_ir::SVSemantic translateSysVal(uint sysval)
431 {
432 switch (sysval) {
433 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;
434 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;
435 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;
436 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
437 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID;
438 case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID;
439 case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID;
440 case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;
441 case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID;
442 case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX;
443 case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;
444 case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
445 case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
446 case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;
447 case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
448 case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
449 case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
450 case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
451 case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
452 case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
453 case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
454 case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM;
455 case TGSI_SEMANTIC_SUBGROUP_INVOCATION: return nv50_ir::SV_LANEID;
456 case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: return nv50_ir::SV_LANEMASK_EQ;
457 case TGSI_SEMANTIC_SUBGROUP_LT_MASK: return nv50_ir::SV_LANEMASK_LT;
458 case TGSI_SEMANTIC_SUBGROUP_LE_MASK: return nv50_ir::SV_LANEMASK_LE;
459 case TGSI_SEMANTIC_SUBGROUP_GT_MASK: return nv50_ir::SV_LANEMASK_GT;
460 case TGSI_SEMANTIC_SUBGROUP_GE_MASK: return nv50_ir::SV_LANEMASK_GE;
461 default:
462 assert(0);
463 return nv50_ir::SV_CLOCK;
464 }
465 }
466
467 #define NV50_IR_TEX_TARG_CASE(a, b) \
468 case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
469
470 static nv50_ir::TexTarget translateTexture(uint tex)
471 {
472 switch (tex) {
473 NV50_IR_TEX_TARG_CASE(1D, 1D);
474 NV50_IR_TEX_TARG_CASE(2D, 2D);
475 NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS);
476 NV50_IR_TEX_TARG_CASE(3D, 3D);
477 NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
478 NV50_IR_TEX_TARG_CASE(RECT, RECT);
479 NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
480 NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
481 NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY);
482 NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);
483 NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
484 NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
485 NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
486 NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
487 NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
488 NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
489 NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);
490 NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
491
492 case TGSI_TEXTURE_UNKNOWN:
493 default:
494 assert(!"invalid texture target");
495 return nv50_ir::TEX_TARGET_2D;
496 }
497 }
498
499 static nv50_ir::CacheMode translateCacheMode(uint qualifier)
500 {
501 if (qualifier & TGSI_MEMORY_VOLATILE)
502 return nv50_ir::CACHE_CV;
503 if (qualifier & TGSI_MEMORY_COHERENT)
504 return nv50_ir::CACHE_CG;
505 return nv50_ir::CACHE_CA;
506 }
507
508 static nv50_ir::ImgFormat translateImgFormat(uint format)
509 {
510
511 #define FMT_CASE(a, b) \
512 case PIPE_FORMAT_ ## a: return nv50_ir::FMT_ ## b
513
514 switch (format) {
515 FMT_CASE(NONE, NONE);
516
517 FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
518 FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
519 FMT_CASE(R32G32_FLOAT, RG32F);
520 FMT_CASE(R16G16_FLOAT, RG16F);
521 FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
522 FMT_CASE(R32_FLOAT, R32F);
523 FMT_CASE(R16_FLOAT, R16F);
524
525 FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
526 FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
527 FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
528 FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
529 FMT_CASE(R32G32_UINT, RG32UI);
530 FMT_CASE(R16G16_UINT, RG16UI);
531 FMT_CASE(R8G8_UINT, RG8UI);
532 FMT_CASE(R32_UINT, R32UI);
533 FMT_CASE(R16_UINT, R16UI);
534 FMT_CASE(R8_UINT, R8UI);
535
536 FMT_CASE(R32G32B32A32_SINT, RGBA32I);
537 FMT_CASE(R16G16B16A16_SINT, RGBA16I);
538 FMT_CASE(R8G8B8A8_SINT, RGBA8I);
539 FMT_CASE(R32G32_SINT, RG32I);
540 FMT_CASE(R16G16_SINT, RG16I);
541 FMT_CASE(R8G8_SINT, RG8I);
542 FMT_CASE(R32_SINT, R32I);
543 FMT_CASE(R16_SINT, R16I);
544 FMT_CASE(R8_SINT, R8I);
545
546 FMT_CASE(R16G16B16A16_UNORM, RGBA16);
547 FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
548 FMT_CASE(R8G8B8A8_UNORM, RGBA8);
549 FMT_CASE(R16G16_UNORM, RG16);
550 FMT_CASE(R8G8_UNORM, RG8);
551 FMT_CASE(R16_UNORM, R16);
552 FMT_CASE(R8_UNORM, R8);
553
554 FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
555 FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
556 FMT_CASE(R16G16_SNORM, RG16_SNORM);
557 FMT_CASE(R8G8_SNORM, RG8_SNORM);
558 FMT_CASE(R16_SNORM, R16_SNORM);
559 FMT_CASE(R8_SNORM, R8_SNORM);
560
561 FMT_CASE(B8G8R8A8_UNORM, BGRA8);
562 }
563
564 assert(!"Unexpected format");
565 return nv50_ir::FMT_NONE;
566 }
567
568 nv50_ir::DataType Instruction::inferSrcType() const
569 {
570 switch (getOpcode()) {
571 case TGSI_OPCODE_UIF:
572 case TGSI_OPCODE_AND:
573 case TGSI_OPCODE_OR:
574 case TGSI_OPCODE_XOR:
575 case TGSI_OPCODE_NOT:
576 case TGSI_OPCODE_SHL:
577 case TGSI_OPCODE_U2F:
578 case TGSI_OPCODE_U2D:
579 case TGSI_OPCODE_U2I64:
580 case TGSI_OPCODE_UADD:
581 case TGSI_OPCODE_UDIV:
582 case TGSI_OPCODE_UMOD:
583 case TGSI_OPCODE_UMAD:
584 case TGSI_OPCODE_UMUL:
585 case TGSI_OPCODE_UMUL_HI:
586 case TGSI_OPCODE_UMAX:
587 case TGSI_OPCODE_UMIN:
588 case TGSI_OPCODE_USEQ:
589 case TGSI_OPCODE_USGE:
590 case TGSI_OPCODE_USLT:
591 case TGSI_OPCODE_USNE:
592 case TGSI_OPCODE_USHR:
593 case TGSI_OPCODE_ATOMUADD:
594 case TGSI_OPCODE_ATOMXCHG:
595 case TGSI_OPCODE_ATOMCAS:
596 case TGSI_OPCODE_ATOMAND:
597 case TGSI_OPCODE_ATOMOR:
598 case TGSI_OPCODE_ATOMXOR:
599 case TGSI_OPCODE_ATOMUMIN:
600 case TGSI_OPCODE_ATOMUMAX:
601 case TGSI_OPCODE_UBFE:
602 case TGSI_OPCODE_UMSB:
603 case TGSI_OPCODE_UP2H:
604 case TGSI_OPCODE_VOTE_ALL:
605 case TGSI_OPCODE_VOTE_ANY:
606 case TGSI_OPCODE_VOTE_EQ:
607 return nv50_ir::TYPE_U32;
608 case TGSI_OPCODE_I2F:
609 case TGSI_OPCODE_I2D:
610 case TGSI_OPCODE_I2I64:
611 case TGSI_OPCODE_IDIV:
612 case TGSI_OPCODE_IMUL_HI:
613 case TGSI_OPCODE_IMAX:
614 case TGSI_OPCODE_IMIN:
615 case TGSI_OPCODE_IABS:
616 case TGSI_OPCODE_INEG:
617 case TGSI_OPCODE_ISGE:
618 case TGSI_OPCODE_ISHR:
619 case TGSI_OPCODE_ISLT:
620 case TGSI_OPCODE_ISSG:
621 case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
622 case TGSI_OPCODE_MOD:
623 case TGSI_OPCODE_UARL:
624 case TGSI_OPCODE_ATOMIMIN:
625 case TGSI_OPCODE_ATOMIMAX:
626 case TGSI_OPCODE_IBFE:
627 case TGSI_OPCODE_IMSB:
628 return nv50_ir::TYPE_S32;
629 case TGSI_OPCODE_D2F:
630 case TGSI_OPCODE_D2I:
631 case TGSI_OPCODE_D2U:
632 case TGSI_OPCODE_D2I64:
633 case TGSI_OPCODE_D2U64:
634 case TGSI_OPCODE_DABS:
635 case TGSI_OPCODE_DNEG:
636 case TGSI_OPCODE_DADD:
637 case TGSI_OPCODE_DMUL:
638 case TGSI_OPCODE_DDIV:
639 case TGSI_OPCODE_DMAX:
640 case TGSI_OPCODE_DMIN:
641 case TGSI_OPCODE_DSLT:
642 case TGSI_OPCODE_DSGE:
643 case TGSI_OPCODE_DSEQ:
644 case TGSI_OPCODE_DSNE:
645 case TGSI_OPCODE_DRCP:
646 case TGSI_OPCODE_DSQRT:
647 case TGSI_OPCODE_DMAD:
648 case TGSI_OPCODE_DFMA:
649 case TGSI_OPCODE_DFRAC:
650 case TGSI_OPCODE_DRSQ:
651 case TGSI_OPCODE_DTRUNC:
652 case TGSI_OPCODE_DCEIL:
653 case TGSI_OPCODE_DFLR:
654 case TGSI_OPCODE_DROUND:
655 return nv50_ir::TYPE_F64;
656 case TGSI_OPCODE_U64SEQ:
657 case TGSI_OPCODE_U64SNE:
658 case TGSI_OPCODE_U64SLT:
659 case TGSI_OPCODE_U64SGE:
660 case TGSI_OPCODE_U64MIN:
661 case TGSI_OPCODE_U64MAX:
662 case TGSI_OPCODE_U64ADD:
663 case TGSI_OPCODE_U64MUL:
664 case TGSI_OPCODE_U64SHL:
665 case TGSI_OPCODE_U64SHR:
666 case TGSI_OPCODE_U64DIV:
667 case TGSI_OPCODE_U64MOD:
668 case TGSI_OPCODE_U642F:
669 case TGSI_OPCODE_U642D:
670 return nv50_ir::TYPE_U64;
671 case TGSI_OPCODE_I64ABS:
672 case TGSI_OPCODE_I64SSG:
673 case TGSI_OPCODE_I64NEG:
674 case TGSI_OPCODE_I64SLT:
675 case TGSI_OPCODE_I64SGE:
676 case TGSI_OPCODE_I64MIN:
677 case TGSI_OPCODE_I64MAX:
678 case TGSI_OPCODE_I64SHR:
679 case TGSI_OPCODE_I64DIV:
680 case TGSI_OPCODE_I64MOD:
681 case TGSI_OPCODE_I642F:
682 case TGSI_OPCODE_I642D:
683 return nv50_ir::TYPE_S64;
684 default:
685 return nv50_ir::TYPE_F32;
686 }
687 }
688
689 nv50_ir::DataType Instruction::inferDstType() const
690 {
691 switch (getOpcode()) {
692 case TGSI_OPCODE_D2U:
693 case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
694 case TGSI_OPCODE_D2I:
695 case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
696 case TGSI_OPCODE_FSEQ:
697 case TGSI_OPCODE_FSGE:
698 case TGSI_OPCODE_FSLT:
699 case TGSI_OPCODE_FSNE:
700 case TGSI_OPCODE_DSEQ:
701 case TGSI_OPCODE_DSGE:
702 case TGSI_OPCODE_DSLT:
703 case TGSI_OPCODE_DSNE:
704 case TGSI_OPCODE_I64SLT:
705 case TGSI_OPCODE_I64SGE:
706 case TGSI_OPCODE_U64SEQ:
707 case TGSI_OPCODE_U64SNE:
708 case TGSI_OPCODE_U64SLT:
709 case TGSI_OPCODE_U64SGE:
710 case TGSI_OPCODE_PK2H:
711 return nv50_ir::TYPE_U32;
712 case TGSI_OPCODE_I2F:
713 case TGSI_OPCODE_U2F:
714 case TGSI_OPCODE_D2F:
715 case TGSI_OPCODE_I642F:
716 case TGSI_OPCODE_U642F:
717 case TGSI_OPCODE_UP2H:
718 return nv50_ir::TYPE_F32;
719 case TGSI_OPCODE_I2D:
720 case TGSI_OPCODE_U2D:
721 case TGSI_OPCODE_F2D:
722 case TGSI_OPCODE_I642D:
723 case TGSI_OPCODE_U642D:
724 return nv50_ir::TYPE_F64;
725 case TGSI_OPCODE_I2I64:
726 case TGSI_OPCODE_U2I64:
727 case TGSI_OPCODE_F2I64:
728 case TGSI_OPCODE_D2I64:
729 return nv50_ir::TYPE_S64;
730 case TGSI_OPCODE_F2U64:
731 case TGSI_OPCODE_D2U64:
732 return nv50_ir::TYPE_U64;
733 default:
734 return inferSrcType();
735 }
736 }
737
738 nv50_ir::CondCode Instruction::getSetCond() const
739 {
740 using namespace nv50_ir;
741
742 switch (getOpcode()) {
743 case TGSI_OPCODE_SLT:
744 case TGSI_OPCODE_ISLT:
745 case TGSI_OPCODE_USLT:
746 case TGSI_OPCODE_FSLT:
747 case TGSI_OPCODE_DSLT:
748 case TGSI_OPCODE_I64SLT:
749 case TGSI_OPCODE_U64SLT:
750 return CC_LT;
751 case TGSI_OPCODE_SLE:
752 return CC_LE;
753 case TGSI_OPCODE_SGE:
754 case TGSI_OPCODE_ISGE:
755 case TGSI_OPCODE_USGE:
756 case TGSI_OPCODE_FSGE:
757 case TGSI_OPCODE_DSGE:
758 case TGSI_OPCODE_I64SGE:
759 case TGSI_OPCODE_U64SGE:
760 return CC_GE;
761 case TGSI_OPCODE_SGT:
762 return CC_GT;
763 case TGSI_OPCODE_SEQ:
764 case TGSI_OPCODE_USEQ:
765 case TGSI_OPCODE_FSEQ:
766 case TGSI_OPCODE_DSEQ:
767 case TGSI_OPCODE_U64SEQ:
768 return CC_EQ;
769 case TGSI_OPCODE_SNE:
770 case TGSI_OPCODE_FSNE:
771 case TGSI_OPCODE_DSNE:
772 case TGSI_OPCODE_U64SNE:
773 return CC_NEU;
774 case TGSI_OPCODE_USNE:
775 return CC_NE;
776 default:
777 return CC_ALWAYS;
778 }
779 }
780
781 #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
782
783 static nv50_ir::operation translateOpcode(uint opcode)
784 {
785 switch (opcode) {
786 NV50_IR_OPCODE_CASE(ARL, SHL);
787 NV50_IR_OPCODE_CASE(MOV, MOV);
788
789 NV50_IR_OPCODE_CASE(RCP, RCP);
790 NV50_IR_OPCODE_CASE(RSQ, RSQ);
791 NV50_IR_OPCODE_CASE(SQRT, SQRT);
792
793 NV50_IR_OPCODE_CASE(MUL, MUL);
794 NV50_IR_OPCODE_CASE(ADD, ADD);
795
796 NV50_IR_OPCODE_CASE(MIN, MIN);
797 NV50_IR_OPCODE_CASE(MAX, MAX);
798 NV50_IR_OPCODE_CASE(SLT, SET);
799 NV50_IR_OPCODE_CASE(SGE, SET);
800 NV50_IR_OPCODE_CASE(MAD, MAD);
801 NV50_IR_OPCODE_CASE(FMA, FMA);
802
803 NV50_IR_OPCODE_CASE(FLR, FLOOR);
804 NV50_IR_OPCODE_CASE(ROUND, CVT);
805 NV50_IR_OPCODE_CASE(EX2, EX2);
806 NV50_IR_OPCODE_CASE(LG2, LG2);
807 NV50_IR_OPCODE_CASE(POW, POW);
808
809 NV50_IR_OPCODE_CASE(COS, COS);
810 NV50_IR_OPCODE_CASE(DDX, DFDX);
811 NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
812 NV50_IR_OPCODE_CASE(DDY, DFDY);
813 NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);
814 NV50_IR_OPCODE_CASE(KILL, DISCARD);
815
816 NV50_IR_OPCODE_CASE(SEQ, SET);
817 NV50_IR_OPCODE_CASE(SGT, SET);
818 NV50_IR_OPCODE_CASE(SIN, SIN);
819 NV50_IR_OPCODE_CASE(SLE, SET);
820 NV50_IR_OPCODE_CASE(SNE, SET);
821 NV50_IR_OPCODE_CASE(TEX, TEX);
822 NV50_IR_OPCODE_CASE(TXD, TXD);
823 NV50_IR_OPCODE_CASE(TXP, TEX);
824
825 NV50_IR_OPCODE_CASE(CAL, CALL);
826 NV50_IR_OPCODE_CASE(RET, RET);
827 NV50_IR_OPCODE_CASE(CMP, SLCT);
828
829 NV50_IR_OPCODE_CASE(TXB, TXB);
830
831 NV50_IR_OPCODE_CASE(DIV, DIV);
832
833 NV50_IR_OPCODE_CASE(TXL, TXL);
834 NV50_IR_OPCODE_CASE(TEX_LZ, TXL);
835
836 NV50_IR_OPCODE_CASE(CEIL, CEIL);
837 NV50_IR_OPCODE_CASE(I2F, CVT);
838 NV50_IR_OPCODE_CASE(NOT, NOT);
839 NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
840 NV50_IR_OPCODE_CASE(SHL, SHL);
841
842 NV50_IR_OPCODE_CASE(AND, AND);
843 NV50_IR_OPCODE_CASE(OR, OR);
844 NV50_IR_OPCODE_CASE(MOD, MOD);
845 NV50_IR_OPCODE_CASE(XOR, XOR);
846 NV50_IR_OPCODE_CASE(SAD, SAD);
847 NV50_IR_OPCODE_CASE(TXF, TXF);
848 NV50_IR_OPCODE_CASE(TXF_LZ, TXF);
849 NV50_IR_OPCODE_CASE(TXQ, TXQ);
850 NV50_IR_OPCODE_CASE(TXQS, TXQ);
851 NV50_IR_OPCODE_CASE(TG4, TXG);
852 NV50_IR_OPCODE_CASE(LODQ, TXLQ);
853
854 NV50_IR_OPCODE_CASE(EMIT, EMIT);
855 NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
856
857 NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);
858
859 NV50_IR_OPCODE_CASE(F2I, CVT);
860 NV50_IR_OPCODE_CASE(FSEQ, SET);
861 NV50_IR_OPCODE_CASE(FSGE, SET);
862 NV50_IR_OPCODE_CASE(FSLT, SET);
863 NV50_IR_OPCODE_CASE(FSNE, SET);
864 NV50_IR_OPCODE_CASE(IDIV, DIV);
865 NV50_IR_OPCODE_CASE(IMAX, MAX);
866 NV50_IR_OPCODE_CASE(IMIN, MIN);
867 NV50_IR_OPCODE_CASE(IABS, ABS);
868 NV50_IR_OPCODE_CASE(INEG, NEG);
869 NV50_IR_OPCODE_CASE(ISGE, SET);
870 NV50_IR_OPCODE_CASE(ISHR, SHR);
871 NV50_IR_OPCODE_CASE(ISLT, SET);
872 NV50_IR_OPCODE_CASE(F2U, CVT);
873 NV50_IR_OPCODE_CASE(U2F, CVT);
874 NV50_IR_OPCODE_CASE(UADD, ADD);
875 NV50_IR_OPCODE_CASE(UDIV, DIV);
876 NV50_IR_OPCODE_CASE(UMAD, MAD);
877 NV50_IR_OPCODE_CASE(UMAX, MAX);
878 NV50_IR_OPCODE_CASE(UMIN, MIN);
879 NV50_IR_OPCODE_CASE(UMOD, MOD);
880 NV50_IR_OPCODE_CASE(UMUL, MUL);
881 NV50_IR_OPCODE_CASE(USEQ, SET);
882 NV50_IR_OPCODE_CASE(USGE, SET);
883 NV50_IR_OPCODE_CASE(USHR, SHR);
884 NV50_IR_OPCODE_CASE(USLT, SET);
885 NV50_IR_OPCODE_CASE(USNE, SET);
886
887 NV50_IR_OPCODE_CASE(DABS, ABS);
888 NV50_IR_OPCODE_CASE(DNEG, NEG);
889 NV50_IR_OPCODE_CASE(DADD, ADD);
890 NV50_IR_OPCODE_CASE(DMUL, MUL);
891 NV50_IR_OPCODE_CASE(DDIV, DIV);
892 NV50_IR_OPCODE_CASE(DMAX, MAX);
893 NV50_IR_OPCODE_CASE(DMIN, MIN);
894 NV50_IR_OPCODE_CASE(DSLT, SET);
895 NV50_IR_OPCODE_CASE(DSGE, SET);
896 NV50_IR_OPCODE_CASE(DSEQ, SET);
897 NV50_IR_OPCODE_CASE(DSNE, SET);
898 NV50_IR_OPCODE_CASE(DRCP, RCP);
899 NV50_IR_OPCODE_CASE(DSQRT, SQRT);
900 NV50_IR_OPCODE_CASE(DMAD, MAD);
901 NV50_IR_OPCODE_CASE(DFMA, FMA);
902 NV50_IR_OPCODE_CASE(D2I, CVT);
903 NV50_IR_OPCODE_CASE(D2U, CVT);
904 NV50_IR_OPCODE_CASE(I2D, CVT);
905 NV50_IR_OPCODE_CASE(U2D, CVT);
906 NV50_IR_OPCODE_CASE(DRSQ, RSQ);
907 NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
908 NV50_IR_OPCODE_CASE(DCEIL, CEIL);
909 NV50_IR_OPCODE_CASE(DFLR, FLOOR);
910 NV50_IR_OPCODE_CASE(DROUND, CVT);
911
912 NV50_IR_OPCODE_CASE(U64SEQ, SET);
913 NV50_IR_OPCODE_CASE(U64SNE, SET);
914 NV50_IR_OPCODE_CASE(U64SLT, SET);
915 NV50_IR_OPCODE_CASE(U64SGE, SET);
916 NV50_IR_OPCODE_CASE(I64SLT, SET);
917 NV50_IR_OPCODE_CASE(I64SGE, SET);
918 NV50_IR_OPCODE_CASE(I2I64, CVT);
919 NV50_IR_OPCODE_CASE(U2I64, CVT);
920 NV50_IR_OPCODE_CASE(F2I64, CVT);
921 NV50_IR_OPCODE_CASE(F2U64, CVT);
922 NV50_IR_OPCODE_CASE(D2I64, CVT);
923 NV50_IR_OPCODE_CASE(D2U64, CVT);
924 NV50_IR_OPCODE_CASE(I642F, CVT);
925 NV50_IR_OPCODE_CASE(U642F, CVT);
926 NV50_IR_OPCODE_CASE(I642D, CVT);
927 NV50_IR_OPCODE_CASE(U642D, CVT);
928
929 NV50_IR_OPCODE_CASE(I64MIN, MIN);
930 NV50_IR_OPCODE_CASE(U64MIN, MIN);
931 NV50_IR_OPCODE_CASE(I64MAX, MAX);
932 NV50_IR_OPCODE_CASE(U64MAX, MAX);
933 NV50_IR_OPCODE_CASE(I64ABS, ABS);
934 NV50_IR_OPCODE_CASE(I64NEG, NEG);
935 NV50_IR_OPCODE_CASE(U64ADD, ADD);
936 NV50_IR_OPCODE_CASE(U64MUL, MUL);
937 NV50_IR_OPCODE_CASE(U64SHL, SHL);
938 NV50_IR_OPCODE_CASE(I64SHR, SHR);
939 NV50_IR_OPCODE_CASE(U64SHR, SHR);
940
941 NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
942 NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
943
944 NV50_IR_OPCODE_CASE(SAMPLE, TEX);
945 NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
946 NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
947 NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
948 NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
949 NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
950 NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);
951 NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);
952 NV50_IR_OPCODE_CASE(GATHER4, TXG);
953 NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
954
955 NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);
956 NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);
957 NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);
958 NV50_IR_OPCODE_CASE(ATOMAND, ATOM);
959 NV50_IR_OPCODE_CASE(ATOMOR, ATOM);
960 NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);
961 NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);
962 NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);
963 NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);
964 NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);
965
966 NV50_IR_OPCODE_CASE(TEX2, TEX);
967 NV50_IR_OPCODE_CASE(TXB2, TXB);
968 NV50_IR_OPCODE_CASE(TXL2, TXL);
969
970 NV50_IR_OPCODE_CASE(IBFE, EXTBF);
971 NV50_IR_OPCODE_CASE(UBFE, EXTBF);
972 NV50_IR_OPCODE_CASE(BFI, INSBF);
973 NV50_IR_OPCODE_CASE(BREV, EXTBF);
974 NV50_IR_OPCODE_CASE(POPC, POPCNT);
975 NV50_IR_OPCODE_CASE(LSB, BFIND);
976 NV50_IR_OPCODE_CASE(IMSB, BFIND);
977 NV50_IR_OPCODE_CASE(UMSB, BFIND);
978
979 NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE);
980 NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE);
981 NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE);
982
983 NV50_IR_OPCODE_CASE(BALLOT, VOTE);
984 NV50_IR_OPCODE_CASE(READ_INVOC, SHFL);
985 NV50_IR_OPCODE_CASE(READ_FIRST, SHFL);
986
987 NV50_IR_OPCODE_CASE(END, EXIT);
988
989 default:
990 return nv50_ir::OP_NOP;
991 }
992 }
993
994 static uint16_t opcodeToSubOp(uint opcode)
995 {
996 switch (opcode) {
997 case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL);
998 case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL);
999 case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL);
1000 case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD;
1001 case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH;
1002 case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS;
1003 case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND;
1004 case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR;
1005 case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR;
1006 case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN;
1007 case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN;
1008 case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX;
1009 case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX;
1010 case TGSI_OPCODE_IMUL_HI:
1011 case TGSI_OPCODE_UMUL_HI:
1012 return NV50_IR_SUBOP_MUL_HIGH;
1013 case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL;
1014 case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY;
1015 case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI;
1016 default:
1017 return 0;
1018 }
1019 }
1020
1021 bool Instruction::checkDstSrcAliasing() const
1022 {
1023 if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
1024 return false;
1025
1026 for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
1027 if (insn->Src[s].Register.File == TGSI_FILE_NULL)
1028 break;
1029 if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
1030 insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
1031 return true;
1032 }
1033 return false;
1034 }
1035
1036 class Source
1037 {
1038 public:
1039 Source(struct nv50_ir_prog_info *);
1040 ~Source();
1041
1042 public:
1043 bool scanSource();
1044 unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }
1045
1046 public:
1047 struct tgsi_shader_info scan;
1048 struct tgsi_full_instruction *insns;
1049 const struct tgsi_token *tokens;
1050 struct nv50_ir_prog_info *info;
1051
1052 nv50_ir::DynArray tempArrays;
1053 nv50_ir::DynArray immdArrays;
1054
1055 typedef nv50_ir::BuildUtil::Location Location;
1056 // these registers are per-subroutine, cannot be used for parameter passing
1057 std::set<Location> locals;
1058
1059 std::set<int> indirectTempArrays;
1060 std::map<int, int> indirectTempOffsets;
1061 std::map<int, std::pair<int, int> > tempArrayInfo;
1062 std::vector<int> tempArrayId;
1063
1064 int clipVertexOutput;
1065
1066 struct TextureView {
1067 uint8_t target; // TGSI_TEXTURE_*
1068 };
1069 std::vector<TextureView> textureViews;
1070
1071 /*
1072 struct Resource {
1073 uint8_t target; // TGSI_TEXTURE_*
1074 bool raw;
1075 uint8_t slot; // $surface index
1076 };
1077 std::vector<Resource> resources;
1078 */
1079
1080 struct Image {
1081 uint8_t target; // TGSI_TEXTURE_*
1082 bool raw;
1083 uint8_t slot;
1084 uint16_t format; // PIPE_FORMAT_*
1085 };
1086 std::vector<Image> images;
1087
1088 struct MemoryFile {
1089 uint8_t mem_type; // TGSI_MEMORY_TYPE_*
1090 };
1091 std::vector<MemoryFile> memoryFiles;
1092
1093 private:
1094 int inferSysValDirection(unsigned sn) const;
1095 bool scanDeclaration(const struct tgsi_full_declaration *);
1096 bool scanInstruction(const struct tgsi_full_instruction *);
1097 void scanInstructionSrc(const Instruction& insn,
1098 const Instruction::SrcRegister& src,
1099 unsigned mask);
1100 void scanProperty(const struct tgsi_full_property *);
1101 void scanImmediate(const struct tgsi_full_immediate *);
1102
1103 inline bool isEdgeFlagPassthrough(const Instruction&) const;
1104 };
1105
1106 Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
1107 {
1108 tokens = (const struct tgsi_token *)info->bin.source;
1109
1110 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1111 tgsi_dump(tokens, 0);
1112 }
1113
1114 Source::~Source()
1115 {
1116 if (insns)
1117 FREE(insns);
1118
1119 if (info->immd.data)
1120 FREE(info->immd.data);
1121 if (info->immd.type)
1122 FREE(info->immd.type);
1123 }
1124
1125 bool Source::scanSource()
1126 {
1127 unsigned insnCount = 0;
1128 struct tgsi_parse_context parse;
1129
1130 tgsi_scan_shader(tokens, &scan);
1131
1132 insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
1133 sizeof(insns[0]));
1134 if (!insns)
1135 return false;
1136
1137 clipVertexOutput = -1;
1138
1139 textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
1140 //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
1141 images.resize(scan.file_max[TGSI_FILE_IMAGE] + 1);
1142 tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
1143 memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
1144
1145 info->immd.bufSize = 0;
1146
1147 info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
1148 info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
1149 info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
1150
1151 if (info->type == PIPE_SHADER_FRAGMENT) {
1152 info->prop.fp.writesDepth = scan.writes_z;
1153 info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase;
1154 } else
1155 if (info->type == PIPE_SHADER_GEOMETRY) {
1156 info->prop.gp.instanceCount = 1; // default value
1157 }
1158
1159 info->io.viewportId = -1;
1160
1161 info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
1162 info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
1163
1164 tgsi_parse_init(&parse, tokens);
1165 while (!tgsi_parse_end_of_tokens(&parse)) {
1166 tgsi_parse_token(&parse);
1167
1168 switch (parse.FullToken.Token.Type) {
1169 case TGSI_TOKEN_TYPE_IMMEDIATE:
1170 scanImmediate(&parse.FullToken.FullImmediate);
1171 break;
1172 case TGSI_TOKEN_TYPE_DECLARATION:
1173 scanDeclaration(&parse.FullToken.FullDeclaration);
1174 break;
1175 case TGSI_TOKEN_TYPE_INSTRUCTION:
1176 insns[insnCount++] = parse.FullToken.FullInstruction;
1177 scanInstruction(&parse.FullToken.FullInstruction);
1178 break;
1179 case TGSI_TOKEN_TYPE_PROPERTY:
1180 scanProperty(&parse.FullToken.FullProperty);
1181 break;
1182 default:
1183 INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
1184 break;
1185 }
1186 }
1187 tgsi_parse_free(&parse);
1188
1189 if (indirectTempArrays.size()) {
1190 int tempBase = 0;
1191 for (std::set<int>::const_iterator it = indirectTempArrays.begin();
1192 it != indirectTempArrays.end(); ++it) {
1193 std::pair<int, int>& info = tempArrayInfo[*it];
1194 indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first));
1195 tempBase += info.second;
1196 }
1197 info->bin.tlsSpace += tempBase * 16;
1198 }
1199
1200 if (info->io.genUserClip > 0) {
1201 info->io.clipDistances = info->io.genUserClip;
1202
1203 const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1204
1205 for (unsigned int n = 0; n < nOut; ++n) {
1206 unsigned int i = info->numOutputs++;
1207 info->out[i].id = i;
1208 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1209 info->out[i].si = n;
1210 info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1211 }
1212 }
1213
1214 return info->assignSlots(info) == 0;
1215 }
1216
1217 void Source::scanProperty(const struct tgsi_full_property *prop)
1218 {
1219 switch (prop->Property.PropertyName) {
1220 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
1221 info->prop.gp.outputPrim = prop->u[0].Data;
1222 break;
1223 case TGSI_PROPERTY_GS_INPUT_PRIM:
1224 info->prop.gp.inputPrim = prop->u[0].Data;
1225 break;
1226 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
1227 info->prop.gp.maxVertices = prop->u[0].Data;
1228 break;
1229 case TGSI_PROPERTY_GS_INVOCATIONS:
1230 info->prop.gp.instanceCount = prop->u[0].Data;
1231 break;
1232 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1233 info->prop.fp.separateFragData = true;
1234 break;
1235 case TGSI_PROPERTY_FS_COORD_ORIGIN:
1236 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
1237 case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
1238 // we don't care
1239 break;
1240 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1241 info->io.genUserClip = -1;
1242 break;
1243 case TGSI_PROPERTY_TCS_VERTICES_OUT:
1244 info->prop.tp.outputPatchSize = prop->u[0].Data;
1245 break;
1246 case TGSI_PROPERTY_TES_PRIM_MODE:
1247 info->prop.tp.domain = prop->u[0].Data;
1248 break;
1249 case TGSI_PROPERTY_TES_SPACING:
1250 info->prop.tp.partitioning = prop->u[0].Data;
1251 break;
1252 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
1253 info->prop.tp.winding = prop->u[0].Data;
1254 break;
1255 case TGSI_PROPERTY_TES_POINT_MODE:
1256 if (prop->u[0].Data)
1257 info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
1258 else
1259 info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
1260 break;
1261 case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
1262 info->prop.cp.numThreads[0] = prop->u[0].Data;
1263 break;
1264 case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
1265 info->prop.cp.numThreads[1] = prop->u[0].Data;
1266 break;
1267 case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
1268 info->prop.cp.numThreads[2] = prop->u[0].Data;
1269 break;
1270 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
1271 info->io.clipDistances = prop->u[0].Data;
1272 break;
1273 case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
1274 info->io.cullDistances = prop->u[0].Data;
1275 break;
1276 case TGSI_PROPERTY_NEXT_SHADER:
1277 /* Do not need to know the next shader stage. */
1278 break;
1279 case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
1280 info->prop.fp.earlyFragTests = prop->u[0].Data;
1281 break;
1282 case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE:
1283 info->prop.fp.postDepthCoverage = prop->u[0].Data;
1284 break;
1285 case TGSI_PROPERTY_MUL_ZERO_WINS:
1286 info->io.mul_zero_wins = prop->u[0].Data;
1287 break;
1288 default:
1289 INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
1290 break;
1291 }
1292 }
1293
1294 void Source::scanImmediate(const struct tgsi_full_immediate *imm)
1295 {
1296 const unsigned n = info->immd.count++;
1297
1298 assert(n < scan.immediate_count);
1299
1300 for (int c = 0; c < 4; ++c)
1301 info->immd.data[n * 4 + c] = imm->u[c].Uint;
1302
1303 info->immd.type[n] = imm->Immediate.DataType;
1304 }
1305
1306 int Source::inferSysValDirection(unsigned sn) const
1307 {
1308 switch (sn) {
1309 case TGSI_SEMANTIC_INSTANCEID:
1310 case TGSI_SEMANTIC_VERTEXID:
1311 return 1;
1312 case TGSI_SEMANTIC_LAYER:
1313 #if 0
1314 case TGSI_SEMANTIC_VIEWPORTINDEX:
1315 return 0;
1316 #endif
1317 case TGSI_SEMANTIC_PRIMID:
1318 return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
1319 default:
1320 return 0;
1321 }
1322 }
1323
1324 bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
1325 {
1326 unsigned i, c;
1327 unsigned sn = TGSI_SEMANTIC_GENERIC;
1328 unsigned si = 0;
1329 const unsigned first = decl->Range.First, last = decl->Range.Last;
1330 const int arrayId = decl->Array.ArrayID;
1331
1332 if (decl->Declaration.Semantic) {
1333 sn = decl->Semantic.Name;
1334 si = decl->Semantic.Index;
1335 }
1336
1337 if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) {
1338 for (i = first; i <= last; ++i) {
1339 for (c = 0; c < 4; ++c) {
1340 locals.insert(
1341 Location(decl->Declaration.File, decl->Dim.Index2D, i, c));
1342 }
1343 }
1344 }
1345
1346 switch (decl->Declaration.File) {
1347 case TGSI_FILE_INPUT:
1348 if (info->type == PIPE_SHADER_VERTEX) {
1349 // all vertex attributes are equal
1350 for (i = first; i <= last; ++i) {
1351 info->in[i].sn = TGSI_SEMANTIC_GENERIC;
1352 info->in[i].si = i;
1353 }
1354 } else {
1355 for (i = first; i <= last; ++i, ++si) {
1356 info->in[i].id = i;
1357 info->in[i].sn = sn;
1358 info->in[i].si = si;
1359 if (info->type == PIPE_SHADER_FRAGMENT) {
1360 // translate interpolation mode
1361 switch (decl->Interp.Interpolate) {
1362 case TGSI_INTERPOLATE_CONSTANT:
1363 info->in[i].flat = 1;
1364 break;
1365 case TGSI_INTERPOLATE_COLOR:
1366 info->in[i].sc = 1;
1367 break;
1368 case TGSI_INTERPOLATE_LINEAR:
1369 info->in[i].linear = 1;
1370 break;
1371 default:
1372 break;
1373 }
1374 if (decl->Interp.Location)
1375 info->in[i].centroid = 1;
1376 }
1377
1378 if (sn == TGSI_SEMANTIC_PATCH)
1379 info->in[i].patch = 1;
1380 if (sn == TGSI_SEMANTIC_PATCH)
1381 info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
1382 }
1383 }
1384 break;
1385 case TGSI_FILE_OUTPUT:
1386 for (i = first; i <= last; ++i, ++si) {
1387 switch (sn) {
1388 case TGSI_SEMANTIC_POSITION:
1389 if (info->type == PIPE_SHADER_FRAGMENT)
1390 info->io.fragDepth = i;
1391 else
1392 if (clipVertexOutput < 0)
1393 clipVertexOutput = i;
1394 break;
1395 case TGSI_SEMANTIC_COLOR:
1396 if (info->type == PIPE_SHADER_FRAGMENT)
1397 info->prop.fp.numColourResults++;
1398 break;
1399 case TGSI_SEMANTIC_EDGEFLAG:
1400 info->io.edgeFlagOut = i;
1401 break;
1402 case TGSI_SEMANTIC_CLIPVERTEX:
1403 clipVertexOutput = i;
1404 break;
1405 case TGSI_SEMANTIC_CLIPDIST:
1406 info->io.genUserClip = -1;
1407 break;
1408 case TGSI_SEMANTIC_SAMPLEMASK:
1409 info->io.sampleMask = i;
1410 break;
1411 case TGSI_SEMANTIC_VIEWPORT_INDEX:
1412 info->io.viewportId = i;
1413 break;
1414 case TGSI_SEMANTIC_PATCH:
1415 info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
1416 /* fallthrough */
1417 case TGSI_SEMANTIC_TESSOUTER:
1418 case TGSI_SEMANTIC_TESSINNER:
1419 info->out[i].patch = 1;
1420 break;
1421 default:
1422 break;
1423 }
1424 info->out[i].id = i;
1425 info->out[i].sn = sn;
1426 info->out[i].si = si;
1427 }
1428 break;
1429 case TGSI_FILE_SYSTEM_VALUE:
1430 switch (sn) {
1431 case TGSI_SEMANTIC_INSTANCEID:
1432 info->io.instanceId = first;
1433 break;
1434 case TGSI_SEMANTIC_VERTEXID:
1435 info->io.vertexId = first;
1436 break;
1437 case TGSI_SEMANTIC_BASEVERTEX:
1438 case TGSI_SEMANTIC_BASEINSTANCE:
1439 case TGSI_SEMANTIC_DRAWID:
1440 info->prop.vp.usesDrawParameters = true;
1441 break;
1442 case TGSI_SEMANTIC_SAMPLEID:
1443 case TGSI_SEMANTIC_SAMPLEPOS:
1444 info->prop.fp.persampleInvocation = true;
1445 break;
1446 case TGSI_SEMANTIC_SAMPLEMASK:
1447 info->prop.fp.usesSampleMaskIn = true;
1448 break;
1449 default:
1450 break;
1451 }
1452 for (i = first; i <= last; ++i, ++si) {
1453 info->sv[i].sn = sn;
1454 info->sv[i].si = si;
1455 info->sv[i].input = inferSysValDirection(sn);
1456
1457 switch (sn) {
1458 case TGSI_SEMANTIC_TESSOUTER:
1459 case TGSI_SEMANTIC_TESSINNER:
1460 info->sv[i].patch = 1;
1461 break;
1462 }
1463 }
1464 break;
1465 /*
1466 case TGSI_FILE_RESOURCE:
1467 for (i = first; i <= last; ++i) {
1468 resources[i].target = decl->Resource.Resource;
1469 resources[i].raw = decl->Resource.Raw;
1470 resources[i].slot = i;
1471 }
1472 break;
1473 */
1474 case TGSI_FILE_IMAGE:
1475 for (i = first; i <= last; ++i) {
1476 images[i].target = decl->Image.Resource;
1477 images[i].raw = decl->Image.Raw;
1478 images[i].format = decl->Image.Format;
1479 images[i].slot = i;
1480 }
1481 break;
1482 case TGSI_FILE_SAMPLER_VIEW:
1483 for (i = first; i <= last; ++i)
1484 textureViews[i].target = decl->SamplerView.Resource;
1485 break;
1486 case TGSI_FILE_MEMORY:
1487 for (i = first; i <= last; ++i)
1488 memoryFiles[i].mem_type = decl->Declaration.MemType;
1489 break;
1490 case TGSI_FILE_NULL:
1491 case TGSI_FILE_TEMPORARY:
1492 for (i = first; i <= last; ++i)
1493 tempArrayId[i] = arrayId;
1494 if (arrayId)
1495 tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
1496 first, last - first + 1)));
1497 break;
1498 case TGSI_FILE_ADDRESS:
1499 case TGSI_FILE_CONSTANT:
1500 case TGSI_FILE_IMMEDIATE:
1501 case TGSI_FILE_SAMPLER:
1502 case TGSI_FILE_BUFFER:
1503 break;
1504 default:
1505 ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
1506 return false;
1507 }
1508 return true;
1509 }
1510
1511 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
1512 {
1513 return insn.getOpcode() == TGSI_OPCODE_MOV &&
1514 insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
1515 insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
1516 }
1517
1518 void Source::scanInstructionSrc(const Instruction& insn,
1519 const Instruction::SrcRegister& src,
1520 unsigned mask)
1521 {
1522 if (src.getFile() == TGSI_FILE_TEMPORARY) {
1523 if (src.isIndirect(0))
1524 indirectTempArrays.insert(src.getArrayId());
1525 } else
1526 if (src.getFile() == TGSI_FILE_BUFFER ||
1527 src.getFile() == TGSI_FILE_IMAGE ||
1528 (src.getFile() == TGSI_FILE_MEMORY &&
1529 memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
1530 info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
1531 0x1 : 0x2;
1532 } else
1533 if (src.getFile() == TGSI_FILE_OUTPUT) {
1534 if (src.isIndirect(0)) {
1535 // We don't know which one is accessed, just mark everything for
1536 // reading. This is an extremely unlikely occurrence.
1537 for (unsigned i = 0; i < info->numOutputs; ++i)
1538 info->out[i].oread = 1;
1539 } else {
1540 info->out[src.getIndex(0)].oread = 1;
1541 }
1542 }
1543 if (src.getFile() != TGSI_FILE_INPUT)
1544 return;
1545
1546 if (src.isIndirect(0)) {
1547 for (unsigned i = 0; i < info->numInputs; ++i)
1548 info->in[i].mask = 0xf;
1549 } else {
1550 const int i = src.getIndex(0);
1551 for (unsigned c = 0; c < 4; ++c) {
1552 if (!(mask & (1 << c)))
1553 continue;
1554 int k = src.getSwizzle(c);
1555 if (k <= TGSI_SWIZZLE_W)
1556 info->in[i].mask |= 1 << k;
1557 }
1558 switch (info->in[i].sn) {
1559 case TGSI_SEMANTIC_PSIZE:
1560 case TGSI_SEMANTIC_PRIMID:
1561 case TGSI_SEMANTIC_FOG:
1562 info->in[i].mask &= 0x1;
1563 break;
1564 case TGSI_SEMANTIC_PCOORD:
1565 info->in[i].mask &= 0x3;
1566 break;
1567 default:
1568 break;
1569 }
1570 }
1571 }
1572
1573 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
1574 {
1575 Instruction insn(inst);
1576
1577 if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
1578 info->numBarriers = 1;
1579
1580 if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
1581 info->prop.fp.readsFramebuffer = true;
1582
1583 if (insn.dstCount()) {
1584 Instruction::DstRegister dst = insn.getDst(0);
1585
1586 if (dst.getFile() == TGSI_FILE_OUTPUT) {
1587 if (dst.isIndirect(0))
1588 for (unsigned i = 0; i < info->numOutputs; ++i)
1589 info->out[i].mask = 0xf;
1590 else
1591 info->out[dst.getIndex(0)].mask |= dst.getMask();
1592
1593 if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
1594 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
1595 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||
1596 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||
1597 info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
1598 info->out[dst.getIndex(0)].mask &= 1;
1599
1600 if (isEdgeFlagPassthrough(insn))
1601 info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
1602 } else
1603 if (dst.getFile() == TGSI_FILE_TEMPORARY) {
1604 if (dst.isIndirect(0))
1605 indirectTempArrays.insert(dst.getArrayId());
1606 } else
1607 if (dst.getFile() == TGSI_FILE_BUFFER ||
1608 dst.getFile() == TGSI_FILE_IMAGE ||
1609 (dst.getFile() == TGSI_FILE_MEMORY &&
1610 memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
1611 info->io.globalAccess |= 0x2;
1612 }
1613 }
1614
1615 for (unsigned s = 0; s < insn.srcCount(); ++s)
1616 scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
1617
1618 for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
1619 scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());
1620
1621 return true;
1622 }
1623
1624 nv50_ir::TexInstruction::Target
1625 Instruction::getTexture(const tgsi::Source *code, int s) const
1626 {
1627 // XXX: indirect access
1628 unsigned int r;
1629
1630 switch (getSrc(s).getFile()) {
1631 /*
1632 case TGSI_FILE_RESOURCE:
1633 r = getSrc(s).getIndex(0);
1634 return translateTexture(code->resources.at(r).target);
1635 */
1636 case TGSI_FILE_SAMPLER_VIEW:
1637 r = getSrc(s).getIndex(0);
1638 return translateTexture(code->textureViews.at(r).target);
1639 default:
1640 return translateTexture(insn->Texture.Texture);
1641 }
1642 }
1643
1644 } // namespace tgsi
1645
1646 namespace {
1647
1648 using namespace nv50_ir;
1649
1650 class Converter : public BuildUtil
1651 {
1652 public:
1653 Converter(Program *, const tgsi::Source *);
1654 ~Converter();
1655
1656 bool run();
1657
1658 private:
1659 struct Subroutine
1660 {
1661 Subroutine(Function *f) : f(f) { }
1662 Function *f;
1663 ValueMap values;
1664 };
1665
1666 Value *shiftAddress(Value *);
1667 Value *getVertexBase(int s);
1668 Value *getOutputBase(int s);
1669 DataArray *getArrayForFile(unsigned file, int idx);
1670 Value *fetchSrc(int s, int c);
1671 Value *acquireDst(int d, int c);
1672 void storeDst(int d, int c, Value *);
1673
1674 Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
1675 void storeDst(const tgsi::Instruction::DstRegister dst, int c,
1676 Value *val, Value *ptr);
1677
1678 void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
1679 Value *applySrcMod(Value *, int s, int c);
1680
1681 Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
1682 Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
1683 Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);
1684
1685 bool isSubGroupMask(uint8_t semantic);
1686
1687 bool handleInstruction(const struct tgsi_full_instruction *);
1688 void exportOutputs();
1689 inline Subroutine *getSubroutine(unsigned ip);
1690 inline Subroutine *getSubroutine(Function *);
1691 inline bool isEndOfSubroutine(uint ip);
1692
1693 void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);
1694
1695 // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
1696 void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
1697 void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
1698 void handleTXF(Value *dst0[4], int R, int L_M);
1699 void handleTXQ(Value *dst0[4], enum TexQuery, int R);
1700 void handleFBFETCH(Value *dst0[4]);
1701 void handleLIT(Value *dst0[4]);
1702 void handleUserClipPlanes();
1703
1704 // Symbol *getResourceBase(int r);
1705 void getImageCoords(std::vector<Value *>&, int r, int s);
1706
1707 void handleLOAD(Value *dst0[4]);
1708 void handleSTORE();
1709 void handleATOM(Value *dst0[4], DataType, uint16_t subOp);
1710
1711 void handleINTERP(Value *dst0[4]);
1712
1713 uint8_t translateInterpMode(const struct nv50_ir_varying *var,
1714 operation& op);
1715 Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
1716
1717 void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
1718
1719 Value *buildDot(int dim);
1720
1721 class BindArgumentsPass : public Pass {
1722 public:
1723 BindArgumentsPass(Converter &conv) : conv(conv) { }
1724
1725 private:
1726 Converter &conv;
1727 Subroutine *sub;
1728
1729 inline const Location *getValueLocation(Subroutine *, Value *);
1730
1731 template<typename T> inline void
1732 updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
1733 T (Function::*proto));
1734
1735 template<typename T> inline void
1736 updatePrototype(BitSet *set, void (Function::*updateSet)(),
1737 T (Function::*proto));
1738
1739 protected:
1740 bool visit(Function *);
1741 bool visit(BasicBlock *bb) { return false; }
1742 };
1743
1744 private:
1745 const tgsi::Source *code;
1746 const struct nv50_ir_prog_info *info;
1747
1748 struct {
1749 std::map<unsigned, Subroutine> map;
1750 Subroutine *cur;
1751 } sub;
1752
1753 uint ip; // instruction pointer
1754
1755 tgsi::Instruction tgsi;
1756
1757 DataType dstTy;
1758 DataType srcTy;
1759
1760 DataArray tData; // TGSI_FILE_TEMPORARY
1761 DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
1762 DataArray aData; // TGSI_FILE_ADDRESS
1763 DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
1764
1765 Value *zero;
1766 Value *fragCoord[4];
1767 Value *clipVtx[4];
1768
1769 Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
1770 uint8_t vtxBaseValid;
1771
1772 Value *outBase; // base address of vertex out patch (for TCP)
1773
1774 Stack condBBs; // fork BB, then else clause BB
1775 Stack joinBBs; // fork BB, for inserting join ops on ENDIF
1776 Stack loopBBs; // loop headers
1777 Stack breakBBs; // end of / after loop
1778
1779 Value *viewport;
1780 };
1781
1782 Symbol *
1783 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
1784 {
1785 const int swz = src.getSwizzle(c);
1786
1787 /* TODO: Use Array ID when it's available for the index */
1788 return makeSym(src.getFile(),
1789 src.is2D() ? src.getIndex(1) : 0,
1790 src.getIndex(0), swz,
1791 src.getIndex(0) * 16 + swz * 4);
1792 }
1793
1794 Symbol *
1795 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
1796 {
1797 /* TODO: Use Array ID when it's available for the index */
1798 return makeSym(dst.getFile(),
1799 dst.is2D() ? dst.getIndex(1) : 0,
1800 dst.getIndex(0), c,
1801 dst.getIndex(0) * 16 + c * 4);
1802 }
1803
1804 Symbol *
1805 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
1806 {
1807 Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
1808
1809 sym->reg.fileIndex = fileIdx;
1810
1811 if (tgsiFile == TGSI_FILE_MEMORY) {
1812 switch (code->memoryFiles[fileIdx].mem_type) {
1813 case TGSI_MEMORY_TYPE_GLOBAL:
1814 /* No-op this is the default for TGSI_FILE_MEMORY */
1815 sym->setFile(FILE_MEMORY_GLOBAL);
1816 break;
1817 case TGSI_MEMORY_TYPE_SHARED:
1818 sym->setFile(FILE_MEMORY_SHARED);
1819 break;
1820 case TGSI_MEMORY_TYPE_INPUT:
1821 assert(prog->getType() == Program::TYPE_COMPUTE);
1822 assert(idx == -1);
1823 sym->setFile(FILE_SHADER_INPUT);
1824 address += info->prop.cp.inputOffset;
1825 break;
1826 default:
1827 assert(0); /* TODO: Add support for global and private memory */
1828 }
1829 }
1830
1831 if (idx >= 0) {
1832 if (sym->reg.file == FILE_SHADER_INPUT)
1833 sym->setOffset(info->in[idx].slot[c] * 4);
1834 else
1835 if (sym->reg.file == FILE_SHADER_OUTPUT)
1836 sym->setOffset(info->out[idx].slot[c] * 4);
1837 else
1838 if (sym->reg.file == FILE_SYSTEM_VALUE)
1839 sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
1840 else
1841 sym->setOffset(address);
1842 } else {
1843 sym->setOffset(address);
1844 }
1845 return sym;
1846 }
1847
1848 uint8_t
1849 Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
1850 {
1851 uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
1852
1853 if (var->flat)
1854 mode = NV50_IR_INTERP_FLAT;
1855 else
1856 if (var->linear)
1857 mode = NV50_IR_INTERP_LINEAR;
1858 else
1859 if (var->sc)
1860 mode = NV50_IR_INTERP_SC;
1861
1862 op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
1863 ? OP_PINTERP : OP_LINTERP;
1864
1865 if (var->centroid)
1866 mode |= NV50_IR_INTERP_CENTROID;
1867
1868 return mode;
1869 }
1870
1871 Value *
1872 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1873 {
1874 operation op;
1875
1876 // XXX: no way to know interpolation mode if we don't know what's accessed
1877 const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
1878 src.getIndex(0)], op);
1879
1880 Instruction *insn = new_Instruction(func, op, TYPE_F32);
1881
1882 insn->setDef(0, getScratch());
1883 insn->setSrc(0, srcToSym(src, c));
1884 if (op == OP_PINTERP)
1885 insn->setSrc(1, fragCoord[3]);
1886 if (ptr)
1887 insn->setIndirect(0, 0, ptr);
1888
1889 insn->setInterpolate(mode);
1890
1891 bb->insertTail(insn);
1892 return insn->getDef(0);
1893 }
1894
1895 Value *
1896 Converter::applySrcMod(Value *val, int s, int c)
1897 {
1898 Modifier m = tgsi.getSrc(s).getMod(c);
1899 DataType ty = tgsi.inferSrcType();
1900
1901 if (m & Modifier(NV50_IR_MOD_ABS))
1902 val = mkOp1v(OP_ABS, ty, getScratch(), val);
1903
1904 if (m & Modifier(NV50_IR_MOD_NEG))
1905 val = mkOp1v(OP_NEG, ty, getScratch(), val);
1906
1907 return val;
1908 }
1909
1910 Value *
1911 Converter::getVertexBase(int s)
1912 {
1913 assert(s < 5);
1914 if (!(vtxBaseValid & (1 << s))) {
1915 const int index = tgsi.getSrc(s).getIndex(1);
1916 Value *rel = NULL;
1917 if (tgsi.getSrc(s).isIndirect(1))
1918 rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
1919 vtxBaseValid |= 1 << s;
1920 vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1921 mkImm(index), rel);
1922 }
1923 return vtxBase[s];
1924 }
1925
1926 Value *
1927 Converter::getOutputBase(int s)
1928 {
1929 assert(s < 5);
1930 if (!(vtxBaseValid & (1 << s))) {
1931 Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
1932 if (tgsi.getSrc(s).isIndirect(1))
1933 offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
1934 fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
1935 offset);
1936 vtxBaseValid |= 1 << s;
1937 vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);
1938 }
1939 return vtxBase[s];
1940 }
1941
1942 Value *
1943 Converter::fetchSrc(int s, int c)
1944 {
1945 Value *res;
1946 Value *ptr = NULL, *dimRel = NULL;
1947
1948 tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
1949
1950 if (src.isIndirect(0))
1951 ptr = fetchSrc(src.getIndirect(0), 0, NULL);
1952
1953 if (src.is2D()) {
1954 switch (src.getFile()) {
1955 case TGSI_FILE_OUTPUT:
1956 dimRel = getOutputBase(s);
1957 break;
1958 case TGSI_FILE_INPUT:
1959 dimRel = getVertexBase(s);
1960 break;
1961 case TGSI_FILE_CONSTANT:
1962 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
1963 if (src.isIndirect(1))
1964 dimRel = fetchSrc(src.getIndirect(1), 0, 0);
1965 break;
1966 default:
1967 break;
1968 }
1969 }
1970
1971 res = fetchSrc(src, c, ptr);
1972
1973 if (dimRel)
1974 res->getInsn()->setIndirect(0, 1, dimRel);
1975
1976 return applySrcMod(res, s, c);
1977 }
1978
1979 Converter::DataArray *
1980 Converter::getArrayForFile(unsigned file, int idx)
1981 {
1982 switch (file) {
1983 case TGSI_FILE_TEMPORARY:
1984 return idx == 0 ? &tData : &lData;
1985 case TGSI_FILE_ADDRESS:
1986 return &aData;
1987 case TGSI_FILE_OUTPUT:
1988 assert(prog->getType() == Program::TYPE_FRAGMENT);
1989 return &oData;
1990 default:
1991 assert(!"invalid/unhandled TGSI source file");
1992 return NULL;
1993 }
1994 }
1995
1996 Value *
1997 Converter::shiftAddress(Value *index)
1998 {
1999 if (!index)
2000 return NULL;
2001 return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
2002 }
2003
2004 void
2005 Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
2006 {
2007 std::map<int, int>::const_iterator it =
2008 code->indirectTempOffsets.find(arrayId);
2009 if (it == code->indirectTempOffsets.end())
2010 return;
2011
2012 idx2d = 1;
2013 idx += it->second;
2014 }
2015
2016 bool
2017 Converter::isSubGroupMask(uint8_t semantic)
2018 {
2019 switch (semantic) {
2020 case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
2021 case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
2022 case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
2023 case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
2024 case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
2025 return true;
2026 default:
2027 return false;
2028 }
2029 }
2030
2031 Value *
2032 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
2033 {
2034 int idx2d = src.is2D() ? src.getIndex(1) : 0;
2035 int idx = src.getIndex(0);
2036 const int swz = src.getSwizzle(c);
2037 Instruction *ld;
2038
2039 switch (src.getFile()) {
2040 case TGSI_FILE_IMMEDIATE:
2041 assert(!ptr);
2042 return loadImm(NULL, info->immd.data[idx * 4 + swz]);
2043 case TGSI_FILE_CONSTANT:
2044 return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
2045 case TGSI_FILE_INPUT:
2046 if (prog->getType() == Program::TYPE_FRAGMENT) {
2047 // don't load masked inputs, won't be assigned a slot
2048 if (!ptr && !(info->in[idx].mask & (1 << swz)))
2049 return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
2050 return interpolate(src, c, shiftAddress(ptr));
2051 } else
2052 if (prog->getType() == Program::TYPE_GEOMETRY) {
2053 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
2054 return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
2055 // XXX: This is going to be a problem with scalar arrays, i.e. when
2056 // we cannot assume that the address is given in units of vec4.
2057 //
2058 // nv50 and nvc0 need different things here, so let the lowering
2059 // passes decide what to do with the address
2060 if (ptr)
2061 return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
2062 }
2063 ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2064 ld->perPatch = info->in[idx].patch;
2065 return ld->getDef(0);
2066 case TGSI_FILE_OUTPUT:
2067 assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
2068 ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2069 ld->perPatch = info->out[idx].patch;
2070 return ld->getDef(0);
2071 case TGSI_FILE_SYSTEM_VALUE:
2072 assert(!ptr);
2073 if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
2074 info->prop.cp.numThreads[swz] == 1)
2075 return loadImm(NULL, 0u);
2076 if (isSubGroupMask(info->sv[idx].sn) && swz > 0)
2077 return loadImm(NULL, 0u);
2078 if (info->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE)
2079 return loadImm(NULL, 32u);
2080 ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
2081 ld->perPatch = info->sv[idx].patch;
2082 return ld->getDef(0);
2083 case TGSI_FILE_TEMPORARY: {
2084 int arrayid = src.getArrayId();
2085 if (!arrayid)
2086 arrayid = code->tempArrayId[idx];
2087 adjustTempIndex(arrayid, idx, idx2d);
2088 }
2089 /* fallthrough */
2090 default:
2091 return getArrayForFile(src.getFile(), idx2d)->load(
2092 sub.cur->values, idx, swz, shiftAddress(ptr));
2093 }
2094 }
2095
2096 Value *
2097 Converter::acquireDst(int d, int c)
2098 {
2099 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2100 const unsigned f = dst.getFile();
2101 int idx = dst.getIndex(0);
2102 int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2103
2104 if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY ||
2105 f == TGSI_FILE_IMAGE)
2106 return NULL;
2107
2108 if (dst.isIndirect(0) ||
2109 f == TGSI_FILE_SYSTEM_VALUE ||
2110 (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
2111 return getScratch();
2112
2113 if (f == TGSI_FILE_TEMPORARY) {
2114 int arrayid = dst.getArrayId();
2115 if (!arrayid)
2116 arrayid = code->tempArrayId[idx];
2117 adjustTempIndex(arrayid, idx, idx2d);
2118 }
2119
2120 return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
2121 }
2122
2123 void
2124 Converter::storeDst(int d, int c, Value *val)
2125 {
2126 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2127
2128 if (tgsi.getSaturate()) {
2129 mkOp1(OP_SAT, dstTy, val, val);
2130 }
2131
2132 Value *ptr = NULL;
2133 if (dst.isIndirect(0))
2134 ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));
2135
2136 if (info->io.genUserClip > 0 &&
2137 dst.getFile() == TGSI_FILE_OUTPUT &&
2138 !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
2139 mkMov(clipVtx[c], val);
2140 val = clipVtx[c];
2141 }
2142
2143 storeDst(dst, c, val, ptr);
2144 }
2145
2146 void
2147 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
2148 Value *val, Value *ptr)
2149 {
2150 const unsigned f = dst.getFile();
2151 int idx = dst.getIndex(0);
2152 int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2153
2154 if (f == TGSI_FILE_SYSTEM_VALUE) {
2155 assert(!ptr);
2156 mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
2157 } else
2158 if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
2159
2160 if (ptr || (info->out[idx].mask & (1 << c))) {
2161 /* Save the viewport index into a scratch register so that it can be
2162 exported at EMIT time */
2163 if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
2164 prog->getType() == Program::TYPE_GEOMETRY &&
2165 viewport != NULL)
2166 mkOp1(OP_MOV, TYPE_U32, viewport, val);
2167 else
2168 mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
2169 info->out[idx].patch;
2170 }
2171 } else
2172 if (f == TGSI_FILE_TEMPORARY ||
2173 f == TGSI_FILE_ADDRESS ||
2174 f == TGSI_FILE_OUTPUT) {
2175 if (f == TGSI_FILE_TEMPORARY) {
2176 int arrayid = dst.getArrayId();
2177 if (!arrayid)
2178 arrayid = code->tempArrayId[idx];
2179 adjustTempIndex(arrayid, idx, idx2d);
2180 }
2181
2182 getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
2183 } else {
2184 assert(!"invalid dst file");
2185 }
2186 }
2187
/* Runs the statement following the macro once for every channel (0..3) that
 * is not masked out in destination d of inst; chan is used as the loop
 * variable.
 *
 * The statement is attached as the else branch of a complete if/else, so an
 * `else` written after it binds to the caller's own `if` instead of being
 * captured by the macro. Arguments are parenthesized so that arbitrary
 * expressions can be passed.
 */
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst)   \
   for ((chan) = 0; (chan) < 4; ++(chan))             \
      if ((inst).getDst(d).isMasked(chan)) {} else
2191
2192 Value *
2193 Converter::buildDot(int dim)
2194 {
2195 assert(dim > 0);
2196
2197 Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
2198 Value *dotp = getScratch();
2199
2200 mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1)
2201 ->dnz = info->io.mul_zero_wins;
2202
2203 for (int c = 1; c < dim; ++c) {
2204 src0 = fetchSrc(0, c);
2205 src1 = fetchSrc(1, c);
2206 mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp)
2207 ->dnz = info->io.mul_zero_wins;
2208 }
2209 return dotp;
2210 }
2211
2212 void
2213 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
2214 {
2215 FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
2216 join->fixed = 1;
2217 conv->insertHead(join);
2218
2219 assert(!fork->joinAt);
2220 fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
2221 fork->insertBefore(fork->getExit(), fork->joinAt);
2222 }
2223
2224 void
2225 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
2226 {
2227 unsigned rIdx = 0, sIdx = 0;
2228
2229 if (R >= 0)
2230 rIdx = tgsi.getSrc(R).getIndex(0);
2231 if (S >= 0)
2232 sIdx = tgsi.getSrc(S).getIndex(0);
2233
2234 tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
2235
2236 if (tgsi.getSrc(R).isIndirect(0)) {
2237 tex->tex.rIndirectSrc = s;
2238 tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
2239 }
2240 if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
2241 tex->tex.sIndirectSrc = s;
2242 tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
2243 }
2244 }
2245
2246 void
2247 Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
2248 {
2249 TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
2250 tex->tex.query = query;
2251 unsigned int c, d;
2252
2253 for (d = 0, c = 0; c < 4; ++c) {
2254 if (!dst0[c])
2255 continue;
2256 tex->tex.mask |= 1 << c;
2257 tex->setDef(d++, dst0[c]);
2258 }
2259 if (query == TXQ_DIMS)
2260 tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
2261 else
2262 tex->setSrc((c = 0), zero);
2263
2264 setTexRS(tex, ++c, R, -1);
2265
2266 bb->insertTail(tex);
2267 }
2268
2269 void
2270 Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
2271 {
2272 Value *proj = fetchSrc(0, 3);
2273 Instruction *insn = proj->getUniqueInsn();
2274 int c;
2275
2276 if (insn->op == OP_PINTERP) {
2277 bb->insertTail(insn = cloneForward(func, insn));
2278 insn->op = OP_LINTERP;
2279 insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
2280 insn->setSrc(1, NULL);
2281 proj = insn->getDef(0);
2282 }
2283 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
2284
2285 for (c = 0; c < 4; ++c) {
2286 if (!(mask & (1 << c)))
2287 continue;
2288 if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
2289 continue;
2290 mask &= ~(1 << c);
2291
2292 bb->insertTail(insn = cloneForward(func, insn));
2293 insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
2294 insn->setSrc(1, proj);
2295 dst[c] = insn->getDef(0);
2296 }
2297 if (!mask)
2298 return;
2299
2300 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
2301
2302 for (c = 0; c < 4; ++c)
2303 if (mask & (1 << c))
2304 dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
2305 }
2306
2307 // order of nv50 ir sources: x y z layer lod/bias shadow
2308 // order of TGSI TEX sources: x y z layer shadow lod/bias
2309 // lowering will finally set the hw specific order (like array first on nvc0)
2310 void
2311 Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
2312 {
2313 Value *arg[4], *src[8];
2314 Value *lod = NULL, *shd = NULL;
2315 unsigned int s, c, d;
2316 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2317
2318 TexInstruction::Target tgt = tgsi.getTexture(code, R);
2319
2320 for (s = 0; s < tgt.getArgCount(); ++s)
2321 arg[s] = src[s] = fetchSrc(0, s);
2322
2323 if (tgsi.getOpcode() == TGSI_OPCODE_TEX_LZ)
2324 lod = loadImm(NULL, 0);
2325 else if (texi->op == OP_TXL || texi->op == OP_TXB)
2326 lod = fetchSrc(L >> 4, L & 3);
2327
2328 if (C == 0x0f)
2329 C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
2330
2331 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 &&
2332 tgt == TEX_TARGET_CUBE_ARRAY_SHADOW)
2333 shd = fetchSrc(1, 0);
2334 else if (tgt.isShadow())
2335 shd = fetchSrc(C >> 4, C & 3);
2336
2337 if (texi->op == OP_TXD) {
2338 for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
2339 texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
2340 texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
2341 }
2342 }
2343
2344 // cube textures don't care about projection value, it's divided out
2345 if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
2346 unsigned int n = tgt.getDim();
2347 if (shd) {
2348 arg[n] = shd;
2349 ++n;
2350 assert(tgt.getDim() == tgt.getArgCount());
2351 }
2352 loadProjTexCoords(src, arg, (1 << n) - 1);
2353 if (shd)
2354 shd = src[n - 1];
2355 }
2356
2357 for (c = 0, d = 0; c < 4; ++c) {
2358 if (dst[c]) {
2359 texi->setDef(d++, dst[c]);
2360 texi->tex.mask |= 1 << c;
2361 } else {
2362 // NOTE: maybe hook up def too, for CSE
2363 }
2364 }
2365 for (s = 0; s < tgt.getArgCount(); ++s)
2366 texi->setSrc(s, src[s]);
2367 if (lod)
2368 texi->setSrc(s++, lod);
2369 if (shd)
2370 texi->setSrc(s++, shd);
2371
2372 setTexRS(texi, s, R, S);
2373
2374 if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
2375 texi->tex.levelZero = true;
2376 if (prog->getType() != Program::TYPE_FRAGMENT &&
2377 (tgsi.getOpcode() == TGSI_OPCODE_TEX ||
2378 tgsi.getOpcode() == TGSI_OPCODE_TEX2 ||
2379 tgsi.getOpcode() == TGSI_OPCODE_TXP))
2380 texi->tex.levelZero = true;
2381 if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
2382 texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);
2383
2384 texi->tex.useOffsets = tgsi.getNumTexOffsets();
2385 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2386 for (c = 0; c < 3; ++c) {
2387 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2388 texi->offset[s][c].setInsn(texi);
2389 }
2390 }
2391
2392 bb->insertTail(texi);
2393 }
2394
2395 // 1st source: xyz = coordinates, w = lod/sample
2396 // 2nd source: offset
2397 void
2398 Converter::handleTXF(Value *dst[4], int R, int L_M)
2399 {
2400 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2401 int ms;
2402 unsigned int c, d, s;
2403
2404 texi->tex.target = tgsi.getTexture(code, R);
2405
2406 ms = texi->tex.target.isMS() ? 1 : 0;
2407 texi->tex.levelZero = ms; /* MS textures don't have mip-maps */
2408
2409 for (c = 0, d = 0; c < 4; ++c) {
2410 if (dst[c]) {
2411 texi->setDef(d++, dst[c]);
2412 texi->tex.mask |= 1 << c;
2413 }
2414 }
2415 for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c)
2416 texi->setSrc(c, fetchSrc(0, c));
2417 if (!ms && tgsi.getOpcode() == TGSI_OPCODE_TXF_LZ)
2418 texi->setSrc(c++, loadImm(NULL, 0));
2419 else
2420 texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms
2421
2422 setTexRS(texi, c, R, -1);
2423
2424 texi->tex.useOffsets = tgsi.getNumTexOffsets();
2425 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2426 for (c = 0; c < 3; ++c) {
2427 texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2428 texi->offset[s][c].setInsn(texi);
2429 }
2430 }
2431
2432 bb->insertTail(texi);
2433 }
2434
2435 void
2436 Converter::handleFBFETCH(Value *dst[4])
2437 {
2438 TexInstruction *texi = new_TexInstruction(func, OP_TXF);
2439 unsigned int c, d;
2440
2441 texi->tex.target = TEX_TARGET_2D_MS_ARRAY;
2442 texi->tex.levelZero = 1;
2443 texi->tex.useOffsets = 0;
2444
2445 for (c = 0, d = 0; c < 4; ++c) {
2446 if (dst[c]) {
2447 texi->setDef(d++, dst[c]);
2448 texi->tex.mask |= 1 << c;
2449 }
2450 }
2451
2452 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
2453 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
2454 Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
2455 Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));
2456
2457 mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z;
2458 mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;
2459 texi->setSrc(0, x);
2460 texi->setSrc(1, y);
2461 texi->setSrc(2, z);
2462 texi->setSrc(3, ms);
2463
2464 texi->tex.r = texi->tex.s = -1;
2465
2466 bb->insertTail(texi);
2467 }
2468
2469 void
2470 Converter::handleLIT(Value *dst0[4])
2471 {
2472 Value *val0 = NULL;
2473 unsigned int mask = tgsi.getDst(0).getMask();
2474
2475 if (mask & (1 << 0))
2476 loadImm(dst0[0], 1.0f);
2477
2478 if (mask & (1 << 3))
2479 loadImm(dst0[3], 1.0f);
2480
2481 if (mask & (3 << 1)) {
2482 val0 = getScratch();
2483 mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
2484 if (mask & (1 << 1))
2485 mkMov(dst0[1], val0);
2486 }
2487
2488 if (mask & (1 << 2)) {
2489 Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
2490 Value *val1 = getScratch(), *val3 = getScratch();
2491
2492 Value *pos128 = loadImm(NULL, +127.999999f);
2493 Value *neg128 = loadImm(NULL, -127.999999f);
2494
2495 mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
2496 mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
2497 mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
2498 mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
2499
2500 mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0);
2501 }
2502 }
2503
2504 /* Keep this around for now as reference when adding img support
2505 static inline bool
2506 isResourceSpecial(const int r)
2507 {
2508 return (r == TGSI_RESOURCE_GLOBAL ||
2509 r == TGSI_RESOURCE_LOCAL ||
2510 r == TGSI_RESOURCE_PRIVATE ||
2511 r == TGSI_RESOURCE_INPUT);
2512 }
2513
2514 static inline bool
2515 isResourceRaw(const tgsi::Source *code, const int r)
2516 {
2517 return isResourceSpecial(r) || code->resources[r].raw;
2518 }
2519
2520 static inline nv50_ir::TexTarget
2521 getResourceTarget(const tgsi::Source *code, int r)
2522 {
2523 if (isResourceSpecial(r))
2524 return nv50_ir::TEX_TARGET_BUFFER;
2525 return tgsi::translateTexture(code->resources.at(r).target);
2526 }
2527
2528 Symbol *
2529 Converter::getResourceBase(const int r)
2530 {
2531 Symbol *sym = NULL;
2532
2533 switch (r) {
2534 case TGSI_RESOURCE_GLOBAL:
2535 sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
2536 info->io.auxCBSlot);
2537 break;
2538 case TGSI_RESOURCE_LOCAL:
2539 assert(prog->getType() == Program::TYPE_COMPUTE);
2540 sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
2541 info->prop.cp.sharedOffset);
2542 break;
2543 case TGSI_RESOURCE_PRIVATE:
2544 sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
2545 info->bin.tlsSpace);
2546 break;
2547 case TGSI_RESOURCE_INPUT:
2548 assert(prog->getType() == Program::TYPE_COMPUTE);
2549 sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
2550 info->prop.cp.inputOffset);
2551 break;
2552 default:
2553 sym = new_Symbol(prog,
2554 nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
2555 break;
2556 }
2557 return sym;
2558 }
2559
2560 void
2561 Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
2562 {
2563 const int arg =
2564 TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
2565
2566 for (int c = 0; c < arg; ++c)
2567 coords.push_back(fetchSrc(s, c));
2568
2569 // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
2570 if (r == TGSI_RESOURCE_LOCAL ||
2571 r == TGSI_RESOURCE_PRIVATE ||
2572 r == TGSI_RESOURCE_INPUT)
2573 coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
2574 coords[0]);
2575 }
2576 */
/* Splits the components selected by mask (bit i = component i) into at most
 * two contiguous runs for raw loads/stores.
 *
 * comp[i] receives the first component of run i, size[i] the number of
 * components in it. Both arrays must be zero-initialized by the caller.
 * A run of three components is split into pieces of one and two components
 * such that the two-component piece starts at an even component.
 *
 * Returns the number of runs described by comp/size (1 or 2); an empty mask
 * yields 1 with size[0] == 0.
 */
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else {
         if (size[n]) {
            // Gap after the first run: the second run starts no earlier than
            // one past this hole. The first run may itself start at a
            // non-zero component, so comp[0] has to be accounted for.
            comp[n = 1] = comp[0] + size[0] + 1;
         } else {
            // Still skipping disabled components in front of run n.
            comp[n]++;
         }
      }
      mask >>= 1;
   }
   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
2601
2602 static inline nv50_ir::TexTarget
2603 getImageTarget(const tgsi::Source *code, int r)
2604 {
2605 return tgsi::translateTexture(code->images.at(r).target);
2606 }
2607
2608 static inline const nv50_ir::TexInstruction::ImgFormatDesc *
2609 getImageFormat(const tgsi::Source *code, int r)
2610 {
2611 return &nv50_ir::TexInstruction::formatTable[
2612 tgsi::translateImgFormat(code->images.at(r).format)];
2613 }
2614
2615 void
2616 Converter::getImageCoords(std::vector<Value *> &coords, int r, int s)
2617 {
2618 TexInstruction::Target t =
2619 TexInstruction::Target(getImageTarget(code, r));
2620 const int arg = t.getDim() + (t.isArray() || t.isCube());
2621
2622 for (int c = 0; c < arg; ++c)
2623 coords.push_back(fetchSrc(s, c));
2624
2625 if (t.isMS())
2626 coords.push_back(fetchSrc(s, 3));
2627 }
2628
2629 // For raw loads, granularity is 4 byte.
2630 // Usage of the texture read mask on OP_SULDP is not allowed.
2631 void
2632 Converter::handleLOAD(Value *dst0[4])
2633 {
2634 const int r = tgsi.getSrc(0).getIndex(0);
2635 int c;
2636 std::vector<Value *> off, src, ldv, def;
2637 Value *ind = NULL;
2638
2639 if (tgsi.getSrc(0).isIndirect(0))
2640 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2641
2642 switch (tgsi.getSrc(0).getFile()) {
2643 case TGSI_FILE_BUFFER:
2644 case TGSI_FILE_MEMORY:
2645 for (c = 0; c < 4; ++c) {
2646 if (!dst0[c])
2647 continue;
2648
2649 Value *off;
2650 Symbol *sym;
2651 uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4;
2652
2653 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
2654 off = NULL;
2655 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2656 tgsi.getSrc(1).getValueU32(0, info) +
2657 src0_component_offset);
2658 } else {
2659 // yzw are ignored for buffers
2660 off = fetchSrc(1, 0);
2661 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2662 src0_component_offset);
2663 }
2664
2665 Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
2666 ld->cache = tgsi.getCacheMode();
2667 if (ind)
2668 ld->setIndirect(0, 1, ind);
2669 }
2670 break;
2671 case TGSI_FILE_IMAGE: {
2672 assert(!code->images[r].raw);
2673
2674 getImageCoords(off, r, 1);
2675 def.resize(4);
2676
2677 for (c = 0; c < 4; ++c) {
2678 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2679 def[c] = getScratch();
2680 else
2681 def[c] = dst0[c];
2682 }
2683
2684 TexInstruction *ld =
2685 mkTex(OP_SULDP, getImageTarget(code, r), code->images[r].slot, 0,
2686 def, off);
2687 ld->tex.mask = tgsi.getDst(0).getMask();
2688 ld->tex.format = getImageFormat(code, r);
2689 ld->cache = tgsi.getCacheMode();
2690 if (ind)
2691 ld->setIndirectR(ind);
2692
2693 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2694 if (dst0[c] != def[c])
2695 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2696 }
2697 break;
2698 default:
2699 assert(!"Unsupported srcFile for LOAD");
2700 }
2701
2702 /* Keep this around for now as reference when adding img support
2703 getResourceCoords(off, r, 1);
2704
2705 if (isResourceRaw(code, r)) {
2706 uint8_t mask = 0;
2707 uint8_t comp[2] = { 0, 0 };
2708 uint8_t size[2] = { 0, 0 };
2709
2710 Symbol *base = getResourceBase(r);
2711
2712 // determine the base and size of the at most 2 load ops
2713 for (c = 0; c < 4; ++c)
2714 if (!tgsi.getDst(0).isMasked(c))
2715 mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
2716
2717 int n = partitionLoadStore(comp, size, mask);
2718
2719 src = off;
2720
2721 def.resize(4); // index by component, the ones we need will be non-NULL
2722 for (c = 0; c < 4; ++c) {
2723 if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
2724 def[c] = dst0[c];
2725 else
2726 if (mask & (1 << c))
2727 def[c] = getScratch();
2728 }
2729
2730 const bool useLd = isResourceSpecial(r) ||
2731 (info->io.nv50styleSurfaces &&
2732 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2733
2734 for (int i = 0; i < n; ++i) {
2735 ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
2736
2737 if (comp[i]) // adjust x component of source address if necessary
2738 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2739 off[0], mkImm(comp[i] * 4));
2740 else
2741 src[0] = off[0];
2742
2743 if (useLd) {
2744 Instruction *ld =
2745 mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
2746 for (size_t c = 1; c < ldv.size(); ++c)
2747 ld->setDef(c, ldv[c]);
2748 } else {
2749 mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
2750 0, ldv, src)->dType = typeOfSize(size[i] * 4);
2751 }
2752 }
2753 } else {
2754 def.resize(4);
2755 for (c = 0; c < 4; ++c) {
2756 if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2757 def[c] = getScratch();
2758 else
2759 def[c] = dst0[c];
2760 }
2761
2762 mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
2763 def, off);
2764 }
2765 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2766 if (dst0[c] != def[c])
2767 mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2768 */
2769 }
2770
2771 // For formatted stores, the write mask on OP_SUSTP can be used.
2772 // Raw stores have to be split.
2773 void
2774 Converter::handleSTORE()
2775 {
2776 const int r = tgsi.getDst(0).getIndex(0);
2777 int c;
2778 std::vector<Value *> off, src, dummy;
2779 Value *ind = NULL;
2780
2781 if (tgsi.getDst(0).isIndirect(0))
2782 ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0);
2783
2784 switch (tgsi.getDst(0).getFile()) {
2785 case TGSI_FILE_BUFFER:
2786 case TGSI_FILE_MEMORY:
2787 for (c = 0; c < 4; ++c) {
2788 if (!(tgsi.getDst(0).getMask() & (1 << c)))
2789 continue;
2790
2791 Symbol *sym;
2792 Value *off;
2793 if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
2794 off = NULL;
2795 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
2796 tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
2797 } else {
2798 // yzw are ignored for buffers
2799 off = fetchSrc(0, 0);
2800 sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
2801 }
2802
2803 Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
2804 st->cache = tgsi.getCacheMode();
2805 if (ind)
2806 st->setIndirect(0, 1, ind);
2807 }
2808 break;
2809 case TGSI_FILE_IMAGE: {
2810 assert(!code->images[r].raw);
2811
2812 getImageCoords(off, r, 0);
2813 src = off;
2814
2815 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2816 src.push_back(fetchSrc(1, c));
2817
2818 TexInstruction *st =
2819 mkTex(OP_SUSTP, getImageTarget(code, r), code->images[r].slot,
2820 0, dummy, src);
2821 st->tex.mask = tgsi.getDst(0).getMask();
2822 st->tex.format = getImageFormat(code, r);
2823 st->cache = tgsi.getCacheMode();
2824 if (ind)
2825 st->setIndirectR(ind);
2826 }
2827 break;
2828 default:
2829 assert(!"Unsupported dstFile for STORE");
2830 }
2831
2832 /* Keep this around for now as reference when adding img support
2833 getResourceCoords(off, r, 0);
2834 src = off;
2835 const int s = src.size();
2836
2837 if (isResourceRaw(code, r)) {
2838 uint8_t comp[2] = { 0, 0 };
2839 uint8_t size[2] = { 0, 0 };
2840
2841 int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
2842
2843 Symbol *base = getResourceBase(r);
2844
2845 const bool useSt = isResourceSpecial(r) ||
2846 (info->io.nv50styleSurfaces &&
2847 code->resources[r].target == TGSI_TEXTURE_BUFFER);
2848
2849 for (int i = 0; i < n; ++i) {
2850 if (comp[i]) // adjust x component of source address if necessary
2851 src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2852 off[0], mkImm(comp[i] * 4));
2853 else
2854 src[0] = off[0];
2855
2856 const DataType stTy = typeOfSize(size[i] * 4);
2857
2858 if (useSt) {
2859 Instruction *st =
2860 mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
2861 for (c = 1; c < size[i]; ++c)
2862 st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
2863 st->setIndirect(0, 0, src[0]);
2864 } else {
2865 // attach values to be stored
2866 src.resize(s + size[i]);
2867 for (c = 0; c < size[i]; ++c)
2868 src[s + c] = fetchSrc(1, comp[i] + c);
2869 mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
2870 0, dummy, src)->setType(stTy);
2871 }
2872 }
2873 } else {
2874 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2875 src.push_back(fetchSrc(1, c));
2876
2877 mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
2878 dummy, src)->tex.mask = tgsi.getDst(0).getMask();
2879 }
2880 */
2881 }
2882
2883 // XXX: These only work on resources with the single-component u32/s32 formats.
2884 // Therefore the result is replicated. This might not be intended by TGSI, but
2885 // operating on more than 1 component would produce undefined results because
2886 // they do not exist.
2887 void
2888 Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
2889 {
2890 const int r = tgsi.getSrc(0).getIndex(0);
2891 std::vector<Value *> srcv;
2892 std::vector<Value *> defv;
2893 LValue *dst = getScratch();
2894 Value *ind = NULL;
2895
2896 if (tgsi.getSrc(0).isIndirect(0))
2897 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2898
2899 switch (tgsi.getSrc(0).getFile()) {
2900 case TGSI_FILE_BUFFER:
2901 case TGSI_FILE_MEMORY:
2902 for (int c = 0; c < 4; ++c) {
2903 if (!dst0[c])
2904 continue;
2905
2906 Instruction *insn;
2907 Value *off = fetchSrc(1, c);
2908 Value *sym;
2909 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
2910 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2911 tgsi.getSrc(1).getValueU32(c, info));
2912 else
2913 sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
2914 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2915 insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
2916 else
2917 insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
2918 if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
2919 insn->setIndirect(0, 0, off);
2920 if (ind)
2921 insn->setIndirect(0, 1, ind);
2922 insn->subOp = subOp;
2923 }
2924 for (int c = 0; c < 4; ++c)
2925 if (dst0[c])
2926 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
2927 break;
2928 case TGSI_FILE_IMAGE: {
2929 assert(!code->images[r].raw);
2930
2931 getImageCoords(srcv, r, 1);
2932 defv.push_back(dst);
2933 srcv.push_back(fetchSrc(2, 0));
2934
2935 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2936 srcv.push_back(fetchSrc(3, 0));
2937
2938 TexInstruction *tex = mkTex(OP_SUREDP, getImageTarget(code, r),
2939 code->images[r].slot, 0, defv, srcv);
2940 tex->subOp = subOp;
2941 tex->tex.mask = 1;
2942 tex->tex.format = getImageFormat(code, r);
2943 tex->setType(ty);
2944 if (ind)
2945 tex->setIndirectR(ind);
2946
2947 for (int c = 0; c < 4; ++c)
2948 if (dst0[c])
2949 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
2950 }
2951 break;
2952 default:
2953 assert(!"Unsupported srcFile for ATOM");
2954 }
2955
2956 /* Keep this around for now as reference when adding img support
2957 getResourceCoords(srcv, r, 1);
2958
2959 if (isResourceSpecial(r)) {
2960 assert(r != TGSI_RESOURCE_INPUT);
2961 Instruction *insn;
2962 insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0));
2963 insn->subOp = subOp;
2964 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2965 insn->setSrc(2, fetchSrc(3, 0));
2966 insn->setIndirect(0, 0, srcv.at(0));
2967 } else {
2968 operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP;
2969 TexTarget targ = getResourceTarget(code, r);
2970 int idx = code->resources[r].slot;
2971 defv.push_back(dst);
2972 srcv.push_back(fetchSrc(2, 0));
2973 if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2974 srcv.push_back(fetchSrc(3, 0));
2975 TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv);
2976 tex->subOp = subOp;
2977 tex->tex.mask = 1;
2978 tex->setType(ty);
2979 }
2980
2981 for (int c = 0; c < 4; ++c)
2982 if (dst0[c])
2983 dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
2984 */
2985 }
2986
2987 void
2988 Converter::handleINTERP(Value *dst[4])
2989 {
2990 // Check whether the input is linear. All other attributes ignored.
2991 Instruction *insn;
2992 Value *offset = NULL, *ptr = NULL, *w = NULL;
2993 Symbol *sym[4] = { NULL };
2994 bool linear;
2995 operation op = OP_NOP;
2996 int c, mode = 0;
2997
2998 tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
2999
3000 // In some odd cases, in large part due to varying packing, the source
3001 // might not actually be an input. This is illegal TGSI, but it's easier to
3002 // account for it here than it is to fix it where the TGSI is being
3003 // generated. In that case, it's going to be a straight up mov (or sequence
3004 // of mov's) from the input in question. We follow the mov chain to see
3005 // which input we need to use.
3006 if (src.getFile() != TGSI_FILE_INPUT) {
3007 if (src.isIndirect(0)) {
3008 ERROR("Ignoring indirect input interpolation\n");
3009 return;
3010 }
3011 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3012 Value *val = fetchSrc(0, c);
3013 assert(val->defs.size() == 1);
3014 insn = val->getInsn();
3015 while (insn->op == OP_MOV) {
3016 assert(insn->getSrc(0)->defs.size() == 1);
3017 insn = insn->getSrc(0)->getInsn();
3018 if (!insn) {
3019 ERROR("Miscompiling shader due to unhandled INTERP\n");
3020 return;
3021 }
3022 }
3023 if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) {
3024 ERROR("Trying to interpolate non-input, this is not allowed.\n");
3025 return;
3026 }
3027 sym[c] = insn->getSrc(0)->asSym();
3028 assert(sym[c]);
3029 op = insn->op;
3030 mode = insn->ipa;
3031 }
3032 } else {
3033 if (src.isIndirect(0))
3034 ptr = fetchSrc(src.getIndirect(0), 0, NULL);
3035
3036 // We can assume that the fixed index will point to an input of the same
3037 // interpolation type in case of an indirect.
3038 // TODO: Make use of ArrayID.
3039 linear = info->in[src.getIndex(0)].linear;
3040 if (linear) {
3041 op = OP_LINTERP;
3042 mode = NV50_IR_INTERP_LINEAR;
3043 } else {
3044 op = OP_PINTERP;
3045 mode = NV50_IR_INTERP_PERSPECTIVE;
3046 }
3047 }
3048
3049 switch (tgsi.getOpcode()) {
3050 case TGSI_OPCODE_INTERP_CENTROID:
3051 mode |= NV50_IR_INTERP_CENTROID;
3052 break;
3053 case TGSI_OPCODE_INTERP_SAMPLE:
3054 insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0));
3055 insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
3056 mode |= NV50_IR_INTERP_OFFSET;
3057 break;
3058 case TGSI_OPCODE_INTERP_OFFSET: {
3059 // The input in src1.xy is float, but we need a single 32-bit value
3060 // where the upper and lower 16 bits are encoded in S0.12 format. We need
3061 // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096,
3062 // and then convert to s32.
3063 Value *offs[2];
3064 for (c = 0; c < 2; c++) {
3065 offs[c] = getScratch();
3066 mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f));
3067 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
3068 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
3069 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
3070 }
3071 offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(),
3072 offs[1], mkImm(0x1010), offs[0]);
3073 mode |= NV50_IR_INTERP_OFFSET;
3074 break;
3075 }
3076 }
3077
3078 if (op == OP_PINTERP) {
3079 if (offset) {
3080 w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset);
3081 mkOp1(OP_RCP, TYPE_F32, w, w);
3082 } else {
3083 w = fragCoord[3];
3084 }
3085 }
3086
3087
3088 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3089 insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c));
3090 if (op == OP_PINTERP)
3091 insn->setSrc(1, w);
3092 if (ptr)
3093 insn->setIndirect(0, 0, ptr);
3094 if (offset)
3095 insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);
3096
3097 insn->setInterpolate(mode);
3098 }
3099 }
3100
3101 Converter::Subroutine *
3102 Converter::getSubroutine(unsigned ip)
3103 {
3104 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3105
3106 if (it == sub.map.end())
3107 it = sub.map.insert(std::make_pair(
3108 ip, Subroutine(new Function(prog, "SUB", ip)))).first;
3109
3110 return &it->second;
3111 }
3112
3113 Converter::Subroutine *
3114 Converter::getSubroutine(Function *f)
3115 {
3116 unsigned ip = f->getLabel();
3117 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3118
3119 if (it == sub.map.end())
3120 it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
3121
3122 return &it->second;
3123 }
3124
3125 bool
3126 Converter::isEndOfSubroutine(uint ip)
3127 {
3128 assert(ip < code->scan.num_instructions);
3129 tgsi::Instruction insn(&code->insns[ip]);
3130 return (insn.getOpcode() == TGSI_OPCODE_END ||
3131 insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
3132 // does END occur at end of main or the very end ?
3133 insn.getOpcode() == TGSI_OPCODE_BGNSUB);
3134 }
3135
// Translate one TGSI instruction into nv50 IR at the current build position.
//
// The instruction is wrapped in tgsi::Instruction, the IR operation and the
// destination/source types are inferred from it, destination values are
// acquired, and the switch below emits the IR for the specific TGSI opcode.
// After the switch, every non-NULL dst0[] entry is committed with storeDst()
// (preceded by a move when a scratch value was used) and vtxBaseValid is
// reset.
//
// BGNSUB, ENDSUB, CAL and a RET in an already terminated block return early
// and therefore skip that write-back and the vtxBaseValid reset.
//
// SWITCH/CASE report an error and abort(); an unknown opcode reports an error
// and hits assert(0).
//
// Every return path visible here returns true.
3136 bool
3137 Converter::handleInstruction(const struct tgsi_full_instruction *insn)
3138 {
3139 Instruction *geni;
3140
3141 Value *dst0[4], *rDst0[4];
3142 Value *src0, *src1, *src2, *src3;
3143 Value *val0, *val1;
3144 int c;
3145
3146 tgsi = tgsi::Instruction(insn);
3147
// If a destination aliases a source, results are first written to scratch
// values and copied to the real destinations after the switch.
3148 bool useScratchDst = tgsi.checkDstSrcAliasing();
3149
3150 operation op = tgsi.getOP();
3151 dstTy = tgsi.inferDstType();
3152 srcTy = tgsi.inferSrcType();
3153
3154 unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
3155
// rDst0[] holds the real destination values, dst0[] is what the opcode
// handlers write to. Entries may be NULL (handlers test dst0[c] before use);
// presumably that marks channels that are not written - verify in acquireDst.
// NOTE: both arrays stay uninitialized when the instruction has no destination.
3156 if (tgsi.dstCount()) {
3157 for (c = 0; c < 4; ++c) {
3158 rDst0[c] = acquireDst(0, c);
3159 dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
3160 }
3161 }
3162
3163 switch (tgsi.getOpcode()) {
// Per-channel two-source operations.
3164 case TGSI_OPCODE_ADD:
3165 case TGSI_OPCODE_UADD:
3166 case TGSI_OPCODE_AND:
3167 case TGSI_OPCODE_DIV:
3168 case TGSI_OPCODE_IDIV:
3169 case TGSI_OPCODE_UDIV:
3170 case TGSI_OPCODE_MAX:
3171 case TGSI_OPCODE_MIN:
3172 case TGSI_OPCODE_IMAX:
3173 case TGSI_OPCODE_IMIN:
3174 case TGSI_OPCODE_UMAX:
3175 case TGSI_OPCODE_UMIN:
3176 case TGSI_OPCODE_MOD:
3177 case TGSI_OPCODE_UMOD:
3178 case TGSI_OPCODE_MUL:
3179 case TGSI_OPCODE_UMUL:
3180 case TGSI_OPCODE_IMUL_HI:
3181 case TGSI_OPCODE_UMUL_HI:
3182 case TGSI_OPCODE_OR:
3183 case TGSI_OPCODE_SHL:
3184 case TGSI_OPCODE_ISHR:
3185 case TGSI_OPCODE_USHR:
3186 case TGSI_OPCODE_XOR:
3187 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3188 src0 = fetchSrc(0, c);
3189 src1 = fetchSrc(1, c);
3190 geni = mkOp2(op, dstTy, dst0[c], src0, src1);
3191 geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3192 if (op == OP_MUL && dstTy == TYPE_F32)
3193 geni->dnz = info->io.mul_zero_wins;
3194 geni->precise = insn->Instruction.Precise;
3195 }
3196 break;
// Per-channel three-source operations.
3197 case TGSI_OPCODE_MAD:
3198 case TGSI_OPCODE_UMAD:
3199 case TGSI_OPCODE_SAD:
3200 case TGSI_OPCODE_FMA:
3201 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3202 src0 = fetchSrc(0, c);
3203 src1 = fetchSrc(1, c);
3204 src2 = fetchSrc(2, c);
3205 geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2);
3206 if (dstTy == TYPE_F32)
3207 geni->dnz = info->io.mul_zero_wins;
3208 geni->precise = insn->Instruction.Precise;
3209 }
3210 break;
// Per-channel one-source operations.
3211 case TGSI_OPCODE_MOV:
3212 case TGSI_OPCODE_CEIL:
3213 case TGSI_OPCODE_FLR:
3214 case TGSI_OPCODE_TRUNC:
3215 case TGSI_OPCODE_RCP:
3216 case TGSI_OPCODE_SQRT:
3217 case TGSI_OPCODE_IABS:
3218 case TGSI_OPCODE_INEG:
3219 case TGSI_OPCODE_NOT:
3220 case TGSI_OPCODE_DDX:
3221 case TGSI_OPCODE_DDY:
3222 case TGSI_OPCODE_DDX_FINE:
3223 case TGSI_OPCODE_DDY_FINE:
3224 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3225 mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
3226 break;
// Scalar: RSQ of the absolute value of source channel 0, replicated to every
// enabled destination channel.
3227 case TGSI_OPCODE_RSQ:
3228 src0 = fetchSrc(0, 0);
3229 val0 = getScratch();
3230 mkOp1(OP_ABS, TYPE_F32, val0, src0);
3231 mkOp1(OP_RSQ, TYPE_F32, val0, val0);
3232 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3233 mkMov(dst0[c], val0);
3234 break;
// Float to signed integer conversion; ARR uses ROUND_N, ARL uses ROUND_M.
3235 case TGSI_OPCODE_ARL:
3236 case TGSI_OPCODE_ARR:
3237 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3238 const RoundMode rnd =
3239 tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M;
3240 src0 = fetchSrc(0, c);
3241 mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd;
3242 }
3243 break;
3244 case TGSI_OPCODE_UARL:
3245 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3246 mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));
3247 break;
// Scalar ops below are computed once from source channel 0 and the result is
// moved to every enabled destination channel.
3248 case TGSI_OPCODE_POW:
3249 val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0));
3250 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3251 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3252 break;
3253 case TGSI_OPCODE_EX2:
3254 case TGSI_OPCODE_LG2:
3255 val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
3256 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3257 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3258 break;
// Channels 0-2 all receive op(src0 channel 0); channel 3 is computed
// separately from source channel 3.
3259 case TGSI_OPCODE_COS:
3260 case TGSI_OPCODE_SIN:
3261 val0 = getScratch();
3262 if (mask & 7) {
3263 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
3264 mkOp1(op, TYPE_F32, val0, val0);
3265 for (c = 0; c < 3; ++c)
3266 if (dst0[c])
3267 mkMov(dst0[c], val0);
3268 }
3269 if (dst0[3]) {
3270 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
3271 mkOp1(op, TYPE_F32, dst0[3], val0);
3272 }
3273 break;
// dst = (cos(src.x), sin(src.x), 0.0, 1.0)
3274 case TGSI_OPCODE_SCS:
3275 if (mask & 3) {
3276 val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
3277 if (dst0[0])
3278 mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
3279 if (dst0[1])
3280 mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
3281 }
3282 if (dst0[2])
3283 loadImm(dst0[2], 0.0f);
3284 if (dst0[3])
3285 loadImm(dst0[3], 1.0f);
3286 break;
// dst = (ex2(floor(src.x)), src.x - floor(src.x), ex2(src.x), 1.0)
3287 case TGSI_OPCODE_EXP:
3288 src0 = fetchSrc(0, 0);
3289 val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
3290 if (dst0[1])
3291 mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
3292 if (dst0[0])
3293 mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
3294 if (dst0[2])
3295 mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
3296 if (dst0[3])
3297 loadImm(dst0[3], 1.0f);
3298 break;
// With a = |src.x|:
// dst = (floor(lg2(a)), a * rcp(ex2(floor(lg2(a)))), lg2(a), 1.0)
3299 case TGSI_OPCODE_LOG:
3300 src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
3301 val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
3302 if (dst0[0] || dst0[1])
3303 val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
3304 if (dst0[1]) {
3305 mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
3306 mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
3307 mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0)
3308 ->dnz = info->io.mul_zero_wins;
3309 }
3310 if (dst0[3])
3311 loadImm(dst0[3], 1.0f);
3312 break;
// Dot products: buildDot(n) produces the scalar, which is replicated to every
// enabled destination channel.
3313 case TGSI_OPCODE_DP2:
3314 val0 = buildDot(2);
3315 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3316 mkMov(dst0[c], val0);
3317 break;
3318 case TGSI_OPCODE_DP3:
3319 val0 = buildDot(3);
3320 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3321 mkMov(dst0[c], val0);
3322 break;
3323 case TGSI_OPCODE_DP4:
3324 val0 = buildDot(4);
3325 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3326 mkMov(dst0[c], val0);
3327 break;
// DPH: 3-component dot product plus channel 3 of the second source.
3328 case TGSI_OPCODE_DPH:
3329 val0 = buildDot(3);
3330 src1 = fetchSrc(1, 3);
3331 mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
3332 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3333 mkMov(dst0[c], val0);
3334 break;
// dst = (1.0, src0.y * src1.y, src0.z, src1.w)
3335 case TGSI_OPCODE_DST:
3336 if (dst0[0])
3337 loadImm(dst0[0], 1.0f);
3338 if (dst0[1]) {
3339 src0 = fetchSrc(0, 1);
3340 src1 = fetchSrc(1, 1);
3341 mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1)
3342 ->dnz = info->io.mul_zero_wins;
3343 }
3344 if (dst0[2])
3345 mkMov(dst0[2], fetchSrc(0, 2));
3346 if (dst0[3])
3347 mkMov(dst0[3], fetchSrc(1, 3));
3348 break;
// Emitted as MAD(src1 - src2, src0, src2).
3349 case TGSI_OPCODE_LRP:
3350 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3351 src0 = fetchSrc(0, c);
3352 src1 = fetchSrc(1, c);
3353 src2 = fetchSrc(2, c);
3354 mkOp3(OP_MAD, TYPE_F32, dst0[c],
3355 mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2)
3356 ->dnz = info->io.mul_zero_wins;
3357 }
3358 break;
3359 case TGSI_OPCODE_LIT:
3360 handleLIT(dst0);
3361 break;
// Cross product for channels 0-2; channel 3 is set to 1.0.
3362 case TGSI_OPCODE_XPD:
3363 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3364 if (c < 3) {
3365 val0 = getSSA();
3366 src0 = fetchSrc(1, (c + 1) % 3);
3367 src1 = fetchSrc(0, (c + 2) % 3);
3368 mkOp2(OP_MUL, TYPE_F32, val0, src0, src1)
3369 ->dnz = info->io.mul_zero_wins;
3370 mkOp1(OP_NEG, TYPE_F32, val0, val0);
3371
3372 src0 = fetchSrc(0, (c + 1) % 3);
3373 src1 = fetchSrc(1, (c + 2) % 3);
3374 mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0)
3375 ->dnz = info->io.mul_zero_wins;
3376 } else {
3377 loadImm(dst0[c], 1.0f);
3378 }
3379 }
3380 break;
// Sign: difference of the (src > 0) and (src < 0) comparison results. The
// operand order of the SUB is swapped for the integer variant.
// NOTE(review): presumably because an integer SET yields -1 for true - confirm.
3381 case TGSI_OPCODE_ISSG:
3382 case TGSI_OPCODE_SSG:
3383 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3384 src0 = fetchSrc(0, c);
3385 val0 = getScratch();
3386 val1 = getScratch();
3387 mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
3388 mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
3389 if (srcTy == TYPE_F32)
3390 mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
3391 else
3392 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
3393 }
3394 break;
3395 case TGSI_OPCODE_UCMP:
3396 srcTy = TYPE_U32;
3397 /* fallthrough */
// Select between src1 and src2 based on src0 (float: src0 < 0, otherwise
// src0 != 0); a plain move is emitted when both choices are the same value.
3398 case TGSI_OPCODE_CMP:
3399 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3400 src0 = fetchSrc(0, c);
3401 src1 = fetchSrc(1, c);
3402 src2 = fetchSrc(2, c);
3403 if (src1 == src2)
3404 mkMov(dst0[c], src1);
3405 else
3406 mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
3407 srcTy, dst0[c], srcTy, src1, src2, src0);
3408 }
3409 break;
// frc(x) = x - floor(x)
3410 case TGSI_OPCODE_FRC:
3411 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3412 src0 = fetchSrc(0, c);
3413 val0 = getScratch();
3414 mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
3415 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
3416 }
3417 break;
3418 case TGSI_OPCODE_ROUND:
3419 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3420 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
3421 ->rnd = ROUND_NI;
3422 break;
// Per-channel comparisons; the condition code comes from tgsi.getSetCond().
3423 case TGSI_OPCODE_SLT:
3424 case TGSI_OPCODE_SGE:
3425 case TGSI_OPCODE_SEQ:
3426 case TGSI_OPCODE_SGT:
3427 case TGSI_OPCODE_SLE:
3428 case TGSI_OPCODE_SNE:
3429 case TGSI_OPCODE_FSEQ:
3430 case TGSI_OPCODE_FSGE:
3431 case TGSI_OPCODE_FSLT:
3432 case TGSI_OPCODE_FSNE:
3433 case TGSI_OPCODE_ISGE:
3434 case TGSI_OPCODE_ISLT:
3435 case TGSI_OPCODE_USEQ:
3436 case TGSI_OPCODE_USGE:
3437 case TGSI_OPCODE_USLT:
3438 case TGSI_OPCODE_USNE:
3439 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3440 src0 = fetchSrc(0, c);
3441 src1 = fetchSrc(1, c);
3442 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
3443 }
3444 break;
// The source is first turned into a predicate (src != 0), the vote is applied
// to that predicate, and the predicate is converted back to a 32-bit value.
3445 case TGSI_OPCODE_VOTE_ALL:
3446 case TGSI_OPCODE_VOTE_ANY:
3447 case TGSI_OPCODE_VOTE_EQ:
3448 val0 = new_LValue(func, FILE_PREDICATE);
3449 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3450 mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero);
3451 mkOp1(op, dstTy, val0, val0)
3452 ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3453 mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0);
3454 }
3455 break;
// Channel 0 receives the vote result, channel 1 is written as zero.
3456 case TGSI_OPCODE_BALLOT:
3457 if (!tgsi.getDst(0).isMasked(0)) {
3458 val0 = new_LValue(func, FILE_PREDICATE);
3459 mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, 0), zero);
3460 mkOp1(op, TYPE_U32, dst0[0], val0)->subOp = NV50_IR_SUBOP_VOTE_ANY;
3461 }
3462 if (!tgsi.getDst(0).isMasked(1))
3463 mkMov(dst0[1], zero, TYPE_U32);
3464 break;
3465 case TGSI_OPCODE_READ_FIRST:
3466 // ReadFirstInvocationARB(src) is implemented as
3467 // ReadInvocationARB(src, findLSB(ballot(true)))
3468 val0 = getScratch();
3469 mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
3470 mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
3471 ->subOp = NV50_IR_SUBOP_EXTBF_REV;
3472 mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3473 src1 = val0;
3474 /* fallthrough */
3475 case TGSI_OPCODE_READ_INVOC:
// src1 was already computed above when arriving here from READ_FIRST.
3476 if (tgsi.getOpcode() == TGSI_OPCODE_READ_INVOC)
3477 src1 = fetchSrc(1, 0);
3478 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3479 geni = mkOp3(op, dstTy, dst0[c], fetchSrc(0, c), src1, mkImm(0x1f));
3480 geni->subOp = NV50_IR_SUBOP_SHFL_IDX;
3481 }
3482 break;
3483 case TGSI_OPCODE_CLOCK:
3484 // Stick the 32-bit clock into the high dword of the logical result.
3485 if (!tgsi.getDst(0).isMasked(0))
3486 mkOp1(OP_MOV, TYPE_U32, dst0[0], zero);
3487 if (!tgsi.getDst(0).isMasked(1))
3488 mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
3489 break;
// One (src < 0) test plus predicated DISCARD per distinct source component.
// 'mask' is reused here to remember which swizzled components were already
// tested.
3490 case TGSI_OPCODE_KILL_IF:
3491 val0 = new_LValue(func, FILE_PREDICATE);
3492 mask = 0;
3493 for (c = 0; c < 4; ++c) {
3494 const int s = tgsi.getSrc(0).getSwizzle(c);
3495 if (mask & (1 << s))
3496 continue;
3497 mask |= 1 << s;
3498 mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);
3499 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
3500 }
3501 break;
3502 case TGSI_OPCODE_KILL:
3503 mkOp(OP_DISCARD, TYPE_NONE, NULL);
3504 break;
// Texture opcodes are delegated to handleTEX/handleTXF/handleTXQ; the numeric
// arguments are passed through unchanged (see the column legend below).
3505 case TGSI_OPCODE_TEX:
3506 case TGSI_OPCODE_TEX_LZ:
3507 case TGSI_OPCODE_TXB:
3508 case TGSI_OPCODE_TXL:
3509 case TGSI_OPCODE_TXP:
3510 case TGSI_OPCODE_LODQ:
3511 // R S L C Dx Dy
3512 handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
3513 break;
3514 case TGSI_OPCODE_TXD:
3515 handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
3516 break;
3517 case TGSI_OPCODE_TG4:
3518 handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00);
3519 break;
3520 case TGSI_OPCODE_TEX2:
3521 handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
3522 break;
3523 case TGSI_OPCODE_TXB2:
3524 case TGSI_OPCODE_TXL2:
3525 handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00);
3526 break;
3527 case TGSI_OPCODE_SAMPLE:
3528 case TGSI_OPCODE_SAMPLE_B:
3529 case TGSI_OPCODE_SAMPLE_D:
3530 case TGSI_OPCODE_SAMPLE_L:
3531 case TGSI_OPCODE_SAMPLE_C:
3532 case TGSI_OPCODE_SAMPLE_C_LZ:
3533 handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
3534 break;
3535 case TGSI_OPCODE_TXF_LZ:
3536 case TGSI_OPCODE_TXF:
3537 handleTXF(dst0, 1, 0x03);
3538 break;
3539 case TGSI_OPCODE_SAMPLE_I:
3540 handleTXF(dst0, 1, 0x03);
3541 break;
3542 case TGSI_OPCODE_SAMPLE_I_MS:
3543 handleTXF(dst0, 1, 0x20);
3544 break;
3545 case TGSI_OPCODE_TXQ:
3546 case TGSI_OPCODE_SVIEWINFO:
3547 handleTXQ(dst0, TXQ_DIMS, 1);
3548 break;
3549 case TGSI_OPCODE_TXQS:
3550 // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
3551 // be in .x
3552 dst0[1] = dst0[2] = dst0[3] = NULL;
3553 std::swap(dst0[0], dst0[2]);
3554 handleTXQ(dst0, TXQ_TYPE, 0);
3555 std::swap(dst0[0], dst0[2]);
3556 break;
3557 case TGSI_OPCODE_FBFETCH:
3558 handleFBFETCH(dst0);
3559 break;
// Float to integer conversions use ROUND_Z.
3560 case TGSI_OPCODE_F2I:
3561 case TGSI_OPCODE_F2U:
3562 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3563 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
3564 break;
3565 case TGSI_OPCODE_I2F:
3566 case TGSI_OPCODE_U2F:
3567 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3568 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
3569 break;
// Source channels 0 and 1 are converted to F16 and combined with INSBF; the
// same packed value goes to every enabled destination channel.
3570 case TGSI_OPCODE_PK2H:
3571 val0 = getScratch();
3572 val1 = getScratch();
3573 mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));
3574 mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));
3575 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3576 mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);
3577 break;
// subOp is set to c & 1, so even and odd destination channels differ.
// NOTE(review): presumably selects the low/high half of the source - confirm.
3578 case TGSI_OPCODE_UP2H:
3579 src0 = fetchSrc(0, 0);
3580 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3581 geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);
3582 geni->subOp = c & 1;
3583 }
3584 break;
3585 case TGSI_OPCODE_EMIT:
3586 /* export the saved viewport index */
3587 if (viewport != NULL) {
3588 Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,
3589 info->out[info->io.viewportId].slot[0] * 4);
3590 mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport);
3591 }
3592 /* fallthrough */
3593 case TGSI_OPCODE_ENDPRIM:
3594 {
3595 // get vertex stream (must be immediate)
3596 unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
// Nothing is emitted for OP_RESTART on a non-zero stream, or when
// maxVertices is 0.
3597 if (stream && op == OP_RESTART)
3598 break;
3599 if (info->prop.gp.maxVertices == 0)
3600 break;
3601 src0 = mkImm(stream);
3602 mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
3603 break;
3604 }
// Structured control flow. condBBs/joinBBs/loopBBs/breakBBs are stacks of
// basic blocks pushed by the opening opcode and popped by the closing one.
3605 case TGSI_OPCODE_IF:
3606 case TGSI_OPCODE_UIF:
3607 {
3608 BasicBlock *ifBB = new BasicBlock(func);
3609
3610 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
3611 condBBs.push(bb);
3612 joinBBs.push(bb);
3613
// Branch target is left NULL here; ELSE/ENDIF fill it in later.
3614 mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
3615
3616 setPosition(ifBB, true);
3617 }
3618 break;
3619 case TGSI_OPCODE_ELSE:
3620 {
3621 BasicBlock *elseBB = new BasicBlock(func);
3622 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3623
3624 forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
3625 condBBs.push(bb);
3626
3627 forkBB->getExit()->asFlow()->target.bb = elseBB;
3628 if (!bb->isTerminated())
3629 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
3630
3631 setPosition(elseBB, true);
3632 }
3633 break;
3634 case TGSI_OPCODE_ENDIF:
3635 {
3636 BasicBlock *convBB = new BasicBlock(func);
3637 BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3638 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
3639
3640 if (!bb->isTerminated()) {
3641 // we only want join if none of the clauses ended with CONT/BREAK/RET
3642 if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
3643 insertConvergenceOps(convBB, forkBB);
3644 mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
3645 bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3646 }
3647
3648 if (prevBB->getExit()->op == OP_BRA) {
3649 prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3650 prevBB->getExit()->asFlow()->target.bb = convBB;
3651 }
3652 setPosition(convBB, true);
3653 }
3654 break;
3655 case TGSI_OPCODE_BGNLOOP:
3656 {
3657 BasicBlock *lbgnBB = new BasicBlock(func);
3658 BasicBlock *lbrkBB = new BasicBlock(func);
3659
3660 loopBBs.push(lbgnBB);
3661 breakBBs.push(lbrkBB);
3662 if (loopBBs.getSize() > func->loopNestingBound)
3663 func->loopNestingBound++;
3664
3665 mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
3666
3667 bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
3668 setPosition(lbgnBB, true);
3669 mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
3670 }
3671 break;
3672 case TGSI_OPCODE_ENDLOOP:
3673 {
3674 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
3675
3676 if (!bb->isTerminated()) {
3677 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
3678 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
3679 }
3680 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
3681
3682 // If the loop never breaks (e.g. only has RET's inside), then there
3683 // will be no way to get to the break bb. However BGNLOOP will have
3684 // already made a PREBREAK to it, so it must be in the CFG.
3685 if (getBB()->cfg.incidentCount() == 0)
3686 loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
3687 }
3688 break;
3689 case TGSI_OPCODE_BRK:
3690 {
3691 if (bb->isTerminated())
3692 break;
3693 BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
3694 mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
3695 bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
3696 }
3697 break;
3698 case TGSI_OPCODE_CONT:
3699 {
3700 if (bb->isTerminated())
3701 break;
3702 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
3703 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
3704 contBB->explicitCont = true;
3705 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
3706 }
3707 break;
// Subroutine handling. These cases return directly instead of breaking, so
// the destination write-back after the switch is not executed for them.
3708 case TGSI_OPCODE_BGNSUB:
3709 {
3710 Subroutine *s = getSubroutine(ip);
3711 BasicBlock *entry = new BasicBlock(s->f);
3712 BasicBlock *leave = new BasicBlock(s->f);
3713
3714 // multiple entrypoints possible, keep the graph connected
3715 if (prog->getType() == Program::TYPE_COMPUTE)
3716 prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
3717
3718 sub.cur = s;
3719 s->f->setEntry(entry);
3720 s->f->setExit(leave);
3721 setPosition(entry, true);
3722 return true;
3723 }
3724 case TGSI_OPCODE_ENDSUB:
3725 {
// Switch back to the main function and continue at its root block.
3726 sub.cur = getSubroutine(prog->main);
3727 setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
3728 return true;
3729 }
3730 case TGSI_OPCODE_CAL:
3731 {
3732 Subroutine *s = getSubroutine(tgsi.getLabel());
3733 mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
3734 func->call.attach(&s->f->call, Graph::Edge::TREE);
3735 return true;
3736 }
3737 case TGSI_OPCODE_RET:
3738 {
3739 if (bb->isTerminated())
3740 return true;
3741 BasicBlock *leave = BasicBlock::get(func->cfgExit);
3742
3743 if (!isEndOfSubroutine(ip + 1)) {
3744 // insert a PRERET at the entry if this is an early return
3745 // (only needed for sharing code in the epilogue)
3746 BasicBlock *root = BasicBlock::get(func->cfg.getRoot());
3747 if (root->getEntry() == NULL || root->getEntry()->op != OP_PRERET) {
3748 BasicBlock *pos = getBB();
3749 setPosition(root, false);
3750 mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
3751 setPosition(pos, true);
3752 }
3753 }
3754 mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
3755 bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
3756 }
3757 break;
3758 case TGSI_OPCODE_END:
3759 {
3760 // attach and generate epilogue code
3761 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
3762 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
3763 setPosition(epilogue, true);
3764 if (prog->getType() == Program::TYPE_FRAGMENT)
3765 exportOutputs();
3766 if (info->io.genUserClip > 0)
3767 handleUserClipPlanes();
3768 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
3769 }
3770 break;
3771 case TGSI_OPCODE_SWITCH:
3772 case TGSI_OPCODE_CASE:
3773 ERROR("switch/case opcode encountered, should have been lowered\n");
3774 abort();
3775 break;
3776 case TGSI_OPCODE_LOAD:
3777 handleLOAD(dst0);
3778 break;
3779 case TGSI_OPCODE_STORE:
3780 handleSTORE();
3781 break;
3782 case TGSI_OPCODE_BARRIER:
3783 geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
3784 geni->fixed = 1;
3785 geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
3786 break;
3787 case TGSI_OPCODE_MFENCE:
3788 case TGSI_OPCODE_LFENCE:
3789 case TGSI_OPCODE_SFENCE:
3790 geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
3791 geni->fixed = 1;
3792 geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3793 break;
3794 case TGSI_OPCODE_MEMBAR:
3795 {
3796 uint32_t level = tgsi.getSrc(0).getValueU32(0, info);
3797 geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
3798 geni->fixed = 1;
// CTA-level barrier only if no bits other than THREAD_GROUP/SHARED are
// requested; otherwise a GL-level barrier.
3799 if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED)))
3800 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
3801 else
3802 geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
3803 }
3804 break;
3805 case TGSI_OPCODE_ATOMUADD:
3806 case TGSI_OPCODE_ATOMXCHG:
3807 case TGSI_OPCODE_ATOMCAS:
3808 case TGSI_OPCODE_ATOMAND:
3809 case TGSI_OPCODE_ATOMOR:
3810 case TGSI_OPCODE_ATOMXOR:
3811 case TGSI_OPCODE_ATOMUMIN:
3812 case TGSI_OPCODE_ATOMIMIN:
3813 case TGSI_OPCODE_ATOMUMAX:
3814 case TGSI_OPCODE_ATOMIMAX:
3815 handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
3816 break;
// Resource query: OP_BUFQ for buffers, otherwise an OP_SUQ texture
// instruction for images.
3817 case TGSI_OPCODE_RESQ:
3818 if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
3819 Value *ind = NULL;
3820 if (tgsi.getSrc(0).isIndirect(0))
3821 ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
3822 geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0],
3823 makeSym(tgsi.getSrc(0).getFile(),
3824 tgsi.getSrc(0).getIndex(0), -1, 0, 0));
3825 if (ind)
3826 geni->setIndirect(0, 1, ind);
3827 } else {
3828 assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE);
3829
3830 TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
// Defs are packed densely (index d) while tex.mask records the original
// channel positions.
3831 for (int c = 0, d = 0; c < 4; ++c) {
3832 if (dst0[c]) {
3833 texi->setDef(d++, dst0[c]);
3834 texi->tex.mask |= 1 << c;
3835 }
3836 }
3837 texi->tex.r = tgsi.getSrc(0).getIndex(0);
3838 texi->tex.target = getImageTarget(code, texi->tex.r);
3839
3840 if (tgsi.getSrc(0).isIndirect(0))
3841 texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
3842
3843 bb->insertTail(texi);
3844 }
3845 break;
// Bitfield extract: src1 and src2 are packed into one control word as
// (src2 << 8) | src1, either as an immediate or with INSBF at runtime.
3846 case TGSI_OPCODE_IBFE:
3847 case TGSI_OPCODE_UBFE:
3848 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3849 src0 = fetchSrc(0, c);
3850 val0 = getScratch();
3851 if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
3852 tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
3853 loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) |
3854 tgsi.getSrc(1).getValueU32(c, info));
3855 } else {
3856 src1 = fetchSrc(1, c);
3857 src2 = fetchSrc(2, c);
3858 mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1);
3859 }
3860 mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0);
3861 }
3862 break;
3863 case TGSI_OPCODE_BFI:
3864 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3865 src0 = fetchSrc(0, c);
3866 src1 = fetchSrc(1, c);
3867 src2 = fetchSrc(2, c);
3868 src3 = fetchSrc(3, c);
3869 val0 = getScratch();
3870 mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2);
3871 mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0);
3872 }
3873 break;
// Same EXTBF_REV + BFIND(SAMT) sequence as used for READ_FIRST above.
3874 case TGSI_OPCODE_LSB:
3875 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3876 src0 = fetchSrc(0, c);
3877 val0 = getScratch();
3878 geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
3879 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3880 geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
3881 geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3882 }
3883 break;
3884 case TGSI_OPCODE_IMSB:
3885 case TGSI_OPCODE_UMSB:
3886 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3887 src0 = fetchSrc(0, c);
3888 mkOp1(OP_BFIND, srcTy, dst0[c], src0);
3889 }
3890 break;
3891 case TGSI_OPCODE_BREV:
3892 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3893 src0 = fetchSrc(0, c);
3894 geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
3895 geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3896 }
3897 break;
3898 case TGSI_OPCODE_POPC:
3899 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3900 src0 = fetchSrc(0, c);
3901 mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
3902 }
3903 break;
3904 case TGSI_OPCODE_INTERP_CENTROID:
3905 case TGSI_OPCODE_INTERP_SAMPLE:
3906 case TGSI_OPCODE_INTERP_OFFSET:
3907 handleINTERP(dst0);
3908 break;
// 64-bit source, 32-bit destination: each enabled destination channel
// consumes two consecutive source channels (tracked by 'pos'), which are
// merged into one 8-byte value before the conversion.
3909 case TGSI_OPCODE_I642F:
3910 case TGSI_OPCODE_U642F:
3911 case TGSI_OPCODE_D2I:
3912 case TGSI_OPCODE_D2U:
3913 case TGSI_OPCODE_D2F: {
3914 int pos = 0;
3915 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3916 Value *dreg = getSSA(8);
3917 src0 = fetchSrc(0, pos);
3918 src1 = fetchSrc(0, pos + 1);
3919 mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
3920 Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
3921 if (!isFloatType(dstTy))
3922 cvt->rnd = ROUND_Z;
3923 pos += 2;
3924 }
3925 break;
3926 }
// 32-bit source, 64-bit destination: the result occupies channels c and
// c + 1, and the source channel is c / 2. The extra c++ inside each loop
// skips the high-half channel that the iteration has already produced.
// Here the low dword is the source itself and the high dword is the source
// shifted right by 31 as a signed value.
3927 case TGSI_OPCODE_I2I64:
3928 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3929 dst0[c] = fetchSrc(0, c / 2);
3930 mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));
3931 c++;
3932 }
3933 break;
// Unsigned extension: high dword is the shared 'zero' value.
3934 case TGSI_OPCODE_U2I64:
3935 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3936 dst0[c] = fetchSrc(0, c / 2);
3937 dst0[c + 1] = zero;
3938 c++;
3939 }
3940 break;
3941 case TGSI_OPCODE_F2I64:
3942 case TGSI_OPCODE_F2U64:
3943 case TGSI_OPCODE_I2D:
3944 case TGSI_OPCODE_U2D:
3945 case TGSI_OPCODE_F2D:
3946 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3947 Value *dreg = getSSA(8);
3948 Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
3949 if (!isFloatType(dstTy))
3950 cvt->rnd = ROUND_Z;
3951 mkSplit(&dst0[c], 4, dreg);
3952 c++;
3953 }
3954 break;
// 64-bit source and destination: channels c and c + 1 are merged into one
// 8-byte value, the operation is applied, and the 8-byte result is split
// back into dst0[c] and dst0[c + 1]. The same pattern is used by most of the
// 64-bit cases that follow.
3955 case TGSI_OPCODE_D2I64:
3956 case TGSI_OPCODE_D2U64:
3957 case TGSI_OPCODE_I642D:
3958 case TGSI_OPCODE_U642D:
3959 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3960 src0 = getSSA(8);
3961 Value *dst = getSSA(8), *tmp[2];
3962 tmp[0] = fetchSrc(0, c);
3963 tmp[1] = fetchSrc(0, c + 1);
3964 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3965 Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);
3966 if (!isFloatType(dstTy))
3967 cvt->rnd = ROUND_Z;
3968 mkSplit(&dst0[c], 4, dst);
3969 c++;
3970 }
3971 break;
// Negation is emitted as (zero - src).
3972 case TGSI_OPCODE_I64NEG:
3973 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3974 src0 = getSSA(8);
3975 Value *dst = getSSA(8), *tmp[2];
3976 tmp[0] = fetchSrc(0, c);
3977 tmp[1] = fetchSrc(0, c + 1);
3978 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3979 mkOp2(OP_SUB, dstTy, dst, zero, src0);
3980 mkSplit(&dst0[c], 4, dst);
3981 c++;
3982 }
3983 break;
// The 64-bit negation is computed, then each 32-bit half of the result is
// selected between the negated and the original half; both selects use the
// high source dword as condition operand.
3984 case TGSI_OPCODE_I64ABS:
3985 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3986 src0 = getSSA(8);
3987 Value *neg = getSSA(8), *srcComp[2], *negComp[2];
3988 srcComp[0] = fetchSrc(0, c);
3989 srcComp[1] = fetchSrc(0, c + 1);
3990 mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]);
3991 mkOp2(OP_SUB, dstTy, neg, zero, src0);
3992 mkSplit(negComp, 4, neg);
3993 mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32,
3994 negComp[0], srcComp[0], srcComp[1]);
3995 mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32,
3996 negComp[1], srcComp[1], srcComp[1]);
3997 c++;
3998 }
3999 break;
4000 case TGSI_OPCODE_DABS:
4001 case TGSI_OPCODE_DNEG:
4002 case TGSI_OPCODE_DRCP:
4003 case TGSI_OPCODE_DSQRT:
4004 case TGSI_OPCODE_DRSQ:
4005 case TGSI_OPCODE_DTRUNC:
4006 case TGSI_OPCODE_DCEIL:
4007 case TGSI_OPCODE_DFLR:
4008 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4009 src0 = getSSA(8);
4010 Value *dst = getSSA(8), *tmp[2];
4011 tmp[0] = fetchSrc(0, c);
4012 tmp[1] = fetchSrc(0, c + 1);
4013 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4014 mkOp1(op, dstTy, dst, src0);
4015 mkSplit(&dst0[c], 4, dst);
4016 c++;
4017 }
4018 break;
// dfrac(x) = x - floor(x)
4019 case TGSI_OPCODE_DFRAC:
4020 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4021 src0 = getSSA(8);
4022 Value *dst = getSSA(8), *tmp[2];
4023 tmp[0] = fetchSrc(0, c);
4024 tmp[1] = fetchSrc(0, c + 1);
4025 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4026 mkOp1(OP_FLOOR, TYPE_F64, dst, src0);
4027 mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);
4028 mkSplit(&dst0[c], 4, dst);
4029 c++;
4030 }
4031 break;
// 64-bit comparisons with a 32-bit result: each enabled destination channel
// consumes one pair of channels ('pos', 'pos + 1') from both sources.
4032 case TGSI_OPCODE_U64SEQ:
4033 case TGSI_OPCODE_U64SNE:
4034 case TGSI_OPCODE_U64SLT:
4035 case TGSI_OPCODE_U64SGE:
4036 case TGSI_OPCODE_I64SLT:
4037 case TGSI_OPCODE_I64SGE:
4038 case TGSI_OPCODE_DSLT:
4039 case TGSI_OPCODE_DSGE:
4040 case TGSI_OPCODE_DSEQ:
4041 case TGSI_OPCODE_DSNE: {
4042 int pos = 0;
4043 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4044 Value *tmp[2];
4045
4046 src0 = getSSA(8);
4047 src1 = getSSA(8);
4048 tmp[0] = fetchSrc(0, pos);
4049 tmp[1] = fetchSrc(0, pos + 1);
4050 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4051 tmp[0] = fetchSrc(1, pos);
4052 tmp[1] = fetchSrc(1, pos + 1);
4053 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4054 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
4055 pos += 2;
4056 }
4057 break;
4058 }
// 64-bit min/max is emitted as two 32-bit operations: the high dwords first
// (MINMAX_HIGH, defining a flags value), then the low dwords (MINMAX_LOW,
// unsigned, consuming that flags value).
4059 case TGSI_OPCODE_U64MIN:
4060 case TGSI_OPCODE_U64MAX:
4061 case TGSI_OPCODE_I64MIN:
4062 case TGSI_OPCODE_I64MAX: {
4063 dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32;
4064 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4065 Value *flag = getSSA(1, FILE_FLAGS);
4066 src0 = fetchSrc(0, c + 1);
4067 src1 = fetchSrc(1, c + 1);
4068 geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);
4069 geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;
4070 geni->setFlagsDef(1, flag);
4071
4072 src0 = fetchSrc(0, c);
4073 src1 = fetchSrc(1, c);
4074 geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1);
4075 geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;
4076 geni->setFlagsSrc(2, flag);
4077
4078 c++;
4079 }
4080 break;
4081 }
// 64-bit shifts: the shifted value is 64-bit, the shift amount is a 32-bit
// value taken from source 1 channel c / 2.
4082 case TGSI_OPCODE_U64SHL:
4083 case TGSI_OPCODE_I64SHR:
4084 case TGSI_OPCODE_U64SHR:
4085 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4086 src0 = getSSA(8);
4087 Value *dst = getSSA(8), *tmp[2];
4088 tmp[0] = fetchSrc(0, c);
4089 tmp[1] = fetchSrc(0, c + 1);
4090 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4091 src1 = fetchSrc(1, c / 2);
4092 mkOp2(op, dstTy, dst, src0, src1);
4093 mkSplit(&dst0[c], 4, dst);
4094 c++;
4095 }
4096 break;
4097 case TGSI_OPCODE_U64ADD:
4098 case TGSI_OPCODE_U64MUL:
4099 case TGSI_OPCODE_DADD:
4100 case TGSI_OPCODE_DMUL:
4101 case TGSI_OPCODE_DDIV:
4102 case TGSI_OPCODE_DMAX:
4103 case TGSI_OPCODE_DMIN:
4104 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4105 src0 = getSSA(8);
4106 src1 = getSSA(8);
4107 Value *dst = getSSA(8), *tmp[2];
4108 tmp[0] = fetchSrc(0, c);
4109 tmp[1] = fetchSrc(0, c + 1);
4110 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4111 tmp[0] = fetchSrc(1, c);
4112 tmp[1] = fetchSrc(1, c + 1);
4113 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4114 mkOp2(op, dstTy, dst, src0, src1);
4115 mkSplit(&dst0[c], 4, dst);
4116 c++;
4117 }
4118 break;
4119 case TGSI_OPCODE_DMAD:
4120 case TGSI_OPCODE_DFMA:
4121 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4122 src0 = getSSA(8);
4123 src1 = getSSA(8);
4124 src2 = getSSA(8);
4125 Value *dst = getSSA(8), *tmp[2];
4126 tmp[0] = fetchSrc(0, c);
4127 tmp[1] = fetchSrc(0, c + 1);
4128 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4129 tmp[0] = fetchSrc(1, c);
4130 tmp[1] = fetchSrc(1, c + 1);
4131 mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4132 tmp[0] = fetchSrc(2, c);
4133 tmp[1] = fetchSrc(2, c + 1);
4134 mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);
4135 mkOp3(op, dstTy, dst, src0, src1, src2);
4136 mkSplit(&dst0[c], 4, dst);
4137 c++;
4138 }
4139 break;
4140 case TGSI_OPCODE_DROUND:
4141 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4142 src0 = getSSA(8);
4143 Value *dst = getSSA(8), *tmp[2];
4144 tmp[0] = fetchSrc(0, c);
4145 tmp[1] = fetchSrc(0, c + 1);
4146 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4147 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)
4148 ->rnd = ROUND_NI;
4149 mkSplit(&dst0[c], 4, dst);
4150 c++;
4151 }
4152 break;
// Double sign: computed as an F32 sign value (same scheme as SSG) and then
// converted to F64.
4153 case TGSI_OPCODE_DSSG:
4154 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4155 src0 = getSSA(8);
4156 Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];
4157 tmp[0] = fetchSrc(0, c);
4158 tmp[1] = fetchSrc(0, c + 1);
4159 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4160
4161 val0 = getScratch();
4162 val1 = getScratch();
4163 // The zero is wrong here since it's only 32-bit, but it works out in
4164 // the end since it gets replaced with $r63.
4165 mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);
4166 mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);
4167 mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);
4168 mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);
4169 mkSplit(&dst0[c], 4, dst);
4170 c++;
4171 }
4172 break;
// 64-bit integer sign: the low dword is the 32-bit sign result and the high
// dword is that result shifted right by 31 as a signed value.
4173 case TGSI_OPCODE_I64SSG:
4174 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4175 src0 = getSSA(8);
4176 Value *tmp[2];
4177 tmp[0] = fetchSrc(0, c);
4178 tmp[1] = fetchSrc(0, c + 1);
4179 mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4180
4181 val0 = getScratch();
4182 val1 = getScratch();
4183 mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero);
4184 mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero);
4185 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
4186 mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31));
4187 c++;
4188 }
4189 break;
4190 default:
4191 ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
4192 assert(0);
4193 break;
4194 }
4195
// Write-back: for every non-NULL dst0[] entry, copy it into the real
// destination when it differs (scratch value, or an entry replaced by a
// handler such as I2I64/U2I64), then commit it with storeDst().
4196 if (tgsi.dstCount()) {
4197 for (c = 0; c < 4; ++c) {
4198 if (!dst0[c])
4199 continue;
4200 if (dst0[c] != rDst0[c])
4201 mkMov(rDst0[c], dst0[c]);
4202 storeDst(0, c, rDst0[c]);
4203 }
4204 }
4205 vtxBaseValid = 0;
4206
4207 return true;
4208 }
4209
4210 void
4211 Converter::handleUserClipPlanes()
4212 {
4213 Value *res[8];
4214 int n, i, c;
4215
4216 for (c = 0; c < 4; ++c) {
4217 for (i = 0; i < info->io.genUserClip; ++i) {
4218 Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4219 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
4220 Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
4221 if (c == 0)
4222 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
4223 else
4224 mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
4225 }
4226 }
4227
4228 const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
4229
4230 for (i = 0; i < info->io.genUserClip; ++i) {
4231 n = i / 4 + first;
4232 c = i % 4;
4233 Symbol *sym =
4234 mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
4235 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
4236 }
4237 }
4238
4239 void
4240 Converter::exportOutputs()
4241 {
4242 if (info->io.alphaRefBase) {
4243 for (unsigned int i = 0; i < info->numOutputs; ++i) {
4244 if (info->out[i].sn != TGSI_SEMANTIC_COLOR ||
4245 info->out[i].si != 0)
4246 continue;
4247 const unsigned int c = 3;
4248 if (!oData.exists(sub.cur->values, i, c))
4249 continue;
4250 Value *val = oData.load(sub.cur->values, i, c, NULL);
4251 if (!val)
4252 continue;
4253
4254 Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4255 TYPE_U32, info->io.alphaRefBase);
4256 Value *pred = new_LValue(func, FILE_PREDICATE);
4257 mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val,
4258 mkLoadv(TYPE_U32, ref, NULL))
4259 ->subOp = 1;
4260 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred);
4261 }
4262 }
4263
4264 for (unsigned int i = 0; i < info->numOutputs; ++i) {
4265 for (unsigned int c = 0; c < 4; ++c) {
4266 if (!oData.exists(sub.cur->values, i, c))
4267 continue;
4268 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
4269 info->out[i].slot[c] * 4);
4270 Value *val = oData.load(sub.cur->values, i, c, NULL);
4271 if (val) {
4272 if (info->out[i].sn == TGSI_SEMANTIC_POSITION)
4273 mkOp1(OP_SAT, TYPE_F32, val, val);
4274 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
4275 }
4276 }
4277 }
4278 }
4279
4280 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
4281 code(code),
4282 tgsi(NULL),
4283 tData(this), lData(this), aData(this), oData(this)
4284 {
4285 info = code->info;
4286
4287 const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
4288 const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
4289 const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
4290
4291 tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
4292 lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
4293 aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
4294 oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
4295
4296 zero = mkImm((uint32_t)0);
4297
4298 vtxBaseValid = 0;
4299 }
4300
4301 Converter::~Converter()
4302 {
4303 }
4304
4305 inline const Converter::Location *
4306 Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v)
4307 {
4308 ValueMap::l_iterator it = s->values.l.find(v);
4309 return it == s->values.l.end() ? NULL : &it->second;
4310 }
4311
4312 template<typename T> inline void
4313 Converter::BindArgumentsPass::updateCallArgs(
4314 Instruction *i, void (Instruction::*setArg)(int, Value *),
4315 T (Function::*proto))
4316 {
4317 Function *g = i->asFlow()->target.fn;
4318 Subroutine *subg = conv.getSubroutine(g);
4319
4320 for (unsigned a = 0; a < (g->*proto).size(); ++a) {
4321 Value *v = (g->*proto)[a].get();
4322 const Converter::Location &l = *getValueLocation(subg, v);
4323 Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
4324
4325 (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
4326 }
4327 }
4328
4329 template<typename T> inline void
4330 Converter::BindArgumentsPass::updatePrototype(
4331 BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
4332 {
4333 (func->*updateSet)();
4334
4335 for (unsigned i = 0; i < set->getSize(); ++i) {
4336 Value *v = func->getLValue(i);
4337 const Converter::Location *l = getValueLocation(sub, v);
4338
4339 // only include values with a matching TGSI register
4340 if (set->test(i) && l && !conv.code->locals.count(*l))
4341 (func->*proto).push_back(v);
4342 }
4343 }
4344
4345 bool
4346 Converter::BindArgumentsPass::visit(Function *f)
4347 {
4348 sub = conv.getSubroutine(f);
4349
4350 for (ArrayList::Iterator bi = f->allBBlocks.iterator();
4351 !bi.end(); bi.next()) {
4352 for (Instruction *i = BasicBlock::get(bi)->getFirst();
4353 i; i = i->next) {
4354 if (i->op == OP_CALL && !i->asFlow()->builtin) {
4355 updateCallArgs(i, &Instruction::setSrc, &Function::ins);
4356 updateCallArgs(i, &Instruction::setDef, &Function::outs);
4357 }
4358 }
4359 }
4360
4361 if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
4362 return true;
4363 updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
4364 &Function::buildLiveSets, &Function::ins);
4365 updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
4366 &Function::buildDefSets, &Function::outs);
4367
4368 return true;
4369 }
4370
4371 bool
4372 Converter::run()
4373 {
4374 BasicBlock *entry = new BasicBlock(prog->main);
4375 BasicBlock *leave = new BasicBlock(prog->main);
4376
4377 prog->main->setEntry(entry);
4378 prog->main->setExit(leave);
4379
4380 setPosition(entry, true);
4381 sub.cur = getSubroutine(prog->main);
4382
4383 if (info->io.genUserClip > 0) {
4384 for (int c = 0; c < 4; ++c)
4385 clipVtx[c] = getScratch();
4386 }
4387
4388 switch (prog->getType()) {
4389 case Program::TYPE_TESSELLATION_CONTROL:
4390 outBase = mkOp2v(
4391 OP_SUB, TYPE_U32, getSSA(),
4392 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
4393 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
4394 break;
4395 case Program::TYPE_FRAGMENT: {
4396 Symbol *sv = mkSysVal(SV_POSITION, 3);
4397 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
4398 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
4399 break;
4400 }
4401 default:
4402 break;
4403 }
4404
4405 if (info->io.viewportId >= 0)
4406 viewport = getScratch();
4407 else
4408 viewport = NULL;
4409
4410 for (ip = 0; ip < code->scan.num_instructions; ++ip) {
4411 if (!handleInstruction(&code->insns[ip]))
4412 return false;
4413 }
4414
4415 if (!BindArgumentsPass(*this).run(prog))
4416 return false;
4417
4418 return true;
4419 }
4420
4421 } // unnamed namespace
4422
4423 namespace nv50_ir {
4424
4425 bool
4426 Program::makeFromTGSI(struct nv50_ir_prog_info *info)
4427 {
4428 tgsi::Source src(info);
4429 if (!src.scanSource())
4430 return false;
4431 tlsSize = info->bin.tlsSpace;
4432
4433 Converter builder(this, &src);
4434 return builder.run();
4435 }
4436
4437 } // namespace nv50_ir