ilo: update genhw headers
[mesa.git] / src / gallium / drivers / ilo / shader / toy_tgsi.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_info.h"
30 #include "tgsi/tgsi_strings.h"
31 #include "util/u_hash_table.h"
32 #include "toy_helpers.h"
33 #include "toy_tgsi.h"
34
35 /* map TGSI opcode to GEN opcode 1-to-1 */
36 static const struct {
37 int opcode;
38 int num_dst;
39 int num_src;
40 } aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
41 [TGSI_OPCODE_ARL] = { GEN6_OPCODE_RNDD, 1, 1 },
42 [TGSI_OPCODE_MOV] = { GEN6_OPCODE_MOV, 1, 1 },
43 [TGSI_OPCODE_RCP] = { TOY_OPCODE_INV, 1, 1 },
44 [TGSI_OPCODE_RSQ] = { TOY_OPCODE_RSQ, 1, 1 },
45 [TGSI_OPCODE_MUL] = { GEN6_OPCODE_MUL, 1, 2 },
46 [TGSI_OPCODE_ADD] = { GEN6_OPCODE_ADD, 1, 2 },
47 [TGSI_OPCODE_DP3] = { GEN6_OPCODE_DP3, 1, 2 },
48 [TGSI_OPCODE_DP4] = { GEN6_OPCODE_DP4, 1, 2 },
49 [TGSI_OPCODE_MIN] = { GEN6_OPCODE_SEL, 1, 2 },
50 [TGSI_OPCODE_MAX] = { GEN6_OPCODE_SEL, 1, 2 },
51 /* a later pass will move src[2] to accumulator */
52 [TGSI_OPCODE_MAD] = { GEN6_OPCODE_MAC, 1, 3 },
53 [TGSI_OPCODE_SUB] = { GEN6_OPCODE_ADD, 1, 2 },
54 [TGSI_OPCODE_SQRT] = { TOY_OPCODE_SQRT, 1, 1 },
55 [TGSI_OPCODE_FRC] = { GEN6_OPCODE_FRC, 1, 1 },
56 [TGSI_OPCODE_FLR] = { GEN6_OPCODE_RNDD, 1, 1 },
57 [TGSI_OPCODE_ROUND] = { GEN6_OPCODE_RNDE, 1, 1 },
58 [TGSI_OPCODE_EX2] = { TOY_OPCODE_EXP, 1, 1 },
59 [TGSI_OPCODE_LG2] = { TOY_OPCODE_LOG, 1, 1 },
60 [TGSI_OPCODE_POW] = { TOY_OPCODE_POW, 1, 2 },
61 [TGSI_OPCODE_ABS] = { GEN6_OPCODE_MOV, 1, 1 },
62 [TGSI_OPCODE_DPH] = { GEN6_OPCODE_DPH, 1, 2 },
63 [TGSI_OPCODE_COS] = { TOY_OPCODE_COS, 1, 1 },
64 [TGSI_OPCODE_KILL] = { TOY_OPCODE_KIL, 0, 0 },
65 [TGSI_OPCODE_SIN] = { TOY_OPCODE_SIN, 1, 1 },
66 [TGSI_OPCODE_ARR] = { GEN6_OPCODE_RNDZ, 1, 1 },
67 [TGSI_OPCODE_DP2] = { GEN6_OPCODE_DP2, 1, 2 },
68 [TGSI_OPCODE_IF] = { GEN6_OPCODE_IF, 0, 1 },
69 [TGSI_OPCODE_UIF] = { GEN6_OPCODE_IF, 0, 1 },
70 [TGSI_OPCODE_ELSE] = { GEN6_OPCODE_ELSE, 0, 0 },
71 [TGSI_OPCODE_ENDIF] = { GEN6_OPCODE_ENDIF, 0, 0 },
72 [TGSI_OPCODE_I2F] = { GEN6_OPCODE_MOV, 1, 1 },
73 [TGSI_OPCODE_NOT] = { GEN6_OPCODE_NOT, 1, 1 },
74 [TGSI_OPCODE_TRUNC] = { GEN6_OPCODE_RNDZ, 1, 1 },
75 [TGSI_OPCODE_SHL] = { GEN6_OPCODE_SHL, 1, 2 },
76 [TGSI_OPCODE_AND] = { GEN6_OPCODE_AND, 1, 2 },
77 [TGSI_OPCODE_OR] = { GEN6_OPCODE_OR, 1, 2 },
78 [TGSI_OPCODE_MOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
79 [TGSI_OPCODE_XOR] = { GEN6_OPCODE_XOR, 1, 2 },
80 [TGSI_OPCODE_EMIT] = { TOY_OPCODE_EMIT, 0, 0 },
81 [TGSI_OPCODE_ENDPRIM] = { TOY_OPCODE_ENDPRIM, 0, 0 },
82 [TGSI_OPCODE_NOP] = { GEN6_OPCODE_NOP, 0, 0 },
83 [TGSI_OPCODE_KILL_IF] = { TOY_OPCODE_KIL, 0, 1 },
84 [TGSI_OPCODE_END] = { GEN6_OPCODE_NOP, 0, 0 },
85 [TGSI_OPCODE_F2I] = { GEN6_OPCODE_MOV, 1, 1 },
86 [TGSI_OPCODE_IDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
87 [TGSI_OPCODE_IMAX] = { GEN6_OPCODE_SEL, 1, 2 },
88 [TGSI_OPCODE_IMIN] = { GEN6_OPCODE_SEL, 1, 2 },
89 [TGSI_OPCODE_INEG] = { GEN6_OPCODE_MOV, 1, 1 },
90 [TGSI_OPCODE_ISHR] = { GEN6_OPCODE_ASR, 1, 2 },
91 [TGSI_OPCODE_F2U] = { GEN6_OPCODE_MOV, 1, 1 },
92 [TGSI_OPCODE_U2F] = { GEN6_OPCODE_MOV, 1, 1 },
93 [TGSI_OPCODE_UADD] = { GEN6_OPCODE_ADD, 1, 2 },
94 [TGSI_OPCODE_UDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
95 /* a later pass will move src[2] to accumulator */
96 [TGSI_OPCODE_UMAD] = { GEN6_OPCODE_MAC, 1, 3 },
97 [TGSI_OPCODE_UMAX] = { GEN6_OPCODE_SEL, 1, 2 },
98 [TGSI_OPCODE_UMIN] = { GEN6_OPCODE_SEL, 1, 2 },
99 [TGSI_OPCODE_UMOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
100 [TGSI_OPCODE_UMUL] = { GEN6_OPCODE_MUL, 1, 2 },
101 [TGSI_OPCODE_USHR] = { GEN6_OPCODE_SHR, 1, 2 },
102 [TGSI_OPCODE_UARL] = { GEN6_OPCODE_MOV, 1, 1 },
103 [TGSI_OPCODE_IABS] = { GEN6_OPCODE_MOV, 1, 1 },
104 };
105
106 static void
107 aos_simple(struct toy_compiler *tc,
108 const struct tgsi_full_instruction *tgsi_inst,
109 struct toy_dst *dst,
110 struct toy_src *src)
111 {
112 struct toy_inst *inst;
113 int opcode;
114 int cond_modifier = GEN6_COND_NONE;
115 int num_dst = tgsi_inst->Instruction.NumDstRegs;
116 int num_src = tgsi_inst->Instruction.NumSrcRegs;
117 int i;
118
119 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
120 assert(num_dst == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst);
121 assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src);
122 if (!opcode) {
123 assert(!"invalid aos_simple() call");
124 return;
125 }
126
127 /* no need to emit nop */
128 if (opcode == GEN6_OPCODE_NOP)
129 return;
130
131 inst = tc_add(tc);
132 if (!inst)
133 return;
134
135 inst->opcode = opcode;
136
137 switch (tgsi_inst->Instruction.Opcode) {
138 case TGSI_OPCODE_MIN:
139 case TGSI_OPCODE_IMIN:
140 case TGSI_OPCODE_UMIN:
141 cond_modifier = GEN6_COND_L;
142 break;
143 case TGSI_OPCODE_MAX:
144 case TGSI_OPCODE_IMAX:
145 case TGSI_OPCODE_UMAX:
146 cond_modifier = GEN6_COND_GE;
147 break;
148 case TGSI_OPCODE_SUB:
149 src[1] = tsrc_negate(src[1]);
150 break;
151 case TGSI_OPCODE_ABS:
152 case TGSI_OPCODE_IABS:
153 src[0] = tsrc_absolute(src[0]);
154 break;
155 case TGSI_OPCODE_IF:
156 cond_modifier = GEN6_COND_NZ;
157 num_src = 2;
158 assert(src[0].type == TOY_TYPE_F);
159 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
160 src[1] = tsrc_imm_f(0.0f);
161 break;
162 case TGSI_OPCODE_UIF:
163 cond_modifier = GEN6_COND_NZ;
164 num_src = 2;
165 assert(src[0].type == TOY_TYPE_UD);
166 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
167 src[1] = tsrc_imm_d(0);
168 break;
169 case TGSI_OPCODE_INEG:
170 src[0] = tsrc_negate(src[0]);
171 break;
172 case TGSI_OPCODE_RCP:
173 case TGSI_OPCODE_RSQ:
174 case TGSI_OPCODE_EX2:
175 case TGSI_OPCODE_LG2:
176 case TGSI_OPCODE_COS:
177 case TGSI_OPCODE_SIN:
178 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
179 break;
180 case TGSI_OPCODE_POW:
181 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
182 src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
183 break;
184 }
185
186 inst->cond_modifier = cond_modifier;
187
188 if (num_dst) {
189 assert(num_dst == 1);
190 inst->dst = dst[0];
191 }
192
193 assert(num_src <= Elements(inst->src));
194 for (i = 0; i < num_src; i++)
195 inst->src[i] = src[i];
196 }
197
198 static void
199 aos_set_on_cond(struct toy_compiler *tc,
200 const struct tgsi_full_instruction *tgsi_inst,
201 struct toy_dst *dst,
202 struct toy_src *src)
203 {
204 struct toy_inst *inst;
205 int cond;
206 struct toy_src zero, one;
207
208 switch (tgsi_inst->Instruction.Opcode) {
209 case TGSI_OPCODE_SLT:
210 case TGSI_OPCODE_ISLT:
211 case TGSI_OPCODE_USLT:
212 case TGSI_OPCODE_FSLT:
213 cond = GEN6_COND_L;
214 break;
215 case TGSI_OPCODE_SGE:
216 case TGSI_OPCODE_ISGE:
217 case TGSI_OPCODE_USGE:
218 case TGSI_OPCODE_FSGE:
219 cond = GEN6_COND_GE;
220 break;
221 case TGSI_OPCODE_SEQ:
222 case TGSI_OPCODE_USEQ:
223 case TGSI_OPCODE_FSEQ:
224 cond = GEN6_COND_Z;
225 break;
226 case TGSI_OPCODE_SGT:
227 cond = GEN6_COND_G;
228 break;
229 case TGSI_OPCODE_SLE:
230 cond = GEN6_COND_LE;
231 break;
232 case TGSI_OPCODE_SNE:
233 case TGSI_OPCODE_USNE:
234 case TGSI_OPCODE_FSNE:
235 cond = GEN6_COND_NZ;
236 break;
237 default:
238 assert(!"invalid aos_set_on_cond() call");
239 return;
240 }
241
242 /* note that for integer versions, all bits are set */
243 switch (dst[0].type) {
244 case TOY_TYPE_F:
245 default:
246 zero = tsrc_imm_f(0.0f);
247 one = tsrc_imm_f(1.0f);
248 break;
249 case TOY_TYPE_D:
250 zero = tsrc_imm_d(0);
251 one = tsrc_imm_d(-1);
252 break;
253 case TOY_TYPE_UD:
254 zero = tsrc_imm_ud(0);
255 one = tsrc_imm_ud(~0);
256 break;
257 }
258
259 tc_MOV(tc, dst[0], zero);
260 tc_CMP(tc, tdst_null(), src[0], src[1], cond);
261 inst = tc_MOV(tc, dst[0], one);
262 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
263 }
264
265 static void
266 aos_compare(struct toy_compiler *tc,
267 const struct tgsi_full_instruction *tgsi_inst,
268 struct toy_dst *dst,
269 struct toy_src *src)
270 {
271 struct toy_inst *inst;
272 struct toy_src zero;
273
274 switch (tgsi_inst->Instruction.Opcode) {
275 case TGSI_OPCODE_CMP:
276 zero = tsrc_imm_f(0.0f);
277 break;
278 case TGSI_OPCODE_UCMP:
279 zero = tsrc_imm_ud(0);
280 break;
281 default:
282 assert(!"invalid aos_compare() call");
283 return;
284 }
285
286 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
287 inst = tc_SEL(tc, dst[0], src[1], src[2], GEN6_COND_NONE);
288 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
289 }
290
291 static void
292 aos_set_sign(struct toy_compiler *tc,
293 const struct tgsi_full_instruction *tgsi_inst,
294 struct toy_dst *dst,
295 struct toy_src *src)
296 {
297 struct toy_inst *inst;
298 struct toy_src zero, one, neg_one;
299
300 switch (tgsi_inst->Instruction.Opcode) {
301 case TGSI_OPCODE_SSG:
302 zero = tsrc_imm_f(0.0f);
303 one = tsrc_imm_f(1.0f);
304 neg_one = tsrc_imm_f(-1.0f);
305 break;
306 case TGSI_OPCODE_ISSG:
307 zero = tsrc_imm_d(0);
308 one = tsrc_imm_d(1);
309 neg_one = tsrc_imm_d(-1);
310 break;
311 default:
312 assert(!"invalid aos_set_sign() call");
313 return;
314 }
315
316 tc_MOV(tc, dst[0], zero);
317
318 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_G);
319 inst = tc_MOV(tc, dst[0], one);
320 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
321
322 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
323 inst = tc_MOV(tc, dst[0], neg_one);
324 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
325 }
326
327 static void
328 aos_tex(struct toy_compiler *tc,
329 const struct tgsi_full_instruction *tgsi_inst,
330 struct toy_dst *dst,
331 struct toy_src *src)
332 {
333 struct toy_inst *inst;
334 enum toy_opcode opcode;
335 int i;
336
337 switch (tgsi_inst->Instruction.Opcode) {
338 case TGSI_OPCODE_TEX:
339 opcode = TOY_OPCODE_TGSI_TEX;
340 break;
341 case TGSI_OPCODE_TXD:
342 opcode = TOY_OPCODE_TGSI_TXD;
343 break;
344 case TGSI_OPCODE_TXP:
345 opcode = TOY_OPCODE_TGSI_TXP;
346 break;
347 case TGSI_OPCODE_TXB:
348 opcode = TOY_OPCODE_TGSI_TXB;
349 break;
350 case TGSI_OPCODE_TXL:
351 opcode = TOY_OPCODE_TGSI_TXL;
352 break;
353 case TGSI_OPCODE_TXF:
354 opcode = TOY_OPCODE_TGSI_TXF;
355 break;
356 case TGSI_OPCODE_TXQ:
357 opcode = TOY_OPCODE_TGSI_TXQ;
358 break;
359 case TGSI_OPCODE_TXQ_LZ:
360 opcode = TOY_OPCODE_TGSI_TXQ_LZ;
361 break;
362 case TGSI_OPCODE_TEX2:
363 opcode = TOY_OPCODE_TGSI_TEX2;
364 break;
365 case TGSI_OPCODE_TXB2:
366 opcode = TOY_OPCODE_TGSI_TXB2;
367 break;
368 case TGSI_OPCODE_TXL2:
369 opcode = TOY_OPCODE_TGSI_TXL2;
370 break;
371 default:
372 assert(!"unsupported texturing opcode");
373 return;
374 break;
375 }
376
377 assert(tgsi_inst->Instruction.Texture);
378
379 inst = tc_add(tc);
380 inst->opcode = opcode;
381 inst->tex.target = tgsi_inst->Texture.Texture;
382
383 assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
384 assert(tgsi_inst->Instruction.NumDstRegs == 1);
385
386 inst->dst = dst[0];
387 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
388 inst->src[i] = src[i];
389
390 for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++)
391 tc_fail(tc, "texelFetchOffset unsupported");
392 }
393
394 static void
395 aos_sample(struct toy_compiler *tc,
396 const struct tgsi_full_instruction *tgsi_inst,
397 struct toy_dst *dst,
398 struct toy_src *src)
399 {
400 struct toy_inst *inst;
401 enum toy_opcode opcode;
402 int i;
403
404 assert(!"sampling untested");
405
406 switch (tgsi_inst->Instruction.Opcode) {
407 case TGSI_OPCODE_SAMPLE:
408 opcode = TOY_OPCODE_TGSI_SAMPLE;
409 break;
410 case TGSI_OPCODE_SAMPLE_I:
411 opcode = TOY_OPCODE_TGSI_SAMPLE_I;
412 break;
413 case TGSI_OPCODE_SAMPLE_I_MS:
414 opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS;
415 break;
416 case TGSI_OPCODE_SAMPLE_B:
417 opcode = TOY_OPCODE_TGSI_SAMPLE_B;
418 break;
419 case TGSI_OPCODE_SAMPLE_C:
420 opcode = TOY_OPCODE_TGSI_SAMPLE_C;
421 break;
422 case TGSI_OPCODE_SAMPLE_C_LZ:
423 opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ;
424 break;
425 case TGSI_OPCODE_SAMPLE_D:
426 opcode = TOY_OPCODE_TGSI_SAMPLE_D;
427 break;
428 case TGSI_OPCODE_SAMPLE_L:
429 opcode = TOY_OPCODE_TGSI_SAMPLE_L;
430 break;
431 case TGSI_OPCODE_GATHER4:
432 opcode = TOY_OPCODE_TGSI_GATHER4;
433 break;
434 case TGSI_OPCODE_SVIEWINFO:
435 opcode = TOY_OPCODE_TGSI_SVIEWINFO;
436 break;
437 case TGSI_OPCODE_SAMPLE_POS:
438 opcode = TOY_OPCODE_TGSI_SAMPLE_POS;
439 break;
440 case TGSI_OPCODE_SAMPLE_INFO:
441 opcode = TOY_OPCODE_TGSI_SAMPLE_INFO;
442 break;
443 default:
444 assert(!"unsupported sampling opcode");
445 return;
446 break;
447 }
448
449 inst = tc_add(tc);
450 inst->opcode = opcode;
451
452 assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
453 assert(tgsi_inst->Instruction.NumDstRegs == 1);
454
455 inst->dst = dst[0];
456 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
457 inst->src[i] = src[i];
458 }
459
460 static void
461 aos_LIT(struct toy_compiler *tc,
462 const struct tgsi_full_instruction *tgsi_inst,
463 struct toy_dst *dst,
464 struct toy_src *src)
465 {
466 struct toy_inst *inst;
467
468 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));
469
470 if (!(dst[0].writemask & TOY_WRITEMASK_YZ))
471 return;
472
473 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));
474
475 tc_CMP(tc, tdst_null(),
476 tsrc_swizzle1(src[0], TOY_SWIZZLE_X),
477 tsrc_imm_f(0.0f),
478 GEN6_COND_G);
479
480 inst = tc_MOV(tc,
481 tdst_writemask(dst[0], TOY_WRITEMASK_Y),
482 tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
483 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
484
485 /* clamp W to (-128, 128)? */
486 inst = tc_POW(tc,
487 tdst_writemask(dst[0], TOY_WRITEMASK_Z),
488 tsrc_swizzle1(src[0], TOY_SWIZZLE_Y),
489 tsrc_swizzle1(src[0], TOY_SWIZZLE_W));
490 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
491 }
492
493 static void
494 aos_EXP(struct toy_compiler *tc,
495 const struct tgsi_full_instruction *tgsi_inst,
496 struct toy_dst *dst,
497 struct toy_src *src)
498 {
499 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
500
501 if (dst[0].writemask & TOY_WRITEMASK_X) {
502 struct toy_dst tmp =
503 tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
504
505 tc_RNDD(tc, tmp, src0);
506
507 /* construct the floating point number manually */
508 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
509 tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)),
510 tsrc_from(tmp), tsrc_imm_d(23));
511 }
512
513 tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0);
514 tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
515 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
516 }
517
518 static void
519 aos_LOG(struct toy_compiler *tc,
520 const struct tgsi_full_instruction *tgsi_inst,
521 struct toy_dst *dst,
522 struct toy_src *src)
523 {
524 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
525
526 if (dst[0].writemask & TOY_WRITEMASK_XY) {
527 struct toy_dst tmp;
528
529 tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
530
531 /* exponent */
532 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23));
533 tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
534 tsrc_from(tmp), tsrc_imm_d(-127));
535
536 /* mantissa */
537 tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1));
538 tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
539 tsrc_from(tmp), tsrc_imm_d(127 << 23));
540 }
541
542 tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
543 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
544 }
545
546 static void
547 aos_DST(struct toy_compiler *tc,
548 const struct tgsi_full_instruction *tgsi_inst,
549 struct toy_dst *dst,
550 struct toy_src *src)
551 {
552 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f));
553 tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]);
554 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]);
555 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]);
556 }
557
558 static void
559 aos_LRP(struct toy_compiler *tc,
560 const struct tgsi_full_instruction *tgsi_inst,
561 struct toy_dst *dst,
562 struct toy_src *src)
563 {
564 struct toy_dst tmp = tc_alloc_tmp(tc);
565
566 tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f));
567 tc_MUL(tc, tmp, tsrc_from(tmp), src[2]);
568 tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp));
569 }
570
571 static void
572 aos_CND(struct toy_compiler *tc,
573 const struct tgsi_full_instruction *tgsi_inst,
574 struct toy_dst *dst,
575 struct toy_src *src)
576 {
577 struct toy_inst *inst;
578
579 assert(!"CND untested");
580
581 tc_CMP(tc, tdst_null(), src[2], tsrc_imm_f(0.5f), GEN6_COND_G);
582 inst = tc_SEL(tc, dst[0], src[0], src[1], GEN6_COND_NONE);
583 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
584 }
585
586 static void
587 aos_DP2A(struct toy_compiler *tc,
588 const struct tgsi_full_instruction *tgsi_inst,
589 struct toy_dst *dst,
590 struct toy_src *src)
591 {
592 struct toy_dst tmp = tc_alloc_tmp(tc);
593
594 assert(!"DP2A untested");
595
596 tc_DP2(tc, tmp, src[0], src[1]);
597 tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]);
598 }
599
600 static void
601 aos_CLAMP(struct toy_compiler *tc,
602 const struct tgsi_full_instruction *tgsi_inst,
603 struct toy_dst *dst,
604 struct toy_src *src)
605 {
606 assert(!"CLAMP untested");
607
608 tc_SEL(tc, dst[0], src[0], src[1], GEN6_COND_GE);
609 tc_SEL(tc, dst[0], src[2], tsrc_from(dst[0]), GEN6_COND_L);
610 }
611
612 static void
613 aos_XPD(struct toy_compiler *tc,
614 const struct tgsi_full_instruction *tgsi_inst,
615 struct toy_dst *dst,
616 struct toy_src *src)
617 {
618 struct toy_dst tmp = tc_alloc_tmp(tc);
619
620 tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ),
621 tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
622 TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
623 tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
624 TOY_SWIZZLE_X, TOY_SWIZZLE_W));
625
626 tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
627 tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
628 TOY_SWIZZLE_X, TOY_SWIZZLE_W),
629 tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
630 TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
631 tsrc_negate(tsrc_from(tmp)));
632
633 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W),
634 tsrc_imm_f(1.0f));
635 }
636
637 static void
638 aos_PK2H(struct toy_compiler *tc,
639 const struct tgsi_full_instruction *tgsi_inst,
640 struct toy_dst *dst,
641 struct toy_src *src)
642 {
643 const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
644 const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
645 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
646
647 assert(!"PK2H untested");
648
649 tc_SHL(tc, tmp, h2, tsrc_imm_ud(16));
650 tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp));
651 }
652
653 static void
654 aos_SFL(struct toy_compiler *tc,
655 const struct tgsi_full_instruction *tgsi_inst,
656 struct toy_dst *dst,
657 struct toy_src *src)
658 {
659 assert(!"SFL untested");
660
661 tc_MOV(tc, dst[0], tsrc_imm_f(0.0f));
662 }
663
664 static void
665 aos_STR(struct toy_compiler *tc,
666 const struct tgsi_full_instruction *tgsi_inst,
667 struct toy_dst *dst,
668 struct toy_src *src)
669 {
670 assert(!"STR untested");
671
672 tc_MOV(tc, dst[0], tsrc_imm_f(1.0f));
673 }
674
675 static void
676 aos_UP2H(struct toy_compiler *tc,
677 const struct tgsi_full_instruction *tgsi_inst,
678 struct toy_dst *dst,
679 struct toy_src *src)
680 {
681 assert(!"UP2H untested");
682
683 tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ),
684 tsrc_ud(src[0]), tsrc_imm_ud(0xffff));
685 tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW),
686 tsrc_ud(src[0]), tsrc_imm_ud(16));
687 }
688
689 static void
690 aos_SCS(struct toy_compiler *tc,
691 const struct tgsi_full_instruction *tgsi_inst,
692 struct toy_dst *dst,
693 struct toy_src *src)
694 {
695 assert(!"SCS untested");
696
697 tc_add1(tc, TOY_OPCODE_COS,
698 tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]);
699
700 tc_add1(tc, TOY_OPCODE_SIN,
701 tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]);
702
703 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
704 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
705 }
706
707 static void
708 aos_NRM(struct toy_compiler *tc,
709 const struct tgsi_full_instruction *tgsi_inst,
710 struct toy_dst *dst,
711 struct toy_src *src)
712 {
713 struct toy_dst tmp = tc_alloc_tmp(tc);
714
715 assert(!"NRM untested");
716
717 tc_DP3(tc, tmp, src[0], src[0]);
718 tc_INV(tc, tmp, tsrc_from(tmp));
719 tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
720 src[0], tsrc_from(tmp));
721
722 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
723 }
724
725 static void
726 aos_DIV(struct toy_compiler *tc,
727 const struct tgsi_full_instruction *tgsi_inst,
728 struct toy_dst *dst,
729 struct toy_src *src)
730 {
731 struct toy_dst tmp = tc_alloc_tmp(tc);
732
733 assert(!"DIV untested");
734
735 tc_INV(tc, tmp, src[1]);
736 tc_MUL(tc, dst[0], src[0], tsrc_from(tmp));
737 }
738
739 static void
740 aos_BRK(struct toy_compiler *tc,
741 const struct tgsi_full_instruction *tgsi_inst,
742 struct toy_dst *dst,
743 struct toy_src *src)
744 {
745 tc_add0(tc, GEN6_OPCODE_BREAK);
746 }
747
748 static void
749 aos_CEIL(struct toy_compiler *tc,
750 const struct tgsi_full_instruction *tgsi_inst,
751 struct toy_dst *dst,
752 struct toy_src *src)
753 {
754 struct toy_dst tmp = tc_alloc_tmp(tc);
755
756 tc_RNDD(tc, tmp, tsrc_negate(src[0]));
757 tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp)));
758 }
759
760 static void
761 aos_SAD(struct toy_compiler *tc,
762 const struct tgsi_full_instruction *tgsi_inst,
763 struct toy_dst *dst,
764 struct toy_src *src)
765 {
766 struct toy_dst tmp = tc_alloc_tmp(tc);
767
768 assert(!"SAD untested");
769
770 tc_ADD(tc, tmp, src[0], tsrc_negate(src[1]));
771 tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]);
772 }
773
774 static void
775 aos_CONT(struct toy_compiler *tc,
776 const struct tgsi_full_instruction *tgsi_inst,
777 struct toy_dst *dst,
778 struct toy_src *src)
779 {
780 tc_add0(tc, GEN6_OPCODE_CONT);
781 }
782
783 static void
784 aos_BGNLOOP(struct toy_compiler *tc,
785 const struct tgsi_full_instruction *tgsi_inst,
786 struct toy_dst *dst,
787 struct toy_src *src)
788 {
789 struct toy_inst *inst;
790
791 inst = tc_add0(tc, TOY_OPCODE_DO);
792 /* this is just a marker */
793 inst->marker = true;
794 }
795
796 static void
797 aos_ENDLOOP(struct toy_compiler *tc,
798 const struct tgsi_full_instruction *tgsi_inst,
799 struct toy_dst *dst,
800 struct toy_src *src)
801 {
802 tc_add0(tc, GEN6_OPCODE_WHILE);
803 }
804
805 static void
806 aos_NRM4(struct toy_compiler *tc,
807 const struct tgsi_full_instruction *tgsi_inst,
808 struct toy_dst *dst,
809 struct toy_src *src)
810 {
811 struct toy_dst tmp = tc_alloc_tmp(tc);
812
813 assert(!"NRM4 untested");
814
815 tc_DP4(tc, tmp, src[0], src[0]);
816 tc_INV(tc, tmp, tsrc_from(tmp));
817 tc_MUL(tc, dst[0], tsrc_swizzle1(src[0], TOY_SWIZZLE_X), tsrc_from(tmp));
818 }
819
820 static void
821 aos_unsupported(struct toy_compiler *tc,
822 const struct tgsi_full_instruction *tgsi_inst,
823 struct toy_dst *dst,
824 struct toy_src *src)
825 {
826 const char *name = tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode);
827
828 ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name);
829
830 tc_fail(tc, "unsupported TGSI instruction");
831 }
832
833 static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
834 [TGSI_OPCODE_ARL] = aos_simple,
835 [TGSI_OPCODE_MOV] = aos_simple,
836 [TGSI_OPCODE_LIT] = aos_LIT,
837 [TGSI_OPCODE_RCP] = aos_simple,
838 [TGSI_OPCODE_RSQ] = aos_simple,
839 [TGSI_OPCODE_EXP] = aos_EXP,
840 [TGSI_OPCODE_LOG] = aos_LOG,
841 [TGSI_OPCODE_MUL] = aos_simple,
842 [TGSI_OPCODE_ADD] = aos_simple,
843 [TGSI_OPCODE_DP3] = aos_simple,
844 [TGSI_OPCODE_DP4] = aos_simple,
845 [TGSI_OPCODE_DST] = aos_DST,
846 [TGSI_OPCODE_MIN] = aos_simple,
847 [TGSI_OPCODE_MAX] = aos_simple,
848 [TGSI_OPCODE_SLT] = aos_set_on_cond,
849 [TGSI_OPCODE_SGE] = aos_set_on_cond,
850 [TGSI_OPCODE_MAD] = aos_simple,
851 [TGSI_OPCODE_SUB] = aos_simple,
852 [TGSI_OPCODE_LRP] = aos_LRP,
853 [TGSI_OPCODE_CND] = aos_CND,
854 [TGSI_OPCODE_SQRT] = aos_simple,
855 [TGSI_OPCODE_DP2A] = aos_DP2A,
856 [22] = aos_unsupported,
857 [23] = aos_unsupported,
858 [TGSI_OPCODE_FRC] = aos_simple,
859 [TGSI_OPCODE_CLAMP] = aos_CLAMP,
860 [TGSI_OPCODE_FLR] = aos_simple,
861 [TGSI_OPCODE_ROUND] = aos_simple,
862 [TGSI_OPCODE_EX2] = aos_simple,
863 [TGSI_OPCODE_LG2] = aos_simple,
864 [TGSI_OPCODE_POW] = aos_simple,
865 [TGSI_OPCODE_XPD] = aos_XPD,
866 [32] = aos_unsupported,
867 [TGSI_OPCODE_ABS] = aos_simple,
868 [TGSI_OPCODE_RCC] = aos_unsupported,
869 [TGSI_OPCODE_DPH] = aos_simple,
870 [TGSI_OPCODE_COS] = aos_simple,
871 [TGSI_OPCODE_DDX] = aos_unsupported,
872 [TGSI_OPCODE_DDY] = aos_unsupported,
873 [TGSI_OPCODE_KILL] = aos_simple,
874 [TGSI_OPCODE_PK2H] = aos_PK2H,
875 [TGSI_OPCODE_PK2US] = aos_unsupported,
876 [TGSI_OPCODE_PK4B] = aos_unsupported,
877 [TGSI_OPCODE_PK4UB] = aos_unsupported,
878 [TGSI_OPCODE_RFL] = aos_unsupported,
879 [TGSI_OPCODE_SEQ] = aos_set_on_cond,
880 [TGSI_OPCODE_SFL] = aos_SFL,
881 [TGSI_OPCODE_SGT] = aos_set_on_cond,
882 [TGSI_OPCODE_SIN] = aos_simple,
883 [TGSI_OPCODE_SLE] = aos_set_on_cond,
884 [TGSI_OPCODE_SNE] = aos_set_on_cond,
885 [TGSI_OPCODE_STR] = aos_STR,
886 [TGSI_OPCODE_TEX] = aos_tex,
887 [TGSI_OPCODE_TXD] = aos_tex,
888 [TGSI_OPCODE_TXP] = aos_tex,
889 [TGSI_OPCODE_UP2H] = aos_UP2H,
890 [TGSI_OPCODE_UP2US] = aos_unsupported,
891 [TGSI_OPCODE_UP4B] = aos_unsupported,
892 [TGSI_OPCODE_UP4UB] = aos_unsupported,
893 [TGSI_OPCODE_X2D] = aos_unsupported,
894 [TGSI_OPCODE_ARA] = aos_unsupported,
895 [TGSI_OPCODE_ARR] = aos_simple,
896 [TGSI_OPCODE_BRA] = aos_unsupported,
897 [TGSI_OPCODE_CAL] = aos_unsupported,
898 [TGSI_OPCODE_RET] = aos_unsupported,
899 [TGSI_OPCODE_SSG] = aos_set_sign,
900 [TGSI_OPCODE_CMP] = aos_compare,
901 [TGSI_OPCODE_SCS] = aos_SCS,
902 [TGSI_OPCODE_TXB] = aos_tex,
903 [TGSI_OPCODE_NRM] = aos_NRM,
904 [TGSI_OPCODE_DIV] = aos_DIV,
905 [TGSI_OPCODE_DP2] = aos_simple,
906 [TGSI_OPCODE_TXL] = aos_tex,
907 [TGSI_OPCODE_BRK] = aos_BRK,
908 [TGSI_OPCODE_IF] = aos_simple,
909 [TGSI_OPCODE_UIF] = aos_simple,
910 [76] = aos_unsupported,
911 [TGSI_OPCODE_ELSE] = aos_simple,
912 [TGSI_OPCODE_ENDIF] = aos_simple,
913 [79] = aos_unsupported,
914 [80] = aos_unsupported,
915 [TGSI_OPCODE_PUSHA] = aos_unsupported,
916 [TGSI_OPCODE_POPA] = aos_unsupported,
917 [TGSI_OPCODE_CEIL] = aos_CEIL,
918 [TGSI_OPCODE_I2F] = aos_simple,
919 [TGSI_OPCODE_NOT] = aos_simple,
920 [TGSI_OPCODE_TRUNC] = aos_simple,
921 [TGSI_OPCODE_SHL] = aos_simple,
922 [88] = aos_unsupported,
923 [TGSI_OPCODE_AND] = aos_simple,
924 [TGSI_OPCODE_OR] = aos_simple,
925 [TGSI_OPCODE_MOD] = aos_simple,
926 [TGSI_OPCODE_XOR] = aos_simple,
927 [TGSI_OPCODE_SAD] = aos_SAD,
928 [TGSI_OPCODE_TXF] = aos_tex,
929 [TGSI_OPCODE_TXQ] = aos_tex,
930 [TGSI_OPCODE_CONT] = aos_CONT,
931 [TGSI_OPCODE_EMIT] = aos_simple,
932 [TGSI_OPCODE_ENDPRIM] = aos_simple,
933 [TGSI_OPCODE_BGNLOOP] = aos_BGNLOOP,
934 [TGSI_OPCODE_BGNSUB] = aos_unsupported,
935 [TGSI_OPCODE_ENDLOOP] = aos_ENDLOOP,
936 [TGSI_OPCODE_ENDSUB] = aos_unsupported,
937 [TGSI_OPCODE_TXQ_LZ] = aos_tex,
938 [104] = aos_unsupported,
939 [105] = aos_unsupported,
940 [106] = aos_unsupported,
941 [TGSI_OPCODE_NOP] = aos_simple,
942 [TGSI_OPCODE_FSEQ] = aos_set_on_cond,
943 [TGSI_OPCODE_FSGE] = aos_set_on_cond,
944 [TGSI_OPCODE_FSLT] = aos_set_on_cond,
945 [TGSI_OPCODE_FSNE] = aos_set_on_cond,
946 [TGSI_OPCODE_NRM4] = aos_NRM4,
947 [TGSI_OPCODE_CALLNZ] = aos_unsupported,
948 [TGSI_OPCODE_BREAKC] = aos_unsupported,
949 [TGSI_OPCODE_KILL_IF] = aos_simple,
950 [TGSI_OPCODE_END] = aos_simple,
951 [118] = aos_unsupported,
952 [TGSI_OPCODE_F2I] = aos_simple,
953 [TGSI_OPCODE_IDIV] = aos_simple,
954 [TGSI_OPCODE_IMAX] = aos_simple,
955 [TGSI_OPCODE_IMIN] = aos_simple,
956 [TGSI_OPCODE_INEG] = aos_simple,
957 [TGSI_OPCODE_ISGE] = aos_set_on_cond,
958 [TGSI_OPCODE_ISHR] = aos_simple,
959 [TGSI_OPCODE_ISLT] = aos_set_on_cond,
960 [TGSI_OPCODE_F2U] = aos_simple,
961 [TGSI_OPCODE_U2F] = aos_simple,
962 [TGSI_OPCODE_UADD] = aos_simple,
963 [TGSI_OPCODE_UDIV] = aos_simple,
964 [TGSI_OPCODE_UMAD] = aos_simple,
965 [TGSI_OPCODE_UMAX] = aos_simple,
966 [TGSI_OPCODE_UMIN] = aos_simple,
967 [TGSI_OPCODE_UMOD] = aos_simple,
968 [TGSI_OPCODE_UMUL] = aos_simple,
969 [TGSI_OPCODE_USEQ] = aos_set_on_cond,
970 [TGSI_OPCODE_USGE] = aos_set_on_cond,
971 [TGSI_OPCODE_USHR] = aos_simple,
972 [TGSI_OPCODE_USLT] = aos_set_on_cond,
973 [TGSI_OPCODE_USNE] = aos_set_on_cond,
974 [TGSI_OPCODE_SWITCH] = aos_unsupported,
975 [TGSI_OPCODE_CASE] = aos_unsupported,
976 [TGSI_OPCODE_DEFAULT] = aos_unsupported,
977 [TGSI_OPCODE_ENDSWITCH] = aos_unsupported,
978 [TGSI_OPCODE_SAMPLE] = aos_sample,
979 [TGSI_OPCODE_SAMPLE_I] = aos_sample,
980 [TGSI_OPCODE_SAMPLE_I_MS] = aos_sample,
981 [TGSI_OPCODE_SAMPLE_B] = aos_sample,
982 [TGSI_OPCODE_SAMPLE_C] = aos_sample,
983 [TGSI_OPCODE_SAMPLE_C_LZ] = aos_sample,
984 [TGSI_OPCODE_SAMPLE_D] = aos_sample,
985 [TGSI_OPCODE_SAMPLE_L] = aos_sample,
986 [TGSI_OPCODE_GATHER4] = aos_sample,
987 [TGSI_OPCODE_SVIEWINFO] = aos_sample,
988 [TGSI_OPCODE_SAMPLE_POS] = aos_sample,
989 [TGSI_OPCODE_SAMPLE_INFO] = aos_sample,
990 [TGSI_OPCODE_UARL] = aos_simple,
991 [TGSI_OPCODE_UCMP] = aos_compare,
992 [TGSI_OPCODE_IABS] = aos_simple,
993 [TGSI_OPCODE_ISSG] = aos_set_sign,
994 [TGSI_OPCODE_LOAD] = aos_unsupported,
995 [TGSI_OPCODE_STORE] = aos_unsupported,
996 [TGSI_OPCODE_MFENCE] = aos_unsupported,
997 [TGSI_OPCODE_LFENCE] = aos_unsupported,
998 [TGSI_OPCODE_SFENCE] = aos_unsupported,
999 [TGSI_OPCODE_BARRIER] = aos_unsupported,
1000 [TGSI_OPCODE_ATOMUADD] = aos_unsupported,
1001 [TGSI_OPCODE_ATOMXCHG] = aos_unsupported,
1002 [TGSI_OPCODE_ATOMCAS] = aos_unsupported,
1003 [TGSI_OPCODE_ATOMAND] = aos_unsupported,
1004 [TGSI_OPCODE_ATOMOR] = aos_unsupported,
1005 [TGSI_OPCODE_ATOMXOR] = aos_unsupported,
1006 [TGSI_OPCODE_ATOMUMIN] = aos_unsupported,
1007 [TGSI_OPCODE_ATOMUMAX] = aos_unsupported,
1008 [TGSI_OPCODE_ATOMIMIN] = aos_unsupported,
1009 [TGSI_OPCODE_ATOMIMAX] = aos_unsupported,
1010 [TGSI_OPCODE_TEX2] = aos_tex,
1011 [TGSI_OPCODE_TXB2] = aos_tex,
1012 [TGSI_OPCODE_TXL2] = aos_tex,
1013 };
1014
1015 static void
1016 soa_passthrough(struct toy_compiler *tc,
1017 const struct tgsi_full_instruction *tgsi_inst,
1018 struct toy_dst *dst_,
1019 struct toy_src *src_)
1020 {
1021 const toy_tgsi_translate translate =
1022 aos_translate_table[tgsi_inst->Instruction.Opcode];
1023
1024 translate(tc, tgsi_inst, dst_, src_);
1025 }
1026
1027 static void
1028 soa_per_channel(struct toy_compiler *tc,
1029 const struct tgsi_full_instruction *tgsi_inst,
1030 struct toy_dst *dst_,
1031 struct toy_src *src_)
1032 {
1033 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4];
1034 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
1035 int i, ch;
1036
1037 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
1038 tdst_transpose(dst_[i], dst[i]);
1039 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
1040 tsrc_transpose(src_[i], src[i]);
1041
1042 /* emit the same instruction four times for the four channels */
1043 for (ch = 0; ch < 4; ch++) {
1044 struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS];
1045 struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS];
1046
1047 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
1048 aos_dst[i] = dst[i][ch];
1049 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
1050 aos_src[i] = src[i][ch];
1051
1052 aos_translate_table[tgsi_inst->Instruction.Opcode](tc,
1053 tgsi_inst, aos_dst, aos_src);
1054 }
1055 }
1056
1057 static void
1058 soa_scalar_replicate(struct toy_compiler *tc,
1059 const struct tgsi_full_instruction *tgsi_inst,
1060 struct toy_dst *dst_,
1061 struct toy_src *src_)
1062 {
1063 struct toy_dst dst0[4], tmp;
1064 struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS];
1065 int opcode, i;
1066
1067 assert(tgsi_inst->Instruction.NumDstRegs == 1);
1068
1069 tdst_transpose(dst_[0], dst0);
1070 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1071 struct toy_src tmp[4];
1072
1073 tsrc_transpose(src_[i], tmp);
1074 /* only the X channels */
1075 srcx[i] = tmp[0];
1076 }
1077
1078 tmp = tc_alloc_tmp(tc);
1079
1080 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
1081 assert(opcode);
1082
1083 switch (tgsi_inst->Instruction.Opcode) {
1084 case TGSI_OPCODE_RCP:
1085 case TGSI_OPCODE_RSQ:
1086 case TGSI_OPCODE_SQRT:
1087 case TGSI_OPCODE_EX2:
1088 case TGSI_OPCODE_LG2:
1089 case TGSI_OPCODE_COS:
1090 case TGSI_OPCODE_SIN:
1091 tc_add1(tc, opcode, tmp, srcx[0]);
1092 break;
1093 case TGSI_OPCODE_POW:
1094 tc_add2(tc, opcode, tmp, srcx[0], srcx[1]);
1095 break;
1096 default:
1097 assert(!"invalid soa_scalar_replicate() call");
1098 return;
1099 }
1100
1101 /* replicate the result */
1102 for (i = 0; i < 4; i++)
1103 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1104 }
1105
1106 static void
1107 soa_dot_product(struct toy_compiler *tc,
1108 const struct tgsi_full_instruction *tgsi_inst,
1109 struct toy_dst *dst_,
1110 struct toy_src *src_)
1111 {
1112 struct toy_dst dst0[4], tmp;
1113 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
1114 int i;
1115
1116 tdst_transpose(dst_[0], dst0);
1117 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
1118 tsrc_transpose(src_[i], src[i]);
1119
1120 tmp = tc_alloc_tmp(tc);
1121
1122 switch (tgsi_inst->Instruction.Opcode) {
1123 case TGSI_OPCODE_DP2:
1124 tc_MUL(tc, tmp, src[0][1], src[1][1]);
1125 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1126 break;
1127 case TGSI_OPCODE_DP2A:
1128 tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]);
1129 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1130 break;
1131 case TGSI_OPCODE_DP3:
1132 tc_MUL(tc, tmp, src[0][2], src[1][2]);
1133 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1134 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1135 break;
1136 case TGSI_OPCODE_DPH:
1137 tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]);
1138 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1139 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1140 break;
1141 case TGSI_OPCODE_DP4:
1142 tc_MUL(tc, tmp, src[0][3], src[1][3]);
1143 tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp));
1144 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1145 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1146 break;
1147 default:
1148 assert(!"invalid soa_dot_product() call");
1149 return;
1150 }
1151
1152 for (i = 0; i < 4; i++)
1153 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1154 }
1155
1156 static void
1157 soa_partial_derivative(struct toy_compiler *tc,
1158 const struct tgsi_full_instruction *tgsi_inst,
1159 struct toy_dst *dst_,
1160 struct toy_src *src_)
1161 {
1162 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX)
1163 tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]);
1164 else
1165 tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]);
1166 }
1167
1168 static void
1169 soa_if(struct toy_compiler *tc,
1170 const struct tgsi_full_instruction *tgsi_inst,
1171 struct toy_dst *dst_,
1172 struct toy_src *src_)
1173 {
1174 struct toy_src src0[4];
1175
1176 assert(tsrc_is_swizzle1(src_[0]));
1177 tsrc_transpose(src_[0], src0);
1178
1179 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF)
1180 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_NZ);
1181 else
1182 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), GEN6_COND_NZ);
1183 }
1184
1185 static void
1186 soa_LIT(struct toy_compiler *tc,
1187 const struct tgsi_full_instruction *tgsi_inst,
1188 struct toy_dst *dst_,
1189 struct toy_src *src_)
1190 {
1191 struct toy_inst *inst;
1192 struct toy_dst dst0[4];
1193 struct toy_src src0[4];
1194
1195 tdst_transpose(dst_[0], dst0);
1196 tsrc_transpose(src_[0], src0);
1197
1198 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1199 tc_MOV(tc, dst0[1], src0[0]);
1200 tc_POW(tc, dst0[2], src0[1], src0[3]);
1201 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1202
1203 /*
1204 * POW is calculated first because math with pred_ctrl is broken here.
1205 * But, why?
1206 */
1207 tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_L);
1208 inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f));
1209 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
1210 inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1211 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
1212 }
1213
1214 static void
1215 soa_EXP(struct toy_compiler *tc,
1216 const struct tgsi_full_instruction *tgsi_inst,
1217 struct toy_dst *dst_,
1218 struct toy_src *src_)
1219 {
1220 struct toy_dst dst0[4];
1221 struct toy_src src0[4];
1222
1223 assert(!"SoA EXP untested");
1224
1225 tdst_transpose(dst_[0], dst0);
1226 tsrc_transpose(src_[0], src0);
1227
1228 if (!tdst_is_null(dst0[0])) {
1229 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1230
1231 tc_RNDD(tc, tmp, src0[0]);
1232
1233 /* construct the floating point number manually */
1234 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
1235 tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23));
1236 }
1237
1238 tc_FRC(tc, dst0[1], src0[0]);
1239 tc_EXP(tc, dst0[2], src0[0]);
1240 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1241 }
1242
1243 static void
1244 soa_LOG(struct toy_compiler *tc,
1245 const struct tgsi_full_instruction *tgsi_inst,
1246 struct toy_dst *dst_,
1247 struct toy_src *src_)
1248 {
1249 struct toy_dst dst0[4];
1250 struct toy_src src0[4];
1251
1252 assert(!"SoA LOG untested");
1253
1254 tdst_transpose(dst_[0], dst0);
1255 tsrc_transpose(src_[0], src0);
1256
1257 if (dst_[0].writemask & TOY_WRITEMASK_XY) {
1258 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1259
1260 /* exponent */
1261 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23));
1262 tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127));
1263
1264 /* mantissa */
1265 tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1));
1266 tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23));
1267 }
1268
1269 tc_LOG(tc, dst0[2], src0[0]);
1270 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1271 }
1272
1273 static void
1274 soa_DST(struct toy_compiler *tc,
1275 const struct tgsi_full_instruction *tgsi_inst,
1276 struct toy_dst *dst_,
1277 struct toy_src *src_)
1278 {
1279 struct toy_dst dst0[4];
1280 struct toy_src src[2][4];
1281
1282 tdst_transpose(dst_[0], dst0);
1283 tsrc_transpose(src_[0], src[0]);
1284 tsrc_transpose(src_[1], src[1]);
1285
1286 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1287 tc_MUL(tc, dst0[1], src[0][1], src[1][1]);
1288 tc_MOV(tc, dst0[2], src[0][2]);
1289 tc_MOV(tc, dst0[3], src[1][3]);
1290 }
1291
1292 static void
1293 soa_XPD(struct toy_compiler *tc,
1294 const struct tgsi_full_instruction *tgsi_inst,
1295 struct toy_dst *dst_,
1296 struct toy_src *src_)
1297 {
1298 struct toy_dst dst0[4];
1299 struct toy_src src[2][4];
1300
1301 tdst_transpose(dst_[0], dst0);
1302 tsrc_transpose(src_[0], src[0]);
1303 tsrc_transpose(src_[1], src[1]);
1304
1305 /* dst.x = src0.y * src1.z - src1.y * src0.z */
1306 tc_MUL(tc, dst0[0], src[0][2], src[1][1]);
1307 tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0])));
1308
1309 /* dst.y = src0.z * src1.x - src1.z * src0.x */
1310 tc_MUL(tc, dst0[1], src[0][0], src[1][2]);
1311 tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1])));
1312
1313 /* dst.z = src0.x * src1.y - src1.x * src0.y */
1314 tc_MUL(tc, dst0[2], src[0][1], src[1][0]);
1315 tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2])));
1316
1317 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1318 }
1319
1320 static void
1321 soa_PK2H(struct toy_compiler *tc,
1322 const struct tgsi_full_instruction *tgsi_inst,
1323 struct toy_dst *dst_,
1324 struct toy_src *src_)
1325 {
1326 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
1327 struct toy_dst dst0[4];
1328 struct toy_src src0[4];
1329 int i;
1330
1331 assert(!"SoA PK2H untested");
1332
1333 tdst_transpose(dst_[0], dst0);
1334 tsrc_transpose(src_[0], src0);
1335
1336 tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16));
1337 tc_OR(tc, tmp, src0[0], tsrc_from(tmp));
1338
1339 for (i = 0; i < 4; i++)
1340 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1341 }
1342
1343 static void
1344 soa_UP2H(struct toy_compiler *tc,
1345 const struct tgsi_full_instruction *tgsi_inst,
1346 struct toy_dst *dst_,
1347 struct toy_src *src_)
1348 {
1349 struct toy_dst dst0[4];
1350 struct toy_src src0[4];
1351
1352 assert(!"SoA UP2H untested");
1353
1354 tdst_transpose(dst_[0], dst0);
1355 tsrc_transpose(src_[0], src0);
1356
1357 tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff));
1358 tc_SHR(tc, tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16));
1359 tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff));
1360 tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16));
1361
1362 }
1363
1364 static void
1365 soa_SCS(struct toy_compiler *tc,
1366 const struct tgsi_full_instruction *tgsi_inst,
1367 struct toy_dst *dst_,
1368 struct toy_src *src_)
1369 {
1370 struct toy_dst dst0[4];
1371 struct toy_src src0[4];
1372
1373 tdst_transpose(dst_[0], dst0);
1374 tsrc_transpose(src_[0], src0);
1375
1376 tc_add1(tc, TOY_OPCODE_COS, dst0[0], src0[0]);
1377 tc_add1(tc, TOY_OPCODE_SIN, dst0[1], src0[0]);
1378 tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1379 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1380 }
1381
1382 static void
1383 soa_NRM(struct toy_compiler *tc,
1384 const struct tgsi_full_instruction *tgsi_inst,
1385 struct toy_dst *dst_,
1386 struct toy_src *src_)
1387 {
1388 const struct toy_dst tmp = tc_alloc_tmp(tc);
1389 struct toy_dst dst0[4];
1390 struct toy_src src0[4];
1391
1392 assert(!"SoA NRM untested");
1393
1394 tdst_transpose(dst_[0], dst0);
1395 tsrc_transpose(src_[0], src0);
1396
1397 tc_MUL(tc, tmp, src0[2], src0[2]);
1398 tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp));
1399 tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp));
1400 tc_INV(tc, tmp, tsrc_from(tmp));
1401
1402 tc_MUL(tc, dst0[0], src0[0], tsrc_from(tmp));
1403 tc_MUL(tc, dst0[1], src0[1], tsrc_from(tmp));
1404 tc_MUL(tc, dst0[2], src0[2], tsrc_from(tmp));
1405 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1406 }
1407
1408 static void
1409 soa_NRM4(struct toy_compiler *tc,
1410 const struct tgsi_full_instruction *tgsi_inst,
1411 struct toy_dst *dst_,
1412 struct toy_src *src_)
1413 {
1414 const struct toy_dst tmp = tc_alloc_tmp(tc);
1415 struct toy_dst dst0[4];
1416 struct toy_src src0[4];
1417 int i;
1418
1419 assert(!"SoA NRM4 untested");
1420
1421 tdst_transpose(dst_[0], dst0);
1422 tsrc_transpose(src_[0], src0);
1423
1424 tc_MUL(tc, tmp, src0[3], src0[3]);
1425 tc_MAC(tc, tmp, src0[2], src0[2], tsrc_from(tmp));
1426 tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp));
1427 tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp));
1428 tc_INV(tc, tmp, tsrc_from(tmp));
1429
1430 for (i = 0; i < 4; i++)
1431 tc_MUL(tc, dst0[i], src0[0], tsrc_from(tmp));
1432 }
1433
1434 static void
1435 soa_unsupported(struct toy_compiler *tc,
1436 const struct tgsi_full_instruction *tgsi_inst,
1437 struct toy_dst *dst_,
1438 struct toy_src *src_)
1439 {
1440 const struct tgsi_opcode_info *info =
1441 tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode);
1442
1443 ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
1444 info->mnemonic);
1445
1446 tc_fail(tc, "unsupported TGSI instruction in SoA form");
1447 }
1448
1449 static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
1450 [TGSI_OPCODE_ARL] = soa_per_channel,
1451 [TGSI_OPCODE_MOV] = soa_per_channel,
1452 [TGSI_OPCODE_LIT] = soa_LIT,
1453 [TGSI_OPCODE_RCP] = soa_scalar_replicate,
1454 [TGSI_OPCODE_RSQ] = soa_scalar_replicate,
1455 [TGSI_OPCODE_EXP] = soa_EXP,
1456 [TGSI_OPCODE_LOG] = soa_LOG,
1457 [TGSI_OPCODE_MUL] = soa_per_channel,
1458 [TGSI_OPCODE_ADD] = soa_per_channel,
1459 [TGSI_OPCODE_DP3] = soa_dot_product,
1460 [TGSI_OPCODE_DP4] = soa_dot_product,
1461 [TGSI_OPCODE_DST] = soa_DST,
1462 [TGSI_OPCODE_MIN] = soa_per_channel,
1463 [TGSI_OPCODE_MAX] = soa_per_channel,
1464 [TGSI_OPCODE_SLT] = soa_per_channel,
1465 [TGSI_OPCODE_SGE] = soa_per_channel,
1466 [TGSI_OPCODE_MAD] = soa_per_channel,
1467 [TGSI_OPCODE_SUB] = soa_per_channel,
1468 [TGSI_OPCODE_LRP] = soa_per_channel,
1469 [TGSI_OPCODE_CND] = soa_per_channel,
1470 [TGSI_OPCODE_SQRT] = soa_scalar_replicate,
1471 [TGSI_OPCODE_DP2A] = soa_dot_product,
1472 [22] = soa_unsupported,
1473 [23] = soa_unsupported,
1474 [TGSI_OPCODE_FRC] = soa_per_channel,
1475 [TGSI_OPCODE_CLAMP] = soa_per_channel,
1476 [TGSI_OPCODE_FLR] = soa_per_channel,
1477 [TGSI_OPCODE_ROUND] = soa_per_channel,
1478 [TGSI_OPCODE_EX2] = soa_scalar_replicate,
1479 [TGSI_OPCODE_LG2] = soa_scalar_replicate,
1480 [TGSI_OPCODE_POW] = soa_scalar_replicate,
1481 [TGSI_OPCODE_XPD] = soa_XPD,
1482 [32] = soa_unsupported,
1483 [TGSI_OPCODE_ABS] = soa_per_channel,
1484 [TGSI_OPCODE_RCC] = soa_unsupported,
1485 [TGSI_OPCODE_DPH] = soa_dot_product,
1486 [TGSI_OPCODE_COS] = soa_scalar_replicate,
1487 [TGSI_OPCODE_DDX] = soa_partial_derivative,
1488 [TGSI_OPCODE_DDY] = soa_partial_derivative,
1489 [TGSI_OPCODE_KILL] = soa_passthrough,
1490 [TGSI_OPCODE_PK2H] = soa_PK2H,
1491 [TGSI_OPCODE_PK2US] = soa_unsupported,
1492 [TGSI_OPCODE_PK4B] = soa_unsupported,
1493 [TGSI_OPCODE_PK4UB] = soa_unsupported,
1494 [TGSI_OPCODE_RFL] = soa_unsupported,
1495 [TGSI_OPCODE_SEQ] = soa_per_channel,
1496 [TGSI_OPCODE_SFL] = soa_per_channel,
1497 [TGSI_OPCODE_SGT] = soa_per_channel,
1498 [TGSI_OPCODE_SIN] = soa_scalar_replicate,
1499 [TGSI_OPCODE_SLE] = soa_per_channel,
1500 [TGSI_OPCODE_SNE] = soa_per_channel,
1501 [TGSI_OPCODE_STR] = soa_per_channel,
1502 [TGSI_OPCODE_TEX] = soa_passthrough,
1503 [TGSI_OPCODE_TXD] = soa_passthrough,
1504 [TGSI_OPCODE_TXP] = soa_passthrough,
1505 [TGSI_OPCODE_UP2H] = soa_UP2H,
1506 [TGSI_OPCODE_UP2US] = soa_unsupported,
1507 [TGSI_OPCODE_UP4B] = soa_unsupported,
1508 [TGSI_OPCODE_UP4UB] = soa_unsupported,
1509 [TGSI_OPCODE_X2D] = soa_unsupported,
1510 [TGSI_OPCODE_ARA] = soa_unsupported,
1511 [TGSI_OPCODE_ARR] = soa_per_channel,
1512 [TGSI_OPCODE_BRA] = soa_unsupported,
1513 [TGSI_OPCODE_CAL] = soa_unsupported,
1514 [TGSI_OPCODE_RET] = soa_unsupported,
1515 [TGSI_OPCODE_SSG] = soa_per_channel,
1516 [TGSI_OPCODE_CMP] = soa_per_channel,
1517 [TGSI_OPCODE_SCS] = soa_SCS,
1518 [TGSI_OPCODE_TXB] = soa_passthrough,
1519 [TGSI_OPCODE_NRM] = soa_NRM,
1520 [TGSI_OPCODE_DIV] = soa_per_channel,
1521 [TGSI_OPCODE_DP2] = soa_dot_product,
1522 [TGSI_OPCODE_TXL] = soa_passthrough,
1523 [TGSI_OPCODE_BRK] = soa_passthrough,
1524 [TGSI_OPCODE_IF] = soa_if,
1525 [TGSI_OPCODE_UIF] = soa_if,
1526 [76] = soa_unsupported,
1527 [TGSI_OPCODE_ELSE] = soa_passthrough,
1528 [TGSI_OPCODE_ENDIF] = soa_passthrough,
1529 [79] = soa_unsupported,
1530 [80] = soa_unsupported,
1531 [TGSI_OPCODE_PUSHA] = soa_unsupported,
1532 [TGSI_OPCODE_POPA] = soa_unsupported,
1533 [TGSI_OPCODE_CEIL] = soa_per_channel,
1534 [TGSI_OPCODE_I2F] = soa_per_channel,
1535 [TGSI_OPCODE_NOT] = soa_per_channel,
1536 [TGSI_OPCODE_TRUNC] = soa_per_channel,
1537 [TGSI_OPCODE_SHL] = soa_per_channel,
1538 [88] = soa_unsupported,
1539 [TGSI_OPCODE_AND] = soa_per_channel,
1540 [TGSI_OPCODE_OR] = soa_per_channel,
1541 [TGSI_OPCODE_MOD] = soa_per_channel,
1542 [TGSI_OPCODE_XOR] = soa_per_channel,
1543 [TGSI_OPCODE_SAD] = soa_per_channel,
1544 [TGSI_OPCODE_TXF] = soa_passthrough,
1545 [TGSI_OPCODE_TXQ] = soa_passthrough,
1546 [TGSI_OPCODE_CONT] = soa_passthrough,
1547 [TGSI_OPCODE_EMIT] = soa_unsupported,
1548 [TGSI_OPCODE_ENDPRIM] = soa_unsupported,
1549 [TGSI_OPCODE_BGNLOOP] = soa_passthrough,
1550 [TGSI_OPCODE_BGNSUB] = soa_unsupported,
1551 [TGSI_OPCODE_ENDLOOP] = soa_passthrough,
1552 [TGSI_OPCODE_ENDSUB] = soa_unsupported,
1553 [TGSI_OPCODE_TXQ_LZ] = soa_passthrough,
1554 [104] = soa_unsupported,
1555 [105] = soa_unsupported,
1556 [106] = soa_unsupported,
1557 [TGSI_OPCODE_NOP] = soa_passthrough,
1558 [TGSI_OPCODE_FSEQ] = soa_per_channel,
1559 [TGSI_OPCODE_FSGE] = soa_per_channel,
1560 [TGSI_OPCODE_FSLT] = soa_per_channel,
1561 [TGSI_OPCODE_FSNE] = soa_per_channel,
1562 [TGSI_OPCODE_NRM4] = soa_NRM4,
1563 [TGSI_OPCODE_CALLNZ] = soa_unsupported,
1564 [TGSI_OPCODE_BREAKC] = soa_unsupported,
1565 [TGSI_OPCODE_KILL_IF] = soa_passthrough,
1566 [TGSI_OPCODE_END] = soa_passthrough,
1567 [118] = soa_unsupported,
1568 [TGSI_OPCODE_F2I] = soa_per_channel,
1569 [TGSI_OPCODE_IDIV] = soa_per_channel,
1570 [TGSI_OPCODE_IMAX] = soa_per_channel,
1571 [TGSI_OPCODE_IMIN] = soa_per_channel,
1572 [TGSI_OPCODE_INEG] = soa_per_channel,
1573 [TGSI_OPCODE_ISGE] = soa_per_channel,
1574 [TGSI_OPCODE_ISHR] = soa_per_channel,
1575 [TGSI_OPCODE_ISLT] = soa_per_channel,
1576 [TGSI_OPCODE_F2U] = soa_per_channel,
1577 [TGSI_OPCODE_U2F] = soa_per_channel,
1578 [TGSI_OPCODE_UADD] = soa_per_channel,
1579 [TGSI_OPCODE_UDIV] = soa_per_channel,
1580 [TGSI_OPCODE_UMAD] = soa_per_channel,
1581 [TGSI_OPCODE_UMAX] = soa_per_channel,
1582 [TGSI_OPCODE_UMIN] = soa_per_channel,
1583 [TGSI_OPCODE_UMOD] = soa_per_channel,
1584 [TGSI_OPCODE_UMUL] = soa_per_channel,
1585 [TGSI_OPCODE_USEQ] = soa_per_channel,
1586 [TGSI_OPCODE_USGE] = soa_per_channel,
1587 [TGSI_OPCODE_USHR] = soa_per_channel,
1588 [TGSI_OPCODE_USLT] = soa_per_channel,
1589 [TGSI_OPCODE_USNE] = soa_per_channel,
1590 [TGSI_OPCODE_SWITCH] = soa_unsupported,
1591 [TGSI_OPCODE_CASE] = soa_unsupported,
1592 [TGSI_OPCODE_DEFAULT] = soa_unsupported,
1593 [TGSI_OPCODE_ENDSWITCH] = soa_unsupported,
1594 [TGSI_OPCODE_SAMPLE] = soa_passthrough,
1595 [TGSI_OPCODE_SAMPLE_I] = soa_passthrough,
1596 [TGSI_OPCODE_SAMPLE_I_MS] = soa_passthrough,
1597 [TGSI_OPCODE_SAMPLE_B] = soa_passthrough,
1598 [TGSI_OPCODE_SAMPLE_C] = soa_passthrough,
1599 [TGSI_OPCODE_SAMPLE_C_LZ] = soa_passthrough,
1600 [TGSI_OPCODE_SAMPLE_D] = soa_passthrough,
1601 [TGSI_OPCODE_SAMPLE_L] = soa_passthrough,
1602 [TGSI_OPCODE_GATHER4] = soa_passthrough,
1603 [TGSI_OPCODE_SVIEWINFO] = soa_passthrough,
1604 [TGSI_OPCODE_SAMPLE_POS] = soa_passthrough,
1605 [TGSI_OPCODE_SAMPLE_INFO] = soa_passthrough,
1606 [TGSI_OPCODE_UARL] = soa_per_channel,
1607 [TGSI_OPCODE_UCMP] = soa_per_channel,
1608 [TGSI_OPCODE_IABS] = soa_per_channel,
1609 [TGSI_OPCODE_ISSG] = soa_per_channel,
1610 [TGSI_OPCODE_LOAD] = soa_unsupported,
1611 [TGSI_OPCODE_STORE] = soa_unsupported,
1612 [TGSI_OPCODE_MFENCE] = soa_unsupported,
1613 [TGSI_OPCODE_LFENCE] = soa_unsupported,
1614 [TGSI_OPCODE_SFENCE] = soa_unsupported,
1615 [TGSI_OPCODE_BARRIER] = soa_unsupported,
1616 [TGSI_OPCODE_ATOMUADD] = soa_unsupported,
1617 [TGSI_OPCODE_ATOMXCHG] = soa_unsupported,
1618 [TGSI_OPCODE_ATOMCAS] = soa_unsupported,
1619 [TGSI_OPCODE_ATOMAND] = soa_unsupported,
1620 [TGSI_OPCODE_ATOMOR] = soa_unsupported,
1621 [TGSI_OPCODE_ATOMXOR] = soa_unsupported,
1622 [TGSI_OPCODE_ATOMUMIN] = soa_unsupported,
1623 [TGSI_OPCODE_ATOMUMAX] = soa_unsupported,
1624 [TGSI_OPCODE_ATOMIMIN] = soa_unsupported,
1625 [TGSI_OPCODE_ATOMIMAX] = soa_unsupported,
1626 [TGSI_OPCODE_TEX2] = soa_passthrough,
1627 [TGSI_OPCODE_TXB2] = soa_passthrough,
1628 [TGSI_OPCODE_TXL2] = soa_passthrough,
1629 };
1630
1631 static bool
1632 ra_dst_is_indirect(const struct tgsi_full_dst_register *d)
1633 {
1634 return (d->Register.Indirect ||
1635 (d->Register.Dimension && d->Dimension.Indirect));
1636 }
1637
1638 static int
1639 ra_dst_index(const struct tgsi_full_dst_register *d)
1640 {
1641 assert(!d->Register.Indirect);
1642 return d->Register.Index;
1643 }
1644
1645 static int
1646 ra_dst_dimension(const struct tgsi_full_dst_register *d)
1647 {
1648 if (d->Register.Dimension) {
1649 assert(!d->Dimension.Indirect);
1650 return d->Dimension.Index;
1651 }
1652 else {
1653 return 0;
1654 }
1655 }
1656
1657 static bool
1658 ra_is_src_indirect(const struct tgsi_full_src_register *s)
1659 {
1660 return (s->Register.Indirect ||
1661 (s->Register.Dimension && s->Dimension.Indirect));
1662 }
1663
1664 static int
1665 ra_src_index(const struct tgsi_full_src_register *s)
1666 {
1667 assert(!s->Register.Indirect);
1668 return s->Register.Index;
1669 }
1670
1671 static int
1672 ra_src_dimension(const struct tgsi_full_src_register *s)
1673 {
1674 if (s->Register.Dimension) {
1675 assert(!s->Dimension.Indirect);
1676 return s->Dimension.Index;
1677 }
1678 else {
1679 return 0;
1680 }
1681 }
1682
1683 /**
1684 * Infer the type of either the sources or the destination.
1685 */
1686 static enum toy_type
1687 ra_infer_opcode_type(int tgsi_opcode, bool is_dst)
1688 {
1689 enum tgsi_opcode_type type;
1690
1691 if (is_dst)
1692 type = tgsi_opcode_infer_dst_type(tgsi_opcode);
1693 else
1694 type = tgsi_opcode_infer_src_type(tgsi_opcode);
1695
1696 switch (type) {
1697 case TGSI_TYPE_UNSIGNED:
1698 return TOY_TYPE_UD;
1699 case TGSI_TYPE_SIGNED:
1700 return TOY_TYPE_D;
1701 case TGSI_TYPE_FLOAT:
1702 return TOY_TYPE_F;
1703 case TGSI_TYPE_UNTYPED:
1704 case TGSI_TYPE_VOID:
1705 case TGSI_TYPE_DOUBLE:
1706 default:
1707 assert(!"unsupported TGSI type");
1708 return TOY_TYPE_UD;
1709 }
1710 }
1711
1712 /**
1713 * Return the type of an operand of the specified instruction.
1714 */
1715 static enum toy_type
1716 ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
1717 int operand, bool is_dst)
1718 {
1719 enum toy_type type;
1720 enum tgsi_file_type file;
1721
1722 /* we need to look at both src and dst for MOV */
1723 /* XXX it should not be this complex */
1724 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
1725 const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
1726 const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File;
1727
1728 if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) {
1729 type = TOY_TYPE_D;
1730 }
1731 else if (src_file == TGSI_FILE_IMMEDIATE &&
1732 !tgsi_inst->Src[0].Register.Indirect) {
1733 const int src_idx = tgsi_inst->Src[0].Register.Index;
1734 type = tgsi->imm_data.types[src_idx];
1735 }
1736 else {
1737 /* this is the best we can do */
1738 type = TOY_TYPE_F;
1739 }
1740
1741 return type;
1742 }
1743 else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) {
1744 if (!is_dst && operand == 0)
1745 type = TOY_TYPE_UD;
1746 else
1747 type = TOY_TYPE_F;
1748
1749 return type;
1750 }
1751
1752 type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst);
1753
1754 /* fix the type */
1755 file = (is_dst) ?
1756 tgsi_inst->Dst[operand].Register.File :
1757 tgsi_inst->Src[operand].Register.File;
1758 switch (file) {
1759 case TGSI_FILE_SAMPLER:
1760 case TGSI_FILE_RESOURCE:
1761 case TGSI_FILE_SAMPLER_VIEW:
1762 type = TOY_TYPE_D;
1763 break;
1764 case TGSI_FILE_ADDRESS:
1765 assert(type == TOY_TYPE_D);
1766 break;
1767 default:
1768 break;
1769 }
1770
1771 return type;
1772 }
1773
1774 /**
1775 * Allocate a VRF register.
1776 */
1777 static int
1778 ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file)
1779 {
1780 const int count = (tgsi->aos) ? 1 : 4;
1781 return tc_alloc_vrf(tgsi->tc, count);
1782 }
1783
1784 /**
1785 * Construct the key for VRF mapping look-up.
1786 */
1787 static void *
1788 ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
1789 {
1790 intptr_t key;
1791
1792 /* this is ugly... */
1793 assert(file < 1 << 4);
1794 assert(dim < 1 << 12);
1795 assert(index < 1 << 16);
1796 key = (file << 28) | (dim << 16) | index;
1797
1798 return intptr_to_pointer(key);
1799 }
1800
1801 /**
1802 * Map a TGSI register to a VRF register.
1803 */
1804 static int
1805 ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file,
1806 int dim, int index, bool *is_new)
1807 {
1808 void *key, *val;
1809 intptr_t vrf;
1810
1811 key = ra_get_map_key(file, dim, index);
1812
1813 /*
1814 * because we allocate vrf from 1 and on, val is never NULL as long as the
1815 * key exists
1816 */
1817 val = util_hash_table_get(tgsi->reg_mapping, key);
1818 if (val) {
1819 vrf = pointer_to_intptr(val);
1820
1821 if (is_new)
1822 *is_new = false;
1823 }
1824 else {
1825 vrf = (intptr_t) ra_alloc_reg(tgsi, file);
1826
1827 /* add to the mapping */
1828 val = intptr_to_pointer(vrf);
1829 util_hash_table_set(tgsi->reg_mapping, key, val);
1830
1831 if (is_new)
1832 *is_new = true;
1833 }
1834
1835 return (int) vrf;
1836 }
1837
1838 /**
1839 * Return true if the destination aliases any of the sources.
1840 */
1841 static bool
1842 ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index)
1843 {
1844 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1845 int i;
1846
1847 /* we need a scratch register for indirect dst anyway */
1848 if (ra_dst_is_indirect(d))
1849 return true;
1850
1851 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1852 const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
1853
1854 if (s->Register.File != d->Register.File)
1855 continue;
1856
1857 /*
1858 * we can go on to check dimension and index respectively, but
1859 * keep it simple for now
1860 */
1861 if (ra_is_src_indirect(s))
1862 return true;
1863 if (ra_src_dimension(s) == ra_dst_dimension(d) &&
1864 ra_src_index(s) == ra_dst_index(d))
1865 return true;
1866 }
1867
1868 return false;
1869 }
1870
1871 /**
1872 * Return the toy register for a TGSI destination operand.
1873 */
1874 static struct toy_dst
1875 ra_get_dst(struct toy_tgsi *tgsi,
1876 const struct tgsi_full_instruction *tgsi_inst, int dst_index,
1877 bool *is_scratch)
1878 {
1879 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1880 bool need_vrf = false;
1881 struct toy_dst dst;
1882
1883 switch (d->Register.File) {
1884 case TGSI_FILE_NULL:
1885 dst = tdst_null();
1886 break;
1887 case TGSI_FILE_OUTPUT:
1888 case TGSI_FILE_TEMPORARY:
1889 case TGSI_FILE_ADDRESS:
1890 case TGSI_FILE_PREDICATE:
1891 need_vrf = true;
1892 break;
1893 default:
1894 assert(!"unhandled dst file");
1895 dst = tdst_null();
1896 break;
1897 }
1898
1899 if (need_vrf) {
1900 /* XXX we do not always need a scratch given the conditions... */
1901 const bool need_scratch =
1902 (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) ||
1903 tgsi_inst->Instruction.Saturate);
1904 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true);
1905 int vrf;
1906
1907 if (need_scratch) {
1908 vrf = ra_alloc_reg(tgsi, d->Register.File);
1909 }
1910 else {
1911 vrf = ra_map_reg(tgsi, d->Register.File,
1912 ra_dst_dimension(d), ra_dst_index(d), NULL);
1913 }
1914
1915 if (is_scratch)
1916 *is_scratch = need_scratch;
1917
1918 dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1919 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
1920 }
1921
1922 return dst;
1923 }
1924
1925 static struct toy_src
1926 ra_get_src_for_vrf(const struct tgsi_full_src_register *s,
1927 enum toy_type type, int vrf)
1928 {
1929 return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1930 false, 0,
1931 s->Register.SwizzleX, s->Register.SwizzleY,
1932 s->Register.SwizzleZ, s->Register.SwizzleW,
1933 s->Register.Absolute, s->Register.Negate,
1934 vrf * TOY_REG_WIDTH);
1935 }
1936
1937 static int
1938 init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst,
1939 enum tgsi_file_type file, int index,
1940 const struct tgsi_ind_register *indirect,
1941 const struct tgsi_dimension *dimension,
1942 const struct tgsi_ind_register *dim_indirect)
1943 {
1944 struct toy_src src;
1945 int num_src = 0;
1946
1947 /* src[0]: TGSI file */
1948 inst->src[num_src++] = tsrc_imm_d(file);
1949
1950 /* src[1]: TGSI dimension */
1951 inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0);
1952
1953 /* src[2]: TGSI dimension indirection */
1954 if (dim_indirect) {
1955 const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0,
1956 dim_indirect->Index, NULL);
1957
1958 src = tsrc(TOY_FILE_VRF, vrf, 0);
1959 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1960 }
1961 else {
1962 src = tsrc_imm_d(0);
1963 }
1964
1965 inst->src[num_src++] = src;
1966
1967 /* src[3]: TGSI index */
1968 inst->src[num_src++] = tsrc_imm_d(index);
1969
1970 /* src[4]: TGSI index indirection */
1971 if (indirect) {
1972 const int vrf = ra_map_reg(tgsi, indirect->File, 0,
1973 indirect->Index, NULL);
1974
1975 src = tsrc(TOY_FILE_VRF, vrf, 0);
1976 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1977 }
1978 else {
1979 src = tsrc_imm_d(0);
1980 }
1981
1982 inst->src[num_src++] = src;
1983
1984 return num_src;
1985 }
1986
1987 static struct toy_src
1988 ra_get_src_indirect(struct toy_tgsi *tgsi,
1989 const struct tgsi_full_instruction *tgsi_inst,
1990 int src_index)
1991 {
1992 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
1993 bool need_vrf = false, is_resource = false;
1994 struct toy_src src;
1995
1996 switch (s->Register.File) {
1997 case TGSI_FILE_NULL:
1998 src = tsrc_null();
1999 break;
2000 case TGSI_FILE_SAMPLER:
2001 case TGSI_FILE_RESOURCE:
2002 case TGSI_FILE_SAMPLER_VIEW:
2003 is_resource = true;
2004 /* fall through */
2005 case TGSI_FILE_CONSTANT:
2006 case TGSI_FILE_INPUT:
2007 case TGSI_FILE_SYSTEM_VALUE:
2008 case TGSI_FILE_TEMPORARY:
2009 case TGSI_FILE_ADDRESS:
2010 case TGSI_FILE_IMMEDIATE:
2011 case TGSI_FILE_PREDICATE:
2012 need_vrf = true;
2013 break;
2014 default:
2015 assert(!"unhandled src file");
2016 src = tsrc_null();
2017 break;
2018 }
2019
2020 if (need_vrf) {
2021 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
2022 int vrf;
2023
2024 if (is_resource) {
2025 assert(!s->Register.Dimension);
2026 assert(s->Register.Indirect);
2027
2028 vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
2029 }
2030 else {
2031 vrf = ra_alloc_reg(tgsi, s->Register.File);
2032 }
2033
2034 src = ra_get_src_for_vrf(s, type, vrf);
2035
2036 /* emit indirect fetch */
2037 if (!is_resource) {
2038 struct toy_inst *inst;
2039
2040 inst = tc_add(tgsi->tc);
2041 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
2042 inst->dst = tdst_from(src);
2043 inst->dst.writemask = TOY_WRITEMASK_XYZW;
2044
2045 init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
2046 (s->Register.Indirect) ? &s->Indirect : NULL,
2047 (s->Register.Dimension) ? &s->Dimension : NULL,
2048 (s->Dimension.Indirect) ? &s->DimIndirect : NULL);
2049 }
2050 }
2051
2052 return src;
2053 }
2054
2055 /**
2056 * Return the toy register for a TGSI source operand.
2057 */
2058 static struct toy_src
2059 ra_get_src(struct toy_tgsi *tgsi,
2060 const struct tgsi_full_instruction *tgsi_inst,
2061 int src_index)
2062 {
2063 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
2064 bool need_vrf = false;
2065 struct toy_src src;
2066
2067 if (ra_is_src_indirect(s))
2068 return ra_get_src_indirect(tgsi, tgsi_inst, src_index);
2069
2070 switch (s->Register.File) {
2071 case TGSI_FILE_NULL:
2072 src = tsrc_null();
2073 break;
2074 case TGSI_FILE_CONSTANT:
2075 case TGSI_FILE_INPUT:
2076 case TGSI_FILE_SYSTEM_VALUE:
2077 need_vrf = true;
2078 break;
2079 case TGSI_FILE_TEMPORARY:
2080 case TGSI_FILE_ADDRESS:
2081 case TGSI_FILE_PREDICATE:
2082 need_vrf = true;
2083 break;
2084 case TGSI_FILE_SAMPLER:
2085 case TGSI_FILE_RESOURCE:
2086 case TGSI_FILE_SAMPLER_VIEW:
2087 assert(!s->Register.Dimension);
2088 src = tsrc_imm_d(s->Register.Index);
2089 break;
2090 case TGSI_FILE_IMMEDIATE:
2091 {
2092 const uint32_t *imm;
2093 enum toy_type imm_type;
2094 bool is_scalar;
2095
2096 imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);
2097
2098 is_scalar =
2099 (imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] &&
2100 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] &&
2101 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]);
2102
2103 if (is_scalar) {
2104 const enum toy_type type =
2105 ra_get_type(tgsi, tgsi_inst, src_index, false);
2106
2107 /* ignore imm_type */
2108 src = tsrc_imm_ud(imm[s->Register.SwizzleX]);
2109 src.type = type;
2110 src.absolute = s->Register.Absolute;
2111 src.negate = s->Register.Negate;
2112 }
2113 else {
2114 need_vrf = true;
2115 }
2116 }
2117 break;
2118 default:
2119 assert(!"unhandled src file");
2120 src = tsrc_null();
2121 break;
2122 }
2123
2124 if (need_vrf) {
2125 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
2126 bool is_new;
2127 int vrf;
2128
2129 vrf = ra_map_reg(tgsi, s->Register.File,
2130 ra_src_dimension(s), ra_src_index(s), &is_new);
2131
2132 src = ra_get_src_for_vrf(s, type, vrf);
2133
2134 if (is_new) {
2135 switch (s->Register.File) {
2136 case TGSI_FILE_TEMPORARY:
2137 case TGSI_FILE_ADDRESS:
2138 case TGSI_FILE_PREDICATE:
2139 {
2140 struct toy_dst dst = tdst_from(src);
2141 dst.writemask = TOY_WRITEMASK_XYZW;
2142
2143 /* always initialize registers before use */
2144 if (tgsi->aos) {
2145 tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type));
2146 }
2147 else {
2148 struct toy_dst tdst[4];
2149 int i;
2150
2151 tdst_transpose(dst, tdst);
2152
2153 for (i = 0; i < 4; i++) {
2154 tc_MOV(tgsi->tc, tdst[i],
2155 tsrc_type(tsrc_imm_d(0), type));
2156 }
2157 }
2158 }
2159 break;
2160 default:
2161 break;
2162 }
2163 }
2164
2165 }
2166
2167 return src;
2168 }
2169
2170 static void
2171 parse_instruction(struct toy_tgsi *tgsi,
2172 const struct tgsi_full_instruction *tgsi_inst)
2173 {
2174 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS];
2175 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS];
2176 bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS];
2177 toy_tgsi_translate translate;
2178 int i;
2179
2180 /* convert TGSI registers to toy registers */
2181 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
2182 src[i] = ra_get_src(tgsi, tgsi_inst, i);
2183 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
2184 dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]);
2185
2186 /* translate the instruction */
2187 translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode];
2188 translate(tgsi->tc, tgsi_inst, dst, src);
2189
2190 /* write the result to the real destinations if needed */
2191 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2192 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2193
2194 if (!dst_is_scratch[i])
2195 continue;
2196
2197 if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE)
2198 tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled");
2199
2200 tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
2201
2202 /* emit indirect store */
2203 if (ra_dst_is_indirect(d)) {
2204 struct toy_inst *inst;
2205
2206 inst = tc_add(tgsi->tc);
2207 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE;
2208 inst->dst = dst[i];
2209
2210 init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index,
2211 (d->Register.Indirect) ? &d->Indirect : NULL,
2212 (d->Register.Dimension) ? &d->Dimension : NULL,
2213 (d->Dimension.Indirect) ? &d->DimIndirect : NULL);
2214 }
2215 else {
2216 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true);
2217 struct toy_dst real_dst;
2218 int vrf;
2219
2220 vrf = ra_map_reg(tgsi, d->Register.File,
2221 ra_dst_dimension(d), ra_dst_index(d), NULL);
2222 real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
2223 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
2224
2225 if (tgsi->aos) {
2226 tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i]));
2227 }
2228 else {
2229 struct toy_dst tdst[4];
2230 struct toy_src tsrc[4];
2231 int j;
2232
2233 tdst_transpose(real_dst, tdst);
2234 tsrc_transpose(tsrc_from(dst[i]), tsrc);
2235
2236 for (j = 0; j < 4; j++)
2237 tc_MOV(tgsi->tc, tdst[j], tsrc[j]);
2238 }
2239 }
2240
2241 tgsi->tc->templ.saturate = false;
2242 }
2243
2244 switch (tgsi_inst->Instruction.Opcode) {
2245 case TGSI_OPCODE_KILL_IF:
2246 case TGSI_OPCODE_KILL:
2247 tgsi->uses_kill = true;
2248 break;
2249 }
2250
2251 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
2252 const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
2253 if (s->Register.File == TGSI_FILE_CONSTANT && s->Register.Indirect)
2254 tgsi->const_indirect = true;
2255 }
2256
2257 /* remember channels written */
2258 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2259 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2260
2261 if (d->Register.File != TGSI_FILE_OUTPUT)
2262 continue;
2263 for (i = 0; i < tgsi->num_outputs; i++) {
2264 if (tgsi->outputs[i].index == d->Register.Index) {
2265 tgsi->outputs[i].undefined_mask &= ~d->Register.WriteMask;
2266 break;
2267 }
2268 }
2269 }
2270 }
2271
2272 static void
2273 decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2274 {
2275 static const struct tgsi_declaration_interp default_interp = {
2276 TGSI_INTERPOLATE_PERSPECTIVE, false, 0,
2277 };
2278 const struct tgsi_declaration_interp *interp =
2279 (decl->Declaration.Interpolate) ? &decl->Interp: &default_interp;
2280 int index;
2281
2282 if (decl->Range.Last >= Elements(tgsi->inputs)) {
2283 assert(!"invalid IN");
2284 return;
2285 }
2286
2287 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2288 const int slot = tgsi->num_inputs++;
2289
2290 tgsi->inputs[slot].index = index;
2291 tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask;
2292 if (decl->Declaration.Semantic) {
2293 tgsi->inputs[slot].semantic_name = decl->Semantic.Name;
2294 tgsi->inputs[slot].semantic_index = decl->Semantic.Index;
2295 }
2296 else {
2297 tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2298 tgsi->inputs[slot].semantic_index = index;
2299 }
2300 tgsi->inputs[slot].interp = interp->Interpolate;
2301 tgsi->inputs[slot].centroid = interp->Location == TGSI_INTERPOLATE_LOC_CENTROID;
2302 }
2303 }
2304
2305 static void
2306 decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2307 {
2308 int index;
2309
2310 if (decl->Range.Last >= Elements(tgsi->outputs)) {
2311 assert(!"invalid OUT");
2312 return;
2313 }
2314
2315 assert(decl->Declaration.Semantic);
2316
2317 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2318 const int slot = tgsi->num_outputs++;
2319
2320 tgsi->outputs[slot].index = index;
2321 tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW;
2322 tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask;
2323 tgsi->outputs[slot].semantic_name = decl->Semantic.Name;
2324 tgsi->outputs[slot].semantic_index = decl->Semantic.Index;
2325 }
2326 }
2327
2328 static void
2329 decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2330 {
2331 int index;
2332
2333 if (decl->Range.Last >= Elements(tgsi->system_values)) {
2334 assert(!"invalid SV");
2335 return;
2336 }
2337
2338 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2339 const int slot = tgsi->num_system_values++;
2340
2341 tgsi->system_values[slot].index = index;
2342 if (decl->Declaration.Semantic) {
2343 tgsi->system_values[slot].semantic_name = decl->Semantic.Name;
2344 tgsi->system_values[slot].semantic_index = decl->Semantic.Index;
2345 }
2346 else {
2347 tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2348 tgsi->system_values[slot].semantic_index = index;
2349 }
2350 }
2351 }
2352
2353 /**
2354 * Emit an instruction to fetch the value of a TGSI register.
2355 */
2356 static void
2357 fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx)
2358 {
2359 struct toy_dst dst;
2360 int vrf;
2361 enum toy_opcode opcode;
2362 enum toy_type type = TOY_TYPE_F;
2363
2364 switch (file) {
2365 case TGSI_FILE_INPUT:
2366 opcode = TOY_OPCODE_TGSI_IN;
2367 break;
2368 case TGSI_FILE_CONSTANT:
2369 opcode = TOY_OPCODE_TGSI_CONST;
2370 break;
2371 case TGSI_FILE_SYSTEM_VALUE:
2372 opcode = TOY_OPCODE_TGSI_SV;
2373 break;
2374 case TGSI_FILE_IMMEDIATE:
2375 opcode = TOY_OPCODE_TGSI_IMM;
2376 toy_tgsi_get_imm(tgsi, idx, &type);
2377 break;
2378 default:
2379 /* no need to fetch */
2380 return;
2381 break;
2382 }
2383
2384 vrf = ra_map_reg(tgsi, file, dim, idx, NULL);
2385 dst = tdst(TOY_FILE_VRF, vrf, 0);
2386 dst = tdst_type(dst, type);
2387
2388 tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx));
2389 }
2390
2391 static void
2392 parse_declaration(struct toy_tgsi *tgsi,
2393 const struct tgsi_full_declaration *decl)
2394 {
2395 int i;
2396
2397 switch (decl->Declaration.File) {
2398 case TGSI_FILE_INPUT:
2399 decl_add_in(tgsi, decl);
2400 break;
2401 case TGSI_FILE_OUTPUT:
2402 decl_add_out(tgsi, decl);
2403 break;
2404 case TGSI_FILE_SYSTEM_VALUE:
2405 decl_add_sv(tgsi, decl);
2406 break;
2407 case TGSI_FILE_IMMEDIATE:
2408 /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
2409 assert(!"unexpected immediate declaration");
2410 break;
2411 case TGSI_FILE_CONSTANT:
2412 if (tgsi->const_count <= decl->Range.Last)
2413 tgsi->const_count = decl->Range.Last + 1;
2414 break;
2415 case TGSI_FILE_NULL:
2416 case TGSI_FILE_TEMPORARY:
2417 case TGSI_FILE_SAMPLER:
2418 case TGSI_FILE_PREDICATE:
2419 case TGSI_FILE_ADDRESS:
2420 case TGSI_FILE_RESOURCE:
2421 case TGSI_FILE_SAMPLER_VIEW:
2422 /* nothing to do */
2423 break;
2424 default:
2425 assert(!"unhandled TGSI file");
2426 break;
2427 }
2428
2429 /* fetch the registers now */
2430 for (i = decl->Range.First; i <= decl->Range.Last; i++) {
2431 const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0;
2432 fetch_source(tgsi, decl->Declaration.File, dim, i);
2433 }
2434 }
2435
2436 static int
2437 add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf)
2438 {
2439 /* reallocate the buffer if necessary */
2440 if (tgsi->imm_data.cur >= tgsi->imm_data.size) {
2441 const int cur_size = tgsi->imm_data.size;
2442 int new_size;
2443 enum toy_type *new_types;
2444 uint32_t (*new_buf)[4];
2445
2446 new_size = (cur_size) ? cur_size << 1 : 16;
2447 while (new_size <= tgsi->imm_data.cur)
2448 new_size <<= 1;
2449
2450 new_buf = REALLOC(tgsi->imm_data.buf,
2451 cur_size * sizeof(new_buf[0]),
2452 new_size * sizeof(new_buf[0]));
2453 new_types = REALLOC(tgsi->imm_data.types,
2454 cur_size * sizeof(new_types[0]),
2455 new_size * sizeof(new_types[0]));
2456 if (!new_buf || !new_types) {
2457 if (new_buf)
2458 FREE(new_buf);
2459 if (new_types)
2460 FREE(new_types);
2461 return -1;
2462 }
2463
2464 tgsi->imm_data.buf = new_buf;
2465 tgsi->imm_data.types = new_types;
2466 tgsi->imm_data.size = new_size;
2467 }
2468
2469 tgsi->imm_data.types[tgsi->imm_data.cur] = type;
2470 memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur],
2471 buf, sizeof(tgsi->imm_data.buf[0]));
2472
2473 return tgsi->imm_data.cur++;
2474 }
2475
2476 static void
2477 parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm)
2478 {
2479 enum toy_type type;
2480 uint32_t imm_buf[4];
2481 int idx;
2482
2483 switch (imm->Immediate.DataType) {
2484 case TGSI_IMM_FLOAT32:
2485 type = TOY_TYPE_F;
2486 imm_buf[0] = fui(imm->u[0].Float);
2487 imm_buf[1] = fui(imm->u[1].Float);
2488 imm_buf[2] = fui(imm->u[2].Float);
2489 imm_buf[3] = fui(imm->u[3].Float);
2490 break;
2491 case TGSI_IMM_INT32:
2492 type = TOY_TYPE_D;
2493 imm_buf[0] = (uint32_t) imm->u[0].Int;
2494 imm_buf[1] = (uint32_t) imm->u[1].Int;
2495 imm_buf[2] = (uint32_t) imm->u[2].Int;
2496 imm_buf[3] = (uint32_t) imm->u[3].Int;
2497 break;
2498 case TGSI_IMM_UINT32:
2499 type = TOY_TYPE_UD;
2500 imm_buf[0] = imm->u[0].Uint;
2501 imm_buf[1] = imm->u[1].Uint;
2502 imm_buf[2] = imm->u[2].Uint;
2503 imm_buf[3] = imm->u[3].Uint;
2504 break;
2505 default:
2506 assert(!"unhandled TGSI imm type");
2507 type = TOY_TYPE_F;
2508 memset(imm_buf, 0, sizeof(imm_buf));
2509 break;
2510 }
2511
2512 idx = add_imm(tgsi, type, imm_buf);
2513 if (idx >= 0)
2514 fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx);
2515 else
2516 tc_fail(tgsi->tc, "failed to add TGSI imm");
2517 }
2518
2519 static void
2520 parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop)
2521 {
2522 switch (prop->Property.PropertyName) {
2523 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
2524 tgsi->props.vs_prohibit_ucps = prop->u[0].Data;
2525 break;
2526 case TGSI_PROPERTY_FS_COORD_ORIGIN:
2527 tgsi->props.fs_coord_origin = prop->u[0].Data;
2528 break;
2529 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
2530 tgsi->props.fs_coord_pixel_center = prop->u[0].Data;
2531 break;
2532 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2533 tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data;
2534 break;
2535 case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
2536 tgsi->props.fs_depth_layout = prop->u[0].Data;
2537 break;
2538 case TGSI_PROPERTY_GS_INPUT_PRIM:
2539 tgsi->props.gs_input_prim = prop->u[0].Data;
2540 break;
2541 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2542 tgsi->props.gs_output_prim = prop->u[0].Data;
2543 break;
2544 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2545 tgsi->props.gs_max_output_vertices = prop->u[0].Data;
2546 break;
2547 default:
2548 assert(!"unhandled TGSI property");
2549 break;
2550 }
2551 }
2552
2553 static void
2554 parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token)
2555 {
2556 switch (token->Token.Type) {
2557 case TGSI_TOKEN_TYPE_DECLARATION:
2558 parse_declaration(tgsi, &token->FullDeclaration);
2559 break;
2560 case TGSI_TOKEN_TYPE_IMMEDIATE:
2561 parse_immediate(tgsi, &token->FullImmediate);
2562 break;
2563 case TGSI_TOKEN_TYPE_INSTRUCTION:
2564 parse_instruction(tgsi, &token->FullInstruction);
2565 break;
2566 case TGSI_TOKEN_TYPE_PROPERTY:
2567 parse_property(tgsi, &token->FullProperty);
2568 break;
2569 default:
2570 assert(!"unhandled TGSI token type");
2571 break;
2572 }
2573 }
2574
2575 static enum pipe_error
2576 dump_reg_mapping(void *key, void *val, void *data)
2577 {
2578 int tgsi_file, tgsi_dim, tgsi_index;
2579 uint32_t sig, vrf;
2580
2581 sig = (uint32_t) pointer_to_intptr(key);
2582 vrf = (uint32_t) pointer_to_intptr(val);
2583
2584 /* see ra_get_map_key() */
2585 tgsi_file = (sig >> 28) & 0xf;
2586 tgsi_dim = (sig >> 16) & 0xfff;
2587 tgsi_index = (sig >> 0) & 0xffff;
2588
2589 if (tgsi_dim) {
2590 ilo_printf(" v%d:\t%s[%d][%d]\n", vrf,
2591 tgsi_file_name(tgsi_file), tgsi_dim, tgsi_index);
2592 }
2593 else {
2594 ilo_printf(" v%d:\t%s[%d]\n", vrf,
2595 tgsi_file_name(tgsi_file), tgsi_index);
2596 }
2597
2598 return PIPE_OK;
2599 }
2600
2601 /**
2602 * Dump the TGSI translator, currently only the register mapping.
2603 */
2604 void
2605 toy_tgsi_dump(const struct toy_tgsi *tgsi)
2606 {
2607 util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL);
2608 }
2609
2610 /**
2611 * Clean up the TGSI translator.
2612 */
2613 void
2614 toy_tgsi_cleanup(struct toy_tgsi *tgsi)
2615 {
2616 FREE(tgsi->imm_data.buf);
2617 FREE(tgsi->imm_data.types);
2618
2619 util_hash_table_destroy(tgsi->reg_mapping);
2620 }
2621
/**
 * Hash function for the register mapping table: the key, converted to an
 * integer, is used directly as the hash value.
 */
static unsigned
reg_mapping_hash(void *key)
{
   const intptr_t bits = pointer_to_intptr(key);

   return (unsigned) bits;
}
2627
/**
 * Key comparison function for the register mapping table.
 *
 * \return 0 when the two keys are the same pointer value, 1 otherwise
 */
static int
reg_mapping_compare(void *key1, void *key2)
{
   if (key1 == key2)
      return 0;

   return 1;
}
2633
2634 /**
2635 * Initialize the TGSI translator.
2636 */
2637 static bool
2638 init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos)
2639 {
2640 memset(tgsi, 0, sizeof(*tgsi));
2641
2642 tgsi->tc = tc;
2643 tgsi->aos = aos;
2644 tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table;
2645
2646 /* create a mapping of TGSI registers to VRF reigsters */
2647 tgsi->reg_mapping =
2648 util_hash_table_create(reg_mapping_hash, reg_mapping_compare);
2649
2650 return (tgsi->reg_mapping != NULL);
2651 }
2652
2653 /**
2654 * Translate TGSI tokens into toy instructions.
2655 */
2656 void
2657 toy_compiler_translate_tgsi(struct toy_compiler *tc,
2658 const struct tgsi_token *tokens, bool aos,
2659 struct toy_tgsi *tgsi)
2660 {
2661 struct tgsi_parse_context parse;
2662
2663 if (!init_tgsi(tgsi, tc, aos)) {
2664 tc_fail(tc, "failed to initialize TGSI translator");
2665 return;
2666 }
2667
2668 tgsi_parse_init(&parse, tokens);
2669 while (!tgsi_parse_end_of_tokens(&parse)) {
2670 tgsi_parse_token(&parse);
2671 parse_token(tgsi, &parse.FullToken);
2672 }
2673 tgsi_parse_free(&parse);
2674 }
2675
2676 /**
2677 * Map the TGSI register to VRF register.
2678 */
2679 int
2680 toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
2681 enum tgsi_file_type file, int dimension, int index)
2682 {
2683 void *key, *val;
2684
2685 key = ra_get_map_key(file, dimension, index);
2686
2687 val = util_hash_table_get(tgsi->reg_mapping, key);
2688
2689 return (val) ? pointer_to_intptr(val) : -1;
2690 }