4d813f0cf461f8b4a954a6b4a18f2a1705149c00
[mesa.git] / src / gallium / drivers / ilo / shader / toy_tgsi.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_info.h"
30 #include "tgsi/tgsi_strings.h"
31 #include "util/u_hash_table.h"
32 #include "toy_helpers.h"
33 #include "toy_tgsi.h"
34
35 /* map TGSI opcode to GEN opcode 1-to-1 */
36 static const struct {
37 int opcode;
38 int num_dst;
39 int num_src;
40 } aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
41 [TGSI_OPCODE_ARL] = { GEN6_OPCODE_RNDD, 1, 1 },
42 [TGSI_OPCODE_MOV] = { GEN6_OPCODE_MOV, 1, 1 },
43 [TGSI_OPCODE_RCP] = { TOY_OPCODE_INV, 1, 1 },
44 [TGSI_OPCODE_RSQ] = { TOY_OPCODE_RSQ, 1, 1 },
45 [TGSI_OPCODE_MUL] = { GEN6_OPCODE_MUL, 1, 2 },
46 [TGSI_OPCODE_ADD] = { GEN6_OPCODE_ADD, 1, 2 },
47 [TGSI_OPCODE_DP3] = { GEN6_OPCODE_DP3, 1, 2 },
48 [TGSI_OPCODE_DP4] = { GEN6_OPCODE_DP4, 1, 2 },
49 [TGSI_OPCODE_MIN] = { GEN6_OPCODE_SEL, 1, 2 },
50 [TGSI_OPCODE_MAX] = { GEN6_OPCODE_SEL, 1, 2 },
51 /* a later pass will move src[2] to accumulator */
52 [TGSI_OPCODE_MAD] = { GEN6_OPCODE_MAC, 1, 3 },
53 [TGSI_OPCODE_SQRT] = { TOY_OPCODE_SQRT, 1, 1 },
54 [TGSI_OPCODE_FRC] = { GEN6_OPCODE_FRC, 1, 1 },
55 [TGSI_OPCODE_FLR] = { GEN6_OPCODE_RNDD, 1, 1 },
56 [TGSI_OPCODE_ROUND] = { GEN6_OPCODE_RNDE, 1, 1 },
57 [TGSI_OPCODE_EX2] = { TOY_OPCODE_EXP, 1, 1 },
58 [TGSI_OPCODE_LG2] = { TOY_OPCODE_LOG, 1, 1 },
59 [TGSI_OPCODE_POW] = { TOY_OPCODE_POW, 1, 2 },
60 [TGSI_OPCODE_DPH] = { GEN6_OPCODE_DPH, 1, 2 },
61 [TGSI_OPCODE_COS] = { TOY_OPCODE_COS, 1, 1 },
62 [TGSI_OPCODE_KILL] = { TOY_OPCODE_KIL, 0, 0 },
63 [TGSI_OPCODE_SIN] = { TOY_OPCODE_SIN, 1, 1 },
64 [TGSI_OPCODE_ARR] = { GEN6_OPCODE_RNDZ, 1, 1 },
65 [TGSI_OPCODE_DP2] = { GEN6_OPCODE_DP2, 1, 2 },
66 [TGSI_OPCODE_IF] = { GEN6_OPCODE_IF, 0, 1 },
67 [TGSI_OPCODE_UIF] = { GEN6_OPCODE_IF, 0, 1 },
68 [TGSI_OPCODE_ELSE] = { GEN6_OPCODE_ELSE, 0, 0 },
69 [TGSI_OPCODE_ENDIF] = { GEN6_OPCODE_ENDIF, 0, 0 },
70 [TGSI_OPCODE_I2F] = { GEN6_OPCODE_MOV, 1, 1 },
71 [TGSI_OPCODE_NOT] = { GEN6_OPCODE_NOT, 1, 1 },
72 [TGSI_OPCODE_TRUNC] = { GEN6_OPCODE_RNDZ, 1, 1 },
73 [TGSI_OPCODE_SHL] = { GEN6_OPCODE_SHL, 1, 2 },
74 [TGSI_OPCODE_AND] = { GEN6_OPCODE_AND, 1, 2 },
75 [TGSI_OPCODE_OR] = { GEN6_OPCODE_OR, 1, 2 },
76 [TGSI_OPCODE_MOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
77 [TGSI_OPCODE_XOR] = { GEN6_OPCODE_XOR, 1, 2 },
78 [TGSI_OPCODE_EMIT] = { TOY_OPCODE_EMIT, 0, 0 },
79 [TGSI_OPCODE_ENDPRIM] = { TOY_OPCODE_ENDPRIM, 0, 0 },
80 [TGSI_OPCODE_NOP] = { GEN6_OPCODE_NOP, 0, 0 },
81 [TGSI_OPCODE_KILL_IF] = { TOY_OPCODE_KIL, 0, 1 },
82 [TGSI_OPCODE_END] = { GEN6_OPCODE_NOP, 0, 0 },
83 [TGSI_OPCODE_F2I] = { GEN6_OPCODE_MOV, 1, 1 },
84 [TGSI_OPCODE_IDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
85 [TGSI_OPCODE_IMAX] = { GEN6_OPCODE_SEL, 1, 2 },
86 [TGSI_OPCODE_IMIN] = { GEN6_OPCODE_SEL, 1, 2 },
87 [TGSI_OPCODE_INEG] = { GEN6_OPCODE_MOV, 1, 1 },
88 [TGSI_OPCODE_ISHR] = { GEN6_OPCODE_ASR, 1, 2 },
89 [TGSI_OPCODE_F2U] = { GEN6_OPCODE_MOV, 1, 1 },
90 [TGSI_OPCODE_U2F] = { GEN6_OPCODE_MOV, 1, 1 },
91 [TGSI_OPCODE_UADD] = { GEN6_OPCODE_ADD, 1, 2 },
92 [TGSI_OPCODE_UDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
93 /* a later pass will move src[2] to accumulator */
94 [TGSI_OPCODE_UMAD] = { GEN6_OPCODE_MAC, 1, 3 },
95 [TGSI_OPCODE_UMAX] = { GEN6_OPCODE_SEL, 1, 2 },
96 [TGSI_OPCODE_UMIN] = { GEN6_OPCODE_SEL, 1, 2 },
97 [TGSI_OPCODE_UMOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
98 [TGSI_OPCODE_UMUL] = { GEN6_OPCODE_MUL, 1, 2 },
99 [TGSI_OPCODE_USHR] = { GEN6_OPCODE_SHR, 1, 2 },
100 [TGSI_OPCODE_UARL] = { GEN6_OPCODE_MOV, 1, 1 },
101 [TGSI_OPCODE_IABS] = { GEN6_OPCODE_MOV, 1, 1 },
102 };
103
104 static void
105 aos_simple(struct toy_compiler *tc,
106 const struct tgsi_full_instruction *tgsi_inst,
107 struct toy_dst *dst,
108 struct toy_src *src)
109 {
110 struct toy_inst *inst;
111 int opcode;
112 int cond_modifier = GEN6_COND_NONE;
113 int num_dst = tgsi_inst->Instruction.NumDstRegs;
114 int num_src = tgsi_inst->Instruction.NumSrcRegs;
115 int i;
116
117 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
118 assert(num_dst == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst);
119 assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src);
120 if (!opcode) {
121 assert(!"invalid aos_simple() call");
122 return;
123 }
124
125 /* no need to emit nop */
126 if (opcode == GEN6_OPCODE_NOP)
127 return;
128
129 inst = tc_add(tc);
130 if (!inst)
131 return;
132
133 inst->opcode = opcode;
134
135 switch (tgsi_inst->Instruction.Opcode) {
136 case TGSI_OPCODE_MIN:
137 case TGSI_OPCODE_IMIN:
138 case TGSI_OPCODE_UMIN:
139 cond_modifier = GEN6_COND_L;
140 break;
141 case TGSI_OPCODE_MAX:
142 case TGSI_OPCODE_IMAX:
143 case TGSI_OPCODE_UMAX:
144 cond_modifier = GEN6_COND_GE;
145 break;
146 case TGSI_OPCODE_IABS:
147 src[0] = tsrc_absolute(src[0]);
148 break;
149 case TGSI_OPCODE_IF:
150 cond_modifier = GEN6_COND_NZ;
151 num_src = 2;
152 assert(src[0].type == TOY_TYPE_F);
153 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
154 src[1] = tsrc_imm_f(0.0f);
155 break;
156 case TGSI_OPCODE_UIF:
157 cond_modifier = GEN6_COND_NZ;
158 num_src = 2;
159 assert(src[0].type == TOY_TYPE_UD);
160 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
161 src[1] = tsrc_imm_d(0);
162 break;
163 case TGSI_OPCODE_INEG:
164 src[0] = tsrc_negate(src[0]);
165 break;
166 case TGSI_OPCODE_RCP:
167 case TGSI_OPCODE_RSQ:
168 case TGSI_OPCODE_EX2:
169 case TGSI_OPCODE_LG2:
170 case TGSI_OPCODE_COS:
171 case TGSI_OPCODE_SIN:
172 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
173 break;
174 case TGSI_OPCODE_POW:
175 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
176 src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
177 break;
178 }
179
180 inst->cond_modifier = cond_modifier;
181
182 if (num_dst) {
183 assert(num_dst == 1);
184 inst->dst = dst[0];
185 }
186
187 assert(num_src <= ARRAY_SIZE(inst->src));
188 for (i = 0; i < num_src; i++)
189 inst->src[i] = src[i];
190 }
191
192 static void
193 aos_set_on_cond(struct toy_compiler *tc,
194 const struct tgsi_full_instruction *tgsi_inst,
195 struct toy_dst *dst,
196 struct toy_src *src)
197 {
198 struct toy_inst *inst;
199 int cond;
200 struct toy_src zero, one;
201
202 switch (tgsi_inst->Instruction.Opcode) {
203 case TGSI_OPCODE_SLT:
204 case TGSI_OPCODE_ISLT:
205 case TGSI_OPCODE_USLT:
206 case TGSI_OPCODE_FSLT:
207 cond = GEN6_COND_L;
208 break;
209 case TGSI_OPCODE_SGE:
210 case TGSI_OPCODE_ISGE:
211 case TGSI_OPCODE_USGE:
212 case TGSI_OPCODE_FSGE:
213 cond = GEN6_COND_GE;
214 break;
215 case TGSI_OPCODE_SEQ:
216 case TGSI_OPCODE_USEQ:
217 case TGSI_OPCODE_FSEQ:
218 cond = GEN6_COND_Z;
219 break;
220 case TGSI_OPCODE_SGT:
221 cond = GEN6_COND_G;
222 break;
223 case TGSI_OPCODE_SLE:
224 cond = GEN6_COND_LE;
225 break;
226 case TGSI_OPCODE_SNE:
227 case TGSI_OPCODE_USNE:
228 case TGSI_OPCODE_FSNE:
229 cond = GEN6_COND_NZ;
230 break;
231 default:
232 assert(!"invalid aos_set_on_cond() call");
233 return;
234 }
235
236 /* note that for integer versions, all bits are set */
237 switch (dst[0].type) {
238 case TOY_TYPE_F:
239 default:
240 zero = tsrc_imm_f(0.0f);
241 one = tsrc_imm_f(1.0f);
242 break;
243 case TOY_TYPE_D:
244 zero = tsrc_imm_d(0);
245 one = tsrc_imm_d(-1);
246 break;
247 case TOY_TYPE_UD:
248 zero = tsrc_imm_ud(0);
249 one = tsrc_imm_ud(~0);
250 break;
251 }
252
253 tc_MOV(tc, dst[0], zero);
254 tc_CMP(tc, tdst_null(), src[0], src[1], cond);
255 inst = tc_MOV(tc, dst[0], one);
256 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
257 }
258
259 static void
260 aos_compare(struct toy_compiler *tc,
261 const struct tgsi_full_instruction *tgsi_inst,
262 struct toy_dst *dst,
263 struct toy_src *src)
264 {
265 struct toy_inst *inst;
266 struct toy_src zero;
267
268 switch (tgsi_inst->Instruction.Opcode) {
269 case TGSI_OPCODE_CMP:
270 zero = tsrc_imm_f(0.0f);
271 break;
272 case TGSI_OPCODE_UCMP:
273 zero = tsrc_imm_ud(0);
274 break;
275 default:
276 assert(!"invalid aos_compare() call");
277 return;
278 }
279
280 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
281 inst = tc_SEL(tc, dst[0], src[1], src[2], GEN6_COND_NONE);
282 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
283 }
284
285 static void
286 aos_set_sign(struct toy_compiler *tc,
287 const struct tgsi_full_instruction *tgsi_inst,
288 struct toy_dst *dst,
289 struct toy_src *src)
290 {
291 struct toy_inst *inst;
292 struct toy_src zero, one, neg_one;
293
294 switch (tgsi_inst->Instruction.Opcode) {
295 case TGSI_OPCODE_SSG:
296 zero = tsrc_imm_f(0.0f);
297 one = tsrc_imm_f(1.0f);
298 neg_one = tsrc_imm_f(-1.0f);
299 break;
300 case TGSI_OPCODE_ISSG:
301 zero = tsrc_imm_d(0);
302 one = tsrc_imm_d(1);
303 neg_one = tsrc_imm_d(-1);
304 break;
305 default:
306 assert(!"invalid aos_set_sign() call");
307 return;
308 }
309
310 tc_MOV(tc, dst[0], zero);
311
312 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_G);
313 inst = tc_MOV(tc, dst[0], one);
314 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
315
316 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
317 inst = tc_MOV(tc, dst[0], neg_one);
318 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
319 }
320
321 static void
322 aos_tex(struct toy_compiler *tc,
323 const struct tgsi_full_instruction *tgsi_inst,
324 struct toy_dst *dst,
325 struct toy_src *src)
326 {
327 struct toy_inst *inst;
328 enum toy_opcode opcode;
329 int i;
330
331 switch (tgsi_inst->Instruction.Opcode) {
332 case TGSI_OPCODE_TEX:
333 opcode = TOY_OPCODE_TGSI_TEX;
334 break;
335 case TGSI_OPCODE_TXD:
336 opcode = TOY_OPCODE_TGSI_TXD;
337 break;
338 case TGSI_OPCODE_TXP:
339 opcode = TOY_OPCODE_TGSI_TXP;
340 break;
341 case TGSI_OPCODE_TXB:
342 opcode = TOY_OPCODE_TGSI_TXB;
343 break;
344 case TGSI_OPCODE_TXL:
345 opcode = TOY_OPCODE_TGSI_TXL;
346 break;
347 case TGSI_OPCODE_TXF:
348 opcode = TOY_OPCODE_TGSI_TXF;
349 break;
350 case TGSI_OPCODE_TXQ:
351 opcode = TOY_OPCODE_TGSI_TXQ;
352 break;
353 case TGSI_OPCODE_TXQ_LZ:
354 opcode = TOY_OPCODE_TGSI_TXQ_LZ;
355 break;
356 case TGSI_OPCODE_TEX2:
357 opcode = TOY_OPCODE_TGSI_TEX2;
358 break;
359 case TGSI_OPCODE_TXB2:
360 opcode = TOY_OPCODE_TGSI_TXB2;
361 break;
362 case TGSI_OPCODE_TXL2:
363 opcode = TOY_OPCODE_TGSI_TXL2;
364 break;
365 default:
366 assert(!"unsupported texturing opcode");
367 return;
368 break;
369 }
370
371 assert(tgsi_inst->Instruction.Texture);
372
373 inst = tc_add(tc);
374 inst->opcode = opcode;
375 inst->tex.target = tgsi_inst->Texture.Texture;
376
377 assert(tgsi_inst->Instruction.NumSrcRegs <= ARRAY_SIZE(inst->src));
378 assert(tgsi_inst->Instruction.NumDstRegs == 1);
379
380 inst->dst = dst[0];
381 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
382 inst->src[i] = src[i];
383
384 for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++)
385 tc_fail(tc, "texelFetchOffset unsupported");
386 }
387
388 static void
389 aos_sample(struct toy_compiler *tc,
390 const struct tgsi_full_instruction *tgsi_inst,
391 struct toy_dst *dst,
392 struct toy_src *src)
393 {
394 struct toy_inst *inst;
395 enum toy_opcode opcode;
396 int i;
397
398 assert(!"sampling untested");
399
400 switch (tgsi_inst->Instruction.Opcode) {
401 case TGSI_OPCODE_SAMPLE:
402 opcode = TOY_OPCODE_TGSI_SAMPLE;
403 break;
404 case TGSI_OPCODE_SAMPLE_I:
405 opcode = TOY_OPCODE_TGSI_SAMPLE_I;
406 break;
407 case TGSI_OPCODE_SAMPLE_I_MS:
408 opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS;
409 break;
410 case TGSI_OPCODE_SAMPLE_B:
411 opcode = TOY_OPCODE_TGSI_SAMPLE_B;
412 break;
413 case TGSI_OPCODE_SAMPLE_C:
414 opcode = TOY_OPCODE_TGSI_SAMPLE_C;
415 break;
416 case TGSI_OPCODE_SAMPLE_C_LZ:
417 opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ;
418 break;
419 case TGSI_OPCODE_SAMPLE_D:
420 opcode = TOY_OPCODE_TGSI_SAMPLE_D;
421 break;
422 case TGSI_OPCODE_SAMPLE_L:
423 opcode = TOY_OPCODE_TGSI_SAMPLE_L;
424 break;
425 case TGSI_OPCODE_GATHER4:
426 opcode = TOY_OPCODE_TGSI_GATHER4;
427 break;
428 case TGSI_OPCODE_SVIEWINFO:
429 opcode = TOY_OPCODE_TGSI_SVIEWINFO;
430 break;
431 case TGSI_OPCODE_SAMPLE_POS:
432 opcode = TOY_OPCODE_TGSI_SAMPLE_POS;
433 break;
434 case TGSI_OPCODE_SAMPLE_INFO:
435 opcode = TOY_OPCODE_TGSI_SAMPLE_INFO;
436 break;
437 default:
438 assert(!"unsupported sampling opcode");
439 return;
440 break;
441 }
442
443 inst = tc_add(tc);
444 inst->opcode = opcode;
445
446 assert(tgsi_inst->Instruction.NumSrcRegs <= ARRAY_SIZE(inst->src));
447 assert(tgsi_inst->Instruction.NumDstRegs == 1);
448
449 inst->dst = dst[0];
450 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
451 inst->src[i] = src[i];
452 }
453
454 static void
455 aos_LIT(struct toy_compiler *tc,
456 const struct tgsi_full_instruction *tgsi_inst,
457 struct toy_dst *dst,
458 struct toy_src *src)
459 {
460 struct toy_inst *inst;
461
462 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));
463
464 if (!(dst[0].writemask & TOY_WRITEMASK_YZ))
465 return;
466
467 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));
468
469 tc_CMP(tc, tdst_null(),
470 tsrc_swizzle1(src[0], TOY_SWIZZLE_X),
471 tsrc_imm_f(0.0f),
472 GEN6_COND_G);
473
474 inst = tc_MOV(tc,
475 tdst_writemask(dst[0], TOY_WRITEMASK_Y),
476 tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
477 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
478
479 /* clamp W to (-128, 128)? */
480 inst = tc_POW(tc,
481 tdst_writemask(dst[0], TOY_WRITEMASK_Z),
482 tsrc_swizzle1(src[0], TOY_SWIZZLE_Y),
483 tsrc_swizzle1(src[0], TOY_SWIZZLE_W));
484 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
485 }
486
487 static void
488 aos_EXP(struct toy_compiler *tc,
489 const struct tgsi_full_instruction *tgsi_inst,
490 struct toy_dst *dst,
491 struct toy_src *src)
492 {
493 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
494
495 if (dst[0].writemask & TOY_WRITEMASK_X) {
496 struct toy_dst tmp =
497 tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
498
499 tc_RNDD(tc, tmp, src0);
500
501 /* construct the floating point number manually */
502 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
503 tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)),
504 tsrc_from(tmp), tsrc_imm_d(23));
505 }
506
507 tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0);
508 tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
509 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
510 }
511
512 static void
513 aos_LOG(struct toy_compiler *tc,
514 const struct tgsi_full_instruction *tgsi_inst,
515 struct toy_dst *dst,
516 struct toy_src *src)
517 {
518 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
519
520 if (dst[0].writemask & TOY_WRITEMASK_XY) {
521 struct toy_dst tmp;
522
523 tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
524
525 /* exponent */
526 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23));
527 tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
528 tsrc_from(tmp), tsrc_imm_d(-127));
529
530 /* mantissa */
531 tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1));
532 tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
533 tsrc_from(tmp), tsrc_imm_d(127 << 23));
534 }
535
536 tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
537 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
538 }
539
540 static void
541 aos_DST(struct toy_compiler *tc,
542 const struct tgsi_full_instruction *tgsi_inst,
543 struct toy_dst *dst,
544 struct toy_src *src)
545 {
546 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f));
547 tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]);
548 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]);
549 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]);
550 }
551
552 static void
553 aos_LRP(struct toy_compiler *tc,
554 const struct tgsi_full_instruction *tgsi_inst,
555 struct toy_dst *dst,
556 struct toy_src *src)
557 {
558 struct toy_dst tmp = tc_alloc_tmp(tc);
559
560 tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f));
561 tc_MUL(tc, tmp, tsrc_from(tmp), src[2]);
562 tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp));
563 }
564
565 static void
566 aos_DP2A(struct toy_compiler *tc,
567 const struct tgsi_full_instruction *tgsi_inst,
568 struct toy_dst *dst,
569 struct toy_src *src)
570 {
571 struct toy_dst tmp = tc_alloc_tmp(tc);
572
573 assert(!"DP2A untested");
574
575 tc_DP2(tc, tmp, src[0], src[1]);
576 tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]);
577 }
578
579 static void
580 aos_CLAMP(struct toy_compiler *tc,
581 const struct tgsi_full_instruction *tgsi_inst,
582 struct toy_dst *dst,
583 struct toy_src *src)
584 {
585 assert(!"CLAMP untested");
586
587 tc_SEL(tc, dst[0], src[0], src[1], GEN6_COND_GE);
588 tc_SEL(tc, dst[0], src[2], tsrc_from(dst[0]), GEN6_COND_L);
589 }
590
591 static void
592 aos_XPD(struct toy_compiler *tc,
593 const struct tgsi_full_instruction *tgsi_inst,
594 struct toy_dst *dst,
595 struct toy_src *src)
596 {
597 struct toy_dst tmp = tc_alloc_tmp(tc);
598
599 tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ),
600 tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
601 TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
602 tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
603 TOY_SWIZZLE_X, TOY_SWIZZLE_W));
604
605 tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
606 tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
607 TOY_SWIZZLE_X, TOY_SWIZZLE_W),
608 tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
609 TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
610 tsrc_negate(tsrc_from(tmp)));
611
612 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W),
613 tsrc_imm_f(1.0f));
614 }
615
616 static void
617 aos_PK2H(struct toy_compiler *tc,
618 const struct tgsi_full_instruction *tgsi_inst,
619 struct toy_dst *dst,
620 struct toy_src *src)
621 {
622 const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
623 const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
624 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
625
626 assert(!"PK2H untested");
627
628 tc_SHL(tc, tmp, h2, tsrc_imm_ud(16));
629 tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp));
630 }
631
632 static void
633 aos_UP2H(struct toy_compiler *tc,
634 const struct tgsi_full_instruction *tgsi_inst,
635 struct toy_dst *dst,
636 struct toy_src *src)
637 {
638 assert(!"UP2H untested");
639
640 tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ),
641 tsrc_ud(src[0]), tsrc_imm_ud(0xffff));
642 tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW),
643 tsrc_ud(src[0]), tsrc_imm_ud(16));
644 }
645
646 static void
647 aos_SCS(struct toy_compiler *tc,
648 const struct tgsi_full_instruction *tgsi_inst,
649 struct toy_dst *dst,
650 struct toy_src *src)
651 {
652 assert(!"SCS untested");
653
654 tc_add1(tc, TOY_OPCODE_COS,
655 tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]);
656
657 tc_add1(tc, TOY_OPCODE_SIN,
658 tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]);
659
660 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
661 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
662 }
663
664 static void
665 aos_DIV(struct toy_compiler *tc,
666 const struct tgsi_full_instruction *tgsi_inst,
667 struct toy_dst *dst,
668 struct toy_src *src)
669 {
670 struct toy_dst tmp = tc_alloc_tmp(tc);
671
672 assert(!"DIV untested");
673
674 tc_INV(tc, tmp, src[1]);
675 tc_MUL(tc, dst[0], src[0], tsrc_from(tmp));
676 }
677
678 static void
679 aos_BRK(struct toy_compiler *tc,
680 const struct tgsi_full_instruction *tgsi_inst,
681 struct toy_dst *dst,
682 struct toy_src *src)
683 {
684 tc_add0(tc, GEN6_OPCODE_BREAK);
685 }
686
687 static void
688 aos_CEIL(struct toy_compiler *tc,
689 const struct tgsi_full_instruction *tgsi_inst,
690 struct toy_dst *dst,
691 struct toy_src *src)
692 {
693 struct toy_dst tmp = tc_alloc_tmp(tc);
694
695 tc_RNDD(tc, tmp, tsrc_negate(src[0]));
696 tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp)));
697 }
698
699 static void
700 aos_SAD(struct toy_compiler *tc,
701 const struct tgsi_full_instruction *tgsi_inst,
702 struct toy_dst *dst,
703 struct toy_src *src)
704 {
705 struct toy_dst tmp = tc_alloc_tmp(tc);
706
707 assert(!"SAD untested");
708
709 tc_ADD(tc, tmp, src[0], tsrc_negate(src[1]));
710 tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]);
711 }
712
713 static void
714 aos_CONT(struct toy_compiler *tc,
715 const struct tgsi_full_instruction *tgsi_inst,
716 struct toy_dst *dst,
717 struct toy_src *src)
718 {
719 tc_add0(tc, GEN6_OPCODE_CONT);
720 }
721
722 static void
723 aos_BGNLOOP(struct toy_compiler *tc,
724 const struct tgsi_full_instruction *tgsi_inst,
725 struct toy_dst *dst,
726 struct toy_src *src)
727 {
728 struct toy_inst *inst;
729
730 inst = tc_add0(tc, TOY_OPCODE_DO);
731 /* this is just a marker */
732 inst->marker = true;
733 }
734
735 static void
736 aos_ENDLOOP(struct toy_compiler *tc,
737 const struct tgsi_full_instruction *tgsi_inst,
738 struct toy_dst *dst,
739 struct toy_src *src)
740 {
741 tc_add0(tc, GEN6_OPCODE_WHILE);
742 }
743
744 static void
745 aos_unsupported(struct toy_compiler *tc,
746 const struct tgsi_full_instruction *tgsi_inst,
747 struct toy_dst *dst,
748 struct toy_src *src)
749 {
750 const char *name = tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode);
751
752 ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name);
753
754 tc_fail(tc, "unsupported TGSI instruction");
755 }
756
757 static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
758 [TGSI_OPCODE_ARL] = aos_simple,
759 [TGSI_OPCODE_MOV] = aos_simple,
760 [TGSI_OPCODE_LIT] = aos_LIT,
761 [TGSI_OPCODE_RCP] = aos_simple,
762 [TGSI_OPCODE_RSQ] = aos_simple,
763 [TGSI_OPCODE_EXP] = aos_EXP,
764 [TGSI_OPCODE_LOG] = aos_LOG,
765 [TGSI_OPCODE_MUL] = aos_simple,
766 [TGSI_OPCODE_ADD] = aos_simple,
767 [TGSI_OPCODE_DP3] = aos_simple,
768 [TGSI_OPCODE_DP4] = aos_simple,
769 [TGSI_OPCODE_DST] = aos_DST,
770 [TGSI_OPCODE_MIN] = aos_simple,
771 [TGSI_OPCODE_MAX] = aos_simple,
772 [TGSI_OPCODE_SLT] = aos_set_on_cond,
773 [TGSI_OPCODE_SGE] = aos_set_on_cond,
774 [TGSI_OPCODE_MAD] = aos_simple,
775 [TGSI_OPCODE_LRP] = aos_LRP,
776 [TGSI_OPCODE_SQRT] = aos_simple,
777 [TGSI_OPCODE_DP2A] = aos_DP2A,
778 [TGSI_OPCODE_FRC] = aos_simple,
779 [TGSI_OPCODE_CLAMP] = aos_CLAMP,
780 [TGSI_OPCODE_FLR] = aos_simple,
781 [TGSI_OPCODE_ROUND] = aos_simple,
782 [TGSI_OPCODE_EX2] = aos_simple,
783 [TGSI_OPCODE_LG2] = aos_simple,
784 [TGSI_OPCODE_POW] = aos_simple,
785 [TGSI_OPCODE_XPD] = aos_XPD,
786 [TGSI_OPCODE_DPH] = aos_simple,
787 [TGSI_OPCODE_COS] = aos_simple,
788 [TGSI_OPCODE_DDX] = aos_unsupported,
789 [TGSI_OPCODE_DDY] = aos_unsupported,
790 [TGSI_OPCODE_KILL] = aos_simple,
791 [TGSI_OPCODE_PK2H] = aos_PK2H,
792 [TGSI_OPCODE_PK2US] = aos_unsupported,
793 [TGSI_OPCODE_PK4B] = aos_unsupported,
794 [TGSI_OPCODE_PK4UB] = aos_unsupported,
795 [TGSI_OPCODE_SEQ] = aos_set_on_cond,
796 [TGSI_OPCODE_SGT] = aos_set_on_cond,
797 [TGSI_OPCODE_SIN] = aos_simple,
798 [TGSI_OPCODE_SLE] = aos_set_on_cond,
799 [TGSI_OPCODE_SNE] = aos_set_on_cond,
800 [TGSI_OPCODE_TEX] = aos_tex,
801 [TGSI_OPCODE_TXD] = aos_tex,
802 [TGSI_OPCODE_TXP] = aos_tex,
803 [TGSI_OPCODE_UP2H] = aos_UP2H,
804 [TGSI_OPCODE_UP2US] = aos_unsupported,
805 [TGSI_OPCODE_UP4B] = aos_unsupported,
806 [TGSI_OPCODE_UP4UB] = aos_unsupported,
807 [TGSI_OPCODE_ARR] = aos_simple,
808 [TGSI_OPCODE_CAL] = aos_unsupported,
809 [TGSI_OPCODE_RET] = aos_unsupported,
810 [TGSI_OPCODE_SSG] = aos_set_sign,
811 [TGSI_OPCODE_CMP] = aos_compare,
812 [TGSI_OPCODE_SCS] = aos_SCS,
813 [TGSI_OPCODE_TXB] = aos_tex,
814 [TGSI_OPCODE_DIV] = aos_DIV,
815 [TGSI_OPCODE_DP2] = aos_simple,
816 [TGSI_OPCODE_TXL] = aos_tex,
817 [TGSI_OPCODE_BRK] = aos_BRK,
818 [TGSI_OPCODE_IF] = aos_simple,
819 [TGSI_OPCODE_UIF] = aos_simple,
820 [TGSI_OPCODE_ELSE] = aos_simple,
821 [TGSI_OPCODE_ENDIF] = aos_simple,
822 [TGSI_OPCODE_PUSHA] = aos_unsupported,
823 [TGSI_OPCODE_POPA] = aos_unsupported,
824 [TGSI_OPCODE_CEIL] = aos_CEIL,
825 [TGSI_OPCODE_I2F] = aos_simple,
826 [TGSI_OPCODE_NOT] = aos_simple,
827 [TGSI_OPCODE_TRUNC] = aos_simple,
828 [TGSI_OPCODE_SHL] = aos_simple,
829 [TGSI_OPCODE_AND] = aos_simple,
830 [TGSI_OPCODE_OR] = aos_simple,
831 [TGSI_OPCODE_MOD] = aos_simple,
832 [TGSI_OPCODE_XOR] = aos_simple,
833 [TGSI_OPCODE_SAD] = aos_SAD,
834 [TGSI_OPCODE_TXF] = aos_tex,
835 [TGSI_OPCODE_TXQ] = aos_tex,
836 [TGSI_OPCODE_CONT] = aos_CONT,
837 [TGSI_OPCODE_EMIT] = aos_simple,
838 [TGSI_OPCODE_ENDPRIM] = aos_simple,
839 [TGSI_OPCODE_BGNLOOP] = aos_BGNLOOP,
840 [TGSI_OPCODE_BGNSUB] = aos_unsupported,
841 [TGSI_OPCODE_ENDLOOP] = aos_ENDLOOP,
842 [TGSI_OPCODE_ENDSUB] = aos_unsupported,
843 [TGSI_OPCODE_TXQ_LZ] = aos_tex,
844 [TGSI_OPCODE_NOP] = aos_simple,
845 [TGSI_OPCODE_FSEQ] = aos_set_on_cond,
846 [TGSI_OPCODE_FSGE] = aos_set_on_cond,
847 [TGSI_OPCODE_FSLT] = aos_set_on_cond,
848 [TGSI_OPCODE_FSNE] = aos_set_on_cond,
849 [TGSI_OPCODE_CALLNZ] = aos_unsupported,
850 [TGSI_OPCODE_BREAKC] = aos_unsupported,
851 [TGSI_OPCODE_KILL_IF] = aos_simple,
852 [TGSI_OPCODE_END] = aos_simple,
853 [TGSI_OPCODE_F2I] = aos_simple,
854 [TGSI_OPCODE_IDIV] = aos_simple,
855 [TGSI_OPCODE_IMAX] = aos_simple,
856 [TGSI_OPCODE_IMIN] = aos_simple,
857 [TGSI_OPCODE_INEG] = aos_simple,
858 [TGSI_OPCODE_ISGE] = aos_set_on_cond,
859 [TGSI_OPCODE_ISHR] = aos_simple,
860 [TGSI_OPCODE_ISLT] = aos_set_on_cond,
861 [TGSI_OPCODE_F2U] = aos_simple,
862 [TGSI_OPCODE_U2F] = aos_simple,
863 [TGSI_OPCODE_UADD] = aos_simple,
864 [TGSI_OPCODE_UDIV] = aos_simple,
865 [TGSI_OPCODE_UMAD] = aos_simple,
866 [TGSI_OPCODE_UMAX] = aos_simple,
867 [TGSI_OPCODE_UMIN] = aos_simple,
868 [TGSI_OPCODE_UMOD] = aos_simple,
869 [TGSI_OPCODE_UMUL] = aos_simple,
870 [TGSI_OPCODE_USEQ] = aos_set_on_cond,
871 [TGSI_OPCODE_USGE] = aos_set_on_cond,
872 [TGSI_OPCODE_USHR] = aos_simple,
873 [TGSI_OPCODE_USLT] = aos_set_on_cond,
874 [TGSI_OPCODE_USNE] = aos_set_on_cond,
875 [TGSI_OPCODE_SWITCH] = aos_unsupported,
876 [TGSI_OPCODE_CASE] = aos_unsupported,
877 [TGSI_OPCODE_DEFAULT] = aos_unsupported,
878 [TGSI_OPCODE_ENDSWITCH] = aos_unsupported,
879 [TGSI_OPCODE_SAMPLE] = aos_sample,
880 [TGSI_OPCODE_SAMPLE_I] = aos_sample,
881 [TGSI_OPCODE_SAMPLE_I_MS] = aos_sample,
882 [TGSI_OPCODE_SAMPLE_B] = aos_sample,
883 [TGSI_OPCODE_SAMPLE_C] = aos_sample,
884 [TGSI_OPCODE_SAMPLE_C_LZ] = aos_sample,
885 [TGSI_OPCODE_SAMPLE_D] = aos_sample,
886 [TGSI_OPCODE_SAMPLE_L] = aos_sample,
887 [TGSI_OPCODE_GATHER4] = aos_sample,
888 [TGSI_OPCODE_SVIEWINFO] = aos_sample,
889 [TGSI_OPCODE_SAMPLE_POS] = aos_sample,
890 [TGSI_OPCODE_SAMPLE_INFO] = aos_sample,
891 [TGSI_OPCODE_UARL] = aos_simple,
892 [TGSI_OPCODE_UCMP] = aos_compare,
893 [TGSI_OPCODE_IABS] = aos_simple,
894 [TGSI_OPCODE_ISSG] = aos_set_sign,
895 [TGSI_OPCODE_LOAD] = aos_unsupported,
896 [TGSI_OPCODE_STORE] = aos_unsupported,
897 [TGSI_OPCODE_MFENCE] = aos_unsupported,
898 [TGSI_OPCODE_LFENCE] = aos_unsupported,
899 [TGSI_OPCODE_SFENCE] = aos_unsupported,
900 [TGSI_OPCODE_BARRIER] = aos_unsupported,
901 [TGSI_OPCODE_ATOMUADD] = aos_unsupported,
902 [TGSI_OPCODE_ATOMXCHG] = aos_unsupported,
903 [TGSI_OPCODE_ATOMCAS] = aos_unsupported,
904 [TGSI_OPCODE_ATOMAND] = aos_unsupported,
905 [TGSI_OPCODE_ATOMOR] = aos_unsupported,
906 [TGSI_OPCODE_ATOMXOR] = aos_unsupported,
907 [TGSI_OPCODE_ATOMUMIN] = aos_unsupported,
908 [TGSI_OPCODE_ATOMUMAX] = aos_unsupported,
909 [TGSI_OPCODE_ATOMIMIN] = aos_unsupported,
910 [TGSI_OPCODE_ATOMIMAX] = aos_unsupported,
911 [TGSI_OPCODE_TEX2] = aos_tex,
912 [TGSI_OPCODE_TXB2] = aos_tex,
913 [TGSI_OPCODE_TXL2] = aos_tex,
914 };
915
916 static void
917 soa_passthrough(struct toy_compiler *tc,
918 const struct tgsi_full_instruction *tgsi_inst,
919 struct toy_dst *dst_,
920 struct toy_src *src_)
921 {
922 const toy_tgsi_translate translate =
923 aos_translate_table[tgsi_inst->Instruction.Opcode];
924
925 translate(tc, tgsi_inst, dst_, src_);
926 }
927
928 static void
929 soa_per_channel(struct toy_compiler *tc,
930 const struct tgsi_full_instruction *tgsi_inst,
931 struct toy_dst *dst_,
932 struct toy_src *src_)
933 {
934 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4];
935 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
936 int i, ch;
937
938 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
939 tdst_transpose(dst_[i], dst[i]);
940 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
941 tsrc_transpose(src_[i], src[i]);
942
943 /* emit the same instruction four times for the four channels */
944 for (ch = 0; ch < 4; ch++) {
945 struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS];
946 struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS];
947
948 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
949 aos_dst[i] = dst[i][ch];
950 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
951 aos_src[i] = src[i][ch];
952
953 aos_translate_table[tgsi_inst->Instruction.Opcode](tc,
954 tgsi_inst, aos_dst, aos_src);
955 }
956 }
957
958 static void
959 soa_scalar_replicate(struct toy_compiler *tc,
960 const struct tgsi_full_instruction *tgsi_inst,
961 struct toy_dst *dst_,
962 struct toy_src *src_)
963 {
964 struct toy_dst dst0[4], tmp;
965 struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS];
966 int opcode, i;
967
968 assert(tgsi_inst->Instruction.NumDstRegs == 1);
969
970 tdst_transpose(dst_[0], dst0);
971 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
972 struct toy_src tmp[4];
973
974 tsrc_transpose(src_[i], tmp);
975 /* only the X channels */
976 srcx[i] = tmp[0];
977 }
978
979 tmp = tc_alloc_tmp(tc);
980
981 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
982 assert(opcode);
983
984 switch (tgsi_inst->Instruction.Opcode) {
985 case TGSI_OPCODE_RCP:
986 case TGSI_OPCODE_RSQ:
987 case TGSI_OPCODE_SQRT:
988 case TGSI_OPCODE_EX2:
989 case TGSI_OPCODE_LG2:
990 case TGSI_OPCODE_COS:
991 case TGSI_OPCODE_SIN:
992 tc_add1(tc, opcode, tmp, srcx[0]);
993 break;
994 case TGSI_OPCODE_POW:
995 tc_add2(tc, opcode, tmp, srcx[0], srcx[1]);
996 break;
997 default:
998 assert(!"invalid soa_scalar_replicate() call");
999 return;
1000 }
1001
1002 /* replicate the result */
1003 for (i = 0; i < 4; i++)
1004 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1005 }
1006
1007 static void
1008 soa_dot_product(struct toy_compiler *tc,
1009 const struct tgsi_full_instruction *tgsi_inst,
1010 struct toy_dst *dst_,
1011 struct toy_src *src_)
1012 {
1013 struct toy_dst dst0[4], tmp;
1014 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
1015 int i;
1016
1017 tdst_transpose(dst_[0], dst0);
1018 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
1019 tsrc_transpose(src_[i], src[i]);
1020
1021 tmp = tc_alloc_tmp(tc);
1022
1023 switch (tgsi_inst->Instruction.Opcode) {
1024 case TGSI_OPCODE_DP2:
1025 tc_MUL(tc, tmp, src[0][1], src[1][1]);
1026 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1027 break;
1028 case TGSI_OPCODE_DP2A:
1029 tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]);
1030 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1031 break;
1032 case TGSI_OPCODE_DP3:
1033 tc_MUL(tc, tmp, src[0][2], src[1][2]);
1034 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1035 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1036 break;
1037 case TGSI_OPCODE_DPH:
1038 tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]);
1039 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1040 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1041 break;
1042 case TGSI_OPCODE_DP4:
1043 tc_MUL(tc, tmp, src[0][3], src[1][3]);
1044 tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp));
1045 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1046 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1047 break;
1048 default:
1049 assert(!"invalid soa_dot_product() call");
1050 return;
1051 }
1052
1053 for (i = 0; i < 4; i++)
1054 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1055 }
1056
1057 static void
1058 soa_partial_derivative(struct toy_compiler *tc,
1059 const struct tgsi_full_instruction *tgsi_inst,
1060 struct toy_dst *dst_,
1061 struct toy_src *src_)
1062 {
1063 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX)
1064 tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]);
1065 else
1066 tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]);
1067 }
1068
1069 static void
1070 soa_if(struct toy_compiler *tc,
1071 const struct tgsi_full_instruction *tgsi_inst,
1072 struct toy_dst *dst_,
1073 struct toy_src *src_)
1074 {
1075 struct toy_src src0[4];
1076
1077 assert(tsrc_is_swizzle1(src_[0]));
1078 tsrc_transpose(src_[0], src0);
1079
1080 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF)
1081 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_NZ);
1082 else
1083 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), GEN6_COND_NZ);
1084 }
1085
1086 static void
1087 soa_LIT(struct toy_compiler *tc,
1088 const struct tgsi_full_instruction *tgsi_inst,
1089 struct toy_dst *dst_,
1090 struct toy_src *src_)
1091 {
1092 struct toy_inst *inst;
1093 struct toy_dst dst0[4];
1094 struct toy_src src0[4];
1095
1096 tdst_transpose(dst_[0], dst0);
1097 tsrc_transpose(src_[0], src0);
1098
1099 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1100 tc_MOV(tc, dst0[1], src0[0]);
1101 tc_POW(tc, dst0[2], src0[1], src0[3]);
1102 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1103
1104 /*
1105 * POW is calculated first because math with pred_ctrl is broken here.
1106 * But, why?
1107 */
1108 tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_L);
1109 inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f));
1110 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
1111 inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1112 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
1113 }
1114
1115 static void
1116 soa_EXP(struct toy_compiler *tc,
1117 const struct tgsi_full_instruction *tgsi_inst,
1118 struct toy_dst *dst_,
1119 struct toy_src *src_)
1120 {
1121 struct toy_dst dst0[4];
1122 struct toy_src src0[4];
1123
1124 assert(!"SoA EXP untested");
1125
1126 tdst_transpose(dst_[0], dst0);
1127 tsrc_transpose(src_[0], src0);
1128
1129 if (!tdst_is_null(dst0[0])) {
1130 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1131
1132 tc_RNDD(tc, tmp, src0[0]);
1133
1134 /* construct the floating point number manually */
1135 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
1136 tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23));
1137 }
1138
1139 tc_FRC(tc, dst0[1], src0[0]);
1140 tc_EXP(tc, dst0[2], src0[0]);
1141 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1142 }
1143
1144 static void
1145 soa_LOG(struct toy_compiler *tc,
1146 const struct tgsi_full_instruction *tgsi_inst,
1147 struct toy_dst *dst_,
1148 struct toy_src *src_)
1149 {
1150 struct toy_dst dst0[4];
1151 struct toy_src src0[4];
1152
1153 assert(!"SoA LOG untested");
1154
1155 tdst_transpose(dst_[0], dst0);
1156 tsrc_transpose(src_[0], src0);
1157
1158 if (dst_[0].writemask & TOY_WRITEMASK_XY) {
1159 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1160
1161 /* exponent */
1162 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23));
1163 tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127));
1164
1165 /* mantissa */
1166 tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1));
1167 tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23));
1168 }
1169
1170 tc_LOG(tc, dst0[2], src0[0]);
1171 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1172 }
1173
1174 static void
1175 soa_DST(struct toy_compiler *tc,
1176 const struct tgsi_full_instruction *tgsi_inst,
1177 struct toy_dst *dst_,
1178 struct toy_src *src_)
1179 {
1180 struct toy_dst dst0[4];
1181 struct toy_src src[2][4];
1182
1183 tdst_transpose(dst_[0], dst0);
1184 tsrc_transpose(src_[0], src[0]);
1185 tsrc_transpose(src_[1], src[1]);
1186
1187 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1188 tc_MUL(tc, dst0[1], src[0][1], src[1][1]);
1189 tc_MOV(tc, dst0[2], src[0][2]);
1190 tc_MOV(tc, dst0[3], src[1][3]);
1191 }
1192
1193 static void
1194 soa_XPD(struct toy_compiler *tc,
1195 const struct tgsi_full_instruction *tgsi_inst,
1196 struct toy_dst *dst_,
1197 struct toy_src *src_)
1198 {
1199 struct toy_dst dst0[4];
1200 struct toy_src src[2][4];
1201
1202 tdst_transpose(dst_[0], dst0);
1203 tsrc_transpose(src_[0], src[0]);
1204 tsrc_transpose(src_[1], src[1]);
1205
1206 /* dst.x = src0.y * src1.z - src1.y * src0.z */
1207 tc_MUL(tc, dst0[0], src[0][2], src[1][1]);
1208 tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0])));
1209
1210 /* dst.y = src0.z * src1.x - src1.z * src0.x */
1211 tc_MUL(tc, dst0[1], src[0][0], src[1][2]);
1212 tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1])));
1213
1214 /* dst.z = src0.x * src1.y - src1.x * src0.y */
1215 tc_MUL(tc, dst0[2], src[0][1], src[1][0]);
1216 tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2])));
1217
1218 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1219 }
1220
1221 static void
1222 soa_PK2H(struct toy_compiler *tc,
1223 const struct tgsi_full_instruction *tgsi_inst,
1224 struct toy_dst *dst_,
1225 struct toy_src *src_)
1226 {
1227 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
1228 struct toy_dst dst0[4];
1229 struct toy_src src0[4];
1230 int i;
1231
1232 assert(!"SoA PK2H untested");
1233
1234 tdst_transpose(dst_[0], dst0);
1235 tsrc_transpose(src_[0], src0);
1236
1237 tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16));
1238 tc_OR(tc, tmp, src0[0], tsrc_from(tmp));
1239
1240 for (i = 0; i < 4; i++)
1241 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1242 }
1243
1244 static void
1245 soa_UP2H(struct toy_compiler *tc,
1246 const struct tgsi_full_instruction *tgsi_inst,
1247 struct toy_dst *dst_,
1248 struct toy_src *src_)
1249 {
1250 struct toy_dst dst0[4];
1251 struct toy_src src0[4];
1252
1253 assert(!"SoA UP2H untested");
1254
1255 tdst_transpose(dst_[0], dst0);
1256 tsrc_transpose(src_[0], src0);
1257
1258 tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff));
1259 tc_SHR(tc, tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16));
1260 tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff));
1261 tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16));
1262
1263 }
1264
1265 static void
1266 soa_SCS(struct toy_compiler *tc,
1267 const struct tgsi_full_instruction *tgsi_inst,
1268 struct toy_dst *dst_,
1269 struct toy_src *src_)
1270 {
1271 struct toy_dst dst0[4];
1272 struct toy_src src0[4];
1273
1274 tdst_transpose(dst_[0], dst0);
1275 tsrc_transpose(src_[0], src0);
1276
1277 tc_add1(tc, TOY_OPCODE_COS, dst0[0], src0[0]);
1278 tc_add1(tc, TOY_OPCODE_SIN, dst0[1], src0[0]);
1279 tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1280 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1281 }
1282
1283 static void
1284 soa_unsupported(struct toy_compiler *tc,
1285 const struct tgsi_full_instruction *tgsi_inst,
1286 struct toy_dst *dst_,
1287 struct toy_src *src_)
1288 {
1289 const struct tgsi_opcode_info *info =
1290 tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode);
1291
1292 ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
1293 info->mnemonic);
1294
1295 tc_fail(tc, "unsupported TGSI instruction in SoA form");
1296 }
1297
1298 static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
1299 [TGSI_OPCODE_ARL] = soa_per_channel,
1300 [TGSI_OPCODE_MOV] = soa_per_channel,
1301 [TGSI_OPCODE_LIT] = soa_LIT,
1302 [TGSI_OPCODE_RCP] = soa_scalar_replicate,
1303 [TGSI_OPCODE_RSQ] = soa_scalar_replicate,
1304 [TGSI_OPCODE_EXP] = soa_EXP,
1305 [TGSI_OPCODE_LOG] = soa_LOG,
1306 [TGSI_OPCODE_MUL] = soa_per_channel,
1307 [TGSI_OPCODE_ADD] = soa_per_channel,
1308 [TGSI_OPCODE_DP3] = soa_dot_product,
1309 [TGSI_OPCODE_DP4] = soa_dot_product,
1310 [TGSI_OPCODE_DST] = soa_DST,
1311 [TGSI_OPCODE_MIN] = soa_per_channel,
1312 [TGSI_OPCODE_MAX] = soa_per_channel,
1313 [TGSI_OPCODE_SLT] = soa_per_channel,
1314 [TGSI_OPCODE_SGE] = soa_per_channel,
1315 [TGSI_OPCODE_MAD] = soa_per_channel,
1316 [TGSI_OPCODE_LRP] = soa_per_channel,
1317 [TGSI_OPCODE_SQRT] = soa_scalar_replicate,
1318 [TGSI_OPCODE_DP2A] = soa_dot_product,
1319 [TGSI_OPCODE_FRC] = soa_per_channel,
1320 [TGSI_OPCODE_CLAMP] = soa_per_channel,
1321 [TGSI_OPCODE_FLR] = soa_per_channel,
1322 [TGSI_OPCODE_ROUND] = soa_per_channel,
1323 [TGSI_OPCODE_EX2] = soa_scalar_replicate,
1324 [TGSI_OPCODE_LG2] = soa_scalar_replicate,
1325 [TGSI_OPCODE_POW] = soa_scalar_replicate,
1326 [TGSI_OPCODE_XPD] = soa_XPD,
1327 [TGSI_OPCODE_DPH] = soa_dot_product,
1328 [TGSI_OPCODE_COS] = soa_scalar_replicate,
1329 [TGSI_OPCODE_DDX] = soa_partial_derivative,
1330 [TGSI_OPCODE_DDY] = soa_partial_derivative,
1331 [TGSI_OPCODE_KILL] = soa_passthrough,
1332 [TGSI_OPCODE_PK2H] = soa_PK2H,
1333 [TGSI_OPCODE_PK2US] = soa_unsupported,
1334 [TGSI_OPCODE_PK4B] = soa_unsupported,
1335 [TGSI_OPCODE_PK4UB] = soa_unsupported,
1336 [TGSI_OPCODE_SEQ] = soa_per_channel,
1337 [TGSI_OPCODE_SGT] = soa_per_channel,
1338 [TGSI_OPCODE_SIN] = soa_scalar_replicate,
1339 [TGSI_OPCODE_SLE] = soa_per_channel,
1340 [TGSI_OPCODE_SNE] = soa_per_channel,
1341 [TGSI_OPCODE_TEX] = soa_passthrough,
1342 [TGSI_OPCODE_TXD] = soa_passthrough,
1343 [TGSI_OPCODE_TXP] = soa_passthrough,
1344 [TGSI_OPCODE_UP2H] = soa_UP2H,
1345 [TGSI_OPCODE_UP2US] = soa_unsupported,
1346 [TGSI_OPCODE_UP4B] = soa_unsupported,
1347 [TGSI_OPCODE_UP4UB] = soa_unsupported,
1348 [TGSI_OPCODE_ARR] = soa_per_channel,
1349 [TGSI_OPCODE_CAL] = soa_unsupported,
1350 [TGSI_OPCODE_RET] = soa_unsupported,
1351 [TGSI_OPCODE_SSG] = soa_per_channel,
1352 [TGSI_OPCODE_CMP] = soa_per_channel,
1353 [TGSI_OPCODE_SCS] = soa_SCS,
1354 [TGSI_OPCODE_TXB] = soa_passthrough,
1355 [TGSI_OPCODE_DIV] = soa_per_channel,
1356 [TGSI_OPCODE_DP2] = soa_dot_product,
1357 [TGSI_OPCODE_TXL] = soa_passthrough,
1358 [TGSI_OPCODE_BRK] = soa_passthrough,
1359 [TGSI_OPCODE_IF] = soa_if,
1360 [TGSI_OPCODE_UIF] = soa_if,
1361 [TGSI_OPCODE_ELSE] = soa_passthrough,
1362 [TGSI_OPCODE_ENDIF] = soa_passthrough,
1363 [TGSI_OPCODE_PUSHA] = soa_unsupported,
1364 [TGSI_OPCODE_POPA] = soa_unsupported,
1365 [TGSI_OPCODE_CEIL] = soa_per_channel,
1366 [TGSI_OPCODE_I2F] = soa_per_channel,
1367 [TGSI_OPCODE_NOT] = soa_per_channel,
1368 [TGSI_OPCODE_TRUNC] = soa_per_channel,
1369 [TGSI_OPCODE_SHL] = soa_per_channel,
1370 [TGSI_OPCODE_AND] = soa_per_channel,
1371 [TGSI_OPCODE_OR] = soa_per_channel,
1372 [TGSI_OPCODE_MOD] = soa_per_channel,
1373 [TGSI_OPCODE_XOR] = soa_per_channel,
1374 [TGSI_OPCODE_SAD] = soa_per_channel,
1375 [TGSI_OPCODE_TXF] = soa_passthrough,
1376 [TGSI_OPCODE_TXQ] = soa_passthrough,
1377 [TGSI_OPCODE_CONT] = soa_passthrough,
1378 [TGSI_OPCODE_EMIT] = soa_unsupported,
1379 [TGSI_OPCODE_ENDPRIM] = soa_unsupported,
1380 [TGSI_OPCODE_BGNLOOP] = soa_passthrough,
1381 [TGSI_OPCODE_BGNSUB] = soa_unsupported,
1382 [TGSI_OPCODE_ENDLOOP] = soa_passthrough,
1383 [TGSI_OPCODE_ENDSUB] = soa_unsupported,
1384 [TGSI_OPCODE_TXQ_LZ] = soa_passthrough,
1385 [TGSI_OPCODE_NOP] = soa_passthrough,
1386 [TGSI_OPCODE_FSEQ] = soa_per_channel,
1387 [TGSI_OPCODE_FSGE] = soa_per_channel,
1388 [TGSI_OPCODE_FSLT] = soa_per_channel,
1389 [TGSI_OPCODE_FSNE] = soa_per_channel,
1390 [TGSI_OPCODE_CALLNZ] = soa_unsupported,
1391 [TGSI_OPCODE_BREAKC] = soa_unsupported,
1392 [TGSI_OPCODE_KILL_IF] = soa_passthrough,
1393 [TGSI_OPCODE_END] = soa_passthrough,
1394 [TGSI_OPCODE_F2I] = soa_per_channel,
1395 [TGSI_OPCODE_IDIV] = soa_per_channel,
1396 [TGSI_OPCODE_IMAX] = soa_per_channel,
1397 [TGSI_OPCODE_IMIN] = soa_per_channel,
1398 [TGSI_OPCODE_INEG] = soa_per_channel,
1399 [TGSI_OPCODE_ISGE] = soa_per_channel,
1400 [TGSI_OPCODE_ISHR] = soa_per_channel,
1401 [TGSI_OPCODE_ISLT] = soa_per_channel,
1402 [TGSI_OPCODE_F2U] = soa_per_channel,
1403 [TGSI_OPCODE_U2F] = soa_per_channel,
1404 [TGSI_OPCODE_UADD] = soa_per_channel,
1405 [TGSI_OPCODE_UDIV] = soa_per_channel,
1406 [TGSI_OPCODE_UMAD] = soa_per_channel,
1407 [TGSI_OPCODE_UMAX] = soa_per_channel,
1408 [TGSI_OPCODE_UMIN] = soa_per_channel,
1409 [TGSI_OPCODE_UMOD] = soa_per_channel,
1410 [TGSI_OPCODE_UMUL] = soa_per_channel,
1411 [TGSI_OPCODE_USEQ] = soa_per_channel,
1412 [TGSI_OPCODE_USGE] = soa_per_channel,
1413 [TGSI_OPCODE_USHR] = soa_per_channel,
1414 [TGSI_OPCODE_USLT] = soa_per_channel,
1415 [TGSI_OPCODE_USNE] = soa_per_channel,
1416 [TGSI_OPCODE_SWITCH] = soa_unsupported,
1417 [TGSI_OPCODE_CASE] = soa_unsupported,
1418 [TGSI_OPCODE_DEFAULT] = soa_unsupported,
1419 [TGSI_OPCODE_ENDSWITCH] = soa_unsupported,
1420 [TGSI_OPCODE_SAMPLE] = soa_passthrough,
1421 [TGSI_OPCODE_SAMPLE_I] = soa_passthrough,
1422 [TGSI_OPCODE_SAMPLE_I_MS] = soa_passthrough,
1423 [TGSI_OPCODE_SAMPLE_B] = soa_passthrough,
1424 [TGSI_OPCODE_SAMPLE_C] = soa_passthrough,
1425 [TGSI_OPCODE_SAMPLE_C_LZ] = soa_passthrough,
1426 [TGSI_OPCODE_SAMPLE_D] = soa_passthrough,
1427 [TGSI_OPCODE_SAMPLE_L] = soa_passthrough,
1428 [TGSI_OPCODE_GATHER4] = soa_passthrough,
1429 [TGSI_OPCODE_SVIEWINFO] = soa_passthrough,
1430 [TGSI_OPCODE_SAMPLE_POS] = soa_passthrough,
1431 [TGSI_OPCODE_SAMPLE_INFO] = soa_passthrough,
1432 [TGSI_OPCODE_UARL] = soa_per_channel,
1433 [TGSI_OPCODE_UCMP] = soa_per_channel,
1434 [TGSI_OPCODE_IABS] = soa_per_channel,
1435 [TGSI_OPCODE_ISSG] = soa_per_channel,
1436 [TGSI_OPCODE_LOAD] = soa_unsupported,
1437 [TGSI_OPCODE_STORE] = soa_unsupported,
1438 [TGSI_OPCODE_MFENCE] = soa_unsupported,
1439 [TGSI_OPCODE_LFENCE] = soa_unsupported,
1440 [TGSI_OPCODE_SFENCE] = soa_unsupported,
1441 [TGSI_OPCODE_BARRIER] = soa_unsupported,
1442 [TGSI_OPCODE_ATOMUADD] = soa_unsupported,
1443 [TGSI_OPCODE_ATOMXCHG] = soa_unsupported,
1444 [TGSI_OPCODE_ATOMCAS] = soa_unsupported,
1445 [TGSI_OPCODE_ATOMAND] = soa_unsupported,
1446 [TGSI_OPCODE_ATOMOR] = soa_unsupported,
1447 [TGSI_OPCODE_ATOMXOR] = soa_unsupported,
1448 [TGSI_OPCODE_ATOMUMIN] = soa_unsupported,
1449 [TGSI_OPCODE_ATOMUMAX] = soa_unsupported,
1450 [TGSI_OPCODE_ATOMIMIN] = soa_unsupported,
1451 [TGSI_OPCODE_ATOMIMAX] = soa_unsupported,
1452 [TGSI_OPCODE_TEX2] = soa_passthrough,
1453 [TGSI_OPCODE_TXB2] = soa_passthrough,
1454 [TGSI_OPCODE_TXL2] = soa_passthrough,
1455 };
1456
1457 static bool
1458 ra_dst_is_indirect(const struct tgsi_full_dst_register *d)
1459 {
1460 return (d->Register.Indirect ||
1461 (d->Register.Dimension && d->Dimension.Indirect));
1462 }
1463
1464 static int
1465 ra_dst_index(const struct tgsi_full_dst_register *d)
1466 {
1467 assert(!d->Register.Indirect);
1468 return d->Register.Index;
1469 }
1470
1471 static int
1472 ra_dst_dimension(const struct tgsi_full_dst_register *d)
1473 {
1474 if (d->Register.Dimension) {
1475 assert(!d->Dimension.Indirect);
1476 return d->Dimension.Index;
1477 }
1478 else {
1479 return 0;
1480 }
1481 }
1482
1483 static bool
1484 ra_is_src_indirect(const struct tgsi_full_src_register *s)
1485 {
1486 return (s->Register.Indirect ||
1487 (s->Register.Dimension && s->Dimension.Indirect));
1488 }
1489
1490 static int
1491 ra_src_index(const struct tgsi_full_src_register *s)
1492 {
1493 assert(!s->Register.Indirect);
1494 return s->Register.Index;
1495 }
1496
1497 static int
1498 ra_src_dimension(const struct tgsi_full_src_register *s)
1499 {
1500 if (s->Register.Dimension) {
1501 assert(!s->Dimension.Indirect);
1502 return s->Dimension.Index;
1503 }
1504 else {
1505 return 0;
1506 }
1507 }
1508
1509 /**
1510 * Infer the type of either the sources or the destination.
1511 */
1512 static enum toy_type
1513 ra_infer_opcode_type(int tgsi_opcode, bool is_dst)
1514 {
1515 enum tgsi_opcode_type type;
1516
1517 if (is_dst)
1518 type = tgsi_opcode_infer_dst_type(tgsi_opcode);
1519 else
1520 type = tgsi_opcode_infer_src_type(tgsi_opcode);
1521
1522 switch (type) {
1523 case TGSI_TYPE_UNSIGNED:
1524 return TOY_TYPE_UD;
1525 case TGSI_TYPE_SIGNED:
1526 return TOY_TYPE_D;
1527 case TGSI_TYPE_FLOAT:
1528 return TOY_TYPE_F;
1529 case TGSI_TYPE_UNTYPED:
1530 case TGSI_TYPE_VOID:
1531 case TGSI_TYPE_DOUBLE:
1532 default:
1533 assert(!"unsupported TGSI type");
1534 return TOY_TYPE_UD;
1535 }
1536 }
1537
1538 /**
1539 * Return the type of an operand of the specified instruction.
1540 */
1541 static enum toy_type
1542 ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
1543 int operand, bool is_dst)
1544 {
1545 enum toy_type type;
1546 enum tgsi_file_type file;
1547
1548 /* we need to look at both src and dst for MOV */
1549 /* XXX it should not be this complex */
1550 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
1551 const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
1552 const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File;
1553
1554 if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) {
1555 type = TOY_TYPE_D;
1556 }
1557 else if (src_file == TGSI_FILE_IMMEDIATE &&
1558 !tgsi_inst->Src[0].Register.Indirect) {
1559 const int src_idx = tgsi_inst->Src[0].Register.Index;
1560 type = tgsi->imm_data.types[src_idx];
1561 }
1562 else {
1563 /* this is the best we can do */
1564 type = TOY_TYPE_F;
1565 }
1566
1567 return type;
1568 }
1569 else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) {
1570 if (!is_dst && operand == 0)
1571 type = TOY_TYPE_UD;
1572 else
1573 type = TOY_TYPE_F;
1574
1575 return type;
1576 }
1577
1578 type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst);
1579
1580 /* fix the type */
1581 file = (is_dst) ?
1582 tgsi_inst->Dst[operand].Register.File :
1583 tgsi_inst->Src[operand].Register.File;
1584 switch (file) {
1585 case TGSI_FILE_SAMPLER:
1586 case TGSI_FILE_IMAGE:
1587 case TGSI_FILE_SAMPLER_VIEW:
1588 type = TOY_TYPE_D;
1589 break;
1590 case TGSI_FILE_ADDRESS:
1591 assert(type == TOY_TYPE_D);
1592 break;
1593 default:
1594 break;
1595 }
1596
1597 return type;
1598 }
1599
1600 /**
1601 * Allocate a VRF register.
1602 */
1603 static int
1604 ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file)
1605 {
1606 const int count = (tgsi->aos) ? 1 : 4;
1607 return tc_alloc_vrf(tgsi->tc, count);
1608 }
1609
1610 /**
1611 * Construct the key for VRF mapping look-up.
1612 */
1613 static void *
1614 ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
1615 {
1616 intptr_t key;
1617
1618 /* this is ugly... */
1619 assert(file < 1 << 4);
1620 assert(dim < 1 << 12);
1621 assert(index < 1 << 16);
1622 key = (file << 28) | (dim << 16) | index;
1623
1624 return intptr_to_pointer(key);
1625 }
1626
1627 /**
1628 * Map a TGSI register to a VRF register.
1629 */
1630 static int
1631 ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file,
1632 int dim, int index, bool *is_new)
1633 {
1634 void *key, *val;
1635 intptr_t vrf;
1636
1637 key = ra_get_map_key(file, dim, index);
1638
1639 /*
1640 * because we allocate vrf from 1 and on, val is never NULL as long as the
1641 * key exists
1642 */
1643 val = util_hash_table_get(tgsi->reg_mapping, key);
1644 if (val) {
1645 vrf = pointer_to_intptr(val);
1646
1647 if (is_new)
1648 *is_new = false;
1649 }
1650 else {
1651 vrf = (intptr_t) ra_alloc_reg(tgsi, file);
1652
1653 /* add to the mapping */
1654 val = intptr_to_pointer(vrf);
1655 util_hash_table_set(tgsi->reg_mapping, key, val);
1656
1657 if (is_new)
1658 *is_new = true;
1659 }
1660
1661 return (int) vrf;
1662 }
1663
1664 /**
1665 * Return true if the destination aliases any of the sources.
1666 */
1667 static bool
1668 ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index)
1669 {
1670 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1671 int i;
1672
1673 /* we need a scratch register for indirect dst anyway */
1674 if (ra_dst_is_indirect(d))
1675 return true;
1676
1677 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1678 const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
1679
1680 if (s->Register.File != d->Register.File)
1681 continue;
1682
1683 /*
1684 * we can go on to check dimension and index respectively, but
1685 * keep it simple for now
1686 */
1687 if (ra_is_src_indirect(s))
1688 return true;
1689 if (ra_src_dimension(s) == ra_dst_dimension(d) &&
1690 ra_src_index(s) == ra_dst_index(d))
1691 return true;
1692 }
1693
1694 return false;
1695 }
1696
1697 /**
1698 * Return the toy register for a TGSI destination operand.
1699 */
1700 static struct toy_dst
1701 ra_get_dst(struct toy_tgsi *tgsi,
1702 const struct tgsi_full_instruction *tgsi_inst, int dst_index,
1703 bool *is_scratch)
1704 {
1705 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1706 bool need_vrf = false;
1707 struct toy_dst dst;
1708
1709 switch (d->Register.File) {
1710 case TGSI_FILE_NULL:
1711 dst = tdst_null();
1712 break;
1713 case TGSI_FILE_OUTPUT:
1714 case TGSI_FILE_TEMPORARY:
1715 case TGSI_FILE_ADDRESS:
1716 case TGSI_FILE_PREDICATE:
1717 need_vrf = true;
1718 break;
1719 default:
1720 assert(!"unhandled dst file");
1721 dst = tdst_null();
1722 break;
1723 }
1724
1725 if (need_vrf) {
1726 /* XXX we do not always need a scratch given the conditions... */
1727 const bool need_scratch =
1728 (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) ||
1729 tgsi_inst->Instruction.Saturate);
1730 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true);
1731 int vrf;
1732
1733 if (need_scratch) {
1734 vrf = ra_alloc_reg(tgsi, d->Register.File);
1735 }
1736 else {
1737 vrf = ra_map_reg(tgsi, d->Register.File,
1738 ra_dst_dimension(d), ra_dst_index(d), NULL);
1739 }
1740
1741 if (is_scratch)
1742 *is_scratch = need_scratch;
1743
1744 dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1745 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
1746 }
1747
1748 return dst;
1749 }
1750
1751 static struct toy_src
1752 ra_get_src_for_vrf(const struct tgsi_full_src_register *s,
1753 enum toy_type type, int vrf)
1754 {
1755 return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1756 false, 0,
1757 s->Register.SwizzleX, s->Register.SwizzleY,
1758 s->Register.SwizzleZ, s->Register.SwizzleW,
1759 s->Register.Absolute, s->Register.Negate,
1760 vrf * TOY_REG_WIDTH);
1761 }
1762
1763 static int
1764 init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst,
1765 enum tgsi_file_type file, int index,
1766 const struct tgsi_ind_register *indirect,
1767 const struct tgsi_dimension *dimension,
1768 const struct tgsi_ind_register *dim_indirect)
1769 {
1770 struct toy_src src;
1771 int num_src = 0;
1772
1773 /* src[0]: TGSI file */
1774 inst->src[num_src++] = tsrc_imm_d(file);
1775
1776 /* src[1]: TGSI dimension */
1777 inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0);
1778
1779 /* src[2]: TGSI dimension indirection */
1780 if (dim_indirect) {
1781 const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0,
1782 dim_indirect->Index, NULL);
1783
1784 src = tsrc(TOY_FILE_VRF, vrf, 0);
1785 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1786 }
1787 else {
1788 src = tsrc_imm_d(0);
1789 }
1790
1791 inst->src[num_src++] = src;
1792
1793 /* src[3]: TGSI index */
1794 inst->src[num_src++] = tsrc_imm_d(index);
1795
1796 /* src[4]: TGSI index indirection */
1797 if (indirect) {
1798 const int vrf = ra_map_reg(tgsi, indirect->File, 0,
1799 indirect->Index, NULL);
1800
1801 src = tsrc(TOY_FILE_VRF, vrf, 0);
1802 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1803 }
1804 else {
1805 src = tsrc_imm_d(0);
1806 }
1807
1808 inst->src[num_src++] = src;
1809
1810 return num_src;
1811 }
1812
1813 static struct toy_src
1814 ra_get_src_indirect(struct toy_tgsi *tgsi,
1815 const struct tgsi_full_instruction *tgsi_inst,
1816 int src_index)
1817 {
1818 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
1819 bool need_vrf = false, is_resource = false;
1820 struct toy_src src;
1821
1822 switch (s->Register.File) {
1823 case TGSI_FILE_NULL:
1824 src = tsrc_null();
1825 break;
1826 case TGSI_FILE_SAMPLER:
1827 case TGSI_FILE_IMAGE:
1828 case TGSI_FILE_SAMPLER_VIEW:
1829 is_resource = true;
1830 /* fall through */
1831 case TGSI_FILE_CONSTANT:
1832 case TGSI_FILE_INPUT:
1833 case TGSI_FILE_SYSTEM_VALUE:
1834 case TGSI_FILE_TEMPORARY:
1835 case TGSI_FILE_ADDRESS:
1836 case TGSI_FILE_IMMEDIATE:
1837 case TGSI_FILE_PREDICATE:
1838 need_vrf = true;
1839 break;
1840 default:
1841 assert(!"unhandled src file");
1842 src = tsrc_null();
1843 break;
1844 }
1845
1846 if (need_vrf) {
1847 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
1848 int vrf;
1849
1850 if (is_resource) {
1851 assert(!s->Register.Dimension);
1852 assert(s->Register.Indirect);
1853
1854 vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
1855 }
1856 else {
1857 vrf = ra_alloc_reg(tgsi, s->Register.File);
1858 }
1859
1860 src = ra_get_src_for_vrf(s, type, vrf);
1861
1862 /* emit indirect fetch */
1863 if (!is_resource) {
1864 struct toy_inst *inst;
1865
1866 inst = tc_add(tgsi->tc);
1867 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
1868 inst->dst = tdst_from(src);
1869 inst->dst.writemask = TOY_WRITEMASK_XYZW;
1870
1871 init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
1872 (s->Register.Indirect) ? &s->Indirect : NULL,
1873 (s->Register.Dimension) ? &s->Dimension : NULL,
1874 (s->Dimension.Indirect) ? &s->DimIndirect : NULL);
1875 }
1876 }
1877
1878 return src;
1879 }
1880
1881 /**
1882 * Return the toy register for a TGSI source operand.
1883 */
1884 static struct toy_src
1885 ra_get_src(struct toy_tgsi *tgsi,
1886 const struct tgsi_full_instruction *tgsi_inst,
1887 int src_index)
1888 {
1889 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
1890 bool need_vrf = false;
1891 struct toy_src src;
1892
1893 if (ra_is_src_indirect(s))
1894 return ra_get_src_indirect(tgsi, tgsi_inst, src_index);
1895
1896 switch (s->Register.File) {
1897 case TGSI_FILE_NULL:
1898 src = tsrc_null();
1899 break;
1900 case TGSI_FILE_CONSTANT:
1901 case TGSI_FILE_INPUT:
1902 case TGSI_FILE_SYSTEM_VALUE:
1903 need_vrf = true;
1904 break;
1905 case TGSI_FILE_TEMPORARY:
1906 case TGSI_FILE_ADDRESS:
1907 case TGSI_FILE_PREDICATE:
1908 need_vrf = true;
1909 break;
1910 case TGSI_FILE_SAMPLER:
1911 case TGSI_FILE_IMAGE:
1912 case TGSI_FILE_SAMPLER_VIEW:
1913 assert(!s->Register.Dimension);
1914 src = tsrc_imm_d(s->Register.Index);
1915 break;
1916 case TGSI_FILE_IMMEDIATE:
1917 {
1918 const uint32_t *imm;
1919 enum toy_type imm_type;
1920 bool is_scalar;
1921
1922 imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);
1923
1924 is_scalar =
1925 (imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] &&
1926 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] &&
1927 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]);
1928
1929 if (is_scalar) {
1930 const enum toy_type type =
1931 ra_get_type(tgsi, tgsi_inst, src_index, false);
1932
1933 /* ignore imm_type */
1934 src = tsrc_imm_ud(imm[s->Register.SwizzleX]);
1935 src.type = type;
1936 src.absolute = s->Register.Absolute;
1937 src.negate = s->Register.Negate;
1938 }
1939 else {
1940 need_vrf = true;
1941 }
1942 }
1943 break;
1944 default:
1945 assert(!"unhandled src file");
1946 src = tsrc_null();
1947 break;
1948 }
1949
1950 if (need_vrf) {
1951 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
1952 bool is_new;
1953 int vrf;
1954
1955 vrf = ra_map_reg(tgsi, s->Register.File,
1956 ra_src_dimension(s), ra_src_index(s), &is_new);
1957
1958 src = ra_get_src_for_vrf(s, type, vrf);
1959
1960 if (is_new) {
1961 switch (s->Register.File) {
1962 case TGSI_FILE_TEMPORARY:
1963 case TGSI_FILE_ADDRESS:
1964 case TGSI_FILE_PREDICATE:
1965 {
1966 struct toy_dst dst = tdst_from(src);
1967 dst.writemask = TOY_WRITEMASK_XYZW;
1968
1969 /* always initialize registers before use */
1970 if (tgsi->aos) {
1971 tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type));
1972 }
1973 else {
1974 struct toy_dst tdst[4];
1975 int i;
1976
1977 tdst_transpose(dst, tdst);
1978
1979 for (i = 0; i < 4; i++) {
1980 tc_MOV(tgsi->tc, tdst[i],
1981 tsrc_type(tsrc_imm_d(0), type));
1982 }
1983 }
1984 }
1985 break;
1986 default:
1987 break;
1988 }
1989 }
1990
1991 }
1992
1993 return src;
1994 }
1995
1996 static void
1997 parse_instruction(struct toy_tgsi *tgsi,
1998 const struct tgsi_full_instruction *tgsi_inst)
1999 {
2000 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS];
2001 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS];
2002 bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS];
2003 toy_tgsi_translate translate;
2004 int i;
2005
2006 /* convert TGSI registers to toy registers */
2007 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
2008 src[i] = ra_get_src(tgsi, tgsi_inst, i);
2009 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
2010 dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]);
2011
2012 /* translate the instruction */
2013 translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode];
2014 if (!translate) {
2015 if (tgsi->translate_table == soa_translate_table)
2016 soa_unsupported(tgsi->tc, tgsi_inst, dst, src);
2017 else
2018 aos_unsupported(tgsi->tc, tgsi_inst, dst, src);
2019 }
2020 translate(tgsi->tc, tgsi_inst, dst, src);
2021
2022 /* write the result to the real destinations if needed */
2023 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2024 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2025
2026 if (!dst_is_scratch[i])
2027 continue;
2028
2029 tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
2030
2031 /* emit indirect store */
2032 if (ra_dst_is_indirect(d)) {
2033 struct toy_inst *inst;
2034
2035 inst = tc_add(tgsi->tc);
2036 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE;
2037 inst->dst = dst[i];
2038
2039 init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index,
2040 (d->Register.Indirect) ? &d->Indirect : NULL,
2041 (d->Register.Dimension) ? &d->Dimension : NULL,
2042 (d->Dimension.Indirect) ? &d->DimIndirect : NULL);
2043 }
2044 else {
2045 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true);
2046 struct toy_dst real_dst;
2047 int vrf;
2048
2049 vrf = ra_map_reg(tgsi, d->Register.File,
2050 ra_dst_dimension(d), ra_dst_index(d), NULL);
2051 real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
2052 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
2053
2054 if (tgsi->aos) {
2055 tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i]));
2056 }
2057 else {
2058 struct toy_dst tdst[4];
2059 struct toy_src tsrc[4];
2060 int j;
2061
2062 tdst_transpose(real_dst, tdst);
2063 tsrc_transpose(tsrc_from(dst[i]), tsrc);
2064
2065 for (j = 0; j < 4; j++)
2066 tc_MOV(tgsi->tc, tdst[j], tsrc[j]);
2067 }
2068 }
2069
2070 tgsi->tc->templ.saturate = false;
2071 }
2072
2073 switch (tgsi_inst->Instruction.Opcode) {
2074 case TGSI_OPCODE_KILL_IF:
2075 case TGSI_OPCODE_KILL:
2076 tgsi->uses_kill = true;
2077 break;
2078 }
2079
2080 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
2081 const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
2082 if (s->Register.File == TGSI_FILE_CONSTANT && s->Register.Indirect)
2083 tgsi->const_indirect = true;
2084 }
2085
2086 /* remember channels written */
2087 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2088 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2089
2090 if (d->Register.File != TGSI_FILE_OUTPUT)
2091 continue;
2092 for (i = 0; i < tgsi->num_outputs; i++) {
2093 if (tgsi->outputs[i].index == d->Register.Index) {
2094 tgsi->outputs[i].undefined_mask &= ~d->Register.WriteMask;
2095 break;
2096 }
2097 }
2098 }
2099 }
2100
2101 static void
2102 decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2103 {
2104 static const struct tgsi_declaration_interp default_interp = {
2105 TGSI_INTERPOLATE_PERSPECTIVE, false, 0,
2106 };
2107 const struct tgsi_declaration_interp *interp =
2108 (decl->Declaration.Interpolate) ? &decl->Interp: &default_interp;
2109 int index;
2110
2111 if (decl->Range.Last >= ARRAY_SIZE(tgsi->inputs)) {
2112 assert(!"invalid IN");
2113 return;
2114 }
2115
2116 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2117 const int slot = tgsi->num_inputs++;
2118
2119 tgsi->inputs[slot].index = index;
2120 tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask;
2121 if (decl->Declaration.Semantic) {
2122 tgsi->inputs[slot].semantic_name = decl->Semantic.Name;
2123 tgsi->inputs[slot].semantic_index = decl->Semantic.Index;
2124 }
2125 else {
2126 tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2127 tgsi->inputs[slot].semantic_index = index;
2128 }
2129 tgsi->inputs[slot].interp = interp->Interpolate;
2130 tgsi->inputs[slot].centroid = interp->Location == TGSI_INTERPOLATE_LOC_CENTROID;
2131 }
2132 }
2133
2134 static void
2135 decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2136 {
2137 int index;
2138
2139 if (decl->Range.Last >= ARRAY_SIZE(tgsi->outputs)) {
2140 assert(!"invalid OUT");
2141 return;
2142 }
2143
2144 assert(decl->Declaration.Semantic);
2145
2146 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2147 const int slot = tgsi->num_outputs++;
2148
2149 tgsi->outputs[slot].index = index;
2150 tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW;
2151 tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask;
2152 tgsi->outputs[slot].semantic_name = decl->Semantic.Name;
2153 tgsi->outputs[slot].semantic_index = decl->Semantic.Index;
2154 }
2155 }
2156
2157 static void
2158 decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2159 {
2160 int index;
2161
2162 if (decl->Range.Last >= ARRAY_SIZE(tgsi->system_values)) {
2163 assert(!"invalid SV");
2164 return;
2165 }
2166
2167 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2168 const int slot = tgsi->num_system_values++;
2169
2170 tgsi->system_values[slot].index = index;
2171 if (decl->Declaration.Semantic) {
2172 tgsi->system_values[slot].semantic_name = decl->Semantic.Name;
2173 tgsi->system_values[slot].semantic_index = decl->Semantic.Index;
2174 }
2175 else {
2176 tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2177 tgsi->system_values[slot].semantic_index = index;
2178 }
2179 }
2180 }
2181
2182 /**
2183 * Emit an instruction to fetch the value of a TGSI register.
2184 */
2185 static void
2186 fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx)
2187 {
2188 struct toy_dst dst;
2189 int vrf;
2190 enum toy_opcode opcode;
2191 enum toy_type type = TOY_TYPE_F;
2192
2193 switch (file) {
2194 case TGSI_FILE_INPUT:
2195 opcode = TOY_OPCODE_TGSI_IN;
2196 break;
2197 case TGSI_FILE_CONSTANT:
2198 opcode = TOY_OPCODE_TGSI_CONST;
2199 break;
2200 case TGSI_FILE_SYSTEM_VALUE:
2201 opcode = TOY_OPCODE_TGSI_SV;
2202 break;
2203 case TGSI_FILE_IMMEDIATE:
2204 opcode = TOY_OPCODE_TGSI_IMM;
2205 toy_tgsi_get_imm(tgsi, idx, &type);
2206 break;
2207 default:
2208 /* no need to fetch */
2209 return;
2210 break;
2211 }
2212
2213 vrf = ra_map_reg(tgsi, file, dim, idx, NULL);
2214 dst = tdst(TOY_FILE_VRF, vrf, 0);
2215 dst = tdst_type(dst, type);
2216
2217 tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx));
2218 }
2219
2220 static void
2221 parse_declaration(struct toy_tgsi *tgsi,
2222 const struct tgsi_full_declaration *decl)
2223 {
2224 int i;
2225
2226 switch (decl->Declaration.File) {
2227 case TGSI_FILE_INPUT:
2228 decl_add_in(tgsi, decl);
2229 break;
2230 case TGSI_FILE_OUTPUT:
2231 decl_add_out(tgsi, decl);
2232 break;
2233 case TGSI_FILE_SYSTEM_VALUE:
2234 decl_add_sv(tgsi, decl);
2235 break;
2236 case TGSI_FILE_IMMEDIATE:
2237 /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
2238 assert(!"unexpected immediate declaration");
2239 break;
2240 case TGSI_FILE_CONSTANT:
2241 if (tgsi->const_count <= decl->Range.Last)
2242 tgsi->const_count = decl->Range.Last + 1;
2243 break;
2244 case TGSI_FILE_NULL:
2245 case TGSI_FILE_TEMPORARY:
2246 case TGSI_FILE_SAMPLER:
2247 case TGSI_FILE_PREDICATE:
2248 case TGSI_FILE_ADDRESS:
2249 case TGSI_FILE_IMAGE:
2250 case TGSI_FILE_SAMPLER_VIEW:
2251 /* nothing to do */
2252 break;
2253 default:
2254 assert(!"unhandled TGSI file");
2255 break;
2256 }
2257
2258 /* fetch the registers now */
2259 for (i = decl->Range.First; i <= decl->Range.Last; i++) {
2260 const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0;
2261 fetch_source(tgsi, decl->Declaration.File, dim, i);
2262 }
2263 }
2264
2265 static int
2266 add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf)
2267 {
2268 /* reallocate the buffer if necessary */
2269 if (tgsi->imm_data.cur >= tgsi->imm_data.size) {
2270 const int cur_size = tgsi->imm_data.size;
2271 int new_size;
2272 enum toy_type *new_types;
2273 uint32_t (*new_buf)[4];
2274
2275 new_size = (cur_size) ? cur_size << 1 : 16;
2276 while (new_size <= tgsi->imm_data.cur)
2277 new_size <<= 1;
2278
2279 new_buf = REALLOC(tgsi->imm_data.buf,
2280 cur_size * sizeof(new_buf[0]),
2281 new_size * sizeof(new_buf[0]));
2282 new_types = REALLOC(tgsi->imm_data.types,
2283 cur_size * sizeof(new_types[0]),
2284 new_size * sizeof(new_types[0]));
2285 if (!new_buf || !new_types) {
2286 FREE(new_buf);
2287 FREE(new_types);
2288 return -1;
2289 }
2290
2291 tgsi->imm_data.buf = new_buf;
2292 tgsi->imm_data.types = new_types;
2293 tgsi->imm_data.size = new_size;
2294 }
2295
2296 tgsi->imm_data.types[tgsi->imm_data.cur] = type;
2297 memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur],
2298 buf, sizeof(tgsi->imm_data.buf[0]));
2299
2300 return tgsi->imm_data.cur++;
2301 }
2302
2303 static void
2304 parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm)
2305 {
2306 enum toy_type type;
2307 uint32_t imm_buf[4];
2308 int idx;
2309
2310 switch (imm->Immediate.DataType) {
2311 case TGSI_IMM_FLOAT32:
2312 type = TOY_TYPE_F;
2313 imm_buf[0] = fui(imm->u[0].Float);
2314 imm_buf[1] = fui(imm->u[1].Float);
2315 imm_buf[2] = fui(imm->u[2].Float);
2316 imm_buf[3] = fui(imm->u[3].Float);
2317 break;
2318 case TGSI_IMM_INT32:
2319 type = TOY_TYPE_D;
2320 imm_buf[0] = (uint32_t) imm->u[0].Int;
2321 imm_buf[1] = (uint32_t) imm->u[1].Int;
2322 imm_buf[2] = (uint32_t) imm->u[2].Int;
2323 imm_buf[3] = (uint32_t) imm->u[3].Int;
2324 break;
2325 case TGSI_IMM_UINT32:
2326 type = TOY_TYPE_UD;
2327 imm_buf[0] = imm->u[0].Uint;
2328 imm_buf[1] = imm->u[1].Uint;
2329 imm_buf[2] = imm->u[2].Uint;
2330 imm_buf[3] = imm->u[3].Uint;
2331 break;
2332 default:
2333 assert(!"unhandled TGSI imm type");
2334 type = TOY_TYPE_F;
2335 memset(imm_buf, 0, sizeof(imm_buf));
2336 break;
2337 }
2338
2339 idx = add_imm(tgsi, type, imm_buf);
2340 if (idx >= 0)
2341 fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx);
2342 else
2343 tc_fail(tgsi->tc, "failed to add TGSI imm");
2344 }
2345
2346 static void
2347 parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop)
2348 {
2349 switch (prop->Property.PropertyName) {
2350 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
2351 tgsi->props.vs_prohibit_ucps = prop->u[0].Data;
2352 break;
2353 case TGSI_PROPERTY_FS_COORD_ORIGIN:
2354 tgsi->props.fs_coord_origin = prop->u[0].Data;
2355 break;
2356 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
2357 tgsi->props.fs_coord_pixel_center = prop->u[0].Data;
2358 break;
2359 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2360 tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data;
2361 break;
2362 case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
2363 tgsi->props.fs_depth_layout = prop->u[0].Data;
2364 break;
2365 case TGSI_PROPERTY_GS_INPUT_PRIM:
2366 tgsi->props.gs_input_prim = prop->u[0].Data;
2367 break;
2368 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2369 tgsi->props.gs_output_prim = prop->u[0].Data;
2370 break;
2371 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2372 tgsi->props.gs_max_output_vertices = prop->u[0].Data;
2373 break;
2374 default:
2375 assert(!"unhandled TGSI property");
2376 break;
2377 }
2378 }
2379
2380 static void
2381 parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token)
2382 {
2383 switch (token->Token.Type) {
2384 case TGSI_TOKEN_TYPE_DECLARATION:
2385 parse_declaration(tgsi, &token->FullDeclaration);
2386 break;
2387 case TGSI_TOKEN_TYPE_IMMEDIATE:
2388 parse_immediate(tgsi, &token->FullImmediate);
2389 break;
2390 case TGSI_TOKEN_TYPE_INSTRUCTION:
2391 parse_instruction(tgsi, &token->FullInstruction);
2392 break;
2393 case TGSI_TOKEN_TYPE_PROPERTY:
2394 parse_property(tgsi, &token->FullProperty);
2395 break;
2396 default:
2397 assert(!"unhandled TGSI token type");
2398 break;
2399 }
2400 }
2401
2402 static enum pipe_error
2403 dump_reg_mapping(void *key, void *val, void *data)
2404 {
2405 int tgsi_file, tgsi_dim, tgsi_index;
2406 uint32_t sig, vrf;
2407
2408 sig = (uint32_t) pointer_to_intptr(key);
2409 vrf = (uint32_t) pointer_to_intptr(val);
2410
2411 /* see ra_get_map_key() */
2412 tgsi_file = (sig >> 28) & 0xf;
2413 tgsi_dim = (sig >> 16) & 0xfff;
2414 tgsi_index = (sig >> 0) & 0xffff;
2415
2416 if (tgsi_dim) {
2417 ilo_printf(" v%d:\t%s[%d][%d]\n", vrf,
2418 tgsi_file_name(tgsi_file), tgsi_dim, tgsi_index);
2419 }
2420 else {
2421 ilo_printf(" v%d:\t%s[%d]\n", vrf,
2422 tgsi_file_name(tgsi_file), tgsi_index);
2423 }
2424
2425 return PIPE_OK;
2426 }
2427
2428 /**
2429 * Dump the TGSI translator, currently only the register mapping.
2430 */
2431 void
2432 toy_tgsi_dump(const struct toy_tgsi *tgsi)
2433 {
2434 util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL);
2435 }
2436
2437 /**
2438 * Clean up the TGSI translator.
2439 */
2440 void
2441 toy_tgsi_cleanup(struct toy_tgsi *tgsi)
2442 {
2443 FREE(tgsi->imm_data.buf);
2444 FREE(tgsi->imm_data.types);
2445
2446 util_hash_table_destroy(tgsi->reg_mapping);
2447 }
2448
/**
 * Hash function of the register mapping table.  The key, converted to an
 * integer, is used directly as the hash value.
 */
static unsigned
reg_mapping_hash(void *key)
{
   const unsigned hash = (unsigned) pointer_to_intptr(key);

   return hash;
}
2454
/**
 * Key comparison function of the register mapping table.
 *
 * \return 0 when the two keys are the same pointer value, 1 otherwise
 */
static int
reg_mapping_compare(void *key1, void *key2)
{
   if (key1 == key2)
      return 0;

   return 1;
}
2460
2461 /**
2462 * Initialize the TGSI translator.
2463 */
2464 static bool
2465 init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos)
2466 {
2467 memset(tgsi, 0, sizeof(*tgsi));
2468
2469 tgsi->tc = tc;
2470 tgsi->aos = aos;
2471 tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table;
2472
2473 /* create a mapping of TGSI registers to VRF reigsters */
2474 tgsi->reg_mapping =
2475 util_hash_table_create(reg_mapping_hash, reg_mapping_compare);
2476
2477 return (tgsi->reg_mapping != NULL);
2478 }
2479
2480 /**
2481 * Translate TGSI tokens into toy instructions.
2482 */
2483 void
2484 toy_compiler_translate_tgsi(struct toy_compiler *tc,
2485 const struct tgsi_token *tokens, bool aos,
2486 struct toy_tgsi *tgsi)
2487 {
2488 struct tgsi_parse_context parse;
2489
2490 if (!init_tgsi(tgsi, tc, aos)) {
2491 tc_fail(tc, "failed to initialize TGSI translator");
2492 return;
2493 }
2494
2495 tgsi_parse_init(&parse, tokens);
2496 while (!tgsi_parse_end_of_tokens(&parse)) {
2497 tgsi_parse_token(&parse);
2498 parse_token(tgsi, &parse.FullToken);
2499 }
2500 tgsi_parse_free(&parse);
2501 }
2502
2503 /**
2504 * Map the TGSI register to VRF register.
2505 */
2506 int
2507 toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
2508 enum tgsi_file_type file, int dimension, int index)
2509 {
2510 void *key, *val;
2511
2512 key = ra_get_map_key(file, dimension, index);
2513
2514 val = util_hash_table_get(tgsi->reg_mapping, key);
2515
2516 return (val) ? pointer_to_intptr(val) : -1;
2517 }