e5fbb6e592a2a393d4c52831cfdb808f33ce3388
[mesa.git] / src / gallium / drivers / ilo / shader / toy_tgsi.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_info.h"
30 #include "tgsi/tgsi_strings.h"
31 #include "util/u_hash_table.h"
32 #include "toy_helpers.h"
33 #include "toy_tgsi.h"
34
35 /* map TGSI opcode to GEN opcode 1-to-1 */
36 static const struct {
37 int opcode;
38 int num_dst;
39 int num_src;
40 } aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
41 [TGSI_OPCODE_ARL] = { GEN6_OPCODE_RNDD, 1, 1 },
42 [TGSI_OPCODE_MOV] = { GEN6_OPCODE_MOV, 1, 1 },
43 [TGSI_OPCODE_RCP] = { TOY_OPCODE_INV, 1, 1 },
44 [TGSI_OPCODE_RSQ] = { TOY_OPCODE_RSQ, 1, 1 },
45 [TGSI_OPCODE_MUL] = { GEN6_OPCODE_MUL, 1, 2 },
46 [TGSI_OPCODE_ADD] = { GEN6_OPCODE_ADD, 1, 2 },
47 [TGSI_OPCODE_DP3] = { GEN6_OPCODE_DP3, 1, 2 },
48 [TGSI_OPCODE_DP4] = { GEN6_OPCODE_DP4, 1, 2 },
49 [TGSI_OPCODE_MIN] = { GEN6_OPCODE_SEL, 1, 2 },
50 [TGSI_OPCODE_MAX] = { GEN6_OPCODE_SEL, 1, 2 },
51 /* a later pass will move src[2] to accumulator */
52 [TGSI_OPCODE_MAD] = { GEN6_OPCODE_MAC, 1, 3 },
53 [TGSI_OPCODE_SUB] = { GEN6_OPCODE_ADD, 1, 2 },
54 [TGSI_OPCODE_SQRT] = { TOY_OPCODE_SQRT, 1, 1 },
55 [TGSI_OPCODE_FRC] = { GEN6_OPCODE_FRC, 1, 1 },
56 [TGSI_OPCODE_FLR] = { GEN6_OPCODE_RNDD, 1, 1 },
57 [TGSI_OPCODE_ROUND] = { GEN6_OPCODE_RNDE, 1, 1 },
58 [TGSI_OPCODE_EX2] = { TOY_OPCODE_EXP, 1, 1 },
59 [TGSI_OPCODE_LG2] = { TOY_OPCODE_LOG, 1, 1 },
60 [TGSI_OPCODE_POW] = { TOY_OPCODE_POW, 1, 2 },
61 [TGSI_OPCODE_ABS] = { GEN6_OPCODE_MOV, 1, 1 },
62 [TGSI_OPCODE_DPH] = { GEN6_OPCODE_DPH, 1, 2 },
63 [TGSI_OPCODE_COS] = { TOY_OPCODE_COS, 1, 1 },
64 [TGSI_OPCODE_KILL] = { TOY_OPCODE_KIL, 0, 0 },
65 [TGSI_OPCODE_SIN] = { TOY_OPCODE_SIN, 1, 1 },
66 [TGSI_OPCODE_ARR] = { GEN6_OPCODE_RNDZ, 1, 1 },
67 [TGSI_OPCODE_DP2] = { GEN6_OPCODE_DP2, 1, 2 },
68 [TGSI_OPCODE_IF] = { GEN6_OPCODE_IF, 0, 1 },
69 [TGSI_OPCODE_UIF] = { GEN6_OPCODE_IF, 0, 1 },
70 [TGSI_OPCODE_ELSE] = { GEN6_OPCODE_ELSE, 0, 0 },
71 [TGSI_OPCODE_ENDIF] = { GEN6_OPCODE_ENDIF, 0, 0 },
72 [TGSI_OPCODE_I2F] = { GEN6_OPCODE_MOV, 1, 1 },
73 [TGSI_OPCODE_NOT] = { GEN6_OPCODE_NOT, 1, 1 },
74 [TGSI_OPCODE_TRUNC] = { GEN6_OPCODE_RNDZ, 1, 1 },
75 [TGSI_OPCODE_SHL] = { GEN6_OPCODE_SHL, 1, 2 },
76 [TGSI_OPCODE_AND] = { GEN6_OPCODE_AND, 1, 2 },
77 [TGSI_OPCODE_OR] = { GEN6_OPCODE_OR, 1, 2 },
78 [TGSI_OPCODE_MOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
79 [TGSI_OPCODE_XOR] = { GEN6_OPCODE_XOR, 1, 2 },
80 [TGSI_OPCODE_EMIT] = { TOY_OPCODE_EMIT, 0, 0 },
81 [TGSI_OPCODE_ENDPRIM] = { TOY_OPCODE_ENDPRIM, 0, 0 },
82 [TGSI_OPCODE_NOP] = { GEN6_OPCODE_NOP, 0, 0 },
83 [TGSI_OPCODE_KILL_IF] = { TOY_OPCODE_KIL, 0, 1 },
84 [TGSI_OPCODE_END] = { GEN6_OPCODE_NOP, 0, 0 },
85 [TGSI_OPCODE_F2I] = { GEN6_OPCODE_MOV, 1, 1 },
86 [TGSI_OPCODE_IDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
87 [TGSI_OPCODE_IMAX] = { GEN6_OPCODE_SEL, 1, 2 },
88 [TGSI_OPCODE_IMIN] = { GEN6_OPCODE_SEL, 1, 2 },
89 [TGSI_OPCODE_INEG] = { GEN6_OPCODE_MOV, 1, 1 },
90 [TGSI_OPCODE_ISHR] = { GEN6_OPCODE_ASR, 1, 2 },
91 [TGSI_OPCODE_F2U] = { GEN6_OPCODE_MOV, 1, 1 },
92 [TGSI_OPCODE_U2F] = { GEN6_OPCODE_MOV, 1, 1 },
93 [TGSI_OPCODE_UADD] = { GEN6_OPCODE_ADD, 1, 2 },
94 [TGSI_OPCODE_UDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
95 /* a later pass will move src[2] to accumulator */
96 [TGSI_OPCODE_UMAD] = { GEN6_OPCODE_MAC, 1, 3 },
97 [TGSI_OPCODE_UMAX] = { GEN6_OPCODE_SEL, 1, 2 },
98 [TGSI_OPCODE_UMIN] = { GEN6_OPCODE_SEL, 1, 2 },
99 [TGSI_OPCODE_UMOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
100 [TGSI_OPCODE_UMUL] = { GEN6_OPCODE_MUL, 1, 2 },
101 [TGSI_OPCODE_USHR] = { GEN6_OPCODE_SHR, 1, 2 },
102 [TGSI_OPCODE_UARL] = { GEN6_OPCODE_MOV, 1, 1 },
103 [TGSI_OPCODE_IABS] = { GEN6_OPCODE_MOV, 1, 1 },
104 };
105
106 static void
107 aos_simple(struct toy_compiler *tc,
108 const struct tgsi_full_instruction *tgsi_inst,
109 struct toy_dst *dst,
110 struct toy_src *src)
111 {
112 struct toy_inst *inst;
113 int opcode;
114 int cond_modifier = GEN6_COND_NONE;
115 int num_dst = tgsi_inst->Instruction.NumDstRegs;
116 int num_src = tgsi_inst->Instruction.NumSrcRegs;
117 int i;
118
119 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
120 assert(num_dst == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst);
121 assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src);
122 if (!opcode) {
123 assert(!"invalid aos_simple() call");
124 return;
125 }
126
127 /* no need to emit nop */
128 if (opcode == GEN6_OPCODE_NOP)
129 return;
130
131 inst = tc_add(tc);
132 if (!inst)
133 return;
134
135 inst->opcode = opcode;
136
137 switch (tgsi_inst->Instruction.Opcode) {
138 case TGSI_OPCODE_MIN:
139 case TGSI_OPCODE_IMIN:
140 case TGSI_OPCODE_UMIN:
141 cond_modifier = GEN6_COND_L;
142 break;
143 case TGSI_OPCODE_MAX:
144 case TGSI_OPCODE_IMAX:
145 case TGSI_OPCODE_UMAX:
146 cond_modifier = GEN6_COND_GE;
147 break;
148 case TGSI_OPCODE_SUB:
149 src[1] = tsrc_negate(src[1]);
150 break;
151 case TGSI_OPCODE_ABS:
152 case TGSI_OPCODE_IABS:
153 src[0] = tsrc_absolute(src[0]);
154 break;
155 case TGSI_OPCODE_IF:
156 cond_modifier = GEN6_COND_NZ;
157 num_src = 2;
158 assert(src[0].type == TOY_TYPE_F);
159 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
160 src[1] = tsrc_imm_f(0.0f);
161 break;
162 case TGSI_OPCODE_UIF:
163 cond_modifier = GEN6_COND_NZ;
164 num_src = 2;
165 assert(src[0].type == TOY_TYPE_UD);
166 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
167 src[1] = tsrc_imm_d(0);
168 break;
169 case TGSI_OPCODE_INEG:
170 src[0] = tsrc_negate(src[0]);
171 break;
172 case TGSI_OPCODE_RCP:
173 case TGSI_OPCODE_RSQ:
174 case TGSI_OPCODE_EX2:
175 case TGSI_OPCODE_LG2:
176 case TGSI_OPCODE_COS:
177 case TGSI_OPCODE_SIN:
178 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
179 break;
180 case TGSI_OPCODE_POW:
181 src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
182 src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
183 break;
184 }
185
186 inst->cond_modifier = cond_modifier;
187
188 if (num_dst) {
189 assert(num_dst == 1);
190 inst->dst = dst[0];
191 }
192
193 assert(num_src <= ARRAY_SIZE(inst->src));
194 for (i = 0; i < num_src; i++)
195 inst->src[i] = src[i];
196 }
197
198 static void
199 aos_set_on_cond(struct toy_compiler *tc,
200 const struct tgsi_full_instruction *tgsi_inst,
201 struct toy_dst *dst,
202 struct toy_src *src)
203 {
204 struct toy_inst *inst;
205 int cond;
206 struct toy_src zero, one;
207
208 switch (tgsi_inst->Instruction.Opcode) {
209 case TGSI_OPCODE_SLT:
210 case TGSI_OPCODE_ISLT:
211 case TGSI_OPCODE_USLT:
212 case TGSI_OPCODE_FSLT:
213 cond = GEN6_COND_L;
214 break;
215 case TGSI_OPCODE_SGE:
216 case TGSI_OPCODE_ISGE:
217 case TGSI_OPCODE_USGE:
218 case TGSI_OPCODE_FSGE:
219 cond = GEN6_COND_GE;
220 break;
221 case TGSI_OPCODE_SEQ:
222 case TGSI_OPCODE_USEQ:
223 case TGSI_OPCODE_FSEQ:
224 cond = GEN6_COND_Z;
225 break;
226 case TGSI_OPCODE_SGT:
227 cond = GEN6_COND_G;
228 break;
229 case TGSI_OPCODE_SLE:
230 cond = GEN6_COND_LE;
231 break;
232 case TGSI_OPCODE_SNE:
233 case TGSI_OPCODE_USNE:
234 case TGSI_OPCODE_FSNE:
235 cond = GEN6_COND_NZ;
236 break;
237 default:
238 assert(!"invalid aos_set_on_cond() call");
239 return;
240 }
241
242 /* note that for integer versions, all bits are set */
243 switch (dst[0].type) {
244 case TOY_TYPE_F:
245 default:
246 zero = tsrc_imm_f(0.0f);
247 one = tsrc_imm_f(1.0f);
248 break;
249 case TOY_TYPE_D:
250 zero = tsrc_imm_d(0);
251 one = tsrc_imm_d(-1);
252 break;
253 case TOY_TYPE_UD:
254 zero = tsrc_imm_ud(0);
255 one = tsrc_imm_ud(~0);
256 break;
257 }
258
259 tc_MOV(tc, dst[0], zero);
260 tc_CMP(tc, tdst_null(), src[0], src[1], cond);
261 inst = tc_MOV(tc, dst[0], one);
262 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
263 }
264
265 static void
266 aos_compare(struct toy_compiler *tc,
267 const struct tgsi_full_instruction *tgsi_inst,
268 struct toy_dst *dst,
269 struct toy_src *src)
270 {
271 struct toy_inst *inst;
272 struct toy_src zero;
273
274 switch (tgsi_inst->Instruction.Opcode) {
275 case TGSI_OPCODE_CMP:
276 zero = tsrc_imm_f(0.0f);
277 break;
278 case TGSI_OPCODE_UCMP:
279 zero = tsrc_imm_ud(0);
280 break;
281 default:
282 assert(!"invalid aos_compare() call");
283 return;
284 }
285
286 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
287 inst = tc_SEL(tc, dst[0], src[1], src[2], GEN6_COND_NONE);
288 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
289 }
290
291 static void
292 aos_set_sign(struct toy_compiler *tc,
293 const struct tgsi_full_instruction *tgsi_inst,
294 struct toy_dst *dst,
295 struct toy_src *src)
296 {
297 struct toy_inst *inst;
298 struct toy_src zero, one, neg_one;
299
300 switch (tgsi_inst->Instruction.Opcode) {
301 case TGSI_OPCODE_SSG:
302 zero = tsrc_imm_f(0.0f);
303 one = tsrc_imm_f(1.0f);
304 neg_one = tsrc_imm_f(-1.0f);
305 break;
306 case TGSI_OPCODE_ISSG:
307 zero = tsrc_imm_d(0);
308 one = tsrc_imm_d(1);
309 neg_one = tsrc_imm_d(-1);
310 break;
311 default:
312 assert(!"invalid aos_set_sign() call");
313 return;
314 }
315
316 tc_MOV(tc, dst[0], zero);
317
318 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_G);
319 inst = tc_MOV(tc, dst[0], one);
320 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
321
322 tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
323 inst = tc_MOV(tc, dst[0], neg_one);
324 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
325 }
326
327 static void
328 aos_tex(struct toy_compiler *tc,
329 const struct tgsi_full_instruction *tgsi_inst,
330 struct toy_dst *dst,
331 struct toy_src *src)
332 {
333 struct toy_inst *inst;
334 enum toy_opcode opcode;
335 int i;
336
337 switch (tgsi_inst->Instruction.Opcode) {
338 case TGSI_OPCODE_TEX:
339 opcode = TOY_OPCODE_TGSI_TEX;
340 break;
341 case TGSI_OPCODE_TXD:
342 opcode = TOY_OPCODE_TGSI_TXD;
343 break;
344 case TGSI_OPCODE_TXP:
345 opcode = TOY_OPCODE_TGSI_TXP;
346 break;
347 case TGSI_OPCODE_TXB:
348 opcode = TOY_OPCODE_TGSI_TXB;
349 break;
350 case TGSI_OPCODE_TXL:
351 opcode = TOY_OPCODE_TGSI_TXL;
352 break;
353 case TGSI_OPCODE_TXF:
354 opcode = TOY_OPCODE_TGSI_TXF;
355 break;
356 case TGSI_OPCODE_TXQ:
357 opcode = TOY_OPCODE_TGSI_TXQ;
358 break;
359 case TGSI_OPCODE_TXQ_LZ:
360 opcode = TOY_OPCODE_TGSI_TXQ_LZ;
361 break;
362 case TGSI_OPCODE_TEX2:
363 opcode = TOY_OPCODE_TGSI_TEX2;
364 break;
365 case TGSI_OPCODE_TXB2:
366 opcode = TOY_OPCODE_TGSI_TXB2;
367 break;
368 case TGSI_OPCODE_TXL2:
369 opcode = TOY_OPCODE_TGSI_TXL2;
370 break;
371 default:
372 assert(!"unsupported texturing opcode");
373 return;
374 break;
375 }
376
377 assert(tgsi_inst->Instruction.Texture);
378
379 inst = tc_add(tc);
380 inst->opcode = opcode;
381 inst->tex.target = tgsi_inst->Texture.Texture;
382
383 assert(tgsi_inst->Instruction.NumSrcRegs <= ARRAY_SIZE(inst->src));
384 assert(tgsi_inst->Instruction.NumDstRegs == 1);
385
386 inst->dst = dst[0];
387 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
388 inst->src[i] = src[i];
389
390 for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++)
391 tc_fail(tc, "texelFetchOffset unsupported");
392 }
393
394 static void
395 aos_sample(struct toy_compiler *tc,
396 const struct tgsi_full_instruction *tgsi_inst,
397 struct toy_dst *dst,
398 struct toy_src *src)
399 {
400 struct toy_inst *inst;
401 enum toy_opcode opcode;
402 int i;
403
404 assert(!"sampling untested");
405
406 switch (tgsi_inst->Instruction.Opcode) {
407 case TGSI_OPCODE_SAMPLE:
408 opcode = TOY_OPCODE_TGSI_SAMPLE;
409 break;
410 case TGSI_OPCODE_SAMPLE_I:
411 opcode = TOY_OPCODE_TGSI_SAMPLE_I;
412 break;
413 case TGSI_OPCODE_SAMPLE_I_MS:
414 opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS;
415 break;
416 case TGSI_OPCODE_SAMPLE_B:
417 opcode = TOY_OPCODE_TGSI_SAMPLE_B;
418 break;
419 case TGSI_OPCODE_SAMPLE_C:
420 opcode = TOY_OPCODE_TGSI_SAMPLE_C;
421 break;
422 case TGSI_OPCODE_SAMPLE_C_LZ:
423 opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ;
424 break;
425 case TGSI_OPCODE_SAMPLE_D:
426 opcode = TOY_OPCODE_TGSI_SAMPLE_D;
427 break;
428 case TGSI_OPCODE_SAMPLE_L:
429 opcode = TOY_OPCODE_TGSI_SAMPLE_L;
430 break;
431 case TGSI_OPCODE_GATHER4:
432 opcode = TOY_OPCODE_TGSI_GATHER4;
433 break;
434 case TGSI_OPCODE_SVIEWINFO:
435 opcode = TOY_OPCODE_TGSI_SVIEWINFO;
436 break;
437 case TGSI_OPCODE_SAMPLE_POS:
438 opcode = TOY_OPCODE_TGSI_SAMPLE_POS;
439 break;
440 case TGSI_OPCODE_SAMPLE_INFO:
441 opcode = TOY_OPCODE_TGSI_SAMPLE_INFO;
442 break;
443 default:
444 assert(!"unsupported sampling opcode");
445 return;
446 break;
447 }
448
449 inst = tc_add(tc);
450 inst->opcode = opcode;
451
452 assert(tgsi_inst->Instruction.NumSrcRegs <= ARRAY_SIZE(inst->src));
453 assert(tgsi_inst->Instruction.NumDstRegs == 1);
454
455 inst->dst = dst[0];
456 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
457 inst->src[i] = src[i];
458 }
459
460 static void
461 aos_LIT(struct toy_compiler *tc,
462 const struct tgsi_full_instruction *tgsi_inst,
463 struct toy_dst *dst,
464 struct toy_src *src)
465 {
466 struct toy_inst *inst;
467
468 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));
469
470 if (!(dst[0].writemask & TOY_WRITEMASK_YZ))
471 return;
472
473 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));
474
475 tc_CMP(tc, tdst_null(),
476 tsrc_swizzle1(src[0], TOY_SWIZZLE_X),
477 tsrc_imm_f(0.0f),
478 GEN6_COND_G);
479
480 inst = tc_MOV(tc,
481 tdst_writemask(dst[0], TOY_WRITEMASK_Y),
482 tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
483 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
484
485 /* clamp W to (-128, 128)? */
486 inst = tc_POW(tc,
487 tdst_writemask(dst[0], TOY_WRITEMASK_Z),
488 tsrc_swizzle1(src[0], TOY_SWIZZLE_Y),
489 tsrc_swizzle1(src[0], TOY_SWIZZLE_W));
490 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
491 }
492
493 static void
494 aos_EXP(struct toy_compiler *tc,
495 const struct tgsi_full_instruction *tgsi_inst,
496 struct toy_dst *dst,
497 struct toy_src *src)
498 {
499 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
500
501 if (dst[0].writemask & TOY_WRITEMASK_X) {
502 struct toy_dst tmp =
503 tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
504
505 tc_RNDD(tc, tmp, src0);
506
507 /* construct the floating point number manually */
508 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
509 tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)),
510 tsrc_from(tmp), tsrc_imm_d(23));
511 }
512
513 tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0);
514 tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
515 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
516 }
517
518 static void
519 aos_LOG(struct toy_compiler *tc,
520 const struct tgsi_full_instruction *tgsi_inst,
521 struct toy_dst *dst,
522 struct toy_src *src)
523 {
524 struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
525
526 if (dst[0].writemask & TOY_WRITEMASK_XY) {
527 struct toy_dst tmp;
528
529 tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
530
531 /* exponent */
532 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23));
533 tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
534 tsrc_from(tmp), tsrc_imm_d(-127));
535
536 /* mantissa */
537 tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1));
538 tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
539 tsrc_from(tmp), tsrc_imm_d(127 << 23));
540 }
541
542 tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
543 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
544 }
545
546 static void
547 aos_DST(struct toy_compiler *tc,
548 const struct tgsi_full_instruction *tgsi_inst,
549 struct toy_dst *dst,
550 struct toy_src *src)
551 {
552 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f));
553 tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]);
554 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]);
555 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]);
556 }
557
558 static void
559 aos_LRP(struct toy_compiler *tc,
560 const struct tgsi_full_instruction *tgsi_inst,
561 struct toy_dst *dst,
562 struct toy_src *src)
563 {
564 struct toy_dst tmp = tc_alloc_tmp(tc);
565
566 tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f));
567 tc_MUL(tc, tmp, tsrc_from(tmp), src[2]);
568 tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp));
569 }
570
571 static void
572 aos_DP2A(struct toy_compiler *tc,
573 const struct tgsi_full_instruction *tgsi_inst,
574 struct toy_dst *dst,
575 struct toy_src *src)
576 {
577 struct toy_dst tmp = tc_alloc_tmp(tc);
578
579 assert(!"DP2A untested");
580
581 tc_DP2(tc, tmp, src[0], src[1]);
582 tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]);
583 }
584
585 static void
586 aos_CLAMP(struct toy_compiler *tc,
587 const struct tgsi_full_instruction *tgsi_inst,
588 struct toy_dst *dst,
589 struct toy_src *src)
590 {
591 assert(!"CLAMP untested");
592
593 tc_SEL(tc, dst[0], src[0], src[1], GEN6_COND_GE);
594 tc_SEL(tc, dst[0], src[2], tsrc_from(dst[0]), GEN6_COND_L);
595 }
596
597 static void
598 aos_XPD(struct toy_compiler *tc,
599 const struct tgsi_full_instruction *tgsi_inst,
600 struct toy_dst *dst,
601 struct toy_src *src)
602 {
603 struct toy_dst tmp = tc_alloc_tmp(tc);
604
605 tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ),
606 tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
607 TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
608 tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
609 TOY_SWIZZLE_X, TOY_SWIZZLE_W));
610
611 tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
612 tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
613 TOY_SWIZZLE_X, TOY_SWIZZLE_W),
614 tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
615 TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
616 tsrc_negate(tsrc_from(tmp)));
617
618 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W),
619 tsrc_imm_f(1.0f));
620 }
621
622 static void
623 aos_PK2H(struct toy_compiler *tc,
624 const struct tgsi_full_instruction *tgsi_inst,
625 struct toy_dst *dst,
626 struct toy_src *src)
627 {
628 const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
629 const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
630 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
631
632 assert(!"PK2H untested");
633
634 tc_SHL(tc, tmp, h2, tsrc_imm_ud(16));
635 tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp));
636 }
637
638 static void
639 aos_UP2H(struct toy_compiler *tc,
640 const struct tgsi_full_instruction *tgsi_inst,
641 struct toy_dst *dst,
642 struct toy_src *src)
643 {
644 assert(!"UP2H untested");
645
646 tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ),
647 tsrc_ud(src[0]), tsrc_imm_ud(0xffff));
648 tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW),
649 tsrc_ud(src[0]), tsrc_imm_ud(16));
650 }
651
652 static void
653 aos_SCS(struct toy_compiler *tc,
654 const struct tgsi_full_instruction *tgsi_inst,
655 struct toy_dst *dst,
656 struct toy_src *src)
657 {
658 assert(!"SCS untested");
659
660 tc_add1(tc, TOY_OPCODE_COS,
661 tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]);
662
663 tc_add1(tc, TOY_OPCODE_SIN,
664 tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]);
665
666 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
667 tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
668 }
669
670 static void
671 aos_DIV(struct toy_compiler *tc,
672 const struct tgsi_full_instruction *tgsi_inst,
673 struct toy_dst *dst,
674 struct toy_src *src)
675 {
676 struct toy_dst tmp = tc_alloc_tmp(tc);
677
678 assert(!"DIV untested");
679
680 tc_INV(tc, tmp, src[1]);
681 tc_MUL(tc, dst[0], src[0], tsrc_from(tmp));
682 }
683
684 static void
685 aos_BRK(struct toy_compiler *tc,
686 const struct tgsi_full_instruction *tgsi_inst,
687 struct toy_dst *dst,
688 struct toy_src *src)
689 {
690 tc_add0(tc, GEN6_OPCODE_BREAK);
691 }
692
693 static void
694 aos_CEIL(struct toy_compiler *tc,
695 const struct tgsi_full_instruction *tgsi_inst,
696 struct toy_dst *dst,
697 struct toy_src *src)
698 {
699 struct toy_dst tmp = tc_alloc_tmp(tc);
700
701 tc_RNDD(tc, tmp, tsrc_negate(src[0]));
702 tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp)));
703 }
704
705 static void
706 aos_SAD(struct toy_compiler *tc,
707 const struct tgsi_full_instruction *tgsi_inst,
708 struct toy_dst *dst,
709 struct toy_src *src)
710 {
711 struct toy_dst tmp = tc_alloc_tmp(tc);
712
713 assert(!"SAD untested");
714
715 tc_ADD(tc, tmp, src[0], tsrc_negate(src[1]));
716 tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]);
717 }
718
719 static void
720 aos_CONT(struct toy_compiler *tc,
721 const struct tgsi_full_instruction *tgsi_inst,
722 struct toy_dst *dst,
723 struct toy_src *src)
724 {
725 tc_add0(tc, GEN6_OPCODE_CONT);
726 }
727
728 static void
729 aos_BGNLOOP(struct toy_compiler *tc,
730 const struct tgsi_full_instruction *tgsi_inst,
731 struct toy_dst *dst,
732 struct toy_src *src)
733 {
734 struct toy_inst *inst;
735
736 inst = tc_add0(tc, TOY_OPCODE_DO);
737 /* this is just a marker */
738 inst->marker = true;
739 }
740
741 static void
742 aos_ENDLOOP(struct toy_compiler *tc,
743 const struct tgsi_full_instruction *tgsi_inst,
744 struct toy_dst *dst,
745 struct toy_src *src)
746 {
747 tc_add0(tc, GEN6_OPCODE_WHILE);
748 }
749
750 static void
751 aos_unsupported(struct toy_compiler *tc,
752 const struct tgsi_full_instruction *tgsi_inst,
753 struct toy_dst *dst,
754 struct toy_src *src)
755 {
756 const char *name = tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode);
757
758 ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name);
759
760 tc_fail(tc, "unsupported TGSI instruction");
761 }
762
763 static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
764 [TGSI_OPCODE_ARL] = aos_simple,
765 [TGSI_OPCODE_MOV] = aos_simple,
766 [TGSI_OPCODE_LIT] = aos_LIT,
767 [TGSI_OPCODE_RCP] = aos_simple,
768 [TGSI_OPCODE_RSQ] = aos_simple,
769 [TGSI_OPCODE_EXP] = aos_EXP,
770 [TGSI_OPCODE_LOG] = aos_LOG,
771 [TGSI_OPCODE_MUL] = aos_simple,
772 [TGSI_OPCODE_ADD] = aos_simple,
773 [TGSI_OPCODE_DP3] = aos_simple,
774 [TGSI_OPCODE_DP4] = aos_simple,
775 [TGSI_OPCODE_DST] = aos_DST,
776 [TGSI_OPCODE_MIN] = aos_simple,
777 [TGSI_OPCODE_MAX] = aos_simple,
778 [TGSI_OPCODE_SLT] = aos_set_on_cond,
779 [TGSI_OPCODE_SGE] = aos_set_on_cond,
780 [TGSI_OPCODE_MAD] = aos_simple,
781 [TGSI_OPCODE_SUB] = aos_simple,
782 [TGSI_OPCODE_LRP] = aos_LRP,
783 [TGSI_OPCODE_SQRT] = aos_simple,
784 [TGSI_OPCODE_DP2A] = aos_DP2A,
785 [TGSI_OPCODE_FRC] = aos_simple,
786 [TGSI_OPCODE_CLAMP] = aos_CLAMP,
787 [TGSI_OPCODE_FLR] = aos_simple,
788 [TGSI_OPCODE_ROUND] = aos_simple,
789 [TGSI_OPCODE_EX2] = aos_simple,
790 [TGSI_OPCODE_LG2] = aos_simple,
791 [TGSI_OPCODE_POW] = aos_simple,
792 [TGSI_OPCODE_XPD] = aos_XPD,
793 [TGSI_OPCODE_ABS] = aos_simple,
794 [TGSI_OPCODE_DPH] = aos_simple,
795 [TGSI_OPCODE_COS] = aos_simple,
796 [TGSI_OPCODE_DDX] = aos_unsupported,
797 [TGSI_OPCODE_DDY] = aos_unsupported,
798 [TGSI_OPCODE_KILL] = aos_simple,
799 [TGSI_OPCODE_PK2H] = aos_PK2H,
800 [TGSI_OPCODE_PK2US] = aos_unsupported,
801 [TGSI_OPCODE_PK4B] = aos_unsupported,
802 [TGSI_OPCODE_PK4UB] = aos_unsupported,
803 [TGSI_OPCODE_SEQ] = aos_set_on_cond,
804 [TGSI_OPCODE_SGT] = aos_set_on_cond,
805 [TGSI_OPCODE_SIN] = aos_simple,
806 [TGSI_OPCODE_SLE] = aos_set_on_cond,
807 [TGSI_OPCODE_SNE] = aos_set_on_cond,
808 [TGSI_OPCODE_TEX] = aos_tex,
809 [TGSI_OPCODE_TXD] = aos_tex,
810 [TGSI_OPCODE_TXP] = aos_tex,
811 [TGSI_OPCODE_UP2H] = aos_UP2H,
812 [TGSI_OPCODE_UP2US] = aos_unsupported,
813 [TGSI_OPCODE_UP4B] = aos_unsupported,
814 [TGSI_OPCODE_UP4UB] = aos_unsupported,
815 [TGSI_OPCODE_ARR] = aos_simple,
816 [TGSI_OPCODE_CAL] = aos_unsupported,
817 [TGSI_OPCODE_RET] = aos_unsupported,
818 [TGSI_OPCODE_SSG] = aos_set_sign,
819 [TGSI_OPCODE_CMP] = aos_compare,
820 [TGSI_OPCODE_SCS] = aos_SCS,
821 [TGSI_OPCODE_TXB] = aos_tex,
822 [TGSI_OPCODE_DIV] = aos_DIV,
823 [TGSI_OPCODE_DP2] = aos_simple,
824 [TGSI_OPCODE_TXL] = aos_tex,
825 [TGSI_OPCODE_BRK] = aos_BRK,
826 [TGSI_OPCODE_IF] = aos_simple,
827 [TGSI_OPCODE_UIF] = aos_simple,
828 [TGSI_OPCODE_ELSE] = aos_simple,
829 [TGSI_OPCODE_ENDIF] = aos_simple,
830 [TGSI_OPCODE_PUSHA] = aos_unsupported,
831 [TGSI_OPCODE_POPA] = aos_unsupported,
832 [TGSI_OPCODE_CEIL] = aos_CEIL,
833 [TGSI_OPCODE_I2F] = aos_simple,
834 [TGSI_OPCODE_NOT] = aos_simple,
835 [TGSI_OPCODE_TRUNC] = aos_simple,
836 [TGSI_OPCODE_SHL] = aos_simple,
837 [TGSI_OPCODE_AND] = aos_simple,
838 [TGSI_OPCODE_OR] = aos_simple,
839 [TGSI_OPCODE_MOD] = aos_simple,
840 [TGSI_OPCODE_XOR] = aos_simple,
841 [TGSI_OPCODE_SAD] = aos_SAD,
842 [TGSI_OPCODE_TXF] = aos_tex,
843 [TGSI_OPCODE_TXQ] = aos_tex,
844 [TGSI_OPCODE_CONT] = aos_CONT,
845 [TGSI_OPCODE_EMIT] = aos_simple,
846 [TGSI_OPCODE_ENDPRIM] = aos_simple,
847 [TGSI_OPCODE_BGNLOOP] = aos_BGNLOOP,
848 [TGSI_OPCODE_BGNSUB] = aos_unsupported,
849 [TGSI_OPCODE_ENDLOOP] = aos_ENDLOOP,
850 [TGSI_OPCODE_ENDSUB] = aos_unsupported,
851 [TGSI_OPCODE_TXQ_LZ] = aos_tex,
852 [TGSI_OPCODE_NOP] = aos_simple,
853 [TGSI_OPCODE_FSEQ] = aos_set_on_cond,
854 [TGSI_OPCODE_FSGE] = aos_set_on_cond,
855 [TGSI_OPCODE_FSLT] = aos_set_on_cond,
856 [TGSI_OPCODE_FSNE] = aos_set_on_cond,
857 [TGSI_OPCODE_CALLNZ] = aos_unsupported,
858 [TGSI_OPCODE_BREAKC] = aos_unsupported,
859 [TGSI_OPCODE_KILL_IF] = aos_simple,
860 [TGSI_OPCODE_END] = aos_simple,
861 [TGSI_OPCODE_F2I] = aos_simple,
862 [TGSI_OPCODE_IDIV] = aos_simple,
863 [TGSI_OPCODE_IMAX] = aos_simple,
864 [TGSI_OPCODE_IMIN] = aos_simple,
865 [TGSI_OPCODE_INEG] = aos_simple,
866 [TGSI_OPCODE_ISGE] = aos_set_on_cond,
867 [TGSI_OPCODE_ISHR] = aos_simple,
868 [TGSI_OPCODE_ISLT] = aos_set_on_cond,
869 [TGSI_OPCODE_F2U] = aos_simple,
870 [TGSI_OPCODE_U2F] = aos_simple,
871 [TGSI_OPCODE_UADD] = aos_simple,
872 [TGSI_OPCODE_UDIV] = aos_simple,
873 [TGSI_OPCODE_UMAD] = aos_simple,
874 [TGSI_OPCODE_UMAX] = aos_simple,
875 [TGSI_OPCODE_UMIN] = aos_simple,
876 [TGSI_OPCODE_UMOD] = aos_simple,
877 [TGSI_OPCODE_UMUL] = aos_simple,
878 [TGSI_OPCODE_USEQ] = aos_set_on_cond,
879 [TGSI_OPCODE_USGE] = aos_set_on_cond,
880 [TGSI_OPCODE_USHR] = aos_simple,
881 [TGSI_OPCODE_USLT] = aos_set_on_cond,
882 [TGSI_OPCODE_USNE] = aos_set_on_cond,
883 [TGSI_OPCODE_SWITCH] = aos_unsupported,
884 [TGSI_OPCODE_CASE] = aos_unsupported,
885 [TGSI_OPCODE_DEFAULT] = aos_unsupported,
886 [TGSI_OPCODE_ENDSWITCH] = aos_unsupported,
887 [TGSI_OPCODE_SAMPLE] = aos_sample,
888 [TGSI_OPCODE_SAMPLE_I] = aos_sample,
889 [TGSI_OPCODE_SAMPLE_I_MS] = aos_sample,
890 [TGSI_OPCODE_SAMPLE_B] = aos_sample,
891 [TGSI_OPCODE_SAMPLE_C] = aos_sample,
892 [TGSI_OPCODE_SAMPLE_C_LZ] = aos_sample,
893 [TGSI_OPCODE_SAMPLE_D] = aos_sample,
894 [TGSI_OPCODE_SAMPLE_L] = aos_sample,
895 [TGSI_OPCODE_GATHER4] = aos_sample,
896 [TGSI_OPCODE_SVIEWINFO] = aos_sample,
897 [TGSI_OPCODE_SAMPLE_POS] = aos_sample,
898 [TGSI_OPCODE_SAMPLE_INFO] = aos_sample,
899 [TGSI_OPCODE_UARL] = aos_simple,
900 [TGSI_OPCODE_UCMP] = aos_compare,
901 [TGSI_OPCODE_IABS] = aos_simple,
902 [TGSI_OPCODE_ISSG] = aos_set_sign,
903 [TGSI_OPCODE_LOAD] = aos_unsupported,
904 [TGSI_OPCODE_STORE] = aos_unsupported,
905 [TGSI_OPCODE_MFENCE] = aos_unsupported,
906 [TGSI_OPCODE_LFENCE] = aos_unsupported,
907 [TGSI_OPCODE_SFENCE] = aos_unsupported,
908 [TGSI_OPCODE_BARRIER] = aos_unsupported,
909 [TGSI_OPCODE_ATOMUADD] = aos_unsupported,
910 [TGSI_OPCODE_ATOMXCHG] = aos_unsupported,
911 [TGSI_OPCODE_ATOMCAS] = aos_unsupported,
912 [TGSI_OPCODE_ATOMAND] = aos_unsupported,
913 [TGSI_OPCODE_ATOMOR] = aos_unsupported,
914 [TGSI_OPCODE_ATOMXOR] = aos_unsupported,
915 [TGSI_OPCODE_ATOMUMIN] = aos_unsupported,
916 [TGSI_OPCODE_ATOMUMAX] = aos_unsupported,
917 [TGSI_OPCODE_ATOMIMIN] = aos_unsupported,
918 [TGSI_OPCODE_ATOMIMAX] = aos_unsupported,
919 [TGSI_OPCODE_TEX2] = aos_tex,
920 [TGSI_OPCODE_TXB2] = aos_tex,
921 [TGSI_OPCODE_TXL2] = aos_tex,
922 };
923
924 static void
925 soa_passthrough(struct toy_compiler *tc,
926 const struct tgsi_full_instruction *tgsi_inst,
927 struct toy_dst *dst_,
928 struct toy_src *src_)
929 {
930 const toy_tgsi_translate translate =
931 aos_translate_table[tgsi_inst->Instruction.Opcode];
932
933 translate(tc, tgsi_inst, dst_, src_);
934 }
935
936 static void
937 soa_per_channel(struct toy_compiler *tc,
938 const struct tgsi_full_instruction *tgsi_inst,
939 struct toy_dst *dst_,
940 struct toy_src *src_)
941 {
942 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4];
943 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
944 int i, ch;
945
946 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
947 tdst_transpose(dst_[i], dst[i]);
948 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
949 tsrc_transpose(src_[i], src[i]);
950
951 /* emit the same instruction four times for the four channels */
952 for (ch = 0; ch < 4; ch++) {
953 struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS];
954 struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS];
955
956 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
957 aos_dst[i] = dst[i][ch];
958 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
959 aos_src[i] = src[i][ch];
960
961 aos_translate_table[tgsi_inst->Instruction.Opcode](tc,
962 tgsi_inst, aos_dst, aos_src);
963 }
964 }
965
966 static void
967 soa_scalar_replicate(struct toy_compiler *tc,
968 const struct tgsi_full_instruction *tgsi_inst,
969 struct toy_dst *dst_,
970 struct toy_src *src_)
971 {
972 struct toy_dst dst0[4], tmp;
973 struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS];
974 int opcode, i;
975
976 assert(tgsi_inst->Instruction.NumDstRegs == 1);
977
978 tdst_transpose(dst_[0], dst0);
979 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
980 struct toy_src tmp[4];
981
982 tsrc_transpose(src_[i], tmp);
983 /* only the X channels */
984 srcx[i] = tmp[0];
985 }
986
987 tmp = tc_alloc_tmp(tc);
988
989 opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
990 assert(opcode);
991
992 switch (tgsi_inst->Instruction.Opcode) {
993 case TGSI_OPCODE_RCP:
994 case TGSI_OPCODE_RSQ:
995 case TGSI_OPCODE_SQRT:
996 case TGSI_OPCODE_EX2:
997 case TGSI_OPCODE_LG2:
998 case TGSI_OPCODE_COS:
999 case TGSI_OPCODE_SIN:
1000 tc_add1(tc, opcode, tmp, srcx[0]);
1001 break;
1002 case TGSI_OPCODE_POW:
1003 tc_add2(tc, opcode, tmp, srcx[0], srcx[1]);
1004 break;
1005 default:
1006 assert(!"invalid soa_scalar_replicate() call");
1007 return;
1008 }
1009
1010 /* replicate the result */
1011 for (i = 0; i < 4; i++)
1012 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1013 }
1014
1015 static void
1016 soa_dot_product(struct toy_compiler *tc,
1017 const struct tgsi_full_instruction *tgsi_inst,
1018 struct toy_dst *dst_,
1019 struct toy_src *src_)
1020 {
1021 struct toy_dst dst0[4], tmp;
1022 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
1023 int i;
1024
1025 tdst_transpose(dst_[0], dst0);
1026 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
1027 tsrc_transpose(src_[i], src[i]);
1028
1029 tmp = tc_alloc_tmp(tc);
1030
1031 switch (tgsi_inst->Instruction.Opcode) {
1032 case TGSI_OPCODE_DP2:
1033 tc_MUL(tc, tmp, src[0][1], src[1][1]);
1034 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1035 break;
1036 case TGSI_OPCODE_DP2A:
1037 tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]);
1038 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1039 break;
1040 case TGSI_OPCODE_DP3:
1041 tc_MUL(tc, tmp, src[0][2], src[1][2]);
1042 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1043 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1044 break;
1045 case TGSI_OPCODE_DPH:
1046 tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]);
1047 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1048 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1049 break;
1050 case TGSI_OPCODE_DP4:
1051 tc_MUL(tc, tmp, src[0][3], src[1][3]);
1052 tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp));
1053 tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1054 tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1055 break;
1056 default:
1057 assert(!"invalid soa_dot_product() call");
1058 return;
1059 }
1060
1061 for (i = 0; i < 4; i++)
1062 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1063 }
1064
1065 static void
1066 soa_partial_derivative(struct toy_compiler *tc,
1067 const struct tgsi_full_instruction *tgsi_inst,
1068 struct toy_dst *dst_,
1069 struct toy_src *src_)
1070 {
1071 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX)
1072 tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]);
1073 else
1074 tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]);
1075 }
1076
1077 static void
1078 soa_if(struct toy_compiler *tc,
1079 const struct tgsi_full_instruction *tgsi_inst,
1080 struct toy_dst *dst_,
1081 struct toy_src *src_)
1082 {
1083 struct toy_src src0[4];
1084
1085 assert(tsrc_is_swizzle1(src_[0]));
1086 tsrc_transpose(src_[0], src0);
1087
1088 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF)
1089 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_NZ);
1090 else
1091 tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), GEN6_COND_NZ);
1092 }
1093
1094 static void
1095 soa_LIT(struct toy_compiler *tc,
1096 const struct tgsi_full_instruction *tgsi_inst,
1097 struct toy_dst *dst_,
1098 struct toy_src *src_)
1099 {
1100 struct toy_inst *inst;
1101 struct toy_dst dst0[4];
1102 struct toy_src src0[4];
1103
1104 tdst_transpose(dst_[0], dst0);
1105 tsrc_transpose(src_[0], src0);
1106
1107 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1108 tc_MOV(tc, dst0[1], src0[0]);
1109 tc_POW(tc, dst0[2], src0[1], src0[3]);
1110 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1111
1112 /*
1113 * POW is calculated first because math with pred_ctrl is broken here.
1114 * But, why?
1115 */
1116 tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_L);
1117 inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f));
1118 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
1119 inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1120 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
1121 }
1122
1123 static void
1124 soa_EXP(struct toy_compiler *tc,
1125 const struct tgsi_full_instruction *tgsi_inst,
1126 struct toy_dst *dst_,
1127 struct toy_src *src_)
1128 {
1129 struct toy_dst dst0[4];
1130 struct toy_src src0[4];
1131
1132 assert(!"SoA EXP untested");
1133
1134 tdst_transpose(dst_[0], dst0);
1135 tsrc_transpose(src_[0], src0);
1136
1137 if (!tdst_is_null(dst0[0])) {
1138 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1139
1140 tc_RNDD(tc, tmp, src0[0]);
1141
1142 /* construct the floating point number manually */
1143 tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
1144 tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23));
1145 }
1146
1147 tc_FRC(tc, dst0[1], src0[0]);
1148 tc_EXP(tc, dst0[2], src0[0]);
1149 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1150 }
1151
1152 static void
1153 soa_LOG(struct toy_compiler *tc,
1154 const struct tgsi_full_instruction *tgsi_inst,
1155 struct toy_dst *dst_,
1156 struct toy_src *src_)
1157 {
1158 struct toy_dst dst0[4];
1159 struct toy_src src0[4];
1160
1161 assert(!"SoA LOG untested");
1162
1163 tdst_transpose(dst_[0], dst0);
1164 tsrc_transpose(src_[0], src0);
1165
1166 if (dst_[0].writemask & TOY_WRITEMASK_XY) {
1167 struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1168
1169 /* exponent */
1170 tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23));
1171 tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127));
1172
1173 /* mantissa */
1174 tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1));
1175 tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23));
1176 }
1177
1178 tc_LOG(tc, dst0[2], src0[0]);
1179 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1180 }
1181
1182 static void
1183 soa_DST(struct toy_compiler *tc,
1184 const struct tgsi_full_instruction *tgsi_inst,
1185 struct toy_dst *dst_,
1186 struct toy_src *src_)
1187 {
1188 struct toy_dst dst0[4];
1189 struct toy_src src[2][4];
1190
1191 tdst_transpose(dst_[0], dst0);
1192 tsrc_transpose(src_[0], src[0]);
1193 tsrc_transpose(src_[1], src[1]);
1194
1195 tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1196 tc_MUL(tc, dst0[1], src[0][1], src[1][1]);
1197 tc_MOV(tc, dst0[2], src[0][2]);
1198 tc_MOV(tc, dst0[3], src[1][3]);
1199 }
1200
1201 static void
1202 soa_XPD(struct toy_compiler *tc,
1203 const struct tgsi_full_instruction *tgsi_inst,
1204 struct toy_dst *dst_,
1205 struct toy_src *src_)
1206 {
1207 struct toy_dst dst0[4];
1208 struct toy_src src[2][4];
1209
1210 tdst_transpose(dst_[0], dst0);
1211 tsrc_transpose(src_[0], src[0]);
1212 tsrc_transpose(src_[1], src[1]);
1213
1214 /* dst.x = src0.y * src1.z - src1.y * src0.z */
1215 tc_MUL(tc, dst0[0], src[0][2], src[1][1]);
1216 tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0])));
1217
1218 /* dst.y = src0.z * src1.x - src1.z * src0.x */
1219 tc_MUL(tc, dst0[1], src[0][0], src[1][2]);
1220 tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1])));
1221
1222 /* dst.z = src0.x * src1.y - src1.x * src0.y */
1223 tc_MUL(tc, dst0[2], src[0][1], src[1][0]);
1224 tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2])));
1225
1226 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1227 }
1228
1229 static void
1230 soa_PK2H(struct toy_compiler *tc,
1231 const struct tgsi_full_instruction *tgsi_inst,
1232 struct toy_dst *dst_,
1233 struct toy_src *src_)
1234 {
1235 struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
1236 struct toy_dst dst0[4];
1237 struct toy_src src0[4];
1238 int i;
1239
1240 assert(!"SoA PK2H untested");
1241
1242 tdst_transpose(dst_[0], dst0);
1243 tsrc_transpose(src_[0], src0);
1244
1245 tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16));
1246 tc_OR(tc, tmp, src0[0], tsrc_from(tmp));
1247
1248 for (i = 0; i < 4; i++)
1249 tc_MOV(tc, dst0[i], tsrc_from(tmp));
1250 }
1251
1252 static void
1253 soa_UP2H(struct toy_compiler *tc,
1254 const struct tgsi_full_instruction *tgsi_inst,
1255 struct toy_dst *dst_,
1256 struct toy_src *src_)
1257 {
1258 struct toy_dst dst0[4];
1259 struct toy_src src0[4];
1260
1261 assert(!"SoA UP2H untested");
1262
1263 tdst_transpose(dst_[0], dst0);
1264 tsrc_transpose(src_[0], src0);
1265
1266 tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff));
1267 tc_SHR(tc, tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16));
1268 tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff));
1269 tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16));
1270
1271 }
1272
1273 static void
1274 soa_SCS(struct toy_compiler *tc,
1275 const struct tgsi_full_instruction *tgsi_inst,
1276 struct toy_dst *dst_,
1277 struct toy_src *src_)
1278 {
1279 struct toy_dst dst0[4];
1280 struct toy_src src0[4];
1281
1282 tdst_transpose(dst_[0], dst0);
1283 tsrc_transpose(src_[0], src0);
1284
1285 tc_add1(tc, TOY_OPCODE_COS, dst0[0], src0[0]);
1286 tc_add1(tc, TOY_OPCODE_SIN, dst0[1], src0[0]);
1287 tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1288 tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1289 }
1290
1291 static void
1292 soa_unsupported(struct toy_compiler *tc,
1293 const struct tgsi_full_instruction *tgsi_inst,
1294 struct toy_dst *dst_,
1295 struct toy_src *src_)
1296 {
1297 const struct tgsi_opcode_info *info =
1298 tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode);
1299
1300 ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
1301 info->mnemonic);
1302
1303 tc_fail(tc, "unsupported TGSI instruction in SoA form");
1304 }
1305
1306 static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
1307 [TGSI_OPCODE_ARL] = soa_per_channel,
1308 [TGSI_OPCODE_MOV] = soa_per_channel,
1309 [TGSI_OPCODE_LIT] = soa_LIT,
1310 [TGSI_OPCODE_RCP] = soa_scalar_replicate,
1311 [TGSI_OPCODE_RSQ] = soa_scalar_replicate,
1312 [TGSI_OPCODE_EXP] = soa_EXP,
1313 [TGSI_OPCODE_LOG] = soa_LOG,
1314 [TGSI_OPCODE_MUL] = soa_per_channel,
1315 [TGSI_OPCODE_ADD] = soa_per_channel,
1316 [TGSI_OPCODE_DP3] = soa_dot_product,
1317 [TGSI_OPCODE_DP4] = soa_dot_product,
1318 [TGSI_OPCODE_DST] = soa_DST,
1319 [TGSI_OPCODE_MIN] = soa_per_channel,
1320 [TGSI_OPCODE_MAX] = soa_per_channel,
1321 [TGSI_OPCODE_SLT] = soa_per_channel,
1322 [TGSI_OPCODE_SGE] = soa_per_channel,
1323 [TGSI_OPCODE_MAD] = soa_per_channel,
1324 [TGSI_OPCODE_SUB] = soa_per_channel,
1325 [TGSI_OPCODE_LRP] = soa_per_channel,
1326 [TGSI_OPCODE_SQRT] = soa_scalar_replicate,
1327 [TGSI_OPCODE_DP2A] = soa_dot_product,
1328 [TGSI_OPCODE_FRC] = soa_per_channel,
1329 [TGSI_OPCODE_CLAMP] = soa_per_channel,
1330 [TGSI_OPCODE_FLR] = soa_per_channel,
1331 [TGSI_OPCODE_ROUND] = soa_per_channel,
1332 [TGSI_OPCODE_EX2] = soa_scalar_replicate,
1333 [TGSI_OPCODE_LG2] = soa_scalar_replicate,
1334 [TGSI_OPCODE_POW] = soa_scalar_replicate,
1335 [TGSI_OPCODE_XPD] = soa_XPD,
1336 [TGSI_OPCODE_ABS] = soa_per_channel,
1337 [TGSI_OPCODE_DPH] = soa_dot_product,
1338 [TGSI_OPCODE_COS] = soa_scalar_replicate,
1339 [TGSI_OPCODE_DDX] = soa_partial_derivative,
1340 [TGSI_OPCODE_DDY] = soa_partial_derivative,
1341 [TGSI_OPCODE_KILL] = soa_passthrough,
1342 [TGSI_OPCODE_PK2H] = soa_PK2H,
1343 [TGSI_OPCODE_PK2US] = soa_unsupported,
1344 [TGSI_OPCODE_PK4B] = soa_unsupported,
1345 [TGSI_OPCODE_PK4UB] = soa_unsupported,
1346 [TGSI_OPCODE_SEQ] = soa_per_channel,
1347 [TGSI_OPCODE_SGT] = soa_per_channel,
1348 [TGSI_OPCODE_SIN] = soa_scalar_replicate,
1349 [TGSI_OPCODE_SLE] = soa_per_channel,
1350 [TGSI_OPCODE_SNE] = soa_per_channel,
1351 [TGSI_OPCODE_TEX] = soa_passthrough,
1352 [TGSI_OPCODE_TXD] = soa_passthrough,
1353 [TGSI_OPCODE_TXP] = soa_passthrough,
1354 [TGSI_OPCODE_UP2H] = soa_UP2H,
1355 [TGSI_OPCODE_UP2US] = soa_unsupported,
1356 [TGSI_OPCODE_UP4B] = soa_unsupported,
1357 [TGSI_OPCODE_UP4UB] = soa_unsupported,
1358 [TGSI_OPCODE_ARR] = soa_per_channel,
1359 [TGSI_OPCODE_CAL] = soa_unsupported,
1360 [TGSI_OPCODE_RET] = soa_unsupported,
1361 [TGSI_OPCODE_SSG] = soa_per_channel,
1362 [TGSI_OPCODE_CMP] = soa_per_channel,
1363 [TGSI_OPCODE_SCS] = soa_SCS,
1364 [TGSI_OPCODE_TXB] = soa_passthrough,
1365 [TGSI_OPCODE_DIV] = soa_per_channel,
1366 [TGSI_OPCODE_DP2] = soa_dot_product,
1367 [TGSI_OPCODE_TXL] = soa_passthrough,
1368 [TGSI_OPCODE_BRK] = soa_passthrough,
1369 [TGSI_OPCODE_IF] = soa_if,
1370 [TGSI_OPCODE_UIF] = soa_if,
1371 [TGSI_OPCODE_ELSE] = soa_passthrough,
1372 [TGSI_OPCODE_ENDIF] = soa_passthrough,
1373 [TGSI_OPCODE_PUSHA] = soa_unsupported,
1374 [TGSI_OPCODE_POPA] = soa_unsupported,
1375 [TGSI_OPCODE_CEIL] = soa_per_channel,
1376 [TGSI_OPCODE_I2F] = soa_per_channel,
1377 [TGSI_OPCODE_NOT] = soa_per_channel,
1378 [TGSI_OPCODE_TRUNC] = soa_per_channel,
1379 [TGSI_OPCODE_SHL] = soa_per_channel,
1380 [TGSI_OPCODE_AND] = soa_per_channel,
1381 [TGSI_OPCODE_OR] = soa_per_channel,
1382 [TGSI_OPCODE_MOD] = soa_per_channel,
1383 [TGSI_OPCODE_XOR] = soa_per_channel,
1384 [TGSI_OPCODE_SAD] = soa_per_channel,
1385 [TGSI_OPCODE_TXF] = soa_passthrough,
1386 [TGSI_OPCODE_TXQ] = soa_passthrough,
1387 [TGSI_OPCODE_CONT] = soa_passthrough,
1388 [TGSI_OPCODE_EMIT] = soa_unsupported,
1389 [TGSI_OPCODE_ENDPRIM] = soa_unsupported,
1390 [TGSI_OPCODE_BGNLOOP] = soa_passthrough,
1391 [TGSI_OPCODE_BGNSUB] = soa_unsupported,
1392 [TGSI_OPCODE_ENDLOOP] = soa_passthrough,
1393 [TGSI_OPCODE_ENDSUB] = soa_unsupported,
1394 [TGSI_OPCODE_TXQ_LZ] = soa_passthrough,
1395 [TGSI_OPCODE_NOP] = soa_passthrough,
1396 [TGSI_OPCODE_FSEQ] = soa_per_channel,
1397 [TGSI_OPCODE_FSGE] = soa_per_channel,
1398 [TGSI_OPCODE_FSLT] = soa_per_channel,
1399 [TGSI_OPCODE_FSNE] = soa_per_channel,
1400 [TGSI_OPCODE_CALLNZ] = soa_unsupported,
1401 [TGSI_OPCODE_BREAKC] = soa_unsupported,
1402 [TGSI_OPCODE_KILL_IF] = soa_passthrough,
1403 [TGSI_OPCODE_END] = soa_passthrough,
1404 [TGSI_OPCODE_F2I] = soa_per_channel,
1405 [TGSI_OPCODE_IDIV] = soa_per_channel,
1406 [TGSI_OPCODE_IMAX] = soa_per_channel,
1407 [TGSI_OPCODE_IMIN] = soa_per_channel,
1408 [TGSI_OPCODE_INEG] = soa_per_channel,
1409 [TGSI_OPCODE_ISGE] = soa_per_channel,
1410 [TGSI_OPCODE_ISHR] = soa_per_channel,
1411 [TGSI_OPCODE_ISLT] = soa_per_channel,
1412 [TGSI_OPCODE_F2U] = soa_per_channel,
1413 [TGSI_OPCODE_U2F] = soa_per_channel,
1414 [TGSI_OPCODE_UADD] = soa_per_channel,
1415 [TGSI_OPCODE_UDIV] = soa_per_channel,
1416 [TGSI_OPCODE_UMAD] = soa_per_channel,
1417 [TGSI_OPCODE_UMAX] = soa_per_channel,
1418 [TGSI_OPCODE_UMIN] = soa_per_channel,
1419 [TGSI_OPCODE_UMOD] = soa_per_channel,
1420 [TGSI_OPCODE_UMUL] = soa_per_channel,
1421 [TGSI_OPCODE_USEQ] = soa_per_channel,
1422 [TGSI_OPCODE_USGE] = soa_per_channel,
1423 [TGSI_OPCODE_USHR] = soa_per_channel,
1424 [TGSI_OPCODE_USLT] = soa_per_channel,
1425 [TGSI_OPCODE_USNE] = soa_per_channel,
1426 [TGSI_OPCODE_SWITCH] = soa_unsupported,
1427 [TGSI_OPCODE_CASE] = soa_unsupported,
1428 [TGSI_OPCODE_DEFAULT] = soa_unsupported,
1429 [TGSI_OPCODE_ENDSWITCH] = soa_unsupported,
1430 [TGSI_OPCODE_SAMPLE] = soa_passthrough,
1431 [TGSI_OPCODE_SAMPLE_I] = soa_passthrough,
1432 [TGSI_OPCODE_SAMPLE_I_MS] = soa_passthrough,
1433 [TGSI_OPCODE_SAMPLE_B] = soa_passthrough,
1434 [TGSI_OPCODE_SAMPLE_C] = soa_passthrough,
1435 [TGSI_OPCODE_SAMPLE_C_LZ] = soa_passthrough,
1436 [TGSI_OPCODE_SAMPLE_D] = soa_passthrough,
1437 [TGSI_OPCODE_SAMPLE_L] = soa_passthrough,
1438 [TGSI_OPCODE_GATHER4] = soa_passthrough,
1439 [TGSI_OPCODE_SVIEWINFO] = soa_passthrough,
1440 [TGSI_OPCODE_SAMPLE_POS] = soa_passthrough,
1441 [TGSI_OPCODE_SAMPLE_INFO] = soa_passthrough,
1442 [TGSI_OPCODE_UARL] = soa_per_channel,
1443 [TGSI_OPCODE_UCMP] = soa_per_channel,
1444 [TGSI_OPCODE_IABS] = soa_per_channel,
1445 [TGSI_OPCODE_ISSG] = soa_per_channel,
1446 [TGSI_OPCODE_LOAD] = soa_unsupported,
1447 [TGSI_OPCODE_STORE] = soa_unsupported,
1448 [TGSI_OPCODE_MFENCE] = soa_unsupported,
1449 [TGSI_OPCODE_LFENCE] = soa_unsupported,
1450 [TGSI_OPCODE_SFENCE] = soa_unsupported,
1451 [TGSI_OPCODE_BARRIER] = soa_unsupported,
1452 [TGSI_OPCODE_ATOMUADD] = soa_unsupported,
1453 [TGSI_OPCODE_ATOMXCHG] = soa_unsupported,
1454 [TGSI_OPCODE_ATOMCAS] = soa_unsupported,
1455 [TGSI_OPCODE_ATOMAND] = soa_unsupported,
1456 [TGSI_OPCODE_ATOMOR] = soa_unsupported,
1457 [TGSI_OPCODE_ATOMXOR] = soa_unsupported,
1458 [TGSI_OPCODE_ATOMUMIN] = soa_unsupported,
1459 [TGSI_OPCODE_ATOMUMAX] = soa_unsupported,
1460 [TGSI_OPCODE_ATOMIMIN] = soa_unsupported,
1461 [TGSI_OPCODE_ATOMIMAX] = soa_unsupported,
1462 [TGSI_OPCODE_TEX2] = soa_passthrough,
1463 [TGSI_OPCODE_TXB2] = soa_passthrough,
1464 [TGSI_OPCODE_TXL2] = soa_passthrough,
1465 };
1466
1467 static bool
1468 ra_dst_is_indirect(const struct tgsi_full_dst_register *d)
1469 {
1470 return (d->Register.Indirect ||
1471 (d->Register.Dimension && d->Dimension.Indirect));
1472 }
1473
1474 static int
1475 ra_dst_index(const struct tgsi_full_dst_register *d)
1476 {
1477 assert(!d->Register.Indirect);
1478 return d->Register.Index;
1479 }
1480
1481 static int
1482 ra_dst_dimension(const struct tgsi_full_dst_register *d)
1483 {
1484 if (d->Register.Dimension) {
1485 assert(!d->Dimension.Indirect);
1486 return d->Dimension.Index;
1487 }
1488 else {
1489 return 0;
1490 }
1491 }
1492
1493 static bool
1494 ra_is_src_indirect(const struct tgsi_full_src_register *s)
1495 {
1496 return (s->Register.Indirect ||
1497 (s->Register.Dimension && s->Dimension.Indirect));
1498 }
1499
1500 static int
1501 ra_src_index(const struct tgsi_full_src_register *s)
1502 {
1503 assert(!s->Register.Indirect);
1504 return s->Register.Index;
1505 }
1506
1507 static int
1508 ra_src_dimension(const struct tgsi_full_src_register *s)
1509 {
1510 if (s->Register.Dimension) {
1511 assert(!s->Dimension.Indirect);
1512 return s->Dimension.Index;
1513 }
1514 else {
1515 return 0;
1516 }
1517 }
1518
1519 /**
1520 * Infer the type of either the sources or the destination.
1521 */
1522 static enum toy_type
1523 ra_infer_opcode_type(int tgsi_opcode, bool is_dst)
1524 {
1525 enum tgsi_opcode_type type;
1526
1527 if (is_dst)
1528 type = tgsi_opcode_infer_dst_type(tgsi_opcode);
1529 else
1530 type = tgsi_opcode_infer_src_type(tgsi_opcode);
1531
1532 switch (type) {
1533 case TGSI_TYPE_UNSIGNED:
1534 return TOY_TYPE_UD;
1535 case TGSI_TYPE_SIGNED:
1536 return TOY_TYPE_D;
1537 case TGSI_TYPE_FLOAT:
1538 return TOY_TYPE_F;
1539 case TGSI_TYPE_UNTYPED:
1540 case TGSI_TYPE_VOID:
1541 case TGSI_TYPE_DOUBLE:
1542 default:
1543 assert(!"unsupported TGSI type");
1544 return TOY_TYPE_UD;
1545 }
1546 }
1547
1548 /**
1549 * Return the type of an operand of the specified instruction.
1550 */
1551 static enum toy_type
1552 ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
1553 int operand, bool is_dst)
1554 {
1555 enum toy_type type;
1556 enum tgsi_file_type file;
1557
1558 /* we need to look at both src and dst for MOV */
1559 /* XXX it should not be this complex */
1560 if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
1561 const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
1562 const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File;
1563
1564 if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) {
1565 type = TOY_TYPE_D;
1566 }
1567 else if (src_file == TGSI_FILE_IMMEDIATE &&
1568 !tgsi_inst->Src[0].Register.Indirect) {
1569 const int src_idx = tgsi_inst->Src[0].Register.Index;
1570 type = tgsi->imm_data.types[src_idx];
1571 }
1572 else {
1573 /* this is the best we can do */
1574 type = TOY_TYPE_F;
1575 }
1576
1577 return type;
1578 }
1579 else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) {
1580 if (!is_dst && operand == 0)
1581 type = TOY_TYPE_UD;
1582 else
1583 type = TOY_TYPE_F;
1584
1585 return type;
1586 }
1587
1588 type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst);
1589
1590 /* fix the type */
1591 file = (is_dst) ?
1592 tgsi_inst->Dst[operand].Register.File :
1593 tgsi_inst->Src[operand].Register.File;
1594 switch (file) {
1595 case TGSI_FILE_SAMPLER:
1596 case TGSI_FILE_IMAGE:
1597 case TGSI_FILE_SAMPLER_VIEW:
1598 type = TOY_TYPE_D;
1599 break;
1600 case TGSI_FILE_ADDRESS:
1601 assert(type == TOY_TYPE_D);
1602 break;
1603 default:
1604 break;
1605 }
1606
1607 return type;
1608 }
1609
1610 /**
1611 * Allocate a VRF register.
1612 */
1613 static int
1614 ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file)
1615 {
1616 const int count = (tgsi->aos) ? 1 : 4;
1617 return tc_alloc_vrf(tgsi->tc, count);
1618 }
1619
1620 /**
1621 * Construct the key for VRF mapping look-up.
1622 */
1623 static void *
1624 ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
1625 {
1626 intptr_t key;
1627
1628 /* this is ugly... */
1629 assert(file < 1 << 4);
1630 assert(dim < 1 << 12);
1631 assert(index < 1 << 16);
1632 key = (file << 28) | (dim << 16) | index;
1633
1634 return intptr_to_pointer(key);
1635 }
1636
1637 /**
1638 * Map a TGSI register to a VRF register.
1639 */
1640 static int
1641 ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file,
1642 int dim, int index, bool *is_new)
1643 {
1644 void *key, *val;
1645 intptr_t vrf;
1646
1647 key = ra_get_map_key(file, dim, index);
1648
1649 /*
1650 * because we allocate vrf from 1 and on, val is never NULL as long as the
1651 * key exists
1652 */
1653 val = util_hash_table_get(tgsi->reg_mapping, key);
1654 if (val) {
1655 vrf = pointer_to_intptr(val);
1656
1657 if (is_new)
1658 *is_new = false;
1659 }
1660 else {
1661 vrf = (intptr_t) ra_alloc_reg(tgsi, file);
1662
1663 /* add to the mapping */
1664 val = intptr_to_pointer(vrf);
1665 util_hash_table_set(tgsi->reg_mapping, key, val);
1666
1667 if (is_new)
1668 *is_new = true;
1669 }
1670
1671 return (int) vrf;
1672 }
1673
1674 /**
1675 * Return true if the destination aliases any of the sources.
1676 */
1677 static bool
1678 ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index)
1679 {
1680 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1681 int i;
1682
1683 /* we need a scratch register for indirect dst anyway */
1684 if (ra_dst_is_indirect(d))
1685 return true;
1686
1687 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1688 const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
1689
1690 if (s->Register.File != d->Register.File)
1691 continue;
1692
1693 /*
1694 * we can go on to check dimension and index respectively, but
1695 * keep it simple for now
1696 */
1697 if (ra_is_src_indirect(s))
1698 return true;
1699 if (ra_src_dimension(s) == ra_dst_dimension(d) &&
1700 ra_src_index(s) == ra_dst_index(d))
1701 return true;
1702 }
1703
1704 return false;
1705 }
1706
1707 /**
1708 * Return the toy register for a TGSI destination operand.
1709 */
1710 static struct toy_dst
1711 ra_get_dst(struct toy_tgsi *tgsi,
1712 const struct tgsi_full_instruction *tgsi_inst, int dst_index,
1713 bool *is_scratch)
1714 {
1715 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1716 bool need_vrf = false;
1717 struct toy_dst dst;
1718
1719 switch (d->Register.File) {
1720 case TGSI_FILE_NULL:
1721 dst = tdst_null();
1722 break;
1723 case TGSI_FILE_OUTPUT:
1724 case TGSI_FILE_TEMPORARY:
1725 case TGSI_FILE_ADDRESS:
1726 case TGSI_FILE_PREDICATE:
1727 need_vrf = true;
1728 break;
1729 default:
1730 assert(!"unhandled dst file");
1731 dst = tdst_null();
1732 break;
1733 }
1734
1735 if (need_vrf) {
1736 /* XXX we do not always need a scratch given the conditions... */
1737 const bool need_scratch =
1738 (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) ||
1739 tgsi_inst->Instruction.Saturate);
1740 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true);
1741 int vrf;
1742
1743 if (need_scratch) {
1744 vrf = ra_alloc_reg(tgsi, d->Register.File);
1745 }
1746 else {
1747 vrf = ra_map_reg(tgsi, d->Register.File,
1748 ra_dst_dimension(d), ra_dst_index(d), NULL);
1749 }
1750
1751 if (is_scratch)
1752 *is_scratch = need_scratch;
1753
1754 dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1755 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
1756 }
1757
1758 return dst;
1759 }
1760
1761 static struct toy_src
1762 ra_get_src_for_vrf(const struct tgsi_full_src_register *s,
1763 enum toy_type type, int vrf)
1764 {
1765 return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1766 false, 0,
1767 s->Register.SwizzleX, s->Register.SwizzleY,
1768 s->Register.SwizzleZ, s->Register.SwizzleW,
1769 s->Register.Absolute, s->Register.Negate,
1770 vrf * TOY_REG_WIDTH);
1771 }
1772
1773 static int
1774 init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst,
1775 enum tgsi_file_type file, int index,
1776 const struct tgsi_ind_register *indirect,
1777 const struct tgsi_dimension *dimension,
1778 const struct tgsi_ind_register *dim_indirect)
1779 {
1780 struct toy_src src;
1781 int num_src = 0;
1782
1783 /* src[0]: TGSI file */
1784 inst->src[num_src++] = tsrc_imm_d(file);
1785
1786 /* src[1]: TGSI dimension */
1787 inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0);
1788
1789 /* src[2]: TGSI dimension indirection */
1790 if (dim_indirect) {
1791 const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0,
1792 dim_indirect->Index, NULL);
1793
1794 src = tsrc(TOY_FILE_VRF, vrf, 0);
1795 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1796 }
1797 else {
1798 src = tsrc_imm_d(0);
1799 }
1800
1801 inst->src[num_src++] = src;
1802
1803 /* src[3]: TGSI index */
1804 inst->src[num_src++] = tsrc_imm_d(index);
1805
1806 /* src[4]: TGSI index indirection */
1807 if (indirect) {
1808 const int vrf = ra_map_reg(tgsi, indirect->File, 0,
1809 indirect->Index, NULL);
1810
1811 src = tsrc(TOY_FILE_VRF, vrf, 0);
1812 src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1813 }
1814 else {
1815 src = tsrc_imm_d(0);
1816 }
1817
1818 inst->src[num_src++] = src;
1819
1820 return num_src;
1821 }
1822
1823 static struct toy_src
1824 ra_get_src_indirect(struct toy_tgsi *tgsi,
1825 const struct tgsi_full_instruction *tgsi_inst,
1826 int src_index)
1827 {
1828 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
1829 bool need_vrf = false, is_resource = false;
1830 struct toy_src src;
1831
1832 switch (s->Register.File) {
1833 case TGSI_FILE_NULL:
1834 src = tsrc_null();
1835 break;
1836 case TGSI_FILE_SAMPLER:
1837 case TGSI_FILE_IMAGE:
1838 case TGSI_FILE_SAMPLER_VIEW:
1839 is_resource = true;
1840 /* fall through */
1841 case TGSI_FILE_CONSTANT:
1842 case TGSI_FILE_INPUT:
1843 case TGSI_FILE_SYSTEM_VALUE:
1844 case TGSI_FILE_TEMPORARY:
1845 case TGSI_FILE_ADDRESS:
1846 case TGSI_FILE_IMMEDIATE:
1847 case TGSI_FILE_PREDICATE:
1848 need_vrf = true;
1849 break;
1850 default:
1851 assert(!"unhandled src file");
1852 src = tsrc_null();
1853 break;
1854 }
1855
1856 if (need_vrf) {
1857 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
1858 int vrf;
1859
1860 if (is_resource) {
1861 assert(!s->Register.Dimension);
1862 assert(s->Register.Indirect);
1863
1864 vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
1865 }
1866 else {
1867 vrf = ra_alloc_reg(tgsi, s->Register.File);
1868 }
1869
1870 src = ra_get_src_for_vrf(s, type, vrf);
1871
1872 /* emit indirect fetch */
1873 if (!is_resource) {
1874 struct toy_inst *inst;
1875
1876 inst = tc_add(tgsi->tc);
1877 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
1878 inst->dst = tdst_from(src);
1879 inst->dst.writemask = TOY_WRITEMASK_XYZW;
1880
1881 init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
1882 (s->Register.Indirect) ? &s->Indirect : NULL,
1883 (s->Register.Dimension) ? &s->Dimension : NULL,
1884 (s->Dimension.Indirect) ? &s->DimIndirect : NULL);
1885 }
1886 }
1887
1888 return src;
1889 }
1890
1891 /**
1892 * Return the toy register for a TGSI source operand.
1893 */
1894 static struct toy_src
1895 ra_get_src(struct toy_tgsi *tgsi,
1896 const struct tgsi_full_instruction *tgsi_inst,
1897 int src_index)
1898 {
1899 const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
1900 bool need_vrf = false;
1901 struct toy_src src;
1902
1903 if (ra_is_src_indirect(s))
1904 return ra_get_src_indirect(tgsi, tgsi_inst, src_index);
1905
1906 switch (s->Register.File) {
1907 case TGSI_FILE_NULL:
1908 src = tsrc_null();
1909 break;
1910 case TGSI_FILE_CONSTANT:
1911 case TGSI_FILE_INPUT:
1912 case TGSI_FILE_SYSTEM_VALUE:
1913 need_vrf = true;
1914 break;
1915 case TGSI_FILE_TEMPORARY:
1916 case TGSI_FILE_ADDRESS:
1917 case TGSI_FILE_PREDICATE:
1918 need_vrf = true;
1919 break;
1920 case TGSI_FILE_SAMPLER:
1921 case TGSI_FILE_IMAGE:
1922 case TGSI_FILE_SAMPLER_VIEW:
1923 assert(!s->Register.Dimension);
1924 src = tsrc_imm_d(s->Register.Index);
1925 break;
1926 case TGSI_FILE_IMMEDIATE:
1927 {
1928 const uint32_t *imm;
1929 enum toy_type imm_type;
1930 bool is_scalar;
1931
1932 imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);
1933
1934 is_scalar =
1935 (imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] &&
1936 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] &&
1937 imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]);
1938
1939 if (is_scalar) {
1940 const enum toy_type type =
1941 ra_get_type(tgsi, tgsi_inst, src_index, false);
1942
1943 /* ignore imm_type */
1944 src = tsrc_imm_ud(imm[s->Register.SwizzleX]);
1945 src.type = type;
1946 src.absolute = s->Register.Absolute;
1947 src.negate = s->Register.Negate;
1948 }
1949 else {
1950 need_vrf = true;
1951 }
1952 }
1953 break;
1954 default:
1955 assert(!"unhandled src file");
1956 src = tsrc_null();
1957 break;
1958 }
1959
1960 if (need_vrf) {
1961 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
1962 bool is_new;
1963 int vrf;
1964
1965 vrf = ra_map_reg(tgsi, s->Register.File,
1966 ra_src_dimension(s), ra_src_index(s), &is_new);
1967
1968 src = ra_get_src_for_vrf(s, type, vrf);
1969
1970 if (is_new) {
1971 switch (s->Register.File) {
1972 case TGSI_FILE_TEMPORARY:
1973 case TGSI_FILE_ADDRESS:
1974 case TGSI_FILE_PREDICATE:
1975 {
1976 struct toy_dst dst = tdst_from(src);
1977 dst.writemask = TOY_WRITEMASK_XYZW;
1978
1979 /* always initialize registers before use */
1980 if (tgsi->aos) {
1981 tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type));
1982 }
1983 else {
1984 struct toy_dst tdst[4];
1985 int i;
1986
1987 tdst_transpose(dst, tdst);
1988
1989 for (i = 0; i < 4; i++) {
1990 tc_MOV(tgsi->tc, tdst[i],
1991 tsrc_type(tsrc_imm_d(0), type));
1992 }
1993 }
1994 }
1995 break;
1996 default:
1997 break;
1998 }
1999 }
2000
2001 }
2002
2003 return src;
2004 }
2005
2006 static void
2007 parse_instruction(struct toy_tgsi *tgsi,
2008 const struct tgsi_full_instruction *tgsi_inst)
2009 {
2010 struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS];
2011 struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS];
2012 bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS];
2013 toy_tgsi_translate translate;
2014 int i;
2015
2016 /* convert TGSI registers to toy registers */
2017 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
2018 src[i] = ra_get_src(tgsi, tgsi_inst, i);
2019 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
2020 dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]);
2021
2022 /* translate the instruction */
2023 translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode];
2024 if (!translate) {
2025 if (tgsi->translate_table == soa_translate_table)
2026 soa_unsupported(tgsi->tc, tgsi_inst, dst, src);
2027 else
2028 aos_unsupported(tgsi->tc, tgsi_inst, dst, src);
2029 }
2030 translate(tgsi->tc, tgsi_inst, dst, src);
2031
2032 /* write the result to the real destinations if needed */
2033 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2034 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2035
2036 if (!dst_is_scratch[i])
2037 continue;
2038
2039 tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
2040
2041 /* emit indirect store */
2042 if (ra_dst_is_indirect(d)) {
2043 struct toy_inst *inst;
2044
2045 inst = tc_add(tgsi->tc);
2046 inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE;
2047 inst->dst = dst[i];
2048
2049 init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index,
2050 (d->Register.Indirect) ? &d->Indirect : NULL,
2051 (d->Register.Dimension) ? &d->Dimension : NULL,
2052 (d->Dimension.Indirect) ? &d->DimIndirect : NULL);
2053 }
2054 else {
2055 const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true);
2056 struct toy_dst real_dst;
2057 int vrf;
2058
2059 vrf = ra_map_reg(tgsi, d->Register.File,
2060 ra_dst_dimension(d), ra_dst_index(d), NULL);
2061 real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
2062 false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
2063
2064 if (tgsi->aos) {
2065 tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i]));
2066 }
2067 else {
2068 struct toy_dst tdst[4];
2069 struct toy_src tsrc[4];
2070 int j;
2071
2072 tdst_transpose(real_dst, tdst);
2073 tsrc_transpose(tsrc_from(dst[i]), tsrc);
2074
2075 for (j = 0; j < 4; j++)
2076 tc_MOV(tgsi->tc, tdst[j], tsrc[j]);
2077 }
2078 }
2079
2080 tgsi->tc->templ.saturate = false;
2081 }
2082
2083 switch (tgsi_inst->Instruction.Opcode) {
2084 case TGSI_OPCODE_KILL_IF:
2085 case TGSI_OPCODE_KILL:
2086 tgsi->uses_kill = true;
2087 break;
2088 }
2089
2090 for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
2091 const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
2092 if (s->Register.File == TGSI_FILE_CONSTANT && s->Register.Indirect)
2093 tgsi->const_indirect = true;
2094 }
2095
2096 /* remember channels written */
2097 for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2098 const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2099
2100 if (d->Register.File != TGSI_FILE_OUTPUT)
2101 continue;
2102 for (i = 0; i < tgsi->num_outputs; i++) {
2103 if (tgsi->outputs[i].index == d->Register.Index) {
2104 tgsi->outputs[i].undefined_mask &= ~d->Register.WriteMask;
2105 break;
2106 }
2107 }
2108 }
2109 }
2110
2111 static void
2112 decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2113 {
2114 static const struct tgsi_declaration_interp default_interp = {
2115 TGSI_INTERPOLATE_PERSPECTIVE, false, 0,
2116 };
2117 const struct tgsi_declaration_interp *interp =
2118 (decl->Declaration.Interpolate) ? &decl->Interp: &default_interp;
2119 int index;
2120
2121 if (decl->Range.Last >= ARRAY_SIZE(tgsi->inputs)) {
2122 assert(!"invalid IN");
2123 return;
2124 }
2125
2126 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2127 const int slot = tgsi->num_inputs++;
2128
2129 tgsi->inputs[slot].index = index;
2130 tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask;
2131 if (decl->Declaration.Semantic) {
2132 tgsi->inputs[slot].semantic_name = decl->Semantic.Name;
2133 tgsi->inputs[slot].semantic_index = decl->Semantic.Index;
2134 }
2135 else {
2136 tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2137 tgsi->inputs[slot].semantic_index = index;
2138 }
2139 tgsi->inputs[slot].interp = interp->Interpolate;
2140 tgsi->inputs[slot].centroid = interp->Location == TGSI_INTERPOLATE_LOC_CENTROID;
2141 }
2142 }
2143
2144 static void
2145 decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2146 {
2147 int index;
2148
2149 if (decl->Range.Last >= ARRAY_SIZE(tgsi->outputs)) {
2150 assert(!"invalid OUT");
2151 return;
2152 }
2153
2154 assert(decl->Declaration.Semantic);
2155
2156 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2157 const int slot = tgsi->num_outputs++;
2158
2159 tgsi->outputs[slot].index = index;
2160 tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW;
2161 tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask;
2162 tgsi->outputs[slot].semantic_name = decl->Semantic.Name;
2163 tgsi->outputs[slot].semantic_index = decl->Semantic.Index;
2164 }
2165 }
2166
2167 static void
2168 decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2169 {
2170 int index;
2171
2172 if (decl->Range.Last >= ARRAY_SIZE(tgsi->system_values)) {
2173 assert(!"invalid SV");
2174 return;
2175 }
2176
2177 for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2178 const int slot = tgsi->num_system_values++;
2179
2180 tgsi->system_values[slot].index = index;
2181 if (decl->Declaration.Semantic) {
2182 tgsi->system_values[slot].semantic_name = decl->Semantic.Name;
2183 tgsi->system_values[slot].semantic_index = decl->Semantic.Index;
2184 }
2185 else {
2186 tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2187 tgsi->system_values[slot].semantic_index = index;
2188 }
2189 }
2190 }
2191
2192 /**
2193 * Emit an instruction to fetch the value of a TGSI register.
2194 */
2195 static void
2196 fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx)
2197 {
2198 struct toy_dst dst;
2199 int vrf;
2200 enum toy_opcode opcode;
2201 enum toy_type type = TOY_TYPE_F;
2202
2203 switch (file) {
2204 case TGSI_FILE_INPUT:
2205 opcode = TOY_OPCODE_TGSI_IN;
2206 break;
2207 case TGSI_FILE_CONSTANT:
2208 opcode = TOY_OPCODE_TGSI_CONST;
2209 break;
2210 case TGSI_FILE_SYSTEM_VALUE:
2211 opcode = TOY_OPCODE_TGSI_SV;
2212 break;
2213 case TGSI_FILE_IMMEDIATE:
2214 opcode = TOY_OPCODE_TGSI_IMM;
2215 toy_tgsi_get_imm(tgsi, idx, &type);
2216 break;
2217 default:
2218 /* no need to fetch */
2219 return;
2220 break;
2221 }
2222
2223 vrf = ra_map_reg(tgsi, file, dim, idx, NULL);
2224 dst = tdst(TOY_FILE_VRF, vrf, 0);
2225 dst = tdst_type(dst, type);
2226
2227 tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx));
2228 }
2229
2230 static void
2231 parse_declaration(struct toy_tgsi *tgsi,
2232 const struct tgsi_full_declaration *decl)
2233 {
2234 int i;
2235
2236 switch (decl->Declaration.File) {
2237 case TGSI_FILE_INPUT:
2238 decl_add_in(tgsi, decl);
2239 break;
2240 case TGSI_FILE_OUTPUT:
2241 decl_add_out(tgsi, decl);
2242 break;
2243 case TGSI_FILE_SYSTEM_VALUE:
2244 decl_add_sv(tgsi, decl);
2245 break;
2246 case TGSI_FILE_IMMEDIATE:
2247 /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
2248 assert(!"unexpected immediate declaration");
2249 break;
2250 case TGSI_FILE_CONSTANT:
2251 if (tgsi->const_count <= decl->Range.Last)
2252 tgsi->const_count = decl->Range.Last + 1;
2253 break;
2254 case TGSI_FILE_NULL:
2255 case TGSI_FILE_TEMPORARY:
2256 case TGSI_FILE_SAMPLER:
2257 case TGSI_FILE_PREDICATE:
2258 case TGSI_FILE_ADDRESS:
2259 case TGSI_FILE_IMAGE:
2260 case TGSI_FILE_SAMPLER_VIEW:
2261 /* nothing to do */
2262 break;
2263 default:
2264 assert(!"unhandled TGSI file");
2265 break;
2266 }
2267
2268 /* fetch the registers now */
2269 for (i = decl->Range.First; i <= decl->Range.Last; i++) {
2270 const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0;
2271 fetch_source(tgsi, decl->Declaration.File, dim, i);
2272 }
2273 }
2274
2275 static int
2276 add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf)
2277 {
2278 /* reallocate the buffer if necessary */
2279 if (tgsi->imm_data.cur >= tgsi->imm_data.size) {
2280 const int cur_size = tgsi->imm_data.size;
2281 int new_size;
2282 enum toy_type *new_types;
2283 uint32_t (*new_buf)[4];
2284
2285 new_size = (cur_size) ? cur_size << 1 : 16;
2286 while (new_size <= tgsi->imm_data.cur)
2287 new_size <<= 1;
2288
2289 new_buf = REALLOC(tgsi->imm_data.buf,
2290 cur_size * sizeof(new_buf[0]),
2291 new_size * sizeof(new_buf[0]));
2292 new_types = REALLOC(tgsi->imm_data.types,
2293 cur_size * sizeof(new_types[0]),
2294 new_size * sizeof(new_types[0]));
2295 if (!new_buf || !new_types) {
2296 FREE(new_buf);
2297 FREE(new_types);
2298 return -1;
2299 }
2300
2301 tgsi->imm_data.buf = new_buf;
2302 tgsi->imm_data.types = new_types;
2303 tgsi->imm_data.size = new_size;
2304 }
2305
2306 tgsi->imm_data.types[tgsi->imm_data.cur] = type;
2307 memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur],
2308 buf, sizeof(tgsi->imm_data.buf[0]));
2309
2310 return tgsi->imm_data.cur++;
2311 }
2312
2313 static void
2314 parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm)
2315 {
2316 enum toy_type type;
2317 uint32_t imm_buf[4];
2318 int idx;
2319
2320 switch (imm->Immediate.DataType) {
2321 case TGSI_IMM_FLOAT32:
2322 type = TOY_TYPE_F;
2323 imm_buf[0] = fui(imm->u[0].Float);
2324 imm_buf[1] = fui(imm->u[1].Float);
2325 imm_buf[2] = fui(imm->u[2].Float);
2326 imm_buf[3] = fui(imm->u[3].Float);
2327 break;
2328 case TGSI_IMM_INT32:
2329 type = TOY_TYPE_D;
2330 imm_buf[0] = (uint32_t) imm->u[0].Int;
2331 imm_buf[1] = (uint32_t) imm->u[1].Int;
2332 imm_buf[2] = (uint32_t) imm->u[2].Int;
2333 imm_buf[3] = (uint32_t) imm->u[3].Int;
2334 break;
2335 case TGSI_IMM_UINT32:
2336 type = TOY_TYPE_UD;
2337 imm_buf[0] = imm->u[0].Uint;
2338 imm_buf[1] = imm->u[1].Uint;
2339 imm_buf[2] = imm->u[2].Uint;
2340 imm_buf[3] = imm->u[3].Uint;
2341 break;
2342 default:
2343 assert(!"unhandled TGSI imm type");
2344 type = TOY_TYPE_F;
2345 memset(imm_buf, 0, sizeof(imm_buf));
2346 break;
2347 }
2348
2349 idx = add_imm(tgsi, type, imm_buf);
2350 if (idx >= 0)
2351 fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx);
2352 else
2353 tc_fail(tgsi->tc, "failed to add TGSI imm");
2354 }
2355
2356 static void
2357 parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop)
2358 {
2359 switch (prop->Property.PropertyName) {
2360 case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
2361 tgsi->props.vs_prohibit_ucps = prop->u[0].Data;
2362 break;
2363 case TGSI_PROPERTY_FS_COORD_ORIGIN:
2364 tgsi->props.fs_coord_origin = prop->u[0].Data;
2365 break;
2366 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
2367 tgsi->props.fs_coord_pixel_center = prop->u[0].Data;
2368 break;
2369 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2370 tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data;
2371 break;
2372 case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
2373 tgsi->props.fs_depth_layout = prop->u[0].Data;
2374 break;
2375 case TGSI_PROPERTY_GS_INPUT_PRIM:
2376 tgsi->props.gs_input_prim = prop->u[0].Data;
2377 break;
2378 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2379 tgsi->props.gs_output_prim = prop->u[0].Data;
2380 break;
2381 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2382 tgsi->props.gs_max_output_vertices = prop->u[0].Data;
2383 break;
2384 default:
2385 assert(!"unhandled TGSI property");
2386 break;
2387 }
2388 }
2389
2390 static void
2391 parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token)
2392 {
2393 switch (token->Token.Type) {
2394 case TGSI_TOKEN_TYPE_DECLARATION:
2395 parse_declaration(tgsi, &token->FullDeclaration);
2396 break;
2397 case TGSI_TOKEN_TYPE_IMMEDIATE:
2398 parse_immediate(tgsi, &token->FullImmediate);
2399 break;
2400 case TGSI_TOKEN_TYPE_INSTRUCTION:
2401 parse_instruction(tgsi, &token->FullInstruction);
2402 break;
2403 case TGSI_TOKEN_TYPE_PROPERTY:
2404 parse_property(tgsi, &token->FullProperty);
2405 break;
2406 default:
2407 assert(!"unhandled TGSI token type");
2408 break;
2409 }
2410 }
2411
2412 static enum pipe_error
2413 dump_reg_mapping(void *key, void *val, void *data)
2414 {
2415 int tgsi_file, tgsi_dim, tgsi_index;
2416 uint32_t sig, vrf;
2417
2418 sig = (uint32_t) pointer_to_intptr(key);
2419 vrf = (uint32_t) pointer_to_intptr(val);
2420
2421 /* see ra_get_map_key() */
2422 tgsi_file = (sig >> 28) & 0xf;
2423 tgsi_dim = (sig >> 16) & 0xfff;
2424 tgsi_index = (sig >> 0) & 0xffff;
2425
2426 if (tgsi_dim) {
2427 ilo_printf(" v%d:\t%s[%d][%d]\n", vrf,
2428 tgsi_file_name(tgsi_file), tgsi_dim, tgsi_index);
2429 }
2430 else {
2431 ilo_printf(" v%d:\t%s[%d]\n", vrf,
2432 tgsi_file_name(tgsi_file), tgsi_index);
2433 }
2434
2435 return PIPE_OK;
2436 }
2437
2438 /**
2439 * Dump the TGSI translator, currently only the register mapping.
2440 */
2441 void
2442 toy_tgsi_dump(const struct toy_tgsi *tgsi)
2443 {
2444 util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL);
2445 }
2446
2447 /**
2448 * Clean up the TGSI translator.
2449 */
2450 void
2451 toy_tgsi_cleanup(struct toy_tgsi *tgsi)
2452 {
2453 FREE(tgsi->imm_data.buf);
2454 FREE(tgsi->imm_data.types);
2455
2456 util_hash_table_destroy(tgsi->reg_mapping);
2457 }
2458
/**
 * Hash function of the register mapping table: the key's integer value is
 * used directly as the hash.
 */
static unsigned
reg_mapping_hash(void *key)
{
   const intptr_t sig = pointer_to_intptr(key);

   return (unsigned) sig;
}
2464
/**
 * Key comparison of the register mapping table.
 *
 * \return 0 when both keys are the same pointer value, 1 otherwise
 */
static int
reg_mapping_compare(void *key1, void *key2)
{
   return (key1 == key2) ? 0 : 1;
}
2470
2471 /**
2472 * Initialize the TGSI translator.
2473 */
2474 static bool
2475 init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos)
2476 {
2477 memset(tgsi, 0, sizeof(*tgsi));
2478
2479 tgsi->tc = tc;
2480 tgsi->aos = aos;
2481 tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table;
2482
2483 /* create a mapping of TGSI registers to VRF reigsters */
2484 tgsi->reg_mapping =
2485 util_hash_table_create(reg_mapping_hash, reg_mapping_compare);
2486
2487 return (tgsi->reg_mapping != NULL);
2488 }
2489
2490 /**
2491 * Translate TGSI tokens into toy instructions.
2492 */
2493 void
2494 toy_compiler_translate_tgsi(struct toy_compiler *tc,
2495 const struct tgsi_token *tokens, bool aos,
2496 struct toy_tgsi *tgsi)
2497 {
2498 struct tgsi_parse_context parse;
2499
2500 if (!init_tgsi(tgsi, tc, aos)) {
2501 tc_fail(tc, "failed to initialize TGSI translator");
2502 return;
2503 }
2504
2505 tgsi_parse_init(&parse, tokens);
2506 while (!tgsi_parse_end_of_tokens(&parse)) {
2507 tgsi_parse_token(&parse);
2508 parse_token(tgsi, &parse.FullToken);
2509 }
2510 tgsi_parse_free(&parse);
2511 }
2512
2513 /**
2514 * Map the TGSI register to VRF register.
2515 */
2516 int
2517 toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
2518 enum tgsi_file_type file, int dimension, int index)
2519 {
2520 void *key, *val;
2521
2522 key = ra_get_map_key(file, dimension, index);
2523
2524 val = util_hash_table_get(tgsi->reg_mapping, key);
2525
2526 return (val) ? pointer_to_intptr(val) : -1;
2527 }