2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_info.h"
30 #include "tgsi/tgsi_strings.h"
31 #include "util/u_hash_table.h"
32 #include "toy_helpers.h"
35 /* map TGSI opcode to GEN opcode 1-to-1 */
40 } aos_simple_opcode_map
[TGSI_OPCODE_LAST
] = {
41 [TGSI_OPCODE_ARL
] = { GEN6_OPCODE_RNDD
, 1, 1 },
42 [TGSI_OPCODE_MOV
] = { GEN6_OPCODE_MOV
, 1, 1 },
43 [TGSI_OPCODE_RCP
] = { TOY_OPCODE_INV
, 1, 1 },
44 [TGSI_OPCODE_RSQ
] = { TOY_OPCODE_RSQ
, 1, 1 },
45 [TGSI_OPCODE_MUL
] = { GEN6_OPCODE_MUL
, 1, 2 },
46 [TGSI_OPCODE_ADD
] = { GEN6_OPCODE_ADD
, 1, 2 },
47 [TGSI_OPCODE_DP3
] = { GEN6_OPCODE_DP3
, 1, 2 },
48 [TGSI_OPCODE_DP4
] = { GEN6_OPCODE_DP4
, 1, 2 },
49 [TGSI_OPCODE_MIN
] = { GEN6_OPCODE_SEL
, 1, 2 },
50 [TGSI_OPCODE_MAX
] = { GEN6_OPCODE_SEL
, 1, 2 },
51 /* a later pass will move src[2] to accumulator */
52 [TGSI_OPCODE_MAD
] = { GEN6_OPCODE_MAC
, 1, 3 },
53 [TGSI_OPCODE_SUB
] = { GEN6_OPCODE_ADD
, 1, 2 },
54 [TGSI_OPCODE_SQRT
] = { TOY_OPCODE_SQRT
, 1, 1 },
55 [TGSI_OPCODE_FRC
] = { GEN6_OPCODE_FRC
, 1, 1 },
56 [TGSI_OPCODE_FLR
] = { GEN6_OPCODE_RNDD
, 1, 1 },
57 [TGSI_OPCODE_ROUND
] = { GEN6_OPCODE_RNDE
, 1, 1 },
58 [TGSI_OPCODE_EX2
] = { TOY_OPCODE_EXP
, 1, 1 },
59 [TGSI_OPCODE_LG2
] = { TOY_OPCODE_LOG
, 1, 1 },
60 [TGSI_OPCODE_POW
] = { TOY_OPCODE_POW
, 1, 2 },
61 [TGSI_OPCODE_ABS
] = { GEN6_OPCODE_MOV
, 1, 1 },
62 [TGSI_OPCODE_DPH
] = { GEN6_OPCODE_DPH
, 1, 2 },
63 [TGSI_OPCODE_COS
] = { TOY_OPCODE_COS
, 1, 1 },
64 [TGSI_OPCODE_KILL
] = { TOY_OPCODE_KIL
, 0, 0 },
65 [TGSI_OPCODE_SIN
] = { TOY_OPCODE_SIN
, 1, 1 },
66 [TGSI_OPCODE_ARR
] = { GEN6_OPCODE_RNDZ
, 1, 1 },
67 [TGSI_OPCODE_DP2
] = { GEN6_OPCODE_DP2
, 1, 2 },
68 [TGSI_OPCODE_IF
] = { GEN6_OPCODE_IF
, 0, 1 },
69 [TGSI_OPCODE_UIF
] = { GEN6_OPCODE_IF
, 0, 1 },
70 [TGSI_OPCODE_ELSE
] = { GEN6_OPCODE_ELSE
, 0, 0 },
71 [TGSI_OPCODE_ENDIF
] = { GEN6_OPCODE_ENDIF
, 0, 0 },
72 [TGSI_OPCODE_I2F
] = { GEN6_OPCODE_MOV
, 1, 1 },
73 [TGSI_OPCODE_NOT
] = { GEN6_OPCODE_NOT
, 1, 1 },
74 [TGSI_OPCODE_TRUNC
] = { GEN6_OPCODE_RNDZ
, 1, 1 },
75 [TGSI_OPCODE_SHL
] = { GEN6_OPCODE_SHL
, 1, 2 },
76 [TGSI_OPCODE_AND
] = { GEN6_OPCODE_AND
, 1, 2 },
77 [TGSI_OPCODE_OR
] = { GEN6_OPCODE_OR
, 1, 2 },
78 [TGSI_OPCODE_MOD
] = { TOY_OPCODE_INT_DIV_REMAINDER
, 1, 2 },
79 [TGSI_OPCODE_XOR
] = { GEN6_OPCODE_XOR
, 1, 2 },
80 [TGSI_OPCODE_EMIT
] = { TOY_OPCODE_EMIT
, 0, 0 },
81 [TGSI_OPCODE_ENDPRIM
] = { TOY_OPCODE_ENDPRIM
, 0, 0 },
82 [TGSI_OPCODE_NOP
] = { GEN6_OPCODE_NOP
, 0, 0 },
83 [TGSI_OPCODE_KILL_IF
] = { TOY_OPCODE_KIL
, 0, 1 },
84 [TGSI_OPCODE_END
] = { GEN6_OPCODE_NOP
, 0, 0 },
85 [TGSI_OPCODE_F2I
] = { GEN6_OPCODE_MOV
, 1, 1 },
86 [TGSI_OPCODE_IDIV
] = { TOY_OPCODE_INT_DIV_QUOTIENT
, 1, 2 },
87 [TGSI_OPCODE_IMAX
] = { GEN6_OPCODE_SEL
, 1, 2 },
88 [TGSI_OPCODE_IMIN
] = { GEN6_OPCODE_SEL
, 1, 2 },
89 [TGSI_OPCODE_INEG
] = { GEN6_OPCODE_MOV
, 1, 1 },
90 [TGSI_OPCODE_ISHR
] = { GEN6_OPCODE_ASR
, 1, 2 },
91 [TGSI_OPCODE_F2U
] = { GEN6_OPCODE_MOV
, 1, 1 },
92 [TGSI_OPCODE_U2F
] = { GEN6_OPCODE_MOV
, 1, 1 },
93 [TGSI_OPCODE_UADD
] = { GEN6_OPCODE_ADD
, 1, 2 },
94 [TGSI_OPCODE_UDIV
] = { TOY_OPCODE_INT_DIV_QUOTIENT
, 1, 2 },
95 /* a later pass will move src[2] to accumulator */
96 [TGSI_OPCODE_UMAD
] = { GEN6_OPCODE_MAC
, 1, 3 },
97 [TGSI_OPCODE_UMAX
] = { GEN6_OPCODE_SEL
, 1, 2 },
98 [TGSI_OPCODE_UMIN
] = { GEN6_OPCODE_SEL
, 1, 2 },
99 [TGSI_OPCODE_UMOD
] = { TOY_OPCODE_INT_DIV_REMAINDER
, 1, 2 },
100 [TGSI_OPCODE_UMUL
] = { GEN6_OPCODE_MUL
, 1, 2 },
101 [TGSI_OPCODE_USHR
] = { GEN6_OPCODE_SHR
, 1, 2 },
102 [TGSI_OPCODE_UARL
] = { GEN6_OPCODE_MOV
, 1, 1 },
103 [TGSI_OPCODE_IABS
] = { GEN6_OPCODE_MOV
, 1, 1 },
107 aos_simple(struct toy_compiler
*tc
,
108 const struct tgsi_full_instruction
*tgsi_inst
,
112 struct toy_inst
*inst
;
114 int cond_modifier
= GEN6_COND_NORMAL
;
115 int num_dst
= tgsi_inst
->Instruction
.NumDstRegs
;
116 int num_src
= tgsi_inst
->Instruction
.NumSrcRegs
;
119 opcode
= aos_simple_opcode_map
[tgsi_inst
->Instruction
.Opcode
].opcode
;
120 assert(num_dst
== aos_simple_opcode_map
[tgsi_inst
->Instruction
.Opcode
].num_dst
);
121 assert(num_src
== aos_simple_opcode_map
[tgsi_inst
->Instruction
.Opcode
].num_src
);
123 assert(!"invalid aos_simple() call");
127 /* no need to emit nop */
128 if (opcode
== GEN6_OPCODE_NOP
)
135 inst
->opcode
= opcode
;
137 switch (tgsi_inst
->Instruction
.Opcode
) {
138 case TGSI_OPCODE_MIN
:
139 case TGSI_OPCODE_IMIN
:
140 case TGSI_OPCODE_UMIN
:
141 cond_modifier
= GEN6_COND_L
;
143 case TGSI_OPCODE_MAX
:
144 case TGSI_OPCODE_IMAX
:
145 case TGSI_OPCODE_UMAX
:
146 cond_modifier
= GEN6_COND_GE
;
148 case TGSI_OPCODE_SUB
:
149 src
[1] = tsrc_negate(src
[1]);
151 case TGSI_OPCODE_ABS
:
152 case TGSI_OPCODE_IABS
:
153 src
[0] = tsrc_absolute(src
[0]);
156 cond_modifier
= GEN6_COND_NZ
;
158 assert(src
[0].type
== TOY_TYPE_F
);
159 src
[0] = tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
);
160 src
[1] = tsrc_imm_f(0.0f
);
162 case TGSI_OPCODE_UIF
:
163 cond_modifier
= GEN6_COND_NZ
;
165 assert(src
[0].type
== TOY_TYPE_UD
);
166 src
[0] = tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
);
167 src
[1] = tsrc_imm_d(0);
169 case TGSI_OPCODE_INEG
:
170 src
[0] = tsrc_negate(src
[0]);
172 case TGSI_OPCODE_RCP
:
173 case TGSI_OPCODE_RSQ
:
174 case TGSI_OPCODE_EX2
:
175 case TGSI_OPCODE_LG2
:
176 case TGSI_OPCODE_COS
:
177 case TGSI_OPCODE_SIN
:
178 src
[0] = tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
);
180 case TGSI_OPCODE_POW
:
181 src
[0] = tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
);
182 src
[1] = tsrc_swizzle1(src
[1], TOY_SWIZZLE_X
);
186 inst
->cond_modifier
= cond_modifier
;
189 assert(num_dst
== 1);
193 assert(num_src
<= Elements(inst
->src
));
194 for (i
= 0; i
< num_src
; i
++)
195 inst
->src
[i
] = src
[i
];
199 aos_set_on_cond(struct toy_compiler
*tc
,
200 const struct tgsi_full_instruction
*tgsi_inst
,
204 struct toy_inst
*inst
;
206 struct toy_src zero
, one
;
208 switch (tgsi_inst
->Instruction
.Opcode
) {
209 case TGSI_OPCODE_SLT
:
210 case TGSI_OPCODE_ISLT
:
211 case TGSI_OPCODE_USLT
:
212 case TGSI_OPCODE_FSLT
:
215 case TGSI_OPCODE_SGE
:
216 case TGSI_OPCODE_ISGE
:
217 case TGSI_OPCODE_USGE
:
218 case TGSI_OPCODE_FSGE
:
221 case TGSI_OPCODE_SEQ
:
222 case TGSI_OPCODE_USEQ
:
223 case TGSI_OPCODE_FSEQ
:
226 case TGSI_OPCODE_SGT
:
229 case TGSI_OPCODE_SLE
:
232 case TGSI_OPCODE_SNE
:
233 case TGSI_OPCODE_USNE
:
234 case TGSI_OPCODE_FSNE
:
238 assert(!"invalid aos_set_on_cond() call");
242 /* note that for integer versions, all bits are set */
243 switch (dst
[0].type
) {
246 zero
= tsrc_imm_f(0.0f
);
247 one
= tsrc_imm_f(1.0f
);
250 zero
= tsrc_imm_d(0);
251 one
= tsrc_imm_d(-1);
254 zero
= tsrc_imm_ud(0);
255 one
= tsrc_imm_ud(~0);
259 tc_MOV(tc
, dst
[0], zero
);
260 tc_CMP(tc
, tdst_null(), src
[0], src
[1], cond
);
261 inst
= tc_MOV(tc
, dst
[0], one
);
262 inst
->pred_ctrl
= GEN6_PREDCTRL_NORMAL
;
266 aos_compare(struct toy_compiler
*tc
,
267 const struct tgsi_full_instruction
*tgsi_inst
,
271 struct toy_inst
*inst
;
274 switch (tgsi_inst
->Instruction
.Opcode
) {
275 case TGSI_OPCODE_CMP
:
276 zero
= tsrc_imm_f(0.0f
);
278 case TGSI_OPCODE_UCMP
:
279 zero
= tsrc_imm_ud(0);
282 assert(!"invalid aos_compare() call");
286 tc_CMP(tc
, tdst_null(), src
[0], zero
, GEN6_COND_L
);
287 inst
= tc_SEL(tc
, dst
[0], src
[1], src
[2], GEN6_COND_NORMAL
);
288 inst
->pred_ctrl
= GEN6_PREDCTRL_NORMAL
;
292 aos_set_sign(struct toy_compiler
*tc
,
293 const struct tgsi_full_instruction
*tgsi_inst
,
297 struct toy_inst
*inst
;
298 struct toy_src zero
, one
, neg_one
;
300 switch (tgsi_inst
->Instruction
.Opcode
) {
301 case TGSI_OPCODE_SSG
:
302 zero
= tsrc_imm_f(0.0f
);
303 one
= tsrc_imm_f(1.0f
);
304 neg_one
= tsrc_imm_f(-1.0f
);
306 case TGSI_OPCODE_ISSG
:
307 zero
= tsrc_imm_d(0);
309 neg_one
= tsrc_imm_d(-1);
312 assert(!"invalid aos_set_sign() call");
316 tc_MOV(tc
, dst
[0], zero
);
318 tc_CMP(tc
, tdst_null(), src
[0], zero
, GEN6_COND_G
);
319 inst
= tc_MOV(tc
, dst
[0], one
);
320 inst
->pred_ctrl
= GEN6_PREDCTRL_NORMAL
;
322 tc_CMP(tc
, tdst_null(), src
[0], zero
, GEN6_COND_L
);
323 inst
= tc_MOV(tc
, dst
[0], neg_one
);
324 inst
->pred_ctrl
= GEN6_PREDCTRL_NORMAL
;
328 aos_tex(struct toy_compiler
*tc
,
329 const struct tgsi_full_instruction
*tgsi_inst
,
333 struct toy_inst
*inst
;
334 enum toy_opcode opcode
;
337 switch (tgsi_inst
->Instruction
.Opcode
) {
338 case TGSI_OPCODE_TEX
:
339 opcode
= TOY_OPCODE_TGSI_TEX
;
341 case TGSI_OPCODE_TXD
:
342 opcode
= TOY_OPCODE_TGSI_TXD
;
344 case TGSI_OPCODE_TXP
:
345 opcode
= TOY_OPCODE_TGSI_TXP
;
347 case TGSI_OPCODE_TXB
:
348 opcode
= TOY_OPCODE_TGSI_TXB
;
350 case TGSI_OPCODE_TXL
:
351 opcode
= TOY_OPCODE_TGSI_TXL
;
353 case TGSI_OPCODE_TXF
:
354 opcode
= TOY_OPCODE_TGSI_TXF
;
356 case TGSI_OPCODE_TXQ
:
357 opcode
= TOY_OPCODE_TGSI_TXQ
;
359 case TGSI_OPCODE_TXQ_LZ
:
360 opcode
= TOY_OPCODE_TGSI_TXQ_LZ
;
362 case TGSI_OPCODE_TEX2
:
363 opcode
= TOY_OPCODE_TGSI_TEX2
;
365 case TGSI_OPCODE_TXB2
:
366 opcode
= TOY_OPCODE_TGSI_TXB2
;
368 case TGSI_OPCODE_TXL2
:
369 opcode
= TOY_OPCODE_TGSI_TXL2
;
372 assert(!"unsupported texturing opcode");
377 assert(tgsi_inst
->Instruction
.Texture
);
380 inst
->opcode
= opcode
;
381 inst
->tex
.target
= tgsi_inst
->Texture
.Texture
;
383 assert(tgsi_inst
->Instruction
.NumSrcRegs
<= Elements(inst
->src
));
384 assert(tgsi_inst
->Instruction
.NumDstRegs
== 1);
387 for (i
= 0; i
< tgsi_inst
->Instruction
.NumSrcRegs
; i
++)
388 inst
->src
[i
] = src
[i
];
390 for (i
= 0; i
< tgsi_inst
->Texture
.NumOffsets
; i
++)
391 tc_fail(tc
, "texelFetchOffset unsupported");
395 aos_sample(struct toy_compiler
*tc
,
396 const struct tgsi_full_instruction
*tgsi_inst
,
400 struct toy_inst
*inst
;
401 enum toy_opcode opcode
;
404 assert(!"sampling untested");
406 switch (tgsi_inst
->Instruction
.Opcode
) {
407 case TGSI_OPCODE_SAMPLE
:
408 opcode
= TOY_OPCODE_TGSI_SAMPLE
;
410 case TGSI_OPCODE_SAMPLE_I
:
411 opcode
= TOY_OPCODE_TGSI_SAMPLE_I
;
413 case TGSI_OPCODE_SAMPLE_I_MS
:
414 opcode
= TOY_OPCODE_TGSI_SAMPLE_I_MS
;
416 case TGSI_OPCODE_SAMPLE_B
:
417 opcode
= TOY_OPCODE_TGSI_SAMPLE_B
;
419 case TGSI_OPCODE_SAMPLE_C
:
420 opcode
= TOY_OPCODE_TGSI_SAMPLE_C
;
422 case TGSI_OPCODE_SAMPLE_C_LZ
:
423 opcode
= TOY_OPCODE_TGSI_SAMPLE_C_LZ
;
425 case TGSI_OPCODE_SAMPLE_D
:
426 opcode
= TOY_OPCODE_TGSI_SAMPLE_D
;
428 case TGSI_OPCODE_SAMPLE_L
:
429 opcode
= TOY_OPCODE_TGSI_SAMPLE_L
;
431 case TGSI_OPCODE_GATHER4
:
432 opcode
= TOY_OPCODE_TGSI_GATHER4
;
434 case TGSI_OPCODE_SVIEWINFO
:
435 opcode
= TOY_OPCODE_TGSI_SVIEWINFO
;
437 case TGSI_OPCODE_SAMPLE_POS
:
438 opcode
= TOY_OPCODE_TGSI_SAMPLE_POS
;
440 case TGSI_OPCODE_SAMPLE_INFO
:
441 opcode
= TOY_OPCODE_TGSI_SAMPLE_INFO
;
444 assert(!"unsupported sampling opcode");
450 inst
->opcode
= opcode
;
452 assert(tgsi_inst
->Instruction
.NumSrcRegs
<= Elements(inst
->src
));
453 assert(tgsi_inst
->Instruction
.NumDstRegs
== 1);
456 for (i
= 0; i
< tgsi_inst
->Instruction
.NumSrcRegs
; i
++)
457 inst
->src
[i
] = src
[i
];
461 aos_LIT(struct toy_compiler
*tc
,
462 const struct tgsi_full_instruction
*tgsi_inst
,
466 struct toy_inst
*inst
;
468 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_XW
), tsrc_imm_f(1.0f
));
470 if (!(dst
[0].writemask
& TOY_WRITEMASK_YZ
))
473 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_YZ
), tsrc_imm_f(0.0f
));
475 tc_CMP(tc
, tdst_null(),
476 tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
),
481 tdst_writemask(dst
[0], TOY_WRITEMASK_Y
),
482 tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
));
483 inst
->pred_ctrl
= GEN6_PREDCTRL_NORMAL
;
485 /* clamp W to (-128, 128)? */
487 tdst_writemask(dst
[0], TOY_WRITEMASK_Z
),
488 tsrc_swizzle1(src
[0], TOY_SWIZZLE_Y
),
489 tsrc_swizzle1(src
[0], TOY_SWIZZLE_W
));
490 inst
->pred_ctrl
= GEN6_PREDCTRL_NORMAL
;
494 aos_EXP(struct toy_compiler
*tc
,
495 const struct tgsi_full_instruction
*tgsi_inst
,
499 struct toy_src src0
= tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
);
501 if (dst
[0].writemask
& TOY_WRITEMASK_X
) {
503 tdst_d(tdst_writemask(tc_alloc_tmp(tc
), TOY_WRITEMASK_X
));
505 tc_RNDD(tc
, tmp
, src0
);
507 /* construct the floating point number manually */
508 tc_ADD(tc
, tmp
, tsrc_from(tmp
), tsrc_imm_d(127));
509 tc_SHL(tc
, tdst_d(tdst_writemask(dst
[0], TOY_WRITEMASK_X
)),
510 tsrc_from(tmp
), tsrc_imm_d(23));
513 tc_FRC(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_Y
), src0
);
514 tc_EXP(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_Z
), src0
);
515 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_W
), tsrc_imm_f(1.0f
));
519 aos_LOG(struct toy_compiler
*tc
,
520 const struct tgsi_full_instruction
*tgsi_inst
,
524 struct toy_src src0
= tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
);
526 if (dst
[0].writemask
& TOY_WRITEMASK_XY
) {
529 tmp
= tdst_d(tdst_writemask(tc_alloc_tmp(tc
), TOY_WRITEMASK_X
));
532 tc_SHR(tc
, tmp
, tsrc_absolute(tsrc_d(src0
)), tsrc_imm_d(23));
533 tc_ADD(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_X
),
534 tsrc_from(tmp
), tsrc_imm_d(-127));
537 tc_AND(tc
, tmp
, tsrc_d(src0
), tsrc_imm_d((1 << 23) - 1));
538 tc_OR(tc
, tdst_writemask(tdst_d(dst
[0]), TOY_WRITEMASK_Y
),
539 tsrc_from(tmp
), tsrc_imm_d(127 << 23));
542 tc_LOG(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_Z
), src0
);
543 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_W
), tsrc_imm_f(1.0f
));
547 aos_DST(struct toy_compiler
*tc
,
548 const struct tgsi_full_instruction
*tgsi_inst
,
552 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_X
), tsrc_imm_f(1.0f
));
553 tc_MUL(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_Y
), src
[0], src
[1]);
554 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_Z
), src
[0]);
555 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_W
), src
[1]);
559 aos_LRP(struct toy_compiler
*tc
,
560 const struct tgsi_full_instruction
*tgsi_inst
,
564 struct toy_dst tmp
= tc_alloc_tmp(tc
);
566 tc_ADD(tc
, tmp
, tsrc_negate(src
[0]), tsrc_imm_f(1.0f
));
567 tc_MUL(tc
, tmp
, tsrc_from(tmp
), src
[2]);
568 tc_MAC(tc
, dst
[0], src
[0], src
[1], tsrc_from(tmp
));
572 aos_CND(struct toy_compiler
*tc
,
573 const struct tgsi_full_instruction
*tgsi_inst
,
577 struct toy_inst
*inst
;
579 assert(!"CND untested");
581 tc_CMP(tc
, tdst_null(), src
[2], tsrc_imm_f(0.5f
), GEN6_COND_G
);
582 inst
= tc_SEL(tc
, dst
[0], src
[0], src
[1], GEN6_COND_NORMAL
);
583 inst
->pred_ctrl
= GEN6_PREDCTRL_NORMAL
;
587 aos_DP2A(struct toy_compiler
*tc
,
588 const struct tgsi_full_instruction
*tgsi_inst
,
592 struct toy_dst tmp
= tc_alloc_tmp(tc
);
594 assert(!"DP2A untested");
596 tc_DP2(tc
, tmp
, src
[0], src
[1]);
597 tc_ADD(tc
, dst
[0], tsrc_swizzle1(tsrc_from(tmp
), TOY_SWIZZLE_X
), src
[2]);
601 aos_CLAMP(struct toy_compiler
*tc
,
602 const struct tgsi_full_instruction
*tgsi_inst
,
606 assert(!"CLAMP untested");
608 tc_SEL(tc
, dst
[0], src
[0], src
[1], GEN6_COND_GE
);
609 tc_SEL(tc
, dst
[0], src
[2], tsrc_from(dst
[0]), GEN6_COND_L
);
613 aos_XPD(struct toy_compiler
*tc
,
614 const struct tgsi_full_instruction
*tgsi_inst
,
618 struct toy_dst tmp
= tc_alloc_tmp(tc
);
620 tc_MUL(tc
, tdst_writemask(tmp
, TOY_WRITEMASK_XYZ
),
621 tsrc_swizzle(src
[0], TOY_SWIZZLE_Z
, TOY_SWIZZLE_X
,
622 TOY_SWIZZLE_Y
, TOY_SWIZZLE_W
),
623 tsrc_swizzle(src
[1], TOY_SWIZZLE_Y
, TOY_SWIZZLE_Z
,
624 TOY_SWIZZLE_X
, TOY_SWIZZLE_W
));
626 tc_MAC(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_XYZ
),
627 tsrc_swizzle(src
[0], TOY_SWIZZLE_Y
, TOY_SWIZZLE_Z
,
628 TOY_SWIZZLE_X
, TOY_SWIZZLE_W
),
629 tsrc_swizzle(src
[1], TOY_SWIZZLE_Z
, TOY_SWIZZLE_X
,
630 TOY_SWIZZLE_Y
, TOY_SWIZZLE_W
),
631 tsrc_negate(tsrc_from(tmp
)));
633 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_W
),
638 aos_PK2H(struct toy_compiler
*tc
,
639 const struct tgsi_full_instruction
*tgsi_inst
,
643 const struct toy_src h1
= tsrc_ud(tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
));
644 const struct toy_src h2
= tsrc_ud(tsrc_swizzle1(src
[0], TOY_SWIZZLE_Y
));
645 struct toy_dst tmp
= tdst_ud(tc_alloc_tmp(tc
));
647 assert(!"PK2H untested");
649 tc_SHL(tc
, tmp
, h2
, tsrc_imm_ud(16));
650 tc_OR(tc
, tdst_ud(dst
[0]), h1
, tsrc_from(tmp
));
654 aos_SFL(struct toy_compiler
*tc
,
655 const struct tgsi_full_instruction
*tgsi_inst
,
659 assert(!"SFL untested");
661 tc_MOV(tc
, dst
[0], tsrc_imm_f(0.0f
));
665 aos_STR(struct toy_compiler
*tc
,
666 const struct tgsi_full_instruction
*tgsi_inst
,
670 assert(!"STR untested");
672 tc_MOV(tc
, dst
[0], tsrc_imm_f(1.0f
));
676 aos_UP2H(struct toy_compiler
*tc
,
677 const struct tgsi_full_instruction
*tgsi_inst
,
681 assert(!"UP2H untested");
683 tc_AND(tc
, tdst_writemask(tdst_ud(dst
[0]), TOY_WRITEMASK_XZ
),
684 tsrc_ud(src
[0]), tsrc_imm_ud(0xffff));
685 tc_SHR(tc
, tdst_writemask(tdst_ud(dst
[0]), TOY_WRITEMASK_YW
),
686 tsrc_ud(src
[0]), tsrc_imm_ud(16));
690 aos_SCS(struct toy_compiler
*tc
,
691 const struct tgsi_full_instruction
*tgsi_inst
,
695 assert(!"SCS untested");
697 tc_add1(tc
, TOY_OPCODE_COS
,
698 tdst_writemask(dst
[0], TOY_WRITEMASK_X
), src
[0]);
700 tc_add1(tc
, TOY_OPCODE_SIN
,
701 tdst_writemask(dst
[0], TOY_WRITEMASK_Y
), src
[0]);
703 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_Z
), tsrc_imm_f(0.0f
));
704 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_W
), tsrc_imm_f(1.0f
));
708 aos_NRM(struct toy_compiler
*tc
,
709 const struct tgsi_full_instruction
*tgsi_inst
,
713 struct toy_dst tmp
= tc_alloc_tmp(tc
);
715 assert(!"NRM untested");
717 tc_DP3(tc
, tmp
, src
[0], src
[0]);
718 tc_INV(tc
, tmp
, tsrc_from(tmp
));
719 tc_MUL(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_XYZ
),
720 src
[0], tsrc_from(tmp
));
722 tc_MOV(tc
, tdst_writemask(dst
[0], TOY_WRITEMASK_W
), tsrc_imm_f(1.0f
));
726 aos_DIV(struct toy_compiler
*tc
,
727 const struct tgsi_full_instruction
*tgsi_inst
,
731 struct toy_dst tmp
= tc_alloc_tmp(tc
);
733 assert(!"DIV untested");
735 tc_INV(tc
, tmp
, src
[1]);
736 tc_MUL(tc
, dst
[0], src
[0], tsrc_from(tmp
));
740 aos_BRK(struct toy_compiler
*tc
,
741 const struct tgsi_full_instruction
*tgsi_inst
,
745 tc_add0(tc
, GEN6_OPCODE_BREAK
);
749 aos_CEIL(struct toy_compiler
*tc
,
750 const struct tgsi_full_instruction
*tgsi_inst
,
754 struct toy_dst tmp
= tc_alloc_tmp(tc
);
756 tc_RNDD(tc
, tmp
, tsrc_negate(src
[0]));
757 tc_MOV(tc
, dst
[0], tsrc_negate(tsrc_from(tmp
)));
761 aos_SAD(struct toy_compiler
*tc
,
762 const struct tgsi_full_instruction
*tgsi_inst
,
766 struct toy_dst tmp
= tc_alloc_tmp(tc
);
768 assert(!"SAD untested");
770 tc_ADD(tc
, tmp
, src
[0], tsrc_negate(src
[1]));
771 tc_ADD(tc
, dst
[0], tsrc_absolute(tsrc_from(tmp
)), src
[2]);
775 aos_CONT(struct toy_compiler
*tc
,
776 const struct tgsi_full_instruction
*tgsi_inst
,
780 tc_add0(tc
, GEN6_OPCODE_CONT
);
784 aos_BGNLOOP(struct toy_compiler
*tc
,
785 const struct tgsi_full_instruction
*tgsi_inst
,
789 struct toy_inst
*inst
;
791 inst
= tc_add0(tc
, TOY_OPCODE_DO
);
792 /* this is just a marker */
797 aos_ENDLOOP(struct toy_compiler
*tc
,
798 const struct tgsi_full_instruction
*tgsi_inst
,
802 tc_add0(tc
, GEN6_OPCODE_WHILE
);
806 aos_NRM4(struct toy_compiler
*tc
,
807 const struct tgsi_full_instruction
*tgsi_inst
,
811 struct toy_dst tmp
= tc_alloc_tmp(tc
);
813 assert(!"NRM4 untested");
815 tc_DP4(tc
, tmp
, src
[0], src
[0]);
816 tc_INV(tc
, tmp
, tsrc_from(tmp
));
817 tc_MUL(tc
, dst
[0], tsrc_swizzle1(src
[0], TOY_SWIZZLE_X
), tsrc_from(tmp
));
821 aos_unsupported(struct toy_compiler
*tc
,
822 const struct tgsi_full_instruction
*tgsi_inst
,
826 const char *name
= tgsi_get_opcode_name(tgsi_inst
->Instruction
.Opcode
);
828 ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name
);
830 tc_fail(tc
, "unsupported TGSI instruction");
833 static const toy_tgsi_translate aos_translate_table
[TGSI_OPCODE_LAST
] = {
834 [TGSI_OPCODE_ARL
] = aos_simple
,
835 [TGSI_OPCODE_MOV
] = aos_simple
,
836 [TGSI_OPCODE_LIT
] = aos_LIT
,
837 [TGSI_OPCODE_RCP
] = aos_simple
,
838 [TGSI_OPCODE_RSQ
] = aos_simple
,
839 [TGSI_OPCODE_EXP
] = aos_EXP
,
840 [TGSI_OPCODE_LOG
] = aos_LOG
,
841 [TGSI_OPCODE_MUL
] = aos_simple
,
842 [TGSI_OPCODE_ADD
] = aos_simple
,
843 [TGSI_OPCODE_DP3
] = aos_simple
,
844 [TGSI_OPCODE_DP4
] = aos_simple
,
845 [TGSI_OPCODE_DST
] = aos_DST
,
846 [TGSI_OPCODE_MIN
] = aos_simple
,
847 [TGSI_OPCODE_MAX
] = aos_simple
,
848 [TGSI_OPCODE_SLT
] = aos_set_on_cond
,
849 [TGSI_OPCODE_SGE
] = aos_set_on_cond
,
850 [TGSI_OPCODE_MAD
] = aos_simple
,
851 [TGSI_OPCODE_SUB
] = aos_simple
,
852 [TGSI_OPCODE_LRP
] = aos_LRP
,
853 [TGSI_OPCODE_CND
] = aos_CND
,
854 [TGSI_OPCODE_SQRT
] = aos_simple
,
855 [TGSI_OPCODE_DP2A
] = aos_DP2A
,
856 [22] = aos_unsupported
,
857 [23] = aos_unsupported
,
858 [TGSI_OPCODE_FRC
] = aos_simple
,
859 [TGSI_OPCODE_CLAMP
] = aos_CLAMP
,
860 [TGSI_OPCODE_FLR
] = aos_simple
,
861 [TGSI_OPCODE_ROUND
] = aos_simple
,
862 [TGSI_OPCODE_EX2
] = aos_simple
,
863 [TGSI_OPCODE_LG2
] = aos_simple
,
864 [TGSI_OPCODE_POW
] = aos_simple
,
865 [TGSI_OPCODE_XPD
] = aos_XPD
,
866 [32] = aos_unsupported
,
867 [TGSI_OPCODE_ABS
] = aos_simple
,
868 [TGSI_OPCODE_RCC
] = aos_unsupported
,
869 [TGSI_OPCODE_DPH
] = aos_simple
,
870 [TGSI_OPCODE_COS
] = aos_simple
,
871 [TGSI_OPCODE_DDX
] = aos_unsupported
,
872 [TGSI_OPCODE_DDY
] = aos_unsupported
,
873 [TGSI_OPCODE_KILL
] = aos_simple
,
874 [TGSI_OPCODE_PK2H
] = aos_PK2H
,
875 [TGSI_OPCODE_PK2US
] = aos_unsupported
,
876 [TGSI_OPCODE_PK4B
] = aos_unsupported
,
877 [TGSI_OPCODE_PK4UB
] = aos_unsupported
,
878 [TGSI_OPCODE_RFL
] = aos_unsupported
,
879 [TGSI_OPCODE_SEQ
] = aos_set_on_cond
,
880 [TGSI_OPCODE_SFL
] = aos_SFL
,
881 [TGSI_OPCODE_SGT
] = aos_set_on_cond
,
882 [TGSI_OPCODE_SIN
] = aos_simple
,
883 [TGSI_OPCODE_SLE
] = aos_set_on_cond
,
884 [TGSI_OPCODE_SNE
] = aos_set_on_cond
,
885 [TGSI_OPCODE_STR
] = aos_STR
,
886 [TGSI_OPCODE_TEX
] = aos_tex
,
887 [TGSI_OPCODE_TXD
] = aos_tex
,
888 [TGSI_OPCODE_TXP
] = aos_tex
,
889 [TGSI_OPCODE_UP2H
] = aos_UP2H
,
890 [TGSI_OPCODE_UP2US
] = aos_unsupported
,
891 [TGSI_OPCODE_UP4B
] = aos_unsupported
,
892 [TGSI_OPCODE_UP4UB
] = aos_unsupported
,
893 [TGSI_OPCODE_X2D
] = aos_unsupported
,
894 [TGSI_OPCODE_ARA
] = aos_unsupported
,
895 [TGSI_OPCODE_ARR
] = aos_simple
,
896 [TGSI_OPCODE_BRA
] = aos_unsupported
,
897 [TGSI_OPCODE_CAL
] = aos_unsupported
,
898 [TGSI_OPCODE_RET
] = aos_unsupported
,
899 [TGSI_OPCODE_SSG
] = aos_set_sign
,
900 [TGSI_OPCODE_CMP
] = aos_compare
,
901 [TGSI_OPCODE_SCS
] = aos_SCS
,
902 [TGSI_OPCODE_TXB
] = aos_tex
,
903 [TGSI_OPCODE_NRM
] = aos_NRM
,
904 [TGSI_OPCODE_DIV
] = aos_DIV
,
905 [TGSI_OPCODE_DP2
] = aos_simple
,
906 [TGSI_OPCODE_TXL
] = aos_tex
,
907 [TGSI_OPCODE_BRK
] = aos_BRK
,
908 [TGSI_OPCODE_IF
] = aos_simple
,
909 [TGSI_OPCODE_UIF
] = aos_simple
,
910 [76] = aos_unsupported
,
911 [TGSI_OPCODE_ELSE
] = aos_simple
,
912 [TGSI_OPCODE_ENDIF
] = aos_simple
,
913 [79] = aos_unsupported
,
914 [80] = aos_unsupported
,
915 [TGSI_OPCODE_PUSHA
] = aos_unsupported
,
916 [TGSI_OPCODE_POPA
] = aos_unsupported
,
917 [TGSI_OPCODE_CEIL
] = aos_CEIL
,
918 [TGSI_OPCODE_I2F
] = aos_simple
,
919 [TGSI_OPCODE_NOT
] = aos_simple
,
920 [TGSI_OPCODE_TRUNC
] = aos_simple
,
921 [TGSI_OPCODE_SHL
] = aos_simple
,
922 [88] = aos_unsupported
,
923 [TGSI_OPCODE_AND
] = aos_simple
,
924 [TGSI_OPCODE_OR
] = aos_simple
,
925 [TGSI_OPCODE_MOD
] = aos_simple
,
926 [TGSI_OPCODE_XOR
] = aos_simple
,
927 [TGSI_OPCODE_SAD
] = aos_SAD
,
928 [TGSI_OPCODE_TXF
] = aos_tex
,
929 [TGSI_OPCODE_TXQ
] = aos_tex
,
930 [TGSI_OPCODE_CONT
] = aos_CONT
,
931 [TGSI_OPCODE_EMIT
] = aos_simple
,
932 [TGSI_OPCODE_ENDPRIM
] = aos_simple
,
933 [TGSI_OPCODE_BGNLOOP
] = aos_BGNLOOP
,
934 [TGSI_OPCODE_BGNSUB
] = aos_unsupported
,
935 [TGSI_OPCODE_ENDLOOP
] = aos_ENDLOOP
,
936 [TGSI_OPCODE_ENDSUB
] = aos_unsupported
,
937 [TGSI_OPCODE_TXQ_LZ
] = aos_tex
,
938 [104] = aos_unsupported
,
939 [105] = aos_unsupported
,
940 [106] = aos_unsupported
,
941 [TGSI_OPCODE_NOP
] = aos_simple
,
942 [TGSI_OPCODE_FSEQ
] = aos_set_on_cond
,
943 [TGSI_OPCODE_FSGE
] = aos_set_on_cond
,
944 [TGSI_OPCODE_FSLT
] = aos_set_on_cond
,
945 [TGSI_OPCODE_FSNE
] = aos_set_on_cond
,
946 [TGSI_OPCODE_NRM4
] = aos_NRM4
,
947 [TGSI_OPCODE_CALLNZ
] = aos_unsupported
,
948 [TGSI_OPCODE_BREAKC
] = aos_unsupported
,
949 [TGSI_OPCODE_KILL_IF
] = aos_simple
,
950 [TGSI_OPCODE_END
] = aos_simple
,
951 [118] = aos_unsupported
,
952 [TGSI_OPCODE_F2I
] = aos_simple
,
953 [TGSI_OPCODE_IDIV
] = aos_simple
,
954 [TGSI_OPCODE_IMAX
] = aos_simple
,
955 [TGSI_OPCODE_IMIN
] = aos_simple
,
956 [TGSI_OPCODE_INEG
] = aos_simple
,
957 [TGSI_OPCODE_ISGE
] = aos_set_on_cond
,
958 [TGSI_OPCODE_ISHR
] = aos_simple
,
959 [TGSI_OPCODE_ISLT
] = aos_set_on_cond
,
960 [TGSI_OPCODE_F2U
] = aos_simple
,
961 [TGSI_OPCODE_U2F
] = aos_simple
,
962 [TGSI_OPCODE_UADD
] = aos_simple
,
963 [TGSI_OPCODE_UDIV
] = aos_simple
,
964 [TGSI_OPCODE_UMAD
] = aos_simple
,
965 [TGSI_OPCODE_UMAX
] = aos_simple
,
966 [TGSI_OPCODE_UMIN
] = aos_simple
,
967 [TGSI_OPCODE_UMOD
] = aos_simple
,
968 [TGSI_OPCODE_UMUL
] = aos_simple
,
969 [TGSI_OPCODE_USEQ
] = aos_set_on_cond
,
970 [TGSI_OPCODE_USGE
] = aos_set_on_cond
,
971 [TGSI_OPCODE_USHR
] = aos_simple
,
972 [TGSI_OPCODE_USLT
] = aos_set_on_cond
,
973 [TGSI_OPCODE_USNE
] = aos_set_on_cond
,
974 [TGSI_OPCODE_SWITCH
] = aos_unsupported
,
975 [TGSI_OPCODE_CASE
] = aos_unsupported
,
976 [TGSI_OPCODE_DEFAULT
] = aos_unsupported
,
977 [TGSI_OPCODE_ENDSWITCH
] = aos_unsupported
,
978 [TGSI_OPCODE_SAMPLE
] = aos_sample
,
979 [TGSI_OPCODE_SAMPLE_I
] = aos_sample
,
980 [TGSI_OPCODE_SAMPLE_I_MS
] = aos_sample
,
981 [TGSI_OPCODE_SAMPLE_B
] = aos_sample
,
982 [TGSI_OPCODE_SAMPLE_C
] = aos_sample
,
983 [TGSI_OPCODE_SAMPLE_C_LZ
] = aos_sample
,
984 [TGSI_OPCODE_SAMPLE_D
] = aos_sample
,
985 [TGSI_OPCODE_SAMPLE_L
] = aos_sample
,
986 [TGSI_OPCODE_GATHER4
] = aos_sample
,
987 [TGSI_OPCODE_SVIEWINFO
] = aos_sample
,
988 [TGSI_OPCODE_SAMPLE_POS
] = aos_sample
,
989 [TGSI_OPCODE_SAMPLE_INFO
] = aos_sample
,
990 [TGSI_OPCODE_UARL
] = aos_simple
,
991 [TGSI_OPCODE_UCMP
] = aos_compare
,
992 [TGSI_OPCODE_IABS
] = aos_simple
,
993 [TGSI_OPCODE_ISSG
] = aos_set_sign
,
994 [TGSI_OPCODE_LOAD
] = aos_unsupported
,
995 [TGSI_OPCODE_STORE
] = aos_unsupported
,
996 [TGSI_OPCODE_MFENCE
] = aos_unsupported
,
997 [TGSI_OPCODE_LFENCE
] = aos_unsupported
,
998 [TGSI_OPCODE_SFENCE
] = aos_unsupported
,
999 [TGSI_OPCODE_BARRIER
] = aos_unsupported
,
1000 [TGSI_OPCODE_ATOMUADD
] = aos_unsupported
,
1001 [TGSI_OPCODE_ATOMXCHG
] = aos_unsupported
,
1002 [TGSI_OPCODE_ATOMCAS
] = aos_unsupported
,
1003 [TGSI_OPCODE_ATOMAND
] = aos_unsupported
,
1004 [TGSI_OPCODE_ATOMOR
] = aos_unsupported
,
1005 [TGSI_OPCODE_ATOMXOR
] = aos_unsupported
,
1006 [TGSI_OPCODE_ATOMUMIN
] = aos_unsupported
,
1007 [TGSI_OPCODE_ATOMUMAX
] = aos_unsupported
,
1008 [TGSI_OPCODE_ATOMIMIN
] = aos_unsupported
,
1009 [TGSI_OPCODE_ATOMIMAX
] = aos_unsupported
,
1010 [TGSI_OPCODE_TEX2
] = aos_tex
,
1011 [TGSI_OPCODE_TXB2
] = aos_tex
,
1012 [TGSI_OPCODE_TXL2
] = aos_tex
,
1016 soa_passthrough(struct toy_compiler
*tc
,
1017 const struct tgsi_full_instruction
*tgsi_inst
,
1018 struct toy_dst
*dst_
,
1019 struct toy_src
*src_
)
1021 const toy_tgsi_translate translate
=
1022 aos_translate_table
[tgsi_inst
->Instruction
.Opcode
];
1024 translate(tc
, tgsi_inst
, dst_
, src_
);
1028 soa_per_channel(struct toy_compiler
*tc
,
1029 const struct tgsi_full_instruction
*tgsi_inst
,
1030 struct toy_dst
*dst_
,
1031 struct toy_src
*src_
)
1033 struct toy_dst dst
[TGSI_FULL_MAX_DST_REGISTERS
][4];
1034 struct toy_src src
[TGSI_FULL_MAX_SRC_REGISTERS
][4];
1037 for (i
= 0; i
< tgsi_inst
->Instruction
.NumDstRegs
; i
++)
1038 tdst_transpose(dst_
[i
], dst
[i
]);
1039 for (i
= 0; i
< tgsi_inst
->Instruction
.NumSrcRegs
; i
++)
1040 tsrc_transpose(src_
[i
], src
[i
]);
1042 /* emit the same instruction four times for the four channels */
1043 for (ch
= 0; ch
< 4; ch
++) {
1044 struct toy_dst aos_dst
[TGSI_FULL_MAX_DST_REGISTERS
];
1045 struct toy_src aos_src
[TGSI_FULL_MAX_SRC_REGISTERS
];
1047 for (i
= 0; i
< tgsi_inst
->Instruction
.NumDstRegs
; i
++)
1048 aos_dst
[i
] = dst
[i
][ch
];
1049 for (i
= 0; i
< tgsi_inst
->Instruction
.NumSrcRegs
; i
++)
1050 aos_src
[i
] = src
[i
][ch
];
1052 aos_translate_table
[tgsi_inst
->Instruction
.Opcode
](tc
,
1053 tgsi_inst
, aos_dst
, aos_src
);
1058 soa_scalar_replicate(struct toy_compiler
*tc
,
1059 const struct tgsi_full_instruction
*tgsi_inst
,
1060 struct toy_dst
*dst_
,
1061 struct toy_src
*src_
)
1063 struct toy_dst dst0
[4], tmp
;
1064 struct toy_src srcx
[TGSI_FULL_MAX_SRC_REGISTERS
];
1067 assert(tgsi_inst
->Instruction
.NumDstRegs
== 1);
1069 tdst_transpose(dst_
[0], dst0
);
1070 for (i
= 0; i
< tgsi_inst
->Instruction
.NumSrcRegs
; i
++) {
1071 struct toy_src tmp
[4];
1073 tsrc_transpose(src_
[i
], tmp
);
1074 /* only the X channels */
1078 tmp
= tc_alloc_tmp(tc
);
1080 opcode
= aos_simple_opcode_map
[tgsi_inst
->Instruction
.Opcode
].opcode
;
1083 switch (tgsi_inst
->Instruction
.Opcode
) {
1084 case TGSI_OPCODE_RCP
:
1085 case TGSI_OPCODE_RSQ
:
1086 case TGSI_OPCODE_SQRT
:
1087 case TGSI_OPCODE_EX2
:
1088 case TGSI_OPCODE_LG2
:
1089 case TGSI_OPCODE_COS
:
1090 case TGSI_OPCODE_SIN
:
1091 tc_add1(tc
, opcode
, tmp
, srcx
[0]);
1093 case TGSI_OPCODE_POW
:
1094 tc_add2(tc
, opcode
, tmp
, srcx
[0], srcx
[1]);
1097 assert(!"invalid soa_scalar_replicate() call");
1101 /* replicate the result */
1102 for (i
= 0; i
< 4; i
++)
1103 tc_MOV(tc
, dst0
[i
], tsrc_from(tmp
));
1107 soa_dot_product(struct toy_compiler
*tc
,
1108 const struct tgsi_full_instruction
*tgsi_inst
,
1109 struct toy_dst
*dst_
,
1110 struct toy_src
*src_
)
1112 struct toy_dst dst0
[4], tmp
;
1113 struct toy_src src
[TGSI_FULL_MAX_SRC_REGISTERS
][4];
1116 tdst_transpose(dst_
[0], dst0
);
1117 for (i
= 0; i
< tgsi_inst
->Instruction
.NumSrcRegs
; i
++)
1118 tsrc_transpose(src_
[i
], src
[i
]);
1120 tmp
= tc_alloc_tmp(tc
);
1122 switch (tgsi_inst
->Instruction
.Opcode
) {
1123 case TGSI_OPCODE_DP2
:
1124 tc_MUL(tc
, tmp
, src
[0][1], src
[1][1]);
1125 tc_MAC(tc
, tmp
, src
[0][0], src
[1][0], tsrc_from(tmp
));
1127 case TGSI_OPCODE_DP2A
:
1128 tc_MAC(tc
, tmp
, src
[0][1], src
[1][1], src
[2][0]);
1129 tc_MAC(tc
, tmp
, src
[0][0], src
[1][0], tsrc_from(tmp
));
1131 case TGSI_OPCODE_DP3
:
1132 tc_MUL(tc
, tmp
, src
[0][2], src
[1][2]);
1133 tc_MAC(tc
, tmp
, src
[0][1], src
[1][1], tsrc_from(tmp
));
1134 tc_MAC(tc
, tmp
, src
[0][0], src
[1][0], tsrc_from(tmp
));
1136 case TGSI_OPCODE_DPH
:
1137 tc_MAC(tc
, tmp
, src
[0][2], src
[1][2], src
[1][3]);
1138 tc_MAC(tc
, tmp
, src
[0][1], src
[1][1], tsrc_from(tmp
));
1139 tc_MAC(tc
, tmp
, src
[0][0], src
[1][0], tsrc_from(tmp
));
1141 case TGSI_OPCODE_DP4
:
1142 tc_MUL(tc
, tmp
, src
[0][3], src
[1][3]);
1143 tc_MAC(tc
, tmp
, src
[0][2], src
[1][2], tsrc_from(tmp
));
1144 tc_MAC(tc
, tmp
, src
[0][1], src
[1][1], tsrc_from(tmp
));
1145 tc_MAC(tc
, tmp
, src
[0][0], src
[1][0], tsrc_from(tmp
));
1148 assert(!"invalid soa_dot_product() call");
1152 for (i
= 0; i
< 4; i
++)
1153 tc_MOV(tc
, dst0
[i
], tsrc_from(tmp
));
1157 soa_partial_derivative(struct toy_compiler
*tc
,
1158 const struct tgsi_full_instruction
*tgsi_inst
,
1159 struct toy_dst
*dst_
,
1160 struct toy_src
*src_
)
1162 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_DDX
)
1163 tc_add1(tc
, TOY_OPCODE_DDX
, dst_
[0], src_
[0]);
1165 tc_add1(tc
, TOY_OPCODE_DDY
, dst_
[0], src_
[0]);
1169 soa_if(struct toy_compiler
*tc
,
1170 const struct tgsi_full_instruction
*tgsi_inst
,
1171 struct toy_dst
*dst_
,
1172 struct toy_src
*src_
)
1174 struct toy_src src0
[4];
1176 assert(tsrc_is_swizzle1(src_
[0]));
1177 tsrc_transpose(src_
[0], src0
);
1179 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_IF
)
1180 tc_IF(tc
, tdst_null(), src0
[0], tsrc_imm_f(0.0f
), GEN6_COND_NZ
);
1182 tc_IF(tc
, tdst_null(), src0
[0], tsrc_imm_d(0), GEN6_COND_NZ
);
1186 soa_LIT(struct toy_compiler
*tc
,
1187 const struct tgsi_full_instruction
*tgsi_inst
,
1188 struct toy_dst
*dst_
,
1189 struct toy_src
*src_
)
1191 struct toy_inst
*inst
;
1192 struct toy_dst dst0
[4];
1193 struct toy_src src0
[4];
1195 tdst_transpose(dst_
[0], dst0
);
1196 tsrc_transpose(src_
[0], src0
);
1198 tc_MOV(tc
, dst0
[0], tsrc_imm_f(1.0f
));
1199 tc_MOV(tc
, dst0
[1], src0
[0]);
1200 tc_POW(tc
, dst0
[2], src0
[1], src0
[3]);
1201 tc_MOV(tc
, dst0
[3], tsrc_imm_f(1.0f
));
1204 * POW is calculated first because math with pred_ctrl is broken here.
1207 tc_CMP(tc
, tdst_null(), src0
[0], tsrc_imm_f(0.0f
), GEN6_COND_L
);
1208 inst
= tc_MOV(tc
, dst0
[1], tsrc_imm_f(0.0f
));
1209 inst
->pred_ctrl
= GEN6_PREDCTRL_NORMAL
;
1210 inst
= tc_MOV(tc
, dst0
[2], tsrc_imm_f(0.0f
));
1211 inst
->pred_ctrl
= GEN6_PREDCTRL_NORMAL
;
1215 soa_EXP(struct toy_compiler
*tc
,
1216 const struct tgsi_full_instruction
*tgsi_inst
,
1217 struct toy_dst
*dst_
,
1218 struct toy_src
*src_
)
1220 struct toy_dst dst0
[4];
1221 struct toy_src src0
[4];
1223 assert(!"SoA EXP untested");
1225 tdst_transpose(dst_
[0], dst0
);
1226 tsrc_transpose(src_
[0], src0
);
1228 if (!tdst_is_null(dst0
[0])) {
1229 struct toy_dst tmp
= tdst_d(tc_alloc_tmp(tc
));
1231 tc_RNDD(tc
, tmp
, src0
[0]);
1233 /* construct the floating point number manually */
1234 tc_ADD(tc
, tmp
, tsrc_from(tmp
), tsrc_imm_d(127));
1235 tc_SHL(tc
, tdst_d(dst0
[0]), tsrc_from(tmp
), tsrc_imm_d(23));
1238 tc_FRC(tc
, dst0
[1], src0
[0]);
1239 tc_EXP(tc
, dst0
[2], src0
[0]);
1240 tc_MOV(tc
, dst0
[3], tsrc_imm_f(1.0f
));
1244 soa_LOG(struct toy_compiler
*tc
,
1245 const struct tgsi_full_instruction
*tgsi_inst
,
1246 struct toy_dst
*dst_
,
1247 struct toy_src
*src_
)
1249 struct toy_dst dst0
[4];
1250 struct toy_src src0
[4];
1252 assert(!"SoA LOG untested");
1254 tdst_transpose(dst_
[0], dst0
);
1255 tsrc_transpose(src_
[0], src0
);
1257 if (dst_
[0].writemask
& TOY_WRITEMASK_XY
) {
1258 struct toy_dst tmp
= tdst_d(tc_alloc_tmp(tc
));
1261 tc_SHR(tc
, tmp
, tsrc_absolute(tsrc_d(src0
[0])), tsrc_imm_d(23));
1262 tc_ADD(tc
, dst0
[0], tsrc_from(tmp
), tsrc_imm_d(-127));
1265 tc_AND(tc
, tmp
, tsrc_d(src0
[0]), tsrc_imm_d((1 << 23) - 1));
1266 tc_OR(tc
, dst0
[1], tsrc_from(tmp
), tsrc_imm_d(127 << 23));
1269 tc_LOG(tc
, dst0
[2], src0
[0]);
1270 tc_MOV(tc
, dst0
[3], tsrc_imm_f(1.0f
));
1274 soa_DST(struct toy_compiler
*tc
,
1275 const struct tgsi_full_instruction
*tgsi_inst
,
1276 struct toy_dst
*dst_
,
1277 struct toy_src
*src_
)
1279 struct toy_dst dst0
[4];
1280 struct toy_src src
[2][4];
1282 tdst_transpose(dst_
[0], dst0
);
1283 tsrc_transpose(src_
[0], src
[0]);
1284 tsrc_transpose(src_
[1], src
[1]);
1286 tc_MOV(tc
, dst0
[0], tsrc_imm_f(1.0f
));
1287 tc_MUL(tc
, dst0
[1], src
[0][1], src
[1][1]);
1288 tc_MOV(tc
, dst0
[2], src
[0][2]);
1289 tc_MOV(tc
, dst0
[3], src
[1][3]);
1293 soa_XPD(struct toy_compiler
*tc
,
1294 const struct tgsi_full_instruction
*tgsi_inst
,
1295 struct toy_dst
*dst_
,
1296 struct toy_src
*src_
)
1298 struct toy_dst dst0
[4];
1299 struct toy_src src
[2][4];
1301 tdst_transpose(dst_
[0], dst0
);
1302 tsrc_transpose(src_
[0], src
[0]);
1303 tsrc_transpose(src_
[1], src
[1]);
1305 /* dst.x = src0.y * src1.z - src1.y * src0.z */
1306 tc_MUL(tc
, dst0
[0], src
[0][2], src
[1][1]);
1307 tc_MAC(tc
, dst0
[0], src
[0][1], src
[1][2], tsrc_negate(tsrc_from(dst0
[0])));
1309 /* dst.y = src0.z * src1.x - src1.z * src0.x */
1310 tc_MUL(tc
, dst0
[1], src
[0][0], src
[1][2]);
1311 tc_MAC(tc
, dst0
[1], src
[0][2], src
[1][0], tsrc_negate(tsrc_from(dst0
[1])));
1313 /* dst.z = src0.x * src1.y - src1.x * src0.y */
1314 tc_MUL(tc
, dst0
[2], src
[0][1], src
[1][0]);
1315 tc_MAC(tc
, dst0
[2], src
[0][0], src
[1][1], tsrc_negate(tsrc_from(dst0
[2])));
1317 tc_MOV(tc
, dst0
[3], tsrc_imm_f(1.0f
));
1321 soa_PK2H(struct toy_compiler
*tc
,
1322 const struct tgsi_full_instruction
*tgsi_inst
,
1323 struct toy_dst
*dst_
,
1324 struct toy_src
*src_
)
1326 struct toy_dst tmp
= tdst_ud(tc_alloc_tmp(tc
));
1327 struct toy_dst dst0
[4];
1328 struct toy_src src0
[4];
1331 assert(!"SoA PK2H untested");
1333 tdst_transpose(dst_
[0], dst0
);
1334 tsrc_transpose(src_
[0], src0
);
1336 tc_SHL(tc
, tmp
, src0
[1], tsrc_imm_ud(16));
1337 tc_OR(tc
, tmp
, src0
[0], tsrc_from(tmp
));
1339 for (i
= 0; i
< 4; i
++)
1340 tc_MOV(tc
, dst0
[i
], tsrc_from(tmp
));
1344 soa_UP2H(struct toy_compiler
*tc
,
1345 const struct tgsi_full_instruction
*tgsi_inst
,
1346 struct toy_dst
*dst_
,
1347 struct toy_src
*src_
)
1349 struct toy_dst dst0
[4];
1350 struct toy_src src0
[4];
1352 assert(!"SoA UP2H untested");
1354 tdst_transpose(dst_
[0], dst0
);
1355 tsrc_transpose(src_
[0], src0
);
1357 tc_AND(tc
, tdst_ud(dst0
[0]), tsrc_ud(src0
[0]), tsrc_imm_ud(0xffff));
1358 tc_SHR(tc
, tdst_ud(dst0
[1]), tsrc_ud(src0
[1]), tsrc_imm_ud(16));
1359 tc_AND(tc
, tdst_ud(dst0
[2]), tsrc_ud(src0
[2]), tsrc_imm_ud(0xffff));
1360 tc_SHR(tc
, tdst_ud(dst0
[3]), tsrc_ud(src0
[3]), tsrc_imm_ud(16));
1365 soa_SCS(struct toy_compiler
*tc
,
1366 const struct tgsi_full_instruction
*tgsi_inst
,
1367 struct toy_dst
*dst_
,
1368 struct toy_src
*src_
)
1370 struct toy_dst dst0
[4];
1371 struct toy_src src0
[4];
1373 tdst_transpose(dst_
[0], dst0
);
1374 tsrc_transpose(src_
[0], src0
);
1376 tc_add1(tc
, TOY_OPCODE_COS
, dst0
[0], src0
[0]);
1377 tc_add1(tc
, TOY_OPCODE_SIN
, dst0
[1], src0
[0]);
1378 tc_MOV(tc
, dst0
[2], tsrc_imm_f(0.0f
));
1379 tc_MOV(tc
, dst0
[3], tsrc_imm_f(1.0f
));
1383 soa_NRM(struct toy_compiler
*tc
,
1384 const struct tgsi_full_instruction
*tgsi_inst
,
1385 struct toy_dst
*dst_
,
1386 struct toy_src
*src_
)
1388 const struct toy_dst tmp
= tc_alloc_tmp(tc
);
1389 struct toy_dst dst0
[4];
1390 struct toy_src src0
[4];
1392 assert(!"SoA NRM untested");
1394 tdst_transpose(dst_
[0], dst0
);
1395 tsrc_transpose(src_
[0], src0
);
1397 tc_MUL(tc
, tmp
, src0
[2], src0
[2]);
1398 tc_MAC(tc
, tmp
, src0
[1], src0
[1], tsrc_from(tmp
));
1399 tc_MAC(tc
, tmp
, src0
[0], src0
[0], tsrc_from(tmp
));
1400 tc_INV(tc
, tmp
, tsrc_from(tmp
));
1402 tc_MUL(tc
, dst0
[0], src0
[0], tsrc_from(tmp
));
1403 tc_MUL(tc
, dst0
[1], src0
[1], tsrc_from(tmp
));
1404 tc_MUL(tc
, dst0
[2], src0
[2], tsrc_from(tmp
));
1405 tc_MOV(tc
, dst0
[3], tsrc_imm_f(1.0f
));
1409 soa_NRM4(struct toy_compiler
*tc
,
1410 const struct tgsi_full_instruction
*tgsi_inst
,
1411 struct toy_dst
*dst_
,
1412 struct toy_src
*src_
)
1414 const struct toy_dst tmp
= tc_alloc_tmp(tc
);
1415 struct toy_dst dst0
[4];
1416 struct toy_src src0
[4];
1419 assert(!"SoA NRM4 untested");
1421 tdst_transpose(dst_
[0], dst0
);
1422 tsrc_transpose(src_
[0], src0
);
1424 tc_MUL(tc
, tmp
, src0
[3], src0
[3]);
1425 tc_MAC(tc
, tmp
, src0
[2], src0
[2], tsrc_from(tmp
));
1426 tc_MAC(tc
, tmp
, src0
[1], src0
[1], tsrc_from(tmp
));
1427 tc_MAC(tc
, tmp
, src0
[0], src0
[0], tsrc_from(tmp
));
1428 tc_INV(tc
, tmp
, tsrc_from(tmp
));
1430 for (i
= 0; i
< 4; i
++)
1431 tc_MUL(tc
, dst0
[i
], src0
[0], tsrc_from(tmp
));
1435 soa_unsupported(struct toy_compiler
*tc
,
1436 const struct tgsi_full_instruction
*tgsi_inst
,
1437 struct toy_dst
*dst_
,
1438 struct toy_src
*src_
)
1440 const struct tgsi_opcode_info
*info
=
1441 tgsi_get_opcode_info(tgsi_inst
->Instruction
.Opcode
);
1443 ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
1446 tc_fail(tc
, "unsupported TGSI instruction in SoA form");
1449 static const toy_tgsi_translate soa_translate_table
[TGSI_OPCODE_LAST
] = {
1450 [TGSI_OPCODE_ARL
] = soa_per_channel
,
1451 [TGSI_OPCODE_MOV
] = soa_per_channel
,
1452 [TGSI_OPCODE_LIT
] = soa_LIT
,
1453 [TGSI_OPCODE_RCP
] = soa_scalar_replicate
,
1454 [TGSI_OPCODE_RSQ
] = soa_scalar_replicate
,
1455 [TGSI_OPCODE_EXP
] = soa_EXP
,
1456 [TGSI_OPCODE_LOG
] = soa_LOG
,
1457 [TGSI_OPCODE_MUL
] = soa_per_channel
,
1458 [TGSI_OPCODE_ADD
] = soa_per_channel
,
1459 [TGSI_OPCODE_DP3
] = soa_dot_product
,
1460 [TGSI_OPCODE_DP4
] = soa_dot_product
,
1461 [TGSI_OPCODE_DST
] = soa_DST
,
1462 [TGSI_OPCODE_MIN
] = soa_per_channel
,
1463 [TGSI_OPCODE_MAX
] = soa_per_channel
,
1464 [TGSI_OPCODE_SLT
] = soa_per_channel
,
1465 [TGSI_OPCODE_SGE
] = soa_per_channel
,
1466 [TGSI_OPCODE_MAD
] = soa_per_channel
,
1467 [TGSI_OPCODE_SUB
] = soa_per_channel
,
1468 [TGSI_OPCODE_LRP
] = soa_per_channel
,
1469 [TGSI_OPCODE_CND
] = soa_per_channel
,
1470 [TGSI_OPCODE_SQRT
] = soa_scalar_replicate
,
1471 [TGSI_OPCODE_DP2A
] = soa_dot_product
,
1472 [22] = soa_unsupported
,
1473 [23] = soa_unsupported
,
1474 [TGSI_OPCODE_FRC
] = soa_per_channel
,
1475 [TGSI_OPCODE_CLAMP
] = soa_per_channel
,
1476 [TGSI_OPCODE_FLR
] = soa_per_channel
,
1477 [TGSI_OPCODE_ROUND
] = soa_per_channel
,
1478 [TGSI_OPCODE_EX2
] = soa_scalar_replicate
,
1479 [TGSI_OPCODE_LG2
] = soa_scalar_replicate
,
1480 [TGSI_OPCODE_POW
] = soa_scalar_replicate
,
1481 [TGSI_OPCODE_XPD
] = soa_XPD
,
1482 [32] = soa_unsupported
,
1483 [TGSI_OPCODE_ABS
] = soa_per_channel
,
1484 [TGSI_OPCODE_RCC
] = soa_unsupported
,
1485 [TGSI_OPCODE_DPH
] = soa_dot_product
,
1486 [TGSI_OPCODE_COS
] = soa_scalar_replicate
,
1487 [TGSI_OPCODE_DDX
] = soa_partial_derivative
,
1488 [TGSI_OPCODE_DDY
] = soa_partial_derivative
,
1489 [TGSI_OPCODE_KILL
] = soa_passthrough
,
1490 [TGSI_OPCODE_PK2H
] = soa_PK2H
,
1491 [TGSI_OPCODE_PK2US
] = soa_unsupported
,
1492 [TGSI_OPCODE_PK4B
] = soa_unsupported
,
1493 [TGSI_OPCODE_PK4UB
] = soa_unsupported
,
1494 [TGSI_OPCODE_RFL
] = soa_unsupported
,
1495 [TGSI_OPCODE_SEQ
] = soa_per_channel
,
1496 [TGSI_OPCODE_SFL
] = soa_per_channel
,
1497 [TGSI_OPCODE_SGT
] = soa_per_channel
,
1498 [TGSI_OPCODE_SIN
] = soa_scalar_replicate
,
1499 [TGSI_OPCODE_SLE
] = soa_per_channel
,
1500 [TGSI_OPCODE_SNE
] = soa_per_channel
,
1501 [TGSI_OPCODE_STR
] = soa_per_channel
,
1502 [TGSI_OPCODE_TEX
] = soa_passthrough
,
1503 [TGSI_OPCODE_TXD
] = soa_passthrough
,
1504 [TGSI_OPCODE_TXP
] = soa_passthrough
,
1505 [TGSI_OPCODE_UP2H
] = soa_UP2H
,
1506 [TGSI_OPCODE_UP2US
] = soa_unsupported
,
1507 [TGSI_OPCODE_UP4B
] = soa_unsupported
,
1508 [TGSI_OPCODE_UP4UB
] = soa_unsupported
,
1509 [TGSI_OPCODE_X2D
] = soa_unsupported
,
1510 [TGSI_OPCODE_ARA
] = soa_unsupported
,
1511 [TGSI_OPCODE_ARR
] = soa_per_channel
,
1512 [TGSI_OPCODE_BRA
] = soa_unsupported
,
1513 [TGSI_OPCODE_CAL
] = soa_unsupported
,
1514 [TGSI_OPCODE_RET
] = soa_unsupported
,
1515 [TGSI_OPCODE_SSG
] = soa_per_channel
,
1516 [TGSI_OPCODE_CMP
] = soa_per_channel
,
1517 [TGSI_OPCODE_SCS
] = soa_SCS
,
1518 [TGSI_OPCODE_TXB
] = soa_passthrough
,
1519 [TGSI_OPCODE_NRM
] = soa_NRM
,
1520 [TGSI_OPCODE_DIV
] = soa_per_channel
,
1521 [TGSI_OPCODE_DP2
] = soa_dot_product
,
1522 [TGSI_OPCODE_TXL
] = soa_passthrough
,
1523 [TGSI_OPCODE_BRK
] = soa_passthrough
,
1524 [TGSI_OPCODE_IF
] = soa_if
,
1525 [TGSI_OPCODE_UIF
] = soa_if
,
1526 [76] = soa_unsupported
,
1527 [TGSI_OPCODE_ELSE
] = soa_passthrough
,
1528 [TGSI_OPCODE_ENDIF
] = soa_passthrough
,
1529 [79] = soa_unsupported
,
1530 [80] = soa_unsupported
,
1531 [TGSI_OPCODE_PUSHA
] = soa_unsupported
,
1532 [TGSI_OPCODE_POPA
] = soa_unsupported
,
1533 [TGSI_OPCODE_CEIL
] = soa_per_channel
,
1534 [TGSI_OPCODE_I2F
] = soa_per_channel
,
1535 [TGSI_OPCODE_NOT
] = soa_per_channel
,
1536 [TGSI_OPCODE_TRUNC
] = soa_per_channel
,
1537 [TGSI_OPCODE_SHL
] = soa_per_channel
,
1538 [88] = soa_unsupported
,
1539 [TGSI_OPCODE_AND
] = soa_per_channel
,
1540 [TGSI_OPCODE_OR
] = soa_per_channel
,
1541 [TGSI_OPCODE_MOD
] = soa_per_channel
,
1542 [TGSI_OPCODE_XOR
] = soa_per_channel
,
1543 [TGSI_OPCODE_SAD
] = soa_per_channel
,
1544 [TGSI_OPCODE_TXF
] = soa_passthrough
,
1545 [TGSI_OPCODE_TXQ
] = soa_passthrough
,
1546 [TGSI_OPCODE_CONT
] = soa_passthrough
,
1547 [TGSI_OPCODE_EMIT
] = soa_unsupported
,
1548 [TGSI_OPCODE_ENDPRIM
] = soa_unsupported
,
1549 [TGSI_OPCODE_BGNLOOP
] = soa_passthrough
,
1550 [TGSI_OPCODE_BGNSUB
] = soa_unsupported
,
1551 [TGSI_OPCODE_ENDLOOP
] = soa_passthrough
,
1552 [TGSI_OPCODE_ENDSUB
] = soa_unsupported
,
1553 [TGSI_OPCODE_TXQ_LZ
] = soa_passthrough
,
1554 [104] = soa_unsupported
,
1555 [105] = soa_unsupported
,
1556 [106] = soa_unsupported
,
1557 [TGSI_OPCODE_NOP
] = soa_passthrough
,
1558 [TGSI_OPCODE_FSEQ
] = soa_per_channel
,
1559 [TGSI_OPCODE_FSGE
] = soa_per_channel
,
1560 [TGSI_OPCODE_FSLT
] = soa_per_channel
,
1561 [TGSI_OPCODE_FSNE
] = soa_per_channel
,
1562 [TGSI_OPCODE_NRM4
] = soa_NRM4
,
1563 [TGSI_OPCODE_CALLNZ
] = soa_unsupported
,
1564 [TGSI_OPCODE_BREAKC
] = soa_unsupported
,
1565 [TGSI_OPCODE_KILL_IF
] = soa_passthrough
,
1566 [TGSI_OPCODE_END
] = soa_passthrough
,
1567 [118] = soa_unsupported
,
1568 [TGSI_OPCODE_F2I
] = soa_per_channel
,
1569 [TGSI_OPCODE_IDIV
] = soa_per_channel
,
1570 [TGSI_OPCODE_IMAX
] = soa_per_channel
,
1571 [TGSI_OPCODE_IMIN
] = soa_per_channel
,
1572 [TGSI_OPCODE_INEG
] = soa_per_channel
,
1573 [TGSI_OPCODE_ISGE
] = soa_per_channel
,
1574 [TGSI_OPCODE_ISHR
] = soa_per_channel
,
1575 [TGSI_OPCODE_ISLT
] = soa_per_channel
,
1576 [TGSI_OPCODE_F2U
] = soa_per_channel
,
1577 [TGSI_OPCODE_U2F
] = soa_per_channel
,
1578 [TGSI_OPCODE_UADD
] = soa_per_channel
,
1579 [TGSI_OPCODE_UDIV
] = soa_per_channel
,
1580 [TGSI_OPCODE_UMAD
] = soa_per_channel
,
1581 [TGSI_OPCODE_UMAX
] = soa_per_channel
,
1582 [TGSI_OPCODE_UMIN
] = soa_per_channel
,
1583 [TGSI_OPCODE_UMOD
] = soa_per_channel
,
1584 [TGSI_OPCODE_UMUL
] = soa_per_channel
,
1585 [TGSI_OPCODE_USEQ
] = soa_per_channel
,
1586 [TGSI_OPCODE_USGE
] = soa_per_channel
,
1587 [TGSI_OPCODE_USHR
] = soa_per_channel
,
1588 [TGSI_OPCODE_USLT
] = soa_per_channel
,
1589 [TGSI_OPCODE_USNE
] = soa_per_channel
,
1590 [TGSI_OPCODE_SWITCH
] = soa_unsupported
,
1591 [TGSI_OPCODE_CASE
] = soa_unsupported
,
1592 [TGSI_OPCODE_DEFAULT
] = soa_unsupported
,
1593 [TGSI_OPCODE_ENDSWITCH
] = soa_unsupported
,
1594 [TGSI_OPCODE_SAMPLE
] = soa_passthrough
,
1595 [TGSI_OPCODE_SAMPLE_I
] = soa_passthrough
,
1596 [TGSI_OPCODE_SAMPLE_I_MS
] = soa_passthrough
,
1597 [TGSI_OPCODE_SAMPLE_B
] = soa_passthrough
,
1598 [TGSI_OPCODE_SAMPLE_C
] = soa_passthrough
,
1599 [TGSI_OPCODE_SAMPLE_C_LZ
] = soa_passthrough
,
1600 [TGSI_OPCODE_SAMPLE_D
] = soa_passthrough
,
1601 [TGSI_OPCODE_SAMPLE_L
] = soa_passthrough
,
1602 [TGSI_OPCODE_GATHER4
] = soa_passthrough
,
1603 [TGSI_OPCODE_SVIEWINFO
] = soa_passthrough
,
1604 [TGSI_OPCODE_SAMPLE_POS
] = soa_passthrough
,
1605 [TGSI_OPCODE_SAMPLE_INFO
] = soa_passthrough
,
1606 [TGSI_OPCODE_UARL
] = soa_per_channel
,
1607 [TGSI_OPCODE_UCMP
] = soa_per_channel
,
1608 [TGSI_OPCODE_IABS
] = soa_per_channel
,
1609 [TGSI_OPCODE_ISSG
] = soa_per_channel
,
1610 [TGSI_OPCODE_LOAD
] = soa_unsupported
,
1611 [TGSI_OPCODE_STORE
] = soa_unsupported
,
1612 [TGSI_OPCODE_MFENCE
] = soa_unsupported
,
1613 [TGSI_OPCODE_LFENCE
] = soa_unsupported
,
1614 [TGSI_OPCODE_SFENCE
] = soa_unsupported
,
1615 [TGSI_OPCODE_BARRIER
] = soa_unsupported
,
1616 [TGSI_OPCODE_ATOMUADD
] = soa_unsupported
,
1617 [TGSI_OPCODE_ATOMXCHG
] = soa_unsupported
,
1618 [TGSI_OPCODE_ATOMCAS
] = soa_unsupported
,
1619 [TGSI_OPCODE_ATOMAND
] = soa_unsupported
,
1620 [TGSI_OPCODE_ATOMOR
] = soa_unsupported
,
1621 [TGSI_OPCODE_ATOMXOR
] = soa_unsupported
,
1622 [TGSI_OPCODE_ATOMUMIN
] = soa_unsupported
,
1623 [TGSI_OPCODE_ATOMUMAX
] = soa_unsupported
,
1624 [TGSI_OPCODE_ATOMIMIN
] = soa_unsupported
,
1625 [TGSI_OPCODE_ATOMIMAX
] = soa_unsupported
,
1626 [TGSI_OPCODE_TEX2
] = soa_passthrough
,
1627 [TGSI_OPCODE_TXB2
] = soa_passthrough
,
1628 [TGSI_OPCODE_TXL2
] = soa_passthrough
,
1632 ra_dst_is_indirect(const struct tgsi_full_dst_register
*d
)
1634 return (d
->Register
.Indirect
||
1635 (d
->Register
.Dimension
&& d
->Dimension
.Indirect
));
1639 ra_dst_index(const struct tgsi_full_dst_register
*d
)
1641 assert(!d
->Register
.Indirect
);
1642 return d
->Register
.Index
;
1646 ra_dst_dimension(const struct tgsi_full_dst_register
*d
)
1648 if (d
->Register
.Dimension
) {
1649 assert(!d
->Dimension
.Indirect
);
1650 return d
->Dimension
.Index
;
1658 ra_is_src_indirect(const struct tgsi_full_src_register
*s
)
1660 return (s
->Register
.Indirect
||
1661 (s
->Register
.Dimension
&& s
->Dimension
.Indirect
));
1665 ra_src_index(const struct tgsi_full_src_register
*s
)
1667 assert(!s
->Register
.Indirect
);
1668 return s
->Register
.Index
;
1672 ra_src_dimension(const struct tgsi_full_src_register
*s
)
1674 if (s
->Register
.Dimension
) {
1675 assert(!s
->Dimension
.Indirect
);
1676 return s
->Dimension
.Index
;
1684 * Infer the type of either the sources or the destination.
1686 static enum toy_type
1687 ra_infer_opcode_type(int tgsi_opcode
, bool is_dst
)
1689 enum tgsi_opcode_type type
;
1692 type
= tgsi_opcode_infer_dst_type(tgsi_opcode
);
1694 type
= tgsi_opcode_infer_src_type(tgsi_opcode
);
1697 case TGSI_TYPE_UNSIGNED
:
1699 case TGSI_TYPE_SIGNED
:
1701 case TGSI_TYPE_FLOAT
:
1703 case TGSI_TYPE_UNTYPED
:
1704 case TGSI_TYPE_VOID
:
1705 case TGSI_TYPE_DOUBLE
:
1707 assert(!"unsupported TGSI type");
1713 * Return the type of an operand of the specified instruction.
1715 static enum toy_type
1716 ra_get_type(struct toy_tgsi
*tgsi
, const struct tgsi_full_instruction
*tgsi_inst
,
1717 int operand
, bool is_dst
)
1720 enum tgsi_file_type file
;
1722 /* we need to look at both src and dst for MOV */
1723 /* XXX it should not be this complex */
1724 if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
) {
1725 const enum tgsi_file_type dst_file
= tgsi_inst
->Dst
[0].Register
.File
;
1726 const enum tgsi_file_type src_file
= tgsi_inst
->Src
[0].Register
.File
;
1728 if (dst_file
== TGSI_FILE_ADDRESS
|| src_file
== TGSI_FILE_ADDRESS
) {
1731 else if (src_file
== TGSI_FILE_IMMEDIATE
&&
1732 !tgsi_inst
->Src
[0].Register
.Indirect
) {
1733 const int src_idx
= tgsi_inst
->Src
[0].Register
.Index
;
1734 type
= tgsi
->imm_data
.types
[src_idx
];
1737 /* this is the best we can do */
1743 else if (tgsi_inst
->Instruction
.Opcode
== TGSI_OPCODE_UCMP
) {
1744 if (!is_dst
&& operand
== 0)
1752 type
= ra_infer_opcode_type(tgsi_inst
->Instruction
.Opcode
, is_dst
);
1756 tgsi_inst
->Dst
[operand
].Register
.File
:
1757 tgsi_inst
->Src
[operand
].Register
.File
;
1759 case TGSI_FILE_SAMPLER
:
1760 case TGSI_FILE_RESOURCE
:
1761 case TGSI_FILE_SAMPLER_VIEW
:
1764 case TGSI_FILE_ADDRESS
:
1765 assert(type
== TOY_TYPE_D
);
1775 * Allocate a VRF register.
1778 ra_alloc_reg(struct toy_tgsi
*tgsi
, enum tgsi_file_type file
)
1780 const int count
= (tgsi
->aos
) ? 1 : 4;
1781 return tc_alloc_vrf(tgsi
->tc
, count
);
1785 * Construct the key for VRF mapping look-up.
1788 ra_get_map_key(enum tgsi_file_type file
, unsigned dim
, unsigned index
)
1792 /* this is ugly... */
1793 assert(file
< 1 << 4);
1794 assert(dim
< 1 << 12);
1795 assert(index
< 1 << 16);
1796 key
= (file
<< 28) | (dim
<< 16) | index
;
1798 return intptr_to_pointer(key
);
1802 * Map a TGSI register to a VRF register.
1805 ra_map_reg(struct toy_tgsi
*tgsi
, enum tgsi_file_type file
,
1806 int dim
, int index
, bool *is_new
)
1811 key
= ra_get_map_key(file
, dim
, index
);
1814 * because we allocate vrf from 1 and on, val is never NULL as long as the
1817 val
= util_hash_table_get(tgsi
->reg_mapping
, key
);
1819 vrf
= pointer_to_intptr(val
);
1825 vrf
= (intptr_t) ra_alloc_reg(tgsi
, file
);
1827 /* add to the mapping */
1828 val
= intptr_to_pointer(vrf
);
1829 util_hash_table_set(tgsi
->reg_mapping
, key
, val
);
1839 * Return true if the destination aliases any of the sources.
1842 ra_dst_is_aliasing(const struct tgsi_full_instruction
*tgsi_inst
, int dst_index
)
1844 const struct tgsi_full_dst_register
*d
= &tgsi_inst
->Dst
[dst_index
];
1847 /* we need a scratch register for indirect dst anyway */
1848 if (ra_dst_is_indirect(d
))
1851 for (i
= 0; i
< tgsi_inst
->Instruction
.NumSrcRegs
; i
++) {
1852 const struct tgsi_full_src_register
*s
= &tgsi_inst
->Src
[i
];
1854 if (s
->Register
.File
!= d
->Register
.File
)
1858 * we can go on to check dimension and index respectively, but
1859 * keep it simple for now
1861 if (ra_is_src_indirect(s
))
1863 if (ra_src_dimension(s
) == ra_dst_dimension(d
) &&
1864 ra_src_index(s
) == ra_dst_index(d
))
1872 * Return the toy register for a TGSI destination operand.
1874 static struct toy_dst
1875 ra_get_dst(struct toy_tgsi
*tgsi
,
1876 const struct tgsi_full_instruction
*tgsi_inst
, int dst_index
,
1879 const struct tgsi_full_dst_register
*d
= &tgsi_inst
->Dst
[dst_index
];
1880 bool need_vrf
= false;
1883 switch (d
->Register
.File
) {
1884 case TGSI_FILE_NULL
:
1887 case TGSI_FILE_OUTPUT
:
1888 case TGSI_FILE_TEMPORARY
:
1889 case TGSI_FILE_ADDRESS
:
1890 case TGSI_FILE_PREDICATE
:
1894 assert(!"unhandled dst file");
1900 /* XXX we do not always need a scratch given the conditions... */
1901 const bool need_scratch
=
1902 (ra_dst_is_indirect(d
) || ra_dst_is_aliasing(tgsi_inst
, dst_index
) ||
1903 tgsi_inst
->Instruction
.Saturate
);
1904 const enum toy_type type
= ra_get_type(tgsi
, tgsi_inst
, dst_index
, true);
1908 vrf
= ra_alloc_reg(tgsi
, d
->Register
.File
);
1911 vrf
= ra_map_reg(tgsi
, d
->Register
.File
,
1912 ra_dst_dimension(d
), ra_dst_index(d
), NULL
);
1916 *is_scratch
= need_scratch
;
1918 dst
= tdst_full(TOY_FILE_VRF
, type
, TOY_RECT_LINEAR
,
1919 false, 0, d
->Register
.WriteMask
, vrf
* TOY_REG_WIDTH
);
1925 static struct toy_src
1926 ra_get_src_for_vrf(const struct tgsi_full_src_register
*s
,
1927 enum toy_type type
, int vrf
)
1929 return tsrc_full(TOY_FILE_VRF
, type
, TOY_RECT_LINEAR
,
1931 s
->Register
.SwizzleX
, s
->Register
.SwizzleY
,
1932 s
->Register
.SwizzleZ
, s
->Register
.SwizzleW
,
1933 s
->Register
.Absolute
, s
->Register
.Negate
,
1934 vrf
* TOY_REG_WIDTH
);
1938 init_tgsi_reg(struct toy_tgsi
*tgsi
, struct toy_inst
*inst
,
1939 enum tgsi_file_type file
, int index
,
1940 const struct tgsi_ind_register
*indirect
,
1941 const struct tgsi_dimension
*dimension
,
1942 const struct tgsi_ind_register
*dim_indirect
)
1947 /* src[0]: TGSI file */
1948 inst
->src
[num_src
++] = tsrc_imm_d(file
);
1950 /* src[1]: TGSI dimension */
1951 inst
->src
[num_src
++] = tsrc_imm_d((dimension
) ? dimension
->Index
: 0);
1953 /* src[2]: TGSI dimension indirection */
1955 const int vrf
= ra_map_reg(tgsi
, dim_indirect
->File
, 0,
1956 dim_indirect
->Index
, NULL
);
1958 src
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1959 src
= tsrc_swizzle1(tsrc_d(src
), indirect
->Swizzle
);
1962 src
= tsrc_imm_d(0);
1965 inst
->src
[num_src
++] = src
;
1967 /* src[3]: TGSI index */
1968 inst
->src
[num_src
++] = tsrc_imm_d(index
);
1970 /* src[4]: TGSI index indirection */
1972 const int vrf
= ra_map_reg(tgsi
, indirect
->File
, 0,
1973 indirect
->Index
, NULL
);
1975 src
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1976 src
= tsrc_swizzle1(tsrc_d(src
), indirect
->Swizzle
);
1979 src
= tsrc_imm_d(0);
1982 inst
->src
[num_src
++] = src
;
1987 static struct toy_src
1988 ra_get_src_indirect(struct toy_tgsi
*tgsi
,
1989 const struct tgsi_full_instruction
*tgsi_inst
,
1992 const struct tgsi_full_src_register
*s
= &tgsi_inst
->Src
[src_index
];
1993 bool need_vrf
= false, is_resource
= false;
1996 switch (s
->Register
.File
) {
1997 case TGSI_FILE_NULL
:
2000 case TGSI_FILE_SAMPLER
:
2001 case TGSI_FILE_RESOURCE
:
2002 case TGSI_FILE_SAMPLER_VIEW
:
2005 case TGSI_FILE_CONSTANT
:
2006 case TGSI_FILE_INPUT
:
2007 case TGSI_FILE_SYSTEM_VALUE
:
2008 case TGSI_FILE_TEMPORARY
:
2009 case TGSI_FILE_ADDRESS
:
2010 case TGSI_FILE_IMMEDIATE
:
2011 case TGSI_FILE_PREDICATE
:
2015 assert(!"unhandled src file");
2021 const enum toy_type type
= ra_get_type(tgsi
, tgsi_inst
, src_index
, false);
2025 assert(!s
->Register
.Dimension
);
2026 assert(s
->Register
.Indirect
);
2028 vrf
= ra_map_reg(tgsi
, s
->Indirect
.File
, 0, s
->Indirect
.Index
, NULL
);
2031 vrf
= ra_alloc_reg(tgsi
, s
->Register
.File
);
2034 src
= ra_get_src_for_vrf(s
, type
, vrf
);
2036 /* emit indirect fetch */
2038 struct toy_inst
*inst
;
2040 inst
= tc_add(tgsi
->tc
);
2041 inst
->opcode
= TOY_OPCODE_TGSI_INDIRECT_FETCH
;
2042 inst
->dst
= tdst_from(src
);
2043 inst
->dst
.writemask
= TOY_WRITEMASK_XYZW
;
2045 init_tgsi_reg(tgsi
, inst
, s
->Register
.File
, s
->Register
.Index
,
2046 (s
->Register
.Indirect
) ? &s
->Indirect
: NULL
,
2047 (s
->Register
.Dimension
) ? &s
->Dimension
: NULL
,
2048 (s
->Dimension
.Indirect
) ? &s
->DimIndirect
: NULL
);
2056 * Return the toy register for a TGSI source operand.
2058 static struct toy_src
2059 ra_get_src(struct toy_tgsi
*tgsi
,
2060 const struct tgsi_full_instruction
*tgsi_inst
,
2063 const struct tgsi_full_src_register
*s
= &tgsi_inst
->Src
[src_index
];
2064 bool need_vrf
= false;
2067 if (ra_is_src_indirect(s
))
2068 return ra_get_src_indirect(tgsi
, tgsi_inst
, src_index
);
2070 switch (s
->Register
.File
) {
2071 case TGSI_FILE_NULL
:
2074 case TGSI_FILE_CONSTANT
:
2075 case TGSI_FILE_INPUT
:
2076 case TGSI_FILE_SYSTEM_VALUE
:
2079 case TGSI_FILE_TEMPORARY
:
2080 case TGSI_FILE_ADDRESS
:
2081 case TGSI_FILE_PREDICATE
:
2084 case TGSI_FILE_SAMPLER
:
2085 case TGSI_FILE_RESOURCE
:
2086 case TGSI_FILE_SAMPLER_VIEW
:
2087 assert(!s
->Register
.Dimension
);
2088 src
= tsrc_imm_d(s
->Register
.Index
);
2090 case TGSI_FILE_IMMEDIATE
:
2092 const uint32_t *imm
;
2093 enum toy_type imm_type
;
2096 imm
= toy_tgsi_get_imm(tgsi
, s
->Register
.Index
, &imm_type
);
2099 (imm
[s
->Register
.SwizzleX
] == imm
[s
->Register
.SwizzleY
] &&
2100 imm
[s
->Register
.SwizzleX
] == imm
[s
->Register
.SwizzleZ
] &&
2101 imm
[s
->Register
.SwizzleX
] == imm
[s
->Register
.SwizzleW
]);
2104 const enum toy_type type
=
2105 ra_get_type(tgsi
, tgsi_inst
, src_index
, false);
2107 /* ignore imm_type */
2108 src
= tsrc_imm_ud(imm
[s
->Register
.SwizzleX
]);
2110 src
.absolute
= s
->Register
.Absolute
;
2111 src
.negate
= s
->Register
.Negate
;
2119 assert(!"unhandled src file");
2125 const enum toy_type type
= ra_get_type(tgsi
, tgsi_inst
, src_index
, false);
2129 vrf
= ra_map_reg(tgsi
, s
->Register
.File
,
2130 ra_src_dimension(s
), ra_src_index(s
), &is_new
);
2132 src
= ra_get_src_for_vrf(s
, type
, vrf
);
2135 switch (s
->Register
.File
) {
2136 case TGSI_FILE_TEMPORARY
:
2137 case TGSI_FILE_ADDRESS
:
2138 case TGSI_FILE_PREDICATE
:
2140 struct toy_dst dst
= tdst_from(src
);
2141 dst
.writemask
= TOY_WRITEMASK_XYZW
;
2143 /* always initialize registers before use */
2145 tc_MOV(tgsi
->tc
, dst
, tsrc_type(tsrc_imm_d(0), type
));
2148 struct toy_dst tdst
[4];
2151 tdst_transpose(dst
, tdst
);
2153 for (i
= 0; i
< 4; i
++) {
2154 tc_MOV(tgsi
->tc
, tdst
[i
],
2155 tsrc_type(tsrc_imm_d(0), type
));
2171 parse_instruction(struct toy_tgsi
*tgsi
,
2172 const struct tgsi_full_instruction
*tgsi_inst
)
2174 struct toy_dst dst
[TGSI_FULL_MAX_DST_REGISTERS
];
2175 struct toy_src src
[TGSI_FULL_MAX_SRC_REGISTERS
];
2176 bool dst_is_scratch
[TGSI_FULL_MAX_DST_REGISTERS
];
2177 toy_tgsi_translate translate
;
2180 /* convert TGSI registers to toy registers */
2181 for (i
= 0; i
< tgsi_inst
->Instruction
.NumSrcRegs
; i
++)
2182 src
[i
] = ra_get_src(tgsi
, tgsi_inst
, i
);
2183 for (i
= 0; i
< tgsi_inst
->Instruction
.NumDstRegs
; i
++)
2184 dst
[i
] = ra_get_dst(tgsi
, tgsi_inst
, i
, &dst_is_scratch
[i
]);
2186 /* translate the instruction */
2187 translate
= tgsi
->translate_table
[tgsi_inst
->Instruction
.Opcode
];
2188 translate(tgsi
->tc
, tgsi_inst
, dst
, src
);
2190 /* write the result to the real destinations if needed */
2191 for (i
= 0; i
< tgsi_inst
->Instruction
.NumDstRegs
; i
++) {
2192 const struct tgsi_full_dst_register
*d
= &tgsi_inst
->Dst
[i
];
2194 if (!dst_is_scratch
[i
])
2197 if (tgsi_inst
->Instruction
.Saturate
== TGSI_SAT_MINUS_PLUS_ONE
)
2198 tc_fail(tgsi
->tc
, "TGSI_SAT_MINUS_PLUS_ONE unhandled");
2200 tgsi
->tc
->templ
.saturate
= tgsi_inst
->Instruction
.Saturate
;
2202 /* emit indirect store */
2203 if (ra_dst_is_indirect(d
)) {
2204 struct toy_inst
*inst
;
2206 inst
= tc_add(tgsi
->tc
);
2207 inst
->opcode
= TOY_OPCODE_TGSI_INDIRECT_STORE
;
2210 init_tgsi_reg(tgsi
, inst
, d
->Register
.File
, d
->Register
.Index
,
2211 (d
->Register
.Indirect
) ? &d
->Indirect
: NULL
,
2212 (d
->Register
.Dimension
) ? &d
->Dimension
: NULL
,
2213 (d
->Dimension
.Indirect
) ? &d
->DimIndirect
: NULL
);
2216 const enum toy_type type
= ra_get_type(tgsi
, tgsi_inst
, i
, true);
2217 struct toy_dst real_dst
;
2220 vrf
= ra_map_reg(tgsi
, d
->Register
.File
,
2221 ra_dst_dimension(d
), ra_dst_index(d
), NULL
);
2222 real_dst
= tdst_full(TOY_FILE_VRF
, type
, TOY_RECT_LINEAR
,
2223 false, 0, d
->Register
.WriteMask
, vrf
* TOY_REG_WIDTH
);
2226 tc_MOV(tgsi
->tc
, real_dst
, tsrc_from(dst
[i
]));
2229 struct toy_dst tdst
[4];
2230 struct toy_src tsrc
[4];
2233 tdst_transpose(real_dst
, tdst
);
2234 tsrc_transpose(tsrc_from(dst
[i
]), tsrc
);
2236 for (j
= 0; j
< 4; j
++)
2237 tc_MOV(tgsi
->tc
, tdst
[j
], tsrc
[j
]);
2241 tgsi
->tc
->templ
.saturate
= false;
2244 switch (tgsi_inst
->Instruction
.Opcode
) {
2245 case TGSI_OPCODE_KILL_IF
:
2246 case TGSI_OPCODE_KILL
:
2247 tgsi
->uses_kill
= true;
2251 for (i
= 0; i
< tgsi_inst
->Instruction
.NumSrcRegs
; i
++) {
2252 const struct tgsi_full_src_register
*s
= &tgsi_inst
->Src
[i
];
2253 if (s
->Register
.File
== TGSI_FILE_CONSTANT
&& s
->Register
.Indirect
)
2254 tgsi
->const_indirect
= true;
2257 /* remember channels written */
2258 for (i
= 0; i
< tgsi_inst
->Instruction
.NumDstRegs
; i
++) {
2259 const struct tgsi_full_dst_register
*d
= &tgsi_inst
->Dst
[i
];
2261 if (d
->Register
.File
!= TGSI_FILE_OUTPUT
)
2263 for (i
= 0; i
< tgsi
->num_outputs
; i
++) {
2264 if (tgsi
->outputs
[i
].index
== d
->Register
.Index
) {
2265 tgsi
->outputs
[i
].undefined_mask
&= ~d
->Register
.WriteMask
;
2273 decl_add_in(struct toy_tgsi
*tgsi
, const struct tgsi_full_declaration
*decl
)
2275 static const struct tgsi_declaration_interp default_interp
= {
2276 TGSI_INTERPOLATE_PERSPECTIVE
, false, 0,
2278 const struct tgsi_declaration_interp
*interp
=
2279 (decl
->Declaration
.Interpolate
) ? &decl
->Interp
: &default_interp
;
2282 if (decl
->Range
.Last
>= Elements(tgsi
->inputs
)) {
2283 assert(!"invalid IN");
2287 for (index
= decl
->Range
.First
; index
<= decl
->Range
.Last
; index
++) {
2288 const int slot
= tgsi
->num_inputs
++;
2290 tgsi
->inputs
[slot
].index
= index
;
2291 tgsi
->inputs
[slot
].usage_mask
= decl
->Declaration
.UsageMask
;
2292 if (decl
->Declaration
.Semantic
) {
2293 tgsi
->inputs
[slot
].semantic_name
= decl
->Semantic
.Name
;
2294 tgsi
->inputs
[slot
].semantic_index
= decl
->Semantic
.Index
;
2297 tgsi
->inputs
[slot
].semantic_name
= TGSI_SEMANTIC_GENERIC
;
2298 tgsi
->inputs
[slot
].semantic_index
= index
;
2300 tgsi
->inputs
[slot
].interp
= interp
->Interpolate
;
2301 tgsi
->inputs
[slot
].centroid
= interp
->Centroid
;
2306 decl_add_out(struct toy_tgsi
*tgsi
, const struct tgsi_full_declaration
*decl
)
2310 if (decl
->Range
.Last
>= Elements(tgsi
->outputs
)) {
2311 assert(!"invalid OUT");
2315 assert(decl
->Declaration
.Semantic
);
2317 for (index
= decl
->Range
.First
; index
<= decl
->Range
.Last
; index
++) {
2318 const int slot
= tgsi
->num_outputs
++;
2320 tgsi
->outputs
[slot
].index
= index
;
2321 tgsi
->outputs
[slot
].undefined_mask
= TOY_WRITEMASK_XYZW
;
2322 tgsi
->outputs
[slot
].usage_mask
= decl
->Declaration
.UsageMask
;
2323 tgsi
->outputs
[slot
].semantic_name
= decl
->Semantic
.Name
;
2324 tgsi
->outputs
[slot
].semantic_index
= decl
->Semantic
.Index
;
2329 decl_add_sv(struct toy_tgsi
*tgsi
, const struct tgsi_full_declaration
*decl
)
2333 if (decl
->Range
.Last
>= Elements(tgsi
->system_values
)) {
2334 assert(!"invalid SV");
2338 for (index
= decl
->Range
.First
; index
<= decl
->Range
.Last
; index
++) {
2339 const int slot
= tgsi
->num_system_values
++;
2341 tgsi
->system_values
[slot
].index
= index
;
2342 if (decl
->Declaration
.Semantic
) {
2343 tgsi
->system_values
[slot
].semantic_name
= decl
->Semantic
.Name
;
2344 tgsi
->system_values
[slot
].semantic_index
= decl
->Semantic
.Index
;
2347 tgsi
->system_values
[slot
].semantic_name
= TGSI_SEMANTIC_GENERIC
;
2348 tgsi
->system_values
[slot
].semantic_index
= index
;
2354 * Emit an instruction to fetch the value of a TGSI register.
2357 fetch_source(struct toy_tgsi
*tgsi
, enum tgsi_file_type file
, int dim
, int idx
)
2361 enum toy_opcode opcode
;
2362 enum toy_type type
= TOY_TYPE_F
;
2365 case TGSI_FILE_INPUT
:
2366 opcode
= TOY_OPCODE_TGSI_IN
;
2368 case TGSI_FILE_CONSTANT
:
2369 opcode
= TOY_OPCODE_TGSI_CONST
;
2371 case TGSI_FILE_SYSTEM_VALUE
:
2372 opcode
= TOY_OPCODE_TGSI_SV
;
2374 case TGSI_FILE_IMMEDIATE
:
2375 opcode
= TOY_OPCODE_TGSI_IMM
;
2376 toy_tgsi_get_imm(tgsi
, idx
, &type
);
2379 /* no need to fetch */
2384 vrf
= ra_map_reg(tgsi
, file
, dim
, idx
, NULL
);
2385 dst
= tdst(TOY_FILE_VRF
, vrf
, 0);
2386 dst
= tdst_type(dst
, type
);
2388 tc_add2(tgsi
->tc
, opcode
, dst
, tsrc_imm_d(dim
), tsrc_imm_d(idx
));
2392 parse_declaration(struct toy_tgsi
*tgsi
,
2393 const struct tgsi_full_declaration
*decl
)
2397 switch (decl
->Declaration
.File
) {
2398 case TGSI_FILE_INPUT
:
2399 decl_add_in(tgsi
, decl
);
2401 case TGSI_FILE_OUTPUT
:
2402 decl_add_out(tgsi
, decl
);
2404 case TGSI_FILE_SYSTEM_VALUE
:
2405 decl_add_sv(tgsi
, decl
);
2407 case TGSI_FILE_IMMEDIATE
:
2408 /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
2409 assert(!"unexpected immediate declaration");
2411 case TGSI_FILE_CONSTANT
:
2412 if (tgsi
->const_count
<= decl
->Range
.Last
)
2413 tgsi
->const_count
= decl
->Range
.Last
+ 1;
2415 case TGSI_FILE_NULL
:
2416 case TGSI_FILE_TEMPORARY
:
2417 case TGSI_FILE_SAMPLER
:
2418 case TGSI_FILE_PREDICATE
:
2419 case TGSI_FILE_ADDRESS
:
2420 case TGSI_FILE_RESOURCE
:
2421 case TGSI_FILE_SAMPLER_VIEW
:
2425 assert(!"unhandled TGSI file");
2429 /* fetch the registers now */
2430 for (i
= decl
->Range
.First
; i
<= decl
->Range
.Last
; i
++) {
2431 const int dim
= (decl
->Declaration
.Dimension
) ? decl
->Dim
.Index2D
: 0;
2432 fetch_source(tgsi
, decl
->Declaration
.File
, dim
, i
);
2437 add_imm(struct toy_tgsi
*tgsi
, enum toy_type type
, const uint32_t *buf
)
2439 /* reallocate the buffer if necessary */
2440 if (tgsi
->imm_data
.cur
>= tgsi
->imm_data
.size
) {
2441 const int cur_size
= tgsi
->imm_data
.size
;
2443 enum toy_type
*new_types
;
2444 uint32_t (*new_buf
)[4];
2446 new_size
= (cur_size
) ? cur_size
<< 1 : 16;
2447 while (new_size
<= tgsi
->imm_data
.cur
)
2450 new_buf
= REALLOC(tgsi
->imm_data
.buf
,
2451 cur_size
* sizeof(new_buf
[0]),
2452 new_size
* sizeof(new_buf
[0]));
2453 new_types
= REALLOC(tgsi
->imm_data
.types
,
2454 cur_size
* sizeof(new_types
[0]),
2455 new_size
* sizeof(new_types
[0]));
2456 if (!new_buf
|| !new_types
) {
2464 tgsi
->imm_data
.buf
= new_buf
;
2465 tgsi
->imm_data
.types
= new_types
;
2466 tgsi
->imm_data
.size
= new_size
;
2469 tgsi
->imm_data
.types
[tgsi
->imm_data
.cur
] = type
;
2470 memcpy(&tgsi
->imm_data
.buf
[tgsi
->imm_data
.cur
],
2471 buf
, sizeof(tgsi
->imm_data
.buf
[0]));
2473 return tgsi
->imm_data
.cur
++;
2477 parse_immediate(struct toy_tgsi
*tgsi
, const struct tgsi_full_immediate
*imm
)
2480 uint32_t imm_buf
[4];
2483 switch (imm
->Immediate
.DataType
) {
2484 case TGSI_IMM_FLOAT32
:
2486 imm_buf
[0] = fui(imm
->u
[0].Float
);
2487 imm_buf
[1] = fui(imm
->u
[1].Float
);
2488 imm_buf
[2] = fui(imm
->u
[2].Float
);
2489 imm_buf
[3] = fui(imm
->u
[3].Float
);
2491 case TGSI_IMM_INT32
:
2493 imm_buf
[0] = (uint32_t) imm
->u
[0].Int
;
2494 imm_buf
[1] = (uint32_t) imm
->u
[1].Int
;
2495 imm_buf
[2] = (uint32_t) imm
->u
[2].Int
;
2496 imm_buf
[3] = (uint32_t) imm
->u
[3].Int
;
2498 case TGSI_IMM_UINT32
:
2500 imm_buf
[0] = imm
->u
[0].Uint
;
2501 imm_buf
[1] = imm
->u
[1].Uint
;
2502 imm_buf
[2] = imm
->u
[2].Uint
;
2503 imm_buf
[3] = imm
->u
[3].Uint
;
2506 assert(!"unhandled TGSI imm type");
2508 memset(imm_buf
, 0, sizeof(imm_buf
));
2512 idx
= add_imm(tgsi
, type
, imm_buf
);
2514 fetch_source(tgsi
, TGSI_FILE_IMMEDIATE
, 0, idx
);
2516 tc_fail(tgsi
->tc
, "failed to add TGSI imm");
2520 parse_property(struct toy_tgsi
*tgsi
, const struct tgsi_full_property
*prop
)
2522 switch (prop
->Property
.PropertyName
) {
2523 case TGSI_PROPERTY_VS_PROHIBIT_UCPS
:
2524 tgsi
->props
.vs_prohibit_ucps
= prop
->u
[0].Data
;
2526 case TGSI_PROPERTY_FS_COORD_ORIGIN
:
2527 tgsi
->props
.fs_coord_origin
= prop
->u
[0].Data
;
2529 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
:
2530 tgsi
->props
.fs_coord_pixel_center
= prop
->u
[0].Data
;
2532 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
:
2533 tgsi
->props
.fs_color0_writes_all_cbufs
= prop
->u
[0].Data
;
2535 case TGSI_PROPERTY_FS_DEPTH_LAYOUT
:
2536 tgsi
->props
.fs_depth_layout
= prop
->u
[0].Data
;
2538 case TGSI_PROPERTY_GS_INPUT_PRIM
:
2539 tgsi
->props
.gs_input_prim
= prop
->u
[0].Data
;
2541 case TGSI_PROPERTY_GS_OUTPUT_PRIM
:
2542 tgsi
->props
.gs_output_prim
= prop
->u
[0].Data
;
2544 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
:
2545 tgsi
->props
.gs_max_output_vertices
= prop
->u
[0].Data
;
2548 assert(!"unhandled TGSI property");
2554 parse_token(struct toy_tgsi
*tgsi
, const union tgsi_full_token
*token
)
2556 switch (token
->Token
.Type
) {
2557 case TGSI_TOKEN_TYPE_DECLARATION
:
2558 parse_declaration(tgsi
, &token
->FullDeclaration
);
2560 case TGSI_TOKEN_TYPE_IMMEDIATE
:
2561 parse_immediate(tgsi
, &token
->FullImmediate
);
2563 case TGSI_TOKEN_TYPE_INSTRUCTION
:
2564 parse_instruction(tgsi
, &token
->FullInstruction
);
2566 case TGSI_TOKEN_TYPE_PROPERTY
:
2567 parse_property(tgsi
, &token
->FullProperty
);
2570 assert(!"unhandled TGSI token type");
2575 static enum pipe_error
2576 dump_reg_mapping(void *key
, void *val
, void *data
)
2578 int tgsi_file
, tgsi_dim
, tgsi_index
;
2581 sig
= (uint32_t) pointer_to_intptr(key
);
2582 vrf
= (uint32_t) pointer_to_intptr(val
);
2584 /* see ra_get_map_key() */
2585 tgsi_file
= (sig
>> 28) & 0xf;
2586 tgsi_dim
= (sig
>> 16) & 0xfff;
2587 tgsi_index
= (sig
>> 0) & 0xffff;
2590 ilo_printf(" v%d:\t%s[%d][%d]\n", vrf
,
2591 tgsi_file_name(tgsi_file
), tgsi_dim
, tgsi_index
);
2594 ilo_printf(" v%d:\t%s[%d]\n", vrf
,
2595 tgsi_file_name(tgsi_file
), tgsi_index
);
2602 * Dump the TGSI translator, currently only the register mapping.
2605 toy_tgsi_dump(const struct toy_tgsi
*tgsi
)
2607 util_hash_table_foreach(tgsi
->reg_mapping
, dump_reg_mapping
, NULL
);
2611 * Clean up the TGSI translator.
2614 toy_tgsi_cleanup(struct toy_tgsi
*tgsi
)
2616 FREE(tgsi
->imm_data
.buf
);
2617 FREE(tgsi
->imm_data
.types
);
2619 util_hash_table_destroy(tgsi
->reg_mapping
);
/**
 * Hash function of the register mapping: the key is an integer stored in a
 * pointer, and that integer is used as the hash.
 */
static unsigned
reg_mapping_hash(void *key)
{
   const intptr_t sig = pointer_to_intptr(key);

   return (unsigned) sig;
}
/**
 * Key comparison of the register mapping.
 *
 * \return 0 when both keys are the same pointer value, 1 otherwise
 */
static int
reg_mapping_compare(void *key1, void *key2)
{
   return (key1 == key2) ? 0 : 1;
}
2635 * Initialize the TGSI translator.
2638 init_tgsi(struct toy_tgsi
*tgsi
, struct toy_compiler
*tc
, bool aos
)
2640 memset(tgsi
, 0, sizeof(*tgsi
));
2644 tgsi
->translate_table
= (aos
) ? aos_translate_table
: soa_translate_table
;
2646 /* create a mapping of TGSI registers to VRF reigsters */
2648 util_hash_table_create(reg_mapping_hash
, reg_mapping_compare
);
2650 return (tgsi
->reg_mapping
!= NULL
);
2654 * Translate TGSI tokens into toy instructions.
2657 toy_compiler_translate_tgsi(struct toy_compiler
*tc
,
2658 const struct tgsi_token
*tokens
, bool aos
,
2659 struct toy_tgsi
*tgsi
)
2661 struct tgsi_parse_context parse
;
2663 if (!init_tgsi(tgsi
, tc
, aos
)) {
2664 tc_fail(tc
, "failed to initialize TGSI translator");
2668 tgsi_parse_init(&parse
, tokens
);
2669 while (!tgsi_parse_end_of_tokens(&parse
)) {
2670 tgsi_parse_token(&parse
);
2671 parse_token(tgsi
, &parse
.FullToken
);
2673 tgsi_parse_free(&parse
);
2677 * Map the TGSI register to VRF register.
2680 toy_tgsi_get_vrf(const struct toy_tgsi
*tgsi
,
2681 enum tgsi_file_type file
, int dimension
, int index
)
2685 key
= ra_get_map_key(file
, dimension
, index
);
2687 val
= util_hash_table_get(tgsi
->reg_mapping
, key
);
2689 return (val
) ? pointer_to_intptr(val
) : -1;