2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Rob Clark <robclark@freedesktop.org>
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
34 #include "tgsi_lowering.h"
/*
 * State carried through one TGSI lowering pass.
 *
 * NOTE(review): this listing appears to have lost lines here (the struct
 * has no visible closing brace, and the functions in this file access
 * `ctx->tmp[A].src` / `ctx->tmp[B].dst`, so the `src`/`dst` pair below is
 * presumably wrapped in a `tmp[]` array indexed by `A`/`B`) -- confirm
 * against the upstream file before relying on this layout.
 */
36 struct tgsi_lowering_context
{
/* Generic transform context; tgsi_lowering_context() casts a pointer to
 * this back to the enclosing struct. */
37 struct tgsi_transform_context base
;
/* Lowering options; the EXP/LOG lowerings read config->lower_FLR. */
38 const struct tgsi_lowering_config
*config
;
39 struct tgsi_shader_info
*info
;
40 unsigned two_side_colors
;
41 unsigned two_side_idx
[PIPE_MAX_SHADER_INPUTS
];
42 unsigned color_base
; /* base register for chosen COLOR/BCOLOR's */
/* Source-operand view of a scratch temporary (read as ctx->tmp[N].src). */
46 struct tgsi_full_src_register src
;
/* Destination-operand view of the same temporary (ctx->tmp[N].dst). */
47 struct tgsi_full_dst_register dst
;
/* Immediate used by the lowerings; per their comments .x/.y/.z are read as
 * 0.0 / 1.0 / 128.0 -- TODO confirm where it is declared. */
51 struct tgsi_full_src_register imm
;
/*
 * Downcast a generic transform context to the lowering context.
 *
 * The cast relies on `struct tgsi_transform_context base` being laid out
 * at the start of `struct tgsi_lowering_context`.
 *
 * tctx: transform context handed to a transform callback.
 * Returns the same pointer typed as the lowering context.
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   return (struct tgsi_lowering_context *)tctx;
}
67 reg_dst(struct tgsi_full_dst_register
*dst
,
68 const struct tgsi_full_dst_register
*orig_dst
, unsigned wrmask
)
71 dst
->Register
.WriteMask
&= wrmask
;
72 assert(dst
->Register
.WriteMask
);
76 get_swiz(unsigned *swiz
, const struct tgsi_src_register
*src
)
78 swiz
[0] = src
->SwizzleX
;
79 swiz
[1] = src
->SwizzleY
;
80 swiz
[2] = src
->SwizzleZ
;
81 swiz
[3] = src
->SwizzleW
;
85 reg_src(struct tgsi_full_src_register
*src
,
86 const struct tgsi_full_src_register
*orig_src
,
87 unsigned sx
, unsigned sy
, unsigned sz
, unsigned sw
)
90 get_swiz(swiz
, &orig_src
->Register
);
92 src
->Register
.SwizzleX
= swiz
[sx
];
93 src
->Register
.SwizzleY
= swiz
[sy
];
94 src
->Register
.SwizzleZ
= swiz
[sz
];
95 src
->Register
.SwizzleW
= swiz
[sw
];
/*
 * SWIZ(x,y,z,w) expands to the four TGSI_SWIZZLE_* arguments expected by
 * reg_src().  An underscore may be used for components the instruction
 * does not read; it maps to X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
                      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
103 * if (dst.x aliases src.x) {
109 * MOV dst.zw, imm{0.0, 1.0}
112 aliases(const struct tgsi_full_dst_register
*dst
, unsigned dst_mask
,
113 const struct tgsi_full_src_register
*src
, unsigned src_mask
)
115 if ((dst
->Register
.File
== src
->Register
.File
) &&
116 (dst
->Register
.Index
== src
->Register
.Index
)) {
117 unsigned i
, actual_mask
= 0;
119 get_swiz(swiz
, &src
->Register
);
120 for (i
= 0; i
< 4; i
++)
121 if (src_mask
& (1 << i
))
122 actual_mask
|= (1 << swiz
[i
]);
123 if (actual_mask
& dst_mask
)
130 create_mov(struct tgsi_transform_context
*tctx
,
131 const struct tgsi_full_dst_register
*dst
,
132 const struct tgsi_full_src_register
*src
,
133 unsigned mask
, unsigned saturate
)
135 struct tgsi_full_instruction new_inst
;
137 new_inst
= tgsi_default_full_instruction();
138 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
139 new_inst
.Instruction
.Saturate
= saturate
;
140 new_inst
.Instruction
.NumDstRegs
= 1;
141 reg_dst(&new_inst
.Dst
[0], dst
, mask
);
142 new_inst
.Instruction
.NumSrcRegs
= 1;
143 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, Z
, W
));
144 tctx
->emit_instruction(tctx
, &new_inst
);
/* to help calculate # of tgsi tokens for a lowering.. we assume
 * the worst case, ie. removed instructions don't have ADDR[] or
 * anything which increases the # of tokens per src/dst and the
 * inserted instructions do.
 *
 * OINST() - old instruction
 *    1         : instruction itself
 *    1         : dst
 *    1 * nargs : srcN
 *
 * NINST() - new instruction
 *    1         : instruction itself
 *    2         : dst
 *    2 * nargs : srcN
 */
#define OINST(nargs) (1 + 1 + 1 * (nargs))
#define NINST(nargs) (1 + 2 + 2 * (nargs))
/*
 * Lowering Translators:
 */
170 /* DST - Distance Vector
172 * dst.y = src0.y \times src1.y
176 * ; note: could be more clever and use just a single temp
177 * ; if I was clever enough to re-write the swizzles.
178 * ; needs: 2 tmp, imm{1.0}
179 * if (dst.y aliases src0.z) {
180 * MOV tmpA.yz, src0.yz
183 * if (dst.yz aliases src1.w) {
184 * MOV tmpB.yw, src1.yw
187 * MUL dst.y, src0.y, src1.y
190 * MOV dst.x, imm{1.0}
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 NINST(1) + NINST(1) - OINST(2))
196 transform_dst(struct tgsi_transform_context
*tctx
,
197 struct tgsi_full_instruction
*inst
)
199 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
200 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
201 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
202 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
203 struct tgsi_full_instruction new_inst
;
205 if (aliases(dst
, TGSI_WRITEMASK_Y
, src0
, TGSI_WRITEMASK_Z
)) {
206 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src0
, TGSI_WRITEMASK_YZ
, 0);
207 src0
= &ctx
->tmp
[A
].src
;
210 if (aliases(dst
, TGSI_WRITEMASK_YZ
, src1
, TGSI_WRITEMASK_W
)) {
211 create_mov(tctx
, &ctx
->tmp
[B
].dst
, src1
, TGSI_WRITEMASK_YW
, 0);
212 src1
= &ctx
->tmp
[B
].src
;
215 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
216 /* MUL dst.y, src0.y, src1.y */
217 new_inst
= tgsi_default_full_instruction();
218 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
219 new_inst
.Instruction
.NumDstRegs
= 1;
220 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
221 new_inst
.Instruction
.NumSrcRegs
= 2;
222 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
, Y
, _
, _
));
223 reg_src(&new_inst
.Src
[1], src1
, SWIZ(_
, Y
, _
, _
));
224 tctx
->emit_instruction(tctx
, &new_inst
);
227 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
228 /* MOV dst.z, src0.z */
229 new_inst
= tgsi_default_full_instruction();
230 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
231 new_inst
.Instruction
.NumDstRegs
= 1;
232 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
233 new_inst
.Instruction
.NumSrcRegs
= 1;
234 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
, _
, Z
, _
));
235 tctx
->emit_instruction(tctx
, &new_inst
);
238 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
239 /* MOV dst.w, src1.w */
240 new_inst
= tgsi_default_full_instruction();
241 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
242 new_inst
.Instruction
.NumDstRegs
= 1;
243 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
244 new_inst
.Instruction
.NumSrcRegs
= 1;
245 reg_src(&new_inst
.Src
[0], src1
, SWIZ(_
, _
, _
, W
));
246 tctx
->emit_instruction(tctx
, &new_inst
);
249 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
250 /* MOV dst.x, imm{1.0} */
251 new_inst
= tgsi_default_full_instruction();
252 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
253 new_inst
.Instruction
.NumDstRegs
= 1;
254 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
255 new_inst
.Instruction
.NumSrcRegs
= 1;
256 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
, _
, _
, _
));
257 tctx
->emit_instruction(tctx
, &new_inst
);
261 /* XPD - Cross Product
262 * dst.x = src0.y \times src1.z - src1.y \times src0.z
263 * dst.y = src0.z \times src1.x - src1.z \times src0.x
264 * dst.z = src0.x \times src1.y - src1.x \times src0.y
267 * ; needs: 1 tmp, imm{1.0}
268 * MUL tmpA.xyz, src1.yzx, src0.zxy
269 * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
270 * MOV dst.w, imm{1.0}
272 #define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
275 transform_xpd(struct tgsi_transform_context
*tctx
,
276 struct tgsi_full_instruction
*inst
)
278 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
279 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
280 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
281 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
282 struct tgsi_full_instruction new_inst
;
284 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
285 /* MUL tmpA.xyz, src1.yzx, src0.zxy */
286 new_inst
= tgsi_default_full_instruction();
287 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
288 new_inst
.Instruction
.NumDstRegs
= 1;
289 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZ
);
290 new_inst
.Instruction
.NumSrcRegs
= 2;
291 reg_src(&new_inst
.Src
[0], src1
, SWIZ(Y
, Z
, X
, _
));
292 reg_src(&new_inst
.Src
[1], src0
, SWIZ(Z
, X
, Y
, _
));
293 tctx
->emit_instruction(tctx
, &new_inst
);
295 /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
296 new_inst
= tgsi_default_full_instruction();
297 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
298 new_inst
.Instruction
.NumDstRegs
= 1;
299 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZ
);
300 new_inst
.Instruction
.NumSrcRegs
= 3;
301 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
, Z
, X
, _
));
302 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
, X
, Y
, _
));
303 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, _
));
304 new_inst
.Src
[2].Register
.Negate
= true;
305 tctx
->emit_instruction(tctx
, &new_inst
);
308 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
309 /* MOV dst.w, imm{1.0} */
310 new_inst
= tgsi_default_full_instruction();
311 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
312 new_inst
.Instruction
.NumDstRegs
= 1;
313 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
314 new_inst
.Instruction
.NumSrcRegs
= 1;
315 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, _
, Y
));
316 tctx
->emit_instruction(tctx
, &new_inst
);
321 * dst.x = \cos{src.x}
322 * dst.y = \sin{src.x}
326 * ; needs: 1 tmp, imm{0.0, 1.0}
327 * if (dst.x aliases src.x) {
333 * MOV dst.zw, imm{0.0, 1.0}
335 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
338 transform_scs(struct tgsi_transform_context
*tctx
,
339 struct tgsi_full_instruction
*inst
)
341 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
342 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
343 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
344 struct tgsi_full_instruction new_inst
;
346 if (aliases(dst
, TGSI_WRITEMASK_X
, src
, TGSI_WRITEMASK_X
)) {
347 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src
, TGSI_WRITEMASK_X
, 0);
348 src
= &ctx
->tmp
[A
].src
;
351 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
352 /* COS dst.x, src.x */
353 new_inst
= tgsi_default_full_instruction();
354 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_COS
;
355 new_inst
.Instruction
.NumDstRegs
= 1;
356 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
357 new_inst
.Instruction
.NumSrcRegs
= 1;
358 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
359 tctx
->emit_instruction(tctx
, &new_inst
);
362 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
363 /* SIN dst.y, src.x */
364 new_inst
= tgsi_default_full_instruction();
365 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SIN
;
366 new_inst
.Instruction
.NumDstRegs
= 1;
367 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
368 new_inst
.Instruction
.NumSrcRegs
= 1;
369 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
370 tctx
->emit_instruction(tctx
, &new_inst
);
373 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_ZW
) {
374 /* MOV dst.zw, imm{0.0, 1.0} */
375 new_inst
= tgsi_default_full_instruction();
376 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
377 new_inst
.Instruction
.NumDstRegs
= 1;
378 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_ZW
);
379 new_inst
.Instruction
.NumSrcRegs
= 1;
380 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, X
, Y
));
381 tctx
->emit_instruction(tctx
, &new_inst
);
385 /* LRP - Linear Interpolate
386 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
387 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
388 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
389 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
391 * This becomes: src0 \times src1 + src2 - src0 \times src2, which
392 * can then become: src0 \times src1 - (src0 \times src2 - src2)
395 * MAD tmpA, src0, src2, -src2
396 * MAD dst, src0, src1, -tmpA
398 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
401 transform_lrp(struct tgsi_transform_context
*tctx
,
402 struct tgsi_full_instruction
*inst
)
404 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
405 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
406 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
407 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
408 struct tgsi_full_src_register
*src2
= &inst
->Src
[2];
409 struct tgsi_full_instruction new_inst
;
411 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
412 /* MAD tmpA, src0, src2, -src2 */
413 new_inst
= tgsi_default_full_instruction();
414 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
415 new_inst
.Instruction
.NumDstRegs
= 1;
416 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
417 new_inst
.Instruction
.NumSrcRegs
= 3;
418 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
419 reg_src(&new_inst
.Src
[1], src2
, SWIZ(X
, Y
, Z
, W
));
420 reg_src(&new_inst
.Src
[2], src2
, SWIZ(X
, Y
, Z
, W
));
421 new_inst
.Src
[2].Register
.Negate
= !new_inst
.Src
[2].Register
.Negate
;
422 tctx
->emit_instruction(tctx
, &new_inst
);
424 /* MAD dst, src0, src1, -tmpA */
425 new_inst
= tgsi_default_full_instruction();
426 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
427 new_inst
.Instruction
.NumDstRegs
= 1;
428 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
429 new_inst
.Instruction
.NumSrcRegs
= 3;
430 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
431 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
, Y
, Z
, W
));
432 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
433 new_inst
.Src
[2].Register
.Negate
= true;
434 tctx
->emit_instruction(tctx
, &new_inst
);
439 * dst.x = src.x - \lfloor src.x\rfloor
440 * dst.y = src.y - \lfloor src.y\rfloor
441 * dst.z = src.z - \lfloor src.z\rfloor
442 * dst.w = src.w - \lfloor src.w\rfloor
448 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
451 transform_frc(struct tgsi_transform_context
*tctx
,
452 struct tgsi_full_instruction
*inst
)
454 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
455 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
456 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
457 struct tgsi_full_instruction new_inst
;
459 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
461 new_inst
= tgsi_default_full_instruction();
462 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
463 new_inst
.Instruction
.NumDstRegs
= 1;
464 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
465 new_inst
.Instruction
.NumSrcRegs
= 1;
466 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, Z
, W
));
467 tctx
->emit_instruction(tctx
, &new_inst
);
469 /* SUB dst, src, tmpA */
470 new_inst
= tgsi_default_full_instruction();
471 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
472 new_inst
.Instruction
.NumDstRegs
= 1;
473 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
474 new_inst
.Instruction
.NumSrcRegs
= 2;
475 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, Z
, W
));
476 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
477 new_inst
.Src
[1].Register
.Negate
= 1;
478 tctx
->emit_instruction(tctx
, &new_inst
);
483 * dst.x = src0.x^{src1.x}
484 * dst.y = src0.x^{src1.x}
485 * dst.z = src0.x^{src1.x}
486 * dst.w = src0.x^{src1.x}
490 * MUL tmpA.x, src1.x, tmpA.x
493 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
496 transform_pow(struct tgsi_transform_context
*tctx
,
497 struct tgsi_full_instruction
*inst
)
499 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
500 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
501 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
502 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
503 struct tgsi_full_instruction new_inst
;
505 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
506 /* LG2 tmpA.x, src0.x */
507 new_inst
= tgsi_default_full_instruction();
508 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
509 new_inst
.Instruction
.NumDstRegs
= 1;
510 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
511 new_inst
.Instruction
.NumSrcRegs
= 1;
512 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, _
, _
, _
));
513 tctx
->emit_instruction(tctx
, &new_inst
);
515 /* MUL tmpA.x, src1.x, tmpA.x */
516 new_inst
= tgsi_default_full_instruction();
517 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
518 new_inst
.Instruction
.NumDstRegs
= 1;
519 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
520 new_inst
.Instruction
.NumSrcRegs
= 2;
521 reg_src(&new_inst
.Src
[0], src1
, SWIZ(X
, _
, _
, _
));
522 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
523 tctx
->emit_instruction(tctx
, &new_inst
);
525 /* EX2 dst, tmpA.x */
526 new_inst
= tgsi_default_full_instruction();
527 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
528 new_inst
.Instruction
.NumDstRegs
= 1;
529 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
530 new_inst
.Instruction
.NumSrcRegs
= 1;
531 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
532 tctx
->emit_instruction(tctx
, &new_inst
);
536 /* LIT - Light Coefficients
538 * dst.y = max(src.x, 0.0)
539 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
542 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
543 * MAX tmpA.xy, src.xy, imm{0.0}
544 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
546 * MUL tmpA.y, tmpA.z, tmpA.y
548 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
549 * MOV dst.yz, tmpA.xy
550 * MOV dst.xw, imm{1.0}
552 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
553 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
556 transform_lit(struct tgsi_transform_context
*tctx
,
557 struct tgsi_full_instruction
*inst
)
559 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
560 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
561 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
562 struct tgsi_full_instruction new_inst
;
564 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_YZ
) {
565 /* MAX tmpA.xy, src.xy, imm{0.0} */
566 new_inst
= tgsi_default_full_instruction();
567 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAX
;
568 new_inst
.Instruction
.NumDstRegs
= 1;
569 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XY
);
570 new_inst
.Instruction
.NumSrcRegs
= 2;
571 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, _
, _
));
572 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(X
, X
, _
, _
));
573 tctx
->emit_instruction(tctx
, &new_inst
);
575 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
576 new_inst
= tgsi_default_full_instruction();
577 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CLAMP
;
578 new_inst
.Instruction
.NumDstRegs
= 1;
579 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
580 new_inst
.Instruction
.NumSrcRegs
= 3;
581 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, _
, W
, _
));
582 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(_
, _
, Z
, _
));
583 new_inst
.Src
[1].Register
.Negate
= true;
584 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
, _
, Z
, _
));
585 tctx
->emit_instruction(tctx
, &new_inst
);
587 /* LG2 tmpA.y, tmpA.y */
588 new_inst
= tgsi_default_full_instruction();
589 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
590 new_inst
.Instruction
.NumDstRegs
= 1;
591 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
592 new_inst
.Instruction
.NumSrcRegs
= 1;
593 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, _
, _
));
594 tctx
->emit_instruction(tctx
, &new_inst
);
596 /* MUL tmpA.y, tmpA.z, tmpA.y */
597 new_inst
= tgsi_default_full_instruction();
598 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
599 new_inst
.Instruction
.NumDstRegs
= 1;
600 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
601 new_inst
.Instruction
.NumSrcRegs
= 2;
602 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, Z
, _
, _
));
603 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Y
, _
, _
));
604 tctx
->emit_instruction(tctx
, &new_inst
);
606 /* EX2 tmpA.y, tmpA.y */
607 new_inst
= tgsi_default_full_instruction();
608 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
609 new_inst
.Instruction
.NumDstRegs
= 1;
610 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
611 new_inst
.Instruction
.NumSrcRegs
= 1;
612 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, _
, _
));
613 tctx
->emit_instruction(tctx
, &new_inst
);
615 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
616 new_inst
= tgsi_default_full_instruction();
617 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
618 new_inst
.Instruction
.NumDstRegs
= 1;
619 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
620 new_inst
.Instruction
.NumSrcRegs
= 3;
621 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, X
, _
, _
));
622 new_inst
.Src
[0].Register
.Negate
= true;
623 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Y
, _
, _
));
624 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
, X
, _
, _
));
625 tctx
->emit_instruction(tctx
, &new_inst
);
627 /* MOV dst.yz, tmpA.xy */
628 new_inst
= tgsi_default_full_instruction();
629 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
630 new_inst
.Instruction
.NumDstRegs
= 1;
631 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_YZ
);
632 new_inst
.Instruction
.NumSrcRegs
= 1;
633 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, Y
, _
));
634 tctx
->emit_instruction(tctx
, &new_inst
);
637 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XW
) {
638 /* MOV dst.xw, imm{1.0} */
639 new_inst
= tgsi_default_full_instruction();
640 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
641 new_inst
.Instruction
.NumDstRegs
= 1;
642 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XW
);
643 new_inst
.Instruction
.NumSrcRegs
= 1;
644 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
, _
, _
, Y
));
645 tctx
->emit_instruction(tctx
, &new_inst
);
649 /* EXP - Approximate Exponential Base 2
650 * dst.x = 2^{\lfloor src.x\rfloor}
651 * dst.y = src.x - \lfloor src.x\rfloor
655 * ; needs: 1 tmp, imm{1.0}
656 * if (lowering FLR) {
658 * SUB tmpA.x, src.x, tmpA.x
663 * SUB dst.y, src.x, tmpA.x
666 * MOV dst.w, imm{1.0}
668 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
669 NINST(1)+ NINST(1) - OINST(1))
672 transform_exp(struct tgsi_transform_context
*tctx
,
673 struct tgsi_full_instruction
*inst
)
675 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
676 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
677 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
678 struct tgsi_full_instruction new_inst
;
680 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
681 if (ctx
->config
->lower_FLR
) {
682 /* FRC tmpA.x, src.x */
683 new_inst
= tgsi_default_full_instruction();
684 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FRC
;
685 new_inst
.Instruction
.NumDstRegs
= 1;
686 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
687 new_inst
.Instruction
.NumSrcRegs
= 1;
688 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
689 tctx
->emit_instruction(tctx
, &new_inst
);
691 /* SUB tmpA.x, src.x, tmpA.x */
692 new_inst
= tgsi_default_full_instruction();
693 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
694 new_inst
.Instruction
.NumDstRegs
= 1;
695 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
696 new_inst
.Instruction
.NumSrcRegs
= 2;
697 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
698 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
699 new_inst
.Src
[1].Register
.Negate
= 1;
700 tctx
->emit_instruction(tctx
, &new_inst
);
702 /* FLR tmpA.x, src.x */
703 new_inst
= tgsi_default_full_instruction();
704 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
705 new_inst
.Instruction
.NumDstRegs
= 1;
706 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
707 new_inst
.Instruction
.NumSrcRegs
= 1;
708 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
709 tctx
->emit_instruction(tctx
, &new_inst
);
713 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
714 /* EX2 tmpA.y, src.x */
715 new_inst
= tgsi_default_full_instruction();
716 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
717 new_inst
.Instruction
.NumDstRegs
= 1;
718 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
719 new_inst
.Instruction
.NumSrcRegs
= 1;
720 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
721 tctx
->emit_instruction(tctx
, &new_inst
);
724 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
725 /* SUB dst.y, src.x, tmpA.x */
726 new_inst
= tgsi_default_full_instruction();
727 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
728 new_inst
.Instruction
.NumDstRegs
= 1;
729 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
730 new_inst
.Instruction
.NumSrcRegs
= 2;
731 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, X
, _
, _
));
732 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
733 new_inst
.Src
[1].Register
.Negate
= 1;
734 tctx
->emit_instruction(tctx
, &new_inst
);
737 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
738 /* EX2 dst.x, tmpA.x */
739 new_inst
= tgsi_default_full_instruction();
740 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
741 new_inst
.Instruction
.NumDstRegs
= 1;
742 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
743 new_inst
.Instruction
.NumSrcRegs
= 1;
744 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
745 tctx
->emit_instruction(tctx
, &new_inst
);
748 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
749 /* MOV dst.z, tmpA.y */
750 new_inst
= tgsi_default_full_instruction();
751 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
752 new_inst
.Instruction
.NumDstRegs
= 1;
753 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
754 new_inst
.Instruction
.NumSrcRegs
= 1;
755 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, _
, Y
, _
));
756 tctx
->emit_instruction(tctx
, &new_inst
);
759 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
760 /* MOV dst.w, imm{1.0} */
761 new_inst
= tgsi_default_full_instruction();
762 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
763 new_inst
.Instruction
.NumDstRegs
= 1;
764 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
765 new_inst
.Instruction
.NumSrcRegs
= 1;
766 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, _
, Y
));
767 tctx
->emit_instruction(tctx
, &new_inst
);
771 /* LOG - Approximate Logarithm Base 2
772 * dst.x = \lfloor\log_2{|src.x|}\rfloor
773 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
774 * dst.z = \log_2{|src.x|}
777 * ; needs: 1 tmp, imm{1.0}
778 * LG2 tmpA.x, |src.x|
779 * if (lowering FLR) {
781 * SUB tmpA.y, tmpA.x, tmpA.y
787 * MUL dst.y, |src.x|, tmpA.z
788 * MOV dst.xz, tmpA.yx
789 * MOV dst.w, imm{1.0}
791 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
792 NINST(2) + NINST(1) + NINST(1) - OINST(1))
795 transform_log(struct tgsi_transform_context
*tctx
,
796 struct tgsi_full_instruction
*inst
)
798 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
799 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
800 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
801 struct tgsi_full_instruction new_inst
;
803 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
804 /* LG2 tmpA.x, |src.x| */
805 new_inst
= tgsi_default_full_instruction();
806 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
807 new_inst
.Instruction
.NumDstRegs
= 1;
808 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
809 new_inst
.Instruction
.NumSrcRegs
= 1;
810 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
811 new_inst
.Src
[0].Register
.Absolute
= true;
812 tctx
->emit_instruction(tctx
, &new_inst
);
815 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
816 if (ctx
->config
->lower_FLR
) {
817 /* FRC tmpA.y, tmpA.x */
818 new_inst
= tgsi_default_full_instruction();
819 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FRC
;
820 new_inst
.Instruction
.NumDstRegs
= 1;
821 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
822 new_inst
.Instruction
.NumSrcRegs
= 1;
823 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
824 tctx
->emit_instruction(tctx
, &new_inst
);
826 /* SUB tmpA.y, tmpA.x, tmpA.y */
827 new_inst
= tgsi_default_full_instruction();
828 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
829 new_inst
.Instruction
.NumDstRegs
= 1;
830 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
831 new_inst
.Instruction
.NumSrcRegs
= 2;
832 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
833 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Y
, _
, _
));
834 new_inst
.Src
[1].Register
.Negate
= 1;
835 tctx
->emit_instruction(tctx
, &new_inst
);
837 /* FLR tmpA.y, tmpA.x */
838 new_inst
= tgsi_default_full_instruction();
839 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
840 new_inst
.Instruction
.NumDstRegs
= 1;
841 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
842 new_inst
.Instruction
.NumSrcRegs
= 1;
843 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
844 tctx
->emit_instruction(tctx
, &new_inst
);
848 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
849 /* EX2 tmpA.z, tmpA.y */
850 new_inst
= tgsi_default_full_instruction();
851 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
852 new_inst
.Instruction
.NumDstRegs
= 1;
853 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
854 new_inst
.Instruction
.NumSrcRegs
= 1;
855 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, _
, _
));
856 tctx
->emit_instruction(tctx
, &new_inst
);
858 /* RCP tmpA.z, tmpA.z */
859 new_inst
= tgsi_default_full_instruction();
860 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
861 new_inst
.Instruction
.NumDstRegs
= 1;
862 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
863 new_inst
.Instruction
.NumSrcRegs
= 1;
864 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Z
, _
, _
, _
));
865 tctx
->emit_instruction(tctx
, &new_inst
);
867 /* MUL dst.y, |src.x|, tmpA.z */
868 new_inst
= tgsi_default_full_instruction();
869 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
870 new_inst
.Instruction
.NumDstRegs
= 1;
871 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
872 new_inst
.Instruction
.NumSrcRegs
= 2;
873 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, X
, _
, _
));
874 new_inst
.Src
[0].Register
.Absolute
= true;
875 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Z
, _
, _
));
876 tctx
->emit_instruction(tctx
, &new_inst
);
879 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XZ
) {
880 /* MOV dst.xz, tmpA.yx */
881 new_inst
= tgsi_default_full_instruction();
882 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
883 new_inst
.Instruction
.NumDstRegs
= 1;
884 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XZ
);
885 new_inst
.Instruction
.NumSrcRegs
= 1;
886 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, X
, _
));
887 tctx
->emit_instruction(tctx
, &new_inst
);
890 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
891 /* MOV dst.w, imm{1.0} */
892 new_inst
= tgsi_default_full_instruction();
893 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
894 new_inst
.Instruction
.NumDstRegs
= 1;
895 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
896 new_inst
.Instruction
.NumSrcRegs
= 1;
897 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, _
, Y
));
898 tctx
->emit_instruction(tctx
, &new_inst
);
902 /* DP4 - 4-component Dot Product
903 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
905 * DP3 - 3-component Dot Product
906 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
908 * DPH - Homogeneous Dot Product
909 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
911 * DP2 - 2-component Dot Product
912 * dst = src0.x \times src1.x + src0.y \times src1.y
914 * DP2A - 2-component Dot Product And Add
915 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
917 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
918 * operations, which is what you'd prefer for an ISA that is natively
919 * scalar. Probably a native vector ISA would at least already have
920 * DP4/DP3 instructions, but perhaps there is room for an alternative
921 * translation for DPH/DP2/DP2A using vector instructions.
924 * MUL tmpA.x, src0.x, src1.x
925 * MAD tmpA.x, src0.y, src1.y, tmpA.x
926 * if (DPH || DP3 || DP4) {
927 * MAD tmpA.x, src0.z, src1.z, tmpA.x
929 * ADD tmpA.x, src1.w, tmpA.x
931 * MAD tmpA.x, src0.w, src1.w, tmpA.x
934 * ADD tmpA.x, src2.x, tmpA.x
936 * ; fixup last instruction to replicate into dst
/* Token growth budget for each lowered dot-product opcode.  The terms
 * mirror the sequence emitted by transform_dotp(): one 2-source MUL,
 * then one 3-source MAD per extra multiplied component, plus a 2-source
 * ADD for DPH/DP2A, minus the original instruction that is replaced.
 * NINST()/OINST() are defined elsewhere in this file; presumably they
 * give the token size of a new/original instruction with the given
 * number of source operands -- confirm against their definitions.
 */
938 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
939 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
940 #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
941 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
942 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
945 transform_dotp(struct tgsi_transform_context
*tctx
,
946 struct tgsi_full_instruction
*inst
)
948 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
949 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
950 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
951 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
952 struct tgsi_full_src_register
*src2
= &inst
->Src
[2]; /* only DP2A */
953 struct tgsi_full_instruction new_inst
;
954 unsigned opcode
= inst
->Instruction
.Opcode
;
956 /* NOTE: any potential last instruction must replicate src on all
957 * components (since it could be re-written to write to final dst)
960 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
961 /* MUL tmpA.x, src0.x, src1.x */
962 new_inst
= tgsi_default_full_instruction();
963 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
964 new_inst
.Instruction
.NumDstRegs
= 1;
965 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
966 new_inst
.Instruction
.NumSrcRegs
= 2;
967 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, _
, _
, _
));
968 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
, _
, _
, _
));
969 tctx
->emit_instruction(tctx
, &new_inst
);
971 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
972 new_inst
= tgsi_default_full_instruction();
973 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
974 new_inst
.Instruction
.NumDstRegs
= 1;
975 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
976 new_inst
.Instruction
.NumSrcRegs
= 3;
977 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
, Y
, Y
, Y
));
978 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Y
, Y
, Y
, Y
));
979 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
981 if ((opcode
== TGSI_OPCODE_DPH
) ||
982 (opcode
== TGSI_OPCODE_DP3
) ||
983 (opcode
== TGSI_OPCODE_DP4
)) {
984 tctx
->emit_instruction(tctx
, &new_inst
);
986 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
987 new_inst
= tgsi_default_full_instruction();
988 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
989 new_inst
.Instruction
.NumDstRegs
= 1;
990 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
991 new_inst
.Instruction
.NumSrcRegs
= 3;
992 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Z
, Z
, Z
, Z
));
993 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
, Z
, Z
, Z
));
994 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
996 if (opcode
== TGSI_OPCODE_DPH
) {
997 tctx
->emit_instruction(tctx
, &new_inst
);
999 /* ADD tmpA.x, src1.w, tmpA.x */
1000 new_inst
= tgsi_default_full_instruction();
1001 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
1002 new_inst
.Instruction
.NumDstRegs
= 1;
1003 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
1004 new_inst
.Instruction
.NumSrcRegs
= 2;
1005 reg_src(&new_inst
.Src
[0], src1
, SWIZ(W
, W
, W
, W
));
1006 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
1007 } else if (opcode
== TGSI_OPCODE_DP4
) {
1008 tctx
->emit_instruction(tctx
, &new_inst
);
1010 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
1011 new_inst
= tgsi_default_full_instruction();
1012 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
1013 new_inst
.Instruction
.NumDstRegs
= 1;
1014 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
1015 new_inst
.Instruction
.NumSrcRegs
= 3;
1016 reg_src(&new_inst
.Src
[0], src0
, SWIZ(W
, W
, W
, W
));
1017 reg_src(&new_inst
.Src
[1], src1
, SWIZ(W
, W
, W
, W
));
1018 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
1020 } else if (opcode
== TGSI_OPCODE_DP2A
) {
1021 tctx
->emit_instruction(tctx
, &new_inst
);
1023 /* ADD tmpA.x, src2.x, tmpA.x */
1024 new_inst
= tgsi_default_full_instruction();
1025 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
1026 new_inst
.Instruction
.NumDstRegs
= 1;
1027 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
1028 new_inst
.Instruction
.NumSrcRegs
= 2;
1029 reg_src(&new_inst
.Src
[0], src2
, SWIZ(X
, X
, X
, X
));
1030 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
1033 /* fixup last instruction to write to dst: */
1034 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
1036 tctx
->emit_instruction(tctx
, &new_inst
);
1040 /* FLR - floor, CEIL - ceil
1044 * ADD dst, src, tmpA
1047 * SUB dst, src, tmpA
/* Token growth budget for a lowered FLR or CEIL: transform_flr_ceil()
 * emits a 1-source FRC and a 2-source ADD in place of the original
 * 1-source instruction.  NINST()/OINST() are defined elsewhere in this
 * file (presumably per-instruction token sizes -- confirm).
 */
1050 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
1051 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
1055 transform_flr_ceil(struct tgsi_transform_context
*tctx
,
1056 struct tgsi_full_instruction
*inst
)
1058 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1059 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
1060 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
1061 struct tgsi_full_instruction new_inst
;
1062 unsigned opcode
= inst
->Instruction
.Opcode
;
1064 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
1065 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
1066 new_inst
= tgsi_default_full_instruction();
1067 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FRC
;
1068 new_inst
.Instruction
.NumDstRegs
= 1;
1069 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
1070 new_inst
.Instruction
.NumSrcRegs
= 1;
1071 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
1073 if (opcode
== TGSI_OPCODE_CEIL
)
1074 new_inst
.Src
[0].Register
.Negate
= !new_inst
.Src
[0].Register
.Negate
;
1075 tctx
->emit_instruction(tctx
, &new_inst
);
1077 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
1078 new_inst
= tgsi_default_full_instruction();
1079 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
1080 new_inst
.Instruction
.NumDstRegs
= 1;
1081 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
1082 new_inst
.Instruction
.NumSrcRegs
= 2;
1083 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
1084 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
1085 if (opcode
== TGSI_OPCODE_FLR
)
1086 new_inst
.Src
[1].Register
.Negate
= 1;
1087 tctx
->emit_instruction(tctx
, &new_inst
);
1091 /* TRUNC - truncate off fractional part
1092 * dst.x = trunc(src.x)
1093 * dst.y = trunc(src.y)
1094 * dst.z = trunc(src.z)
1095 * dst.w = trunc(src.w)
1100 * SUB tmpA, |src|, tmpA
1104 * CMP dst, src, -tmpA, tmpA
/* Token growth budget for a lowered TRUNC, sized for the longer path of
 * transform_trunc(): a 1-source FRC, a 2-source ADD and a 3-source CMP
 * replacing the original 1-source instruction.  NINST()/OINST() are
 * defined elsewhere in this file (presumably per-instruction token
 * sizes -- confirm).
 */
1106 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
1109 transform_trunc(struct tgsi_transform_context
*tctx
,
1110 struct tgsi_full_instruction
*inst
)
1112 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1113 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
1114 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
1115 struct tgsi_full_instruction new_inst
;
1117 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
1118 if (ctx
->config
->lower_FLR
) {
1119 new_inst
= tgsi_default_full_instruction();
1120 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FRC
;
1121 new_inst
.Instruction
.NumDstRegs
= 1;
1122 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
1123 new_inst
.Instruction
.NumSrcRegs
= 1;
1124 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
1125 new_inst
.Src
[0].Register
.Absolute
= true;
1126 new_inst
.Src
[0].Register
.Negate
= false;
1127 tctx
->emit_instruction(tctx
, &new_inst
);
1129 new_inst
= tgsi_default_full_instruction();
1130 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
1131 new_inst
.Instruction
.NumDstRegs
= 1;
1132 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
1133 new_inst
.Instruction
.NumSrcRegs
= 2;
1134 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
1135 new_inst
.Src
[0].Register
.Absolute
= true;
1136 new_inst
.Src
[0].Register
.Negate
= false;
1137 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
1138 new_inst
.Src
[1].Register
.Negate
= 1;
1139 tctx
->emit_instruction(tctx
, &new_inst
);
1141 new_inst
= tgsi_default_full_instruction();
1142 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
1143 new_inst
.Instruction
.NumDstRegs
= 1;
1144 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
1145 new_inst
.Instruction
.NumSrcRegs
= 1;
1146 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
1147 new_inst
.Src
[0].Register
.Absolute
= true;
1148 new_inst
.Src
[0].Register
.Negate
= false;
1149 tctx
->emit_instruction(tctx
, &new_inst
);
1152 new_inst
= tgsi_default_full_instruction();
1153 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
1154 new_inst
.Instruction
.NumDstRegs
= 1;
1155 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
1156 new_inst
.Instruction
.NumSrcRegs
= 3;
1157 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
1158 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
1159 new_inst
.Src
[1].Register
.Negate
= true;
1160 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
1161 tctx
->emit_instruction(tctx
, &new_inst
);
1165 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
1166 * in the case of TXP, the clamping must happen *after* projection, so
1167 * we need to lower TXP to TEX.
1171 * ; do perspective division manually before clamping:
1173 * MUL tmpA.<pmask>, tmpA, tmpB.xxxx
1176 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
1177 * <opc> dst, tmpA, ...
/* Token growth budget per lowered sample instruction: transform_samp()
 * may add a MOV of the coords into tmpA, an RCP and a 2-source MUL for
 * the manual projection, and a saturating MOV.  There is no OINST()
 * term because the sample instruction itself is still emitted.  NINST()
 * is defined elsewhere in this file (presumably a per-instruction token
 * size -- confirm).
 */
1179 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1182 transform_samp(struct tgsi_transform_context
*tctx
,
1183 struct tgsi_full_instruction
*inst
)
1185 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1186 struct tgsi_full_src_register
*coord
= &inst
->Src
[0];
1187 struct tgsi_full_src_register
*samp
;
1188 struct tgsi_full_instruction new_inst
;
1189 /* mask is clamped coords, pmask is all coords (for projection): */
1190 unsigned mask
= 0, pmask
= 0, smask
;
1191 unsigned tex
= inst
->Texture
.Texture
;
1192 unsigned opcode
= inst
->Instruction
.Opcode
;
1193 bool lower_txp
= (opcode
== TGSI_OPCODE_TXP
) &&
1194 (ctx
->config
->lower_TXP
& (1 << tex
));
1196 if (opcode
== TGSI_OPCODE_TXB2
) {
1197 samp
= &inst
->Src
[2];
1199 samp
= &inst
->Src
[1];
1202 /* convert sampler # to bitmask to test: */
1203 smask
= 1 << samp
->Register
.Index
;
1205 /* check if we actually need to lower this one: */
1206 if (!(ctx
->saturate
& smask
) && !lower_txp
)
1209 /* figure out which coordinates need saturating:
1210 * - RECT textures should not get saturated
1211 * - array index coords should not get saturated
1214 case TGSI_TEXTURE_3D
:
1215 case TGSI_TEXTURE_CUBE
:
1216 case TGSI_TEXTURE_CUBE_ARRAY
:
1217 case TGSI_TEXTURE_SHADOWCUBE
:
1218 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
1219 if (ctx
->config
->saturate_r
& smask
)
1220 mask
|= TGSI_WRITEMASK_Z
;
1221 pmask
|= TGSI_WRITEMASK_Z
;
1224 case TGSI_TEXTURE_2D
:
1225 case TGSI_TEXTURE_2D_ARRAY
:
1226 case TGSI_TEXTURE_SHADOW2D
:
1227 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
1228 case TGSI_TEXTURE_2D_MSAA
:
1229 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
1230 if (ctx
->config
->saturate_t
& smask
)
1231 mask
|= TGSI_WRITEMASK_Y
;
1232 pmask
|= TGSI_WRITEMASK_Y
;
1235 case TGSI_TEXTURE_1D
:
1236 case TGSI_TEXTURE_1D_ARRAY
:
1237 case TGSI_TEXTURE_SHADOW1D
:
1238 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
1239 if (ctx
->config
->saturate_s
& smask
)
1240 mask
|= TGSI_WRITEMASK_X
;
1241 pmask
|= TGSI_WRITEMASK_X
;
1244 case TGSI_TEXTURE_RECT
:
1245 case TGSI_TEXTURE_SHADOWRECT
:
1246 /* we don't saturate, but in case of lower_txp we
1247 * still need to do the perspective divide:
1249 pmask
= TGSI_WRITEMASK_XY
;
1253 /* sanity check.. driver could be asking to saturate a non-
1254 * existent coordinate component:
1256 if (!mask
&& !lower_txp
)
1259 /* MOV tmpA, src0 */
1260 create_mov(tctx
, &ctx
->tmp
[A
].dst
, coord
, TGSI_WRITEMASK_XYZW
, 0);
1262 /* This is a bit sad.. we need to clamp *after* the coords
1263 * are projected, which means lowering TXP to TEX and doing
1264 * the projection ourself. But since I haven't figured out
1265 * how to make the lowering code deliver an electric shock
1266 * to anyone using GL_CLAMP, we must do this instead:
1268 if (opcode
== TGSI_OPCODE_TXP
) {
1269 /* RCP tmpB.x tmpA.w */
1270 new_inst
= tgsi_default_full_instruction();
1271 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
1272 new_inst
.Instruction
.NumDstRegs
= 1;
1273 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_X
);
1274 new_inst
.Instruction
.NumSrcRegs
= 1;
1275 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(W
, _
, _
, _
));
1276 tctx
->emit_instruction(tctx
, &new_inst
);
1278 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1279 new_inst
= tgsi_default_full_instruction();
1280 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
1281 new_inst
.Instruction
.NumDstRegs
= 1;
1282 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, pmask
);
1283 new_inst
.Instruction
.NumSrcRegs
= 2;
1284 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
1285 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
, X
, X
, X
));
1286 tctx
->emit_instruction(tctx
, &new_inst
);
1288 opcode
= TGSI_OPCODE_TEX
;
1291 /* MOV_SAT tmpA.<mask>, tmpA */
1293 create_mov(tctx
, &ctx
->tmp
[A
].dst
, &ctx
->tmp
[A
].src
, mask
, 1);
1296 /* modify the texture samp instruction to take fixed up coord: */
1298 new_inst
.Instruction
.Opcode
= opcode
;
1299 new_inst
.Src
[0] = ctx
->tmp
[A
].src
;
1300 tctx
->emit_instruction(tctx
, &new_inst
);
1305 /* Two-sided color emulation:
1306 * For each COLOR input, create a corresponding BCOLOR input, plus
1307 * CMP instruction to select front or back color based on FACE
1309 #define TWOSIDE_GROW(n) ( \
1311 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1312 ((n) * 1) + /* TEMP[] */ \
1313 ((n) * NINST(3)) /* CMP instr */ \
1317 emit_twoside(struct tgsi_transform_context
*tctx
)
1319 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1320 struct tgsi_shader_info
*info
= ctx
->info
;
1321 struct tgsi_full_declaration decl
;
1322 struct tgsi_full_instruction new_inst
;
1323 unsigned inbase
, tmpbase
;
1326 inbase
= info
->file_max
[TGSI_FILE_INPUT
] + 1;
1327 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1329 /* additional inputs for BCOLOR's */
1330 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1331 unsigned in_idx
= ctx
->two_side_idx
[i
];
1332 decl
= tgsi_default_full_declaration();
1333 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1334 decl
.Declaration
.Semantic
= true;
1335 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ i
;
1336 decl
.Semantic
.Name
= TGSI_SEMANTIC_BCOLOR
;
1337 decl
.Semantic
.Index
= info
->input_semantic_index
[in_idx
];
1338 decl
.Declaration
.Interpolate
= true;
1339 decl
.Interp
.Interpolate
= info
->input_interpolate
[in_idx
];
1340 decl
.Interp
.Location
= info
->input_interpolate_loc
[in_idx
];
1341 decl
.Interp
.CylindricalWrap
= info
->input_cylindrical_wrap
[in_idx
];
1342 tctx
->emit_declaration(tctx
, &decl
);
1345 /* additional input for FACE */
1346 if (ctx
->two_side_colors
&& (ctx
->face_idx
== -1)) {
1347 decl
= tgsi_default_full_declaration();
1348 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1349 decl
.Declaration
.Semantic
= true;
1350 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ ctx
->two_side_colors
;
1351 decl
.Semantic
.Name
= TGSI_SEMANTIC_FACE
;
1352 decl
.Semantic
.Index
= 0;
1353 tctx
->emit_declaration(tctx
, &decl
);
1355 ctx
->face_idx
= decl
.Range
.First
;
1358 /* additional temps for COLOR/BCOLOR selection: */
1359 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1360 decl
= tgsi_default_full_declaration();
1361 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1362 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ ctx
->numtmp
+ i
;
1363 tctx
->emit_declaration(tctx
, &decl
);
1366 /* and finally additional instructions to select COLOR/BCOLOR: */
1367 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1368 new_inst
= tgsi_default_full_instruction();
1369 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
1371 new_inst
.Instruction
.NumDstRegs
= 1;
1372 new_inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
1373 new_inst
.Dst
[0].Register
.Index
= tmpbase
+ ctx
->numtmp
+ i
;
1374 new_inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1376 new_inst
.Instruction
.NumSrcRegs
= 3;
1377 new_inst
.Src
[0].Register
.File
= TGSI_FILE_INPUT
;
1378 new_inst
.Src
[0].Register
.Index
= ctx
->face_idx
;
1379 new_inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1380 new_inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
1381 new_inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
1382 new_inst
.Src
[0].Register
.SwizzleW
= TGSI_SWIZZLE_X
;
1383 new_inst
.Src
[1].Register
.File
= TGSI_FILE_INPUT
;
1384 new_inst
.Src
[1].Register
.Index
= inbase
+ i
;
1385 new_inst
.Src
[1].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1386 new_inst
.Src
[1].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1387 new_inst
.Src
[1].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1388 new_inst
.Src
[1].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1389 new_inst
.Src
[2].Register
.File
= TGSI_FILE_INPUT
;
1390 new_inst
.Src
[2].Register
.Index
= ctx
->two_side_idx
[i
];
1391 new_inst
.Src
[2].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1392 new_inst
.Src
[2].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1393 new_inst
.Src
[2].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1394 new_inst
.Src
[2].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1396 tctx
->emit_instruction(tctx
, &new_inst
);
1401 emit_decls(struct tgsi_transform_context
*tctx
)
1403 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1404 struct tgsi_shader_info
*info
= ctx
->info
;
1405 struct tgsi_full_declaration decl
;
1406 struct tgsi_full_immediate immed
;
1410 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1412 ctx
->color_base
= tmpbase
+ ctx
->numtmp
;
1414 /* declare immediate: */
1415 immed
= tgsi_default_full_immediate();
1416 immed
.Immediate
.NrTokens
= 1 + 4; /* one for the token itself */
1417 immed
.u
[0].Float
= 0.0;
1418 immed
.u
[1].Float
= 1.0;
1419 immed
.u
[2].Float
= 128.0;
1420 immed
.u
[3].Float
= 0.0;
1421 tctx
->emit_immediate(tctx
, &immed
);
1423 ctx
->imm
.Register
.File
= TGSI_FILE_IMMEDIATE
;
1424 ctx
->imm
.Register
.Index
= info
->immediate_count
;
1425 ctx
->imm
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1426 ctx
->imm
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1427 ctx
->imm
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1428 ctx
->imm
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1430 /* declare temp regs: */
1431 for (i
= 0; i
< ctx
->numtmp
; i
++) {
1432 decl
= tgsi_default_full_declaration();
1433 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1434 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ i
;
1435 tctx
->emit_declaration(tctx
, &decl
);
1437 ctx
->tmp
[i
].src
.Register
.File
= TGSI_FILE_TEMPORARY
;
1438 ctx
->tmp
[i
].src
.Register
.Index
= tmpbase
+ i
;
1439 ctx
->tmp
[i
].src
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1440 ctx
->tmp
[i
].src
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1441 ctx
->tmp
[i
].src
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1442 ctx
->tmp
[i
].src
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1444 ctx
->tmp
[i
].dst
.Register
.File
= TGSI_FILE_TEMPORARY
;
1445 ctx
->tmp
[i
].dst
.Register
.Index
= tmpbase
+ i
;
1446 ctx
->tmp
[i
].dst
.Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1449 if (ctx
->two_side_colors
)
1454 rename_color_inputs(struct tgsi_lowering_context
*ctx
,
1455 struct tgsi_full_instruction
*inst
)
1458 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1459 struct tgsi_src_register
*src
= &inst
->Src
[i
].Register
;
1460 if (src
->File
== TGSI_FILE_INPUT
) {
1461 for (j
= 0; j
< ctx
->two_side_colors
; j
++) {
1462 if (src
->Index
== ctx
->two_side_idx
[j
]) {
1463 src
->File
= TGSI_FILE_TEMPORARY
;
1464 src
->Index
= ctx
->color_base
+ j
;
1474 transform_instr(struct tgsi_transform_context
*tctx
,
1475 struct tgsi_full_instruction
*inst
)
1477 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1479 if (!ctx
->emitted_decls
) {
1481 ctx
->emitted_decls
= 1;
1484 /* if emulating two-sided-color, we need to re-write some
1487 if (ctx
->two_side_colors
)
1488 rename_color_inputs(ctx
, inst
);
1490 switch (inst
->Instruction
.Opcode
) {
1491 case TGSI_OPCODE_DST
:
1492 if (!ctx
->config
->lower_DST
)
1494 transform_dst(tctx
, inst
);
1496 case TGSI_OPCODE_XPD
:
1497 if (!ctx
->config
->lower_XPD
)
1499 transform_xpd(tctx
, inst
);
1501 case TGSI_OPCODE_SCS
:
1502 if (!ctx
->config
->lower_SCS
)
1504 transform_scs(tctx
, inst
);
1506 case TGSI_OPCODE_LRP
:
1507 if (!ctx
->config
->lower_LRP
)
1509 transform_lrp(tctx
, inst
);
1511 case TGSI_OPCODE_FRC
:
1512 if (!ctx
->config
->lower_FRC
)
1514 transform_frc(tctx
, inst
);
1516 case TGSI_OPCODE_POW
:
1517 if (!ctx
->config
->lower_POW
)
1519 transform_pow(tctx
, inst
);
1521 case TGSI_OPCODE_LIT
:
1522 if (!ctx
->config
->lower_LIT
)
1524 transform_lit(tctx
, inst
);
1526 case TGSI_OPCODE_EXP
:
1527 if (!ctx
->config
->lower_EXP
)
1529 transform_exp(tctx
, inst
);
1531 case TGSI_OPCODE_LOG
:
1532 if (!ctx
->config
->lower_LOG
)
1534 transform_log(tctx
, inst
);
1536 case TGSI_OPCODE_DP4
:
1537 if (!ctx
->config
->lower_DP4
)
1539 transform_dotp(tctx
, inst
);
1541 case TGSI_OPCODE_DP3
:
1542 if (!ctx
->config
->lower_DP3
)
1544 transform_dotp(tctx
, inst
);
1546 case TGSI_OPCODE_DPH
:
1547 if (!ctx
->config
->lower_DPH
)
1549 transform_dotp(tctx
, inst
);
1551 case TGSI_OPCODE_DP2
:
1552 if (!ctx
->config
->lower_DP2
)
1554 transform_dotp(tctx
, inst
);
1556 case TGSI_OPCODE_DP2A
:
1557 if (!ctx
->config
->lower_DP2A
)
1559 transform_dotp(tctx
, inst
);
1561 case TGSI_OPCODE_FLR
:
1562 if (!ctx
->config
->lower_FLR
)
1564 transform_flr_ceil(tctx
, inst
);
1566 case TGSI_OPCODE_CEIL
:
1567 if (!ctx
->config
->lower_CEIL
)
1569 transform_flr_ceil(tctx
, inst
);
1571 case TGSI_OPCODE_TRUNC
:
1572 if (!ctx
->config
->lower_TRUNC
)
1574 transform_trunc(tctx
, inst
);
1576 case TGSI_OPCODE_TEX
:
1577 case TGSI_OPCODE_TXP
:
1578 case TGSI_OPCODE_TXB
:
1579 case TGSI_OPCODE_TXB2
:
1580 case TGSI_OPCODE_TXL
:
1581 if (transform_samp(tctx
, inst
))
1586 tctx
->emit_instruction(tctx
, inst
);
1591 /* returns NULL if no lowering required, else returns the new
1592 * tokens (which caller is required to free()). In either case
1593 * returns the current info.
1595 const struct tgsi_token
*
1596 tgsi_transform_lowering(const struct tgsi_lowering_config
*config
,
1597 const struct tgsi_token
*tokens
,
1598 struct tgsi_shader_info
*info
)
1600 struct tgsi_lowering_context ctx
;
1601 struct tgsi_token
*newtoks
;
1604 /* sanity check in case limit is ever increased: */
1605 STATIC_ASSERT((sizeof(config
->saturate_s
) * 8) >= PIPE_MAX_SAMPLERS
);
1607 /* sanity check the lowering */
1608 assert(!(config
->lower_FRC
&& (config
->lower_FLR
|| config
->lower_CEIL
)));
1609 assert(!(config
->lower_FRC
&& config
->lower_TRUNC
));
1611 memset(&ctx
, 0, sizeof(ctx
));
1612 ctx
.base
.transform_instruction
= transform_instr
;
1614 ctx
.config
= config
;
1616 tgsi_scan_shader(tokens
, info
);
1618 /* if we are adding fragment shader support to emulate two-sided
1619 * color, then figure out the number of additional inputs we need
1620 * to create for BCOLOR's..
1622 if ((info
->processor
== PIPE_SHADER_FRAGMENT
) &&
1623 config
->color_two_side
) {
1626 for (i
= 0; i
<= info
->file_max
[TGSI_FILE_INPUT
]; i
++) {
1627 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_COLOR
)
1628 ctx
.two_side_idx
[ctx
.two_side_colors
++] = i
;
1629 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_FACE
)
1634 ctx
.saturate
= config
->saturate_r
| config
->saturate_s
| config
->saturate_t
;
1636 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1637 /* if there are no instructions to lower, then we are done: */
1656 ctx
.two_side_colors
||
1661 _debug_printf("BEFORE:");
1662 tgsi_dump(tokens
, 0);
1666 newlen
= tgsi_num_tokens(tokens
);
1668 newlen
+= DST_GROW
* OPCS(DST
);
1669 numtmp
= MAX2(numtmp
, DST_TMP
);
1672 newlen
+= XPD_GROW
* OPCS(XPD
);
1673 numtmp
= MAX2(numtmp
, XPD_TMP
);
1676 newlen
+= SCS_GROW
* OPCS(SCS
);
1677 numtmp
= MAX2(numtmp
, SCS_TMP
);
1680 newlen
+= LRP_GROW
* OPCS(LRP
);
1681 numtmp
= MAX2(numtmp
, LRP_TMP
);
1684 newlen
+= FRC_GROW
* OPCS(FRC
);
1685 numtmp
= MAX2(numtmp
, FRC_TMP
);
1688 newlen
+= POW_GROW
* OPCS(POW
);
1689 numtmp
= MAX2(numtmp
, POW_TMP
);
1692 newlen
+= LIT_GROW
* OPCS(LIT
);
1693 numtmp
= MAX2(numtmp
, LIT_TMP
);
1696 newlen
+= EXP_GROW
* OPCS(EXP
);
1697 numtmp
= MAX2(numtmp
, EXP_TMP
);
1700 newlen
+= LOG_GROW
* OPCS(LOG
);
1701 numtmp
= MAX2(numtmp
, LOG_TMP
);
1704 newlen
+= DP4_GROW
* OPCS(DP4
);
1705 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1708 newlen
+= DP3_GROW
* OPCS(DP3
);
1709 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1712 newlen
+= DPH_GROW
* OPCS(DPH
);
1713 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1716 newlen
+= DP2_GROW
* OPCS(DP2
);
1717 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1720 newlen
+= DP2A_GROW
* OPCS(DP2A
);
1721 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1724 newlen
+= FLR_GROW
* OPCS(FLR
);
1725 numtmp
= MAX2(numtmp
, FLR_TMP
);
1728 newlen
+= CEIL_GROW
* OPCS(CEIL
);
1729 numtmp
= MAX2(numtmp
, CEIL_TMP
);
1732 newlen
+= TRUNC_GROW
* OPCS(TRUNC
);
1733 numtmp
= MAX2(numtmp
, TRUNC_TMP
);
1735 if (ctx
.saturate
|| config
->lower_TXP
) {
1739 n
= info
->opcode_count
[TGSI_OPCODE_TEX
] +
1740 info
->opcode_count
[TGSI_OPCODE_TXP
] +
1741 info
->opcode_count
[TGSI_OPCODE_TXB
] +
1742 info
->opcode_count
[TGSI_OPCODE_TXB2
] +
1743 info
->opcode_count
[TGSI_OPCODE_TXL
];
1744 } else if (config
->lower_TXP
) {
1745 n
= info
->opcode_count
[TGSI_OPCODE_TXP
];
1748 newlen
+= SAMP_GROW
* n
;
1749 numtmp
= MAX2(numtmp
, SAMP_TMP
);
1752 /* specifically don't include two_side_colors temps in the count: */
1753 ctx
.numtmp
= numtmp
;
1755 if (ctx
.two_side_colors
) {
1756 newlen
+= TWOSIDE_GROW(ctx
.two_side_colors
);
1757 /* note: we permanently consume temp regs, re-writing references
1758 * to IN.COLOR[n] to TEMP[m] (holding the output of the CMP
1759 * instruction that selects which varying to use):
1761 numtmp
+= ctx
.two_side_colors
;
1764 newlen
+= 2 * numtmp
;
1765 newlen
+= 5; /* immediate */
1767 newtoks
= tgsi_alloc_tokens(newlen
);
1771 tgsi_transform_shader(tokens
, newtoks
, newlen
, &ctx
.base
);
1773 tgsi_scan_shader(newtoks
, info
);
1776 _debug_printf("AFTER:");
1777 tgsi_dump(newtoks
, 0);