2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Rob Clark <robclark@freedesktop.org>
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
34 #include "tgsi_lowering.h"
/* State carried through a TGSI lowering pass.  `base` is declared first;
 * tgsi_lowering_context() casts a tgsi_transform_context pointer straight
 * to this type.
 * NOTE(review): this view of the struct looks truncated -- the transforms
 * access ctx->tmp[A].src / ctx->tmp[B].dst, so the src/dst pair below
 * presumably lives inside a two-entry tmp[] array whose declaration (and
 * the struct's closing brace) is not visible here.  Confirm against the
 * pristine file before editing.
 */
36 struct tgsi_lowering_context
{
37 struct tgsi_transform_context base
;
38 const struct tgsi_lowering_config
*config
;
39 struct tgsi_shader_info
*info
;
40 unsigned two_side_colors
;
41 unsigned two_side_idx
[PIPE_MAX_SHADER_INPUTS
];
42 unsigned color_base
; /* base register for chosen COLOR/BCOLOR's */
/* scratch register, in source-operand and destination-operand form */
46 struct tgsi_full_src_register src
;
47 struct tgsi_full_dst_register dst
;
/* immediate operand; the transforms read 0.0 from .x, 1.0 from .y and
 * 128.0 from .z (per their imm{...} comments) */
51 struct tgsi_full_src_register imm
;
/* Downcast a generic transform context to the lowering context.  Every
 * transform_*() callback uses this to reach the scratch registers and the
 * immediate.
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   return (struct tgsi_lowering_context *)tctx;
}
67 reg_dst(struct tgsi_full_dst_register
*dst
,
68 const struct tgsi_full_dst_register
*orig_dst
, unsigned wrmask
)
71 dst
->Register
.WriteMask
&= wrmask
;
72 assert(dst
->Register
.WriteMask
);
76 get_swiz(unsigned *swiz
, const struct tgsi_src_register
*src
)
78 swiz
[0] = src
->SwizzleX
;
79 swiz
[1] = src
->SwizzleY
;
80 swiz
[2] = src
->SwizzleZ
;
81 swiz
[3] = src
->SwizzleW
;
85 reg_src(struct tgsi_full_src_register
*src
,
86 const struct tgsi_full_src_register
*orig_src
,
87 unsigned sx
, unsigned sy
, unsigned sz
, unsigned sw
)
90 get_swiz(swiz
, &orig_src
->Register
);
92 src
->Register
.SwizzleX
= swiz
[sx
];
93 src
->Register
.SwizzleY
= swiz
[sy
];
94 src
->Register
.SwizzleZ
= swiz
[sz
];
95 src
->Register
.SwizzleW
= swiz
[sw
];
/* SWIZ(x,y,z,w) expands to the four TGSI_SWIZZLE_* arguments of reg_src().
 * Pass `_` for a component the emitted instruction does not read; it maps
 * to X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
                      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
103 * if (dst.x aliases src.x) {
109 * MOV dst.zw, imm{0.0, 1.0}
112 aliases(const struct tgsi_full_dst_register
*dst
, unsigned dst_mask
,
113 const struct tgsi_full_src_register
*src
, unsigned src_mask
)
115 if ((dst
->Register
.File
== src
->Register
.File
) &&
116 (dst
->Register
.Index
== src
->Register
.Index
)) {
117 unsigned i
, actual_mask
= 0;
119 get_swiz(swiz
, &src
->Register
);
120 for (i
= 0; i
< 4; i
++)
121 if (src_mask
& (1 << i
))
122 actual_mask
|= (1 << swiz
[i
]);
123 if (actual_mask
& dst_mask
)
130 create_mov(struct tgsi_transform_context
*tctx
,
131 const struct tgsi_full_dst_register
*dst
,
132 const struct tgsi_full_src_register
*src
,
133 unsigned mask
, unsigned saturate
)
135 struct tgsi_full_instruction new_inst
;
137 new_inst
= tgsi_default_full_instruction();
138 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
139 new_inst
.Instruction
.Saturate
= saturate
;
140 new_inst
.Instruction
.NumDstRegs
= 1;
141 reg_dst(&new_inst
.Dst
[0], dst
, mask
);
142 new_inst
.Instruction
.NumSrcRegs
= 1;
143 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, Z
, W
));
144 tctx
->emit_instruction(tctx
, &new_inst
);
/* to help calculate # of tgsi tokens for a lowering.. we assume
 * the worst case, ie. removed instructions don't have ADDR[] or
 * anything which increases the # of tokens per src/dst and the
 * inserted instructions do.
 *
 * OINST() - old instruction
 *            1         : instruction itself
 *            1         : dst
 *            1 * nargs : srcs
 *
 * NINST() - new instruction
 *            1         : instruction itself
 *            2         : dst
 *            2 * nargs : srcs
 */
#define OINST(nargs)  (1 + 1 + 1 * (nargs))
#define NINST(nargs)  (1 + 2 + 2 * (nargs))
/*
 * Lowering Translators:
 */
170 /* DST - Distance Vector
172 * dst.y = src0.y \times src1.y
176 * ; note: could be more clever and use just a single temp
177 * ; if I was clever enough to re-write the swizzles.
178 * ; needs: 2 tmp, imm{1.0}
179 * if (dst.y aliases src0.z) {
180 * MOV tmpA.yz, src0.yz
183 * if (dst.yz aliases src1.w) {
184 * MOV tmpB.yw, src1.yw
187 * MUL dst.y, src0.y, src1.y
190 * MOV dst.x, imm{1.0}
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 NINST(1) + NINST(1) - OINST(2))
196 transform_dst(struct tgsi_transform_context
*tctx
,
197 struct tgsi_full_instruction
*inst
)
199 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
200 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
201 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
202 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
203 struct tgsi_full_instruction new_inst
;
205 if (aliases(dst
, TGSI_WRITEMASK_Y
, src0
, TGSI_WRITEMASK_Z
)) {
206 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src0
, TGSI_WRITEMASK_YZ
, 0);
207 src0
= &ctx
->tmp
[A
].src
;
210 if (aliases(dst
, TGSI_WRITEMASK_YZ
, src1
, TGSI_WRITEMASK_W
)) {
211 create_mov(tctx
, &ctx
->tmp
[B
].dst
, src1
, TGSI_WRITEMASK_YW
, 0);
212 src1
= &ctx
->tmp
[B
].src
;
215 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
216 /* MUL dst.y, src0.y, src1.y */
217 new_inst
= tgsi_default_full_instruction();
218 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
219 new_inst
.Instruction
.NumDstRegs
= 1;
220 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
221 new_inst
.Instruction
.NumSrcRegs
= 2;
222 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
, Y
, _
, _
));
223 reg_src(&new_inst
.Src
[1], src1
, SWIZ(_
, Y
, _
, _
));
224 tctx
->emit_instruction(tctx
, &new_inst
);
227 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
228 /* MOV dst.z, src0.z */
229 new_inst
= tgsi_default_full_instruction();
230 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
231 new_inst
.Instruction
.NumDstRegs
= 1;
232 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
233 new_inst
.Instruction
.NumSrcRegs
= 1;
234 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
, _
, Z
, _
));
235 tctx
->emit_instruction(tctx
, &new_inst
);
238 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
239 /* MOV dst.w, src1.w */
240 new_inst
= tgsi_default_full_instruction();
241 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
242 new_inst
.Instruction
.NumDstRegs
= 1;
243 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
244 new_inst
.Instruction
.NumSrcRegs
= 1;
245 reg_src(&new_inst
.Src
[0], src1
, SWIZ(_
, _
, _
, W
));
246 tctx
->emit_instruction(tctx
, &new_inst
);
249 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
250 /* MOV dst.x, imm{1.0} */
251 new_inst
= tgsi_default_full_instruction();
252 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
253 new_inst
.Instruction
.NumDstRegs
= 1;
254 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
255 new_inst
.Instruction
.NumSrcRegs
= 1;
256 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
, _
, _
, _
));
257 tctx
->emit_instruction(tctx
, &new_inst
);
261 /* XPD - Cross Product
262 * dst.x = src0.y \times src1.z - src1.y \times src0.z
263 * dst.y = src0.z \times src1.x - src1.z \times src0.x
264 * dst.z = src0.x \times src1.y - src1.x \times src0.y
267 * ; needs: 2 tmp, imm{1.0}
268 * MUL tmpA.xyz, src0.yzx, src1.zxy
269 * MUL tmpB.xyz, src1.yzx, src0.zxy
270 * SUB dst.xyz, tmpA.xyz, tmpB.xyz
271 * MOV dst.w, imm{1.0}
273 #define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
276 transform_xpd(struct tgsi_transform_context
*tctx
,
277 struct tgsi_full_instruction
*inst
)
279 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
280 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
281 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
282 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
283 struct tgsi_full_instruction new_inst
;
285 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
286 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
287 new_inst
= tgsi_default_full_instruction();
288 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
289 new_inst
.Instruction
.NumDstRegs
= 1;
290 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZ
);
291 new_inst
.Instruction
.NumSrcRegs
= 2;
292 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
, Z
, X
, _
));
293 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
, X
, Y
, _
));
294 tctx
->emit_instruction(tctx
, &new_inst
);
296 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
297 new_inst
= tgsi_default_full_instruction();
298 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
299 new_inst
.Instruction
.NumDstRegs
= 1;
300 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZ
);
301 new_inst
.Instruction
.NumSrcRegs
= 2;
302 reg_src(&new_inst
.Src
[0], src1
, SWIZ(Y
, Z
, X
, _
));
303 reg_src(&new_inst
.Src
[1], src0
, SWIZ(Z
, X
, Y
, _
));
304 tctx
->emit_instruction(tctx
, &new_inst
);
306 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
307 new_inst
= tgsi_default_full_instruction();
308 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
309 new_inst
.Instruction
.NumDstRegs
= 1;
310 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZ
);
311 new_inst
.Instruction
.NumSrcRegs
= 2;
312 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, _
));
313 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
, Y
, Z
, _
));
314 tctx
->emit_instruction(tctx
, &new_inst
);
317 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
318 /* MOV dst.w, imm{1.0} */
319 new_inst
= tgsi_default_full_instruction();
320 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
321 new_inst
.Instruction
.NumDstRegs
= 1;
322 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
323 new_inst
.Instruction
.NumSrcRegs
= 1;
324 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, _
, Y
));
325 tctx
->emit_instruction(tctx
, &new_inst
);
330 * dst.x = \cos{src.x}
331 * dst.y = \sin{src.x}
335 * ; needs: 1 tmp, imm{0.0, 1.0}
336 * if (dst.x aliases src.x) {
342 * MOV dst.zw, imm{0.0, 1.0}
344 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
347 transform_scs(struct tgsi_transform_context
*tctx
,
348 struct tgsi_full_instruction
*inst
)
350 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
351 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
352 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
353 struct tgsi_full_instruction new_inst
;
355 if (aliases(dst
, TGSI_WRITEMASK_X
, src
, TGSI_WRITEMASK_X
)) {
356 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src
, TGSI_WRITEMASK_X
, 0);
357 src
= &ctx
->tmp
[A
].src
;
360 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
361 /* COS dst.x, src.x */
362 new_inst
= tgsi_default_full_instruction();
363 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_COS
;
364 new_inst
.Instruction
.NumDstRegs
= 1;
365 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
366 new_inst
.Instruction
.NumSrcRegs
= 1;
367 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
368 tctx
->emit_instruction(tctx
, &new_inst
);
371 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
372 /* SIN dst.y, src.x */
373 new_inst
= tgsi_default_full_instruction();
374 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SIN
;
375 new_inst
.Instruction
.NumDstRegs
= 1;
376 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
377 new_inst
.Instruction
.NumSrcRegs
= 1;
378 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
379 tctx
->emit_instruction(tctx
, &new_inst
);
382 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_ZW
) {
383 /* MOV dst.zw, imm{0.0, 1.0} */
384 new_inst
= tgsi_default_full_instruction();
385 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
386 new_inst
.Instruction
.NumDstRegs
= 1;
387 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_ZW
);
388 new_inst
.Instruction
.NumSrcRegs
= 1;
389 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, X
, Y
));
390 tctx
->emit_instruction(tctx
, &new_inst
);
394 /* LRP - Linear Interpolate
395 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
396 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
397 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
398 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
400 * ; needs: 2 tmp, imm{1.0}
401 * MUL tmpA, src0, src1
402 * SUB tmpB, imm{1.0}, src0
403 * MUL tmpB, tmpB, src2
404 * ADD dst, tmpA, tmpB
406 #define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
409 transform_lrp(struct tgsi_transform_context
*tctx
,
410 struct tgsi_full_instruction
*inst
)
412 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
413 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
414 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
415 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
416 struct tgsi_full_src_register
*src2
= &inst
->Src
[2];
417 struct tgsi_full_instruction new_inst
;
419 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
420 /* MUL tmpA, src0, src1 */
421 new_inst
= tgsi_default_full_instruction();
422 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
423 new_inst
.Instruction
.NumDstRegs
= 1;
424 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
425 new_inst
.Instruction
.NumSrcRegs
= 2;
426 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
427 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
, Y
, Z
, W
));
428 tctx
->emit_instruction(tctx
, &new_inst
);
430 /* SUB tmpB, imm{1.0}, src0 */
431 new_inst
= tgsi_default_full_instruction();
432 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
433 new_inst
.Instruction
.NumDstRegs
= 1;
434 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZW
);
435 new_inst
.Instruction
.NumSrcRegs
= 2;
436 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
, Y
, Y
, Y
));
437 reg_src(&new_inst
.Src
[1], src0
, SWIZ(X
, Y
, Z
, W
));
438 tctx
->emit_instruction(tctx
, &new_inst
);
440 /* MUL tmpB, tmpB, src2 */
441 new_inst
= tgsi_default_full_instruction();
442 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
443 new_inst
.Instruction
.NumDstRegs
= 1;
444 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZW
);
445 new_inst
.Instruction
.NumSrcRegs
= 2;
446 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[B
].src
, SWIZ(X
, Y
, Z
, W
));
447 reg_src(&new_inst
.Src
[1], src2
, SWIZ(X
, Y
, Z
, W
));
448 tctx
->emit_instruction(tctx
, &new_inst
);
450 /* ADD dst, tmpA, tmpB */
451 new_inst
= tgsi_default_full_instruction();
452 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
453 new_inst
.Instruction
.NumDstRegs
= 1;
454 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
455 new_inst
.Instruction
.NumSrcRegs
= 2;
456 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
457 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
, Y
, Z
, W
));
458 tctx
->emit_instruction(tctx
, &new_inst
);
463 * dst.x = src.x - \lfloor src.x\rfloor
464 * dst.y = src.y - \lfloor src.y\rfloor
465 * dst.z = src.z - \lfloor src.z\rfloor
466 * dst.w = src.w - \lfloor src.w\rfloor
472 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
475 transform_frc(struct tgsi_transform_context
*tctx
,
476 struct tgsi_full_instruction
*inst
)
478 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
479 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
480 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
481 struct tgsi_full_instruction new_inst
;
483 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
485 new_inst
= tgsi_default_full_instruction();
486 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
487 new_inst
.Instruction
.NumDstRegs
= 1;
488 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
489 new_inst
.Instruction
.NumSrcRegs
= 1;
490 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, Z
, W
));
491 tctx
->emit_instruction(tctx
, &new_inst
);
493 /* SUB dst, src, tmpA */
494 new_inst
= tgsi_default_full_instruction();
495 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
496 new_inst
.Instruction
.NumDstRegs
= 1;
497 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
498 new_inst
.Instruction
.NumSrcRegs
= 2;
499 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, Z
, W
));
500 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
501 tctx
->emit_instruction(tctx
, &new_inst
);
506 * dst.x = src0.x^{src1.x}
507 * dst.y = src0.x^{src1.x}
508 * dst.z = src0.x^{src1.x}
509 * dst.w = src0.x^{src1.x}
513 * MUL tmpA.x, src1.x, tmpA.x
516 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
519 transform_pow(struct tgsi_transform_context
*tctx
,
520 struct tgsi_full_instruction
*inst
)
522 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
523 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
524 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
525 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
526 struct tgsi_full_instruction new_inst
;
528 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
529 /* LG2 tmpA.x, src0.x */
530 new_inst
= tgsi_default_full_instruction();
531 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
532 new_inst
.Instruction
.NumDstRegs
= 1;
533 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
534 new_inst
.Instruction
.NumSrcRegs
= 1;
535 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, _
, _
, _
));
536 tctx
->emit_instruction(tctx
, &new_inst
);
538 /* MUL tmpA.x, src1.x, tmpA.x */
539 new_inst
= tgsi_default_full_instruction();
540 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
541 new_inst
.Instruction
.NumDstRegs
= 1;
542 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
543 new_inst
.Instruction
.NumSrcRegs
= 2;
544 reg_src(&new_inst
.Src
[0], src1
, SWIZ(X
, _
, _
, _
));
545 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
546 tctx
->emit_instruction(tctx
, &new_inst
);
548 /* EX2 dst, tmpA.x */
549 new_inst
= tgsi_default_full_instruction();
550 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
551 new_inst
.Instruction
.NumDstRegs
= 1;
552 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
553 new_inst
.Instruction
.NumSrcRegs
= 1;
554 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
555 tctx
->emit_instruction(tctx
, &new_inst
);
/* LIT - Light Coefficients
 *   dst.x = 1.0
 *   dst.y = max(src.x, 0.0)
 *   dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0
 *   dst.w = 1.0
 *
 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
 * MAX tmpA.xy, src.xy, imm{0.0}
 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
 * LG2 tmpA.y, tmpA.y
 * MUL tmpA.y, tmpA.z, tmpA.y
 * EX2 tmpA.y, tmpA.y
 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
 * MOV dst.yz, tmpA.xy
 * MOV dst.xw, imm{1.0}
 *
 * LIT_GROW has one NINST() term per instruction above, in the same order.
 * MAX takes two sources, so its term is NINST(2); counting it as NINST(1)
 * would make the worst-case token estimate too small.
 */
#define LIT_GROW (NINST(2) + NINST(3) + NINST(1) + NINST(2) + \
                  NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
579 transform_lit(struct tgsi_transform_context
*tctx
,
580 struct tgsi_full_instruction
*inst
)
582 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
583 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
584 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
585 struct tgsi_full_instruction new_inst
;
587 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_YZ
) {
588 /* MAX tmpA.xy, src.xy, imm{0.0} */
589 new_inst
= tgsi_default_full_instruction();
590 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAX
;
591 new_inst
.Instruction
.NumDstRegs
= 1;
592 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XY
);
593 new_inst
.Instruction
.NumSrcRegs
= 2;
594 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, _
, _
));
595 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(X
, X
, _
, _
));
596 tctx
->emit_instruction(tctx
, &new_inst
);
598 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
599 new_inst
= tgsi_default_full_instruction();
600 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CLAMP
;
601 new_inst
.Instruction
.NumDstRegs
= 1;
602 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
603 new_inst
.Instruction
.NumSrcRegs
= 3;
604 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, _
, W
, _
));
605 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(_
, _
, Z
, _
));
606 new_inst
.Src
[1].Register
.Negate
= true;
607 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
, _
, Z
, _
));
608 tctx
->emit_instruction(tctx
, &new_inst
);
610 /* LG2 tmpA.y, tmpA.y */
611 new_inst
= tgsi_default_full_instruction();
612 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
613 new_inst
.Instruction
.NumDstRegs
= 1;
614 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
615 new_inst
.Instruction
.NumSrcRegs
= 1;
616 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, _
, _
));
617 tctx
->emit_instruction(tctx
, &new_inst
);
619 /* MUL tmpA.y, tmpA.z, tmpA.y */
620 new_inst
= tgsi_default_full_instruction();
621 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
622 new_inst
.Instruction
.NumDstRegs
= 1;
623 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
624 new_inst
.Instruction
.NumSrcRegs
= 2;
625 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, Z
, _
, _
));
626 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Y
, _
, _
));
627 tctx
->emit_instruction(tctx
, &new_inst
);
629 /* EX2 tmpA.y, tmpA.y */
630 new_inst
= tgsi_default_full_instruction();
631 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
632 new_inst
.Instruction
.NumDstRegs
= 1;
633 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
634 new_inst
.Instruction
.NumSrcRegs
= 1;
635 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, _
, _
));
636 tctx
->emit_instruction(tctx
, &new_inst
);
638 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
639 new_inst
= tgsi_default_full_instruction();
640 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
641 new_inst
.Instruction
.NumDstRegs
= 1;
642 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
643 new_inst
.Instruction
.NumSrcRegs
= 3;
644 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, X
, _
, _
));
645 new_inst
.Src
[0].Register
.Negate
= true;
646 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Y
, _
, _
));
647 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
, X
, _
, _
));
648 tctx
->emit_instruction(tctx
, &new_inst
);
650 /* MOV dst.yz, tmpA.xy */
651 new_inst
= tgsi_default_full_instruction();
652 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
653 new_inst
.Instruction
.NumDstRegs
= 1;
654 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_YZ
);
655 new_inst
.Instruction
.NumSrcRegs
= 1;
656 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, Y
, _
));
657 tctx
->emit_instruction(tctx
, &new_inst
);
660 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XW
) {
661 /* MOV dst.xw, imm{1.0} */
662 new_inst
= tgsi_default_full_instruction();
663 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
664 new_inst
.Instruction
.NumDstRegs
= 1;
665 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XW
);
666 new_inst
.Instruction
.NumSrcRegs
= 1;
667 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
, _
, _
, Y
));
668 tctx
->emit_instruction(tctx
, &new_inst
);
672 /* EXP - Approximate Exponential Base 2
673 * dst.x = 2^{\lfloor src.x\rfloor}
674 * dst.y = src.x - \lfloor src.x\rfloor
678 * ; needs: 1 tmp, imm{1.0}
681 * SUB dst.y, src.x, tmpA.x
684 * MOV dst.w, imm{1.0}
686 #define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
687 NINST(1)+ NINST(1) - OINST(1))
690 transform_exp(struct tgsi_transform_context
*tctx
,
691 struct tgsi_full_instruction
*inst
)
693 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
694 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
695 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
696 struct tgsi_full_instruction new_inst
;
698 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
699 /* FLR tmpA.x, src.x */
700 new_inst
= tgsi_default_full_instruction();
701 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
702 new_inst
.Instruction
.NumDstRegs
= 1;
703 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
704 new_inst
.Instruction
.NumSrcRegs
= 1;
705 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
706 tctx
->emit_instruction(tctx
, &new_inst
);
709 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
710 /* EX2 tmpA.y, src.x */
711 new_inst
= tgsi_default_full_instruction();
712 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
713 new_inst
.Instruction
.NumDstRegs
= 1;
714 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
715 new_inst
.Instruction
.NumSrcRegs
= 1;
716 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
717 tctx
->emit_instruction(tctx
, &new_inst
);
720 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
721 /* SUB dst.y, src.x, tmpA.x */
722 new_inst
= tgsi_default_full_instruction();
723 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
724 new_inst
.Instruction
.NumDstRegs
= 1;
725 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
726 new_inst
.Instruction
.NumSrcRegs
= 2;
727 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, X
, _
, _
));
728 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
729 tctx
->emit_instruction(tctx
, &new_inst
);
732 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
733 /* EX2 dst.x, tmpA.x */
734 new_inst
= tgsi_default_full_instruction();
735 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
736 new_inst
.Instruction
.NumDstRegs
= 1;
737 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
738 new_inst
.Instruction
.NumSrcRegs
= 1;
739 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
740 tctx
->emit_instruction(tctx
, &new_inst
);
743 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
744 /* MOV dst.z, tmpA.y */
745 new_inst
= tgsi_default_full_instruction();
746 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
747 new_inst
.Instruction
.NumDstRegs
= 1;
748 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
749 new_inst
.Instruction
.NumSrcRegs
= 1;
750 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, _
, Y
, _
));
751 tctx
->emit_instruction(tctx
, &new_inst
);
754 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
755 /* MOV dst.w, imm{1.0} */
756 new_inst
= tgsi_default_full_instruction();
757 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
758 new_inst
.Instruction
.NumDstRegs
= 1;
759 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
760 new_inst
.Instruction
.NumSrcRegs
= 1;
761 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, _
, Y
));
762 tctx
->emit_instruction(tctx
, &new_inst
);
766 /* LOG - Approximate Logarithm Base 2
767 * dst.x = \lfloor\log_2{|src.x|}\rfloor
768 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
769 * dst.z = \log_2{|src.x|}
772 * ; needs: 1 tmp, imm{1.0}
773 * LG2 tmpA.x, |src.x|
777 * MUL dst.y, |src.x|, tmpA.z
778 * MOV dst.xz, tmpA.yx
779 * MOV dst.w, imm{1.0}
781 #define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
782 NINST(2) + NINST(1) + NINST(1) - OINST(1))
785 transform_log(struct tgsi_transform_context
*tctx
,
786 struct tgsi_full_instruction
*inst
)
788 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
789 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
790 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
791 struct tgsi_full_instruction new_inst
;
793 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
794 /* LG2 tmpA.x, |src.x| */
795 new_inst
= tgsi_default_full_instruction();
796 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
797 new_inst
.Instruction
.NumDstRegs
= 1;
798 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
799 new_inst
.Instruction
.NumSrcRegs
= 1;
800 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
801 new_inst
.Src
[0].Register
.Absolute
= true;
802 tctx
->emit_instruction(tctx
, &new_inst
);
805 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
806 /* FLR tmpA.y, tmpA.x */
807 new_inst
= tgsi_default_full_instruction();
808 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
809 new_inst
.Instruction
.NumDstRegs
= 1;
810 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
811 new_inst
.Instruction
.NumSrcRegs
= 1;
812 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
813 tctx
->emit_instruction(tctx
, &new_inst
);
816 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
817 /* EX2 tmpA.z, tmpA.y */
818 new_inst
= tgsi_default_full_instruction();
819 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
820 new_inst
.Instruction
.NumDstRegs
= 1;
821 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
822 new_inst
.Instruction
.NumSrcRegs
= 1;
823 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, _
, _
));
824 tctx
->emit_instruction(tctx
, &new_inst
);
826 /* RCP tmpA.z, tmpA.z */
827 new_inst
= tgsi_default_full_instruction();
828 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
829 new_inst
.Instruction
.NumDstRegs
= 1;
830 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
831 new_inst
.Instruction
.NumSrcRegs
= 1;
832 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Z
, _
, _
, _
));
833 tctx
->emit_instruction(tctx
, &new_inst
);
835 /* MUL dst.y, |src.x|, tmpA.z */
836 new_inst
= tgsi_default_full_instruction();
837 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
838 new_inst
.Instruction
.NumDstRegs
= 1;
839 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
840 new_inst
.Instruction
.NumSrcRegs
= 2;
841 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, X
, _
, _
));
842 new_inst
.Src
[0].Register
.Absolute
= true;
843 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Z
, _
, _
));
844 tctx
->emit_instruction(tctx
, &new_inst
);
847 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XZ
) {
848 /* MOV dst.xz, tmpA.yx */
849 new_inst
= tgsi_default_full_instruction();
850 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
851 new_inst
.Instruction
.NumDstRegs
= 1;
852 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XZ
);
853 new_inst
.Instruction
.NumSrcRegs
= 1;
854 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, X
, _
));
855 tctx
->emit_instruction(tctx
, &new_inst
);
858 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
859 /* MOV dst.w, imm{1.0} */
860 new_inst
= tgsi_default_full_instruction();
861 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
862 new_inst
.Instruction
.NumDstRegs
= 1;
863 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
864 new_inst
.Instruction
.NumSrcRegs
= 1;
865 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, _
, Y
));
866 tctx
->emit_instruction(tctx
, &new_inst
);
/* DP4 - 4-component Dot Product
 *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 *
 * DP3 - 3-component Dot Product
 *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 *
 * DPH - Homogeneous Dot Product
 *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 *
 * DP2 - 2-component Dot Product
 *   dst = src0.x \times src1.x + src0.y \times src1.y
 *
 * DP2A - 2-component Dot Product And Add
 *   dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
 *
 * NOTE: these are translated into a sequence of MUL/MAD(/ADD) scalar
 * operations, which is what you'd prefer for an ISA that is natively
 * scalar.  Probably a native vector ISA would at least already have
 * DP4/DP3 instructions, but perhaps there is room for an alternative
 * translation for DPH/DP2/DP2A using vector instructions.
 *
 *   MUL tmpA.x, src0.x, src1.x
 *   MAD tmpA.x, src0.y, src1.y, tmpA.x
 *   if (DPH || DP3 || DP4) {
 *     MAD tmpA.x, src0.z, src1.z, tmpA.x
 *     if (DPH) {
 *       ADD tmpA.x, src1.w, tmpA.x
 *     } else if (DP4) {
 *       MAD tmpA.x, src0.w, src1.w, tmpA.x
 *     }
 *   } else if (DP2A) {
 *     ADD tmpA.x, src2.x, tmpA.x
 *   }
 *   ; fixup last instruction to replicate into dst
 */
906 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
907 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
908 #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
909 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
910 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
913 transform_dotp(struct tgsi_transform_context
*tctx
,
914 struct tgsi_full_instruction
*inst
)
916 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
917 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
918 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
919 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
920 struct tgsi_full_src_register
*src2
= &inst
->Src
[2]; /* only DP2A */
921 struct tgsi_full_instruction new_inst
;
922 unsigned opcode
= inst
->Instruction
.Opcode
;
924 /* NOTE: any potential last instruction must replicate src on all
925 * components (since it could be re-written to write to final dst)
928 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
929 /* MUL tmpA.x, src0.x, src1.x */
930 new_inst
= tgsi_default_full_instruction();
931 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
932 new_inst
.Instruction
.NumDstRegs
= 1;
933 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
934 new_inst
.Instruction
.NumSrcRegs
= 2;
935 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, _
, _
, _
));
936 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
, _
, _
, _
));
937 tctx
->emit_instruction(tctx
, &new_inst
);
939 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
940 new_inst
= tgsi_default_full_instruction();
941 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
942 new_inst
.Instruction
.NumDstRegs
= 1;
943 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
944 new_inst
.Instruction
.NumSrcRegs
= 3;
945 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
, Y
, Y
, Y
));
946 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Y
, Y
, Y
, Y
));
947 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
949 if ((opcode
== TGSI_OPCODE_DPH
) ||
950 (opcode
== TGSI_OPCODE_DP3
) ||
951 (opcode
== TGSI_OPCODE_DP4
)) {
952 tctx
->emit_instruction(tctx
, &new_inst
);
954 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
955 new_inst
= tgsi_default_full_instruction();
956 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
957 new_inst
.Instruction
.NumDstRegs
= 1;
958 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
959 new_inst
.Instruction
.NumSrcRegs
= 3;
960 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Z
, Z
, Z
, Z
));
961 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
, Z
, Z
, Z
));
962 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
964 if (opcode
== TGSI_OPCODE_DPH
) {
965 tctx
->emit_instruction(tctx
, &new_inst
);
967 /* ADD tmpA.x, src1.w, tmpA.x */
968 new_inst
= tgsi_default_full_instruction();
969 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
970 new_inst
.Instruction
.NumDstRegs
= 1;
971 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
972 new_inst
.Instruction
.NumSrcRegs
= 2;
973 reg_src(&new_inst
.Src
[0], src1
, SWIZ(W
, W
, W
, W
));
974 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
975 } else if (opcode
== TGSI_OPCODE_DP4
) {
976 tctx
->emit_instruction(tctx
, &new_inst
);
978 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
979 new_inst
= tgsi_default_full_instruction();
980 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
981 new_inst
.Instruction
.NumDstRegs
= 1;
982 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
983 new_inst
.Instruction
.NumSrcRegs
= 3;
984 reg_src(&new_inst
.Src
[0], src0
, SWIZ(W
, W
, W
, W
));
985 reg_src(&new_inst
.Src
[1], src1
, SWIZ(W
, W
, W
, W
));
986 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
988 } else if (opcode
== TGSI_OPCODE_DP2A
) {
989 tctx
->emit_instruction(tctx
, &new_inst
);
991 /* ADD tmpA.x, src2.x, tmpA.x */
992 new_inst
= tgsi_default_full_instruction();
993 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
994 new_inst
.Instruction
.NumDstRegs
= 1;
995 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
996 new_inst
.Instruction
.NumSrcRegs
= 2;
997 reg_src(&new_inst
.Src
[0], src2
, SWIZ(X
, X
, X
, X
));
998 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
1001 /* fixup last instruction to write to dst: */
1002 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
1004 tctx
->emit_instruction(tctx
, &new_inst
);
/* Inserts a MOV_SAT for the needed components of tex coord.  Note that
 * in the case of TXP, the clamping must happen *after* projection, so
 * we need to lower TXP to TEX.
 *
 *   MOV tmpA, src0
 *   if (opc == TXP) {
 *     ; do perspective division manually before clamping:
 *     RCP tmpB.x, tmpA.w
 *     MUL tmpA.<pmask>, tmpA, tmpB.xxxx
 *     opc = TEX;
 *   }
 *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
 *   <opc> dst, tmpA, ...
 */
1022 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1025 transform_samp(struct tgsi_transform_context
*tctx
,
1026 struct tgsi_full_instruction
*inst
)
1028 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1029 struct tgsi_full_src_register
*coord
= &inst
->Src
[0];
1030 struct tgsi_full_src_register
*samp
;
1031 struct tgsi_full_instruction new_inst
;
1032 /* mask is clamped coords, pmask is all coords (for projection): */
1033 unsigned mask
= 0, pmask
= 0, smask
;
1034 unsigned tex
= inst
->Texture
.Texture
;
1035 unsigned opcode
= inst
->Instruction
.Opcode
;
1036 bool lower_txp
= (opcode
== TGSI_OPCODE_TXP
) &&
1037 (ctx
->config
->lower_TXP
& (1 << tex
));
1039 if (opcode
== TGSI_OPCODE_TXB2
) {
1040 samp
= &inst
->Src
[2];
1042 samp
= &inst
->Src
[1];
1045 /* convert sampler # to bitmask to test: */
1046 smask
= 1 << samp
->Register
.Index
;
1048 /* check if we actually need to lower this one: */
1049 if (!(ctx
->saturate
& smask
) && !lower_txp
)
1052 /* figure out which coordinates need saturating:
1053 * - RECT textures should not get saturated
1054 * - array index coords should not get saturated
1057 case TGSI_TEXTURE_3D
:
1058 case TGSI_TEXTURE_CUBE
:
1059 case TGSI_TEXTURE_CUBE_ARRAY
:
1060 case TGSI_TEXTURE_SHADOWCUBE
:
1061 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
1062 if (ctx
->config
->saturate_r
& smask
)
1063 mask
|= TGSI_WRITEMASK_Z
;
1064 pmask
|= TGSI_WRITEMASK_Z
;
1067 case TGSI_TEXTURE_2D
:
1068 case TGSI_TEXTURE_2D_ARRAY
:
1069 case TGSI_TEXTURE_SHADOW2D
:
1070 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
1071 case TGSI_TEXTURE_2D_MSAA
:
1072 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
1073 if (ctx
->config
->saturate_t
& smask
)
1074 mask
|= TGSI_WRITEMASK_Y
;
1075 pmask
|= TGSI_WRITEMASK_Y
;
1078 case TGSI_TEXTURE_1D
:
1079 case TGSI_TEXTURE_1D_ARRAY
:
1080 case TGSI_TEXTURE_SHADOW1D
:
1081 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
1082 if (ctx
->config
->saturate_s
& smask
)
1083 mask
|= TGSI_WRITEMASK_X
;
1084 pmask
|= TGSI_WRITEMASK_X
;
1087 case TGSI_TEXTURE_RECT
:
1088 case TGSI_TEXTURE_SHADOWRECT
:
1089 /* we don't saturate, but in case of lower_txp we
1090 * still need to do the perspective divide:
1092 pmask
= TGSI_WRITEMASK_XY
;
1096 /* sanity check.. driver could be asking to saturate a non-
1097 * existent coordinate component:
1099 if (!mask
&& !lower_txp
)
1102 /* MOV tmpA, src0 */
1103 create_mov(tctx
, &ctx
->tmp
[A
].dst
, coord
, TGSI_WRITEMASK_XYZW
, 0);
1105 /* This is a bit sad.. we need to clamp *after* the coords
1106 * are projected, which means lowering TXP to TEX and doing
1107 * the projection ourself. But since I haven't figured out
1108 * how to make the lowering code deliver an electric shock
1109 * to anyone using GL_CLAMP, we must do this instead:
1111 if (opcode
== TGSI_OPCODE_TXP
) {
1112 /* RCP tmpB.x tmpA.w */
1113 new_inst
= tgsi_default_full_instruction();
1114 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
1115 new_inst
.Instruction
.NumDstRegs
= 1;
1116 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_X
);
1117 new_inst
.Instruction
.NumSrcRegs
= 1;
1118 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(W
, _
, _
, _
));
1119 tctx
->emit_instruction(tctx
, &new_inst
);
1121 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1122 new_inst
= tgsi_default_full_instruction();
1123 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
1124 new_inst
.Instruction
.NumDstRegs
= 1;
1125 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, pmask
);
1126 new_inst
.Instruction
.NumSrcRegs
= 2;
1127 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
1128 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
, X
, X
, X
));
1129 tctx
->emit_instruction(tctx
, &new_inst
);
1131 opcode
= TGSI_OPCODE_TEX
;
1134 /* MOV_SAT tmpA.<mask>, tmpA */
1136 create_mov(tctx
, &ctx
->tmp
[A
].dst
, &ctx
->tmp
[A
].src
, mask
,
1140 /* modify the texture samp instruction to take fixed up coord: */
1142 new_inst
.Instruction
.Opcode
= opcode
;
1143 new_inst
.Src
[0] = ctx
->tmp
[A
].src
;
1144 tctx
->emit_instruction(tctx
, &new_inst
);
1149 /* Two-sided color emulation:
1150 * For each COLOR input, create a corresponding BCOLOR input, plus
1151 * CMP instruction to select front or back color based on FACE
1153 #define TWOSIDE_GROW(n) ( \
1155 ((n) * 2) + /* IN[] BCOLOR[n] */ \
1156 ((n) * 1) + /* TEMP[] */ \
1157 ((n) * NINST(3)) /* CMP instr */ \
1161 emit_twoside(struct tgsi_transform_context
*tctx
)
1163 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1164 struct tgsi_shader_info
*info
= ctx
->info
;
1165 struct tgsi_full_declaration decl
;
1166 struct tgsi_full_instruction new_inst
;
1167 unsigned inbase
, tmpbase
;
1170 inbase
= info
->file_max
[TGSI_FILE_INPUT
] + 1;
1171 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1173 /* additional inputs for BCOLOR's */
1174 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1175 decl
= tgsi_default_full_declaration();
1176 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1177 decl
.Declaration
.Semantic
= true;
1178 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ i
;
1179 decl
.Semantic
.Name
= TGSI_SEMANTIC_BCOLOR
;
1180 decl
.Semantic
.Index
=
1181 info
->input_semantic_index
[ctx
->two_side_idx
[i
]];
1182 tctx
->emit_declaration(tctx
, &decl
);
1185 /* additional input for FACE */
1186 if (ctx
->two_side_colors
&& (ctx
->face_idx
== -1)) {
1187 decl
= tgsi_default_full_declaration();
1188 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1189 decl
.Declaration
.Semantic
= true;
1190 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ ctx
->two_side_colors
;
1191 decl
.Semantic
.Name
= TGSI_SEMANTIC_FACE
;
1192 decl
.Semantic
.Index
= 0;
1193 tctx
->emit_declaration(tctx
, &decl
);
1195 ctx
->face_idx
= decl
.Range
.First
;
1198 /* additional temps for COLOR/BCOLOR selection: */
1199 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1200 decl
= tgsi_default_full_declaration();
1201 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1202 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ ctx
->numtmp
+ i
;
1203 tctx
->emit_declaration(tctx
, &decl
);
1206 /* and finally additional instructions to select COLOR/BCOLOR: */
1207 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1208 new_inst
= tgsi_default_full_instruction();
1209 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
1211 new_inst
.Instruction
.NumDstRegs
= 1;
1212 new_inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
1213 new_inst
.Dst
[0].Register
.Index
= tmpbase
+ ctx
->numtmp
+ i
;
1214 new_inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1216 new_inst
.Instruction
.NumSrcRegs
= 3;
1217 new_inst
.Src
[0].Register
.File
= TGSI_FILE_INPUT
;
1218 new_inst
.Src
[0].Register
.Index
= ctx
->face_idx
;
1219 new_inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1220 new_inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
1221 new_inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
1222 new_inst
.Src
[0].Register
.SwizzleW
= TGSI_SWIZZLE_X
;
1223 new_inst
.Src
[1].Register
.File
= TGSI_FILE_INPUT
;
1224 new_inst
.Src
[1].Register
.Index
= inbase
+ i
;
1225 new_inst
.Src
[1].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1226 new_inst
.Src
[1].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1227 new_inst
.Src
[1].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1228 new_inst
.Src
[1].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1229 new_inst
.Src
[2].Register
.File
= TGSI_FILE_INPUT
;
1230 new_inst
.Src
[2].Register
.Index
= ctx
->two_side_idx
[i
];
1231 new_inst
.Src
[2].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1232 new_inst
.Src
[2].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1233 new_inst
.Src
[2].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1234 new_inst
.Src
[2].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1236 tctx
->emit_instruction(tctx
, &new_inst
);
1241 emit_decls(struct tgsi_transform_context
*tctx
)
1243 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1244 struct tgsi_shader_info
*info
= ctx
->info
;
1245 struct tgsi_full_declaration decl
;
1246 struct tgsi_full_immediate immed
;
1250 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1252 ctx
->color_base
= tmpbase
+ ctx
->numtmp
;
1254 /* declare immediate: */
1255 immed
= tgsi_default_full_immediate();
1256 immed
.Immediate
.NrTokens
= 1 + 4; /* one for the token itself */
1257 immed
.u
[0].Float
= 0.0;
1258 immed
.u
[1].Float
= 1.0;
1259 immed
.u
[2].Float
= 128.0;
1260 immed
.u
[3].Float
= 0.0;
1261 tctx
->emit_immediate(tctx
, &immed
);
1263 ctx
->imm
.Register
.File
= TGSI_FILE_IMMEDIATE
;
1264 ctx
->imm
.Register
.Index
= info
->immediate_count
;
1265 ctx
->imm
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1266 ctx
->imm
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1267 ctx
->imm
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1268 ctx
->imm
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1270 /* declare temp regs: */
1271 for (i
= 0; i
< ctx
->numtmp
; i
++) {
1272 decl
= tgsi_default_full_declaration();
1273 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1274 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ i
;
1275 tctx
->emit_declaration(tctx
, &decl
);
1277 ctx
->tmp
[i
].src
.Register
.File
= TGSI_FILE_TEMPORARY
;
1278 ctx
->tmp
[i
].src
.Register
.Index
= tmpbase
+ i
;
1279 ctx
->tmp
[i
].src
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1280 ctx
->tmp
[i
].src
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1281 ctx
->tmp
[i
].src
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1282 ctx
->tmp
[i
].src
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1284 ctx
->tmp
[i
].dst
.Register
.File
= TGSI_FILE_TEMPORARY
;
1285 ctx
->tmp
[i
].dst
.Register
.Index
= tmpbase
+ i
;
1286 ctx
->tmp
[i
].dst
.Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1289 if (ctx
->two_side_colors
)
1294 rename_color_inputs(struct tgsi_lowering_context
*ctx
,
1295 struct tgsi_full_instruction
*inst
)
1298 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1299 struct tgsi_src_register
*src
= &inst
->Src
[i
].Register
;
1300 if (src
->File
== TGSI_FILE_INPUT
) {
1301 for (j
= 0; j
< ctx
->two_side_colors
; j
++) {
1302 if (src
->Index
== ctx
->two_side_idx
[j
]) {
1303 src
->File
= TGSI_FILE_TEMPORARY
;
1304 src
->Index
= ctx
->color_base
+ j
;
1314 transform_instr(struct tgsi_transform_context
*tctx
,
1315 struct tgsi_full_instruction
*inst
)
1317 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1319 if (!ctx
->emitted_decls
) {
1321 ctx
->emitted_decls
= 1;
1324 /* if emulating two-sided-color, we need to re-write some
1327 if (ctx
->two_side_colors
)
1328 rename_color_inputs(ctx
, inst
);
1330 switch (inst
->Instruction
.Opcode
) {
1331 case TGSI_OPCODE_DST
:
1332 if (!ctx
->config
->lower_DST
)
1334 transform_dst(tctx
, inst
);
1336 case TGSI_OPCODE_XPD
:
1337 if (!ctx
->config
->lower_XPD
)
1339 transform_xpd(tctx
, inst
);
1341 case TGSI_OPCODE_SCS
:
1342 if (!ctx
->config
->lower_SCS
)
1344 transform_scs(tctx
, inst
);
1346 case TGSI_OPCODE_LRP
:
1347 if (!ctx
->config
->lower_LRP
)
1349 transform_lrp(tctx
, inst
);
1351 case TGSI_OPCODE_FRC
:
1352 if (!ctx
->config
->lower_FRC
)
1354 transform_frc(tctx
, inst
);
1356 case TGSI_OPCODE_POW
:
1357 if (!ctx
->config
->lower_POW
)
1359 transform_pow(tctx
, inst
);
1361 case TGSI_OPCODE_LIT
:
1362 if (!ctx
->config
->lower_LIT
)
1364 transform_lit(tctx
, inst
);
1366 case TGSI_OPCODE_EXP
:
1367 if (!ctx
->config
->lower_EXP
)
1369 transform_exp(tctx
, inst
);
1371 case TGSI_OPCODE_LOG
:
1372 if (!ctx
->config
->lower_LOG
)
1374 transform_log(tctx
, inst
);
1376 case TGSI_OPCODE_DP4
:
1377 if (!ctx
->config
->lower_DP4
)
1379 transform_dotp(tctx
, inst
);
1381 case TGSI_OPCODE_DP3
:
1382 if (!ctx
->config
->lower_DP3
)
1384 transform_dotp(tctx
, inst
);
1386 case TGSI_OPCODE_DPH
:
1387 if (!ctx
->config
->lower_DPH
)
1389 transform_dotp(tctx
, inst
);
1391 case TGSI_OPCODE_DP2
:
1392 if (!ctx
->config
->lower_DP2
)
1394 transform_dotp(tctx
, inst
);
1396 case TGSI_OPCODE_DP2A
:
1397 if (!ctx
->config
->lower_DP2A
)
1399 transform_dotp(tctx
, inst
);
1401 case TGSI_OPCODE_TEX
:
1402 case TGSI_OPCODE_TXP
:
1403 case TGSI_OPCODE_TXB
:
1404 case TGSI_OPCODE_TXB2
:
1405 case TGSI_OPCODE_TXL
:
1406 if (transform_samp(tctx
, inst
))
1411 tctx
->emit_instruction(tctx
, inst
);
1416 /* returns NULL if no lowering required, else returns the new
1417 * tokens (which caller is required to free()). In either case
1418 * returns the current info.
1420 const struct tgsi_token
*
1421 tgsi_transform_lowering(const struct tgsi_lowering_config
*config
,
1422 const struct tgsi_token
*tokens
,
1423 struct tgsi_shader_info
*info
)
1425 struct tgsi_lowering_context ctx
;
1426 struct tgsi_token
*newtoks
;
1429 /* sanity check in case limit is ever increased: */
1430 assert((sizeof(config
->saturate_s
) * 8) >= PIPE_MAX_SAMPLERS
);
1432 memset(&ctx
, 0, sizeof(ctx
));
1433 ctx
.base
.transform_instruction
= transform_instr
;
1435 ctx
.config
= config
;
1437 tgsi_scan_shader(tokens
, info
);
1439 /* if we are adding fragment shader support to emulate two-sided
1440 * color, then figure out the number of additional inputs we need
1441 * to create for BCOLOR's..
1443 if ((info
->processor
== TGSI_PROCESSOR_FRAGMENT
) &&
1444 config
->color_two_side
) {
1447 for (i
= 0; i
<= info
->file_max
[TGSI_FILE_INPUT
]; i
++) {
1448 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_COLOR
)
1449 ctx
.two_side_idx
[ctx
.two_side_colors
++] = i
;
1450 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_FACE
)
1455 ctx
.saturate
= config
->saturate_r
| config
->saturate_s
| config
->saturate_t
;
1457 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1458 /* if there are no instructions to lower, then we are done: */
1474 ctx
.two_side_colors
||
1479 _debug_printf("BEFORE:");
1480 tgsi_dump(tokens
, 0);
1484 newlen
= tgsi_num_tokens(tokens
);
1486 newlen
+= DST_GROW
* OPCS(DST
);
1487 numtmp
= MAX2(numtmp
, DST_TMP
);
1490 newlen
+= XPD_GROW
* OPCS(XPD
);
1491 numtmp
= MAX2(numtmp
, XPD_TMP
);
1494 newlen
+= SCS_GROW
* OPCS(SCS
);
1495 numtmp
= MAX2(numtmp
, SCS_TMP
);
1498 newlen
+= LRP_GROW
* OPCS(LRP
);
1499 numtmp
= MAX2(numtmp
, LRP_TMP
);
1502 newlen
+= FRC_GROW
* OPCS(FRC
);
1503 numtmp
= MAX2(numtmp
, FRC_TMP
);
1506 newlen
+= POW_GROW
* OPCS(POW
);
1507 numtmp
= MAX2(numtmp
, POW_TMP
);
1510 newlen
+= LIT_GROW
* OPCS(LIT
);
1511 numtmp
= MAX2(numtmp
, LIT_TMP
);
1514 newlen
+= EXP_GROW
* OPCS(EXP
);
1515 numtmp
= MAX2(numtmp
, EXP_TMP
);
1518 newlen
+= LOG_GROW
* OPCS(LOG
);
1519 numtmp
= MAX2(numtmp
, LOG_TMP
);
1522 newlen
+= DP4_GROW
* OPCS(DP4
);
1523 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1526 newlen
+= DP3_GROW
* OPCS(DP3
);
1527 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1530 newlen
+= DPH_GROW
* OPCS(DPH
);
1531 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1534 newlen
+= DP2_GROW
* OPCS(DP2
);
1535 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1538 newlen
+= DP2A_GROW
* OPCS(DP2A
);
1539 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1541 if (ctx
.saturate
|| config
->lower_TXP
) {
1545 n
= info
->opcode_count
[TGSI_OPCODE_TEX
] +
1546 info
->opcode_count
[TGSI_OPCODE_TXP
] +
1547 info
->opcode_count
[TGSI_OPCODE_TXB
] +
1548 info
->opcode_count
[TGSI_OPCODE_TXB2
] +
1549 info
->opcode_count
[TGSI_OPCODE_TXL
];
1550 } else if (config
->lower_TXP
) {
1551 n
= info
->opcode_count
[TGSI_OPCODE_TXP
];
1554 newlen
+= SAMP_GROW
* n
;
1555 numtmp
= MAX2(numtmp
, SAMP_TMP
);
1558 /* specifically don't include two_side_colors temps in the count: */
1559 ctx
.numtmp
= numtmp
;
1561 if (ctx
.two_side_colors
) {
1562 newlen
+= TWOSIDE_GROW(ctx
.two_side_colors
);
1563 /* note: we permanently consume temp regs, re-writing references
1564 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1565 * instruction that selects which varying to use):
1567 numtmp
+= ctx
.two_side_colors
;
1570 newlen
+= 2 * numtmp
;
1571 newlen
+= 5; /* immediate */
1573 newtoks
= tgsi_alloc_tokens(newlen
);
1577 tgsi_transform_shader(tokens
, newtoks
, newlen
, &ctx
.base
);
1579 tgsi_scan_shader(newtoks
, info
);
1582 _debug_printf("AFTER:");
1583 tgsi_dump(newtoks
, 0);