/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
29 #include "tgsi/tgsi_transform.h"
30 #include "tgsi/tgsi_scan.h"
31 #include "tgsi/tgsi_dump.h"
33 #include "util/u_debug.h"
34 #include "util/u_math.h"
36 #include "freedreno_lowering.h"
38 struct fd_lowering_context
{
39 struct tgsi_transform_context base
;
40 const struct fd_lowering_config
*config
;
41 struct tgsi_shader_info
*info
;
42 unsigned two_side_colors
;
43 unsigned two_side_idx
[PIPE_MAX_SHADER_INPUTS
];
44 unsigned color_base
; /* base register for chosen COLOR/BCOLOR's */
48 struct tgsi_full_src_register src
;
49 struct tgsi_full_dst_register dst
;
53 struct tgsi_full_src_register imm
;
/* Downcast a generic TGSI transform context to the lowering context.
 *
 * This is a plain pointer cast, so it is only meaningful when tctx really
 * addresses the 'base' member of a struct fd_lowering_context.
 */
static inline struct fd_lowering_context *
fd_lowering_context(struct tgsi_transform_context *tctx)
{
	struct fd_lowering_context *ctx = (struct fd_lowering_context *)tctx;

	return ctx;
}
69 reg_dst(struct tgsi_full_dst_register
*dst
,
70 const struct tgsi_full_dst_register
*orig_dst
, unsigned wrmask
)
73 dst
->Register
.WriteMask
&= wrmask
;
74 assert(dst
->Register
.WriteMask
);
78 get_swiz(unsigned *swiz
, const struct tgsi_src_register
*src
)
80 swiz
[0] = src
->SwizzleX
;
81 swiz
[1] = src
->SwizzleY
;
82 swiz
[2] = src
->SwizzleZ
;
83 swiz
[3] = src
->SwizzleW
;
87 reg_src(struct tgsi_full_src_register
*src
,
88 const struct tgsi_full_src_register
*orig_src
,
89 unsigned sx
, unsigned sy
, unsigned sz
, unsigned sw
)
92 get_swiz(swiz
, &orig_src
->Register
);
94 src
->Register
.SwizzleX
= swiz
[sx
];
95 src
->Register
.SwizzleY
= swiz
[sy
];
96 src
->Register
.SwizzleZ
= swiz
[sz
];
97 src
->Register
.SwizzleW
= swiz
[sw
];
/* '_' marks a channel whose selector does not matter; it maps to X. */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */

/* Expand four channel letters (X, Y, Z, W or _) into the corresponding
 * comma-separated TGSI_SWIZZLE_* values, e.g. as the trailing arguments
 * of reg_src().
 */
#define SWIZ(cx, cy, cz, cw) \
		TGSI_SWIZZLE_ ## cx, TGSI_SWIZZLE_ ## cy, \
		TGSI_SWIZZLE_ ## cz, TGSI_SWIZZLE_ ## cw
105 * if (dst.x aliases src.x) {
111 * MOV dst.zw, imm{0.0, 1.0}
114 aliases(const struct tgsi_full_dst_register
*dst
, unsigned dst_mask
,
115 const struct tgsi_full_src_register
*src
, unsigned src_mask
)
117 if ((dst
->Register
.File
== src
->Register
.File
) &&
118 (dst
->Register
.Index
== src
->Register
.Index
)) {
119 unsigned i
, actual_mask
= 0;
121 get_swiz(swiz
, &src
->Register
);
122 for (i
= 0; i
< 4; i
++)
123 if (src_mask
& (1 << i
))
124 actual_mask
|= (1 << swiz
[i
]);
125 if (actual_mask
& dst_mask
)
132 create_mov(struct tgsi_transform_context
*tctx
,
133 const struct tgsi_full_dst_register
*dst
,
134 const struct tgsi_full_src_register
*src
,
135 unsigned mask
, unsigned saturate
)
137 struct tgsi_full_instruction new_inst
;
139 new_inst
= tgsi_default_full_instruction();
140 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
141 new_inst
.Instruction
.Saturate
= saturate
;
142 new_inst
.Instruction
.NumDstRegs
= 1;
143 reg_dst(&new_inst
.Dst
[0], dst
, mask
);
144 new_inst
.Instruction
.NumSrcRegs
= 1;
145 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,Z
,W
));
146 tctx
->emit_instruction(tctx
, &new_inst
);
/* to help calculate # of tgsi tokens for a lowering.. we assume
 * the worst case, ie. removed instructions don't have ADDR[] or
 * anything which increases the # of tokens per src/dst and the
 * inserted instructions do.
 *
 * OINST() - old instruction
 *    1         : instruction itself
 *    1         : dst
 *    1 * nargs : one token per src
 *
 * NINST() - new instruction
 *    1         : instruction itself
 *    2         : dst
 *    2 * nargs : two tokens per src
 */
#define OINST(nargs)  (2 + (nargs))
#define NINST(nargs)  (3 + 2 * (nargs))
169 * Lowering Translators:
172 /* DST - Distance Vector
174 * dst.y = src0.y \times src1.y
178 * ; note: could be more clever and use just a single temp
179 * ; if I was clever enough to re-write the swizzles.
180 * ; needs: 2 tmp, imm{1.0}
181 * if (dst.y aliases src0.z) {
182 * MOV tmpA.yz, src0.yz
185 * if (dst.yz aliases src1.w) {
186 * MOV tmpB.yw, src1.yw
189 * MUL dst.y, src0.y, src1.y
192 * MOV dst.x, imm{1.0}
194 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
195 NINST(1) + NINST(1) - OINST(2))
198 transform_dst(struct tgsi_transform_context
*tctx
,
199 struct tgsi_full_instruction
*inst
)
201 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
202 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
203 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
204 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
205 struct tgsi_full_instruction new_inst
;
207 if (aliases(dst
, TGSI_WRITEMASK_Y
, src0
, TGSI_WRITEMASK_Z
)) {
208 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src0
, TGSI_WRITEMASK_YZ
, 0);
209 src0
= &ctx
->tmp
[A
].src
;
212 if (aliases(dst
, TGSI_WRITEMASK_YZ
, src1
, TGSI_WRITEMASK_W
)) {
213 create_mov(tctx
, &ctx
->tmp
[B
].dst
, src1
, TGSI_WRITEMASK_YW
, 0);
214 src1
= &ctx
->tmp
[B
].src
;
217 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
218 /* MUL dst.y, src0.y, src1.y */
219 new_inst
= tgsi_default_full_instruction();
220 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
221 new_inst
.Instruction
.NumDstRegs
= 1;
222 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
223 new_inst
.Instruction
.NumSrcRegs
= 2;
224 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
,Y
,_
,_
));
225 reg_src(&new_inst
.Src
[1], src1
, SWIZ(_
,Y
,_
,_
));
226 tctx
->emit_instruction(tctx
, &new_inst
);
229 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
230 /* MOV dst.z, src0.z */
231 new_inst
= tgsi_default_full_instruction();
232 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
233 new_inst
.Instruction
.NumDstRegs
= 1;
234 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
235 new_inst
.Instruction
.NumSrcRegs
= 1;
236 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
,_
,Z
,_
));
237 tctx
->emit_instruction(tctx
, &new_inst
);
240 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
241 /* MOV dst.w, src1.w */
242 new_inst
= tgsi_default_full_instruction();
243 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
244 new_inst
.Instruction
.NumDstRegs
= 1;
245 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
246 new_inst
.Instruction
.NumSrcRegs
= 1;
247 reg_src(&new_inst
.Src
[0], src1
, SWIZ(_
,_
,_
,W
));
248 tctx
->emit_instruction(tctx
, &new_inst
);
251 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
252 /* MOV dst.x, imm{1.0} */
253 new_inst
= tgsi_default_full_instruction();
254 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
255 new_inst
.Instruction
.NumDstRegs
= 1;
256 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
257 new_inst
.Instruction
.NumSrcRegs
= 1;
258 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
,_
,_
,_
));
259 tctx
->emit_instruction(tctx
, &new_inst
);
263 /* XPD - Cross Product
264 * dst.x = src0.y \times src1.z - src1.y \times src0.z
265 * dst.y = src0.z \times src1.x - src1.z \times src0.x
266 * dst.z = src0.x \times src1.y - src1.x \times src0.y
269 * ; needs: 2 tmp, imm{1.0}
270 * MUL tmpA.xyz, src0.yzx, src1.zxy
271 * MUL tmpB.xyz, src1.yzx, src0.zxy
272 * SUB dst.xyz, tmpA.xyz, tmpB.xyz
273 * MOV dst.w, imm{1.0}
275 #define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
278 transform_xpd(struct tgsi_transform_context
*tctx
,
279 struct tgsi_full_instruction
*inst
)
281 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
282 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
283 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
284 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
285 struct tgsi_full_instruction new_inst
;
287 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
288 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
289 new_inst
= tgsi_default_full_instruction();
290 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
291 new_inst
.Instruction
.NumDstRegs
= 1;
292 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZ
);
293 new_inst
.Instruction
.NumSrcRegs
= 2;
294 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
,Z
,X
,_
));
295 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
,X
,Y
,_
));
296 tctx
->emit_instruction(tctx
, &new_inst
);
298 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
299 new_inst
= tgsi_default_full_instruction();
300 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
301 new_inst
.Instruction
.NumDstRegs
= 1;
302 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZ
);
303 new_inst
.Instruction
.NumSrcRegs
= 2;
304 reg_src(&new_inst
.Src
[0], src1
, SWIZ(Y
,Z
,X
,_
));
305 reg_src(&new_inst
.Src
[1], src0
, SWIZ(Z
,X
,Y
,_
));
306 tctx
->emit_instruction(tctx
, &new_inst
);
308 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
309 new_inst
= tgsi_default_full_instruction();
310 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
311 new_inst
.Instruction
.NumDstRegs
= 1;
312 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZ
);
313 new_inst
.Instruction
.NumSrcRegs
= 2;
314 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,_
));
315 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
,Y
,Z
,_
));
316 tctx
->emit_instruction(tctx
, &new_inst
);
319 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
320 /* MOV dst.w, imm{1.0} */
321 new_inst
= tgsi_default_full_instruction();
322 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
323 new_inst
.Instruction
.NumDstRegs
= 1;
324 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
325 new_inst
.Instruction
.NumSrcRegs
= 1;
326 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,_
,Y
));
327 tctx
->emit_instruction(tctx
, &new_inst
);
332 * dst.x = \cos{src.x}
333 * dst.y = \sin{src.x}
337 * ; needs: 1 tmp, imm{0.0, 1.0}
338 * if (dst.x aliases src.x) {
344 * MOV dst.zw, imm{0.0, 1.0}
346 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
349 transform_scs(struct tgsi_transform_context
*tctx
,
350 struct tgsi_full_instruction
*inst
)
352 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
353 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
354 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
355 struct tgsi_full_instruction new_inst
;
357 if (aliases(dst
, TGSI_WRITEMASK_X
, src
, TGSI_WRITEMASK_X
)) {
358 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src
, TGSI_WRITEMASK_X
, 0);
359 src
= &ctx
->tmp
[A
].src
;
362 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
363 /* COS dst.x, src.x */
364 new_inst
= tgsi_default_full_instruction();
365 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_COS
;
366 new_inst
.Instruction
.NumDstRegs
= 1;
367 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
368 new_inst
.Instruction
.NumSrcRegs
= 1;
369 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
370 tctx
->emit_instruction(tctx
, &new_inst
);
373 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
374 /* SIN dst.y, src.x */
375 new_inst
= tgsi_default_full_instruction();
376 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SIN
;
377 new_inst
.Instruction
.NumDstRegs
= 1;
378 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
379 new_inst
.Instruction
.NumSrcRegs
= 1;
380 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
381 tctx
->emit_instruction(tctx
, &new_inst
);
384 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_ZW
) {
385 /* MOV dst.zw, imm{0.0, 1.0} */
386 new_inst
= tgsi_default_full_instruction();
387 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
388 new_inst
.Instruction
.NumDstRegs
= 1;
389 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_ZW
);
390 new_inst
.Instruction
.NumSrcRegs
= 1;
391 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,X
,Y
));
392 tctx
->emit_instruction(tctx
, &new_inst
);
396 /* LRP - Linear Interpolate
397 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
398 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
399 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
400 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
402 * ; needs: 2 tmp, imm{1.0}
403 * MUL tmpA, src0, src1
404 * SUB tmpB, imm{1.0}, src0
405 * MUL tmpB, tmpB, src2
406 * ADD dst, tmpA, tmpB
408 #define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
411 transform_lrp(struct tgsi_transform_context
*tctx
,
412 struct tgsi_full_instruction
*inst
)
414 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
415 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
416 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
417 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
418 struct tgsi_full_src_register
*src2
= &inst
->Src
[2];
419 struct tgsi_full_instruction new_inst
;
421 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
422 /* MUL tmpA, src0, src1 */
423 new_inst
= tgsi_default_full_instruction();
424 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
425 new_inst
.Instruction
.NumDstRegs
= 1;
426 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
427 new_inst
.Instruction
.NumSrcRegs
= 2;
428 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
,Y
,Z
,W
));
429 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
,Y
,Z
,W
));
430 tctx
->emit_instruction(tctx
, &new_inst
);
432 /* SUB tmpB, imm{1.0}, src0 */
433 new_inst
= tgsi_default_full_instruction();
434 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
435 new_inst
.Instruction
.NumDstRegs
= 1;
436 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZW
);
437 new_inst
.Instruction
.NumSrcRegs
= 2;
438 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
,Y
,Y
,Y
));
439 reg_src(&new_inst
.Src
[1], src0
, SWIZ(X
,Y
,Z
,W
));
440 tctx
->emit_instruction(tctx
, &new_inst
);
442 /* MUL tmpB, tmpB, src2 */
443 new_inst
= tgsi_default_full_instruction();
444 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
445 new_inst
.Instruction
.NumDstRegs
= 1;
446 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZW
);
447 new_inst
.Instruction
.NumSrcRegs
= 2;
448 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[B
].src
, SWIZ(X
,Y
,Z
,W
));
449 reg_src(&new_inst
.Src
[1], src2
, SWIZ(X
,Y
,Z
,W
));
450 tctx
->emit_instruction(tctx
, &new_inst
);
452 /* ADD dst, tmpA, tmpB */
453 new_inst
= tgsi_default_full_instruction();
454 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
455 new_inst
.Instruction
.NumDstRegs
= 1;
456 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
457 new_inst
.Instruction
.NumSrcRegs
= 2;
458 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,W
));
459 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
,Y
,Z
,W
));
460 tctx
->emit_instruction(tctx
, &new_inst
);
465 * dst.x = src.x - \lfloor src.x\rfloor
466 * dst.y = src.y - \lfloor src.y\rfloor
467 * dst.z = src.z - \lfloor src.z\rfloor
468 * dst.w = src.w - \lfloor src.w\rfloor
474 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
477 transform_frc(struct tgsi_transform_context
*tctx
,
478 struct tgsi_full_instruction
*inst
)
480 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
481 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
482 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
483 struct tgsi_full_instruction new_inst
;
485 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
487 new_inst
= tgsi_default_full_instruction();
488 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
489 new_inst
.Instruction
.NumDstRegs
= 1;
490 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
491 new_inst
.Instruction
.NumSrcRegs
= 1;
492 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,Z
,W
));
493 tctx
->emit_instruction(tctx
, &new_inst
);
495 /* SUB dst, src, tmpA */
496 new_inst
= tgsi_default_full_instruction();
497 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
498 new_inst
.Instruction
.NumDstRegs
= 1;
499 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
500 new_inst
.Instruction
.NumSrcRegs
= 2;
501 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,Z
,W
));
502 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,W
));
503 tctx
->emit_instruction(tctx
, &new_inst
);
508 * dst.x = src0.x^{src1.x}
509 * dst.y = src0.x^{src1.x}
510 * dst.z = src0.x^{src1.x}
511 * dst.w = src0.x^{src1.x}
515 * MUL tmpA.x, src1.x, tmpA.x
518 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
521 transform_pow(struct tgsi_transform_context
*tctx
,
522 struct tgsi_full_instruction
*inst
)
524 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
525 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
526 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
527 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
528 struct tgsi_full_instruction new_inst
;
530 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
531 /* LG2 tmpA.x, src0.x */
532 new_inst
= tgsi_default_full_instruction();
533 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
534 new_inst
.Instruction
.NumDstRegs
= 1;
535 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
536 new_inst
.Instruction
.NumSrcRegs
= 1;
537 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
,_
,_
,_
));
538 tctx
->emit_instruction(tctx
, &new_inst
);
540 /* MUL tmpA.x, src1.x, tmpA.x */
541 new_inst
= tgsi_default_full_instruction();
542 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
543 new_inst
.Instruction
.NumDstRegs
= 1;
544 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
545 new_inst
.Instruction
.NumSrcRegs
= 2;
546 reg_src(&new_inst
.Src
[0], src1
, SWIZ(X
,_
,_
,_
));
547 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,_
,_
,_
));
548 tctx
->emit_instruction(tctx
, &new_inst
);
550 /* EX2 dst, tmpA.x */
551 new_inst
= tgsi_default_full_instruction();
552 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
553 new_inst
.Instruction
.NumDstRegs
= 1;
554 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
555 new_inst
.Instruction
.NumSrcRegs
= 1;
556 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,_
,_
,_
));
557 tctx
->emit_instruction(tctx
, &new_inst
);
/* LIT - Light Coefficients
 *   dst.x = 1.0
 *   dst.y = max(src.x, 0.0)
 *   dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0
 *   dst.w = 1.0
 *
 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
 * MAX tmpA.xy, src.xy, imm{0.0}
 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
 * LG2 tmpA.y, tmpA.y
 * MUL tmpA.y, tmpA.z, tmpA.y
 * EX2 tmpA.y, tmpA.y
 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
 * MOV dst.yz, tmpA.xy
 * MOV dst.xw, imm{1.0}
 */
/* One NINST() term per emitted instruction, in emission order.  MAX takes
 * two sources, so it is budgeted as NINST(2) to keep this a true worst-case
 * token estimate.
 */
#define LIT_GROW (NINST(2) + NINST(3) + NINST(1) + NINST(2) + \
		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
581 transform_lit(struct tgsi_transform_context
*tctx
,
582 struct tgsi_full_instruction
*inst
)
584 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
585 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
586 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
587 struct tgsi_full_instruction new_inst
;
589 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_YZ
) {
590 /* MAX tmpA.xy, src.xy, imm{0.0} */
591 new_inst
= tgsi_default_full_instruction();
592 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAX
;
593 new_inst
.Instruction
.NumDstRegs
= 1;
594 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XY
);
595 new_inst
.Instruction
.NumSrcRegs
= 2;
596 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,_
,_
));
597 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(X
,X
,_
,_
));
598 tctx
->emit_instruction(tctx
, &new_inst
);
600 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
601 new_inst
= tgsi_default_full_instruction();
602 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CLAMP
;
603 new_inst
.Instruction
.NumDstRegs
= 1;
604 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
605 new_inst
.Instruction
.NumSrcRegs
= 3;
606 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,_
,W
,_
));
607 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(_
,_
,Z
,_
));
608 new_inst
.Src
[1].Register
.Negate
= true;
609 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
,_
,Z
,_
));
610 tctx
->emit_instruction(tctx
, &new_inst
);
612 /* LG2 tmpA.y, tmpA.y */
613 new_inst
= tgsi_default_full_instruction();
614 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
615 new_inst
.Instruction
.NumDstRegs
= 1;
616 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
617 new_inst
.Instruction
.NumSrcRegs
= 1;
618 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,_
,_
));
619 tctx
->emit_instruction(tctx
, &new_inst
);
621 /* MUL tmpA.y, tmpA.z, tmpA.y */
622 new_inst
= tgsi_default_full_instruction();
623 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
624 new_inst
.Instruction
.NumDstRegs
= 1;
625 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
626 new_inst
.Instruction
.NumSrcRegs
= 2;
627 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,Z
,_
,_
));
628 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,Y
,_
,_
));
629 tctx
->emit_instruction(tctx
, &new_inst
);
631 /* EX2 tmpA.y, tmpA.y */
632 new_inst
= tgsi_default_full_instruction();
633 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
634 new_inst
.Instruction
.NumDstRegs
= 1;
635 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
636 new_inst
.Instruction
.NumSrcRegs
= 1;
637 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,_
,_
));
638 tctx
->emit_instruction(tctx
, &new_inst
);
640 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
641 new_inst
= tgsi_default_full_instruction();
642 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
643 new_inst
.Instruction
.NumDstRegs
= 1;
644 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
645 new_inst
.Instruction
.NumSrcRegs
= 3;
646 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,X
,_
,_
));
647 new_inst
.Src
[0].Register
.Negate
= true;
648 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,Y
,_
,_
));
649 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
,X
,_
,_
));
650 tctx
->emit_instruction(tctx
, &new_inst
);
652 /* MOV dst.yz, tmpA.xy */
653 new_inst
= tgsi_default_full_instruction();
654 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
655 new_inst
.Instruction
.NumDstRegs
= 1;
656 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_YZ
);
657 new_inst
.Instruction
.NumSrcRegs
= 1;
658 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,X
,Y
,_
));
659 tctx
->emit_instruction(tctx
, &new_inst
);
662 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XW
) {
663 /* MOV dst.xw, imm{1.0} */
664 new_inst
= tgsi_default_full_instruction();
665 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
666 new_inst
.Instruction
.NumDstRegs
= 1;
667 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XW
);
668 new_inst
.Instruction
.NumSrcRegs
= 1;
669 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
,_
,_
,Y
));
670 tctx
->emit_instruction(tctx
, &new_inst
);
674 /* EXP - Approximate Exponential Base 2
675 * dst.x = 2^{\lfloor src.x\rfloor}
676 * dst.y = src.x - \lfloor src.x\rfloor
680 * ; needs: 1 tmp, imm{1.0}
683 * SUB dst.y, src.x, tmpA.x
686 * MOV dst.w, imm{1.0}
688 #define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
689 NINST(1)+ NINST(1) - OINST(1))
692 transform_exp(struct tgsi_transform_context
*tctx
,
693 struct tgsi_full_instruction
*inst
)
695 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
696 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
697 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
698 struct tgsi_full_instruction new_inst
;
700 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
701 /* FLR tmpA.x, src.x */
702 new_inst
= tgsi_default_full_instruction();
703 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
704 new_inst
.Instruction
.NumDstRegs
= 1;
705 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
706 new_inst
.Instruction
.NumSrcRegs
= 1;
707 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
708 tctx
->emit_instruction(tctx
, &new_inst
);
711 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
712 /* EX2 tmpA.y, src.x */
713 new_inst
= tgsi_default_full_instruction();
714 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
715 new_inst
.Instruction
.NumDstRegs
= 1;
716 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
717 new_inst
.Instruction
.NumSrcRegs
= 1;
718 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
719 tctx
->emit_instruction(tctx
, &new_inst
);
722 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
723 /* SUB dst.y, src.x, tmpA.x */
724 new_inst
= tgsi_default_full_instruction();
725 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
726 new_inst
.Instruction
.NumDstRegs
= 1;
727 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
728 new_inst
.Instruction
.NumSrcRegs
= 2;
729 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,X
,_
,_
));
730 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,X
,_
,_
));
731 tctx
->emit_instruction(tctx
, &new_inst
);
734 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
735 /* EX2 dst.x, tmpA.x */
736 new_inst
= tgsi_default_full_instruction();
737 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
738 new_inst
.Instruction
.NumDstRegs
= 1;
739 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
740 new_inst
.Instruction
.NumSrcRegs
= 1;
741 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,_
,_
,_
));
742 tctx
->emit_instruction(tctx
, &new_inst
);
745 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
746 /* MOV dst.z, tmpA.y */
747 new_inst
= tgsi_default_full_instruction();
748 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
749 new_inst
.Instruction
.NumDstRegs
= 1;
750 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
751 new_inst
.Instruction
.NumSrcRegs
= 1;
752 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,_
,Y
,_
));
753 tctx
->emit_instruction(tctx
, &new_inst
);
756 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
757 /* MOV dst.w, imm{1.0} */
758 new_inst
= tgsi_default_full_instruction();
759 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
760 new_inst
.Instruction
.NumDstRegs
= 1;
761 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
762 new_inst
.Instruction
.NumSrcRegs
= 1;
763 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,_
,Y
));
764 tctx
->emit_instruction(tctx
, &new_inst
);
768 /* LOG - Approximate Logarithm Base 2
769 * dst.x = \lfloor\log_2{|src.x|}\rfloor
770 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
771 * dst.z = \log_2{|src.x|}
774 * ; needs: 1 tmp, imm{1.0}
775 * LG2 tmpA.x, |src.x|
779 * MUL dst.y, |src.x|, tmpA.z
780 * MOV dst.xz, tmpA.yx
781 * MOV dst.w, imm{1.0}
783 #define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
784 NINST(2) + NINST(1) + NINST(1) - OINST(1))
787 transform_log(struct tgsi_transform_context
*tctx
,
788 struct tgsi_full_instruction
*inst
)
790 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
791 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
792 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
793 struct tgsi_full_instruction new_inst
;
795 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
796 /* LG2 tmpA.x, |src.x| */
797 new_inst
= tgsi_default_full_instruction();
798 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
799 new_inst
.Instruction
.NumDstRegs
= 1;
800 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
801 new_inst
.Instruction
.NumSrcRegs
= 1;
802 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
803 new_inst
.Src
[0].Register
.Absolute
= true;
804 tctx
->emit_instruction(tctx
, &new_inst
);
807 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
808 /* FLR tmpA.y, tmpA.x */
809 new_inst
= tgsi_default_full_instruction();
810 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
811 new_inst
.Instruction
.NumDstRegs
= 1;
812 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
813 new_inst
.Instruction
.NumSrcRegs
= 1;
814 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,X
,_
,_
));
815 tctx
->emit_instruction(tctx
, &new_inst
);
818 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
819 /* EX2 tmpA.z, tmpA.y */
820 new_inst
= tgsi_default_full_instruction();
821 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
822 new_inst
.Instruction
.NumDstRegs
= 1;
823 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
824 new_inst
.Instruction
.NumSrcRegs
= 1;
825 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,_
,_
));
826 tctx
->emit_instruction(tctx
, &new_inst
);
828 /* RCP tmpA.z, tmpA.z */
829 new_inst
= tgsi_default_full_instruction();
830 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
831 new_inst
.Instruction
.NumDstRegs
= 1;
832 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
833 new_inst
.Instruction
.NumSrcRegs
= 1;
834 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Z
,_
,_
,_
));
835 tctx
->emit_instruction(tctx
, &new_inst
);
837 /* MUL dst.y, |src.x|, tmpA.z */
838 new_inst
= tgsi_default_full_instruction();
839 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
840 new_inst
.Instruction
.NumDstRegs
= 1;
841 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
842 new_inst
.Instruction
.NumSrcRegs
= 2;
843 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,X
,_
,_
));
844 new_inst
.Src
[0].Register
.Absolute
= true;
845 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,Z
,_
,_
));
846 tctx
->emit_instruction(tctx
, &new_inst
);
849 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XZ
) {
850 /* MOV dst.xz, tmpA.yx */
851 new_inst
= tgsi_default_full_instruction();
852 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
853 new_inst
.Instruction
.NumDstRegs
= 1;
854 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XZ
);
855 new_inst
.Instruction
.NumSrcRegs
= 1;
856 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,X
,_
));
857 tctx
->emit_instruction(tctx
, &new_inst
);
860 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
861 /* MOV dst.w, imm{1.0} */
862 new_inst
= tgsi_default_full_instruction();
863 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
864 new_inst
.Instruction
.NumDstRegs
= 1;
865 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
866 new_inst
.Instruction
.NumSrcRegs
= 1;
867 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,_
,Y
));
868 tctx
->emit_instruction(tctx
, &new_inst
);
/* DP4 - 4-component Dot Product
 *   dst = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
 *
 * DP3 - 3-component Dot Product
 *   dst = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
 *
 * DPH - Homogeneous Dot Product
 *   dst = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 *
 * DP2 - 2-component Dot Product
 *   dst = src0.x * src1.x + src0.y * src1.y
 *
 * DP2A - 2-component Dot Product And Add
 *   dst = src0.x * src1.x + src0.y * src1.y + src2.x
 *
 * NOTE: these are translated into a sequence of scalar MUL/MAD(/ADD)
 * operations, which is what you'd prefer for an ISA that is natively
 * scalar.  Probably a native vector ISA would at least already have
 * DP4/DP3 instructions, but perhaps there is room for an alternative
 * translation for DPH/DP2/DP2A using vector instructions.
 *
 * MUL tmpA.x, src0.x, src1.x
 * MAD tmpA.x, src0.y, src1.y, tmpA.x
 * if (DPH || DP3 || DP4) {
 *   MAD tmpA.x, src0.z, src1.z, tmpA.x
 *   if (DPH) {
 *     ADD tmpA.x, src1.w, tmpA.x
 *   } else if (DP4) {
 *     MAD tmpA.x, src0.w, src1.w, tmpA.x
 *   }
 * } else if (DP2A) {
 *   ADD tmpA.x, src2.x, tmpA.x
 * }
 * ; fixup last instruction to replicate into dst
 */
908 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
909 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
910 #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
911 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
912 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
915 transform_dotp(struct tgsi_transform_context
*tctx
,
916 struct tgsi_full_instruction
*inst
)
918 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
919 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
920 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
921 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
922 struct tgsi_full_src_register
*src2
= &inst
->Src
[2]; /* only DP2A */
923 struct tgsi_full_instruction new_inst
;
924 unsigned opcode
= inst
->Instruction
.Opcode
;
926 /* NOTE: any potential last instruction must replicate src on all
927 * components (since it could be re-written to write to final dst)
930 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
931 /* MUL tmpA.x, src0.x, src1.x */
932 new_inst
= tgsi_default_full_instruction();
933 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
934 new_inst
.Instruction
.NumDstRegs
= 1;
935 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
936 new_inst
.Instruction
.NumSrcRegs
= 2;
937 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
,_
,_
,_
));
938 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
,_
,_
,_
));
939 tctx
->emit_instruction(tctx
, &new_inst
);
941 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
942 new_inst
= tgsi_default_full_instruction();
943 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
944 new_inst
.Instruction
.NumDstRegs
= 1;
945 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
946 new_inst
.Instruction
.NumSrcRegs
= 3;
947 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
,Y
,Y
,Y
));
948 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Y
,Y
,Y
,Y
));
949 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
951 if ((opcode
== TGSI_OPCODE_DPH
) ||
952 (opcode
== TGSI_OPCODE_DP3
) ||
953 (opcode
== TGSI_OPCODE_DP4
)) {
954 tctx
->emit_instruction(tctx
, &new_inst
);
956 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
957 new_inst
= tgsi_default_full_instruction();
958 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
959 new_inst
.Instruction
.NumDstRegs
= 1;
960 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
961 new_inst
.Instruction
.NumSrcRegs
= 3;
962 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Z
,Z
,Z
,Z
));
963 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
,Z
,Z
,Z
));
964 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
966 if (opcode
== TGSI_OPCODE_DPH
) {
967 tctx
->emit_instruction(tctx
, &new_inst
);
969 /* ADD tmpA.x, src1.w, tmpA.x */
970 new_inst
= tgsi_default_full_instruction();
971 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
972 new_inst
.Instruction
.NumDstRegs
= 1;
973 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
974 new_inst
.Instruction
.NumSrcRegs
= 2;
975 reg_src(&new_inst
.Src
[0], src1
, SWIZ(W
,W
,W
,W
));
976 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
977 } else if (opcode
== TGSI_OPCODE_DP4
) {
978 tctx
->emit_instruction(tctx
, &new_inst
);
980 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
981 new_inst
= tgsi_default_full_instruction();
982 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
983 new_inst
.Instruction
.NumDstRegs
= 1;
984 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
985 new_inst
.Instruction
.NumSrcRegs
= 3;
986 reg_src(&new_inst
.Src
[0], src0
, SWIZ(W
,W
,W
,W
));
987 reg_src(&new_inst
.Src
[1], src1
, SWIZ(W
,W
,W
,W
));
988 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
990 } else if (opcode
== TGSI_OPCODE_DP2A
) {
991 tctx
->emit_instruction(tctx
, &new_inst
);
993 /* ADD tmpA.x, src2.x, tmpA.x */
994 new_inst
= tgsi_default_full_instruction();
995 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
996 new_inst
.Instruction
.NumDstRegs
= 1;
997 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
998 new_inst
.Instruction
.NumSrcRegs
= 2;
999 reg_src(&new_inst
.Src
[0], src2
, SWIZ(X
,X
,X
,X
));
1000 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
1003 /* fixup last instruction to write to dst: */
1004 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
1006 tctx
->emit_instruction(tctx
, &new_inst
);
/* Inserts a MOV_SAT for the needed components of the tex coord.  Note
 * that in the case of TXP, the clamping must happen *after* projection,
 * so we need to lower TXP to TEX.
 *
 *   MOV tmpA, src0
 *   if (opc == TXP) {
 *     ; do perspective division manually before clamping:
 *     RCP tmpB.x, tmpA.w
 *     MUL tmpA.<pmask>, tmpA, tmpB.xxxx
 *     opc = TEX;
 *   }
 *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
 *   <opc> dst, tmpA, ...
 */
1024 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1027 transform_samp(struct tgsi_transform_context
*tctx
,
1028 struct tgsi_full_instruction
*inst
)
1030 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1031 struct tgsi_full_src_register
*coord
= &inst
->Src
[0];
1032 struct tgsi_full_src_register
*samp
;
1033 struct tgsi_full_instruction new_inst
;
1034 /* mask is clamped coords, pmask is all coords (for projection): */
1035 unsigned mask
= 0, pmask
= 0, smask
;
1036 unsigned opcode
= inst
->Instruction
.Opcode
;
1038 if (opcode
== TGSI_OPCODE_TXB2
) {
1039 samp
= &inst
->Src
[2];
1041 samp
= &inst
->Src
[1];
1044 /* convert sampler # to bitmask to test: */
1045 smask
= 1 << samp
->Register
.Index
;
1047 /* check if we actually need to lower this one: */
1048 if (!(ctx
->saturate
& smask
))
1051 /* figure out which coordinates need saturating:
1052 * - RECT textures should not get saturated
1053 * - array index coords should not get saturated
1055 switch (inst
->Texture
.Texture
) {
1056 case TGSI_TEXTURE_3D
:
1057 case TGSI_TEXTURE_CUBE
:
1058 case TGSI_TEXTURE_CUBE_ARRAY
:
1059 case TGSI_TEXTURE_SHADOWCUBE
:
1060 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
1061 if (ctx
->config
->saturate_r
& smask
)
1062 mask
|= TGSI_WRITEMASK_Z
;
1063 pmask
|= TGSI_WRITEMASK_Z
;
1066 case TGSI_TEXTURE_2D
:
1067 case TGSI_TEXTURE_2D_ARRAY
:
1068 case TGSI_TEXTURE_SHADOW2D
:
1069 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
1070 case TGSI_TEXTURE_2D_MSAA
:
1071 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
1072 if (ctx
->config
->saturate_t
& smask
)
1073 mask
|= TGSI_WRITEMASK_Y
;
1074 pmask
|= TGSI_WRITEMASK_Y
;
1077 case TGSI_TEXTURE_1D
:
1078 case TGSI_TEXTURE_1D_ARRAY
:
1079 case TGSI_TEXTURE_SHADOW1D
:
1080 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
1081 if (ctx
->config
->saturate_s
& smask
)
1082 mask
|= TGSI_WRITEMASK_X
;
1083 pmask
|= TGSI_WRITEMASK_X
;
1086 /* TODO: I think we should ignore these?
1087 case TGSI_TEXTURE_RECT:
1088 case TGSI_TEXTURE_SHADOWRECT:
1092 /* sanity check.. driver could be asking to saturate a non-
1093 * existent coordinate component:
1098 /* MOV tmpA, src0 */
1099 create_mov(tctx
, &ctx
->tmp
[A
].dst
, coord
, TGSI_WRITEMASK_XYZW
, 0);
1101 /* This is a bit sad.. we need to clamp *after* the coords
1102 * are projected, which means lowering TXP to TEX and doing
1103 * the projection ourself. But since I haven't figured out
1104 * how to make the lowering code deliver an electric shock
1105 * to anyone using GL_CLAMP, we must do this instead:
1107 if (opcode
== TGSI_OPCODE_TXP
) {
1108 /* RCP tmpB.x tmpA.w */
1109 new_inst
= tgsi_default_full_instruction();
1110 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
1111 new_inst
.Instruction
.NumDstRegs
= 1;
1112 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_X
);
1113 new_inst
.Instruction
.NumSrcRegs
= 1;
1114 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(W
,_
,_
,_
));
1115 tctx
->emit_instruction(tctx
, &new_inst
);
1117 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1118 new_inst
= tgsi_default_full_instruction();
1119 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
1120 new_inst
.Instruction
.NumDstRegs
= 1;
1121 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, pmask
);
1122 new_inst
.Instruction
.NumSrcRegs
= 2;
1123 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,W
));
1124 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
,X
,X
,X
));
1125 tctx
->emit_instruction(tctx
, &new_inst
);
1127 opcode
= TGSI_OPCODE_TEX
;
1130 /* MOV_SAT tmpA.<mask>, tmpA */
1131 create_mov(tctx
, &ctx
->tmp
[A
].dst
, &ctx
->tmp
[A
].src
, mask
,
1134 /* modify the texture samp instruction to take fixed up coord: */
1136 new_inst
.Instruction
.Opcode
= opcode
;
1137 new_inst
.Src
[0] = ctx
->tmp
[A
].src
;
1138 tctx
->emit_instruction(tctx
, &new_inst
);
/* Two-sided color emulation:
 * For each COLOR input, create a corresponding BCOLOR input, plus a
 * CMP instruction to select the front or back color based on FACE.
 */
1147 #define TWOSIDE_GROW(n) ( \
1149 ((n) * 2) + /* IN[] BCOLOR[n] */ \
1150 ((n) * 1) + /* TEMP[] */ \
1151 ((n) * NINST(3)) /* CMP instr */ \
1155 emit_twoside(struct tgsi_transform_context
*tctx
)
1157 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1158 struct tgsi_shader_info
*info
= ctx
->info
;
1159 struct tgsi_full_declaration decl
;
1160 struct tgsi_full_instruction new_inst
;
1161 unsigned inbase
, tmpbase
;
1164 inbase
= info
->file_max
[TGSI_FILE_INPUT
] + 1;
1165 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1167 /* additional inputs for BCOLOR's */
1168 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1169 decl
= tgsi_default_full_declaration();
1170 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1171 decl
.Declaration
.Semantic
= true;
1172 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ i
;
1173 decl
.Semantic
.Name
= TGSI_SEMANTIC_BCOLOR
;
1174 decl
.Semantic
.Index
=
1175 info
->input_semantic_index
[ctx
->two_side_idx
[i
]];
1176 tctx
->emit_declaration(tctx
, &decl
);
1179 /* additional input for FACE */
1180 if (ctx
->two_side_colors
&& (ctx
->face_idx
== -1)) {
1181 decl
= tgsi_default_full_declaration();
1182 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1183 decl
.Declaration
.Semantic
= true;
1184 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ ctx
->two_side_colors
;
1185 decl
.Semantic
.Name
= TGSI_SEMANTIC_FACE
;
1186 decl
.Semantic
.Index
= 0;
1187 tctx
->emit_declaration(tctx
, &decl
);
1189 ctx
->face_idx
= decl
.Range
.First
;
1192 /* additional temps for COLOR/BCOLOR selection: */
1193 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1194 decl
= tgsi_default_full_declaration();
1195 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1196 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ ctx
->numtmp
+ i
;
1197 tctx
->emit_declaration(tctx
, &decl
);
1200 /* and finally additional instructions to select COLOR/BCOLOR: */
1201 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1202 new_inst
= tgsi_default_full_instruction();
1203 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
1205 new_inst
.Instruction
.NumDstRegs
= 1;
1206 new_inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
1207 new_inst
.Dst
[0].Register
.Index
= tmpbase
+ ctx
->numtmp
+ i
;
1208 new_inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1210 new_inst
.Instruction
.NumSrcRegs
= 3;
1211 new_inst
.Src
[0].Register
.File
= TGSI_FILE_INPUT
;
1212 new_inst
.Src
[0].Register
.Index
= ctx
->face_idx
;
1213 new_inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1214 new_inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
1215 new_inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
1216 new_inst
.Src
[0].Register
.SwizzleW
= TGSI_SWIZZLE_X
;
1217 new_inst
.Src
[1].Register
.File
= TGSI_FILE_INPUT
;
1218 new_inst
.Src
[1].Register
.Index
= inbase
+ i
;
1219 new_inst
.Src
[1].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1220 new_inst
.Src
[1].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1221 new_inst
.Src
[1].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1222 new_inst
.Src
[1].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1223 new_inst
.Src
[2].Register
.File
= TGSI_FILE_INPUT
;
1224 new_inst
.Src
[2].Register
.Index
= ctx
->two_side_idx
[i
];
1225 new_inst
.Src
[2].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1226 new_inst
.Src
[2].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1227 new_inst
.Src
[2].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1228 new_inst
.Src
[2].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1230 tctx
->emit_instruction(tctx
, &new_inst
);
1235 emit_decls(struct tgsi_transform_context
*tctx
)
1237 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1238 struct tgsi_shader_info
*info
= ctx
->info
;
1239 struct tgsi_full_declaration decl
;
1240 struct tgsi_full_immediate immed
;
1244 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1246 ctx
->color_base
= tmpbase
+ ctx
->numtmp
;
1248 /* declare immediate: */
1249 immed
= tgsi_default_full_immediate();
1250 immed
.Immediate
.NrTokens
= 1 + 4; /* one for the token itself */
1251 immed
.u
[0].Float
= 0.0;
1252 immed
.u
[1].Float
= 1.0;
1253 immed
.u
[2].Float
= 128.0;
1254 immed
.u
[3].Float
= 0.0;
1255 tctx
->emit_immediate(tctx
, &immed
);
1257 ctx
->imm
.Register
.File
= TGSI_FILE_IMMEDIATE
;
1258 ctx
->imm
.Register
.Index
= info
->immediate_count
;
1259 ctx
->imm
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1260 ctx
->imm
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1261 ctx
->imm
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1262 ctx
->imm
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1264 /* declare temp regs: */
1265 for (i
= 0; i
< ctx
->numtmp
; i
++) {
1266 decl
= tgsi_default_full_declaration();
1267 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1268 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ i
;
1269 tctx
->emit_declaration(tctx
, &decl
);
1271 ctx
->tmp
[i
].src
.Register
.File
= TGSI_FILE_TEMPORARY
;
1272 ctx
->tmp
[i
].src
.Register
.Index
= tmpbase
+ i
;
1273 ctx
->tmp
[i
].src
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1274 ctx
->tmp
[i
].src
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1275 ctx
->tmp
[i
].src
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1276 ctx
->tmp
[i
].src
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1278 ctx
->tmp
[i
].dst
.Register
.File
= TGSI_FILE_TEMPORARY
;
1279 ctx
->tmp
[i
].dst
.Register
.Index
= tmpbase
+ i
;
1280 ctx
->tmp
[i
].dst
.Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1283 if (ctx
->two_side_colors
)
1288 rename_color_inputs(struct fd_lowering_context
*ctx
,
1289 struct tgsi_full_instruction
*inst
)
1292 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1293 struct tgsi_src_register
*src
= &inst
->Src
[i
].Register
;
1294 if (src
->File
== TGSI_FILE_INPUT
) {
1295 for (j
= 0; j
< ctx
->two_side_colors
; j
++) {
1296 if (src
->Index
== ctx
->two_side_idx
[j
]) {
1297 src
->File
= TGSI_FILE_TEMPORARY
;
1298 src
->Index
= ctx
->color_base
+ j
;
1308 transform_instr(struct tgsi_transform_context
*tctx
,
1309 struct tgsi_full_instruction
*inst
)
1311 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1313 if (!ctx
->emitted_decls
) {
1315 ctx
->emitted_decls
= 1;
1318 /* if emulating two-sided-color, we need to re-write some
1321 if (ctx
->two_side_colors
)
1322 rename_color_inputs(ctx
, inst
);
1324 switch (inst
->Instruction
.Opcode
) {
1325 case TGSI_OPCODE_DST
:
1326 if (!ctx
->config
->lower_DST
)
1328 transform_dst(tctx
, inst
);
1330 case TGSI_OPCODE_XPD
:
1331 if (!ctx
->config
->lower_XPD
)
1333 transform_xpd(tctx
, inst
);
1335 case TGSI_OPCODE_SCS
:
1336 if (!ctx
->config
->lower_SCS
)
1338 transform_scs(tctx
, inst
);
1340 case TGSI_OPCODE_LRP
:
1341 if (!ctx
->config
->lower_LRP
)
1343 transform_lrp(tctx
, inst
);
1345 case TGSI_OPCODE_FRC
:
1346 if (!ctx
->config
->lower_FRC
)
1348 transform_frc(tctx
, inst
);
1350 case TGSI_OPCODE_POW
:
1351 if (!ctx
->config
->lower_POW
)
1353 transform_pow(tctx
, inst
);
1355 case TGSI_OPCODE_LIT
:
1356 if (!ctx
->config
->lower_LIT
)
1358 transform_lit(tctx
, inst
);
1360 case TGSI_OPCODE_EXP
:
1361 if (!ctx
->config
->lower_EXP
)
1363 transform_exp(tctx
, inst
);
1365 case TGSI_OPCODE_LOG
:
1366 if (!ctx
->config
->lower_LOG
)
1368 transform_log(tctx
, inst
);
1370 case TGSI_OPCODE_DP4
:
1371 if (!ctx
->config
->lower_DP4
)
1373 transform_dotp(tctx
, inst
);
1375 case TGSI_OPCODE_DP3
:
1376 if (!ctx
->config
->lower_DP3
)
1378 transform_dotp(tctx
, inst
);
1380 case TGSI_OPCODE_DPH
:
1381 if (!ctx
->config
->lower_DPH
)
1383 transform_dotp(tctx
, inst
);
1385 case TGSI_OPCODE_DP2
:
1386 if (!ctx
->config
->lower_DP2
)
1388 transform_dotp(tctx
, inst
);
1390 case TGSI_OPCODE_DP2A
:
1391 if (!ctx
->config
->lower_DP2A
)
1393 transform_dotp(tctx
, inst
);
1395 case TGSI_OPCODE_TEX
:
1396 case TGSI_OPCODE_TXP
:
1397 case TGSI_OPCODE_TXB
:
1398 case TGSI_OPCODE_TXB2
:
1399 case TGSI_OPCODE_TXL
:
1400 if (transform_samp(tctx
, inst
))
1405 tctx
->emit_instruction(tctx
, inst
);
/* Returns NULL if no lowering is required; otherwise returns the new
 * tokens, which the caller is required to free().  In either case
 * *info is filled in with the scan results for the current tokens.
 */
1414 const struct tgsi_token
*
1415 fd_transform_lowering(const struct fd_lowering_config
*config
,
1416 const struct tgsi_token
*tokens
,
1417 struct tgsi_shader_info
*info
)
1419 struct fd_lowering_context ctx
;
1420 struct tgsi_token
*newtoks
;
1423 /* sanity check in case limit is ever increased: */
1424 assert((sizeof(config
->saturate_s
) * 8) >= PIPE_MAX_SAMPLERS
);
1426 memset(&ctx
, 0, sizeof(ctx
));
1427 ctx
.base
.transform_instruction
= transform_instr
;
1429 ctx
.config
= config
;
1431 tgsi_scan_shader(tokens
, info
);
1433 /* if we are adding fragment shader support to emulate two-sided
1434 * color, then figure out the number of additional inputs we need
1435 * to create for BCOLOR's..
1437 if ((info
->processor
== TGSI_PROCESSOR_FRAGMENT
) &&
1438 config
->color_two_side
) {
1441 for (i
= 0; i
<= info
->file_max
[TGSI_FILE_INPUT
]; i
++) {
1442 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_COLOR
)
1443 ctx
.two_side_idx
[ctx
.two_side_colors
++] = i
;
1444 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_FACE
)
1449 ctx
.saturate
= config
->saturate_r
| config
->saturate_s
| config
->saturate_t
;
1451 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1452 /* if there are no instructions to lower, then we are done: */
1467 ctx
.two_side_colors
||
1472 _debug_printf("BEFORE:");
1473 tgsi_dump(tokens
, 0);
1477 newlen
= tgsi_num_tokens(tokens
);
1479 newlen
+= DST_GROW
* OPCS(DST
);
1480 numtmp
= MAX2(numtmp
, DST_TMP
);
1483 newlen
+= XPD_GROW
* OPCS(XPD
);
1484 numtmp
= MAX2(numtmp
, XPD_TMP
);
1487 newlen
+= SCS_GROW
* OPCS(SCS
);
1488 numtmp
= MAX2(numtmp
, SCS_TMP
);
1491 newlen
+= LRP_GROW
* OPCS(LRP
);
1492 numtmp
= MAX2(numtmp
, LRP_TMP
);
1495 newlen
+= FRC_GROW
* OPCS(FRC
);
1496 numtmp
= MAX2(numtmp
, FRC_TMP
);
1499 newlen
+= POW_GROW
* OPCS(POW
);
1500 numtmp
= MAX2(numtmp
, POW_TMP
);
1503 newlen
+= LIT_GROW
* OPCS(LIT
);
1504 numtmp
= MAX2(numtmp
, LIT_TMP
);
1507 newlen
+= EXP_GROW
* OPCS(EXP
);
1508 numtmp
= MAX2(numtmp
, EXP_TMP
);
1511 newlen
+= LOG_GROW
* OPCS(LOG
);
1512 numtmp
= MAX2(numtmp
, LOG_TMP
);
1515 newlen
+= DP4_GROW
* OPCS(DP4
);
1516 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1519 newlen
+= DP3_GROW
* OPCS(DP3
);
1520 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1523 newlen
+= DPH_GROW
* OPCS(DPH
);
1524 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1527 newlen
+= DP2_GROW
* OPCS(DP2
);
1528 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1531 newlen
+= DP2A_GROW
* OPCS(DP2A
);
1532 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1535 int n
= info
->opcode_count
[TGSI_OPCODE_TEX
] +
1536 info
->opcode_count
[TGSI_OPCODE_TXP
] +
1537 info
->opcode_count
[TGSI_OPCODE_TXB
] +
1538 info
->opcode_count
[TGSI_OPCODE_TXB2
] +
1539 info
->opcode_count
[TGSI_OPCODE_TXL
];
1540 newlen
+= SAMP_GROW
* n
;
1541 numtmp
= MAX2(numtmp
, SAMP_TMP
);
1544 /* specifically don't include two_side_colors temps in the count: */
1545 ctx
.numtmp
= numtmp
;
1547 if (ctx
.two_side_colors
) {
1548 newlen
+= TWOSIDE_GROW(ctx
.two_side_colors
);
/* note: we permanently consume temp regs, re-writing references
 * to IN.COLOR[n] to TEMP[m] (holding the output of the CMP
 * instruction that selects which varying to use):
 */
1553 numtmp
+= ctx
.two_side_colors
;
1556 newlen
+= 2 * numtmp
;
1557 newlen
+= 5; /* immediate */
1559 newtoks
= tgsi_alloc_tokens(newlen
);
1563 tgsi_transform_shader(tokens
, newtoks
, newlen
, &ctx
.base
);
1565 tgsi_scan_shader(newtoks
, info
);
1568 _debug_printf("AFTER:");
1569 tgsi_dump(newtoks
, 0);