1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
29 #include "tgsi/tgsi_transform.h"
30 #include "tgsi/tgsi_scan.h"
31 #include "tgsi/tgsi_dump.h"
33 #include "util/u_debug.h"
34 #include "util/u_math.h"
36 #include "freedreno_lowering.h"
/* State carried through one run of the lowering pass.  'base' is the first
 * member, which is what lets fd_lowering_context() cast a
 * tgsi_transform_context pointer back to this type.
 *
 * NOTE(review): this listing is extraction-damaged -- the embedded line
 * numbers skip 45-47, 50-52 and 54+, so members (and the closing brace) are
 * missing here.  The transform functions access the src/dst pair below as
 * ctx->tmp[A].src / ctx->tmp[B].dst, so those two members presumably live in
 * a nested 'tmp[]' array of structs indexed by A and B -- confirm against the
 * pristine file before relying on this layout.
 */
38 struct fd_lowering_context
{
39 struct tgsi_transform_context base
;
/* lowering options supplied by the caller (read-only) */
40 const struct fd_lowering_config
*config
;
/* scan information for the shader being lowered */
41 struct tgsi_shader_info
*info
;
42 unsigned two_side_colors
;
43 unsigned two_side_idx
[PIPE_MAX_SHADER_INPUTS
];
44 unsigned color_base
; /* base register for chosen COLOR/BCOLOR's */
/* temporary register, as a source and as a destination operand */
48 struct tgsi_full_src_register src
;
49 struct tgsi_full_dst_register dst
;
/* immediate holding the constants used by the lowerings; the swizzles used
 * below read .x as 0.0, .y as 1.0 and .z as 128.0 */
53 struct tgsi_full_src_register imm
;
/* Downcast a generic transform context to the lowering context that embeds
 * it.  This relies on 'base' being the first member of
 * struct fd_lowering_context.
 */
static inline struct fd_lowering_context *
fd_lowering_context(struct tgsi_transform_context *tctx)
{
	return (struct fd_lowering_context *)tctx;
}
69 reg_dst(struct tgsi_full_dst_register
*dst
,
70 const struct tgsi_full_dst_register
*orig_dst
, unsigned wrmask
)
73 dst
->Register
.WriteMask
&= wrmask
;
74 assert(dst
->Register
.WriteMask
);
78 get_swiz(unsigned *swiz
, const struct tgsi_src_register
*src
)
80 swiz
[0] = src
->SwizzleX
;
81 swiz
[1] = src
->SwizzleY
;
82 swiz
[2] = src
->SwizzleZ
;
83 swiz
[3] = src
->SwizzleW
;
87 reg_src(struct tgsi_full_src_register
*src
,
88 const struct tgsi_full_src_register
*orig_src
,
89 unsigned sx
, unsigned sy
, unsigned sz
, unsigned sw
)
92 get_swiz(swiz
, &orig_src
->Register
);
94 src
->Register
.SwizzleX
= swiz
[sx
];
95 src
->Register
.SwizzleY
= swiz
[sy
];
96 src
->Register
.SwizzleZ
= swiz
[sz
];
97 src
->Register
.SwizzleW
= swiz
[sw
];
/* SWIZ(x,y,z,w) expands to the four swizzle arguments of reg_src().  Each
 * argument is X, Y, Z, W or '_'; the latter marks a channel whose value is
 * irrelevant (not covered by the write-mask) and maps to SWIZZLE_X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
		TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
105 * if (dst.x aliases src.x) {
111 * MOV dst.zw, imm{0.0, 1.0}
114 aliases(const struct tgsi_full_dst_register
*dst
, unsigned dst_mask
,
115 const struct tgsi_full_src_register
*src
, unsigned src_mask
)
117 if ((dst
->Register
.File
== src
->Register
.File
) &&
118 (dst
->Register
.Index
== src
->Register
.Index
)) {
119 unsigned i
, actual_mask
= 0;
121 get_swiz(swiz
, &src
->Register
);
122 for (i
= 0; i
< 4; i
++)
123 if (src_mask
& (1 << i
))
124 actual_mask
|= (1 << swiz
[i
]);
125 if (actual_mask
& dst_mask
)
132 create_mov(struct tgsi_transform_context
*tctx
,
133 const struct tgsi_full_dst_register
*dst
,
134 const struct tgsi_full_src_register
*src
,
135 unsigned mask
, unsigned saturate
)
137 struct tgsi_full_instruction new_inst
;
139 new_inst
= tgsi_default_full_instruction();
140 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
141 new_inst
.Instruction
.Saturate
= saturate
;
142 new_inst
.Instruction
.NumDstRegs
= 1;
143 reg_dst(&new_inst
.Dst
[0], dst
, mask
);
144 new_inst
.Instruction
.NumSrcRegs
= 1;
145 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,Z
,W
));
146 tctx
->emit_instruction(tctx
, &new_inst
);
/*
 * Lowering Translators:
 */
153 /* DST - Distance Vector
155 * dst.y = src0.y \times src1.y
159 * ; note: could be more clever and use just a single temp
160 * ; if I was clever enough to re-write the swizzles.
161 * ; needs: 2 tmp, imm{1.0}
162 * if (dst.y aliases src0.z) {
163 * MOV tmpA.yz, src0.yz
166 * if (dst.yz aliases src1.w) {
167 * MOV tmpB.yw, src1.yw
170 * MUL dst.y, src0.y, src1.y
173 * MOV dst.x, imm{1.0}
175 #define DST_GROW (19 - 4)
178 transform_dst(struct tgsi_transform_context
*tctx
,
179 struct tgsi_full_instruction
*inst
)
181 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
182 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
183 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
184 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
185 struct tgsi_full_instruction new_inst
;
187 if (aliases(dst
, TGSI_WRITEMASK_Y
, src0
, TGSI_WRITEMASK_Z
)) {
188 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src0
, TGSI_WRITEMASK_YZ
, 0);
189 src0
= &ctx
->tmp
[A
].src
;
192 if (aliases(dst
, TGSI_WRITEMASK_YZ
, src1
, TGSI_WRITEMASK_W
)) {
193 create_mov(tctx
, &ctx
->tmp
[B
].dst
, src1
, TGSI_WRITEMASK_YW
, 0);
194 src1
= &ctx
->tmp
[B
].src
;
197 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
198 /* MUL dst.y, src0.y, src1.y */
199 new_inst
= tgsi_default_full_instruction();
200 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
201 new_inst
.Instruction
.NumDstRegs
= 1;
202 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
203 new_inst
.Instruction
.NumSrcRegs
= 2;
204 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
,Y
,_
,_
));
205 reg_src(&new_inst
.Src
[1], src1
, SWIZ(_
,Y
,_
,_
));
206 tctx
->emit_instruction(tctx
, &new_inst
);
209 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
210 /* MOV dst.z, src0.z */
211 new_inst
= tgsi_default_full_instruction();
212 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
213 new_inst
.Instruction
.NumDstRegs
= 1;
214 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
215 new_inst
.Instruction
.NumSrcRegs
= 1;
216 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
,_
,Z
,_
));
217 tctx
->emit_instruction(tctx
, &new_inst
);
220 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
221 /* MOV dst.w, src1.w */
222 new_inst
= tgsi_default_full_instruction();
223 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
224 new_inst
.Instruction
.NumDstRegs
= 1;
225 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
226 new_inst
.Instruction
.NumSrcRegs
= 1;
227 reg_src(&new_inst
.Src
[0], src1
, SWIZ(_
,_
,_
,W
));
228 tctx
->emit_instruction(tctx
, &new_inst
);
231 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
232 /* MOV dst.x, imm{1.0} */
233 new_inst
= tgsi_default_full_instruction();
234 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
235 new_inst
.Instruction
.NumDstRegs
= 1;
236 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
237 new_inst
.Instruction
.NumSrcRegs
= 1;
238 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
,_
,_
,_
));
239 tctx
->emit_instruction(tctx
, &new_inst
);
243 /* XPD - Cross Product
244 * dst.x = src0.y \times src1.z - src1.y \times src0.z
245 * dst.y = src0.z \times src1.x - src1.z \times src0.x
246 * dst.z = src0.x \times src1.y - src1.x \times src0.y
249 * ; needs: 2 tmp, imm{1.0}
250 * MUL tmpA.xyz, src0.yzx, src1.zxy
251 * MUL tmpB.xyz, src1.yzx, src0.zxy
252 * SUB dst.xyz, tmpA.xyz, tmpB.xyz
253 * MOV dst.w, imm{1.0}
255 #define XPD_GROW (15 - 4)
258 transform_xpd(struct tgsi_transform_context
*tctx
,
259 struct tgsi_full_instruction
*inst
)
261 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
262 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
263 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
264 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
265 struct tgsi_full_instruction new_inst
;
267 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
268 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
269 new_inst
= tgsi_default_full_instruction();
270 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
271 new_inst
.Instruction
.NumDstRegs
= 1;
272 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZ
);
273 new_inst
.Instruction
.NumSrcRegs
= 2;
274 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
,Z
,X
,_
));
275 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
,X
,Y
,_
));
276 tctx
->emit_instruction(tctx
, &new_inst
);
278 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
279 new_inst
= tgsi_default_full_instruction();
280 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
281 new_inst
.Instruction
.NumDstRegs
= 1;
282 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZ
);
283 new_inst
.Instruction
.NumSrcRegs
= 2;
284 reg_src(&new_inst
.Src
[0], src1
, SWIZ(Y
,Z
,X
,_
));
285 reg_src(&new_inst
.Src
[1], src0
, SWIZ(Z
,X
,Y
,_
));
286 tctx
->emit_instruction(tctx
, &new_inst
);
288 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
289 new_inst
= tgsi_default_full_instruction();
290 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
291 new_inst
.Instruction
.NumDstRegs
= 1;
292 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZ
);
293 new_inst
.Instruction
.NumSrcRegs
= 2;
294 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,_
));
295 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
,Y
,Z
,_
));
296 tctx
->emit_instruction(tctx
, &new_inst
);
299 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
300 /* MOV dst.w, imm{1.0} */
301 new_inst
= tgsi_default_full_instruction();
302 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
303 new_inst
.Instruction
.NumDstRegs
= 1;
304 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
305 new_inst
.Instruction
.NumSrcRegs
= 1;
306 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,_
,Y
));
307 tctx
->emit_instruction(tctx
, &new_inst
);
312 * dst.x = \cos{src.x}
313 * dst.y = \sin{src.x}
317 * ; needs: 1 tmp, imm{0.0, 1.0}
318 * if (dst.x aliases src.x) {
324 * MOV dst.zw, imm{0.0, 1.0}
326 #define SCS_GROW (12 - 3)
329 transform_scs(struct tgsi_transform_context
*tctx
,
330 struct tgsi_full_instruction
*inst
)
332 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
333 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
334 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
335 struct tgsi_full_instruction new_inst
;
337 if (aliases(dst
, TGSI_WRITEMASK_X
, src
, TGSI_WRITEMASK_X
)) {
338 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src
, TGSI_WRITEMASK_X
, 0);
339 src
= &ctx
->tmp
[A
].src
;
342 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
343 /* COS dst.x, src.x */
344 new_inst
= tgsi_default_full_instruction();
345 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_COS
;
346 new_inst
.Instruction
.NumDstRegs
= 1;
347 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
348 new_inst
.Instruction
.NumSrcRegs
= 1;
349 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
350 tctx
->emit_instruction(tctx
, &new_inst
);
353 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
354 /* SIN dst.y, src.x */
355 new_inst
= tgsi_default_full_instruction();
356 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SIN
;
357 new_inst
.Instruction
.NumDstRegs
= 1;
358 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
359 new_inst
.Instruction
.NumSrcRegs
= 1;
360 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
361 tctx
->emit_instruction(tctx
, &new_inst
);
364 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_ZW
) {
365 /* MOV dst.zw, imm{0.0, 1.0} */
366 new_inst
= tgsi_default_full_instruction();
367 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
368 new_inst
.Instruction
.NumDstRegs
= 1;
369 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_ZW
);
370 new_inst
.Instruction
.NumSrcRegs
= 1;
371 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,X
,Y
));
372 tctx
->emit_instruction(tctx
, &new_inst
);
376 /* LRP - Linear Interpolate
377 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
378 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
379 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
380 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
382 * ; needs: 2 tmp, imm{1.0}
383 * MUL tmpA, src0, src1
384 * SUB tmpB, imm{1.0}, src0
385 * MUL tmpB, tmpB, src2
386 * ADD dst, tmpA, tmpB
388 #define LRP_GROW (16 - 4)
391 transform_lrp(struct tgsi_transform_context
*tctx
,
392 struct tgsi_full_instruction
*inst
)
394 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
395 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
396 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
397 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
398 struct tgsi_full_src_register
*src2
= &inst
->Src
[2];
399 struct tgsi_full_instruction new_inst
;
401 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
402 /* MUL tmpA, src0, src1 */
403 new_inst
= tgsi_default_full_instruction();
404 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
405 new_inst
.Instruction
.NumDstRegs
= 1;
406 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
407 new_inst
.Instruction
.NumSrcRegs
= 2;
408 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
,Y
,Z
,W
));
409 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
,Y
,Z
,W
));
410 tctx
->emit_instruction(tctx
, &new_inst
);
412 /* SUB tmpB, imm{1.0}, src0 */
413 new_inst
= tgsi_default_full_instruction();
414 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
415 new_inst
.Instruction
.NumDstRegs
= 1;
416 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZW
);
417 new_inst
.Instruction
.NumSrcRegs
= 2;
418 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
,Y
,Y
,Y
));
419 reg_src(&new_inst
.Src
[1], src0
, SWIZ(X
,Y
,Z
,W
));
420 tctx
->emit_instruction(tctx
, &new_inst
);
422 /* MUL tmpB, tmpB, src2 */
423 new_inst
= tgsi_default_full_instruction();
424 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
425 new_inst
.Instruction
.NumDstRegs
= 1;
426 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZW
);
427 new_inst
.Instruction
.NumSrcRegs
= 2;
428 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[B
].src
, SWIZ(X
,Y
,Z
,W
));
429 reg_src(&new_inst
.Src
[1], src2
, SWIZ(X
,Y
,Z
,W
));
430 tctx
->emit_instruction(tctx
, &new_inst
);
432 /* ADD dst, tmpA, tmpB */
433 new_inst
= tgsi_default_full_instruction();
434 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
435 new_inst
.Instruction
.NumDstRegs
= 1;
436 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
437 new_inst
.Instruction
.NumSrcRegs
= 2;
438 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,W
));
439 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
,Y
,Z
,W
));
440 tctx
->emit_instruction(tctx
, &new_inst
);
445 * dst.x = src.x - \lfloor src.x\rfloor
446 * dst.y = src.y - \lfloor src.y\rfloor
447 * dst.z = src.z - \lfloor src.z\rfloor
448 * dst.w = src.w - \lfloor src.w\rfloor
454 #define FRC_GROW (7 - 3)
457 transform_frc(struct tgsi_transform_context
*tctx
,
458 struct tgsi_full_instruction
*inst
)
460 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
461 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
462 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
463 struct tgsi_full_instruction new_inst
;
465 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
467 new_inst
= tgsi_default_full_instruction();
468 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
469 new_inst
.Instruction
.NumDstRegs
= 1;
470 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
471 new_inst
.Instruction
.NumSrcRegs
= 1;
472 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,Z
,W
));
473 tctx
->emit_instruction(tctx
, &new_inst
);
475 /* SUB dst, src, tmpA */
476 new_inst
= tgsi_default_full_instruction();
477 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
478 new_inst
.Instruction
.NumDstRegs
= 1;
479 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
480 new_inst
.Instruction
.NumSrcRegs
= 2;
481 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,Z
,W
));
482 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,W
));
483 tctx
->emit_instruction(tctx
, &new_inst
);
488 * dst.x = src0.x^{src1.x}
489 * dst.y = src0.x^{src1.x}
490 * dst.z = src0.x^{src1.x}
491 * dst.w = src0.x^{src1.x}
495 * MUL tmpA.x, src1.x, tmpA.x
498 #define POW_GROW (10 - 4)
501 transform_pow(struct tgsi_transform_context
*tctx
,
502 struct tgsi_full_instruction
*inst
)
504 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
505 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
506 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
507 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
508 struct tgsi_full_instruction new_inst
;
510 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
511 /* LG2 tmpA.x, src0.x */
512 new_inst
= tgsi_default_full_instruction();
513 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
514 new_inst
.Instruction
.NumDstRegs
= 1;
515 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
516 new_inst
.Instruction
.NumSrcRegs
= 1;
517 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
,_
,_
,_
));
518 tctx
->emit_instruction(tctx
, &new_inst
);
520 /* MUL tmpA.x, src1.x, tmpA.x */
521 new_inst
= tgsi_default_full_instruction();
522 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
523 new_inst
.Instruction
.NumDstRegs
= 1;
524 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
525 new_inst
.Instruction
.NumSrcRegs
= 2;
526 reg_src(&new_inst
.Src
[0], src1
, SWIZ(X
,_
,_
,_
));
527 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,_
,_
,_
));
528 tctx
->emit_instruction(tctx
, &new_inst
);
530 /* EX2 dst, tmpA.x */
531 new_inst
= tgsi_default_full_instruction();
532 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
533 new_inst
.Instruction
.NumDstRegs
= 1;
534 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
535 new_inst
.Instruction
.NumSrcRegs
= 1;
536 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,_
,_
,_
));
537 tctx
->emit_instruction(tctx
, &new_inst
);
541 /* LIT - Light Coefficients
543 * dst.y = max(src.x, 0.0)
544 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
547 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
548 * MAX tmpA.xy, src.xy, imm{0.0}
549 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
551 * MUL tmpA.y, tmpA.z, tmpA.y
553 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
554 * MOV dst.yz, tmpA.xy
555 * MOV dst.xw, imm{1.0}
557 #define LIT_GROW (30 - 3)
560 transform_lit(struct tgsi_transform_context
*tctx
,
561 struct tgsi_full_instruction
*inst
)
563 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
564 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
565 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
566 struct tgsi_full_instruction new_inst
;
568 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_YZ
) {
569 /* MAX tmpA.xy, src.xy, imm{0.0} */
570 new_inst
= tgsi_default_full_instruction();
571 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAX
;
572 new_inst
.Instruction
.NumDstRegs
= 1;
573 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XY
);
574 new_inst
.Instruction
.NumSrcRegs
= 2;
575 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,_
,_
));
576 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(X
,X
,_
,_
));
577 tctx
->emit_instruction(tctx
, &new_inst
);
579 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
580 new_inst
= tgsi_default_full_instruction();
581 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CLAMP
;
582 new_inst
.Instruction
.NumDstRegs
= 1;
583 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
584 new_inst
.Instruction
.NumSrcRegs
= 3;
585 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,_
,W
,_
));
586 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(_
,_
,Z
,_
));
587 new_inst
.Src
[1].Register
.Negate
= true;
588 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
,_
,Z
,_
));
589 tctx
->emit_instruction(tctx
, &new_inst
);
591 /* LG2 tmpA.y, tmpA.y */
592 new_inst
= tgsi_default_full_instruction();
593 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
594 new_inst
.Instruction
.NumDstRegs
= 1;
595 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
596 new_inst
.Instruction
.NumSrcRegs
= 1;
597 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,_
,_
));
598 tctx
->emit_instruction(tctx
, &new_inst
);
600 /* MUL tmpA.y, tmpA.z, tmpA.y */
601 new_inst
= tgsi_default_full_instruction();
602 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
603 new_inst
.Instruction
.NumDstRegs
= 1;
604 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
605 new_inst
.Instruction
.NumSrcRegs
= 2;
606 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,Z
,_
,_
));
607 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,Y
,_
,_
));
608 tctx
->emit_instruction(tctx
, &new_inst
);
610 /* EX2 tmpA.y, tmpA.y */
611 new_inst
= tgsi_default_full_instruction();
612 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
613 new_inst
.Instruction
.NumDstRegs
= 1;
614 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
615 new_inst
.Instruction
.NumSrcRegs
= 1;
616 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,_
,_
));
617 tctx
->emit_instruction(tctx
, &new_inst
);
619 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
620 new_inst
= tgsi_default_full_instruction();
621 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
622 new_inst
.Instruction
.NumDstRegs
= 1;
623 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
624 new_inst
.Instruction
.NumSrcRegs
= 3;
625 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,X
,_
,_
));
626 new_inst
.Src
[0].Register
.Negate
= true;
627 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,Y
,_
,_
));
628 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
,X
,_
,_
));
629 tctx
->emit_instruction(tctx
, &new_inst
);
631 /* MOV dst.yz, tmpA.xy */
632 new_inst
= tgsi_default_full_instruction();
633 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
634 new_inst
.Instruction
.NumDstRegs
= 1;
635 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_YZ
);
636 new_inst
.Instruction
.NumSrcRegs
= 1;
637 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,X
,Y
,_
));
638 tctx
->emit_instruction(tctx
, &new_inst
);
641 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XW
) {
642 /* MOV dst.xw, imm{1.0} */
643 new_inst
= tgsi_default_full_instruction();
644 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
645 new_inst
.Instruction
.NumDstRegs
= 1;
646 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XW
);
647 new_inst
.Instruction
.NumSrcRegs
= 1;
648 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
,_
,_
,Y
));
649 tctx
->emit_instruction(tctx
, &new_inst
);
653 /* EXP - Approximate Exponential Base 2
654 * dst.x = 2^{\lfloor src.x\rfloor}
655 * dst.y = src.x - \lfloor src.x\rfloor
659 * ; needs: 1 tmp, imm{1.0}
662 * SUB dst.y, src.x, tmpA.x
665 * MOV dst.w, imm{1.0}
667 #define EXP_GROW (19 - 3)
670 transform_exp(struct tgsi_transform_context
*tctx
,
671 struct tgsi_full_instruction
*inst
)
673 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
674 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
675 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
676 struct tgsi_full_instruction new_inst
;
678 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
679 /* FLR tmpA.x, src.x */
680 new_inst
= tgsi_default_full_instruction();
681 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
682 new_inst
.Instruction
.NumDstRegs
= 1;
683 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
684 new_inst
.Instruction
.NumSrcRegs
= 1;
685 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
686 tctx
->emit_instruction(tctx
, &new_inst
);
689 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
690 /* EX2 tmpA.y, src.x */
691 new_inst
= tgsi_default_full_instruction();
692 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
693 new_inst
.Instruction
.NumDstRegs
= 1;
694 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
695 new_inst
.Instruction
.NumSrcRegs
= 1;
696 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
697 tctx
->emit_instruction(tctx
, &new_inst
);
700 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
701 /* SUB dst.y, src.x, tmpA.x */
702 new_inst
= tgsi_default_full_instruction();
703 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
704 new_inst
.Instruction
.NumDstRegs
= 1;
705 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
706 new_inst
.Instruction
.NumSrcRegs
= 2;
707 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,X
,_
,_
));
708 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,X
,_
,_
));
709 tctx
->emit_instruction(tctx
, &new_inst
);
712 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
713 /* EX2 dst.x, tmpA.x */
714 new_inst
= tgsi_default_full_instruction();
715 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
716 new_inst
.Instruction
.NumDstRegs
= 1;
717 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
718 new_inst
.Instruction
.NumSrcRegs
= 1;
719 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,_
,_
,_
));
720 tctx
->emit_instruction(tctx
, &new_inst
);
723 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
724 /* MOV dst.z, tmpA.y */
725 new_inst
= tgsi_default_full_instruction();
726 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
727 new_inst
.Instruction
.NumDstRegs
= 1;
728 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
729 new_inst
.Instruction
.NumSrcRegs
= 1;
730 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,_
,Y
,_
));
731 tctx
->emit_instruction(tctx
, &new_inst
);
734 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
735 /* MOV dst.w, imm{1.0} */
736 new_inst
= tgsi_default_full_instruction();
737 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
738 new_inst
.Instruction
.NumDstRegs
= 1;
739 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
740 new_inst
.Instruction
.NumSrcRegs
= 1;
741 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,_
,Y
));
742 tctx
->emit_instruction(tctx
, &new_inst
);
746 /* LOG - Approximate Logarithm Base 2
747 * dst.x = \lfloor\log_2{|src.x|}\rfloor
748 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
749 * dst.z = \log_2{|src.x|}
752 * ; needs: 1 tmp, imm{1.0}
753 * LG2 tmpA.x, |src.x|
757 * MUL dst.y, |src.x|, tmpA.z
758 * MOV dst.xz, tmpA.yx
759 * MOV dst.w, imm{1.0}
761 #define LOG_GROW (25 - 3)
764 transform_log(struct tgsi_transform_context
*tctx
,
765 struct tgsi_full_instruction
*inst
)
767 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
768 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
769 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
770 struct tgsi_full_instruction new_inst
;
772 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
773 /* LG2 tmpA.x, |src.x| */
774 new_inst
= tgsi_default_full_instruction();
775 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
776 new_inst
.Instruction
.NumDstRegs
= 1;
777 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
778 new_inst
.Instruction
.NumSrcRegs
= 1;
779 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
780 new_inst
.Src
[0].Register
.Absolute
= true;
781 tctx
->emit_instruction(tctx
, &new_inst
);
784 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
785 /* FLR tmpA.y, tmpA.x */
786 new_inst
= tgsi_default_full_instruction();
787 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
788 new_inst
.Instruction
.NumDstRegs
= 1;
789 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
790 new_inst
.Instruction
.NumSrcRegs
= 1;
791 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,X
,_
,_
));
792 tctx
->emit_instruction(tctx
, &new_inst
);
795 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
796 /* EX2 tmpA.z, tmpA.y */
797 new_inst
= tgsi_default_full_instruction();
798 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
799 new_inst
.Instruction
.NumDstRegs
= 1;
800 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
801 new_inst
.Instruction
.NumSrcRegs
= 1;
802 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,_
,_
));
803 tctx
->emit_instruction(tctx
, &new_inst
);
805 /* RCP tmpA.z, tmpA.z */
806 new_inst
= tgsi_default_full_instruction();
807 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
808 new_inst
.Instruction
.NumDstRegs
= 1;
809 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
810 new_inst
.Instruction
.NumSrcRegs
= 1;
811 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Z
,_
,_
,_
));
812 tctx
->emit_instruction(tctx
, &new_inst
);
814 /* MUL dst.y, |src.x|, tmpA.z */
815 new_inst
= tgsi_default_full_instruction();
816 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
817 new_inst
.Instruction
.NumDstRegs
= 1;
818 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
819 new_inst
.Instruction
.NumSrcRegs
= 2;
820 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,X
,_
,_
));
821 new_inst
.Src
[0].Register
.Absolute
= true;
822 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,Z
,_
,_
));
823 tctx
->emit_instruction(tctx
, &new_inst
);
826 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XZ
) {
827 /* MOV dst.xz, tmpA.yx */
828 new_inst
= tgsi_default_full_instruction();
829 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
830 new_inst
.Instruction
.NumDstRegs
= 1;
831 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XZ
);
832 new_inst
.Instruction
.NumSrcRegs
= 1;
833 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,X
,_
));
834 tctx
->emit_instruction(tctx
, &new_inst
);
837 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
838 /* MOV dst.w, imm{1.0} */
839 new_inst
= tgsi_default_full_instruction();
840 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
841 new_inst
.Instruction
.NumDstRegs
= 1;
842 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
843 new_inst
.Instruction
.NumSrcRegs
= 1;
844 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,_
,Y
));
845 tctx
->emit_instruction(tctx
, &new_inst
);
849 /* DP4 - 4-component Dot Product
850 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
852 * DP3 - 3-component Dot Product
853 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
855 * DPH - Homogeneous Dot Product
856 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
858 * DP2 - 2-component Dot Product
859 * dst = src0.x \times src1.x + src0.y \times src1.y
861 * DP2A - 2-component Dot Product And Add
862 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
864 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
865 * operations, which is what you'd prefer for a ISA that is natively
866 * scalar. Probably a native vector ISA would at least already have
867 * DP4/DP3 instructions, but perhaps there is room for an alternative
868 * translation for DPH/DP2/DP2A using vector instructions.
871 * MUL tmpA.x, src0.x, src1.x
872 * MAD tmpA.x, src0.y, src1.y, tmpA.x
873 * if (DPH || DP3 || DP4) {
874 * MAD tmpA.x, src0.z, src1.z, tmpA.x
876 * ADD tmpA.x, src1.w, tmpA.x
878 * MAD tmpA.x, src0.w, src1.w, tmpA.x
881 * ADD tmpA.x, src2.x, tmpA.x
883 * ; fixup last instruction to replicate into dst
885 #define DP4_GROW (19 - 4)
886 #define DP3_GROW (14 - 4)
887 #define DPH_GROW (18 - 4)
888 #define DP2_GROW ( 9 - 4)
889 #define DP2A_GROW (13 - 4)
892 transform_dotp(struct tgsi_transform_context
*tctx
,
893 struct tgsi_full_instruction
*inst
)
895 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
896 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
897 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
898 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
899 struct tgsi_full_src_register
*src2
= &inst
->Src
[2]; /* only DP2A */
900 struct tgsi_full_instruction new_inst
;
901 unsigned opcode
= inst
->Instruction
.Opcode
;
903 /* NOTE: any potential last instruction must replicate src on all
904 * components (since it could be re-written to write to final dst)
907 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
908 /* MUL tmpA.x, src0.x, src1.x */
909 new_inst
= tgsi_default_full_instruction();
910 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
911 new_inst
.Instruction
.NumDstRegs
= 1;
912 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
913 new_inst
.Instruction
.NumSrcRegs
= 2;
914 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
,_
,_
,_
));
915 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
,_
,_
,_
));
916 tctx
->emit_instruction(tctx
, &new_inst
);
918 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
919 new_inst
= tgsi_default_full_instruction();
920 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
921 new_inst
.Instruction
.NumDstRegs
= 1;
922 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
923 new_inst
.Instruction
.NumSrcRegs
= 3;
924 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
,Y
,Y
,Y
));
925 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Y
,Y
,Y
,Y
));
926 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
928 if ((opcode
== TGSI_OPCODE_DPH
) ||
929 (opcode
== TGSI_OPCODE_DP3
) ||
930 (opcode
== TGSI_OPCODE_DP4
)) {
931 tctx
->emit_instruction(tctx
, &new_inst
);
933 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
934 new_inst
= tgsi_default_full_instruction();
935 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
936 new_inst
.Instruction
.NumDstRegs
= 1;
937 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
938 new_inst
.Instruction
.NumSrcRegs
= 3;
939 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Z
,Z
,Z
,Z
));
940 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
,Z
,Z
,Z
));
941 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
943 if (opcode
== TGSI_OPCODE_DPH
) {
944 tctx
->emit_instruction(tctx
, &new_inst
);
946 /* ADD tmpA.x, src1.w, tmpA.x */
947 new_inst
= tgsi_default_full_instruction();
948 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
949 new_inst
.Instruction
.NumDstRegs
= 1;
950 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
951 new_inst
.Instruction
.NumSrcRegs
= 2;
952 reg_src(&new_inst
.Src
[0], src1
, SWIZ(W
,W
,W
,W
));
953 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
954 } else if (opcode
== TGSI_OPCODE_DP4
) {
955 tctx
->emit_instruction(tctx
, &new_inst
);
957 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
958 new_inst
= tgsi_default_full_instruction();
959 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
960 new_inst
.Instruction
.NumDstRegs
= 1;
961 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
962 new_inst
.Instruction
.NumSrcRegs
= 3;
963 reg_src(&new_inst
.Src
[0], src0
, SWIZ(W
,W
,W
,W
));
964 reg_src(&new_inst
.Src
[1], src1
, SWIZ(W
,W
,W
,W
));
965 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
967 } else if (opcode
== TGSI_OPCODE_DP2A
) {
968 tctx
->emit_instruction(tctx
, &new_inst
);
970 /* ADD tmpA.x, src2.x, tmpA.x */
971 new_inst
= tgsi_default_full_instruction();
972 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
973 new_inst
.Instruction
.NumDstRegs
= 1;
974 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
975 new_inst
.Instruction
.NumSrcRegs
= 2;
976 reg_src(&new_inst
.Src
[0], src2
, SWIZ(X
,X
,X
,X
));
977 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
980 /* fixup last instruction to write to dst: */
981 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
983 tctx
->emit_instruction(tctx
, &new_inst
);
987 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
988 * in the case of TXP, the clamping must happen *after* projection, so
989 * we need to lower TXP to TEX.
993 * ; do perspective division manually before clamping:
995 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx
998 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
999 * <opc> dst, tmpA, ...
1001 #define SAMP_GROW (13)
1004 transform_samp(struct tgsi_transform_context
*tctx
,
1005 struct tgsi_full_instruction
*inst
)
1007 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1008 struct tgsi_full_src_register
*coord
= &inst
->Src
[0];
1009 struct tgsi_full_src_register
*samp
;
1010 struct tgsi_full_instruction new_inst
;
1011 /* mask is clamped coords, pmask is all coords (for projection): */
1012 unsigned mask
= 0, pmask
= 0, smask
;
1013 unsigned opcode
= inst
->Instruction
.Opcode
;
1015 if (opcode
== TGSI_OPCODE_TXB2
) {
1016 samp
= &inst
->Src
[2];
1018 samp
= &inst
->Src
[1];
1021 /* convert sampler # to bitmask to test: */
1022 smask
= 1 << samp
->Register
.Index
;
1024 /* check if we actually need to lower this one: */
1025 if (!(ctx
->saturate
& smask
))
1028 /* figure out which coordinates need saturating:
1029 * - RECT textures should not get saturated
1030 * - array index coords should not get saturated
1032 switch (inst
->Texture
.Texture
) {
1033 case TGSI_TEXTURE_3D
:
1034 case TGSI_TEXTURE_CUBE
:
1035 case TGSI_TEXTURE_CUBE_ARRAY
:
1036 case TGSI_TEXTURE_SHADOWCUBE
:
1037 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
1038 if (ctx
->config
->saturate_r
& smask
)
1039 mask
|= TGSI_WRITEMASK_Z
;
1040 pmask
|= TGSI_WRITEMASK_Z
;
1043 case TGSI_TEXTURE_2D
:
1044 case TGSI_TEXTURE_2D_ARRAY
:
1045 case TGSI_TEXTURE_SHADOW2D
:
1046 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
1047 case TGSI_TEXTURE_2D_MSAA
:
1048 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
1049 if (ctx
->config
->saturate_t
& smask
)
1050 mask
|= TGSI_WRITEMASK_Y
;
1051 pmask
|= TGSI_WRITEMASK_Y
;
1054 case TGSI_TEXTURE_1D
:
1055 case TGSI_TEXTURE_1D_ARRAY
:
1056 case TGSI_TEXTURE_SHADOW1D
:
1057 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
1058 if (ctx
->config
->saturate_s
& smask
)
1059 mask
|= TGSI_WRITEMASK_X
;
1060 pmask
|= TGSI_WRITEMASK_X
;
1063 /* TODO: I think we should ignore these?
1064 case TGSI_TEXTURE_RECT:
1065 case TGSI_TEXTURE_SHADOWRECT:
1069 /* sanity check.. driver could be asking to saturate a non-
1070 * existent coordinate component:
1075 /* MOV tmpA, src0 */
1076 create_mov(tctx
, &ctx
->tmp
[A
].dst
, coord
, TGSI_WRITEMASK_XYZW
, 0);
1078 /* This is a bit sad.. we need to clamp *after* the coords
1079 * are projected, which means lowering TXP to TEX and doing
1080 * the projection ourself. But since I haven't figured out
1081 * how to make the lowering code deliver an electric shock
1082 * to anyone using GL_CLAMP, we must do this instead:
1084 if (opcode
== TGSI_OPCODE_TXP
) {
1085 /* RCP tmpB.x tmpA.w */
1086 new_inst
= tgsi_default_full_instruction();
1087 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
1088 new_inst
.Instruction
.NumDstRegs
= 1;
1089 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_X
);
1090 new_inst
.Instruction
.NumSrcRegs
= 1;
1091 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(W
,_
,_
,_
));
1092 tctx
->emit_instruction(tctx
, &new_inst
);
1094 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1095 new_inst
= tgsi_default_full_instruction();
1096 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
1097 new_inst
.Instruction
.NumDstRegs
= 1;
1098 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, pmask
);
1099 new_inst
.Instruction
.NumSrcRegs
= 2;
1100 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,W
));
1101 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
,X
,X
,X
));
1102 tctx
->emit_instruction(tctx
, &new_inst
);
1104 opcode
= TGSI_OPCODE_TEX
;
1107 /* MOV_SAT tmpA.<mask>, tmpA */
1108 create_mov(tctx
, &ctx
->tmp
[A
].dst
, &ctx
->tmp
[A
].src
, mask
,
1111 /* modify the texture samp instruction to take fixed up coord: */
1113 new_inst
.Instruction
.Opcode
= opcode
;
1114 new_inst
.Src
[0] = ctx
->tmp
[A
].src
;
1115 tctx
->emit_instruction(tctx
, &new_inst
);
1120 /* Two-sided color emulation:
1121 * For each COLOR input, create a corresponding BCOLOR input, plus
1122 * CMP instruction to select front or back color based on FACE
1124 #define TWOSIDE_GROW(n) ( \
1126 ((n) * 2) + /* IN[] BCOLOR[n] */ \
1127 ((n) * 1) + /* TEMP[] */ \
1128 ((n) * 5) /* CMP instr */ \
1132 emit_twoside(struct tgsi_transform_context
*tctx
)
1134 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1135 struct tgsi_shader_info
*info
= ctx
->info
;
1136 struct tgsi_full_declaration decl
;
1137 struct tgsi_full_instruction new_inst
;
1138 unsigned inbase
, tmpbase
;
1141 inbase
= info
->file_max
[TGSI_FILE_INPUT
] + 1;
1142 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1144 /* additional inputs for BCOLOR's */
1145 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1146 decl
= tgsi_default_full_declaration();
1147 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1148 decl
.Declaration
.Semantic
= true;
1149 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ i
;
1150 decl
.Semantic
.Name
= TGSI_SEMANTIC_BCOLOR
;
1151 decl
.Semantic
.Index
=
1152 info
->input_semantic_index
[ctx
->two_side_idx
[i
]];
1153 tctx
->emit_declaration(tctx
, &decl
);
1156 /* additional input for FACE */
1157 if (ctx
->two_side_colors
&& (ctx
->face_idx
== -1)) {
1158 decl
= tgsi_default_full_declaration();
1159 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1160 decl
.Declaration
.Semantic
= true;
1161 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ ctx
->two_side_colors
;
1162 decl
.Semantic
.Name
= TGSI_SEMANTIC_FACE
;
1163 decl
.Semantic
.Index
= 0;
1164 tctx
->emit_declaration(tctx
, &decl
);
1166 ctx
->face_idx
= decl
.Range
.First
;
1169 /* additional temps for COLOR/BCOLOR selection: */
1170 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1171 decl
= tgsi_default_full_declaration();
1172 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1173 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ ctx
->numtmp
+ i
;
1174 tctx
->emit_declaration(tctx
, &decl
);
1177 /* and finally additional instructions to select COLOR/BCOLOR: */
1178 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1179 new_inst
= tgsi_default_full_instruction();
1180 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
1182 new_inst
.Instruction
.NumDstRegs
= 1;
1183 new_inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
1184 new_inst
.Dst
[0].Register
.Index
= tmpbase
+ ctx
->numtmp
+ i
;
1185 new_inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1187 new_inst
.Instruction
.NumSrcRegs
= 3;
1188 new_inst
.Src
[0].Register
.File
= TGSI_FILE_INPUT
;
1189 new_inst
.Src
[0].Register
.Index
= ctx
->face_idx
;
1190 new_inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1191 new_inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
1192 new_inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
1193 new_inst
.Src
[0].Register
.SwizzleW
= TGSI_SWIZZLE_X
;
1194 new_inst
.Src
[1].Register
.File
= TGSI_FILE_INPUT
;
1195 new_inst
.Src
[1].Register
.Index
= inbase
+ i
;
1196 new_inst
.Src
[1].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1197 new_inst
.Src
[1].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1198 new_inst
.Src
[1].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1199 new_inst
.Src
[1].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1200 new_inst
.Src
[2].Register
.File
= TGSI_FILE_INPUT
;
1201 new_inst
.Src
[2].Register
.Index
= ctx
->two_side_idx
[i
];
1202 new_inst
.Src
[2].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1203 new_inst
.Src
[2].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1204 new_inst
.Src
[2].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1205 new_inst
.Src
[2].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1207 tctx
->emit_instruction(tctx
, &new_inst
);
1212 emit_decls(struct tgsi_transform_context
*tctx
)
1214 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1215 struct tgsi_shader_info
*info
= ctx
->info
;
1216 struct tgsi_full_declaration decl
;
1217 struct tgsi_full_immediate immed
;
1221 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1223 ctx
->color_base
= tmpbase
+ ctx
->numtmp
;
1225 /* declare immediate: */
1226 immed
= tgsi_default_full_immediate();
1227 immed
.Immediate
.NrTokens
= 1 + 4; /* one for the token itself */
1228 immed
.u
[0].Float
= 0.0;
1229 immed
.u
[1].Float
= 1.0;
1230 immed
.u
[2].Float
= 128.0;
1231 immed
.u
[3].Float
= 0.0;
1232 tctx
->emit_immediate(tctx
, &immed
);
1234 ctx
->imm
.Register
.File
= TGSI_FILE_IMMEDIATE
;
1235 ctx
->imm
.Register
.Index
= info
->immediate_count
;
1236 ctx
->imm
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1237 ctx
->imm
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1238 ctx
->imm
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1239 ctx
->imm
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1241 /* declare temp regs: */
1242 for (i
= 0; i
< ctx
->numtmp
; i
++) {
1243 decl
= tgsi_default_full_declaration();
1244 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1245 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ i
;
1246 tctx
->emit_declaration(tctx
, &decl
);
1248 ctx
->tmp
[i
].src
.Register
.File
= TGSI_FILE_TEMPORARY
;
1249 ctx
->tmp
[i
].src
.Register
.Index
= tmpbase
+ i
;
1250 ctx
->tmp
[i
].src
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1251 ctx
->tmp
[i
].src
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1252 ctx
->tmp
[i
].src
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1253 ctx
->tmp
[i
].src
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1255 ctx
->tmp
[i
].dst
.Register
.File
= TGSI_FILE_TEMPORARY
;
1256 ctx
->tmp
[i
].dst
.Register
.Index
= tmpbase
+ i
;
1257 ctx
->tmp
[i
].dst
.Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1260 if (ctx
->two_side_colors
)
1265 rename_color_inputs(struct fd_lowering_context
*ctx
,
1266 struct tgsi_full_instruction
*inst
)
1269 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1270 struct tgsi_src_register
*src
= &inst
->Src
[i
].Register
;
1271 if (src
->File
== TGSI_FILE_INPUT
) {
1272 for (j
= 0; j
< ctx
->two_side_colors
; j
++) {
1273 if (src
->Index
== ctx
->two_side_idx
[j
]) {
1274 src
->File
= TGSI_FILE_TEMPORARY
;
1275 src
->Index
= ctx
->color_base
+ j
;
1285 transform_instr(struct tgsi_transform_context
*tctx
,
1286 struct tgsi_full_instruction
*inst
)
1288 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1290 if (!ctx
->emitted_decls
) {
1292 ctx
->emitted_decls
= 1;
1295 /* if emulating two-sided-color, we need to re-write some
1298 if (ctx
->two_side_colors
)
1299 rename_color_inputs(ctx
, inst
);
1301 switch (inst
->Instruction
.Opcode
) {
1302 case TGSI_OPCODE_DST
:
1303 if (!ctx
->config
->lower_DST
)
1305 transform_dst(tctx
, inst
);
1307 case TGSI_OPCODE_XPD
:
1308 if (!ctx
->config
->lower_XPD
)
1310 transform_xpd(tctx
, inst
);
1312 case TGSI_OPCODE_SCS
:
1313 if (!ctx
->config
->lower_SCS
)
1315 transform_scs(tctx
, inst
);
1317 case TGSI_OPCODE_LRP
:
1318 if (!ctx
->config
->lower_LRP
)
1320 transform_lrp(tctx
, inst
);
1322 case TGSI_OPCODE_FRC
:
1323 if (!ctx
->config
->lower_FRC
)
1325 transform_frc(tctx
, inst
);
1327 case TGSI_OPCODE_POW
:
1328 if (!ctx
->config
->lower_POW
)
1330 transform_pow(tctx
, inst
);
1332 case TGSI_OPCODE_LIT
:
1333 if (!ctx
->config
->lower_LIT
)
1335 transform_lit(tctx
, inst
);
1337 case TGSI_OPCODE_EXP
:
1338 if (!ctx
->config
->lower_EXP
)
1340 transform_exp(tctx
, inst
);
1342 case TGSI_OPCODE_LOG
:
1343 if (!ctx
->config
->lower_LOG
)
1345 transform_log(tctx
, inst
);
1347 case TGSI_OPCODE_DP4
:
1348 if (!ctx
->config
->lower_DP4
)
1350 transform_dotp(tctx
, inst
);
1352 case TGSI_OPCODE_DP3
:
1353 if (!ctx
->config
->lower_DP3
)
1355 transform_dotp(tctx
, inst
);
1357 case TGSI_OPCODE_DPH
:
1358 if (!ctx
->config
->lower_DPH
)
1360 transform_dotp(tctx
, inst
);
1362 case TGSI_OPCODE_DP2
:
1363 if (!ctx
->config
->lower_DP2
)
1365 transform_dotp(tctx
, inst
);
1367 case TGSI_OPCODE_DP2A
:
1368 if (!ctx
->config
->lower_DP2A
)
1370 transform_dotp(tctx
, inst
);
1372 case TGSI_OPCODE_TEX
:
1373 case TGSI_OPCODE_TXP
:
1374 case TGSI_OPCODE_TXB
:
1375 case TGSI_OPCODE_TXB2
:
1376 case TGSI_OPCODE_TXL
:
1377 if (transform_samp(tctx
, inst
))
1382 tctx
->emit_instruction(tctx
, inst
);
1387 /* returns NULL if no lowering required, else returns the new
1388 * tokens (which caller is required to free()). In either case
1389 * returns the current info.
1391 const struct tgsi_token
*
1392 fd_transform_lowering(const struct fd_lowering_config
*config
,
1393 const struct tgsi_token
*tokens
,
1394 struct tgsi_shader_info
*info
)
1396 struct fd_lowering_context ctx
;
1397 struct tgsi_token
*newtoks
;
1400 /* sanity check in case limit is ever increased: */
1401 assert((sizeof(config
->saturate_s
) * 8) >= PIPE_MAX_SAMPLERS
);
1403 memset(&ctx
, 0, sizeof(ctx
));
1404 ctx
.base
.transform_instruction
= transform_instr
;
1406 ctx
.config
= config
;
1408 tgsi_scan_shader(tokens
, info
);
1410 /* if we are adding fragment shader support to emulate two-sided
1411 * color, then figure out the number of additional inputs we need
1412 * to create for BCOLOR's..
1414 if ((info
->processor
== TGSI_PROCESSOR_FRAGMENT
) &&
1415 config
->color_two_side
) {
1418 for (i
= 0; i
<= info
->file_max
[TGSI_FILE_INPUT
]; i
++) {
1419 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_COLOR
)
1420 ctx
.two_side_idx
[ctx
.two_side_colors
++] = i
;
1421 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_FACE
)
1426 ctx
.saturate
= config
->saturate_r
| config
->saturate_s
| config
->saturate_t
;
1428 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1429 /* if there are no instructions to lower, then we are done: */
1444 ctx
.two_side_colors
||
1449 _debug_printf("BEFORE:");
1450 tgsi_dump(tokens
, 0);
1454 newlen
= tgsi_num_tokens(tokens
);
1456 newlen
+= DST_GROW
* OPCS(DST
);
1457 numtmp
= MAX2(numtmp
, DST_TMP
);
1460 newlen
+= XPD_GROW
* OPCS(XPD
);
1461 numtmp
= MAX2(numtmp
, XPD_TMP
);
1464 newlen
+= SCS_GROW
* OPCS(SCS
);
1465 numtmp
= MAX2(numtmp
, SCS_TMP
);
1468 newlen
+= LRP_GROW
* OPCS(LRP
);
1469 numtmp
= MAX2(numtmp
, LRP_TMP
);
1472 newlen
+= FRC_GROW
* OPCS(FRC
);
1473 numtmp
= MAX2(numtmp
, FRC_TMP
);
1476 newlen
+= POW_GROW
* OPCS(POW
);
1477 numtmp
= MAX2(numtmp
, POW_TMP
);
1480 newlen
+= LIT_GROW
* OPCS(LIT
);
1481 numtmp
= MAX2(numtmp
, LIT_TMP
);
1484 newlen
+= EXP_GROW
* OPCS(EXP
);
1485 numtmp
= MAX2(numtmp
, EXP_TMP
);
1488 newlen
+= LOG_GROW
* OPCS(LOG
);
1489 numtmp
= MAX2(numtmp
, LOG_TMP
);
1492 newlen
+= DP4_GROW
* OPCS(DP4
);
1493 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1496 newlen
+= DP3_GROW
* OPCS(DP3
);
1497 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1500 newlen
+= DPH_GROW
* OPCS(DPH
);
1501 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1504 newlen
+= DP2_GROW
* OPCS(DP2
);
1505 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1508 newlen
+= DP2A_GROW
* OPCS(DP2A
);
1509 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1512 int n
= info
->opcode_count
[TGSI_OPCODE_TEX
] +
1513 info
->opcode_count
[TGSI_OPCODE_TXP
] +
1514 info
->opcode_count
[TGSI_OPCODE_TXB
] +
1515 info
->opcode_count
[TGSI_OPCODE_TXB2
] +
1516 info
->opcode_count
[TGSI_OPCODE_TXL
];
1517 newlen
+= SAMP_GROW
* n
;
1518 numtmp
= MAX2(numtmp
, SAMP_TMP
);
1521 /* specifically don't include two_side_colors temps in the count: */
1522 ctx
.numtmp
= numtmp
;
1524 if (ctx
.two_side_colors
) {
1525 newlen
+= TWOSIDE_GROW(ctx
.two_side_colors
);
1526 /* note: we permanently consume temp regs, re-writing references
1527 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1528 * instruction that selects which varying to use):
1530 numtmp
+= ctx
.two_side_colors
;
1533 newlen
+= 2 * numtmp
;
1534 newlen
+= 5; /* immediate */
1536 newtoks
= tgsi_alloc_tokens(newlen
);
1540 tgsi_transform_shader(tokens
, newtoks
, newlen
, &ctx
.base
);
1542 tgsi_scan_shader(newtoks
, info
);
1545 _debug_printf("AFTER:");
1546 tgsi_dump(newtoks
, 0);