2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Rob Clark <robclark@freedesktop.org>
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
34 #include "tgsi_lowering.h"
36 struct tgsi_lowering_context
{
37 struct tgsi_transform_context base
;
38 const struct tgsi_lowering_config
*config
;
39 struct tgsi_shader_info
*info
;
40 unsigned two_side_colors
;
41 unsigned two_side_idx
[PIPE_MAX_SHADER_INPUTS
];
42 unsigned color_base
; /* base register for chosen COLOR/BCOLOR's */
46 struct tgsi_full_src_register src
;
47 struct tgsi_full_dst_register dst
;
51 struct tgsi_full_src_register imm
;
/**
 * Convert a generic transform context pointer into the lowering context
 * it belongs to.  This is a plain pointer cast.
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   return (struct tgsi_lowering_context *)tctx;
}
67 reg_dst(struct tgsi_full_dst_register
*dst
,
68 const struct tgsi_full_dst_register
*orig_dst
, unsigned wrmask
)
71 dst
->Register
.WriteMask
&= wrmask
;
72 assert(dst
->Register
.WriteMask
);
76 get_swiz(unsigned *swiz
, const struct tgsi_src_register
*src
)
78 swiz
[0] = src
->SwizzleX
;
79 swiz
[1] = src
->SwizzleY
;
80 swiz
[2] = src
->SwizzleZ
;
81 swiz
[3] = src
->SwizzleW
;
85 reg_src(struct tgsi_full_src_register
*src
,
86 const struct tgsi_full_src_register
*orig_src
,
87 unsigned sx
, unsigned sy
, unsigned sz
, unsigned sw
)
90 get_swiz(swiz
, &orig_src
->Register
);
92 src
->Register
.SwizzleX
= swiz
[sx
];
93 src
->Register
.SwizzleY
= swiz
[sy
];
94 src
->Register
.SwizzleZ
= swiz
[sz
];
95 src
->Register
.SwizzleW
= swiz
[sw
];
/* SWIZ(x,y,z,w) expands to the four TGSI_SWIZZLE_* arguments expected by
 * reg_src().  Use '_' for a component whose value does not matter; it
 * maps to TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
                      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
103 * if (dst.x aliases src.x) {
109 * MOV dst.zw, imm{0.0, 1.0}
112 aliases(const struct tgsi_full_dst_register
*dst
, unsigned dst_mask
,
113 const struct tgsi_full_src_register
*src
, unsigned src_mask
)
115 if ((dst
->Register
.File
== src
->Register
.File
) &&
116 (dst
->Register
.Index
== src
->Register
.Index
)) {
117 unsigned i
, actual_mask
= 0;
119 get_swiz(swiz
, &src
->Register
);
120 for (i
= 0; i
< 4; i
++)
121 if (src_mask
& (1 << i
))
122 actual_mask
|= (1 << swiz
[i
]);
123 if (actual_mask
& dst_mask
)
130 create_mov(struct tgsi_transform_context
*tctx
,
131 const struct tgsi_full_dst_register
*dst
,
132 const struct tgsi_full_src_register
*src
,
133 unsigned mask
, unsigned saturate
)
135 struct tgsi_full_instruction new_inst
;
137 new_inst
= tgsi_default_full_instruction();
138 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
139 new_inst
.Instruction
.Saturate
= saturate
;
140 new_inst
.Instruction
.NumDstRegs
= 1;
141 reg_dst(&new_inst
.Dst
[0], dst
, mask
);
142 new_inst
.Instruction
.NumSrcRegs
= 1;
143 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, Z
, W
));
144 tctx
->emit_instruction(tctx
, &new_inst
);
/* to help calculate # of tgsi tokens for a lowering.. we assume
 * the worst case, ie. removed instructions don't have ADDR[] or
 * anything which increases the # of tokens per src/dst and the
 * inserted instructions do.
 *
 * OINST() - old instruction
 *    1         : instruction itself
 *    1         : dst
 *    1 * nargs : srcN
 *
 * NINST() - new instruction
 *    1         : instruction itself
 *    2         : dst
 *    2 * nargs : srcN
 */

#define OINST(nargs)  (1 + 1 + 1 * (nargs))
#define NINST(nargs)  (1 + 2 + 2 * (nargs))

/*
 * Lowering Translators:
 */
170 /* DST - Distance Vector
172 * dst.y = src0.y \times src1.y
176 * ; note: could be more clever and use just a single temp
177 * ; if I was clever enough to re-write the swizzles.
178 * ; needs: 2 tmp, imm{1.0}
179 * if (dst.y aliases src0.z) {
180 * MOV tmpA.yz, src0.yz
183 * if (dst.yz aliases src1.w) {
184 * MOV tmpB.yw, src1.yw
187 * MUL dst.y, src0.y, src1.y
190 * MOV dst.x, imm{1.0}
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 NINST(1) + NINST(1) - OINST(2))
196 transform_dst(struct tgsi_transform_context
*tctx
,
197 struct tgsi_full_instruction
*inst
)
199 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
200 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
201 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
202 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
203 struct tgsi_full_instruction new_inst
;
205 if (aliases(dst
, TGSI_WRITEMASK_Y
, src0
, TGSI_WRITEMASK_Z
)) {
206 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src0
, TGSI_WRITEMASK_YZ
, 0);
207 src0
= &ctx
->tmp
[A
].src
;
210 if (aliases(dst
, TGSI_WRITEMASK_YZ
, src1
, TGSI_WRITEMASK_W
)) {
211 create_mov(tctx
, &ctx
->tmp
[B
].dst
, src1
, TGSI_WRITEMASK_YW
, 0);
212 src1
= &ctx
->tmp
[B
].src
;
215 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
216 /* MUL dst.y, src0.y, src1.y */
217 new_inst
= tgsi_default_full_instruction();
218 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
219 new_inst
.Instruction
.NumDstRegs
= 1;
220 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
221 new_inst
.Instruction
.NumSrcRegs
= 2;
222 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
, Y
, _
, _
));
223 reg_src(&new_inst
.Src
[1], src1
, SWIZ(_
, Y
, _
, _
));
224 tctx
->emit_instruction(tctx
, &new_inst
);
227 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
228 /* MOV dst.z, src0.z */
229 new_inst
= tgsi_default_full_instruction();
230 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
231 new_inst
.Instruction
.NumDstRegs
= 1;
232 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
233 new_inst
.Instruction
.NumSrcRegs
= 1;
234 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
, _
, Z
, _
));
235 tctx
->emit_instruction(tctx
, &new_inst
);
238 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
239 /* MOV dst.w, src1.w */
240 new_inst
= tgsi_default_full_instruction();
241 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
242 new_inst
.Instruction
.NumDstRegs
= 1;
243 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
244 new_inst
.Instruction
.NumSrcRegs
= 1;
245 reg_src(&new_inst
.Src
[0], src1
, SWIZ(_
, _
, _
, W
));
246 tctx
->emit_instruction(tctx
, &new_inst
);
249 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
250 /* MOV dst.x, imm{1.0} */
251 new_inst
= tgsi_default_full_instruction();
252 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
253 new_inst
.Instruction
.NumDstRegs
= 1;
254 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
255 new_inst
.Instruction
.NumSrcRegs
= 1;
256 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
, _
, _
, _
));
257 tctx
->emit_instruction(tctx
, &new_inst
);
261 /* LRP - Linear Interpolate
262 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
263 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
264 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
265 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
267 * This becomes: src0 \times src1 + src2 - src0 \times src2, which
268 * can then become: src0 \times src1 - (src0 \times src2 - src2)
271 * MAD tmpA, src0, src2, -src2
272 * MAD dst, src0, src1, -tmpA
274 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
277 transform_lrp(struct tgsi_transform_context
*tctx
,
278 struct tgsi_full_instruction
*inst
)
280 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
281 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
282 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
283 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
284 struct tgsi_full_src_register
*src2
= &inst
->Src
[2];
285 struct tgsi_full_instruction new_inst
;
287 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
288 /* MAD tmpA, src0, src2, -src2 */
289 new_inst
= tgsi_default_full_instruction();
290 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
291 new_inst
.Instruction
.NumDstRegs
= 1;
292 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
293 new_inst
.Instruction
.NumSrcRegs
= 3;
294 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
295 reg_src(&new_inst
.Src
[1], src2
, SWIZ(X
, Y
, Z
, W
));
296 reg_src(&new_inst
.Src
[2], src2
, SWIZ(X
, Y
, Z
, W
));
297 new_inst
.Src
[2].Register
.Negate
= !new_inst
.Src
[2].Register
.Negate
;
298 tctx
->emit_instruction(tctx
, &new_inst
);
300 /* MAD dst, src0, src1, -tmpA */
301 new_inst
= tgsi_default_full_instruction();
302 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
303 new_inst
.Instruction
.NumDstRegs
= 1;
304 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
305 new_inst
.Instruction
.NumSrcRegs
= 3;
306 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
307 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
, Y
, Z
, W
));
308 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
309 new_inst
.Src
[2].Register
.Negate
= true;
310 tctx
->emit_instruction(tctx
, &new_inst
);
315 * dst.x = src.x - \lfloor src.x\rfloor
316 * dst.y = src.y - \lfloor src.y\rfloor
317 * dst.z = src.z - \lfloor src.z\rfloor
318 * dst.w = src.w - \lfloor src.w\rfloor
324 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
327 transform_frc(struct tgsi_transform_context
*tctx
,
328 struct tgsi_full_instruction
*inst
)
330 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
331 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
332 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
333 struct tgsi_full_instruction new_inst
;
335 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
337 new_inst
= tgsi_default_full_instruction();
338 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
339 new_inst
.Instruction
.NumDstRegs
= 1;
340 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
341 new_inst
.Instruction
.NumSrcRegs
= 1;
342 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, Z
, W
));
343 tctx
->emit_instruction(tctx
, &new_inst
);
345 /* SUB dst, src, tmpA */
346 new_inst
= tgsi_default_full_instruction();
347 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
348 new_inst
.Instruction
.NumDstRegs
= 1;
349 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
350 new_inst
.Instruction
.NumSrcRegs
= 2;
351 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, Z
, W
));
352 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
353 new_inst
.Src
[1].Register
.Negate
= 1;
354 tctx
->emit_instruction(tctx
, &new_inst
);
359 * dst.x = src0.x^{src1.x}
360 * dst.y = src0.x^{src1.x}
361 * dst.z = src0.x^{src1.x}
362 * dst.w = src0.x^{src1.x}
366 * MUL tmpA.x, src1.x, tmpA.x
369 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
372 transform_pow(struct tgsi_transform_context
*tctx
,
373 struct tgsi_full_instruction
*inst
)
375 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
376 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
377 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
378 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
379 struct tgsi_full_instruction new_inst
;
381 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
382 /* LG2 tmpA.x, src0.x */
383 new_inst
= tgsi_default_full_instruction();
384 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
385 new_inst
.Instruction
.NumDstRegs
= 1;
386 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
387 new_inst
.Instruction
.NumSrcRegs
= 1;
388 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, _
, _
, _
));
389 tctx
->emit_instruction(tctx
, &new_inst
);
391 /* MUL tmpA.x, src1.x, tmpA.x */
392 new_inst
= tgsi_default_full_instruction();
393 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
394 new_inst
.Instruction
.NumDstRegs
= 1;
395 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
396 new_inst
.Instruction
.NumSrcRegs
= 2;
397 reg_src(&new_inst
.Src
[0], src1
, SWIZ(X
, _
, _
, _
));
398 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
399 tctx
->emit_instruction(tctx
, &new_inst
);
401 /* EX2 dst, tmpA.x */
402 new_inst
= tgsi_default_full_instruction();
403 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
404 new_inst
.Instruction
.NumDstRegs
= 1;
405 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
406 new_inst
.Instruction
.NumSrcRegs
= 1;
407 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
408 tctx
->emit_instruction(tctx
, &new_inst
);
412 /* LIT - Light Coefficients
414 * dst.y = max(src.x, 0.0)
415 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
418 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
419 * MAX tmpA.xy, src.xy, imm{0.0}
420 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
422 * MUL tmpA.y, tmpA.z, tmpA.y
424 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
425 * MOV dst.yz, tmpA.xy
426 * MOV dst.xw, imm{1.0}
428 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
429 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
432 transform_lit(struct tgsi_transform_context
*tctx
,
433 struct tgsi_full_instruction
*inst
)
435 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
436 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
437 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
438 struct tgsi_full_instruction new_inst
;
440 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_YZ
) {
441 /* MAX tmpA.xy, src.xy, imm{0.0} */
442 new_inst
= tgsi_default_full_instruction();
443 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAX
;
444 new_inst
.Instruction
.NumDstRegs
= 1;
445 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XY
);
446 new_inst
.Instruction
.NumSrcRegs
= 2;
447 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, Y
, _
, _
));
448 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(X
, X
, _
, _
));
449 tctx
->emit_instruction(tctx
, &new_inst
);
451 /* MIN tmpA.z, src.w, imm{128.0} */
452 new_inst
= tgsi_default_full_instruction();
453 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MIN
;
454 new_inst
.Instruction
.NumDstRegs
= 1;
455 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
456 new_inst
.Instruction
.NumSrcRegs
= 2;
457 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, _
, W
, _
));
458 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(_
, _
, Z
, _
));
459 tctx
->emit_instruction(tctx
, &new_inst
);
461 /* MAX tmpA.z, tmpA.z, -imm{128.0} */
462 new_inst
= tgsi_default_full_instruction();
463 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAX
;
464 new_inst
.Instruction
.NumDstRegs
= 1;
465 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
466 new_inst
.Instruction
.NumSrcRegs
= 2;
467 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, _
, Z
, _
));
468 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(_
, _
, Z
, _
));
469 new_inst
.Src
[1].Register
.Negate
= true;
470 tctx
->emit_instruction(tctx
, &new_inst
);
472 /* LG2 tmpA.y, tmpA.y */
473 new_inst
= tgsi_default_full_instruction();
474 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
475 new_inst
.Instruction
.NumDstRegs
= 1;
476 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
477 new_inst
.Instruction
.NumSrcRegs
= 1;
478 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, _
, _
));
479 tctx
->emit_instruction(tctx
, &new_inst
);
481 /* MUL tmpA.y, tmpA.z, tmpA.y */
482 new_inst
= tgsi_default_full_instruction();
483 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
484 new_inst
.Instruction
.NumDstRegs
= 1;
485 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
486 new_inst
.Instruction
.NumSrcRegs
= 2;
487 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, Z
, _
, _
));
488 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Y
, _
, _
));
489 tctx
->emit_instruction(tctx
, &new_inst
);
491 /* EX2 tmpA.y, tmpA.y */
492 new_inst
= tgsi_default_full_instruction();
493 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
494 new_inst
.Instruction
.NumDstRegs
= 1;
495 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
496 new_inst
.Instruction
.NumSrcRegs
= 1;
497 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, _
, _
));
498 tctx
->emit_instruction(tctx
, &new_inst
);
500 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
501 new_inst
= tgsi_default_full_instruction();
502 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
503 new_inst
.Instruction
.NumDstRegs
= 1;
504 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
505 new_inst
.Instruction
.NumSrcRegs
= 3;
506 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, X
, _
, _
));
507 new_inst
.Src
[0].Register
.Negate
= true;
508 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Y
, _
, _
));
509 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
, X
, _
, _
));
510 tctx
->emit_instruction(tctx
, &new_inst
);
512 /* MOV dst.yz, tmpA.xy */
513 new_inst
= tgsi_default_full_instruction();
514 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
515 new_inst
.Instruction
.NumDstRegs
= 1;
516 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_YZ
);
517 new_inst
.Instruction
.NumSrcRegs
= 1;
518 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, Y
, _
));
519 tctx
->emit_instruction(tctx
, &new_inst
);
522 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XW
) {
523 /* MOV dst.xw, imm{1.0} */
524 new_inst
= tgsi_default_full_instruction();
525 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
526 new_inst
.Instruction
.NumDstRegs
= 1;
527 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XW
);
528 new_inst
.Instruction
.NumSrcRegs
= 1;
529 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
, _
, _
, Y
));
530 tctx
->emit_instruction(tctx
, &new_inst
);
534 /* EXP - Approximate Exponential Base 2
535 * dst.x = 2^{\lfloor src.x\rfloor}
536 * dst.y = src.x - \lfloor src.x\rfloor
540 * ; needs: 1 tmp, imm{1.0}
541 * if (lowering FLR) {
543 * SUB tmpA.x, src.x, tmpA.x
548 * SUB dst.y, src.x, tmpA.x
551 * MOV dst.w, imm{1.0}
553 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
554 NINST(1)+ NINST(1) - OINST(1))
557 transform_exp(struct tgsi_transform_context
*tctx
,
558 struct tgsi_full_instruction
*inst
)
560 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
561 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
562 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
563 struct tgsi_full_instruction new_inst
;
565 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
566 if (ctx
->config
->lower_FLR
) {
567 /* FRC tmpA.x, src.x */
568 new_inst
= tgsi_default_full_instruction();
569 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FRC
;
570 new_inst
.Instruction
.NumDstRegs
= 1;
571 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
572 new_inst
.Instruction
.NumSrcRegs
= 1;
573 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
574 tctx
->emit_instruction(tctx
, &new_inst
);
576 /* SUB tmpA.x, src.x, tmpA.x */
577 new_inst
= tgsi_default_full_instruction();
578 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
579 new_inst
.Instruction
.NumDstRegs
= 1;
580 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
581 new_inst
.Instruction
.NumSrcRegs
= 2;
582 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
583 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
584 new_inst
.Src
[1].Register
.Negate
= 1;
585 tctx
->emit_instruction(tctx
, &new_inst
);
587 /* FLR tmpA.x, src.x */
588 new_inst
= tgsi_default_full_instruction();
589 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
590 new_inst
.Instruction
.NumDstRegs
= 1;
591 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
592 new_inst
.Instruction
.NumSrcRegs
= 1;
593 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
594 tctx
->emit_instruction(tctx
, &new_inst
);
598 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
599 /* EX2 tmpA.y, src.x */
600 new_inst
= tgsi_default_full_instruction();
601 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
602 new_inst
.Instruction
.NumDstRegs
= 1;
603 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
604 new_inst
.Instruction
.NumSrcRegs
= 1;
605 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
606 tctx
->emit_instruction(tctx
, &new_inst
);
609 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
610 /* SUB dst.y, src.x, tmpA.x */
611 new_inst
= tgsi_default_full_instruction();
612 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
613 new_inst
.Instruction
.NumDstRegs
= 1;
614 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
615 new_inst
.Instruction
.NumSrcRegs
= 2;
616 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, X
, _
, _
));
617 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
618 new_inst
.Src
[1].Register
.Negate
= 1;
619 tctx
->emit_instruction(tctx
, &new_inst
);
622 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
623 /* EX2 dst.x, tmpA.x */
624 new_inst
= tgsi_default_full_instruction();
625 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
626 new_inst
.Instruction
.NumDstRegs
= 1;
627 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
628 new_inst
.Instruction
.NumSrcRegs
= 1;
629 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, _
, _
, _
));
630 tctx
->emit_instruction(tctx
, &new_inst
);
633 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
634 /* MOV dst.z, tmpA.y */
635 new_inst
= tgsi_default_full_instruction();
636 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
637 new_inst
.Instruction
.NumDstRegs
= 1;
638 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
639 new_inst
.Instruction
.NumSrcRegs
= 1;
640 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, _
, Y
, _
));
641 tctx
->emit_instruction(tctx
, &new_inst
);
644 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
645 /* MOV dst.w, imm{1.0} */
646 new_inst
= tgsi_default_full_instruction();
647 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
648 new_inst
.Instruction
.NumDstRegs
= 1;
649 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
650 new_inst
.Instruction
.NumSrcRegs
= 1;
651 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, _
, Y
));
652 tctx
->emit_instruction(tctx
, &new_inst
);
656 /* LOG - Approximate Logarithm Base 2
657 * dst.x = \lfloor\log_2{|src.x|}\rfloor
658 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
659 * dst.z = \log_2{|src.x|}
662 * ; needs: 1 tmp, imm{1.0}
663 * LG2 tmpA.x, |src.x|
664 * if (lowering FLR) {
666 * SUB tmpA.y, tmpA.x, tmpA.y
672 * MUL dst.y, |src.x|, tmpA.z
673 * MOV dst.xz, tmpA.yx
674 * MOV dst.w, imm{1.0}
676 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
677 NINST(2) + NINST(1) + NINST(1) - OINST(1))
680 transform_log(struct tgsi_transform_context
*tctx
,
681 struct tgsi_full_instruction
*inst
)
683 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
684 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
685 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
686 struct tgsi_full_instruction new_inst
;
688 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
689 /* LG2 tmpA.x, |src.x| */
690 new_inst
= tgsi_default_full_instruction();
691 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
692 new_inst
.Instruction
.NumDstRegs
= 1;
693 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
694 new_inst
.Instruction
.NumSrcRegs
= 1;
695 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
, _
, _
, _
));
696 new_inst
.Src
[0].Register
.Absolute
= true;
697 tctx
->emit_instruction(tctx
, &new_inst
);
700 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
701 if (ctx
->config
->lower_FLR
) {
702 /* FRC tmpA.y, tmpA.x */
703 new_inst
= tgsi_default_full_instruction();
704 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FRC
;
705 new_inst
.Instruction
.NumDstRegs
= 1;
706 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
707 new_inst
.Instruction
.NumSrcRegs
= 1;
708 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
709 tctx
->emit_instruction(tctx
, &new_inst
);
711 /* SUB tmpA.y, tmpA.x, tmpA.y */
712 new_inst
= tgsi_default_full_instruction();
713 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
714 new_inst
.Instruction
.NumDstRegs
= 1;
715 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
716 new_inst
.Instruction
.NumSrcRegs
= 2;
717 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
718 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Y
, _
, _
));
719 new_inst
.Src
[1].Register
.Negate
= 1;
720 tctx
->emit_instruction(tctx
, &new_inst
);
722 /* FLR tmpA.y, tmpA.x */
723 new_inst
= tgsi_default_full_instruction();
724 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
725 new_inst
.Instruction
.NumDstRegs
= 1;
726 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
727 new_inst
.Instruction
.NumSrcRegs
= 1;
728 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
, X
, _
, _
));
729 tctx
->emit_instruction(tctx
, &new_inst
);
733 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
734 /* EX2 tmpA.z, tmpA.y */
735 new_inst
= tgsi_default_full_instruction();
736 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
737 new_inst
.Instruction
.NumDstRegs
= 1;
738 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
739 new_inst
.Instruction
.NumSrcRegs
= 1;
740 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, _
, _
));
741 tctx
->emit_instruction(tctx
, &new_inst
);
743 /* RCP tmpA.z, tmpA.z */
744 new_inst
= tgsi_default_full_instruction();
745 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
746 new_inst
.Instruction
.NumDstRegs
= 1;
747 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
748 new_inst
.Instruction
.NumSrcRegs
= 1;
749 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Z
, _
, _
, _
));
750 tctx
->emit_instruction(tctx
, &new_inst
);
752 /* MUL dst.y, |src.x|, tmpA.z */
753 new_inst
= tgsi_default_full_instruction();
754 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
755 new_inst
.Instruction
.NumDstRegs
= 1;
756 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
757 new_inst
.Instruction
.NumSrcRegs
= 2;
758 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
, X
, _
, _
));
759 new_inst
.Src
[0].Register
.Absolute
= true;
760 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
, Z
, _
, _
));
761 tctx
->emit_instruction(tctx
, &new_inst
);
764 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XZ
) {
765 /* MOV dst.xz, tmpA.yx */
766 new_inst
= tgsi_default_full_instruction();
767 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
768 new_inst
.Instruction
.NumDstRegs
= 1;
769 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XZ
);
770 new_inst
.Instruction
.NumSrcRegs
= 1;
771 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
, _
, X
, _
));
772 tctx
->emit_instruction(tctx
, &new_inst
);
775 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
776 /* MOV dst.w, imm{1.0} */
777 new_inst
= tgsi_default_full_instruction();
778 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
779 new_inst
.Instruction
.NumDstRegs
= 1;
780 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
781 new_inst
.Instruction
.NumSrcRegs
= 1;
782 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
, _
, _
, Y
));
783 tctx
->emit_instruction(tctx
, &new_inst
);
787 /* DP4 - 4-component Dot Product
788 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
790 * DP3 - 3-component Dot Product
791 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
793 * DP2 - 2-component Dot Product
794 * dst = src0.x \times src1.x + src0.y \times src1.y
796 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
797 * operations, which is what you'd prefer for a ISA that is natively
798 * scalar. Probably a native vector ISA would at least already have
799 * DP4/DP3 instructions, but perhaps there is room for an alternative
800 * translation for DP2 using vector instructions.
803 * MUL tmpA.x, src0.x, src1.x
804 * MAD tmpA.x, src0.y, src1.y, tmpA.x
806 * MAD tmpA.x, src0.z, src1.z, tmpA.x
808 * MAD tmpA.x, src0.w, src1.w, tmpA.x
811 * ; fixup last instruction to replicate into dst
813 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
814 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
815 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
818 transform_dotp(struct tgsi_transform_context
*tctx
,
819 struct tgsi_full_instruction
*inst
)
821 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
822 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
823 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
824 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
825 struct tgsi_full_instruction new_inst
;
826 enum tgsi_opcode opcode
= inst
->Instruction
.Opcode
;
828 /* NOTE: any potential last instruction must replicate src on all
829 * components (since it could be re-written to write to final dst)
832 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
833 /* MUL tmpA.x, src0.x, src1.x */
834 new_inst
= tgsi_default_full_instruction();
835 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
836 new_inst
.Instruction
.NumDstRegs
= 1;
837 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
838 new_inst
.Instruction
.NumSrcRegs
= 2;
839 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, _
, _
, _
));
840 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
, _
, _
, _
));
841 tctx
->emit_instruction(tctx
, &new_inst
);
843 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
844 new_inst
= tgsi_default_full_instruction();
845 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
846 new_inst
.Instruction
.NumDstRegs
= 1;
847 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
848 new_inst
.Instruction
.NumSrcRegs
= 3;
849 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
, Y
, Y
, Y
));
850 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Y
, Y
, Y
, Y
));
851 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
853 if ((opcode
== TGSI_OPCODE_DP3
) ||
854 (opcode
== TGSI_OPCODE_DP4
)) {
855 tctx
->emit_instruction(tctx
, &new_inst
);
857 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
858 new_inst
= tgsi_default_full_instruction();
859 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
860 new_inst
.Instruction
.NumDstRegs
= 1;
861 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
862 new_inst
.Instruction
.NumSrcRegs
= 3;
863 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Z
, Z
, Z
, Z
));
864 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
, Z
, Z
, Z
));
865 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
867 if (opcode
== TGSI_OPCODE_DP4
) {
868 tctx
->emit_instruction(tctx
, &new_inst
);
870 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
871 new_inst
= tgsi_default_full_instruction();
872 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
873 new_inst
.Instruction
.NumDstRegs
= 1;
874 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
875 new_inst
.Instruction
.NumSrcRegs
= 3;
876 reg_src(&new_inst
.Src
[0], src0
, SWIZ(W
, W
, W
, W
));
877 reg_src(&new_inst
.Src
[1], src1
, SWIZ(W
, W
, W
, W
));
878 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, X
, X
, X
));
882 /* fixup last instruction to write to dst: */
883 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
885 tctx
->emit_instruction(tctx
, &new_inst
);
889 /* FLR - floor, CEIL - ceil
899 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
900 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
/* Lowers a FLR or CEIL instruction to FRC + ADD, using tmpA as scratch.
 * The opcode read from inst selects the variant:
 *   FLR:  FRC tmpA, src    then  ADD dst, src, -tmpA
 *   CEIL: FRC tmpA, -src   then  ADD dst, src, tmpA
 *
 * tctx: transform context; it is downcast to the lowering context and
 *       its emit_instruction hook is used for both new instructions.
 * inst: the instruction being lowered; Dst[0] and Src[0] are read.
 *
 * Nothing is emitted when none of the XYZW bits are set in the
 * destination write mask.
 */
904 transform_flr_ceil(struct tgsi_transform_context
*tctx
,
905 struct tgsi_full_instruction
*inst
)
907 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
908 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
909 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
910 struct tgsi_full_instruction new_inst
;
911 enum tgsi_opcode opcode
= inst
->Instruction
.Opcode
;
913 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
914 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
915 new_inst
= tgsi_default_full_instruction();
916 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FRC
;
917 new_inst
.Instruction
.NumDstRegs
= 1;
918 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
919 new_inst
.Instruction
.NumSrcRegs
= 1;
920 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
/* CEIL takes the fraction of the negated source.  The negate bit is
 * toggled rather than set, so a source that was already negated ends
 * up un-negated.
 */
922 if (opcode
== TGSI_OPCODE_CEIL
)
923 new_inst
.Src
[0].Register
.Negate
= !new_inst
.Src
[0].Register
.Negate
;
924 tctx
->emit_instruction(tctx
, &new_inst
);
926 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
927 new_inst
= tgsi_default_full_instruction();
928 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
929 new_inst
.Instruction
.NumDstRegs
= 1;
930 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
931 new_inst
.Instruction
.NumSrcRegs
= 2;
932 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
933 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
/* the FLR subtraction is expressed as ADD with a negated second operand */
934 if (opcode
== TGSI_OPCODE_FLR
)
935 new_inst
.Src
[1].Register
.Negate
= 1;
936 tctx
->emit_instruction(tctx
, &new_inst
);
940 /* TRUNC - truncate off fractional part
941 * dst.x = trunc(src.x)
942 * dst.y = trunc(src.y)
943 * dst.z = trunc(src.z)
944 * dst.w = trunc(src.w)
949 * SUB tmpA, |src|, tmpA
953 * CMP dst, src, -tmpA, tmpA
955 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
/* Lowers a TRUNC instruction, using tmpA as scratch.
 *
 * tmpA is first loaded with a value computed from |src| (Absolute is
 * set and Negate cleared on every use of src0 for that step):
 *   - under config->lower_FLR:  FRC tmpA, |src|  then
 *                               ADD tmpA, |src|, -tmpA
 *   - FLR tmpA, |src|
 *     NOTE(review): the branch structure between the FRC/ADD sequence
 *     and the FLR is not visible in this excerpt; presumably the FLR is
 *     the else arm of the lower_FLR test -- confirm.
 * Finally CMP dst, src, -tmpA, tmpA writes the result to dst.
 *
 * tctx: transform context (downcast to the lowering context).
 * inst: the TRUNC instruction; Dst[0] and Src[0] are read.
 *
 * Nothing is emitted when none of the XYZW bits are set in the
 * destination write mask.
 */
958 transform_trunc(struct tgsi_transform_context
*tctx
,
959 struct tgsi_full_instruction
*inst
)
961 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
962 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
963 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
964 struct tgsi_full_instruction new_inst
;
966 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
967 if (ctx
->config
->lower_FLR
) {
/* FRC tmpA, |src| */
968 new_inst
= tgsi_default_full_instruction();
969 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FRC
;
970 new_inst
.Instruction
.NumDstRegs
= 1;
971 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
972 new_inst
.Instruction
.NumSrcRegs
= 1;
973 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
974 new_inst
.Src
[0].Register
.Absolute
= true;
975 new_inst
.Src
[0].Register
.Negate
= false;
976 tctx
->emit_instruction(tctx
, &new_inst
);
/* ADD tmpA, |src|, -tmpA */
978 new_inst
= tgsi_default_full_instruction();
979 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
980 new_inst
.Instruction
.NumDstRegs
= 1;
981 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
982 new_inst
.Instruction
.NumSrcRegs
= 2;
983 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
984 new_inst
.Src
[0].Register
.Absolute
= true;
985 new_inst
.Src
[0].Register
.Negate
= false;
986 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
987 new_inst
.Src
[1].Register
.Negate
= 1;
988 tctx
->emit_instruction(tctx
, &new_inst
);
/* FLR tmpA, |src| */
990 new_inst
= tgsi_default_full_instruction();
991 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
992 new_inst
.Instruction
.NumDstRegs
= 1;
993 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
994 new_inst
.Instruction
.NumSrcRegs
= 1;
995 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
996 new_inst
.Src
[0].Register
.Absolute
= true;
997 new_inst
.Src
[0].Register
.Negate
= false;
998 tctx
->emit_instruction(tctx
, &new_inst
);
/* CMP dst, src, -tmpA, tmpA : the original (signed) src is the
 * condition operand, choosing between -tmpA and tmpA
 */
1001 new_inst
= tgsi_default_full_instruction();
1002 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
1003 new_inst
.Instruction
.NumDstRegs
= 1;
1004 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
1005 new_inst
.Instruction
.NumSrcRegs
= 3;
1006 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
, Y
, Z
, W
));
1007 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
1008 new_inst
.Src
[1].Register
.Negate
= true;
1009 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
1010 tctx
->emit_instruction(tctx
, &new_inst
);
1014 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
1015 * in the case of TXP, the clamping must happen *after* projection, so
1016 * we need to lower TXP to TEX.
1020 * ; do perspective division manually before clamping:
1022 * MUL tmpA.<pmask>, tmpA, tmpB.xxxx
1025 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
1026 * <opc> dst, tmpA, ...
1028 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
/* Rewrites a texture sampling instruction so that its coordinate is
 * read from tmpA after optional manual projection and clamping.
 * Within this file it is invoked for TEX, TXP, TXB, TXB2 and TXL.
 *
 * Visible steps:
 *   - the sampler operand is taken from Src[2] for TXB2; Src[1] is the
 *     other operand assigned here (NOTE(review): the else line is not
 *     visible in this excerpt -- confirm);
 *   - smask is the sampler index as a bit; the instruction is only of
 *     interest when that bit is set in ctx->saturate or when this is a
 *     TXP whose texture target bit is set in config->lower_TXP;
 *   - mask collects the components to clamp (per config->saturate_s/t/r),
 *     pmask the components to project;
 *   - MOV tmpA, coord;
 *   - for TXP: RCP tmpB.x, tmpA.w; MUL tmpA.<pmask>, tmpA, tmpB.xxxx;
 *     and the opcode becomes TEX;
 *   - a second create_mov() of tmpA onto itself restricted to <mask>,
 *     with last argument 1 (presumably the saturate flag -- confirm
 *     against create_mov);
 *   - the instruction is emitted with the (possibly changed) opcode and
 *     Src[0] replaced by tmpA.
 *
 * tctx: transform context (downcast to the lowering context).
 * inst: the texture instruction being examined.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * The caller tests the result for non-zero; presumably non-zero means
 * "not lowered, emit the original instruction" -- confirm.
 */
1031 transform_samp(struct tgsi_transform_context
*tctx
,
1032 struct tgsi_full_instruction
*inst
)
1034 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1035 struct tgsi_full_src_register
*coord
= &inst
->Src
[0];
1036 struct tgsi_full_src_register
*samp
;
1037 struct tgsi_full_instruction new_inst
;
1038 /* mask is clamped coords, pmask is all coords (for projection): */
1039 unsigned mask
= 0, pmask
= 0, smask
;
1040 unsigned tex
= inst
->Texture
.Texture
;
1041 enum tgsi_opcode opcode
= inst
->Instruction
.Opcode
;
/* config->lower_TXP is tested as a bitmask indexed by texture target */
1042 bool lower_txp
= (opcode
== TGSI_OPCODE_TXP
) &&
1043 (ctx
->config
->lower_TXP
& (1 << tex
));
1045 if (opcode
== TGSI_OPCODE_TXB2
) {
1046 samp
= &inst
->Src
[2];
1048 samp
= &inst
->Src
[1];
1051 /* convert sampler # to bitmask to test: */
/* NOTE(review): the shift is done on a signed int constant; a sampler
 * index of 31 would shift into the sign bit -- confirm the index range.
 */
1052 smask
= 1 << samp
->Register
.Index
;
1054 /* check if we actually need to lower this one: */
1055 if (!(ctx
->saturate
& smask
) && !lower_txp
)
1058 /* figure out which coordinates need saturating:
1059 * - RECT textures should not get saturated
1060 * - array index coords should not get saturated
1063 case TGSI_TEXTURE_3D
:
1064 case TGSI_TEXTURE_CUBE
:
1065 case TGSI_TEXTURE_CUBE_ARRAY
:
1066 case TGSI_TEXTURE_SHADOWCUBE
:
1067 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
1068 if (ctx
->config
->saturate_r
& smask
)
1069 mask
|= TGSI_WRITEMASK_Z
;
1070 pmask
|= TGSI_WRITEMASK_Z
;
1073 case TGSI_TEXTURE_2D
:
1074 case TGSI_TEXTURE_2D_ARRAY
:
1075 case TGSI_TEXTURE_SHADOW2D
:
1076 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
1077 case TGSI_TEXTURE_2D_MSAA
:
1078 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
1079 if (ctx
->config
->saturate_t
& smask
)
1080 mask
|= TGSI_WRITEMASK_Y
;
1081 pmask
|= TGSI_WRITEMASK_Y
;
1084 case TGSI_TEXTURE_1D
:
1085 case TGSI_TEXTURE_1D_ARRAY
:
1086 case TGSI_TEXTURE_SHADOW1D
:
1087 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
1088 if (ctx
->config
->saturate_s
& smask
)
1089 mask
|= TGSI_WRITEMASK_X
;
1090 pmask
|= TGSI_WRITEMASK_X
;
1093 case TGSI_TEXTURE_RECT
:
1094 case TGSI_TEXTURE_SHADOWRECT
:
1095 /* we don't saturate, but in case of lower_txp we
1096 * still need to do the perspective divide:
1098 pmask
= TGSI_WRITEMASK_XY
;
1102 /* sanity check.. driver could be asking to saturate a non-
1103 * existent coordinate component:
1105 if (!mask
&& !lower_txp
)
1108 /* MOV tmpA, src0 */
1109 create_mov(tctx
, &ctx
->tmp
[A
].dst
, coord
, TGSI_WRITEMASK_XYZW
, 0);
1111 /* This is a bit sad.. we need to clamp *after* the coords
1112 * are projected, which means lowering TXP to TEX and doing
1113 * the projection ourself. But since I haven't figured out
1114 * how to make the lowering code deliver an electric shock
1115 * to anyone using GL_CLAMP, we must do this instead:
1117 if (opcode
== TGSI_OPCODE_TXP
) {
1118 /* RCP tmpB.x tmpA.w */
1119 new_inst
= tgsi_default_full_instruction();
1120 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
1121 new_inst
.Instruction
.NumDstRegs
= 1;
1122 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_X
);
1123 new_inst
.Instruction
.NumSrcRegs
= 1;
1124 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(W
, _
, _
, _
));
1125 tctx
->emit_instruction(tctx
, &new_inst
);
1127 /* MUL tmpA.<pmask>, tmpA, tmpB.xxxx */
1128 new_inst
= tgsi_default_full_instruction();
1129 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
1130 new_inst
.Instruction
.NumDstRegs
= 1;
1131 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, pmask
);
1132 new_inst
.Instruction
.NumSrcRegs
= 2;
1133 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
, Y
, Z
, W
));
1134 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
, X
, X
, X
));
1135 tctx
->emit_instruction(tctx
, &new_inst
);
/* projection has been done by hand, so sample with plain TEX */
1137 opcode
= TGSI_OPCODE_TEX
;
1140 /* MOV_SAT tmpA.<mask>, tmpA */
1142 create_mov(tctx
, &ctx
->tmp
[A
].dst
, &ctx
->tmp
[A
].src
, mask
, 1);
1145 /* modify the texture samp instruction to take fixed up coord: */
1147 new_inst
.Instruction
.Opcode
= opcode
;
1148 new_inst
.Src
[0] = ctx
->tmp
[A
].src
;
1149 tctx
->emit_instruction(tctx
, &new_inst
);
1154 /* Two-sided color emulation:
1155 * For each COLOR input, create a corresponding BCOLOR input, plus
1156 * CMP instruction to select front or back color based on FACE
1158 #define TWOSIDE_GROW(n) ( \
1160 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1161 ((n) * 1) + /* TEMP[] */ \
1162 ((n) * NINST(3)) /* CMP instr */ \
/* Emits the extra declarations and instructions used for two-sided
 * color emulation.  With inbase = file_max[INPUT] + 1 and
 * tmpbase = file_max[TEMPORARY] + 1, it emits, in this order:
 *   1. one BCOLOR input declaration per entry of ctx->two_side_idx[],
 *      at IN[inbase + i], copying the semantic index and interpolation
 *      settings of the corresponding existing input;
 *   2. if ctx->face_idx is -1 (and there is at least one color), a FACE
 *      input declaration at IN[inbase + two_side_colors], whose index
 *      is then stored in ctx->face_idx;
 *   3. one temporary declaration per color, at
 *      TEMP[tmpbase + ctx->numtmp + i];
 *   4. one CMP per color:
 *        CMP TEMP[tmpbase + numtmp + i], IN[face_idx].xxxx,
 *            IN[inbase + i], IN[two_side_idx[i]]
 *
 * tctx: transform context (downcast to the lowering context); its
 *       emit_declaration / emit_instruction hooks receive the output.
 */
1166 emit_twoside(struct tgsi_transform_context
*tctx
)
1168 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1169 struct tgsi_shader_info
*info
= ctx
->info
;
1170 struct tgsi_full_declaration decl
;
1171 struct tgsi_full_instruction new_inst
;
1172 unsigned inbase
, tmpbase
;
/* first free input / temporary register index of the scanned shader */
1175 inbase
= info
->file_max
[TGSI_FILE_INPUT
] + 1;
1176 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1178 /* additional inputs for BCOLOR's */
1179 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1180 unsigned in_idx
= ctx
->two_side_idx
[i
];
1181 decl
= tgsi_default_full_declaration();
1182 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1183 decl
.Declaration
.Semantic
= true;
1184 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ i
;
1185 decl
.Semantic
.Name
= TGSI_SEMANTIC_BCOLOR
;
1186 decl
.Semantic
.Index
= info
->input_semantic_index
[in_idx
];
1187 decl
.Declaration
.Interpolate
= true;
1188 decl
.Interp
.Interpolate
= info
->input_interpolate
[in_idx
];
1189 decl
.Interp
.Location
= info
->input_interpolate_loc
[in_idx
];
1190 decl
.Interp
.CylindricalWrap
= info
->input_cylindrical_wrap
[in_idx
];
1191 tctx
->emit_declaration(tctx
, &decl
);
1194 /* additional input for FACE */
1195 if (ctx
->two_side_colors
&& (ctx
->face_idx
== -1)) {
1196 decl
= tgsi_default_full_declaration();
1197 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1198 decl
.Declaration
.Semantic
= true;
1199 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ ctx
->two_side_colors
;
1200 decl
.Semantic
.Name
= TGSI_SEMANTIC_FACE
;
1201 decl
.Semantic
.Index
= 0;
1202 tctx
->emit_declaration(tctx
, &decl
);
/* remember where FACE lives for the CMP instructions below */
1204 ctx
->face_idx
= decl
.Range
.First
;
1207 /* additional temps for COLOR/BCOLOR selection: */
1208 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1209 decl
= tgsi_default_full_declaration();
1210 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1211 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ ctx
->numtmp
+ i
;
1212 tctx
->emit_declaration(tctx
, &decl
);
1215 /* and finally additional instructions to select COLOR/BCOLOR: */
1216 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1217 new_inst
= tgsi_default_full_instruction();
1218 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
/* dst: the per-color temporary declared above */
1220 new_inst
.Instruction
.NumDstRegs
= 1;
1221 new_inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
1222 new_inst
.Dst
[0].Register
.Index
= tmpbase
+ ctx
->numtmp
+ i
;
1223 new_inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
/* src0: FACE.xxxx (the condition operand) */
1225 new_inst
.Instruction
.NumSrcRegs
= 3;
1226 new_inst
.Src
[0].Register
.File
= TGSI_FILE_INPUT
;
1227 new_inst
.Src
[0].Register
.Index
= ctx
->face_idx
;
1228 new_inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1229 new_inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
1230 new_inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
1231 new_inst
.Src
[0].Register
.SwizzleW
= TGSI_SWIZZLE_X
;
/* src1: the BCOLOR input declared above, identity swizzle */
1232 new_inst
.Src
[1].Register
.File
= TGSI_FILE_INPUT
;
1233 new_inst
.Src
[1].Register
.Index
= inbase
+ i
;
1234 new_inst
.Src
[1].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1235 new_inst
.Src
[1].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1236 new_inst
.Src
[1].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1237 new_inst
.Src
[1].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
/* src2: the shader's existing input recorded in two_side_idx[] */
1238 new_inst
.Src
[2].Register
.File
= TGSI_FILE_INPUT
;
1239 new_inst
.Src
[2].Register
.Index
= ctx
->two_side_idx
[i
];
1240 new_inst
.Src
[2].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1241 new_inst
.Src
[2].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1242 new_inst
.Src
[2].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1243 new_inst
.Src
[2].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1245 tctx
->emit_instruction(tctx
, &new_inst
);
/* Emits the immediate and the scratch temporaries used by the lowering
 * routines, and fills in the register templates kept in the context.
 *
 * With tmpbase = file_max[TEMPORARY] + 1:
 *   - ctx->color_base is set to tmpbase + ctx->numtmp;
 *   - one immediate {0.0, 1.0, 128.0, 0.0} is emitted and ctx->imm is
 *     set up as an IMMEDIATE source with identity swizzle at index
 *     info->immediate_count;
 *   - for each i < ctx->numtmp a temporary TEMP[tmpbase + i] is
 *     declared and ctx->tmp[i].src / ctx->tmp[i].dst are pointed at it
 *     (identity swizzle / XYZW write mask).
 *
 * NOTE(review): the statement guarded by the final
 * `if (ctx->two_side_colors)` is not visible in this excerpt;
 * presumably it emits the two-sided color declarations -- confirm.
 *
 * tctx: transform context (downcast to the lowering context).
 */
1250 emit_decls(struct tgsi_transform_context
*tctx
)
1252 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1253 struct tgsi_shader_info
*info
= ctx
->info
;
1254 struct tgsi_full_declaration decl
;
1255 struct tgsi_full_immediate immed
;
1259 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
/* the two-sided color temporaries start right after the scratch temps */
1261 ctx
->color_base
= tmpbase
+ ctx
->numtmp
;
1263 /* declare immediate: */
1264 immed
= tgsi_default_full_immediate();
1265 immed
.Immediate
.NrTokens
= 1 + 4; /* one for the token itself */
1266 immed
.u
[0].Float
= 0.0;
1267 immed
.u
[1].Float
= 1.0;
1268 immed
.u
[2].Float
= 128.0;
1269 immed
.u
[3].Float
= 0.0;
1270 tctx
->emit_immediate(tctx
, &immed
);
/* source template for the immediate.  Presumably immediate_count (from
 * the scan of the input shader) is the index the new immediate gets
 * -- confirm.
 */
1272 ctx
->imm
.Register
.File
= TGSI_FILE_IMMEDIATE
;
1273 ctx
->imm
.Register
.Index
= info
->immediate_count
;
1274 ctx
->imm
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1275 ctx
->imm
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1276 ctx
->imm
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1277 ctx
->imm
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1279 /* declare temp regs: */
1280 for (i
= 0; i
< ctx
->numtmp
; i
++) {
1281 decl
= tgsi_default_full_declaration();
1282 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1283 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ i
;
1284 tctx
->emit_declaration(tctx
, &decl
);
/* source-side template for this temporary */
1286 ctx
->tmp
[i
].src
.Register
.File
= TGSI_FILE_TEMPORARY
;
1287 ctx
->tmp
[i
].src
.Register
.Index
= tmpbase
+ i
;
1288 ctx
->tmp
[i
].src
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1289 ctx
->tmp
[i
].src
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1290 ctx
->tmp
[i
].src
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1291 ctx
->tmp
[i
].src
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
/* destination-side template for the same temporary */
1293 ctx
->tmp
[i
].dst
.Register
.File
= TGSI_FILE_TEMPORARY
;
1294 ctx
->tmp
[i
].dst
.Register
.Index
= tmpbase
+ i
;
1295 ctx
->tmp
[i
].dst
.Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1298 if (ctx
->two_side_colors
)
/* Redirects reads of two-sided color inputs to their temporaries.
 *
 * Every source operand of inst that reads an INPUT register whose index
 * equals ctx->two_side_idx[j] is rewritten in place to read
 * TEMP[ctx->color_base + j] instead.  Other operands are left alone.
 *
 * ctx:  lowering context providing two_side_colors, two_side_idx[] and
 *       color_base.
 * inst: instruction whose source registers are modified in place.
 */
1303 rename_color_inputs(struct tgsi_lowering_context
*ctx
,
1304 struct tgsi_full_instruction
*inst
)
1307 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1308 struct tgsi_src_register
*src
= &inst
->Src
[i
].Register
;
1309 if (src
->File
== TGSI_FILE_INPUT
) {
1310 for (j
= 0; j
< ctx
->two_side_colors
; j
++) {
/* cast: two_side_idx[] is unsigned, the register index is compared as int */
1311 if (src
->Index
== (int)ctx
->two_side_idx
[j
]) {
/* swap the input for the temporary at the same position j */
1312 src
->File
= TGSI_FILE_TEMPORARY
;
1313 src
->Index
= ctx
->color_base
+ j
;
/* Per-instruction hook for the lowering pass (installed as
 * base.transform_instruction by tgsi_transform_lowering in this file).
 *
 * Visible behavior:
 *   - on the first call ctx->emitted_decls is still 0 and is set to 1
 *     (NOTE(review): the statement that actually emits the declarations
 *     is not visible in this excerpt -- confirm);
 *   - when two-sided color emulation is active, color input reads in
 *     inst are renamed via rename_color_inputs();
 *   - the opcode is then dispatched: DST, LRP, FRC, POW, LIT, EXP, LOG,
 *     DP4/DP3/DP2, FLR/CEIL and TRUNC each test the matching
 *     config->lower_* flag before calling their transform_* routine;
 *     TEX/TXP/TXB/TXB2/TXL go to transform_samp() and its result is
 *     tested;
 *   - the last visible statement emits inst itself
 *     (NOTE(review): the control flow that skips lowering and reaches
 *     this pass-through is not visible here; presumably it is taken
 *     when the lower_* flag is clear or transform_samp() returns
 *     non-zero -- confirm).
 *
 * tctx: transform context (downcast to the lowering context).
 * inst: the instruction read from the input shader.
 */
1323 transform_instr(struct tgsi_transform_context
*tctx
,
1324 struct tgsi_full_instruction
*inst
)
1326 struct tgsi_lowering_context
*ctx
= tgsi_lowering_context(tctx
);
1328 if (!ctx
->emitted_decls
) {
1330 ctx
->emitted_decls
= 1;
1333 /* if emulating two-sided-color, we need to re-write some
1336 if (ctx
->two_side_colors
)
1337 rename_color_inputs(ctx
, inst
);
1339 switch (inst
->Instruction
.Opcode
) {
1340 case TGSI_OPCODE_DST
:
1341 if (!ctx
->config
->lower_DST
)
1343 transform_dst(tctx
, inst
);
1345 case TGSI_OPCODE_LRP
:
1346 if (!ctx
->config
->lower_LRP
)
1348 transform_lrp(tctx
, inst
);
1350 case TGSI_OPCODE_FRC
:
1351 if (!ctx
->config
->lower_FRC
)
1353 transform_frc(tctx
, inst
);
1355 case TGSI_OPCODE_POW
:
1356 if (!ctx
->config
->lower_POW
)
1358 transform_pow(tctx
, inst
);
1360 case TGSI_OPCODE_LIT
:
1361 if (!ctx
->config
->lower_LIT
)
1363 transform_lit(tctx
, inst
);
1365 case TGSI_OPCODE_EXP
:
1366 if (!ctx
->config
->lower_EXP
)
1368 transform_exp(tctx
, inst
);
1370 case TGSI_OPCODE_LOG
:
1371 if (!ctx
->config
->lower_LOG
)
1373 transform_log(tctx
, inst
);
1375 case TGSI_OPCODE_DP4
:
1376 if (!ctx
->config
->lower_DP4
)
1378 transform_dotp(tctx
, inst
);
1380 case TGSI_OPCODE_DP3
:
1381 if (!ctx
->config
->lower_DP3
)
1383 transform_dotp(tctx
, inst
);
1385 case TGSI_OPCODE_DP2
:
1386 if (!ctx
->config
->lower_DP2
)
1388 transform_dotp(tctx
, inst
);
1390 case TGSI_OPCODE_FLR
:
1391 if (!ctx
->config
->lower_FLR
)
1393 transform_flr_ceil(tctx
, inst
);
1395 case TGSI_OPCODE_CEIL
:
1396 if (!ctx
->config
->lower_CEIL
)
1398 transform_flr_ceil(tctx
, inst
);
1400 case TGSI_OPCODE_TRUNC
:
1401 if (!ctx
->config
->lower_TRUNC
)
1403 transform_trunc(tctx
, inst
);
1405 case TGSI_OPCODE_TEX
:
1406 case TGSI_OPCODE_TXP
:
1407 case TGSI_OPCODE_TXB
:
1408 case TGSI_OPCODE_TXB2
:
1409 case TGSI_OPCODE_TXL
:
1410 if (transform_samp(tctx
, inst
))
1415 tctx
->emit_instruction(tctx
, inst
);
1420 /* returns NULL if no lowering required, else returns the new
1421 * tokens (which caller is required to free()). In either case
1422 * returns the current info.
1424 const struct tgsi_token
*
1425 tgsi_transform_lowering(const struct tgsi_lowering_config
*config
,
1426 const struct tgsi_token
*tokens
,
1427 struct tgsi_shader_info
*info
)
1429 struct tgsi_lowering_context ctx
;
1430 struct tgsi_token
*newtoks
;
1433 /* sanity check in case limit is ever increased: */
1434 STATIC_ASSERT((sizeof(config
->saturate_s
) * 8) >= PIPE_MAX_SAMPLERS
);
1436 /* sanity check the lowering */
1437 assert(!(config
->lower_FRC
&& (config
->lower_FLR
|| config
->lower_CEIL
)));
1438 assert(!(config
->lower_FRC
&& config
->lower_TRUNC
));
1440 memset(&ctx
, 0, sizeof(ctx
));
1441 ctx
.base
.transform_instruction
= transform_instr
;
1443 ctx
.config
= config
;
1445 tgsi_scan_shader(tokens
, info
);
1447 /* if we are adding fragment shader support to emulate two-sided
1448 * color, then figure out the number of additional inputs we need
1449 * to create for BCOLOR's..
1451 if ((info
->processor
== PIPE_SHADER_FRAGMENT
) &&
1452 config
->color_two_side
) {
1455 for (i
= 0; i
<= info
->file_max
[TGSI_FILE_INPUT
]; i
++) {
1456 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_COLOR
)
1457 ctx
.two_side_idx
[ctx
.two_side_colors
++] = i
;
1458 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_FACE
)
1463 ctx
.saturate
= config
->saturate_r
| config
->saturate_s
| config
->saturate_t
;
1465 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1466 /* if there are no instructions to lower, then we are done: */
1481 ctx
.two_side_colors
||
1486 _debug_printf("BEFORE:");
1487 tgsi_dump(tokens
, 0);
1491 newlen
= tgsi_num_tokens(tokens
);
1493 newlen
+= DST_GROW
* OPCS(DST
);
1494 numtmp
= MAX2(numtmp
, DST_TMP
);
1497 newlen
+= LRP_GROW
* OPCS(LRP
);
1498 numtmp
= MAX2(numtmp
, LRP_TMP
);
1501 newlen
+= FRC_GROW
* OPCS(FRC
);
1502 numtmp
= MAX2(numtmp
, FRC_TMP
);
1505 newlen
+= POW_GROW
* OPCS(POW
);
1506 numtmp
= MAX2(numtmp
, POW_TMP
);
1509 newlen
+= LIT_GROW
* OPCS(LIT
);
1510 numtmp
= MAX2(numtmp
, LIT_TMP
);
1513 newlen
+= EXP_GROW
* OPCS(EXP
);
1514 numtmp
= MAX2(numtmp
, EXP_TMP
);
1517 newlen
+= LOG_GROW
* OPCS(LOG
);
1518 numtmp
= MAX2(numtmp
, LOG_TMP
);
1521 newlen
+= DP4_GROW
* OPCS(DP4
);
1522 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1525 newlen
+= DP3_GROW
* OPCS(DP3
);
1526 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1529 newlen
+= DP2_GROW
* OPCS(DP2
);
1530 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1533 newlen
+= FLR_GROW
* OPCS(FLR
);
1534 numtmp
= MAX2(numtmp
, FLR_TMP
);
1537 newlen
+= CEIL_GROW
* OPCS(CEIL
);
1538 numtmp
= MAX2(numtmp
, CEIL_TMP
);
1541 newlen
+= TRUNC_GROW
* OPCS(TRUNC
);
1542 numtmp
= MAX2(numtmp
, TRUNC_TMP
);
1544 if (ctx
.saturate
|| config
->lower_TXP
) {
1548 n
= info
->opcode_count
[TGSI_OPCODE_TEX
] +
1549 info
->opcode_count
[TGSI_OPCODE_TXP
] +
1550 info
->opcode_count
[TGSI_OPCODE_TXB
] +
1551 info
->opcode_count
[TGSI_OPCODE_TXB2
] +
1552 info
->opcode_count
[TGSI_OPCODE_TXL
];
1553 } else if (config
->lower_TXP
) {
1554 n
= info
->opcode_count
[TGSI_OPCODE_TXP
];
1557 newlen
+= SAMP_GROW
* n
;
1558 numtmp
= MAX2(numtmp
, SAMP_TMP
);
1561 /* specifically don't include two_side_colors temps in the count: */
1562 ctx
.numtmp
= numtmp
;
1564 if (ctx
.two_side_colors
) {
1565 newlen
+= TWOSIDE_GROW(ctx
.two_side_colors
);
1566 /* note: we permanently consume temp regs, re-writing references
1567 * to IN.COLOR[n] to TEMP[m] (holding the output of the CMP
1568 * instruction that selects which varying to use):
1570 numtmp
+= ctx
.two_side_colors
;
1573 newlen
+= 2 * numtmp
;
1574 newlen
+= 5; /* immediate */
1576 newtoks
= tgsi_alloc_tokens(newlen
);
1580 tgsi_transform_shader(tokens
, newtoks
, newlen
, &ctx
.base
);
1582 tgsi_scan_shader(newtoks
, info
);
1585 _debug_printf("AFTER:");
1586 tgsi_dump(newtoks
, 0);