/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
29 #include "tgsi/tgsi_transform.h"
30 #include "tgsi/tgsi_scan.h"
31 #include "tgsi/tgsi_dump.h"
33 #include "util/u_debug.h"
34 #include "util/u_math.h"
36 #include "freedreno_lowering.h"
/*
 * Per-shader state for the lowering pass.
 *
 * 'base' is the first member, which is what lets fd_lowering_context()
 * cast a tgsi_transform_context pointer to this type.  'imm' is the source
 * register the lowering code reads constants from; going by the swizzles
 * and comments in the transform functions its x/y/z channels are used as
 * 0.0 / 1.0 / 128.0.
 *
 * NOTE(review): the fused line numbers jump (44 -> 48, 49 -> 53) and no
 * closing brace is visible, so members appear to be missing from this view.
 */
38 struct fd_lowering_context
{
39 struct tgsi_transform_context base
;
40 const struct fd_lowering_config
*config
;
41 struct tgsi_shader_info
*info
;
42 unsigned two_side_colors
;
43 unsigned two_side_idx
[PIPE_MAX_SHADER_INPUTS
];
44 unsigned color_base
; /* base register for chosen COLOR/BCOLOR's */
/*
 * NOTE(review): the transform functions access these two as
 * ctx->tmp[A].src / ctx->tmp[B].dst, so they presumably belong to an
 * array-of-struct member named tmp whose declaration (and the A/B index
 * macros) is not visible here -- confirm against the full file.
 */
48 struct tgsi_full_src_register src
;
49 struct tgsi_full_dst_register dst
;
53 struct tgsi_full_src_register imm
;
/*
 * Downcast helper: recover the lowering context from the generic transform
 * context handed to the transform callbacks.  The result is simply the same
 * pointer reinterpreted as a struct fd_lowering_context pointer.
 */
static inline struct fd_lowering_context *
fd_lowering_context(struct tgsi_transform_context *tctx)
{
	struct fd_lowering_context *ctx = (struct fd_lowering_context *)tctx;

	return ctx;
}
68 reg_dst(struct tgsi_full_dst_register
*dst
,
69 const struct tgsi_full_dst_register
*orig_dst
, unsigned wrmask
)
72 dst
->Register
.WriteMask
&= wrmask
;
73 assert(dst
->Register
.WriteMask
);
77 get_swiz(unsigned *swiz
, const struct tgsi_src_register
*src
)
79 swiz
[0] = src
->SwizzleX
;
80 swiz
[1] = src
->SwizzleY
;
81 swiz
[2] = src
->SwizzleZ
;
82 swiz
[3] = src
->SwizzleW
;
86 reg_src(struct tgsi_full_src_register
*src
,
87 const struct tgsi_full_src_register
*orig_src
,
88 unsigned sx
, unsigned sy
, unsigned sz
, unsigned sw
)
91 get_swiz(swiz
, &orig_src
->Register
);
93 src
->Register
.SwizzleX
= swiz
[sx
];
94 src
->Register
.SwizzleY
= swiz
[sy
];
95 src
->Register
.SwizzleZ
= swiz
[sz
];
96 src
->Register
.SwizzleW
= swiz
[sw
];
/*
 * SWIZ(x,y,z,w) expands to four comma-separated TGSI_SWIZZLE_* constants,
 * ready to be passed as the last four arguments of reg_src().  A channel
 * the instruction does not read can be written as "_", which pastes to
 * TGSI_SWIZZLE__ and so resolves to the X swizzle.
 */
#define TGSI_SWIZZLE__      TGSI_SWIZZLE_X    /* don't-care value! */
#define SWIZ(c0, c1, c2, c3) \
	TGSI_SWIZZLE_ ## c0, TGSI_SWIZZLE_ ## c1, \
	TGSI_SWIZZLE_ ## c2, TGSI_SWIZZLE_ ## c3
104 * if (dst.x aliases src.x) {
110 * MOV dst.zw, imm{0.0, 1.0}
113 aliases(const struct tgsi_full_dst_register
*dst
, unsigned dst_mask
,
114 const struct tgsi_full_src_register
*src
, unsigned src_mask
)
116 if ((dst
->Register
.File
== src
->Register
.File
) &&
117 (dst
->Register
.Index
== src
->Register
.Index
)) {
118 unsigned i
, actual_mask
= 0;
120 get_swiz(swiz
, &src
->Register
);
121 for (i
= 0; i
< 4; i
++)
122 if (src_mask
& (1 << i
))
123 actual_mask
|= (1 << swiz
[i
]);
124 if (actual_mask
& dst_mask
)
131 create_mov(struct tgsi_transform_context
*tctx
,
132 const struct tgsi_full_dst_register
*dst
,
133 const struct tgsi_full_src_register
*src
, unsigned mask
)
135 struct tgsi_full_instruction new_inst
;
137 new_inst
= tgsi_default_full_instruction();
138 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
139 new_inst
.Instruction
.NumDstRegs
= 1;
140 reg_dst(&new_inst
.Dst
[0], dst
, mask
);
141 new_inst
.Instruction
.NumSrcRegs
= 1;
142 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,Z
,W
));
143 tctx
->emit_instruction(tctx
, &new_inst
);
/*
 * Lowering Translators:
 */
150 /* DST - Distance Vector
152 * dst.y = src0.y \times src1.y
156 * ; note: could be more clever and use just a single temp
157 * ; if I was clever enough to re-write the swizzles.
158 * ; needs: 2 tmp, imm{1.0}
159 * if (dst.y aliases src0.z) {
160 * MOV tmpA.yz, src0.yz
163 * if (dst.yz aliases src1.w) {
164 * MOV tmpB.yw, src1.yw
167 * MUL dst.y, src0.y, src1.y
170 * MOV dst.x, imm{1.0}
172 #define DST_GROW (19 - 4)
175 transform_dst(struct tgsi_transform_context
*tctx
,
176 struct tgsi_full_instruction
*inst
)
178 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
179 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
180 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
181 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
182 struct tgsi_full_instruction new_inst
;
184 if (aliases(dst
, TGSI_WRITEMASK_Y
, src0
, TGSI_WRITEMASK_Z
)) {
185 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src0
, TGSI_WRITEMASK_YZ
);
186 src0
= &ctx
->tmp
[A
].src
;
189 if (aliases(dst
, TGSI_WRITEMASK_YZ
, src1
, TGSI_WRITEMASK_W
)) {
190 create_mov(tctx
, &ctx
->tmp
[B
].dst
, src1
, TGSI_WRITEMASK_YW
);
191 src1
= &ctx
->tmp
[B
].src
;
194 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
195 /* MUL dst.y, src0.y, src1.y */
196 new_inst
= tgsi_default_full_instruction();
197 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
198 new_inst
.Instruction
.NumDstRegs
= 1;
199 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
200 new_inst
.Instruction
.NumSrcRegs
= 2;
201 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
,Y
,_
,_
));
202 reg_src(&new_inst
.Src
[1], src1
, SWIZ(_
,Y
,_
,_
));
203 tctx
->emit_instruction(tctx
, &new_inst
);
206 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
207 /* MOV dst.z, src0.z */
208 new_inst
= tgsi_default_full_instruction();
209 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
210 new_inst
.Instruction
.NumDstRegs
= 1;
211 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
212 new_inst
.Instruction
.NumSrcRegs
= 1;
213 reg_src(&new_inst
.Src
[0], src0
, SWIZ(_
,_
,Z
,_
));
214 tctx
->emit_instruction(tctx
, &new_inst
);
217 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
218 /* MOV dst.w, src1.w */
219 new_inst
= tgsi_default_full_instruction();
220 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
221 new_inst
.Instruction
.NumDstRegs
= 1;
222 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
223 new_inst
.Instruction
.NumSrcRegs
= 1;
224 reg_src(&new_inst
.Src
[0], src1
, SWIZ(_
,_
,_
,W
));
225 tctx
->emit_instruction(tctx
, &new_inst
);
228 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
229 /* MOV dst.x, imm{1.0} */
230 new_inst
= tgsi_default_full_instruction();
231 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
232 new_inst
.Instruction
.NumDstRegs
= 1;
233 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
234 new_inst
.Instruction
.NumSrcRegs
= 1;
235 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
,_
,_
,_
));
236 tctx
->emit_instruction(tctx
, &new_inst
);
240 /* XPD - Cross Product
241 * dst.x = src0.y \times src1.z - src1.y \times src0.z
242 * dst.y = src0.z \times src1.x - src1.z \times src0.x
243 * dst.z = src0.x \times src1.y - src1.x \times src0.y
246 * ; needs: 2 tmp, imm{1.0}
247 * MUL tmpA.xyz, src0.yzx, src1.zxy
248 * MUL tmpB.xyz, src1.yzx, src0.zxy
249 * SUB dst.xyz, tmpA.xyz, tmpB.xyz
250 * MOV dst.w, imm{1.0}
252 #define XPD_GROW (15 - 4)
255 transform_xpd(struct tgsi_transform_context
*tctx
,
256 struct tgsi_full_instruction
*inst
)
258 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
259 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
260 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
261 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
262 struct tgsi_full_instruction new_inst
;
264 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
265 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
266 new_inst
= tgsi_default_full_instruction();
267 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
268 new_inst
.Instruction
.NumDstRegs
= 1;
269 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZ
);
270 new_inst
.Instruction
.NumSrcRegs
= 2;
271 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
,Z
,X
,_
));
272 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
,X
,Y
,_
));
273 tctx
->emit_instruction(tctx
, &new_inst
);
275 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
276 new_inst
= tgsi_default_full_instruction();
277 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
278 new_inst
.Instruction
.NumDstRegs
= 1;
279 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZ
);
280 new_inst
.Instruction
.NumSrcRegs
= 2;
281 reg_src(&new_inst
.Src
[0], src1
, SWIZ(Y
,Z
,X
,_
));
282 reg_src(&new_inst
.Src
[1], src0
, SWIZ(Z
,X
,Y
,_
));
283 tctx
->emit_instruction(tctx
, &new_inst
);
285 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
286 new_inst
= tgsi_default_full_instruction();
287 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
288 new_inst
.Instruction
.NumDstRegs
= 1;
289 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZ
);
290 new_inst
.Instruction
.NumSrcRegs
= 2;
291 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,_
));
292 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
,Y
,Z
,_
));
293 tctx
->emit_instruction(tctx
, &new_inst
);
296 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
297 /* MOV dst.w, imm{1.0} */
298 new_inst
= tgsi_default_full_instruction();
299 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
300 new_inst
.Instruction
.NumDstRegs
= 1;
301 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
302 new_inst
.Instruction
.NumSrcRegs
= 1;
303 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,_
,Y
));
304 tctx
->emit_instruction(tctx
, &new_inst
);
309 * dst.x = \cos{src.x}
310 * dst.y = \sin{src.x}
314 * ; needs: 1 tmp, imm{0.0, 1.0}
315 * if (dst.x aliases src.x) {
321 * MOV dst.zw, imm{0.0, 1.0}
323 #define SCS_GROW (12 - 3)
326 transform_scs(struct tgsi_transform_context
*tctx
,
327 struct tgsi_full_instruction
*inst
)
329 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
330 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
331 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
332 struct tgsi_full_instruction new_inst
;
334 if (aliases(dst
, TGSI_WRITEMASK_X
, src
, TGSI_WRITEMASK_X
)) {
335 create_mov(tctx
, &ctx
->tmp
[A
].dst
, src
, TGSI_WRITEMASK_X
);
336 src
= &ctx
->tmp
[A
].src
;
339 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
340 /* COS dst.x, src.x */
341 new_inst
= tgsi_default_full_instruction();
342 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_COS
;
343 new_inst
.Instruction
.NumDstRegs
= 1;
344 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
345 new_inst
.Instruction
.NumSrcRegs
= 1;
346 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
347 tctx
->emit_instruction(tctx
, &new_inst
);
350 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
351 /* SIN dst.y, src.x */
352 new_inst
= tgsi_default_full_instruction();
353 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SIN
;
354 new_inst
.Instruction
.NumDstRegs
= 1;
355 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
356 new_inst
.Instruction
.NumSrcRegs
= 1;
357 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
358 tctx
->emit_instruction(tctx
, &new_inst
);
361 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_ZW
) {
362 /* MOV dst.zw, imm{0.0, 1.0} */
363 new_inst
= tgsi_default_full_instruction();
364 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
365 new_inst
.Instruction
.NumDstRegs
= 1;
366 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_ZW
);
367 new_inst
.Instruction
.NumSrcRegs
= 1;
368 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,X
,Y
));
369 tctx
->emit_instruction(tctx
, &new_inst
);
373 /* LRP - Linear Interpolate
374 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
375 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
376 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
377 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
379 * ; needs: 2 tmp, imm{1.0}
380 * MUL tmpA, src0, src1
381 * SUB tmpB, imm{1.0}, src0
382 * MUL tmpB, tmpB, src2
383 * ADD dst, tmpA, tmpB
385 #define LRP_GROW (16 - 4)
388 transform_lrp(struct tgsi_transform_context
*tctx
,
389 struct tgsi_full_instruction
*inst
)
391 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
392 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
393 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
394 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
395 struct tgsi_full_src_register
*src2
= &inst
->Src
[2];
396 struct tgsi_full_instruction new_inst
;
398 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
399 /* MUL tmpA, src0, src1 */
400 new_inst
= tgsi_default_full_instruction();
401 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
402 new_inst
.Instruction
.NumDstRegs
= 1;
403 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
404 new_inst
.Instruction
.NumSrcRegs
= 2;
405 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
,Y
,Z
,W
));
406 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
,Y
,Z
,W
));
407 tctx
->emit_instruction(tctx
, &new_inst
);
409 /* SUB tmpB, imm{1.0}, src0 */
410 new_inst
= tgsi_default_full_instruction();
411 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
412 new_inst
.Instruction
.NumDstRegs
= 1;
413 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZW
);
414 new_inst
.Instruction
.NumSrcRegs
= 2;
415 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
,Y
,Y
,Y
));
416 reg_src(&new_inst
.Src
[1], src0
, SWIZ(X
,Y
,Z
,W
));
417 tctx
->emit_instruction(tctx
, &new_inst
);
419 /* MUL tmpB, tmpB, src2 */
420 new_inst
= tgsi_default_full_instruction();
421 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
422 new_inst
.Instruction
.NumDstRegs
= 1;
423 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[B
].dst
, TGSI_WRITEMASK_XYZW
);
424 new_inst
.Instruction
.NumSrcRegs
= 2;
425 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[B
].src
, SWIZ(X
,Y
,Z
,W
));
426 reg_src(&new_inst
.Src
[1], src2
, SWIZ(X
,Y
,Z
,W
));
427 tctx
->emit_instruction(tctx
, &new_inst
);
429 /* ADD dst, tmpA, tmpB */
430 new_inst
= tgsi_default_full_instruction();
431 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
432 new_inst
.Instruction
.NumDstRegs
= 1;
433 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
434 new_inst
.Instruction
.NumSrcRegs
= 2;
435 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,W
));
436 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[B
].src
, SWIZ(X
,Y
,Z
,W
));
437 tctx
->emit_instruction(tctx
, &new_inst
);
442 * dst.x = src.x - \lfloor src.x\rfloor
443 * dst.y = src.y - \lfloor src.y\rfloor
444 * dst.z = src.z - \lfloor src.z\rfloor
445 * dst.w = src.w - \lfloor src.w\rfloor
451 #define FRC_GROW (7 - 3)
454 transform_frc(struct tgsi_transform_context
*tctx
,
455 struct tgsi_full_instruction
*inst
)
457 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
458 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
459 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
460 struct tgsi_full_instruction new_inst
;
462 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
464 new_inst
= tgsi_default_full_instruction();
465 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
466 new_inst
.Instruction
.NumDstRegs
= 1;
467 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XYZW
);
468 new_inst
.Instruction
.NumSrcRegs
= 1;
469 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,Z
,W
));
470 tctx
->emit_instruction(tctx
, &new_inst
);
472 /* SUB dst, src, tmpA */
473 new_inst
= tgsi_default_full_instruction();
474 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
475 new_inst
.Instruction
.NumDstRegs
= 1;
476 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
477 new_inst
.Instruction
.NumSrcRegs
= 2;
478 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,Z
,W
));
479 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,Y
,Z
,W
));
480 tctx
->emit_instruction(tctx
, &new_inst
);
485 * dst.x = src0.x^{src1.x}
486 * dst.y = src0.x^{src1.x}
487 * dst.z = src0.x^{src1.x}
488 * dst.w = src0.x^{src1.x}
492 * MUL tmpA.x, src1.x, tmpA.x
495 #define POW_GROW (10 - 4)
498 transform_pow(struct tgsi_transform_context
*tctx
,
499 struct tgsi_full_instruction
*inst
)
501 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
502 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
503 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
504 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
505 struct tgsi_full_instruction new_inst
;
507 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
508 /* LG2 tmpA.x, src0.x */
509 new_inst
= tgsi_default_full_instruction();
510 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
511 new_inst
.Instruction
.NumDstRegs
= 1;
512 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
513 new_inst
.Instruction
.NumSrcRegs
= 1;
514 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
,_
,_
,_
));
515 tctx
->emit_instruction(tctx
, &new_inst
);
517 /* MUL tmpA.x, src1.x, tmpA.x */
518 new_inst
= tgsi_default_full_instruction();
519 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
520 new_inst
.Instruction
.NumDstRegs
= 1;
521 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
522 new_inst
.Instruction
.NumSrcRegs
= 2;
523 reg_src(&new_inst
.Src
[0], src1
, SWIZ(X
,_
,_
,_
));
524 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,_
,_
,_
));
525 tctx
->emit_instruction(tctx
, &new_inst
);
527 /* EX2 dst, tmpA.x */
528 new_inst
= tgsi_default_full_instruction();
529 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
530 new_inst
.Instruction
.NumDstRegs
= 1;
531 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
532 new_inst
.Instruction
.NumSrcRegs
= 1;
533 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,_
,_
,_
));
534 tctx
->emit_instruction(tctx
, &new_inst
);
538 /* LIT - Light Coefficients
540 * dst.y = max(src.x, 0.0)
541 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
544 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
545 * MAX tmpA.xy, src.xy, imm{0.0}
546 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
548 * MUL tmpA.y, tmpA.z, tmpA.y
550 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
551 * MOV dst.yz, tmpA.xy
552 * MOV dst.xw, imm{1.0}
554 #define LIT_GROW (30 - 3)
557 transform_lit(struct tgsi_transform_context
*tctx
,
558 struct tgsi_full_instruction
*inst
)
560 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
561 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
562 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
563 struct tgsi_full_instruction new_inst
;
565 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_YZ
) {
566 /* MAX tmpA.xy, src.xy, imm{0.0} */
567 new_inst
= tgsi_default_full_instruction();
568 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAX
;
569 new_inst
.Instruction
.NumDstRegs
= 1;
570 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_XY
);
571 new_inst
.Instruction
.NumSrcRegs
= 2;
572 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,Y
,_
,_
));
573 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(X
,X
,_
,_
));
574 tctx
->emit_instruction(tctx
, &new_inst
);
576 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
577 new_inst
= tgsi_default_full_instruction();
578 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CLAMP
;
579 new_inst
.Instruction
.NumDstRegs
= 1;
580 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
581 new_inst
.Instruction
.NumSrcRegs
= 3;
582 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,_
,W
,_
));
583 reg_src(&new_inst
.Src
[1], &ctx
->imm
, SWIZ(_
,_
,Z
,_
));
584 new_inst
.Src
[1].Register
.Negate
= true;
585 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
,_
,Z
,_
));
586 tctx
->emit_instruction(tctx
, &new_inst
);
588 /* LG2 tmpA.y, tmpA.y */
589 new_inst
= tgsi_default_full_instruction();
590 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
591 new_inst
.Instruction
.NumDstRegs
= 1;
592 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
593 new_inst
.Instruction
.NumSrcRegs
= 1;
594 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,_
,_
));
595 tctx
->emit_instruction(tctx
, &new_inst
);
597 /* MUL tmpA.y, tmpA.z, tmpA.y */
598 new_inst
= tgsi_default_full_instruction();
599 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
600 new_inst
.Instruction
.NumDstRegs
= 1;
601 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
602 new_inst
.Instruction
.NumSrcRegs
= 2;
603 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,Z
,_
,_
));
604 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,Y
,_
,_
));
605 tctx
->emit_instruction(tctx
, &new_inst
);
607 /* EX2 tmpA.y, tmpA.y */
608 new_inst
= tgsi_default_full_instruction();
609 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
610 new_inst
.Instruction
.NumDstRegs
= 1;
611 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
612 new_inst
.Instruction
.NumSrcRegs
= 1;
613 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,_
,_
));
614 tctx
->emit_instruction(tctx
, &new_inst
);
616 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
617 new_inst
= tgsi_default_full_instruction();
618 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
619 new_inst
.Instruction
.NumDstRegs
= 1;
620 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
621 new_inst
.Instruction
.NumSrcRegs
= 3;
622 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,X
,_
,_
));
623 new_inst
.Src
[0].Register
.Negate
= true;
624 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,Y
,_
,_
));
625 reg_src(&new_inst
.Src
[2], &ctx
->imm
, SWIZ(_
,X
,_
,_
));
626 tctx
->emit_instruction(tctx
, &new_inst
);
628 /* MOV dst.yz, tmpA.xy */
629 new_inst
= tgsi_default_full_instruction();
630 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
631 new_inst
.Instruction
.NumDstRegs
= 1;
632 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_YZ
);
633 new_inst
.Instruction
.NumSrcRegs
= 1;
634 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,X
,Y
,_
));
635 tctx
->emit_instruction(tctx
, &new_inst
);
638 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XW
) {
639 /* MOV dst.xw, imm{1.0} */
640 new_inst
= tgsi_default_full_instruction();
641 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
642 new_inst
.Instruction
.NumDstRegs
= 1;
643 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XW
);
644 new_inst
.Instruction
.NumSrcRegs
= 1;
645 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(Y
,_
,_
,Y
));
646 tctx
->emit_instruction(tctx
, &new_inst
);
650 /* EXP - Approximate Exponential Base 2
651 * dst.x = 2^{\lfloor src.x\rfloor}
652 * dst.y = src.x - \lfloor src.x\rfloor
656 * ; needs: 1 tmp, imm{1.0}
659 * SUB dst.y, src.x, tmpA.x
662 * MOV dst.w, imm{1.0}
664 #define EXP_GROW (19 - 3)
667 transform_exp(struct tgsi_transform_context
*tctx
,
668 struct tgsi_full_instruction
*inst
)
670 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
671 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
672 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
673 struct tgsi_full_instruction new_inst
;
675 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
676 /* FLR tmpA.x, src.x */
677 new_inst
= tgsi_default_full_instruction();
678 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
679 new_inst
.Instruction
.NumDstRegs
= 1;
680 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
681 new_inst
.Instruction
.NumSrcRegs
= 1;
682 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
683 tctx
->emit_instruction(tctx
, &new_inst
);
686 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
687 /* EX2 tmpA.y, src.x */
688 new_inst
= tgsi_default_full_instruction();
689 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
690 new_inst
.Instruction
.NumDstRegs
= 1;
691 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
692 new_inst
.Instruction
.NumSrcRegs
= 1;
693 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
694 tctx
->emit_instruction(tctx
, &new_inst
);
697 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
698 /* SUB dst.y, src.x, tmpA.x */
699 new_inst
= tgsi_default_full_instruction();
700 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_SUB
;
701 new_inst
.Instruction
.NumDstRegs
= 1;
702 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
703 new_inst
.Instruction
.NumSrcRegs
= 2;
704 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,X
,_
,_
));
705 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,X
,_
,_
));
706 tctx
->emit_instruction(tctx
, &new_inst
);
709 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_X
) {
710 /* EX2 dst.x, tmpA.x */
711 new_inst
= tgsi_default_full_instruction();
712 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
713 new_inst
.Instruction
.NumDstRegs
= 1;
714 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_X
);
715 new_inst
.Instruction
.NumSrcRegs
= 1;
716 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(X
,_
,_
,_
));
717 tctx
->emit_instruction(tctx
, &new_inst
);
720 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
721 /* MOV dst.z, tmpA.y */
722 new_inst
= tgsi_default_full_instruction();
723 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
724 new_inst
.Instruction
.NumDstRegs
= 1;
725 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Z
);
726 new_inst
.Instruction
.NumSrcRegs
= 1;
727 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,_
,Y
,_
));
728 tctx
->emit_instruction(tctx
, &new_inst
);
731 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
732 /* MOV dst.w, imm{1.0} */
733 new_inst
= tgsi_default_full_instruction();
734 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
735 new_inst
.Instruction
.NumDstRegs
= 1;
736 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
737 new_inst
.Instruction
.NumSrcRegs
= 1;
738 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,_
,Y
));
739 tctx
->emit_instruction(tctx
, &new_inst
);
743 /* LOG - Approximate Logarithm Base 2
744 * dst.x = \lfloor\log_2{|src.x|}\rfloor
745 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
746 * dst.z = \log_2{|src.x|}
749 * ; needs: 1 tmp, imm{1.0}
750 * LG2 tmpA.x, |src.x|
754 * MUL dst.y, |src.x|, tmpA.z
755 * MOV dst.xz, tmpA.yx
756 * MOV dst.w, imm{1.0}
758 #define LOG_GROW (25 - 3)
761 transform_log(struct tgsi_transform_context
*tctx
,
762 struct tgsi_full_instruction
*inst
)
764 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
765 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
766 struct tgsi_full_src_register
*src
= &inst
->Src
[0];
767 struct tgsi_full_instruction new_inst
;
769 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
770 /* LG2 tmpA.x, |src.x| */
771 new_inst
= tgsi_default_full_instruction();
772 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_LG2
;
773 new_inst
.Instruction
.NumDstRegs
= 1;
774 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
775 new_inst
.Instruction
.NumSrcRegs
= 1;
776 reg_src(&new_inst
.Src
[0], src
, SWIZ(X
,_
,_
,_
));
777 new_inst
.Src
[0].Register
.Absolute
= true;
778 tctx
->emit_instruction(tctx
, &new_inst
);
781 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
782 /* FLR tmpA.y, tmpA.x */
783 new_inst
= tgsi_default_full_instruction();
784 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_FLR
;
785 new_inst
.Instruction
.NumDstRegs
= 1;
786 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Y
);
787 new_inst
.Instruction
.NumSrcRegs
= 1;
788 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(_
,X
,_
,_
));
789 tctx
->emit_instruction(tctx
, &new_inst
);
792 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
793 /* EX2 tmpA.z, tmpA.y */
794 new_inst
= tgsi_default_full_instruction();
795 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
796 new_inst
.Instruction
.NumDstRegs
= 1;
797 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
798 new_inst
.Instruction
.NumSrcRegs
= 1;
799 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,_
,_
));
800 tctx
->emit_instruction(tctx
, &new_inst
);
802 /* RCP tmpA.z, tmpA.z */
803 new_inst
= tgsi_default_full_instruction();
804 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_RCP
;
805 new_inst
.Instruction
.NumDstRegs
= 1;
806 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_Z
);
807 new_inst
.Instruction
.NumSrcRegs
= 1;
808 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Z
,_
,_
,_
));
809 tctx
->emit_instruction(tctx
, &new_inst
);
811 /* MUL dst.y, |src.x|, tmpA.z */
812 new_inst
= tgsi_default_full_instruction();
813 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
814 new_inst
.Instruction
.NumDstRegs
= 1;
815 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_Y
);
816 new_inst
.Instruction
.NumSrcRegs
= 2;
817 reg_src(&new_inst
.Src
[0], src
, SWIZ(_
,X
,_
,_
));
818 new_inst
.Src
[0].Register
.Absolute
= true;
819 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(_
,Z
,_
,_
));
820 tctx
->emit_instruction(tctx
, &new_inst
);
823 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XZ
) {
824 /* MOV dst.xz, tmpA.yx */
825 new_inst
= tgsi_default_full_instruction();
826 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
827 new_inst
.Instruction
.NumDstRegs
= 1;
828 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XZ
);
829 new_inst
.Instruction
.NumSrcRegs
= 1;
830 reg_src(&new_inst
.Src
[0], &ctx
->tmp
[A
].src
, SWIZ(Y
,_
,X
,_
));
831 tctx
->emit_instruction(tctx
, &new_inst
);
834 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_W
) {
835 /* MOV dst.w, imm{1.0} */
836 new_inst
= tgsi_default_full_instruction();
837 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
838 new_inst
.Instruction
.NumDstRegs
= 1;
839 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_W
);
840 new_inst
.Instruction
.NumSrcRegs
= 1;
841 reg_src(&new_inst
.Src
[0], &ctx
->imm
, SWIZ(_
,_
,_
,Y
));
842 tctx
->emit_instruction(tctx
, &new_inst
);
/* DP4 - 4-component Dot Product
 *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 *
 * DP3 - 3-component Dot Product
 *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 *
 * DPH - Homogeneous Dot Product
 *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 *
 * DP2 - 2-component Dot Product
 *   dst = src0.x \times src1.x + src0.y \times src1.y
 *
 * DP2A - 2-component Dot Product And Add
 *   dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
 *
 * NOTE: these are translated into a sequence of MUL/MAD(/ADD) scalar
 * operations, which is what you'd prefer for an ISA that is natively
 * scalar.  Probably a native vector ISA would at least already have
 * DP4/DP3 instructions, but perhaps there is room for an alternative
 * translation for DPH/DP2/DP2A using vector instructions.
 *
 * MUL tmpA.x, src0.x, src1.x
 * MAD tmpA.x, src0.y, src1.y, tmpA.x
 * if (DPH || DP3 || DP4) {
 *   MAD tmpA.x, src0.z, src1.z, tmpA.x
 *   if (DPH) {
 *     ADD tmpA.x, src1.w, tmpA.x
 *   } else if (DP4) {
 *     MAD tmpA.x, src0.w, src1.w, tmpA.x
 *   }
 * } else if (DP2A) {
 *   ADD tmpA.x, src2.x, tmpA.x
 * }
 * ; fixup last instruction to replicate into dst
 */
/* Per-opcode size constants for the dot-product lowerings, each written
 * as (larger count - smaller count).
 * NOTE(review): presumably the token growth of each lowering (new size
 * minus original size) -- confirm against the code that consumes them.
 */
#define DP4_GROW	(19 - 4)
#define DP3_GROW	(14 - 4)
#define DPH_GROW	(18 - 4)
#define DP2_GROW	(9 - 4)
#define DP2A_GROW	(13 - 4)
889 transform_dotp(struct tgsi_transform_context
*tctx
,
890 struct tgsi_full_instruction
*inst
)
892 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
893 struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
894 struct tgsi_full_src_register
*src0
= &inst
->Src
[0];
895 struct tgsi_full_src_register
*src1
= &inst
->Src
[1];
896 struct tgsi_full_src_register
*src2
= &inst
->Src
[2]; /* only DP2A */
897 struct tgsi_full_instruction new_inst
;
898 unsigned opcode
= inst
->Instruction
.Opcode
;
900 /* NOTE: any potential last instruction must replicate src on all
901 * components (since it could be re-written to write to final dst)
904 if (dst
->Register
.WriteMask
& TGSI_WRITEMASK_XYZW
) {
905 /* MUL tmpA.x, src0.x, src1.x */
906 new_inst
= tgsi_default_full_instruction();
907 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
908 new_inst
.Instruction
.NumDstRegs
= 1;
909 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
910 new_inst
.Instruction
.NumSrcRegs
= 2;
911 reg_src(&new_inst
.Src
[0], src0
, SWIZ(X
,_
,_
,_
));
912 reg_src(&new_inst
.Src
[1], src1
, SWIZ(X
,_
,_
,_
));
913 tctx
->emit_instruction(tctx
, &new_inst
);
915 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
916 new_inst
= tgsi_default_full_instruction();
917 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
918 new_inst
.Instruction
.NumDstRegs
= 1;
919 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
920 new_inst
.Instruction
.NumSrcRegs
= 3;
921 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Y
,Y
,Y
,Y
));
922 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Y
,Y
,Y
,Y
));
923 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
925 if ((opcode
== TGSI_OPCODE_DPH
) ||
926 (opcode
== TGSI_OPCODE_DP3
) ||
927 (opcode
== TGSI_OPCODE_DP4
)) {
928 tctx
->emit_instruction(tctx
, &new_inst
);
930 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
931 new_inst
= tgsi_default_full_instruction();
932 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
933 new_inst
.Instruction
.NumDstRegs
= 1;
934 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
935 new_inst
.Instruction
.NumSrcRegs
= 3;
936 reg_src(&new_inst
.Src
[0], src0
, SWIZ(Z
,Z
,Z
,Z
));
937 reg_src(&new_inst
.Src
[1], src1
, SWIZ(Z
,Z
,Z
,Z
));
938 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
940 if (opcode
== TGSI_OPCODE_DPH
) {
941 tctx
->emit_instruction(tctx
, &new_inst
);
943 /* ADD tmpA.x, src1.w, tmpA.x */
944 new_inst
= tgsi_default_full_instruction();
945 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
946 new_inst
.Instruction
.NumDstRegs
= 1;
947 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
948 new_inst
.Instruction
.NumSrcRegs
= 2;
949 reg_src(&new_inst
.Src
[0], src1
, SWIZ(W
,W
,W
,W
));
950 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
951 } else if (opcode
== TGSI_OPCODE_DP4
) {
952 tctx
->emit_instruction(tctx
, &new_inst
);
954 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
955 new_inst
= tgsi_default_full_instruction();
956 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
957 new_inst
.Instruction
.NumDstRegs
= 1;
958 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
959 new_inst
.Instruction
.NumSrcRegs
= 3;
960 reg_src(&new_inst
.Src
[0], src0
, SWIZ(W
,W
,W
,W
));
961 reg_src(&new_inst
.Src
[1], src1
, SWIZ(W
,W
,W
,W
));
962 reg_src(&new_inst
.Src
[2], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
964 } else if (opcode
== TGSI_OPCODE_DP2A
) {
965 tctx
->emit_instruction(tctx
, &new_inst
);
967 /* ADD tmpA.x, src2.x, tmpA.x */
968 new_inst
= tgsi_default_full_instruction();
969 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_ADD
;
970 new_inst
.Instruction
.NumDstRegs
= 1;
971 reg_dst(&new_inst
.Dst
[0], &ctx
->tmp
[A
].dst
, TGSI_WRITEMASK_X
);
972 new_inst
.Instruction
.NumSrcRegs
= 2;
973 reg_src(&new_inst
.Src
[0], src2
, SWIZ(X
,X
,X
,X
));
974 reg_src(&new_inst
.Src
[1], &ctx
->tmp
[A
].src
, SWIZ(X
,X
,X
,X
));
977 /* fixup last instruction to write to dst: */
978 reg_dst(&new_inst
.Dst
[0], dst
, TGSI_WRITEMASK_XYZW
);
980 tctx
->emit_instruction(tctx
, &new_inst
);
985 /* Two-sided color emulation:
986 * For each COLOR input, create a corresponding BCOLOR input, plus
987 * CMP instruction to select front or back color based on FACE
989 #define TWOSIDE_GROW(n) ( \
991 ((n) * 2) + /* IN[] BCOLOR[n] */ \
992 ((n) * 1) + /* TEMP[] */ \
993 ((n) * 5) /* CMP instr */ \
997 emit_twoside(struct tgsi_transform_context
*tctx
)
999 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1000 struct tgsi_shader_info
*info
= ctx
->info
;
1001 struct tgsi_full_declaration decl
;
1002 struct tgsi_full_instruction new_inst
;
1003 unsigned inbase
, tmpbase
;
1006 inbase
= info
->file_max
[TGSI_FILE_INPUT
] + 1;
1007 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1009 /* additional inputs for BCOLOR's */
1010 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1011 decl
= tgsi_default_full_declaration();
1012 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1013 decl
.Declaration
.Semantic
= true;
1014 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ i
;
1015 decl
.Semantic
.Name
= TGSI_SEMANTIC_BCOLOR
;
1016 decl
.Semantic
.Index
=
1017 info
->input_semantic_index
[ctx
->two_side_idx
[i
]];
1018 tctx
->emit_declaration(tctx
, &decl
);
1021 /* additional input for FACE */
1022 if (ctx
->two_side_colors
&& (ctx
->face_idx
== -1)) {
1023 decl
= tgsi_default_full_declaration();
1024 decl
.Declaration
.File
= TGSI_FILE_INPUT
;
1025 decl
.Declaration
.Semantic
= true;
1026 decl
.Range
.First
= decl
.Range
.Last
= inbase
+ ctx
->two_side_colors
;
1027 decl
.Semantic
.Name
= TGSI_SEMANTIC_FACE
;
1028 decl
.Semantic
.Index
= 0;
1029 tctx
->emit_declaration(tctx
, &decl
);
1031 ctx
->face_idx
= decl
.Range
.First
;
1034 /* additional temps for COLOR/BCOLOR selection: */
1035 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1036 decl
= tgsi_default_full_declaration();
1037 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1038 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ ctx
->numtmp
+ i
;
1039 tctx
->emit_declaration(tctx
, &decl
);
1042 /* and finally additional instructions to select COLOR/BCOLOR: */
1043 for (i
= 0; i
< ctx
->two_side_colors
; i
++) {
1044 new_inst
= tgsi_default_full_instruction();
1045 new_inst
.Instruction
.Opcode
= TGSI_OPCODE_CMP
;
1047 new_inst
.Instruction
.NumDstRegs
= 1;
1048 new_inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
1049 new_inst
.Dst
[0].Register
.Index
= tmpbase
+ ctx
->numtmp
+ i
;
1050 new_inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1052 new_inst
.Instruction
.NumSrcRegs
= 3;
1053 new_inst
.Src
[0].Register
.File
= TGSI_FILE_INPUT
;
1054 new_inst
.Src
[0].Register
.Index
= ctx
->face_idx
;
1055 new_inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1056 new_inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
1057 new_inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
1058 new_inst
.Src
[0].Register
.SwizzleW
= TGSI_SWIZZLE_X
;
1059 new_inst
.Src
[1].Register
.File
= TGSI_FILE_INPUT
;
1060 new_inst
.Src
[1].Register
.Index
= inbase
+ i
;
1061 new_inst
.Src
[1].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1062 new_inst
.Src
[1].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1063 new_inst
.Src
[1].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1064 new_inst
.Src
[1].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1065 new_inst
.Src
[2].Register
.File
= TGSI_FILE_INPUT
;
1066 new_inst
.Src
[2].Register
.Index
= ctx
->two_side_idx
[i
];
1067 new_inst
.Src
[2].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1068 new_inst
.Src
[2].Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1069 new_inst
.Src
[2].Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1070 new_inst
.Src
[2].Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1072 tctx
->emit_instruction(tctx
, &new_inst
);
1077 emit_decls(struct tgsi_transform_context
*tctx
)
1079 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1080 struct tgsi_shader_info
*info
= ctx
->info
;
1081 struct tgsi_full_declaration decl
;
1082 struct tgsi_full_immediate immed
;
1086 tmpbase
= info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
1088 ctx
->color_base
= tmpbase
+ ctx
->numtmp
;
1090 /* declare immediate: */
1091 immed
= tgsi_default_full_immediate();
1092 immed
.Immediate
.NrTokens
= 1 + 4; /* one for the token itself */
1093 immed
.u
[0].Float
= 0.0;
1094 immed
.u
[1].Float
= 1.0;
1095 immed
.u
[2].Float
= 128.0;
1096 immed
.u
[3].Float
= 0.0;
1097 tctx
->emit_immediate(tctx
, &immed
);
1099 ctx
->imm
.Register
.File
= TGSI_FILE_IMMEDIATE
;
1100 ctx
->imm
.Register
.Index
= info
->immediate_count
;
1101 ctx
->imm
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1102 ctx
->imm
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1103 ctx
->imm
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1104 ctx
->imm
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1106 /* declare temp regs: */
1107 for (i
= 0; i
< ctx
->numtmp
; i
++) {
1108 decl
= tgsi_default_full_declaration();
1109 decl
.Declaration
.File
= TGSI_FILE_TEMPORARY
;
1110 decl
.Range
.First
= decl
.Range
.Last
= tmpbase
+ i
;
1111 tctx
->emit_declaration(tctx
, &decl
);
1113 ctx
->tmp
[i
].src
.Register
.File
= TGSI_FILE_TEMPORARY
;
1114 ctx
->tmp
[i
].src
.Register
.Index
= tmpbase
+ i
;
1115 ctx
->tmp
[i
].src
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
1116 ctx
->tmp
[i
].src
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
1117 ctx
->tmp
[i
].src
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
1118 ctx
->tmp
[i
].src
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
1120 ctx
->tmp
[i
].dst
.Register
.File
= TGSI_FILE_TEMPORARY
;
1121 ctx
->tmp
[i
].dst
.Register
.Index
= tmpbase
+ i
;
1122 ctx
->tmp
[i
].dst
.Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1125 if (ctx
->two_side_colors
)
1130 rename_color_inputs(struct fd_lowering_context
*ctx
,
1131 struct tgsi_full_instruction
*inst
)
1134 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
1135 struct tgsi_src_register
*src
= &inst
->Src
[i
].Register
;
1136 if (src
->File
== TGSI_FILE_INPUT
) {
1137 for (j
= 0; j
< ctx
->two_side_colors
; j
++) {
1138 if (src
->Index
== ctx
->two_side_idx
[j
]) {
1139 src
->File
= TGSI_FILE_TEMPORARY
;
1140 src
->Index
= ctx
->color_base
+ j
;
1150 transform_instr(struct tgsi_transform_context
*tctx
,
1151 struct tgsi_full_instruction
*inst
)
1153 struct fd_lowering_context
*ctx
= fd_lowering_context(tctx
);
1155 if (!ctx
->emitted_decls
) {
1157 ctx
->emitted_decls
= 1;
1160 /* if emulating two-sided-color, we need to re-write some
1163 if (ctx
->two_side_colors
)
1164 rename_color_inputs(ctx
, inst
);
1166 switch (inst
->Instruction
.Opcode
) {
1167 case TGSI_OPCODE_DST
:
1168 if (!ctx
->config
->lower_DST
)
1170 transform_dst(tctx
, inst
);
1172 case TGSI_OPCODE_XPD
:
1173 if (!ctx
->config
->lower_XPD
)
1175 transform_xpd(tctx
, inst
);
1177 case TGSI_OPCODE_SCS
:
1178 if (!ctx
->config
->lower_SCS
)
1180 transform_scs(tctx
, inst
);
1182 case TGSI_OPCODE_LRP
:
1183 if (!ctx
->config
->lower_LRP
)
1185 transform_lrp(tctx
, inst
);
1187 case TGSI_OPCODE_FRC
:
1188 if (!ctx
->config
->lower_FRC
)
1190 transform_frc(tctx
, inst
);
1192 case TGSI_OPCODE_POW
:
1193 if (!ctx
->config
->lower_POW
)
1195 transform_pow(tctx
, inst
);
1197 case TGSI_OPCODE_LIT
:
1198 if (!ctx
->config
->lower_LIT
)
1200 transform_lit(tctx
, inst
);
1202 case TGSI_OPCODE_EXP
:
1203 if (!ctx
->config
->lower_EXP
)
1205 transform_exp(tctx
, inst
);
1207 case TGSI_OPCODE_LOG
:
1208 if (!ctx
->config
->lower_LOG
)
1210 transform_log(tctx
, inst
);
1212 case TGSI_OPCODE_DP4
:
1213 if (!ctx
->config
->lower_DP4
)
1215 transform_dotp(tctx
, inst
);
1217 case TGSI_OPCODE_DP3
:
1218 if (!ctx
->config
->lower_DP3
)
1220 transform_dotp(tctx
, inst
);
1222 case TGSI_OPCODE_DPH
:
1223 if (!ctx
->config
->lower_DPH
)
1225 transform_dotp(tctx
, inst
);
1227 case TGSI_OPCODE_DP2
:
1228 if (!ctx
->config
->lower_DP2
)
1230 transform_dotp(tctx
, inst
);
1232 case TGSI_OPCODE_DP2A
:
1233 if (!ctx
->config
->lower_DP2A
)
1235 transform_dotp(tctx
, inst
);
1239 tctx
->emit_instruction(tctx
, inst
);
1244 /* returns NULL if no lowering required, else returns the new
1245 * tokens (which caller is required to free()). In either case
1246 * returns the current info.
1248 const struct tgsi_token
*
1249 fd_transform_lowering(const struct fd_lowering_config
*config
,
1250 const struct tgsi_token
*tokens
,
1251 struct tgsi_shader_info
*info
)
1253 struct fd_lowering_context ctx
;
1254 struct tgsi_token
*newtoks
;
1257 memset(&ctx
, 0, sizeof(ctx
));
1258 ctx
.base
.transform_instruction
= transform_instr
;
1260 ctx
.config
= config
;
1262 tgsi_scan_shader(tokens
, info
);
1264 /* if we are adding fragment shader support to emulate two-sided
1265 * color, then figure out the number of additional inputs we need
1266 * to create for BCOLOR's..
1268 if ((info
->processor
== TGSI_PROCESSOR_FRAGMENT
) &&
1269 config
->color_two_side
) {
1272 for (i
= 0; i
<= info
->file_max
[TGSI_FILE_INPUT
]; i
++) {
1273 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_COLOR
)
1274 ctx
.two_side_idx
[ctx
.two_side_colors
++] = i
;
1275 if (info
->input_semantic_name
[i
] == TGSI_SEMANTIC_FACE
)
1280 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1281 /* if there are no instructions to lower, then we are done: */
1296 ctx
.two_side_colors
))
1300 _debug_printf("BEFORE:");
1301 tgsi_dump(tokens
, 0);
1305 newlen
= tgsi_num_tokens(tokens
);
1307 newlen
+= DST_GROW
* OPCS(DST
);
1308 numtmp
= MAX2(numtmp
, DST_TMP
);
1311 newlen
+= XPD_GROW
* OPCS(XPD
);
1312 numtmp
= MAX2(numtmp
, XPD_TMP
);
1315 newlen
+= SCS_GROW
* OPCS(SCS
);
1316 numtmp
= MAX2(numtmp
, SCS_TMP
);
1319 newlen
+= LRP_GROW
* OPCS(LRP
);
1320 numtmp
= MAX2(numtmp
, LRP_TMP
);
1323 newlen
+= FRC_GROW
* OPCS(FRC
);
1324 numtmp
= MAX2(numtmp
, FRC_TMP
);
1327 newlen
+= POW_GROW
* OPCS(POW
);
1328 numtmp
= MAX2(numtmp
, POW_TMP
);
1331 newlen
+= LIT_GROW
* OPCS(LIT
);
1332 numtmp
= MAX2(numtmp
, LIT_TMP
);
1335 newlen
+= EXP_GROW
* OPCS(EXP
);
1336 numtmp
= MAX2(numtmp
, EXP_TMP
);
1339 newlen
+= LOG_GROW
* OPCS(LOG
);
1340 numtmp
= MAX2(numtmp
, LOG_TMP
);
1343 newlen
+= DP4_GROW
* OPCS(DP4
);
1344 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1347 newlen
+= DP3_GROW
* OPCS(DP3
);
1348 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1351 newlen
+= DPH_GROW
* OPCS(DPH
);
1352 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1355 newlen
+= DP2_GROW
* OPCS(DP2
);
1356 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1359 newlen
+= DP2A_GROW
* OPCS(DP2A
);
1360 numtmp
= MAX2(numtmp
, DOTP_TMP
);
1363 /* specifically don't include two_side_colors temps in the count: */
1364 ctx
.numtmp
= numtmp
;
1366 if (ctx
.two_side_colors
) {
1367 newlen
+= TWOSIDE_GROW(ctx
.two_side_colors
);
1368 /* note: we permanently consume temp regs, re-writing references
1369 * to IN.COLOR[n] to TEMP[m] (holding the output of the CMP
1370 * instruction that selects which varying to use):
1372 numtmp
+= ctx
.two_side_colors
;
1375 newlen
+= 2 * numtmp
;
1376 newlen
+= 5; /* immediate */
1378 newtoks
= tgsi_alloc_tokens(newlen
);
1382 tgsi_transform_shader(tokens
, newtoks
, newlen
, &ctx
.base
);
1384 tgsi_scan_shader(newtoks
, info
);
1387 _debug_printf("AFTER:");
1388 tgsi_dump(newtoks
, 0);